diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -3,247046 +3,178929 @@ "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, - "global_step": 35289, + "global_step": 25558, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { - "epoch": 2.833744226246139e-05, + "epoch": 3.912669222943892e-05, "grad_norm": 0.0, - "learning_rate": 1.8885741265344666e-08, - "loss": 1.9338, + "learning_rate": 2.607561929595828e-08, + "loss": 2.2258, "step": 1 }, { - "epoch": 5.667488452492278e-05, + "epoch": 7.825338445887784e-05, "grad_norm": 0.0, - "learning_rate": 3.777148253068933e-08, - "loss": 1.8518, + "learning_rate": 5.215123859191656e-08, + "loss": 2.2266, "step": 2 }, { - "epoch": 8.501232678738417e-05, + "epoch": 0.00011738007668831677, "grad_norm": 0.0, - "learning_rate": 5.6657223796034004e-08, - "loss": 1.916, + "learning_rate": 7.822685788787485e-08, + "loss": 2.1685, "step": 3 }, { - "epoch": 0.00011334976904984556, + "epoch": 0.0001565067689177557, "grad_norm": 0.0, - "learning_rate": 7.554296506137866e-08, - "loss": 1.8867, + "learning_rate": 1.0430247718383312e-07, + "loss": 2.251, "step": 4 }, { - "epoch": 0.00014168721131230694, + "epoch": 0.0001956334611471946, "grad_norm": 0.0, - "learning_rate": 9.442870632672333e-08, - "loss": 1.8817, + "learning_rate": 1.3037809647979142e-07, + "loss": 2.3447, "step": 5 }, { - "epoch": 0.00017002465357476835, + "epoch": 0.00023476015337663353, "grad_norm": 0.0, - "learning_rate": 1.1331444759206801e-07, - "loss": 1.9254, + "learning_rate": 1.564537157757497e-07, + "loss": 2.3401, "step": 6 }, { - "epoch": 0.00019836209583722972, + "epoch": 0.00027388684560607245, "grad_norm": 0.0, - "learning_rate": 1.3220018885741267e-07, - "loss": 2.0063, + "learning_rate": 1.8252933507170796e-07, + "loss": 1.9788, "step": 7 }, { - "epoch": 0.00022669953809969113, + "epoch": 0.0003130135378355114, "grad_norm": 0.0, - "learning_rate": 1.5108593012275733e-07, - "loss": 1.7539, + "learning_rate": 2.0860495436766624e-07, + "loss": 2.1409, "step": 8 }, { - "epoch": 0.0002550369803621525, + "epoch": 0.0003521402300649503, "grad_norm": 0.0, - "learning_rate": 1.69971671388102e-07, - "loss": 1.948, + "learning_rate": 2.3468057366362453e-07, + "loss": 2.2629, "step": 9 }, { - "epoch": 0.0002833744226246139, + "epoch": 0.0003912669222943892, "grad_norm": 0.0, - "learning_rate": 1.8885741265344666e-07, - "loss": 1.9763, + "learning_rate": 2.6075619295958284e-07, + "loss": 1.9866, "step": 10 }, { - "epoch": 0.0003117118648870753, + "epoch": 0.00043039361452382814, "grad_norm": 0.0, - "learning_rate": 2.0774315391879134e-07, - "loss": 1.8718, + "learning_rate": 2.868318122555411e-07, + "loss": 1.8477, "step": 11 }, { - "epoch": 0.0003400493071495367, + "epoch": 0.00046952030675326707, "grad_norm": 0.0, - "learning_rate": 2.2662889518413602e-07, - "loss": 1.9429, + "learning_rate": 3.129074315514994e-07, + "loss": 2.0034, "step": 12 }, { - "epoch": 0.00036838674941199807, + "epoch": 0.000508646998982706, "grad_norm": 0.0, - "learning_rate": 2.4551463644948064e-07, - "loss": 1.7384, + "learning_rate": 3.3898305084745766e-07, + "loss": 2.1292, "step": 13 }, { - "epoch": 0.00039672419167445944, + "epoch": 0.0005477736912121449, "grad_norm": 0.0, - "learning_rate": 2.6440037771482535e-07, - "loss": 1.8365, + "learning_rate": 3.650586701434159e-07, + "loss": 1.7556, "step": 14 }, { - "epoch": 0.0004250616339369209, + "epoch": 0.0005869003834415838, "grad_norm": 0.0, - "learning_rate": 2.8328611898017e-07, - "loss": 1.807, + "learning_rate": 3.9113428943937423e-07, + "loss": 2.0747, "step": 15 }, { - "epoch": 0.00045339907619938225, + "epoch": 0.0006260270756710228, "grad_norm": 0.0, - "learning_rate": 3.0217186024551465e-07, - "loss": 1.7406, + "learning_rate": 4.172099087353325e-07, + "loss": 1.7646, "step": 16 }, { - "epoch": 0.00048173651846184363, + "epoch": 0.0006651537679004617, "grad_norm": 0.0, - "learning_rate": 3.2105760151085936e-07, - "loss": 1.7758, + "learning_rate": 4.432855280312908e-07, + "loss": 1.9348, "step": 17 }, { - "epoch": 0.000510073960724305, + "epoch": 0.0007042804601299006, "grad_norm": 0.0, - "learning_rate": 3.39943342776204e-07, - "loss": 1.9236, + "learning_rate": 4.6936114732724906e-07, + "loss": 1.873, "step": 18 }, { - "epoch": 0.0005384114029867664, + "epoch": 0.0007434071523593395, "grad_norm": 0.0, - "learning_rate": 3.588290840415486e-07, - "loss": 1.7365, + "learning_rate": 4.954367666232074e-07, + "loss": 1.8451, "step": 19 }, { - "epoch": 0.0005667488452492278, + "epoch": 0.0007825338445887784, "grad_norm": 0.0, - "learning_rate": 3.777148253068933e-07, - "loss": 1.6179, + "learning_rate": 5.215123859191657e-07, + "loss": 1.8474, "step": 20 }, { - "epoch": 0.0005950862875116892, + "epoch": 0.0008216605368182174, "grad_norm": 0.0, - "learning_rate": 3.9660056657223797e-07, - "loss": 1.7025, + "learning_rate": 5.475880052151239e-07, + "loss": 1.6666, "step": 21 }, { - "epoch": 0.0006234237297741506, + "epoch": 0.0008607872290476563, "grad_norm": 0.0, - "learning_rate": 4.154863078375827e-07, - "loss": 1.7085, + "learning_rate": 5.736636245110822e-07, + "loss": 1.5818, "step": 22 }, { - "epoch": 0.000651761172036612, + "epoch": 0.0008999139212770952, "grad_norm": 0.0, - "learning_rate": 4.3437204910292733e-07, - "loss": 1.6545, + "learning_rate": 5.997392438070405e-07, + "loss": 1.8182, "step": 23 }, { - "epoch": 0.0006800986142990734, + "epoch": 0.0009390406135065341, "grad_norm": 0.0, - "learning_rate": 4.5325779036827203e-07, - "loss": 1.5475, + "learning_rate": 6.258148631029988e-07, + "loss": 1.7399, "step": 24 }, { - "epoch": 0.0007084360565615348, + "epoch": 0.000978167305735973, "grad_norm": 0.0, - "learning_rate": 4.721435316336167e-07, - "loss": 1.6116, + "learning_rate": 6.51890482398957e-07, + "loss": 1.4778, "step": 25 }, { - "epoch": 0.0007367734988239961, + "epoch": 0.001017293997965412, "grad_norm": 0.0, - "learning_rate": 4.910292728989613e-07, - "loss": 1.5748, + "learning_rate": 6.779661016949153e-07, + "loss": 1.5547, "step": 26 }, { - "epoch": 0.0007651109410864575, + "epoch": 0.001056420690194851, "grad_norm": 0.0, - "learning_rate": 5.09915014164306e-07, - "loss": 1.6433, + "learning_rate": 7.040417209908735e-07, + "loss": 1.6879, "step": 27 }, { - "epoch": 0.0007934483833489189, + "epoch": 0.0010955473824242898, "grad_norm": 0.0, - "learning_rate": 5.288007554296507e-07, - "loss": 1.6883, + "learning_rate": 7.301173402868318e-07, + "loss": 1.5538, "step": 28 }, { - "epoch": 0.0008217858256113803, + "epoch": 0.0011346740746537287, "grad_norm": 0.0, - "learning_rate": 5.476864966949953e-07, - "loss": 1.6438, + "learning_rate": 7.561929595827903e-07, + "loss": 1.5094, "step": 29 }, { - "epoch": 0.0008501232678738418, + "epoch": 0.0011738007668831677, "grad_norm": 0.0, - "learning_rate": 5.6657223796034e-07, - "loss": 1.6052, + "learning_rate": 7.822685788787485e-07, + "loss": 1.5953, "step": 30 }, { - "epoch": 0.0008784607101363031, + "epoch": 0.0012129274591126066, "grad_norm": 0.0, - "learning_rate": 5.854579792256847e-07, - "loss": 1.744, + "learning_rate": 8.083441981747067e-07, + "loss": 1.7544, "step": 31 }, { - "epoch": 0.0009067981523987645, + "epoch": 0.0012520541513420455, "grad_norm": 0.0, - "learning_rate": 6.043437204910293e-07, - "loss": 1.5133, + "learning_rate": 8.34419817470665e-07, + "loss": 1.3999, "step": 32 }, { - "epoch": 0.0009351355946612259, + "epoch": 0.0012911808435714844, "grad_norm": 0.0, - "learning_rate": 6.232294617563739e-07, - "loss": 1.493, + "learning_rate": 8.604954367666232e-07, + "loss": 1.5848, "step": 33 }, { - "epoch": 0.0009634730369236873, + "epoch": 0.0013303075358009234, "grad_norm": 0.0, - "learning_rate": 6.421152030217187e-07, - "loss": 1.4501, + "learning_rate": 8.865710560625816e-07, + "loss": 1.6837, "step": 34 }, { - "epoch": 0.0009918104791861487, + "epoch": 0.0013694342280303623, "grad_norm": 0.0, - "learning_rate": 6.610009442870634e-07, - "loss": 1.4948, + "learning_rate": 9.126466753585399e-07, + "loss": 1.5407, "step": 35 }, { - "epoch": 0.00102014792144861, + "epoch": 0.0014085609202598012, "grad_norm": 0.0, - "learning_rate": 6.79886685552408e-07, - "loss": 1.3794, + "learning_rate": 9.387222946544981e-07, + "loss": 1.6915, "step": 36 }, { - "epoch": 0.0010484853637110715, + "epoch": 0.0014476876124892401, "grad_norm": 0.0, - "learning_rate": 6.987724268177526e-07, - "loss": 1.4128, + "learning_rate": 9.647979139504563e-07, + "loss": 1.5996, "step": 37 }, { - "epoch": 0.0010768228059735328, + "epoch": 0.001486814304718679, "grad_norm": 0.0, - "learning_rate": 7.176581680830972e-07, - "loss": 1.4231, + "learning_rate": 9.908735332464147e-07, + "loss": 1.6036, "step": 38 }, { - "epoch": 0.0011051602482359943, + "epoch": 0.001525940996948118, "grad_norm": 0.0, - "learning_rate": 7.365439093484419e-07, - "loss": 1.7042, + "learning_rate": 1.016949152542373e-06, + "loss": 1.5721, "step": 39 }, { - "epoch": 0.0011334976904984555, + "epoch": 0.0015650676891775569, "grad_norm": 0.0, - "learning_rate": 7.554296506137866e-07, - "loss": 1.4255, + "learning_rate": 1.0430247718383314e-06, + "loss": 1.4548, "step": 40 }, { - "epoch": 0.001161835132760917, + "epoch": 0.0016041943814069958, "grad_norm": 0.0, - "learning_rate": 7.743153918791313e-07, - "loss": 1.5361, + "learning_rate": 1.0691003911342896e-06, + "loss": 1.4706, "step": 41 }, { - "epoch": 0.0011901725750233785, + "epoch": 0.0016433210736364347, "grad_norm": 0.0, - "learning_rate": 7.932011331444759e-07, - "loss": 1.5909, + "learning_rate": 1.0951760104302478e-06, + "loss": 1.6071, "step": 42 }, { - "epoch": 0.0012185100172858398, + "epoch": 0.0016824477658658737, "grad_norm": 0.0, - "learning_rate": 8.120868744098206e-07, - "loss": 1.5424, + "learning_rate": 1.121251629726206e-06, + "loss": 1.4688, "step": 43 }, { - "epoch": 0.0012468474595483013, + "epoch": 0.0017215744580953126, "grad_norm": 0.0, - "learning_rate": 8.309726156751653e-07, - "loss": 1.5165, + "learning_rate": 1.1473272490221644e-06, + "loss": 1.6227, "step": 44 }, { - "epoch": 0.0012751849018107625, + "epoch": 0.0017607011503247515, "grad_norm": 0.0, - "learning_rate": 8.4985835694051e-07, - "loss": 1.3527, + "learning_rate": 1.1734028683181228e-06, + "loss": 1.4862, "step": 45 }, { - "epoch": 0.001303522344073224, + "epoch": 0.0017998278425541904, "grad_norm": 0.0, - "learning_rate": 8.687440982058547e-07, - "loss": 1.4314, + "learning_rate": 1.199478487614081e-06, + "loss": 1.6094, "step": 46 }, { - "epoch": 0.0013318597863356853, + "epoch": 0.0018389545347836293, "grad_norm": 0.0, - "learning_rate": 8.876298394711994e-07, - "loss": 1.4909, + "learning_rate": 1.2255541069100392e-06, + "loss": 1.6226, "step": 47 }, { - "epoch": 0.0013601972285981468, + "epoch": 0.0018780812270130683, "grad_norm": 0.0, - "learning_rate": 9.065155807365441e-07, - "loss": 1.4704, + "learning_rate": 1.2516297262059976e-06, + "loss": 1.5422, "step": 48 }, { - "epoch": 0.001388534670860608, + "epoch": 0.0019172079192425072, "grad_norm": 0.0, - "learning_rate": 9.254013220018887e-07, - "loss": 1.5158, + "learning_rate": 1.2777053455019558e-06, + "loss": 1.6066, "step": 49 }, { - "epoch": 0.0014168721131230695, + "epoch": 0.001956334611471946, "grad_norm": 0.0, - "learning_rate": 9.442870632672334e-07, - "loss": 1.4045, + "learning_rate": 1.303780964797914e-06, + "loss": 1.4391, "step": 50 }, { - "epoch": 0.001445209555385531, + "epoch": 0.0019954613037013852, "grad_norm": 0.0, - "learning_rate": 9.631728045325779e-07, - "loss": 1.5609, + "learning_rate": 1.3298565840938724e-06, + "loss": 1.4432, "step": 51 }, { - "epoch": 0.0014735469976479923, + "epoch": 0.002034587995930824, "grad_norm": 0.0, - "learning_rate": 9.820585457979226e-07, - "loss": 1.446, + "learning_rate": 1.3559322033898307e-06, + "loss": 1.4398, "step": 52 }, { - "epoch": 0.0015018844399104538, + "epoch": 0.002073714688160263, "grad_norm": 0.0, - "learning_rate": 1.0009442870632673e-06, - "loss": 1.2448, + "learning_rate": 1.3820078226857889e-06, + "loss": 1.4083, "step": 53 }, { - "epoch": 0.001530221882172915, + "epoch": 0.002112841380389702, "grad_norm": 0.0, - "learning_rate": 1.019830028328612e-06, - "loss": 1.4854, + "learning_rate": 1.408083441981747e-06, + "loss": 1.3663, "step": 54 }, { - "epoch": 0.0015585593244353765, + "epoch": 0.002151968072619141, "grad_norm": 0.0, - "learning_rate": 1.0387157695939567e-06, - "loss": 1.4353, + "learning_rate": 1.4341590612777055e-06, + "loss": 1.5204, "step": 55 }, { - "epoch": 0.0015868967666978378, + "epoch": 0.0021910947648485796, "grad_norm": 0.0, - "learning_rate": 1.0576015108593014e-06, - "loss": 1.4887, + "learning_rate": 1.4602346805736637e-06, + "loss": 1.4727, "step": 56 }, { - "epoch": 0.0016152342089602993, + "epoch": 0.0022302214570780188, "grad_norm": 0.0, - "learning_rate": 1.0764872521246459e-06, - "loss": 1.4382, + "learning_rate": 1.4863102998696219e-06, + "loss": 1.6703, "step": 57 }, { - "epoch": 0.0016435716512227605, + "epoch": 0.0022693481493074575, "grad_norm": 0.0, - "learning_rate": 1.0953729933899906e-06, - "loss": 1.4327, + "learning_rate": 1.5123859191655805e-06, + "loss": 1.5342, "step": 58 }, { - "epoch": 0.001671909093485222, + "epoch": 0.0023084748415368966, "grad_norm": 0.0, - "learning_rate": 1.1142587346553353e-06, - "loss": 1.5363, + "learning_rate": 1.5384615384615387e-06, + "loss": 1.3996, "step": 59 }, { - "epoch": 0.0017002465357476835, + "epoch": 0.0023476015337663353, "grad_norm": 0.0, - "learning_rate": 1.13314447592068e-06, - "loss": 1.3883, + "learning_rate": 1.564537157757497e-06, + "loss": 1.4977, "step": 60 }, { - "epoch": 0.0017285839780101448, + "epoch": 0.0023867282259957745, "grad_norm": 0.0, - "learning_rate": 1.1520302171860247e-06, - "loss": 1.4332, + "learning_rate": 1.5906127770534551e-06, + "loss": 1.5876, "step": 61 }, { - "epoch": 0.0017569214202726063, + "epoch": 0.002425854918225213, "grad_norm": 0.0, - "learning_rate": 1.1709159584513694e-06, - "loss": 1.2873, + "learning_rate": 1.6166883963494133e-06, + "loss": 1.3766, "step": 62 }, { - "epoch": 0.0017852588625350675, + "epoch": 0.0024649816104546523, "grad_norm": 0.0, - "learning_rate": 1.1898016997167141e-06, - "loss": 1.4282, + "learning_rate": 1.6427640156453717e-06, + "loss": 1.4597, "step": 63 }, { - "epoch": 0.001813596304797529, + "epoch": 0.002504108302684091, "grad_norm": 0.0, - "learning_rate": 1.2086874409820586e-06, - "loss": 1.2802, + "learning_rate": 1.66883963494133e-06, + "loss": 1.5759, "step": 64 }, { - "epoch": 0.0018419337470599903, + "epoch": 0.00254323499491353, "grad_norm": 0.0, - "learning_rate": 1.2275731822474033e-06, - "loss": 1.4505, + "learning_rate": 1.6949152542372882e-06, + "loss": 1.6177, "step": 65 }, { - "epoch": 0.0018702711893224518, + "epoch": 0.002582361687142969, "grad_norm": 0.0, - "learning_rate": 1.2464589235127478e-06, - "loss": 1.376, + "learning_rate": 1.7209908735332464e-06, + "loss": 1.5114, "step": 66 }, { - "epoch": 0.001898608631584913, + "epoch": 0.002621488379372408, "grad_norm": 0.0, - "learning_rate": 1.2653446647780925e-06, - "loss": 1.3548, + "learning_rate": 1.7470664928292048e-06, + "loss": 1.4045, "step": 67 }, { - "epoch": 0.0019269460738473745, + "epoch": 0.0026606150716018467, "grad_norm": 0.0, - "learning_rate": 1.2842304060434374e-06, - "loss": 1.2231, + "learning_rate": 1.7731421121251632e-06, + "loss": 1.3693, "step": 68 }, { - "epoch": 0.001955283516109836, + "epoch": 0.002699741763831286, "grad_norm": 0.0, - "learning_rate": 1.303116147308782e-06, - "loss": 1.3114, + "learning_rate": 1.7992177314211214e-06, + "loss": 1.4917, "step": 69 }, { - "epoch": 0.0019836209583722975, + "epoch": 0.0027388684560607245, "grad_norm": 0.0, - "learning_rate": 1.3220018885741268e-06, - "loss": 1.2469, + "learning_rate": 1.8252933507170798e-06, + "loss": 1.4686, "step": 70 }, { - "epoch": 0.0020119584006347588, + "epoch": 0.0027779951482901637, "grad_norm": 0.0, - "learning_rate": 1.3408876298394713e-06, - "loss": 1.3226, + "learning_rate": 1.851368970013038e-06, + "loss": 1.506, "step": 71 }, { - "epoch": 0.00204029584289722, + "epoch": 0.0028171218405196024, "grad_norm": 0.0, - "learning_rate": 1.359773371104816e-06, - "loss": 1.3387, + "learning_rate": 1.8774445893089962e-06, + "loss": 1.4941, "step": 72 }, { - "epoch": 0.0020686332851596813, + "epoch": 0.0028562485327490415, "grad_norm": 0.0, - "learning_rate": 1.3786591123701607e-06, - "loss": 1.2803, + "learning_rate": 1.9035202086049544e-06, + "loss": 1.5632, "step": 73 }, { - "epoch": 0.002096970727422143, + "epoch": 0.0028953752249784802, "grad_norm": 0.0, - "learning_rate": 1.3975448536355052e-06, - "loss": 1.3319, + "learning_rate": 1.9295958279009126e-06, + "loss": 1.4847, "step": 74 }, { - "epoch": 0.0021253081696846043, + "epoch": 0.0029345019172079194, "grad_norm": 0.0, - "learning_rate": 1.41643059490085e-06, - "loss": 1.4272, + "learning_rate": 1.955671447196871e-06, + "loss": 1.4893, "step": 75 }, { - "epoch": 0.0021536456119470655, + "epoch": 0.002973628609437358, "grad_norm": 0.0, - "learning_rate": 1.4353163361661944e-06, - "loss": 1.3019, + "learning_rate": 1.9817470664928295e-06, + "loss": 1.3707, "step": 76 }, { - "epoch": 0.0021819830542095272, + "epoch": 0.0030127553016667972, "grad_norm": 0.0, - "learning_rate": 1.4542020774315394e-06, - "loss": 1.3168, + "learning_rate": 2.0078226857887877e-06, + "loss": 1.484, "step": 77 }, { - "epoch": 0.0022103204964719885, + "epoch": 0.003051881993896236, "grad_norm": 0.0, - "learning_rate": 1.4730878186968839e-06, - "loss": 1.4648, + "learning_rate": 2.033898305084746e-06, + "loss": 1.4838, "step": 78 }, { - "epoch": 0.0022386579387344498, + "epoch": 0.003091008686125675, "grad_norm": 0.0, - "learning_rate": 1.4919735599622288e-06, - "loss": 1.2441, + "learning_rate": 2.0599739243807045e-06, + "loss": 1.4841, "step": 79 }, { - "epoch": 0.002266995380996911, + "epoch": 0.0031301353783551138, "grad_norm": 0.0, - "learning_rate": 1.5108593012275733e-06, - "loss": 1.2635, + "learning_rate": 2.0860495436766627e-06, + "loss": 1.4669, "step": 80 }, { - "epoch": 0.0022953328232593728, + "epoch": 0.003169262070584553, "grad_norm": 0.0, - "learning_rate": 1.529745042492918e-06, - "loss": 1.3317, + "learning_rate": 2.112125162972621e-06, + "loss": 1.385, "step": 81 }, { - "epoch": 0.002323670265521834, + "epoch": 0.0032083887628139916, "grad_norm": 0.0, - "learning_rate": 1.5486307837582627e-06, - "loss": 1.3459, + "learning_rate": 2.138200782268579e-06, + "loss": 1.4819, "step": 82 }, { - "epoch": 0.0023520077077842953, + "epoch": 0.0032475154550434308, "grad_norm": 0.0, - "learning_rate": 1.5675165250236074e-06, - "loss": 1.4001, + "learning_rate": 2.1642764015645373e-06, + "loss": 1.4581, "step": 83 }, { - "epoch": 0.002380345150046757, + "epoch": 0.0032866421472728695, "grad_norm": 0.0, - "learning_rate": 1.5864022662889519e-06, - "loss": 1.3108, + "learning_rate": 2.1903520208604955e-06, + "loss": 1.4097, "step": 84 }, { - "epoch": 0.0024086825923092183, + "epoch": 0.0033257688395023086, "grad_norm": 0.0, - "learning_rate": 1.6052880075542968e-06, - "loss": 1.299, + "learning_rate": 2.2164276401564537e-06, + "loss": 1.5341, "step": 85 }, { - "epoch": 0.0024370200345716795, + "epoch": 0.0033648955317317473, "grad_norm": 0.0, - "learning_rate": 1.6241737488196413e-06, - "loss": 1.2575, + "learning_rate": 2.242503259452412e-06, + "loss": 1.2742, "step": 86 }, { - "epoch": 0.002465357476834141, + "epoch": 0.0034040222239611864, "grad_norm": 0.0, - "learning_rate": 1.6430594900849862e-06, - "loss": 1.3018, + "learning_rate": 2.2685788787483706e-06, + "loss": 1.3947, "step": 87 }, { - "epoch": 0.0024936949190966025, + "epoch": 0.003443148916190625, "grad_norm": 0.0, - "learning_rate": 1.6619452313503307e-06, - "loss": 1.2533, + "learning_rate": 2.2946544980443288e-06, + "loss": 1.5566, "step": 88 }, { - "epoch": 0.0025220323613590638, + "epoch": 0.0034822756084200643, "grad_norm": 0.0, - "learning_rate": 1.6808309726156752e-06, - "loss": 1.373, + "learning_rate": 2.320730117340287e-06, + "loss": 1.5649, "step": 89 }, { - "epoch": 0.002550369803621525, + "epoch": 0.003521402300649503, "grad_norm": 0.0, - "learning_rate": 1.69971671388102e-06, - "loss": 1.3206, + "learning_rate": 2.3468057366362456e-06, + "loss": 1.5057, "step": 90 }, { - "epoch": 0.0025787072458839863, + "epoch": 0.003560528992878942, "grad_norm": 0.0, - "learning_rate": 1.7186024551463646e-06, - "loss": 1.2732, + "learning_rate": 2.372881355932204e-06, + "loss": 1.3528, "step": 91 }, { - "epoch": 0.002607044688146448, + "epoch": 0.003599655685108381, "grad_norm": 0.0, - "learning_rate": 1.7374881964117093e-06, - "loss": 1.3606, + "learning_rate": 2.398956975228162e-06, + "loss": 1.3575, "step": 92 }, { - "epoch": 0.0026353821304089093, + "epoch": 0.00363878237733782, "grad_norm": 0.0, - "learning_rate": 1.7563739376770538e-06, - "loss": 1.2191, + "learning_rate": 2.42503259452412e-06, + "loss": 1.4628, "step": 93 }, { - "epoch": 0.0026637195726713706, + "epoch": 0.0036779090695672587, "grad_norm": 0.0, - "learning_rate": 1.7752596789423987e-06, - "loss": 1.4163, + "learning_rate": 2.4511082138200784e-06, + "loss": 1.4552, "step": 94 }, { - "epoch": 0.0026920570149338323, + "epoch": 0.003717035761796698, "grad_norm": 0.0, - "learning_rate": 1.7941454202077432e-06, - "loss": 1.432, + "learning_rate": 2.4771838331160366e-06, + "loss": 1.4139, "step": 95 }, { - "epoch": 0.0027203944571962935, + "epoch": 0.0037561624540261365, "grad_norm": 0.0, - "learning_rate": 1.8130311614730881e-06, - "loss": 1.2738, + "learning_rate": 2.5032594524119952e-06, + "loss": 1.3741, "step": 96 }, { - "epoch": 0.002748731899458755, + "epoch": 0.0037952891462555757, "grad_norm": 0.0, - "learning_rate": 1.8319169027384326e-06, - "loss": 1.2629, + "learning_rate": 2.5293350717079534e-06, + "loss": 1.394, "step": 97 }, { - "epoch": 0.002777069341721216, + "epoch": 0.0038344158384850144, "grad_norm": 0.0, - "learning_rate": 1.8508026440037773e-06, - "loss": 1.2672, + "learning_rate": 2.5554106910039117e-06, + "loss": 1.3538, "step": 98 }, { - "epoch": 0.0028054067839836778, + "epoch": 0.0038735425307144535, "grad_norm": 0.0, - "learning_rate": 1.869688385269122e-06, - "loss": 1.5001, + "learning_rate": 2.58148631029987e-06, + "loss": 1.309, "step": 99 }, { - "epoch": 0.002833744226246139, + "epoch": 0.003912669222943892, "grad_norm": 0.0, - "learning_rate": 1.8885741265344667e-06, - "loss": 1.272, + "learning_rate": 2.607561929595828e-06, + "loss": 1.3005, "step": 100 }, { - "epoch": 0.0028620816685086003, + "epoch": 0.003951795915173331, "grad_norm": 0.0, - "learning_rate": 1.9074598677998114e-06, - "loss": 1.3843, + "learning_rate": 2.6336375488917863e-06, + "loss": 1.3753, "step": 101 }, { - "epoch": 0.002890419110771062, + "epoch": 0.0039909226074027705, "grad_norm": 0.0, - "learning_rate": 1.9263456090651557e-06, - "loss": 1.2614, + "learning_rate": 2.659713168187745e-06, + "loss": 1.2949, "step": 102 }, { - "epoch": 0.0029187565530335233, + "epoch": 0.004030049299632209, "grad_norm": 0.0, - "learning_rate": 1.945231350330501e-06, - "loss": 1.1707, + "learning_rate": 2.685788787483703e-06, + "loss": 1.3506, "step": 103 }, { - "epoch": 0.0029470939952959845, + "epoch": 0.004069175991861648, "grad_norm": 0.0, - "learning_rate": 1.964117091595845e-06, - "loss": 1.3327, + "learning_rate": 2.7118644067796613e-06, + "loss": 1.3557, "step": 104 }, { - "epoch": 0.002975431437558446, + "epoch": 0.004108302684091087, "grad_norm": 0.0, - "learning_rate": 1.98300283286119e-06, - "loss": 1.3974, + "learning_rate": 2.7379400260756195e-06, + "loss": 1.2952, "step": 105 }, { - "epoch": 0.0030037688798209075, + "epoch": 0.004147429376320526, "grad_norm": 0.0, - "learning_rate": 2.0018885741265345e-06, - "loss": 1.4677, + "learning_rate": 2.7640156453715777e-06, + "loss": 1.4055, "step": 106 }, { - "epoch": 0.0030321063220833688, + "epoch": 0.0041865560685499644, "grad_norm": 0.0, - "learning_rate": 2.0207743153918793e-06, - "loss": 1.2856, + "learning_rate": 2.790091264667536e-06, + "loss": 1.3723, "step": 107 }, { - "epoch": 0.00306044376434583, + "epoch": 0.004225682760779404, "grad_norm": 0.0, - "learning_rate": 2.039660056657224e-06, - "loss": 1.2867, + "learning_rate": 2.816166883963494e-06, + "loss": 1.3752, "step": 108 }, { - "epoch": 0.0030887812066082918, + "epoch": 0.004264809453008843, "grad_norm": 0.0, - "learning_rate": 2.0585457979225687e-06, - "loss": 1.2441, + "learning_rate": 2.8422425032594523e-06, + "loss": 1.3835, "step": 109 }, { - "epoch": 0.003117118648870753, + "epoch": 0.004303936145238282, "grad_norm": 0.0, - "learning_rate": 2.0774315391879134e-06, - "loss": 1.1903, + "learning_rate": 2.868318122555411e-06, + "loss": 1.3755, "step": 110 }, { - "epoch": 0.0031454560911332143, + "epoch": 0.00434306283746772, "grad_norm": 0.0, - "learning_rate": 2.096317280453258e-06, - "loss": 1.1802, + "learning_rate": 2.894393741851369e-06, + "loss": 1.3472, "step": 111 }, { - "epoch": 0.0031737935333956756, + "epoch": 0.004382189529697159, "grad_norm": 0.0, - "learning_rate": 2.1152030217186028e-06, - "loss": 1.2908, + "learning_rate": 2.9204693611473274e-06, + "loss": 1.3887, "step": 112 }, { - "epoch": 0.0032021309756581373, + "epoch": 0.004421316221926598, "grad_norm": 0.0, - "learning_rate": 2.1340887629839475e-06, - "loss": 1.2318, + "learning_rate": 2.9465449804432856e-06, + "loss": 1.3813, "step": 113 }, { - "epoch": 0.0032304684179205985, + "epoch": 0.0044604429141560376, "grad_norm": 0.0, - "learning_rate": 2.1529745042492918e-06, - "loss": 1.2753, + "learning_rate": 2.9726205997392438e-06, + "loss": 1.3345, "step": 114 }, { - "epoch": 0.00325880586018306, + "epoch": 0.004499569606385476, "grad_norm": 0.0, - "learning_rate": 2.171860245514637e-06, - "loss": 1.3102, + "learning_rate": 2.9986962190352024e-06, + "loss": 1.394, "step": 115 }, { - "epoch": 0.003287143302445521, + "epoch": 0.004538696298614915, "grad_norm": 0.0, - "learning_rate": 2.190745986779981e-06, - "loss": 1.2073, + "learning_rate": 3.024771838331161e-06, + "loss": 1.3119, "step": 116 }, { - "epoch": 0.0033154807447079828, + "epoch": 0.004577822990844354, "grad_norm": 0.0, - "learning_rate": 2.209631728045326e-06, - "loss": 1.362, + "learning_rate": 3.0508474576271192e-06, + "loss": 1.3721, "step": 117 }, { - "epoch": 0.003343818186970444, + "epoch": 0.004616949683073793, "grad_norm": 0.0, - "learning_rate": 2.2285174693106706e-06, - "loss": 1.3385, + "learning_rate": 3.0769230769230774e-06, + "loss": 1.505, "step": 118 }, { - "epoch": 0.0033721556292329053, + "epoch": 0.0046560763753032315, "grad_norm": 0.0, - "learning_rate": 2.2474032105760153e-06, - "loss": 1.2264, + "learning_rate": 3.1029986962190356e-06, + "loss": 1.3782, "step": 119 }, { - "epoch": 0.003400493071495367, + "epoch": 0.004695203067532671, "grad_norm": 0.0, - "learning_rate": 2.26628895184136e-06, - "loss": 1.3238, + "learning_rate": 3.129074315514994e-06, + "loss": 1.3685, "step": 120 }, { - "epoch": 0.0034288305137578283, + "epoch": 0.00473432975976211, "grad_norm": 0.0, - "learning_rate": 2.2851746931067047e-06, - "loss": 1.2363, + "learning_rate": 3.155149934810952e-06, + "loss": 1.391, "step": 121 }, { - "epoch": 0.0034571679560202895, + "epoch": 0.004773456451991549, "grad_norm": 0.0, - "learning_rate": 2.3040604343720494e-06, - "loss": 1.2355, + "learning_rate": 3.1812255541069103e-06, + "loss": 1.3251, "step": 122 }, { - "epoch": 0.003485505398282751, + "epoch": 0.004812583144220987, "grad_norm": 0.0, - "learning_rate": 2.3229461756373937e-06, - "loss": 1.2152, + "learning_rate": 3.2073011734028685e-06, + "loss": 1.5154, "step": 123 }, { - "epoch": 0.0035138428405452125, + "epoch": 0.004851709836450426, "grad_norm": 0.0, - "learning_rate": 2.341831916902739e-06, - "loss": 1.3018, + "learning_rate": 3.2333767926988267e-06, + "loss": 1.3533, "step": 124 }, { - "epoch": 0.003542180282807674, + "epoch": 0.0048908365286798655, "grad_norm": 0.0, - "learning_rate": 2.360717658168083e-06, - "loss": 1.4257, + "learning_rate": 3.2594524119947853e-06, + "loss": 1.4227, "step": 125 }, { - "epoch": 0.003570517725070135, + "epoch": 0.004929963220909305, "grad_norm": 0.0, - "learning_rate": 2.3796033994334282e-06, - "loss": 1.3066, + "learning_rate": 3.2855280312907435e-06, + "loss": 1.4224, "step": 126 }, { - "epoch": 0.0035988551673325968, + "epoch": 0.004969089913138743, "grad_norm": 0.0, - "learning_rate": 2.3984891406987725e-06, - "loss": 1.2933, + "learning_rate": 3.3116036505867017e-06, + "loss": 1.3929, "step": 127 }, { - "epoch": 0.003627192609595058, + "epoch": 0.005008216605368182, "grad_norm": 0.0, - "learning_rate": 2.4173748819641172e-06, - "loss": 1.2506, + "learning_rate": 3.33767926988266e-06, + "loss": 1.447, "step": 128 }, { - "epoch": 0.0036555300518575193, + "epoch": 0.005047343297597621, "grad_norm": 0.0, - "learning_rate": 2.436260623229462e-06, - "loss": 1.2862, + "learning_rate": 3.363754889178618e-06, + "loss": 1.3491, "step": 129 }, { - "epoch": 0.0036838674941199806, + "epoch": 0.00508646998982706, "grad_norm": 0.0, - "learning_rate": 2.4551463644948066e-06, - "loss": 1.3924, + "learning_rate": 3.3898305084745763e-06, + "loss": 1.3488, "step": 130 }, { - "epoch": 0.0037122049363824423, + "epoch": 0.005125596682056499, "grad_norm": 0.0, - "learning_rate": 2.4740321057601513e-06, - "loss": 1.1953, + "learning_rate": 3.4159061277705345e-06, + "loss": 1.5355, "step": 131 }, { - "epoch": 0.0037405423786449035, + "epoch": 0.005164723374285938, "grad_norm": 0.0, - "learning_rate": 2.4929178470254956e-06, - "loss": 1.39, + "learning_rate": 3.4419817470664927e-06, + "loss": 1.5056, "step": 132 }, { - "epoch": 0.003768879820907365, + "epoch": 0.005203850066515377, "grad_norm": 0.0, - "learning_rate": 2.5118035882908403e-06, - "loss": 1.2482, + "learning_rate": 3.4680573663624513e-06, + "loss": 1.369, "step": 133 }, { - "epoch": 0.003797217263169826, + "epoch": 0.005242976758744816, "grad_norm": 0.0, - "learning_rate": 2.530689329556185e-06, - "loss": 1.3558, + "learning_rate": 3.4941329856584096e-06, + "loss": 1.2498, "step": 134 }, { - "epoch": 0.0038255547054322878, + "epoch": 0.005282103450974254, "grad_norm": 0.0, - "learning_rate": 2.54957507082153e-06, - "loss": 1.3318, + "learning_rate": 3.520208604954368e-06, + "loss": 1.3, "step": 135 }, { - "epoch": 0.003853892147694749, + "epoch": 0.005321230143203693, "grad_norm": 0.0, - "learning_rate": 2.568460812086875e-06, - "loss": 1.2079, + "learning_rate": 3.5462842242503264e-06, + "loss": 1.4124, "step": 136 }, { - "epoch": 0.0038822295899572103, + "epoch": 0.0053603568354331325, "grad_norm": 0.0, - "learning_rate": 2.587346553352219e-06, - "loss": 1.1694, + "learning_rate": 3.5723598435462846e-06, + "loss": 1.3636, "step": 137 }, { - "epoch": 0.003910567032219672, + "epoch": 0.005399483527662572, "grad_norm": 0.0, - "learning_rate": 2.606232294617564e-06, - "loss": 1.304, + "learning_rate": 3.598435462842243e-06, + "loss": 1.4164, "step": 138 }, { - "epoch": 0.003938904474482133, + "epoch": 0.00543861021989201, "grad_norm": 0.0, - "learning_rate": 2.6251180358829086e-06, - "loss": 1.3052, + "learning_rate": 3.6245110821382014e-06, + "loss": 1.2739, "step": 139 }, { - "epoch": 0.003967241916744595, + "epoch": 0.005477736912121449, "grad_norm": 0.0, - "learning_rate": 2.6440037771482537e-06, - "loss": 1.3549, + "learning_rate": 3.6505867014341596e-06, + "loss": 1.3809, "step": 140 }, { - "epoch": 0.003995579359007056, + "epoch": 0.005516863604350888, "grad_norm": 0.0, - "learning_rate": 2.662889518413598e-06, - "loss": 1.2641, + "learning_rate": 3.676662320730118e-06, + "loss": 1.4399, "step": 141 }, { - "epoch": 0.0040239168012695175, + "epoch": 0.005555990296580327, "grad_norm": 0.0, - "learning_rate": 2.6817752596789427e-06, - "loss": 1.2177, + "learning_rate": 3.702737940026076e-06, + "loss": 1.4296, "step": 142 }, { - "epoch": 0.004052254243531979, + "epoch": 0.005595116988809766, "grad_norm": 0.0, - "learning_rate": 2.7006610009442874e-06, - "loss": 1.2983, + "learning_rate": 3.7288135593220342e-06, + "loss": 1.3194, "step": 143 }, { - "epoch": 0.00408059168579444, + "epoch": 0.005634243681039205, "grad_norm": 0.0, - "learning_rate": 2.719546742209632e-06, - "loss": 1.2385, + "learning_rate": 3.7548891786179924e-06, + "loss": 1.3386, "step": 144 }, { - "epoch": 0.004108929128056902, + "epoch": 0.005673370373268644, "grad_norm": 0.0, - "learning_rate": 2.7384324834749764e-06, - "loss": 1.295, + "learning_rate": 3.7809647979139506e-06, + "loss": 1.5277, "step": 145 }, { - "epoch": 0.004137266570319363, + "epoch": 0.005712497065498083, "grad_norm": 0.0, - "learning_rate": 2.7573182247403215e-06, - "loss": 1.306, + "learning_rate": 3.807040417209909e-06, + "loss": 1.3021, "step": 146 }, { - "epoch": 0.004165604012581824, + "epoch": 0.005751623757727521, "grad_norm": 0.0, - "learning_rate": 2.776203966005666e-06, - "loss": 1.26, + "learning_rate": 3.8331160365058675e-06, + "loss": 1.3967, "step": 147 }, { - "epoch": 0.004193941454844286, + "epoch": 0.0057907504499569605, "grad_norm": 0.0, - "learning_rate": 2.7950897072710105e-06, - "loss": 1.2722, + "learning_rate": 3.859191655801825e-06, + "loss": 1.3381, "step": 148 }, { - "epoch": 0.004222278897106747, + "epoch": 0.0058298771421864, "grad_norm": 0.0, - "learning_rate": 2.813975448536355e-06, - "loss": 1.329, + "learning_rate": 3.885267275097784e-06, + "loss": 1.338, "step": 149 }, { - "epoch": 0.0042506163393692085, + "epoch": 0.005869003834415839, "grad_norm": 0.0, - "learning_rate": 2.8328611898017e-06, - "loss": 1.3113, + "learning_rate": 3.911342894393742e-06, + "loss": 1.4635, "step": 150 }, { - "epoch": 0.00427895378163167, + "epoch": 0.005908130526645277, "grad_norm": 0.0, - "learning_rate": 2.8517469310670446e-06, - "loss": 1.2202, + "learning_rate": 3.9374185136897e-06, + "loss": 1.3329, "step": 151 }, { - "epoch": 0.004307291223894131, + "epoch": 0.005947257218874716, "grad_norm": 0.0, - "learning_rate": 2.870632672332389e-06, - "loss": 1.3385, + "learning_rate": 3.963494132985659e-06, + "loss": 1.3795, "step": 152 }, { - "epoch": 0.004335628666156593, + "epoch": 0.005986383911104155, "grad_norm": 0.0, - "learning_rate": 2.889518413597734e-06, - "loss": 1.2448, + "learning_rate": 3.989569752281617e-06, + "loss": 1.3138, "step": 153 }, { - "epoch": 0.0043639661084190545, + "epoch": 0.0060255106033335944, "grad_norm": 0.0, - "learning_rate": 2.9084041548630787e-06, - "loss": 1.3074, + "learning_rate": 4.015645371577575e-06, + "loss": 1.3652, "step": 154 }, { - "epoch": 0.004392303550681515, + "epoch": 0.006064637295563033, "grad_norm": 0.0, - "learning_rate": 2.9272898961284234e-06, - "loss": 1.2231, + "learning_rate": 4.041720990873533e-06, + "loss": 1.3557, "step": 155 }, { - "epoch": 0.004420640992943977, + "epoch": 0.006103763987792472, "grad_norm": 0.0, - "learning_rate": 2.9461756373937677e-06, - "loss": 1.24, + "learning_rate": 4.067796610169492e-06, + "loss": 1.3422, "step": 156 }, { - "epoch": 0.004448978435206438, + "epoch": 0.006142890680021911, "grad_norm": 0.0, - "learning_rate": 2.9650613786591124e-06, - "loss": 1.2204, + "learning_rate": 4.09387222946545e-06, + "loss": 1.2483, "step": 157 }, { - "epoch": 0.0044773158774688996, + "epoch": 0.00618201737225135, "grad_norm": 0.0, - "learning_rate": 2.9839471199244575e-06, - "loss": 1.2688, + "learning_rate": 4.119947848761409e-06, + "loss": 1.3649, "step": 158 }, { - "epoch": 0.004505653319731361, + "epoch": 0.006221144064480788, "grad_norm": 0.0, - "learning_rate": 3.0028328611898022e-06, - "loss": 1.1301, + "learning_rate": 4.146023468057367e-06, + "loss": 1.3962, "step": 159 }, { - "epoch": 0.004533990761993822, + "epoch": 0.0062602707567102275, "grad_norm": 0.0, - "learning_rate": 3.0217186024551465e-06, - "loss": 1.2487, + "learning_rate": 4.172099087353325e-06, + "loss": 1.3009, "step": 160 }, { - "epoch": 0.004562328204256284, + "epoch": 0.006299397448939667, "grad_norm": 0.0, - "learning_rate": 3.0406043437204912e-06, - "loss": 1.1977, + "learning_rate": 4.198174706649283e-06, + "loss": 1.408, "step": 161 }, { - "epoch": 0.0045906656465187455, + "epoch": 0.006338524141169106, "grad_norm": 0.0, - "learning_rate": 3.059490084985836e-06, - "loss": 1.1379, + "learning_rate": 4.224250325945242e-06, + "loss": 1.468, "step": 162 }, { - "epoch": 0.004619003088781206, + "epoch": 0.006377650833398544, "grad_norm": 0.0, - "learning_rate": 3.0783758262511802e-06, - "loss": 1.2034, + "learning_rate": 4.2503259452412e-06, + "loss": 1.4591, "step": 163 }, { - "epoch": 0.004647340531043668, + "epoch": 0.006416777525627983, "grad_norm": 0.0, - "learning_rate": 3.0972615675165254e-06, - "loss": 1.2661, + "learning_rate": 4.276401564537158e-06, + "loss": 1.4445, "step": 164 }, { - "epoch": 0.00467567797330613, + "epoch": 0.006455904217857422, "grad_norm": 0.0, - "learning_rate": 3.11614730878187e-06, - "loss": 1.1943, + "learning_rate": 4.302477183833116e-06, + "loss": 1.3512, "step": 165 }, { - "epoch": 0.004704015415568591, + "epoch": 0.0064950309100868615, "grad_norm": 0.0, - "learning_rate": 3.1350330500472148e-06, - "loss": 1.2672, + "learning_rate": 4.328552803129075e-06, + "loss": 1.4269, "step": 166 }, { - "epoch": 0.004732352857831052, + "epoch": 0.0065341576023163, "grad_norm": 0.0, - "learning_rate": 3.153918791312559e-06, - "loss": 1.1424, + "learning_rate": 4.354628422425033e-06, + "loss": 1.3937, "step": 167 }, { - "epoch": 0.004760690300093514, + "epoch": 0.006573284294545739, "grad_norm": 0.0, - "learning_rate": 3.1728045325779038e-06, - "loss": 1.2279, + "learning_rate": 4.380704041720991e-06, + "loss": 1.3075, "step": 168 }, { - "epoch": 0.004789027742355975, + "epoch": 0.006612410986775178, "grad_norm": 0.0, - "learning_rate": 3.191690273843249e-06, - "loss": 1.1577, + "learning_rate": 4.40677966101695e-06, + "loss": 1.2495, "step": 169 }, { - "epoch": 0.0048173651846184365, + "epoch": 0.006651537679004617, "grad_norm": 0.0, - "learning_rate": 3.2105760151085936e-06, - "loss": 1.1403, + "learning_rate": 4.4328552803129075e-06, + "loss": 1.5041, "step": 170 }, { - "epoch": 0.004845702626880897, + "epoch": 0.0066906643712340555, "grad_norm": 0.0, - "learning_rate": 3.229461756373938e-06, - "loss": 1.1198, + "learning_rate": 4.458930899608866e-06, + "loss": 1.301, "step": 171 }, { - "epoch": 0.004874040069143359, + "epoch": 0.006729791063463495, "grad_norm": 0.0, - "learning_rate": 3.2483474976392826e-06, - "loss": 1.4328, + "learning_rate": 4.485006518904824e-06, + "loss": 1.4299, "step": 172 }, { - "epoch": 0.004902377511405821, + "epoch": 0.006768917755692934, "grad_norm": 0.0, - "learning_rate": 3.2672332389046273e-06, - "loss": 1.1816, + "learning_rate": 4.5110821382007825e-06, + "loss": 1.1853, "step": 173 }, { - "epoch": 0.004930714953668282, + "epoch": 0.006808044447922373, "grad_norm": 0.0, - "learning_rate": 3.2861189801699724e-06, - "loss": 1.2372, + "learning_rate": 4.537157757496741e-06, + "loss": 1.4109, "step": 174 }, { - "epoch": 0.004959052395930743, + "epoch": 0.006847171140151811, "grad_norm": 0.0, - "learning_rate": 3.3050047214353163e-06, - "loss": 1.2335, + "learning_rate": 4.563233376792699e-06, + "loss": 1.3104, "step": 175 }, { - "epoch": 0.004987389838193205, + "epoch": 0.00688629783238125, "grad_norm": 0.0, - "learning_rate": 3.3238904627006614e-06, - "loss": 1.1978, + "learning_rate": 4.5893089960886575e-06, + "loss": 1.3376, "step": 176 }, { - "epoch": 0.005015727280455666, + "epoch": 0.006925424524610689, "grad_norm": 0.0, - "learning_rate": 3.342776203966006e-06, - "loss": 1.2584, + "learning_rate": 4.615384615384616e-06, + "loss": 1.3854, "step": 177 }, { - "epoch": 0.0050440647227181275, + "epoch": 0.006964551216840129, "grad_norm": 0.0, - "learning_rate": 3.3616619452313504e-06, - "loss": 1.2833, + "learning_rate": 4.641460234680574e-06, + "loss": 1.2471, "step": 178 }, { - "epoch": 0.005072402164980589, + "epoch": 0.007003677909069567, "grad_norm": 0.0, - "learning_rate": 3.380547686496695e-06, - "loss": 1.2027, + "learning_rate": 4.6675358539765326e-06, + "loss": 1.403, "step": 179 }, { - "epoch": 0.00510073960724305, + "epoch": 0.007042804601299006, "grad_norm": 0.0, - "learning_rate": 3.39943342776204e-06, - "loss": 1.1873, + "learning_rate": 4.693611473272491e-06, + "loss": 1.4435, "step": 180 }, { - "epoch": 0.005129077049505512, + "epoch": 0.007081931293528445, "grad_norm": 0.0, - "learning_rate": 3.418319169027385e-06, - "loss": 1.1542, + "learning_rate": 4.719687092568449e-06, + "loss": 1.4517, "step": 181 }, { - "epoch": 0.005157414491767973, + "epoch": 0.007121057985757884, "grad_norm": 0.0, - "learning_rate": 3.437204910292729e-06, - "loss": 1.2029, + "learning_rate": 4.745762711864408e-06, + "loss": 1.3156, "step": 182 }, { - "epoch": 0.005185751934030434, + "epoch": 0.0071601846779873225, "grad_norm": 0.0, - "learning_rate": 3.456090651558074e-06, - "loss": 1.1537, + "learning_rate": 4.771838331160365e-06, + "loss": 1.3231, "step": 183 }, { - "epoch": 0.005214089376292896, + "epoch": 0.007199311370216762, "grad_norm": 0.0, - "learning_rate": 3.4749763928234186e-06, - "loss": 1.2198, + "learning_rate": 4.797913950456324e-06, + "loss": 1.3873, "step": 184 }, { - "epoch": 0.005242426818555357, + "epoch": 0.007238438062446201, "grad_norm": 0.0, - "learning_rate": 3.4938621340887633e-06, - "loss": 1.1828, + "learning_rate": 4.823989569752282e-06, + "loss": 1.334, "step": 185 }, { - "epoch": 0.0052707642608178186, + "epoch": 0.00727756475467564, "grad_norm": 0.0, - "learning_rate": 3.5127478753541076e-06, - "loss": 1.207, + "learning_rate": 4.85006518904824e-06, + "loss": 1.2059, "step": 186 }, { - "epoch": 0.00529910170308028, + "epoch": 0.007316691446905078, "grad_norm": 0.0, - "learning_rate": 3.5316336166194527e-06, - "loss": 1.1825, + "learning_rate": 4.876140808344198e-06, + "loss": 1.3802, "step": 187 }, { - "epoch": 0.005327439145342741, + "epoch": 0.007355818139134517, "grad_norm": 0.0, - "learning_rate": 3.5505193578847974e-06, - "loss": 1.1684, + "learning_rate": 4.902216427640157e-06, + "loss": 1.251, "step": 188 }, { - "epoch": 0.005355776587605203, + "epoch": 0.0073949448313639565, "grad_norm": 0.0, - "learning_rate": 3.5694050991501417e-06, - "loss": 1.2487, + "learning_rate": 4.9282920469361155e-06, + "loss": 1.2852, "step": 189 }, { - "epoch": 0.0053841140298676645, + "epoch": 0.007434071523593396, "grad_norm": 0.0, - "learning_rate": 3.5882908404154864e-06, - "loss": 1.1942, + "learning_rate": 4.954367666232073e-06, + "loss": 1.3348, "step": 190 }, { - "epoch": 0.005412451472130125, + "epoch": 0.007473198215822835, "grad_norm": 0.0, - "learning_rate": 3.607176581680831e-06, - "loss": 1.2144, + "learning_rate": 4.980443285528032e-06, + "loss": 1.3463, "step": 191 }, { - "epoch": 0.005440788914392587, + "epoch": 0.007512324908052273, "grad_norm": 0.0, - "learning_rate": 3.6260623229461763e-06, - "loss": 1.4025, + "learning_rate": 5.0065189048239905e-06, + "loss": 1.2957, "step": 192 }, { - "epoch": 0.005469126356655049, + "epoch": 0.007551451600281712, "grad_norm": 0.0, - "learning_rate": 3.6449480642115205e-06, - "loss": 1.08, + "learning_rate": 5.032594524119948e-06, + "loss": 1.3495, "step": 193 }, { - "epoch": 0.00549746379891751, + "epoch": 0.007590578292511151, "grad_norm": 0.0, - "learning_rate": 3.6638338054768652e-06, - "loss": 1.1788, + "learning_rate": 5.058670143415907e-06, + "loss": 1.4135, "step": 194 }, { - "epoch": 0.005525801241179971, + "epoch": 0.0076297049847405905, "grad_norm": 0.0, - "learning_rate": 3.68271954674221e-06, - "loss": 1.2086, + "learning_rate": 5.084745762711865e-06, + "loss": 1.3567, "step": 195 }, { - "epoch": 0.005554138683442432, + "epoch": 0.007668831676970029, "grad_norm": 0.0, - "learning_rate": 3.7016052880075547e-06, - "loss": 1.2828, + "learning_rate": 5.110821382007823e-06, + "loss": 1.2578, "step": 196 }, { - "epoch": 0.005582476125704894, + "epoch": 0.007707958369199468, "grad_norm": 0.0, - "learning_rate": 3.720491029272899e-06, - "loss": 1.1159, + "learning_rate": 5.136897001303781e-06, + "loss": 1.2433, "step": 197 }, { - "epoch": 0.0056108135679673555, + "epoch": 0.007747085061428907, "grad_norm": 0.0, - "learning_rate": 3.739376770538244e-06, - "loss": 1.1252, + "learning_rate": 5.16297262059974e-06, + "loss": 1.3201, "step": 198 }, { - "epoch": 0.005639151010229816, + "epoch": 0.007786211753658346, "grad_norm": 0.0, - "learning_rate": 3.7582625118035888e-06, - "loss": 1.1085, + "learning_rate": 5.1890482398956975e-06, + "loss": 1.3608, "step": 199 }, { - "epoch": 0.005667488452492278, + "epoch": 0.007825338445887784, "grad_norm": 0.0, - "learning_rate": 3.7771482530689335e-06, - "loss": 1.2355, + "learning_rate": 5.215123859191656e-06, + "loss": 1.4556, "step": 200 }, { - "epoch": 0.00569582589475474, + "epoch": 0.007864465138117224, "grad_norm": 0.0, - "learning_rate": 3.7960339943342778e-06, - "loss": 1.2592, + "learning_rate": 5.241199478487614e-06, + "loss": 1.4248, "step": 201 }, { - "epoch": 0.005724163337017201, + "epoch": 0.007903591830346663, "grad_norm": 0.0, - "learning_rate": 3.814919735599623e-06, - "loss": 1.16, + "learning_rate": 5.2672750977835725e-06, + "loss": 1.3947, "step": 202 }, { - "epoch": 0.005752500779279662, + "epoch": 0.007942718522576102, "grad_norm": 0.0, - "learning_rate": 3.833805476864968e-06, - "loss": 1.3342, + "learning_rate": 5.29335071707953e-06, + "loss": 1.4028, "step": 203 }, { - "epoch": 0.005780838221542124, + "epoch": 0.007981845214805541, "grad_norm": 0.0, - "learning_rate": 3.8526912181303115e-06, - "loss": 1.2954, + "learning_rate": 5.31942633637549e-06, + "loss": 1.3906, "step": 204 }, { - "epoch": 0.005809175663804585, + "epoch": 0.00802097190703498, "grad_norm": 0.0, - "learning_rate": 3.871576959395656e-06, - "loss": 1.2168, + "learning_rate": 5.345501955671447e-06, + "loss": 1.3363, "step": 205 }, { - "epoch": 0.0058375131060670465, + "epoch": 0.008060098599264418, "grad_norm": 0.0, - "learning_rate": 3.890462700661002e-06, - "loss": 1.1747, + "learning_rate": 5.371577574967406e-06, + "loss": 1.4054, "step": 206 }, { - "epoch": 0.005865850548329507, + "epoch": 0.008099225291493857, "grad_norm": 0.0, - "learning_rate": 3.909348441926346e-06, - "loss": 1.2069, + "learning_rate": 5.397653194263364e-06, + "loss": 1.3038, "step": 207 }, { - "epoch": 0.005894187990591969, + "epoch": 0.008138351983723296, "grad_norm": 0.0, - "learning_rate": 3.92823418319169e-06, - "loss": 1.2817, + "learning_rate": 5.423728813559323e-06, + "loss": 1.3452, "step": 208 }, { - "epoch": 0.005922525432854431, + "epoch": 0.008177478675952735, "grad_norm": 0.0, - "learning_rate": 3.947119924457035e-06, - "loss": 1.3235, + "learning_rate": 5.449804432855281e-06, + "loss": 1.2614, "step": 209 }, { - "epoch": 0.005950862875116892, + "epoch": 0.008216605368182174, "grad_norm": 0.0, - "learning_rate": 3.96600566572238e-06, - "loss": 1.2228, + "learning_rate": 5.475880052151239e-06, + "loss": 1.3498, "step": 210 }, { - "epoch": 0.005979200317379353, + "epoch": 0.008255732060411613, "grad_norm": 0.0, - "learning_rate": 3.984891406987724e-06, - "loss": 1.2421, + "learning_rate": 5.501955671447198e-06, + "loss": 1.2994, "step": 211 }, { - "epoch": 0.006007537759641815, + "epoch": 0.008294858752641052, "grad_norm": 0.0, - "learning_rate": 4.003777148253069e-06, - "loss": 1.2018, + "learning_rate": 5.5280312907431554e-06, + "loss": 1.4379, "step": 212 }, { - "epoch": 0.006035875201904276, + "epoch": 0.008333985444870491, "grad_norm": 0.0, - "learning_rate": 4.022662889518414e-06, - "loss": 1.2424, + "learning_rate": 5.554106910039114e-06, + "loss": 1.3136, "step": 213 }, { - "epoch": 0.0060642126441667376, + "epoch": 0.008373112137099929, "grad_norm": 0.0, - "learning_rate": 4.0415486307837585e-06, - "loss": 1.2189, + "learning_rate": 5.580182529335072e-06, + "loss": 1.2997, "step": 214 }, { - "epoch": 0.006092550086429199, + "epoch": 0.008412238829329368, "grad_norm": 0.0, - "learning_rate": 4.060434372049103e-06, - "loss": 1.2545, + "learning_rate": 5.6062581486310305e-06, + "loss": 1.3591, "step": 215 }, { - "epoch": 0.00612088752869166, + "epoch": 0.008451365521558807, "grad_norm": 0.0, - "learning_rate": 4.079320113314448e-06, - "loss": 1.2996, + "learning_rate": 5.632333767926988e-06, + "loss": 1.3214, "step": 216 }, { - "epoch": 0.006149224970954122, + "epoch": 0.008490492213788246, "grad_norm": 0.0, - "learning_rate": 4.098205854579793e-06, - "loss": 1.2469, + "learning_rate": 5.658409387222948e-06, + "loss": 1.3735, "step": 217 }, { - "epoch": 0.0061775624132165835, + "epoch": 0.008529618906017685, "grad_norm": 0.0, - "learning_rate": 4.117091595845137e-06, - "loss": 1.3011, + "learning_rate": 5.684485006518905e-06, + "loss": 1.1454, "step": 218 }, { - "epoch": 0.006205899855479044, + "epoch": 0.008568745598247125, "grad_norm": 0.0, - "learning_rate": 4.135977337110482e-06, - "loss": 1.386, + "learning_rate": 5.710560625814864e-06, + "loss": 1.3994, "step": 219 }, { - "epoch": 0.006234237297741506, + "epoch": 0.008607872290476564, "grad_norm": 0.0, - "learning_rate": 4.154863078375827e-06, - "loss": 1.3004, + "learning_rate": 5.736636245110822e-06, + "loss": 1.3139, "step": 220 }, { - "epoch": 0.006262574740003967, + "epoch": 0.008646998982706003, "grad_norm": 0.0, - "learning_rate": 4.1737488196411714e-06, - "loss": 1.1944, + "learning_rate": 5.7627118644067805e-06, + "loss": 1.4988, "step": 221 }, { - "epoch": 0.006290912182266429, + "epoch": 0.00868612567493544, "grad_norm": 0.0, - "learning_rate": 4.192634560906516e-06, - "loss": 1.3088, + "learning_rate": 5.788787483702738e-06, + "loss": 1.4006, "step": 222 }, { - "epoch": 0.00631924962452889, + "epoch": 0.00872525236716488, "grad_norm": 0.0, - "learning_rate": 4.21152030217186e-06, - "loss": 1.2352, + "learning_rate": 5.814863102998697e-06, + "loss": 1.3512, "step": 223 }, { - "epoch": 0.006347587066791351, + "epoch": 0.008764379059394319, "grad_norm": 0.0, - "learning_rate": 4.2304060434372056e-06, - "loss": 1.2207, + "learning_rate": 5.840938722294655e-06, + "loss": 1.1885, "step": 224 }, { - "epoch": 0.006375924509053813, + "epoch": 0.008803505751623758, "grad_norm": 0.0, - "learning_rate": 4.24929178470255e-06, - "loss": 1.2494, + "learning_rate": 5.867014341590613e-06, + "loss": 1.3357, "step": 225 }, { - "epoch": 0.0064042619513162745, + "epoch": 0.008842632443853197, "grad_norm": 0.0, - "learning_rate": 4.268177525967895e-06, - "loss": 1.3145, + "learning_rate": 5.893089960886571e-06, + "loss": 1.3458, "step": 226 }, { - "epoch": 0.006432599393578735, + "epoch": 0.008881759136082636, "grad_norm": 0.0, - "learning_rate": 4.287063267233239e-06, - "loss": 1.1792, + "learning_rate": 5.91916558018253e-06, + "loss": 1.2893, "step": 227 }, { - "epoch": 0.006460936835841197, + "epoch": 0.008920885828312075, "grad_norm": 0.0, - "learning_rate": 4.3059490084985835e-06, - "loss": 1.1788, + "learning_rate": 5.9452411994784875e-06, + "loss": 1.3563, "step": 228 }, { - "epoch": 0.006489274278103659, + "epoch": 0.008960012520541514, "grad_norm": 0.0, - "learning_rate": 4.324834749763929e-06, - "loss": 1.1329, + "learning_rate": 5.971316818774446e-06, + "loss": 1.1919, "step": 229 }, { - "epoch": 0.00651761172036612, + "epoch": 0.008999139212770952, "grad_norm": 0.0, - "learning_rate": 4.343720491029274e-06, - "loss": 1.1944, + "learning_rate": 5.997392438070405e-06, + "loss": 1.2916, "step": 230 }, { - "epoch": 0.006545949162628581, + "epoch": 0.00903826590500039, "grad_norm": 0.0, - "learning_rate": 4.362606232294618e-06, - "loss": 1.2713, + "learning_rate": 6.023468057366363e-06, + "loss": 1.3856, "step": 231 }, { - "epoch": 0.006574286604891042, + "epoch": 0.00907739259722983, "grad_norm": 0.0, - "learning_rate": 4.381491973559962e-06, - "loss": 1.257, + "learning_rate": 6.049543676662322e-06, + "loss": 1.2431, "step": 232 }, { - "epoch": 0.006602624047153504, + "epoch": 0.009116519289459269, "grad_norm": 0.0, - "learning_rate": 4.400377714825307e-06, - "loss": 1.2758, + "learning_rate": 6.075619295958279e-06, + "loss": 1.4371, "step": 233 }, { - "epoch": 0.0066309614894159655, + "epoch": 0.009155645981688708, "grad_norm": 0.0, - "learning_rate": 4.419263456090652e-06, - "loss": 1.1614, + "learning_rate": 6.1016949152542385e-06, + "loss": 1.2924, "step": 234 }, { - "epoch": 0.006659298931678426, + "epoch": 0.009194772673918147, "grad_norm": 0.0, - "learning_rate": 4.4381491973559965e-06, - "loss": 1.2531, + "learning_rate": 6.127770534550196e-06, + "loss": 1.357, "step": 235 }, { - "epoch": 0.006687636373940888, + "epoch": 0.009233899366147586, "grad_norm": 0.0, - "learning_rate": 4.457034938621341e-06, - "loss": 1.2192, + "learning_rate": 6.153846153846155e-06, + "loss": 1.2363, "step": 236 }, { - "epoch": 0.00671597381620335, + "epoch": 0.009273026058377026, "grad_norm": 0.0, - "learning_rate": 4.475920679886686e-06, - "loss": 1.0932, + "learning_rate": 6.179921773142113e-06, + "loss": 1.3647, "step": 237 }, { - "epoch": 0.006744311258465811, + "epoch": 0.009312152750606463, "grad_norm": 0.0, - "learning_rate": 4.494806421152031e-06, - "loss": 1.2228, + "learning_rate": 6.205997392438071e-06, + "loss": 1.3705, "step": 238 }, { - "epoch": 0.006772648700728272, + "epoch": 0.009351279442835902, "grad_norm": 0.0, - "learning_rate": 4.513692162417375e-06, - "loss": 1.1165, + "learning_rate": 6.232073011734029e-06, + "loss": 1.2914, "step": 239 }, { - "epoch": 0.006800986142990734, + "epoch": 0.009390406135065341, "grad_norm": 0.0, - "learning_rate": 4.53257790368272e-06, - "loss": 1.267, + "learning_rate": 6.258148631029988e-06, + "loss": 1.1712, "step": 240 }, { - "epoch": 0.006829323585253195, + "epoch": 0.00942953282729478, "grad_norm": 0.0, - "learning_rate": 4.551463644948065e-06, - "loss": 1.2385, + "learning_rate": 6.2842242503259455e-06, + "loss": 1.2874, "step": 241 }, { - "epoch": 0.0068576610275156566, + "epoch": 0.00946865951952422, "grad_norm": 0.0, - "learning_rate": 4.570349386213409e-06, - "loss": 1.2845, + "learning_rate": 6.310299869621904e-06, + "loss": 1.5271, "step": 242 }, { - "epoch": 0.006885998469778117, + "epoch": 0.009507786211753659, "grad_norm": 0.0, - "learning_rate": 4.589235127478754e-06, - "loss": 1.1191, + "learning_rate": 6.336375488917862e-06, + "loss": 1.3885, "step": 243 }, { - "epoch": 0.006914335912040579, + "epoch": 0.009546912903983098, "grad_norm": 0.0, - "learning_rate": 4.608120868744099e-06, - "loss": 1.2372, + "learning_rate": 6.3624511082138205e-06, + "loss": 1.38, "step": 244 }, { - "epoch": 0.006942673354303041, + "epoch": 0.009586039596212537, "grad_norm": 0.0, - "learning_rate": 4.6270066100094435e-06, - "loss": 1.0528, + "learning_rate": 6.388526727509778e-06, + "loss": 1.2515, "step": 245 }, { - "epoch": 0.006971010796565502, + "epoch": 0.009625166288441974, "grad_norm": 0.0, - "learning_rate": 4.645892351274787e-06, - "loss": 1.2126, + "learning_rate": 6.414602346805737e-06, + "loss": 1.2769, "step": 246 }, { - "epoch": 0.006999348238827963, + "epoch": 0.009664292980671414, "grad_norm": 0.0, - "learning_rate": 4.664778092540133e-06, - "loss": 1.3069, + "learning_rate": 6.440677966101695e-06, + "loss": 1.2955, "step": 247 }, { - "epoch": 0.007027685681090425, + "epoch": 0.009703419672900853, "grad_norm": 0.0, - "learning_rate": 4.683663833805478e-06, - "loss": 1.2067, + "learning_rate": 6.466753585397653e-06, + "loss": 1.2698, "step": 248 }, { - "epoch": 0.007056023123352886, + "epoch": 0.009742546365130292, "grad_norm": 0.0, - "learning_rate": 4.7025495750708215e-06, - "loss": 1.2478, + "learning_rate": 6.492829204693613e-06, + "loss": 1.3268, "step": 249 }, { - "epoch": 0.007084360565615348, + "epoch": 0.009781673057359731, "grad_norm": 0.0, - "learning_rate": 4.721435316336166e-06, - "loss": 1.2856, + "learning_rate": 6.518904823989571e-06, + "loss": 1.3779, "step": 250 }, { - "epoch": 0.007112698007877809, + "epoch": 0.00982079974958917, "grad_norm": 0.0, - "learning_rate": 4.740321057601511e-06, - "loss": 1.2213, + "learning_rate": 6.544980443285529e-06, + "loss": 1.3955, "step": 251 }, { - "epoch": 0.00714103545014027, + "epoch": 0.00985992644181861, "grad_norm": 0.0, - "learning_rate": 4.7592067988668565e-06, - "loss": 1.2152, + "learning_rate": 6.571056062581487e-06, + "loss": 1.2912, "step": 252 }, { - "epoch": 0.007169372892402732, + "epoch": 0.009899053134048048, "grad_norm": 0.0, - "learning_rate": 4.7780925401322e-06, - "loss": 1.1938, + "learning_rate": 6.597131681877446e-06, + "loss": 1.329, "step": 253 }, { - "epoch": 0.0071977103346651935, + "epoch": 0.009938179826277486, "grad_norm": 0.0, - "learning_rate": 4.796978281397545e-06, - "loss": 1.2687, + "learning_rate": 6.623207301173403e-06, + "loss": 1.3978, "step": 254 }, { - "epoch": 0.007226047776927654, + "epoch": 0.009977306518506925, "grad_norm": 0.0, - "learning_rate": 4.81586402266289e-06, - "loss": 1.2059, + "learning_rate": 6.649282920469362e-06, + "loss": 1.3483, "step": 255 }, { - "epoch": 0.007254385219190116, + "epoch": 0.010016433210736364, "grad_norm": 0.0, - "learning_rate": 4.8347497639282344e-06, - "loss": 1.19, + "learning_rate": 6.67535853976532e-06, + "loss": 1.3959, "step": 256 }, { - "epoch": 0.007282722661452577, + "epoch": 0.010055559902965803, "grad_norm": 0.0, - "learning_rate": 4.853635505193579e-06, - "loss": 1.1724, + "learning_rate": 6.7014341590612784e-06, + "loss": 1.2706, "step": 257 }, { - "epoch": 0.007311060103715039, + "epoch": 0.010094686595195242, "grad_norm": 0.0, - "learning_rate": 4.872521246458924e-06, - "loss": 1.1832, + "learning_rate": 6.727509778357236e-06, + "loss": 1.2396, "step": 258 }, { - "epoch": 0.0073393975459775, + "epoch": 0.010133813287424681, "grad_norm": 0.0, - "learning_rate": 4.8914069877242686e-06, - "loss": 1.2066, + "learning_rate": 6.753585397653195e-06, + "loss": 1.4016, "step": 259 }, { - "epoch": 0.007367734988239961, + "epoch": 0.01017293997965412, "grad_norm": 0.0, - "learning_rate": 4.910292728989613e-06, - "loss": 1.1475, + "learning_rate": 6.779661016949153e-06, + "loss": 1.2732, "step": 260 }, { - "epoch": 0.007396072430502423, + "epoch": 0.01021206667188356, "grad_norm": 0.0, - "learning_rate": 4.929178470254958e-06, - "loss": 1.3201, + "learning_rate": 6.805736636245111e-06, + "loss": 1.3431, "step": 261 }, { - "epoch": 0.0074244098727648845, + "epoch": 0.010251193364112997, "grad_norm": 0.0, - "learning_rate": 4.948064211520303e-06, - "loss": 1.223, + "learning_rate": 6.831812255541069e-06, + "loss": 1.3865, "step": 262 }, { - "epoch": 0.007452747315027345, + "epoch": 0.010290320056342436, "grad_norm": 0.0, - "learning_rate": 4.966949952785647e-06, - "loss": 1.2578, + "learning_rate": 6.8578878748370285e-06, + "loss": 1.2385, "step": 263 }, { - "epoch": 0.007481084757289807, + "epoch": 0.010329446748571875, "grad_norm": 0.0, - "learning_rate": 4.985835694050991e-06, - "loss": 1.1475, + "learning_rate": 6.8839634941329854e-06, + "loss": 1.3173, "step": 264 }, { - "epoch": 0.007509422199552269, + "epoch": 0.010368573440801315, "grad_norm": 0.0, - "learning_rate": 5.004721435316337e-06, - "loss": 1.2665, + "learning_rate": 6.910039113428945e-06, + "loss": 1.4006, "step": 265 }, { - "epoch": 0.00753775964181473, + "epoch": 0.010407700133030754, "grad_norm": 0.0, - "learning_rate": 5.023607176581681e-06, - "loss": 1.2566, + "learning_rate": 6.936114732724903e-06, + "loss": 1.3444, "step": 266 }, { - "epoch": 0.007566097084077191, + "epoch": 0.010446826825260193, "grad_norm": 0.0, - "learning_rate": 5.042492917847026e-06, - "loss": 1.1874, + "learning_rate": 6.962190352020861e-06, + "loss": 1.2354, "step": 267 }, { - "epoch": 0.007594434526339652, + "epoch": 0.010485953517489632, "grad_norm": 0.0, - "learning_rate": 5.06137865911237e-06, - "loss": 1.1515, + "learning_rate": 6.988265971316819e-06, + "loss": 1.3185, "step": 268 }, { - "epoch": 0.007622771968602114, + "epoch": 0.010525080209719071, "grad_norm": 0.0, - "learning_rate": 5.080264400377716e-06, - "loss": 1.0916, + "learning_rate": 7.014341590612778e-06, + "loss": 1.2294, "step": 269 }, { - "epoch": 0.0076511094108645756, + "epoch": 0.010564206901948509, "grad_norm": 0.0, - "learning_rate": 5.09915014164306e-06, - "loss": 1.2374, + "learning_rate": 7.040417209908736e-06, + "loss": 1.2622, "step": 270 }, { - "epoch": 0.007679446853127036, + "epoch": 0.010603333594177948, "grad_norm": 0.0, - "learning_rate": 5.118035882908404e-06, - "loss": 1.2041, + "learning_rate": 7.066492829204694e-06, + "loss": 1.279, "step": 271 }, { - "epoch": 0.007707784295389498, + "epoch": 0.010642460286407387, "grad_norm": 0.0, - "learning_rate": 5.13692162417375e-06, - "loss": 1.2884, + "learning_rate": 7.092568448500653e-06, + "loss": 1.3441, "step": 272 }, { - "epoch": 0.00773612173765196, + "epoch": 0.010681586978636826, "grad_norm": 0.0, - "learning_rate": 5.155807365439094e-06, - "loss": 1.2892, + "learning_rate": 7.1186440677966106e-06, + "loss": 1.3218, "step": 273 }, { - "epoch": 0.007764459179914421, + "epoch": 0.010720713670866265, "grad_norm": 0.0, - "learning_rate": 5.174693106704438e-06, - "loss": 1.3876, + "learning_rate": 7.144719687092569e-06, + "loss": 1.3236, "step": 274 }, { - "epoch": 0.007792796622176882, + "epoch": 0.010759840363095704, "grad_norm": 0.0, - "learning_rate": 5.193578847969784e-06, - "loss": 1.21, + "learning_rate": 7.170795306388527e-06, + "loss": 1.356, "step": 275 }, { - "epoch": 0.007821134064439343, + "epoch": 0.010798967055325143, "grad_norm": 0.0, - "learning_rate": 5.212464589235128e-06, - "loss": 1.0363, + "learning_rate": 7.196870925684486e-06, + "loss": 1.2808, "step": 276 }, { - "epoch": 0.007849471506701806, + "epoch": 0.010838093747554583, "grad_norm": 0.0, - "learning_rate": 5.231350330500472e-06, - "loss": 1.2465, + "learning_rate": 7.222946544980443e-06, + "loss": 1.201, "step": 277 }, { - "epoch": 0.007877808948964267, + "epoch": 0.01087722043978402, "grad_norm": 0.0, - "learning_rate": 5.250236071765817e-06, - "loss": 1.2111, + "learning_rate": 7.249022164276403e-06, + "loss": 1.3279, "step": 278 }, { - "epoch": 0.007906146391226727, + "epoch": 0.010916347132013459, "grad_norm": 0.0, - "learning_rate": 5.269121813031162e-06, - "loss": 1.2418, + "learning_rate": 7.27509778357236e-06, + "loss": 1.2931, "step": 279 }, { - "epoch": 0.00793448383348919, + "epoch": 0.010955473824242898, "grad_norm": 0.0, - "learning_rate": 5.288007554296507e-06, - "loss": 1.0929, + "learning_rate": 7.301173402868319e-06, + "loss": 1.3418, "step": 280 }, { - "epoch": 0.00796282127575165, + "epoch": 0.010994600516472337, "grad_norm": 0.0, - "learning_rate": 5.306893295561851e-06, - "loss": 1.2004, + "learning_rate": 7.327249022164277e-06, + "loss": 1.3231, "step": 281 }, { - "epoch": 0.007991158718014112, + "epoch": 0.011033727208701776, "grad_norm": 0.0, - "learning_rate": 5.325779036827196e-06, - "loss": 1.202, + "learning_rate": 7.353324641460236e-06, + "loss": 1.2429, "step": 282 }, { - "epoch": 0.008019496160276574, + "epoch": 0.011072853900931216, "grad_norm": 0.0, - "learning_rate": 5.344664778092541e-06, - "loss": 1.2267, + "learning_rate": 7.3794002607561934e-06, + "loss": 1.3893, "step": 283 }, { - "epoch": 0.008047833602539035, + "epoch": 0.011111980593160655, "grad_norm": 0.0, - "learning_rate": 5.363550519357885e-06, - "loss": 1.2863, + "learning_rate": 7.405475880052152e-06, + "loss": 1.3075, "step": 284 }, { - "epoch": 0.008076171044801496, + "epoch": 0.011151107285390094, "grad_norm": 0.0, - "learning_rate": 5.382436260623229e-06, - "loss": 1.1041, + "learning_rate": 7.43155149934811e-06, + "loss": 1.2918, "step": 285 }, { - "epoch": 0.008104508487063958, + "epoch": 0.011190233977619531, "grad_norm": 0.0, - "learning_rate": 5.401322001888575e-06, - "loss": 1.1439, + "learning_rate": 7.4576271186440685e-06, + "loss": 1.3269, "step": 286 }, { - "epoch": 0.00813284592932642, + "epoch": 0.01122936066984897, "grad_norm": 0.0, - "learning_rate": 5.4202077431539195e-06, - "loss": 1.2829, + "learning_rate": 7.483702737940026e-06, + "loss": 1.381, "step": 287 }, { - "epoch": 0.00816118337158888, + "epoch": 0.01126848736207841, "grad_norm": 0.0, - "learning_rate": 5.439093484419264e-06, - "loss": 1.2609, + "learning_rate": 7.509778357235985e-06, + "loss": 1.2844, "step": 288 }, { - "epoch": 0.008189520813851341, + "epoch": 0.011307614054307849, "grad_norm": 0.0, - "learning_rate": 5.457979225684609e-06, - "loss": 1.277, + "learning_rate": 7.535853976531943e-06, + "loss": 1.2786, "step": 289 }, { - "epoch": 0.008217858256113804, + "epoch": 0.011346740746537288, "grad_norm": 0.0, - "learning_rate": 5.476864966949953e-06, - "loss": 1.1736, + "learning_rate": 7.561929595827901e-06, + "loss": 1.2607, "step": 290 }, { - "epoch": 0.008246195698376264, + "epoch": 0.011385867438766727, "grad_norm": 0.0, - "learning_rate": 5.495750708215298e-06, - "loss": 1.2495, + "learning_rate": 7.58800521512386e-06, + "loss": 1.2665, "step": 291 }, { - "epoch": 0.008274533140638725, + "epoch": 0.011424994130996166, "grad_norm": 0.0, - "learning_rate": 5.514636449480643e-06, - "loss": 1.2483, + "learning_rate": 7.614080834419818e-06, + "loss": 1.376, "step": 292 }, { - "epoch": 0.008302870582901188, + "epoch": 0.011464120823225605, "grad_norm": 0.0, - "learning_rate": 5.533522190745987e-06, - "loss": 1.1586, + "learning_rate": 7.640156453715776e-06, + "loss": 1.2978, "step": 293 }, { - "epoch": 0.008331208025163649, + "epoch": 0.011503247515455043, "grad_norm": 0.0, - "learning_rate": 5.552407932011332e-06, - "loss": 1.2206, + "learning_rate": 7.666232073011735e-06, + "loss": 1.4371, "step": 294 }, { - "epoch": 0.00835954546742611, + "epoch": 0.011542374207684482, "grad_norm": 0.0, - "learning_rate": 5.571293673276676e-06, - "loss": 1.1808, + "learning_rate": 7.692307692307694e-06, + "loss": 1.2955, "step": 295 }, { - "epoch": 0.008387882909688572, + "epoch": 0.011581500899913921, "grad_norm": 0.0, - "learning_rate": 5.590179414542021e-06, - "loss": 1.1801, + "learning_rate": 7.71838331160365e-06, + "loss": 1.3795, "step": 296 }, { - "epoch": 0.008416220351951033, + "epoch": 0.01162062759214336, "grad_norm": 0.0, - "learning_rate": 5.609065155807366e-06, - "loss": 1.2302, + "learning_rate": 7.744458930899609e-06, + "loss": 1.2765, "step": 297 }, { - "epoch": 0.008444557794213494, + "epoch": 0.0116597542843728, "grad_norm": 0.0, - "learning_rate": 5.62795089707271e-06, - "loss": 1.1304, + "learning_rate": 7.770534550195568e-06, + "loss": 1.3157, "step": 298 }, { - "epoch": 0.008472895236475956, + "epoch": 0.011698880976602238, "grad_norm": 0.0, - "learning_rate": 5.646836638338056e-06, - "loss": 1.2464, + "learning_rate": 7.796610169491526e-06, + "loss": 1.2561, "step": 299 }, { - "epoch": 0.008501232678738417, + "epoch": 0.011738007668831677, "grad_norm": 0.0, - "learning_rate": 5.6657223796034e-06, - "loss": 1.2396, + "learning_rate": 7.822685788787483e-06, + "loss": 1.3549, "step": 300 }, { - "epoch": 0.008529570121000878, + "epoch": 0.011777134361061117, "grad_norm": 0.0, - "learning_rate": 5.6846081208687445e-06, - "loss": 1.0963, + "learning_rate": 7.848761408083444e-06, + "loss": 1.2984, "step": 301 }, { - "epoch": 0.00855790756326334, + "epoch": 0.011816261053290554, "grad_norm": 0.0, - "learning_rate": 5.703493862134089e-06, - "loss": 1.2228, + "learning_rate": 7.8748370273794e-06, + "loss": 1.384, "step": 302 }, { - "epoch": 0.008586245005525801, + "epoch": 0.011855387745519993, "grad_norm": 0.0, - "learning_rate": 5.722379603399434e-06, - "loss": 1.2766, + "learning_rate": 7.90091264667536e-06, + "loss": 1.3005, "step": 303 }, { - "epoch": 0.008614582447788262, + "epoch": 0.011894514437749432, "grad_norm": 0.0, - "learning_rate": 5.741265344664778e-06, - "loss": 1.1713, + "learning_rate": 7.926988265971318e-06, + "loss": 1.4104, "step": 304 }, { - "epoch": 0.008642919890050725, + "epoch": 0.011933641129978871, "grad_norm": 0.0, - "learning_rate": 5.760151085930123e-06, - "loss": 1.2592, + "learning_rate": 7.953063885267276e-06, + "loss": 1.2677, "step": 305 }, { - "epoch": 0.008671257332313186, + "epoch": 0.01197276782220831, "grad_norm": 0.0, - "learning_rate": 5.779036827195468e-06, - "loss": 1.2694, + "learning_rate": 7.979139504563233e-06, + "loss": 1.264, "step": 306 }, { - "epoch": 0.008699594774575646, + "epoch": 0.01201189451443775, "grad_norm": 0.0, - "learning_rate": 5.797922568460812e-06, - "loss": 1.2725, + "learning_rate": 8.005215123859192e-06, + "loss": 1.3384, "step": 307 }, { - "epoch": 0.008727932216838109, + "epoch": 0.012051021206667189, "grad_norm": 0.0, - "learning_rate": 5.8168083097261574e-06, - "loss": 1.1114, + "learning_rate": 8.03129074315515e-06, + "loss": 1.3146, "step": 308 }, { - "epoch": 0.00875626965910057, + "epoch": 0.012090147898896628, "grad_norm": 0.0, - "learning_rate": 5.835694050991501e-06, - "loss": 1.1193, + "learning_rate": 8.05736636245111e-06, + "loss": 1.2887, "step": 309 }, { - "epoch": 0.00878460710136303, + "epoch": 0.012129274591126065, "grad_norm": 0.0, - "learning_rate": 5.854579792256847e-06, - "loss": 1.1685, + "learning_rate": 8.083441981747066e-06, + "loss": 1.3411, "step": 310 }, { - "epoch": 0.008812944543625493, + "epoch": 0.012168401283355505, "grad_norm": 0.0, - "learning_rate": 5.8734655335221916e-06, - "loss": 1.1501, + "learning_rate": 8.109517601043025e-06, + "loss": 1.2863, "step": 311 }, { - "epoch": 0.008841281985887954, + "epoch": 0.012207527975584944, "grad_norm": 0.0, - "learning_rate": 5.892351274787535e-06, - "loss": 1.1719, + "learning_rate": 8.135593220338983e-06, + "loss": 1.4172, "step": 312 }, { - "epoch": 0.008869619428150415, + "epoch": 0.012246654667814383, "grad_norm": 0.0, - "learning_rate": 5.911237016052881e-06, - "loss": 1.0672, + "learning_rate": 8.161668839634942e-06, + "loss": 1.3495, "step": 313 }, { - "epoch": 0.008897956870412876, + "epoch": 0.012285781360043822, "grad_norm": 0.0, - "learning_rate": 5.930122757318225e-06, - "loss": 1.2072, + "learning_rate": 8.1877444589309e-06, + "loss": 1.3222, "step": 314 }, { - "epoch": 0.008926294312675338, + "epoch": 0.012324908052273261, "grad_norm": 0.0, - "learning_rate": 5.9490084985835695e-06, - "loss": 1.2041, + "learning_rate": 8.213820078226858e-06, + "loss": 1.407, "step": 315 }, { - "epoch": 0.008954631754937799, + "epoch": 0.0123640347445027, "grad_norm": 0.0, - "learning_rate": 5.967894239848915e-06, - "loss": 1.1723, + "learning_rate": 8.239895697522818e-06, + "loss": 1.3616, "step": 316 }, { - "epoch": 0.00898296919720026, + "epoch": 0.01240316143673214, "grad_norm": 0.0, - "learning_rate": 5.986779981114259e-06, - "loss": 1.0968, + "learning_rate": 8.265971316818775e-06, + "loss": 1.3243, "step": 317 }, { - "epoch": 0.009011306639462723, + "epoch": 0.012442288128961577, "grad_norm": 0.0, - "learning_rate": 6.0056657223796045e-06, - "loss": 1.2642, + "learning_rate": 8.292046936114734e-06, + "loss": 1.0859, "step": 318 }, { - "epoch": 0.009039644081725183, + "epoch": 0.012481414821191016, "grad_norm": 0.0, - "learning_rate": 6.024551463644948e-06, - "loss": 1.2031, + "learning_rate": 8.318122555410692e-06, + "loss": 1.3722, "step": 319 }, { - "epoch": 0.009067981523987644, + "epoch": 0.012520541513420455, "grad_norm": 0.0, - "learning_rate": 6.043437204910293e-06, - "loss": 1.2073, + "learning_rate": 8.34419817470665e-06, + "loss": 1.241, "step": 320 }, { - "epoch": 0.009096318966250107, + "epoch": 0.012559668205649894, "grad_norm": 0.0, - "learning_rate": 6.062322946175639e-06, - "loss": 1.2341, + "learning_rate": 8.370273794002608e-06, + "loss": 1.2573, "step": 321 }, { - "epoch": 0.009124656408512568, + "epoch": 0.012598794897879333, "grad_norm": 0.0, - "learning_rate": 6.0812086874409825e-06, - "loss": 1.2157, + "learning_rate": 8.396349413298566e-06, + "loss": 1.275, "step": 322 }, { - "epoch": 0.009152993850775028, + "epoch": 0.012637921590108772, "grad_norm": 0.0, - "learning_rate": 6.100094428706327e-06, - "loss": 1.2114, + "learning_rate": 8.422425032594525e-06, + "loss": 1.3417, "step": 323 }, { - "epoch": 0.009181331293037491, + "epoch": 0.012677048282338212, "grad_norm": 0.0, - "learning_rate": 6.118980169971672e-06, - "loss": 1.2593, + "learning_rate": 8.448500651890484e-06, + "loss": 1.3077, "step": 324 }, { - "epoch": 0.009209668735299952, + "epoch": 0.01271617497456765, "grad_norm": 0.0, - "learning_rate": 6.137865911237017e-06, - "loss": 1.1956, + "learning_rate": 8.47457627118644e-06, + "loss": 1.2665, "step": 325 }, { - "epoch": 0.009238006177562413, + "epoch": 0.012755301666797088, "grad_norm": 0.0, - "learning_rate": 6.1567516525023604e-06, - "loss": 1.144, + "learning_rate": 8.5006518904824e-06, + "loss": 1.4011, "step": 326 }, { - "epoch": 0.009266343619824875, + "epoch": 0.012794428359026527, "grad_norm": 0.0, - "learning_rate": 6.175637393767706e-06, - "loss": 1.2298, + "learning_rate": 8.526727509778358e-06, + "loss": 1.2346, "step": 327 }, { - "epoch": 0.009294681062087336, + "epoch": 0.012833555051255966, "grad_norm": 0.0, - "learning_rate": 6.194523135033051e-06, - "loss": 1.2527, + "learning_rate": 8.552803129074316e-06, + "loss": 1.2982, "step": 328 }, { - "epoch": 0.009323018504349797, + "epoch": 0.012872681743485406, "grad_norm": 0.0, - "learning_rate": 6.213408876298395e-06, - "loss": 1.1456, + "learning_rate": 8.578878748370273e-06, + "loss": 1.3824, "step": 329 }, { - "epoch": 0.00935135594661226, + "epoch": 0.012911808435714845, "grad_norm": 0.0, - "learning_rate": 6.23229461756374e-06, - "loss": 1.0806, + "learning_rate": 8.604954367666232e-06, + "loss": 1.2346, "step": 330 }, { - "epoch": 0.00937969338887472, + "epoch": 0.012950935127944284, "grad_norm": 0.0, - "learning_rate": 6.251180358829084e-06, - "loss": 1.1435, + "learning_rate": 8.63102998696219e-06, + "loss": 1.3313, "step": 331 }, { - "epoch": 0.009408030831137181, + "epoch": 0.012990061820173723, "grad_norm": 0.0, - "learning_rate": 6.2700661000944295e-06, - "loss": 1.1483, + "learning_rate": 8.65710560625815e-06, + "loss": 1.4097, "step": 332 }, { - "epoch": 0.009436368273399644, + "epoch": 0.013029188512403162, "grad_norm": 0.0, - "learning_rate": 6.288951841359774e-06, - "loss": 1.233, + "learning_rate": 8.683181225554108e-06, + "loss": 1.2245, "step": 333 }, { - "epoch": 0.009464705715662105, + "epoch": 0.0130683152046326, "grad_norm": 0.0, - "learning_rate": 6.307837582625118e-06, - "loss": 1.2014, + "learning_rate": 8.709256844850067e-06, + "loss": 1.3786, "step": 334 }, { - "epoch": 0.009493043157924565, + "epoch": 0.013107441896862039, "grad_norm": 0.0, - "learning_rate": 6.326723323890464e-06, - "loss": 1.2161, + "learning_rate": 8.735332464146025e-06, + "loss": 1.3371, "step": 335 }, { - "epoch": 0.009521380600187028, + "epoch": 0.013146568589091478, "grad_norm": 0.0, - "learning_rate": 6.3456090651558075e-06, - "loss": 1.1541, + "learning_rate": 8.761408083441982e-06, + "loss": 1.1729, "step": 336 }, { - "epoch": 0.009549718042449489, + "epoch": 0.013185695281320917, "grad_norm": 0.0, - "learning_rate": 6.364494806421152e-06, - "loss": 1.1417, + "learning_rate": 8.78748370273794e-06, + "loss": 1.2436, "step": 337 }, { - "epoch": 0.00957805548471195, + "epoch": 0.013224821973550356, "grad_norm": 0.0, - "learning_rate": 6.383380547686498e-06, - "loss": 1.2574, + "learning_rate": 8.8135593220339e-06, + "loss": 1.3173, "step": 338 }, { - "epoch": 0.00960639292697441, + "epoch": 0.013263948665779795, "grad_norm": 0.0, - "learning_rate": 6.402266288951842e-06, - "loss": 1.2659, + "learning_rate": 8.839634941329858e-06, + "loss": 1.3599, "step": 339 }, { - "epoch": 0.009634730369236873, + "epoch": 0.013303075358009234, "grad_norm": 0.0, - "learning_rate": 6.421152030217187e-06, - "loss": 1.1527, + "learning_rate": 8.865710560625815e-06, + "loss": 1.2366, "step": 340 }, { - "epoch": 0.009663067811499334, + "epoch": 0.013342202050238674, "grad_norm": 0.0, - "learning_rate": 6.440037771482531e-06, - "loss": 1.1712, + "learning_rate": 8.891786179921774e-06, + "loss": 1.2457, "step": 341 }, { - "epoch": 0.009691405253761795, + "epoch": 0.013381328742468111, "grad_norm": 0.0, - "learning_rate": 6.458923512747876e-06, - "loss": 1.0903, + "learning_rate": 8.917861799217732e-06, + "loss": 1.3181, "step": 342 }, { - "epoch": 0.009719742696024257, + "epoch": 0.01342045543469755, "grad_norm": 0.0, - "learning_rate": 6.477809254013221e-06, - "loss": 1.1907, + "learning_rate": 8.94393741851369e-06, + "loss": 1.3078, "step": 343 }, { - "epoch": 0.009748080138286718, + "epoch": 0.01345958212692699, "grad_norm": 0.0, - "learning_rate": 6.496694995278565e-06, - "loss": 1.1501, + "learning_rate": 8.970013037809648e-06, + "loss": 1.2429, "step": 344 }, { - "epoch": 0.009776417580549179, + "epoch": 0.013498708819156428, "grad_norm": 0.0, - "learning_rate": 6.51558073654391e-06, - "loss": 1.1877, + "learning_rate": 8.996088657105606e-06, + "loss": 1.3337, "step": 345 }, { - "epoch": 0.009804755022811642, + "epoch": 0.013537835511385867, "grad_norm": 0.0, - "learning_rate": 6.5344664778092546e-06, - "loss": 1.1413, + "learning_rate": 9.022164276401565e-06, + "loss": 1.2596, "step": 346 }, { - "epoch": 0.009833092465074102, + "epoch": 0.013576962203615307, "grad_norm": 0.0, - "learning_rate": 6.553352219074599e-06, - "loss": 1.1768, + "learning_rate": 9.048239895697524e-06, + "loss": 1.2844, "step": 347 }, { - "epoch": 0.009861429907336563, + "epoch": 0.013616088895844746, "grad_norm": 0.0, - "learning_rate": 6.572237960339945e-06, - "loss": 1.255, + "learning_rate": 9.074315514993482e-06, + "loss": 1.2958, "step": 348 }, { - "epoch": 0.009889767349599026, + "epoch": 0.013655215588074185, "grad_norm": 0.0, - "learning_rate": 6.591123701605289e-06, - "loss": 1.167, + "learning_rate": 9.100391134289441e-06, + "loss": 1.361, "step": 349 }, { - "epoch": 0.009918104791861487, + "epoch": 0.013694342280303622, "grad_norm": 0.0, - "learning_rate": 6.6100094428706325e-06, - "loss": 1.2211, + "learning_rate": 9.126466753585398e-06, + "loss": 1.2936, "step": 350 }, { - "epoch": 0.009946442234123947, + "epoch": 0.013733468972533061, "grad_norm": 0.0, - "learning_rate": 6.628895184135978e-06, - "loss": 1.1656, + "learning_rate": 9.152542372881356e-06, + "loss": 1.4554, "step": 351 }, { - "epoch": 0.00997477967638641, + "epoch": 0.0137725956647625, "grad_norm": 0.0, - "learning_rate": 6.647780925401323e-06, - "loss": 1.1995, + "learning_rate": 9.178617992177315e-06, + "loss": 1.3511, "step": 352 }, { - "epoch": 0.01000311711864887, + "epoch": 0.01381172235699194, "grad_norm": 0.0, - "learning_rate": 6.666666666666667e-06, - "loss": 1.1136, + "learning_rate": 9.204693611473274e-06, + "loss": 1.3425, "step": 353 }, { - "epoch": 0.010031454560911332, + "epoch": 0.013850849049221379, "grad_norm": 0.0, - "learning_rate": 6.685552407932012e-06, - "loss": 1.1492, + "learning_rate": 9.230769230769232e-06, + "loss": 1.2671, "step": 354 }, { - "epoch": 0.010059792003173794, + "epoch": 0.013889975741450818, "grad_norm": 0.0, - "learning_rate": 6.704438149197356e-06, - "loss": 1.1657, + "learning_rate": 9.25684485006519e-06, + "loss": 1.3187, "step": 355 }, { - "epoch": 0.010088129445436255, + "epoch": 0.013929102433680257, "grad_norm": 0.0, - "learning_rate": 6.723323890462701e-06, - "loss": 1.2016, + "learning_rate": 9.282920469361148e-06, + "loss": 1.4668, "step": 356 }, { - "epoch": 0.010116466887698716, + "epoch": 0.013968229125909696, "grad_norm": 0.0, - "learning_rate": 6.742209631728046e-06, - "loss": 1.2551, + "learning_rate": 9.308996088657106e-06, + "loss": 1.1431, "step": 357 }, { - "epoch": 0.010144804329961178, + "epoch": 0.014007355818139134, "grad_norm": 0.0, - "learning_rate": 6.76109537299339e-06, - "loss": 1.1411, + "learning_rate": 9.335071707953065e-06, + "loss": 1.3146, "step": 358 }, { - "epoch": 0.01017314177222364, + "epoch": 0.014046482510368573, "grad_norm": 0.0, - "learning_rate": 6.779981114258736e-06, - "loss": 1.0932, + "learning_rate": 9.361147327249022e-06, + "loss": 1.3269, "step": 359 }, { - "epoch": 0.0102014792144861, + "epoch": 0.014085609202598012, "grad_norm": 0.0, - "learning_rate": 6.79886685552408e-06, - "loss": 1.2139, + "learning_rate": 9.387222946544982e-06, + "loss": 1.3187, "step": 360 }, { - "epoch": 0.010229816656748563, + "epoch": 0.014124735894827451, "grad_norm": 0.0, - "learning_rate": 6.817752596789424e-06, - "loss": 1.1497, + "learning_rate": 9.41329856584094e-06, + "loss": 1.2446, "step": 361 }, { - "epoch": 0.010258154099011024, + "epoch": 0.01416386258705689, "grad_norm": 0.0, - "learning_rate": 6.83663833805477e-06, - "loss": 1.1794, + "learning_rate": 9.439374185136898e-06, + "loss": 1.2377, "step": 362 }, { - "epoch": 0.010286491541273484, + "epoch": 0.01420298927928633, "grad_norm": 0.0, - "learning_rate": 6.855524079320114e-06, - "loss": 1.1115, + "learning_rate": 9.465449804432857e-06, + "loss": 1.3555, "step": 363 }, { - "epoch": 0.010314828983535945, + "epoch": 0.014242115971515769, "grad_norm": 0.0, - "learning_rate": 6.874409820585458e-06, - "loss": 1.2964, + "learning_rate": 9.491525423728815e-06, + "loss": 1.3267, "step": 364 }, { - "epoch": 0.010343166425798408, + "epoch": 0.014281242663745208, "grad_norm": 0.0, - "learning_rate": 6.893295561850803e-06, - "loss": 1.1651, + "learning_rate": 9.517601043024772e-06, + "loss": 1.2662, "step": 365 }, { - "epoch": 0.010371503868060869, + "epoch": 0.014320369355974645, "grad_norm": 0.0, - "learning_rate": 6.912181303116148e-06, - "loss": 1.3035, + "learning_rate": 9.54367666232073e-06, + "loss": 1.4364, "step": 366 }, { - "epoch": 0.01039984131032333, + "epoch": 0.014359496048204084, "grad_norm": 0.0, - "learning_rate": 6.931067044381492e-06, - "loss": 1.1905, + "learning_rate": 9.56975228161669e-06, + "loss": 1.2874, "step": 367 }, { - "epoch": 0.010428178752585792, + "epoch": 0.014398622740433523, "grad_norm": 0.0, - "learning_rate": 6.949952785646837e-06, - "loss": 1.188, + "learning_rate": 9.595827900912648e-06, + "loss": 1.3748, "step": 368 }, { - "epoch": 0.010456516194848253, + "epoch": 0.014437749432662962, "grad_norm": 0.0, - "learning_rate": 6.968838526912182e-06, - "loss": 1.073, + "learning_rate": 9.621903520208605e-06, + "loss": 1.376, "step": 369 }, { - "epoch": 0.010484853637110714, + "epoch": 0.014476876124892402, "grad_norm": 0.0, - "learning_rate": 6.987724268177527e-06, - "loss": 1.155, + "learning_rate": 9.647979139504564e-06, + "loss": 1.3081, "step": 370 }, { - "epoch": 0.010513191079373176, + "epoch": 0.01451600281712184, "grad_norm": 0.0, - "learning_rate": 7.006610009442871e-06, - "loss": 1.2005, + "learning_rate": 9.674054758800522e-06, + "loss": 1.3496, "step": 371 }, { - "epoch": 0.010541528521635637, + "epoch": 0.01455512950935128, "grad_norm": 0.0, - "learning_rate": 7.025495750708215e-06, - "loss": 1.14, + "learning_rate": 9.70013037809648e-06, + "loss": 1.2365, "step": 372 }, { - "epoch": 0.010569865963898098, + "epoch": 0.014594256201580719, "grad_norm": 0.0, - "learning_rate": 7.044381491973561e-06, - "loss": 1.2338, + "learning_rate": 9.726205997392438e-06, + "loss": 1.3702, "step": 373 }, { - "epoch": 0.01059820340616056, + "epoch": 0.014633382893810156, "grad_norm": 0.0, - "learning_rate": 7.0632672332389055e-06, - "loss": 1.1942, + "learning_rate": 9.752281616688396e-06, + "loss": 1.2757, "step": 374 }, { - "epoch": 0.010626540848423021, + "epoch": 0.014672509586039596, "grad_norm": 0.0, - "learning_rate": 7.082152974504249e-06, - "loss": 1.2418, + "learning_rate": 9.778357235984357e-06, + "loss": 1.3745, "step": 375 }, { - "epoch": 0.010654878290685482, + "epoch": 0.014711636278269035, "grad_norm": 0.0, - "learning_rate": 7.101038715769595e-06, - "loss": 1.2633, + "learning_rate": 9.804432855280314e-06, + "loss": 1.2296, "step": 376 }, { - "epoch": 0.010683215732947945, + "epoch": 0.014750762970498474, "grad_norm": 0.0, - "learning_rate": 7.119924457034939e-06, - "loss": 1.2393, + "learning_rate": 9.830508474576272e-06, + "loss": 1.2949, "step": 377 }, { - "epoch": 0.010711553175210406, + "epoch": 0.014789889662727913, "grad_norm": 0.0, - "learning_rate": 7.1388101983002834e-06, - "loss": 1.2675, + "learning_rate": 9.856584093872231e-06, + "loss": 1.2956, "step": 378 }, { - "epoch": 0.010739890617472866, + "epoch": 0.014829016354957352, "grad_norm": 0.0, - "learning_rate": 7.157695939565629e-06, - "loss": 1.138, + "learning_rate": 9.88265971316819e-06, + "loss": 1.369, "step": 379 }, { - "epoch": 0.010768228059735329, + "epoch": 0.014868143047186791, "grad_norm": 0.0, - "learning_rate": 7.176581680830973e-06, - "loss": 1.126, + "learning_rate": 9.908735332464146e-06, + "loss": 1.4247, "step": 380 }, { - "epoch": 0.01079656550199779, + "epoch": 0.01490726973941623, "grad_norm": 0.0, - "learning_rate": 7.195467422096318e-06, - "loss": 1.0874, + "learning_rate": 9.934810951760105e-06, + "loss": 1.2836, "step": 381 }, { - "epoch": 0.01082490294426025, + "epoch": 0.01494639643164567, "grad_norm": 0.0, - "learning_rate": 7.214353163361662e-06, - "loss": 1.2328, + "learning_rate": 9.960886571056064e-06, + "loss": 1.1904, "step": 382 }, { - "epoch": 0.010853240386522713, + "epoch": 0.014985523123875107, "grad_norm": 0.0, - "learning_rate": 7.233238904627007e-06, - "loss": 1.1919, + "learning_rate": 9.986962190352022e-06, + "loss": 1.4243, "step": 383 }, { - "epoch": 0.010881577828785174, + "epoch": 0.015024649816104546, "grad_norm": 0.0, - "learning_rate": 7.2521246458923525e-06, - "loss": 1.2048, + "learning_rate": 1.0013037809647981e-05, + "loss": 1.3124, "step": 384 }, { - "epoch": 0.010909915271047635, + "epoch": 0.015063776508333985, "grad_norm": 0.0, - "learning_rate": 7.271010387157696e-06, - "loss": 1.1932, + "learning_rate": 1.0039113428943938e-05, + "loss": 1.2766, "step": 385 }, { - "epoch": 0.010938252713310097, + "epoch": 0.015102903200563424, "grad_norm": 0.0, - "learning_rate": 7.289896128423041e-06, - "loss": 1.1366, + "learning_rate": 1.0065189048239897e-05, + "loss": 1.3047, "step": 386 }, { - "epoch": 0.010966590155572558, + "epoch": 0.015142029892792864, "grad_norm": 0.0, - "learning_rate": 7.308781869688386e-06, - "loss": 1.0938, + "learning_rate": 1.0091264667535853e-05, + "loss": 1.3464, "step": 387 }, { - "epoch": 0.01099492759783502, + "epoch": 0.015181156585022303, "grad_norm": 0.0, - "learning_rate": 7.3276676109537305e-06, - "loss": 1.1037, + "learning_rate": 1.0117340286831814e-05, + "loss": 1.2581, "step": 388 }, { - "epoch": 0.01102326504009748, + "epoch": 0.015220283277251742, "grad_norm": 0.0, - "learning_rate": 7.346553352219076e-06, - "loss": 1.0821, + "learning_rate": 1.014341590612777e-05, + "loss": 1.3329, "step": 389 }, { - "epoch": 0.011051602482359943, + "epoch": 0.015259409969481181, "grad_norm": 0.0, - "learning_rate": 7.36543909348442e-06, - "loss": 1.1518, + "learning_rate": 1.016949152542373e-05, + "loss": 1.266, "step": 390 }, { - "epoch": 0.011079939924622403, + "epoch": 0.015298536661710618, "grad_norm": 0.0, - "learning_rate": 7.384324834749765e-06, - "loss": 1.2915, + "learning_rate": 1.0195567144719686e-05, + "loss": 1.3263, "step": 391 }, { - "epoch": 0.011108277366884864, + "epoch": 0.015337663353940057, "grad_norm": 0.0, - "learning_rate": 7.403210576015109e-06, - "loss": 1.2028, + "learning_rate": 1.0221642764015647e-05, + "loss": 1.2424, "step": 392 }, { - "epoch": 0.011136614809147327, + "epoch": 0.015376790046169497, "grad_norm": 0.0, - "learning_rate": 7.422096317280454e-06, - "loss": 1.2163, + "learning_rate": 1.0247718383311605e-05, + "loss": 1.3334, "step": 393 }, { - "epoch": 0.011164952251409788, + "epoch": 0.015415916738398936, "grad_norm": 0.0, - "learning_rate": 7.440982058545798e-06, - "loss": 1.1524, + "learning_rate": 1.0273794002607562e-05, + "loss": 1.3246, "step": 394 }, { - "epoch": 0.011193289693672248, + "epoch": 0.015455043430628375, "grad_norm": 0.0, - "learning_rate": 7.4598677998111434e-06, - "loss": 1.1852, + "learning_rate": 1.0299869621903522e-05, + "loss": 1.22, "step": 395 }, { - "epoch": 0.011221627135934711, + "epoch": 0.015494170122857814, "grad_norm": 0.0, - "learning_rate": 7.478753541076488e-06, - "loss": 1.0468, + "learning_rate": 1.032594524119948e-05, + "loss": 1.3192, "step": 396 }, { - "epoch": 0.011249964578197172, + "epoch": 0.015533296815087253, "grad_norm": 0.0, - "learning_rate": 7.497639282341832e-06, - "loss": 1.1896, + "learning_rate": 1.0352020860495438e-05, + "loss": 1.1805, "step": 397 }, { - "epoch": 0.011278302020459633, + "epoch": 0.015572423507316692, "grad_norm": 0.0, - "learning_rate": 7.5165250236071775e-06, - "loss": 1.231, + "learning_rate": 1.0378096479791395e-05, + "loss": 1.3291, "step": 398 }, { - "epoch": 0.011306639462722095, + "epoch": 0.01561155019954613, "grad_norm": 0.0, - "learning_rate": 7.535410764872521e-06, - "loss": 1.1369, + "learning_rate": 1.0404172099087355e-05, + "loss": 1.3257, "step": 399 }, { - "epoch": 0.011334976904984556, + "epoch": 0.01565067689177557, "grad_norm": 0.0, - "learning_rate": 7.554296506137867e-06, - "loss": 1.2664, + "learning_rate": 1.0430247718383312e-05, + "loss": 1.1376, "step": 400 }, { - "epoch": 0.011363314347247017, + "epoch": 0.01568980358400501, "grad_norm": 0.0, - "learning_rate": 7.573182247403212e-06, - "loss": 1.2499, + "learning_rate": 1.0456323337679271e-05, + "loss": 1.3307, "step": 401 }, { - "epoch": 0.01139165178950948, + "epoch": 0.015728930276234447, "grad_norm": 0.0, - "learning_rate": 7.5920679886685555e-06, - "loss": 1.1228, + "learning_rate": 1.0482398956975228e-05, + "loss": 1.2794, "step": 402 }, { - "epoch": 0.01141998923177194, + "epoch": 0.015768056968463885, "grad_norm": 0.0, - "learning_rate": 7.610953729933901e-06, - "loss": 1.2271, + "learning_rate": 1.0508474576271188e-05, + "loss": 1.197, "step": 403 }, { - "epoch": 0.011448326674034401, + "epoch": 0.015807183660693325, "grad_norm": 0.0, - "learning_rate": 7.629839471199246e-06, - "loss": 1.2048, + "learning_rate": 1.0534550195567145e-05, + "loss": 1.3406, "step": 404 }, { - "epoch": 0.011476664116296864, + "epoch": 0.015846310352922763, "grad_norm": 0.0, - "learning_rate": 7.64872521246459e-06, - "loss": 1.2071, + "learning_rate": 1.0560625814863104e-05, + "loss": 1.366, "step": 405 }, { - "epoch": 0.011505001558559325, + "epoch": 0.015885437045152204, "grad_norm": 0.0, - "learning_rate": 7.667610953729935e-06, - "loss": 1.2441, + "learning_rate": 1.058670143415906e-05, + "loss": 1.387, "step": 406 }, { - "epoch": 0.011533339000821785, + "epoch": 0.01592456373738164, "grad_norm": 0.0, - "learning_rate": 7.686496694995279e-06, - "loss": 1.1868, + "learning_rate": 1.0612777053455021e-05, + "loss": 1.2883, "step": 407 }, { - "epoch": 0.011561676443084248, + "epoch": 0.015963690429611082, "grad_norm": 0.0, - "learning_rate": 7.705382436260623e-06, - "loss": 1.2624, + "learning_rate": 1.063885267275098e-05, + "loss": 1.3931, "step": 408 }, { - "epoch": 0.011590013885346709, + "epoch": 0.01600281712184052, "grad_norm": 0.0, - "learning_rate": 7.724268177525968e-06, - "loss": 1.3582, + "learning_rate": 1.0664928292046937e-05, + "loss": 1.3173, "step": 409 }, { - "epoch": 0.01161835132760917, + "epoch": 0.01604194381406996, "grad_norm": 0.0, - "learning_rate": 7.743153918791312e-06, - "loss": 1.2122, + "learning_rate": 1.0691003911342893e-05, + "loss": 1.3502, "step": 410 }, { - "epoch": 0.011646688769871632, + "epoch": 0.016081070506299398, "grad_norm": 0.0, - "learning_rate": 7.762039660056658e-06, - "loss": 1.2421, + "learning_rate": 1.0717079530638854e-05, + "loss": 1.3649, "step": 411 }, { - "epoch": 0.011675026212134093, + "epoch": 0.016120197198528835, "grad_norm": 0.0, - "learning_rate": 7.780925401322003e-06, - "loss": 1.1799, + "learning_rate": 1.0743155149934812e-05, + "loss": 1.385, "step": 412 }, { - "epoch": 0.011703363654396554, + "epoch": 0.016159323890758276, "grad_norm": 0.0, - "learning_rate": 7.799811142587347e-06, - "loss": 1.1546, + "learning_rate": 1.076923076923077e-05, + "loss": 1.3809, "step": 413 }, { - "epoch": 0.011731701096659015, + "epoch": 0.016198450582987713, "grad_norm": 0.0, - "learning_rate": 7.818696883852693e-06, - "loss": 1.0656, + "learning_rate": 1.0795306388526728e-05, + "loss": 1.3506, "step": 414 }, { - "epoch": 0.011760038538921477, + "epoch": 0.016237577275217154, "grad_norm": 0.0, - "learning_rate": 7.837582625118037e-06, - "loss": 1.2553, + "learning_rate": 1.0821382007822687e-05, + "loss": 1.3571, "step": 415 }, { - "epoch": 0.011788375981183938, + "epoch": 0.01627670396744659, "grad_norm": 0.0, - "learning_rate": 7.85646836638338e-06, - "loss": 1.194, + "learning_rate": 1.0847457627118645e-05, + "loss": 1.3202, "step": 416 }, { - "epoch": 0.011816713423446399, + "epoch": 0.016315830659676032, "grad_norm": 0.0, - "learning_rate": 7.875354107648726e-06, - "loss": 1.1502, + "learning_rate": 1.0873533246414602e-05, + "loss": 1.3489, "step": 417 }, { - "epoch": 0.011845050865708862, + "epoch": 0.01635495735190547, "grad_norm": 0.0, - "learning_rate": 7.89423984891407e-06, - "loss": 1.1394, + "learning_rate": 1.0899608865710562e-05, + "loss": 1.2774, "step": 418 }, { - "epoch": 0.011873388307971322, + "epoch": 0.016394084044134907, "grad_norm": 0.0, - "learning_rate": 7.913125590179416e-06, - "loss": 1.2711, + "learning_rate": 1.092568448500652e-05, + "loss": 1.1565, "step": 419 }, { - "epoch": 0.011901725750233783, + "epoch": 0.016433210736364348, "grad_norm": 0.0, - "learning_rate": 7.93201133144476e-06, - "loss": 1.3054, + "learning_rate": 1.0951760104302478e-05, + "loss": 1.3238, "step": 420 }, { - "epoch": 0.011930063192496246, + "epoch": 0.016472337428593786, "grad_norm": 0.0, - "learning_rate": 7.950897072710105e-06, - "loss": 1.2083, + "learning_rate": 1.0977835723598435e-05, + "loss": 1.4148, "step": 421 }, { - "epoch": 0.011958400634758707, + "epoch": 0.016511464120823226, "grad_norm": 0.0, - "learning_rate": 7.969782813975449e-06, - "loss": 1.223, + "learning_rate": 1.1003911342894395e-05, + "loss": 1.2267, "step": 422 }, { - "epoch": 0.011986738077021167, + "epoch": 0.016550590813052664, "grad_norm": 0.0, - "learning_rate": 7.988668555240794e-06, - "loss": 1.0549, + "learning_rate": 1.1029986962190354e-05, + "loss": 1.343, "step": 423 }, { - "epoch": 0.01201507551928363, + "epoch": 0.016589717505282105, "grad_norm": 0.0, - "learning_rate": 8.007554296506138e-06, - "loss": 1.2508, + "learning_rate": 1.1056062581486311e-05, + "loss": 1.3377, "step": 424 }, { - "epoch": 0.012043412961546091, + "epoch": 0.016628844197511542, "grad_norm": 0.0, - "learning_rate": 8.026440037771484e-06, - "loss": 1.0093, + "learning_rate": 1.108213820078227e-05, + "loss": 1.2331, "step": 425 }, { - "epoch": 0.012071750403808552, + "epoch": 0.016667970889740983, "grad_norm": 0.0, - "learning_rate": 8.045325779036828e-06, - "loss": 1.2766, + "learning_rate": 1.1108213820078228e-05, + "loss": 1.2964, "step": 426 }, { - "epoch": 0.012100087846071014, + "epoch": 0.01670709758197042, "grad_norm": 0.0, - "learning_rate": 8.064211520302171e-06, - "loss": 1.0249, + "learning_rate": 1.1134289439374187e-05, + "loss": 1.2442, "step": 427 }, { - "epoch": 0.012128425288333475, + "epoch": 0.016746224274199858, "grad_norm": 0.0, - "learning_rate": 8.083097261567517e-06, - "loss": 1.2846, + "learning_rate": 1.1160365058670144e-05, + "loss": 1.3762, "step": 428 }, { - "epoch": 0.012156762730595936, + "epoch": 0.0167853509664293, "grad_norm": 0.0, - "learning_rate": 8.101983002832861e-06, - "loss": 1.2424, + "learning_rate": 1.1186440677966102e-05, + "loss": 1.3043, "step": 429 }, { - "epoch": 0.012185100172858399, + "epoch": 0.016824477658658736, "grad_norm": 0.0, - "learning_rate": 8.120868744098206e-06, - "loss": 1.1846, + "learning_rate": 1.1212516297262061e-05, + "loss": 1.3174, "step": 430 }, { - "epoch": 0.01221343761512086, + "epoch": 0.016863604350888177, "grad_norm": 0.0, - "learning_rate": 8.139754485363552e-06, - "loss": 1.1193, + "learning_rate": 1.123859191655802e-05, + "loss": 1.2753, "step": 431 }, { - "epoch": 0.01224177505738332, + "epoch": 0.016902731043117614, "grad_norm": 0.0, - "learning_rate": 8.158640226628896e-06, - "loss": 1.1547, + "learning_rate": 1.1264667535853976e-05, + "loss": 1.3466, "step": 432 }, { - "epoch": 0.012270112499645783, + "epoch": 0.016941857735347055, "grad_norm": 0.0, - "learning_rate": 8.177525967894241e-06, - "loss": 1.0895, + "learning_rate": 1.1290743155149935e-05, + "loss": 1.3131, "step": 433 }, { - "epoch": 0.012298449941908244, + "epoch": 0.016980984427576493, "grad_norm": 0.0, - "learning_rate": 8.196411709159585e-06, - "loss": 1.0972, + "learning_rate": 1.1316818774445895e-05, + "loss": 1.3953, "step": 434 }, { - "epoch": 0.012326787384170704, + "epoch": 0.01702011111980593, "grad_norm": 0.0, - "learning_rate": 8.215297450424929e-06, - "loss": 1.1428, + "learning_rate": 1.1342894393741852e-05, + "loss": 1.2693, "step": 435 }, { - "epoch": 0.012355124826433167, + "epoch": 0.01705923781203537, "grad_norm": 0.0, - "learning_rate": 8.234183191690275e-06, - "loss": 1.1477, + "learning_rate": 1.136897001303781e-05, + "loss": 1.3479, "step": 436 }, { - "epoch": 0.012383462268695628, + "epoch": 0.01709836450426481, "grad_norm": 0.0, - "learning_rate": 8.253068932955619e-06, - "loss": 1.2009, + "learning_rate": 1.139504563233377e-05, + "loss": 1.2905, "step": 437 }, { - "epoch": 0.012411799710958089, + "epoch": 0.01713749119649425, "grad_norm": 0.0, - "learning_rate": 8.271954674220964e-06, - "loss": 1.2119, + "learning_rate": 1.1421121251629728e-05, + "loss": 1.3465, "step": 438 }, { - "epoch": 0.01244013715322055, + "epoch": 0.017176617888723687, "grad_norm": 0.0, - "learning_rate": 8.290840415486308e-06, - "loss": 1.21, + "learning_rate": 1.1447196870925685e-05, + "loss": 1.3466, "step": 439 }, { - "epoch": 0.012468474595483012, + "epoch": 0.017215744580953127, "grad_norm": 0.0, - "learning_rate": 8.309726156751653e-06, - "loss": 1.1614, + "learning_rate": 1.1473272490221644e-05, + "loss": 1.1919, "step": 440 }, { - "epoch": 0.012496812037745473, + "epoch": 0.017254871273182565, "grad_norm": 0.0, - "learning_rate": 8.328611898016999e-06, - "loss": 1.3113, + "learning_rate": 1.1499348109517602e-05, + "loss": 1.3463, "step": 441 }, { - "epoch": 0.012525149480007934, + "epoch": 0.017293997965412006, "grad_norm": 0.0, - "learning_rate": 8.347497639282343e-06, - "loss": 1.1668, + "learning_rate": 1.1525423728813561e-05, + "loss": 1.3576, "step": 442 }, { - "epoch": 0.012553486922270396, + "epoch": 0.017333124657641443, "grad_norm": 0.0, - "learning_rate": 8.366383380547687e-06, - "loss": 1.0338, + "learning_rate": 1.1551499348109518e-05, + "loss": 1.3021, "step": 443 }, { - "epoch": 0.012581824364532857, + "epoch": 0.01737225134987088, "grad_norm": 0.0, - "learning_rate": 8.385269121813032e-06, - "loss": 1.1761, + "learning_rate": 1.1577574967405477e-05, + "loss": 1.2849, "step": 444 }, { - "epoch": 0.012610161806795318, + "epoch": 0.01741137804210032, "grad_norm": 0.0, - "learning_rate": 8.404154863078376e-06, - "loss": 1.1943, + "learning_rate": 1.1603650586701435e-05, + "loss": 1.3307, "step": 445 }, { - "epoch": 0.01263849924905778, + "epoch": 0.01745050473432976, "grad_norm": 0.0, - "learning_rate": 8.42304060434372e-06, - "loss": 1.2905, + "learning_rate": 1.1629726205997394e-05, + "loss": 1.2638, "step": 446 }, { - "epoch": 0.012666836691320241, + "epoch": 0.0174896314265592, "grad_norm": 0.0, - "learning_rate": 8.441926345609066e-06, - "loss": 1.1436, + "learning_rate": 1.165580182529335e-05, + "loss": 1.3296, "step": 447 }, { - "epoch": 0.012695174133582702, + "epoch": 0.017528758118788637, "grad_norm": 0.0, - "learning_rate": 8.460812086874411e-06, - "loss": 1.1173, + "learning_rate": 1.168187744458931e-05, + "loss": 1.2498, "step": 448 }, { - "epoch": 0.012723511575845165, + "epoch": 0.017567884811018078, "grad_norm": 0.0, - "learning_rate": 8.479697828139755e-06, - "loss": 1.2465, + "learning_rate": 1.170795306388527e-05, + "loss": 1.4353, "step": 449 }, { - "epoch": 0.012751849018107626, + "epoch": 0.017607011503247515, "grad_norm": 0.0, - "learning_rate": 8.4985835694051e-06, - "loss": 1.1374, + "learning_rate": 1.1734028683181227e-05, + "loss": 1.2798, "step": 450 }, { - "epoch": 0.012780186460370086, + "epoch": 0.017646138195476953, "grad_norm": 0.0, - "learning_rate": 8.517469310670444e-06, - "loss": 1.2466, + "learning_rate": 1.1760104302477184e-05, + "loss": 1.3431, "step": 451 }, { - "epoch": 0.012808523902632549, + "epoch": 0.017685264887706394, "grad_norm": 0.0, - "learning_rate": 8.53635505193579e-06, - "loss": 1.1504, + "learning_rate": 1.1786179921773142e-05, + "loss": 1.1878, "step": 452 }, { - "epoch": 0.01283686134489501, + "epoch": 0.01772439157993583, "grad_norm": 0.0, - "learning_rate": 8.555240793201134e-06, - "loss": 1.1081, + "learning_rate": 1.1812255541069103e-05, + "loss": 1.3752, "step": 453 }, { - "epoch": 0.01286519878715747, + "epoch": 0.017763518272165272, "grad_norm": 0.0, - "learning_rate": 8.574126534466478e-06, - "loss": 1.233, + "learning_rate": 1.183833116036506e-05, + "loss": 1.3649, "step": 454 }, { - "epoch": 0.012893536229419933, + "epoch": 0.01780264496439471, "grad_norm": 0.0, - "learning_rate": 8.593012275731823e-06, - "loss": 1.155, + "learning_rate": 1.1864406779661018e-05, + "loss": 1.2357, "step": 455 }, { - "epoch": 0.012921873671682394, + "epoch": 0.01784177165662415, "grad_norm": 0.0, - "learning_rate": 8.611898016997167e-06, - "loss": 1.2705, + "learning_rate": 1.1890482398956975e-05, + "loss": 1.295, "step": 456 }, { - "epoch": 0.012950211113944855, + "epoch": 0.017880898348853588, "grad_norm": 0.0, - "learning_rate": 8.630783758262513e-06, - "loss": 1.1406, + "learning_rate": 1.1916558018252935e-05, + "loss": 1.2719, "step": 457 }, { - "epoch": 0.012978548556207318, + "epoch": 0.01792002504108303, "grad_norm": 0.0, - "learning_rate": 8.649669499527858e-06, - "loss": 1.1962, + "learning_rate": 1.1942633637548892e-05, + "loss": 1.2902, "step": 458 }, { - "epoch": 0.013006885998469778, + "epoch": 0.017959151733312466, "grad_norm": 0.0, - "learning_rate": 8.668555240793202e-06, - "loss": 1.0623, + "learning_rate": 1.1968709256844851e-05, + "loss": 1.2144, "step": 459 }, { - "epoch": 0.01303522344073224, + "epoch": 0.017998278425541903, "grad_norm": 0.0, - "learning_rate": 8.687440982058548e-06, - "loss": 1.1702, + "learning_rate": 1.199478487614081e-05, + "loss": 1.2572, "step": 460 }, { - "epoch": 0.013063560882994702, + "epoch": 0.018037405117771344, "grad_norm": 0.0, - "learning_rate": 8.706326723323891e-06, - "loss": 1.1767, + "learning_rate": 1.2020860495436768e-05, + "loss": 1.2002, "step": 461 }, { - "epoch": 0.013091898325257163, + "epoch": 0.01807653181000078, "grad_norm": 0.0, - "learning_rate": 8.725212464589235e-06, - "loss": 1.101, + "learning_rate": 1.2046936114732725e-05, + "loss": 1.3511, "step": 462 }, { - "epoch": 0.013120235767519623, + "epoch": 0.018115658502230222, "grad_norm": 0.0, - "learning_rate": 8.744098205854581e-06, - "loss": 1.2732, + "learning_rate": 1.2073011734028684e-05, + "loss": 1.3073, "step": 463 }, { - "epoch": 0.013148573209782084, + "epoch": 0.01815478519445966, "grad_norm": 0.0, - "learning_rate": 8.762983947119925e-06, - "loss": 1.1124, + "learning_rate": 1.2099087353324644e-05, + "loss": 1.254, "step": 464 }, { - "epoch": 0.013176910652044547, + "epoch": 0.0181939118866891, "grad_norm": 0.0, - "learning_rate": 8.78186968838527e-06, - "loss": 1.1329, + "learning_rate": 1.2125162972620601e-05, + "loss": 1.2787, "step": 465 }, { - "epoch": 0.013205248094307008, + "epoch": 0.018233038578918538, "grad_norm": 0.0, - "learning_rate": 8.800755429650614e-06, - "loss": 1.1687, + "learning_rate": 1.2151238591916558e-05, + "loss": 1.2566, "step": 466 }, { - "epoch": 0.013233585536569469, + "epoch": 0.018272165271147976, "grad_norm": 0.0, - "learning_rate": 8.81964117091596e-06, - "loss": 1.2106, + "learning_rate": 1.2177314211212517e-05, + "loss": 1.3323, "step": 467 }, { - "epoch": 0.013261922978831931, + "epoch": 0.018311291963377416, "grad_norm": 0.0, - "learning_rate": 8.838526912181304e-06, - "loss": 1.0714, + "learning_rate": 1.2203389830508477e-05, + "loss": 1.0441, "step": 468 }, { - "epoch": 0.013290260421094392, + "epoch": 0.018350418655606854, "grad_norm": 0.0, - "learning_rate": 8.857412653446649e-06, - "loss": 1.217, + "learning_rate": 1.2229465449804434e-05, + "loss": 1.2524, "step": 469 }, { - "epoch": 0.013318597863356853, + "epoch": 0.018389545347836295, "grad_norm": 0.0, - "learning_rate": 8.876298394711993e-06, - "loss": 1.1148, + "learning_rate": 1.2255541069100392e-05, + "loss": 1.3871, "step": 470 }, { - "epoch": 0.013346935305619315, + "epoch": 0.018428672040065732, "grad_norm": 0.0, - "learning_rate": 8.895184135977339e-06, - "loss": 1.1125, + "learning_rate": 1.228161668839635e-05, + "loss": 1.3555, "step": 471 }, { - "epoch": 0.013375272747881776, + "epoch": 0.018467798732295173, "grad_norm": 0.0, - "learning_rate": 8.914069877242682e-06, - "loss": 1.2425, + "learning_rate": 1.230769230769231e-05, + "loss": 1.401, "step": 472 }, { - "epoch": 0.013403610190144237, + "epoch": 0.01850692542452461, "grad_norm": 0.0, - "learning_rate": 8.932955618508026e-06, - "loss": 1.1992, + "learning_rate": 1.2333767926988267e-05, + "loss": 1.3975, "step": 473 }, { - "epoch": 0.0134319476324067, + "epoch": 0.01854605211675405, "grad_norm": 0.0, - "learning_rate": 8.951841359773372e-06, - "loss": 1.2588, + "learning_rate": 1.2359843546284225e-05, + "loss": 1.3734, "step": 474 }, { - "epoch": 0.01346028507466916, + "epoch": 0.01858517880898349, "grad_norm": 0.0, - "learning_rate": 8.970727101038716e-06, - "loss": 1.1115, + "learning_rate": 1.2385919165580182e-05, + "loss": 1.3063, "step": 475 }, { - "epoch": 0.013488622516931621, + "epoch": 0.018624305501212926, "grad_norm": 0.0, - "learning_rate": 8.989612842304061e-06, - "loss": 1.0824, + "learning_rate": 1.2411994784876143e-05, + "loss": 1.2352, "step": 476 }, { - "epoch": 0.013516959959194084, + "epoch": 0.018663432193442367, "grad_norm": 0.0, - "learning_rate": 9.008498583569407e-06, - "loss": 1.2454, + "learning_rate": 1.24380704041721e-05, + "loss": 1.3312, "step": 477 }, { - "epoch": 0.013545297401456545, + "epoch": 0.018702558885671804, "grad_norm": 0.0, - "learning_rate": 9.02738432483475e-06, - "loss": 1.1915, + "learning_rate": 1.2464146023468058e-05, + "loss": 1.2395, "step": 478 }, { - "epoch": 0.013573634843719005, + "epoch": 0.018741685577901245, "grad_norm": 0.0, - "learning_rate": 9.046270066100094e-06, - "loss": 1.2052, + "learning_rate": 1.2490221642764018e-05, + "loss": 1.3203, "step": 479 }, { - "epoch": 0.013601972285981468, + "epoch": 0.018780812270130683, "grad_norm": 0.0, - "learning_rate": 9.06515580736544e-06, - "loss": 1.1483, + "learning_rate": 1.2516297262059975e-05, + "loss": 1.2645, "step": 480 }, { - "epoch": 0.013630309728243929, + "epoch": 0.018819938962360123, "grad_norm": 0.0, - "learning_rate": 9.084041548630784e-06, - "loss": 1.2174, + "learning_rate": 1.2542372881355932e-05, + "loss": 1.2374, "step": 481 }, { - "epoch": 0.01365864717050639, + "epoch": 0.01885906565458956, "grad_norm": 0.0, - "learning_rate": 9.10292728989613e-06, - "loss": 1.1077, + "learning_rate": 1.2568448500651891e-05, + "loss": 1.3541, "step": 482 }, { - "epoch": 0.013686984612768852, + "epoch": 0.018898192346819, "grad_norm": 0.0, - "learning_rate": 9.121813031161473e-06, - "loss": 1.2228, + "learning_rate": 1.2594524119947851e-05, + "loss": 1.2851, "step": 483 }, { - "epoch": 0.013715322055031313, + "epoch": 0.01893731903904844, "grad_norm": 0.0, - "learning_rate": 9.140698772426819e-06, - "loss": 1.1239, + "learning_rate": 1.2620599739243808e-05, + "loss": 1.3577, "step": 484 }, { - "epoch": 0.013743659497293774, + "epoch": 0.018976445731277877, "grad_norm": 0.0, - "learning_rate": 9.159584513692163e-06, - "loss": 1.2779, + "learning_rate": 1.2646675358539767e-05, + "loss": 1.2777, "step": 485 }, { - "epoch": 0.013771996939556235, + "epoch": 0.019015572423507317, "grad_norm": 0.0, - "learning_rate": 9.178470254957508e-06, - "loss": 1.1861, + "learning_rate": 1.2672750977835724e-05, + "loss": 1.3284, "step": 486 }, { - "epoch": 0.013800334381818697, + "epoch": 0.019054699115736755, "grad_norm": 0.0, - "learning_rate": 9.197355996222852e-06, - "loss": 1.1501, + "learning_rate": 1.2698826597131684e-05, + "loss": 1.2481, "step": 487 }, { - "epoch": 0.013828671824081158, + "epoch": 0.019093825807966196, "grad_norm": 0.0, - "learning_rate": 9.216241737488198e-06, - "loss": 1.1544, + "learning_rate": 1.2724902216427641e-05, + "loss": 1.2972, "step": 488 }, { - "epoch": 0.013857009266343619, + "epoch": 0.019132952500195633, "grad_norm": 0.0, - "learning_rate": 9.235127478753542e-06, - "loss": 1.2762, + "learning_rate": 1.27509778357236e-05, + "loss": 1.2696, "step": 489 }, { - "epoch": 0.013885346708606082, + "epoch": 0.019172079192425074, "grad_norm": 0.0, - "learning_rate": 9.254013220018887e-06, - "loss": 1.1516, + "learning_rate": 1.2777053455019557e-05, + "loss": 1.3395, "step": 490 }, { - "epoch": 0.013913684150868542, + "epoch": 0.01921120588465451, "grad_norm": 0.0, - "learning_rate": 9.272898961284231e-06, - "loss": 1.178, + "learning_rate": 1.2803129074315517e-05, + "loss": 1.3043, "step": 491 }, { - "epoch": 0.013942021593131003, + "epoch": 0.01925033257688395, "grad_norm": 0.0, - "learning_rate": 9.291784702549575e-06, - "loss": 1.3495, + "learning_rate": 1.2829204693611474e-05, + "loss": 1.3562, "step": 492 }, { - "epoch": 0.013970359035393466, + "epoch": 0.01928945926911339, "grad_norm": 0.0, - "learning_rate": 9.31067044381492e-06, - "loss": 1.1719, + "learning_rate": 1.2855280312907432e-05, + "loss": 1.2086, "step": 493 }, { - "epoch": 0.013998696477655927, + "epoch": 0.019328585961342827, "grad_norm": 0.0, - "learning_rate": 9.329556185080266e-06, - "loss": 1.2208, + "learning_rate": 1.288135593220339e-05, + "loss": 1.3158, "step": 494 }, { - "epoch": 0.014027033919918388, + "epoch": 0.019367712653572268, "grad_norm": 0.0, - "learning_rate": 9.34844192634561e-06, - "loss": 1.2852, + "learning_rate": 1.290743155149935e-05, + "loss": 1.1956, "step": 495 }, { - "epoch": 0.01405537136218085, + "epoch": 0.019406839345801705, "grad_norm": 0.0, - "learning_rate": 9.367327667610955e-06, - "loss": 1.0266, + "learning_rate": 1.2933507170795307e-05, + "loss": 1.2387, "step": 496 }, { - "epoch": 0.014083708804443311, + "epoch": 0.019445966038031146, "grad_norm": 0.0, - "learning_rate": 9.3862134088763e-06, - "loss": 1.1207, + "learning_rate": 1.2959582790091265e-05, + "loss": 1.2819, "step": 497 }, { - "epoch": 0.014112046246705772, + "epoch": 0.019485092730260584, "grad_norm": 0.0, - "learning_rate": 9.405099150141643e-06, - "loss": 1.1588, + "learning_rate": 1.2985658409387226e-05, + "loss": 1.358, "step": 498 }, { - "epoch": 0.014140383688968234, + "epoch": 0.01952421942249002, "grad_norm": 0.0, - "learning_rate": 9.423984891406989e-06, - "loss": 1.1815, + "learning_rate": 1.3011734028683183e-05, + "loss": 1.2592, "step": 499 }, { - "epoch": 0.014168721131230695, + "epoch": 0.019563346114719462, "grad_norm": 0.0, - "learning_rate": 9.442870632672332e-06, - "loss": 1.2175, + "learning_rate": 1.3037809647979141e-05, + "loss": 1.2681, "step": 500 }, { - "epoch": 0.014197058573493156, + "epoch": 0.0196024728069489, "grad_norm": 0.0, - "learning_rate": 9.461756373937678e-06, - "loss": 1.091, + "learning_rate": 1.3063885267275098e-05, + "loss": 1.2669, "step": 501 }, { - "epoch": 0.014225396015755619, + "epoch": 0.01964159949917834, "grad_norm": 0.0, - "learning_rate": 9.480642115203022e-06, - "loss": 1.1046, + "learning_rate": 1.3089960886571058e-05, + "loss": 1.397, "step": 502 }, { - "epoch": 0.01425373345801808, + "epoch": 0.019680726191407778, "grad_norm": 0.0, - "learning_rate": 9.499527856468367e-06, - "loss": 1.1068, + "learning_rate": 1.3116036505867015e-05, + "loss": 1.1334, "step": 503 }, { - "epoch": 0.01428207090028054, + "epoch": 0.01971985288363722, "grad_norm": 0.0, - "learning_rate": 9.518413597733713e-06, - "loss": 1.1844, + "learning_rate": 1.3142112125162974e-05, + "loss": 1.3112, "step": 504 }, { - "epoch": 0.014310408342543003, + "epoch": 0.019758979575866656, "grad_norm": 0.0, - "learning_rate": 9.537299338999057e-06, - "loss": 1.2375, + "learning_rate": 1.3168187744458931e-05, + "loss": 1.4437, "step": 505 }, { - "epoch": 0.014338745784805464, + "epoch": 0.019798106268096097, "grad_norm": 0.0, - "learning_rate": 9.5561850802644e-06, - "loss": 1.2876, + "learning_rate": 1.3194263363754891e-05, + "loss": 1.3493, "step": 506 }, { - "epoch": 0.014367083227067924, + "epoch": 0.019837232960325534, "grad_norm": 0.0, - "learning_rate": 9.575070821529746e-06, - "loss": 1.1637, + "learning_rate": 1.3220338983050848e-05, + "loss": 1.394, "step": 507 }, { - "epoch": 0.014395420669330387, + "epoch": 0.01987635965255497, "grad_norm": 0.0, - "learning_rate": 9.59395656279509e-06, - "loss": 1.1768, + "learning_rate": 1.3246414602346807e-05, + "loss": 1.316, "step": 508 }, { - "epoch": 0.014423758111592848, + "epoch": 0.019915486344784412, "grad_norm": 0.0, - "learning_rate": 9.612842304060434e-06, - "loss": 1.1523, + "learning_rate": 1.3272490221642764e-05, + "loss": 1.4286, "step": 509 }, { - "epoch": 0.014452095553855309, + "epoch": 0.01995461303701385, "grad_norm": 0.0, - "learning_rate": 9.63172804532578e-06, - "loss": 1.161, + "learning_rate": 1.3298565840938724e-05, + "loss": 1.3679, "step": 510 }, { - "epoch": 0.01448043299611777, + "epoch": 0.01999373972924329, "grad_norm": 0.0, - "learning_rate": 9.650613786591125e-06, - "loss": 1.0583, + "learning_rate": 1.3324641460234683e-05, + "loss": 1.3904, "step": 511 }, { - "epoch": 0.014508770438380232, + "epoch": 0.020032866421472728, "grad_norm": 0.0, - "learning_rate": 9.669499527856469e-06, - "loss": 1.1178, + "learning_rate": 1.335071707953064e-05, + "loss": 1.4126, "step": 512 }, { - "epoch": 0.014537107880642693, + "epoch": 0.02007199311370217, "grad_norm": 0.0, - "learning_rate": 9.688385269121814e-06, - "loss": 1.1462, + "learning_rate": 1.3376792698826597e-05, + "loss": 1.2758, "step": 513 }, { - "epoch": 0.014565445322905154, + "epoch": 0.020111119805931606, "grad_norm": 0.0, - "learning_rate": 9.707271010387158e-06, - "loss": 1.0579, + "learning_rate": 1.3402868318122557e-05, + "loss": 1.3433, "step": 514 }, { - "epoch": 0.014593782765167616, + "epoch": 0.020150246498161044, "grad_norm": 0.0, - "learning_rate": 9.726156751652504e-06, - "loss": 1.2625, + "learning_rate": 1.3428943937418515e-05, + "loss": 1.2764, "step": 515 }, { - "epoch": 0.014622120207430077, + "epoch": 0.020189373190390485, "grad_norm": 0.0, - "learning_rate": 9.745042492917848e-06, - "loss": 1.2162, + "learning_rate": 1.3455019556714472e-05, + "loss": 1.4464, "step": 516 }, { - "epoch": 0.014650457649692538, + "epoch": 0.020228499882619922, "grad_norm": 0.0, - "learning_rate": 9.763928234183192e-06, - "loss": 1.1657, + "learning_rate": 1.3481095176010431e-05, + "loss": 1.3225, "step": 517 }, { - "epoch": 0.014678795091955, + "epoch": 0.020267626574849363, "grad_norm": 0.0, - "learning_rate": 9.782813975448537e-06, - "loss": 1.1826, + "learning_rate": 1.350717079530639e-05, + "loss": 1.2516, "step": 518 }, { - "epoch": 0.014707132534217461, + "epoch": 0.0203067532670788, "grad_norm": 0.0, - "learning_rate": 9.801699716713881e-06, - "loss": 1.2114, + "learning_rate": 1.3533246414602348e-05, + "loss": 1.2319, "step": 519 }, { - "epoch": 0.014735469976479922, + "epoch": 0.02034587995930824, "grad_norm": 0.0, - "learning_rate": 9.820585457979227e-06, - "loss": 1.267, + "learning_rate": 1.3559322033898305e-05, + "loss": 1.3469, "step": 520 }, { - "epoch": 0.014763807418742385, + "epoch": 0.02038500665153768, "grad_norm": 0.0, - "learning_rate": 9.839471199244572e-06, - "loss": 1.1984, + "learning_rate": 1.3585397653194266e-05, + "loss": 1.3397, "step": 521 }, { - "epoch": 0.014792144861004846, + "epoch": 0.02042413334376712, "grad_norm": 0.0, - "learning_rate": 9.858356940509916e-06, - "loss": 1.1805, + "learning_rate": 1.3611473272490223e-05, + "loss": 1.2921, "step": 522 }, { - "epoch": 0.014820482303267307, + "epoch": 0.020463260035996557, "grad_norm": 0.0, - "learning_rate": 9.877242681775262e-06, - "loss": 1.2211, + "learning_rate": 1.3637548891786181e-05, + "loss": 1.2772, "step": 523 }, { - "epoch": 0.014848819745529769, + "epoch": 0.020502386728225994, "grad_norm": 0.0, - "learning_rate": 9.896128423040605e-06, - "loss": 1.1942, + "learning_rate": 1.3663624511082138e-05, + "loss": 1.2426, "step": 524 }, { - "epoch": 0.01487715718779223, + "epoch": 0.020541513420455435, "grad_norm": 0.0, - "learning_rate": 9.91501416430595e-06, - "loss": 1.2608, + "learning_rate": 1.3689700130378098e-05, + "loss": 1.255, "step": 525 }, { - "epoch": 0.01490549463005469, + "epoch": 0.020580640112684873, "grad_norm": 0.0, - "learning_rate": 9.933899905571295e-06, - "loss": 1.1016, + "learning_rate": 1.3715775749674057e-05, + "loss": 1.2694, "step": 526 }, { - "epoch": 0.014933832072317153, + "epoch": 0.020619766804914313, "grad_norm": 0.0, - "learning_rate": 9.952785646836639e-06, - "loss": 1.134, + "learning_rate": 1.3741851368970014e-05, + "loss": 1.2217, "step": 527 }, { - "epoch": 0.014962169514579614, + "epoch": 0.02065889349714375, "grad_norm": 0.0, - "learning_rate": 9.971671388101982e-06, - "loss": 1.2915, + "learning_rate": 1.3767926988265971e-05, + "loss": 1.3327, "step": 528 }, { - "epoch": 0.014990506956842075, + "epoch": 0.020698020189373192, "grad_norm": 0.0, - "learning_rate": 9.990557129367328e-06, - "loss": 1.0709, + "learning_rate": 1.3794002607561931e-05, + "loss": 1.283, "step": 529 }, { - "epoch": 0.015018844399104538, + "epoch": 0.02073714688160263, "grad_norm": 0.0, - "learning_rate": 1.0009442870632674e-05, - "loss": 1.118, + "learning_rate": 1.382007822685789e-05, + "loss": 1.2919, "step": 530 }, { - "epoch": 0.015047181841366998, + "epoch": 0.020776273573832067, "grad_norm": 0.0, - "learning_rate": 1.0028328611898017e-05, - "loss": 1.0549, + "learning_rate": 1.3846153846153847e-05, + "loss": 1.2773, "step": 531 }, { - "epoch": 0.01507551928362946, + "epoch": 0.020815400266061507, "grad_norm": 0.0, - "learning_rate": 1.0047214353163361e-05, - "loss": 1.307, + "learning_rate": 1.3872229465449805e-05, + "loss": 1.3373, "step": 532 }, { - "epoch": 0.015103856725891922, + "epoch": 0.020854526958290945, "grad_norm": 0.0, - "learning_rate": 1.0066100094428709e-05, - "loss": 1.1893, + "learning_rate": 1.3898305084745764e-05, + "loss": 1.3696, "step": 533 }, { - "epoch": 0.015132194168154383, + "epoch": 0.020893653650520386, "grad_norm": 0.0, - "learning_rate": 1.0084985835694052e-05, - "loss": 1.1422, + "learning_rate": 1.3924380704041723e-05, + "loss": 1.267, "step": 534 }, { - "epoch": 0.015160531610416843, + "epoch": 0.020932780342749823, "grad_norm": 0.0, - "learning_rate": 1.0103871576959396e-05, - "loss": 1.2535, + "learning_rate": 1.395045632333768e-05, + "loss": 1.3962, "step": 535 }, { - "epoch": 0.015188869052679304, + "epoch": 0.020971907034979264, "grad_norm": 0.0, - "learning_rate": 1.012275731822474e-05, - "loss": 1.1636, + "learning_rate": 1.3976531942633638e-05, + "loss": 1.2551, "step": 536 }, { - "epoch": 0.015217206494941767, + "epoch": 0.0210110337272087, "grad_norm": 0.0, - "learning_rate": 1.0141643059490086e-05, - "loss": 1.1426, + "learning_rate": 1.4002607561929597e-05, + "loss": 1.4036, "step": 537 }, { - "epoch": 0.015245543937204228, + "epoch": 0.021050160419438142, "grad_norm": 0.0, - "learning_rate": 1.0160528800755431e-05, - "loss": 1.1871, + "learning_rate": 1.4028683181225555e-05, + "loss": 1.4045, "step": 538 }, { - "epoch": 0.015273881379466689, + "epoch": 0.02108928711166758, "grad_norm": 0.0, - "learning_rate": 1.0179414542020775e-05, - "loss": 1.1648, + "learning_rate": 1.4054758800521512e-05, + "loss": 1.2567, "step": 539 }, { - "epoch": 0.015302218821729151, + "epoch": 0.021128413803897017, "grad_norm": 0.0, - "learning_rate": 1.019830028328612e-05, - "loss": 1.1185, + "learning_rate": 1.4080834419817473e-05, + "loss": 1.3707, "step": 540 }, { - "epoch": 0.015330556263991612, + "epoch": 0.021167540496126458, "grad_norm": 0.0, - "learning_rate": 1.0217186024551465e-05, - "loss": 1.0786, + "learning_rate": 1.4106910039113431e-05, + "loss": 1.2898, "step": 541 }, { - "epoch": 0.015358893706254073, + "epoch": 0.021206667188355895, "grad_norm": 0.0, - "learning_rate": 1.0236071765816808e-05, - "loss": 1.1891, + "learning_rate": 1.4132985658409388e-05, + "loss": 1.3692, "step": 542 }, { - "epoch": 0.015387231148516535, + "epoch": 0.021245793880585336, "grad_norm": 0.0, - "learning_rate": 1.0254957507082152e-05, - "loss": 1.1305, + "learning_rate": 1.4159061277705345e-05, + "loss": 1.3638, "step": 543 }, { - "epoch": 0.015415568590778996, + "epoch": 0.021284920572814774, "grad_norm": 0.0, - "learning_rate": 1.02738432483475e-05, - "loss": 1.1986, + "learning_rate": 1.4185136897001306e-05, + "loss": 1.2377, "step": 544 }, { - "epoch": 0.015443906033041457, + "epoch": 0.021324047265044214, "grad_norm": 0.0, - "learning_rate": 1.0292728989612843e-05, - "loss": 1.2179, + "learning_rate": 1.4211212516297264e-05, + "loss": 1.3616, "step": 545 }, { - "epoch": 0.01547224347530392, + "epoch": 0.021363173957273652, "grad_norm": 0.0, - "learning_rate": 1.0311614730878187e-05, - "loss": 1.1525, + "learning_rate": 1.4237288135593221e-05, + "loss": 1.4467, "step": 546 }, { - "epoch": 0.01550058091756638, + "epoch": 0.02140230064950309, "grad_norm": 0.0, - "learning_rate": 1.0330500472143533e-05, - "loss": 1.1863, + "learning_rate": 1.426336375488918e-05, + "loss": 1.3633, "step": 547 }, { - "epoch": 0.015528918359828841, + "epoch": 0.02144142734173253, "grad_norm": 0.0, - "learning_rate": 1.0349386213408877e-05, - "loss": 1.1245, + "learning_rate": 1.4289439374185138e-05, + "loss": 1.2937, "step": 548 }, { - "epoch": 0.015557255802091304, + "epoch": 0.021480554033961968, "grad_norm": 0.0, - "learning_rate": 1.0368271954674222e-05, - "loss": 1.1768, + "learning_rate": 1.4315514993481097e-05, + "loss": 1.3168, "step": 549 }, { - "epoch": 0.015585593244353765, + "epoch": 0.02151968072619141, "grad_norm": 0.0, - "learning_rate": 1.0387157695939568e-05, - "loss": 1.1517, + "learning_rate": 1.4341590612777054e-05, + "loss": 1.2364, "step": 550 }, { - "epoch": 0.015613930686616226, + "epoch": 0.021558807418420846, "grad_norm": 0.0, - "learning_rate": 1.0406043437204912e-05, - "loss": 1.2001, + "learning_rate": 1.4367666232073013e-05, + "loss": 1.2286, "step": 551 }, { - "epoch": 0.015642268128878686, + "epoch": 0.021597934110650287, "grad_norm": 0.0, - "learning_rate": 1.0424929178470255e-05, - "loss": 1.0851, + "learning_rate": 1.4393741851368971e-05, + "loss": 1.3817, "step": 552 }, { - "epoch": 0.01567060557114115, + "epoch": 0.021637060802879724, "grad_norm": 0.0, - "learning_rate": 1.04438149197356e-05, - "loss": 1.0633, + "learning_rate": 1.441981747066493e-05, + "loss": 1.3618, "step": 553 }, { - "epoch": 0.01569894301340361, + "epoch": 0.021676187495109165, "grad_norm": 0.0, - "learning_rate": 1.0462700661000945e-05, - "loss": 1.1875, + "learning_rate": 1.4445893089960887e-05, + "loss": 1.1876, "step": 554 }, { - "epoch": 0.01572728045566607, + "epoch": 0.021715314187338602, "grad_norm": 0.0, - "learning_rate": 1.048158640226629e-05, - "loss": 1.2048, + "learning_rate": 1.4471968709256845e-05, + "loss": 1.3948, "step": 555 }, { - "epoch": 0.015755617897928533, + "epoch": 0.02175444087956804, "grad_norm": 0.0, - "learning_rate": 1.0500472143531634e-05, - "loss": 1.0852, + "learning_rate": 1.4498044328552806e-05, + "loss": 1.1573, "step": 556 }, { - "epoch": 0.015783955340190996, + "epoch": 0.02179356757179748, "grad_norm": 0.0, - "learning_rate": 1.051935788479698e-05, - "loss": 1.1334, + "learning_rate": 1.4524119947848763e-05, + "loss": 1.1481, "step": 557 }, { - "epoch": 0.015812292782453455, + "epoch": 0.021832694264026918, "grad_norm": 0.0, - "learning_rate": 1.0538243626062324e-05, - "loss": 1.198, + "learning_rate": 1.455019556714472e-05, + "loss": 1.2844, "step": 558 }, { - "epoch": 0.015840630224715917, + "epoch": 0.02187182095625636, "grad_norm": 0.0, - "learning_rate": 1.0557129367327668e-05, - "loss": 1.2544, + "learning_rate": 1.4576271186440678e-05, + "loss": 1.3307, "step": 559 }, { - "epoch": 0.01586896766697838, + "epoch": 0.021910947648485796, "grad_norm": 0.0, - "learning_rate": 1.0576015108593015e-05, - "loss": 1.1927, + "learning_rate": 1.4602346805736639e-05, + "loss": 1.392, "step": 560 }, { - "epoch": 0.01589730510924084, + "epoch": 0.021950074340715237, "grad_norm": 0.0, - "learning_rate": 1.0594900849858359e-05, - "loss": 1.1464, + "learning_rate": 1.4628422425032595e-05, + "loss": 1.2487, "step": 561 }, { - "epoch": 0.0159256425515033, + "epoch": 0.021989201032944675, "grad_norm": 0.0, - "learning_rate": 1.0613786591123702e-05, - "loss": 1.113, + "learning_rate": 1.4654498044328554e-05, + "loss": 1.3196, "step": 562 }, { - "epoch": 0.015953979993765764, + "epoch": 0.022028327725174112, "grad_norm": 0.0, - "learning_rate": 1.0632672332389046e-05, - "loss": 1.085, + "learning_rate": 1.4680573663624513e-05, + "loss": 1.1874, "step": 563 }, { - "epoch": 0.015982317436028223, + "epoch": 0.022067454417403553, "grad_norm": 0.0, - "learning_rate": 1.0651558073654392e-05, - "loss": 1.2026, + "learning_rate": 1.4706649282920471e-05, + "loss": 1.2336, "step": 564 }, { - "epoch": 0.016010654878290686, + "epoch": 0.02210658110963299, "grad_norm": 0.0, - "learning_rate": 1.0670443814919737e-05, - "loss": 1.1462, + "learning_rate": 1.4732724902216428e-05, + "loss": 1.2797, "step": 565 }, { - "epoch": 0.01603899232055315, + "epoch": 0.02214570780186243, "grad_norm": 0.0, - "learning_rate": 1.0689329556185081e-05, - "loss": 1.1565, + "learning_rate": 1.4758800521512387e-05, + "loss": 1.2125, "step": 566 }, { - "epoch": 0.016067329762815608, + "epoch": 0.02218483449409187, "grad_norm": 0.0, - "learning_rate": 1.0708215297450427e-05, - "loss": 1.1434, + "learning_rate": 1.4784876140808346e-05, + "loss": 1.3219, "step": 567 }, { - "epoch": 0.01609566720507807, + "epoch": 0.02222396118632131, "grad_norm": 0.0, - "learning_rate": 1.072710103871577e-05, - "loss": 1.1495, + "learning_rate": 1.4810951760104304e-05, + "loss": 1.2401, "step": 568 }, { - "epoch": 0.016124004647340533, + "epoch": 0.022263087878550747, "grad_norm": 0.0, - "learning_rate": 1.0745986779981115e-05, - "loss": 1.2269, + "learning_rate": 1.4837027379400261e-05, + "loss": 1.3568, "step": 569 }, { - "epoch": 0.016152342089602992, + "epoch": 0.022302214570780188, "grad_norm": 0.0, - "learning_rate": 1.0764872521246458e-05, - "loss": 1.2084, + "learning_rate": 1.486310299869622e-05, + "loss": 1.3213, "step": 570 }, { - "epoch": 0.016180679531865454, + "epoch": 0.022341341263009625, "grad_norm": 0.0, - "learning_rate": 1.0783758262511806e-05, - "loss": 1.2275, + "learning_rate": 1.488917861799218e-05, + "loss": 1.1525, "step": 571 }, { - "epoch": 0.016209016974127917, + "epoch": 0.022380467955239063, "grad_norm": 0.0, - "learning_rate": 1.080264400377715e-05, - "loss": 1.2118, + "learning_rate": 1.4915254237288137e-05, + "loss": 1.3313, "step": 572 }, { - "epoch": 0.016237354416390376, + "epoch": 0.022419594647468503, "grad_norm": 0.0, - "learning_rate": 1.0821529745042493e-05, - "loss": 1.3405, + "learning_rate": 1.4941329856584096e-05, + "loss": 1.3093, "step": 573 }, { - "epoch": 0.01626569185865284, + "epoch": 0.02245872133969794, "grad_norm": 0.0, - "learning_rate": 1.0840415486307839e-05, - "loss": 1.1417, + "learning_rate": 1.4967405475880053e-05, + "loss": 1.2823, "step": 574 }, { - "epoch": 0.016294029300915298, + "epoch": 0.02249784803192738, "grad_norm": 0.0, - "learning_rate": 1.0859301227573183e-05, - "loss": 1.1895, + "learning_rate": 1.4993481095176013e-05, + "loss": 1.3871, "step": 575 }, { - "epoch": 0.01632236674317776, + "epoch": 0.02253697472415682, "grad_norm": 0.0, - "learning_rate": 1.0878186968838528e-05, - "loss": 1.1922, + "learning_rate": 1.501955671447197e-05, + "loss": 1.316, "step": 576 }, { - "epoch": 0.016350704185440223, + "epoch": 0.02257610141638626, "grad_norm": 0.0, - "learning_rate": 1.0897072710103874e-05, - "loss": 1.1641, + "learning_rate": 1.5045632333767928e-05, + "loss": 1.2987, "step": 577 }, { - "epoch": 0.016379041627702682, + "epoch": 0.022615228108615697, "grad_norm": 0.0, - "learning_rate": 1.0915958451369218e-05, - "loss": 1.0761, + "learning_rate": 1.5071707953063885e-05, + "loss": 1.2885, "step": 578 }, { - "epoch": 0.016407379069965145, + "epoch": 0.02265435480084514, "grad_norm": 0.0, - "learning_rate": 1.0934844192634562e-05, - "loss": 1.2097, + "learning_rate": 1.5097783572359846e-05, + "loss": 1.3373, "step": 579 }, { - "epoch": 0.016435716512227607, + "epoch": 0.022693481493074576, "grad_norm": 0.0, - "learning_rate": 1.0953729933899905e-05, - "loss": 1.1974, + "learning_rate": 1.5123859191655803e-05, + "loss": 1.3535, "step": 580 }, { - "epoch": 0.016464053954490066, + "epoch": 0.022732608185304013, "grad_norm": 0.0, - "learning_rate": 1.097261567516525e-05, - "loss": 1.1616, + "learning_rate": 1.5149934810951761e-05, + "loss": 1.2526, "step": 581 }, { - "epoch": 0.01649239139675253, + "epoch": 0.022771734877533454, "grad_norm": 0.0, - "learning_rate": 1.0991501416430597e-05, - "loss": 1.1666, + "learning_rate": 1.517601043024772e-05, + "loss": 1.3046, "step": 582 }, { - "epoch": 0.01652072883901499, + "epoch": 0.02281086156976289, "grad_norm": 0.0, - "learning_rate": 1.101038715769594e-05, - "loss": 1.1404, + "learning_rate": 1.5202086049543678e-05, + "loss": 1.2877, "step": 583 }, { - "epoch": 0.01654906628127745, + "epoch": 0.022849988261992332, "grad_norm": 0.0, - "learning_rate": 1.1029272898961286e-05, - "loss": 1.1824, + "learning_rate": 1.5228161668839635e-05, + "loss": 1.2001, "step": 584 }, { - "epoch": 0.016577403723539913, + "epoch": 0.02288911495422177, "grad_norm": 0.0, - "learning_rate": 1.104815864022663e-05, - "loss": 1.1137, + "learning_rate": 1.5254237288135594e-05, + "loss": 1.3007, "step": 585 }, { - "epoch": 0.016605741165802376, + "epoch": 0.02292824164645121, "grad_norm": 0.0, - "learning_rate": 1.1067044381491974e-05, - "loss": 1.2334, + "learning_rate": 1.5280312907431553e-05, + "loss": 1.2885, "step": 586 }, { - "epoch": 0.016634078608064835, + "epoch": 0.022967368338680648, "grad_norm": 0.0, - "learning_rate": 1.1085930122757321e-05, - "loss": 1.1588, + "learning_rate": 1.5306388526727513e-05, + "loss": 1.2919, "step": 587 }, { - "epoch": 0.016662416050327297, + "epoch": 0.023006495030910085, "grad_norm": 0.0, - "learning_rate": 1.1104815864022665e-05, - "loss": 1.1977, + "learning_rate": 1.533246414602347e-05, + "loss": 1.3212, "step": 588 }, { - "epoch": 0.01669075349258976, + "epoch": 0.023045621723139526, "grad_norm": 0.0, - "learning_rate": 1.1123701605288009e-05, - "loss": 1.129, + "learning_rate": 1.5358539765319427e-05, + "loss": 1.2966, "step": 589 }, { - "epoch": 0.01671909093485222, + "epoch": 0.023084748415368964, "grad_norm": 0.0, - "learning_rate": 1.1142587346553353e-05, - "loss": 1.0936, + "learning_rate": 1.5384615384615387e-05, + "loss": 1.3199, "step": 590 }, { - "epoch": 0.01674742837711468, + "epoch": 0.023123875107598404, "grad_norm": 0.0, - "learning_rate": 1.1161473087818696e-05, - "loss": 1.2652, + "learning_rate": 1.5410691003911344e-05, + "loss": 1.3271, "step": 591 }, { - "epoch": 0.016775765819377144, + "epoch": 0.023163001799827842, "grad_norm": 0.0, - "learning_rate": 1.1180358829084042e-05, - "loss": 1.1779, + "learning_rate": 1.54367666232073e-05, + "loss": 1.2537, "step": 592 }, { - "epoch": 0.016804103261639603, + "epoch": 0.023202128492057283, "grad_norm": 0.0, - "learning_rate": 1.1199244570349388e-05, - "loss": 1.2056, + "learning_rate": 1.546284224250326e-05, + "loss": 1.2363, "step": 593 }, { - "epoch": 0.016832440703902066, + "epoch": 0.02324125518428672, "grad_norm": 0.0, - "learning_rate": 1.1218130311614731e-05, - "loss": 1.1441, + "learning_rate": 1.5488917861799218e-05, + "loss": 1.3585, "step": 594 }, { - "epoch": 0.01686077814616453, + "epoch": 0.02328038187651616, "grad_norm": 0.0, - "learning_rate": 1.1237016052880077e-05, - "loss": 1.1894, + "learning_rate": 1.551499348109518e-05, + "loss": 1.356, "step": 595 }, { - "epoch": 0.016889115588426987, + "epoch": 0.0233195085687456, "grad_norm": 0.0, - "learning_rate": 1.125590179414542e-05, - "loss": 1.2842, + "learning_rate": 1.5541069100391136e-05, + "loss": 1.3203, "step": 596 }, { - "epoch": 0.01691745303068945, + "epoch": 0.023358635260975036, "grad_norm": 0.0, - "learning_rate": 1.1274787535410765e-05, - "loss": 1.1052, + "learning_rate": 1.5567144719687092e-05, + "loss": 1.3473, "step": 597 }, { - "epoch": 0.016945790472951913, + "epoch": 0.023397761953204477, "grad_norm": 0.0, - "learning_rate": 1.1293673276676112e-05, - "loss": 1.0623, + "learning_rate": 1.5593220338983053e-05, + "loss": 1.2452, "step": 598 }, { - "epoch": 0.01697412791521437, + "epoch": 0.023436888645433914, "grad_norm": 0.0, - "learning_rate": 1.1312559017941456e-05, - "loss": 1.1542, + "learning_rate": 1.561929595827901e-05, + "loss": 1.2676, "step": 599 }, { - "epoch": 0.017002465357476834, + "epoch": 0.023476015337663355, "grad_norm": 0.0, - "learning_rate": 1.13314447592068e-05, - "loss": 1.196, + "learning_rate": 1.5645371577574967e-05, + "loss": 1.2343, "step": 600 }, { - "epoch": 0.017030802799739297, + "epoch": 0.023515142029892792, "grad_norm": 0.0, - "learning_rate": 1.1350330500472143e-05, - "loss": 1.1997, + "learning_rate": 1.5671447196870927e-05, + "loss": 1.2606, "step": 601 }, { - "epoch": 0.017059140242001756, + "epoch": 0.023554268722122233, "grad_norm": 0.0, - "learning_rate": 1.1369216241737489e-05, - "loss": 1.1639, + "learning_rate": 1.5697522816166887e-05, + "loss": 1.3198, "step": 602 }, { - "epoch": 0.01708747768426422, + "epoch": 0.02359339541435167, "grad_norm": 0.0, - "learning_rate": 1.1388101983002833e-05, - "loss": 1.1685, + "learning_rate": 1.5723598435462844e-05, + "loss": 1.2146, "step": 603 }, { - "epoch": 0.01711581512652668, + "epoch": 0.023632522106581108, "grad_norm": 0.0, - "learning_rate": 1.1406987724268178e-05, - "loss": 1.1928, + "learning_rate": 1.57496740547588e-05, + "loss": 1.302, "step": 604 }, { - "epoch": 0.01714415256878914, + "epoch": 0.02367164879881055, "grad_norm": 0.0, - "learning_rate": 1.1425873465533524e-05, - "loss": 1.1805, + "learning_rate": 1.577574967405476e-05, + "loss": 1.436, "step": 605 }, { - "epoch": 0.017172490011051603, + "epoch": 0.023710775491039986, "grad_norm": 0.0, - "learning_rate": 1.1444759206798868e-05, - "loss": 1.1537, + "learning_rate": 1.580182529335072e-05, + "loss": 1.2823, "step": 606 }, { - "epoch": 0.017200827453314065, + "epoch": 0.023749902183269427, "grad_norm": 0.0, - "learning_rate": 1.1463644948064212e-05, - "loss": 1.0925, + "learning_rate": 1.5827900912646675e-05, + "loss": 1.3143, "step": 607 }, { - "epoch": 0.017229164895576524, + "epoch": 0.023789028875498865, "grad_norm": 0.0, - "learning_rate": 1.1482530689329556e-05, - "loss": 1.2629, + "learning_rate": 1.5853976531942636e-05, + "loss": 1.412, "step": 608 }, { - "epoch": 0.017257502337838987, + "epoch": 0.023828155567728306, "grad_norm": 0.0, - "learning_rate": 1.1501416430594903e-05, - "loss": 1.138, + "learning_rate": 1.5880052151238593e-05, + "loss": 1.3329, "step": 609 }, { - "epoch": 0.01728583978010145, + "epoch": 0.023867282259957743, "grad_norm": 0.0, - "learning_rate": 1.1520302171860247e-05, - "loss": 1.1708, + "learning_rate": 1.5906127770534553e-05, + "loss": 1.2751, "step": 610 }, { - "epoch": 0.01731417722236391, + "epoch": 0.023906408952187184, "grad_norm": 0.0, - "learning_rate": 1.153918791312559e-05, - "loss": 1.0527, + "learning_rate": 1.593220338983051e-05, + "loss": 1.2751, "step": 611 }, { - "epoch": 0.01734251466462637, + "epoch": 0.02394553564441662, "grad_norm": 0.0, - "learning_rate": 1.1558073654390936e-05, - "loss": 1.1119, + "learning_rate": 1.5958279009126467e-05, + "loss": 1.2504, "step": 612 }, { - "epoch": 0.017370852106888834, + "epoch": 0.02398466233664606, "grad_norm": 0.0, - "learning_rate": 1.157695939565628e-05, - "loss": 1.0544, + "learning_rate": 1.5984354628422427e-05, + "loss": 1.3025, "step": 613 }, { - "epoch": 0.017399189549151293, + "epoch": 0.0240237890288755, "grad_norm": 0.0, - "learning_rate": 1.1595845136921624e-05, - "loss": 1.1486, + "learning_rate": 1.6010430247718384e-05, + "loss": 1.2187, "step": 614 }, { - "epoch": 0.017427526991413755, + "epoch": 0.024062915721104937, "grad_norm": 0.0, - "learning_rate": 1.1614730878186971e-05, - "loss": 1.2757, + "learning_rate": 1.603650586701434e-05, + "loss": 1.2695, "step": 615 }, { - "epoch": 0.017455864433676218, + "epoch": 0.024102042413334378, "grad_norm": 0.0, - "learning_rate": 1.1633616619452315e-05, - "loss": 1.1553, + "learning_rate": 1.60625814863103e-05, + "loss": 1.306, "step": 616 }, { - "epoch": 0.017484201875938677, + "epoch": 0.024141169105563815, "grad_norm": 0.0, - "learning_rate": 1.1652502360717659e-05, - "loss": 1.1233, + "learning_rate": 1.608865710560626e-05, + "loss": 1.1747, "step": 617 }, { - "epoch": 0.01751253931820114, + "epoch": 0.024180295797793256, "grad_norm": 0.0, - "learning_rate": 1.1671388101983003e-05, - "loss": 1.0777, + "learning_rate": 1.611473272490222e-05, + "loss": 1.327, "step": 618 }, { - "epoch": 0.017540876760463602, + "epoch": 0.024219422490022693, "grad_norm": 0.0, - "learning_rate": 1.1690273843248348e-05, - "loss": 1.1747, + "learning_rate": 1.6140808344198176e-05, + "loss": 1.3613, "step": 619 }, { - "epoch": 0.01756921420272606, + "epoch": 0.02425854918225213, "grad_norm": 0.0, - "learning_rate": 1.1709159584513694e-05, - "loss": 1.1588, + "learning_rate": 1.6166883963494132e-05, + "loss": 1.3497, "step": 620 }, { - "epoch": 0.017597551644988524, + "epoch": 0.02429767587448157, "grad_norm": 0.0, - "learning_rate": 1.1728045325779038e-05, - "loss": 1.1856, + "learning_rate": 1.6192959582790093e-05, + "loss": 1.4113, "step": 621 }, { - "epoch": 0.017625889087250986, + "epoch": 0.02433680256671101, "grad_norm": 0.0, - "learning_rate": 1.1746931067044383e-05, - "loss": 1.139, + "learning_rate": 1.621903520208605e-05, + "loss": 1.1857, "step": 622 }, { - "epoch": 0.017654226529513446, + "epoch": 0.02437592925894045, "grad_norm": 0.0, - "learning_rate": 1.1765816808309727e-05, - "loss": 1.0848, + "learning_rate": 1.624511082138201e-05, + "loss": 1.302, "step": 623 }, { - "epoch": 0.017682563971775908, + "epoch": 0.024415055951169887, "grad_norm": 0.0, - "learning_rate": 1.178470254957507e-05, - "loss": 1.2071, + "learning_rate": 1.6271186440677967e-05, + "loss": 1.2444, "step": 624 }, { - "epoch": 0.017710901414038367, + "epoch": 0.024454182643399328, "grad_norm": 0.0, - "learning_rate": 1.1803588290840415e-05, - "loss": 1.165, + "learning_rate": 1.6297262059973927e-05, + "loss": 1.3859, "step": 625 }, { - "epoch": 0.01773923885630083, + "epoch": 0.024493309335628766, "grad_norm": 0.0, - "learning_rate": 1.1822474032105762e-05, - "loss": 1.1826, + "learning_rate": 1.6323337679269884e-05, + "loss": 1.3608, "step": 626 }, { - "epoch": 0.017767576298563292, + "epoch": 0.024532436027858207, "grad_norm": 0.0, - "learning_rate": 1.1841359773371106e-05, - "loss": 1.1346, + "learning_rate": 1.634941329856584e-05, + "loss": 1.3057, "step": 627 }, { - "epoch": 0.01779591374082575, + "epoch": 0.024571562720087644, "grad_norm": 0.0, - "learning_rate": 1.186024551463645e-05, - "loss": 1.197, + "learning_rate": 1.63754889178618e-05, + "loss": 1.3798, "step": 628 }, { - "epoch": 0.017824251183088214, + "epoch": 0.02461068941231708, "grad_norm": 0.0, - "learning_rate": 1.1879131255901795e-05, - "loss": 1.1392, + "learning_rate": 1.640156453715776e-05, + "loss": 1.3898, "step": 629 }, { - "epoch": 0.017852588625350677, + "epoch": 0.024649816104546522, "grad_norm": 0.0, - "learning_rate": 1.1898016997167139e-05, - "loss": 1.0493, + "learning_rate": 1.6427640156453715e-05, + "loss": 1.2563, "step": 630 }, { - "epoch": 0.017880926067613136, + "epoch": 0.02468894279677596, "grad_norm": 0.0, - "learning_rate": 1.1916902738432485e-05, - "loss": 1.1349, + "learning_rate": 1.6453715775749676e-05, + "loss": 1.3121, "step": 631 }, { - "epoch": 0.017909263509875598, + "epoch": 0.0247280694890054, "grad_norm": 0.0, - "learning_rate": 1.193578847969783e-05, - "loss": 1.1688, + "learning_rate": 1.6479791395045636e-05, + "loss": 1.2454, "step": 632 }, { - "epoch": 0.01793760095213806, + "epoch": 0.024767196181234838, "grad_norm": 0.0, - "learning_rate": 1.1954674220963174e-05, - "loss": 1.1362, + "learning_rate": 1.6505867014341593e-05, + "loss": 1.4177, "step": 633 }, { - "epoch": 0.01796593839440052, + "epoch": 0.02480632287346428, "grad_norm": 0.0, - "learning_rate": 1.1973559962228518e-05, - "loss": 1.256, + "learning_rate": 1.653194263363755e-05, + "loss": 1.3256, "step": 634 }, { - "epoch": 0.017994275836662982, + "epoch": 0.024845449565693716, "grad_norm": 0.0, - "learning_rate": 1.1992445703493862e-05, - "loss": 1.1974, + "learning_rate": 1.6558018252933507e-05, + "loss": 1.3937, "step": 635 }, { - "epoch": 0.018022613278925445, + "epoch": 0.024884576257923154, "grad_norm": 0.0, - "learning_rate": 1.2011331444759209e-05, - "loss": 1.2349, + "learning_rate": 1.6584093872229467e-05, + "loss": 1.3501, "step": 636 }, { - "epoch": 0.018050950721187904, + "epoch": 0.024923702950152594, "grad_norm": 0.0, - "learning_rate": 1.2030217186024553e-05, - "loss": 1.1238, + "learning_rate": 1.6610169491525424e-05, + "loss": 1.2028, "step": 637 }, { - "epoch": 0.018079288163450367, + "epoch": 0.024962829642382032, "grad_norm": 0.0, - "learning_rate": 1.2049102927289897e-05, - "loss": 1.2486, + "learning_rate": 1.6636245110821384e-05, + "loss": 1.3718, "step": 638 }, { - "epoch": 0.01810762560571283, + "epoch": 0.025001956334611473, "grad_norm": 0.0, - "learning_rate": 1.2067988668555242e-05, - "loss": 1.1445, + "learning_rate": 1.666232073011734e-05, + "loss": 1.2925, "step": 639 }, { - "epoch": 0.01813596304797529, + "epoch": 0.02504108302684091, "grad_norm": 0.0, - "learning_rate": 1.2086874409820586e-05, - "loss": 1.1856, + "learning_rate": 1.66883963494133e-05, + "loss": 1.3925, "step": 640 }, { - "epoch": 0.01816430049023775, + "epoch": 0.02508020971907035, "grad_norm": 0.0, - "learning_rate": 1.210576015108593e-05, - "loss": 1.0592, + "learning_rate": 1.671447196870926e-05, + "loss": 1.3677, "step": 641 }, { - "epoch": 0.018192637932500214, + "epoch": 0.02511933641129979, "grad_norm": 0.0, - "learning_rate": 1.2124645892351277e-05, - "loss": 1.1702, + "learning_rate": 1.6740547588005215e-05, + "loss": 1.2722, "step": 642 }, { - "epoch": 0.018220975374762673, + "epoch": 0.02515846310352923, "grad_norm": 0.0, - "learning_rate": 1.2143531633616621e-05, - "loss": 1.1489, + "learning_rate": 1.6766623207301176e-05, + "loss": 1.2402, "step": 643 }, { - "epoch": 0.018249312817025135, + "epoch": 0.025197589795758667, "grad_norm": 0.0, - "learning_rate": 1.2162417374881965e-05, - "loss": 1.1703, + "learning_rate": 1.6792698826597133e-05, + "loss": 1.2477, "step": 644 }, { - "epoch": 0.018277650259287598, + "epoch": 0.025236716487988104, "grad_norm": 0.0, - "learning_rate": 1.2181303116147309e-05, - "loss": 1.2722, + "learning_rate": 1.681877444589309e-05, + "loss": 1.1802, "step": 645 }, { - "epoch": 0.018305987701550057, + "epoch": 0.025275843180217545, "grad_norm": 0.0, - "learning_rate": 1.2200188857412654e-05, - "loss": 1.1605, + "learning_rate": 1.684485006518905e-05, + "loss": 1.3247, "step": 646 }, { - "epoch": 0.01833432514381252, + "epoch": 0.025314969872446982, "grad_norm": 0.0, - "learning_rate": 1.2219074598678e-05, - "loss": 1.1337, + "learning_rate": 1.687092568448501e-05, + "loss": 1.374, "step": 647 }, { - "epoch": 0.018362662586074982, + "epoch": 0.025354096564676423, "grad_norm": 0.0, - "learning_rate": 1.2237960339943344e-05, - "loss": 0.936, + "learning_rate": 1.6897001303780967e-05, + "loss": 1.2789, "step": 648 }, { - "epoch": 0.01839100002833744, + "epoch": 0.02539322325690586, "grad_norm": 0.0, - "learning_rate": 1.225684608120869e-05, - "loss": 1.2885, + "learning_rate": 1.6923076923076924e-05, + "loss": 1.2743, "step": 649 }, { - "epoch": 0.018419337470599904, + "epoch": 0.0254323499491353, "grad_norm": 0.0, - "learning_rate": 1.2275731822474033e-05, - "loss": 1.0853, + "learning_rate": 1.694915254237288e-05, + "loss": 1.3802, "step": 650 }, { - "epoch": 0.018447674912862366, + "epoch": 0.02547147664136474, "grad_norm": 0.0, - "learning_rate": 1.2294617563739377e-05, - "loss": 1.1705, + "learning_rate": 1.697522816166884e-05, + "loss": 1.3582, "step": 651 }, { - "epoch": 0.018476012355124825, + "epoch": 0.025510603333594176, "grad_norm": 0.0, - "learning_rate": 1.2313503305004721e-05, - "loss": 1.2724, + "learning_rate": 1.70013037809648e-05, + "loss": 1.3396, "step": 652 }, { - "epoch": 0.018504349797387288, + "epoch": 0.025549730025823617, "grad_norm": 0.0, - "learning_rate": 1.2332389046270068e-05, - "loss": 1.3173, + "learning_rate": 1.702737940026076e-05, + "loss": 1.269, "step": 653 }, { - "epoch": 0.01853268723964975, + "epoch": 0.025588856718053055, "grad_norm": 0.0, - "learning_rate": 1.2351274787535412e-05, - "loss": 1.2118, + "learning_rate": 1.7053455019556716e-05, + "loss": 1.3394, "step": 654 }, { - "epoch": 0.01856102468191221, + "epoch": 0.025627983410282495, "grad_norm": 0.0, - "learning_rate": 1.2370160528800756e-05, - "loss": 1.123, + "learning_rate": 1.7079530638852676e-05, + "loss": 1.3657, "step": 655 }, { - "epoch": 0.018589362124174672, + "epoch": 0.025667110102511933, "grad_norm": 0.0, - "learning_rate": 1.2389046270066101e-05, - "loss": 1.0534, + "learning_rate": 1.7105606258148633e-05, + "loss": 1.3312, "step": 656 }, { - "epoch": 0.018617699566437135, + "epoch": 0.025706236794741374, "grad_norm": 0.0, - "learning_rate": 1.2407932011331445e-05, - "loss": 1.2331, + "learning_rate": 1.713168187744459e-05, + "loss": 1.3413, "step": 657 }, { - "epoch": 0.018646037008699594, + "epoch": 0.02574536348697081, "grad_norm": 0.0, - "learning_rate": 1.242681775259679e-05, - "loss": 1.1987, + "learning_rate": 1.7157757496740547e-05, + "loss": 1.3212, "step": 658 }, { - "epoch": 0.018674374450962056, + "epoch": 0.025784490179200252, "grad_norm": 0.0, - "learning_rate": 1.2445703493862136e-05, - "loss": 1.1526, + "learning_rate": 1.7183833116036507e-05, + "loss": 1.3818, "step": 659 }, { - "epoch": 0.01870271189322452, + "epoch": 0.02582361687142969, "grad_norm": 0.0, - "learning_rate": 1.246458923512748e-05, - "loss": 1.2217, + "learning_rate": 1.7209908735332464e-05, + "loss": 1.2554, "step": 660 }, { - "epoch": 0.018731049335486978, + "epoch": 0.025862743563659127, "grad_norm": 0.0, - "learning_rate": 1.2483474976392824e-05, - "loss": 1.1844, + "learning_rate": 1.7235984354628424e-05, + "loss": 1.2344, "step": 661 }, { - "epoch": 0.01875938677774944, + "epoch": 0.025901870255888568, "grad_norm": 0.0, - "learning_rate": 1.2502360717658168e-05, - "loss": 1.2399, + "learning_rate": 1.726205997392438e-05, + "loss": 1.3683, "step": 662 }, { - "epoch": 0.018787724220011903, + "epoch": 0.025940996948118005, "grad_norm": 0.0, - "learning_rate": 1.2521246458923513e-05, - "loss": 1.1196, + "learning_rate": 1.728813559322034e-05, + "loss": 1.2631, "step": 663 }, { - "epoch": 0.018816061662274362, + "epoch": 0.025980123640347446, "grad_norm": 0.0, - "learning_rate": 1.2540132200188859e-05, - "loss": 1.1953, + "learning_rate": 1.73142112125163e-05, + "loss": 1.312, "step": 664 }, { - "epoch": 0.018844399104536825, + "epoch": 0.026019250332576883, "grad_norm": 0.0, - "learning_rate": 1.2559017941454203e-05, - "loss": 1.2, + "learning_rate": 1.7340286831812255e-05, + "loss": 1.2645, "step": 665 }, { - "epoch": 0.018872736546799287, + "epoch": 0.026058377024806324, "grad_norm": 0.0, - "learning_rate": 1.2577903682719548e-05, - "loss": 1.2404, + "learning_rate": 1.7366362451108216e-05, + "loss": 1.3475, "step": 666 }, { - "epoch": 0.018901073989061747, + "epoch": 0.02609750371703576, "grad_norm": 0.0, - "learning_rate": 1.2596789423984892e-05, - "loss": 1.1239, + "learning_rate": 1.7392438070404173e-05, + "loss": 1.3962, "step": 667 }, { - "epoch": 0.01892941143132421, + "epoch": 0.0261366304092652, "grad_norm": 0.0, - "learning_rate": 1.2615675165250236e-05, - "loss": 1.181, + "learning_rate": 1.7418513689700133e-05, + "loss": 1.3545, "step": 668 }, { - "epoch": 0.01895774887358667, + "epoch": 0.02617575710149464, "grad_norm": 0.0, - "learning_rate": 1.2634560906515583e-05, - "loss": 1.1092, + "learning_rate": 1.744458930899609e-05, + "loss": 1.261, "step": 669 }, { - "epoch": 0.01898608631584913, + "epoch": 0.026214883793724077, "grad_norm": 0.0, - "learning_rate": 1.2653446647780927e-05, - "loss": 1.2159, + "learning_rate": 1.747066492829205e-05, + "loss": 1.2177, "step": 670 }, { - "epoch": 0.019014423758111593, + "epoch": 0.026254010485953518, "grad_norm": 0.0, - "learning_rate": 1.2672332389046271e-05, - "loss": 1.1341, + "learning_rate": 1.7496740547588007e-05, + "loss": 1.4032, "step": 671 }, { - "epoch": 0.019042761200374056, + "epoch": 0.026293137178182956, "grad_norm": 0.0, - "learning_rate": 1.2691218130311615e-05, - "loss": 1.0938, + "learning_rate": 1.7522816166883964e-05, + "loss": 1.3062, "step": 672 }, { - "epoch": 0.019071098642636515, + "epoch": 0.026332263870412397, "grad_norm": 0.0, - "learning_rate": 1.271010387157696e-05, - "loss": 1.1205, + "learning_rate": 1.754889178617992e-05, + "loss": 1.3318, "step": 673 }, { - "epoch": 0.019099436084898978, + "epoch": 0.026371390562641834, "grad_norm": 0.0, - "learning_rate": 1.2728989612842304e-05, - "loss": 1.1949, + "learning_rate": 1.757496740547588e-05, + "loss": 1.3521, "step": 674 }, { - "epoch": 0.019127773527161437, + "epoch": 0.026410517254871275, "grad_norm": 0.0, - "learning_rate": 1.274787535410765e-05, - "loss": 1.1982, + "learning_rate": 1.760104302477184e-05, + "loss": 1.2144, "step": 675 }, { - "epoch": 0.0191561109694239, + "epoch": 0.026449643947100712, "grad_norm": 0.0, - "learning_rate": 1.2766761095372996e-05, - "loss": 1.118, + "learning_rate": 1.76271186440678e-05, + "loss": 1.3651, "step": 676 }, { - "epoch": 0.019184448411686362, + "epoch": 0.02648877063933015, "grad_norm": 0.0, - "learning_rate": 1.278564683663834e-05, - "loss": 1.1493, + "learning_rate": 1.7653194263363756e-05, + "loss": 1.3687, "step": 677 }, { - "epoch": 0.01921278585394882, + "epoch": 0.02652789733155959, "grad_norm": 0.0, - "learning_rate": 1.2804532577903683e-05, - "loss": 1.2017, + "learning_rate": 1.7679269882659716e-05, + "loss": 1.3085, "step": 678 }, { - "epoch": 0.019241123296211284, + "epoch": 0.026567024023789028, "grad_norm": 0.0, - "learning_rate": 1.2823418319169027e-05, - "loss": 1.2307, + "learning_rate": 1.7705345501955673e-05, + "loss": 1.3804, "step": 679 }, { - "epoch": 0.019269460738473746, + "epoch": 0.02660615071601847, "grad_norm": 0.0, - "learning_rate": 1.2842304060434374e-05, - "loss": 1.1378, + "learning_rate": 1.773142112125163e-05, + "loss": 1.3939, "step": 680 }, { - "epoch": 0.019297798180736205, + "epoch": 0.026645277408247906, "grad_norm": 0.0, - "learning_rate": 1.2861189801699718e-05, - "loss": 1.137, + "learning_rate": 1.7757496740547587e-05, + "loss": 1.3356, "step": 681 }, { - "epoch": 0.019326135622998668, + "epoch": 0.026684404100477347, "grad_norm": 0.0, - "learning_rate": 1.2880075542965062e-05, - "loss": 1.191, + "learning_rate": 1.7783572359843547e-05, + "loss": 1.2709, "step": 682 }, { - "epoch": 0.01935447306526113, + "epoch": 0.026723530792706784, "grad_norm": 0.0, - "learning_rate": 1.2898961284230408e-05, - "loss": 1.2636, + "learning_rate": 1.7809647979139507e-05, + "loss": 1.2949, "step": 683 }, { - "epoch": 0.01938281050752359, + "epoch": 0.026762657484936222, "grad_norm": 0.0, - "learning_rate": 1.2917847025495751e-05, - "loss": 1.0973, + "learning_rate": 1.7835723598435464e-05, + "loss": 1.277, "step": 684 }, { - "epoch": 0.019411147949786052, + "epoch": 0.026801784177165663, "grad_norm": 0.0, - "learning_rate": 1.2936732766761095e-05, - "loss": 1.1726, + "learning_rate": 1.7861799217731425e-05, + "loss": 1.3973, "step": 685 }, { - "epoch": 0.019439485392048515, + "epoch": 0.0268409108693951, "grad_norm": 0.0, - "learning_rate": 1.2955618508026443e-05, - "loss": 1.2561, + "learning_rate": 1.788787483702738e-05, + "loss": 1.2893, "step": 686 }, { - "epoch": 0.019467822834310974, + "epoch": 0.02688003756162454, "grad_norm": 0.0, - "learning_rate": 1.2974504249291786e-05, - "loss": 1.0698, + "learning_rate": 1.791395045632334e-05, + "loss": 1.3168, "step": 687 }, { - "epoch": 0.019496160276573436, + "epoch": 0.02691916425385398, "grad_norm": 0.0, - "learning_rate": 1.299338999055713e-05, - "loss": 1.1329, + "learning_rate": 1.7940026075619295e-05, + "loss": 1.3116, "step": 688 }, { - "epoch": 0.0195244977188359, + "epoch": 0.02695829094608342, "grad_norm": 0.0, - "learning_rate": 1.3012275731822474e-05, - "loss": 1.2476, + "learning_rate": 1.7966101694915256e-05, + "loss": 1.2865, "step": 689 }, { - "epoch": 0.019552835161098358, + "epoch": 0.026997417638312857, "grad_norm": 0.0, - "learning_rate": 1.303116147308782e-05, - "loss": 1.1007, + "learning_rate": 1.7992177314211213e-05, + "loss": 1.3519, "step": 690 }, { - "epoch": 0.01958117260336082, + "epoch": 0.027036544330542298, "grad_norm": 0.0, - "learning_rate": 1.3050047214353165e-05, - "loss": 1.1335, + "learning_rate": 1.8018252933507173e-05, + "loss": 1.2482, "step": 691 }, { - "epoch": 0.019609510045623283, + "epoch": 0.027075671022771735, "grad_norm": 0.0, - "learning_rate": 1.3068932955618509e-05, - "loss": 1.1176, + "learning_rate": 1.804432855280313e-05, + "loss": 1.2324, "step": 692 }, { - "epoch": 0.019637847487885742, + "epoch": 0.027114797715001172, "grad_norm": 0.0, - "learning_rate": 1.3087818696883855e-05, - "loss": 1.1359, + "learning_rate": 1.807040417209909e-05, + "loss": 1.296, "step": 693 }, { - "epoch": 0.019666184930148205, + "epoch": 0.027153924407230613, "grad_norm": 0.0, - "learning_rate": 1.3106704438149199e-05, - "loss": 0.993, + "learning_rate": 1.8096479791395047e-05, + "loss": 1.2523, "step": 694 }, { - "epoch": 0.019694522372410667, + "epoch": 0.02719305109946005, "grad_norm": 0.0, - "learning_rate": 1.3125590179414542e-05, - "loss": 1.0792, + "learning_rate": 1.8122555410691004e-05, + "loss": 1.3862, "step": 695 }, { - "epoch": 0.019722859814673126, + "epoch": 0.02723217779168949, "grad_norm": 0.0, - "learning_rate": 1.314447592067989e-05, - "loss": 1.2115, + "learning_rate": 1.8148631029986964e-05, + "loss": 1.2624, "step": 696 }, { - "epoch": 0.01975119725693559, + "epoch": 0.02727130448391893, "grad_norm": 0.0, - "learning_rate": 1.3163361661945233e-05, - "loss": 1.1992, + "learning_rate": 1.817470664928292e-05, + "loss": 1.2974, "step": 697 }, { - "epoch": 0.01977953469919805, + "epoch": 0.02731043117614837, "grad_norm": 0.0, - "learning_rate": 1.3182247403210577e-05, - "loss": 1.2098, + "learning_rate": 1.8200782268578882e-05, + "loss": 1.3933, "step": 698 }, { - "epoch": 0.01980787214146051, + "epoch": 0.027349557868377807, "grad_norm": 0.0, - "learning_rate": 1.3201133144475921e-05, - "loss": 1.1838, + "learning_rate": 1.822685788787484e-05, + "loss": 1.3102, "step": 699 }, { - "epoch": 0.019836209583722973, + "epoch": 0.027388684560607245, "grad_norm": 0.0, - "learning_rate": 1.3220018885741265e-05, - "loss": 1.1792, + "learning_rate": 1.8252933507170796e-05, + "loss": 1.4967, "step": 700 }, { - "epoch": 0.019864547025985436, + "epoch": 0.027427811252836685, "grad_norm": 0.0, - "learning_rate": 1.323890462700661e-05, - "loss": 1.2637, + "learning_rate": 1.8279009126466756e-05, + "loss": 1.3389, "step": 701 }, { - "epoch": 0.019892884468247895, + "epoch": 0.027466937945066123, "grad_norm": 0.0, - "learning_rate": 1.3257790368271956e-05, - "loss": 1.1417, + "learning_rate": 1.8305084745762713e-05, + "loss": 1.3268, "step": 702 }, { - "epoch": 0.019921221910510357, + "epoch": 0.027506064637295564, "grad_norm": 0.0, - "learning_rate": 1.3276676109537302e-05, - "loss": 1.3282, + "learning_rate": 1.833116036505867e-05, + "loss": 1.3375, "step": 703 }, { - "epoch": 0.01994955935277282, + "epoch": 0.027545191329525, "grad_norm": 0.0, - "learning_rate": 1.3295561850802646e-05, - "loss": 1.1534, + "learning_rate": 1.835723598435463e-05, + "loss": 1.2125, "step": 704 }, { - "epoch": 0.01997789679503528, + "epoch": 0.027584318021754442, "grad_norm": 0.0, - "learning_rate": 1.331444759206799e-05, - "loss": 1.2514, + "learning_rate": 1.838331160365059e-05, + "loss": 1.3193, "step": 705 }, { - "epoch": 0.02000623423729774, + "epoch": 0.02762344471398388, "grad_norm": 0.0, - "learning_rate": 1.3333333333333333e-05, - "loss": 1.1548, + "learning_rate": 1.8409387222946547e-05, + "loss": 1.2108, "step": 706 }, { - "epoch": 0.020034571679560204, + "epoch": 0.02766257140621332, "grad_norm": 0.0, - "learning_rate": 1.335221907459868e-05, - "loss": 1.1824, + "learning_rate": 1.8435462842242504e-05, + "loss": 1.1917, "step": 707 }, { - "epoch": 0.020062909121822663, + "epoch": 0.027701698098442758, "grad_norm": 0.0, - "learning_rate": 1.3371104815864024e-05, - "loss": 1.1727, + "learning_rate": 1.8461538461538465e-05, + "loss": 1.3711, "step": 708 }, { - "epoch": 0.020091246564085126, + "epoch": 0.027740824790672195, "grad_norm": 0.0, - "learning_rate": 1.3389990557129368e-05, - "loss": 1.1498, + "learning_rate": 1.848761408083442e-05, + "loss": 1.3287, "step": 709 }, { - "epoch": 0.02011958400634759, + "epoch": 0.027779951482901636, "grad_norm": 0.0, - "learning_rate": 1.3408876298394712e-05, - "loss": 1.2328, + "learning_rate": 1.851368970013038e-05, + "loss": 1.3901, "step": 710 }, { - "epoch": 0.020147921448610048, + "epoch": 0.027819078175131073, "grad_norm": 0.0, - "learning_rate": 1.3427762039660058e-05, - "loss": 1.1937, + "learning_rate": 1.853976531942634e-05, + "loss": 1.2802, "step": 711 }, { - "epoch": 0.02017625889087251, + "epoch": 0.027858204867360514, "grad_norm": 0.0, - "learning_rate": 1.3446647780925402e-05, - "loss": 1.0904, + "learning_rate": 1.8565840938722296e-05, + "loss": 1.2836, "step": 712 }, { - "epoch": 0.020204596333134973, + "epoch": 0.02789733155958995, "grad_norm": 0.0, - "learning_rate": 1.3465533522190749e-05, - "loss": 1.0846, + "learning_rate": 1.8591916558018256e-05, + "loss": 1.2554, "step": 713 }, { - "epoch": 0.020232933775397432, + "epoch": 0.027936458251819393, "grad_norm": 0.0, - "learning_rate": 1.3484419263456093e-05, - "loss": 1.0255, + "learning_rate": 1.8617992177314213e-05, + "loss": 1.2759, "step": 714 }, { - "epoch": 0.020261271217659894, + "epoch": 0.02797558494404883, "grad_norm": 0.0, - "learning_rate": 1.3503305004721436e-05, - "loss": 1.1442, + "learning_rate": 1.864406779661017e-05, + "loss": 1.2703, "step": 715 }, { - "epoch": 0.020289608659922357, + "epoch": 0.028014711636278267, "grad_norm": 0.0, - "learning_rate": 1.352219074598678e-05, - "loss": 1.1349, + "learning_rate": 1.867014341590613e-05, + "loss": 1.4824, "step": 716 }, { - "epoch": 0.020317946102184816, + "epoch": 0.028053838328507708, "grad_norm": 0.0, - "learning_rate": 1.3541076487252124e-05, - "loss": 1.1148, + "learning_rate": 1.8696219035202087e-05, + "loss": 1.2855, "step": 717 }, { - "epoch": 0.02034628354444728, + "epoch": 0.028092965020737146, "grad_norm": 0.0, - "learning_rate": 1.3559962228517471e-05, - "loss": 1.1137, + "learning_rate": 1.8722294654498044e-05, + "loss": 1.3131, "step": 718 }, { - "epoch": 0.02037462098670974, + "epoch": 0.028132091712966587, "grad_norm": 0.0, - "learning_rate": 1.3578847969782815e-05, - "loss": 1.1217, + "learning_rate": 1.8748370273794004e-05, + "loss": 1.2399, "step": 719 }, { - "epoch": 0.0204029584289722, + "epoch": 0.028171218405196024, "grad_norm": 0.0, - "learning_rate": 1.359773371104816e-05, - "loss": 1.2358, + "learning_rate": 1.8774445893089965e-05, + "loss": 1.2284, "step": 720 }, { - "epoch": 0.020431295871234663, + "epoch": 0.028210345097425465, "grad_norm": 0.0, - "learning_rate": 1.3616619452313505e-05, - "loss": 1.2145, + "learning_rate": 1.880052151238592e-05, + "loss": 1.3964, "step": 721 }, { - "epoch": 0.020459633313497125, + "epoch": 0.028249471789654902, "grad_norm": 0.0, - "learning_rate": 1.3635505193578849e-05, - "loss": 1.2072, + "learning_rate": 1.882659713168188e-05, + "loss": 1.3767, "step": 722 }, { - "epoch": 0.020487970755759585, + "epoch": 0.028288598481884343, "grad_norm": 0.0, - "learning_rate": 1.3654390934844192e-05, - "loss": 1.2495, + "learning_rate": 1.8852672750977836e-05, + "loss": 1.2424, "step": 723 }, { - "epoch": 0.020516308198022047, + "epoch": 0.02832772517411378, "grad_norm": 0.0, - "learning_rate": 1.367327667610954e-05, - "loss": 1.1351, + "learning_rate": 1.8878748370273796e-05, + "loss": 1.3093, "step": 724 }, { - "epoch": 0.020544645640284506, + "epoch": 0.028366851866343218, "grad_norm": 0.0, - "learning_rate": 1.3692162417374884e-05, - "loss": 1.123, + "learning_rate": 1.8904823989569753e-05, + "loss": 1.4847, "step": 725 }, { - "epoch": 0.02057298308254697, + "epoch": 0.02840597855857266, "grad_norm": 0.0, - "learning_rate": 1.3711048158640227e-05, - "loss": 1.1348, + "learning_rate": 1.8930899608865713e-05, + "loss": 1.2566, "step": 726 }, { - "epoch": 0.02060132052480943, + "epoch": 0.028445105250802096, "grad_norm": 0.0, - "learning_rate": 1.3729933899905571e-05, - "loss": 1.2349, + "learning_rate": 1.895697522816167e-05, + "loss": 1.424, "step": 727 }, { - "epoch": 0.02062965796707189, + "epoch": 0.028484231943031537, "grad_norm": 0.0, - "learning_rate": 1.3748819641170917e-05, - "loss": 1.1243, + "learning_rate": 1.898305084745763e-05, + "loss": 1.2705, "step": 728 }, { - "epoch": 0.020657995409334353, + "epoch": 0.028523358635260974, "grad_norm": 0.0, - "learning_rate": 1.3767705382436262e-05, - "loss": 1.2044, + "learning_rate": 1.9009126466753587e-05, + "loss": 1.2917, "step": 729 }, { - "epoch": 0.020686332851596816, + "epoch": 0.028562485327490415, "grad_norm": 0.0, - "learning_rate": 1.3786591123701606e-05, - "loss": 1.1366, + "learning_rate": 1.9035202086049544e-05, + "loss": 1.3721, "step": 730 }, { - "epoch": 0.020714670293859275, + "epoch": 0.028601612019719853, "grad_norm": 0.0, - "learning_rate": 1.3805476864966952e-05, - "loss": 1.2708, + "learning_rate": 1.9061277705345505e-05, + "loss": 1.2763, "step": 731 }, { - "epoch": 0.020743007736121737, + "epoch": 0.02864073871194929, "grad_norm": 0.0, - "learning_rate": 1.3824362606232296e-05, - "loss": 1.1847, + "learning_rate": 1.908735332464146e-05, + "loss": 1.3436, "step": 732 }, { - "epoch": 0.0207713451783842, + "epoch": 0.02867986540417873, "grad_norm": 0.0, - "learning_rate": 1.384324834749764e-05, - "loss": 1.1143, + "learning_rate": 1.911342894393742e-05, + "loss": 1.2905, "step": 733 }, { - "epoch": 0.02079968262064666, + "epoch": 0.02871899209640817, "grad_norm": 0.0, - "learning_rate": 1.3862134088762983e-05, - "loss": 1.0666, + "learning_rate": 1.913950456323338e-05, + "loss": 1.2676, "step": 734 }, { - "epoch": 0.02082802006290912, + "epoch": 0.02875811878863761, "grad_norm": 0.0, - "learning_rate": 1.388101983002833e-05, - "loss": 1.2192, + "learning_rate": 1.916558018252934e-05, + "loss": 1.1979, "step": 735 }, { - "epoch": 0.020856357505171584, + "epoch": 0.028797245480867047, "grad_norm": 0.0, - "learning_rate": 1.3899905571293674e-05, - "loss": 1.1173, + "learning_rate": 1.9191655801825296e-05, + "loss": 1.1898, "step": 736 }, { - "epoch": 0.020884694947434043, + "epoch": 0.028836372173096488, "grad_norm": 0.0, - "learning_rate": 1.3918791312559018e-05, - "loss": 1.1929, + "learning_rate": 1.9217731421121253e-05, + "loss": 1.2773, "step": 737 }, { - "epoch": 0.020913032389696506, + "epoch": 0.028875498865325925, "grad_norm": 0.0, - "learning_rate": 1.3937677053824364e-05, - "loss": 1.1838, + "learning_rate": 1.924380704041721e-05, + "loss": 1.3876, "step": 738 }, { - "epoch": 0.02094136983195897, + "epoch": 0.028914625557555366, "grad_norm": 0.0, - "learning_rate": 1.3956562795089708e-05, - "loss": 1.1719, + "learning_rate": 1.926988265971317e-05, + "loss": 1.2874, "step": 739 }, { - "epoch": 0.020969707274221427, + "epoch": 0.028953752249784803, "grad_norm": 0.0, - "learning_rate": 1.3975448536355053e-05, - "loss": 1.1893, + "learning_rate": 1.9295958279009127e-05, + "loss": 1.4272, "step": 740 }, { - "epoch": 0.02099804471648389, + "epoch": 0.02899287894201424, "grad_norm": 0.0, - "learning_rate": 1.3994334277620399e-05, - "loss": 1.1397, + "learning_rate": 1.9322033898305087e-05, + "loss": 1.3996, "step": 741 }, { - "epoch": 0.021026382158746353, + "epoch": 0.02903200563424368, "grad_norm": 0.0, - "learning_rate": 1.4013220018885743e-05, - "loss": 1.0652, + "learning_rate": 1.9348109517601044e-05, + "loss": 1.4507, "step": 742 }, { - "epoch": 0.02105471960100881, + "epoch": 0.02907113232647312, "grad_norm": 0.0, - "learning_rate": 1.4032105760151087e-05, - "loss": 1.1364, + "learning_rate": 1.9374185136897005e-05, + "loss": 1.366, "step": 743 }, { - "epoch": 0.021083057043271274, + "epoch": 0.02911025901870256, "grad_norm": 0.0, - "learning_rate": 1.405099150141643e-05, - "loss": 1.0389, + "learning_rate": 1.940026075619296e-05, + "loss": 1.3633, "step": 744 }, { - "epoch": 0.021111394485533737, + "epoch": 0.029149385710931997, "grad_norm": 0.0, - "learning_rate": 1.4069877242681776e-05, - "loss": 1.1844, + "learning_rate": 1.942633637548892e-05, + "loss": 1.3103, "step": 745 }, { - "epoch": 0.021139731927796196, + "epoch": 0.029188512403161438, "grad_norm": 0.0, - "learning_rate": 1.4088762983947122e-05, - "loss": 1.16, + "learning_rate": 1.9452411994784876e-05, + "loss": 1.3976, "step": 746 }, { - "epoch": 0.02116806937005866, + "epoch": 0.029227639095390875, "grad_norm": 0.0, - "learning_rate": 1.4107648725212465e-05, - "loss": 1.2575, + "learning_rate": 1.9478487614080836e-05, + "loss": 1.4258, "step": 747 }, { - "epoch": 0.02119640681232112, + "epoch": 0.029266765787620313, "grad_norm": 0.0, - "learning_rate": 1.4126534466477811e-05, - "loss": 1.2132, + "learning_rate": 1.9504563233376793e-05, + "loss": 1.2855, "step": 748 }, { - "epoch": 0.02122474425458358, + "epoch": 0.029305892479849754, "grad_norm": 0.0, - "learning_rate": 1.4145420207743155e-05, - "loss": 1.2802, + "learning_rate": 1.9530638852672753e-05, + "loss": 1.3599, "step": 749 }, { - "epoch": 0.021253081696846043, + "epoch": 0.02934501917207919, "grad_norm": 0.0, - "learning_rate": 1.4164305949008499e-05, - "loss": 1.1954, + "learning_rate": 1.9556714471968713e-05, + "loss": 1.299, "step": 750 }, { - "epoch": 0.021281419139108505, + "epoch": 0.029384145864308632, "grad_norm": 0.0, - "learning_rate": 1.4183191690273846e-05, - "loss": 1.137, + "learning_rate": 1.958279009126467e-05, + "loss": 1.2695, "step": 751 }, { - "epoch": 0.021309756581370964, + "epoch": 0.02942327255653807, "grad_norm": 0.0, - "learning_rate": 1.420207743153919e-05, - "loss": 1.141, + "learning_rate": 1.9608865710560627e-05, + "loss": 1.3786, "step": 752 }, { - "epoch": 0.021338094023633427, + "epoch": 0.02946239924876751, "grad_norm": 0.0, - "learning_rate": 1.4220963172804534e-05, - "loss": 1.1968, + "learning_rate": 1.9634941329856584e-05, + "loss": 1.3497, "step": 753 }, { - "epoch": 0.02136643146589589, + "epoch": 0.029501525940996948, "grad_norm": 0.0, - "learning_rate": 1.4239848914069877e-05, - "loss": 1.2523, + "learning_rate": 1.9661016949152545e-05, + "loss": 1.1986, "step": 754 }, { - "epoch": 0.02139476890815835, + "epoch": 0.02954065263322639, "grad_norm": 0.0, - "learning_rate": 1.4258734655335223e-05, - "loss": 1.139, + "learning_rate": 1.96870925684485e-05, + "loss": 1.3461, "step": 755 }, { - "epoch": 0.02142310635042081, + "epoch": 0.029579779325455826, "grad_norm": 0.0, - "learning_rate": 1.4277620396600567e-05, - "loss": 1.119, + "learning_rate": 1.9713168187744462e-05, + "loss": 1.3925, "step": 756 }, { - "epoch": 0.021451443792683274, + "epoch": 0.029618906017685263, "grad_norm": 0.0, - "learning_rate": 1.4296506137865912e-05, - "loss": 1.0361, + "learning_rate": 1.973924380704042e-05, + "loss": 1.325, "step": 757 }, { - "epoch": 0.021479781234945733, + "epoch": 0.029658032709914704, "grad_norm": 0.0, - "learning_rate": 1.4315391879131258e-05, - "loss": 1.0984, + "learning_rate": 1.976531942633638e-05, + "loss": 1.4158, "step": 758 }, { - "epoch": 0.021508118677208195, + "epoch": 0.02969715940214414, "grad_norm": 0.0, - "learning_rate": 1.4334277620396602e-05, - "loss": 1.1778, + "learning_rate": 1.9791395045632336e-05, + "loss": 1.417, "step": 759 }, { - "epoch": 0.021536456119470658, + "epoch": 0.029736286094373583, "grad_norm": 0.0, - "learning_rate": 1.4353163361661946e-05, - "loss": 1.1036, + "learning_rate": 1.9817470664928293e-05, + "loss": 1.3062, "step": 760 }, { - "epoch": 0.021564793561733117, + "epoch": 0.02977541278660302, "grad_norm": 0.0, - "learning_rate": 1.437204910292729e-05, - "loss": 1.0767, + "learning_rate": 1.984354628422425e-05, + "loss": 1.4702, "step": 761 }, { - "epoch": 0.02159313100399558, + "epoch": 0.02981453947883246, "grad_norm": 0.0, - "learning_rate": 1.4390934844192637e-05, - "loss": 1.1501, + "learning_rate": 1.986962190352021e-05, + "loss": 1.3258, "step": 762 }, { - "epoch": 0.021621468446258042, + "epoch": 0.029853666171061898, "grad_norm": 0.0, - "learning_rate": 1.440982058545798e-05, - "loss": 1.1225, + "learning_rate": 1.9895697522816167e-05, + "loss": 1.3329, "step": 763 }, { - "epoch": 0.0216498058885205, + "epoch": 0.02989279286329134, "grad_norm": 0.0, - "learning_rate": 1.4428706326723325e-05, - "loss": 1.1545, + "learning_rate": 1.9921773142112127e-05, + "loss": 1.4747, "step": 764 }, { - "epoch": 0.021678143330782964, + "epoch": 0.029931919555520776, "grad_norm": 0.0, - "learning_rate": 1.444759206798867e-05, - "loss": 1.0387, + "learning_rate": 1.9947848761408084e-05, + "loss": 1.2937, "step": 765 }, { - "epoch": 0.021706480773045427, + "epoch": 0.029971046247750214, "grad_norm": 0.0, - "learning_rate": 1.4466477809254014e-05, - "loss": 1.1342, + "learning_rate": 1.9973924380704045e-05, + "loss": 1.2008, "step": 766 }, { - "epoch": 0.021734818215307886, + "epoch": 0.030010172939979655, "grad_norm": 0.0, - "learning_rate": 1.448536355051936e-05, - "loss": 1.2411, + "learning_rate": 2e-05, + "loss": 1.2762, "step": 767 }, { - "epoch": 0.021763155657570348, + "epoch": 0.030049299632209092, "grad_norm": 0.0, - "learning_rate": 1.4504249291784705e-05, - "loss": 1.2343, + "learning_rate": 1.9999999919706266e-05, + "loss": 1.254, "step": 768 }, { - "epoch": 0.02179149309983281, + "epoch": 0.030088426324438533, "grad_norm": 0.0, - "learning_rate": 1.4523135033050049e-05, - "loss": 1.0298, + "learning_rate": 1.9999999678825064e-05, + "loss": 1.401, "step": 769 }, { - "epoch": 0.02181983054209527, + "epoch": 0.03012755301666797, "grad_norm": 0.0, - "learning_rate": 1.4542020774315393e-05, - "loss": 1.1876, + "learning_rate": 1.9999999277356397e-05, + "loss": 1.299, "step": 770 }, { - "epoch": 0.021848167984357732, + "epoch": 0.03016667970889741, "grad_norm": 0.0, - "learning_rate": 1.4560906515580737e-05, - "loss": 1.257, + "learning_rate": 1.9999998715300272e-05, + "loss": 1.3342, "step": 771 }, { - "epoch": 0.021876505426620195, + "epoch": 0.03020580640112685, "grad_norm": 0.0, - "learning_rate": 1.4579792256846082e-05, - "loss": 1.1286, + "learning_rate": 1.99999979926567e-05, + "loss": 1.3817, "step": 772 }, { - "epoch": 0.021904842868882654, + "epoch": 0.030244933093356286, "grad_norm": 0.0, - "learning_rate": 1.4598677998111428e-05, - "loss": 1.2214, + "learning_rate": 1.999999710942569e-05, + "loss": 1.2819, "step": 773 }, { - "epoch": 0.021933180311145117, + "epoch": 0.030284059785585727, "grad_norm": 0.0, - "learning_rate": 1.4617563739376772e-05, - "loss": 1.2448, + "learning_rate": 1.9999996065607256e-05, + "loss": 1.3702, "step": 774 }, { - "epoch": 0.021961517753407576, + "epoch": 0.030323186477815164, "grad_norm": 0.0, - "learning_rate": 1.4636449480642117e-05, - "loss": 1.2062, + "learning_rate": 1.999999486120142e-05, + "loss": 1.2876, "step": 775 }, { - "epoch": 0.02198985519567004, + "epoch": 0.030362313170044605, "grad_norm": 0.0, - "learning_rate": 1.4655335221907461e-05, - "loss": 1.1511, + "learning_rate": 1.999999349620819e-05, + "loss": 1.506, "step": 776 }, { - "epoch": 0.0220181926379325, + "epoch": 0.030401439862274043, "grad_norm": 0.0, - "learning_rate": 1.4674220963172805e-05, - "loss": 1.2255, + "learning_rate": 1.9999991970627597e-05, + "loss": 1.3424, "step": 777 }, { - "epoch": 0.02204653008019496, + "epoch": 0.030440566554503484, "grad_norm": 0.0, - "learning_rate": 1.4693106704438152e-05, - "loss": 1.186, + "learning_rate": 1.999999028445967e-05, + "loss": 1.408, "step": 778 }, { - "epoch": 0.022074867522457423, + "epoch": 0.03047969324673292, "grad_norm": 0.0, - "learning_rate": 1.4711992445703496e-05, - "loss": 1.1645, + "learning_rate": 1.9999988437704427e-05, + "loss": 1.2872, "step": 779 }, { - "epoch": 0.022103204964719885, + "epoch": 0.030518819938962362, "grad_norm": 0.0, - "learning_rate": 1.473087818696884e-05, - "loss": 1.1131, + "learning_rate": 1.9999986430361896e-05, + "loss": 1.3309, "step": 780 }, { - "epoch": 0.022131542406982344, + "epoch": 0.0305579466311918, "grad_norm": 0.0, - "learning_rate": 1.4749763928234184e-05, - "loss": 0.9958, + "learning_rate": 1.9999984262432116e-05, + "loss": 1.4011, "step": 781 }, { - "epoch": 0.022159879849244807, + "epoch": 0.030597073323421237, "grad_norm": 0.0, - "learning_rate": 1.476864966949953e-05, - "loss": 1.1723, + "learning_rate": 1.999998193391512e-05, + "loss": 1.4145, "step": 782 }, { - "epoch": 0.02218821729150727, + "epoch": 0.030636200015650678, "grad_norm": 0.0, - "learning_rate": 1.4787535410764873e-05, - "loss": 1.0923, + "learning_rate": 1.9999979444810947e-05, + "loss": 1.2733, "step": 783 }, { - "epoch": 0.02221655473376973, + "epoch": 0.030675326707880115, "grad_norm": 0.0, - "learning_rate": 1.4806421152030219e-05, - "loss": 1.1617, + "learning_rate": 1.9999976795119632e-05, + "loss": 1.1775, "step": 784 }, { - "epoch": 0.02224489217603219, + "epoch": 0.030714453400109556, "grad_norm": 0.0, - "learning_rate": 1.4825306893295564e-05, - "loss": 1.193, + "learning_rate": 1.9999973984841218e-05, + "loss": 1.3878, "step": 785 }, { - "epoch": 0.022273229618294654, + "epoch": 0.030753580092338993, "grad_norm": 0.0, - "learning_rate": 1.4844192634560908e-05, - "loss": 1.136, + "learning_rate": 1.9999971013975758e-05, + "loss": 1.282, "step": 786 }, { - "epoch": 0.022301567060557113, + "epoch": 0.030792706784568434, "grad_norm": 0.0, - "learning_rate": 1.4863078375826252e-05, - "loss": 1.1424, + "learning_rate": 1.9999967882523294e-05, + "loss": 1.2957, "step": 787 }, { - "epoch": 0.022329904502819575, + "epoch": 0.03083183347679787, "grad_norm": 0.0, - "learning_rate": 1.4881964117091596e-05, - "loss": 1.1222, + "learning_rate": 1.9999964590483872e-05, + "loss": 1.3677, "step": 788 }, { - "epoch": 0.022358241945082038, + "epoch": 0.03087096016902731, "grad_norm": 0.0, - "learning_rate": 1.4900849858356943e-05, - "loss": 1.0813, + "learning_rate": 1.999996113785755e-05, + "loss": 1.308, "step": 789 }, { - "epoch": 0.022386579387344497, + "epoch": 0.03091008686125675, "grad_norm": 0.0, - "learning_rate": 1.4919735599622287e-05, - "loss": 1.1384, + "learning_rate": 1.9999957524644385e-05, + "loss": 1.2306, "step": 790 }, { - "epoch": 0.02241491682960696, + "epoch": 0.030949213553486187, "grad_norm": 0.0, - "learning_rate": 1.493862134088763e-05, - "loss": 1.1887, + "learning_rate": 1.999995375084443e-05, + "loss": 1.3874, "step": 791 }, { - "epoch": 0.022443254271869422, + "epoch": 0.030988340245715628, "grad_norm": 0.0, - "learning_rate": 1.4957507082152976e-05, - "loss": 1.2347, + "learning_rate": 1.999994981645775e-05, + "loss": 1.4169, "step": 792 }, { - "epoch": 0.02247159171413188, + "epoch": 0.031027466937945065, "grad_norm": 0.0, - "learning_rate": 1.497639282341832e-05, - "loss": 1.1316, + "learning_rate": 1.9999945721484407e-05, + "loss": 1.274, "step": 793 }, { - "epoch": 0.022499929156394344, + "epoch": 0.031066593630174506, "grad_norm": 0.0, - "learning_rate": 1.4995278564683664e-05, - "loss": 1.3047, + "learning_rate": 1.999994146592447e-05, + "loss": 1.3195, "step": 794 }, { - "epoch": 0.022528266598656806, + "epoch": 0.031105720322403944, "grad_norm": 0.0, - "learning_rate": 1.5014164305949011e-05, - "loss": 1.1646, + "learning_rate": 1.9999937049777998e-05, + "loss": 1.3754, "step": 795 }, { - "epoch": 0.022556604040919265, + "epoch": 0.031144847014633385, "grad_norm": 0.0, - "learning_rate": 1.5033050047214355e-05, - "loss": 1.2541, + "learning_rate": 1.999993247304507e-05, + "loss": 1.3586, "step": 796 }, { - "epoch": 0.022584941483181728, + "epoch": 0.031183973706862822, "grad_norm": 0.0, - "learning_rate": 1.5051935788479699e-05, - "loss": 1.1222, + "learning_rate": 1.9999927735725756e-05, + "loss": 1.2333, "step": 797 }, { - "epoch": 0.02261327892544419, + "epoch": 0.03122310039909226, "grad_norm": 0.0, - "learning_rate": 1.5070821529745043e-05, - "loss": 1.2504, + "learning_rate": 1.9999922837820134e-05, + "loss": 1.2209, "step": 798 }, { - "epoch": 0.02264161636770665, + "epoch": 0.0312622270913217, "grad_norm": 0.0, - "learning_rate": 1.5089707271010388e-05, - "loss": 1.2826, + "learning_rate": 1.999991777932828e-05, + "loss": 1.2996, "step": 799 }, { - "epoch": 0.022669953809969112, + "epoch": 0.03130135378355114, "grad_norm": 0.0, - "learning_rate": 1.5108593012275734e-05, - "loss": 1.2552, + "learning_rate": 1.999991256025028e-05, + "loss": 1.343, "step": 800 }, { - "epoch": 0.022698291252231575, + "epoch": 0.03134048047578058, "grad_norm": 0.0, - "learning_rate": 1.5127478753541078e-05, - "loss": 1.1766, + "learning_rate": 1.9999907180586212e-05, + "loss": 1.3289, "step": 801 }, { - "epoch": 0.022726628694494034, + "epoch": 0.03137960716801002, "grad_norm": 0.0, - "learning_rate": 1.5146364494806423e-05, - "loss": 1.2169, + "learning_rate": 1.999990164033617e-05, + "loss": 1.2967, "step": 802 }, { - "epoch": 0.022754966136756496, + "epoch": 0.03141873386023945, "grad_norm": 0.0, - "learning_rate": 1.5165250236071767e-05, - "loss": 1.0581, + "learning_rate": 1.9999895939500235e-05, + "loss": 1.2498, "step": 803 }, { - "epoch": 0.02278330357901896, + "epoch": 0.031457860552468894, "grad_norm": 0.0, - "learning_rate": 1.5184135977337111e-05, - "loss": 1.1857, + "learning_rate": 1.99998900780785e-05, + "loss": 1.4691, "step": 804 }, { - "epoch": 0.022811641021281418, + "epoch": 0.031496987244698335, "grad_norm": 0.0, - "learning_rate": 1.5203021718602455e-05, - "loss": 1.2317, + "learning_rate": 1.9999884056071065e-05, + "loss": 1.3203, "step": 805 }, { - "epoch": 0.02283997846354388, + "epoch": 0.03153611393692777, "grad_norm": 0.0, - "learning_rate": 1.5221907459867802e-05, - "loss": 1.2722, + "learning_rate": 1.999987787347802e-05, + "loss": 1.3314, "step": 806 }, { - "epoch": 0.022868315905806343, + "epoch": 0.03157524062915721, "grad_norm": 0.0, - "learning_rate": 1.5240793201133146e-05, - "loss": 1.1079, + "learning_rate": 1.9999871530299466e-05, + "loss": 1.4283, "step": 807 }, { - "epoch": 0.022896653348068802, + "epoch": 0.03161436732138665, "grad_norm": 0.0, - "learning_rate": 1.525967894239849e-05, - "loss": 1.2094, + "learning_rate": 1.999986502653551e-05, + "loss": 1.2816, "step": 808 }, { - "epoch": 0.022924990790331265, + "epoch": 0.03165349401361609, "grad_norm": 0.0, - "learning_rate": 1.5278564683663834e-05, - "loss": 1.1315, + "learning_rate": 1.9999858362186247e-05, + "loss": 1.3999, "step": 809 }, { - "epoch": 0.022953328232593728, + "epoch": 0.031692620705845526, "grad_norm": 0.0, - "learning_rate": 1.529745042492918e-05, - "loss": 1.1677, + "learning_rate": 1.999985153725179e-05, + "loss": 1.3618, "step": 810 }, { - "epoch": 0.022981665674856187, + "epoch": 0.031731747398074966, "grad_norm": 0.0, - "learning_rate": 1.5316336166194525e-05, - "loss": 1.2019, + "learning_rate": 1.999984455173225e-05, + "loss": 1.193, "step": 811 }, { - "epoch": 0.02301000311711865, + "epoch": 0.03177087409030441, "grad_norm": 0.0, - "learning_rate": 1.533522190745987e-05, - "loss": 1.2004, + "learning_rate": 1.9999837405627737e-05, + "loss": 1.2946, "step": 812 }, { - "epoch": 0.023038340559381112, + "epoch": 0.03181000078253384, "grad_norm": 0.0, - "learning_rate": 1.5354107648725213e-05, - "loss": 1.1124, + "learning_rate": 1.9999830098938364e-05, + "loss": 1.3202, "step": 813 }, { - "epoch": 0.02306667800164357, + "epoch": 0.03184912747476328, "grad_norm": 0.0, - "learning_rate": 1.5372993389990558e-05, - "loss": 1.2642, + "learning_rate": 1.999982263166425e-05, + "loss": 1.3276, "step": 814 }, { - "epoch": 0.023095015443906033, + "epoch": 0.03188825416699272, "grad_norm": 0.0, - "learning_rate": 1.5391879131255904e-05, - "loss": 1.1183, + "learning_rate": 1.9999815003805518e-05, + "loss": 1.2808, "step": 815 }, { - "epoch": 0.023123352886168496, + "epoch": 0.031927380859222164, "grad_norm": 0.0, - "learning_rate": 1.5410764872521246e-05, - "loss": 1.025, + "learning_rate": 1.9999807215362284e-05, + "loss": 1.2472, "step": 816 }, { - "epoch": 0.023151690328430955, + "epoch": 0.0319665075514516, "grad_norm": 0.0, - "learning_rate": 1.5429650613786595e-05, - "loss": 1.1365, + "learning_rate": 1.9999799266334682e-05, + "loss": 1.2759, "step": 817 }, { - "epoch": 0.023180027770693418, + "epoch": 0.03200563424368104, "grad_norm": 0.0, - "learning_rate": 1.5448536355051937e-05, - "loss": 1.1144, + "learning_rate": 1.9999791156722827e-05, + "loss": 1.329, "step": 818 }, { - "epoch": 0.02320836521295588, + "epoch": 0.03204476093591048, "grad_norm": 0.0, - "learning_rate": 1.5467422096317282e-05, - "loss": 1.0889, + "learning_rate": 1.9999782886526863e-05, + "loss": 1.2081, "step": 819 }, { - "epoch": 0.02323670265521834, + "epoch": 0.03208388762813992, "grad_norm": 0.0, - "learning_rate": 1.5486307837582625e-05, - "loss": 1.1187, + "learning_rate": 1.999977445574691e-05, + "loss": 1.2971, "step": 820 }, { - "epoch": 0.023265040097480802, + "epoch": 0.032123014320369354, "grad_norm": 0.0, - "learning_rate": 1.550519357884797e-05, - "loss": 1.1475, + "learning_rate": 1.9999765864383115e-05, + "loss": 1.2996, "step": 821 }, { - "epoch": 0.023293377539743264, + "epoch": 0.032162141012598795, "grad_norm": 0.0, - "learning_rate": 1.5524079320113316e-05, - "loss": 1.2467, + "learning_rate": 1.9999757112435608e-05, + "loss": 1.433, "step": 822 }, { - "epoch": 0.023321714982005724, + "epoch": 0.032201267704828236, "grad_norm": 0.0, - "learning_rate": 1.554296506137866e-05, - "loss": 1.1599, + "learning_rate": 1.9999748199904535e-05, + "loss": 1.385, "step": 823 }, { - "epoch": 0.023350052424268186, + "epoch": 0.03224039439705767, "grad_norm": 0.0, - "learning_rate": 1.5561850802644007e-05, - "loss": 1.1513, + "learning_rate": 1.9999739126790032e-05, + "loss": 1.1989, "step": 824 }, { - "epoch": 0.023378389866530645, + "epoch": 0.03227952108928711, "grad_norm": 0.0, - "learning_rate": 1.558073654390935e-05, - "loss": 1.1563, + "learning_rate": 1.999972989309225e-05, + "loss": 1.4124, "step": 825 }, { - "epoch": 0.023406727308793108, + "epoch": 0.03231864778151655, "grad_norm": 0.0, - "learning_rate": 1.5599622285174695e-05, - "loss": 1.1157, + "learning_rate": 1.9999720498811335e-05, + "loss": 1.2709, "step": 826 }, { - "epoch": 0.02343506475105557, + "epoch": 0.03235777447374599, "grad_norm": 0.0, - "learning_rate": 1.5618508026440037e-05, - "loss": 1.031, + "learning_rate": 1.9999710943947447e-05, + "loss": 1.3668, "step": 827 }, { - "epoch": 0.02346340219331803, + "epoch": 0.03239690116597543, "grad_norm": 0.0, - "learning_rate": 1.5637393767705386e-05, - "loss": 1.0976, + "learning_rate": 1.9999701228500724e-05, + "loss": 1.4264, "step": 828 }, { - "epoch": 0.023491739635580492, + "epoch": 0.03243602785820487, "grad_norm": 0.0, - "learning_rate": 1.5656279508970728e-05, - "loss": 1.1357, + "learning_rate": 1.999969135247133e-05, + "loss": 1.3146, "step": 829 }, { - "epoch": 0.023520077077842955, + "epoch": 0.03247515455043431, "grad_norm": 0.0, - "learning_rate": 1.5675165250236073e-05, - "loss": 1.2262, + "learning_rate": 1.999968131585943e-05, + "loss": 1.2606, "step": 830 }, { - "epoch": 0.023548414520105414, + "epoch": 0.03251428124266374, "grad_norm": 0.0, - "learning_rate": 1.5694050991501416e-05, - "loss": 1.2255, + "learning_rate": 1.999967111866517e-05, + "loss": 1.327, "step": 831 }, { - "epoch": 0.023576751962367876, + "epoch": 0.03255340793489318, "grad_norm": 0.0, - "learning_rate": 1.571293673276676e-05, - "loss": 1.1171, + "learning_rate": 1.9999660760888722e-05, + "loss": 1.3347, "step": 832 }, { - "epoch": 0.02360508940463034, + "epoch": 0.032592534627122624, "grad_norm": 0.0, - "learning_rate": 1.5731822474032107e-05, - "loss": 1.21, + "learning_rate": 1.9999650242530257e-05, + "loss": 1.4439, "step": 833 }, { - "epoch": 0.023633426846892798, + "epoch": 0.032631661319352065, "grad_norm": 0.0, - "learning_rate": 1.5750708215297452e-05, - "loss": 1.2825, + "learning_rate": 1.999963956358994e-05, + "loss": 1.1837, "step": 834 }, { - "epoch": 0.02366176428915526, + "epoch": 0.0326707880115815, "grad_norm": 0.0, - "learning_rate": 1.5769593956562798e-05, - "loss": 1.155, + "learning_rate": 1.999962872406794e-05, + "loss": 1.2983, "step": 835 }, { - "epoch": 0.023690101731417723, + "epoch": 0.03270991470381094, "grad_norm": 0.0, - "learning_rate": 1.578847969782814e-05, - "loss": 1.1092, + "learning_rate": 1.9999617723964434e-05, + "loss": 1.2805, "step": 836 }, { - "epoch": 0.023718439173680182, + "epoch": 0.03274904139604038, "grad_norm": 0.0, - "learning_rate": 1.5807365439093485e-05, - "loss": 1.0843, + "learning_rate": 1.9999606563279594e-05, + "loss": 1.2205, "step": 837 }, { - "epoch": 0.023746776615942645, + "epoch": 0.032788168088269815, "grad_norm": 0.0, - "learning_rate": 1.582625118035883e-05, - "loss": 1.2233, + "learning_rate": 1.9999595242013604e-05, + "loss": 1.2717, "step": 838 }, { - "epoch": 0.023775114058205107, + "epoch": 0.032827294780499255, "grad_norm": 0.0, - "learning_rate": 1.5845136921624177e-05, - "loss": 1.186, + "learning_rate": 1.9999583760166646e-05, + "loss": 1.3795, "step": 839 }, { - "epoch": 0.023803451500467566, + "epoch": 0.032866421472728696, "grad_norm": 0.0, - "learning_rate": 1.586402266288952e-05, - "loss": 1.288, + "learning_rate": 1.9999572117738902e-05, + "loss": 1.4551, "step": 840 }, { - "epoch": 0.02383178894273003, + "epoch": 0.03290554816495814, "grad_norm": 0.0, - "learning_rate": 1.5882908404154864e-05, - "loss": 1.2964, + "learning_rate": 1.9999560314730563e-05, + "loss": 1.2886, "step": 841 }, { - "epoch": 0.02386012638499249, + "epoch": 0.03294467485718757, "grad_norm": 0.0, - "learning_rate": 1.590179414542021e-05, - "loss": 1.156, + "learning_rate": 1.9999548351141813e-05, + "loss": 1.2686, "step": 842 }, { - "epoch": 0.02388846382725495, + "epoch": 0.03298380154941701, "grad_norm": 0.0, - "learning_rate": 1.5920679886685552e-05, - "loss": 1.2202, + "learning_rate": 1.9999536226972845e-05, + "loss": 1.2619, "step": 843 }, { - "epoch": 0.023916801269517413, + "epoch": 0.03302292824164645, "grad_norm": 0.0, - "learning_rate": 1.5939565627950898e-05, - "loss": 1.3331, + "learning_rate": 1.999952394222386e-05, + "loss": 1.26, "step": 844 }, { - "epoch": 0.023945138711779876, + "epoch": 0.03306205493387589, "grad_norm": 0.0, - "learning_rate": 1.5958451369216243e-05, - "loss": 1.1904, + "learning_rate": 1.9999511496895047e-05, + "loss": 1.4089, "step": 845 }, { - "epoch": 0.023973476154042335, + "epoch": 0.03310118162610533, "grad_norm": 0.0, - "learning_rate": 1.597733711048159e-05, - "loss": 1.1658, + "learning_rate": 1.999949889098661e-05, + "loss": 1.2825, "step": 846 }, { - "epoch": 0.024001813596304798, + "epoch": 0.03314030831833477, "grad_norm": 0.0, - "learning_rate": 1.599622285174693e-05, - "loss": 1.1521, + "learning_rate": 1.999948612449875e-05, + "loss": 1.3613, "step": 847 }, { - "epoch": 0.02403015103856726, + "epoch": 0.03317943501056421, "grad_norm": 0.0, - "learning_rate": 1.6015108593012276e-05, - "loss": 1.1029, + "learning_rate": 1.9999473197431677e-05, + "loss": 1.2984, "step": 848 }, { - "epoch": 0.02405848848082972, + "epoch": 0.03321856170279364, "grad_norm": 0.0, - "learning_rate": 1.6033994334277622e-05, - "loss": 1.1145, + "learning_rate": 1.999946010978559e-05, + "loss": 1.3828, "step": 849 }, { - "epoch": 0.024086825923092182, + "epoch": 0.033257688395023084, "grad_norm": 0.0, - "learning_rate": 1.6052880075542968e-05, - "loss": 1.0924, + "learning_rate": 1.9999446861560704e-05, + "loss": 1.3482, "step": 850 }, { - "epoch": 0.024115163365354644, + "epoch": 0.033296815087252525, "grad_norm": 0.0, - "learning_rate": 1.607176581680831e-05, - "loss": 1.2271, + "learning_rate": 1.9999433452757234e-05, + "loss": 1.3253, "step": 851 }, { - "epoch": 0.024143500807617103, + "epoch": 0.033335941779481966, "grad_norm": 0.0, - "learning_rate": 1.6090651558073655e-05, - "loss": 1.0907, + "learning_rate": 1.9999419883375393e-05, + "loss": 1.2423, "step": 852 }, { - "epoch": 0.024171838249879566, + "epoch": 0.0333750684717114, "grad_norm": 0.0, - "learning_rate": 1.6109537299339e-05, - "loss": 1.1396, + "learning_rate": 1.9999406153415397e-05, + "loss": 1.4014, "step": 853 }, { - "epoch": 0.02420017569214203, + "epoch": 0.03341419516394084, "grad_norm": 0.0, - "learning_rate": 1.6128423040604343e-05, - "loss": 1.2059, + "learning_rate": 1.999939226287747e-05, + "loss": 1.2924, "step": 854 }, { - "epoch": 0.024228513134404488, + "epoch": 0.03345332185617028, "grad_norm": 0.0, - "learning_rate": 1.6147308781869692e-05, - "loss": 1.2788, + "learning_rate": 1.999937821176183e-05, + "loss": 1.4016, "step": 855 }, { - "epoch": 0.02425685057666695, + "epoch": 0.033492448548399716, "grad_norm": 0.0, - "learning_rate": 1.6166194523135034e-05, - "loss": 1.0941, + "learning_rate": 1.9999364000068703e-05, + "loss": 1.2872, "step": 856 }, { - "epoch": 0.024285188018929413, + "epoch": 0.033531575240629156, "grad_norm": 0.0, - "learning_rate": 1.618508026440038e-05, - "loss": 1.1942, + "learning_rate": 1.9999349627798324e-05, + "loss": 1.2828, "step": 857 }, { - "epoch": 0.024313525461191872, + "epoch": 0.0335707019328586, "grad_norm": 0.0, - "learning_rate": 1.6203966005665722e-05, - "loss": 1.2222, + "learning_rate": 1.9999335094950922e-05, + "loss": 1.3521, "step": 858 }, { - "epoch": 0.024341862903454334, + "epoch": 0.03360982862508804, "grad_norm": 0.0, - "learning_rate": 1.6222851746931067e-05, - "loss": 1.14, + "learning_rate": 1.9999320401526727e-05, + "loss": 1.4202, "step": 859 }, { - "epoch": 0.024370200345716797, + "epoch": 0.03364895531731747, "grad_norm": 0.0, - "learning_rate": 1.6241737488196413e-05, - "loss": 1.207, + "learning_rate": 1.9999305547525977e-05, + "loss": 1.3143, "step": 860 }, { - "epoch": 0.024398537787979256, + "epoch": 0.03368808200954691, "grad_norm": 0.0, - "learning_rate": 1.626062322946176e-05, - "loss": 1.2058, + "learning_rate": 1.9999290532948908e-05, + "loss": 1.2248, "step": 861 }, { - "epoch": 0.02442687523024172, + "epoch": 0.033727208701776354, "grad_norm": 0.0, - "learning_rate": 1.6279508970727104e-05, - "loss": 1.1768, + "learning_rate": 1.999927535779576e-05, + "loss": 1.3083, "step": 862 }, { - "epoch": 0.02445521267250418, + "epoch": 0.03376633539400579, "grad_norm": 0.0, - "learning_rate": 1.6298394711992446e-05, - "loss": 1.1348, + "learning_rate": 1.9999260022066784e-05, + "loss": 1.3148, "step": 863 }, { - "epoch": 0.02448355011476664, + "epoch": 0.03380546208623523, "grad_norm": 0.0, - "learning_rate": 1.631728045325779e-05, - "loss": 1.208, + "learning_rate": 1.999924452576222e-05, + "loss": 1.2968, "step": 864 }, { - "epoch": 0.024511887557029103, + "epoch": 0.03384458877846467, "grad_norm": 0.0, - "learning_rate": 1.6336166194523134e-05, - "loss": 1.2534, + "learning_rate": 1.999922886888232e-05, + "loss": 1.2639, "step": 865 }, { - "epoch": 0.024540224999291566, + "epoch": 0.03388371547069411, "grad_norm": 0.0, - "learning_rate": 1.6355051935788483e-05, - "loss": 1.1918, + "learning_rate": 1.9999213051427336e-05, + "loss": 1.2764, "step": 866 }, { - "epoch": 0.024568562441554025, + "epoch": 0.033922842162923544, "grad_norm": 0.0, - "learning_rate": 1.6373937677053825e-05, - "loss": 1.22, + "learning_rate": 1.9999197073397517e-05, + "loss": 1.4142, "step": 867 }, { - "epoch": 0.024596899883816487, + "epoch": 0.033961968855152985, "grad_norm": 0.0, - "learning_rate": 1.639282341831917e-05, - "loss": 1.1084, + "learning_rate": 1.999918093479312e-05, + "loss": 1.272, "step": 868 }, { - "epoch": 0.02462523732607895, + "epoch": 0.034001095547382426, "grad_norm": 0.0, - "learning_rate": 1.6411709159584516e-05, - "loss": 1.2494, + "learning_rate": 1.9999164635614413e-05, + "loss": 1.2815, "step": 869 }, { - "epoch": 0.02465357476834141, + "epoch": 0.03404022223961186, "grad_norm": 0.0, - "learning_rate": 1.6430594900849858e-05, - "loss": 0.9888, + "learning_rate": 1.9999148175861646e-05, + "loss": 1.2863, "step": 870 }, { - "epoch": 0.02468191221060387, + "epoch": 0.0340793489318413, "grad_norm": 0.0, - "learning_rate": 1.6449480642115204e-05, - "loss": 1.1365, + "learning_rate": 1.999913155553509e-05, + "loss": 1.3367, "step": 871 }, { - "epoch": 0.024710249652866334, + "epoch": 0.03411847562407074, "grad_norm": 0.0, - "learning_rate": 1.646836638338055e-05, - "loss": 1.119, + "learning_rate": 1.9999114774635013e-05, + "loss": 1.3163, "step": 872 }, { - "epoch": 0.024738587095128793, + "epoch": 0.03415760231630018, "grad_norm": 0.0, - "learning_rate": 1.6487252124645895e-05, - "loss": 1.0938, + "learning_rate": 1.9999097833161683e-05, + "loss": 1.3362, "step": 873 }, { - "epoch": 0.024766924537391256, + "epoch": 0.03419672900852962, "grad_norm": 0.0, - "learning_rate": 1.6506137865911237e-05, - "loss": 1.2347, + "learning_rate": 1.999908073111537e-05, + "loss": 1.3036, "step": 874 }, { - "epoch": 0.024795261979653715, + "epoch": 0.03423585570075906, "grad_norm": 0.0, - "learning_rate": 1.6525023607176583e-05, - "loss": 1.1958, + "learning_rate": 1.999906346849635e-05, + "loss": 1.3046, "step": 875 }, { - "epoch": 0.024823599421916177, + "epoch": 0.0342749823929885, "grad_norm": 0.0, - "learning_rate": 1.6543909348441928e-05, - "loss": 1.2559, + "learning_rate": 1.99990460453049e-05, + "loss": 1.3578, "step": 876 }, { - "epoch": 0.02485193686417864, + "epoch": 0.03431410908521793, "grad_norm": 0.0, - "learning_rate": 1.6562795089707274e-05, - "loss": 1.1729, + "learning_rate": 1.99990284615413e-05, + "loss": 1.4672, "step": 877 }, { - "epoch": 0.0248802743064411, + "epoch": 0.03435323577744737, "grad_norm": 0.0, - "learning_rate": 1.6581680830972616e-05, - "loss": 0.998, + "learning_rate": 1.9999010717205832e-05, + "loss": 1.2216, "step": 878 }, { - "epoch": 0.02490861174870356, + "epoch": 0.034392362469676814, "grad_norm": 0.0, - "learning_rate": 1.660056657223796e-05, - "loss": 1.1483, + "learning_rate": 1.9998992812298783e-05, + "loss": 1.2873, "step": 879 }, { - "epoch": 0.024936949190966024, + "epoch": 0.034431489161906255, "grad_norm": 0.0, - "learning_rate": 1.6619452313503307e-05, - "loss": 1.0743, + "learning_rate": 1.999897474682044e-05, + "loss": 1.2637, "step": 880 }, { - "epoch": 0.024965286633228483, + "epoch": 0.03447061585413569, "grad_norm": 0.0, - "learning_rate": 1.663833805476865e-05, - "loss": 1.1937, + "learning_rate": 1.9998956520771088e-05, + "loss": 1.2993, "step": 881 }, { - "epoch": 0.024993624075490946, + "epoch": 0.03450974254636513, "grad_norm": 0.0, - "learning_rate": 1.6657223796033998e-05, - "loss": 1.1681, + "learning_rate": 1.9998938134151028e-05, + "loss": 1.4233, "step": 882 }, { - "epoch": 0.02502196151775341, + "epoch": 0.03454886923859457, "grad_norm": 0.0, - "learning_rate": 1.667610953729934e-05, - "loss": 1.1093, + "learning_rate": 1.9998919586960545e-05, + "loss": 1.2816, "step": 883 }, { - "epoch": 0.025050298960015868, + "epoch": 0.03458799593082401, "grad_norm": 0.0, - "learning_rate": 1.6694995278564686e-05, - "loss": 1.1331, + "learning_rate": 1.9998900879199948e-05, + "loss": 1.3059, "step": 884 }, { - "epoch": 0.02507863640227833, + "epoch": 0.034627122623053445, "grad_norm": 0.0, - "learning_rate": 1.6713881019830028e-05, - "loss": 1.1682, + "learning_rate": 1.999888201086953e-05, + "loss": 1.2919, "step": 885 }, { - "epoch": 0.025106973844540793, + "epoch": 0.034666249315282886, "grad_norm": 0.0, - "learning_rate": 1.6732766761095374e-05, - "loss": 1.1375, + "learning_rate": 1.9998862981969597e-05, + "loss": 1.2939, "step": 886 }, { - "epoch": 0.025135311286803252, + "epoch": 0.03470537600751233, "grad_norm": 0.0, - "learning_rate": 1.675165250236072e-05, - "loss": 1.0139, + "learning_rate": 1.9998843792500454e-05, + "loss": 1.3831, "step": 887 }, { - "epoch": 0.025163648729065714, + "epoch": 0.03474450269974176, "grad_norm": 0.0, - "learning_rate": 1.6770538243626065e-05, - "loss": 1.2227, + "learning_rate": 1.9998824442462407e-05, + "loss": 1.3323, "step": 888 }, { - "epoch": 0.025191986171328177, + "epoch": 0.0347836293919712, "grad_norm": 0.0, - "learning_rate": 1.678942398489141e-05, - "loss": 1.2067, + "learning_rate": 1.9998804931855772e-05, + "loss": 1.4501, "step": 889 }, { - "epoch": 0.025220323613590636, + "epoch": 0.03482275608420064, "grad_norm": 0.0, - "learning_rate": 1.6808309726156752e-05, - "loss": 1.0951, + "learning_rate": 1.9998785260680855e-05, + "loss": 1.238, "step": 890 }, { - "epoch": 0.0252486610558531, + "epoch": 0.034861882776430084, "grad_norm": 0.0, - "learning_rate": 1.6827195467422098e-05, - "loss": 1.2504, + "learning_rate": 1.999876542893798e-05, + "loss": 1.3301, "step": 891 }, { - "epoch": 0.02527699849811556, + "epoch": 0.03490100946865952, "grad_norm": 0.0, - "learning_rate": 1.684608120868744e-05, - "loss": 0.9901, + "learning_rate": 1.9998745436627458e-05, + "loss": 1.3508, "step": 892 }, { - "epoch": 0.02530533594037802, + "epoch": 0.03494013616088896, "grad_norm": 0.0, - "learning_rate": 1.686496694995279e-05, - "loss": 1.2271, + "learning_rate": 1.9998725283749617e-05, + "loss": 1.2231, "step": 893 }, { - "epoch": 0.025333673382640483, + "epoch": 0.0349792628531184, "grad_norm": 0.0, - "learning_rate": 1.688385269121813e-05, - "loss": 1.0905, + "learning_rate": 1.9998704970304772e-05, + "loss": 1.4127, "step": 894 }, { - "epoch": 0.025362010824902945, + "epoch": 0.03501838954534783, "grad_norm": 0.0, - "learning_rate": 1.6902738432483477e-05, - "loss": 1.1918, + "learning_rate": 1.9998684496293258e-05, + "loss": 1.3048, "step": 895 }, { - "epoch": 0.025390348267165404, + "epoch": 0.035057516237577274, "grad_norm": 0.0, - "learning_rate": 1.6921624173748822e-05, - "loss": 1.1195, + "learning_rate": 1.9998663861715397e-05, + "loss": 1.2628, "step": 896 }, { - "epoch": 0.025418685709427867, + "epoch": 0.035096642929806715, "grad_norm": 0.0, - "learning_rate": 1.6940509915014164e-05, - "loss": 1.0983, + "learning_rate": 1.9998643066571527e-05, + "loss": 1.2761, "step": 897 }, { - "epoch": 0.02544702315169033, + "epoch": 0.035135769622036156, "grad_norm": 0.0, - "learning_rate": 1.695939565627951e-05, - "loss": 1.1206, + "learning_rate": 1.9998622110861978e-05, + "loss": 1.3774, "step": 898 }, { - "epoch": 0.02547536059395279, + "epoch": 0.03517489631426559, "grad_norm": 0.0, - "learning_rate": 1.6978281397544856e-05, - "loss": 1.2123, + "learning_rate": 1.9998600994587085e-05, + "loss": 1.3322, "step": 899 }, { - "epoch": 0.02550369803621525, + "epoch": 0.03521402300649503, "grad_norm": 0.0, - "learning_rate": 1.69971671388102e-05, - "loss": 1.1556, + "learning_rate": 1.999857971774719e-05, + "loss": 1.3008, "step": 900 }, { - "epoch": 0.025532035478477714, + "epoch": 0.03525314969872447, "grad_norm": 0.0, - "learning_rate": 1.7016052880075543e-05, - "loss": 1.1118, + "learning_rate": 1.9998558280342634e-05, + "loss": 1.3599, "step": 901 }, { - "epoch": 0.025560372920740173, + "epoch": 0.035292276390953906, "grad_norm": 0.0, - "learning_rate": 1.703493862134089e-05, - "loss": 1.2187, + "learning_rate": 1.999853668237376e-05, + "loss": 1.2042, "step": 902 }, { - "epoch": 0.025588710363002636, + "epoch": 0.035331403083183346, "grad_norm": 0.0, - "learning_rate": 1.7053824362606234e-05, - "loss": 1.1808, + "learning_rate": 1.9998514923840916e-05, + "loss": 1.4352, "step": 903 }, { - "epoch": 0.025617047805265098, + "epoch": 0.03537052977541279, "grad_norm": 0.0, - "learning_rate": 1.707271010387158e-05, - "loss": 1.1165, + "learning_rate": 1.999849300474445e-05, + "loss": 1.289, "step": 904 }, { - "epoch": 0.025645385247527557, + "epoch": 0.03540965646764223, "grad_norm": 0.0, - "learning_rate": 1.7091595845136922e-05, - "loss": 1.0584, + "learning_rate": 1.9998470925084715e-05, + "loss": 1.3278, "step": 905 }, { - "epoch": 0.02567372268979002, + "epoch": 0.03544878315987166, "grad_norm": 0.0, - "learning_rate": 1.7110481586402268e-05, - "loss": 1.0872, + "learning_rate": 1.999844868486207e-05, + "loss": 1.3103, "step": 906 }, { - "epoch": 0.025702060132052482, + "epoch": 0.0354879098521011, "grad_norm": 0.0, - "learning_rate": 1.7129367327667613e-05, - "loss": 1.1257, + "learning_rate": 1.9998426284076862e-05, + "loss": 1.3226, "step": 907 }, { - "epoch": 0.02573039757431494, + "epoch": 0.035527036544330544, "grad_norm": 0.0, - "learning_rate": 1.7148253068932955e-05, - "loss": 1.251, + "learning_rate": 1.999840372272946e-05, + "loss": 1.3013, "step": 908 }, { - "epoch": 0.025758735016577404, + "epoch": 0.03556616323655998, "grad_norm": 0.0, - "learning_rate": 1.7167138810198304e-05, - "loss": 1.1633, + "learning_rate": 1.9998381000820226e-05, + "loss": 1.2596, "step": 909 }, { - "epoch": 0.025787072458839867, + "epoch": 0.03560528992878942, "grad_norm": 0.0, - "learning_rate": 1.7186024551463646e-05, - "loss": 1.1605, + "learning_rate": 1.9998358118349513e-05, + "loss": 1.3431, "step": 910 }, { - "epoch": 0.025815409901102326, + "epoch": 0.03564441662101886, "grad_norm": 0.0, - "learning_rate": 1.7204910292728992e-05, - "loss": 1.2009, + "learning_rate": 1.9998335075317706e-05, + "loss": 1.4734, "step": 911 }, { - "epoch": 0.025843747343364788, + "epoch": 0.0356835433132483, "grad_norm": 0.0, - "learning_rate": 1.7223796033994334e-05, - "loss": 1.0993, + "learning_rate": 1.9998311871725162e-05, + "loss": 1.3673, "step": 912 }, { - "epoch": 0.02587208478562725, + "epoch": 0.035722670005477734, "grad_norm": 0.0, - "learning_rate": 1.724268177525968e-05, - "loss": 1.1467, + "learning_rate": 1.9998288507572258e-05, + "loss": 1.187, "step": 913 }, { - "epoch": 0.02590042222788971, + "epoch": 0.035761796697707175, "grad_norm": 0.0, - "learning_rate": 1.7261567516525025e-05, - "loss": 1.2084, + "learning_rate": 1.999826498285937e-05, + "loss": 1.2999, "step": 914 }, { - "epoch": 0.025928759670152172, + "epoch": 0.035800923389936616, "grad_norm": 0.0, - "learning_rate": 1.728045325779037e-05, - "loss": 1.1201, + "learning_rate": 1.9998241297586876e-05, + "loss": 1.3326, "step": 915 }, { - "epoch": 0.025957097112414635, + "epoch": 0.03584005008216606, "grad_norm": 0.0, - "learning_rate": 1.7299338999055716e-05, - "loss": 1.1795, + "learning_rate": 1.9998217451755154e-05, + "loss": 1.2893, "step": 916 }, { - "epoch": 0.025985434554677094, + "epoch": 0.03587917677439549, "grad_norm": 0.0, - "learning_rate": 1.731822474032106e-05, - "loss": 1.1232, + "learning_rate": 1.9998193445364586e-05, + "loss": 1.3876, "step": 917 }, { - "epoch": 0.026013771996939557, + "epoch": 0.03591830346662493, "grad_norm": 0.0, - "learning_rate": 1.7337110481586404e-05, - "loss": 1.1712, + "learning_rate": 1.9998169278415562e-05, + "loss": 1.295, "step": 918 }, { - "epoch": 0.02604210943920202, + "epoch": 0.03595743015885437, "grad_norm": 0.0, - "learning_rate": 1.7355996222851746e-05, - "loss": 1.1458, + "learning_rate": 1.9998144950908468e-05, + "loss": 1.3507, "step": 919 }, { - "epoch": 0.02607044688146448, + "epoch": 0.03599655685108381, "grad_norm": 0.0, - "learning_rate": 1.7374881964117095e-05, - "loss": 1.1636, + "learning_rate": 1.9998120462843694e-05, + "loss": 1.0994, "step": 920 }, { - "epoch": 0.02609878432372694, + "epoch": 0.03603568354331325, "grad_norm": 0.0, - "learning_rate": 1.7393767705382437e-05, - "loss": 1.2953, + "learning_rate": 1.9998095814221636e-05, + "loss": 1.3184, "step": 921 }, { - "epoch": 0.026127121765989404, + "epoch": 0.03607481023554269, "grad_norm": 0.0, - "learning_rate": 1.7412653446647783e-05, - "loss": 1.1941, + "learning_rate": 1.9998071005042683e-05, + "loss": 1.4352, "step": 922 }, { - "epoch": 0.026155459208251863, + "epoch": 0.03611393692777213, "grad_norm": 0.0, - "learning_rate": 1.743153918791313e-05, - "loss": 1.1356, + "learning_rate": 1.999804603530724e-05, + "loss": 1.2288, "step": 923 }, { - "epoch": 0.026183796650514325, + "epoch": 0.03615306362000156, "grad_norm": 0.0, - "learning_rate": 1.745042492917847e-05, - "loss": 1.2153, + "learning_rate": 1.9998020905015705e-05, + "loss": 1.2621, "step": 924 }, { - "epoch": 0.026212134092776784, + "epoch": 0.036192190312231004, "grad_norm": 0.0, - "learning_rate": 1.7469310670443816e-05, - "loss": 1.2854, + "learning_rate": 1.9997995614168486e-05, + "loss": 1.3243, "step": 925 }, { - "epoch": 0.026240471535039247, + "epoch": 0.036231317004460445, "grad_norm": 0.0, - "learning_rate": 1.7488196411709162e-05, - "loss": 1.1179, + "learning_rate": 1.9997970162765985e-05, + "loss": 1.3518, "step": 926 }, { - "epoch": 0.02626880897730171, + "epoch": 0.03627044369668988, "grad_norm": 0.0, - "learning_rate": 1.7507082152974507e-05, - "loss": 1.171, + "learning_rate": 1.999794455080861e-05, + "loss": 1.5003, "step": 927 }, { - "epoch": 0.02629714641956417, + "epoch": 0.03630957038891932, "grad_norm": 0.0, - "learning_rate": 1.752596789423985e-05, - "loss": 1.2213, + "learning_rate": 1.9997918778296772e-05, + "loss": 1.349, "step": 928 }, { - "epoch": 0.02632548386182663, + "epoch": 0.03634869708114876, "grad_norm": 0.0, - "learning_rate": 1.7544853635505195e-05, - "loss": 1.1704, + "learning_rate": 1.999789284523089e-05, + "loss": 1.208, "step": 929 }, { - "epoch": 0.026353821304089094, + "epoch": 0.0363878237733782, "grad_norm": 0.0, - "learning_rate": 1.756373937677054e-05, - "loss": 1.0771, + "learning_rate": 1.9997866751611373e-05, + "loss": 1.3976, "step": 930 }, { - "epoch": 0.026382158746351553, + "epoch": 0.036426950465607635, "grad_norm": 0.0, - "learning_rate": 1.7582625118035886e-05, - "loss": 1.2738, + "learning_rate": 1.9997840497438648e-05, + "loss": 1.3269, "step": 931 }, { - "epoch": 0.026410496188614015, + "epoch": 0.036466077157837076, "grad_norm": 0.0, - "learning_rate": 1.7601510859301228e-05, - "loss": 1.0872, + "learning_rate": 1.9997814082713128e-05, + "loss": 1.2702, "step": 932 }, { - "epoch": 0.026438833630876478, + "epoch": 0.03650520385006652, "grad_norm": 0.0, - "learning_rate": 1.7620396600566574e-05, - "loss": 1.1313, + "learning_rate": 1.9997787507435244e-05, + "loss": 1.2599, "step": 933 }, { - "epoch": 0.026467171073138937, + "epoch": 0.03654433054229595, "grad_norm": 0.0, - "learning_rate": 1.763928234183192e-05, - "loss": 1.1945, + "learning_rate": 1.9997760771605423e-05, + "loss": 1.2624, "step": 934 }, { - "epoch": 0.0264955085154014, + "epoch": 0.03658345723452539, "grad_norm": 0.0, - "learning_rate": 1.765816808309726e-05, - "loss": 1.2142, + "learning_rate": 1.9997733875224088e-05, + "loss": 1.4012, "step": 935 }, { - "epoch": 0.026523845957663862, + "epoch": 0.03662258392675483, "grad_norm": 0.0, - "learning_rate": 1.7677053824362607e-05, - "loss": 1.06, + "learning_rate": 1.999770681829168e-05, + "loss": 1.2995, "step": 936 }, { - "epoch": 0.02655218339992632, + "epoch": 0.036661710618984274, "grad_norm": 0.0, - "learning_rate": 1.7695939565627953e-05, - "loss": 1.0525, + "learning_rate": 1.999767960080862e-05, + "loss": 1.3032, "step": 937 }, { - "epoch": 0.026580520842188784, + "epoch": 0.03670083731121371, "grad_norm": 0.0, - "learning_rate": 1.7714825306893298e-05, - "loss": 1.0756, + "learning_rate": 1.9997652222775363e-05, + "loss": 1.4407, "step": 938 }, { - "epoch": 0.026608858284451246, + "epoch": 0.03673996400344315, "grad_norm": 0.0, - "learning_rate": 1.773371104815864e-05, - "loss": 1.2549, + "learning_rate": 1.9997624684192332e-05, + "loss": 1.2351, "step": 939 }, { - "epoch": 0.026637195726713706, + "epoch": 0.03677909069567259, "grad_norm": 0.0, - "learning_rate": 1.7752596789423986e-05, - "loss": 1.1296, + "learning_rate": 1.9997596985059977e-05, + "loss": 1.3862, "step": 940 }, { - "epoch": 0.026665533168976168, + "epoch": 0.03681821738790202, "grad_norm": 0.0, - "learning_rate": 1.777148253068933e-05, - "loss": 1.012, + "learning_rate": 1.9997569125378743e-05, + "loss": 1.2936, "step": 941 }, { - "epoch": 0.02669387061123863, + "epoch": 0.036857344080131464, "grad_norm": 0.0, - "learning_rate": 1.7790368271954677e-05, - "loss": 1.1195, + "learning_rate": 1.999754110514908e-05, + "loss": 1.3401, "step": 942 }, { - "epoch": 0.02672220805350109, + "epoch": 0.036896470772360905, "grad_norm": 0.0, - "learning_rate": 1.7809254013220023e-05, - "loss": 1.1443, + "learning_rate": 1.9997512924371432e-05, + "loss": 1.2708, "step": 943 }, { - "epoch": 0.026750545495763552, + "epoch": 0.036935597464590346, "grad_norm": 0.0, - "learning_rate": 1.7828139754485365e-05, - "loss": 1.1257, + "learning_rate": 1.9997484583046255e-05, + "loss": 1.3372, "step": 944 }, { - "epoch": 0.026778882938026015, + "epoch": 0.03697472415681978, "grad_norm": 0.0, - "learning_rate": 1.784702549575071e-05, - "loss": 1.0789, + "learning_rate": 1.9997456081174e-05, + "loss": 1.3243, "step": 945 }, { - "epoch": 0.026807220380288474, + "epoch": 0.03701385084904922, "grad_norm": 0.0, - "learning_rate": 1.7865911237016052e-05, - "loss": 1.2325, + "learning_rate": 1.9997427418755132e-05, + "loss": 1.2847, "step": 946 }, { - "epoch": 0.026835557822550937, + "epoch": 0.03705297754127866, "grad_norm": 0.0, - "learning_rate": 1.7884796978281398e-05, - "loss": 1.1865, + "learning_rate": 1.9997398595790104e-05, + "loss": 1.2844, "step": 947 }, { - "epoch": 0.0268638952648134, + "epoch": 0.0370921042335081, "grad_norm": 0.0, - "learning_rate": 1.7903682719546744e-05, - "loss": 1.2458, + "learning_rate": 1.9997369612279383e-05, + "loss": 1.2772, "step": 948 }, { - "epoch": 0.026892232707075858, + "epoch": 0.037131230925737536, "grad_norm": 0.0, - "learning_rate": 1.792256846081209e-05, - "loss": 1.4362, + "learning_rate": 1.999734046822343e-05, + "loss": 1.4015, "step": 949 }, { - "epoch": 0.02692057014933832, + "epoch": 0.03717035761796698, "grad_norm": 0.0, - "learning_rate": 1.794145420207743e-05, - "loss": 1.1303, + "learning_rate": 1.9997311163622722e-05, + "loss": 1.2936, "step": 950 }, { - "epoch": 0.026948907591600783, + "epoch": 0.03720948431019642, "grad_norm": 0.0, - "learning_rate": 1.7960339943342777e-05, - "loss": 1.1712, + "learning_rate": 1.999728169847772e-05, + "loss": 1.2478, "step": 951 }, { - "epoch": 0.026977245033863242, + "epoch": 0.03724861100242585, "grad_norm": 0.0, - "learning_rate": 1.7979225684608122e-05, - "loss": 1.1671, + "learning_rate": 1.9997252072788903e-05, + "loss": 1.3051, "step": 952 }, { - "epoch": 0.027005582476125705, + "epoch": 0.03728773769465529, "grad_norm": 0.0, - "learning_rate": 1.7998111425873468e-05, - "loss": 1.1608, + "learning_rate": 1.9997222286556747e-05, + "loss": 1.3505, "step": 953 }, { - "epoch": 0.027033919918388168, + "epoch": 0.037326864386884734, "grad_norm": 0.0, - "learning_rate": 1.8016997167138813e-05, - "loss": 1.178, + "learning_rate": 1.9997192339781724e-05, + "loss": 1.2867, "step": 954 }, { - "epoch": 0.027062257360650627, + "epoch": 0.037365991079114175, "grad_norm": 0.0, - "learning_rate": 1.8035882908404156e-05, - "loss": 1.3507, + "learning_rate": 1.9997162232464325e-05, + "loss": 1.2798, "step": 955 }, { - "epoch": 0.02709059480291309, + "epoch": 0.03740511777134361, "grad_norm": 0.0, - "learning_rate": 1.80547686496695e-05, - "loss": 1.0958, + "learning_rate": 1.999713196460502e-05, + "loss": 1.3127, "step": 956 }, { - "epoch": 0.027118932245175552, + "epoch": 0.03744424446357305, "grad_norm": 0.0, - "learning_rate": 1.8073654390934843e-05, - "loss": 1.2291, + "learning_rate": 1.999710153620431e-05, + "loss": 1.3055, "step": 957 }, { - "epoch": 0.02714726968743801, + "epoch": 0.03748337115580249, "grad_norm": 0.0, - "learning_rate": 1.809254013220019e-05, - "loss": 1.0882, + "learning_rate": 1.9997070947262674e-05, + "loss": 1.3083, "step": 958 }, { - "epoch": 0.027175607129700474, + "epoch": 0.037522497848031924, "grad_norm": 0.0, - "learning_rate": 1.8111425873465534e-05, - "loss": 1.1105, + "learning_rate": 1.9997040197780605e-05, + "loss": 1.2365, "step": 959 }, { - "epoch": 0.027203944571962936, + "epoch": 0.037561624540261365, "grad_norm": 0.0, - "learning_rate": 1.813031161473088e-05, - "loss": 1.2769, + "learning_rate": 1.9997009287758596e-05, + "loss": 1.3403, "step": 960 }, { - "epoch": 0.027232282014225395, + "epoch": 0.037600751232490806, "grad_norm": 0.0, - "learning_rate": 1.8149197355996226e-05, - "loss": 1.1504, + "learning_rate": 1.9996978217197145e-05, + "loss": 1.2811, "step": 961 }, { - "epoch": 0.027260619456487858, + "epoch": 0.03763987792472025, "grad_norm": 0.0, - "learning_rate": 1.8168083097261568e-05, - "loss": 1.1042, + "learning_rate": 1.9996946986096754e-05, + "loss": 1.2516, "step": 962 }, { - "epoch": 0.02728895689875032, + "epoch": 0.03767900461694968, "grad_norm": 0.0, - "learning_rate": 1.8186968838526913e-05, - "loss": 1.1164, + "learning_rate": 1.999691559445792e-05, + "loss": 1.3697, "step": 963 }, { - "epoch": 0.02731729434101278, + "epoch": 0.03771813130917912, "grad_norm": 0.0, - "learning_rate": 1.820585457979226e-05, - "loss": 1.1423, + "learning_rate": 1.9996884042281145e-05, + "loss": 1.3818, "step": 964 }, { - "epoch": 0.027345631783275242, + "epoch": 0.03775725800140856, "grad_norm": 0.0, - "learning_rate": 1.8224740321057604e-05, - "loss": 1.2349, + "learning_rate": 1.9996852329566944e-05, + "loss": 1.257, "step": 965 }, { - "epoch": 0.027373969225537705, + "epoch": 0.037796384693638, "grad_norm": 0.0, - "learning_rate": 1.8243626062322947e-05, - "loss": 1.1776, + "learning_rate": 1.9996820456315818e-05, + "loss": 1.2632, "step": 966 }, { - "epoch": 0.027402306667800164, + "epoch": 0.03783551138586744, "grad_norm": 0.0, - "learning_rate": 1.8262511803588292e-05, - "loss": 1.2228, + "learning_rate": 1.9996788422528283e-05, + "loss": 1.3012, "step": 967 }, { - "epoch": 0.027430644110062626, + "epoch": 0.03787463807809688, "grad_norm": 0.0, - "learning_rate": 1.8281397544853638e-05, - "loss": 1.0574, + "learning_rate": 1.9996756228204853e-05, + "loss": 1.283, "step": 968 }, { - "epoch": 0.02745898155232509, + "epoch": 0.03791376477032632, "grad_norm": 0.0, - "learning_rate": 1.8300283286118983e-05, - "loss": 1.1566, + "learning_rate": 1.9996723873346043e-05, + "loss": 1.2121, "step": 969 }, { - "epoch": 0.027487318994587548, + "epoch": 0.03795289146255575, "grad_norm": 0.0, - "learning_rate": 1.8319169027384325e-05, - "loss": 1.1717, + "learning_rate": 1.9996691357952376e-05, + "loss": 1.2932, "step": 970 }, { - "epoch": 0.02751565643685001, + "epoch": 0.037992018154785194, "grad_norm": 0.0, - "learning_rate": 1.833805476864967e-05, - "loss": 1.1215, + "learning_rate": 1.9996658682024373e-05, + "loss": 1.4485, "step": 971 }, { - "epoch": 0.02754399387911247, + "epoch": 0.038031144847014635, "grad_norm": 0.0, - "learning_rate": 1.8356940509915016e-05, - "loss": 1.2098, + "learning_rate": 1.9996625845562555e-05, + "loss": 1.3214, "step": 972 }, { - "epoch": 0.027572331321374932, + "epoch": 0.038070271539244076, "grad_norm": 0.0, - "learning_rate": 1.837582625118036e-05, - "loss": 1.2334, + "learning_rate": 1.9996592848567455e-05, + "loss": 1.3696, "step": 973 }, { - "epoch": 0.027600668763637395, + "epoch": 0.03810939823147351, "grad_norm": 0.0, - "learning_rate": 1.8394711992445704e-05, - "loss": 1.111, + "learning_rate": 1.99965596910396e-05, + "loss": 1.3929, "step": 974 }, { - "epoch": 0.027629006205899854, + "epoch": 0.03814852492370295, "grad_norm": 0.0, - "learning_rate": 1.841359773371105e-05, - "loss": 1.186, + "learning_rate": 1.9996526372979522e-05, + "loss": 1.3751, "step": 975 }, { - "epoch": 0.027657343648162316, + "epoch": 0.03818765161593239, "grad_norm": 0.0, - "learning_rate": 1.8432483474976395e-05, - "loss": 1.1011, + "learning_rate": 1.999649289438776e-05, + "loss": 1.3842, "step": 976 }, { - "epoch": 0.02768568109042478, + "epoch": 0.038226778308161825, "grad_norm": 0.0, - "learning_rate": 1.8451369216241737e-05, - "loss": 1.1928, + "learning_rate": 1.9996459255264843e-05, + "loss": 1.2668, "step": 977 }, { - "epoch": 0.027714018532687238, + "epoch": 0.038265905000391266, "grad_norm": 0.0, - "learning_rate": 1.8470254957507083e-05, - "loss": 1.0721, + "learning_rate": 1.999642545561132e-05, + "loss": 1.346, "step": 978 }, { - "epoch": 0.0277423559749497, + "epoch": 0.03830503169262071, "grad_norm": 0.0, - "learning_rate": 1.848914069877243e-05, - "loss": 1.1285, + "learning_rate": 1.999639149542773e-05, + "loss": 1.1881, "step": 979 }, { - "epoch": 0.027770693417212163, + "epoch": 0.03834415838485015, "grad_norm": 0.0, - "learning_rate": 1.8508026440037774e-05, - "loss": 1.1862, + "learning_rate": 1.999635737471462e-05, + "loss": 1.2051, "step": 980 }, { - "epoch": 0.027799030859474622, + "epoch": 0.03838328507707958, "grad_norm": 0.0, - "learning_rate": 1.852691218130312e-05, - "loss": 1.1775, + "learning_rate": 1.9996323093472535e-05, + "loss": 1.3511, "step": 981 }, { - "epoch": 0.027827368301737085, + "epoch": 0.03842241176930902, "grad_norm": 0.0, - "learning_rate": 1.8545797922568462e-05, - "loss": 1.233, + "learning_rate": 1.9996288651702028e-05, + "loss": 1.3239, "step": 982 }, { - "epoch": 0.027855705743999547, + "epoch": 0.038461538461538464, "grad_norm": 0.0, - "learning_rate": 1.8564683663833807e-05, - "loss": 1.1717, + "learning_rate": 1.999625404940365e-05, + "loss": 1.2927, "step": 983 }, { - "epoch": 0.027884043186262007, + "epoch": 0.0385006651537679, "grad_norm": 0.0, - "learning_rate": 1.858356940509915e-05, - "loss": 1.1349, + "learning_rate": 1.9996219286577957e-05, + "loss": 1.2457, "step": 984 }, { - "epoch": 0.02791238062852447, + "epoch": 0.03853979184599734, "grad_norm": 0.0, - "learning_rate": 1.8602455146364495e-05, - "loss": 1.2029, + "learning_rate": 1.9996184363225512e-05, + "loss": 1.4034, "step": 985 }, { - "epoch": 0.02794071807078693, + "epoch": 0.03857891853822678, "grad_norm": 0.0, - "learning_rate": 1.862134088762984e-05, - "loss": 1.0713, + "learning_rate": 1.999614927934687e-05, + "loss": 1.2393, "step": 986 }, { - "epoch": 0.02796905551304939, + "epoch": 0.03861804523045622, "grad_norm": 0.0, - "learning_rate": 1.8640226628895186e-05, - "loss": 1.2452, + "learning_rate": 1.9996114034942594e-05, + "loss": 1.236, "step": 987 }, { - "epoch": 0.027997392955311853, + "epoch": 0.038657171922685654, "grad_norm": 0.0, - "learning_rate": 1.8659112370160532e-05, - "loss": 1.17, + "learning_rate": 1.9996078630013253e-05, + "loss": 1.3185, "step": 988 }, { - "epoch": 0.028025730397574316, + "epoch": 0.038696298614915095, "grad_norm": 0.0, - "learning_rate": 1.8677998111425874e-05, - "loss": 1.2217, + "learning_rate": 1.999604306455942e-05, + "loss": 1.1909, "step": 989 }, { - "epoch": 0.028054067839836775, + "epoch": 0.038735425307144536, "grad_norm": 0.0, - "learning_rate": 1.869688385269122e-05, - "loss": 1.1859, + "learning_rate": 1.9996007338581656e-05, + "loss": 1.42, "step": 990 }, { - "epoch": 0.028082405282099238, + "epoch": 0.03877455199937397, "grad_norm": 0.0, - "learning_rate": 1.8715769593956565e-05, - "loss": 1.143, + "learning_rate": 1.9995971452080543e-05, + "loss": 1.3624, "step": 991 }, { - "epoch": 0.0281107427243617, + "epoch": 0.03881367869160341, "grad_norm": 0.0, - "learning_rate": 1.873465533522191e-05, - "loss": 1.1924, + "learning_rate": 1.9995935405056653e-05, + "loss": 1.3718, "step": 992 }, { - "epoch": 0.02813908016662416, + "epoch": 0.03885280538383285, "grad_norm": 0.0, - "learning_rate": 1.8753541076487253e-05, - "loss": 1.197, + "learning_rate": 1.9995899197510567e-05, + "loss": 1.3787, "step": 993 }, { - "epoch": 0.028167417608886622, + "epoch": 0.03889193207606229, "grad_norm": 0.0, - "learning_rate": 1.87724268177526e-05, - "loss": 1.0934, + "learning_rate": 1.9995862829442864e-05, + "loss": 1.3403, "step": 994 }, { - "epoch": 0.028195755051149084, + "epoch": 0.038931058768291726, "grad_norm": 0.0, - "learning_rate": 1.8791312559017944e-05, - "loss": 1.1078, + "learning_rate": 1.999582630085413e-05, + "loss": 1.4604, "step": 995 }, { - "epoch": 0.028224092493411543, + "epoch": 0.03897018546052117, "grad_norm": 0.0, - "learning_rate": 1.8810198300283286e-05, - "loss": 1.2384, + "learning_rate": 1.9995789611744947e-05, + "loss": 1.2492, "step": 996 }, { - "epoch": 0.028252429935674006, + "epoch": 0.03900931215275061, "grad_norm": 0.0, - "learning_rate": 1.882908404154863e-05, - "loss": 1.1484, + "learning_rate": 1.9995752762115917e-05, + "loss": 1.3782, "step": 997 }, { - "epoch": 0.02828076737793647, + "epoch": 0.03904843884498004, "grad_norm": 0.0, - "learning_rate": 1.8847969782813977e-05, - "loss": 1.2137, + "learning_rate": 1.9995715751967614e-05, + "loss": 1.3665, "step": 998 }, { - "epoch": 0.028309104820198928, + "epoch": 0.03908756553720948, "grad_norm": 0.0, - "learning_rate": 1.8866855524079323e-05, - "loss": 1.0782, + "learning_rate": 1.999567858130065e-05, + "loss": 1.3026, "step": 999 }, { - "epoch": 0.02833744226246139, + "epoch": 0.039126692229438924, "grad_norm": 0.0, - "learning_rate": 1.8885741265344665e-05, - "loss": 1.137, + "learning_rate": 1.9995641250115606e-05, + "loss": 1.3451, "step": 1000 }, { - "epoch": 0.028365779704723853, + "epoch": 0.039165818921668365, "grad_norm": 0.0, - "learning_rate": 1.890462700661001e-05, - "loss": 1.194, + "learning_rate": 1.999560375841309e-05, + "loss": 1.3078, "step": 1001 }, { - "epoch": 0.028394117146986312, + "epoch": 0.0392049456138978, "grad_norm": 0.0, - "learning_rate": 1.8923512747875356e-05, - "loss": 1.0541, + "learning_rate": 1.9995566106193706e-05, + "loss": 1.1863, "step": 1002 }, { - "epoch": 0.028422454589248775, + "epoch": 0.03924407230612724, "grad_norm": 0.0, - "learning_rate": 1.89423984891407e-05, - "loss": 1.2079, + "learning_rate": 1.9995528293458056e-05, + "loss": 1.293, "step": 1003 }, { - "epoch": 0.028450792031511237, + "epoch": 0.03928319899835668, "grad_norm": 0.0, - "learning_rate": 1.8961284230406044e-05, - "loss": 1.187, + "learning_rate": 1.9995490320206743e-05, + "loss": 1.3221, "step": 1004 }, { - "epoch": 0.028479129473773696, + "epoch": 0.03932232569058612, "grad_norm": 0.0, - "learning_rate": 1.898016997167139e-05, - "loss": 1.1599, + "learning_rate": 1.9995452186440382e-05, + "loss": 1.3593, "step": 1005 }, { - "epoch": 0.02850746691603616, + "epoch": 0.039361452382815555, "grad_norm": 0.0, - "learning_rate": 1.8999055712936735e-05, - "loss": 1.1718, + "learning_rate": 1.9995413892159587e-05, + "loss": 1.2421, "step": 1006 }, { - "epoch": 0.02853580435829862, + "epoch": 0.039400579075044996, "grad_norm": 0.0, - "learning_rate": 1.9017941454202077e-05, - "loss": 1.1938, + "learning_rate": 1.9995375437364964e-05, + "loss": 1.4045, "step": 1007 }, { - "epoch": 0.02856414180056108, + "epoch": 0.03943970576727444, "grad_norm": 0.0, - "learning_rate": 1.9036827195467426e-05, - "loss": 1.2468, + "learning_rate": 1.9995336822057137e-05, + "loss": 1.2922, "step": 1008 }, { - "epoch": 0.028592479242823543, + "epoch": 0.03947883245950387, "grad_norm": 0.0, - "learning_rate": 1.9055712936732768e-05, - "loss": 1.1701, + "learning_rate": 1.999529804623673e-05, + "loss": 1.3707, "step": 1009 }, { - "epoch": 0.028620816685086006, + "epoch": 0.03951795915173331, "grad_norm": 0.0, - "learning_rate": 1.9074598677998114e-05, - "loss": 1.1414, + "learning_rate": 1.999525910990436e-05, + "loss": 1.267, "step": 1010 }, { - "epoch": 0.028649154127348465, + "epoch": 0.03955708584396275, "grad_norm": 0.0, - "learning_rate": 1.9093484419263456e-05, - "loss": 1.1783, + "learning_rate": 1.999522001306065e-05, + "loss": 1.3087, "step": 1011 }, { - "epoch": 0.028677491569610927, + "epoch": 0.039596212536192193, "grad_norm": 0.0, - "learning_rate": 1.91123701605288e-05, - "loss": 1.2122, + "learning_rate": 1.9995180755706234e-05, + "loss": 1.3307, "step": 1012 }, { - "epoch": 0.02870582901187339, + "epoch": 0.03963533922842163, "grad_norm": 0.0, - "learning_rate": 1.9131255901794147e-05, - "loss": 1.2389, + "learning_rate": 1.999514133784174e-05, + "loss": 1.4219, "step": 1013 }, { - "epoch": 0.02873416645413585, + "epoch": 0.03967446592065107, "grad_norm": 0.0, - "learning_rate": 1.9150141643059492e-05, - "loss": 1.1016, + "learning_rate": 1.99951017594678e-05, + "loss": 1.3727, "step": 1014 }, { - "epoch": 0.02876250389639831, + "epoch": 0.03971359261288051, "grad_norm": 0.0, - "learning_rate": 1.9169027384324838e-05, - "loss": 1.0969, + "learning_rate": 1.999506202058505e-05, + "loss": 1.2426, "step": 1015 }, { - "epoch": 0.028790841338660774, + "epoch": 0.03975271930510994, "grad_norm": 0.0, - "learning_rate": 1.918791312559018e-05, - "loss": 1.1701, + "learning_rate": 1.999502212119413e-05, + "loss": 1.2742, "step": 1016 }, { - "epoch": 0.028819178780923233, + "epoch": 0.039791845997339384, "grad_norm": 0.0, - "learning_rate": 1.9206798866855526e-05, - "loss": 1.2404, + "learning_rate": 1.9994982061295676e-05, + "loss": 1.268, "step": 1017 }, { - "epoch": 0.028847516223185696, + "epoch": 0.039830972689568825, "grad_norm": 0.0, - "learning_rate": 1.9225684608120868e-05, - "loss": 1.1728, + "learning_rate": 1.9994941840890338e-05, + "loss": 1.4445, "step": 1018 }, { - "epoch": 0.02887585366544816, + "epoch": 0.039870099381798266, "grad_norm": 0.0, - "learning_rate": 1.9244570349386217e-05, - "loss": 1.1865, + "learning_rate": 1.999490145997876e-05, + "loss": 1.3713, "step": 1019 }, { - "epoch": 0.028904191107710617, + "epoch": 0.0399092260740277, "grad_norm": 0.0, - "learning_rate": 1.926345609065156e-05, - "loss": 1.1162, + "learning_rate": 1.9994860918561584e-05, + "loss": 1.2727, "step": 1020 }, { - "epoch": 0.02893252854997308, + "epoch": 0.03994835276625714, "grad_norm": 0.0, - "learning_rate": 1.9282341831916905e-05, - "loss": 1.1747, + "learning_rate": 1.999482021663947e-05, + "loss": 1.1573, "step": 1021 }, { - "epoch": 0.02896086599223554, + "epoch": 0.03998747945848658, "grad_norm": 0.0, - "learning_rate": 1.930122757318225e-05, - "loss": 1.2077, + "learning_rate": 1.999477935421306e-05, + "loss": 1.2461, "step": 1022 }, { - "epoch": 0.028989203434498, + "epoch": 0.040026606150716015, "grad_norm": 0.0, - "learning_rate": 1.9320113314447592e-05, - "loss": 1.1709, + "learning_rate": 1.9994738331283026e-05, + "loss": 1.3088, "step": 1023 }, { - "epoch": 0.029017540876760464, + "epoch": 0.040065732842945456, "grad_norm": 0.0, - "learning_rate": 1.9338999055712938e-05, - "loss": 1.2383, + "learning_rate": 1.9994697147850016e-05, + "loss": 1.3212, "step": 1024 }, { - "epoch": 0.029045878319022923, + "epoch": 0.0401048595351749, "grad_norm": 0.0, - "learning_rate": 1.9357884796978283e-05, - "loss": 1.1296, + "learning_rate": 1.999465580391469e-05, + "loss": 1.1978, "step": 1025 }, { - "epoch": 0.029074215761285386, + "epoch": 0.04014398622740434, "grad_norm": 0.0, - "learning_rate": 1.937677053824363e-05, - "loss": 1.1473, + "learning_rate": 1.9994614299477723e-05, + "loss": 1.38, "step": 1026 }, { - "epoch": 0.02910255320354785, + "epoch": 0.04018311291963377, "grad_norm": 0.0, - "learning_rate": 1.939565627950897e-05, - "loss": 1.2365, + "learning_rate": 1.9994572634539767e-05, + "loss": 1.3429, "step": 1027 }, { - "epoch": 0.029130890645810308, + "epoch": 0.04022223961186321, "grad_norm": 0.0, - "learning_rate": 1.9414542020774317e-05, - "loss": 1.2007, + "learning_rate": 1.9994530809101503e-05, + "loss": 1.3749, "step": 1028 }, { - "epoch": 0.02915922808807277, + "epoch": 0.040261366304092654, "grad_norm": 0.0, - "learning_rate": 1.9433427762039662e-05, - "loss": 1.1995, + "learning_rate": 1.99944888231636e-05, + "loss": 1.319, "step": 1029 }, { - "epoch": 0.029187565530335233, + "epoch": 0.04030049299632209, "grad_norm": 0.0, - "learning_rate": 1.9452313503305008e-05, - "loss": 1.164, + "learning_rate": 1.9994446676726723e-05, + "loss": 1.3461, "step": 1030 }, { - "epoch": 0.029215902972597692, + "epoch": 0.04033961968855153, "grad_norm": 0.0, - "learning_rate": 1.947119924457035e-05, - "loss": 1.2151, + "learning_rate": 1.9994404369791563e-05, + "loss": 1.2871, "step": 1031 }, { - "epoch": 0.029244240414860154, + "epoch": 0.04037874638078097, "grad_norm": 0.0, - "learning_rate": 1.9490084985835695e-05, - "loss": 1.1885, + "learning_rate": 1.999436190235879e-05, + "loss": 1.434, "step": 1032 }, { - "epoch": 0.029272577857122617, + "epoch": 0.04041787307301041, "grad_norm": 0.0, - "learning_rate": 1.950897072710104e-05, - "loss": 1.2051, + "learning_rate": 1.9994319274429088e-05, + "loss": 1.3204, "step": 1033 }, { - "epoch": 0.029300915299385076, + "epoch": 0.040456999765239844, "grad_norm": 0.0, - "learning_rate": 1.9527856468366383e-05, - "loss": 1.1603, + "learning_rate": 1.999427648600314e-05, + "loss": 1.3728, "step": 1034 }, { - "epoch": 0.02932925274164754, + "epoch": 0.040496126457469285, "grad_norm": 0.0, - "learning_rate": 1.9546742209631732e-05, - "loss": 1.2018, + "learning_rate": 1.999423353708164e-05, + "loss": 1.3203, "step": 1035 }, { - "epoch": 0.02935759018391, + "epoch": 0.040535253149698726, "grad_norm": 0.0, - "learning_rate": 1.9565627950897074e-05, - "loss": 1.1445, + "learning_rate": 1.999419042766527e-05, + "loss": 1.2211, "step": 1036 }, { - "epoch": 0.02938592762617246, + "epoch": 0.04057437984192817, "grad_norm": 0.0, - "learning_rate": 1.958451369216242e-05, - "loss": 1.2405, + "learning_rate": 1.9994147157754727e-05, + "loss": 1.2312, "step": 1037 }, { - "epoch": 0.029414265068434923, + "epoch": 0.0406135065341576, "grad_norm": 0.0, - "learning_rate": 1.9603399433427762e-05, - "loss": 1.1291, + "learning_rate": 1.9994103727350702e-05, + "loss": 1.2343, "step": 1038 }, { - "epoch": 0.029442602510697385, + "epoch": 0.04065263322638704, "grad_norm": 0.0, - "learning_rate": 1.9622285174693108e-05, - "loss": 1.1038, + "learning_rate": 1.9994060136453894e-05, + "loss": 1.1444, "step": 1039 }, { - "epoch": 0.029470939952959845, + "epoch": 0.04069175991861648, "grad_norm": 0.0, - "learning_rate": 1.9641170915958453e-05, - "loss": 1.1838, + "learning_rate": 1.9994016385065005e-05, + "loss": 1.3528, "step": 1040 }, { - "epoch": 0.029499277395222307, + "epoch": 0.040730886610845916, "grad_norm": 0.0, - "learning_rate": 1.96600566572238e-05, - "loss": 1.1879, + "learning_rate": 1.9993972473184736e-05, + "loss": 1.4061, "step": 1041 }, { - "epoch": 0.02952761483748477, + "epoch": 0.04077001330307536, "grad_norm": 0.0, - "learning_rate": 1.9678942398489144e-05, - "loss": 1.2451, + "learning_rate": 1.999392840081379e-05, + "loss": 1.2121, "step": 1042 }, { - "epoch": 0.02955595227974723, + "epoch": 0.0408091399953048, "grad_norm": 0.0, - "learning_rate": 1.9697828139754486e-05, - "loss": 1.1383, + "learning_rate": 1.999388416795288e-05, + "loss": 1.2636, "step": 1043 }, { - "epoch": 0.02958428972200969, + "epoch": 0.04084826668753424, "grad_norm": 0.0, - "learning_rate": 1.9716713881019832e-05, - "loss": 1.1562, + "learning_rate": 1.999383977460271e-05, + "loss": 1.3624, "step": 1044 }, { - "epoch": 0.029612627164272154, + "epoch": 0.04088739337976367, "grad_norm": 0.0, - "learning_rate": 1.9735599622285174e-05, - "loss": 1.1584, + "learning_rate": 1.9993795220763997e-05, + "loss": 1.2493, "step": 1045 }, { - "epoch": 0.029640964606534613, + "epoch": 0.040926520071993114, "grad_norm": 0.0, - "learning_rate": 1.9754485363550523e-05, - "loss": 1.1212, + "learning_rate": 1.999375050643746e-05, + "loss": 1.3727, "step": 1046 }, { - "epoch": 0.029669302048797076, + "epoch": 0.040965646764222555, "grad_norm": 0.0, - "learning_rate": 1.9773371104815865e-05, - "loss": 1.2668, + "learning_rate": 1.9993705631623807e-05, + "loss": 1.2955, "step": 1047 }, { - "epoch": 0.029697639491059538, + "epoch": 0.04100477345645199, "grad_norm": 0.0, - "learning_rate": 1.979225684608121e-05, - "loss": 1.1325, + "learning_rate": 1.999366059632377e-05, + "loss": 1.3702, "step": 1048 }, { - "epoch": 0.029725976933321997, + "epoch": 0.04104390014868143, "grad_norm": 0.0, - "learning_rate": 1.9811142587346556e-05, - "loss": 1.2148, + "learning_rate": 1.999361540053806e-05, + "loss": 1.2275, "step": 1049 }, { - "epoch": 0.02975431437558446, + "epoch": 0.04108302684091087, "grad_norm": 0.0, - "learning_rate": 1.98300283286119e-05, - "loss": 1.1513, + "learning_rate": 1.9993570044267415e-05, + "loss": 1.3579, "step": 1050 }, { - "epoch": 0.029782651817846922, + "epoch": 0.04112215353314031, "grad_norm": 0.0, - "learning_rate": 1.9848914069877244e-05, - "loss": 1.1302, + "learning_rate": 1.9993524527512556e-05, + "loss": 1.3281, "step": 1051 }, { - "epoch": 0.02981098926010938, + "epoch": 0.041161280225369745, "grad_norm": 0.0, - "learning_rate": 1.986779981114259e-05, - "loss": 1.0855, + "learning_rate": 1.9993478850274214e-05, + "loss": 1.3505, "step": 1052 }, { - "epoch": 0.029839326702371844, + "epoch": 0.041200406917599186, "grad_norm": 0.0, - "learning_rate": 1.9886685552407935e-05, - "loss": 1.195, + "learning_rate": 1.9993433012553128e-05, + "loss": 1.1343, "step": 1053 }, { - "epoch": 0.029867664144634307, + "epoch": 0.04123953360982863, "grad_norm": 0.0, - "learning_rate": 1.9905571293673277e-05, - "loss": 1.1566, + "learning_rate": 1.9993387014350027e-05, + "loss": 1.2709, "step": 1054 }, { - "epoch": 0.029896001586896766, + "epoch": 0.04127866030205806, "grad_norm": 0.0, - "learning_rate": 1.9924457034938623e-05, - "loss": 1.1047, + "learning_rate": 1.9993340855665656e-05, + "loss": 1.3373, "step": 1055 }, { - "epoch": 0.02992433902915923, + "epoch": 0.0413177869942875, "grad_norm": 0.0, - "learning_rate": 1.9943342776203965e-05, - "loss": 1.1091, + "learning_rate": 1.999329453650075e-05, + "loss": 1.2472, "step": 1056 }, { - "epoch": 0.02995267647142169, + "epoch": 0.04135691368651694, "grad_norm": 0.0, - "learning_rate": 1.9962228517469314e-05, - "loss": 1.0957, + "learning_rate": 1.9993248056856055e-05, + "loss": 1.2744, "step": 1057 }, { - "epoch": 0.02998101391368415, + "epoch": 0.041396040378746383, "grad_norm": 0.0, - "learning_rate": 1.9981114258734656e-05, - "loss": 1.1157, + "learning_rate": 1.9993201416732322e-05, + "loss": 1.4343, "step": 1058 }, { - "epoch": 0.030009351355946613, + "epoch": 0.04143516707097582, "grad_norm": 0.0, - "learning_rate": 2e-05, - "loss": 1.2177, + "learning_rate": 1.9993154616130293e-05, + "loss": 1.3251, "step": 1059 }, { - "epoch": 0.030037688798209075, + "epoch": 0.04147429376320526, "grad_norm": 0.0, - "learning_rate": 1.9999999957883145e-05, - "loss": 1.1057, + "learning_rate": 1.9993107655050727e-05, + "loss": 1.2797, "step": 1060 }, { - "epoch": 0.030066026240471534, + "epoch": 0.0415134204554347, "grad_norm": 0.0, - "learning_rate": 1.9999999831532575e-05, - "loss": 1.1277, + "learning_rate": 1.999306053349437e-05, + "loss": 1.2936, "step": 1061 }, { - "epoch": 0.030094363682733997, + "epoch": 0.04155254714766413, "grad_norm": 0.0, - "learning_rate": 1.9999999620948292e-05, - "loss": 1.1524, + "learning_rate": 1.9993013251461987e-05, + "loss": 1.2452, "step": 1062 }, { - "epoch": 0.03012270112499646, + "epoch": 0.041591673839893574, "grad_norm": 0.0, - "learning_rate": 1.9999999326130303e-05, - "loss": 1.2402, + "learning_rate": 1.999296580895433e-05, + "loss": 1.2907, "step": 1063 }, { - "epoch": 0.03015103856725892, + "epoch": 0.041630800532123015, "grad_norm": 0.0, - "learning_rate": 1.9999998947078603e-05, - "loss": 1.1557, + "learning_rate": 1.9992918205972164e-05, + "loss": 1.255, "step": 1064 }, { - "epoch": 0.03017937600952138, + "epoch": 0.041669927224352456, "grad_norm": 0.0, - "learning_rate": 1.9999998483793198e-05, - "loss": 1.1688, + "learning_rate": 1.9992870442516257e-05, + "loss": 1.2142, "step": 1065 }, { - "epoch": 0.030207713451783844, + "epoch": 0.04170905391658189, "grad_norm": 0.0, - "learning_rate": 1.9999997936274092e-05, - "loss": 1.123, + "learning_rate": 1.999282251858737e-05, + "loss": 1.3812, "step": 1066 }, { - "epoch": 0.030236050894046303, + "epoch": 0.04174818060881133, "grad_norm": 0.0, - "learning_rate": 1.999999730452129e-05, - "loss": 1.0443, + "learning_rate": 1.9992774434186275e-05, + "loss": 1.363, "step": 1067 }, { - "epoch": 0.030264388336308765, + "epoch": 0.04178730730104077, "grad_norm": 0.0, - "learning_rate": 1.99999965885348e-05, - "loss": 1.0908, + "learning_rate": 1.999272618931374e-05, + "loss": 1.1848, "step": 1068 }, { - "epoch": 0.030292725778571228, + "epoch": 0.04182643399327021, "grad_norm": 0.0, - "learning_rate": 1.9999995788314622e-05, - "loss": 1.2873, + "learning_rate": 1.999267778397055e-05, + "loss": 1.2239, "step": 1069 }, { - "epoch": 0.030321063220833687, + "epoch": 0.041865560685499646, "grad_norm": 0.0, - "learning_rate": 1.9999994903860772e-05, - "loss": 1.201, + "learning_rate": 1.9992629218157478e-05, + "loss": 1.3157, "step": 1070 }, { - "epoch": 0.03034940066309615, + "epoch": 0.04190468737772909, "grad_norm": 0.0, - "learning_rate": 1.9999993935173247e-05, - "loss": 1.1473, + "learning_rate": 1.9992580491875296e-05, + "loss": 1.2838, "step": 1071 }, { - "epoch": 0.03037773810535861, + "epoch": 0.04194381406995853, "grad_norm": 0.0, - "learning_rate": 1.999999288225206e-05, - "loss": 1.1736, + "learning_rate": 1.99925316051248e-05, + "loss": 1.3483, "step": 1072 }, { - "epoch": 0.03040607554762107, + "epoch": 0.04198294076218796, "grad_norm": 0.0, - "learning_rate": 1.9999991745097218e-05, - "loss": 1.1696, + "learning_rate": 1.999248255790676e-05, + "loss": 1.2932, "step": 1073 }, { - "epoch": 0.030434412989883534, + "epoch": 0.0420220674544174, "grad_norm": 0.0, - "learning_rate": 1.9999990523708736e-05, - "loss": 1.0495, + "learning_rate": 1.9992433350221976e-05, + "loss": 1.3293, "step": 1074 }, { - "epoch": 0.030462750432145993, + "epoch": 0.042061194146646844, "grad_norm": 0.0, - "learning_rate": 1.9999989218086615e-05, - "loss": 1.1643, + "learning_rate": 1.999238398207123e-05, + "loss": 1.3144, "step": 1075 }, { - "epoch": 0.030491087874408455, + "epoch": 0.042100320838876285, "grad_norm": 0.0, - "learning_rate": 1.9999987828230875e-05, - "loss": 1.1394, + "learning_rate": 1.9992334453455322e-05, + "loss": 1.3781, "step": 1076 }, { - "epoch": 0.030519425316670918, + "epoch": 0.04213944753110572, "grad_norm": 0.0, - "learning_rate": 1.9999986354141524e-05, - "loss": 1.1703, + "learning_rate": 1.9992284764375042e-05, + "loss": 1.3487, "step": 1077 }, { - "epoch": 0.030547762758933377, + "epoch": 0.04217857422333516, "grad_norm": 0.0, - "learning_rate": 1.9999984795818572e-05, - "loss": 1.1324, + "learning_rate": 1.999223491483119e-05, + "loss": 1.2717, "step": 1078 }, { - "epoch": 0.03057610020119584, + "epoch": 0.0422177009155646, "grad_norm": 0.0, - "learning_rate": 1.9999983153262038e-05, - "loss": 1.1898, + "learning_rate": 1.9992184904824566e-05, + "loss": 1.4091, "step": 1079 }, { - "epoch": 0.030604437643458302, + "epoch": 0.042256827607794034, "grad_norm": 0.0, - "learning_rate": 1.999998142647193e-05, - "loss": 1.2068, + "learning_rate": 1.9992134734355974e-05, + "loss": 1.0168, "step": 1080 }, { - "epoch": 0.03063277508572076, + "epoch": 0.042295954300023475, "grad_norm": 0.0, - "learning_rate": 1.999997961544827e-05, - "loss": 1.2494, + "learning_rate": 1.999208440342622e-05, + "loss": 1.1735, "step": 1081 }, { - "epoch": 0.030661112527983224, + "epoch": 0.042335080992252916, "grad_norm": 0.0, - "learning_rate": 1.9999977720191063e-05, - "loss": 1.2412, + "learning_rate": 1.999203391203611e-05, + "loss": 1.256, "step": 1082 }, { - "epoch": 0.030689449970245686, + "epoch": 0.04237420768448236, "grad_norm": 0.0, - "learning_rate": 1.999997574070033e-05, - "loss": 1.228, + "learning_rate": 1.9991983260186452e-05, + "loss": 1.3036, "step": 1083 }, { - "epoch": 0.030717787412508146, + "epoch": 0.04241333437671179, "grad_norm": 0.0, - "learning_rate": 1.999997367697609e-05, - "loss": 1.2275, + "learning_rate": 1.999193244787807e-05, + "loss": 1.2602, "step": 1084 }, { - "epoch": 0.030746124854770608, + "epoch": 0.04245246106894123, "grad_norm": 0.0, - "learning_rate": 1.999997152901836e-05, - "loss": 1.071, + "learning_rate": 1.9991881475111773e-05, + "loss": 1.4188, "step": 1085 }, { - "epoch": 0.03077446229703307, + "epoch": 0.04249158776117067, "grad_norm": 0.0, - "learning_rate": 1.9999969296827152e-05, - "loss": 1.2005, + "learning_rate": 1.9991830341888375e-05, + "loss": 1.3242, "step": 1086 }, { - "epoch": 0.03080279973929553, + "epoch": 0.042530714453400106, "grad_norm": 0.0, - "learning_rate": 1.9999966980402495e-05, - "loss": 1.1568, + "learning_rate": 1.9991779048208707e-05, + "loss": 1.3475, "step": 1087 }, { - "epoch": 0.030831137181557992, + "epoch": 0.04256984114562955, "grad_norm": 0.0, - "learning_rate": 1.99999645797444e-05, - "loss": 1.0773, + "learning_rate": 1.9991727594073585e-05, + "loss": 1.2897, "step": 1088 }, { - "epoch": 0.030859474623820455, + "epoch": 0.04260896783785899, "grad_norm": 0.0, - "learning_rate": 1.999996209485289e-05, - "loss": 1.1944, + "learning_rate": 1.9991675979483844e-05, + "loss": 1.3208, "step": 1089 }, { - "epoch": 0.030887812066082914, + "epoch": 0.04264809453008843, "grad_norm": 0.0, - "learning_rate": 1.9999959525727983e-05, - "loss": 1.2032, + "learning_rate": 1.99916242044403e-05, + "loss": 1.4097, "step": 1090 }, { - "epoch": 0.030916149508345377, + "epoch": 0.04268722122231786, "grad_norm": 0.0, - "learning_rate": 1.9999956872369706e-05, - "loss": 1.2258, + "learning_rate": 1.9991572268943793e-05, + "loss": 1.3672, "step": 1091 }, { - "epoch": 0.03094448695060784, + "epoch": 0.042726347914547304, "grad_norm": 0.0, - "learning_rate": 1.999995413477808e-05, - "loss": 1.1927, + "learning_rate": 1.9991520172995158e-05, + "loss": 1.2902, "step": 1092 }, { - "epoch": 0.0309728243928703, + "epoch": 0.042765474606776745, "grad_norm": 0.0, - "learning_rate": 1.999995131295312e-05, - "loss": 1.2244, + "learning_rate": 1.9991467916595226e-05, + "loss": 1.1862, "step": 1093 }, { - "epoch": 0.03100116183513276, + "epoch": 0.04280460129900618, "grad_norm": 0.0, - "learning_rate": 1.9999948406894868e-05, - "loss": 1.1487, + "learning_rate": 1.9991415499744842e-05, + "loss": 1.2642, "step": 1094 }, { - "epoch": 0.031029499277395223, + "epoch": 0.04284372799123562, "grad_norm": 0.0, - "learning_rate": 1.999994541660333e-05, - "loss": 1.1634, + "learning_rate": 1.9991362922444842e-05, + "loss": 1.2922, "step": 1095 }, { - "epoch": 0.031057836719657683, + "epoch": 0.04288285468346506, "grad_norm": 0.0, - "learning_rate": 1.999994234207854e-05, - "loss": 1.2123, + "learning_rate": 1.9991310184696076e-05, + "loss": 1.2566, "step": 1096 }, { - "epoch": 0.031086174161920145, + "epoch": 0.0429219813756945, "grad_norm": 0.0, - "learning_rate": 1.9999939183320523e-05, - "loss": 1.1459, + "learning_rate": 1.9991257286499386e-05, + "loss": 1.1525, "step": 1097 }, { - "epoch": 0.031114511604182608, + "epoch": 0.042961108067923935, "grad_norm": 0.0, - "learning_rate": 1.9999935940329304e-05, - "loss": 1.2354, + "learning_rate": 1.9991204227855627e-05, + "loss": 1.2256, "step": 1098 }, { - "epoch": 0.031142849046445067, + "epoch": 0.043000234760153376, "grad_norm": 0.0, - "learning_rate": 1.999993261310491e-05, - "loss": 1.0645, + "learning_rate": 1.9991151008765644e-05, + "loss": 1.4133, "step": 1099 }, { - "epoch": 0.03117118648870753, + "epoch": 0.04303936145238282, "grad_norm": 0.0, - "learning_rate": 1.999992920164737e-05, - "loss": 1.1756, + "learning_rate": 1.9991097629230298e-05, + "loss": 1.1948, "step": 1100 }, { - "epoch": 0.031199523930969992, + "epoch": 0.04307848814461226, "grad_norm": 0.0, - "learning_rate": 1.9999925705956716e-05, - "loss": 1.1328, + "learning_rate": 1.999104408925044e-05, + "loss": 1.5092, "step": 1101 }, { - "epoch": 0.03122786137323245, + "epoch": 0.04311761483684169, "grad_norm": 0.0, - "learning_rate": 1.9999922126032975e-05, - "loss": 1.1094, + "learning_rate": 1.999099038882694e-05, + "loss": 1.2379, "step": 1102 }, { - "epoch": 0.031256198815494914, + "epoch": 0.04315674152907113, "grad_norm": 0.0, - "learning_rate": 1.9999918461876174e-05, - "loss": 1.0765, + "learning_rate": 1.999093652796065e-05, + "loss": 1.2388, "step": 1103 }, { - "epoch": 0.03128453625775737, + "epoch": 0.04319586822130057, "grad_norm": 0.0, - "learning_rate": 1.9999914713486344e-05, - "loss": 1.1599, + "learning_rate": 1.999088250665244e-05, + "loss": 1.4178, "step": 1104 }, { - "epoch": 0.03131287370001984, + "epoch": 0.04323499491353001, "grad_norm": 0.0, - "learning_rate": 1.9999910880863523e-05, - "loss": 1.1106, + "learning_rate": 1.999082832490317e-05, + "loss": 1.2202, "step": 1105 }, { - "epoch": 0.0313412111422823, + "epoch": 0.04327412160575945, "grad_norm": 0.0, - "learning_rate": 1.9999906964007738e-05, - "loss": 1.178, + "learning_rate": 1.9990773982713725e-05, + "loss": 1.1854, "step": 1106 }, { - "epoch": 0.03136954858454476, + "epoch": 0.04331324829798889, "grad_norm": 0.0, - "learning_rate": 1.999990296291902e-05, - "loss": 1.1269, + "learning_rate": 1.9990719480084966e-05, + "loss": 1.3325, "step": 1107 }, { - "epoch": 0.03139788602680722, + "epoch": 0.04335237499021833, "grad_norm": 0.0, - "learning_rate": 1.9999898877597412e-05, - "loss": 1.1661, + "learning_rate": 1.999066481701777e-05, + "loss": 1.2999, "step": 1108 }, { - "epoch": 0.03142622346906968, + "epoch": 0.043391501682447764, "grad_norm": 0.0, - "learning_rate": 1.9999894708042943e-05, - "loss": 1.1697, + "learning_rate": 1.9990609993513014e-05, + "loss": 1.3221, "step": 1109 }, { - "epoch": 0.03145456091133214, + "epoch": 0.043430628374677205, "grad_norm": 0.0, - "learning_rate": 1.9999890454255642e-05, - "loss": 1.2137, + "learning_rate": 1.9990555009571582e-05, + "loss": 1.3391, "step": 1110 }, { - "epoch": 0.03148289835359461, + "epoch": 0.043469755066906646, "grad_norm": 0.0, - "learning_rate": 1.9999886116235553e-05, - "loss": 1.2826, + "learning_rate": 1.999049986519436e-05, + "loss": 1.2958, "step": 1111 }, { - "epoch": 0.031511235795857066, + "epoch": 0.04350888175913608, "grad_norm": 0.0, - "learning_rate": 1.999988169398271e-05, - "loss": 1.1034, + "learning_rate": 1.9990444560382224e-05, + "loss": 1.2324, "step": 1112 }, { - "epoch": 0.031539573238119525, + "epoch": 0.04354800845136552, "grad_norm": 0.0, - "learning_rate": 1.9999877187497148e-05, - "loss": 1.0012, + "learning_rate": 1.9990389095136068e-05, + "loss": 1.3754, "step": 1113 }, { - "epoch": 0.03156791068038199, + "epoch": 0.04358713514359496, "grad_norm": 0.0, - "learning_rate": 1.9999872596778908e-05, - "loss": 1.2349, + "learning_rate": 1.9990333469456784e-05, + "loss": 1.3578, "step": 1114 }, { - "epoch": 0.03159624812264445, + "epoch": 0.0436262618358244, "grad_norm": 0.0, - "learning_rate": 1.9999867921828028e-05, - "loss": 1.1709, + "learning_rate": 1.999027768334526e-05, + "loss": 1.272, "step": 1115 }, { - "epoch": 0.03162458556490691, + "epoch": 0.043665388528053836, "grad_norm": 0.0, - "learning_rate": 1.999986316264455e-05, - "loss": 1.2156, + "learning_rate": 1.9990221736802398e-05, + "loss": 1.1941, "step": 1116 }, { - "epoch": 0.031652923007169376, + "epoch": 0.04370451522028328, "grad_norm": 0.0, - "learning_rate": 1.999985831922851e-05, - "loss": 1.1354, + "learning_rate": 1.999016562982909e-05, + "loss": 1.4199, "step": 1117 }, { - "epoch": 0.031681260449431835, + "epoch": 0.04374364191251272, "grad_norm": 0.0, - "learning_rate": 1.9999853391579946e-05, - "loss": 1.2399, + "learning_rate": 1.9990109362426243e-05, + "loss": 1.3228, "step": 1118 }, { - "epoch": 0.031709597891694294, + "epoch": 0.04378276860474215, "grad_norm": 0.0, - "learning_rate": 1.9999848379698906e-05, - "loss": 1.3628, + "learning_rate": 1.9990052934594753e-05, + "loss": 1.247, "step": 1119 }, { - "epoch": 0.03173793533395676, + "epoch": 0.04382189529697159, "grad_norm": 0.0, - "learning_rate": 1.999984328358543e-05, - "loss": 1.1736, + "learning_rate": 1.998999634633554e-05, + "loss": 1.2927, "step": 1120 }, { - "epoch": 0.03176627277621922, + "epoch": 0.043861021989201034, "grad_norm": 0.0, - "learning_rate": 1.999983810323956e-05, - "loss": 1.1956, + "learning_rate": 1.9989939597649497e-05, + "loss": 1.4026, "step": 1121 }, { - "epoch": 0.03179461021848168, + "epoch": 0.043900148681430474, "grad_norm": 0.0, - "learning_rate": 1.9999832838661343e-05, - "loss": 1.0685, + "learning_rate": 1.9989882688537542e-05, + "loss": 1.1652, "step": 1122 }, { - "epoch": 0.031822947660744144, + "epoch": 0.04393927537365991, "grad_norm": 0.0, - "learning_rate": 1.9999827489850817e-05, - "loss": 1.1263, + "learning_rate": 1.9989825619000593e-05, + "loss": 1.3982, "step": 1123 }, { - "epoch": 0.0318512851030066, + "epoch": 0.04397840206588935, "grad_norm": 0.0, - "learning_rate": 1.9999822056808035e-05, - "loss": 1.2598, + "learning_rate": 1.998976838903956e-05, + "loss": 1.2191, "step": 1124 }, { - "epoch": 0.03187962254526906, + "epoch": 0.04401752875811879, "grad_norm": 0.0, - "learning_rate": 1.9999816539533033e-05, - "loss": 1.3718, + "learning_rate": 1.9989710998655365e-05, + "loss": 1.3917, "step": 1125 }, { - "epoch": 0.03190795998753153, + "epoch": 0.044056655450348224, "grad_norm": 0.0, - "learning_rate": 1.9999810938025867e-05, - "loss": 1.1281, + "learning_rate": 1.998965344784893e-05, + "loss": 1.4061, "step": 1126 }, { - "epoch": 0.03193629742979399, + "epoch": 0.044095782142577665, "grad_norm": 0.0, - "learning_rate": 1.999980525228658e-05, - "loss": 1.1145, + "learning_rate": 1.9989595736621178e-05, + "loss": 1.2428, "step": 1127 }, { - "epoch": 0.03196463487205645, + "epoch": 0.044134908834807106, "grad_norm": 0.0, - "learning_rate": 1.9999799482315216e-05, - "loss": 1.2564, + "learning_rate": 1.9989537864973037e-05, + "loss": 1.2302, "step": 1128 }, { - "epoch": 0.03199297231431891, + "epoch": 0.04417403552703655, "grad_norm": 0.0, - "learning_rate": 1.9999793628111833e-05, - "loss": 1.0937, + "learning_rate": 1.9989479832905432e-05, + "loss": 1.1662, "step": 1129 }, { - "epoch": 0.03202130975658137, + "epoch": 0.04421316221926598, "grad_norm": 0.0, - "learning_rate": 1.999978768967647e-05, - "loss": 1.1761, + "learning_rate": 1.99894216404193e-05, + "loss": 1.3441, "step": 1130 }, { - "epoch": 0.03204964719884383, + "epoch": 0.04425228891149542, "grad_norm": 0.0, - "learning_rate": 1.9999781667009185e-05, - "loss": 1.0614, + "learning_rate": 1.9989363287515577e-05, + "loss": 1.3217, "step": 1131 }, { - "epoch": 0.0320779846411063, + "epoch": 0.04429141560372486, "grad_norm": 0.0, - "learning_rate": 1.9999775560110026e-05, - "loss": 1.2065, + "learning_rate": 1.9989304774195194e-05, + "loss": 1.1744, "step": 1132 }, { - "epoch": 0.032106322083368756, + "epoch": 0.0443305422959543, "grad_norm": 0.0, - "learning_rate": 1.9999769368979044e-05, - "loss": 1.1468, + "learning_rate": 1.998924610045909e-05, + "loss": 1.2409, "step": 1133 }, { - "epoch": 0.032134659525631215, + "epoch": 0.04436966898818374, "grad_norm": 0.0, - "learning_rate": 1.999976309361629e-05, - "loss": 1.2514, + "learning_rate": 1.998918726630822e-05, + "loss": 1.3866, "step": 1134 }, { - "epoch": 0.03216299696789368, + "epoch": 0.04440879568041318, "grad_norm": 0.0, - "learning_rate": 1.999975673402182e-05, - "loss": 1.2632, + "learning_rate": 1.9989128271743512e-05, + "loss": 1.2281, "step": 1135 }, { - "epoch": 0.03219133441015614, + "epoch": 0.04444792237264262, "grad_norm": 0.0, - "learning_rate": 1.9999750290195684e-05, - "loss": 1.2292, + "learning_rate": 1.9989069116765924e-05, + "loss": 1.3711, "step": 1136 }, { - "epoch": 0.0322196718524186, + "epoch": 0.04448704906487205, "grad_norm": 0.0, - "learning_rate": 1.9999743762137937e-05, - "loss": 1.2195, + "learning_rate": 1.9989009801376403e-05, + "loss": 1.3267, "step": 1137 }, { - "epoch": 0.032248009294681065, + "epoch": 0.044526175757101494, "grad_norm": 0.0, - "learning_rate": 1.9999737149848638e-05, - "loss": 1.1015, + "learning_rate": 1.99889503255759e-05, + "loss": 1.4145, "step": 1138 }, { - "epoch": 0.032276346736943524, + "epoch": 0.044565302449330935, "grad_norm": 0.0, - "learning_rate": 1.999973045332784e-05, - "loss": 1.279, + "learning_rate": 1.9988890689365374e-05, + "loss": 1.3125, "step": 1139 }, { - "epoch": 0.032304684179205984, + "epoch": 0.044604429141560376, "grad_norm": 0.0, - "learning_rate": 1.9999723672575592e-05, - "loss": 1.1362, + "learning_rate": 1.998883089274578e-05, + "loss": 1.3785, "step": 1140 }, { - "epoch": 0.03233302162146845, + "epoch": 0.04464355583378981, "grad_norm": 0.0, - "learning_rate": 1.9999716807591967e-05, - "loss": 1.0932, + "learning_rate": 1.9988770935718075e-05, + "loss": 1.1849, "step": 1141 }, { - "epoch": 0.03236135906373091, + "epoch": 0.04468268252601925, "grad_norm": 0.0, - "learning_rate": 1.9999709858377008e-05, - "loss": 0.9578, + "learning_rate": 1.998871081828323e-05, + "loss": 1.2139, "step": 1142 }, { - "epoch": 0.03238969650599337, + "epoch": 0.04472180921824869, "grad_norm": 0.0, - "learning_rate": 1.999970282493078e-05, - "loss": 1.2375, + "learning_rate": 1.9988650540442207e-05, + "loss": 1.2544, "step": 1143 }, { - "epoch": 0.032418033948255834, + "epoch": 0.044760935910478125, "grad_norm": 0.0, - "learning_rate": 1.9999695707253345e-05, - "loss": 1.0653, + "learning_rate": 1.9988590102195968e-05, + "loss": 1.1888, "step": 1144 }, { - "epoch": 0.03244637139051829, + "epoch": 0.044800062602707566, "grad_norm": 0.0, - "learning_rate": 1.9999688505344757e-05, - "loss": 1.2114, + "learning_rate": 1.9988529503545488e-05, + "loss": 1.267, "step": 1145 }, { - "epoch": 0.03247470883278075, + "epoch": 0.04483918929493701, "grad_norm": 0.0, - "learning_rate": 1.9999681219205085e-05, - "loss": 1.2282, + "learning_rate": 1.9988468744491744e-05, + "loss": 1.1844, "step": 1146 }, { - "epoch": 0.03250304627504321, + "epoch": 0.04487831598716645, "grad_norm": 0.0, - "learning_rate": 1.999967384883438e-05, - "loss": 1.0339, + "learning_rate": 1.9988407825035704e-05, + "loss": 1.2526, "step": 1147 }, { - "epoch": 0.03253138371730568, + "epoch": 0.04491744267939588, "grad_norm": 0.0, - "learning_rate": 1.999966639423271e-05, - "loss": 1.1278, + "learning_rate": 1.9988346745178356e-05, + "loss": 1.2491, "step": 1148 }, { - "epoch": 0.032559721159568136, + "epoch": 0.04495656937162532, "grad_norm": 0.0, - "learning_rate": 1.9999658855400135e-05, - "loss": 1.1992, + "learning_rate": 1.9988285504920672e-05, + "loss": 1.25, "step": 1149 }, { - "epoch": 0.032588058601830595, + "epoch": 0.04499569606385476, "grad_norm": 0.0, - "learning_rate": 1.9999651232336723e-05, - "loss": 1.2797, + "learning_rate": 1.9988224104263642e-05, + "loss": 1.3207, "step": 1150 }, { - "epoch": 0.03261639604409306, + "epoch": 0.0450348227560842, "grad_norm": 0.0, - "learning_rate": 1.9999643525042532e-05, - "loss": 1.1696, + "learning_rate": 1.9988162543208245e-05, + "loss": 1.172, "step": 1151 }, { - "epoch": 0.03264473348635552, + "epoch": 0.04507394944831364, "grad_norm": 0.0, - "learning_rate": 1.9999635733517634e-05, - "loss": 1.0923, + "learning_rate": 1.9988100821755474e-05, + "loss": 1.3838, "step": 1152 }, { - "epoch": 0.03267307092861798, + "epoch": 0.04511307614054308, "grad_norm": 0.0, - "learning_rate": 1.9999627857762088e-05, - "loss": 1.1005, + "learning_rate": 1.998803893990632e-05, + "loss": 1.2557, "step": 1153 }, { - "epoch": 0.032701408370880446, + "epoch": 0.04515220283277252, "grad_norm": 0.0, - "learning_rate": 1.9999619897775963e-05, - "loss": 1.1058, + "learning_rate": 1.9987976897661777e-05, + "loss": 1.2975, "step": 1154 }, { - "epoch": 0.032729745813142905, + "epoch": 0.045191329525001954, "grad_norm": 0.0, - "learning_rate": 1.999961185355933e-05, - "loss": 1.2092, + "learning_rate": 1.998791469502284e-05, + "loss": 1.3393, "step": 1155 }, { - "epoch": 0.032758083255405364, + "epoch": 0.045230456217231395, "grad_norm": 0.0, - "learning_rate": 1.999960372511225e-05, - "loss": 1.1573, + "learning_rate": 1.998785233199051e-05, + "loss": 1.2532, "step": 1156 }, { - "epoch": 0.03278642069766783, + "epoch": 0.045269582909460836, "grad_norm": 0.0, - "learning_rate": 1.9999595512434794e-05, - "loss": 1.2426, + "learning_rate": 1.9987789808565785e-05, + "loss": 1.3035, "step": 1157 }, { - "epoch": 0.03281475813993029, + "epoch": 0.04530870960169028, "grad_norm": 0.0, - "learning_rate": 1.9999587215527034e-05, - "loss": 1.1599, + "learning_rate": 1.9987727124749673e-05, + "loss": 1.4033, "step": 1158 }, { - "epoch": 0.03284309558219275, + "epoch": 0.04534783629391971, "grad_norm": 0.0, - "learning_rate": 1.9999578834389036e-05, - "loss": 1.337, + "learning_rate": 1.9987664280543177e-05, + "loss": 1.2786, "step": 1159 }, { - "epoch": 0.032871433024455214, + "epoch": 0.04538696298614915, "grad_norm": 0.0, - "learning_rate": 1.9999570369020876e-05, - "loss": 1.1805, + "learning_rate": 1.998760127594731e-05, + "loss": 1.312, "step": 1160 }, { - "epoch": 0.03289977046671767, + "epoch": 0.04542608967837859, "grad_norm": 0.0, - "learning_rate": 1.9999561819422615e-05, - "loss": 1.2177, + "learning_rate": 1.998753811096308e-05, + "loss": 1.2175, "step": 1161 }, { - "epoch": 0.03292810790898013, + "epoch": 0.045465216370608026, "grad_norm": 0.0, - "learning_rate": 1.9999553185594337e-05, - "loss": 1.1292, + "learning_rate": 1.9987474785591502e-05, + "loss": 1.4963, "step": 1162 }, { - "epoch": 0.0329564453512426, + "epoch": 0.04550434306283747, "grad_norm": 0.0, - "learning_rate": 1.9999544467536106e-05, - "loss": 1.1381, + "learning_rate": 1.9987411299833598e-05, + "loss": 1.2417, "step": 1163 }, { - "epoch": 0.03298478279350506, + "epoch": 0.04554346975506691, "grad_norm": 0.0, - "learning_rate": 1.9999535665248e-05, - "loss": 1.1651, + "learning_rate": 1.998734765369038e-05, + "loss": 1.2422, "step": 1164 }, { - "epoch": 0.03301312023576752, + "epoch": 0.04558259644729635, "grad_norm": 0.0, - "learning_rate": 1.9999526778730092e-05, - "loss": 1.2767, + "learning_rate": 1.9987283847162873e-05, + "loss": 1.2487, "step": 1165 }, { - "epoch": 0.03304145767802998, + "epoch": 0.04562172313952578, "grad_norm": 0.0, - "learning_rate": 1.9999517807982455e-05, - "loss": 1.1718, + "learning_rate": 1.99872198802521e-05, + "loss": 1.2296, "step": 1166 }, { - "epoch": 0.03306979512029244, + "epoch": 0.045660849831755224, "grad_norm": 0.0, - "learning_rate": 1.999950875300517e-05, - "loss": 1.217, + "learning_rate": 1.9987155752959094e-05, + "loss": 1.3336, "step": 1167 }, { - "epoch": 0.0330981325625549, + "epoch": 0.045699976523984664, "grad_norm": 0.0, - "learning_rate": 1.9999499613798306e-05, - "loss": 1.1891, + "learning_rate": 1.9987091465284884e-05, + "loss": 1.3399, "step": 1168 }, { - "epoch": 0.03312647000481737, + "epoch": 0.0457391032162141, "grad_norm": 0.0, - "learning_rate": 1.9999490390361947e-05, - "loss": 1.1319, + "learning_rate": 1.9987027017230497e-05, + "loss": 1.3513, "step": 1169 }, { - "epoch": 0.033154807447079826, + "epoch": 0.04577822990844354, "grad_norm": 0.0, - "learning_rate": 1.9999481082696164e-05, - "loss": 1.216, + "learning_rate": 1.9986962408796972e-05, + "loss": 1.3708, "step": 1170 }, { - "epoch": 0.033183144889342285, + "epoch": 0.04581735660067298, "grad_norm": 0.0, - "learning_rate": 1.999947169080104e-05, - "loss": 1.2047, + "learning_rate": 1.998689763998534e-05, + "loss": 1.3723, "step": 1171 }, { - "epoch": 0.03321148233160475, + "epoch": 0.04585648329290242, "grad_norm": 0.0, - "learning_rate": 1.999946221467665e-05, - "loss": 1.0793, + "learning_rate": 1.998683271079665e-05, + "loss": 1.2317, "step": 1172 }, { - "epoch": 0.03323981977386721, + "epoch": 0.045895609985131855, "grad_norm": 0.0, - "learning_rate": 1.999945265432308e-05, - "loss": 1.204, + "learning_rate": 1.998676762123194e-05, + "loss": 1.4271, "step": 1173 }, { - "epoch": 0.03326815721612967, + "epoch": 0.045934736677361296, "grad_norm": 0.0, - "learning_rate": 1.9999443009740406e-05, - "loss": 1.1729, + "learning_rate": 1.9986702371292256e-05, + "loss": 1.4222, "step": 1174 }, { - "epoch": 0.033296494658392135, + "epoch": 0.04597386336959074, "grad_norm": 0.0, - "learning_rate": 1.9999433280928713e-05, - "loss": 1.0794, + "learning_rate": 1.9986636960978646e-05, + "loss": 1.163, "step": 1175 }, { - "epoch": 0.033324832100654594, + "epoch": 0.04601299006182017, "grad_norm": 0.0, - "learning_rate": 1.9999423467888078e-05, - "loss": 1.1641, + "learning_rate": 1.9986571390292162e-05, + "loss": 1.425, "step": 1176 }, { - "epoch": 0.033353169542917054, + "epoch": 0.04605211675404961, "grad_norm": 0.0, - "learning_rate": 1.9999413570618588e-05, - "loss": 1.1555, + "learning_rate": 1.9986505659233853e-05, + "loss": 1.3381, "step": 1177 }, { - "epoch": 0.03338150698517952, + "epoch": 0.04609124344627905, "grad_norm": 0.0, - "learning_rate": 1.9999403589120317e-05, - "loss": 1.13, + "learning_rate": 1.998643976780478e-05, + "loss": 1.3591, "step": 1178 }, { - "epoch": 0.03340984442744198, + "epoch": 0.04613037013850849, "grad_norm": 0.0, - "learning_rate": 1.9999393523393365e-05, - "loss": 1.1579, + "learning_rate": 1.9986373716005995e-05, + "loss": 1.3948, "step": 1179 }, { - "epoch": 0.03343818186970444, + "epoch": 0.04616949683073793, "grad_norm": 0.0, - "learning_rate": 1.9999383373437803e-05, - "loss": 1.1213, + "learning_rate": 1.9986307503838563e-05, + "loss": 1.3328, "step": 1180 }, { - "epoch": 0.033466519311966904, + "epoch": 0.04620862352296737, "grad_norm": 0.0, - "learning_rate": 1.9999373139253724e-05, - "loss": 1.2017, + "learning_rate": 1.9986241131303545e-05, + "loss": 1.2915, "step": 1181 }, { - "epoch": 0.03349485675422936, + "epoch": 0.04624775021519681, "grad_norm": 0.0, - "learning_rate": 1.999936282084121e-05, - "loss": 1.2267, + "learning_rate": 1.9986174598402012e-05, + "loss": 1.2141, "step": 1182 }, { - "epoch": 0.03352319419649182, + "epoch": 0.04628687690742624, "grad_norm": 0.0, - "learning_rate": 1.999935241820035e-05, - "loss": 1.2183, + "learning_rate": 1.998610790513502e-05, + "loss": 1.319, "step": 1183 }, { - "epoch": 0.03355153163875429, + "epoch": 0.046326003599655684, "grad_norm": 0.0, - "learning_rate": 1.9999341931331234e-05, - "loss": 1.1418, + "learning_rate": 1.9986041051503656e-05, + "loss": 1.2749, "step": 1184 }, { - "epoch": 0.03357986908101675, + "epoch": 0.046365130291885125, "grad_norm": 0.0, - "learning_rate": 1.9999331360233946e-05, - "loss": 0.979, + "learning_rate": 1.9985974037508984e-05, + "loss": 1.2271, "step": 1185 }, { - "epoch": 0.033608206523279206, + "epoch": 0.046404256984114566, "grad_norm": 0.0, - "learning_rate": 1.9999320704908576e-05, - "loss": 1.2149, + "learning_rate": 1.998590686315208e-05, + "loss": 1.1765, "step": 1186 }, { - "epoch": 0.03363654396554167, + "epoch": 0.046443383676344, "grad_norm": 0.0, - "learning_rate": 1.9999309965355215e-05, - "loss": 1.1176, + "learning_rate": 1.9985839528434024e-05, + "loss": 1.4022, "step": 1187 }, { - "epoch": 0.03366488140780413, + "epoch": 0.04648251036857344, "grad_norm": 0.0, - "learning_rate": 1.9999299141573955e-05, - "loss": 1.3741, + "learning_rate": 1.9985772033355904e-05, + "loss": 1.2337, "step": 1188 }, { - "epoch": 0.03369321885006659, + "epoch": 0.04652163706080288, "grad_norm": 0.0, - "learning_rate": 1.999928823356488e-05, - "loss": 1.1312, + "learning_rate": 1.9985704377918794e-05, + "loss": 1.3912, "step": 1189 }, { - "epoch": 0.03372155629232906, + "epoch": 0.04656076375303232, "grad_norm": 0.0, - "learning_rate": 1.9999277241328093e-05, - "loss": 1.136, + "learning_rate": 1.9985636562123782e-05, + "loss": 1.2811, "step": 1190 }, { - "epoch": 0.033749893734591516, + "epoch": 0.046599890445261756, "grad_norm": 0.0, - "learning_rate": 1.9999266164863678e-05, - "loss": 0.9818, + "learning_rate": 1.9985568585971965e-05, + "loss": 1.2588, "step": 1191 }, { - "epoch": 0.033778231176853975, + "epoch": 0.0466390171374912, "grad_norm": 0.0, - "learning_rate": 1.9999255004171732e-05, - "loss": 1.1506, + "learning_rate": 1.9985500449464427e-05, + "loss": 1.3692, "step": 1192 }, { - "epoch": 0.03380656861911644, + "epoch": 0.04667814382972064, "grad_norm": 0.0, - "learning_rate": 1.9999243759252345e-05, - "loss": 1.1532, + "learning_rate": 1.9985432152602263e-05, + "loss": 1.2274, "step": 1193 }, { - "epoch": 0.0338349060613789, + "epoch": 0.04671727052195007, "grad_norm": 0.0, - "learning_rate": 1.9999232430105618e-05, - "loss": 1.0527, + "learning_rate": 1.9985363695386574e-05, + "loss": 1.3125, "step": 1194 }, { - "epoch": 0.03386324350364136, + "epoch": 0.04675639721417951, "grad_norm": 0.0, - "learning_rate": 1.9999221016731646e-05, - "loss": 1.179, + "learning_rate": 1.998529507781845e-05, + "loss": 1.3956, "step": 1195 }, { - "epoch": 0.033891580945903825, + "epoch": 0.04679552390640895, "grad_norm": 0.0, - "learning_rate": 1.9999209519130516e-05, - "loss": 1.1043, + "learning_rate": 1.9985226299899006e-05, + "loss": 1.3129, "step": 1196 }, { - "epoch": 0.033919918388166284, + "epoch": 0.046834650598638394, "grad_norm": 0.0, - "learning_rate": 1.999919793730233e-05, - "loss": 1.1042, + "learning_rate": 1.9985157361629338e-05, + "loss": 1.3396, "step": 1197 }, { - "epoch": 0.03394825583042874, + "epoch": 0.04687377729086783, "grad_norm": 0.0, - "learning_rate": 1.9999186271247192e-05, - "loss": 1.1712, + "learning_rate": 1.9985088263010554e-05, + "loss": 1.2857, "step": 1198 }, { - "epoch": 0.03397659327269121, + "epoch": 0.04691290398309727, "grad_norm": 0.0, - "learning_rate": 1.9999174520965194e-05, - "loss": 1.16, + "learning_rate": 1.998501900404376e-05, + "loss": 1.3507, "step": 1199 }, { - "epoch": 0.03400493071495367, + "epoch": 0.04695203067532671, "grad_norm": 0.0, - "learning_rate": 1.9999162686456435e-05, - "loss": 1.0082, + "learning_rate": 1.9984949584730082e-05, + "loss": 1.3057, "step": 1200 }, { - "epoch": 0.03403326815721613, + "epoch": 0.046991157367556144, "grad_norm": 0.0, - "learning_rate": 1.9999150767721014e-05, - "loss": 1.1883, + "learning_rate": 1.998488000507062e-05, + "loss": 1.3711, "step": 1201 }, { - "epoch": 0.034061605599478594, + "epoch": 0.047030284059785585, "grad_norm": 0.0, - "learning_rate": 1.9999138764759035e-05, - "loss": 1.2209, + "learning_rate": 1.99848102650665e-05, + "loss": 1.2334, "step": 1202 }, { - "epoch": 0.03408994304174105, + "epoch": 0.047069410752015026, "grad_norm": 0.0, - "learning_rate": 1.9999126677570597e-05, - "loss": 1.082, + "learning_rate": 1.9984740364718833e-05, + "loss": 1.4353, "step": 1203 }, { - "epoch": 0.03411828048400351, + "epoch": 0.04710853744424447, "grad_norm": 0.0, - "learning_rate": 1.99991145061558e-05, - "loss": 1.1062, + "learning_rate": 1.998467030402875e-05, + "loss": 1.3066, "step": 1204 }, { - "epoch": 0.03414661792626598, + "epoch": 0.0471476641364739, "grad_norm": 0.0, - "learning_rate": 1.9999102250514753e-05, - "loss": 1.1429, + "learning_rate": 1.9984600082997376e-05, + "loss": 1.2462, "step": 1205 }, { - "epoch": 0.03417495536852844, + "epoch": 0.04718679082870334, "grad_norm": 0.0, - "learning_rate": 1.999908991064755e-05, - "loss": 1.222, + "learning_rate": 1.9984529701625838e-05, + "loss": 1.4004, "step": 1206 }, { - "epoch": 0.034203292810790896, + "epoch": 0.04722591752093278, "grad_norm": 0.0, - "learning_rate": 1.9999077486554302e-05, - "loss": 1.1794, + "learning_rate": 1.9984459159915256e-05, + "loss": 1.2304, "step": 1207 }, { - "epoch": 0.03423163025305336, + "epoch": 0.047265044213162216, "grad_norm": 0.0, - "learning_rate": 1.999906497823511e-05, - "loss": 1.1129, + "learning_rate": 1.998438845786678e-05, + "loss": 1.2768, "step": 1208 }, { - "epoch": 0.03425996769531582, + "epoch": 0.04730417090539166, "grad_norm": 0.0, - "learning_rate": 1.999905238569008e-05, - "loss": 1.2514, + "learning_rate": 1.998431759548153e-05, + "loss": 1.1823, "step": 1209 }, { - "epoch": 0.03428830513757828, + "epoch": 0.0473432975976211, "grad_norm": 0.0, - "learning_rate": 1.9999039708919322e-05, - "loss": 1.2388, + "learning_rate": 1.9984246572760653e-05, + "loss": 1.3138, "step": 1210 }, { - "epoch": 0.034316642579840746, + "epoch": 0.04738242428985054, "grad_norm": 0.0, - "learning_rate": 1.9999026947922937e-05, - "loss": 1.1348, + "learning_rate": 1.9984175389705286e-05, + "loss": 1.226, "step": 1211 }, { - "epoch": 0.034344980022103205, + "epoch": 0.04742155098207997, "grad_norm": 0.0, - "learning_rate": 1.999901410270103e-05, - "loss": 1.2103, + "learning_rate": 1.9984104046316576e-05, + "loss": 1.2095, "step": 1212 }, { - "epoch": 0.034373317464365664, + "epoch": 0.047460677674309414, "grad_norm": 0.0, - "learning_rate": 1.9999001173253724e-05, - "loss": 1.1261, + "learning_rate": 1.9984032542595663e-05, + "loss": 1.3292, "step": 1213 }, { - "epoch": 0.03440165490662813, + "epoch": 0.047499804366538854, "grad_norm": 0.0, - "learning_rate": 1.9998988159581116e-05, - "loss": 1.0942, + "learning_rate": 1.99839608785437e-05, + "loss": 1.2545, "step": 1214 }, { - "epoch": 0.03442999234889059, + "epoch": 0.04753893105876829, "grad_norm": 0.0, - "learning_rate": 1.9998975061683312e-05, - "loss": 1.1217, + "learning_rate": 1.9983889054161835e-05, + "loss": 1.3263, "step": 1215 }, { - "epoch": 0.03445832979115305, + "epoch": 0.04757805775099773, "grad_norm": 0.0, - "learning_rate": 1.999896187956043e-05, - "loss": 1.0616, + "learning_rate": 1.9983817069451223e-05, + "loss": 1.3216, "step": 1216 }, { - "epoch": 0.034486667233415515, + "epoch": 0.04761718444322717, "grad_norm": 0.0, - "learning_rate": 1.9998948613212583e-05, - "loss": 1.158, + "learning_rate": 1.998374492441302e-05, + "loss": 1.2236, "step": 1217 }, { - "epoch": 0.034515004675677974, + "epoch": 0.04765631113545661, "grad_norm": 0.0, - "learning_rate": 1.9998935262639877e-05, - "loss": 1.1301, + "learning_rate": 1.998367261904838e-05, + "loss": 1.3915, "step": 1218 }, { - "epoch": 0.03454334211794043, + "epoch": 0.047695437827686045, "grad_norm": 0.0, - "learning_rate": 1.9998921827842423e-05, - "loss": 1.2439, + "learning_rate": 1.998360015335847e-05, + "loss": 1.2211, "step": 1219 }, { - "epoch": 0.0345716795602029, + "epoch": 0.047734564519915486, "grad_norm": 0.0, - "learning_rate": 1.9998908308820343e-05, - "loss": 1.171, + "learning_rate": 1.998352752734445e-05, + "loss": 1.1931, "step": 1220 }, { - "epoch": 0.03460001700246536, + "epoch": 0.04777369121214493, "grad_norm": 0.0, - "learning_rate": 1.999889470557374e-05, - "loss": 1.1764, + "learning_rate": 1.9983454741007492e-05, + "loss": 1.2599, "step": 1221 }, { - "epoch": 0.03462835444472782, + "epoch": 0.04781281790437437, "grad_norm": 0.0, - "learning_rate": 1.9998881018102735e-05, - "loss": 1.0937, + "learning_rate": 1.998338179434876e-05, + "loss": 1.2902, "step": 1222 }, { - "epoch": 0.03465669188699028, + "epoch": 0.0478519445966038, "grad_norm": 0.0, - "learning_rate": 1.9998867246407447e-05, - "loss": 1.2155, + "learning_rate": 1.9983308687369423e-05, + "loss": 1.2902, "step": 1223 }, { - "epoch": 0.03468502932925274, + "epoch": 0.04789107128883324, "grad_norm": 0.0, - "learning_rate": 1.999885339048798e-05, - "loss": 1.2572, + "learning_rate": 1.998323542007066e-05, + "loss": 1.3695, "step": 1224 }, { - "epoch": 0.0347133667715152, + "epoch": 0.04793019798106268, "grad_norm": 0.0, - "learning_rate": 1.999883945034446e-05, - "loss": 1.192, + "learning_rate": 1.9983161992453646e-05, + "loss": 1.274, "step": 1225 }, { - "epoch": 0.03474170421377767, + "epoch": 0.04796932467329212, "grad_norm": 0.0, - "learning_rate": 1.9998825425977007e-05, - "loss": 1.1452, + "learning_rate": 1.998308840451956e-05, + "loss": 1.16, "step": 1226 }, { - "epoch": 0.03477004165604013, + "epoch": 0.04800845136552156, "grad_norm": 0.0, - "learning_rate": 1.9998811317385728e-05, - "loss": 1.0504, + "learning_rate": 1.9983014656269584e-05, + "loss": 1.2758, "step": 1227 }, { - "epoch": 0.034798379098302586, + "epoch": 0.048047578057751, "grad_norm": 0.0, - "learning_rate": 1.999879712457075e-05, - "loss": 1.181, + "learning_rate": 1.99829407477049e-05, + "loss": 1.2446, "step": 1228 }, { - "epoch": 0.03482671654056505, + "epoch": 0.04808670474998044, "grad_norm": 0.0, - "learning_rate": 1.9998782847532195e-05, - "loss": 1.2238, + "learning_rate": 1.99828666788267e-05, + "loss": 1.3047, "step": 1229 }, { - "epoch": 0.03485505398282751, + "epoch": 0.048125831442209874, "grad_norm": 0.0, - "learning_rate": 1.9998768486270178e-05, - "loss": 1.1603, + "learning_rate": 1.9982792449636167e-05, + "loss": 1.2758, "step": 1230 }, { - "epoch": 0.03488339142508997, + "epoch": 0.048164958134439315, "grad_norm": 0.0, - "learning_rate": 1.999875404078482e-05, - "loss": 1.1111, + "learning_rate": 1.99827180601345e-05, + "loss": 1.1571, "step": 1231 }, { - "epoch": 0.034911728867352436, + "epoch": 0.048204084826668755, "grad_norm": 0.0, - "learning_rate": 1.999873951107624e-05, - "loss": 1.1611, + "learning_rate": 1.998264351032289e-05, + "loss": 1.304, "step": 1232 }, { - "epoch": 0.034940066309614895, + "epoch": 0.04824321151889819, "grad_norm": 0.0, - "learning_rate": 1.999872489714457e-05, - "loss": 1.0623, + "learning_rate": 1.9982568800202532e-05, + "loss": 1.1532, "step": 1233 }, { - "epoch": 0.034968403751877354, + "epoch": 0.04828233821112763, "grad_norm": 0.0, - "learning_rate": 1.9998710198989923e-05, - "loss": 1.2147, + "learning_rate": 1.9982493929774627e-05, + "loss": 1.2505, "step": 1234 }, { - "epoch": 0.03499674119413982, + "epoch": 0.04832146490335707, "grad_norm": 0.0, - "learning_rate": 1.9998695416612432e-05, - "loss": 1.1675, + "learning_rate": 1.998241889904038e-05, + "loss": 1.3256, "step": 1235 }, { - "epoch": 0.03502507863640228, + "epoch": 0.04836059159558651, "grad_norm": 0.0, - "learning_rate": 1.9998680550012212e-05, - "loss": 1.2388, + "learning_rate": 1.9982343708000994e-05, + "loss": 1.297, "step": 1236 }, { - "epoch": 0.03505341607866474, + "epoch": 0.048399718287815946, "grad_norm": 0.0, - "learning_rate": 1.9998665599189392e-05, - "loss": 1.2023, + "learning_rate": 1.9982268356657675e-05, + "loss": 1.4338, "step": 1237 }, { - "epoch": 0.035081753520927204, + "epoch": 0.04843884498004539, "grad_norm": 0.0, - "learning_rate": 1.99986505641441e-05, - "loss": 1.219, + "learning_rate": 1.998219284501164e-05, + "loss": 1.2986, "step": 1238 }, { - "epoch": 0.035110090963189663, + "epoch": 0.04847797167227483, "grad_norm": 0.0, - "learning_rate": 1.999863544487646e-05, - "loss": 1.1605, + "learning_rate": 1.998211717306409e-05, + "loss": 1.2132, "step": 1239 }, { - "epoch": 0.03513842840545212, + "epoch": 0.04851709836450426, "grad_norm": 0.0, - "learning_rate": 1.9998620241386606e-05, - "loss": 1.1453, + "learning_rate": 1.998204134081625e-05, + "loss": 1.3682, "step": 1240 }, { - "epoch": 0.03516676584771459, + "epoch": 0.0485562250567337, "grad_norm": 0.0, - "learning_rate": 1.999860495367466e-05, - "loss": 1.2155, + "learning_rate": 1.9981965348269333e-05, + "loss": 1.2611, "step": 1241 }, { - "epoch": 0.03519510328997705, + "epoch": 0.04859535174896314, "grad_norm": 0.0, - "learning_rate": 1.9998589581740746e-05, - "loss": 1.0904, + "learning_rate": 1.998188919542456e-05, + "loss": 1.3163, "step": 1242 }, { - "epoch": 0.03522344073223951, + "epoch": 0.048634478441192584, "grad_norm": 0.0, - "learning_rate": 1.9998574125585005e-05, - "loss": 1.1765, + "learning_rate": 1.9981812882283157e-05, + "loss": 1.2839, "step": 1243 }, { - "epoch": 0.03525177817450197, + "epoch": 0.04867360513342202, "grad_norm": 0.0, - "learning_rate": 1.9998558585207556e-05, - "loss": 1.1675, + "learning_rate": 1.9981736408846345e-05, + "loss": 1.3353, "step": 1244 }, { - "epoch": 0.03528011561676443, + "epoch": 0.04871273182565146, "grad_norm": 0.0, - "learning_rate": 1.999854296060854e-05, - "loss": 1.229, + "learning_rate": 1.9981659775115357e-05, + "loss": 1.301, "step": 1245 }, { - "epoch": 0.03530845305902689, + "epoch": 0.0487518585178809, "grad_norm": 0.0, - "learning_rate": 1.999852725178808e-05, - "loss": 1.2728, + "learning_rate": 1.9981582981091418e-05, + "loss": 1.3343, "step": 1246 }, { - "epoch": 0.03533679050128935, + "epoch": 0.048790985210110334, "grad_norm": 0.0, - "learning_rate": 1.9998511458746314e-05, - "loss": 1.2739, + "learning_rate": 1.9981506026775763e-05, + "loss": 1.2767, "step": 1247 }, { - "epoch": 0.035365127943551816, + "epoch": 0.048830111902339775, "grad_norm": 0.0, - "learning_rate": 1.9998495581483373e-05, - "loss": 1.1815, + "learning_rate": 1.998142891216963e-05, + "loss": 1.2328, "step": 1248 }, { - "epoch": 0.035393465385814275, + "epoch": 0.048869238594569216, "grad_norm": 0.0, - "learning_rate": 1.999847961999939e-05, - "loss": 1.1095, + "learning_rate": 1.9981351637274254e-05, + "loss": 1.3132, "step": 1249 }, { - "epoch": 0.035421802828076734, + "epoch": 0.048908365286798657, "grad_norm": 0.0, - "learning_rate": 1.9998463574294505e-05, - "loss": 1.1174, + "learning_rate": 1.9981274202090884e-05, + "loss": 1.2146, "step": 1250 }, { - "epoch": 0.0354501402703392, + "epoch": 0.04894749197902809, "grad_norm": 0.0, - "learning_rate": 1.9998447444368843e-05, - "loss": 1.1722, + "learning_rate": 1.9981196606620755e-05, + "loss": 1.2127, "step": 1251 }, { - "epoch": 0.03547847771260166, + "epoch": 0.04898661867125753, "grad_norm": 0.0, - "learning_rate": 1.9998431230222545e-05, - "loss": 1.172, + "learning_rate": 1.998111885086511e-05, + "loss": 1.238, "step": 1252 }, { - "epoch": 0.03550681515486412, + "epoch": 0.04902574536348697, "grad_norm": 0.0, - "learning_rate": 1.999841493185575e-05, - "loss": 1.1559, + "learning_rate": 1.9981040934825212e-05, + "loss": 1.3955, "step": 1253 }, { - "epoch": 0.035535152597126585, + "epoch": 0.04906487205571641, "grad_norm": 0.0, - "learning_rate": 1.9998398549268594e-05, - "loss": 1.0451, + "learning_rate": 1.99809628585023e-05, + "loss": 1.2358, "step": 1254 }, { - "epoch": 0.035563490039389044, + "epoch": 0.04910399874794585, "grad_norm": 0.0, - "learning_rate": 1.9998382082461214e-05, - "loss": 1.166, + "learning_rate": 1.9980884621897627e-05, + "loss": 1.3937, "step": 1255 }, { - "epoch": 0.0355918274816515, + "epoch": 0.04914312544017529, "grad_norm": 0.0, - "learning_rate": 1.999836553143375e-05, - "loss": 1.0435, + "learning_rate": 1.9980806225012456e-05, + "loss": 1.4086, "step": 1256 }, { - "epoch": 0.03562016492391397, + "epoch": 0.04918225213240473, "grad_norm": 0.0, - "learning_rate": 1.9998348896186336e-05, - "loss": 1.1311, + "learning_rate": 1.9980727667848047e-05, + "loss": 1.3369, "step": 1257 }, { - "epoch": 0.03564850236617643, + "epoch": 0.04922137882463416, "grad_norm": 0.0, - "learning_rate": 1.999833217671912e-05, - "loss": 1.1125, + "learning_rate": 1.9980648950405656e-05, + "loss": 1.2438, "step": 1258 }, { - "epoch": 0.03567683980843889, + "epoch": 0.049260505516863604, "grad_norm": 0.0, - "learning_rate": 1.9998315373032238e-05, - "loss": 1.1952, + "learning_rate": 1.9980570072686548e-05, + "loss": 1.3087, "step": 1259 }, { - "epoch": 0.03570517725070135, + "epoch": 0.049299632209093044, "grad_norm": 0.0, - "learning_rate": 1.999829848512583e-05, - "loss": 1.1578, + "learning_rate": 1.9980491034691994e-05, + "loss": 1.2571, "step": 1260 }, { - "epoch": 0.03573351469296381, + "epoch": 0.049338758901322485, "grad_norm": 0.0, - "learning_rate": 1.9998281513000046e-05, - "loss": 1.1906, + "learning_rate": 1.9980411836423256e-05, + "loss": 1.3216, "step": 1261 }, { - "epoch": 0.03576185213522627, + "epoch": 0.04937788559355192, "grad_norm": 0.0, - "learning_rate": 1.999826445665502e-05, - "loss": 1.2062, + "learning_rate": 1.9980332477881613e-05, + "loss": 1.2319, "step": 1262 }, { - "epoch": 0.03579018957748874, + "epoch": 0.04941701228578136, "grad_norm": 0.0, - "learning_rate": 1.9998247316090903e-05, - "loss": 1.1735, + "learning_rate": 1.998025295906834e-05, + "loss": 1.1716, "step": 1263 }, { - "epoch": 0.035818527019751197, + "epoch": 0.0494561389780108, "grad_norm": 0.0, - "learning_rate": 1.9998230091307834e-05, - "loss": 1.16, + "learning_rate": 1.9980173279984706e-05, + "loss": 1.3242, "step": 1264 }, { - "epoch": 0.035846864462013656, + "epoch": 0.049495265670240235, "grad_norm": 0.0, - "learning_rate": 1.9998212782305963e-05, - "loss": 1.1991, + "learning_rate": 1.9980093440631997e-05, + "loss": 1.1636, "step": 1265 }, { - "epoch": 0.03587520190427612, + "epoch": 0.049534392362469676, "grad_norm": 0.0, - "learning_rate": 1.9998195389085432e-05, - "loss": 1.1449, + "learning_rate": 1.998001344101149e-05, + "loss": 1.2932, "step": 1266 }, { - "epoch": 0.03590353934653858, + "epoch": 0.04957351905469912, "grad_norm": 0.0, - "learning_rate": 1.9998177911646387e-05, - "loss": 1.1, + "learning_rate": 1.9979933281124474e-05, + "loss": 1.3546, "step": 1267 }, { - "epoch": 0.03593187678880104, + "epoch": 0.04961264574692856, "grad_norm": 0.0, - "learning_rate": 1.9998160349988977e-05, - "loss": 1.1306, + "learning_rate": 1.9979852960972235e-05, + "loss": 1.2999, "step": 1268 }, { - "epoch": 0.035960214231063506, + "epoch": 0.04965177243915799, "grad_norm": 0.0, - "learning_rate": 1.999814270411335e-05, - "loss": 1.1344, + "learning_rate": 1.9979772480556063e-05, + "loss": 1.3027, "step": 1269 }, { - "epoch": 0.035988551673325965, + "epoch": 0.04969089913138743, "grad_norm": 0.0, - "learning_rate": 1.9998124974019656e-05, - "loss": 1.1772, + "learning_rate": 1.997969183987725e-05, + "loss": 1.3887, "step": 1270 }, { - "epoch": 0.036016889115588424, + "epoch": 0.04973002582361687, "grad_norm": 0.0, - "learning_rate": 1.999810715970804e-05, - "loss": 1.1909, + "learning_rate": 1.9979611038937096e-05, + "loss": 1.302, "step": 1271 }, { - "epoch": 0.03604522655785089, + "epoch": 0.04976915251584631, "grad_norm": 0.0, - "learning_rate": 1.9998089261178656e-05, - "loss": 1.0425, + "learning_rate": 1.997953007773689e-05, + "loss": 1.2841, "step": 1272 }, { - "epoch": 0.03607356400011335, + "epoch": 0.04980827920807575, "grad_norm": 0.0, - "learning_rate": 1.999807127843165e-05, - "loss": 1.1695, + "learning_rate": 1.9979448956277932e-05, + "loss": 1.3502, "step": 1273 }, { - "epoch": 0.03610190144237581, + "epoch": 0.04984740590030519, "grad_norm": 0.0, - "learning_rate": 1.999805321146718e-05, - "loss": 1.21, + "learning_rate": 1.9979367674561535e-05, + "loss": 1.2405, "step": 1274 }, { - "epoch": 0.036130238884638274, + "epoch": 0.04988653259253463, "grad_norm": 0.0, - "learning_rate": 1.9998035060285398e-05, - "loss": 1.1601, + "learning_rate": 1.9979286232588995e-05, + "loss": 1.228, "step": 1275 }, { - "epoch": 0.036158576326900733, + "epoch": 0.049925659284764064, "grad_norm": 0.0, - "learning_rate": 1.9998016824886452e-05, - "loss": 1.2114, + "learning_rate": 1.9979204630361622e-05, + "loss": 1.3185, "step": 1276 }, { - "epoch": 0.03618691376916319, + "epoch": 0.049964785976993505, "grad_norm": 0.0, - "learning_rate": 1.9997998505270496e-05, - "loss": 1.0878, + "learning_rate": 1.997912286788073e-05, + "loss": 1.3049, "step": 1277 }, { - "epoch": 0.03621525121142566, + "epoch": 0.050003912669222945, "grad_norm": 0.0, - "learning_rate": 1.9997980101437687e-05, - "loss": 1.1833, + "learning_rate": 1.997904094514763e-05, + "loss": 1.3333, "step": 1278 }, { - "epoch": 0.03624358865368812, + "epoch": 0.05004303936145238, "grad_norm": 0.0, - "learning_rate": 1.9997961613388176e-05, - "loss": 1.0976, + "learning_rate": 1.9978958862163634e-05, + "loss": 1.3312, "step": 1279 }, { - "epoch": 0.03627192609595058, + "epoch": 0.05008216605368182, "grad_norm": 0.0, - "learning_rate": 1.9997943041122126e-05, - "loss": 1.1192, + "learning_rate": 1.997887661893006e-05, + "loss": 1.2845, "step": 1280 }, { - "epoch": 0.03630026353821304, + "epoch": 0.05012129274591126, "grad_norm": 0.0, - "learning_rate": 1.9997924384639687e-05, - "loss": 1.0392, + "learning_rate": 1.9978794215448237e-05, + "loss": 1.2443, "step": 1281 }, { - "epoch": 0.0363286009804755, + "epoch": 0.0501604194381407, "grad_norm": 0.0, - "learning_rate": 1.9997905643941017e-05, - "loss": 1.071, + "learning_rate": 1.997871165171948e-05, + "loss": 1.3696, "step": 1282 }, { - "epoch": 0.03635693842273796, + "epoch": 0.050199546130370136, "grad_norm": 0.0, - "learning_rate": 1.999788681902628e-05, - "loss": 1.1653, + "learning_rate": 1.9978628927745123e-05, + "loss": 1.2037, "step": 1283 }, { - "epoch": 0.03638527586500043, + "epoch": 0.05023867282259958, "grad_norm": 0.0, - "learning_rate": 1.9997867909895626e-05, - "loss": 1.0753, + "learning_rate": 1.9978546043526487e-05, + "loss": 1.1984, "step": 1284 }, { - "epoch": 0.036413613307262886, + "epoch": 0.05027779951482902, "grad_norm": 0.0, - "learning_rate": 1.9997848916549217e-05, - "loss": 1.231, + "learning_rate": 1.9978462999064903e-05, + "loss": 1.3674, "step": 1285 }, { - "epoch": 0.036441950749525345, + "epoch": 0.05031692620705846, "grad_norm": 0.0, - "learning_rate": 1.9997829838987215e-05, - "loss": 1.1675, + "learning_rate": 1.997837979436171e-05, + "loss": 1.3292, "step": 1286 }, { - "epoch": 0.03647028819178781, + "epoch": 0.05035605289928789, "grad_norm": 0.0, - "learning_rate": 1.999781067720978e-05, - "loss": 1.3116, + "learning_rate": 1.9978296429418237e-05, + "loss": 1.3183, "step": 1287 }, { - "epoch": 0.03649862563405027, + "epoch": 0.05039517959151733, "grad_norm": 0.0, - "learning_rate": 1.9997791431217073e-05, - "loss": 1.1049, + "learning_rate": 1.997821290423583e-05, + "loss": 1.2584, "step": 1288 }, { - "epoch": 0.03652696307631273, + "epoch": 0.050434306283746774, "grad_norm": 0.0, - "learning_rate": 1.9997772101009255e-05, - "loss": 1.1519, + "learning_rate": 1.9978129218815824e-05, + "loss": 1.3247, "step": 1289 }, { - "epoch": 0.036555300518575196, + "epoch": 0.05047343297597621, "grad_norm": 0.0, - "learning_rate": 1.9997752686586495e-05, - "loss": 1.1761, + "learning_rate": 1.9978045373159573e-05, + "loss": 1.2837, "step": 1290 }, { - "epoch": 0.036583637960837655, + "epoch": 0.05051255966820565, "grad_norm": 0.0, - "learning_rate": 1.9997733187948947e-05, - "loss": 1.162, + "learning_rate": 1.9977961367268408e-05, + "loss": 1.1142, "step": 1291 }, { - "epoch": 0.036611975403100114, + "epoch": 0.05055168636043509, "grad_norm": 0.0, - "learning_rate": 1.9997713605096782e-05, - "loss": 1.2352, + "learning_rate": 1.9977877201143692e-05, + "loss": 1.2507, "step": 1292 }, { - "epoch": 0.03664031284536258, + "epoch": 0.05059081305266453, "grad_norm": 0.0, - "learning_rate": 1.9997693938030162e-05, - "loss": 0.9322, + "learning_rate": 1.997779287478677e-05, + "loss": 1.322, "step": 1293 }, { - "epoch": 0.03666865028762504, + "epoch": 0.050629939744893965, "grad_norm": 0.0, - "learning_rate": 1.999767418674925e-05, - "loss": 1.1376, + "learning_rate": 1.9977708388198997e-05, + "loss": 1.2856, "step": 1294 }, { - "epoch": 0.0366969877298875, + "epoch": 0.050669066437123406, "grad_norm": 0.0, - "learning_rate": 1.999765435125422e-05, - "loss": 1.1626, + "learning_rate": 1.9977623741381728e-05, + "loss": 1.2303, "step": 1295 }, { - "epoch": 0.036725325172149964, + "epoch": 0.050708193129352847, "grad_norm": 0.0, - "learning_rate": 1.999763443154523e-05, - "loss": 1.0471, + "learning_rate": 1.997753893433633e-05, + "loss": 1.2891, "step": 1296 }, { - "epoch": 0.03675366261441242, + "epoch": 0.05074731982158228, "grad_norm": 0.0, - "learning_rate": 1.999761442762246e-05, - "loss": 1.2251, + "learning_rate": 1.9977453967064154e-05, + "loss": 1.1869, "step": 1297 }, { - "epoch": 0.03678200005667488, + "epoch": 0.05078644651381172, "grad_norm": 0.0, - "learning_rate": 1.9997594339486065e-05, - "loss": 1.1693, + "learning_rate": 1.997736883956657e-05, + "loss": 1.2087, "step": 1298 }, { - "epoch": 0.03681033749893735, + "epoch": 0.05082557320604116, "grad_norm": 0.0, - "learning_rate": 1.9997574167136225e-05, - "loss": 1.0763, + "learning_rate": 1.997728355184495e-05, + "loss": 1.2941, "step": 1299 }, { - "epoch": 0.03683867494119981, + "epoch": 0.0508646998982706, "grad_norm": 0.0, - "learning_rate": 1.99975539105731e-05, - "loss": 1.0429, + "learning_rate": 1.997719810390065e-05, + "loss": 1.3341, "step": 1300 }, { - "epoch": 0.036867012383462267, + "epoch": 0.05090382659050004, "grad_norm": 0.0, - "learning_rate": 1.999753356979687e-05, - "loss": 1.2119, + "learning_rate": 1.9977112495735057e-05, + "loss": 1.2844, "step": 1301 }, { - "epoch": 0.03689534982572473, + "epoch": 0.05094295328272948, "grad_norm": 0.0, - "learning_rate": 1.9997513144807704e-05, - "loss": 1.0796, + "learning_rate": 1.9977026727349536e-05, + "loss": 1.3175, "step": 1302 }, { - "epoch": 0.03692368726798719, + "epoch": 0.05098207997495892, "grad_norm": 0.0, - "learning_rate": 1.9997492635605766e-05, - "loss": 1.1086, + "learning_rate": 1.997694079874547e-05, + "loss": 1.2095, "step": 1303 }, { - "epoch": 0.03695202471024965, + "epoch": 0.05102120666718835, "grad_norm": 0.0, - "learning_rate": 1.9997472042191243e-05, - "loss": 1.1471, + "learning_rate": 1.9976854709924235e-05, + "loss": 1.2428, "step": 1304 }, { - "epoch": 0.03698036215251212, + "epoch": 0.051060333359417794, "grad_norm": 0.0, - "learning_rate": 1.9997451364564294e-05, - "loss": 1.0806, + "learning_rate": 1.9976768460887216e-05, + "loss": 1.098, "step": 1305 }, { - "epoch": 0.037008699594774576, + "epoch": 0.051099460051647234, "grad_norm": 0.0, - "learning_rate": 1.99974306027251e-05, - "loss": 1.1058, + "learning_rate": 1.9976682051635795e-05, + "loss": 1.3397, "step": 1306 }, { - "epoch": 0.037037037037037035, + "epoch": 0.051138586743876675, "grad_norm": 0.0, - "learning_rate": 1.9997409756673838e-05, - "loss": 1.2297, + "learning_rate": 1.9976595482171365e-05, + "loss": 1.1931, "step": 1307 }, { - "epoch": 0.0370653744792995, + "epoch": 0.05117771343610611, "grad_norm": 0.0, - "learning_rate": 1.9997388826410683e-05, - "loss": 1.2601, + "learning_rate": 1.997650875249531e-05, + "loss": 1.2776, "step": 1308 }, { - "epoch": 0.03709371192156196, + "epoch": 0.05121684012833555, "grad_norm": 0.0, - "learning_rate": 1.9997367811935807e-05, - "loss": 1.1926, + "learning_rate": 1.9976421862609027e-05, + "loss": 1.3566, "step": 1309 }, { - "epoch": 0.03712204936382442, + "epoch": 0.05125596682056499, "grad_norm": 0.0, - "learning_rate": 1.9997346713249387e-05, - "loss": 1.2147, + "learning_rate": 1.9976334812513912e-05, + "loss": 1.225, "step": 1310 }, { - "epoch": 0.037150386806086885, + "epoch": 0.051295093512794425, "grad_norm": 0.0, - "learning_rate": 1.9997325530351605e-05, - "loss": 1.0473, + "learning_rate": 1.997624760221136e-05, + "loss": 1.2328, "step": 1311 }, { - "epoch": 0.037178724248349344, + "epoch": 0.051334220205023866, "grad_norm": 0.0, - "learning_rate": 1.9997304263242638e-05, - "loss": 1.2541, + "learning_rate": 1.9976160231702774e-05, + "loss": 1.1437, "step": 1312 }, { - "epoch": 0.0372070616906118, + "epoch": 0.05137334689725331, "grad_norm": 0.0, - "learning_rate": 1.9997282911922667e-05, - "loss": 1.1086, + "learning_rate": 1.997607270098955e-05, + "loss": 1.2817, "step": 1313 }, { - "epoch": 0.03723539913287427, + "epoch": 0.05141247358948275, "grad_norm": 0.0, - "learning_rate": 1.9997261476391867e-05, - "loss": 1.1202, + "learning_rate": 1.99759850100731e-05, + "loss": 1.1863, "step": 1314 }, { - "epoch": 0.03726373657513673, + "epoch": 0.05145160028171218, "grad_norm": 0.0, - "learning_rate": 1.999723995665042e-05, - "loss": 1.2344, + "learning_rate": 1.9975897158954835e-05, + "loss": 1.3195, "step": 1315 }, { - "epoch": 0.03729207401739919, + "epoch": 0.05149072697394162, "grad_norm": 0.0, - "learning_rate": 1.9997218352698514e-05, - "loss": 1.1859, + "learning_rate": 1.9975809147636163e-05, + "loss": 1.2756, "step": 1316 }, { - "epoch": 0.037320411459661654, + "epoch": 0.05152985366617106, "grad_norm": 0.0, - "learning_rate": 1.999719666453632e-05, - "loss": 1.3773, + "learning_rate": 1.9975720976118492e-05, + "loss": 1.2133, "step": 1317 }, { - "epoch": 0.03734874890192411, + "epoch": 0.051568980358400504, "grad_norm": 0.0, - "learning_rate": 1.9997174892164026e-05, - "loss": 1.1663, + "learning_rate": 1.9975632644403245e-05, + "loss": 1.222, "step": 1318 }, { - "epoch": 0.03737708634418657, + "epoch": 0.05160810705062994, "grad_norm": 0.0, - "learning_rate": 1.999715303558182e-05, - "loss": 1.1291, + "learning_rate": 1.997554415249184e-05, + "loss": 1.3347, "step": 1319 }, { - "epoch": 0.03740542378644904, + "epoch": 0.05164723374285938, "grad_norm": 0.0, - "learning_rate": 1.9997131094789875e-05, - "loss": 1.1042, + "learning_rate": 1.9975455500385692e-05, + "loss": 1.2095, "step": 1320 }, { - "epoch": 0.0374337612287115, + "epoch": 0.05168636043508882, "grad_norm": 0.0, - "learning_rate": 1.999710906978839e-05, - "loss": 1.104, + "learning_rate": 1.997536668808623e-05, + "loss": 1.3668, "step": 1321 }, { - "epoch": 0.037462098670973956, + "epoch": 0.051725487127318254, "grad_norm": 0.0, - "learning_rate": 1.9997086960577538e-05, - "loss": 1.1544, + "learning_rate": 1.997527771559488e-05, + "loss": 1.3558, "step": 1322 }, { - "epoch": 0.03749043611323642, + "epoch": 0.051764613819547695, "grad_norm": 0.0, - "learning_rate": 1.9997064767157514e-05, - "loss": 1.1846, + "learning_rate": 1.9975188582913067e-05, + "loss": 1.29, "step": 1323 }, { - "epoch": 0.03751877355549888, + "epoch": 0.051803740511777135, "grad_norm": 0.0, - "learning_rate": 1.99970424895285e-05, - "loss": 1.1973, + "learning_rate": 1.9975099290042226e-05, + "loss": 1.2385, "step": 1324 }, { - "epoch": 0.03754711099776134, + "epoch": 0.051842867204006576, "grad_norm": 0.0, - "learning_rate": 1.999702012769068e-05, - "loss": 1.1953, + "learning_rate": 1.997500983698379e-05, + "loss": 1.342, "step": 1325 }, { - "epoch": 0.037575448440023806, + "epoch": 0.05188199389623601, "grad_norm": 0.0, - "learning_rate": 1.9996997681644252e-05, - "loss": 1.0909, + "learning_rate": 1.9974920223739195e-05, + "loss": 1.3779, "step": 1326 }, { - "epoch": 0.037603785882286266, + "epoch": 0.05192112058846545, "grad_norm": 0.0, - "learning_rate": 1.99969751513894e-05, - "loss": 1.1836, + "learning_rate": 1.9974830450309883e-05, + "loss": 1.2609, "step": 1327 }, { - "epoch": 0.037632123324548725, + "epoch": 0.05196024728069489, "grad_norm": 0.0, - "learning_rate": 1.9996952536926312e-05, - "loss": 1.3152, + "learning_rate": 1.997474051669729e-05, + "loss": 1.2148, "step": 1328 }, { - "epoch": 0.03766046076681119, + "epoch": 0.051999373972924326, "grad_norm": 0.0, - "learning_rate": 1.999692983825518e-05, - "loss": 1.2467, + "learning_rate": 1.9974650422902866e-05, + "loss": 1.2375, "step": 1329 }, { - "epoch": 0.03768879820907365, + "epoch": 0.05203850066515377, "grad_norm": 0.0, - "learning_rate": 1.99969070553762e-05, - "loss": 1.2765, + "learning_rate": 1.9974560168928054e-05, + "loss": 1.3319, "step": 1330 }, { - "epoch": 0.03771713565133611, + "epoch": 0.05207762735738321, "grad_norm": 0.0, - "learning_rate": 1.9996884188289557e-05, - "loss": 1.1455, + "learning_rate": 1.9974469754774307e-05, + "loss": 1.3246, "step": 1331 }, { - "epoch": 0.037745473093598575, + "epoch": 0.05211675404961265, "grad_norm": 0.0, - "learning_rate": 1.9996861236995443e-05, - "loss": 1.2548, + "learning_rate": 1.9974379180443072e-05, + "loss": 1.3878, "step": 1332 }, { - "epoch": 0.037773810535861034, + "epoch": 0.05215588074184208, "grad_norm": 0.0, - "learning_rate": 1.999683820149406e-05, - "loss": 1.1906, + "learning_rate": 1.9974288445935803e-05, + "loss": 1.1826, "step": 1333 }, { - "epoch": 0.03780214797812349, + "epoch": 0.05219500743407152, "grad_norm": 0.0, - "learning_rate": 1.999681508178559e-05, - "loss": 1.1898, + "learning_rate": 1.9974197551253963e-05, + "loss": 1.2886, "step": 1334 }, { - "epoch": 0.03783048542038596, + "epoch": 0.052234134126300964, "grad_norm": 0.0, - "learning_rate": 1.9996791877870242e-05, - "loss": 1.1996, + "learning_rate": 1.997410649639901e-05, + "loss": 1.2247, "step": 1335 }, { - "epoch": 0.03785882286264842, + "epoch": 0.0522732608185304, "grad_norm": 0.0, - "learning_rate": 1.9996768589748198e-05, - "loss": 1.2151, + "learning_rate": 1.9974015281372402e-05, + "loss": 1.2919, "step": 1336 }, { - "epoch": 0.03788716030491088, + "epoch": 0.05231238751075984, "grad_norm": 0.0, - "learning_rate": 1.9996745217419664e-05, - "loss": 1.2276, + "learning_rate": 1.9973923906175608e-05, + "loss": 1.377, "step": 1337 }, { - "epoch": 0.03791549774717334, + "epoch": 0.05235151420298928, "grad_norm": 0.0, - "learning_rate": 1.9996721760884833e-05, - "loss": 1.1868, + "learning_rate": 1.9973832370810095e-05, + "loss": 1.3671, "step": 1338 }, { - "epoch": 0.0379438351894358, + "epoch": 0.05239064089521872, "grad_norm": 0.0, - "learning_rate": 1.99966982201439e-05, - "loss": 1.1559, + "learning_rate": 1.997374067527733e-05, + "loss": 1.2843, "step": 1339 }, { - "epoch": 0.03797217263169826, + "epoch": 0.052429767587448155, "grad_norm": 0.0, - "learning_rate": 1.9996674595197065e-05, - "loss": 1.178, + "learning_rate": 1.997364881957879e-05, + "loss": 1.2309, "step": 1340 }, { - "epoch": 0.03800051007396073, + "epoch": 0.052468894279677596, "grad_norm": 0.0, - "learning_rate": 1.999665088604453e-05, - "loss": 1.227, + "learning_rate": 1.9973556803715944e-05, + "loss": 1.338, "step": 1341 }, { - "epoch": 0.03802884751622319, + "epoch": 0.052508020971907036, "grad_norm": 0.0, - "learning_rate": 1.999662709268649e-05, - "loss": 1.1806, + "learning_rate": 1.997346462769028e-05, + "loss": 1.4231, "step": 1342 }, { - "epoch": 0.038057184958485646, + "epoch": 0.05254714766413648, "grad_norm": 0.0, - "learning_rate": 1.9996603215123152e-05, - "loss": 1.1605, + "learning_rate": 1.9973372291503266e-05, + "loss": 1.2411, "step": 1343 }, { - "epoch": 0.03808552240074811, + "epoch": 0.05258627435636591, "grad_norm": 0.0, - "learning_rate": 1.999657925335471e-05, - "loss": 1.1041, + "learning_rate": 1.9973279795156394e-05, + "loss": 1.3165, "step": 1344 }, { - "epoch": 0.03811385984301057, + "epoch": 0.05262540104859535, "grad_norm": 0.0, - "learning_rate": 1.9996555207381368e-05, - "loss": 1.0201, + "learning_rate": 1.9973187138651143e-05, + "loss": 1.344, "step": 1345 }, { - "epoch": 0.03814219728527303, + "epoch": 0.05266452774082479, "grad_norm": 0.0, - "learning_rate": 1.999653107720333e-05, - "loss": 1.0124, + "learning_rate": 1.9973094321989e-05, + "loss": 1.3028, "step": 1346 }, { - "epoch": 0.03817053472753549, + "epoch": 0.05270365443305423, "grad_norm": 0.0, - "learning_rate": 1.9996506862820798e-05, - "loss": 1.1854, + "learning_rate": 1.9973001345171465e-05, + "loss": 1.3562, "step": 1347 }, { - "epoch": 0.038198872169797955, + "epoch": 0.05274278112528367, "grad_norm": 0.0, - "learning_rate": 1.9996482564233977e-05, - "loss": 1.1107, + "learning_rate": 1.9972908208200023e-05, + "loss": 1.265, "step": 1348 }, { - "epoch": 0.038227209612060414, + "epoch": 0.05278190781751311, "grad_norm": 0.0, - "learning_rate": 1.9996458181443072e-05, - "loss": 1.0607, + "learning_rate": 1.9972814911076175e-05, + "loss": 1.1354, "step": 1349 }, { - "epoch": 0.03825554705432287, + "epoch": 0.05282103450974255, "grad_norm": 0.0, - "learning_rate": 1.999643371444829e-05, - "loss": 1.1161, + "learning_rate": 1.9972721453801412e-05, + "loss": 1.254, "step": 1350 }, { - "epoch": 0.03828388449658534, + "epoch": 0.052860161201971984, "grad_norm": 0.0, - "learning_rate": 1.999640916324983e-05, - "loss": 1.2166, + "learning_rate": 1.9972627836377242e-05, + "loss": 1.3676, "step": 1351 }, { - "epoch": 0.0383122219388478, + "epoch": 0.052899287894201424, "grad_norm": 0.0, - "learning_rate": 1.9996384527847907e-05, - "loss": 1.1802, + "learning_rate": 1.9972534058805163e-05, + "loss": 1.422, "step": 1352 }, { - "epoch": 0.03834055938111026, + "epoch": 0.052938414586430865, "grad_norm": 0.0, - "learning_rate": 1.9996359808242723e-05, - "loss": 1.1202, + "learning_rate": 1.997244012108668e-05, + "loss": 1.2822, "step": 1353 }, { - "epoch": 0.038368896823372724, + "epoch": 0.0529775412786603, "grad_norm": 0.0, - "learning_rate": 1.9996335004434488e-05, - "loss": 1.1287, + "learning_rate": 1.9972346023223313e-05, + "loss": 1.2349, "step": 1354 }, { - "epoch": 0.03839723426563518, + "epoch": 0.05301666797088974, "grad_norm": 0.0, - "learning_rate": 1.9996310116423412e-05, - "loss": 1.1328, + "learning_rate": 1.997225176521656e-05, + "loss": 1.2072, "step": 1355 }, { - "epoch": 0.03842557170789764, + "epoch": 0.05305579466311918, "grad_norm": 0.0, - "learning_rate": 1.9996285144209706e-05, - "loss": 1.1721, + "learning_rate": 1.997215734706794e-05, + "loss": 1.2492, "step": 1356 }, { - "epoch": 0.03845390915016011, + "epoch": 0.05309492135534862, "grad_norm": 0.0, - "learning_rate": 1.9996260087793577e-05, - "loss": 1.173, + "learning_rate": 1.997206276877897e-05, + "loss": 1.2317, "step": 1357 }, { - "epoch": 0.03848224659242257, + "epoch": 0.053134048047578056, "grad_norm": 0.0, - "learning_rate": 1.9996234947175237e-05, - "loss": 1.1288, + "learning_rate": 1.9971968030351166e-05, + "loss": 1.2672, "step": 1358 }, { - "epoch": 0.038510584034685026, + "epoch": 0.0531731747398075, "grad_norm": 0.0, - "learning_rate": 1.9996209722354896e-05, - "loss": 1.171, + "learning_rate": 1.9971873131786052e-05, + "loss": 1.2523, "step": 1359 }, { - "epoch": 0.03853892147694749, + "epoch": 0.05321230143203694, "grad_norm": 0.0, - "learning_rate": 1.9996184413332772e-05, - "loss": 1.1878, + "learning_rate": 1.997177807308515e-05, + "loss": 1.3531, "step": 1360 }, { - "epoch": 0.03856725891920995, + "epoch": 0.05325142812426637, "grad_norm": 0.0, - "learning_rate": 1.999615902010907e-05, - "loss": 1.0476, + "learning_rate": 1.997168285424999e-05, + "loss": 1.21, "step": 1361 }, { - "epoch": 0.03859559636147241, + "epoch": 0.05329055481649581, "grad_norm": 0.0, - "learning_rate": 1.9996133542684014e-05, - "loss": 1.2512, + "learning_rate": 1.99715874752821e-05, + "loss": 1.2273, "step": 1362 }, { - "epoch": 0.038623933803734876, + "epoch": 0.05332968150872525, "grad_norm": 0.0, - "learning_rate": 1.9996107981057807e-05, - "loss": 1.1218, + "learning_rate": 1.9971491936183005e-05, + "loss": 1.2141, "step": 1363 }, { - "epoch": 0.038652271245997336, + "epoch": 0.053368808200954694, "grad_norm": 0.0, - "learning_rate": 1.9996082335230674e-05, - "loss": 1.2286, + "learning_rate": 1.9971396236954247e-05, + "loss": 1.3196, "step": 1364 }, { - "epoch": 0.038680608688259795, + "epoch": 0.05340793489318413, "grad_norm": 0.0, - "learning_rate": 1.9996056605202828e-05, - "loss": 1.1038, + "learning_rate": 1.997130037759736e-05, + "loss": 1.2297, "step": 1365 }, { - "epoch": 0.03870894613052226, + "epoch": 0.05344706158541357, "grad_norm": 0.0, - "learning_rate": 1.999603079097449e-05, - "loss": 1.1014, + "learning_rate": 1.9971204358113882e-05, + "loss": 1.2087, "step": 1366 }, { - "epoch": 0.03873728357278472, + "epoch": 0.05348618827764301, "grad_norm": 0.0, - "learning_rate": 1.9996004892545864e-05, - "loss": 1.1469, + "learning_rate": 1.997110817850536e-05, + "loss": 1.3336, "step": 1367 }, { - "epoch": 0.03876562101504718, + "epoch": 0.053525314969872444, "grad_norm": 0.0, - "learning_rate": 1.999597890991718e-05, - "loss": 1.3159, + "learning_rate": 1.997101183877333e-05, + "loss": 1.2298, "step": 1368 }, { - "epoch": 0.038793958457309645, + "epoch": 0.053564441662101885, "grad_norm": 0.0, - "learning_rate": 1.999595284308866e-05, - "loss": 1.2518, + "learning_rate": 1.997091533891935e-05, + "loss": 1.3546, "step": 1369 }, { - "epoch": 0.038822295899572104, + "epoch": 0.053603568354331325, "grad_norm": 0.0, - "learning_rate": 1.9995926692060508e-05, - "loss": 1.2407, + "learning_rate": 1.9970818678944962e-05, + "loss": 1.3578, "step": 1370 }, { - "epoch": 0.03885063334183456, + "epoch": 0.053642695046560766, "grad_norm": 0.0, - "learning_rate": 1.999590045683296e-05, - "loss": 1.1633, + "learning_rate": 1.997072185885172e-05, + "loss": 1.283, "step": 1371 }, { - "epoch": 0.03887897078409703, + "epoch": 0.0536818217387902, "grad_norm": 0.0, - "learning_rate": 1.9995874137406225e-05, - "loss": 1.0846, + "learning_rate": 1.9970624878641178e-05, + "loss": 1.2152, "step": 1372 }, { - "epoch": 0.03890730822635949, + "epoch": 0.05372094843101964, "grad_norm": 0.0, - "learning_rate": 1.9995847733780534e-05, - "loss": 1.0664, + "learning_rate": 1.9970527738314898e-05, + "loss": 1.2372, "step": 1373 }, { - "epoch": 0.03893564566862195, + "epoch": 0.05376007512324908, "grad_norm": 0.0, - "learning_rate": 1.99958212459561e-05, - "loss": 1.1512, + "learning_rate": 1.9970430437874434e-05, + "loss": 1.2915, "step": 1374 }, { - "epoch": 0.03896398311088441, + "epoch": 0.05379920181547852, "grad_norm": 0.0, - "learning_rate": 1.999579467393316e-05, - "loss": 1.2939, + "learning_rate": 1.9970332977321348e-05, + "loss": 1.2908, "step": 1375 }, { - "epoch": 0.03899232055314687, + "epoch": 0.05383832850770796, "grad_norm": 0.0, - "learning_rate": 1.9995768017711925e-05, - "loss": 1.1418, + "learning_rate": 1.9970235356657212e-05, + "loss": 1.2699, "step": 1376 }, { - "epoch": 0.03902065799540933, + "epoch": 0.0538774551999374, "grad_norm": 0.0, - "learning_rate": 1.9995741277292625e-05, - "loss": 1.1754, + "learning_rate": 1.9970137575883584e-05, + "loss": 1.2947, "step": 1377 }, { - "epoch": 0.0390489954376718, + "epoch": 0.05391658189216684, "grad_norm": 0.0, - "learning_rate": 1.9995714452675485e-05, - "loss": 1.1089, + "learning_rate": 1.9970039635002044e-05, + "loss": 1.3246, "step": 1378 }, { - "epoch": 0.03907733287993426, + "epoch": 0.05395570858439627, "grad_norm": 0.0, - "learning_rate": 1.999568754386073e-05, - "loss": 1.1553, + "learning_rate": 1.996994153401416e-05, + "loss": 1.2508, "step": 1379 }, { - "epoch": 0.039105670322196716, + "epoch": 0.05399483527662571, "grad_norm": 0.0, - "learning_rate": 1.9995660550848588e-05, - "loss": 1.0953, + "learning_rate": 1.996984327292151e-05, + "loss": 1.4706, "step": 1380 }, { - "epoch": 0.03913400776445918, + "epoch": 0.054033961968855154, "grad_norm": 0.0, - "learning_rate": 1.9995633473639285e-05, - "loss": 1.2136, + "learning_rate": 1.996974485172567e-05, + "loss": 1.2924, "step": 1381 }, { - "epoch": 0.03916234520672164, + "epoch": 0.054073088661084595, "grad_norm": 0.0, - "learning_rate": 1.9995606312233046e-05, - "loss": 1.213, + "learning_rate": 1.9969646270428216e-05, + "loss": 1.2487, "step": 1382 }, { - "epoch": 0.0391906826489841, + "epoch": 0.05411221535331403, "grad_norm": 0.0, - "learning_rate": 1.999557906663011e-05, - "loss": 1.2153, + "learning_rate": 1.996954752903074e-05, + "loss": 1.2987, "step": 1383 }, { - "epoch": 0.039219020091246566, + "epoch": 0.05415134204554347, "grad_norm": 0.0, - "learning_rate": 1.9995551736830693e-05, - "loss": 1.1929, + "learning_rate": 1.996944862753482e-05, + "loss": 1.2137, "step": 1384 }, { - "epoch": 0.039247357533509025, + "epoch": 0.05419046873777291, "grad_norm": 0.0, - "learning_rate": 1.9995524322835035e-05, - "loss": 1.0591, + "learning_rate": 1.9969349565942048e-05, + "loss": 1.1837, "step": 1385 }, { - "epoch": 0.039275694975771484, + "epoch": 0.054229595430002345, "grad_norm": 0.0, - "learning_rate": 1.9995496824643364e-05, - "loss": 1.1785, + "learning_rate": 1.9969250344254016e-05, + "loss": 1.2672, "step": 1386 }, { - "epoch": 0.03930403241803395, + "epoch": 0.054268722122231786, "grad_norm": 0.0, - "learning_rate": 1.9995469242255913e-05, - "loss": 1.1962, + "learning_rate": 1.9969150962472315e-05, + "loss": 1.1474, "step": 1387 }, { - "epoch": 0.03933236986029641, + "epoch": 0.054307848814461226, "grad_norm": 0.0, - "learning_rate": 1.999544157567291e-05, - "loss": 1.143, + "learning_rate": 1.996905142059854e-05, + "loss": 1.1169, "step": 1388 }, { - "epoch": 0.03936070730255887, + "epoch": 0.05434697550669067, "grad_norm": 0.0, - "learning_rate": 1.9995413824894593e-05, - "loss": 1.1651, + "learning_rate": 1.9968951718634293e-05, + "loss": 1.1744, "step": 1389 }, { - "epoch": 0.039389044744821335, + "epoch": 0.0543861021989201, "grad_norm": 0.0, - "learning_rate": 1.999538598992119e-05, - "loss": 1.1373, + "learning_rate": 1.9968851856581174e-05, + "loss": 1.4155, "step": 1390 }, { - "epoch": 0.039417382187083794, + "epoch": 0.05442522889114954, "grad_norm": 0.0, - "learning_rate": 1.999535807075294e-05, - "loss": 1.2197, + "learning_rate": 1.9968751834440783e-05, + "loss": 1.345, "step": 1391 }, { - "epoch": 0.03944571962934625, + "epoch": 0.05446435558337898, "grad_norm": 0.0, - "learning_rate": 1.999533006739008e-05, - "loss": 1.2374, + "learning_rate": 1.996865165221473e-05, + "loss": 1.3215, "step": 1392 }, { - "epoch": 0.03947405707160872, + "epoch": 0.05450348227560842, "grad_norm": 0.0, - "learning_rate": 1.999530197983284e-05, - "loss": 1.1947, + "learning_rate": 1.9968551309904622e-05, + "loss": 1.2598, "step": 1393 }, { - "epoch": 0.03950239451387118, + "epoch": 0.05454260896783786, "grad_norm": 0.0, - "learning_rate": 1.9995273808081467e-05, - "loss": 1.163, + "learning_rate": 1.9968450807512074e-05, + "loss": 1.3619, "step": 1394 }, { - "epoch": 0.03953073195613364, + "epoch": 0.0545817356600673, "grad_norm": 0.0, - "learning_rate": 1.9995245552136185e-05, - "loss": 1.2045, + "learning_rate": 1.99683501450387e-05, + "loss": 1.271, "step": 1395 }, { - "epoch": 0.0395590693983961, + "epoch": 0.05462086235229674, "grad_norm": 0.0, - "learning_rate": 1.9995217211997237e-05, - "loss": 1.0729, + "learning_rate": 1.9968249322486108e-05, + "loss": 1.1515, "step": 1396 }, { - "epoch": 0.03958740684065856, + "epoch": 0.054659989044526174, "grad_norm": 0.0, - "learning_rate": 1.9995188787664864e-05, - "loss": 1.2337, + "learning_rate": 1.9968148339855925e-05, + "loss": 1.2378, "step": 1397 }, { - "epoch": 0.03961574428292102, + "epoch": 0.054699115736755614, "grad_norm": 0.0, - "learning_rate": 1.9995160279139306e-05, - "loss": 1.0849, + "learning_rate": 1.9968047197149766e-05, + "loss": 1.2681, "step": 1398 }, { - "epoch": 0.03964408172518349, + "epoch": 0.054738242428985055, "grad_norm": 0.0, - "learning_rate": 1.9995131686420802e-05, - "loss": 1.1712, + "learning_rate": 1.9967945894369264e-05, + "loss": 1.2922, "step": 1399 }, { - "epoch": 0.039672419167445946, + "epoch": 0.05477736912121449, "grad_norm": 0.0, - "learning_rate": 1.9995103009509587e-05, - "loss": 1.07, + "learning_rate": 1.996784443151604e-05, + "loss": 1.0974, "step": 1400 }, { - "epoch": 0.039700756609708406, + "epoch": 0.05481649581344393, "grad_norm": 0.0, - "learning_rate": 1.9995074248405913e-05, - "loss": 1.3056, + "learning_rate": 1.996774280859173e-05, + "loss": 1.4211, "step": 1401 }, { - "epoch": 0.03972909405197087, + "epoch": 0.05485562250567337, "grad_norm": 0.0, - "learning_rate": 1.9995045403110017e-05, - "loss": 1.1272, + "learning_rate": 1.9967641025597953e-05, + "loss": 1.2762, "step": 1402 }, { - "epoch": 0.03975743149423333, + "epoch": 0.05489474919790281, "grad_norm": 0.0, - "learning_rate": 1.999501647362214e-05, - "loss": 1.1729, + "learning_rate": 1.9967539082536356e-05, + "loss": 1.2609, "step": 1403 }, { - "epoch": 0.03978576893649579, + "epoch": 0.054933875890132246, "grad_norm": 0.0, - "learning_rate": 1.9994987459942528e-05, - "loss": 1.3098, + "learning_rate": 1.996743697940857e-05, + "loss": 1.2465, "step": 1404 }, { - "epoch": 0.039814106378758256, + "epoch": 0.05497300258236169, "grad_norm": 0.0, - "learning_rate": 1.9994958362071422e-05, - "loss": 0.9509, + "learning_rate": 1.9967334716216234e-05, + "loss": 1.1572, "step": 1405 }, { - "epoch": 0.039842443821020715, + "epoch": 0.05501212927459113, "grad_norm": 0.0, - "learning_rate": 1.9994929180009074e-05, - "loss": 1.1913, + "learning_rate": 1.9967232292960997e-05, + "loss": 1.3344, "step": 1406 }, { - "epoch": 0.039870781263283174, + "epoch": 0.05505125596682057, "grad_norm": 0.0, - "learning_rate": 1.9994899913755726e-05, - "loss": 1.1403, + "learning_rate": 1.9967129709644495e-05, + "loss": 1.3733, "step": 1407 }, { - "epoch": 0.03989911870554564, + "epoch": 0.05509038265905, "grad_norm": 0.0, - "learning_rate": 1.9994870563311627e-05, - "loss": 1.1866, + "learning_rate": 1.996702696626838e-05, + "loss": 1.3813, "step": 1408 }, { - "epoch": 0.0399274561478081, + "epoch": 0.05512950935127944, "grad_norm": 0.0, - "learning_rate": 1.999484112867702e-05, - "loss": 1.29, + "learning_rate": 1.9966924062834306e-05, + "loss": 1.2879, "step": 1409 }, { - "epoch": 0.03995579359007056, + "epoch": 0.055168636043508884, "grad_norm": 0.0, - "learning_rate": 1.999481160985215e-05, - "loss": 1.057, + "learning_rate": 1.9966820999343913e-05, + "loss": 1.2905, "step": 1410 }, { - "epoch": 0.039984131032333024, + "epoch": 0.05520776273573832, "grad_norm": 0.0, - "learning_rate": 1.9994782006837275e-05, - "loss": 1.1576, + "learning_rate": 1.996671777579887e-05, + "loss": 1.2346, "step": 1411 }, { - "epoch": 0.04001246847459548, + "epoch": 0.05524688942796776, "grad_norm": 0.0, - "learning_rate": 1.999475231963264e-05, - "loss": 1.1844, + "learning_rate": 1.9966614392200827e-05, + "loss": 1.3134, "step": 1412 }, { - "epoch": 0.04004080591685794, + "epoch": 0.0552860161201972, "grad_norm": 0.0, - "learning_rate": 1.999472254823849e-05, - "loss": 1.239, + "learning_rate": 1.9966510848551444e-05, + "loss": 1.2278, "step": 1413 }, { - "epoch": 0.04006914335912041, + "epoch": 0.05532514281242664, "grad_norm": 0.0, - "learning_rate": 1.9994692692655082e-05, - "loss": 1.0931, + "learning_rate": 1.996640714485239e-05, + "loss": 1.3411, "step": 1414 }, { - "epoch": 0.04009748080138287, + "epoch": 0.055364269504656075, "grad_norm": 0.0, - "learning_rate": 1.999466275288267e-05, - "loss": 1.2036, + "learning_rate": 1.996630328110532e-05, + "loss": 1.3084, "step": 1415 }, { - "epoch": 0.04012581824364533, + "epoch": 0.055403396196885515, "grad_norm": 0.0, - "learning_rate": 1.9994632728921498e-05, - "loss": 1.214, + "learning_rate": 1.996619925731191e-05, + "loss": 1.3545, "step": 1416 }, { - "epoch": 0.04015415568590779, + "epoch": 0.055442522889114956, "grad_norm": 0.0, - "learning_rate": 1.9994602620771825e-05, - "loss": 1.1485, + "learning_rate": 1.9966095073473828e-05, + "loss": 1.2815, "step": 1417 }, { - "epoch": 0.04018249312817025, + "epoch": 0.05548164958134439, "grad_norm": 0.0, - "learning_rate": 1.9994572428433906e-05, - "loss": 1.2238, + "learning_rate": 1.9965990729592748e-05, + "loss": 1.1809, "step": 1418 }, { - "epoch": 0.04021083057043271, + "epoch": 0.05552077627357383, "grad_norm": 0.0, - "learning_rate": 1.9994542151907988e-05, - "loss": 1.1336, + "learning_rate": 1.9965886225670346e-05, + "loss": 1.3041, "step": 1419 }, { - "epoch": 0.04023916801269518, + "epoch": 0.05555990296580327, "grad_norm": 0.0, - "learning_rate": 1.999451179119433e-05, - "loss": 1.2018, + "learning_rate": 1.99657815617083e-05, + "loss": 1.3282, "step": 1420 }, { - "epoch": 0.040267505454957636, + "epoch": 0.05559902965803271, "grad_norm": 0.0, - "learning_rate": 1.9994481346293186e-05, - "loss": 1.2988, + "learning_rate": 1.9965676737708284e-05, + "loss": 1.2586, "step": 1421 }, { - "epoch": 0.040295842897220095, + "epoch": 0.05563815635026215, "grad_norm": 0.0, - "learning_rate": 1.9994450817204817e-05, - "loss": 1.2561, + "learning_rate": 1.996557175367199e-05, + "loss": 1.3147, "step": 1422 }, { - "epoch": 0.04032418033948256, + "epoch": 0.05567728304249159, "grad_norm": 0.0, - "learning_rate": 1.9994420203929478e-05, - "loss": 1.093, + "learning_rate": 1.9965466609601105e-05, + "loss": 1.3737, "step": 1423 }, { - "epoch": 0.04035251778174502, + "epoch": 0.05571640973472103, "grad_norm": 0.0, - "learning_rate": 1.9994389506467423e-05, - "loss": 1.1607, + "learning_rate": 1.996536130549731e-05, + "loss": 1.1664, "step": 1424 }, { - "epoch": 0.04038085522400748, + "epoch": 0.05575553642695046, "grad_norm": 0.0, - "learning_rate": 1.999435872481892e-05, - "loss": 1.0986, + "learning_rate": 1.9965255841362303e-05, + "loss": 1.2214, "step": 1425 }, { - "epoch": 0.040409192666269945, + "epoch": 0.0557946631191799, "grad_norm": 0.0, - "learning_rate": 1.9994327858984216e-05, - "loss": 1.2865, + "learning_rate": 1.996515021719777e-05, + "loss": 1.1888, "step": 1426 }, { - "epoch": 0.040437530108532405, + "epoch": 0.055833789811409344, "grad_norm": 0.0, - "learning_rate": 1.9994296908963583e-05, - "loss": 1.1758, + "learning_rate": 1.9965044433005418e-05, + "loss": 1.3079, "step": 1427 }, { - "epoch": 0.040465867550794864, + "epoch": 0.055872916503638785, "grad_norm": 0.0, - "learning_rate": 1.999426587475727e-05, - "loss": 1.1516, + "learning_rate": 1.996493848878693e-05, + "loss": 1.3465, "step": 1428 }, { - "epoch": 0.04049420499305733, + "epoch": 0.05591204319586822, "grad_norm": 0.0, - "learning_rate": 1.9994234756365547e-05, - "loss": 1.1385, + "learning_rate": 1.996483238454402e-05, + "loss": 1.1924, "step": 1429 }, { - "epoch": 0.04052254243531979, + "epoch": 0.05595116988809766, "grad_norm": 0.0, - "learning_rate": 1.9994203553788675e-05, - "loss": 1.0487, + "learning_rate": 1.9964726120278394e-05, + "loss": 1.2723, "step": 1430 }, { - "epoch": 0.04055087987758225, + "epoch": 0.0559902965803271, "grad_norm": 0.0, - "learning_rate": 1.9994172267026912e-05, - "loss": 1.1918, + "learning_rate": 1.996461969599175e-05, + "loss": 1.2961, "step": 1431 }, { - "epoch": 0.040579217319844714, + "epoch": 0.056029423272556535, "grad_norm": 0.0, - "learning_rate": 1.9994140896080524e-05, - "loss": 1.196, + "learning_rate": 1.99645131116858e-05, + "loss": 1.3085, "step": 1432 }, { - "epoch": 0.04060755476210717, + "epoch": 0.056068549964785976, "grad_norm": 0.0, - "learning_rate": 1.999410944094978e-05, - "loss": 1.0945, + "learning_rate": 1.9964406367362253e-05, + "loss": 1.1934, "step": 1433 }, { - "epoch": 0.04063589220436963, + "epoch": 0.056107676657015416, "grad_norm": 0.0, - "learning_rate": 1.999407790163494e-05, - "loss": 1.1753, + "learning_rate": 1.9964299463022827e-05, + "loss": 1.3039, "step": 1434 }, { - "epoch": 0.0406642296466321, + "epoch": 0.05614680334924486, "grad_norm": 0.0, - "learning_rate": 1.9994046278136266e-05, - "loss": 1.2095, + "learning_rate": 1.996419239866924e-05, + "loss": 1.199, "step": 1435 }, { - "epoch": 0.04069256708889456, + "epoch": 0.05618593004147429, "grad_norm": 0.0, - "learning_rate": 1.9994014570454034e-05, - "loss": 1.2234, + "learning_rate": 1.996408517430321e-05, + "loss": 1.2791, "step": 1436 }, { - "epoch": 0.040720904531157016, + "epoch": 0.05622505673370373, "grad_norm": 0.0, - "learning_rate": 1.9993982778588507e-05, - "loss": 1.1647, + "learning_rate": 1.996397778992645e-05, + "loss": 1.2206, "step": 1437 }, { - "epoch": 0.04074924197341948, + "epoch": 0.05626418342593317, "grad_norm": 0.0, - "learning_rate": 1.999395090253995e-05, - "loss": 1.1169, + "learning_rate": 1.99638702455407e-05, + "loss": 1.2244, "step": 1438 }, { - "epoch": 0.04077757941568194, + "epoch": 0.056303310118162614, "grad_norm": 0.0, - "learning_rate": 1.999391894230863e-05, - "loss": 1.1719, + "learning_rate": 1.9963762541147676e-05, + "loss": 1.271, "step": 1439 }, { - "epoch": 0.0408059168579444, + "epoch": 0.05634243681039205, "grad_norm": 0.0, - "learning_rate": 1.9993886897894823e-05, - "loss": 1.2498, + "learning_rate": 1.996365467674911e-05, + "loss": 1.2829, "step": 1440 }, { - "epoch": 0.04083425430020687, + "epoch": 0.05638156350262149, "grad_norm": 0.0, - "learning_rate": 1.9993854769298795e-05, - "loss": 1.2118, + "learning_rate": 1.9963546652346736e-05, + "loss": 1.1707, "step": 1441 }, { - "epoch": 0.040862591742469326, + "epoch": 0.05642069019485093, "grad_norm": 0.0, - "learning_rate": 1.9993822556520818e-05, - "loss": 1.1168, + "learning_rate": 1.9963438467942288e-05, + "loss": 1.3314, "step": 1442 }, { - "epoch": 0.040890929184731785, + "epoch": 0.056459816887080364, "grad_norm": 0.0, - "learning_rate": 1.9993790259561162e-05, - "loss": 1.2049, + "learning_rate": 1.9963330123537507e-05, + "loss": 1.1826, "step": 1443 }, { - "epoch": 0.04091926662699425, + "epoch": 0.056498943579309804, "grad_norm": 0.0, - "learning_rate": 1.99937578784201e-05, - "loss": 1.2181, + "learning_rate": 1.9963221619134125e-05, + "loss": 1.2538, "step": 1444 }, { - "epoch": 0.04094760406925671, + "epoch": 0.056538070271539245, "grad_norm": 0.0, - "learning_rate": 1.99937254130979e-05, - "loss": 1.1192, + "learning_rate": 1.9963112954733886e-05, + "loss": 1.2751, "step": 1445 }, { - "epoch": 0.04097594151151917, + "epoch": 0.056577196963768686, "grad_norm": 0.0, - "learning_rate": 1.9993692863594846e-05, - "loss": 1.1827, + "learning_rate": 1.9963004130338543e-05, + "loss": 1.3487, "step": 1446 }, { - "epoch": 0.04100427895378163, + "epoch": 0.05661632365599812, "grad_norm": 0.0, - "learning_rate": 1.9993660229911205e-05, - "loss": 1.1492, + "learning_rate": 1.9962895145949833e-05, + "loss": 1.1616, "step": 1447 }, { - "epoch": 0.041032616396044094, + "epoch": 0.05665545034822756, "grad_norm": 0.0, - "learning_rate": 1.9993627512047248e-05, - "loss": 1.1906, + "learning_rate": 1.9962786001569515e-05, + "loss": 1.3129, "step": 1448 }, { - "epoch": 0.04106095383830655, + "epoch": 0.056694577040457, "grad_norm": 0.0, - "learning_rate": 1.9993594710003262e-05, - "loss": 1.1278, + "learning_rate": 1.9962676697199333e-05, + "loss": 1.1373, "step": 1449 }, { - "epoch": 0.04108929128056901, + "epoch": 0.056733703732686436, "grad_norm": 0.0, - "learning_rate": 1.9993561823779512e-05, - "loss": 0.9848, + "learning_rate": 1.9962567232841054e-05, + "loss": 1.3264, "step": 1450 }, { - "epoch": 0.04111762872283148, + "epoch": 0.05677283042491588, "grad_norm": 0.0, - "learning_rate": 1.999352885337628e-05, - "loss": 1.1902, + "learning_rate": 1.9962457608496424e-05, + "loss": 1.3156, "step": 1451 }, { - "epoch": 0.04114596616509394, + "epoch": 0.05681195711714532, "grad_norm": 0.0, - "learning_rate": 1.9993495798793846e-05, - "loss": 0.9908, + "learning_rate": 1.996234782416721e-05, + "loss": 1.405, "step": 1452 }, { - "epoch": 0.0411743036073564, + "epoch": 0.05685108380937476, "grad_norm": 0.0, - "learning_rate": 1.9993462660032484e-05, - "loss": 1.1605, + "learning_rate": 1.9962237879855174e-05, + "loss": 1.2362, "step": 1453 }, { - "epoch": 0.04120264104961886, + "epoch": 0.05689021050160419, "grad_norm": 0.0, - "learning_rate": 1.9993429437092474e-05, - "loss": 1.2059, + "learning_rate": 1.996212777556208e-05, + "loss": 1.353, "step": 1454 }, { - "epoch": 0.04123097849188132, + "epoch": 0.05692933719383363, "grad_norm": 0.0, - "learning_rate": 1.99933961299741e-05, - "loss": 1.1671, + "learning_rate": 1.9962017511289696e-05, + "loss": 1.3002, "step": 1455 }, { - "epoch": 0.04125931593414378, + "epoch": 0.056968463886063074, "grad_norm": 0.0, - "learning_rate": 1.9993362738677637e-05, - "loss": 1.1593, + "learning_rate": 1.9961907087039796e-05, + "loss": 1.267, "step": 1456 }, { - "epoch": 0.04128765337640625, + "epoch": 0.05700759057829251, "grad_norm": 0.0, - "learning_rate": 1.999332926320337e-05, - "loss": 1.1607, + "learning_rate": 1.996179650281415e-05, + "loss": 1.3147, "step": 1457 }, { - "epoch": 0.041315990818668706, + "epoch": 0.05704671727052195, "grad_norm": 0.0, - "learning_rate": 1.999329570355158e-05, - "loss": 1.1681, + "learning_rate": 1.9961685758614537e-05, + "loss": 1.2137, "step": 1458 }, { - "epoch": 0.041344328260931165, + "epoch": 0.05708584396275139, "grad_norm": 0.0, - "learning_rate": 1.9993262059722548e-05, - "loss": 1.1999, + "learning_rate": 1.996157485444273e-05, + "loss": 1.2678, "step": 1459 }, { - "epoch": 0.04137266570319363, + "epoch": 0.05712497065498083, "grad_norm": 0.0, - "learning_rate": 1.9993228331716558e-05, - "loss": 1.0933, + "learning_rate": 1.9961463790300518e-05, + "loss": 1.2242, "step": 1460 }, { - "epoch": 0.04140100314545609, + "epoch": 0.057164097347210265, "grad_norm": 0.0, - "learning_rate": 1.9993194519533896e-05, - "loss": 1.1418, + "learning_rate": 1.9961352566189677e-05, + "loss": 1.2961, "step": 1461 }, { - "epoch": 0.04142934058771855, + "epoch": 0.057203224039439705, "grad_norm": 0.0, - "learning_rate": 1.9993160623174846e-05, - "loss": 1.1238, + "learning_rate": 1.9961241182111996e-05, + "loss": 1.4189, "step": 1462 }, { - "epoch": 0.041457678029981015, + "epoch": 0.057242350731669146, "grad_norm": 0.0, - "learning_rate": 1.9993126642639694e-05, - "loss": 1.1756, + "learning_rate": 1.9961129638069266e-05, + "loss": 1.342, "step": 1463 }, { - "epoch": 0.041486015472243475, + "epoch": 0.05728147742389858, "grad_norm": 0.0, - "learning_rate": 1.9993092577928725e-05, - "loss": 1.1047, + "learning_rate": 1.9961017934063273e-05, + "loss": 1.2049, "step": 1464 }, { - "epoch": 0.041514352914505934, + "epoch": 0.05732060411612802, "grad_norm": 0.0, - "learning_rate": 1.9993058429042224e-05, - "loss": 1.197, + "learning_rate": 1.9960906070095815e-05, + "loss": 1.2917, "step": 1465 }, { - "epoch": 0.0415426903567684, + "epoch": 0.05735973080835746, "grad_norm": 0.0, - "learning_rate": 1.9993024195980485e-05, - "loss": 1.2997, + "learning_rate": 1.9960794046168687e-05, + "loss": 1.3035, "step": 1466 }, { - "epoch": 0.04157102779903086, + "epoch": 0.0573988575005869, "grad_norm": 0.0, - "learning_rate": 1.999298987874379e-05, - "loss": 1.0721, + "learning_rate": 1.996068186228369e-05, + "loss": 1.2864, "step": 1467 }, { - "epoch": 0.04159936524129332, + "epoch": 0.05743798419281634, "grad_norm": 0.0, - "learning_rate": 1.999295547733243e-05, - "loss": 1.1686, + "learning_rate": 1.996056951844262e-05, + "loss": 1.3156, "step": 1468 }, { - "epoch": 0.041627702683555784, + "epoch": 0.05747711088504578, "grad_norm": 0.0, - "learning_rate": 1.9992920991746694e-05, - "loss": 1.1622, + "learning_rate": 1.996045701464729e-05, + "loss": 1.283, "step": 1469 }, { - "epoch": 0.04165604012581824, + "epoch": 0.05751623757727522, "grad_norm": 0.0, - "learning_rate": 1.9992886421986876e-05, - "loss": 1.2737, + "learning_rate": 1.9960344350899495e-05, + "loss": 1.3331, "step": 1470 }, { - "epoch": 0.0416843775680807, + "epoch": 0.05755536426950466, "grad_norm": 0.0, - "learning_rate": 1.9992851768053267e-05, - "loss": 1.1769, + "learning_rate": 1.9960231527201056e-05, + "loss": 1.3807, "step": 1471 }, { - "epoch": 0.04171271501034317, + "epoch": 0.05759449096173409, "grad_norm": 0.0, - "learning_rate": 1.999281702994615e-05, - "loss": 1.1957, + "learning_rate": 1.9960118543553776e-05, + "loss": 1.2469, "step": 1472 }, { - "epoch": 0.04174105245260563, + "epoch": 0.057633617653963534, "grad_norm": 0.0, - "learning_rate": 1.9992782207665835e-05, - "loss": 1.2108, + "learning_rate": 1.9960005399959477e-05, + "loss": 1.3196, "step": 1473 }, { - "epoch": 0.041769389894868086, + "epoch": 0.057672744346192975, "grad_norm": 0.0, - "learning_rate": 1.9992747301212595e-05, - "loss": 1.1319, + "learning_rate": 1.995989209641997e-05, + "loss": 1.3495, "step": 1474 }, { - "epoch": 0.04179772733713055, + "epoch": 0.05771187103842241, "grad_norm": 0.0, - "learning_rate": 1.9992712310586736e-05, - "loss": 1.245, + "learning_rate": 1.9959778632937074e-05, + "loss": 1.0748, "step": 1475 }, { - "epoch": 0.04182606477939301, + "epoch": 0.05775099773065185, "grad_norm": 0.0, - "learning_rate": 1.9992677235788554e-05, - "loss": 1.1249, + "learning_rate": 1.9959665009512616e-05, + "loss": 1.2234, "step": 1476 }, { - "epoch": 0.04185440222165547, + "epoch": 0.05779012442288129, "grad_norm": 0.0, - "learning_rate": 1.9992642076818337e-05, - "loss": 1.2102, + "learning_rate": 1.9959551226148416e-05, + "loss": 1.4152, "step": 1477 }, { - "epoch": 0.04188273966391794, + "epoch": 0.05782925111511073, "grad_norm": 0.0, - "learning_rate": 1.999260683367639e-05, - "loss": 1.0872, + "learning_rate": 1.9959437282846306e-05, + "loss": 1.3237, "step": 1478 }, { - "epoch": 0.041911077106180396, + "epoch": 0.057868377807340166, "grad_norm": 0.0, - "learning_rate": 1.9992571506362997e-05, - "loss": 1.0854, + "learning_rate": 1.995932317960811e-05, + "loss": 1.3206, "step": 1479 }, { - "epoch": 0.041939414548442855, + "epoch": 0.057907504499569606, "grad_norm": 0.0, - "learning_rate": 1.999253609487847e-05, - "loss": 1.166, + "learning_rate": 1.9959208916435665e-05, + "loss": 1.3827, "step": 1480 }, { - "epoch": 0.04196775199070532, + "epoch": 0.05794663119179905, "grad_norm": 0.0, - "learning_rate": 1.99925005992231e-05, - "loss": 1.0839, + "learning_rate": 1.9959094493330806e-05, + "loss": 1.3763, "step": 1481 }, { - "epoch": 0.04199608943296778, + "epoch": 0.05798575788402848, "grad_norm": 0.0, - "learning_rate": 1.9992465019397186e-05, - "loss": 1.1321, + "learning_rate": 1.9958979910295367e-05, + "loss": 1.3118, "step": 1482 }, { - "epoch": 0.04202442687523024, + "epoch": 0.05802488457625792, "grad_norm": 0.0, - "learning_rate": 1.9992429355401026e-05, - "loss": 1.1354, + "learning_rate": 1.9958865167331193e-05, + "loss": 1.2704, "step": 1483 }, { - "epoch": 0.042052764317492705, + "epoch": 0.05806401126848736, "grad_norm": 0.0, - "learning_rate": 1.9992393607234928e-05, - "loss": 1.2089, + "learning_rate": 1.9958750264440118e-05, + "loss": 1.2126, "step": 1484 }, { - "epoch": 0.042081101759755164, + "epoch": 0.058103137960716804, "grad_norm": 0.0, - "learning_rate": 1.9992357774899185e-05, - "loss": 1.1997, + "learning_rate": 1.9958635201624e-05, + "loss": 1.3442, "step": 1485 }, { - "epoch": 0.04210943920201762, + "epoch": 0.05814226465294624, "grad_norm": 0.0, - "learning_rate": 1.9992321858394104e-05, - "loss": 1.1533, + "learning_rate": 1.995851997888467e-05, + "loss": 1.2122, "step": 1486 }, { - "epoch": 0.04213777664428009, + "epoch": 0.05818139134517568, "grad_norm": 0.0, - "learning_rate": 1.9992285857719983e-05, - "loss": 1.0807, + "learning_rate": 1.9958404596223997e-05, + "loss": 1.1628, "step": 1487 }, { - "epoch": 0.04216611408654255, + "epoch": 0.05822051803740512, "grad_norm": 0.0, - "learning_rate": 1.999224977287713e-05, - "loss": 1.1669, + "learning_rate": 1.995828905364382e-05, + "loss": 1.2178, "step": 1488 }, { - "epoch": 0.04219445152880501, + "epoch": 0.058259644729634553, "grad_norm": 0.0, - "learning_rate": 1.999221360386584e-05, - "loss": 1.2362, + "learning_rate": 1.9958173351146e-05, + "loss": 1.3267, "step": 1489 }, { - "epoch": 0.042222788971067474, + "epoch": 0.058298771421863994, "grad_norm": 0.0, - "learning_rate": 1.999217735068643e-05, - "loss": 1.1141, + "learning_rate": 1.9958057488732393e-05, + "loss": 1.1824, "step": 1490 }, { - "epoch": 0.04225112641332993, + "epoch": 0.058337898114093435, "grad_norm": 0.0, - "learning_rate": 1.99921410133392e-05, - "loss": 1.0372, + "learning_rate": 1.9957941466404865e-05, + "loss": 1.2064, "step": 1491 }, { - "epoch": 0.04227946385559239, + "epoch": 0.058377024806322876, "grad_norm": 0.0, - "learning_rate": 1.9992104591824457e-05, - "loss": 1.1208, + "learning_rate": 1.9957825284165272e-05, + "loss": 1.2435, "step": 1492 }, { - "epoch": 0.04230780129785486, + "epoch": 0.05841615149855231, "grad_norm": 0.0, - "learning_rate": 1.9992068086142506e-05, - "loss": 1.2494, + "learning_rate": 1.9957708942015484e-05, + "loss": 1.3102, "step": 1493 }, { - "epoch": 0.04233613874011732, + "epoch": 0.05845527819078175, "grad_norm": 0.0, - "learning_rate": 1.9992031496293652e-05, - "loss": 1.0466, + "learning_rate": 1.9957592439957368e-05, + "loss": 1.3268, "step": 1494 }, { - "epoch": 0.042364476182379776, + "epoch": 0.05849440488301119, "grad_norm": 0.0, - "learning_rate": 1.9991994822278207e-05, - "loss": 1.1244, + "learning_rate": 1.9957475777992794e-05, + "loss": 1.2578, "step": 1495 }, { - "epoch": 0.04239281362464224, + "epoch": 0.058533531575240626, "grad_norm": 0.0, - "learning_rate": 1.999195806409648e-05, - "loss": 1.1599, + "learning_rate": 1.9957358956123637e-05, + "loss": 1.2845, "step": 1496 }, { - "epoch": 0.0424211510669047, + "epoch": 0.05857265826747007, "grad_norm": 0.0, - "learning_rate": 1.999192122174878e-05, - "loss": 1.0945, + "learning_rate": 1.9957241974351772e-05, + "loss": 1.1483, "step": 1497 }, { - "epoch": 0.04244948850916716, + "epoch": 0.05861178495969951, "grad_norm": 0.0, - "learning_rate": 1.9991884295235414e-05, - "loss": 1.1151, + "learning_rate": 1.9957124832679078e-05, + "loss": 1.29, "step": 1498 }, { - "epoch": 0.042477825951429626, + "epoch": 0.05865091165192895, "grad_norm": 0.0, - "learning_rate": 1.9991847284556703e-05, - "loss": 1.2072, + "learning_rate": 1.9957007531107437e-05, + "loss": 1.2665, "step": 1499 }, { - "epoch": 0.042506163393692085, + "epoch": 0.05869003834415838, "grad_norm": 0.0, - "learning_rate": 1.9991810189712945e-05, - "loss": 1.0923, + "learning_rate": 1.995689006963873e-05, + "loss": 1.3008, "step": 1500 }, { - "epoch": 0.042534500835954545, + "epoch": 0.05872916503638782, "grad_norm": 0.0, - "learning_rate": 1.9991773010704464e-05, - "loss": 1.123, + "learning_rate": 1.995677244827485e-05, + "loss": 1.2659, "step": 1501 }, { - "epoch": 0.04256283827821701, + "epoch": 0.058768291728617264, "grad_norm": 0.0, - "learning_rate": 1.9991735747531566e-05, - "loss": 1.1725, + "learning_rate": 1.9956654667017676e-05, + "loss": 1.3276, "step": 1502 }, { - "epoch": 0.04259117572047947, + "epoch": 0.058807418420846705, "grad_norm": 0.0, - "learning_rate": 1.999169840019457e-05, - "loss": 1.2181, + "learning_rate": 1.9956536725869105e-05, + "loss": 1.3114, "step": 1503 }, { - "epoch": 0.04261951316274193, + "epoch": 0.05884654511307614, "grad_norm": 0.0, - "learning_rate": 1.9991660968693788e-05, - "loss": 1.0854, + "learning_rate": 1.995641862483103e-05, + "loss": 1.32, "step": 1504 }, { - "epoch": 0.042647850605004395, + "epoch": 0.05888567180530558, "grad_norm": 0.0, - "learning_rate": 1.999162345302953e-05, - "loss": 1.21, + "learning_rate": 1.9956300363905348e-05, + "loss": 1.1043, "step": 1505 }, { - "epoch": 0.042676188047266854, + "epoch": 0.05892479849753502, "grad_norm": 0.0, - "learning_rate": 1.999158585320212e-05, - "loss": 1.1608, + "learning_rate": 1.9956181943093963e-05, + "loss": 1.2742, "step": 1506 }, { - "epoch": 0.04270452548952931, + "epoch": 0.058963925189764455, "grad_norm": 0.0, - "learning_rate": 1.9991548169211875e-05, - "loss": 1.2053, + "learning_rate": 1.9956063362398766e-05, + "loss": 1.1654, "step": 1507 }, { - "epoch": 0.04273286293179178, + "epoch": 0.059003051881993895, "grad_norm": 0.0, - "learning_rate": 1.999151040105911e-05, - "loss": 1.1307, + "learning_rate": 1.995594462182167e-05, + "loss": 1.3583, "step": 1508 }, { - "epoch": 0.04276120037405424, + "epoch": 0.059042178574223336, "grad_norm": 0.0, - "learning_rate": 1.999147254874414e-05, - "loss": 1.1392, + "learning_rate": 1.9955825721364576e-05, + "loss": 1.3568, "step": 1509 }, { - "epoch": 0.0427895378163167, + "epoch": 0.05908130526645278, "grad_norm": 0.0, - "learning_rate": 1.9991434612267286e-05, - "loss": 1.0836, + "learning_rate": 1.99557066610294e-05, + "loss": 1.2193, "step": 1510 }, { - "epoch": 0.04281787525857916, + "epoch": 0.05912043195868221, "grad_norm": 0.0, - "learning_rate": 1.999139659162887e-05, - "loss": 1.1379, + "learning_rate": 1.9955587440818047e-05, + "loss": 1.2645, "step": 1511 }, { - "epoch": 0.04284621270084162, + "epoch": 0.05915955865091165, "grad_norm": 0.0, - "learning_rate": 1.999135848682921e-05, - "loss": 1.1965, + "learning_rate": 1.995546806073244e-05, + "loss": 1.2852, "step": 1512 }, { - "epoch": 0.04287455014310408, + "epoch": 0.05919868534314109, "grad_norm": 0.0, - "learning_rate": 1.9991320297868624e-05, - "loss": 1.2512, + "learning_rate": 1.9955348520774484e-05, + "loss": 1.2707, "step": 1513 }, { - "epoch": 0.04290288758536655, + "epoch": 0.05923781203537053, "grad_norm": 0.0, - "learning_rate": 1.999128202474744e-05, - "loss": 1.1153, + "learning_rate": 1.995522882094611e-05, + "loss": 1.2805, "step": 1514 }, { - "epoch": 0.04293122502762901, + "epoch": 0.05927693872759997, "grad_norm": 0.0, - "learning_rate": 1.9991243667465978e-05, - "loss": 1.1956, + "learning_rate": 1.9955108961249235e-05, + "loss": 1.3357, "step": 1515 }, { - "epoch": 0.042959562469891466, + "epoch": 0.05931606541982941, "grad_norm": 0.0, - "learning_rate": 1.9991205226024558e-05, - "loss": 1.2061, + "learning_rate": 1.9954988941685784e-05, + "loss": 1.2684, "step": 1516 }, { - "epoch": 0.04298789991215393, + "epoch": 0.05935519211205885, "grad_norm": 0.0, - "learning_rate": 1.9991166700423504e-05, - "loss": 1.1572, + "learning_rate": 1.9954868762257685e-05, + "loss": 1.2357, "step": 1517 }, { - "epoch": 0.04301623735441639, + "epoch": 0.05939431880428828, "grad_norm": 0.0, - "learning_rate": 1.9991128090663146e-05, - "loss": 1.2078, + "learning_rate": 1.9954748422966866e-05, + "loss": 1.1926, "step": 1518 }, { - "epoch": 0.04304457479667885, + "epoch": 0.059433445496517724, "grad_norm": 0.0, - "learning_rate": 1.9991089396743808e-05, - "loss": 1.2017, + "learning_rate": 1.9954627923815265e-05, + "loss": 1.3364, "step": 1519 }, { - "epoch": 0.043072912238941316, + "epoch": 0.059472572188747165, "grad_norm": 0.0, - "learning_rate": 1.9991050618665813e-05, - "loss": 1.2557, + "learning_rate": 1.995450726480481e-05, + "loss": 1.2817, "step": 1520 }, { - "epoch": 0.043101249681203775, + "epoch": 0.0595116988809766, "grad_norm": 0.0, - "learning_rate": 1.9991011756429488e-05, - "loss": 1.1407, + "learning_rate": 1.9954386445937444e-05, + "loss": 1.3055, "step": 1521 }, { - "epoch": 0.043129587123466234, + "epoch": 0.05955082557320604, "grad_norm": 0.0, - "learning_rate": 1.999097281003516e-05, - "loss": 1.121, + "learning_rate": 1.9954265467215104e-05, + "loss": 1.2646, "step": 1522 }, { - "epoch": 0.0431579245657287, + "epoch": 0.05958995226543548, "grad_norm": 0.0, - "learning_rate": 1.9990933779483156e-05, - "loss": 1.0251, + "learning_rate": 1.9954144328639737e-05, + "loss": 1.3732, "step": 1523 }, { - "epoch": 0.04318626200799116, + "epoch": 0.05962907895766492, "grad_norm": 0.0, - "learning_rate": 1.999089466477381e-05, - "loss": 1.2241, + "learning_rate": 1.995402303021328e-05, + "loss": 1.2338, "step": 1524 }, { - "epoch": 0.04321459945025362, + "epoch": 0.059668205649894356, "grad_norm": 0.0, - "learning_rate": 1.999085546590745e-05, - "loss": 1.1525, + "learning_rate": 1.9953901571937688e-05, + "loss": 1.1318, "step": 1525 }, { - "epoch": 0.043242936892516085, + "epoch": 0.059707332342123796, "grad_norm": 0.0, - "learning_rate": 1.99908161828844e-05, - "loss": 1.1146, + "learning_rate": 1.9953779953814912e-05, + "loss": 1.2487, "step": 1526 }, { - "epoch": 0.043271274334778544, + "epoch": 0.05974645903435324, "grad_norm": 0.0, - "learning_rate": 1.9990776815704996e-05, - "loss": 1.1461, + "learning_rate": 1.9953658175846903e-05, + "loss": 1.2556, "step": 1527 }, { - "epoch": 0.043299611777041, + "epoch": 0.05978558572658268, "grad_norm": 0.0, - "learning_rate": 1.9990737364369572e-05, - "loss": 1.0974, + "learning_rate": 1.9953536238035614e-05, + "loss": 1.4261, "step": 1528 }, { - "epoch": 0.04332794921930347, + "epoch": 0.05982471241881211, "grad_norm": 0.0, - "learning_rate": 1.9990697828878453e-05, - "loss": 1.1629, + "learning_rate": 1.9953414140383005e-05, + "loss": 1.1581, "step": 1529 }, { - "epoch": 0.04335628666156593, + "epoch": 0.05986383911104155, "grad_norm": 0.0, - "learning_rate": 1.999065820923198e-05, - "loss": 1.1964, + "learning_rate": 1.995329188289104e-05, + "loss": 1.2424, "step": 1530 }, { - "epoch": 0.04338462410382839, + "epoch": 0.059902965803270994, "grad_norm": 0.0, - "learning_rate": 1.9990618505430483e-05, - "loss": 1.1736, + "learning_rate": 1.9953169465561677e-05, + "loss": 1.1438, "step": 1531 }, { - "epoch": 0.04341296154609085, + "epoch": 0.05994209249550043, "grad_norm": 0.0, - "learning_rate": 1.9990578717474292e-05, - "loss": 1.0162, + "learning_rate": 1.9953046888396886e-05, + "loss": 1.3212, "step": 1532 }, { - "epoch": 0.04344129898835331, + "epoch": 0.05998121918772987, "grad_norm": 0.0, - "learning_rate": 1.9990538845363752e-05, - "loss": 1.2, + "learning_rate": 1.9952924151398634e-05, + "loss": 1.2181, "step": 1533 }, { - "epoch": 0.04346963643061577, + "epoch": 0.06002034587995931, "grad_norm": 0.0, - "learning_rate": 1.999049888909919e-05, - "loss": 1.1454, + "learning_rate": 1.995280125456889e-05, + "loss": 1.3993, "step": 1534 }, { - "epoch": 0.04349797387287824, + "epoch": 0.06005947257218875, "grad_norm": 0.0, - "learning_rate": 1.9990458848680948e-05, - "loss": 1.3031, + "learning_rate": 1.995267819790963e-05, + "loss": 1.2994, "step": 1535 }, { - "epoch": 0.043526311315140696, + "epoch": 0.060098599264418184, "grad_norm": 0.0, - "learning_rate": 1.9990418724109358e-05, - "loss": 1.1825, + "learning_rate": 1.9952554981422832e-05, + "loss": 1.3257, "step": 1536 }, { - "epoch": 0.043554648757403155, + "epoch": 0.060137725956647625, "grad_norm": 0.0, - "learning_rate": 1.9990378515384763e-05, - "loss": 1.2813, + "learning_rate": 1.995243160511047e-05, + "loss": 1.1563, "step": 1537 }, { - "epoch": 0.04358298619966562, + "epoch": 0.060176852648877066, "grad_norm": 0.0, - "learning_rate": 1.9990338222507503e-05, - "loss": 1.0866, + "learning_rate": 1.9952308068974527e-05, + "loss": 1.2894, "step": 1538 }, { - "epoch": 0.04361132364192808, + "epoch": 0.0602159793411065, "grad_norm": 0.0, - "learning_rate": 1.999029784547791e-05, - "loss": 1.1573, + "learning_rate": 1.995218437301699e-05, + "loss": 1.2744, "step": 1539 }, { - "epoch": 0.04363966108419054, + "epoch": 0.06025510603333594, "grad_norm": 0.0, - "learning_rate": 1.999025738429633e-05, - "loss": 1.0753, + "learning_rate": 1.9952060517239838e-05, + "loss": 1.2245, "step": 1540 }, { - "epoch": 0.043667998526453006, + "epoch": 0.06029423272556538, "grad_norm": 0.0, - "learning_rate": 1.9990216838963103e-05, - "loss": 1.1795, + "learning_rate": 1.995193650164507e-05, + "loss": 1.2327, "step": 1541 }, { - "epoch": 0.043696335968715465, + "epoch": 0.06033335941779482, "grad_norm": 0.0, - "learning_rate": 1.9990176209478568e-05, - "loss": 1.0508, + "learning_rate": 1.995181232623467e-05, + "loss": 1.3288, "step": 1542 }, { - "epoch": 0.043724673410977924, + "epoch": 0.06037248611002426, "grad_norm": 0.0, - "learning_rate": 1.9990135495843068e-05, - "loss": 1.2252, + "learning_rate": 1.9951687991010634e-05, + "loss": 1.3696, "step": 1543 }, { - "epoch": 0.04375301085324039, + "epoch": 0.0604116128022537, "grad_norm": 0.0, - "learning_rate": 1.999009469805695e-05, - "loss": 1.2156, + "learning_rate": 1.995156349597496e-05, + "loss": 1.3776, "step": 1544 }, { - "epoch": 0.04378134829550285, + "epoch": 0.06045073949448314, "grad_norm": 0.0, - "learning_rate": 1.9990053816120553e-05, - "loss": 1.127, + "learning_rate": 1.9951438841129647e-05, + "loss": 1.302, "step": 1545 }, { - "epoch": 0.04380968573776531, + "epoch": 0.06048986618671257, "grad_norm": 0.0, - "learning_rate": 1.9990012850034225e-05, - "loss": 1.1925, + "learning_rate": 1.9951314026476693e-05, + "loss": 1.2681, "step": 1546 }, { - "epoch": 0.04383802318002777, + "epoch": 0.06052899287894201, "grad_norm": 0.0, - "learning_rate": 1.9989971799798308e-05, - "loss": 1.0796, + "learning_rate": 1.995118905201811e-05, + "loss": 1.3726, "step": 1547 }, { - "epoch": 0.04386636062229023, + "epoch": 0.060568119571171454, "grad_norm": 0.0, - "learning_rate": 1.9989930665413148e-05, - "loss": 1.1499, + "learning_rate": 1.9951063917755897e-05, + "loss": 1.1849, "step": 1548 }, { - "epoch": 0.04389469806455269, + "epoch": 0.060607246263400895, "grad_norm": 0.0, - "learning_rate": 1.9989889446879092e-05, - "loss": 1.0745, + "learning_rate": 1.9950938623692066e-05, + "loss": 1.1852, "step": 1549 }, { - "epoch": 0.04392303550681515, + "epoch": 0.06064637295563033, "grad_norm": 0.0, - "learning_rate": 1.9989848144196488e-05, - "loss": 1.0678, + "learning_rate": 1.995081316982863e-05, + "loss": 1.3372, "step": 1550 }, { - "epoch": 0.04395137294907762, + "epoch": 0.06068549964785977, "grad_norm": 0.0, - "learning_rate": 1.9989806757365686e-05, - "loss": 1.1985, + "learning_rate": 1.9950687556167606e-05, + "loss": 1.2148, "step": 1551 }, { - "epoch": 0.04397971039134008, + "epoch": 0.06072462634008921, "grad_norm": 0.0, - "learning_rate": 1.998976528638703e-05, - "loss": 1.1352, + "learning_rate": 1.995056178271101e-05, + "loss": 1.3265, "step": 1552 }, { - "epoch": 0.044008047833602536, + "epoch": 0.060763753032318644, "grad_norm": 0.0, - "learning_rate": 1.998972373126087e-05, - "loss": 1.1768, + "learning_rate": 1.9950435849460856e-05, + "loss": 1.3611, "step": 1553 }, { - "epoch": 0.044036385275865, + "epoch": 0.060802879724548085, "grad_norm": 0.0, - "learning_rate": 1.9989682091987558e-05, - "loss": 1.1711, + "learning_rate": 1.9950309756419174e-05, + "loss": 1.2398, "step": 1554 }, { - "epoch": 0.04406472271812746, + "epoch": 0.060842006416777526, "grad_norm": 0.0, - "learning_rate": 1.9989640368567446e-05, - "loss": 1.104, + "learning_rate": 1.9950183503587987e-05, + "loss": 1.4207, "step": 1555 }, { - "epoch": 0.04409306016038992, + "epoch": 0.06088113310900697, "grad_norm": 0.0, - "learning_rate": 1.9989598561000882e-05, - "loss": 1.0659, + "learning_rate": 1.9950057090969315e-05, + "loss": 1.3572, "step": 1556 }, { - "epoch": 0.044121397602652386, + "epoch": 0.0609202598012364, "grad_norm": 0.0, - "learning_rate": 1.9989556669288222e-05, - "loss": 1.0741, + "learning_rate": 1.99499305185652e-05, + "loss": 1.0651, "step": 1557 }, { - "epoch": 0.044149735044914845, + "epoch": 0.06095938649346584, "grad_norm": 0.0, - "learning_rate": 1.9989514693429815e-05, - "loss": 1.1014, + "learning_rate": 1.9949803786377665e-05, + "loss": 1.2227, "step": 1558 }, { - "epoch": 0.044178072487177304, + "epoch": 0.06099851318569528, "grad_norm": 0.0, - "learning_rate": 1.9989472633426016e-05, - "loss": 1.2155, + "learning_rate": 1.994967689440875e-05, + "loss": 1.1921, "step": 1559 }, { - "epoch": 0.04420640992943977, + "epoch": 0.061037639877924724, "grad_norm": 0.0, - "learning_rate": 1.998943048927718e-05, - "loss": 1.2013, + "learning_rate": 1.9949549842660495e-05, + "loss": 1.2289, "step": 1560 }, { - "epoch": 0.04423474737170223, + "epoch": 0.06107676657015416, "grad_norm": 0.0, - "learning_rate": 1.998938826098366e-05, - "loss": 1.159, + "learning_rate": 1.9949422631134934e-05, + "loss": 1.3741, "step": 1561 }, { - "epoch": 0.04426308481396469, + "epoch": 0.0611158932623836, "grad_norm": 0.0, - "learning_rate": 1.9989345948545816e-05, - "loss": 0.9875, + "learning_rate": 1.9949295259834114e-05, + "loss": 1.3209, "step": 1562 }, { - "epoch": 0.044291422256227155, + "epoch": 0.06115501995461304, "grad_norm": 0.0, - "learning_rate": 1.9989303551963996e-05, - "loss": 1.1832, + "learning_rate": 1.994916772876008e-05, + "loss": 1.3702, "step": 1563 }, { - "epoch": 0.044319759698489614, + "epoch": 0.06119414664684247, "grad_norm": 0.0, - "learning_rate": 1.998926107123857e-05, - "loss": 1.1692, + "learning_rate": 1.9949040037914876e-05, + "loss": 1.232, "step": 1564 }, { - "epoch": 0.04434809714075207, + "epoch": 0.061233273339071914, "grad_norm": 0.0, - "learning_rate": 1.9989218506369883e-05, - "loss": 1.1447, + "learning_rate": 1.994891218730056e-05, + "loss": 1.2184, "step": 1565 }, { - "epoch": 0.04437643458301454, + "epoch": 0.061272400031301355, "grad_norm": 0.0, - "learning_rate": 1.9989175857358303e-05, - "loss": 1.2132, + "learning_rate": 1.994878417691918e-05, + "loss": 1.2621, "step": 1566 }, { - "epoch": 0.044404772025277, + "epoch": 0.061311526723530796, "grad_norm": 0.0, - "learning_rate": 1.9989133124204183e-05, - "loss": 1.1251, + "learning_rate": 1.994865600677279e-05, + "loss": 1.1561, "step": 1567 }, { - "epoch": 0.04443310946753946, + "epoch": 0.06135065341576023, "grad_norm": 0.0, - "learning_rate": 1.9989090306907885e-05, - "loss": 1.1332, + "learning_rate": 1.9948527676863453e-05, + "loss": 1.2736, "step": 1568 }, { - "epoch": 0.04446144690980192, + "epoch": 0.06138978010798967, "grad_norm": 0.0, - "learning_rate": 1.9989047405469772e-05, - "loss": 1.0839, + "learning_rate": 1.994839918719323e-05, + "loss": 1.2659, "step": 1569 }, { - "epoch": 0.04448978435206438, + "epoch": 0.06142890680021911, "grad_norm": 0.0, - "learning_rate": 1.9989004419890202e-05, - "loss": 1.167, + "learning_rate": 1.9948270537764177e-05, + "loss": 1.1981, "step": 1570 }, { - "epoch": 0.04451812179432684, + "epoch": 0.061468033492448546, "grad_norm": 0.0, - "learning_rate": 1.998896135016954e-05, - "loss": 1.0933, + "learning_rate": 1.9948141728578366e-05, + "loss": 1.2188, "step": 1571 }, { - "epoch": 0.04454645923658931, + "epoch": 0.061507160184677986, "grad_norm": 0.0, - "learning_rate": 1.9988918196308146e-05, - "loss": 1.2792, + "learning_rate": 1.9948012759637865e-05, + "loss": 1.3298, "step": 1572 }, { - "epoch": 0.044574796678851766, + "epoch": 0.06154628687690743, "grad_norm": 0.0, - "learning_rate": 1.9988874958306385e-05, - "loss": 1.2734, + "learning_rate": 1.994788363094475e-05, + "loss": 1.21, "step": 1573 }, { - "epoch": 0.044603134121114225, + "epoch": 0.06158541356913687, "grad_norm": 0.0, - "learning_rate": 1.998883163616462e-05, - "loss": 1.113, + "learning_rate": 1.9947754342501082e-05, + "loss": 1.3038, "step": 1574 }, { - "epoch": 0.04463147156337669, + "epoch": 0.0616245402613663, "grad_norm": 0.0, - "learning_rate": 1.9988788229883215e-05, - "loss": 1.141, + "learning_rate": 1.9947624894308947e-05, + "loss": 1.2179, "step": 1575 }, { - "epoch": 0.04465980900563915, + "epoch": 0.06166366695359574, "grad_norm": 0.0, - "learning_rate": 1.9988744739462542e-05, - "loss": 1.1528, + "learning_rate": 1.9947495286370423e-05, + "loss": 1.2944, "step": 1576 }, { - "epoch": 0.04468814644790161, + "epoch": 0.061702793645825184, "grad_norm": 0.0, - "learning_rate": 1.9988701164902963e-05, - "loss": 1.1722, + "learning_rate": 1.994736551868759e-05, + "loss": 1.2939, "step": 1577 }, { - "epoch": 0.044716483890164076, + "epoch": 0.06174192033805462, "grad_norm": 0.0, - "learning_rate": 1.998865750620484e-05, - "loss": 1.1647, + "learning_rate": 1.994723559126253e-05, + "loss": 1.2991, "step": 1578 }, { - "epoch": 0.044744821332426535, + "epoch": 0.06178104703028406, "grad_norm": 0.0, - "learning_rate": 1.9988613763368548e-05, - "loss": 1.1084, + "learning_rate": 1.9947105504097328e-05, + "loss": 1.3268, "step": 1579 }, { - "epoch": 0.044773158774688994, + "epoch": 0.0618201737225135, "grad_norm": 0.0, - "learning_rate": 1.9988569936394454e-05, - "loss": 1.1717, + "learning_rate": 1.994697525719408e-05, + "loss": 1.1271, "step": 1580 }, { - "epoch": 0.04480149621695146, + "epoch": 0.06185930041474294, "grad_norm": 0.0, - "learning_rate": 1.9988526025282925e-05, - "loss": 1.192, + "learning_rate": 1.994684485055487e-05, + "loss": 1.1627, "step": 1581 }, { - "epoch": 0.04482983365921392, + "epoch": 0.061898427106972374, "grad_norm": 0.0, - "learning_rate": 1.9988482030034336e-05, - "loss": 1.1396, + "learning_rate": 1.9946714284181802e-05, + "loss": 1.2455, "step": 1582 }, { - "epoch": 0.04485817110147638, + "epoch": 0.061937553799201815, "grad_norm": 0.0, - "learning_rate": 1.9988437950649052e-05, - "loss": 1.0624, + "learning_rate": 1.994658355807696e-05, + "loss": 1.4002, "step": 1583 }, { - "epoch": 0.044886508543738844, + "epoch": 0.061976680491431256, "grad_norm": 0.0, - "learning_rate": 1.9988393787127444e-05, - "loss": 1.0321, + "learning_rate": 1.9946452672242452e-05, + "loss": 1.2309, "step": 1584 }, { - "epoch": 0.0449148459860013, + "epoch": 0.06201580718366069, "grad_norm": 0.0, - "learning_rate": 1.9988349539469887e-05, - "loss": 1.1952, + "learning_rate": 1.9946321626680382e-05, + "loss": 1.1601, "step": 1585 }, { - "epoch": 0.04494318342826376, + "epoch": 0.06205493387589013, "grad_norm": 0.0, - "learning_rate": 1.998830520767675e-05, - "loss": 1.0956, + "learning_rate": 1.9946190421392845e-05, + "loss": 1.2192, "step": 1586 }, { - "epoch": 0.04497152087052623, + "epoch": 0.06209406056811957, "grad_norm": 0.0, - "learning_rate": 1.998826079174841e-05, - "loss": 1.044, + "learning_rate": 1.9946059056381952e-05, + "loss": 1.3606, "step": 1587 }, { - "epoch": 0.04499985831278869, + "epoch": 0.06213318726034901, "grad_norm": 0.0, - "learning_rate": 1.9988216291685245e-05, - "loss": 1.1883, + "learning_rate": 1.9945927531649816e-05, + "loss": 1.3104, "step": 1588 }, { - "epoch": 0.04502819575505115, + "epoch": 0.06217231395257845, "grad_norm": 0.0, - "learning_rate": 1.9988171707487624e-05, - "loss": 1.1112, + "learning_rate": 1.9945795847198547e-05, + "loss": 1.2811, "step": 1589 }, { - "epoch": 0.04505653319731361, + "epoch": 0.06221144064480789, "grad_norm": 0.0, - "learning_rate": 1.998812703915592e-05, - "loss": 1.2099, + "learning_rate": 1.994566400303026e-05, + "loss": 1.2063, "step": 1590 }, { - "epoch": 0.04508487063957607, + "epoch": 0.06225056733703733, "grad_norm": 0.0, - "learning_rate": 1.9988082286690512e-05, - "loss": 1.2394, + "learning_rate": 1.9945531999147073e-05, + "loss": 1.2283, "step": 1591 }, { - "epoch": 0.04511320808183853, + "epoch": 0.06228969402926677, "grad_norm": 0.0, - "learning_rate": 1.998803745009178e-05, - "loss": 1.0596, + "learning_rate": 1.99453998355511e-05, + "loss": 1.1428, "step": 1592 }, { - "epoch": 0.045141545524101, + "epoch": 0.0623288207214962, "grad_norm": 0.0, - "learning_rate": 1.9987992529360105e-05, - "loss": 1.1722, + "learning_rate": 1.9945267512244473e-05, + "loss": 1.2547, "step": 1593 }, { - "epoch": 0.045169882966363456, + "epoch": 0.062367947413725644, "grad_norm": 0.0, - "learning_rate": 1.9987947524495853e-05, - "loss": 1.0988, + "learning_rate": 1.994513502922931e-05, + "loss": 1.1899, "step": 1594 }, { - "epoch": 0.045198220408625915, + "epoch": 0.062407074105955085, "grad_norm": 0.0, - "learning_rate": 1.9987902435499412e-05, - "loss": 1.1262, + "learning_rate": 1.994500238650774e-05, + "loss": 1.2372, "step": 1595 }, { - "epoch": 0.04522655785088838, + "epoch": 0.06244620079818452, "grad_norm": 0.0, - "learning_rate": 1.998785726237116e-05, - "loss": 1.1632, + "learning_rate": 1.994486958408189e-05, + "loss": 1.3384, "step": 1596 }, { - "epoch": 0.04525489529315084, + "epoch": 0.06248532749041396, "grad_norm": 0.0, - "learning_rate": 1.998781200511148e-05, - "loss": 1.1863, + "learning_rate": 1.99447366219539e-05, + "loss": 1.1882, "step": 1597 }, { - "epoch": 0.0452832327354133, + "epoch": 0.0625244541826434, "grad_norm": 0.0, - "learning_rate": 1.9987766663720746e-05, - "loss": 1.1689, + "learning_rate": 1.9944603500125905e-05, + "loss": 1.3441, "step": 1598 }, { - "epoch": 0.045311570177675765, + "epoch": 0.06256358087487283, "grad_norm": 0.0, - "learning_rate": 1.9987721238199345e-05, - "loss": 1.2169, + "learning_rate": 1.994447021860003e-05, + "loss": 1.2297, "step": 1599 }, { - "epoch": 0.045339907619938224, + "epoch": 0.06260270756710228, "grad_norm": 0.0, - "learning_rate": 1.9987675728547665e-05, - "loss": 1.0249, + "learning_rate": 1.994433677737843e-05, + "loss": 1.3197, "step": 1600 }, { - "epoch": 0.045368245062200684, + "epoch": 0.06264183425933172, "grad_norm": 0.0, - "learning_rate": 1.998763013476608e-05, - "loss": 1.1408, + "learning_rate": 1.9944203176463244e-05, + "loss": 1.3176, "step": 1601 }, { - "epoch": 0.04539658250446315, + "epoch": 0.06268096095156116, "grad_norm": 0.0, - "learning_rate": 1.9987584456854977e-05, - "loss": 1.1243, + "learning_rate": 1.9944069415856612e-05, + "loss": 1.29, "step": 1602 }, { - "epoch": 0.04542491994672561, + "epoch": 0.0627200876437906, "grad_norm": 0.0, - "learning_rate": 1.998753869481474e-05, - "loss": 1.0693, + "learning_rate": 1.9943935495560688e-05, + "loss": 1.0977, "step": 1603 }, { - "epoch": 0.04545325738898807, + "epoch": 0.06275921433602004, "grad_norm": 0.0, - "learning_rate": 1.998749284864576e-05, - "loss": 1.056, + "learning_rate": 1.9943801415577616e-05, + "loss": 1.2789, "step": 1604 }, { - "epoch": 0.045481594831250534, + "epoch": 0.06279834102824947, "grad_norm": 0.0, - "learning_rate": 1.9987446918348412e-05, - "loss": 1.1372, + "learning_rate": 1.994366717590956e-05, + "loss": 1.2147, "step": 1605 }, { - "epoch": 0.04550993227351299, + "epoch": 0.0628374677204789, "grad_norm": 0.0, - "learning_rate": 1.9987400903923096e-05, - "loss": 1.029, + "learning_rate": 1.9943532776558665e-05, + "loss": 1.3025, "step": 1606 }, { - "epoch": 0.04553826971577545, + "epoch": 0.06287659441270835, "grad_norm": 0.0, - "learning_rate": 1.998735480537019e-05, - "loss": 1.2014, + "learning_rate": 1.9943398217527094e-05, + "loss": 1.2639, "step": 1607 }, { - "epoch": 0.04556660715803792, + "epoch": 0.06291572110493779, "grad_norm": 0.0, - "learning_rate": 1.9987308622690087e-05, - "loss": 1.1394, + "learning_rate": 1.994326349881701e-05, + "loss": 1.29, "step": 1608 }, { - "epoch": 0.04559494460030038, + "epoch": 0.06295484779716723, "grad_norm": 0.0, - "learning_rate": 1.9987262355883173e-05, - "loss": 1.1237, + "learning_rate": 1.994312862043057e-05, + "loss": 1.3134, "step": 1609 }, { - "epoch": 0.045623282042562836, + "epoch": 0.06299397448939667, "grad_norm": 0.0, - "learning_rate": 1.9987216004949845e-05, - "loss": 1.1449, + "learning_rate": 1.9942993582369947e-05, + "loss": 1.2051, "step": 1610 }, { - "epoch": 0.0456516194848253, + "epoch": 0.06303310118162611, "grad_norm": 0.0, - "learning_rate": 1.998716956989048e-05, - "loss": 1.1588, + "learning_rate": 1.9942858384637306e-05, + "loss": 1.326, "step": 1611 }, { - "epoch": 0.04567995692708776, + "epoch": 0.06307222787385554, "grad_norm": 0.0, - "learning_rate": 1.9987123050705483e-05, - "loss": 1.2648, + "learning_rate": 1.9942723027234817e-05, + "loss": 1.1853, "step": 1612 }, { - "epoch": 0.04570829436935022, + "epoch": 0.06311135456608498, "grad_norm": 0.0, - "learning_rate": 1.998707644739524e-05, - "loss": 1.068, + "learning_rate": 1.9942587510164657e-05, + "loss": 1.2507, "step": 1613 }, { - "epoch": 0.04573663181161269, + "epoch": 0.06315048125831442, "grad_norm": 0.0, - "learning_rate": 1.9987029759960142e-05, - "loss": 1.092, + "learning_rate": 1.9942451833429e-05, + "loss": 1.2822, "step": 1614 }, { - "epoch": 0.045764969253875146, + "epoch": 0.06318960795054386, "grad_norm": 0.0, - "learning_rate": 1.9986982988400582e-05, - "loss": 1.1199, + "learning_rate": 1.9942315997030022e-05, + "loss": 1.2184, "step": 1615 }, { - "epoch": 0.045793306696137605, + "epoch": 0.0632287346427733, "grad_norm": 0.0, - "learning_rate": 1.9986936132716956e-05, - "loss": 1.0711, + "learning_rate": 1.994218000096991e-05, + "loss": 1.1006, "step": 1616 }, { - "epoch": 0.04582164413840007, + "epoch": 0.06326786133500274, "grad_norm": 0.0, - "learning_rate": 1.9986889192909663e-05, - "loss": 1.1003, + "learning_rate": 1.9942043845250845e-05, + "loss": 1.2996, "step": 1617 }, { - "epoch": 0.04584998158066253, + "epoch": 0.06330698802723218, "grad_norm": 0.0, - "learning_rate": 1.9986842168979087e-05, - "loss": 1.1393, + "learning_rate": 1.994190752987501e-05, + "loss": 1.3077, "step": 1618 }, { - "epoch": 0.04587831902292499, + "epoch": 0.06334611471946161, "grad_norm": 0.0, - "learning_rate": 1.9986795060925636e-05, - "loss": 1.1565, + "learning_rate": 1.9941771054844604e-05, + "loss": 1.2872, "step": 1619 }, { - "epoch": 0.045906656465187455, + "epoch": 0.06338524141169105, "grad_norm": 0.0, - "learning_rate": 1.99867478687497e-05, - "loss": 1.1716, + "learning_rate": 1.9941634420161812e-05, + "loss": 1.3132, "step": 1620 }, { - "epoch": 0.045934993907449914, + "epoch": 0.06342436810392049, "grad_norm": 0.0, - "learning_rate": 1.998670059245168e-05, - "loss": 1.0574, + "learning_rate": 1.9941497625828827e-05, + "loss": 1.2499, "step": 1621 }, { - "epoch": 0.04596333134971237, + "epoch": 0.06346349479614993, "grad_norm": 0.0, - "learning_rate": 1.9986653232031968e-05, - "loss": 1.1005, + "learning_rate": 1.9941360671847847e-05, + "loss": 1.2687, "step": 1622 }, { - "epoch": 0.04599166879197484, + "epoch": 0.06350262148837937, "grad_norm": 0.0, - "learning_rate": 1.998660578749097e-05, - "loss": 1.1426, + "learning_rate": 1.9941223558221073e-05, + "loss": 1.4062, "step": 1623 }, { - "epoch": 0.0460200062342373, + "epoch": 0.06354174818060881, "grad_norm": 0.0, - "learning_rate": 1.9986558258829082e-05, - "loss": 1.0476, + "learning_rate": 1.9941086284950706e-05, + "loss": 1.2335, "step": 1624 }, { - "epoch": 0.04604834367649976, + "epoch": 0.06358087487283826, "grad_norm": 0.0, - "learning_rate": 1.998651064604671e-05, - "loss": 1.1653, + "learning_rate": 1.994094885203895e-05, + "loss": 1.2642, "step": 1625 }, { - "epoch": 0.046076681118762224, + "epoch": 0.06362000156506768, "grad_norm": 0.0, - "learning_rate": 1.9986462949144245e-05, - "loss": 1.0946, + "learning_rate": 1.9940811259488012e-05, + "loss": 1.3231, "step": 1626 }, { - "epoch": 0.04610501856102468, + "epoch": 0.06365912825729712, "grad_norm": 0.0, - "learning_rate": 1.9986415168122094e-05, - "loss": 1.2324, + "learning_rate": 1.99406735073001e-05, + "loss": 1.2703, "step": 1627 }, { - "epoch": 0.04613335600328714, + "epoch": 0.06369825494952656, "grad_norm": 0.0, - "learning_rate": 1.998636730298066e-05, - "loss": 1.1582, + "learning_rate": 1.994053559547743e-05, + "loss": 1.2773, "step": 1628 }, { - "epoch": 0.04616169344554961, + "epoch": 0.063737381641756, "grad_norm": 0.0, - "learning_rate": 1.9986319353720353e-05, - "loss": 1.1306, + "learning_rate": 1.9940397524022213e-05, + "loss": 1.1769, "step": 1629 }, { - "epoch": 0.04619003088781207, + "epoch": 0.06377650833398545, "grad_norm": 0.0, - "learning_rate": 1.998627132034156e-05, - "loss": 1.1763, + "learning_rate": 1.994025929293667e-05, + "loss": 1.2841, "step": 1630 }, { - "epoch": 0.046218368330074526, + "epoch": 0.06381563502621489, "grad_norm": 0.0, - "learning_rate": 1.99862232028447e-05, - "loss": 1.1631, + "learning_rate": 1.994012090222302e-05, + "loss": 1.257, "step": 1631 }, { - "epoch": 0.04624670577233699, + "epoch": 0.06385476171844433, "grad_norm": 0.0, - "learning_rate": 1.9986175001230173e-05, - "loss": 1.0751, + "learning_rate": 1.993998235188348e-05, + "loss": 1.3123, "step": 1632 }, { - "epoch": 0.04627504321459945, + "epoch": 0.06389388841067375, "grad_norm": 0.0, - "learning_rate": 1.9986126715498386e-05, - "loss": 1.1131, + "learning_rate": 1.993984364192028e-05, + "loss": 1.3221, "step": 1633 }, { - "epoch": 0.04630338065686191, + "epoch": 0.0639330151029032, "grad_norm": 0.0, - "learning_rate": 1.998607834564975e-05, - "loss": 1.1818, + "learning_rate": 1.9939704772335645e-05, + "loss": 1.3371, "step": 1634 }, { - "epoch": 0.046331718099124376, + "epoch": 0.06397214179513264, "grad_norm": 0.0, - "learning_rate": 1.998602989168466e-05, - "loss": 1.1834, + "learning_rate": 1.993956574313181e-05, + "loss": 1.1579, "step": 1635 }, { - "epoch": 0.046360055541386835, + "epoch": 0.06401126848736208, "grad_norm": 0.0, - "learning_rate": 1.9985981353603536e-05, - "loss": 1.1015, + "learning_rate": 1.9939426554311e-05, + "loss": 1.3828, "step": 1636 }, { - "epoch": 0.046388392983649294, + "epoch": 0.06405039517959152, "grad_norm": 0.0, - "learning_rate": 1.9985932731406782e-05, - "loss": 1.1154, + "learning_rate": 1.9939287205875456e-05, + "loss": 1.2478, "step": 1637 }, { - "epoch": 0.04641673042591176, + "epoch": 0.06408952187182096, "grad_norm": 0.0, - "learning_rate": 1.998588402509481e-05, - "loss": 1.132, + "learning_rate": 1.9939147697827415e-05, + "loss": 1.1887, "step": 1638 }, { - "epoch": 0.04644506786817422, + "epoch": 0.0641286485640504, "grad_norm": 0.0, - "learning_rate": 1.9985835234668025e-05, - "loss": 1.1398, + "learning_rate": 1.9939008030169117e-05, + "loss": 1.3044, "step": 1639 }, { - "epoch": 0.04647340531043668, + "epoch": 0.06416777525627984, "grad_norm": 0.0, - "learning_rate": 1.9985786360126845e-05, - "loss": 1.0533, + "learning_rate": 1.99388682029028e-05, + "loss": 1.3647, "step": 1640 }, { - "epoch": 0.046501742752699145, + "epoch": 0.06420690194850927, "grad_norm": 0.0, - "learning_rate": 1.9985737401471677e-05, - "loss": 1.1557, + "learning_rate": 1.993872821603072e-05, + "loss": 1.3687, "step": 1641 }, { - "epoch": 0.046530080194961604, + "epoch": 0.06424602864073871, "grad_norm": 0.0, - "learning_rate": 1.9985688358702933e-05, - "loss": 1.0483, + "learning_rate": 1.9938588069555116e-05, + "loss": 1.2043, "step": 1642 }, { - "epoch": 0.04655841763722406, + "epoch": 0.06428515533296815, "grad_norm": 0.0, - "learning_rate": 1.9985639231821033e-05, - "loss": 1.1147, + "learning_rate": 1.993844776347824e-05, + "loss": 1.3668, "step": 1643 }, { - "epoch": 0.04658675507948653, + "epoch": 0.06432428202519759, "grad_norm": 0.0, - "learning_rate": 1.9985590020826382e-05, - "loss": 1.0967, + "learning_rate": 1.993830729780235e-05, + "loss": 1.3565, "step": 1644 }, { - "epoch": 0.04661509252174899, + "epoch": 0.06436340871742703, "grad_norm": 0.0, - "learning_rate": 1.9985540725719398e-05, - "loss": 1.1373, + "learning_rate": 1.9938166672529695e-05, + "loss": 1.4568, "step": 1645 }, { - "epoch": 0.04664342996401145, + "epoch": 0.06440253540965647, "grad_norm": 0.0, - "learning_rate": 1.9985491346500497e-05, - "loss": 1.1015, + "learning_rate": 1.993802588766254e-05, + "loss": 1.2877, "step": 1646 }, { - "epoch": 0.046671767406273906, + "epoch": 0.06444166210188591, "grad_norm": 0.0, - "learning_rate": 1.9985441883170096e-05, - "loss": 1.1761, + "learning_rate": 1.9937884943203137e-05, + "loss": 1.1927, "step": 1647 }, { - "epoch": 0.04670010484853637, + "epoch": 0.06448078879411534, "grad_norm": 0.0, - "learning_rate": 1.998539233572861e-05, - "loss": 1.1247, + "learning_rate": 1.9937743839153757e-05, + "loss": 1.3301, "step": 1648 }, { - "epoch": 0.04672844229079883, + "epoch": 0.06451991548634478, "grad_norm": 0.0, - "learning_rate": 1.998534270417645e-05, - "loss": 1.1121, + "learning_rate": 1.9937602575516664e-05, + "loss": 1.1594, "step": 1649 }, { - "epoch": 0.04675677973306129, + "epoch": 0.06455904217857422, "grad_norm": 0.0, - "learning_rate": 1.9985292988514048e-05, - "loss": 1.065, + "learning_rate": 1.9937461152294125e-05, + "loss": 1.2841, "step": 1650 }, { - "epoch": 0.04678511717532376, + "epoch": 0.06459816887080366, "grad_norm": 0.0, - "learning_rate": 1.998524318874181e-05, - "loss": 1.1384, + "learning_rate": 1.9937319569488414e-05, + "loss": 1.2, "step": 1651 }, { - "epoch": 0.046813454617586216, + "epoch": 0.0646372955630331, "grad_norm": 0.0, - "learning_rate": 1.9985193304860165e-05, - "loss": 1.1734, + "learning_rate": 1.99371778271018e-05, + "loss": 1.1874, "step": 1652 }, { - "epoch": 0.046841792059848675, + "epoch": 0.06467642225526254, "grad_norm": 0.0, - "learning_rate": 1.9985143336869527e-05, - "loss": 1.222, + "learning_rate": 1.9937035925136566e-05, + "loss": 1.2169, "step": 1653 }, { - "epoch": 0.04687012950211114, + "epoch": 0.06471554894749199, "grad_norm": 0.0, - "learning_rate": 1.998509328477032e-05, - "loss": 1.2401, + "learning_rate": 1.9936893863594986e-05, + "loss": 1.2186, "step": 1654 }, { - "epoch": 0.0468984669443736, + "epoch": 0.06475467563972141, "grad_norm": 0.0, - "learning_rate": 1.9985043148562962e-05, - "loss": 1.1404, + "learning_rate": 1.993675164247934e-05, + "loss": 1.3478, "step": 1655 }, { - "epoch": 0.04692680438663606, + "epoch": 0.06479380233195085, "grad_norm": 0.0, - "learning_rate": 1.998499292824788e-05, - "loss": 1.1152, + "learning_rate": 1.9936609261791917e-05, + "loss": 1.4156, "step": 1656 }, { - "epoch": 0.046955141828898525, + "epoch": 0.0648329290241803, "grad_norm": 0.0, - "learning_rate": 1.9984942623825495e-05, - "loss": 1.23, + "learning_rate": 1.9936466721534996e-05, + "loss": 1.3256, "step": 1657 }, { - "epoch": 0.046983479271160984, + "epoch": 0.06487205571640974, "grad_norm": 0.0, - "learning_rate": 1.9984892235296226e-05, - "loss": 1.0758, + "learning_rate": 1.9936324021710874e-05, + "loss": 1.275, "step": 1658 }, { - "epoch": 0.04701181671342344, + "epoch": 0.06491118240863918, "grad_norm": 0.0, - "learning_rate": 1.9984841762660508e-05, - "loss": 1.1927, + "learning_rate": 1.9936181162321842e-05, + "loss": 1.3887, "step": 1659 }, { - "epoch": 0.04704015415568591, + "epoch": 0.06495030910086862, "grad_norm": 0.0, - "learning_rate": 1.9984791205918755e-05, - "loss": 1.145, + "learning_rate": 1.9936038143370187e-05, + "loss": 1.2706, "step": 1660 }, { - "epoch": 0.04706849159794837, + "epoch": 0.06498943579309806, "grad_norm": 0.0, - "learning_rate": 1.9984740565071405e-05, - "loss": 1.1656, + "learning_rate": 1.9935894964858212e-05, + "loss": 1.1506, "step": 1661 }, { - "epoch": 0.04709682904021083, + "epoch": 0.06502856248532748, "grad_norm": 0.0, - "learning_rate": 1.998468984011887e-05, - "loss": 1.2784, + "learning_rate": 1.9935751626788212e-05, + "loss": 1.1631, "step": 1662 }, { - "epoch": 0.047125166482473294, + "epoch": 0.06506768917755693, "grad_norm": 0.0, - "learning_rate": 1.9984639031061584e-05, - "loss": 1.1433, + "learning_rate": 1.993560812916249e-05, + "loss": 1.324, "step": 1663 }, { - "epoch": 0.04715350392473575, + "epoch": 0.06510681586978637, "grad_norm": 0.0, - "learning_rate": 1.998458813789998e-05, - "loss": 1.0957, + "learning_rate": 1.9935464471983354e-05, + "loss": 1.1899, "step": 1664 }, { - "epoch": 0.04718184136699821, + "epoch": 0.06514594256201581, "grad_norm": 0.0, - "learning_rate": 1.9984537160634482e-05, - "loss": 1.1377, + "learning_rate": 1.9935320655253107e-05, + "loss": 1.1978, "step": 1665 }, { - "epoch": 0.04721017880926068, + "epoch": 0.06518506925424525, "grad_norm": 0.0, - "learning_rate": 1.9984486099265522e-05, - "loss": 1.141, + "learning_rate": 1.993517667897406e-05, + "loss": 1.2755, "step": 1666 }, { - "epoch": 0.04723851625152314, + "epoch": 0.06522419594647469, "grad_norm": 0.0, - "learning_rate": 1.9984434953793523e-05, - "loss": 1.0675, + "learning_rate": 1.993503254314853e-05, + "loss": 1.2968, "step": 1667 }, { - "epoch": 0.047266853693785596, + "epoch": 0.06526332263870413, "grad_norm": 0.0, - "learning_rate": 1.9984383724218924e-05, - "loss": 1.0986, + "learning_rate": 1.9934888247778823e-05, + "loss": 1.2979, "step": 1668 }, { - "epoch": 0.04729519113604806, + "epoch": 0.06530244933093356, "grad_norm": 0.0, - "learning_rate": 1.9984332410542153e-05, - "loss": 1.1381, + "learning_rate": 1.993474379286726e-05, + "loss": 1.2623, "step": 1669 }, { - "epoch": 0.04732352857831052, + "epoch": 0.065341576023163, "grad_norm": 0.0, - "learning_rate": 1.998428101276364e-05, - "loss": 1.1016, + "learning_rate": 1.9934599178416158e-05, + "loss": 1.3064, "step": 1670 }, { - "epoch": 0.04735186602057298, + "epoch": 0.06538070271539244, "grad_norm": 0.0, - "learning_rate": 1.9984229530883822e-05, - "loss": 1.1666, + "learning_rate": 1.9934454404427845e-05, + "loss": 1.3279, "step": 1671 }, { - "epoch": 0.047380203462835446, + "epoch": 0.06541982940762188, "grad_norm": 0.0, - "learning_rate": 1.9984177964903133e-05, - "loss": 1.1497, + "learning_rate": 1.993430947090464e-05, + "loss": 1.2786, "step": 1672 }, { - "epoch": 0.047408540905097905, + "epoch": 0.06545895609985132, "grad_norm": 0.0, - "learning_rate": 1.9984126314822002e-05, - "loss": 1.2125, + "learning_rate": 1.9934164377848873e-05, + "loss": 1.343, "step": 1673 }, { - "epoch": 0.047436878347360364, + "epoch": 0.06549808279208076, "grad_norm": 0.0, - "learning_rate": 1.998407458064087e-05, - "loss": 1.067, + "learning_rate": 1.993401912526288e-05, + "loss": 1.3094, "step": 1674 }, { - "epoch": 0.04746521578962283, + "epoch": 0.0655372094843102, "grad_norm": 0.0, - "learning_rate": 1.998402276236017e-05, - "loss": 1.1461, + "learning_rate": 1.9933873713148983e-05, + "loss": 1.2059, "step": 1675 }, { - "epoch": 0.04749355323188529, + "epoch": 0.06557633617653963, "grad_norm": 0.0, - "learning_rate": 1.998397085998034e-05, - "loss": 1.1654, + "learning_rate": 1.9933728141509524e-05, + "loss": 1.297, "step": 1676 }, { - "epoch": 0.04752189067414775, + "epoch": 0.06561546286876907, "grad_norm": 0.0, - "learning_rate": 1.998391887350181e-05, - "loss": 1.1968, + "learning_rate": 1.993358241034684e-05, + "loss": 1.1859, "step": 1677 }, { - "epoch": 0.047550228116410215, + "epoch": 0.06565458956099851, "grad_norm": 0.0, - "learning_rate": 1.9983866802925035e-05, - "loss": 1.2508, + "learning_rate": 1.9933436519663265e-05, + "loss": 1.2485, "step": 1678 }, { - "epoch": 0.047578565558672674, + "epoch": 0.06569371625322795, "grad_norm": 0.0, - "learning_rate": 1.9983814648250434e-05, - "loss": 1.1032, + "learning_rate": 1.993329046946115e-05, + "loss": 1.2874, "step": 1679 }, { - "epoch": 0.04760690300093513, + "epoch": 0.06573284294545739, "grad_norm": 0.0, - "learning_rate": 1.9983762409478457e-05, - "loss": 1.0652, + "learning_rate": 1.9933144259742837e-05, + "loss": 1.2261, "step": 1680 }, { - "epoch": 0.0476352404431976, + "epoch": 0.06577196963768683, "grad_norm": 0.0, - "learning_rate": 1.998371008660954e-05, - "loss": 1.1792, + "learning_rate": 1.9932997890510676e-05, + "loss": 1.2794, "step": 1681 }, { - "epoch": 0.04766357788546006, + "epoch": 0.06581109632991627, "grad_norm": 0.0, - "learning_rate": 1.9983657679644128e-05, - "loss": 1.1492, + "learning_rate": 1.9932851361767012e-05, + "loss": 1.1898, "step": 1682 }, { - "epoch": 0.04769191532772252, + "epoch": 0.0658502230221457, "grad_norm": 0.0, - "learning_rate": 1.9983605188582656e-05, - "loss": 1.1469, + "learning_rate": 1.9932704673514203e-05, + "loss": 1.2879, "step": 1683 }, { - "epoch": 0.04772025276998498, + "epoch": 0.06588934971437514, "grad_norm": 0.0, - "learning_rate": 1.9983552613425575e-05, - "loss": 1.0813, + "learning_rate": 1.9932557825754604e-05, + "loss": 1.376, "step": 1684 }, { - "epoch": 0.04774859021224744, + "epoch": 0.06592847640660458, "grad_norm": 0.0, - "learning_rate": 1.998349995417332e-05, - "loss": 1.1306, + "learning_rate": 1.9932410818490573e-05, + "loss": 1.3007, "step": 1685 }, { - "epoch": 0.0477769276545099, + "epoch": 0.06596760309883402, "grad_norm": 0.0, - "learning_rate": 1.9983447210826342e-05, - "loss": 1.2669, + "learning_rate": 1.9932263651724467e-05, + "loss": 1.2952, "step": 1686 }, { - "epoch": 0.04780526509677237, + "epoch": 0.06600672979106346, "grad_norm": 0.0, - "learning_rate": 1.998339438338508e-05, - "loss": 1.1578, + "learning_rate": 1.9932116325458656e-05, + "loss": 1.2074, "step": 1687 }, { - "epoch": 0.04783360253903483, + "epoch": 0.0660458564832929, "grad_norm": 0.0, - "learning_rate": 1.9983341471849974e-05, - "loss": 1.0731, + "learning_rate": 1.99319688396955e-05, + "loss": 1.2127, "step": 1688 }, { - "epoch": 0.047861939981297286, + "epoch": 0.06608498317552235, "grad_norm": 0.0, - "learning_rate": 1.9983288476221482e-05, - "loss": 1.1475, + "learning_rate": 1.9931821194437374e-05, + "loss": 1.1739, "step": 1689 }, { - "epoch": 0.04789027742355975, + "epoch": 0.06612410986775177, "grad_norm": 0.0, - "learning_rate": 1.998323539650004e-05, - "loss": 1.062, + "learning_rate": 1.9931673389686642e-05, + "loss": 1.097, "step": 1690 }, { - "epoch": 0.04791861486582221, + "epoch": 0.06616323655998121, "grad_norm": 0.0, - "learning_rate": 1.99831822326861e-05, - "loss": 1.1561, + "learning_rate": 1.9931525425445678e-05, + "loss": 1.1993, "step": 1691 }, { - "epoch": 0.04794695230808467, + "epoch": 0.06620236325221066, "grad_norm": 0.0, - "learning_rate": 1.998312898478011e-05, - "loss": 1.0126, + "learning_rate": 1.9931377301716867e-05, + "loss": 1.3179, "step": 1692 }, { - "epoch": 0.047975289750347136, + "epoch": 0.0662414899444401, "grad_norm": 0.0, - "learning_rate": 1.9983075652782516e-05, - "loss": 1.1917, + "learning_rate": 1.9931229018502577e-05, + "loss": 1.295, "step": 1693 }, { - "epoch": 0.048003627192609595, + "epoch": 0.06628061663666954, "grad_norm": 0.0, - "learning_rate": 1.9983022236693767e-05, - "loss": 1.1014, + "learning_rate": 1.9931080575805193e-05, + "loss": 1.2687, "step": 1694 }, { - "epoch": 0.048031964634872054, + "epoch": 0.06631974332889898, "grad_norm": 0.0, - "learning_rate": 1.998296873651432e-05, - "loss": 1.2932, + "learning_rate": 1.9930931973627097e-05, + "loss": 1.399, "step": 1695 }, { - "epoch": 0.04806030207713452, + "epoch": 0.06635887002112842, "grad_norm": 0.0, - "learning_rate": 1.9982915152244617e-05, - "loss": 1.1955, + "learning_rate": 1.9930783211970682e-05, + "loss": 1.3044, "step": 1696 }, { - "epoch": 0.04808863951939698, + "epoch": 0.06639799671335785, "grad_norm": 0.0, - "learning_rate": 1.9982861483885113e-05, - "loss": 1.1049, + "learning_rate": 1.9930634290838332e-05, + "loss": 1.2271, "step": 1697 }, { - "epoch": 0.04811697696165944, + "epoch": 0.06643712340558729, "grad_norm": 0.0, - "learning_rate": 1.9982807731436257e-05, - "loss": 1.0198, + "learning_rate": 1.993048521023244e-05, + "loss": 1.3456, "step": 1698 }, { - "epoch": 0.048145314403921904, + "epoch": 0.06647625009781673, "grad_norm": 0.0, - "learning_rate": 1.9982753894898507e-05, - "loss": 1.013, + "learning_rate": 1.99303359701554e-05, + "loss": 1.1822, "step": 1699 }, { - "epoch": 0.048173651846184364, + "epoch": 0.06651537679004617, "grad_norm": 0.0, - "learning_rate": 1.9982699974272314e-05, - "loss": 1.018, + "learning_rate": 1.9930186570609602e-05, + "loss": 1.1696, "step": 1700 }, { - "epoch": 0.04820198928844682, + "epoch": 0.06655450348227561, "grad_norm": 0.0, - "learning_rate": 1.998264596955813e-05, - "loss": 1.1485, + "learning_rate": 1.9930037011597455e-05, + "loss": 1.3401, "step": 1701 }, { - "epoch": 0.04823032673070929, + "epoch": 0.06659363017450505, "grad_norm": 0.0, - "learning_rate": 1.9982591880756418e-05, - "loss": 1.1277, + "learning_rate": 1.9929887293121357e-05, + "loss": 1.2614, "step": 1702 }, { - "epoch": 0.04825866417297175, + "epoch": 0.06663275686673449, "grad_norm": 0.0, - "learning_rate": 1.998253770786762e-05, - "loss": 1.1297, + "learning_rate": 1.992973741518371e-05, + "loss": 1.0578, "step": 1703 }, { - "epoch": 0.04828700161523421, + "epoch": 0.06667188355896393, "grad_norm": 0.0, - "learning_rate": 1.9982483450892206e-05, - "loss": 1.135, + "learning_rate": 1.9929587377786924e-05, + "loss": 1.3089, "step": 1704 }, { - "epoch": 0.04831533905749667, + "epoch": 0.06671101025119336, "grad_norm": 0.0, - "learning_rate": 1.9982429109830625e-05, - "loss": 1.1642, + "learning_rate": 1.9929437180933407e-05, + "loss": 1.3494, "step": 1705 }, { - "epoch": 0.04834367649975913, + "epoch": 0.0667501369434228, "grad_norm": 0.0, - "learning_rate": 1.9982374684683337e-05, - "loss": 1.0823, + "learning_rate": 1.992928682462557e-05, + "loss": 1.1106, "step": 1706 }, { - "epoch": 0.04837201394202159, + "epoch": 0.06678926363565224, "grad_norm": 0.0, - "learning_rate": 1.9982320175450798e-05, - "loss": 1.1767, + "learning_rate": 1.9929136308865828e-05, + "loss": 1.2758, "step": 1707 }, { - "epoch": 0.04840035138428406, + "epoch": 0.06682839032788168, "grad_norm": 0.0, - "learning_rate": 1.998226558213347e-05, - "loss": 1.1535, + "learning_rate": 1.9928985633656604e-05, + "loss": 1.266, "step": 1708 }, { - "epoch": 0.048428688826546516, + "epoch": 0.06686751702011112, "grad_norm": 0.0, - "learning_rate": 1.9982210904731812e-05, - "loss": 1.2077, + "learning_rate": 1.992883479900031e-05, + "loss": 1.2996, "step": 1709 }, { - "epoch": 0.048457026268808975, + "epoch": 0.06690664371234056, "grad_norm": 0.0, - "learning_rate": 1.9982156143246288e-05, - "loss": 1.0996, + "learning_rate": 1.9928683804899368e-05, + "loss": 1.3372, "step": 1710 }, { - "epoch": 0.04848536371107144, + "epoch": 0.06694577040457, "grad_norm": 0.0, - "learning_rate": 1.998210129767735e-05, - "loss": 1.0905, + "learning_rate": 1.9928532651356205e-05, + "loss": 1.3061, "step": 1711 }, { - "epoch": 0.0485137011533339, + "epoch": 0.06698489709679943, "grad_norm": 0.0, - "learning_rate": 1.998204636802547e-05, - "loss": 1.0293, + "learning_rate": 1.9928381338373252e-05, + "loss": 1.222, "step": 1712 }, { - "epoch": 0.04854203859559636, + "epoch": 0.06702402378902887, "grad_norm": 0.0, - "learning_rate": 1.998199135429111e-05, - "loss": 1.1018, + "learning_rate": 1.9928229865952935e-05, + "loss": 1.3434, "step": 1713 }, { - "epoch": 0.048570376037858826, + "epoch": 0.06706315048125831, "grad_norm": 0.0, - "learning_rate": 1.9981936256474727e-05, - "loss": 1.1817, + "learning_rate": 1.9928078234097687e-05, + "loss": 1.3143, "step": 1714 }, { - "epoch": 0.048598713480121285, + "epoch": 0.06710227717348775, "grad_norm": 0.0, - "learning_rate": 1.9981881074576786e-05, - "loss": 1.1575, + "learning_rate": 1.9927926442809943e-05, + "loss": 1.2975, "step": 1715 }, { - "epoch": 0.048627050922383744, + "epoch": 0.0671414038657172, "grad_norm": 0.0, - "learning_rate": 1.9981825808597757e-05, - "loss": 1.1098, + "learning_rate": 1.9927774492092137e-05, + "loss": 1.3589, "step": 1716 }, { - "epoch": 0.04865538836464621, + "epoch": 0.06718053055794664, "grad_norm": 0.0, - "learning_rate": 1.99817704585381e-05, - "loss": 1.1764, + "learning_rate": 1.9927622381946718e-05, + "loss": 1.4301, "step": 1717 }, { - "epoch": 0.04868372580690867, + "epoch": 0.06721965725017608, "grad_norm": 0.0, - "learning_rate": 1.9981715024398286e-05, - "loss": 1.1432, + "learning_rate": 1.9927470112376122e-05, + "loss": 1.0717, "step": 1718 }, { - "epoch": 0.04871206324917113, + "epoch": 0.0672587839424055, "grad_norm": 0.0, - "learning_rate": 1.9981659506178778e-05, - "loss": 1.1265, + "learning_rate": 1.9927317683382795e-05, + "loss": 1.2332, "step": 1719 }, { - "epoch": 0.048740400691433594, + "epoch": 0.06729791063463494, "grad_norm": 0.0, - "learning_rate": 1.9981603903880046e-05, - "loss": 1.1039, + "learning_rate": 1.9927165094969187e-05, + "loss": 1.3154, "step": 1720 }, { - "epoch": 0.04876873813369605, + "epoch": 0.06733703732686439, "grad_norm": 0.0, - "learning_rate": 1.998154821750256e-05, - "loss": 0.9821, + "learning_rate": 1.9927012347137748e-05, + "loss": 1.2991, "step": 1721 }, { - "epoch": 0.04879707557595851, + "epoch": 0.06737616401909383, "grad_norm": 0.0, - "learning_rate": 1.998149244704678e-05, - "loss": 1.151, + "learning_rate": 1.9926859439890927e-05, + "loss": 1.2992, "step": 1722 }, { - "epoch": 0.04882541301822098, + "epoch": 0.06741529071132327, "grad_norm": 0.0, - "learning_rate": 1.9981436592513194e-05, - "loss": 1.0789, + "learning_rate": 1.9926706373231184e-05, + "loss": 1.1696, "step": 1723 }, { - "epoch": 0.04885375046048344, + "epoch": 0.06745441740355271, "grad_norm": 0.0, - "learning_rate": 1.9981380653902253e-05, - "loss": 1.2288, + "learning_rate": 1.9926553147160975e-05, + "loss": 1.3211, "step": 1724 }, { - "epoch": 0.0488820879027459, + "epoch": 0.06749354409578215, "grad_norm": 0.0, - "learning_rate": 1.9981324631214435e-05, - "loss": 1.1392, + "learning_rate": 1.992639976168276e-05, + "loss": 1.2443, "step": 1725 }, { - "epoch": 0.04891042534500836, + "epoch": 0.06753267078801158, "grad_norm": 0.0, - "learning_rate": 1.9981268524450218e-05, - "loss": 1.0779, + "learning_rate": 1.9926246216799003e-05, + "loss": 1.1652, "step": 1726 }, { - "epoch": 0.04893876278727082, + "epoch": 0.06757179748024102, "grad_norm": 0.0, - "learning_rate": 1.9981212333610066e-05, - "loss": 1.1175, + "learning_rate": 1.9926092512512172e-05, + "loss": 1.1624, "step": 1727 }, { - "epoch": 0.04896710022953328, + "epoch": 0.06761092417247046, "grad_norm": 0.0, - "learning_rate": 1.9981156058694458e-05, - "loss": 1.0591, + "learning_rate": 1.9925938648824733e-05, + "loss": 1.2501, "step": 1728 }, { - "epoch": 0.04899543767179575, + "epoch": 0.0676500508646999, "grad_norm": 0.0, - "learning_rate": 1.9981099699703866e-05, - "loss": 1.0387, + "learning_rate": 1.9925784625739157e-05, + "loss": 1.2884, "step": 1729 }, { - "epoch": 0.049023775114058206, + "epoch": 0.06768917755692934, "grad_norm": 0.0, - "learning_rate": 1.9981043256638767e-05, - "loss": 1.0138, + "learning_rate": 1.9925630443257918e-05, + "loss": 1.1824, "step": 1730 }, { - "epoch": 0.049052112556320665, + "epoch": 0.06772830424915878, "grad_norm": 0.0, - "learning_rate": 1.998098672949963e-05, - "loss": 1.0895, + "learning_rate": 1.9925476101383493e-05, + "loss": 1.2595, "step": 1731 }, { - "epoch": 0.04908044999858313, + "epoch": 0.06776743094138822, "grad_norm": 0.0, - "learning_rate": 1.9980930118286937e-05, - "loss": 1.1282, + "learning_rate": 1.992532160011836e-05, + "loss": 1.3229, "step": 1732 }, { - "epoch": 0.04910878744084559, + "epoch": 0.06780655763361765, "grad_norm": 0.0, - "learning_rate": 1.9980873423001162e-05, - "loss": 1.1985, + "learning_rate": 1.992516693946499e-05, + "loss": 1.1683, "step": 1733 }, { - "epoch": 0.04913712488310805, + "epoch": 0.06784568432584709, "grad_norm": 0.0, - "learning_rate": 1.9980816643642787e-05, - "loss": 0.9883, + "learning_rate": 1.9925012119425885e-05, + "loss": 1.4284, "step": 1734 }, { - "epoch": 0.049165462325370515, + "epoch": 0.06788481101807653, "grad_norm": 0.0, - "learning_rate": 1.9980759780212288e-05, - "loss": 1.0941, + "learning_rate": 1.9924857140003523e-05, + "loss": 1.2672, "step": 1735 }, { - "epoch": 0.049193799767632974, + "epoch": 0.06792393771030597, "grad_norm": 0.0, - "learning_rate": 1.9980702832710137e-05, - "loss": 1.1881, + "learning_rate": 1.9924702001200386e-05, + "loss": 1.2678, "step": 1736 }, { - "epoch": 0.049222137209895434, + "epoch": 0.06796306440253541, "grad_norm": 0.0, - "learning_rate": 1.9980645801136826e-05, - "loss": 1.0761, + "learning_rate": 1.9924546703018974e-05, + "loss": 1.3033, "step": 1737 }, { - "epoch": 0.0492504746521579, + "epoch": 0.06800219109476485, "grad_norm": 0.0, - "learning_rate": 1.9980588685492824e-05, - "loss": 1.0646, + "learning_rate": 1.9924391245461777e-05, + "loss": 1.2269, "step": 1738 }, { - "epoch": 0.04927881209442036, + "epoch": 0.0680413177869943, "grad_norm": 0.0, - "learning_rate": 1.9980531485778624e-05, - "loss": 1.1684, + "learning_rate": 1.9924235628531292e-05, + "loss": 1.2873, "step": 1739 }, { - "epoch": 0.04930714953668282, + "epoch": 0.06808044447922372, "grad_norm": 0.0, - "learning_rate": 1.9980474201994693e-05, - "loss": 0.9842, + "learning_rate": 1.992407985223002e-05, + "loss": 1.3174, "step": 1740 }, { - "epoch": 0.049335486978945284, + "epoch": 0.06811957117145316, "grad_norm": 0.0, - "learning_rate": 1.9980416834141526e-05, - "loss": 1.1026, + "learning_rate": 1.992392391656046e-05, + "loss": 1.3036, "step": 1741 }, { - "epoch": 0.04936382442120774, + "epoch": 0.0681586978636826, "grad_norm": 0.0, - "learning_rate": 1.9980359382219603e-05, - "loss": 1.1242, + "learning_rate": 1.992376782152512e-05, + "loss": 1.192, "step": 1742 }, { - "epoch": 0.0493921618634702, + "epoch": 0.06819782455591204, "grad_norm": 0.0, - "learning_rate": 1.9980301846229406e-05, - "loss": 0.9794, + "learning_rate": 1.9923611567126505e-05, + "loss": 1.2625, "step": 1743 }, { - "epoch": 0.04942049930573267, + "epoch": 0.06823695124814148, "grad_norm": 0.0, - "learning_rate": 1.998024422617142e-05, - "loss": 1.1716, + "learning_rate": 1.992345515336712e-05, + "loss": 1.2366, "step": 1744 }, { - "epoch": 0.04944883674799513, + "epoch": 0.06827607794037092, "grad_norm": 0.0, - "learning_rate": 1.998018652204613e-05, - "loss": 1.1234, + "learning_rate": 1.992329858024948e-05, + "loss": 1.4681, "step": 1745 }, { - "epoch": 0.049477174190257586, + "epoch": 0.06831520463260037, "grad_norm": 0.0, - "learning_rate": 1.9980128733854026e-05, - "loss": 1.1419, + "learning_rate": 1.9923141847776098e-05, + "loss": 1.3658, "step": 1746 }, { - "epoch": 0.049505511632520045, + "epoch": 0.06835433132482979, "grad_norm": 0.0, - "learning_rate": 1.9980070861595585e-05, - "loss": 1.1511, + "learning_rate": 1.9922984955949497e-05, + "loss": 1.1591, "step": 1747 }, { - "epoch": 0.04953384907478251, + "epoch": 0.06839345801705923, "grad_norm": 0.0, - "learning_rate": 1.9980012905271305e-05, - "loss": 1.1447, + "learning_rate": 1.9922827904772187e-05, + "loss": 1.2719, "step": 1748 }, { - "epoch": 0.04956218651704497, + "epoch": 0.06843258470928867, "grad_norm": 0.0, - "learning_rate": 1.9979954864881672e-05, - "loss": 1.1773, + "learning_rate": 1.99226706942467e-05, + "loss": 1.2632, "step": 1749 }, { - "epoch": 0.04959052395930743, + "epoch": 0.06847171140151811, "grad_norm": 0.0, - "learning_rate": 1.9979896740427173e-05, - "loss": 1.0789, + "learning_rate": 1.992251332437555e-05, + "loss": 1.2985, "step": 1750 }, { - "epoch": 0.049618861401569896, + "epoch": 0.06851083809374756, "grad_norm": 0.0, - "learning_rate": 1.9979838531908297e-05, - "loss": 1.1811, + "learning_rate": 1.992235579516127e-05, + "loss": 1.2123, "step": 1751 }, { - "epoch": 0.049647198843832355, + "epoch": 0.068549964785977, "grad_norm": 0.0, - "learning_rate": 1.9979780239325534e-05, - "loss": 1.032, + "learning_rate": 1.9922198106606393e-05, + "loss": 1.3221, "step": 1752 }, { - "epoch": 0.049675536286094814, + "epoch": 0.06858909147820644, "grad_norm": 0.0, - "learning_rate": 1.9979721862679376e-05, - "loss": 1.1744, + "learning_rate": 1.9922040258713447e-05, + "loss": 1.2205, "step": 1753 }, { - "epoch": 0.04970387372835728, + "epoch": 0.06862821817043586, "grad_norm": 0.0, - "learning_rate": 1.9979663401970317e-05, - "loss": 1.1508, + "learning_rate": 1.9921882251484967e-05, + "loss": 1.2539, "step": 1754 }, { - "epoch": 0.04973221117061974, + "epoch": 0.0686673448626653, "grad_norm": 0.0, - "learning_rate": 1.9979604857198845e-05, - "loss": 1.0338, + "learning_rate": 1.992172408492349e-05, + "loss": 1.2136, "step": 1755 }, { - "epoch": 0.0497605486128822, + "epoch": 0.06870647155489475, "grad_norm": 0.0, - "learning_rate": 1.9979546228365456e-05, - "loss": 1.1589, + "learning_rate": 1.9921565759031557e-05, + "loss": 1.1508, "step": 1756 }, { - "epoch": 0.049788886055144664, + "epoch": 0.06874559824712419, "grad_norm": 0.0, - "learning_rate": 1.9979487515470647e-05, - "loss": 1.0867, + "learning_rate": 1.992140727381171e-05, + "loss": 1.2848, "step": 1757 }, { - "epoch": 0.04981722349740712, + "epoch": 0.06878472493935363, "grad_norm": 0.0, - "learning_rate": 1.9979428718514905e-05, - "loss": 1.0386, + "learning_rate": 1.9921248629266495e-05, + "loss": 1.3842, "step": 1758 }, { - "epoch": 0.04984556093966958, + "epoch": 0.06882385163158307, "grad_norm": 0.0, - "learning_rate": 1.997936983749873e-05, - "loss": 1.1689, + "learning_rate": 1.992108982539846e-05, + "loss": 1.262, "step": 1759 }, { - "epoch": 0.04987389838193205, + "epoch": 0.06886297832381251, "grad_norm": 0.0, - "learning_rate": 1.9979310872422615e-05, - "loss": 1.125, + "learning_rate": 1.992093086221015e-05, + "loss": 1.278, "step": 1760 }, { - "epoch": 0.04990223582419451, + "epoch": 0.06890210501604195, "grad_norm": 0.0, - "learning_rate": 1.997925182328706e-05, - "loss": 1.0927, + "learning_rate": 1.9920771739704127e-05, + "loss": 1.3209, "step": 1761 }, { - "epoch": 0.049930573266456967, + "epoch": 0.06894123170827138, "grad_norm": 0.0, - "learning_rate": 1.9979192690092563e-05, - "loss": 1.079, + "learning_rate": 1.992061245788294e-05, + "loss": 1.173, "step": 1762 }, { - "epoch": 0.04995891070871943, + "epoch": 0.06898035840050082, "grad_norm": 0.0, - "learning_rate": 1.997913347283962e-05, - "loss": 1.1627, + "learning_rate": 1.9920453016749146e-05, + "loss": 1.2296, "step": 1763 }, { - "epoch": 0.04998724815098189, + "epoch": 0.06901948509273026, "grad_norm": 0.0, - "learning_rate": 1.997907417152873e-05, - "loss": 1.2574, + "learning_rate": 1.992029341630531e-05, + "loss": 1.319, "step": 1764 }, { - "epoch": 0.05001558559324435, + "epoch": 0.0690586117849597, "grad_norm": 0.0, - "learning_rate": 1.997901478616039e-05, - "loss": 1.1503, + "learning_rate": 1.992013365655399e-05, + "loss": 1.1454, "step": 1765 }, { - "epoch": 0.05004392303550682, + "epoch": 0.06909773847718914, "grad_norm": 0.0, - "learning_rate": 1.9978955316735106e-05, - "loss": 1.1519, + "learning_rate": 1.991997373749776e-05, + "loss": 1.1742, "step": 1766 }, { - "epoch": 0.050072260477769276, + "epoch": 0.06913686516941858, "grad_norm": 0.0, - "learning_rate": 1.9978895763253375e-05, - "loss": 1.1204, + "learning_rate": 1.9919813659139177e-05, + "loss": 1.1975, "step": 1767 }, { - "epoch": 0.050100597920031735, + "epoch": 0.06917599186164802, "grad_norm": 0.0, - "learning_rate": 1.99788361257157e-05, - "loss": 1.0301, + "learning_rate": 1.9919653421480816e-05, + "loss": 1.3209, "step": 1768 }, { - "epoch": 0.0501289353622942, + "epoch": 0.06921511855387745, "grad_norm": 0.0, - "learning_rate": 1.997877640412258e-05, - "loss": 1.1121, + "learning_rate": 1.9919493024525255e-05, + "loss": 1.2196, "step": 1769 }, { - "epoch": 0.05015727280455666, + "epoch": 0.06925424524610689, "grad_norm": 0.0, - "learning_rate": 1.9978716598474523e-05, - "loss": 1.0607, + "learning_rate": 1.9919332468275062e-05, + "loss": 1.2317, "step": 1770 }, { - "epoch": 0.05018561024681912, + "epoch": 0.06929337193833633, "grad_norm": 0.0, - "learning_rate": 1.9978656708772032e-05, - "loss": 1.0575, + "learning_rate": 1.991917175273282e-05, + "loss": 1.3445, "step": 1771 }, { - "epoch": 0.050213947689081585, + "epoch": 0.06933249863056577, "grad_norm": 0.0, - "learning_rate": 1.9978596735015606e-05, - "loss": 1.1725, + "learning_rate": 1.991901087790111e-05, + "loss": 1.3221, "step": 1772 }, { - "epoch": 0.050242285131344044, + "epoch": 0.06937162532279521, "grad_norm": 0.0, - "learning_rate": 1.9978536677205756e-05, - "loss": 1.1834, + "learning_rate": 1.9918849843782513e-05, + "loss": 1.2411, "step": 1773 }, { - "epoch": 0.050270622573606503, + "epoch": 0.06941075201502465, "grad_norm": 0.0, - "learning_rate": 1.9978476535342986e-05, - "loss": 1.1149, + "learning_rate": 1.9918688650379622e-05, + "loss": 1.3253, "step": 1774 }, { - "epoch": 0.05029896001586897, + "epoch": 0.0694498787072541, "grad_norm": 0.0, - "learning_rate": 1.9978416309427806e-05, - "loss": 1.1801, + "learning_rate": 1.9918527297695014e-05, + "loss": 1.2332, "step": 1775 }, { - "epoch": 0.05032729745813143, + "epoch": 0.06948900539948352, "grad_norm": 0.0, - "learning_rate": 1.9978355999460716e-05, - "loss": 1.1351, + "learning_rate": 1.991836578573129e-05, + "loss": 1.1119, "step": 1776 }, { - "epoch": 0.05035563490039389, + "epoch": 0.06952813209171296, "grad_norm": 0.0, - "learning_rate": 1.997829560544223e-05, - "loss": 1.1759, + "learning_rate": 1.9918204114491034e-05, + "loss": 1.2925, "step": 1777 }, { - "epoch": 0.050383972342656354, + "epoch": 0.0695672587839424, "grad_norm": 0.0, - "learning_rate": 1.9978235127372854e-05, - "loss": 1.1715, + "learning_rate": 1.9918042283976855e-05, + "loss": 1.3717, "step": 1778 }, { - "epoch": 0.05041230978491881, + "epoch": 0.06960638547617184, "grad_norm": 0.0, - "learning_rate": 1.9978174565253096e-05, - "loss": 1.157, + "learning_rate": 1.991788029419134e-05, + "loss": 1.2773, "step": 1779 }, { - "epoch": 0.05044064722718127, + "epoch": 0.06964551216840129, "grad_norm": 0.0, - "learning_rate": 1.9978113919083474e-05, - "loss": 1.2176, + "learning_rate": 1.99177181451371e-05, + "loss": 1.2186, "step": 1780 }, { - "epoch": 0.05046898466944374, + "epoch": 0.06968463886063073, "grad_norm": 0.0, - "learning_rate": 1.997805318886449e-05, - "loss": 1.1417, + "learning_rate": 1.991755583681673e-05, + "loss": 1.2682, "step": 1781 }, { - "epoch": 0.0504973221117062, + "epoch": 0.06972376555286017, "grad_norm": 0.0, - "learning_rate": 1.9977992374596657e-05, - "loss": 1.1234, + "learning_rate": 1.9917393369232843e-05, + "loss": 1.3552, "step": 1782 }, { - "epoch": 0.050525659553968656, + "epoch": 0.0697628922450896, "grad_norm": 0.0, - "learning_rate": 1.9977931476280492e-05, - "loss": 1.1135, + "learning_rate": 1.9917230742388046e-05, + "loss": 1.2062, "step": 1783 }, { - "epoch": 0.05055399699623112, + "epoch": 0.06980201893731904, "grad_norm": 0.0, - "learning_rate": 1.99778704939165e-05, - "loss": 1.1416, + "learning_rate": 1.9917067956284947e-05, + "loss": 1.3907, "step": 1784 }, { - "epoch": 0.05058233443849358, + "epoch": 0.06984114562954848, "grad_norm": 0.0, - "learning_rate": 1.9977809427505204e-05, - "loss": 1.2223, + "learning_rate": 1.9916905010926165e-05, + "loss": 1.292, "step": 1785 }, { - "epoch": 0.05061067188075604, + "epoch": 0.06988027232177792, "grad_norm": 0.0, - "learning_rate": 1.9977748277047114e-05, - "loss": 1.217, + "learning_rate": 1.9916741906314317e-05, + "loss": 1.1696, "step": 1786 }, { - "epoch": 0.050639009323018506, + "epoch": 0.06991939901400736, "grad_norm": 0.0, - "learning_rate": 1.9977687042542743e-05, - "loss": 1.0527, + "learning_rate": 1.991657864245202e-05, + "loss": 1.2238, "step": 1787 }, { - "epoch": 0.050667346765280966, + "epoch": 0.0699585257062368, "grad_norm": 0.0, - "learning_rate": 1.9977625723992614e-05, - "loss": 1.1157, + "learning_rate": 1.9916415219341895e-05, + "loss": 1.2587, "step": 1788 }, { - "epoch": 0.050695684207543425, + "epoch": 0.06999765239846624, "grad_norm": 0.0, - "learning_rate": 1.9977564321397234e-05, - "loss": 1.0002, + "learning_rate": 1.9916251636986568e-05, + "loss": 1.146, "step": 1789 }, { - "epoch": 0.05072402164980589, + "epoch": 0.07003677909069567, "grad_norm": 0.0, - "learning_rate": 1.9977502834757124e-05, - "loss": 1.2269, + "learning_rate": 1.9916087895388664e-05, + "loss": 1.2631, "step": 1790 }, { - "epoch": 0.05075235909206835, + "epoch": 0.07007590578292511, "grad_norm": 0.0, - "learning_rate": 1.9977441264072803e-05, - "loss": 1.1353, + "learning_rate": 1.9915923994550816e-05, + "loss": 1.2489, "step": 1791 }, { - "epoch": 0.05078069653433081, + "epoch": 0.07011503247515455, "grad_norm": 0.0, - "learning_rate": 1.997737960934479e-05, - "loss": 1.1912, + "learning_rate": 1.9915759934475653e-05, + "loss": 1.4487, "step": 1792 }, { - "epoch": 0.050809033976593275, + "epoch": 0.07015415916738399, "grad_norm": 0.0, - "learning_rate": 1.9977317870573605e-05, - "loss": 1.2416, + "learning_rate": 1.991559571516581e-05, + "loss": 1.2693, "step": 1793 }, { - "epoch": 0.050837371418855734, + "epoch": 0.07019328585961343, "grad_norm": 0.0, - "learning_rate": 1.9977256047759765e-05, - "loss": 1.0607, + "learning_rate": 1.9915431336623928e-05, + "loss": 1.2261, "step": 1794 }, { - "epoch": 0.05086570886111819, + "epoch": 0.07023241255184287, "grad_norm": 0.0, - "learning_rate": 1.997719414090379e-05, - "loss": 1.176, + "learning_rate": 1.9915266798852642e-05, + "loss": 1.3287, "step": 1795 }, { - "epoch": 0.05089404630338066, + "epoch": 0.07027153924407231, "grad_norm": 0.0, - "learning_rate": 1.997713215000621e-05, - "loss": 1.0828, + "learning_rate": 1.9915102101854594e-05, + "loss": 1.2385, "step": 1796 }, { - "epoch": 0.05092238374564312, + "epoch": 0.07031066593630174, "grad_norm": 0.0, - "learning_rate": 1.9977070075067536e-05, - "loss": 1.1515, + "learning_rate": 1.9914937245632432e-05, + "loss": 1.3311, "step": 1797 }, { - "epoch": 0.05095072118790558, + "epoch": 0.07034979262853118, "grad_norm": 0.0, - "learning_rate": 1.99770079160883e-05, - "loss": 1.1667, + "learning_rate": 1.9914772230188797e-05, + "loss": 1.1758, "step": 1798 }, { - "epoch": 0.05097905863016804, + "epoch": 0.07038891932076062, "grad_norm": 0.0, - "learning_rate": 1.9976945673069017e-05, - "loss": 1.0558, + "learning_rate": 1.991460705552635e-05, + "loss": 1.2943, "step": 1799 }, { - "epoch": 0.0510073960724305, + "epoch": 0.07042804601299006, "grad_norm": 0.0, - "learning_rate": 1.997688334601022e-05, - "loss": 1.0997, + "learning_rate": 1.9914441721647737e-05, + "loss": 1.2666, "step": 1800 }, { - "epoch": 0.05103573351469296, + "epoch": 0.0704671727052195, "grad_norm": 0.0, - "learning_rate": 1.9976820934912425e-05, - "loss": 1.2069, + "learning_rate": 1.9914276228555613e-05, + "loss": 1.374, "step": 1801 }, { - "epoch": 0.05106407095695543, + "epoch": 0.07050629939744894, "grad_norm": 0.0, - "learning_rate": 1.9976758439776166e-05, - "loss": 1.1555, + "learning_rate": 1.991411057625263e-05, + "loss": 1.2898, "step": 1802 }, { - "epoch": 0.05109240839921789, + "epoch": 0.07054542608967838, "grad_norm": 0.0, - "learning_rate": 1.9976695860601962e-05, - "loss": 1.1417, + "learning_rate": 1.9913944764741463e-05, + "loss": 1.3965, "step": 1803 }, { - "epoch": 0.051120745841480346, + "epoch": 0.07058455278190781, "grad_norm": 0.0, - "learning_rate": 1.9976633197390347e-05, - "loss": 1.0876, + "learning_rate": 1.9913778794024764e-05, + "loss": 1.2716, "step": 1804 }, { - "epoch": 0.05114908328374281, + "epoch": 0.07062367947413725, "grad_norm": 0.0, - "learning_rate": 1.9976570450141845e-05, - "loss": 1.0375, + "learning_rate": 1.9913612664105196e-05, + "loss": 1.1607, "step": 1805 }, { - "epoch": 0.05117742072600527, + "epoch": 0.07066280616636669, "grad_norm": 0.0, - "learning_rate": 1.9976507618856986e-05, - "loss": 1.1278, + "learning_rate": 1.9913446374985434e-05, + "loss": 1.1209, "step": 1806 }, { - "epoch": 0.05120575816826773, + "epoch": 0.07070193285859613, "grad_norm": 0.0, - "learning_rate": 1.9976444703536297e-05, - "loss": 1.1155, + "learning_rate": 1.9913279926668146e-05, + "loss": 1.2715, "step": 1807 }, { - "epoch": 0.051234095610530196, + "epoch": 0.07074105955082557, "grad_norm": 0.0, - "learning_rate": 1.997638170418031e-05, - "loss": 1.091, + "learning_rate": 1.9913113319156e-05, + "loss": 1.2874, "step": 1808 }, { - "epoch": 0.051262433052792655, + "epoch": 0.07078018624305502, "grad_norm": 0.0, - "learning_rate": 1.997631862078956e-05, - "loss": 1.103, + "learning_rate": 1.9912946552451683e-05, + "loss": 1.276, "step": 1809 }, { - "epoch": 0.051290770495055114, + "epoch": 0.07081931293528446, "grad_norm": 0.0, - "learning_rate": 1.9976255453364567e-05, - "loss": 1.1281, + "learning_rate": 1.991277962655786e-05, + "loss": 1.4103, "step": 1810 }, { - "epoch": 0.05131910793731758, + "epoch": 0.07085843962751388, "grad_norm": 0.0, - "learning_rate": 1.9976192201905877e-05, - "loss": 1.0823, + "learning_rate": 1.9912612541477222e-05, + "loss": 1.3237, "step": 1811 }, { - "epoch": 0.05134744537958004, + "epoch": 0.07089756631974332, "grad_norm": 0.0, - "learning_rate": 1.997612886641401e-05, - "loss": 1.0674, + "learning_rate": 1.9912445297212442e-05, + "loss": 1.2756, "step": 1812 }, { - "epoch": 0.0513757828218425, + "epoch": 0.07093669301197277, "grad_norm": 0.0, - "learning_rate": 1.9976065446889505e-05, - "loss": 1.2554, + "learning_rate": 1.9912277893766218e-05, + "loss": 1.2252, "step": 1813 }, { - "epoch": 0.051404120264104965, + "epoch": 0.0709758197042022, "grad_norm": 0.0, - "learning_rate": 1.9976001943332898e-05, - "loss": 1.1699, + "learning_rate": 1.991211033114123e-05, + "loss": 1.4095, "step": 1814 }, { - "epoch": 0.051432457706367424, + "epoch": 0.07101494639643165, "grad_norm": 0.0, - "learning_rate": 1.997593835574472e-05, - "loss": 1.14, + "learning_rate": 1.991194260934017e-05, + "loss": 1.2072, "step": 1815 }, { - "epoch": 0.05146079514862988, + "epoch": 0.07105407308866109, "grad_norm": 0.0, - "learning_rate": 1.997587468412551e-05, - "loss": 1.1084, + "learning_rate": 1.9911774728365732e-05, + "loss": 1.3199, "step": 1816 }, { - "epoch": 0.05148913259089235, + "epoch": 0.07109319978089053, "grad_norm": 0.0, - "learning_rate": 1.9975810928475806e-05, - "loss": 1.0619, + "learning_rate": 1.991160668822061e-05, + "loss": 1.3018, "step": 1817 }, { - "epoch": 0.05151747003315481, + "epoch": 0.07113232647311996, "grad_norm": 0.0, - "learning_rate": 1.997574708879614e-05, - "loss": 1.1281, + "learning_rate": 1.9911438488907506e-05, + "loss": 1.1051, "step": 1818 }, { - "epoch": 0.05154580747541727, + "epoch": 0.0711714531653494, "grad_norm": 0.0, - "learning_rate": 1.997568316508705e-05, - "loss": 1.1306, + "learning_rate": 1.991127013042912e-05, + "loss": 1.4069, "step": 1819 }, { - "epoch": 0.05157414491767973, + "epoch": 0.07121057985757884, "grad_norm": 0.0, - "learning_rate": 1.9975619157349076e-05, - "loss": 1.1702, + "learning_rate": 1.9911101612788157e-05, + "loss": 1.2693, "step": 1820 }, { - "epoch": 0.05160248235994219, + "epoch": 0.07124970654980828, "grad_norm": 0.0, - "learning_rate": 1.9975555065582762e-05, - "loss": 1.1324, + "learning_rate": 1.991093293598732e-05, + "loss": 1.3079, "step": 1821 }, { - "epoch": 0.05163081980220465, + "epoch": 0.07128883324203772, "grad_norm": 0.0, - "learning_rate": 1.9975490889788638e-05, - "loss": 1.2453, + "learning_rate": 1.9910764100029316e-05, + "loss": 1.2044, "step": 1822 }, { - "epoch": 0.05165915724446712, + "epoch": 0.07132795993426716, "grad_norm": 0.0, - "learning_rate": 1.9975426629967252e-05, - "loss": 1.0948, + "learning_rate": 1.9910595104916864e-05, + "loss": 1.2864, "step": 1823 }, { - "epoch": 0.051687494686729576, + "epoch": 0.0713670866264966, "grad_norm": 0.0, - "learning_rate": 1.9975362286119145e-05, - "loss": 1.2132, + "learning_rate": 1.991042595065267e-05, + "loss": 1.275, "step": 1824 }, { - "epoch": 0.051715832128992036, + "epoch": 0.07140621331872604, "grad_norm": 0.0, - "learning_rate": 1.9975297858244858e-05, - "loss": 1.0815, + "learning_rate": 1.9910256637239455e-05, + "loss": 1.1634, "step": 1825 }, { - "epoch": 0.0517441695712545, + "epoch": 0.07144534001095547, "grad_norm": 0.0, - "learning_rate": 1.997523334634493e-05, - "loss": 1.0534, + "learning_rate": 1.9910087164679938e-05, + "loss": 1.2861, "step": 1826 }, { - "epoch": 0.05177250701351696, + "epoch": 0.07148446670318491, "grad_norm": 0.0, - "learning_rate": 1.9975168750419906e-05, - "loss": 1.2018, + "learning_rate": 1.9909917532976838e-05, + "loss": 1.2358, "step": 1827 }, { - "epoch": 0.05180084445577942, + "epoch": 0.07152359339541435, "grad_norm": 0.0, - "learning_rate": 1.9975104070470335e-05, - "loss": 1.1486, + "learning_rate": 1.990974774213288e-05, + "loss": 1.1472, "step": 1828 }, { - "epoch": 0.051829181898041886, + "epoch": 0.07156272008764379, "grad_norm": 0.0, - "learning_rate": 1.997503930649676e-05, - "loss": 1.1404, + "learning_rate": 1.990957779215079e-05, + "loss": 1.2343, "step": 1829 }, { - "epoch": 0.051857519340304345, + "epoch": 0.07160184677987323, "grad_norm": 0.0, - "learning_rate": 1.9974974458499717e-05, - "loss": 1.1775, + "learning_rate": 1.9909407683033296e-05, + "loss": 1.3584, "step": 1830 }, { - "epoch": 0.051885856782566804, + "epoch": 0.07164097347210267, "grad_norm": 0.0, - "learning_rate": 1.9974909526479768e-05, - "loss": 1.0654, + "learning_rate": 1.9909237414783137e-05, + "loss": 1.2531, "step": 1831 }, { - "epoch": 0.05191419422482927, + "epoch": 0.07168010016433211, "grad_norm": 0.0, - "learning_rate": 1.9974844510437444e-05, - "loss": 1.1645, + "learning_rate": 1.990906698740304e-05, + "loss": 1.3658, "step": 1832 }, { - "epoch": 0.05194253166709173, + "epoch": 0.07171922685656154, "grad_norm": 0.0, - "learning_rate": 1.9974779410373307e-05, - "loss": 1.2009, + "learning_rate": 1.9908896400895745e-05, + "loss": 1.3233, "step": 1833 }, { - "epoch": 0.05197086910935419, + "epoch": 0.07175835354879098, "grad_norm": 0.0, - "learning_rate": 1.9974714226287896e-05, - "loss": 1.1679, + "learning_rate": 1.9908725655263986e-05, + "loss": 1.221, "step": 1834 }, { - "epoch": 0.051999206551616654, + "epoch": 0.07179748024102042, "grad_norm": 0.0, - "learning_rate": 1.9974648958181767e-05, - "loss": 1.1725, + "learning_rate": 1.9908554750510513e-05, + "loss": 1.1685, "step": 1835 }, { - "epoch": 0.05202754399387911, + "epoch": 0.07183660693324986, "grad_norm": 0.0, - "learning_rate": 1.997458360605546e-05, - "loss": 0.9829, + "learning_rate": 1.9908383686638068e-05, + "loss": 1.3413, "step": 1836 }, { - "epoch": 0.05205588143614157, + "epoch": 0.0718757336254793, "grad_norm": 0.0, - "learning_rate": 1.9974518169909536e-05, - "loss": 1.1014, + "learning_rate": 1.9908212463649396e-05, + "loss": 1.2809, "step": 1837 }, { - "epoch": 0.05208421887840404, + "epoch": 0.07191486031770875, "grad_norm": 0.0, - "learning_rate": 1.997445264974454e-05, - "loss": 1.1302, + "learning_rate": 1.990804108154725e-05, + "loss": 1.4349, "step": 1838 }, { - "epoch": 0.0521125563206665, + "epoch": 0.07195398700993819, "grad_norm": 0.0, - "learning_rate": 1.9974387045561022e-05, - "loss": 1.0835, + "learning_rate": 1.9907869540334374e-05, + "loss": 1.2101, "step": 1839 }, { - "epoch": 0.05214089376292896, + "epoch": 0.07199311370216761, "grad_norm": 0.0, - "learning_rate": 1.9974321357359545e-05, - "loss": 1.152, + "learning_rate": 1.9907697840013532e-05, + "loss": 1.3693, "step": 1840 }, { - "epoch": 0.05216923120519142, + "epoch": 0.07203224039439705, "grad_norm": 0.0, - "learning_rate": 1.9974255585140653e-05, - "loss": 1.1074, + "learning_rate": 1.9907525980587475e-05, + "loss": 1.2379, "step": 1841 }, { - "epoch": 0.05219756864745388, + "epoch": 0.0720713670866265, "grad_norm": 0.0, - "learning_rate": 1.9974189728904898e-05, - "loss": 1.1082, + "learning_rate": 1.9907353962058968e-05, + "loss": 1.1862, "step": 1842 }, { - "epoch": 0.05222590608971634, + "epoch": 0.07211049377885594, "grad_norm": 0.0, - "learning_rate": 1.9974123788652843e-05, - "loss": 0.9775, + "learning_rate": 1.9907181784430768e-05, + "loss": 1.2073, "step": 1843 }, { - "epoch": 0.05225424353197881, + "epoch": 0.07214962047108538, "grad_norm": 0.0, - "learning_rate": 1.997405776438504e-05, - "loss": 1.2061, + "learning_rate": 1.9907009447705646e-05, + "loss": 1.2107, "step": 1844 }, { - "epoch": 0.052282580974241266, + "epoch": 0.07218874716331482, "grad_norm": 0.0, - "learning_rate": 1.9973991656102042e-05, - "loss": 1.2722, + "learning_rate": 1.9906836951886365e-05, + "loss": 1.1639, "step": 1845 }, { - "epoch": 0.052310918416503725, + "epoch": 0.07222787385554426, "grad_norm": 0.0, - "learning_rate": 1.997392546380441e-05, - "loss": 1.0663, + "learning_rate": 1.9906664296975696e-05, + "loss": 1.1622, "step": 1846 }, { - "epoch": 0.052339255858766184, + "epoch": 0.07226700054777369, "grad_norm": 0.0, - "learning_rate": 1.9973859187492698e-05, - "loss": 1.1721, + "learning_rate": 1.9906491482976413e-05, + "loss": 1.4441, "step": 1847 }, { - "epoch": 0.05236759330102865, + "epoch": 0.07230612724000313, "grad_norm": 0.0, - "learning_rate": 1.997379282716747e-05, - "loss": 1.1248, + "learning_rate": 1.9906318509891292e-05, + "loss": 1.1623, "step": 1848 }, { - "epoch": 0.05239593074329111, + "epoch": 0.07234525393223257, "grad_norm": 0.0, - "learning_rate": 1.997372638282928e-05, - "loss": 1.0971, + "learning_rate": 1.9906145377723107e-05, + "loss": 1.2059, "step": 1849 }, { - "epoch": 0.05242426818555357, + "epoch": 0.07238438062446201, "grad_norm": 0.0, - "learning_rate": 1.9973659854478685e-05, - "loss": 1.1805, + "learning_rate": 1.990597208647464e-05, + "loss": 1.1954, "step": 1850 }, { - "epoch": 0.052452605627816035, + "epoch": 0.07242350731669145, "grad_norm": 0.0, - "learning_rate": 1.9973593242116256e-05, - "loss": 1.1869, + "learning_rate": 1.9905798636148675e-05, + "loss": 1.2042, "step": 1851 }, { - "epoch": 0.052480943070078494, + "epoch": 0.07246263400892089, "grad_norm": 0.0, - "learning_rate": 1.9973526545742544e-05, - "loss": 1.278, + "learning_rate": 1.9905625026748e-05, + "loss": 1.2872, "step": 1852 }, { - "epoch": 0.05250928051234095, + "epoch": 0.07250176070115033, "grad_norm": 0.0, - "learning_rate": 1.9973459765358116e-05, - "loss": 1.1877, + "learning_rate": 1.9905451258275396e-05, + "loss": 1.2136, "step": 1853 }, { - "epoch": 0.05253761795460342, + "epoch": 0.07254088739337976, "grad_norm": 0.0, - "learning_rate": 1.997339290096353e-05, - "loss": 1.1408, + "learning_rate": 1.9905277330733655e-05, + "loss": 1.3086, "step": 1854 }, { - "epoch": 0.05256595539686588, + "epoch": 0.0725800140856092, "grad_norm": 0.0, - "learning_rate": 1.9973325952559353e-05, - "loss": 1.0456, + "learning_rate": 1.9905103244125573e-05, + "loss": 1.2618, "step": 1855 }, { - "epoch": 0.05259429283912834, + "epoch": 0.07261914077783864, "grad_norm": 0.0, - "learning_rate": 1.997325892014615e-05, - "loss": 1.1369, + "learning_rate": 1.9904928998453947e-05, + "loss": 1.1924, "step": 1856 }, { - "epoch": 0.0526226302813908, + "epoch": 0.07265826747006808, "grad_norm": 0.0, - "learning_rate": 1.9973191803724484e-05, - "loss": 1.0458, + "learning_rate": 1.9904754593721575e-05, + "loss": 1.2982, "step": 1857 }, { - "epoch": 0.05265096772365326, + "epoch": 0.07269739416229752, "grad_norm": 0.0, - "learning_rate": 1.9973124603294916e-05, - "loss": 1.1751, + "learning_rate": 1.990458002993125e-05, + "loss": 1.2673, "step": 1858 }, { - "epoch": 0.05267930516591572, + "epoch": 0.07273652085452696, "grad_norm": 0.0, - "learning_rate": 1.997305731885802e-05, - "loss": 1.2021, + "learning_rate": 1.990440530708578e-05, + "loss": 1.2605, "step": 1859 }, { - "epoch": 0.05270764260817819, + "epoch": 0.0727756475467564, "grad_norm": 0.0, - "learning_rate": 1.9972989950414355e-05, - "loss": 1.1383, + "learning_rate": 1.9904230425187978e-05, + "loss": 1.2355, "step": 1860 }, { - "epoch": 0.052735980050440646, + "epoch": 0.07281477423898583, "grad_norm": 0.0, - "learning_rate": 1.9972922497964497e-05, - "loss": 1.1292, + "learning_rate": 1.9904055384240642e-05, + "loss": 1.3147, "step": 1861 }, { - "epoch": 0.052764317492703106, + "epoch": 0.07285390093121527, "grad_norm": 0.0, - "learning_rate": 1.9972854961509007e-05, - "loss": 1.1114, + "learning_rate": 1.990388018424659e-05, + "loss": 1.2831, "step": 1862 }, { - "epoch": 0.05279265493496557, + "epoch": 0.07289302762344471, "grad_norm": 0.0, - "learning_rate": 1.9972787341048456e-05, - "loss": 1.1264, + "learning_rate": 1.990370482520863e-05, + "loss": 1.215, "step": 1863 }, { - "epoch": 0.05282099237722803, + "epoch": 0.07293215431567415, "grad_norm": 0.0, - "learning_rate": 1.997271963658341e-05, - "loss": 1.0153, + "learning_rate": 1.9903529307129582e-05, + "loss": 1.2944, "step": 1864 }, { - "epoch": 0.05284932981949049, + "epoch": 0.0729712810079036, "grad_norm": 0.0, - "learning_rate": 1.997265184811445e-05, - "loss": 1.1483, + "learning_rate": 1.9903353630012262e-05, + "loss": 1.3025, "step": 1865 }, { - "epoch": 0.052877667261752956, + "epoch": 0.07301040770013303, "grad_norm": 0.0, - "learning_rate": 1.997258397564214e-05, - "loss": 1.0812, + "learning_rate": 1.9903177793859488e-05, + "loss": 1.3029, "step": 1866 }, { - "epoch": 0.052906004704015415, + "epoch": 0.07304953439236248, "grad_norm": 0.0, - "learning_rate": 1.997251601916705e-05, - "loss": 1.245, + "learning_rate": 1.9903001798674097e-05, + "loss": 1.2471, "step": 1867 }, { - "epoch": 0.052934342146277874, + "epoch": 0.0730886610845919, "grad_norm": 0.0, - "learning_rate": 1.9972447978689752e-05, - "loss": 1.0658, + "learning_rate": 1.99028256444589e-05, + "loss": 1.2047, "step": 1868 }, { - "epoch": 0.05296267958854034, + "epoch": 0.07312778777682134, "grad_norm": 0.0, - "learning_rate": 1.9972379854210824e-05, - "loss": 1.2059, + "learning_rate": 1.9902649331216732e-05, + "loss": 1.1525, "step": 1869 }, { - "epoch": 0.0529910170308028, + "epoch": 0.07316691446905078, "grad_norm": 0.0, - "learning_rate": 1.9972311645730836e-05, - "loss": 1.2262, + "learning_rate": 1.9902472858950428e-05, + "loss": 1.2337, "step": 1870 }, { - "epoch": 0.05301935447306526, + "epoch": 0.07320604116128022, "grad_norm": 0.0, - "learning_rate": 1.9972243353250363e-05, - "loss": 0.9525, + "learning_rate": 1.9902296227662815e-05, + "loss": 1.1521, "step": 1871 }, { - "epoch": 0.053047691915327724, + "epoch": 0.07324516785350967, "grad_norm": 0.0, - "learning_rate": 1.9972174976769986e-05, - "loss": 1.1363, + "learning_rate": 1.9902119437356737e-05, + "loss": 1.2357, "step": 1872 }, { - "epoch": 0.05307602935759018, + "epoch": 0.0732842945457391, "grad_norm": 0.0, - "learning_rate": 1.9972106516290272e-05, - "loss": 1.0641, + "learning_rate": 1.9901942488035026e-05, + "loss": 1.2852, "step": 1873 }, { - "epoch": 0.05310436679985264, + "epoch": 0.07332342123796855, "grad_norm": 0.0, - "learning_rate": 1.9972037971811802e-05, - "loss": 1.1533, + "learning_rate": 1.9901765379700527e-05, + "loss": 1.2617, "step": 1874 }, { - "epoch": 0.05313270424211511, + "epoch": 0.07336254793019797, "grad_norm": 0.0, - "learning_rate": 1.9971969343335152e-05, - "loss": 1.0649, + "learning_rate": 1.9901588112356084e-05, + "loss": 1.2689, "step": 1875 }, { - "epoch": 0.05316104168437757, + "epoch": 0.07340167462242742, "grad_norm": 0.0, - "learning_rate": 1.9971900630860904e-05, - "loss": 1.0726, + "learning_rate": 1.990141068600454e-05, + "loss": 1.2642, "step": 1876 }, { - "epoch": 0.05318937912664003, + "epoch": 0.07344080131465686, "grad_norm": 0.0, - "learning_rate": 1.9971831834389634e-05, - "loss": 1.1701, + "learning_rate": 1.990123310064875e-05, + "loss": 1.2461, "step": 1877 }, { - "epoch": 0.05321771656890249, + "epoch": 0.0734799280068863, "grad_norm": 0.0, - "learning_rate": 1.9971762953921922e-05, - "loss": 1.2333, + "learning_rate": 1.9901055356291567e-05, + "loss": 1.2837, "step": 1878 }, { - "epoch": 0.05324605401116495, + "epoch": 0.07351905469911574, "grad_norm": 0.0, - "learning_rate": 1.9971693989458347e-05, - "loss": 1.3014, + "learning_rate": 1.9900877452935837e-05, + "loss": 1.3474, "step": 1879 }, { - "epoch": 0.05327439145342741, + "epoch": 0.07355818139134518, "grad_norm": 0.0, - "learning_rate": 1.997162494099949e-05, - "loss": 1.0148, + "learning_rate": 1.9900699390584424e-05, + "loss": 1.2607, "step": 1880 }, { - "epoch": 0.05330272889568988, + "epoch": 0.07359730808357462, "grad_norm": 0.0, - "learning_rate": 1.9971555808545932e-05, - "loss": 1.1745, + "learning_rate": 1.9900521169240182e-05, + "loss": 1.1183, "step": 1881 }, { - "epoch": 0.053331066337952336, + "epoch": 0.07363643477580405, "grad_norm": 0.0, - "learning_rate": 1.9971486592098258e-05, - "loss": 1.1155, + "learning_rate": 1.990034278890598e-05, + "loss": 1.1786, "step": 1882 }, { - "epoch": 0.053359403780214795, + "epoch": 0.07367556146803349, "grad_norm": 0.0, - "learning_rate": 1.997141729165705e-05, - "loss": 1.0367, + "learning_rate": 1.9900164249584676e-05, + "loss": 1.2324, "step": 1883 }, { - "epoch": 0.05338774122247726, + "epoch": 0.07371468816026293, "grad_norm": 0.0, - "learning_rate": 1.997134790722289e-05, - "loss": 1.2091, + "learning_rate": 1.989998555127914e-05, + "loss": 1.2347, "step": 1884 }, { - "epoch": 0.05341607866473972, + "epoch": 0.07375381485249237, "grad_norm": 0.0, - "learning_rate": 1.9971278438796365e-05, - "loss": 1.042, + "learning_rate": 1.9899806693992242e-05, + "loss": 1.1398, "step": 1885 }, { - "epoch": 0.05344441610700218, + "epoch": 0.07379294154472181, "grad_norm": 0.0, - "learning_rate": 1.9971208886378056e-05, - "loss": 1.1306, + "learning_rate": 1.9899627677726855e-05, + "loss": 1.2064, "step": 1886 }, { - "epoch": 0.053472753549264646, + "epoch": 0.07383206823695125, "grad_norm": 0.0, - "learning_rate": 1.9971139249968556e-05, - "loss": 1.0661, + "learning_rate": 1.989944850248585e-05, + "loss": 1.2109, "step": 1887 }, { - "epoch": 0.053501090991527105, + "epoch": 0.07387119492918069, "grad_norm": 0.0, - "learning_rate": 1.9971069529568446e-05, - "loss": 1.1177, + "learning_rate": 1.9899269168272107e-05, + "loss": 1.1867, "step": 1888 }, { - "epoch": 0.053529428433789564, + "epoch": 0.07391032162141013, "grad_norm": 0.0, - "learning_rate": 1.9970999725178313e-05, - "loss": 1.125, + "learning_rate": 1.9899089675088505e-05, + "loss": 1.2373, "step": 1889 }, { - "epoch": 0.05355776587605203, + "epoch": 0.07394944831363956, "grad_norm": 0.0, - "learning_rate": 1.9970929836798748e-05, - "loss": 1.0324, + "learning_rate": 1.989891002293793e-05, + "loss": 1.2576, "step": 1890 }, { - "epoch": 0.05358610331831449, + "epoch": 0.073988575005869, "grad_norm": 0.0, - "learning_rate": 1.997085986443034e-05, - "loss": 1.1618, + "learning_rate": 1.989873021182326e-05, + "loss": 1.1708, "step": 1891 }, { - "epoch": 0.05361444076057695, + "epoch": 0.07402770169809844, "grad_norm": 0.0, - "learning_rate": 1.9970789808073676e-05, - "loss": 1.2082, + "learning_rate": 1.989855024174739e-05, + "loss": 1.278, "step": 1892 }, { - "epoch": 0.053642778202839414, + "epoch": 0.07406682839032788, "grad_norm": 0.0, - "learning_rate": 1.9970719667729344e-05, - "loss": 1.1814, + "learning_rate": 1.9898370112713204e-05, + "loss": 1.3256, "step": 1893 }, { - "epoch": 0.05367111564510187, + "epoch": 0.07410595508255732, "grad_norm": 0.0, - "learning_rate": 1.9970649443397942e-05, - "loss": 1.0203, + "learning_rate": 1.9898189824723602e-05, + "loss": 1.3955, "step": 1894 }, { - "epoch": 0.05369945308736433, + "epoch": 0.07414508177478676, "grad_norm": 0.0, - "learning_rate": 1.997057913508005e-05, - "loss": 1.1511, + "learning_rate": 1.989800937778147e-05, + "loss": 1.1417, "step": 1895 }, { - "epoch": 0.0537277905296268, + "epoch": 0.0741842084670162, "grad_norm": 0.0, - "learning_rate": 1.9970508742776276e-05, - "loss": 1.0481, + "learning_rate": 1.9897828771889715e-05, + "loss": 1.2388, "step": 1896 }, { - "epoch": 0.05375612797188926, + "epoch": 0.07422333515924563, "grad_norm": 0.0, - "learning_rate": 1.99704382664872e-05, - "loss": 1.0831, + "learning_rate": 1.989764800705123e-05, + "loss": 1.3481, "step": 1897 }, { - "epoch": 0.053784465414151716, + "epoch": 0.07426246185147507, "grad_norm": 0.0, - "learning_rate": 1.9970367706213422e-05, - "loss": 1.1632, + "learning_rate": 1.989746708326892e-05, + "loss": 1.2322, "step": 1898 }, { - "epoch": 0.05381280285641418, + "epoch": 0.07430158854370451, "grad_norm": 0.0, - "learning_rate": 1.9970297061955533e-05, - "loss": 1.1097, + "learning_rate": 1.9897286000545688e-05, + "loss": 1.1375, "step": 1899 }, { - "epoch": 0.05384114029867664, + "epoch": 0.07434071523593395, "grad_norm": 0.0, - "learning_rate": 1.997022633371413e-05, - "loss": 1.2398, + "learning_rate": 1.9897104758884448e-05, + "loss": 1.3627, "step": 1900 }, { - "epoch": 0.0538694777409391, + "epoch": 0.0743798419281634, "grad_norm": 0.0, - "learning_rate": 1.9970155521489808e-05, - "loss": 1.0594, + "learning_rate": 1.989692335828811e-05, + "loss": 1.2697, "step": 1901 }, { - "epoch": 0.05389781518320157, + "epoch": 0.07441896862039284, "grad_norm": 0.0, - "learning_rate": 1.9970084625283164e-05, - "loss": 1.1589, + "learning_rate": 1.9896741798759578e-05, + "loss": 1.3601, "step": 1902 }, { - "epoch": 0.053926152625464026, + "epoch": 0.07445809531262228, "grad_norm": 0.0, - "learning_rate": 1.9970013645094796e-05, - "loss": 1.1047, + "learning_rate": 1.9896560080301775e-05, + "loss": 1.2968, "step": 1903 }, { - "epoch": 0.053954490067726485, + "epoch": 0.0744972220048517, "grad_norm": 0.0, - "learning_rate": 1.99699425809253e-05, - "loss": 1.2534, + "learning_rate": 1.9896378202917623e-05, + "loss": 1.4075, "step": 1904 }, { - "epoch": 0.05398282750998895, + "epoch": 0.07453634869708115, "grad_norm": 0.0, - "learning_rate": 1.9969871432775273e-05, - "loss": 1.0549, + "learning_rate": 1.9896196166610036e-05, + "loss": 1.3802, "step": 1905 }, { - "epoch": 0.05401116495225141, + "epoch": 0.07457547538931059, "grad_norm": 0.0, - "learning_rate": 1.996980020064532e-05, - "loss": 1.1117, + "learning_rate": 1.989601397138194e-05, + "loss": 1.3283, "step": 1906 }, { - "epoch": 0.05403950239451387, + "epoch": 0.07461460208154003, "grad_norm": 0.0, - "learning_rate": 1.9969728884536035e-05, - "loss": 1.1588, + "learning_rate": 1.9895831617236258e-05, + "loss": 1.1471, "step": 1907 }, { - "epoch": 0.054067839836776335, + "epoch": 0.07465372877376947, "grad_norm": 0.0, - "learning_rate": 1.996965748444802e-05, - "loss": 1.0671, + "learning_rate": 1.9895649104175922e-05, + "loss": 1.2631, "step": 1908 }, { - "epoch": 0.054096177279038794, + "epoch": 0.07469285546599891, "grad_norm": 0.0, - "learning_rate": 1.9969586000381884e-05, - "loss": 1.1024, + "learning_rate": 1.9895466432203857e-05, + "loss": 1.3337, "step": 1909 }, { - "epoch": 0.05412451472130125, + "epoch": 0.07473198215822835, "grad_norm": 0.0, - "learning_rate": 1.996951443233822e-05, - "loss": 1.0492, + "learning_rate": 1.9895283601323007e-05, + "loss": 1.2415, "step": 1910 }, { - "epoch": 0.05415285216356372, + "epoch": 0.07477110885045778, "grad_norm": 0.0, - "learning_rate": 1.996944278031763e-05, - "loss": 1.0605, + "learning_rate": 1.98951006115363e-05, + "loss": 1.3805, "step": 1911 }, { - "epoch": 0.05418118960582618, + "epoch": 0.07481023554268722, "grad_norm": 0.0, - "learning_rate": 1.9969371044320728e-05, - "loss": 1.1163, + "learning_rate": 1.989491746284667e-05, + "loss": 1.1025, "step": 1912 }, { - "epoch": 0.05420952704808864, + "epoch": 0.07484936223491666, "grad_norm": 0.0, - "learning_rate": 1.9969299224348107e-05, - "loss": 1.2451, + "learning_rate": 1.9894734155257074e-05, + "loss": 1.2964, "step": 1913 }, { - "epoch": 0.054237864490351104, + "epoch": 0.0748884889271461, "grad_norm": 0.0, - "learning_rate": 1.996922732040038e-05, - "loss": 1.0366, + "learning_rate": 1.9894550688770442e-05, + "loss": 1.3315, "step": 1914 }, { - "epoch": 0.05426620193261356, + "epoch": 0.07492761561937554, "grad_norm": 0.0, - "learning_rate": 1.9969155332478144e-05, - "loss": 1.1458, + "learning_rate": 1.9894367063389727e-05, + "loss": 1.2904, "step": 1915 }, { - "epoch": 0.05429453937487602, + "epoch": 0.07496674231160498, "grad_norm": 0.0, - "learning_rate": 1.9969083260582017e-05, - "loss": 1.2632, + "learning_rate": 1.989418327911787e-05, + "loss": 1.2506, "step": 1916 }, { - "epoch": 0.05432287681713849, + "epoch": 0.07500586900383442, "grad_norm": 0.0, - "learning_rate": 1.9969011104712596e-05, - "loss": 1.1838, + "learning_rate": 1.989399933595783e-05, + "loss": 1.2877, "step": 1917 }, { - "epoch": 0.05435121425940095, + "epoch": 0.07504499569606385, "grad_norm": 0.0, - "learning_rate": 1.9968938864870494e-05, - "loss": 1.1986, + "learning_rate": 1.989381523391256e-05, + "loss": 1.2717, "step": 1918 }, { - "epoch": 0.054379551701663406, + "epoch": 0.07508412238829329, "grad_norm": 0.0, - "learning_rate": 1.9968866541056317e-05, - "loss": 1.1715, + "learning_rate": 1.9893630972985016e-05, + "loss": 1.277, "step": 1919 }, { - "epoch": 0.05440788914392587, + "epoch": 0.07512324908052273, "grad_norm": 0.0, - "learning_rate": 1.9968794133270678e-05, - "loss": 1.1875, + "learning_rate": 1.9893446553178154e-05, + "loss": 1.2408, "step": 1920 }, { - "epoch": 0.05443622658618833, + "epoch": 0.07516237577275217, "grad_norm": 0.0, - "learning_rate": 1.996872164151418e-05, - "loss": 1.0745, + "learning_rate": 1.989326197449494e-05, + "loss": 1.2579, "step": 1921 }, { - "epoch": 0.05446456402845079, + "epoch": 0.07520150246498161, "grad_norm": 0.0, - "learning_rate": 1.996864906578744e-05, - "loss": 1.2325, + "learning_rate": 1.9893077236938332e-05, + "loss": 1.2526, "step": 1922 }, { - "epoch": 0.054492901470713256, + "epoch": 0.07524062915721105, "grad_norm": 0.0, - "learning_rate": 1.9968576406091066e-05, - "loss": 1.2031, + "learning_rate": 1.98928923405113e-05, + "loss": 1.1964, "step": 1923 }, { - "epoch": 0.054521238912975716, + "epoch": 0.0752797558494405, "grad_norm": 0.0, - "learning_rate": 1.9968503662425672e-05, - "loss": 1.1322, + "learning_rate": 1.9892707285216816e-05, + "loss": 1.2639, "step": 1924 }, { - "epoch": 0.054549576355238175, + "epoch": 0.07531888254166992, "grad_norm": 0.0, - "learning_rate": 1.996843083479187e-05, - "loss": 1.2078, + "learning_rate": 1.9892522071057848e-05, + "loss": 1.2607, "step": 1925 }, { - "epoch": 0.05457791379750064, + "epoch": 0.07535800923389936, "grad_norm": 0.0, - "learning_rate": 1.9968357923190275e-05, - "loss": 1.0382, + "learning_rate": 1.9892336698037373e-05, + "loss": 1.1143, "step": 1926 }, { - "epoch": 0.0546062512397631, + "epoch": 0.0753971359261288, "grad_norm": 0.0, - "learning_rate": 1.9968284927621498e-05, - "loss": 1.1956, + "learning_rate": 1.9892151166158366e-05, + "loss": 1.222, "step": 1927 }, { - "epoch": 0.05463458868202556, + "epoch": 0.07543626261835824, "grad_norm": 0.0, - "learning_rate": 1.9968211848086155e-05, - "loss": 1.1644, + "learning_rate": 1.9891965475423808e-05, + "loss": 1.3527, "step": 1928 }, { - "epoch": 0.054662926124288025, + "epoch": 0.07547538931058768, "grad_norm": 0.0, - "learning_rate": 1.9968138684584862e-05, - "loss": 1.1375, + "learning_rate": 1.9891779625836677e-05, + "loss": 1.2106, "step": 1929 }, { - "epoch": 0.054691263566550484, + "epoch": 0.07551451600281713, "grad_norm": 0.0, - "learning_rate": 1.9968065437118238e-05, - "loss": 1.1125, + "learning_rate": 1.9891593617399962e-05, + "loss": 1.3357, "step": 1930 }, { - "epoch": 0.05471960100881294, + "epoch": 0.07555364269504657, "grad_norm": 0.0, - "learning_rate": 1.9967992105686893e-05, - "loss": 1.1808, + "learning_rate": 1.989140745011665e-05, + "loss": 1.3646, "step": 1931 }, { - "epoch": 0.05474793845107541, + "epoch": 0.075592769387276, "grad_norm": 0.0, - "learning_rate": 1.9967918690291454e-05, - "loss": 1.1215, + "learning_rate": 1.9891221123989727e-05, + "loss": 1.4304, "step": 1932 }, { - "epoch": 0.05477627589333787, + "epoch": 0.07563189607950543, "grad_norm": 0.0, - "learning_rate": 1.9967845190932528e-05, - "loss": 1.0481, + "learning_rate": 1.9891034639022184e-05, + "loss": 1.2086, "step": 1933 }, { - "epoch": 0.05480461333560033, + "epoch": 0.07567102277173487, "grad_norm": 0.0, - "learning_rate": 1.9967771607610746e-05, - "loss": 1.1567, + "learning_rate": 1.9890847995217022e-05, + "loss": 1.2603, "step": 1934 }, { - "epoch": 0.05483295077786279, + "epoch": 0.07571014946396432, "grad_norm": 0.0, - "learning_rate": 1.996769794032672e-05, - "loss": 1.0885, + "learning_rate": 1.9890661192577236e-05, + "loss": 1.2716, "step": 1935 }, { - "epoch": 0.05486128822012525, + "epoch": 0.07574927615619376, "grad_norm": 0.0, - "learning_rate": 1.996762418908107e-05, - "loss": 1.1683, + "learning_rate": 1.9890474231105822e-05, + "loss": 1.205, "step": 1936 }, { - "epoch": 0.05488962566238771, + "epoch": 0.0757884028484232, "grad_norm": 0.0, - "learning_rate": 1.9967550353874426e-05, - "loss": 1.1141, + "learning_rate": 1.9890287110805787e-05, + "loss": 1.3789, "step": 1937 }, { - "epoch": 0.05491796310465018, + "epoch": 0.07582752954065264, "grad_norm": 0.0, - "learning_rate": 1.99674764347074e-05, - "loss": 1.2018, + "learning_rate": 1.9890099831680135e-05, + "loss": 1.3787, "step": 1938 }, { - "epoch": 0.05494630054691264, + "epoch": 0.07586665623288207, "grad_norm": 0.0, - "learning_rate": 1.996740243158062e-05, - "loss": 1.167, + "learning_rate": 1.988991239373187e-05, + "loss": 1.3361, "step": 1939 }, { - "epoch": 0.054974637989175096, + "epoch": 0.0759057829251115, "grad_norm": 0.0, - "learning_rate": 1.9967328344494708e-05, - "loss": 1.1054, + "learning_rate": 1.988972479696401e-05, + "loss": 1.2819, "step": 1940 }, { - "epoch": 0.05500297543143756, + "epoch": 0.07594490961734095, "grad_norm": 0.0, - "learning_rate": 1.996725417345029e-05, - "loss": 1.1057, + "learning_rate": 1.988953704137956e-05, + "loss": 1.1558, "step": 1941 }, { - "epoch": 0.05503131287370002, + "epoch": 0.07598403630957039, "grad_norm": 0.0, - "learning_rate": 1.9967179918447982e-05, - "loss": 1.1088, + "learning_rate": 1.9889349126981535e-05, + "loss": 1.2296, "step": 1942 }, { - "epoch": 0.05505965031596248, + "epoch": 0.07602316300179983, "grad_norm": 0.0, - "learning_rate": 1.996710557948842e-05, - "loss": 1.1828, + "learning_rate": 1.9889161053772958e-05, + "loss": 1.2879, "step": 1943 }, { - "epoch": 0.05508798775822494, + "epoch": 0.07606228969402927, "grad_norm": 0.0, - "learning_rate": 1.9967031156572233e-05, - "loss": 1.1163, + "learning_rate": 1.9888972821756846e-05, + "loss": 1.2853, "step": 1944 }, { - "epoch": 0.055116325200487405, + "epoch": 0.07610141638625871, "grad_norm": 0.0, - "learning_rate": 1.9966956649700034e-05, - "loss": 1.1067, + "learning_rate": 1.988878443093622e-05, + "loss": 1.3057, "step": 1945 }, { - "epoch": 0.055144662642749864, + "epoch": 0.07614054307848815, "grad_norm": 0.0, - "learning_rate": 1.996688205887246e-05, - "loss": 1.08, + "learning_rate": 1.9888595881314106e-05, + "loss": 1.2603, "step": 1946 }, { - "epoch": 0.05517300008501232, + "epoch": 0.07617966977071758, "grad_norm": 0.0, - "learning_rate": 1.9966807384090135e-05, - "loss": 1.0103, + "learning_rate": 1.9888407172893536e-05, + "loss": 1.3647, "step": 1947 }, { - "epoch": 0.05520133752727479, + "epoch": 0.07621879646294702, "grad_norm": 0.0, - "learning_rate": 1.9966732625353695e-05, - "loss": 1.036, + "learning_rate": 1.9888218305677534e-05, + "loss": 1.1335, "step": 1948 }, { - "epoch": 0.05522967496953725, + "epoch": 0.07625792315517646, "grad_norm": 0.0, - "learning_rate": 1.996665778266376e-05, - "loss": 1.1416, + "learning_rate": 1.9888029279669143e-05, + "loss": 1.113, "step": 1949 }, { - "epoch": 0.05525801241179971, + "epoch": 0.0762970498474059, "grad_norm": 0.0, - "learning_rate": 1.996658285602097e-05, - "loss": 1.1298, + "learning_rate": 1.988784009487139e-05, + "loss": 1.3087, "step": 1950 }, { - "epoch": 0.055286349854062174, + "epoch": 0.07633617653963534, "grad_norm": 0.0, - "learning_rate": 1.9966507845425948e-05, - "loss": 1.1344, + "learning_rate": 1.9887650751287314e-05, + "loss": 1.1516, "step": 1951 }, { - "epoch": 0.05531468729632463, + "epoch": 0.07637530323186478, "grad_norm": 0.0, - "learning_rate": 1.9966432750879332e-05, - "loss": 1.1267, + "learning_rate": 1.9887461248919956e-05, + "loss": 1.1907, "step": 1952 }, { - "epoch": 0.05534302473858709, + "epoch": 0.07641442992409422, "grad_norm": 0.0, - "learning_rate": 1.996635757238175e-05, - "loss": 1.1283, + "learning_rate": 1.9887271587772363e-05, + "loss": 1.24, "step": 1953 }, { - "epoch": 0.05537136218084956, + "epoch": 0.07645355661632365, "grad_norm": 0.0, - "learning_rate": 1.996628230993384e-05, - "loss": 1.061, + "learning_rate": 1.9887081767847577e-05, + "loss": 1.2995, "step": 1954 }, { - "epoch": 0.05539969962311202, + "epoch": 0.07649268330855309, "grad_norm": 0.0, - "learning_rate": 1.996620696353623e-05, - "loss": 1.0035, + "learning_rate": 1.9886891789148643e-05, + "loss": 1.1891, "step": 1955 }, { - "epoch": 0.055428037065374476, + "epoch": 0.07653181000078253, "grad_norm": 0.0, - "learning_rate": 1.996613153318956e-05, - "loss": 1.1704, + "learning_rate": 1.9886701651678618e-05, + "loss": 1.2789, "step": 1956 }, { - "epoch": 0.05545637450763694, + "epoch": 0.07657093669301197, "grad_norm": 0.0, - "learning_rate": 1.9966056018894464e-05, - "loss": 1.1116, + "learning_rate": 1.9886511355440558e-05, + "loss": 1.2745, "step": 1957 }, { - "epoch": 0.0554847119498994, + "epoch": 0.07661006338524141, "grad_norm": 0.0, - "learning_rate": 1.9965980420651575e-05, - "loss": 0.9919, + "learning_rate": 1.9886320900437508e-05, + "loss": 1.1973, "step": 1958 }, { - "epoch": 0.05551304939216186, + "epoch": 0.07664919007747086, "grad_norm": 0.0, - "learning_rate": 1.9965904738461534e-05, - "loss": 1.037, + "learning_rate": 1.9886130286672532e-05, + "loss": 1.1722, "step": 1959 }, { - "epoch": 0.055541386834424326, + "epoch": 0.0766883167697003, "grad_norm": 0.0, - "learning_rate": 1.9965828972324974e-05, - "loss": 1.0784, + "learning_rate": 1.9885939514148696e-05, + "loss": 1.2322, "step": 1960 }, { - "epoch": 0.055569724276686785, + "epoch": 0.07672744346192972, "grad_norm": 0.0, - "learning_rate": 1.996575312224254e-05, - "loss": 1.1694, + "learning_rate": 1.9885748582869056e-05, + "loss": 1.307, "step": 1961 }, { - "epoch": 0.055598061718949245, + "epoch": 0.07676657015415916, "grad_norm": 0.0, - "learning_rate": 1.9965677188214863e-05, - "loss": 1.1564, + "learning_rate": 1.9885557492836685e-05, + "loss": 1.309, "step": 1962 }, { - "epoch": 0.05562639916121171, + "epoch": 0.0768056968463886, "grad_norm": 0.0, - "learning_rate": 1.996560117024259e-05, - "loss": 1.102, + "learning_rate": 1.9885366244054646e-05, + "loss": 1.2284, "step": 1963 }, { - "epoch": 0.05565473660347417, + "epoch": 0.07684482353861805, "grad_norm": 0.0, - "learning_rate": 1.9965525068326355e-05, - "loss": 1.0561, + "learning_rate": 1.988517483652601e-05, + "loss": 1.3523, "step": 1964 }, { - "epoch": 0.05568307404573663, + "epoch": 0.07688395023084749, "grad_norm": 0.0, - "learning_rate": 1.99654488824668e-05, - "loss": 1.1088, + "learning_rate": 1.9884983270253855e-05, + "loss": 1.3691, "step": 1965 }, { - "epoch": 0.055711411487999095, + "epoch": 0.07692307692307693, "grad_norm": 0.0, - "learning_rate": 1.9965372612664572e-05, - "loss": 1.0643, + "learning_rate": 1.9884791545241256e-05, + "loss": 1.2338, "step": 1966 }, { - "epoch": 0.055739748930261554, + "epoch": 0.07696220361530637, "grad_norm": 0.0, - "learning_rate": 1.996529625892031e-05, - "loss": 1.1185, + "learning_rate": 1.988459966149129e-05, + "loss": 1.2858, "step": 1967 }, { - "epoch": 0.05576808637252401, + "epoch": 0.0770013303075358, "grad_norm": 0.0, - "learning_rate": 1.9965219821234653e-05, - "loss": 1.1049, + "learning_rate": 1.988440761900704e-05, + "loss": 1.1157, "step": 1968 }, { - "epoch": 0.05579642381478648, + "epoch": 0.07704045699976524, "grad_norm": 0.0, - "learning_rate": 1.9965143299608253e-05, - "loss": 1.054, + "learning_rate": 1.9884215417791587e-05, + "loss": 1.1881, "step": 1969 }, { - "epoch": 0.05582476125704894, + "epoch": 0.07707958369199468, "grad_norm": 0.0, - "learning_rate": 1.996506669404175e-05, - "loss": 1.1921, + "learning_rate": 1.9884023057848025e-05, + "loss": 1.2648, "step": 1970 }, { - "epoch": 0.0558530986993114, + "epoch": 0.07711871038422412, "grad_norm": 0.0, - "learning_rate": 1.996499000453579e-05, - "loss": 1.1811, + "learning_rate": 1.9883830539179435e-05, + "loss": 1.2596, "step": 1971 }, { - "epoch": 0.05588143614157386, + "epoch": 0.07715783707645356, "grad_norm": 0.0, - "learning_rate": 1.9964913231091017e-05, - "loss": 1.1104, + "learning_rate": 1.988363786178891e-05, + "loss": 1.2316, "step": 1972 }, { - "epoch": 0.05590977358383632, + "epoch": 0.077196963768683, "grad_norm": 0.0, - "learning_rate": 1.9964836373708078e-05, - "loss": 1.312, + "learning_rate": 1.9883445025679552e-05, + "loss": 1.1535, "step": 1973 }, { - "epoch": 0.05593811102609878, + "epoch": 0.07723609046091244, "grad_norm": 0.0, - "learning_rate": 1.9964759432387626e-05, - "loss": 1.1518, + "learning_rate": 1.9883252030854444e-05, + "loss": 1.2886, "step": 1974 }, { - "epoch": 0.05596644846836125, + "epoch": 0.07727521715314187, "grad_norm": 0.0, - "learning_rate": 1.9964682407130302e-05, - "loss": 0.987, + "learning_rate": 1.9883058877316697e-05, + "loss": 1.3214, "step": 1975 }, { - "epoch": 0.05599478591062371, + "epoch": 0.07731434384537131, "grad_norm": 0.0, - "learning_rate": 1.996460529793676e-05, - "loss": 1.0355, + "learning_rate": 1.9882865565069408e-05, + "loss": 1.2138, "step": 1976 }, { - "epoch": 0.056023123352886166, + "epoch": 0.07735347053760075, "grad_norm": 0.0, - "learning_rate": 1.9964528104807647e-05, - "loss": 1.2065, + "learning_rate": 1.9882672094115683e-05, + "loss": 1.2271, "step": 1977 }, { - "epoch": 0.05605146079514863, + "epoch": 0.07739259722983019, "grad_norm": 0.0, - "learning_rate": 1.9964450827743613e-05, - "loss": 1.1846, + "learning_rate": 1.9882478464458622e-05, + "loss": 1.1528, "step": 1978 }, { - "epoch": 0.05607979823741109, + "epoch": 0.07743172392205963, "grad_norm": 0.0, - "learning_rate": 1.996437346674531e-05, - "loss": 1.0977, + "learning_rate": 1.9882284676101347e-05, + "loss": 1.2858, "step": 1979 }, { - "epoch": 0.05610813567967355, + "epoch": 0.07747085061428907, "grad_norm": 0.0, - "learning_rate": 1.996429602181339e-05, - "loss": 1.0652, + "learning_rate": 1.988209072904696e-05, + "loss": 1.2916, "step": 1980 }, { - "epoch": 0.056136473121936016, + "epoch": 0.07750997730651851, "grad_norm": 0.0, - "learning_rate": 1.99642184929485e-05, - "loss": 1.0375, + "learning_rate": 1.9881896623298582e-05, + "loss": 1.1839, "step": 1981 }, { - "epoch": 0.056164810564198475, + "epoch": 0.07754910399874794, "grad_norm": 0.0, - "learning_rate": 1.9964140880151302e-05, - "loss": 1.2057, + "learning_rate": 1.9881702358859323e-05, + "loss": 1.1323, "step": 1982 }, { - "epoch": 0.056193148006460934, + "epoch": 0.07758823069097738, "grad_norm": 0.0, - "learning_rate": 1.9964063183422443e-05, - "loss": 1.1166, + "learning_rate": 1.9881507935732308e-05, + "loss": 1.2905, "step": 1983 }, { - "epoch": 0.0562214854487234, + "epoch": 0.07762735738320682, "grad_norm": 0.0, - "learning_rate": 1.9963985402762577e-05, - "loss": 1.11, + "learning_rate": 1.988131335392066e-05, + "loss": 1.1903, "step": 1984 }, { - "epoch": 0.05624982289098586, + "epoch": 0.07766648407543626, "grad_norm": 0.0, - "learning_rate": 1.9963907538172367e-05, - "loss": 1.0883, + "learning_rate": 1.9881118613427502e-05, + "loss": 1.2212, "step": 1985 }, { - "epoch": 0.05627816033324832, + "epoch": 0.0777056107676657, "grad_norm": 0.0, - "learning_rate": 1.9963829589652462e-05, - "loss": 1.0476, + "learning_rate": 1.9880923714255956e-05, + "loss": 1.1963, "step": 1986 }, { - "epoch": 0.056306497775510785, + "epoch": 0.07774473745989514, "grad_norm": 0.0, - "learning_rate": 1.996375155720352e-05, - "loss": 1.1479, + "learning_rate": 1.988072865640916e-05, + "loss": 1.2706, "step": 1987 }, { - "epoch": 0.056334835217773244, + "epoch": 0.07778386415212458, "grad_norm": 0.0, - "learning_rate": 1.99636734408262e-05, - "loss": 1.0582, + "learning_rate": 1.9880533439890245e-05, + "loss": 1.307, "step": 1988 }, { - "epoch": 0.0563631726600357, + "epoch": 0.07782299084435401, "grad_norm": 0.0, - "learning_rate": 1.9963595240521158e-05, - "loss": 1.1201, + "learning_rate": 1.9880338064702337e-05, + "loss": 1.2079, "step": 1989 }, { - "epoch": 0.05639151010229817, + "epoch": 0.07786211753658345, "grad_norm": 0.0, - "learning_rate": 1.9963516956289054e-05, - "loss": 1.0836, + "learning_rate": 1.9880142530848587e-05, + "loss": 1.3634, "step": 1990 }, { - "epoch": 0.05641984754456063, + "epoch": 0.0779012442288129, "grad_norm": 0.0, - "learning_rate": 1.9963438588130547e-05, - "loss": 1.0437, + "learning_rate": 1.9879946838332125e-05, + "loss": 1.3547, "step": 1991 }, { - "epoch": 0.05644818498682309, + "epoch": 0.07794037092104233, "grad_norm": 0.0, - "learning_rate": 1.9963360136046295e-05, - "loss": 1.2092, + "learning_rate": 1.9879750987156095e-05, + "loss": 1.2313, "step": 1992 }, { - "epoch": 0.05647652242908555, + "epoch": 0.07797949761327178, "grad_norm": 0.0, - "learning_rate": 1.996328160003696e-05, - "loss": 1.2022, + "learning_rate": 1.9879554977323653e-05, + "loss": 1.2162, "step": 1993 }, { - "epoch": 0.05650485987134801, + "epoch": 0.07801862430550122, "grad_norm": 0.0, - "learning_rate": 1.996320298010321e-05, - "loss": 1.068, + "learning_rate": 1.987935880883793e-05, + "loss": 1.1378, "step": 1994 }, { - "epoch": 0.05653319731361047, + "epoch": 0.07805775099773066, "grad_norm": 0.0, - "learning_rate": 1.9963124276245695e-05, - "loss": 1.2255, + "learning_rate": 1.9879162481702084e-05, + "loss": 1.1661, "step": 1995 }, { - "epoch": 0.05656153475587294, + "epoch": 0.07809687768996008, "grad_norm": 0.0, - "learning_rate": 1.9963045488465088e-05, - "loss": 0.9931, + "learning_rate": 1.987896599591927e-05, + "loss": 1.191, "step": 1996 }, { - "epoch": 0.056589872198135396, + "epoch": 0.07813600438218953, "grad_norm": 0.0, - "learning_rate": 1.996296661676205e-05, - "loss": 1.1163, + "learning_rate": 1.9878769351492644e-05, + "loss": 1.325, "step": 1997 }, { - "epoch": 0.056618209640397855, + "epoch": 0.07817513107441897, "grad_norm": 0.0, - "learning_rate": 1.996288766113724e-05, - "loss": 1.1258, + "learning_rate": 1.9878572548425356e-05, + "loss": 1.2504, "step": 1998 }, { - "epoch": 0.05664654708266032, + "epoch": 0.0782142577666484, "grad_norm": 0.0, - "learning_rate": 1.9962808621591334e-05, - "loss": 1.1813, + "learning_rate": 1.9878375586720574e-05, + "loss": 1.1767, "step": 1999 }, { - "epoch": 0.05667488452492278, + "epoch": 0.07825338445887785, "grad_norm": 0.0, - "learning_rate": 1.9962729498124983e-05, - "loss": 1.1348, + "learning_rate": 1.987817846638146e-05, + "loss": 1.2444, "step": 2000 }, { - "epoch": 0.05670322196718524, + "epoch": 0.07829251115110729, "grad_norm": 0.0, - "learning_rate": 1.9962650290738866e-05, - "loss": 1.1097, + "learning_rate": 1.9877981187411176e-05, + "loss": 1.3528, "step": 2001 }, { - "epoch": 0.056731559409447706, + "epoch": 0.07833163784333673, "grad_norm": 0.0, - "learning_rate": 1.9962570999433648e-05, - "loss": 1.0868, + "learning_rate": 1.9877783749812892e-05, + "loss": 1.2441, "step": 2002 }, { - "epoch": 0.056759896851710165, + "epoch": 0.07837076453556616, "grad_norm": 0.0, - "learning_rate": 1.9962491624209996e-05, - "loss": 1.1245, + "learning_rate": 1.9877586153589776e-05, + "loss": 1.332, "step": 2003 }, { - "epoch": 0.056788234293972624, + "epoch": 0.0784098912277956, "grad_norm": 0.0, - "learning_rate": 1.9962412165068575e-05, - "loss": 1.19, + "learning_rate": 1.9877388398745006e-05, + "loss": 1.3743, "step": 2004 }, { - "epoch": 0.05681657173623509, + "epoch": 0.07844901792002504, "grad_norm": 0.0, - "learning_rate": 1.9962332622010057e-05, - "loss": 0.9895, + "learning_rate": 1.9877190485281756e-05, + "loss": 1.2224, "step": 2005 }, { - "epoch": 0.05684490917849755, + "epoch": 0.07848814461225448, "grad_norm": 0.0, - "learning_rate": 1.996225299503511e-05, - "loss": 1.1247, + "learning_rate": 1.9876992413203205e-05, + "loss": 1.2276, "step": 2006 }, { - "epoch": 0.05687324662076001, + "epoch": 0.07852727130448392, "grad_norm": 0.0, - "learning_rate": 1.996217328414441e-05, - "loss": 1.0665, + "learning_rate": 1.987679418251253e-05, + "loss": 1.3881, "step": 2007 }, { - "epoch": 0.056901584063022474, + "epoch": 0.07856639799671336, "grad_norm": 0.0, - "learning_rate": 1.9962093489338622e-05, - "loss": 1.1043, + "learning_rate": 1.9876595793212916e-05, + "loss": 1.235, "step": 2008 }, { - "epoch": 0.05692992150528493, + "epoch": 0.0786055246889428, "grad_norm": 0.0, - "learning_rate": 1.9962013610618423e-05, - "loss": 1.2517, + "learning_rate": 1.9876397245307552e-05, + "loss": 1.3197, "step": 2009 }, { - "epoch": 0.05695825894754739, + "epoch": 0.07864465138117224, "grad_norm": 0.0, - "learning_rate": 1.9961933647984487e-05, - "loss": 1.1782, + "learning_rate": 1.9876198538799622e-05, + "loss": 1.2609, "step": 2010 }, { - "epoch": 0.05698659638980986, + "epoch": 0.07868377807340167, "grad_norm": 0.0, - "learning_rate": 1.996185360143748e-05, - "loss": 1.171, + "learning_rate": 1.9875999673692318e-05, + "loss": 1.2108, "step": 2011 }, { - "epoch": 0.05701493383207232, + "epoch": 0.07872290476563111, "grad_norm": 0.0, - "learning_rate": 1.9961773470978083e-05, - "loss": 1.2876, + "learning_rate": 1.9875800649988835e-05, + "loss": 1.1584, "step": 2012 }, { - "epoch": 0.05704327127433478, + "epoch": 0.07876203145786055, "grad_norm": 0.0, - "learning_rate": 1.9961693256606968e-05, - "loss": 1.0604, + "learning_rate": 1.9875601467692372e-05, + "loss": 1.1816, "step": 2013 }, { - "epoch": 0.05707160871659724, + "epoch": 0.07880115815008999, "grad_norm": 0.0, - "learning_rate": 1.996161295832481e-05, - "loss": 1.1537, + "learning_rate": 1.9875402126806126e-05, + "loss": 1.1968, "step": 2014 }, { - "epoch": 0.0570999461588597, + "epoch": 0.07884028484231943, "grad_norm": 0.0, - "learning_rate": 1.996153257613229e-05, - "loss": 1.1883, + "learning_rate": 1.987520262733329e-05, + "loss": 1.2242, "step": 2015 }, { - "epoch": 0.05712828360112216, + "epoch": 0.07887941153454887, "grad_norm": 0.0, - "learning_rate": 1.9961452110030082e-05, - "loss": 1.2714, + "learning_rate": 1.987500296927708e-05, + "loss": 1.2639, "step": 2016 }, { - "epoch": 0.05715662104338463, + "epoch": 0.07891853822677831, "grad_norm": 0.0, - "learning_rate": 1.9961371560018864e-05, - "loss": 1.0589, + "learning_rate": 1.987480315264069e-05, + "loss": 1.1299, "step": 2017 }, { - "epoch": 0.057184958485647086, + "epoch": 0.07895766491900774, "grad_norm": 0.0, - "learning_rate": 1.9961290926099313e-05, - "loss": 1.1746, + "learning_rate": 1.9874603177427337e-05, + "loss": 1.2459, "step": 2018 }, { - "epoch": 0.057213295927909545, + "epoch": 0.07899679161123718, "grad_norm": 0.0, - "learning_rate": 1.996121020827211e-05, - "loss": 1.1079, + "learning_rate": 1.9874403043640234e-05, + "loss": 1.3464, "step": 2019 }, { - "epoch": 0.05724163337017201, + "epoch": 0.07903591830346662, "grad_norm": 0.0, - "learning_rate": 1.9961129406537932e-05, - "loss": 1.1394, + "learning_rate": 1.9874202751282587e-05, + "loss": 1.3138, "step": 2020 }, { - "epoch": 0.05726997081243447, + "epoch": 0.07907504499569606, "grad_norm": 0.0, - "learning_rate": 1.9961048520897465e-05, - "loss": 1.0392, + "learning_rate": 1.987400230035762e-05, + "loss": 1.3686, "step": 2021 }, { - "epoch": 0.05729830825469693, + "epoch": 0.0791141716879255, "grad_norm": 0.0, - "learning_rate": 1.9960967551351385e-05, - "loss": 1.18, + "learning_rate": 1.9873801690868548e-05, + "loss": 1.1448, "step": 2022 }, { - "epoch": 0.057326645696959395, + "epoch": 0.07915329838015495, "grad_norm": 0.0, - "learning_rate": 1.996088649790038e-05, - "loss": 1.1443, + "learning_rate": 1.9873600922818593e-05, + "loss": 1.2674, "step": 2023 }, { - "epoch": 0.057354983139221855, + "epoch": 0.07919242507238439, "grad_norm": 0.0, - "learning_rate": 1.9960805360545124e-05, - "loss": 1.0931, + "learning_rate": 1.9873399996210983e-05, + "loss": 1.3005, "step": 2024 }, { - "epoch": 0.057383320581484314, + "epoch": 0.07923155176461381, "grad_norm": 0.0, - "learning_rate": 1.996072413928631e-05, - "loss": 1.1556, + "learning_rate": 1.987319891104894e-05, + "loss": 1.4323, "step": 2025 }, { - "epoch": 0.05741165802374678, + "epoch": 0.07927067845684325, "grad_norm": 0.0, - "learning_rate": 1.9960642834124614e-05, - "loss": 1.1006, + "learning_rate": 1.987299766733569e-05, + "loss": 1.3154, "step": 2026 }, { - "epoch": 0.05743999546600924, + "epoch": 0.0793098051490727, "grad_norm": 0.0, - "learning_rate": 1.9960561445060726e-05, - "loss": 1.0974, + "learning_rate": 1.987279626507447e-05, + "loss": 1.2755, "step": 2027 }, { - "epoch": 0.0574683329082717, + "epoch": 0.07934893184130214, "grad_norm": 0.0, - "learning_rate": 1.996047997209533e-05, - "loss": 1.1049, + "learning_rate": 1.9872594704268516e-05, + "loss": 1.2774, "step": 2028 }, { - "epoch": 0.057496670350534164, + "epoch": 0.07938805853353158, "grad_norm": 0.0, - "learning_rate": 1.9960398415229114e-05, - "loss": 1.09, + "learning_rate": 1.987239298492106e-05, + "loss": 1.3884, "step": 2029 }, { - "epoch": 0.05752500779279662, + "epoch": 0.07942718522576102, "grad_norm": 0.0, - "learning_rate": 1.9960316774462766e-05, - "loss": 1.2575, + "learning_rate": 1.9872191107035347e-05, + "loss": 1.2461, "step": 2030 }, { - "epoch": 0.05755334523505908, + "epoch": 0.07946631191799046, "grad_norm": 0.0, - "learning_rate": 1.9960235049796967e-05, - "loss": 1.1513, + "learning_rate": 1.987198907061461e-05, + "loss": 1.2164, "step": 2031 }, { - "epoch": 0.05758168267732155, + "epoch": 0.07950543861021989, "grad_norm": 0.0, - "learning_rate": 1.996015324123241e-05, - "loss": 1.0776, + "learning_rate": 1.9871786875662102e-05, + "loss": 1.2709, "step": 2032 }, { - "epoch": 0.05761002011958401, + "epoch": 0.07954456530244933, "grad_norm": 0.0, - "learning_rate": 1.9960071348769783e-05, - "loss": 1.1188, + "learning_rate": 1.9871584522181068e-05, + "loss": 1.1746, "step": 2033 }, { - "epoch": 0.057638357561846466, + "epoch": 0.07958369199467877, "grad_norm": 0.0, - "learning_rate": 1.9959989372409777e-05, - "loss": 1.1252, + "learning_rate": 1.9871382010174755e-05, + "loss": 1.3473, "step": 2034 }, { - "epoch": 0.05766669500410893, + "epoch": 0.07962281868690821, "grad_norm": 0.0, - "learning_rate": 1.9959907312153085e-05, - "loss": 1.1537, + "learning_rate": 1.9871179339646415e-05, + "loss": 1.2283, "step": 2035 }, { - "epoch": 0.05769503244637139, + "epoch": 0.07966194537913765, "grad_norm": 0.0, - "learning_rate": 1.995982516800039e-05, - "loss": 1.1602, + "learning_rate": 1.9870976510599305e-05, + "loss": 1.2024, "step": 2036 }, { - "epoch": 0.05772336988863385, + "epoch": 0.07970107207136709, "grad_norm": 0.0, - "learning_rate": 1.9959742939952393e-05, - "loss": 1.0836, + "learning_rate": 1.987077352303668e-05, + "loss": 1.2036, "step": 2037 }, { - "epoch": 0.05775170733089632, + "epoch": 0.07974019876359653, "grad_norm": 0.0, - "learning_rate": 1.9959660628009782e-05, - "loss": 1.1567, + "learning_rate": 1.9870570376961805e-05, + "loss": 1.3672, "step": 2038 }, { - "epoch": 0.057780044773158776, + "epoch": 0.07977932545582596, "grad_norm": 0.0, - "learning_rate": 1.995957823217325e-05, - "loss": 1.0687, + "learning_rate": 1.9870367072377937e-05, + "loss": 1.2617, "step": 2039 }, { - "epoch": 0.057808382215421235, + "epoch": 0.0798184521480554, "grad_norm": 0.0, - "learning_rate": 1.995949575244349e-05, - "loss": 1.2011, + "learning_rate": 1.9870163609288336e-05, + "loss": 1.3557, "step": 2040 }, { - "epoch": 0.0578367196576837, + "epoch": 0.07985757884028484, "grad_norm": 0.0, - "learning_rate": 1.9959413188821204e-05, - "loss": 1.1225, + "learning_rate": 1.9869959987696282e-05, + "loss": 1.2239, "step": 2041 }, { - "epoch": 0.05786505709994616, + "epoch": 0.07989670553251428, "grad_norm": 0.0, - "learning_rate": 1.995933054130708e-05, - "loss": 1.1398, + "learning_rate": 1.9869756207605038e-05, + "loss": 1.1373, "step": 2042 }, { - "epoch": 0.05789339454220862, + "epoch": 0.07993583222474372, "grad_norm": 0.0, - "learning_rate": 1.9959247809901817e-05, - "loss": 1.067, + "learning_rate": 1.9869552269017876e-05, + "loss": 1.1147, "step": 2043 }, { - "epoch": 0.05792173198447108, + "epoch": 0.07997495891697316, "grad_norm": 0.0, - "learning_rate": 1.995916499460611e-05, - "loss": 1.1537, + "learning_rate": 1.9869348171938074e-05, + "loss": 1.2496, "step": 2044 }, { - "epoch": 0.057950069426733544, + "epoch": 0.0800140856092026, "grad_norm": 0.0, - "learning_rate": 1.9959082095420658e-05, - "loss": 1.0695, + "learning_rate": 1.9869143916368903e-05, + "loss": 1.1543, "step": 2045 }, { - "epoch": 0.057978406868996, + "epoch": 0.08005321230143203, "grad_norm": 0.0, - "learning_rate": 1.995899911234616e-05, - "loss": 1.0679, + "learning_rate": 1.9868939502313647e-05, + "loss": 1.2766, "step": 2046 }, { - "epoch": 0.05800674431125846, + "epoch": 0.08009233899366147, "grad_norm": 0.0, - "learning_rate": 1.9958916045383317e-05, - "loss": 1.1238, + "learning_rate": 1.986873492977559e-05, + "loss": 1.356, "step": 2047 }, { - "epoch": 0.05803508175352093, + "epoch": 0.08013146568589091, "grad_norm": 0.0, - "learning_rate": 1.9958832894532824e-05, - "loss": 1.076, + "learning_rate": 1.986853019875802e-05, + "loss": 1.2643, "step": 2048 }, { - "epoch": 0.05806341919578339, + "epoch": 0.08017059237812035, "grad_norm": 0.0, - "learning_rate": 1.9958749659795382e-05, - "loss": 1.0526, + "learning_rate": 1.9868325309264217e-05, + "loss": 1.3496, "step": 2049 }, { - "epoch": 0.05809175663804585, + "epoch": 0.0802097190703498, "grad_norm": 0.0, - "learning_rate": 1.9958666341171694e-05, - "loss": 1.0263, + "learning_rate": 1.9868120261297476e-05, + "loss": 1.2707, "step": 2050 }, { - "epoch": 0.05812009408030831, + "epoch": 0.08024884576257924, "grad_norm": 0.0, - "learning_rate": 1.9958582938662464e-05, - "loss": 1.0834, + "learning_rate": 1.986791505486109e-05, + "loss": 1.1767, "step": 2051 }, { - "epoch": 0.05814843152257077, + "epoch": 0.08028797245480868, "grad_norm": 0.0, - "learning_rate": 1.995849945226839e-05, - "loss": 1.1623, + "learning_rate": 1.9867709689958352e-05, + "loss": 1.4019, "step": 2052 }, { - "epoch": 0.05817676896483323, + "epoch": 0.0803270991470381, "grad_norm": 0.0, - "learning_rate": 1.9958415881990176e-05, - "loss": 1.1835, + "learning_rate": 1.9867504166592563e-05, + "loss": 1.2551, "step": 2053 }, { - "epoch": 0.0582051064070957, + "epoch": 0.08036622583926754, "grad_norm": 0.0, - "learning_rate": 1.9958332227828525e-05, - "loss": 1.125, + "learning_rate": 1.9867298484767022e-05, + "loss": 1.244, "step": 2054 }, { - "epoch": 0.058233443849358156, + "epoch": 0.08040535253149698, "grad_norm": 0.0, - "learning_rate": 1.995824848978415e-05, - "loss": 1.0357, + "learning_rate": 1.986709264448503e-05, + "loss": 1.1669, "step": 2055 }, { - "epoch": 0.058261781291620615, + "epoch": 0.08044447922372643, "grad_norm": 0.0, - "learning_rate": 1.9958164667857747e-05, - "loss": 1.0747, + "learning_rate": 1.9866886645749895e-05, + "loss": 1.3257, "step": 2056 }, { - "epoch": 0.05829011873388308, + "epoch": 0.08048360591595587, "grad_norm": 0.0, - "learning_rate": 1.9958080762050023e-05, - "loss": 1.1406, + "learning_rate": 1.9866680488564927e-05, + "loss": 1.2372, "step": 2057 }, { - "epoch": 0.05831845617614554, + "epoch": 0.08052273260818531, "grad_norm": 0.0, - "learning_rate": 1.995799677236169e-05, - "loss": 1.1326, + "learning_rate": 1.986647417293343e-05, + "loss": 1.3104, "step": 2058 }, { - "epoch": 0.058346793618408, + "epoch": 0.08056185930041475, "grad_norm": 0.0, - "learning_rate": 1.995791269879345e-05, - "loss": 1.1656, + "learning_rate": 1.9866267698858722e-05, + "loss": 1.4178, "step": 2059 }, { - "epoch": 0.058375131060670465, + "epoch": 0.08060098599264418, "grad_norm": 0.0, - "learning_rate": 1.9957828541346014e-05, - "loss": 1.0488, + "learning_rate": 1.986606106634412e-05, + "loss": 1.203, "step": 2060 }, { - "epoch": 0.058403468502932925, + "epoch": 0.08064011268487362, "grad_norm": 0.0, - "learning_rate": 1.995774430002009e-05, - "loss": 1.069, + "learning_rate": 1.986585427539294e-05, + "loss": 1.4532, "step": 2061 }, { - "epoch": 0.058431805945195384, + "epoch": 0.08067923937710306, "grad_norm": 0.0, - "learning_rate": 1.9957659974816393e-05, - "loss": 1.0923, + "learning_rate": 1.98656473260085e-05, + "loss": 1.3433, "step": 2062 }, { - "epoch": 0.05846014338745785, + "epoch": 0.0807183660693325, "grad_norm": 0.0, - "learning_rate": 1.995757556573562e-05, - "loss": 1.1646, + "learning_rate": 1.9865440218194126e-05, + "loss": 1.2357, "step": 2063 }, { - "epoch": 0.05848848082972031, + "epoch": 0.08075749276156194, "grad_norm": 0.0, - "learning_rate": 1.99574910727785e-05, - "loss": 1.1666, + "learning_rate": 1.9865232951953145e-05, + "loss": 1.2809, "step": 2064 }, { - "epoch": 0.05851681827198277, + "epoch": 0.08079661945379138, "grad_norm": 0.0, - "learning_rate": 1.9957406495945725e-05, - "loss": 1.1722, + "learning_rate": 1.9865025527288888e-05, + "loss": 1.1917, "step": 2065 }, { - "epoch": 0.058545155714245234, + "epoch": 0.08083574614602082, "grad_norm": 0.0, - "learning_rate": 1.9957321835238024e-05, - "loss": 1.1044, + "learning_rate": 1.9864817944204682e-05, + "loss": 1.1599, "step": 2066 }, { - "epoch": 0.05857349315650769, + "epoch": 0.08087487283825025, "grad_norm": 0.0, - "learning_rate": 1.9957237090656104e-05, - "loss": 1.1116, + "learning_rate": 1.9864610202703858e-05, + "loss": 1.2741, "step": 2067 }, { - "epoch": 0.05860183059877015, + "epoch": 0.08091399953047969, "grad_norm": 0.0, - "learning_rate": 1.9957152262200673e-05, - "loss": 1.0358, + "learning_rate": 1.9864402302789757e-05, + "loss": 1.1345, "step": 2068 }, { - "epoch": 0.05863016804103262, + "epoch": 0.08095312622270913, "grad_norm": 0.0, - "learning_rate": 1.9957067349872457e-05, - "loss": 1.083, + "learning_rate": 1.9864194244465715e-05, + "loss": 1.2741, "step": 2069 }, { - "epoch": 0.05865850548329508, + "epoch": 0.08099225291493857, "grad_norm": 0.0, - "learning_rate": 1.9956982353672163e-05, - "loss": 1.0949, + "learning_rate": 1.9863986027735077e-05, + "loss": 1.3484, "step": 2070 }, { - "epoch": 0.058686842925557536, + "epoch": 0.08103137960716801, "grad_norm": 0.0, - "learning_rate": 1.995689727360051e-05, - "loss": 1.0919, + "learning_rate": 1.9863777652601178e-05, + "loss": 1.2541, "step": 2071 }, { - "epoch": 0.05871518036782, + "epoch": 0.08107050629939745, "grad_norm": 0.0, - "learning_rate": 1.995681210965821e-05, - "loss": 1.1145, + "learning_rate": 1.9863569119067373e-05, + "loss": 1.2376, "step": 2072 }, { - "epoch": 0.05874351781008246, + "epoch": 0.08110963299162689, "grad_norm": 0.0, - "learning_rate": 1.995672686184599e-05, - "loss": 1.1235, + "learning_rate": 1.986336042713701e-05, + "loss": 1.2126, "step": 2073 }, { - "epoch": 0.05877185525234492, + "epoch": 0.08114875968385633, "grad_norm": 0.0, - "learning_rate": 1.995664153016456e-05, - "loss": 1.0961, + "learning_rate": 1.9863151576813434e-05, + "loss": 1.3079, "step": 2074 }, { - "epoch": 0.05880019269460739, + "epoch": 0.08118788637608576, "grad_norm": 0.0, - "learning_rate": 1.9956556114614638e-05, - "loss": 1.2047, + "learning_rate": 1.9862942568100004e-05, + "loss": 1.1422, "step": 2075 }, { - "epoch": 0.058828530136869846, + "epoch": 0.0812270130683152, "grad_norm": 0.0, - "learning_rate": 1.995647061519695e-05, - "loss": 1.2156, + "learning_rate": 1.9862733401000077e-05, + "loss": 1.2272, "step": 2076 }, { - "epoch": 0.058856867579132305, + "epoch": 0.08126613976054464, "grad_norm": 0.0, - "learning_rate": 1.995638503191221e-05, - "loss": 1.0861, + "learning_rate": 1.986252407551701e-05, + "loss": 1.2081, "step": 2077 }, { - "epoch": 0.05888520502139477, + "epoch": 0.08130526645277408, "grad_norm": 0.0, - "learning_rate": 1.9956299364761143e-05, - "loss": 1.1419, + "learning_rate": 1.9862314591654163e-05, + "loss": 1.1804, "step": 2078 }, { - "epoch": 0.05891354246365723, + "epoch": 0.08134439314500352, "grad_norm": 0.0, - "learning_rate": 1.995621361374447e-05, - "loss": 1.0717, + "learning_rate": 1.9862104949414902e-05, + "loss": 1.1267, "step": 2079 }, { - "epoch": 0.05894187990591969, + "epoch": 0.08138351983723296, "grad_norm": 0.0, - "learning_rate": 1.9956127778862917e-05, - "loss": 1.0584, + "learning_rate": 1.9861895148802594e-05, + "loss": 1.2889, "step": 2080 }, { - "epoch": 0.058970217348182155, + "epoch": 0.0814226465294624, "grad_norm": 0.0, - "learning_rate": 1.9956041860117196e-05, - "loss": 1.1074, + "learning_rate": 1.9861685189820608e-05, + "loss": 1.3091, "step": 2081 }, { - "epoch": 0.058998554790444614, + "epoch": 0.08146177322169183, "grad_norm": 0.0, - "learning_rate": 1.9955955857508038e-05, - "loss": 1.0676, + "learning_rate": 1.9861475072472313e-05, + "loss": 1.3091, "step": 2082 }, { - "epoch": 0.05902689223270707, + "epoch": 0.08150089991392127, "grad_norm": 0.0, - "learning_rate": 1.9955869771036167e-05, - "loss": 1.1502, + "learning_rate": 1.986126479676109e-05, + "loss": 1.2316, "step": 2083 }, { - "epoch": 0.05905522967496954, + "epoch": 0.08154002660615071, "grad_norm": 0.0, - "learning_rate": 1.9955783600702308e-05, - "loss": 1.1626, + "learning_rate": 1.986105436269031e-05, + "loss": 1.1272, "step": 2084 }, { - "epoch": 0.059083567117232, + "epoch": 0.08157915329838016, "grad_norm": 0.0, - "learning_rate": 1.9955697346507187e-05, - "loss": 1.1803, + "learning_rate": 1.986084377026335e-05, + "loss": 1.1979, "step": 2085 }, { - "epoch": 0.05911190455949446, + "epoch": 0.0816182799906096, "grad_norm": 0.0, - "learning_rate": 1.995561100845153e-05, - "loss": 1.0986, + "learning_rate": 1.98606330194836e-05, + "loss": 1.2325, "step": 2086 }, { - "epoch": 0.059140242001756924, + "epoch": 0.08165740668283904, "grad_norm": 0.0, - "learning_rate": 1.9955524586536067e-05, - "loss": 1.1199, + "learning_rate": 1.9860422110354435e-05, + "loss": 1.3016, "step": 2087 }, { - "epoch": 0.05916857944401938, + "epoch": 0.08169653337506848, "grad_norm": 0.0, - "learning_rate": 1.9955438080761525e-05, - "loss": 1.0354, + "learning_rate": 1.986021104287925e-05, + "loss": 1.2198, "step": 2088 }, { - "epoch": 0.05919691688628184, + "epoch": 0.0817356600672979, "grad_norm": 0.0, - "learning_rate": 1.9955351491128624e-05, - "loss": 0.9964, + "learning_rate": 1.985999981706143e-05, + "loss": 1.1501, "step": 2089 }, { - "epoch": 0.05922525432854431, + "epoch": 0.08177478675952735, "grad_norm": 0.0, - "learning_rate": 1.9955264817638105e-05, - "loss": 1.1279, + "learning_rate": 1.985978843290437e-05, + "loss": 1.1739, "step": 2090 }, { - "epoch": 0.05925359177080677, + "epoch": 0.08181391345175679, "grad_norm": 0.0, - "learning_rate": 1.9955178060290695e-05, - "loss": 1.0486, + "learning_rate": 1.985957689041146e-05, + "loss": 1.2869, "step": 2091 }, { - "epoch": 0.059281929213069226, + "epoch": 0.08185304014398623, "grad_norm": 0.0, - "learning_rate": 1.9955091219087125e-05, - "loss": 1.3294, + "learning_rate": 1.9859365189586102e-05, + "loss": 1.3025, "step": 2092 }, { - "epoch": 0.05931026665533169, + "epoch": 0.08189216683621567, "grad_norm": 0.0, - "learning_rate": 1.995500429402812e-05, - "loss": 1.0923, + "learning_rate": 1.9859153330431692e-05, + "loss": 1.3046, "step": 2093 }, { - "epoch": 0.05933860409759415, + "epoch": 0.08193129352844511, "grad_norm": 0.0, - "learning_rate": 1.9954917285114418e-05, - "loss": 1.2206, + "learning_rate": 1.9858941312951633e-05, + "loss": 1.2119, "step": 2094 }, { - "epoch": 0.05936694153985661, + "epoch": 0.08197042022067455, "grad_norm": 0.0, - "learning_rate": 1.9954830192346752e-05, - "loss": 1.1447, + "learning_rate": 1.985872913714933e-05, + "loss": 1.3358, "step": 2095 }, { - "epoch": 0.059395278982119076, + "epoch": 0.08200954691290398, "grad_norm": 0.0, - "learning_rate": 1.9954743015725856e-05, - "loss": 1.0134, + "learning_rate": 1.9858516803028193e-05, + "loss": 1.2052, "step": 2096 }, { - "epoch": 0.059423616424381535, + "epoch": 0.08204867360513342, "grad_norm": 0.0, - "learning_rate": 1.9954655755252463e-05, - "loss": 1.1626, + "learning_rate": 1.985830431059163e-05, + "loss": 1.1996, "step": 2097 }, { - "epoch": 0.059451953866643995, + "epoch": 0.08208780029736286, "grad_norm": 0.0, - "learning_rate": 1.99545684109273e-05, - "loss": 1.1118, + "learning_rate": 1.9858091659843054e-05, + "loss": 1.2026, "step": 2098 }, { - "epoch": 0.05948029130890646, + "epoch": 0.0821269269895923, "grad_norm": 0.0, - "learning_rate": 1.995448098275112e-05, - "loss": 0.9279, + "learning_rate": 1.9857878850785877e-05, + "loss": 1.284, "step": 2099 }, { - "epoch": 0.05950862875116892, + "epoch": 0.08216605368182174, "grad_norm": 0.0, - "learning_rate": 1.995439347072465e-05, - "loss": 1.0775, + "learning_rate": 1.9857665883423518e-05, + "loss": 1.2417, "step": 2100 }, { - "epoch": 0.05953696619343138, + "epoch": 0.08220518037405118, "grad_norm": 0.0, - "learning_rate": 1.995430587484862e-05, - "loss": 1.1198, + "learning_rate": 1.9857452757759398e-05, + "loss": 1.1869, "step": 2101 }, { - "epoch": 0.059565303635693845, + "epoch": 0.08224430706628062, "grad_norm": 0.0, - "learning_rate": 1.9954218195123782e-05, - "loss": 1.11, + "learning_rate": 1.9857239473796936e-05, + "loss": 1.2828, "step": 2102 }, { - "epoch": 0.059593641077956304, + "epoch": 0.08228343375851005, "grad_norm": 0.0, - "learning_rate": 1.9954130431550867e-05, - "loss": 1.2081, + "learning_rate": 1.985702603153956e-05, + "loss": 1.342, "step": 2103 }, { - "epoch": 0.05962197852021876, + "epoch": 0.08232256045073949, "grad_norm": 0.0, - "learning_rate": 1.9954042584130614e-05, - "loss": 1.002, + "learning_rate": 1.9856812430990705e-05, + "loss": 1.1622, "step": 2104 }, { - "epoch": 0.05965031596248123, + "epoch": 0.08236168714296893, "grad_norm": 0.0, - "learning_rate": 1.9953954652863763e-05, - "loss": 1.0647, + "learning_rate": 1.9856598672153783e-05, + "loss": 1.245, "step": 2105 }, { - "epoch": 0.05967865340474369, + "epoch": 0.08240081383519837, "grad_norm": 0.0, - "learning_rate": 1.9953866637751054e-05, - "loss": 1.0983, + "learning_rate": 1.9856384755032245e-05, + "loss": 1.2698, "step": 2106 }, { - "epoch": 0.05970699084700615, + "epoch": 0.08243994052742781, "grad_norm": 0.0, - "learning_rate": 1.9953778538793235e-05, - "loss": 1.1225, + "learning_rate": 1.9856170679629516e-05, + "loss": 1.15, "step": 2107 }, { - "epoch": 0.05973532828926861, + "epoch": 0.08247906721965725, "grad_norm": 0.0, - "learning_rate": 1.995369035599104e-05, - "loss": 1.0398, + "learning_rate": 1.9855956445949037e-05, + "loss": 1.1405, "step": 2108 }, { - "epoch": 0.05976366573153107, + "epoch": 0.0825181939118867, "grad_norm": 0.0, - "learning_rate": 1.9953602089345215e-05, - "loss": 1.2196, + "learning_rate": 1.9855742053994246e-05, + "loss": 1.3607, "step": 2109 }, { - "epoch": 0.05979200317379353, + "epoch": 0.08255732060411612, "grad_norm": 0.0, - "learning_rate": 1.9953513738856506e-05, - "loss": 1.1114, + "learning_rate": 1.9855527503768587e-05, + "loss": 1.4602, "step": 2110 }, { - "epoch": 0.059820340616056, + "epoch": 0.08259644729634556, "grad_norm": 0.0, - "learning_rate": 1.995342530452565e-05, - "loss": 1.1993, + "learning_rate": 1.985531279527551e-05, + "loss": 1.2855, "step": 2111 }, { - "epoch": 0.05984867805831846, + "epoch": 0.082635573988575, "grad_norm": 0.0, - "learning_rate": 1.99533367863534e-05, - "loss": 1.2133, + "learning_rate": 1.9855097928518456e-05, + "loss": 1.3413, "step": 2112 }, { - "epoch": 0.059877015500580916, + "epoch": 0.08267470068080444, "grad_norm": 0.0, - "learning_rate": 1.9953248184340497e-05, - "loss": 1.1438, + "learning_rate": 1.985488290350088e-05, + "loss": 1.3031, "step": 2113 }, { - "epoch": 0.05990535294284338, + "epoch": 0.08271382737303389, "grad_norm": 0.0, - "learning_rate": 1.995315949848769e-05, - "loss": 1.1371, + "learning_rate": 1.985466772022623e-05, + "loss": 1.228, "step": 2114 }, { - "epoch": 0.05993369038510584, + "epoch": 0.08275295406526333, "grad_norm": 0.0, - "learning_rate": 1.995307072879572e-05, - "loss": 1.1832, + "learning_rate": 1.9854452378697968e-05, + "loss": 1.2708, "step": 2115 }, { - "epoch": 0.0599620278273683, + "epoch": 0.08279208075749277, "grad_norm": 0.0, - "learning_rate": 1.9952981875265346e-05, - "loss": 1.1483, + "learning_rate": 1.985423687891955e-05, + "loss": 1.3062, "step": 2116 }, { - "epoch": 0.059990365269630766, + "epoch": 0.0828312074497222, "grad_norm": 0.0, - "learning_rate": 1.9952892937897304e-05, - "loss": 1.1371, + "learning_rate": 1.9854021220894433e-05, + "loss": 1.1624, "step": 2117 }, { - "epoch": 0.060018702711893225, + "epoch": 0.08287033414195163, "grad_norm": 0.0, - "learning_rate": 1.9952803916692352e-05, - "loss": 1.1276, + "learning_rate": 1.9853805404626084e-05, + "loss": 1.3208, "step": 2118 }, { - "epoch": 0.060047040154155684, + "epoch": 0.08290946083418108, "grad_norm": 0.0, - "learning_rate": 1.9952714811651234e-05, - "loss": 1.1456, + "learning_rate": 1.985358943011797e-05, + "loss": 1.2802, "step": 2119 }, { - "epoch": 0.06007537759641815, + "epoch": 0.08294858752641052, "grad_norm": 0.0, - "learning_rate": 1.9952625622774708e-05, - "loss": 1.2197, + "learning_rate": 1.9853373297373554e-05, + "loss": 1.2278, "step": 2120 }, { - "epoch": 0.06010371503868061, + "epoch": 0.08298771421863996, "grad_norm": 0.0, - "learning_rate": 1.9952536350063516e-05, - "loss": 1.0703, + "learning_rate": 1.9853157006396312e-05, + "loss": 1.2787, "step": 2121 }, { - "epoch": 0.06013205248094307, + "epoch": 0.0830268409108694, "grad_norm": 0.0, - "learning_rate": 1.9952446993518417e-05, - "loss": 1.1731, + "learning_rate": 1.985294055718971e-05, + "loss": 1.3176, "step": 2122 }, { - "epoch": 0.060160389923205534, + "epoch": 0.08306596760309884, "grad_norm": 0.0, - "learning_rate": 1.995235755314016e-05, - "loss": 1.1009, + "learning_rate": 1.9852723949757234e-05, + "loss": 1.2279, "step": 2123 }, { - "epoch": 0.060188727365467994, + "epoch": 0.08310509429532827, "grad_norm": 0.0, - "learning_rate": 1.9952268028929497e-05, - "loss": 1.1136, + "learning_rate": 1.9852507184102356e-05, + "loss": 1.2298, "step": 2124 }, { - "epoch": 0.06021706480773045, + "epoch": 0.08314422098755771, "grad_norm": 0.0, - "learning_rate": 1.995217842088719e-05, - "loss": 1.0654, + "learning_rate": 1.9852290260228558e-05, + "loss": 1.2598, "step": 2125 }, { - "epoch": 0.06024540224999292, + "epoch": 0.08318334767978715, "grad_norm": 0.0, - "learning_rate": 1.9952088729013985e-05, - "loss": 1.1465, + "learning_rate": 1.985207317813933e-05, + "loss": 1.2824, "step": 2126 }, { - "epoch": 0.06027373969225538, + "epoch": 0.08322247437201659, "grad_norm": 0.0, - "learning_rate": 1.995199895331064e-05, - "loss": 1.0484, + "learning_rate": 1.9851855937838144e-05, + "loss": 1.3816, "step": 2127 }, { - "epoch": 0.06030207713451784, + "epoch": 0.08326160106424603, "grad_norm": 0.0, - "learning_rate": 1.9951909093777917e-05, - "loss": 1.0954, + "learning_rate": 1.98516385393285e-05, + "loss": 1.2876, "step": 2128 }, { - "epoch": 0.0603304145767803, + "epoch": 0.08330072775647547, "grad_norm": 0.0, - "learning_rate": 1.9951819150416564e-05, - "loss": 1.1063, + "learning_rate": 1.9851420982613888e-05, + "loss": 1.2319, "step": 2129 }, { - "epoch": 0.06035875201904276, + "epoch": 0.08333985444870491, "grad_norm": 0.0, - "learning_rate": 1.9951729123227346e-05, - "loss": 1.1536, + "learning_rate": 1.9851203267697796e-05, + "loss": 1.3121, "step": 2130 }, { - "epoch": 0.06038708946130522, + "epoch": 0.08337898114093435, "grad_norm": 0.0, - "learning_rate": 1.9951639012211017e-05, - "loss": 1.1517, + "learning_rate": 1.9850985394583725e-05, + "loss": 1.2464, "step": 2131 }, { - "epoch": 0.06041542690356769, + "epoch": 0.08341810783316378, "grad_norm": 0.0, - "learning_rate": 1.9951548817368337e-05, - "loss": 1.2477, + "learning_rate": 1.985076736327517e-05, + "loss": 1.1261, "step": 2132 }, { - "epoch": 0.060443764345830146, + "epoch": 0.08345723452539322, "grad_norm": 0.0, - "learning_rate": 1.9951458538700065e-05, - "loss": 1.1092, + "learning_rate": 1.985054917377564e-05, + "loss": 1.1036, "step": 2133 }, { - "epoch": 0.060472101788092605, + "epoch": 0.08349636121762266, "grad_norm": 0.0, - "learning_rate": 1.9951368176206962e-05, - "loss": 1.121, + "learning_rate": 1.9850330826088632e-05, + "loss": 1.2057, "step": 2134 }, { - "epoch": 0.06050043923035507, + "epoch": 0.0835354879098521, "grad_norm": 0.0, - "learning_rate": 1.9951277729889792e-05, - "loss": 1.0853, + "learning_rate": 1.9850112320217652e-05, + "loss": 1.209, "step": 2135 }, { - "epoch": 0.06052877667261753, + "epoch": 0.08357461460208154, "grad_norm": 0.0, - "learning_rate": 1.9951187199749313e-05, - "loss": 1.1932, + "learning_rate": 1.984989365616621e-05, + "loss": 1.227, "step": 2136 }, { - "epoch": 0.06055711411487999, + "epoch": 0.08361374129431098, "grad_norm": 0.0, - "learning_rate": 1.9951096585786287e-05, - "loss": 1.0425, + "learning_rate": 1.984967483393782e-05, + "loss": 1.1523, "step": 2137 }, { - "epoch": 0.060585451557142456, + "epoch": 0.08365286798654042, "grad_norm": 0.0, - "learning_rate": 1.995100588800148e-05, - "loss": 1.1474, + "learning_rate": 1.9849455853535996e-05, + "loss": 1.3145, "step": 2138 }, { - "epoch": 0.060613788999404915, + "epoch": 0.08369199467876985, "grad_norm": 0.0, - "learning_rate": 1.995091510639566e-05, - "loss": 1.2123, + "learning_rate": 1.9849236714964256e-05, + "loss": 1.2721, "step": 2139 }, { - "epoch": 0.060642126441667374, + "epoch": 0.08373112137099929, "grad_norm": 0.0, - "learning_rate": 1.9950824240969582e-05, - "loss": 1.0498, + "learning_rate": 1.984901741822611e-05, + "loss": 1.1634, "step": 2140 }, { - "epoch": 0.06067046388392984, + "epoch": 0.08377024806322873, "grad_norm": 0.0, - "learning_rate": 1.9950733291724018e-05, - "loss": 1.1389, + "learning_rate": 1.984879796332509e-05, + "loss": 1.2434, "step": 2141 }, { - "epoch": 0.0606988013261923, + "epoch": 0.08380937475545817, "grad_norm": 0.0, - "learning_rate": 1.995064225865973e-05, - "loss": 1.0969, + "learning_rate": 1.9848578350264713e-05, + "loss": 1.188, "step": 2142 }, { - "epoch": 0.06072713876845476, + "epoch": 0.08384850144768762, "grad_norm": 0.0, - "learning_rate": 1.9950551141777487e-05, - "loss": 1.0574, + "learning_rate": 1.984835857904851e-05, + "loss": 1.2159, "step": 2143 }, { - "epoch": 0.06075547621071722, + "epoch": 0.08388762813991706, "grad_norm": 0.0, - "learning_rate": 1.995045994107806e-05, - "loss": 1.1234, + "learning_rate": 1.9848138649680008e-05, + "loss": 1.2916, "step": 2144 }, { - "epoch": 0.06078381365297968, + "epoch": 0.0839267548321465, "grad_norm": 0.0, - "learning_rate": 1.995036865656221e-05, - "loss": 1.0447, + "learning_rate": 1.984791856216274e-05, + "loss": 1.2823, "step": 2145 }, { - "epoch": 0.06081215109524214, + "epoch": 0.08396588152437592, "grad_norm": 0.0, - "learning_rate": 1.9950277288230714e-05, - "loss": 1.0986, + "learning_rate": 1.984769831650024e-05, + "loss": 1.1257, "step": 2146 }, { - "epoch": 0.0608404885375046, + "epoch": 0.08400500821660536, "grad_norm": 0.0, - "learning_rate": 1.9950185836084338e-05, - "loss": 1.251, + "learning_rate": 1.9847477912696046e-05, + "loss": 1.3354, "step": 2147 }, { - "epoch": 0.06086882597976707, + "epoch": 0.0840441349088348, "grad_norm": 0.0, - "learning_rate": 1.9950094300123845e-05, - "loss": 1.1264, + "learning_rate": 1.98472573507537e-05, + "loss": 1.2372, "step": 2148 }, { - "epoch": 0.06089716342202953, + "epoch": 0.08408326160106425, "grad_norm": 0.0, - "learning_rate": 1.995000268035002e-05, - "loss": 1.2418, + "learning_rate": 1.984703663067673e-05, + "loss": 1.2838, "step": 2149 }, { - "epoch": 0.060925500864291986, + "epoch": 0.08412238829329369, "grad_norm": 0.0, - "learning_rate": 1.9949910976763623e-05, - "loss": 1.1882, + "learning_rate": 1.98468157524687e-05, + "loss": 1.2218, "step": 2150 }, { - "epoch": 0.06095383830655445, + "epoch": 0.08416151498552313, "grad_norm": 0.0, - "learning_rate": 1.9949819189365432e-05, - "loss": 1.1331, + "learning_rate": 1.9846594716133144e-05, + "loss": 1.1861, "step": 2151 }, { - "epoch": 0.06098217574881691, + "epoch": 0.08420064167775257, "grad_norm": 0.0, - "learning_rate": 1.994972731815622e-05, - "loss": 1.1281, + "learning_rate": 1.9846373521673613e-05, + "loss": 1.3203, "step": 2152 }, { - "epoch": 0.06101051319107937, + "epoch": 0.084239768369982, "grad_norm": 0.0, - "learning_rate": 1.9949635363136762e-05, - "loss": 1.1967, + "learning_rate": 1.9846152169093663e-05, + "loss": 1.3984, "step": 2153 }, { - "epoch": 0.061038850633341836, + "epoch": 0.08427889506221144, "grad_norm": 0.0, - "learning_rate": 1.9949543324307828e-05, - "loss": 1.1319, + "learning_rate": 1.984593065839685e-05, + "loss": 1.1664, "step": 2154 }, { - "epoch": 0.061067188075604295, + "epoch": 0.08431802175444088, "grad_norm": 0.0, - "learning_rate": 1.99494512016702e-05, - "loss": 1.2044, + "learning_rate": 1.9845708989586726e-05, + "loss": 1.1274, "step": 2155 }, { - "epoch": 0.061095525517866754, + "epoch": 0.08435714844667032, "grad_norm": 0.0, - "learning_rate": 1.9949358995224645e-05, - "loss": 1.1752, + "learning_rate": 1.9845487162666853e-05, + "loss": 1.1508, "step": 2156 }, { - "epoch": 0.06112386296012922, + "epoch": 0.08439627513889976, "grad_norm": 0.0, - "learning_rate": 1.9949266704971945e-05, - "loss": 1.1295, + "learning_rate": 1.9845265177640795e-05, + "loss": 1.1303, "step": 2157 }, { - "epoch": 0.06115220040239168, + "epoch": 0.0844354018311292, "grad_norm": 0.0, - "learning_rate": 1.994917433091288e-05, - "loss": 1.1369, + "learning_rate": 1.984504303451211e-05, + "loss": 1.2322, "step": 2158 }, { - "epoch": 0.06118053784465414, + "epoch": 0.08447452852335864, "grad_norm": 0.0, - "learning_rate": 1.9949081873048222e-05, - "loss": 1.1305, + "learning_rate": 1.984482073328438e-05, + "loss": 1.2189, "step": 2159 }, { - "epoch": 0.061208875286916604, + "epoch": 0.08451365521558807, "grad_norm": 0.0, - "learning_rate": 1.9948989331378755e-05, - "loss": 1.2303, + "learning_rate": 1.9844598273961156e-05, + "loss": 1.1429, "step": 2160 }, { - "epoch": 0.061237212729179064, + "epoch": 0.08455278190781751, "grad_norm": 0.0, - "learning_rate": 1.9948896705905255e-05, - "loss": 1.1487, + "learning_rate": 1.9844375656546023e-05, + "loss": 1.132, "step": 2161 }, { - "epoch": 0.06126555017144152, + "epoch": 0.08459190860004695, "grad_norm": 0.0, - "learning_rate": 1.9948803996628503e-05, - "loss": 1.1034, + "learning_rate": 1.9844152881042554e-05, + "loss": 1.3393, "step": 2162 }, { - "epoch": 0.06129388761370399, + "epoch": 0.08463103529227639, "grad_norm": 0.0, - "learning_rate": 1.9948711203549282e-05, - "loss": 1.1289, + "learning_rate": 1.9843929947454325e-05, + "loss": 1.3237, "step": 2163 }, { - "epoch": 0.06132222505596645, + "epoch": 0.08467016198450583, "grad_norm": 0.0, - "learning_rate": 1.9948618326668373e-05, - "loss": 1.1278, + "learning_rate": 1.9843706855784916e-05, + "loss": 1.1533, "step": 2164 }, { - "epoch": 0.06135056249822891, + "epoch": 0.08470928867673527, "grad_norm": 0.0, - "learning_rate": 1.9948525365986554e-05, - "loss": 1.1317, + "learning_rate": 1.984348360603791e-05, + "loss": 1.2504, "step": 2165 }, { - "epoch": 0.06137889994049137, + "epoch": 0.08474841536896471, "grad_norm": 0.0, - "learning_rate": 1.9948432321504617e-05, - "loss": 1.1111, + "learning_rate": 1.9843260198216888e-05, + "loss": 1.2516, "step": 2166 }, { - "epoch": 0.06140723738275383, + "epoch": 0.08478754206119414, "grad_norm": 0.0, - "learning_rate": 1.9948339193223333e-05, - "loss": 1.057, + "learning_rate": 1.984303663232544e-05, + "loss": 1.3307, "step": 2167 }, { - "epoch": 0.06143557482501629, + "epoch": 0.08482666875342358, "grad_norm": 0.0, - "learning_rate": 1.99482459811435e-05, - "loss": 1.171, + "learning_rate": 1.9842812908367167e-05, + "loss": 1.3076, "step": 2168 }, { - "epoch": 0.06146391226727876, + "epoch": 0.08486579544565302, "grad_norm": 0.0, - "learning_rate": 1.9948152685265896e-05, - "loss": 1.1791, + "learning_rate": 1.9842589026345647e-05, + "loss": 1.3237, "step": 2169 }, { - "epoch": 0.061492249709541216, + "epoch": 0.08490492213788246, "grad_norm": 0.0, - "learning_rate": 1.9948059305591304e-05, - "loss": 1.1042, + "learning_rate": 1.984236498626448e-05, + "loss": 1.2139, "step": 2170 }, { - "epoch": 0.061520587151803675, + "epoch": 0.0849440488301119, "grad_norm": 0.0, - "learning_rate": 1.994796584212052e-05, - "loss": 1.0222, + "learning_rate": 1.9842140788127264e-05, + "loss": 1.1368, "step": 2171 }, { - "epoch": 0.06154892459406614, + "epoch": 0.08498317552234134, "grad_norm": 0.0, - "learning_rate": 1.994787229485432e-05, - "loss": 1.207, + "learning_rate": 1.98419164319376e-05, + "loss": 1.267, "step": 2172 }, { - "epoch": 0.0615772620363286, + "epoch": 0.08502230221457079, "grad_norm": 0.0, - "learning_rate": 1.9947778663793502e-05, - "loss": 1.0902, + "learning_rate": 1.9841691917699096e-05, + "loss": 1.2498, "step": 2173 }, { - "epoch": 0.06160559947859106, + "epoch": 0.08506142890680021, "grad_norm": 0.0, - "learning_rate": 1.994768494893885e-05, - "loss": 1.0511, + "learning_rate": 1.9841467245415348e-05, + "loss": 1.1814, "step": 2174 }, { - "epoch": 0.061633936920853526, + "epoch": 0.08510055559902965, "grad_norm": 0.0, - "learning_rate": 1.994759115029115e-05, - "loss": 1.1349, + "learning_rate": 1.9841242415089967e-05, + "loss": 1.1507, "step": 2175 }, { - "epoch": 0.061662274363115985, + "epoch": 0.0851396822912591, "grad_norm": 0.0, - "learning_rate": 1.9947497267851198e-05, - "loss": 1.1862, + "learning_rate": 1.9841017426726568e-05, + "loss": 1.3197, "step": 2176 }, { - "epoch": 0.061690611805378444, + "epoch": 0.08517880898348854, "grad_norm": 0.0, - "learning_rate": 1.9947403301619782e-05, - "loss": 1.0393, + "learning_rate": 1.984079228032876e-05, + "loss": 1.1353, "step": 2177 }, { - "epoch": 0.06171894924764091, + "epoch": 0.08521793567571798, "grad_norm": 0.0, - "learning_rate": 1.9947309251597695e-05, - "loss": 1.2064, + "learning_rate": 1.9840566975900155e-05, + "loss": 1.3154, "step": 2178 }, { - "epoch": 0.06174728668990337, + "epoch": 0.08525706236794742, "grad_norm": 0.0, - "learning_rate": 1.9947215117785727e-05, - "loss": 1.1678, + "learning_rate": 1.984034151344438e-05, + "loss": 1.1562, "step": 2179 }, { - "epoch": 0.06177562413216583, + "epoch": 0.08529618906017686, "grad_norm": 0.0, - "learning_rate": 1.9947120900184674e-05, - "loss": 1.0056, + "learning_rate": 1.9840115892965045e-05, + "loss": 1.3832, "step": 2180 }, { - "epoch": 0.061803961574428294, + "epoch": 0.08533531575240628, "grad_norm": 0.0, - "learning_rate": 1.9947026598795327e-05, - "loss": 1.1119, + "learning_rate": 1.983989011446578e-05, + "loss": 1.3523, "step": 2181 }, { - "epoch": 0.06183229901669075, + "epoch": 0.08537444244463573, "grad_norm": 0.0, - "learning_rate": 1.994693221361848e-05, - "loss": 1.0211, + "learning_rate": 1.983966417795022e-05, + "loss": 1.2655, "step": 2182 }, { - "epoch": 0.06186063645895321, + "epoch": 0.08541356913686517, "grad_norm": 0.0, - "learning_rate": 1.9946837744654933e-05, - "loss": 1.1151, + "learning_rate": 1.9839438083421974e-05, + "loss": 1.1927, "step": 2183 }, { - "epoch": 0.06188897390121568, + "epoch": 0.08545269582909461, "grad_norm": 0.0, - "learning_rate": 1.9946743191905473e-05, - "loss": 1.1855, + "learning_rate": 1.9839211830884682e-05, + "loss": 1.1671, "step": 2184 }, { - "epoch": 0.06191731134347814, + "epoch": 0.08549182252132405, "grad_norm": 0.0, - "learning_rate": 1.9946648555370905e-05, - "loss": 1.1223, + "learning_rate": 1.983898542034198e-05, + "loss": 1.1765, "step": 2185 }, { - "epoch": 0.0619456487857406, + "epoch": 0.08553094921355349, "grad_norm": 0.0, - "learning_rate": 1.9946553835052023e-05, - "loss": 1.1472, + "learning_rate": 1.98387588517975e-05, + "loss": 1.374, "step": 2186 }, { - "epoch": 0.06197398622800306, + "epoch": 0.08557007590578293, "grad_norm": 0.0, - "learning_rate": 1.9946459030949622e-05, - "loss": 1.1352, + "learning_rate": 1.9838532125254883e-05, + "loss": 1.1452, "step": 2187 }, { - "epoch": 0.06200232367026552, + "epoch": 0.08560920259801236, "grad_norm": 0.0, - "learning_rate": 1.9946364143064506e-05, - "loss": 1.0556, + "learning_rate": 1.9838305240717765e-05, + "loss": 1.1881, "step": 2188 }, { - "epoch": 0.06203066111252798, + "epoch": 0.0856483292902418, "grad_norm": 0.0, - "learning_rate": 1.9946269171397467e-05, - "loss": 1.2574, + "learning_rate": 1.9838078198189798e-05, + "loss": 1.2064, "step": 2189 }, { - "epoch": 0.06205899855479045, + "epoch": 0.08568745598247124, "grad_norm": 0.0, - "learning_rate": 1.9946174115949315e-05, - "loss": 1.1415, + "learning_rate": 1.9837850997674618e-05, + "loss": 1.1797, "step": 2190 }, { - "epoch": 0.062087335997052906, + "epoch": 0.08572658267470068, "grad_norm": 0.0, - "learning_rate": 1.9946078976720842e-05, - "loss": 1.1939, + "learning_rate": 1.983762363917588e-05, + "loss": 1.2832, "step": 2191 }, { - "epoch": 0.062115673439315365, + "epoch": 0.08576570936693012, "grad_norm": 0.0, - "learning_rate": 1.9945983753712853e-05, - "loss": 1.0247, + "learning_rate": 1.9837396122697235e-05, + "loss": 1.1912, "step": 2192 }, { - "epoch": 0.06214401088157783, + "epoch": 0.08580483605915956, "grad_norm": 0.0, - "learning_rate": 1.9945888446926146e-05, - "loss": 1.1019, + "learning_rate": 1.983716844824233e-05, + "loss": 1.162, "step": 2193 }, { - "epoch": 0.06217234832384029, + "epoch": 0.085843962751389, "grad_norm": 0.0, - "learning_rate": 1.9945793056361527e-05, - "loss": 1.1456, + "learning_rate": 1.9836940615814836e-05, + "loss": 1.2322, "step": 2194 }, { - "epoch": 0.06220068576610275, + "epoch": 0.08588308944361844, "grad_norm": 0.0, - "learning_rate": 1.9945697582019807e-05, - "loss": 1.1074, + "learning_rate": 1.9836712625418393e-05, + "loss": 1.0967, "step": 2195 }, { - "epoch": 0.062229023208365215, + "epoch": 0.08592221613584787, "grad_norm": 0.0, - "learning_rate": 1.994560202390178e-05, - "loss": 1.1806, + "learning_rate": 1.9836484477056676e-05, + "loss": 1.2963, "step": 2196 }, { - "epoch": 0.062257360650627674, + "epoch": 0.08596134282807731, "grad_norm": 0.0, - "learning_rate": 1.994550638200825e-05, - "loss": 1.0583, + "learning_rate": 1.9836256170733343e-05, + "loss": 1.2928, "step": 2197 }, { - "epoch": 0.062285698092890134, + "epoch": 0.08600046952030675, "grad_norm": 0.0, - "learning_rate": 1.994541065634003e-05, - "loss": 1.1735, + "learning_rate": 1.9836027706452063e-05, + "loss": 1.1191, "step": 2198 }, { - "epoch": 0.0623140355351526, + "epoch": 0.08603959621253619, "grad_norm": 0.0, - "learning_rate": 1.9945314846897922e-05, - "loss": 1.1033, + "learning_rate": 1.98357990842165e-05, + "loss": 1.1942, "step": 2199 }, { - "epoch": 0.06234237297741506, + "epoch": 0.08607872290476563, "grad_norm": 0.0, - "learning_rate": 1.9945218953682736e-05, - "loss": 1.1378, + "learning_rate": 1.983557030403033e-05, + "loss": 1.3035, "step": 2200 }, { - "epoch": 0.06237071041967752, + "epoch": 0.08611784959699507, "grad_norm": 0.0, - "learning_rate": 1.9945122976695274e-05, - "loss": 1.1703, + "learning_rate": 1.983534136589723e-05, + "loss": 1.1857, "step": 2201 }, { - "epoch": 0.062399047861939984, + "epoch": 0.08615697628922452, "grad_norm": 0.0, - "learning_rate": 1.994502691593635e-05, - "loss": 1.0831, + "learning_rate": 1.9835112269820867e-05, + "loss": 1.1791, "step": 2202 }, { - "epoch": 0.06242738530420244, + "epoch": 0.08619610298145394, "grad_norm": 0.0, - "learning_rate": 1.994493077140677e-05, - "loss": 1.1604, + "learning_rate": 1.9834883015804926e-05, + "loss": 1.1729, "step": 2203 }, { - "epoch": 0.0624557227464649, + "epoch": 0.08623522967368338, "grad_norm": 0.0, - "learning_rate": 1.9944834543107347e-05, - "loss": 1.1613, + "learning_rate": 1.9834653603853088e-05, + "loss": 1.1847, "step": 2204 }, { - "epoch": 0.06248406018872737, + "epoch": 0.08627435636591282, "grad_norm": 0.0, - "learning_rate": 1.994473823103889e-05, - "loss": 1.1571, + "learning_rate": 1.9834424033969033e-05, + "loss": 1.1985, "step": 2205 }, { - "epoch": 0.06251239763098983, + "epoch": 0.08631348305814227, "grad_norm": 0.0, - "learning_rate": 1.9944641835202212e-05, - "loss": 1.1514, + "learning_rate": 1.9834194306156455e-05, + "loss": 1.1304, "step": 2206 }, { - "epoch": 0.0625407350732523, + "epoch": 0.0863526097503717, "grad_norm": 0.0, - "learning_rate": 1.994454535559812e-05, - "loss": 1.1915, + "learning_rate": 1.9833964420419042e-05, + "loss": 1.3164, "step": 2207 }, { - "epoch": 0.06256907251551475, + "epoch": 0.08639173644260115, "grad_norm": 0.0, - "learning_rate": 1.994444879222743e-05, - "loss": 1.0662, + "learning_rate": 1.9833734376760478e-05, + "loss": 1.269, "step": 2208 }, { - "epoch": 0.06259740995777721, + "epoch": 0.08643086313483059, "grad_norm": 0.0, - "learning_rate": 1.9944352145090954e-05, - "loss": 1.0993, + "learning_rate": 1.9833504175184462e-05, + "loss": 1.2407, "step": 2209 }, { - "epoch": 0.06262574740003968, + "epoch": 0.08646998982706001, "grad_norm": 0.0, - "learning_rate": 1.9944255414189508e-05, - "loss": 0.9759, + "learning_rate": 1.9833273815694695e-05, + "loss": 1.2067, "step": 2210 }, { - "epoch": 0.06265408484230213, + "epoch": 0.08650911651928946, "grad_norm": 0.0, - "learning_rate": 1.9944158599523902e-05, - "loss": 1.1164, + "learning_rate": 1.983304329829487e-05, + "loss": 1.2029, "step": 2211 }, { - "epoch": 0.0626824222845646, + "epoch": 0.0865482432115189, "grad_norm": 0.0, - "learning_rate": 1.9944061701094962e-05, - "loss": 0.8985, + "learning_rate": 1.9832812622988694e-05, + "loss": 1.3021, "step": 2212 }, { - "epoch": 0.06271075972682706, + "epoch": 0.08658736990374834, "grad_norm": 0.0, - "learning_rate": 1.9943964718903495e-05, - "loss": 1.1307, + "learning_rate": 1.9832581789779864e-05, + "loss": 1.3237, "step": 2213 }, { - "epoch": 0.06273909716908951, + "epoch": 0.08662649659597778, "grad_norm": 0.0, - "learning_rate": 1.9943867652950323e-05, - "loss": 1.0702, + "learning_rate": 1.9832350798672096e-05, + "loss": 1.267, "step": 2214 }, { - "epoch": 0.06276743461135198, + "epoch": 0.08666562328820722, "grad_norm": 0.0, - "learning_rate": 1.994377050323626e-05, - "loss": 1.0714, + "learning_rate": 1.983211964966909e-05, + "loss": 1.3251, "step": 2215 }, { - "epoch": 0.06279577205361445, + "epoch": 0.08670474998043666, "grad_norm": 0.0, - "learning_rate": 1.994367326976212e-05, - "loss": 1.0877, + "learning_rate": 1.9831888342774565e-05, + "loss": 1.1714, "step": 2216 }, { - "epoch": 0.0628241094958769, + "epoch": 0.08674387667266609, "grad_norm": 0.0, - "learning_rate": 1.9943575952528734e-05, - "loss": 1.0834, + "learning_rate": 1.9831656877992233e-05, + "loss": 1.3182, "step": 2217 }, { - "epoch": 0.06285244693813936, + "epoch": 0.08678300336489553, "grad_norm": 0.0, - "learning_rate": 1.9943478551536914e-05, - "loss": 1.0209, + "learning_rate": 1.983142525532581e-05, + "loss": 1.2028, "step": 2218 }, { - "epoch": 0.06288078438040183, + "epoch": 0.08682213005712497, "grad_norm": 0.0, - "learning_rate": 1.994338106678748e-05, - "loss": 1.1868, + "learning_rate": 1.983119347477902e-05, + "loss": 1.1022, "step": 2219 }, { - "epoch": 0.06290912182266428, + "epoch": 0.08686125674935441, "grad_norm": 0.0, - "learning_rate": 1.9943283498281256e-05, - "loss": 1.113, + "learning_rate": 1.983096153635558e-05, + "loss": 1.0848, "step": 2220 }, { - "epoch": 0.06293745926492675, + "epoch": 0.08690038344158385, "grad_norm": 0.0, - "learning_rate": 1.9943185846019064e-05, - "loss": 0.9675, + "learning_rate": 1.983072944005922e-05, + "loss": 1.1321, "step": 2221 }, { - "epoch": 0.06296579670718921, + "epoch": 0.08693951013381329, "grad_norm": 0.0, - "learning_rate": 1.9943088110001722e-05, - "loss": 1.1119, + "learning_rate": 1.9830497185893657e-05, + "loss": 1.2548, "step": 2222 }, { - "epoch": 0.06299413414945167, + "epoch": 0.08697863682604273, "grad_norm": 0.0, - "learning_rate": 1.9942990290230057e-05, - "loss": 1.0558, + "learning_rate": 1.9830264773862633e-05, + "loss": 1.2845, "step": 2223 }, { - "epoch": 0.06302247159171413, + "epoch": 0.08701776351827216, "grad_norm": 0.0, - "learning_rate": 1.9942892386704896e-05, - "loss": 1.1546, + "learning_rate": 1.9830032203969873e-05, + "loss": 1.2539, "step": 2224 }, { - "epoch": 0.0630508090339766, + "epoch": 0.0870568902105016, "grad_norm": 0.0, - "learning_rate": 1.994279439942706e-05, - "loss": 1.1823, + "learning_rate": 1.9829799476219113e-05, + "loss": 1.3281, "step": 2225 }, { - "epoch": 0.06307914647623905, + "epoch": 0.08709601690273104, "grad_norm": 0.0, - "learning_rate": 1.994269632839737e-05, - "loss": 1.154, + "learning_rate": 1.9829566590614093e-05, + "loss": 1.3756, "step": 2226 }, { - "epoch": 0.06310748391850152, + "epoch": 0.08713514359496048, "grad_norm": 0.0, - "learning_rate": 1.994259817361666e-05, - "loss": 1.2231, + "learning_rate": 1.9829333547158547e-05, + "loss": 1.2143, "step": 2227 }, { - "epoch": 0.06313582136076398, + "epoch": 0.08717427028718992, "grad_norm": 0.0, - "learning_rate": 1.9942499935085754e-05, - "loss": 1.2347, + "learning_rate": 1.9829100345856224e-05, + "loss": 1.2003, "step": 2228 }, { - "epoch": 0.06316415880302644, + "epoch": 0.08721339697941936, "grad_norm": 0.0, - "learning_rate": 1.9942401612805478e-05, - "loss": 1.1562, + "learning_rate": 1.9828866986710865e-05, + "loss": 1.2588, "step": 2229 }, { - "epoch": 0.0631924962452889, + "epoch": 0.0872525236716488, "grad_norm": 0.0, - "learning_rate": 1.9942303206776662e-05, - "loss": 1.0302, + "learning_rate": 1.982863346972622e-05, + "loss": 1.2687, "step": 2230 }, { - "epoch": 0.06322083368755137, + "epoch": 0.08729165036387823, "grad_norm": 0.0, - "learning_rate": 1.9942204717000133e-05, - "loss": 1.1305, + "learning_rate": 1.9828399794906037e-05, + "loss": 1.3709, "step": 2231 }, { - "epoch": 0.06324917112981382, + "epoch": 0.08733077705610767, "grad_norm": 0.0, - "learning_rate": 1.9942106143476722e-05, - "loss": 1.1193, + "learning_rate": 1.9828165962254065e-05, + "loss": 1.3713, "step": 2232 }, { - "epoch": 0.06327750857207629, + "epoch": 0.08736990374833711, "grad_norm": 0.0, - "learning_rate": 1.9942007486207258e-05, - "loss": 1.0275, + "learning_rate": 1.9827931971774068e-05, + "loss": 1.2647, "step": 2233 }, { - "epoch": 0.06330584601433875, + "epoch": 0.08740903044056655, "grad_norm": 0.0, - "learning_rate": 1.9941908745192575e-05, - "loss": 1.1331, + "learning_rate": 1.9827697823469797e-05, + "loss": 1.2028, "step": 2234 }, { - "epoch": 0.0633341834566012, + "epoch": 0.087448157132796, "grad_norm": 0.0, - "learning_rate": 1.9941809920433503e-05, - "loss": 1.0607, + "learning_rate": 1.9827463517345015e-05, + "loss": 1.2396, "step": 2235 }, { - "epoch": 0.06336252089886367, + "epoch": 0.08748728382502544, "grad_norm": 0.0, - "learning_rate": 1.994171101193087e-05, - "loss": 1.095, + "learning_rate": 1.982722905340348e-05, + "loss": 1.3017, "step": 2236 }, { - "epoch": 0.06339085834112614, + "epoch": 0.08752641051725488, "grad_norm": 0.0, - "learning_rate": 1.994161201968552e-05, - "loss": 0.9699, + "learning_rate": 1.9826994431648964e-05, + "loss": 1.2489, "step": 2237 }, { - "epoch": 0.06341919578338859, + "epoch": 0.0875655372094843, "grad_norm": 0.0, - "learning_rate": 1.9941512943698277e-05, - "loss": 1.1581, + "learning_rate": 1.982675965208523e-05, + "loss": 1.3699, "step": 2238 }, { - "epoch": 0.06344753322565105, + "epoch": 0.08760466390171374, "grad_norm": 0.0, - "learning_rate": 1.994141378396998e-05, - "loss": 1.1995, + "learning_rate": 1.982652471471605e-05, + "loss": 1.1547, "step": 2239 }, { - "epoch": 0.06347587066791352, + "epoch": 0.08764379059394319, "grad_norm": 0.0, - "learning_rate": 1.994131454050146e-05, - "loss": 1.0669, + "learning_rate": 1.9826289619545194e-05, + "loss": 1.3557, "step": 2240 }, { - "epoch": 0.06350420811017597, + "epoch": 0.08768291728617263, "grad_norm": 0.0, - "learning_rate": 1.9941215213293558e-05, - "loss": 1.2537, + "learning_rate": 1.9826054366576443e-05, + "loss": 1.1804, "step": 2241 }, { - "epoch": 0.06353254555243844, + "epoch": 0.08772204397840207, "grad_norm": 0.0, - "learning_rate": 1.9941115802347106e-05, - "loss": 1.0339, + "learning_rate": 1.982581895581357e-05, + "loss": 1.2526, "step": 2242 }, { - "epoch": 0.0635608829947009, + "epoch": 0.08776117067063151, "grad_norm": 0.0, - "learning_rate": 1.9941016307662947e-05, - "loss": 1.1796, + "learning_rate": 1.9825583387260355e-05, + "loss": 1.3167, "step": 2243 }, { - "epoch": 0.06358922043696336, + "epoch": 0.08780029736286095, "grad_norm": 0.0, - "learning_rate": 1.9940916729241918e-05, - "loss": 1.1446, + "learning_rate": 1.9825347660920588e-05, + "loss": 1.2136, "step": 2244 }, { - "epoch": 0.06361755787922582, + "epoch": 0.08783942405509038, "grad_norm": 0.0, - "learning_rate": 1.994081706708485e-05, - "loss": 1.0132, + "learning_rate": 1.9825111776798044e-05, + "loss": 1.1724, "step": 2245 }, { - "epoch": 0.06364589532148829, + "epoch": 0.08787855074731982, "grad_norm": 0.0, - "learning_rate": 1.9940717321192593e-05, - "loss": 1.0606, + "learning_rate": 1.9824875734896517e-05, + "loss": 1.3711, "step": 2246 }, { - "epoch": 0.06367423276375074, + "epoch": 0.08791767743954926, "grad_norm": 0.0, - "learning_rate": 1.9940617491565982e-05, - "loss": 1.1016, + "learning_rate": 1.9824639535219804e-05, + "loss": 1.1638, "step": 2247 }, { - "epoch": 0.0637025702060132, + "epoch": 0.0879568041317787, "grad_norm": 0.0, - "learning_rate": 1.994051757820586e-05, - "loss": 1.2218, + "learning_rate": 1.982440317777168e-05, + "loss": 1.2206, "step": 2248 }, { - "epoch": 0.06373090764827567, + "epoch": 0.08799593082400814, "grad_norm": 0.0, - "learning_rate": 1.9940417581113062e-05, - "loss": 1.0999, + "learning_rate": 1.982416666255596e-05, + "loss": 1.1562, "step": 2249 }, { - "epoch": 0.06375924509053812, + "epoch": 0.08803505751623758, "grad_norm": 0.0, - "learning_rate": 1.994031750028844e-05, - "loss": 1.0579, + "learning_rate": 1.9823929989576433e-05, + "loss": 1.3079, "step": 2250 }, { - "epoch": 0.06378758253280059, + "epoch": 0.08807418420846702, "grad_norm": 0.0, - "learning_rate": 1.994021733573283e-05, - "loss": 1.1116, + "learning_rate": 1.9823693158836898e-05, + "loss": 1.3649, "step": 2251 }, { - "epoch": 0.06381591997506306, + "epoch": 0.08811331090069645, "grad_norm": 0.0, - "learning_rate": 1.994011708744708e-05, - "loss": 1.0636, + "learning_rate": 1.9823456170341162e-05, + "loss": 1.2612, "step": 2252 }, { - "epoch": 0.06384425741732551, + "epoch": 0.08815243759292589, "grad_norm": 0.0, - "learning_rate": 1.9940016755432032e-05, - "loss": 1.0879, + "learning_rate": 1.9823219024093028e-05, + "loss": 1.2248, "step": 2253 }, { - "epoch": 0.06387259485958798, + "epoch": 0.08819156428515533, "grad_norm": 0.0, - "learning_rate": 1.993991633968853e-05, - "loss": 1.0974, + "learning_rate": 1.982298172009631e-05, + "loss": 1.2576, "step": 2254 }, { - "epoch": 0.06390093230185044, + "epoch": 0.08823069097738477, "grad_norm": 0.0, - "learning_rate": 1.9939815840217425e-05, - "loss": 1.0279, + "learning_rate": 1.9822744258354806e-05, + "loss": 1.1277, "step": 2255 }, { - "epoch": 0.0639292697441129, + "epoch": 0.08826981766961421, "grad_norm": 0.0, - "learning_rate": 1.9939715257019557e-05, - "loss": 0.9973, + "learning_rate": 1.9822506638872346e-05, + "loss": 1.1804, "step": 2256 }, { - "epoch": 0.06395760718637536, + "epoch": 0.08830894436184365, "grad_norm": 0.0, - "learning_rate": 1.993961459009578e-05, - "loss": 1.0595, + "learning_rate": 1.9822268861652733e-05, + "loss": 1.2752, "step": 2257 }, { - "epoch": 0.06398594462863783, + "epoch": 0.0883480710540731, "grad_norm": 0.0, - "learning_rate": 1.9939513839446934e-05, - "loss": 1.1546, + "learning_rate": 1.982203092669979e-05, + "loss": 1.332, "step": 2258 }, { - "epoch": 0.06401428207090028, + "epoch": 0.08838719774630253, "grad_norm": 0.0, - "learning_rate": 1.9939413005073873e-05, - "loss": 1.1158, + "learning_rate": 1.9821792834017343e-05, + "loss": 1.2223, "step": 2259 }, { - "epoch": 0.06404261951316274, + "epoch": 0.08842632443853196, "grad_norm": 0.0, - "learning_rate": 1.9939312086977446e-05, - "loss": 1.0166, + "learning_rate": 1.9821554583609205e-05, + "loss": 1.2651, "step": 2260 }, { - "epoch": 0.06407095695542521, + "epoch": 0.0884654511307614, "grad_norm": 0.0, - "learning_rate": 1.9939211085158504e-05, - "loss": 1.0737, + "learning_rate": 1.982131617547921e-05, + "loss": 1.293, "step": 2261 }, { - "epoch": 0.06409929439768766, + "epoch": 0.08850457782299084, "grad_norm": 0.0, - "learning_rate": 1.9939109999617894e-05, - "loss": 1.075, + "learning_rate": 1.9821077609631184e-05, + "loss": 1.2205, "step": 2262 }, { - "epoch": 0.06412763183995013, + "epoch": 0.08854370451522028, "grad_norm": 0.0, - "learning_rate": 1.993900883035647e-05, - "loss": 1.1959, + "learning_rate": 1.9820838886068958e-05, + "loss": 1.1507, "step": 2263 }, { - "epoch": 0.0641559692822126, + "epoch": 0.08858283120744972, "grad_norm": 0.0, - "learning_rate": 1.9938907577375084e-05, - "loss": 1.062, + "learning_rate": 1.9820600004796363e-05, + "loss": 1.2588, "step": 2264 }, { - "epoch": 0.06418430672447505, + "epoch": 0.08862195789967917, "grad_norm": 0.0, - "learning_rate": 1.9938806240674592e-05, - "loss": 1.2193, + "learning_rate": 1.982036096581724e-05, + "loss": 1.189, "step": 2265 }, { - "epoch": 0.06421264416673751, + "epoch": 0.0886610845919086, "grad_norm": 0.0, - "learning_rate": 1.9938704820255837e-05, - "loss": 1.0648, + "learning_rate": 1.9820121769135428e-05, + "loss": 1.299, "step": 2266 }, { - "epoch": 0.06424098160899998, + "epoch": 0.08870021128413803, "grad_norm": 0.0, - "learning_rate": 1.993860331611969e-05, - "loss": 1.0619, + "learning_rate": 1.9819882414754762e-05, + "loss": 1.317, "step": 2267 }, { - "epoch": 0.06426931905126243, + "epoch": 0.08873933797636747, "grad_norm": 0.0, - "learning_rate": 1.993850172826699e-05, - "loss": 0.9873, + "learning_rate": 1.981964290267909e-05, + "loss": 1.3607, "step": 2268 }, { - "epoch": 0.0642976564935249, + "epoch": 0.08877846466859692, "grad_norm": 0.0, - "learning_rate": 1.99384000566986e-05, - "loss": 1.1358, + "learning_rate": 1.9819403232912258e-05, + "loss": 1.2207, "step": 2269 }, { - "epoch": 0.06432599393578736, + "epoch": 0.08881759136082636, "grad_norm": 0.0, - "learning_rate": 1.9938298301415376e-05, - "loss": 1.1101, + "learning_rate": 1.9819163405458118e-05, + "loss": 1.2434, "step": 2270 }, { - "epoch": 0.06435433137804981, + "epoch": 0.0888567180530558, "grad_norm": 0.0, - "learning_rate": 1.9938196462418177e-05, - "loss": 1.0958, + "learning_rate": 1.9818923420320514e-05, + "loss": 1.3551, "step": 2271 }, { - "epoch": 0.06438266882031228, + "epoch": 0.08889584474528524, "grad_norm": 0.0, - "learning_rate": 1.9938094539707857e-05, - "loss": 1.058, + "learning_rate": 1.9818683277503302e-05, + "loss": 1.2858, "step": 2272 }, { - "epoch": 0.06441100626257475, + "epoch": 0.08893497143751468, "grad_norm": 0.0, - "learning_rate": 1.993799253328528e-05, - "loss": 1.1458, + "learning_rate": 1.9818442977010344e-05, + "loss": 1.2442, "step": 2273 }, { - "epoch": 0.0644393437048372, + "epoch": 0.0889740981297441, "grad_norm": 0.0, - "learning_rate": 1.9937890443151294e-05, - "loss": 1.088, + "learning_rate": 1.9818202518845493e-05, + "loss": 1.3613, "step": 2274 }, { - "epoch": 0.06446768114709966, + "epoch": 0.08901322482197355, "grad_norm": 0.0, - "learning_rate": 1.993778826930677e-05, - "loss": 1.1125, + "learning_rate": 1.981796190301261e-05, + "loss": 1.2528, "step": 2275 }, { - "epoch": 0.06449601858936213, + "epoch": 0.08905235151420299, "grad_norm": 0.0, - "learning_rate": 1.9937686011752567e-05, - "loss": 1.1838, + "learning_rate": 1.9817721129515565e-05, + "loss": 1.2122, "step": 2276 }, { - "epoch": 0.06452435603162458, + "epoch": 0.08909147820643243, "grad_norm": 0.0, - "learning_rate": 1.9937583670489547e-05, - "loss": 1.0788, + "learning_rate": 1.981748019835822e-05, + "loss": 1.239, "step": 2277 }, { - "epoch": 0.06455269347388705, + "epoch": 0.08913060489866187, "grad_norm": 0.0, - "learning_rate": 1.9937481245518563e-05, - "loss": 1.0874, + "learning_rate": 1.9817239109544447e-05, + "loss": 1.2894, "step": 2278 }, { - "epoch": 0.06458103091614951, + "epoch": 0.08916973159089131, "grad_norm": 0.0, - "learning_rate": 1.9937378736840486e-05, - "loss": 1.1125, + "learning_rate": 1.9816997863078115e-05, + "loss": 1.1482, "step": 2279 }, { - "epoch": 0.06460936835841197, + "epoch": 0.08920885828312075, "grad_norm": 0.0, - "learning_rate": 1.993727614445618e-05, - "loss": 1.0747, + "learning_rate": 1.9816756458963094e-05, + "loss": 1.1641, "step": 2280 }, { - "epoch": 0.06463770580067443, + "epoch": 0.08924798497535018, "grad_norm": 0.0, - "learning_rate": 1.9937173468366508e-05, - "loss": 1.1601, + "learning_rate": 1.9816514897203272e-05, + "loss": 1.2695, "step": 2281 }, { - "epoch": 0.0646660432429369, + "epoch": 0.08928711166757962, "grad_norm": 0.0, - "learning_rate": 1.993707070857233e-05, - "loss": 1.1155, + "learning_rate": 1.9816273177802517e-05, + "loss": 1.2448, "step": 2282 }, { - "epoch": 0.06469438068519935, + "epoch": 0.08932623835980906, "grad_norm": 0.0, - "learning_rate": 1.9936967865074517e-05, - "loss": 1.0369, + "learning_rate": 1.981603130076472e-05, + "loss": 1.1563, "step": 2283 }, { - "epoch": 0.06472271812746182, + "epoch": 0.0893653650520385, "grad_norm": 0.0, - "learning_rate": 1.9936864937873935e-05, - "loss": 1.1487, + "learning_rate": 1.9815789266093755e-05, + "loss": 1.3999, "step": 2284 }, { - "epoch": 0.06475105556972428, + "epoch": 0.08940449174426794, "grad_norm": 0.0, - "learning_rate": 1.993676192697145e-05, - "loss": 1.1892, + "learning_rate": 1.9815547073793516e-05, + "loss": 1.0562, "step": 2285 }, { - "epoch": 0.06477939301198674, + "epoch": 0.08944361843649738, "grad_norm": 0.0, - "learning_rate": 1.9936658832367927e-05, - "loss": 1.131, + "learning_rate": 1.9815304723867893e-05, + "loss": 1.361, "step": 2286 }, { - "epoch": 0.0648077304542492, + "epoch": 0.08948274512872682, "grad_norm": 0.0, - "learning_rate": 1.9936555654064237e-05, - "loss": 1.1184, + "learning_rate": 1.9815062216320772e-05, + "loss": 1.2057, "step": 2287 }, { - "epoch": 0.06483606789651167, + "epoch": 0.08952187182095625, "grad_norm": 0.0, - "learning_rate": 1.9936452392061248e-05, - "loss": 1.0859, + "learning_rate": 1.981481955115605e-05, + "loss": 1.1443, "step": 2288 }, { - "epoch": 0.06486440533877412, + "epoch": 0.08956099851318569, "grad_norm": 0.0, - "learning_rate": 1.9936349046359833e-05, - "loss": 1.1078, + "learning_rate": 1.9814576728377627e-05, + "loss": 1.2842, "step": 2289 }, { - "epoch": 0.06489274278103659, + "epoch": 0.08960012520541513, "grad_norm": 0.0, - "learning_rate": 1.993624561696086e-05, - "loss": 1.1253, + "learning_rate": 1.98143337479894e-05, + "loss": 1.3607, "step": 2290 }, { - "epoch": 0.06492108022329905, + "epoch": 0.08963925189764457, "grad_norm": 0.0, - "learning_rate": 1.9936142103865198e-05, - "loss": 1.1153, + "learning_rate": 1.9814090609995273e-05, + "loss": 1.3335, "step": 2291 }, { - "epoch": 0.0649494176655615, + "epoch": 0.08967837858987401, "grad_norm": 0.0, - "learning_rate": 1.9936038507073723e-05, - "loss": 1.2475, + "learning_rate": 1.9813847314399147e-05, + "loss": 1.1827, "step": 2292 }, { - "epoch": 0.06497775510782397, + "epoch": 0.08971750528210345, "grad_norm": 0.0, - "learning_rate": 1.9935934826587306e-05, - "loss": 1.0027, + "learning_rate": 1.9813603861204928e-05, + "loss": 1.2151, "step": 2293 }, { - "epoch": 0.06500609255008642, + "epoch": 0.0897566319743329, "grad_norm": 0.0, - "learning_rate": 1.993583106240682e-05, - "loss": 1.1178, + "learning_rate": 1.9813360250416532e-05, + "loss": 1.0854, "step": 2294 }, { - "epoch": 0.06503442999234889, + "epoch": 0.08979575866656232, "grad_norm": 0.0, - "learning_rate": 1.993572721453314e-05, - "loss": 1.0519, + "learning_rate": 1.9813116482037864e-05, + "loss": 1.3364, "step": 2295 }, { - "epoch": 0.06506276743461135, + "epoch": 0.08983488535879176, "grad_norm": 0.0, - "learning_rate": 1.993562328296714e-05, - "loss": 1.0649, + "learning_rate": 1.9812872556072845e-05, + "loss": 1.195, "step": 2296 }, { - "epoch": 0.0650911048768738, + "epoch": 0.0898740120510212, "grad_norm": 0.0, - "learning_rate": 1.9935519267709694e-05, - "loss": 1.1605, + "learning_rate": 1.9812628472525387e-05, + "loss": 1.2096, "step": 2297 }, { - "epoch": 0.06511944231913627, + "epoch": 0.08991313874325065, "grad_norm": 0.0, - "learning_rate": 1.9935415168761682e-05, - "loss": 1.1432, + "learning_rate": 1.981238423139941e-05, + "loss": 1.288, "step": 2298 }, { - "epoch": 0.06514777976139874, + "epoch": 0.08995226543548009, "grad_norm": 0.0, - "learning_rate": 1.993531098612398e-05, - "loss": 1.1731, + "learning_rate": 1.981213983269884e-05, + "loss": 1.2783, "step": 2299 }, { - "epoch": 0.06517611720366119, + "epoch": 0.08999139212770953, "grad_norm": 0.0, - "learning_rate": 1.993520671979746e-05, - "loss": 1.0399, + "learning_rate": 1.98118952764276e-05, + "loss": 1.2295, "step": 2300 }, { - "epoch": 0.06520445464592366, + "epoch": 0.09003051881993897, "grad_norm": 0.0, - "learning_rate": 1.9935102369783008e-05, - "loss": 1.0989, + "learning_rate": 1.9811650562589616e-05, + "loss": 1.2576, "step": 2301 }, { - "epoch": 0.06523279208818612, + "epoch": 0.0900696455121684, "grad_norm": 0.0, - "learning_rate": 1.9934997936081497e-05, - "loss": 1.0885, + "learning_rate": 1.9811405691188817e-05, + "loss": 1.1266, "step": 2302 }, { - "epoch": 0.06526112953044858, + "epoch": 0.09010877220439784, "grad_norm": 0.0, - "learning_rate": 1.993489341869381e-05, - "loss": 0.9802, + "learning_rate": 1.981116066222914e-05, + "loss": 1.2207, "step": 2303 }, { - "epoch": 0.06528946697271104, + "epoch": 0.09014789889662728, "grad_norm": 0.0, - "learning_rate": 1.9934788817620827e-05, - "loss": 1.0713, + "learning_rate": 1.9810915475714514e-05, + "loss": 1.0493, "step": 2304 }, { - "epoch": 0.06531780441497351, + "epoch": 0.09018702558885672, "grad_norm": 0.0, - "learning_rate": 1.9934684132863427e-05, - "loss": 1.1595, + "learning_rate": 1.9810670131648884e-05, + "loss": 1.145, "step": 2305 }, { - "epoch": 0.06534614185723596, + "epoch": 0.09022615228108616, "grad_norm": 0.0, - "learning_rate": 1.9934579364422495e-05, - "loss": 1.004, + "learning_rate": 1.981042463003618e-05, + "loss": 1.224, "step": 2306 }, { - "epoch": 0.06537447929949843, + "epoch": 0.0902652789733156, "grad_norm": 0.0, - "learning_rate": 1.9934474512298912e-05, - "loss": 1.1614, + "learning_rate": 1.981017897088035e-05, + "loss": 1.2671, "step": 2307 }, { - "epoch": 0.06540281674176089, + "epoch": 0.09030440566554504, "grad_norm": 0.0, - "learning_rate": 1.993436957649356e-05, - "loss": 1.219, + "learning_rate": 1.980993315418534e-05, + "loss": 1.191, "step": 2308 }, { - "epoch": 0.06543115418402334, + "epoch": 0.09034353235777447, "grad_norm": 0.0, - "learning_rate": 1.9934264557007323e-05, - "loss": 1.0677, + "learning_rate": 1.9809687179955096e-05, + "loss": 1.3811, "step": 2309 }, { - "epoch": 0.06545949162628581, + "epoch": 0.09038265905000391, "grad_norm": 0.0, - "learning_rate": 1.993415945384109e-05, - "loss": 1.0383, + "learning_rate": 1.980944104819357e-05, + "loss": 1.1893, "step": 2310 }, { - "epoch": 0.06548782906854828, + "epoch": 0.09042178574223335, "grad_norm": 0.0, - "learning_rate": 1.9934054266995742e-05, - "loss": 1.0, + "learning_rate": 1.9809194758904712e-05, + "loss": 1.3173, "step": 2311 }, { - "epoch": 0.06551616651081073, + "epoch": 0.09046091243446279, "grad_norm": 0.0, - "learning_rate": 1.9933948996472162e-05, - "loss": 1.0974, + "learning_rate": 1.9808948312092475e-05, + "loss": 1.0584, "step": 2312 }, { - "epoch": 0.0655445039530732, + "epoch": 0.09050003912669223, "grad_norm": 0.0, - "learning_rate": 1.9933843642271243e-05, - "loss": 1.0512, + "learning_rate": 1.9808701707760824e-05, + "loss": 1.1687, "step": 2313 }, { - "epoch": 0.06557284139533566, + "epoch": 0.09053916581892167, "grad_norm": 0.0, - "learning_rate": 1.9933738204393874e-05, - "loss": 1.2226, + "learning_rate": 1.980845494591371e-05, + "loss": 1.3087, "step": 2314 }, { - "epoch": 0.06560117883759811, + "epoch": 0.09057829251115111, "grad_norm": 0.0, - "learning_rate": 1.993363268284094e-05, - "loss": 0.9012, + "learning_rate": 1.9808208026555103e-05, + "loss": 1.3307, "step": 2315 }, { - "epoch": 0.06562951627986058, + "epoch": 0.09061741920338055, "grad_norm": 0.0, - "learning_rate": 1.9933527077613323e-05, - "loss": 1.0672, + "learning_rate": 1.980796094968896e-05, + "loss": 1.2764, "step": 2316 }, { - "epoch": 0.06565785372212304, + "epoch": 0.09065654589560998, "grad_norm": 0.0, - "learning_rate": 1.993342138871192e-05, - "loss": 1.0753, + "learning_rate": 1.9807713715319262e-05, + "loss": 1.327, "step": 2317 }, { - "epoch": 0.0656861911643855, + "epoch": 0.09069567258783942, "grad_norm": 0.0, - "learning_rate": 1.993331561613762e-05, - "loss": 1.1455, + "learning_rate": 1.980746632344997e-05, + "loss": 1.3403, "step": 2318 }, { - "epoch": 0.06571452860664796, + "epoch": 0.09073479928006886, "grad_norm": 0.0, - "learning_rate": 1.9933209759891318e-05, - "loss": 0.9811, + "learning_rate": 1.9807218774085055e-05, + "loss": 1.1846, "step": 2319 }, { - "epoch": 0.06574286604891043, + "epoch": 0.0907739259722983, "grad_norm": 0.0, - "learning_rate": 1.9933103819973896e-05, - "loss": 1.0836, + "learning_rate": 1.9806971067228496e-05, + "loss": 1.2686, "step": 2320 }, { - "epoch": 0.06577120349117288, + "epoch": 0.09081305266452774, "grad_norm": 0.0, - "learning_rate": 1.9932997796386254e-05, - "loss": 1.2329, + "learning_rate": 1.9806723202884273e-05, + "loss": 1.2883, "step": 2321 }, { - "epoch": 0.06579954093343535, + "epoch": 0.09085217935675718, "grad_norm": 0.0, - "learning_rate": 1.9932891689129284e-05, - "loss": 1.0504, + "learning_rate": 1.980647518105636e-05, + "loss": 1.329, "step": 2322 }, { - "epoch": 0.06582787837569781, + "epoch": 0.09089130604898663, "grad_norm": 0.0, - "learning_rate": 1.993278549820388e-05, - "loss": 1.1073, + "learning_rate": 1.9806227001748748e-05, + "loss": 1.226, "step": 2323 }, { - "epoch": 0.06585621581796026, + "epoch": 0.09093043274121605, "grad_norm": 0.0, - "learning_rate": 1.9932679223610934e-05, - "loss": 1.08, + "learning_rate": 1.9805978664965417e-05, + "loss": 1.3793, "step": 2324 }, { - "epoch": 0.06588455326022273, + "epoch": 0.0909695594334455, "grad_norm": 0.0, - "learning_rate": 1.9932572865351342e-05, - "loss": 1.1455, + "learning_rate": 1.9805730170710354e-05, + "loss": 1.3254, "step": 2325 }, { - "epoch": 0.0659128907024852, + "epoch": 0.09100868612567493, "grad_norm": 0.0, - "learning_rate": 1.9932466423425997e-05, - "loss": 1.0344, + "learning_rate": 1.9805481518987553e-05, + "loss": 1.2914, "step": 2326 }, { - "epoch": 0.06594122814474765, + "epoch": 0.09104781281790437, "grad_norm": 0.0, - "learning_rate": 1.9932359897835805e-05, - "loss": 1.029, + "learning_rate": 1.9805232709801008e-05, + "loss": 1.2378, "step": 2327 }, { - "epoch": 0.06596956558701011, + "epoch": 0.09108693951013382, "grad_norm": 0.0, - "learning_rate": 1.9932253288581656e-05, - "loss": 1.0478, + "learning_rate": 1.980498374315471e-05, + "loss": 1.2058, "step": 2328 }, { - "epoch": 0.06599790302927258, + "epoch": 0.09112606620236326, "grad_norm": 0.0, - "learning_rate": 1.993214659566445e-05, - "loss": 1.1882, + "learning_rate": 1.980473461905266e-05, + "loss": 1.3414, "step": 2329 }, { - "epoch": 0.06602624047153503, + "epoch": 0.0911651928945927, "grad_norm": 0.0, - "learning_rate": 1.993203981908508e-05, - "loss": 1.1348, + "learning_rate": 1.9804485337498857e-05, + "loss": 1.1805, "step": 2330 }, { - "epoch": 0.0660545779137975, + "epoch": 0.09120431958682212, "grad_norm": 0.0, - "learning_rate": 1.9931932958844453e-05, - "loss": 0.9928, + "learning_rate": 1.9804235898497305e-05, + "loss": 1.2378, "step": 2331 }, { - "epoch": 0.06608291535605997, + "epoch": 0.09124344627905157, "grad_norm": 0.0, - "learning_rate": 1.993182601494347e-05, - "loss": 1.1611, + "learning_rate": 1.980398630205201e-05, + "loss": 1.1586, "step": 2332 }, { - "epoch": 0.06611125279832242, + "epoch": 0.091282572971281, "grad_norm": 0.0, - "learning_rate": 1.9931718987383024e-05, - "loss": 0.9771, + "learning_rate": 1.9803736548166984e-05, + "loss": 1.2111, "step": 2333 }, { - "epoch": 0.06613959024058488, + "epoch": 0.09132169966351045, "grad_norm": 0.0, - "learning_rate": 1.9931611876164024e-05, - "loss": 1.0283, + "learning_rate": 1.980348663684623e-05, + "loss": 1.4274, "step": 2334 }, { - "epoch": 0.06616792768284735, + "epoch": 0.09136082635573989, "grad_norm": 0.0, - "learning_rate": 1.9931504681287364e-05, - "loss": 1.1536, + "learning_rate": 1.9803236568093765e-05, + "loss": 1.1889, "step": 2335 }, { - "epoch": 0.0661962651251098, + "epoch": 0.09139995304796933, "grad_norm": 0.0, - "learning_rate": 1.9931397402753957e-05, - "loss": 1.1496, + "learning_rate": 1.980298634191361e-05, + "loss": 1.1687, "step": 2336 }, { - "epoch": 0.06622460256737227, + "epoch": 0.09143907974019877, "grad_norm": 0.0, - "learning_rate": 1.9931290040564702e-05, - "loss": 1.0712, + "learning_rate": 1.980273595830977e-05, + "loss": 1.1526, "step": 2337 }, { - "epoch": 0.06625294000963473, + "epoch": 0.0914782064324282, "grad_norm": 0.0, - "learning_rate": 1.99311825947205e-05, - "loss": 1.1574, + "learning_rate": 1.980248541728628e-05, + "loss": 1.1613, "step": 2338 }, { - "epoch": 0.06628127745189719, + "epoch": 0.09151733312465764, "grad_norm": 0.0, - "learning_rate": 1.993107506522226e-05, - "loss": 1.1206, + "learning_rate": 1.9802234718847156e-05, + "loss": 1.199, "step": 2339 }, { - "epoch": 0.06630961489415965, + "epoch": 0.09155645981688708, "grad_norm": 0.0, - "learning_rate": 1.993096745207089e-05, - "loss": 1.1196, + "learning_rate": 1.9801983862996423e-05, + "loss": 1.2476, "step": 2340 }, { - "epoch": 0.06633795233642212, + "epoch": 0.09159558650911652, "grad_norm": 0.0, - "learning_rate": 1.993085975526729e-05, - "loss": 1.149, + "learning_rate": 1.9801732849738114e-05, + "loss": 1.2103, "step": 2341 }, { - "epoch": 0.06636628977868457, + "epoch": 0.09163471320134596, "grad_norm": 0.0, - "learning_rate": 1.993075197481237e-05, - "loss": 1.009, + "learning_rate": 1.9801481679076256e-05, + "loss": 1.2821, "step": 2342 }, { - "epoch": 0.06639462722094704, + "epoch": 0.0916738398935754, "grad_norm": 0.0, - "learning_rate": 1.9930644110707042e-05, - "loss": 1.1116, + "learning_rate": 1.9801230351014887e-05, + "loss": 1.2827, "step": 2343 }, { - "epoch": 0.0664229646632095, + "epoch": 0.09171296658580484, "grad_norm": 0.0, - "learning_rate": 1.993053616295221e-05, - "loss": 1.1918, + "learning_rate": 1.9800978865558038e-05, + "loss": 1.0976, "step": 2344 }, { - "epoch": 0.06645130210547195, + "epoch": 0.09175209327803427, "grad_norm": 0.0, - "learning_rate": 1.9930428131548782e-05, - "loss": 1.1996, + "learning_rate": 1.980072722270975e-05, + "loss": 1.334, "step": 2345 }, { - "epoch": 0.06647963954773442, + "epoch": 0.09179121997026371, "grad_norm": 0.0, - "learning_rate": 1.993032001649767e-05, - "loss": 1.2266, + "learning_rate": 1.9800475422474064e-05, + "loss": 1.3531, "step": 2346 }, { - "epoch": 0.06650797698999689, + "epoch": 0.09183034666249315, "grad_norm": 0.0, - "learning_rate": 1.9930211817799788e-05, - "loss": 1.1833, + "learning_rate": 1.9800223464855022e-05, + "loss": 1.1334, "step": 2347 }, { - "epoch": 0.06653631443225934, + "epoch": 0.09186947335472259, "grad_norm": 0.0, - "learning_rate": 1.9930103535456044e-05, - "loss": 1.2622, + "learning_rate": 1.9799971349856673e-05, + "loss": 1.2913, "step": 2348 }, { - "epoch": 0.0665646518745218, + "epoch": 0.09190860004695203, "grad_norm": 0.0, - "learning_rate": 1.9929995169467346e-05, - "loss": 1.0689, + "learning_rate": 1.9799719077483065e-05, + "loss": 1.3014, "step": 2349 }, { - "epoch": 0.06659298931678427, + "epoch": 0.09194772673918147, "grad_norm": 0.0, - "learning_rate": 1.9929886719834615e-05, - "loss": 1.0932, + "learning_rate": 1.9799466647738247e-05, + "loss": 1.3028, "step": 2350 }, { - "epoch": 0.06662132675904672, + "epoch": 0.09198685343141091, "grad_norm": 0.0, - "learning_rate": 1.9929778186558763e-05, - "loss": 1.0738, + "learning_rate": 1.9799214060626275e-05, + "loss": 1.2131, "step": 2351 }, { - "epoch": 0.06664966420130919, + "epoch": 0.09202598012364034, "grad_norm": 0.0, - "learning_rate": 1.9929669569640697e-05, - "loss": 1.1044, + "learning_rate": 1.9798961316151203e-05, + "loss": 1.132, "step": 2352 }, { - "epoch": 0.06667800164357165, + "epoch": 0.09206510681586978, "grad_norm": 0.0, - "learning_rate": 1.992956086908134e-05, - "loss": 1.1367, + "learning_rate": 1.9798708414317095e-05, + "loss": 1.1347, "step": 2353 }, { - "epoch": 0.06670633908583411, + "epoch": 0.09210423350809922, "grad_norm": 0.0, - "learning_rate": 1.9929452084881604e-05, - "loss": 1.0648, + "learning_rate": 1.9798455355128003e-05, + "loss": 1.1703, "step": 2354 }, { - "epoch": 0.06673467652809657, + "epoch": 0.09214336020032866, "grad_norm": 0.0, - "learning_rate": 1.9929343217042404e-05, - "loss": 1.1247, + "learning_rate": 1.9798202138588e-05, + "loss": 1.3021, "step": 2355 }, { - "epoch": 0.06676301397035904, + "epoch": 0.0921824868925581, "grad_norm": 0.0, - "learning_rate": 1.992923426556466e-05, - "loss": 1.0271, + "learning_rate": 1.9797948764701145e-05, + "loss": 1.2634, "step": 2356 }, { - "epoch": 0.06679135141262149, + "epoch": 0.09222161358478755, "grad_norm": 0.0, - "learning_rate": 1.992912523044929e-05, - "loss": 1.0633, + "learning_rate": 1.979769523347151e-05, + "loss": 1.209, "step": 2357 }, { - "epoch": 0.06681968885488396, + "epoch": 0.09226074027701699, "grad_norm": 0.0, - "learning_rate": 1.992901611169721e-05, - "loss": 1.1111, + "learning_rate": 1.9797441544903173e-05, + "loss": 1.299, "step": 2358 }, { - "epoch": 0.06684802629714642, + "epoch": 0.09229986696924641, "grad_norm": 0.0, - "learning_rate": 1.9928906909309342e-05, - "loss": 1.076, + "learning_rate": 1.9797187699000196e-05, + "loss": 1.3671, "step": 2359 }, { - "epoch": 0.06687636373940888, + "epoch": 0.09233899366147585, "grad_norm": 0.0, - "learning_rate": 1.9928797623286602e-05, - "loss": 1.0611, + "learning_rate": 1.9796933695766663e-05, + "loss": 1.0894, "step": 2360 }, { - "epoch": 0.06690470118167134, + "epoch": 0.0923781203537053, "grad_norm": 0.0, - "learning_rate": 1.9928688253629916e-05, - "loss": 1.1221, + "learning_rate": 1.979667953520665e-05, + "loss": 1.251, "step": 2361 }, { - "epoch": 0.06693303862393381, + "epoch": 0.09241724704593474, "grad_norm": 0.0, - "learning_rate": 1.99285788003402e-05, - "loss": 1.1129, + "learning_rate": 1.9796425217324244e-05, + "loss": 1.2676, "step": 2362 }, { - "epoch": 0.06696137606619626, + "epoch": 0.09245637373816418, "grad_norm": 0.0, - "learning_rate": 1.9928469263418376e-05, - "loss": 1.1818, + "learning_rate": 1.979617074212352e-05, + "loss": 1.2667, "step": 2363 }, { - "epoch": 0.06698971350845873, + "epoch": 0.09249550043039362, "grad_norm": 0.0, - "learning_rate": 1.992835964286537e-05, - "loss": 1.1475, + "learning_rate": 1.979591610960857e-05, + "loss": 1.3065, "step": 2364 }, { - "epoch": 0.06701805095072119, + "epoch": 0.09253462712262306, "grad_norm": 0.0, - "learning_rate": 1.9928249938682103e-05, - "loss": 1.0209, + "learning_rate": 1.979566131978348e-05, + "loss": 1.3518, "step": 2365 }, { - "epoch": 0.06704638839298364, + "epoch": 0.09257375381485249, "grad_norm": 0.0, - "learning_rate": 1.99281401508695e-05, - "loss": 1.0138, + "learning_rate": 1.9795406372652345e-05, + "loss": 1.0041, "step": 2366 }, { - "epoch": 0.06707472583524611, + "epoch": 0.09261288050708193, "grad_norm": 0.0, - "learning_rate": 1.9928030279428487e-05, - "loss": 1.164, + "learning_rate": 1.979515126821926e-05, + "loss": 1.2136, "step": 2367 }, { - "epoch": 0.06710306327750858, + "epoch": 0.09265200719931137, "grad_norm": 0.0, - "learning_rate": 1.9927920324359985e-05, - "loss": 1.1115, + "learning_rate": 1.979489600648832e-05, + "loss": 1.2261, "step": 2368 }, { - "epoch": 0.06713140071977103, + "epoch": 0.09269113389154081, "grad_norm": 0.0, - "learning_rate": 1.9927810285664928e-05, - "loss": 1.186, + "learning_rate": 1.9794640587463622e-05, + "loss": 1.2607, "step": 2369 }, { - "epoch": 0.0671597381620335, + "epoch": 0.09273026058377025, "grad_norm": 0.0, - "learning_rate": 1.9927700163344238e-05, - "loss": 1.0709, + "learning_rate": 1.979438501114927e-05, + "loss": 1.2678, "step": 2370 }, { - "epoch": 0.06718807560429596, + "epoch": 0.09276938727599969, "grad_norm": 0.0, - "learning_rate": 1.992758995739884e-05, - "loss": 1.0367, + "learning_rate": 1.979412927754937e-05, + "loss": 1.3452, "step": 2371 }, { - "epoch": 0.06721641304655841, + "epoch": 0.09280851396822913, "grad_norm": 0.0, - "learning_rate": 1.9927479667829667e-05, - "loss": 1.1045, + "learning_rate": 1.9793873386668023e-05, + "loss": 1.2584, "step": 2372 }, { - "epoch": 0.06724475048882088, + "epoch": 0.09284764066045856, "grad_norm": 0.0, - "learning_rate": 1.9927369294637646e-05, - "loss": 1.0785, + "learning_rate": 1.9793617338509344e-05, + "loss": 1.1936, "step": 2373 }, { - "epoch": 0.06727308793108334, + "epoch": 0.092886767352688, "grad_norm": 0.0, - "learning_rate": 1.9927258837823707e-05, - "loss": 1.1686, + "learning_rate": 1.9793361133077444e-05, + "loss": 1.229, "step": 2374 }, { - "epoch": 0.0673014253733458, + "epoch": 0.09292589404491744, "grad_norm": 0.0, - "learning_rate": 1.992714829738878e-05, - "loss": 1.1664, + "learning_rate": 1.979310477037643e-05, + "loss": 1.1553, "step": 2375 }, { - "epoch": 0.06732976281560826, + "epoch": 0.09296502073714688, "grad_norm": 0.0, - "learning_rate": 1.9927037673333797e-05, - "loss": 1.1123, + "learning_rate": 1.979284825041043e-05, + "loss": 1.2519, "step": 2376 }, { - "epoch": 0.06735810025787073, + "epoch": 0.09300414742937632, "grad_norm": 0.0, - "learning_rate": 1.992692696565969e-05, - "loss": 1.0549, + "learning_rate": 1.9792591573183556e-05, + "loss": 1.4362, "step": 2377 }, { - "epoch": 0.06738643770013318, + "epoch": 0.09304327412160576, "grad_norm": 0.0, - "learning_rate": 1.9926816174367388e-05, - "loss": 1.1912, + "learning_rate": 1.9792334738699934e-05, + "loss": 1.355, "step": 2378 }, { - "epoch": 0.06741477514239565, + "epoch": 0.0930824008138352, "grad_norm": 0.0, - "learning_rate": 1.992670529945783e-05, - "loss": 1.1256, + "learning_rate": 1.9792077746963686e-05, + "loss": 1.2569, "step": 2379 }, { - "epoch": 0.06744311258465811, + "epoch": 0.09312152750606464, "grad_norm": 0.0, - "learning_rate": 1.9926594340931947e-05, - "loss": 1.0616, + "learning_rate": 1.9791820597978942e-05, + "loss": 1.2335, "step": 2380 }, { - "epoch": 0.06747145002692057, + "epoch": 0.09316065419829407, "grad_norm": 0.0, - "learning_rate": 1.9926483298790672e-05, - "loss": 1.1398, + "learning_rate": 1.9791563291749824e-05, + "loss": 1.1928, "step": 2381 }, { - "epoch": 0.06749978746918303, + "epoch": 0.09319978089052351, "grad_norm": 0.0, - "learning_rate": 1.9926372173034946e-05, - "loss": 1.0969, + "learning_rate": 1.9791305828280473e-05, + "loss": 1.2739, "step": 2382 }, { - "epoch": 0.0675281249114455, + "epoch": 0.09323890758275295, "grad_norm": 0.0, - "learning_rate": 1.9926260963665694e-05, - "loss": 1.0591, + "learning_rate": 1.9791048207575018e-05, + "loss": 1.1231, "step": 2383 }, { - "epoch": 0.06755646235370795, + "epoch": 0.0932780342749824, "grad_norm": 0.0, - "learning_rate": 1.992614967068387e-05, - "loss": 1.0712, + "learning_rate": 1.9790790429637597e-05, + "loss": 1.2415, "step": 2384 }, { - "epoch": 0.06758479979597042, + "epoch": 0.09331716096721183, "grad_norm": 0.0, - "learning_rate": 1.9926038294090394e-05, - "loss": 1.1298, + "learning_rate": 1.979053249447235e-05, + "loss": 1.2538, "step": 2385 }, { - "epoch": 0.06761313723823288, + "epoch": 0.09335628765944128, "grad_norm": 0.0, - "learning_rate": 1.992592683388621e-05, - "loss": 1.0937, + "learning_rate": 1.979027440208342e-05, + "loss": 1.2688, "step": 2386 }, { - "epoch": 0.06764147468049533, + "epoch": 0.09339541435167072, "grad_norm": 0.0, - "learning_rate": 1.9925815290072263e-05, - "loss": 1.0515, + "learning_rate": 1.9790016152474952e-05, + "loss": 1.1998, "step": 2387 }, { - "epoch": 0.0676698121227578, + "epoch": 0.09343454104390014, "grad_norm": 0.0, - "learning_rate": 1.9925703662649483e-05, - "loss": 1.0517, + "learning_rate": 1.9789757745651093e-05, + "loss": 1.2414, "step": 2388 }, { - "epoch": 0.06769814956502027, + "epoch": 0.09347366773612958, "grad_norm": 0.0, - "learning_rate": 1.9925591951618822e-05, - "loss": 1.0324, + "learning_rate": 1.978949918161599e-05, + "loss": 1.253, "step": 2389 }, { - "epoch": 0.06772648700728272, + "epoch": 0.09351279442835903, "grad_norm": 0.0, - "learning_rate": 1.992548015698121e-05, - "loss": 1.1754, + "learning_rate": 1.9789240460373794e-05, + "loss": 1.0985, "step": 2390 }, { - "epoch": 0.06775482444954518, + "epoch": 0.09355192112058847, "grad_norm": 0.0, - "learning_rate": 1.9925368278737594e-05, - "loss": 1.0702, + "learning_rate": 1.978898158192867e-05, + "loss": 1.2473, "step": 2391 }, { - "epoch": 0.06778316189180765, + "epoch": 0.0935910478128179, "grad_norm": 0.0, - "learning_rate": 1.9925256316888917e-05, - "loss": 1.0881, + "learning_rate": 1.978872254628476e-05, + "loss": 1.0877, "step": 2392 }, { - "epoch": 0.0678114993340701, + "epoch": 0.09363017450504735, "grad_norm": 0.0, - "learning_rate": 1.9925144271436116e-05, - "loss": 1.1709, + "learning_rate": 1.978846335344624e-05, + "loss": 1.1967, "step": 2393 }, { - "epoch": 0.06783983677633257, + "epoch": 0.09366930119727679, "grad_norm": 0.0, - "learning_rate": 1.9925032142380144e-05, - "loss": 1.0284, + "learning_rate": 1.978820400341726e-05, + "loss": 1.236, "step": 2394 }, { - "epoch": 0.06786817421859503, + "epoch": 0.09370842788950622, "grad_norm": 0.0, - "learning_rate": 1.9924919929721938e-05, - "loss": 1.1756, + "learning_rate": 1.9787944496201988e-05, + "loss": 1.2208, "step": 2395 }, { - "epoch": 0.06789651166085749, + "epoch": 0.09374755458173566, "grad_norm": 0.0, - "learning_rate": 1.9924807633462445e-05, - "loss": 1.1696, + "learning_rate": 1.9787684831804596e-05, + "loss": 1.2623, "step": 2396 }, { - "epoch": 0.06792484910311995, + "epoch": 0.0937866812739651, "grad_norm": 0.0, - "learning_rate": 1.9924695253602612e-05, - "loss": 1.0963, + "learning_rate": 1.9787425010229246e-05, + "loss": 1.2856, "step": 2397 }, { - "epoch": 0.06795318654538242, + "epoch": 0.09382580796619454, "grad_norm": 0.0, - "learning_rate": 1.992458279014339e-05, - "loss": 1.1144, + "learning_rate": 1.978716503148012e-05, + "loss": 1.1584, "step": 2398 }, { - "epoch": 0.06798152398764487, + "epoch": 0.09386493465842398, "grad_norm": 0.0, - "learning_rate": 1.9924470243085716e-05, - "loss": 1.138, + "learning_rate": 1.9786904895561382e-05, + "loss": 1.3575, "step": 2399 }, { - "epoch": 0.06800986142990734, + "epoch": 0.09390406135065342, "grad_norm": 0.0, - "learning_rate": 1.9924357612430544e-05, - "loss": 1.1039, + "learning_rate": 1.9786644602477217e-05, + "loss": 1.1601, "step": 2400 }, { - "epoch": 0.0680381988721698, + "epoch": 0.09394318804288286, "grad_norm": 0.0, - "learning_rate": 1.9924244898178825e-05, - "loss": 1.1077, + "learning_rate": 1.9786384152231804e-05, + "loss": 1.3351, "step": 2401 }, { - "epoch": 0.06806653631443225, + "epoch": 0.09398231473511229, "grad_norm": 0.0, - "learning_rate": 1.9924132100331505e-05, - "loss": 1.0972, + "learning_rate": 1.978612354482932e-05, + "loss": 1.3103, "step": 2402 }, { - "epoch": 0.06809487375669472, + "epoch": 0.09402144142734173, "grad_norm": 0.0, - "learning_rate": 1.9924019218889536e-05, - "loss": 1.0897, + "learning_rate": 1.978586278027396e-05, + "loss": 1.2827, "step": 2403 }, { - "epoch": 0.06812321119895719, + "epoch": 0.09406056811957117, "grad_norm": 0.0, - "learning_rate": 1.9923906253853867e-05, - "loss": 1.1765, + "learning_rate": 1.9785601858569907e-05, + "loss": 1.162, "step": 2404 }, { - "epoch": 0.06815154864121964, + "epoch": 0.09409969481180061, "grad_norm": 0.0, - "learning_rate": 1.9923793205225453e-05, - "loss": 1.1544, + "learning_rate": 1.9785340779721348e-05, + "loss": 1.1744, "step": 2405 }, { - "epoch": 0.0681798860834821, + "epoch": 0.09413882150403005, "grad_norm": 0.0, - "learning_rate": 1.9923680073005244e-05, - "loss": 1.0639, + "learning_rate": 1.9785079543732476e-05, + "loss": 1.1522, "step": 2406 }, { - "epoch": 0.06820822352574457, + "epoch": 0.09417794819625949, "grad_norm": 0.0, - "learning_rate": 1.992356685719419e-05, - "loss": 1.1114, + "learning_rate": 1.978481815060749e-05, + "loss": 1.1475, "step": 2407 }, { - "epoch": 0.06823656096800702, + "epoch": 0.09421707488848893, "grad_norm": 0.0, - "learning_rate": 1.9923453557793247e-05, - "loss": 1.0855, + "learning_rate": 1.9784556600350583e-05, + "loss": 1.2178, "step": 2408 }, { - "epoch": 0.06826489841026949, + "epoch": 0.09425620158071836, "grad_norm": 0.0, - "learning_rate": 1.992334017480337e-05, - "loss": 1.1768, + "learning_rate": 1.978429489296596e-05, + "loss": 1.2318, "step": 2409 }, { - "epoch": 0.06829323585253196, + "epoch": 0.0942953282729478, "grad_norm": 0.0, - "learning_rate": 1.992322670822551e-05, - "loss": 1.042, + "learning_rate": 1.978403302845782e-05, + "loss": 1.3063, "step": 2410 }, { - "epoch": 0.06832157329479441, + "epoch": 0.09433445496517724, "grad_norm": 0.0, - "learning_rate": 1.9923113158060632e-05, - "loss": 1.0388, + "learning_rate": 1.978377100683037e-05, + "loss": 1.3357, "step": 2411 }, { - "epoch": 0.06834991073705687, + "epoch": 0.09437358165740668, "grad_norm": 0.0, - "learning_rate": 1.9922999524309684e-05, - "loss": 1.2375, + "learning_rate": 1.9783508828087822e-05, + "loss": 1.161, "step": 2412 }, { - "epoch": 0.06837824817931934, + "epoch": 0.09441270834963612, "grad_norm": 0.0, - "learning_rate": 1.992288580697363e-05, - "loss": 1.1494, + "learning_rate": 1.9783246492234376e-05, + "loss": 1.1031, "step": 2413 }, { - "epoch": 0.06840658562158179, + "epoch": 0.09445183504186556, "grad_norm": 0.0, - "learning_rate": 1.9922772006053424e-05, - "loss": 0.9964, + "learning_rate": 1.9782983999274252e-05, + "loss": 1.2414, "step": 2414 }, { - "epoch": 0.06843492306384426, + "epoch": 0.094490961734095, "grad_norm": 0.0, - "learning_rate": 1.9922658121550024e-05, - "loss": 1.036, + "learning_rate": 1.9782721349211664e-05, + "loss": 1.0992, "step": 2415 }, { - "epoch": 0.06846326050610672, + "epoch": 0.09453008842632443, "grad_norm": 0.0, - "learning_rate": 1.9922544153464387e-05, - "loss": 1.0998, + "learning_rate": 1.978245854205083e-05, + "loss": 1.2701, "step": 2416 }, { - "epoch": 0.06849159794836918, + "epoch": 0.09456921511855387, "grad_norm": 0.0, - "learning_rate": 1.9922430101797476e-05, - "loss": 1.069, + "learning_rate": 1.978219557779597e-05, + "loss": 1.1993, "step": 2417 }, { - "epoch": 0.06851993539063164, + "epoch": 0.09460834181078331, "grad_norm": 0.0, - "learning_rate": 1.9922315966550253e-05, - "loss": 1.1255, + "learning_rate": 1.978193245645131e-05, + "loss": 1.2811, "step": 2418 }, { - "epoch": 0.06854827283289411, + "epoch": 0.09464746850301275, "grad_norm": 0.0, - "learning_rate": 1.992220174772368e-05, - "loss": 1.1455, + "learning_rate": 1.9781669178021066e-05, + "loss": 1.301, "step": 2419 }, { - "epoch": 0.06857661027515656, + "epoch": 0.0946865951952422, "grad_norm": 0.0, - "learning_rate": 1.9922087445318713e-05, - "loss": 1.227, + "learning_rate": 1.9781405742509475e-05, + "loss": 1.2021, "step": 2420 }, { - "epoch": 0.06860494771741903, + "epoch": 0.09472572188747164, "grad_norm": 0.0, - "learning_rate": 1.9921973059336324e-05, - "loss": 1.2068, + "learning_rate": 1.9781142149920763e-05, + "loss": 1.2181, "step": 2421 }, { - "epoch": 0.06863328515968149, + "epoch": 0.09476484857970108, "grad_norm": 0.0, - "learning_rate": 1.9921858589777466e-05, - "loss": 1.1687, + "learning_rate": 1.9780878400259163e-05, + "loss": 1.1689, "step": 2422 }, { - "epoch": 0.06866162260194394, + "epoch": 0.0948039752719305, "grad_norm": 0.0, - "learning_rate": 1.9921744036643113e-05, - "loss": 1.0588, + "learning_rate": 1.9780614493528917e-05, + "loss": 1.2856, "step": 2423 }, { - "epoch": 0.06868996004420641, + "epoch": 0.09484310196415995, "grad_norm": 0.0, - "learning_rate": 1.9921629399934224e-05, - "loss": 1.1398, + "learning_rate": 1.9780350429734256e-05, + "loss": 1.3085, "step": 2424 }, { - "epoch": 0.06871829748646888, + "epoch": 0.09488222865638939, "grad_norm": 0.0, - "learning_rate": 1.9921514679651767e-05, - "loss": 1.0794, + "learning_rate": 1.978008620887942e-05, + "loss": 1.2037, "step": 2425 }, { - "epoch": 0.06874663492873133, + "epoch": 0.09492135534861883, "grad_norm": 0.0, - "learning_rate": 1.9921399875796705e-05, - "loss": 1.1241, + "learning_rate": 1.977982183096866e-05, + "loss": 1.2634, "step": 2426 }, { - "epoch": 0.0687749723709938, + "epoch": 0.09496048204084827, "grad_norm": 0.0, - "learning_rate": 1.992128498837001e-05, - "loss": 1.1485, + "learning_rate": 1.9779557296006213e-05, + "loss": 1.1938, "step": 2427 }, { - "epoch": 0.06880330981325626, + "epoch": 0.09499960873307771, "grad_norm": 0.0, - "learning_rate": 1.9921170017372645e-05, - "loss": 1.0587, + "learning_rate": 1.977929260399633e-05, + "loss": 1.2869, "step": 2428 }, { - "epoch": 0.06883164725551871, + "epoch": 0.09503873542530715, "grad_norm": 0.0, - "learning_rate": 1.9921054962805586e-05, - "loss": 1.1708, + "learning_rate": 1.977902775494326e-05, + "loss": 1.3593, "step": 2429 }, { - "epoch": 0.06885998469778118, + "epoch": 0.09507786211753658, "grad_norm": 0.0, - "learning_rate": 1.992093982466979e-05, - "loss": 1.0154, + "learning_rate": 1.977876274885126e-05, + "loss": 1.2872, "step": 2430 }, { - "epoch": 0.06888832214004365, + "epoch": 0.09511698880976602, "grad_norm": 0.0, - "learning_rate": 1.992082460296624e-05, - "loss": 1.0878, + "learning_rate": 1.9778497585724586e-05, + "loss": 1.2473, "step": 2431 }, { - "epoch": 0.0689166595823061, + "epoch": 0.09515611550199546, "grad_norm": 0.0, - "learning_rate": 1.99207092976959e-05, - "loss": 1.1061, + "learning_rate": 1.9778232265567493e-05, + "loss": 1.0958, "step": 2432 }, { - "epoch": 0.06894499702456856, + "epoch": 0.0951952421942249, "grad_norm": 0.0, - "learning_rate": 1.9920593908859737e-05, - "loss": 1.1554, + "learning_rate": 1.9777966788384243e-05, + "loss": 1.2195, "step": 2433 }, { - "epoch": 0.06897333446683103, + "epoch": 0.09523436888645434, "grad_norm": 0.0, - "learning_rate": 1.9920478436458734e-05, - "loss": 1.1511, + "learning_rate": 1.9777701154179097e-05, + "loss": 1.2337, "step": 2434 }, { - "epoch": 0.06900167190909348, + "epoch": 0.09527349557868378, "grad_norm": 0.0, - "learning_rate": 1.992036288049385e-05, - "loss": 1.1885, + "learning_rate": 1.9777435362956322e-05, + "loss": 1.1921, "step": 2435 }, { - "epoch": 0.06903000935135595, + "epoch": 0.09531262227091322, "grad_norm": 0.0, - "learning_rate": 1.9920247240966072e-05, - "loss": 1.0139, + "learning_rate": 1.977716941472019e-05, + "loss": 1.2926, "step": 2436 }, { - "epoch": 0.06905834679361841, + "epoch": 0.09535174896314265, "grad_norm": 0.0, - "learning_rate": 1.992013151787636e-05, - "loss": 1.1512, + "learning_rate": 1.9776903309474965e-05, + "loss": 1.1451, "step": 2437 }, { - "epoch": 0.06908668423588087, + "epoch": 0.09539087565537209, "grad_norm": 0.0, - "learning_rate": 1.9920015711225705e-05, - "loss": 1.1169, + "learning_rate": 1.9776637047224927e-05, + "loss": 1.2238, "step": 2438 }, { - "epoch": 0.06911502167814333, + "epoch": 0.09543000234760153, "grad_norm": 0.0, - "learning_rate": 1.9919899821015066e-05, - "loss": 1.0951, + "learning_rate": 1.9776370627974347e-05, + "loss": 1.1623, "step": 2439 }, { - "epoch": 0.0691433591204058, + "epoch": 0.09546912903983097, "grad_norm": 0.0, - "learning_rate": 1.9919783847245436e-05, - "loss": 1.14, + "learning_rate": 1.9776104051727505e-05, + "loss": 1.2244, "step": 2440 }, { - "epoch": 0.06917169656266825, + "epoch": 0.09550825573206041, "grad_norm": 0.0, - "learning_rate": 1.9919667789917775e-05, - "loss": 1.1148, + "learning_rate": 1.9775837318488683e-05, + "loss": 1.2841, "step": 2441 }, { - "epoch": 0.06920003400493072, + "epoch": 0.09554738242428985, "grad_norm": 0.0, - "learning_rate": 1.9919551649033074e-05, - "loss": 1.0568, + "learning_rate": 1.9775570428262164e-05, + "loss": 1.1956, "step": 2442 }, { - "epoch": 0.06922837144719318, + "epoch": 0.0955865091165193, "grad_norm": 0.0, - "learning_rate": 1.99194354245923e-05, - "loss": 1.0253, + "learning_rate": 1.9775303381052234e-05, + "loss": 1.2427, "step": 2443 }, { - "epoch": 0.06925670888945563, + "epoch": 0.09562563580874874, "grad_norm": 0.0, - "learning_rate": 1.9919319116596446e-05, - "loss": 1.1598, + "learning_rate": 1.9775036176863178e-05, + "loss": 1.2264, "step": 2444 }, { - "epoch": 0.0692850463317181, + "epoch": 0.09566476250097816, "grad_norm": 0.0, - "learning_rate": 1.9919202725046477e-05, - "loss": 1.1743, + "learning_rate": 1.977476881569929e-05, + "loss": 1.2567, "step": 2445 }, { - "epoch": 0.06931338377398057, + "epoch": 0.0957038891932076, "grad_norm": 0.0, - "learning_rate": 1.9919086249943382e-05, - "loss": 1.1424, + "learning_rate": 1.9774501297564864e-05, + "loss": 1.2451, "step": 2446 }, { - "epoch": 0.06934172121624302, + "epoch": 0.09574301588543704, "grad_norm": 0.0, - "learning_rate": 1.991896969128814e-05, - "loss": 1.0466, + "learning_rate": 1.9774233622464196e-05, + "loss": 1.2675, "step": 2447 }, { - "epoch": 0.06937005865850548, + "epoch": 0.09578214257766648, "grad_norm": 0.0, - "learning_rate": 1.9918853049081736e-05, - "loss": 1.0323, + "learning_rate": 1.9773965790401583e-05, + "loss": 1.1664, "step": 2448 }, { - "epoch": 0.06939839610076795, + "epoch": 0.09582126926989593, "grad_norm": 0.0, - "learning_rate": 1.9918736323325146e-05, - "loss": 0.9834, + "learning_rate": 1.977369780138133e-05, + "loss": 1.2742, "step": 2449 }, { - "epoch": 0.0694267335430304, + "epoch": 0.09586039596212537, "grad_norm": 0.0, - "learning_rate": 1.9918619514019357e-05, - "loss": 1.0334, + "learning_rate": 1.9773429655407734e-05, + "loss": 1.3243, "step": 2450 }, { - "epoch": 0.06945507098529287, + "epoch": 0.09589952265435481, "grad_norm": 0.0, - "learning_rate": 1.9918502621165355e-05, - "loss": 1.238, + "learning_rate": 1.9773161352485106e-05, + "loss": 1.1566, "step": 2451 }, { - "epoch": 0.06948340842755533, + "epoch": 0.09593864934658423, "grad_norm": 0.0, - "learning_rate": 1.991838564476412e-05, - "loss": 1.1186, + "learning_rate": 1.9772892892617753e-05, + "loss": 1.2048, "step": 2452 }, { - "epoch": 0.06951174586981779, + "epoch": 0.09597777603881368, "grad_norm": 0.0, - "learning_rate": 1.9918268584816644e-05, - "loss": 1.1108, + "learning_rate": 1.9772624275809984e-05, + "loss": 1.2483, "step": 2453 }, { - "epoch": 0.06954008331208025, + "epoch": 0.09601690273104312, "grad_norm": 0.0, - "learning_rate": 1.99181514413239e-05, - "loss": 1.0081, + "learning_rate": 1.977235550206612e-05, + "loss": 1.232, "step": 2454 }, { - "epoch": 0.06956842075434272, + "epoch": 0.09605602942327256, "grad_norm": 0.0, - "learning_rate": 1.991803421428689e-05, - "loss": 1.0707, + "learning_rate": 1.9772086571390467e-05, + "loss": 1.1658, "step": 2455 }, { - "epoch": 0.06959675819660517, + "epoch": 0.096095156115502, "grad_norm": 0.0, - "learning_rate": 1.9917916903706592e-05, - "loss": 1.1066, + "learning_rate": 1.977181748378735e-05, + "loss": 1.2995, "step": 2456 }, { - "epoch": 0.06962509563886764, + "epoch": 0.09613428280773144, "grad_norm": 0.0, - "learning_rate": 1.9917799509583998e-05, - "loss": 1.0096, + "learning_rate": 1.9771548239261088e-05, + "loss": 1.3193, "step": 2457 }, { - "epoch": 0.0696534330811301, + "epoch": 0.09617340949996088, "grad_norm": 0.0, - "learning_rate": 1.9917682031920096e-05, - "loss": 0.9724, + "learning_rate": 1.977127883781601e-05, + "loss": 1.2826, "step": 2458 }, { - "epoch": 0.06968177052339256, + "epoch": 0.0962125361921903, "grad_norm": 0.0, - "learning_rate": 1.9917564470715876e-05, - "loss": 1.097, + "learning_rate": 1.9771009279456436e-05, + "loss": 1.2234, "step": 2459 }, { - "epoch": 0.06971010796565502, + "epoch": 0.09625166288441975, "grad_norm": 0.0, - "learning_rate": 1.991744682597233e-05, - "loss": 1.1008, + "learning_rate": 1.9770739564186695e-05, + "loss": 1.1762, "step": 2460 }, { - "epoch": 0.06973844540791749, + "epoch": 0.09629078957664919, "grad_norm": 0.0, - "learning_rate": 1.991732909769044e-05, - "loss": 1.0598, + "learning_rate": 1.977046969201112e-05, + "loss": 1.2862, "step": 2461 }, { - "epoch": 0.06976678285017994, + "epoch": 0.09632991626887863, "grad_norm": 0.0, - "learning_rate": 1.991721128587121e-05, - "loss": 1.098, + "learning_rate": 1.977019966293405e-05, + "loss": 1.079, "step": 2462 }, { - "epoch": 0.0697951202924424, + "epoch": 0.09636904296110807, "grad_norm": 0.0, - "learning_rate": 1.9917093390515626e-05, - "loss": 1.1189, + "learning_rate": 1.9769929476959812e-05, + "loss": 1.2704, "step": 2463 }, { - "epoch": 0.06982345773470487, + "epoch": 0.09640816965333751, "grad_norm": 0.0, - "learning_rate": 1.991697541162468e-05, - "loss": 1.0738, + "learning_rate": 1.976965913409275e-05, + "loss": 1.1914, "step": 2464 }, { - "epoch": 0.06985179517696732, + "epoch": 0.09644729634556695, "grad_norm": 0.0, - "learning_rate": 1.9916857349199366e-05, - "loss": 1.2111, + "learning_rate": 1.9769388634337202e-05, + "loss": 1.1525, "step": 2465 }, { - "epoch": 0.06988013261922979, + "epoch": 0.09648642303779638, "grad_norm": 0.0, - "learning_rate": 1.9916739203240682e-05, - "loss": 1.1232, + "learning_rate": 1.9769117977697513e-05, + "loss": 1.1359, "step": 2466 }, { - "epoch": 0.06990847006149226, + "epoch": 0.09652554973002582, "grad_norm": 0.0, - "learning_rate": 1.991662097374962e-05, - "loss": 1.1147, + "learning_rate": 1.9768847164178036e-05, + "loss": 1.2455, "step": 2467 }, { - "epoch": 0.06993680750375471, + "epoch": 0.09656467642225526, "grad_norm": 0.0, - "learning_rate": 1.991650266072718e-05, - "loss": 1.0475, + "learning_rate": 1.976857619378311e-05, + "loss": 1.3289, "step": 2468 }, { - "epoch": 0.06996514494601717, + "epoch": 0.0966038031144847, "grad_norm": 0.0, - "learning_rate": 1.9916384264174354e-05, - "loss": 1.1089, + "learning_rate": 1.9768305066517093e-05, + "loss": 1.2919, "step": 2469 }, { - "epoch": 0.06999348238827964, + "epoch": 0.09664292980671414, "grad_norm": 0.0, - "learning_rate": 1.991626578409214e-05, - "loss": 0.9518, + "learning_rate": 1.9768033782384338e-05, + "loss": 1.2004, "step": 2470 }, { - "epoch": 0.07002181983054209, + "epoch": 0.09668205649894358, "grad_norm": 0.0, - "learning_rate": 1.991614722048154e-05, - "loss": 1.141, + "learning_rate": 1.97677623413892e-05, + "loss": 1.2002, "step": 2471 }, { - "epoch": 0.07005015727280456, + "epoch": 0.09672118319117302, "grad_norm": 0.0, - "learning_rate": 1.9916028573343548e-05, - "loss": 1.0788, + "learning_rate": 1.9767490743536037e-05, + "loss": 1.1664, "step": 2472 }, { - "epoch": 0.07007849471506702, + "epoch": 0.09676030988340245, "grad_norm": 0.0, - "learning_rate": 1.9915909842679166e-05, - "loss": 1.0674, + "learning_rate": 1.9767218988829212e-05, + "loss": 1.2495, "step": 2473 }, { - "epoch": 0.07010683215732948, + "epoch": 0.09679943657563189, "grad_norm": 0.0, - "learning_rate": 1.991579102848939e-05, - "loss": 1.2218, + "learning_rate": 1.9766947077273092e-05, + "loss": 1.3549, "step": 2474 }, { - "epoch": 0.07013516959959194, + "epoch": 0.09683856326786133, "grad_norm": 0.0, - "learning_rate": 1.9915672130775226e-05, - "loss": 1.2065, + "learning_rate": 1.976667500887204e-05, + "loss": 1.1138, "step": 2475 }, { - "epoch": 0.07016350704185441, + "epoch": 0.09687768996009077, "grad_norm": 0.0, - "learning_rate": 1.991555314953767e-05, - "loss": 1.2054, + "learning_rate": 1.9766402783630424e-05, + "loss": 1.2346, "step": 2476 }, { - "epoch": 0.07019184448411686, + "epoch": 0.09691681665232021, "grad_norm": 0.0, - "learning_rate": 1.9915434084777738e-05, - "loss": 1.1859, + "learning_rate": 1.9766130401552617e-05, + "loss": 1.2761, "step": 2477 }, { - "epoch": 0.07022018192637933, + "epoch": 0.09695594334454966, "grad_norm": 0.0, - "learning_rate": 1.9915314936496412e-05, - "loss": 1.0495, + "learning_rate": 1.976585786264299e-05, + "loss": 1.2886, "step": 2478 }, { - "epoch": 0.07024851936864179, + "epoch": 0.0969950700367791, "grad_norm": 0.0, - "learning_rate": 1.9915195704694714e-05, - "loss": 1.014, + "learning_rate": 1.976558516690593e-05, + "loss": 1.158, "step": 2479 }, { - "epoch": 0.07027685681090425, + "epoch": 0.09703419672900852, "grad_norm": 0.0, - "learning_rate": 1.9915076389373635e-05, - "loss": 1.1375, + "learning_rate": 1.9765312314345807e-05, + "loss": 1.2901, "step": 2480 }, { - "epoch": 0.07030519425316671, + "epoch": 0.09707332342123796, "grad_norm": 0.0, - "learning_rate": 1.9914956990534187e-05, - "loss": 1.0633, + "learning_rate": 1.9765039304967004e-05, + "loss": 1.2234, "step": 2481 }, { - "epoch": 0.07033353169542918, + "epoch": 0.0971124501134674, "grad_norm": 0.0, - "learning_rate": 1.9914837508177375e-05, - "loss": 1.0493, + "learning_rate": 1.976476613877391e-05, + "loss": 1.0668, "step": 2482 }, { - "epoch": 0.07036186913769163, + "epoch": 0.09715157680569685, "grad_norm": 0.0, - "learning_rate": 1.9914717942304205e-05, - "loss": 1.0796, + "learning_rate": 1.97644928157709e-05, + "loss": 1.1866, "step": 2483 }, { - "epoch": 0.0703902065799541, + "epoch": 0.09719070349792629, "grad_norm": 0.0, - "learning_rate": 1.9914598292915684e-05, - "loss": 1.126, + "learning_rate": 1.9764219335962376e-05, + "loss": 1.2631, "step": 2484 }, { - "epoch": 0.07041854402221656, + "epoch": 0.09722983019015573, "grad_norm": 0.0, - "learning_rate": 1.991447856001282e-05, - "loss": 1.0625, + "learning_rate": 1.976394569935272e-05, + "loss": 1.2759, "step": 2485 }, { - "epoch": 0.07044688146447901, + "epoch": 0.09726895688238517, "grad_norm": 0.0, - "learning_rate": 1.9914358743596623e-05, - "loss": 1.0393, + "learning_rate": 1.9763671905946338e-05, + "loss": 1.0764, "step": 2486 }, { - "epoch": 0.07047521890674148, + "epoch": 0.0973080835746146, "grad_norm": 0.0, - "learning_rate": 1.9914238843668096e-05, - "loss": 1.2194, + "learning_rate": 1.9763397955747617e-05, + "loss": 1.1696, "step": 2487 }, { - "epoch": 0.07050355634900395, + "epoch": 0.09734721026684404, "grad_norm": 0.0, - "learning_rate": 1.991411886022826e-05, - "loss": 1.0561, + "learning_rate": 1.9763123848760956e-05, + "loss": 1.2881, "step": 2488 }, { - "epoch": 0.0705318937912664, + "epoch": 0.09738633695907348, "grad_norm": 0.0, - "learning_rate": 1.9913998793278116e-05, - "loss": 1.065, + "learning_rate": 1.9762849584990763e-05, + "loss": 1.2899, "step": 2489 }, { - "epoch": 0.07056023123352886, + "epoch": 0.09742546365130292, "grad_norm": 0.0, - "learning_rate": 1.9913878642818676e-05, - "loss": 1.1102, + "learning_rate": 1.976257516444144e-05, + "loss": 1.3429, "step": 2490 }, { - "epoch": 0.07058856867579133, + "epoch": 0.09746459034353236, "grad_norm": 0.0, - "learning_rate": 1.991375840885096e-05, - "loss": 0.9936, + "learning_rate": 1.976230058711739e-05, + "loss": 1.1832, "step": 2491 }, { - "epoch": 0.07061690611805378, + "epoch": 0.0975037170357618, "grad_norm": 0.0, - "learning_rate": 1.9913638091375972e-05, - "loss": 1.0798, + "learning_rate": 1.9762025853023025e-05, + "loss": 1.3469, "step": 2492 }, { - "epoch": 0.07064524356031625, + "epoch": 0.09754284372799124, "grad_norm": 0.0, - "learning_rate": 1.991351769039473e-05, - "loss": 1.1465, + "learning_rate": 1.976175096216276e-05, + "loss": 1.3391, "step": 2493 }, { - "epoch": 0.0706735810025787, + "epoch": 0.09758197042022067, "grad_norm": 0.0, - "learning_rate": 1.9913397205908248e-05, - "loss": 1.0569, + "learning_rate": 1.9761475914541008e-05, + "loss": 1.3527, "step": 2494 }, { - "epoch": 0.07070191844484117, + "epoch": 0.09762109711245011, "grad_norm": 0.0, - "learning_rate": 1.9913276637917537e-05, - "loss": 1.071, + "learning_rate": 1.9761200710162184e-05, + "loss": 1.2901, "step": 2495 }, { - "epoch": 0.07073025588710363, + "epoch": 0.09766022380467955, "grad_norm": 0.0, - "learning_rate": 1.9913155986423618e-05, - "loss": 1.106, + "learning_rate": 1.9760925349030704e-05, + "loss": 1.2701, "step": 2496 }, { - "epoch": 0.07075859332936608, + "epoch": 0.09769935049690899, "grad_norm": 0.0, - "learning_rate": 1.9913035251427507e-05, - "loss": 1.1087, + "learning_rate": 1.9760649831150997e-05, + "loss": 1.2017, "step": 2497 }, { - "epoch": 0.07078693077162855, + "epoch": 0.09773847718913843, "grad_norm": 0.0, - "learning_rate": 1.9912914432930213e-05, - "loss": 1.1509, + "learning_rate": 1.9760374156527484e-05, + "loss": 1.2106, "step": 2498 }, { - "epoch": 0.07081526821389102, + "epoch": 0.09777760388136787, "grad_norm": 0.0, - "learning_rate": 1.9912793530932765e-05, - "loss": 0.9951, + "learning_rate": 1.9760098325164593e-05, + "loss": 1.3197, "step": 2499 }, { - "epoch": 0.07084360565615347, + "epoch": 0.09781673057359731, "grad_norm": 0.0, - "learning_rate": 1.9912672545436177e-05, - "loss": 1.1615, + "learning_rate": 1.9759822337066753e-05, + "loss": 1.2597, "step": 2500 }, { - "epoch": 0.07087194309841593, + "epoch": 0.09785585726582675, "grad_norm": 0.0, - "learning_rate": 1.9912551476441463e-05, - "loss": 1.1492, + "learning_rate": 1.97595461922384e-05, + "loss": 1.1885, "step": 2501 }, { - "epoch": 0.0709002805406784, + "epoch": 0.09789498395805618, "grad_norm": 0.0, - "learning_rate": 1.991243032394965e-05, - "loss": 1.1332, + "learning_rate": 1.9759269890683958e-05, + "loss": 1.1138, "step": 2502 }, { - "epoch": 0.07092861798294085, + "epoch": 0.09793411065028562, "grad_norm": 0.0, - "learning_rate": 1.9912309087961753e-05, - "loss": 1.2188, + "learning_rate": 1.9758993432407873e-05, + "loss": 1.3549, "step": 2503 }, { - "epoch": 0.07095695542520332, + "epoch": 0.09797323734251506, "grad_norm": 0.0, - "learning_rate": 1.99121877684788e-05, - "loss": 1.2138, + "learning_rate": 1.975871681741458e-05, + "loss": 1.2849, "step": 2504 }, { - "epoch": 0.07098529286746579, + "epoch": 0.0980123640347445, "grad_norm": 0.0, - "learning_rate": 1.9912066365501804e-05, - "loss": 1.0815, + "learning_rate": 1.9758440045708523e-05, + "loss": 1.1179, "step": 2505 }, { - "epoch": 0.07101363030972824, + "epoch": 0.09805149072697394, "grad_norm": 0.0, - "learning_rate": 1.9911944879031794e-05, - "loss": 1.2, + "learning_rate": 1.975816311729415e-05, + "loss": 1.1721, "step": 2506 }, { - "epoch": 0.0710419677519907, + "epoch": 0.09809061741920339, "grad_norm": 0.0, - "learning_rate": 1.991182330906979e-05, - "loss": 1.145, + "learning_rate": 1.9757886032175903e-05, + "loss": 1.2861, "step": 2507 }, { - "epoch": 0.07107030519425317, + "epoch": 0.09812974411143283, "grad_norm": 0.0, - "learning_rate": 1.991170165561682e-05, - "loss": 1.2362, + "learning_rate": 1.9757608790358234e-05, + "loss": 1.1195, "step": 2508 }, { - "epoch": 0.07109864263651562, + "epoch": 0.09816887080366225, "grad_norm": 0.0, - "learning_rate": 1.9911579918673903e-05, - "loss": 1.0967, + "learning_rate": 1.9757331391845596e-05, + "loss": 1.2068, "step": 2509 }, { - "epoch": 0.07112698007877809, + "epoch": 0.0982079974958917, "grad_norm": 0.0, - "learning_rate": 1.9911458098242072e-05, - "loss": 1.0776, + "learning_rate": 1.9757053836642444e-05, + "loss": 1.2511, "step": 2510 }, { - "epoch": 0.07115531752104055, + "epoch": 0.09824712418812113, "grad_norm": 0.0, - "learning_rate": 1.9911336194322347e-05, - "loss": 1.0296, + "learning_rate": 1.9756776124753233e-05, + "loss": 1.2838, "step": 2511 }, { - "epoch": 0.071183654963303, + "epoch": 0.09828625088035058, "grad_norm": 0.0, - "learning_rate": 1.9911214206915758e-05, - "loss": 1.0834, + "learning_rate": 1.9756498256182422e-05, + "loss": 1.2736, "step": 2512 }, { - "epoch": 0.07121199240556547, + "epoch": 0.09832537757258002, "grad_norm": 0.0, - "learning_rate": 1.991109213602333e-05, - "loss": 1.0704, + "learning_rate": 1.9756220230934474e-05, + "loss": 1.1744, "step": 2513 }, { - "epoch": 0.07124032984782794, + "epoch": 0.09836450426480946, "grad_norm": 0.0, - "learning_rate": 1.991096998164609e-05, - "loss": 1.1282, + "learning_rate": 1.9755942049013853e-05, + "loss": 1.2357, "step": 2514 }, { - "epoch": 0.07126866729009039, + "epoch": 0.0984036309570389, "grad_norm": 0.0, - "learning_rate": 1.9910847743785077e-05, - "loss": 1.1039, + "learning_rate": 1.975566371042503e-05, + "loss": 1.1631, "step": 2515 }, { - "epoch": 0.07129700473235286, + "epoch": 0.09844275764926833, "grad_norm": 0.0, - "learning_rate": 1.9910725422441305e-05, - "loss": 1.0017, + "learning_rate": 1.975538521517247e-05, + "loss": 1.1289, "step": 2516 }, { - "epoch": 0.07132534217461532, + "epoch": 0.09848188434149777, "grad_norm": 0.0, - "learning_rate": 1.9910603017615816e-05, - "loss": 1.1631, + "learning_rate": 1.975510656326065e-05, + "loss": 1.3187, "step": 2517 }, { - "epoch": 0.07135367961687777, + "epoch": 0.09852101103372721, "grad_norm": 0.0, - "learning_rate": 1.991048052930964e-05, - "loss": 1.1009, + "learning_rate": 1.9754827754694043e-05, + "loss": 1.289, "step": 2518 }, { - "epoch": 0.07138201705914024, + "epoch": 0.09856013772595665, "grad_norm": 0.0, - "learning_rate": 1.99103579575238e-05, - "loss": 1.0869, + "learning_rate": 1.9754548789477126e-05, + "loss": 1.0887, "step": 2519 }, { - "epoch": 0.0714103545014027, + "epoch": 0.09859926441818609, "grad_norm": 0.0, - "learning_rate": 1.9910235302259344e-05, - "loss": 1.1244, + "learning_rate": 1.9754269667614378e-05, + "loss": 1.3399, "step": 2520 }, { - "epoch": 0.07143869194366516, + "epoch": 0.09863839111041553, "grad_norm": 0.0, - "learning_rate": 1.9910112563517288e-05, - "loss": 1.1914, + "learning_rate": 1.975399038911028e-05, + "loss": 1.1662, "step": 2521 }, { - "epoch": 0.07146702938592762, + "epoch": 0.09867751780264497, "grad_norm": 0.0, - "learning_rate": 1.9909989741298676e-05, - "loss": 1.0518, + "learning_rate": 1.975371095396932e-05, + "loss": 1.1399, "step": 2522 }, { - "epoch": 0.07149536682819009, + "epoch": 0.0987166444948744, "grad_norm": 0.0, - "learning_rate": 1.9909866835604542e-05, - "loss": 1.0879, + "learning_rate": 1.9753431362195985e-05, + "loss": 1.2209, "step": 2523 }, { - "epoch": 0.07152370427045254, + "epoch": 0.09875577118710384, "grad_norm": 0.0, - "learning_rate": 1.9909743846435916e-05, - "loss": 1.132, + "learning_rate": 1.9753151613794763e-05, + "loss": 1.3686, "step": 2524 }, { - "epoch": 0.07155204171271501, + "epoch": 0.09879489787933328, "grad_norm": 0.0, - "learning_rate": 1.990962077379384e-05, - "loss": 1.0859, + "learning_rate": 1.9752871708770146e-05, + "loss": 1.2068, "step": 2525 }, { - "epoch": 0.07158037915497747, + "epoch": 0.09883402457156272, "grad_norm": 0.0, - "learning_rate": 1.990949761767935e-05, - "loss": 1.1337, + "learning_rate": 1.9752591647126633e-05, + "loss": 1.1475, "step": 2526 }, { - "epoch": 0.07160871659723993, + "epoch": 0.09887315126379216, "grad_norm": 0.0, - "learning_rate": 1.990937437809348e-05, - "loss": 1.1406, + "learning_rate": 1.9752311428868716e-05, + "loss": 1.2056, "step": 2527 }, { - "epoch": 0.07163705403950239, + "epoch": 0.0989122779560216, "grad_norm": 0.0, - "learning_rate": 1.9909251055037272e-05, - "loss": 1.1811, + "learning_rate": 1.9752031054000903e-05, + "loss": 1.1562, "step": 2528 }, { - "epoch": 0.07166539148176486, + "epoch": 0.09895140464825104, "grad_norm": 0.0, - "learning_rate": 1.9909127648511758e-05, - "loss": 1.0289, + "learning_rate": 1.9751750522527686e-05, + "loss": 1.2678, "step": 2529 }, { - "epoch": 0.07169372892402731, + "epoch": 0.09899053134048047, "grad_norm": 0.0, - "learning_rate": 1.9909004158517984e-05, - "loss": 1.0702, + "learning_rate": 1.9751469834453577e-05, + "loss": 1.0852, "step": 2530 }, { - "epoch": 0.07172206636628978, + "epoch": 0.09902965803270991, "grad_norm": 0.0, - "learning_rate": 1.990888058505699e-05, - "loss": 1.1262, + "learning_rate": 1.975118898978308e-05, + "loss": 1.3014, "step": 2531 }, { - "epoch": 0.07175040380855224, + "epoch": 0.09906878472493935, "grad_norm": 0.0, - "learning_rate": 1.9908756928129814e-05, - "loss": 0.9265, + "learning_rate": 1.975090798852071e-05, + "loss": 1.2078, "step": 2532 }, { - "epoch": 0.0717787412508147, + "epoch": 0.09910791141716879, "grad_norm": 0.0, - "learning_rate": 1.9908633187737502e-05, - "loss": 1.2058, + "learning_rate": 1.9750626830670976e-05, + "loss": 1.2271, "step": 2533 }, { - "epoch": 0.07180707869307716, + "epoch": 0.09914703810939823, "grad_norm": 0.0, - "learning_rate": 1.990850936388109e-05, - "loss": 1.1541, + "learning_rate": 1.975034551623839e-05, + "loss": 1.245, "step": 2534 }, { - "epoch": 0.07183541613533963, + "epoch": 0.09918616480162767, "grad_norm": 0.0, - "learning_rate": 1.9908385456561624e-05, - "loss": 0.992, + "learning_rate": 1.9750064045227474e-05, + "loss": 1.1929, "step": 2535 }, { - "epoch": 0.07186375357760208, + "epoch": 0.09922529149385712, "grad_norm": 0.0, - "learning_rate": 1.990826146578015e-05, - "loss": 1.1069, + "learning_rate": 1.974978241764275e-05, + "loss": 1.3653, "step": 2536 }, { - "epoch": 0.07189209101986455, + "epoch": 0.09926441818608654, "grad_norm": 0.0, - "learning_rate": 1.9908137391537708e-05, - "loss": 1.0753, + "learning_rate": 1.9749500633488736e-05, + "loss": 1.2261, "step": 2537 }, { - "epoch": 0.07192042846212701, + "epoch": 0.09930354487831598, "grad_norm": 0.0, - "learning_rate": 1.9908013233835346e-05, - "loss": 1.0791, + "learning_rate": 1.9749218692769958e-05, + "loss": 1.2863, "step": 2538 }, { - "epoch": 0.07194876590438946, + "epoch": 0.09934267157054542, "grad_norm": 0.0, - "learning_rate": 1.990788899267411e-05, - "loss": 1.0737, + "learning_rate": 1.9748936595490943e-05, + "loss": 1.2886, "step": 2539 }, { - "epoch": 0.07197710334665193, + "epoch": 0.09938179826277486, "grad_norm": 0.0, - "learning_rate": 1.9907764668055046e-05, - "loss": 1.0934, + "learning_rate": 1.9748654341656225e-05, + "loss": 1.2417, "step": 2540 }, { - "epoch": 0.0720054407889144, + "epoch": 0.0994209249550043, "grad_norm": 0.0, - "learning_rate": 1.99076402599792e-05, - "loss": 1.0193, + "learning_rate": 1.9748371931270333e-05, + "loss": 1.2285, "step": 2541 }, { - "epoch": 0.07203377823117685, + "epoch": 0.09946005164723375, "grad_norm": 0.0, - "learning_rate": 1.9907515768447622e-05, - "loss": 1.1414, + "learning_rate": 1.9748089364337803e-05, + "loss": 1.3273, "step": 2542 }, { - "epoch": 0.07206211567343931, + "epoch": 0.09949917833946319, "grad_norm": 0.0, - "learning_rate": 1.9907391193461357e-05, - "loss": 1.1377, + "learning_rate": 1.9747806640863174e-05, + "loss": 1.3798, "step": 2543 }, { - "epoch": 0.07209045311570178, + "epoch": 0.09953830503169261, "grad_norm": 0.0, - "learning_rate": 1.9907266535021465e-05, - "loss": 1.0948, + "learning_rate": 1.9747523760850984e-05, + "loss": 1.2285, "step": 2544 }, { - "epoch": 0.07211879055796423, + "epoch": 0.09957743172392206, "grad_norm": 0.0, - "learning_rate": 1.990714179312898e-05, - "loss": 1.0475, + "learning_rate": 1.9747240724305773e-05, + "loss": 1.1813, "step": 2545 }, { - "epoch": 0.0721471280002267, + "epoch": 0.0996165584161515, "grad_norm": 0.0, - "learning_rate": 1.9907016967784963e-05, - "loss": 1.0776, + "learning_rate": 1.9746957531232097e-05, + "loss": 1.2482, "step": 2546 }, { - "epoch": 0.07217546544248916, + "epoch": 0.09965568510838094, "grad_norm": 0.0, - "learning_rate": 1.990689205899046e-05, - "loss": 1.0515, + "learning_rate": 1.974667418163449e-05, + "loss": 1.3182, "step": 2547 }, { - "epoch": 0.07220380288475162, + "epoch": 0.09969481180061038, "grad_norm": 0.0, - "learning_rate": 1.9906767066746532e-05, - "loss": 1.0063, + "learning_rate": 1.9746390675517514e-05, + "loss": 1.3567, "step": 2548 }, { - "epoch": 0.07223214032701408, + "epoch": 0.09973393849283982, "grad_norm": 0.0, - "learning_rate": 1.9906641991054222e-05, - "loss": 1.1796, + "learning_rate": 1.9746107012885715e-05, + "loss": 1.1809, "step": 2549 }, { - "epoch": 0.07226047776927655, + "epoch": 0.09977306518506926, "grad_norm": 0.0, - "learning_rate": 1.9906516831914592e-05, - "loss": 1.1831, + "learning_rate": 1.9745823193743648e-05, + "loss": 1.2922, "step": 2550 }, { - "epoch": 0.072288815211539, + "epoch": 0.09981219187729869, "grad_norm": 0.0, - "learning_rate": 1.9906391589328687e-05, - "loss": 1.0424, + "learning_rate": 1.9745539218095876e-05, + "loss": 1.11, "step": 2551 }, { - "epoch": 0.07231715265380147, + "epoch": 0.09985131856952813, "grad_norm": 0.0, - "learning_rate": 1.9906266263297572e-05, - "loss": 1.0894, + "learning_rate": 1.9745255085946955e-05, + "loss": 1.1371, "step": 2552 }, { - "epoch": 0.07234549009606393, + "epoch": 0.09989044526175757, "grad_norm": 0.0, - "learning_rate": 1.9906140853822294e-05, - "loss": 1.0374, + "learning_rate": 1.9744970797301447e-05, + "loss": 1.135, "step": 2553 }, { - "epoch": 0.07237382753832639, + "epoch": 0.09992957195398701, "grad_norm": 0.0, - "learning_rate": 1.9906015360903913e-05, - "loss": 0.9755, + "learning_rate": 1.974468635216392e-05, + "loss": 1.4044, "step": 2554 }, { - "epoch": 0.07240216498058885, + "epoch": 0.09996869864621645, "grad_norm": 0.0, - "learning_rate": 1.990588978454349e-05, - "loss": 1.1503, + "learning_rate": 1.9744401750538943e-05, + "loss": 1.2329, "step": 2555 }, { - "epoch": 0.07243050242285132, + "epoch": 0.10000782533844589, "grad_norm": 0.0, - "learning_rate": 1.9905764124742074e-05, - "loss": 1.0421, + "learning_rate": 1.9744116992431082e-05, + "loss": 1.2093, "step": 2556 }, { - "epoch": 0.07245883986511377, + "epoch": 0.10004695203067533, "grad_norm": 0.0, - "learning_rate": 1.990563838150073e-05, - "loss": 1.1569, + "learning_rate": 1.974383207784491e-05, + "loss": 1.296, "step": 2557 }, { - "epoch": 0.07248717730737624, + "epoch": 0.10008607872290476, "grad_norm": 0.0, - "learning_rate": 1.9905512554820516e-05, - "loss": 1.0815, + "learning_rate": 1.9743547006785012e-05, + "loss": 1.3259, "step": 2558 }, { - "epoch": 0.0725155147496387, + "epoch": 0.1001252054151342, "grad_norm": 0.0, - "learning_rate": 1.9905386644702495e-05, - "loss": 1.0945, + "learning_rate": 1.9743261779255954e-05, + "loss": 1.1331, "step": 2559 }, { - "epoch": 0.07254385219190115, + "epoch": 0.10016433210736364, "grad_norm": 0.0, - "learning_rate": 1.990526065114772e-05, - "loss": 1.0645, + "learning_rate": 1.974297639526232e-05, + "loss": 1.0946, "step": 2560 }, { - "epoch": 0.07257218963416362, + "epoch": 0.10020345879959308, "grad_norm": 0.0, - "learning_rate": 1.9905134574157255e-05, - "loss": 1.1198, + "learning_rate": 1.9742690854808692e-05, + "loss": 1.3524, "step": 2561 }, { - "epoch": 0.07260052707642609, + "epoch": 0.10024258549182252, "grad_norm": 0.0, - "learning_rate": 1.9905008413732164e-05, - "loss": 1.0877, + "learning_rate": 1.974240515789966e-05, + "loss": 1.2665, "step": 2562 }, { - "epoch": 0.07262886451868854, + "epoch": 0.10028171218405196, "grad_norm": 0.0, - "learning_rate": 1.9904882169873512e-05, - "loss": 1.0368, + "learning_rate": 1.9742119304539807e-05, + "loss": 1.2438, "step": 2563 }, { - "epoch": 0.072657201960951, + "epoch": 0.1003208388762814, "grad_norm": 0.0, - "learning_rate": 1.9904755842582358e-05, - "loss": 1.2388, + "learning_rate": 1.9741833294733728e-05, + "loss": 1.088, "step": 2564 }, { - "epoch": 0.07268553940321347, + "epoch": 0.10035996556851084, "grad_norm": 0.0, - "learning_rate": 1.9904629431859767e-05, - "loss": 1.0848, + "learning_rate": 1.974154712848601e-05, + "loss": 1.2794, "step": 2565 }, { - "epoch": 0.07271387684547592, + "epoch": 0.10039909226074027, "grad_norm": 0.0, - "learning_rate": 1.9904502937706806e-05, - "loss": 1.0349, + "learning_rate": 1.9741260805801258e-05, + "loss": 1.3518, "step": 2566 }, { - "epoch": 0.07274221428773839, + "epoch": 0.10043821895296971, "grad_norm": 0.0, - "learning_rate": 1.9904376360124534e-05, - "loss": 1.1479, + "learning_rate": 1.9740974326684062e-05, + "loss": 1.1483, "step": 2567 }, { - "epoch": 0.07277055173000085, + "epoch": 0.10047734564519915, "grad_norm": 0.0, - "learning_rate": 1.9904249699114027e-05, - "loss": 1.1365, + "learning_rate": 1.974068769113902e-05, + "loss": 1.1661, "step": 2568 }, { - "epoch": 0.0727988891722633, + "epoch": 0.1005164723374286, "grad_norm": 0.0, - "learning_rate": 1.9904122954676345e-05, - "loss": 1.084, + "learning_rate": 1.9740400899170744e-05, + "loss": 1.2635, "step": 2569 }, { - "epoch": 0.07282722661452577, + "epoch": 0.10055559902965804, "grad_norm": 0.0, - "learning_rate": 1.9903996126812555e-05, - "loss": 0.9718, + "learning_rate": 1.974011395078383e-05, + "loss": 1.113, "step": 2570 }, { - "epoch": 0.07285556405678824, + "epoch": 0.10059472572188748, "grad_norm": 0.0, - "learning_rate": 1.990386921552373e-05, - "loss": 1.1841, + "learning_rate": 1.9739826845982896e-05, + "loss": 1.2604, "step": 2571 }, { - "epoch": 0.07288390149905069, + "epoch": 0.10063385241411692, "grad_norm": 0.0, - "learning_rate": 1.990374222081094e-05, - "loss": 1.1064, + "learning_rate": 1.9739539584772546e-05, + "loss": 1.1924, "step": 2572 }, { - "epoch": 0.07291223894131316, + "epoch": 0.10067297910634634, "grad_norm": 0.0, - "learning_rate": 1.9903615142675247e-05, - "loss": 1.0711, + "learning_rate": 1.9739252167157393e-05, + "loss": 1.2683, "step": 2573 }, { - "epoch": 0.07294057638357562, + "epoch": 0.10071210579857579, "grad_norm": 0.0, - "learning_rate": 1.9903487981117732e-05, - "loss": 1.0527, + "learning_rate": 1.973896459314206e-05, + "loss": 1.2744, "step": 2574 }, { - "epoch": 0.07296891382583807, + "epoch": 0.10075123249080523, "grad_norm": 0.0, - "learning_rate": 1.9903360736139455e-05, - "loss": 1.1222, + "learning_rate": 1.9738676862731153e-05, + "loss": 1.2283, "step": 2575 }, { - "epoch": 0.07299725126810054, + "epoch": 0.10079035918303467, "grad_norm": 0.0, - "learning_rate": 1.9903233407741494e-05, - "loss": 1.1266, + "learning_rate": 1.9738388975929303e-05, + "loss": 1.2079, "step": 2576 }, { - "epoch": 0.073025588710363, + "epoch": 0.10082948587526411, "grad_norm": 0.0, - "learning_rate": 1.990310599592492e-05, - "loss": 1.0409, + "learning_rate": 1.9738100932741125e-05, + "loss": 1.2112, "step": 2577 }, { - "epoch": 0.07305392615262546, + "epoch": 0.10086861256749355, "grad_norm": 0.0, - "learning_rate": 1.990297850069081e-05, - "loss": 1.1448, + "learning_rate": 1.973781273317125e-05, + "loss": 1.2003, "step": 2578 }, { - "epoch": 0.07308226359488793, + "epoch": 0.10090773925972299, "grad_norm": 0.0, - "learning_rate": 1.9902850922040227e-05, - "loss": 1.0356, + "learning_rate": 1.97375243772243e-05, + "loss": 1.2418, "step": 2579 }, { - "epoch": 0.07311060103715039, + "epoch": 0.10094686595195242, "grad_norm": 0.0, - "learning_rate": 1.990272325997426e-05, - "loss": 1.0259, + "learning_rate": 1.973723586490492e-05, + "loss": 1.2783, "step": 2580 }, { - "epoch": 0.07313893847941284, + "epoch": 0.10098599264418186, "grad_norm": 0.0, - "learning_rate": 1.990259551449398e-05, - "loss": 1.0751, + "learning_rate": 1.9736947196217726e-05, + "loss": 1.3773, "step": 2581 }, { - "epoch": 0.07316727592167531, + "epoch": 0.1010251193364113, "grad_norm": 0.0, - "learning_rate": 1.9902467685600456e-05, - "loss": 1.0505, + "learning_rate": 1.9736658371167366e-05, + "loss": 1.3026, "step": 2582 }, { - "epoch": 0.07319561336393778, + "epoch": 0.10106424602864074, "grad_norm": 0.0, - "learning_rate": 1.990233977329477e-05, - "loss": 1.1739, + "learning_rate": 1.973636938975847e-05, + "loss": 1.2067, "step": 2583 }, { - "epoch": 0.07322395080620023, + "epoch": 0.10110337272087018, "grad_norm": 0.0, - "learning_rate": 1.9902211777577998e-05, - "loss": 1.1284, + "learning_rate": 1.973608025199568e-05, + "loss": 1.2513, "step": 2584 }, { - "epoch": 0.0732522882484627, + "epoch": 0.10114249941309962, "grad_norm": 0.0, - "learning_rate": 1.9902083698451222e-05, - "loss": 1.0204, + "learning_rate": 1.9735790957883645e-05, + "loss": 1.2469, "step": 2585 }, { - "epoch": 0.07328062569072516, + "epoch": 0.10118162610532906, "grad_norm": 0.0, - "learning_rate": 1.9901955535915517e-05, - "loss": 1.0604, + "learning_rate": 1.9735501507427007e-05, + "loss": 1.1902, "step": 2586 }, { - "epoch": 0.07330896313298761, + "epoch": 0.10122075279755849, "grad_norm": 0.0, - "learning_rate": 1.9901827289971962e-05, - "loss": 1.1032, + "learning_rate": 1.9735211900630414e-05, + "loss": 1.202, "step": 2587 }, { - "epoch": 0.07333730057525008, + "epoch": 0.10125987948978793, "grad_norm": 0.0, - "learning_rate": 1.9901698960621642e-05, - "loss": 1.0025, + "learning_rate": 1.9734922137498516e-05, + "loss": 1.2738, "step": 2588 }, { - "epoch": 0.07336563801751254, + "epoch": 0.10129900618201737, "grad_norm": 0.0, - "learning_rate": 1.990157054786563e-05, - "loss": 1.0899, + "learning_rate": 1.9734632218035964e-05, + "loss": 1.3317, "step": 2589 }, { - "epoch": 0.073393975459775, + "epoch": 0.10133813287424681, "grad_norm": 0.0, - "learning_rate": 1.9901442051705016e-05, - "loss": 1.0463, + "learning_rate": 1.973434214224742e-05, + "loss": 1.2226, "step": 2590 }, { - "epoch": 0.07342231290203746, + "epoch": 0.10137725956647625, "grad_norm": 0.0, - "learning_rate": 1.990131347214088e-05, - "loss": 1.085, + "learning_rate": 1.973405191013754e-05, + "loss": 1.2065, "step": 2591 }, { - "epoch": 0.07345065034429993, + "epoch": 0.10141638625870569, "grad_norm": 0.0, - "learning_rate": 1.99011848091743e-05, - "loss": 1.1575, + "learning_rate": 1.973376152171098e-05, + "loss": 1.2964, "step": 2592 }, { - "epoch": 0.07347898778656238, + "epoch": 0.10145551295093513, "grad_norm": 0.0, - "learning_rate": 1.9901056062806363e-05, - "loss": 1.1707, + "learning_rate": 1.973347097697241e-05, + "loss": 1.2242, "step": 2593 }, { - "epoch": 0.07350732522882485, + "epoch": 0.10149463964316456, "grad_norm": 0.0, - "learning_rate": 1.990092723303816e-05, - "loss": 1.0328, + "learning_rate": 1.973318027592649e-05, + "loss": 1.1012, "step": 2594 }, { - "epoch": 0.07353566267108731, + "epoch": 0.101533766335394, "grad_norm": 0.0, - "learning_rate": 1.9900798319870763e-05, - "loss": 1.0881, + "learning_rate": 1.9732889418577897e-05, + "loss": 1.2178, "step": 2595 }, { - "epoch": 0.07356400011334976, + "epoch": 0.10157289302762344, "grad_norm": 0.0, - "learning_rate": 1.9900669323305272e-05, - "loss": 1.0588, + "learning_rate": 1.9732598404931293e-05, + "loss": 1.2803, "step": 2596 }, { - "epoch": 0.07359233755561223, + "epoch": 0.10161201971985288, "grad_norm": 0.0, - "learning_rate": 1.9900540243342764e-05, - "loss": 1.1279, + "learning_rate": 1.973230723499135e-05, + "loss": 1.1878, "step": 2597 }, { - "epoch": 0.0736206749978747, + "epoch": 0.10165114641208232, "grad_norm": 0.0, - "learning_rate": 1.990041107998433e-05, - "loss": 1.1071, + "learning_rate": 1.973201590876275e-05, + "loss": 1.1793, "step": 2598 }, { - "epoch": 0.07364901244013715, + "epoch": 0.10169027310431177, "grad_norm": 0.0, - "learning_rate": 1.990028183323105e-05, - "loss": 1.0656, + "learning_rate": 1.9731724426250173e-05, + "loss": 1.2279, "step": 2599 }, { - "epoch": 0.07367734988239961, + "epoch": 0.1017293997965412, "grad_norm": 0.0, - "learning_rate": 1.9900152503084026e-05, - "loss": 1.0815, + "learning_rate": 1.9731432787458294e-05, + "loss": 1.2753, "step": 2600 }, { - "epoch": 0.07370568732466208, + "epoch": 0.10176852648877063, "grad_norm": 0.0, - "learning_rate": 1.990002308954434e-05, - "loss": 1.0487, + "learning_rate": 1.9731140992391798e-05, + "loss": 1.2893, "step": 2601 }, { - "epoch": 0.07373402476692453, + "epoch": 0.10180765318100007, "grad_norm": 0.0, - "learning_rate": 1.9899893592613086e-05, - "loss": 1.1263, + "learning_rate": 1.9730849041055373e-05, + "loss": 1.2783, "step": 2602 }, { - "epoch": 0.073762362209187, + "epoch": 0.10184677987322951, "grad_norm": 0.0, - "learning_rate": 1.989976401229135e-05, - "loss": 1.1362, + "learning_rate": 1.9730556933453706e-05, + "loss": 1.2683, "step": 2603 }, { - "epoch": 0.07379069965144947, + "epoch": 0.10188590656545896, "grad_norm": 0.0, - "learning_rate": 1.9899634348580226e-05, - "loss": 1.1052, + "learning_rate": 1.973026466959149e-05, + "loss": 1.1532, "step": 2604 }, { - "epoch": 0.07381903709371192, + "epoch": 0.1019250332576884, "grad_norm": 0.0, - "learning_rate": 1.9899504601480805e-05, - "loss": 1.0272, + "learning_rate": 1.9729972249473408e-05, + "loss": 1.2394, "step": 2605 }, { - "epoch": 0.07384737453597438, + "epoch": 0.10196415994991784, "grad_norm": 0.0, - "learning_rate": 1.9899374770994183e-05, - "loss": 1.1248, + "learning_rate": 1.972967967310417e-05, + "loss": 1.2303, "step": 2606 }, { - "epoch": 0.07387571197823685, + "epoch": 0.10200328664214728, "grad_norm": 0.0, - "learning_rate": 1.9899244857121446e-05, - "loss": 1.0934, + "learning_rate": 1.9729386940488467e-05, + "loss": 1.2133, "step": 2607 }, { - "epoch": 0.0739040494204993, + "epoch": 0.1020424133343767, "grad_norm": 0.0, - "learning_rate": 1.9899114859863696e-05, - "loss": 1.1363, + "learning_rate": 1.9729094051631003e-05, + "loss": 1.173, "step": 2608 }, { - "epoch": 0.07393238686276177, + "epoch": 0.10208154002660615, "grad_norm": 0.0, - "learning_rate": 1.9898984779222027e-05, - "loss": 1.0716, + "learning_rate": 1.9728801006536478e-05, + "loss": 1.2592, "step": 2609 }, { - "epoch": 0.07396072430502423, + "epoch": 0.10212066671883559, "grad_norm": 0.0, - "learning_rate": 1.9898854615197534e-05, - "loss": 1.1161, + "learning_rate": 1.97285078052096e-05, + "loss": 1.2031, "step": 2610 }, { - "epoch": 0.07398906174728669, + "epoch": 0.10215979341106503, "grad_norm": 0.0, - "learning_rate": 1.989872436779131e-05, - "loss": 1.1558, + "learning_rate": 1.9728214447655076e-05, + "loss": 1.0997, "step": 2611 }, { - "epoch": 0.07401739918954915, + "epoch": 0.10219892010329447, "grad_norm": 0.0, - "learning_rate": 1.9898594037004457e-05, - "loss": 1.1013, + "learning_rate": 1.972792093387762e-05, + "loss": 1.2413, "step": 2612 }, { - "epoch": 0.07404573663181162, + "epoch": 0.10223804679552391, "grad_norm": 0.0, - "learning_rate": 1.9898463622838073e-05, - "loss": 1.0933, + "learning_rate": 1.9727627263881942e-05, + "loss": 1.211, "step": 2613 }, { - "epoch": 0.07407407407407407, + "epoch": 0.10227717348775335, "grad_norm": 0.0, - "learning_rate": 1.9898333125293248e-05, - "loss": 1.2441, + "learning_rate": 1.9727333437672763e-05, + "loss": 1.2003, "step": 2614 }, { - "epoch": 0.07410241151633654, + "epoch": 0.10231630017998278, "grad_norm": 0.0, - "learning_rate": 1.989820254437109e-05, - "loss": 0.9972, + "learning_rate": 1.9727039455254794e-05, + "loss": 1.2532, "step": 2615 }, { - "epoch": 0.074130748958599, + "epoch": 0.10235542687221222, "grad_norm": 0.0, - "learning_rate": 1.9898071880072696e-05, - "loss": 1.0027, + "learning_rate": 1.9726745316632762e-05, + "loss": 1.2714, "step": 2616 }, { - "epoch": 0.07415908640086145, + "epoch": 0.10239455356444166, "grad_norm": 0.0, - "learning_rate": 1.989794113239917e-05, - "loss": 1.1868, + "learning_rate": 1.9726451021811387e-05, + "loss": 1.1354, "step": 2617 }, { - "epoch": 0.07418742384312392, + "epoch": 0.1024336802566711, "grad_norm": 0.0, - "learning_rate": 1.9897810301351607e-05, - "loss": 1.0598, + "learning_rate": 1.97261565707954e-05, + "loss": 1.1111, "step": 2618 }, { - "epoch": 0.07421576128538639, + "epoch": 0.10247280694890054, "grad_norm": 0.0, - "learning_rate": 1.9897679386931115e-05, - "loss": 0.9171, + "learning_rate": 1.972586196358952e-05, + "loss": 1.1769, "step": 2619 }, { - "epoch": 0.07424409872764884, + "epoch": 0.10251193364112998, "grad_norm": 0.0, - "learning_rate": 1.989754838913879e-05, - "loss": 1.0286, + "learning_rate": 1.9725567200198486e-05, + "loss": 1.2462, "step": 2620 }, { - "epoch": 0.0742724361699113, + "epoch": 0.10255106033335942, "grad_norm": 0.0, - "learning_rate": 1.9897417307975742e-05, - "loss": 1.167, + "learning_rate": 1.9725272280627036e-05, + "loss": 1.3253, "step": 2621 }, { - "epoch": 0.07430077361217377, + "epoch": 0.10259018702558885, "grad_norm": 0.0, - "learning_rate": 1.9897286143443076e-05, - "loss": 1.0674, + "learning_rate": 1.9724977204879894e-05, + "loss": 1.3265, "step": 2622 }, { - "epoch": 0.07432911105443622, + "epoch": 0.10262931371781829, "grad_norm": 0.0, - "learning_rate": 1.9897154895541888e-05, - "loss": 1.0511, + "learning_rate": 1.9724681972961806e-05, + "loss": 1.2364, "step": 2623 }, { - "epoch": 0.07435744849669869, + "epoch": 0.10266844041004773, "grad_norm": 0.0, - "learning_rate": 1.989702356427329e-05, - "loss": 1.0806, + "learning_rate": 1.972438658487751e-05, + "loss": 1.266, "step": 2624 }, { - "epoch": 0.07438578593896115, + "epoch": 0.10270756710227717, "grad_norm": 0.0, - "learning_rate": 1.9896892149638393e-05, - "loss": 1.1736, + "learning_rate": 1.972409104063175e-05, + "loss": 1.2046, "step": 2625 }, { - "epoch": 0.0744141233812236, + "epoch": 0.10274669379450661, "grad_norm": 0.0, - "learning_rate": 1.9896760651638292e-05, - "loss": 1.1153, + "learning_rate": 1.9723795340229274e-05, + "loss": 1.1349, "step": 2626 }, { - "epoch": 0.07444246082348607, + "epoch": 0.10278582048673605, "grad_norm": 0.0, - "learning_rate": 1.9896629070274103e-05, - "loss": 0.9971, + "learning_rate": 1.972349948367483e-05, + "loss": 1.1608, "step": 2627 }, { - "epoch": 0.07447079826574854, + "epoch": 0.1028249471789655, "grad_norm": 0.0, - "learning_rate": 1.9896497405546934e-05, - "loss": 1.1486, + "learning_rate": 1.9723203470973168e-05, + "loss": 1.2825, "step": 2628 }, { - "epoch": 0.07449913570801099, + "epoch": 0.10286407387119494, "grad_norm": 0.0, - "learning_rate": 1.989636565745789e-05, - "loss": 1.1877, + "learning_rate": 1.9722907302129042e-05, + "loss": 1.2864, "step": 2629 }, { - "epoch": 0.07452747315027346, + "epoch": 0.10290320056342436, "grad_norm": 0.0, - "learning_rate": 1.9896233826008083e-05, - "loss": 1.2034, + "learning_rate": 1.9722610977147203e-05, + "loss": 1.3141, "step": 2630 }, { - "epoch": 0.07455581059253592, + "epoch": 0.1029423272556538, "grad_norm": 0.0, - "learning_rate": 1.9896101911198624e-05, - "loss": 1.0739, + "learning_rate": 1.9722314496032422e-05, + "loss": 1.228, "step": 2631 }, { - "epoch": 0.07458414803479838, + "epoch": 0.10298145394788324, "grad_norm": 0.0, - "learning_rate": 1.9895969913030626e-05, - "loss": 1.0031, + "learning_rate": 1.972201785878945e-05, + "loss": 1.2372, "step": 2632 }, { - "epoch": 0.07461248547706084, + "epoch": 0.10302058064011269, "grad_norm": 0.0, - "learning_rate": 1.9895837831505195e-05, - "loss": 1.1129, + "learning_rate": 1.9721721065423055e-05, + "loss": 1.2178, "step": 2633 }, { - "epoch": 0.07464082291932331, + "epoch": 0.10305970733234213, "grad_norm": 0.0, - "learning_rate": 1.989570566662345e-05, - "loss": 1.0268, + "learning_rate": 1.9721424115938004e-05, + "loss": 1.0566, "step": 2634 }, { - "epoch": 0.07466916036158576, + "epoch": 0.10309883402457157, "grad_norm": 0.0, - "learning_rate": 1.98955734183865e-05, - "loss": 1.1015, + "learning_rate": 1.9721127010339057e-05, + "loss": 1.3369, "step": 2635 }, { - "epoch": 0.07469749780384823, + "epoch": 0.10313796071680101, "grad_norm": 0.0, - "learning_rate": 1.989544108679546e-05, - "loss": 1.0172, + "learning_rate": 1.9720829748630997e-05, + "loss": 1.1698, "step": 2636 }, { - "epoch": 0.07472583524611069, + "epoch": 0.10317708740903044, "grad_norm": 0.0, - "learning_rate": 1.989530867185145e-05, - "loss": 0.9604, + "learning_rate": 1.972053233081859e-05, + "loss": 1.3497, "step": 2637 }, { - "epoch": 0.07475417268837314, + "epoch": 0.10321621410125988, "grad_norm": 0.0, - "learning_rate": 1.9895176173555574e-05, - "loss": 1.1029, + "learning_rate": 1.9720234756906613e-05, + "loss": 1.2905, "step": 2638 }, { - "epoch": 0.07478251013063561, + "epoch": 0.10325534079348932, "grad_norm": 0.0, - "learning_rate": 1.989504359190896e-05, - "loss": 1.036, + "learning_rate": 1.971993702689985e-05, + "loss": 1.2036, "step": 2639 }, { - "epoch": 0.07481084757289808, + "epoch": 0.10329446748571876, "grad_norm": 0.0, - "learning_rate": 1.9894910926912713e-05, - "loss": 1.0609, + "learning_rate": 1.9719639140803073e-05, + "loss": 1.3002, "step": 2640 }, { - "epoch": 0.07483918501516053, + "epoch": 0.1033335941779482, "grad_norm": 0.0, - "learning_rate": 1.989477817856796e-05, - "loss": 1.1414, + "learning_rate": 1.9719341098621074e-05, + "loss": 1.1849, "step": 2641 }, { - "epoch": 0.074867522457423, + "epoch": 0.10337272087017764, "grad_norm": 0.0, - "learning_rate": 1.989464534687582e-05, - "loss": 1.3318, + "learning_rate": 1.9719042900358635e-05, + "loss": 1.298, "step": 2642 }, { - "epoch": 0.07489585989968546, + "epoch": 0.10341184756240708, "grad_norm": 0.0, - "learning_rate": 1.9894512431837404e-05, - "loss": 1.1566, + "learning_rate": 1.9718744546020547e-05, + "loss": 1.2909, "step": 2643 }, { - "epoch": 0.07492419734194791, + "epoch": 0.10345097425463651, "grad_norm": 0.0, - "learning_rate": 1.9894379433453837e-05, - "loss": 1.0005, + "learning_rate": 1.97184460356116e-05, + "loss": 1.1343, "step": 2644 }, { - "epoch": 0.07495253478421038, + "epoch": 0.10349010094686595, "grad_norm": 0.0, - "learning_rate": 1.9894246351726236e-05, - "loss": 1.0466, + "learning_rate": 1.9718147369136584e-05, + "loss": 1.1638, "step": 2645 }, { - "epoch": 0.07498087222647284, + "epoch": 0.10352922763909539, "grad_norm": 0.0, - "learning_rate": 1.9894113186655726e-05, - "loss": 0.9971, + "learning_rate": 1.97178485466003e-05, + "loss": 1.3221, "step": 2646 }, { - "epoch": 0.0750092096687353, + "epoch": 0.10356835433132483, "grad_norm": 0.0, - "learning_rate": 1.9893979938243422e-05, - "loss": 1.1265, + "learning_rate": 1.9717549568007544e-05, + "loss": 1.1782, "step": 2647 }, { - "epoch": 0.07503754711099776, + "epoch": 0.10360748102355427, "grad_norm": 0.0, - "learning_rate": 1.9893846606490456e-05, - "loss": 1.1168, + "learning_rate": 1.9717250433363125e-05, + "loss": 1.3007, "step": 2648 }, { - "epoch": 0.07506588455326023, + "epoch": 0.10364660771578371, "grad_norm": 0.0, - "learning_rate": 1.9893713191397944e-05, - "loss": 1.0296, + "learning_rate": 1.9716951142671835e-05, + "loss": 1.2807, "step": 2649 }, { - "epoch": 0.07509422199552268, + "epoch": 0.10368573440801315, "grad_norm": 0.0, - "learning_rate": 1.9893579692967013e-05, - "loss": 1.0948, + "learning_rate": 1.9716651695938488e-05, + "loss": 1.3334, "step": 2650 }, { - "epoch": 0.07512255943778515, + "epoch": 0.10372486110024258, "grad_norm": 0.0, - "learning_rate": 1.9893446111198788e-05, - "loss": 1.0868, + "learning_rate": 1.971635209316789e-05, + "loss": 1.2803, "step": 2651 }, { - "epoch": 0.07515089688004761, + "epoch": 0.10376398779247202, "grad_norm": 0.0, - "learning_rate": 1.9893312446094392e-05, - "loss": 1.1094, + "learning_rate": 1.971605233436485e-05, + "loss": 1.1603, "step": 2652 }, { - "epoch": 0.07517923432231007, + "epoch": 0.10380311448470146, "grad_norm": 0.0, - "learning_rate": 1.9893178697654948e-05, - "loss": 1.2118, + "learning_rate": 1.971575241953419e-05, + "loss": 1.1904, "step": 2653 }, { - "epoch": 0.07520757176457253, + "epoch": 0.1038422411769309, "grad_norm": 0.0, - "learning_rate": 1.9893044865881586e-05, - "loss": 1.0815, + "learning_rate": 1.9715452348680716e-05, + "loss": 1.1935, "step": 2654 }, { - "epoch": 0.075235909206835, + "epoch": 0.10388136786916034, "grad_norm": 0.0, - "learning_rate": 1.9892910950775433e-05, - "loss": 1.096, + "learning_rate": 1.9715152121809253e-05, + "loss": 1.3083, "step": 2655 }, { - "epoch": 0.07526424664909745, + "epoch": 0.10392049456138978, "grad_norm": 0.0, - "learning_rate": 1.9892776952337623e-05, - "loss": 1.0316, + "learning_rate": 1.971485173892462e-05, + "loss": 1.202, "step": 2656 }, { - "epoch": 0.07529258409135992, + "epoch": 0.10395962125361922, "grad_norm": 0.0, - "learning_rate": 1.9892642870569277e-05, - "loss": 1.1467, + "learning_rate": 1.9714551200031644e-05, + "loss": 1.16, "step": 2657 }, { - "epoch": 0.07532092153362238, + "epoch": 0.10399874794584865, "grad_norm": 0.0, - "learning_rate": 1.9892508705471524e-05, - "loss": 1.2167, + "learning_rate": 1.9714250505135144e-05, + "loss": 1.1212, "step": 2658 }, { - "epoch": 0.07534925897588483, + "epoch": 0.10403787463807809, "grad_norm": 0.0, - "learning_rate": 1.98923744570455e-05, - "loss": 1.1184, + "learning_rate": 1.9713949654239956e-05, + "loss": 1.1828, "step": 2659 }, { - "epoch": 0.0753775964181473, + "epoch": 0.10407700133030753, "grad_norm": 0.0, - "learning_rate": 1.989224012529233e-05, - "loss": 1.0102, + "learning_rate": 1.9713648647350912e-05, + "loss": 1.1162, "step": 2660 }, { - "epoch": 0.07540593386040977, + "epoch": 0.10411612802253697, "grad_norm": 0.0, - "learning_rate": 1.9892105710213148e-05, - "loss": 1.1088, + "learning_rate": 1.971334748447284e-05, + "loss": 1.3428, "step": 2661 }, { - "epoch": 0.07543427130267222, + "epoch": 0.10415525471476642, "grad_norm": 0.0, - "learning_rate": 1.9891971211809085e-05, - "loss": 1.1181, + "learning_rate": 1.9713046165610576e-05, + "loss": 1.2352, "step": 2662 }, { - "epoch": 0.07546260874493468, + "epoch": 0.10419438140699586, "grad_norm": 0.0, - "learning_rate": 1.9891836630081277e-05, - "loss": 1.0883, + "learning_rate": 1.9712744690768967e-05, + "loss": 1.2942, "step": 2663 }, { - "epoch": 0.07549094618719715, + "epoch": 0.1042335080992253, "grad_norm": 0.0, - "learning_rate": 1.9891701965030855e-05, - "loss": 1.1366, + "learning_rate": 1.9712443059952845e-05, + "loss": 1.1516, "step": 2664 }, { - "epoch": 0.0755192836294596, + "epoch": 0.10427263479145472, "grad_norm": 0.0, - "learning_rate": 1.9891567216658957e-05, - "loss": 1.0333, + "learning_rate": 1.9712141273167058e-05, + "loss": 1.2319, "step": 2665 }, { - "epoch": 0.07554762107172207, + "epoch": 0.10431176148368417, "grad_norm": 0.0, - "learning_rate": 1.9891432384966715e-05, - "loss": 1.0488, + "learning_rate": 1.9711839330416453e-05, + "loss": 1.2684, "step": 2666 }, { - "epoch": 0.07557595851398453, + "epoch": 0.1043508881759136, "grad_norm": 0.0, - "learning_rate": 1.989129746995526e-05, - "loss": 1.116, + "learning_rate": 1.971153723170588e-05, + "loss": 1.3095, "step": 2667 }, { - "epoch": 0.07560429595624699, + "epoch": 0.10439001486814305, "grad_norm": 0.0, - "learning_rate": 1.989116247162574e-05, - "loss": 1.1175, + "learning_rate": 1.9711234977040187e-05, + "loss": 1.3777, "step": 2668 }, { - "epoch": 0.07563263339850945, + "epoch": 0.10442914156037249, "grad_norm": 0.0, - "learning_rate": 1.989102738997928e-05, - "loss": 1.1709, + "learning_rate": 1.971093256642423e-05, + "loss": 1.2677, "step": 2669 }, { - "epoch": 0.07566097084077192, + "epoch": 0.10446826825260193, "grad_norm": 0.0, - "learning_rate": 1.9890892225017024e-05, - "loss": 1.1595, + "learning_rate": 1.971062999986286e-05, + "loss": 1.2097, "step": 2670 }, { - "epoch": 0.07568930828303437, + "epoch": 0.10450739494483137, "grad_norm": 0.0, - "learning_rate": 1.9890756976740115e-05, - "loss": 1.0566, + "learning_rate": 1.9710327277360942e-05, + "loss": 1.2749, "step": 2671 }, { - "epoch": 0.07571764572529684, + "epoch": 0.1045465216370608, "grad_norm": 0.0, - "learning_rate": 1.989062164514968e-05, - "loss": 0.977, + "learning_rate": 1.9710024398923338e-05, + "loss": 1.3203, "step": 2672 }, { - "epoch": 0.0757459831675593, + "epoch": 0.10458564832929024, "grad_norm": 0.0, - "learning_rate": 1.989048623024687e-05, - "loss": 1.1085, + "learning_rate": 1.970972136455491e-05, + "loss": 1.2589, "step": 2673 }, { - "epoch": 0.07577432060982175, + "epoch": 0.10462477502151968, "grad_norm": 0.0, - "learning_rate": 1.989035073203282e-05, - "loss": 1.0717, + "learning_rate": 1.9709418174260523e-05, + "loss": 1.0568, "step": 2674 }, { - "epoch": 0.07580265805208422, + "epoch": 0.10466390171374912, "grad_norm": 0.0, - "learning_rate": 1.9890215150508677e-05, - "loss": 1.1456, + "learning_rate": 1.9709114828045046e-05, + "loss": 1.1962, "step": 2675 }, { - "epoch": 0.07583099549434669, + "epoch": 0.10470302840597856, "grad_norm": 0.0, - "learning_rate": 1.9890079485675577e-05, - "loss": 1.0835, + "learning_rate": 1.9708811325913352e-05, + "loss": 1.1312, "step": 2676 }, { - "epoch": 0.07585933293660914, + "epoch": 0.104742155098208, "grad_norm": 0.0, - "learning_rate": 1.9889943737534662e-05, - "loss": 1.0467, + "learning_rate": 1.9708507667870312e-05, + "loss": 1.3019, "step": 2677 }, { - "epoch": 0.0758876703788716, + "epoch": 0.10478128179043744, "grad_norm": 0.0, - "learning_rate": 1.988980790608708e-05, - "loss": 1.1644, + "learning_rate": 1.9708203853920803e-05, + "loss": 1.2592, "step": 2678 }, { - "epoch": 0.07591600782113407, + "epoch": 0.10482040848266687, "grad_norm": 0.0, - "learning_rate": 1.9889671991333976e-05, - "loss": 1.2069, + "learning_rate": 1.970789988406971e-05, + "loss": 1.188, "step": 2679 }, { - "epoch": 0.07594434526339652, + "epoch": 0.10485953517489631, "grad_norm": 0.0, - "learning_rate": 1.988953599327649e-05, - "loss": 1.0942, + "learning_rate": 1.9707595758321906e-05, + "loss": 1.1658, "step": 2680 }, { - "epoch": 0.07597268270565899, + "epoch": 0.10489866186712575, "grad_norm": 0.0, - "learning_rate": 1.988939991191577e-05, - "loss": 1.1376, + "learning_rate": 1.970729147668228e-05, + "loss": 1.1459, "step": 2681 }, { - "epoch": 0.07600102014792146, + "epoch": 0.10493778855935519, "grad_norm": 0.0, - "learning_rate": 1.9889263747252962e-05, - "loss": 1.196, + "learning_rate": 1.9706987039155715e-05, + "loss": 1.1649, "step": 2682 }, { - "epoch": 0.07602935759018391, + "epoch": 0.10497691525158463, "grad_norm": 0.0, - "learning_rate": 1.9889127499289215e-05, - "loss": 1.0136, + "learning_rate": 1.9706682445747104e-05, + "loss": 1.1313, "step": 2683 }, { - "epoch": 0.07605769503244637, + "epoch": 0.10501604194381407, "grad_norm": 0.0, - "learning_rate": 1.9888991168025673e-05, - "loss": 1.1094, + "learning_rate": 1.9706377696461337e-05, + "loss": 1.2822, "step": 2684 }, { - "epoch": 0.07608603247470884, + "epoch": 0.10505516863604351, "grad_norm": 0.0, - "learning_rate": 1.9888854753463487e-05, - "loss": 0.9808, + "learning_rate": 1.9706072791303303e-05, + "loss": 1.2679, "step": 2685 }, { - "epoch": 0.07611436991697129, + "epoch": 0.10509429532827295, "grad_norm": 0.0, - "learning_rate": 1.9888718255603804e-05, + "learning_rate": 1.9705767730277905e-05, "loss": 1.2129, "step": 2686 }, { - "epoch": 0.07614270735923376, + "epoch": 0.10513342202050238, "grad_norm": 0.0, - "learning_rate": 1.9888581674447778e-05, - "loss": 1.0743, + "learning_rate": 1.970546251339004e-05, + "loss": 1.3077, "step": 2687 }, { - "epoch": 0.07617104480149622, + "epoch": 0.10517254871273182, "grad_norm": 0.0, - "learning_rate": 1.9888445009996555e-05, - "loss": 0.9278, + "learning_rate": 1.9705157140644608e-05, + "loss": 1.169, "step": 2688 }, { - "epoch": 0.07619938224375868, + "epoch": 0.10521167540496126, "grad_norm": 0.0, - "learning_rate": 1.9888308262251286e-05, - "loss": 1.052, + "learning_rate": 1.970485161204651e-05, + "loss": 1.1033, "step": 2689 }, { - "epoch": 0.07622771968602114, + "epoch": 0.1052508020971907, "grad_norm": 0.0, - "learning_rate": 1.9888171431213128e-05, - "loss": 1.1953, + "learning_rate": 1.970454592760066e-05, + "loss": 1.1514, "step": 2690 }, { - "epoch": 0.07625605712828361, + "epoch": 0.10528992878942015, "grad_norm": 0.0, - "learning_rate": 1.9888034516883228e-05, - "loss": 1.1812, + "learning_rate": 1.9704240087311963e-05, + "loss": 1.2596, "step": 2691 }, { - "epoch": 0.07628439457054606, + "epoch": 0.10532905548164959, "grad_norm": 0.0, - "learning_rate": 1.988789751926274e-05, - "loss": 1.0788, + "learning_rate": 1.9703934091185325e-05, + "loss": 1.3158, "step": 2692 }, { - "epoch": 0.07631273201280853, + "epoch": 0.10536818217387903, "grad_norm": 0.0, - "learning_rate": 1.988776043835282e-05, - "loss": 1.0387, + "learning_rate": 1.9703627939225673e-05, + "loss": 1.1985, "step": 2693 }, { - "epoch": 0.07634106945507098, + "epoch": 0.10540730886610845, "grad_norm": 0.0, - "learning_rate": 1.9887623274154623e-05, - "loss": 1.0157, + "learning_rate": 1.970332163143791e-05, + "loss": 1.1464, "step": 2694 }, { - "epoch": 0.07636940689733344, + "epoch": 0.1054464355583379, "grad_norm": 0.0, - "learning_rate": 1.9887486026669304e-05, - "loss": 1.0316, + "learning_rate": 1.9703015167826963e-05, + "loss": 1.2519, "step": 2695 }, { - "epoch": 0.07639774433959591, + "epoch": 0.10548556225056734, "grad_norm": 0.0, - "learning_rate": 1.988734869589802e-05, - "loss": 1.095, + "learning_rate": 1.970270854839775e-05, + "loss": 1.1286, "step": 2696 }, { - "epoch": 0.07642608178185836, + "epoch": 0.10552468894279678, "grad_norm": 0.0, - "learning_rate": 1.9887211281841924e-05, - "loss": 1.0666, + "learning_rate": 1.9702401773155197e-05, + "loss": 1.2816, "step": 2697 }, { - "epoch": 0.07645441922412083, + "epoch": 0.10556381563502622, "grad_norm": 0.0, - "learning_rate": 1.9887073784502177e-05, - "loss": 1.1067, + "learning_rate": 1.9702094842104228e-05, + "loss": 1.0856, "step": 2698 }, { - "epoch": 0.0764827566663833, + "epoch": 0.10560294232725566, "grad_norm": 0.0, - "learning_rate": 1.9886936203879935e-05, - "loss": 1.0352, + "learning_rate": 1.9701787755249774e-05, + "loss": 1.1471, "step": 2699 }, { - "epoch": 0.07651109410864575, + "epoch": 0.1056420690194851, "grad_norm": 0.0, - "learning_rate": 1.9886798539976357e-05, - "loss": 1.1385, + "learning_rate": 1.9701480512596767e-05, + "loss": 1.3154, "step": 2700 }, { - "epoch": 0.07653943155090821, + "epoch": 0.10568119571171453, "grad_norm": 0.0, - "learning_rate": 1.9886660792792607e-05, - "loss": 1.0626, + "learning_rate": 1.9701173114150137e-05, + "loss": 1.2383, "step": 2701 }, { - "epoch": 0.07656776899317068, + "epoch": 0.10572032240394397, "grad_norm": 0.0, - "learning_rate": 1.988652296232984e-05, - "loss": 1.1956, + "learning_rate": 1.9700865559914823e-05, + "loss": 1.2812, "step": 2702 }, { - "epoch": 0.07659610643543313, + "epoch": 0.10575944909617341, "grad_norm": 0.0, - "learning_rate": 1.9886385048589217e-05, - "loss": 1.1272, + "learning_rate": 1.9700557849895764e-05, + "loss": 1.1826, "step": 2703 }, { - "epoch": 0.0766244438776956, + "epoch": 0.10579857578840285, "grad_norm": 0.0, - "learning_rate": 1.9886247051571904e-05, - "loss": 1.1613, + "learning_rate": 1.9700249984097907e-05, + "loss": 1.147, "step": 2704 }, { - "epoch": 0.07665278131995806, + "epoch": 0.10583770248063229, "grad_norm": 0.0, - "learning_rate": 1.9886108971279058e-05, - "loss": 1.0966, + "learning_rate": 1.9699941962526184e-05, + "loss": 1.3214, "step": 2705 }, { - "epoch": 0.07668111876222052, + "epoch": 0.10587682917286173, "grad_norm": 0.0, - "learning_rate": 1.988597080771185e-05, - "loss": 1.0156, + "learning_rate": 1.9699633785185546e-05, + "loss": 1.1892, "step": 2706 }, { - "epoch": 0.07670945620448298, + "epoch": 0.10591595586509117, "grad_norm": 0.0, - "learning_rate": 1.9885832560871434e-05, - "loss": 1.1205, + "learning_rate": 1.969932545208095e-05, + "loss": 1.0544, "step": 2707 }, { - "epoch": 0.07673779364674545, + "epoch": 0.1059550825573206, "grad_norm": 0.0, - "learning_rate": 1.988569423075898e-05, - "loss": 1.2141, + "learning_rate": 1.969901696321734e-05, + "loss": 1.0943, "step": 2708 }, { - "epoch": 0.0767661310890079, + "epoch": 0.10599420924955004, "grad_norm": 0.0, - "learning_rate": 1.9885555817375656e-05, - "loss": 0.9988, + "learning_rate": 1.969870831859967e-05, + "loss": 1.1971, "step": 2709 }, { - "epoch": 0.07679446853127037, + "epoch": 0.10603333594177948, "grad_norm": 0.0, - "learning_rate": 1.9885417320722623e-05, - "loss": 1.0173, + "learning_rate": 1.9698399518232895e-05, + "loss": 1.2338, "step": 2710 }, { - "epoch": 0.07682280597353283, + "epoch": 0.10607246263400892, "grad_norm": 0.0, - "learning_rate": 1.9885278740801047e-05, - "loss": 1.1372, + "learning_rate": 1.969809056212198e-05, + "loss": 1.219, "step": 2711 }, { - "epoch": 0.07685114341579528, + "epoch": 0.10611158932623836, "grad_norm": 0.0, - "learning_rate": 1.98851400776121e-05, - "loss": 1.0104, + "learning_rate": 1.969778145027188e-05, + "loss": 1.3264, "step": 2712 }, { - "epoch": 0.07687948085805775, + "epoch": 0.1061507160184678, "grad_norm": 0.0, - "learning_rate": 1.9885001331156943e-05, - "loss": 1.1355, + "learning_rate": 1.9697472182687564e-05, + "loss": 1.1141, "step": 2713 }, { - "epoch": 0.07690781830032022, + "epoch": 0.10618984271069724, "grad_norm": 0.0, - "learning_rate": 1.988486250143675e-05, - "loss": 0.9501, + "learning_rate": 1.9697162759373997e-05, + "loss": 1.2715, "step": 2714 }, { - "epoch": 0.07693615574258267, + "epoch": 0.10622896940292667, "grad_norm": 0.0, - "learning_rate": 1.9884723588452693e-05, - "loss": 1.1429, + "learning_rate": 1.9696853180336146e-05, + "loss": 1.2425, "step": 2715 }, { - "epoch": 0.07696449318484513, + "epoch": 0.10626809609515611, "grad_norm": 0.0, - "learning_rate": 1.9884584592205937e-05, - "loss": 1.165, + "learning_rate": 1.9696543445578983e-05, + "loss": 1.1389, "step": 2716 }, { - "epoch": 0.0769928306271076, + "epoch": 0.10630722278738555, "grad_norm": 0.0, - "learning_rate": 1.9884445512697657e-05, - "loss": 1.2158, + "learning_rate": 1.9696233555107484e-05, + "loss": 1.2482, "step": 2717 }, { - "epoch": 0.07702116806937005, + "epoch": 0.106346349479615, "grad_norm": 0.0, - "learning_rate": 1.9884306349929018e-05, - "loss": 1.1305, + "learning_rate": 1.9695923508926626e-05, + "loss": 1.0629, "step": 2718 }, { - "epoch": 0.07704950551163252, + "epoch": 0.10638547617184443, "grad_norm": 0.0, - "learning_rate": 1.9884167103901196e-05, - "loss": 1.1347, + "learning_rate": 1.969561330704138e-05, + "loss": 1.3921, "step": 2719 }, { - "epoch": 0.07707784295389498, + "epoch": 0.10642460286407388, "grad_norm": 0.0, - "learning_rate": 1.988402777461537e-05, - "loss": 1.0845, + "learning_rate": 1.969530294945674e-05, + "loss": 1.2714, "step": 2720 }, { - "epoch": 0.07710618039615744, + "epoch": 0.10646372955630332, "grad_norm": 0.0, - "learning_rate": 1.9883888362072702e-05, - "loss": 1.0671, + "learning_rate": 1.9694992436177683e-05, + "loss": 1.1523, "step": 2721 }, { - "epoch": 0.0771345178384199, + "epoch": 0.10650285624853274, "grad_norm": 0.0, - "learning_rate": 1.9883748866274373e-05, - "loss": 1.0572, + "learning_rate": 1.9694681767209194e-05, + "loss": 1.147, "step": 2722 }, { - "epoch": 0.07716285528068237, + "epoch": 0.10654198294076218, "grad_norm": 0.0, - "learning_rate": 1.988360928722156e-05, - "loss": 1.1049, + "learning_rate": 1.969437094255626e-05, + "loss": 1.17, "step": 2723 }, { - "epoch": 0.07719119272294482, + "epoch": 0.10658110963299162, "grad_norm": 0.0, - "learning_rate": 1.988346962491543e-05, - "loss": 1.0665, + "learning_rate": 1.9694059962223885e-05, + "loss": 1.2913, "step": 2724 }, { - "epoch": 0.07721953016520729, + "epoch": 0.10662023632522107, "grad_norm": 0.0, - "learning_rate": 1.988332987935717e-05, - "loss": 1.1854, + "learning_rate": 1.969374882621705e-05, + "loss": 1.2441, "step": 2725 }, { - "epoch": 0.07724786760746975, + "epoch": 0.1066593630174505, "grad_norm": 0.0, - "learning_rate": 1.988319005054795e-05, - "loss": 1.0888, + "learning_rate": 1.9693437534540753e-05, + "loss": 1.2241, "step": 2726 }, { - "epoch": 0.0772762050497322, + "epoch": 0.10669848970967995, "grad_norm": 0.0, - "learning_rate": 1.988305013848895e-05, - "loss": 1.0159, + "learning_rate": 1.96931260872e-05, + "loss": 1.207, "step": 2727 }, { - "epoch": 0.07730454249199467, + "epoch": 0.10673761640190939, "grad_norm": 0.0, - "learning_rate": 1.988291014318135e-05, - "loss": 1.2017, + "learning_rate": 1.9692814484199785e-05, + "loss": 1.1948, "step": 2728 }, { - "epoch": 0.07733287993425714, + "epoch": 0.10677674309413882, "grad_norm": 0.0, - "learning_rate": 1.988277006462633e-05, - "loss": 1.0145, + "learning_rate": 1.9692502725545116e-05, + "loss": 1.2203, "step": 2729 }, { - "epoch": 0.07736121737651959, + "epoch": 0.10681586978636826, "grad_norm": 0.0, - "learning_rate": 1.9882629902825067e-05, - "loss": 1.1687, + "learning_rate": 1.9692190811241e-05, + "loss": 1.3416, "step": 2730 }, { - "epoch": 0.07738955481878206, + "epoch": 0.1068549964785977, "grad_norm": 0.0, - "learning_rate": 1.9882489657778737e-05, - "loss": 1.1924, + "learning_rate": 1.9691878741292444e-05, + "loss": 1.2997, "step": 2731 }, { - "epoch": 0.07741789226104452, + "epoch": 0.10689412317082714, "grad_norm": 0.0, - "learning_rate": 1.9882349329488534e-05, - "loss": 1.0663, + "learning_rate": 1.969156651570446e-05, + "loss": 1.1766, "step": 2732 }, { - "epoch": 0.07744622970330697, + "epoch": 0.10693324986305658, "grad_norm": 0.0, - "learning_rate": 1.988220891795563e-05, - "loss": 1.0445, + "learning_rate": 1.9691254134482062e-05, + "loss": 1.2845, "step": 2733 }, { - "epoch": 0.07747456714556944, + "epoch": 0.10697237655528602, "grad_norm": 0.0, - "learning_rate": 1.9882068423181208e-05, - "loss": 1.1123, + "learning_rate": 1.9690941597630266e-05, + "loss": 1.2617, "step": 2734 }, { - "epoch": 0.0775029045878319, + "epoch": 0.10701150324751546, "grad_norm": 0.0, - "learning_rate": 1.9881927845166457e-05, - "loss": 1.1082, + "learning_rate": 1.9690628905154092e-05, + "loss": 1.1873, "step": 2735 }, { - "epoch": 0.07753124203009436, + "epoch": 0.10705062993974489, "grad_norm": 0.0, - "learning_rate": 1.9881787183912558e-05, - "loss": 1.0369, + "learning_rate": 1.969031605705856e-05, + "loss": 1.1574, "step": 2736 }, { - "epoch": 0.07755957947235682, + "epoch": 0.10708975663197433, "grad_norm": 0.0, - "learning_rate": 1.9881646439420695e-05, - "loss": 1.0709, + "learning_rate": 1.9690003053348698e-05, + "loss": 1.2865, "step": 2737 }, { - "epoch": 0.07758791691461929, + "epoch": 0.10712888332420377, "grad_norm": 0.0, - "learning_rate": 1.9881505611692056e-05, - "loss": 1.0193, + "learning_rate": 1.9689689894029526e-05, + "loss": 1.181, "step": 2738 }, { - "epoch": 0.07761625435688174, + "epoch": 0.10716801001643321, "grad_norm": 0.0, - "learning_rate": 1.9881364700727827e-05, - "loss": 0.9699, + "learning_rate": 1.9689376579106075e-05, + "loss": 1.2419, "step": 2739 }, { - "epoch": 0.07764459179914421, + "epoch": 0.10720713670866265, "grad_norm": 0.0, - "learning_rate": 1.988122370652919e-05, - "loss": 1.0925, + "learning_rate": 1.968906310858338e-05, + "loss": 1.1766, "step": 2740 }, { - "epoch": 0.07767292924140667, + "epoch": 0.10724626340089209, "grad_norm": 0.0, - "learning_rate": 1.9881082629097336e-05, - "loss": 1.0935, + "learning_rate": 1.968874948246647e-05, + "loss": 1.2691, "step": 2741 }, { - "epoch": 0.07770126668366913, + "epoch": 0.10728539009312153, "grad_norm": 0.0, - "learning_rate": 1.988094146843346e-05, - "loss": 1.0979, + "learning_rate": 1.9688435700760388e-05, + "loss": 1.3247, "step": 2742 }, { - "epoch": 0.07772960412593159, + "epoch": 0.10732451678535096, "grad_norm": 0.0, - "learning_rate": 1.988080022453874e-05, - "loss": 1.0413, + "learning_rate": 1.9688121763470165e-05, + "loss": 1.2122, "step": 2743 }, { - "epoch": 0.07775794156819406, + "epoch": 0.1073636434775804, "grad_norm": 0.0, - "learning_rate": 1.9880658897414368e-05, - "loss": 1.209, + "learning_rate": 1.9687807670600847e-05, + "loss": 1.3059, "step": 2744 }, { - "epoch": 0.07778627901045651, + "epoch": 0.10740277016980984, "grad_norm": 0.0, - "learning_rate": 1.9880517487061543e-05, - "loss": 1.0603, + "learning_rate": 1.968749342215748e-05, + "loss": 1.2368, "step": 2745 }, { - "epoch": 0.07781461645271898, + "epoch": 0.10744189686203928, "grad_norm": 0.0, - "learning_rate": 1.9880375993481446e-05, - "loss": 1.1876, + "learning_rate": 1.9687179018145105e-05, + "loss": 1.1801, "step": 2746 }, { - "epoch": 0.07784295389498144, + "epoch": 0.10748102355426872, "grad_norm": 0.0, - "learning_rate": 1.9880234416675276e-05, - "loss": 1.0509, + "learning_rate": 1.968686445856878e-05, + "loss": 1.2588, "step": 2747 }, { - "epoch": 0.0778712913372439, + "epoch": 0.10752015024649816, "grad_norm": 0.0, - "learning_rate": 1.988009275664422e-05, - "loss": 1.1627, + "learning_rate": 1.968654974343354e-05, + "loss": 1.2386, "step": 2748 }, { - "epoch": 0.07789962877950636, + "epoch": 0.1075592769387276, "grad_norm": 0.0, - "learning_rate": 1.9879951013389475e-05, - "loss": 1.1063, + "learning_rate": 1.9686234872744454e-05, + "loss": 1.2441, "step": 2749 }, { - "epoch": 0.07792796622176883, + "epoch": 0.10759840363095705, "grad_norm": 0.0, - "learning_rate": 1.9879809186912237e-05, - "loss": 1.0658, + "learning_rate": 1.9685919846506577e-05, + "loss": 1.2722, "step": 2750 }, { - "epoch": 0.07795630366403128, + "epoch": 0.10763753032318647, "grad_norm": 0.0, - "learning_rate": 1.9879667277213692e-05, - "loss": 1.1374, + "learning_rate": 1.968560466472496e-05, + "loss": 1.2854, "step": 2751 }, { - "epoch": 0.07798464110629375, + "epoch": 0.10767665701541591, "grad_norm": 0.0, - "learning_rate": 1.9879525284295042e-05, - "loss": 1.2266, + "learning_rate": 1.9685289327404668e-05, + "loss": 1.1225, "step": 2752 }, { - "epoch": 0.07801297854855621, + "epoch": 0.10771578370764535, "grad_norm": 0.0, - "learning_rate": 1.9879383208157484e-05, - "loss": 0.9927, + "learning_rate": 1.968497383455077e-05, + "loss": 1.2634, "step": 2753 }, { - "epoch": 0.07804131599081866, + "epoch": 0.1077549103998748, "grad_norm": 0.0, - "learning_rate": 1.9879241048802213e-05, - "loss": 1.2076, + "learning_rate": 1.9684658186168324e-05, + "loss": 1.2858, "step": 2754 }, { - "epoch": 0.07806965343308113, + "epoch": 0.10779403709210424, "grad_norm": 0.0, - "learning_rate": 1.9879098806230424e-05, - "loss": 1.135, + "learning_rate": 1.9684342382262404e-05, + "loss": 1.1946, "step": 2755 }, { - "epoch": 0.0780979908753436, + "epoch": 0.10783316378433368, "grad_norm": 0.0, - "learning_rate": 1.9878956480443315e-05, - "loss": 1.0484, + "learning_rate": 1.968402642283808e-05, + "loss": 1.0994, "step": 2756 }, { - "epoch": 0.07812632831760605, + "epoch": 0.10787229047656312, "grad_norm": 0.0, - "learning_rate": 1.987881407144209e-05, - "loss": 1.1097, + "learning_rate": 1.968371030790043e-05, + "loss": 1.2589, "step": 2757 }, { - "epoch": 0.07815466575986851, + "epoch": 0.10791141716879254, "grad_norm": 0.0, - "learning_rate": 1.9878671579227947e-05, - "loss": 1.0672, + "learning_rate": 1.9683394037454522e-05, + "loss": 1.2103, "step": 2758 }, { - "epoch": 0.07818300320213098, + "epoch": 0.10795054386102199, "grad_norm": 0.0, - "learning_rate": 1.9878529003802086e-05, - "loss": 1.1445, + "learning_rate": 1.9683077611505443e-05, + "loss": 1.3177, "step": 2759 }, { - "epoch": 0.07821134064439343, + "epoch": 0.10798967055325143, "grad_norm": 0.0, - "learning_rate": 1.98783863451657e-05, - "loss": 1.0228, + "learning_rate": 1.968276103005827e-05, + "loss": 1.3246, "step": 2760 }, { - "epoch": 0.0782396780866559, + "epoch": 0.10802879724548087, "grad_norm": 0.0, - "learning_rate": 1.9878243603320003e-05, - "loss": 1.023, + "learning_rate": 1.968244429311809e-05, + "loss": 1.2635, "step": 2761 }, { - "epoch": 0.07826801552891836, + "epoch": 0.10806792393771031, "grad_norm": 0.0, - "learning_rate": 1.9878100778266193e-05, - "loss": 1.0992, + "learning_rate": 1.9682127400689986e-05, + "loss": 1.1378, "step": 2762 }, { - "epoch": 0.07829635297118082, + "epoch": 0.10810705062993975, "grad_norm": 0.0, - "learning_rate": 1.9877957870005468e-05, - "loss": 1.0496, + "learning_rate": 1.9681810352779047e-05, + "loss": 1.2765, "step": 2763 }, { - "epoch": 0.07832469041344328, + "epoch": 0.10814617732216919, "grad_norm": 0.0, - "learning_rate": 1.987781487853904e-05, - "loss": 1.1851, + "learning_rate": 1.9681493149390366e-05, + "loss": 1.1438, "step": 2764 }, { - "epoch": 0.07835302785570575, + "epoch": 0.10818530401439862, "grad_norm": 0.0, - "learning_rate": 1.9877671803868106e-05, - "loss": 1.1094, + "learning_rate": 1.968117579052904e-05, + "loss": 1.1374, "step": 2765 }, { - "epoch": 0.0783813652979682, + "epoch": 0.10822443070662806, "grad_norm": 0.0, - "learning_rate": 1.9877528645993876e-05, - "loss": 1.1273, + "learning_rate": 1.9680858276200156e-05, + "loss": 1.0966, "step": 2766 }, { - "epoch": 0.07840970274023067, + "epoch": 0.1082635573988575, "grad_norm": 0.0, - "learning_rate": 1.9877385404917554e-05, - "loss": 1.1935, + "learning_rate": 1.9680540606408826e-05, + "loss": 1.2269, "step": 2767 }, { - "epoch": 0.07843804018249313, + "epoch": 0.10830268409108694, "grad_norm": 0.0, - "learning_rate": 1.9877242080640347e-05, - "loss": 1.1176, + "learning_rate": 1.968022278116014e-05, + "loss": 1.1691, "step": 2768 }, { - "epoch": 0.07846637762475558, + "epoch": 0.10834181078331638, "grad_norm": 0.0, - "learning_rate": 1.987709867316346e-05, - "loss": 1.1185, + "learning_rate": 1.9679904800459205e-05, + "loss": 1.2585, "step": 2769 }, { - "epoch": 0.07849471506701805, + "epoch": 0.10838093747554582, "grad_norm": 0.0, - "learning_rate": 1.9876955182488105e-05, - "loss": 1.0594, + "learning_rate": 1.967958666431113e-05, + "loss": 1.3185, "step": 2770 }, { - "epoch": 0.07852305250928052, + "epoch": 0.10842006416777526, "grad_norm": 0.0, - "learning_rate": 1.987681160861549e-05, - "loss": 1.1109, + "learning_rate": 1.9679268372721025e-05, + "loss": 1.0474, "step": 2771 }, { - "epoch": 0.07855138995154297, + "epoch": 0.10845919086000469, "grad_norm": 0.0, - "learning_rate": 1.9876667951546823e-05, - "loss": 1.1156, + "learning_rate": 1.9678949925693996e-05, + "loss": 1.1672, "step": 2772 }, { - "epoch": 0.07857972739380543, + "epoch": 0.10849831755223413, "grad_norm": 0.0, - "learning_rate": 1.987652421128331e-05, - "loss": 1.0738, + "learning_rate": 1.967863132323516e-05, + "loss": 1.2904, "step": 2773 }, { - "epoch": 0.0786080648360679, + "epoch": 0.10853744424446357, "grad_norm": 0.0, - "learning_rate": 1.9876380387826168e-05, - "loss": 1.1967, + "learning_rate": 1.967831256534963e-05, + "loss": 1.2274, "step": 2774 }, { - "epoch": 0.07863640227833035, + "epoch": 0.10857657093669301, "grad_norm": 0.0, - "learning_rate": 1.9876236481176608e-05, - "loss": 1.2526, + "learning_rate": 1.9677993652042532e-05, + "loss": 1.1387, "step": 2775 }, { - "epoch": 0.07866473972059282, + "epoch": 0.10861569762892245, "grad_norm": 0.0, - "learning_rate": 1.987609249133584e-05, - "loss": 1.0559, + "learning_rate": 1.9677674583318982e-05, + "loss": 1.3404, "step": 2776 }, { - "epoch": 0.07869307716285528, + "epoch": 0.1086548243211519, "grad_norm": 0.0, - "learning_rate": 1.9875948418305078e-05, - "loss": 1.0648, + "learning_rate": 1.9677355359184104e-05, + "loss": 1.2654, "step": 2777 }, { - "epoch": 0.07872141460511774, + "epoch": 0.10869395101338133, "grad_norm": 0.0, - "learning_rate": 1.9875804262085534e-05, - "loss": 1.1708, + "learning_rate": 1.9677035979643027e-05, + "loss": 1.1057, "step": 2778 }, { - "epoch": 0.0787497520473802, + "epoch": 0.10873307770561076, "grad_norm": 0.0, - "learning_rate": 1.9875660022678427e-05, - "loss": 1.1151, + "learning_rate": 1.9676716444700877e-05, + "loss": 1.2428, "step": 2779 }, { - "epoch": 0.07877808948964267, + "epoch": 0.1087722043978402, "grad_norm": 0.0, - "learning_rate": 1.9875515700084963e-05, - "loss": 1.1115, + "learning_rate": 1.967639675436279e-05, + "loss": 1.1557, "step": 2780 }, { - "epoch": 0.07880642693190512, + "epoch": 0.10881133109006964, "grad_norm": 0.0, - "learning_rate": 1.9875371294306368e-05, - "loss": 1.0802, + "learning_rate": 1.967607690863389e-05, + "loss": 1.2642, "step": 2781 }, { - "epoch": 0.07883476437416759, + "epoch": 0.10885045778229908, "grad_norm": 0.0, - "learning_rate": 1.987522680534385e-05, - "loss": 1.1699, + "learning_rate": 1.9675756907519325e-05, + "loss": 1.363, "step": 2782 }, { - "epoch": 0.07886310181643005, + "epoch": 0.10888958447452853, "grad_norm": 0.0, - "learning_rate": 1.987508223319863e-05, - "loss": 1.0207, + "learning_rate": 1.9675436751024222e-05, + "loss": 1.3279, "step": 2783 }, { - "epoch": 0.0788914392586925, + "epoch": 0.10892871116675797, "grad_norm": 0.0, - "learning_rate": 1.9874937577871928e-05, - "loss": 1.1191, + "learning_rate": 1.9675116439153736e-05, + "loss": 1.0835, "step": 2784 }, { - "epoch": 0.07891977670095497, + "epoch": 0.1089678378589874, "grad_norm": 0.0, - "learning_rate": 1.9874792839364958e-05, - "loss": 1.1202, + "learning_rate": 1.9674795971913e-05, + "loss": 1.2189, "step": 2785 }, { - "epoch": 0.07894811414321744, + "epoch": 0.10900696455121683, "grad_norm": 0.0, - "learning_rate": 1.987464801767894e-05, - "loss": 1.0967, + "learning_rate": 1.9674475349307163e-05, + "loss": 1.1454, "step": 2786 }, { - "epoch": 0.07897645158547989, + "epoch": 0.10904609124344627, "grad_norm": 0.0, - "learning_rate": 1.9874503112815094e-05, - "loss": 1.1931, + "learning_rate": 1.9674154571341378e-05, + "loss": 1.2034, "step": 2787 }, { - "epoch": 0.07900478902774236, + "epoch": 0.10908521793567572, "grad_norm": 0.0, - "learning_rate": 1.9874358124774644e-05, - "loss": 1.0545, + "learning_rate": 1.9673833638020793e-05, + "loss": 1.2103, "step": 2788 }, { - "epoch": 0.07903312647000482, + "epoch": 0.10912434462790516, "grad_norm": 0.0, - "learning_rate": 1.9874213053558807e-05, - "loss": 1.0585, + "learning_rate": 1.9673512549350557e-05, + "loss": 1.1517, "step": 2789 }, { - "epoch": 0.07906146391226727, + "epoch": 0.1091634713201346, "grad_norm": 0.0, - "learning_rate": 1.987406789916881e-05, - "loss": 1.087, + "learning_rate": 1.9673191305335833e-05, + "loss": 1.1562, "step": 2790 }, { - "epoch": 0.07908980135452974, + "epoch": 0.10920259801236404, "grad_norm": 0.0, - "learning_rate": 1.9873922661605868e-05, - "loss": 1.1425, + "learning_rate": 1.967286990598178e-05, + "loss": 1.3098, "step": 2791 }, { - "epoch": 0.0791181387967922, + "epoch": 0.10924172470459348, "grad_norm": 0.0, - "learning_rate": 1.987377734087121e-05, - "loss": 1.0472, + "learning_rate": 1.9672548351293555e-05, + "loss": 1.1395, "step": 2792 }, { - "epoch": 0.07914647623905466, + "epoch": 0.1092808513968229, "grad_norm": 0.0, - "learning_rate": 1.987363193696606e-05, - "loss": 0.9835, + "learning_rate": 1.9672226641276327e-05, + "loss": 1.2114, "step": 2793 }, { - "epoch": 0.07917481368131712, + "epoch": 0.10931997808905235, "grad_norm": 0.0, - "learning_rate": 1.9873486449891643e-05, - "loss": 1.1221, + "learning_rate": 1.9671904775935256e-05, + "loss": 1.2544, "step": 2794 }, { - "epoch": 0.07920315112357959, + "epoch": 0.10935910478128179, "grad_norm": 0.0, - "learning_rate": 1.9873340879649182e-05, - "loss": 1.0598, + "learning_rate": 1.9671582755275515e-05, + "loss": 1.2516, "step": 2795 }, { - "epoch": 0.07923148856584204, + "epoch": 0.10939823147351123, "grad_norm": 0.0, - "learning_rate": 1.9873195226239903e-05, - "loss": 1.0457, + "learning_rate": 1.9671260579302275e-05, + "loss": 1.2263, "step": 2796 }, { - "epoch": 0.07925982600810451, + "epoch": 0.10943735816574067, "grad_norm": 0.0, - "learning_rate": 1.9873049489665036e-05, - "loss": 1.2837, + "learning_rate": 1.9670938248020706e-05, + "loss": 1.2156, "step": 2797 }, { - "epoch": 0.07928816345036697, + "epoch": 0.10947648485797011, "grad_norm": 0.0, - "learning_rate": 1.9872903669925807e-05, - "loss": 1.1016, + "learning_rate": 1.967061576143599e-05, + "loss": 1.1986, "step": 2798 }, { - "epoch": 0.07931650089262943, + "epoch": 0.10951561155019955, "grad_norm": 0.0, - "learning_rate": 1.9872757767023445e-05, - "loss": 1.0023, + "learning_rate": 1.96702931195533e-05, + "loss": 1.2402, "step": 2799 }, { - "epoch": 0.07934483833489189, + "epoch": 0.10955473824242898, "grad_norm": 0.0, - "learning_rate": 1.9872611780959173e-05, - "loss": 1.0913, + "learning_rate": 1.9669970322377824e-05, + "loss": 1.0554, "step": 2800 }, { - "epoch": 0.07937317577715436, + "epoch": 0.10959386493465842, "grad_norm": 0.0, - "learning_rate": 1.987246571173423e-05, - "loss": 1.0265, + "learning_rate": 1.966964736991474e-05, + "loss": 1.1972, "step": 2801 }, { - "epoch": 0.07940151321941681, + "epoch": 0.10963299162688786, "grad_norm": 0.0, - "learning_rate": 1.9872319559349843e-05, - "loss": 1.1411, + "learning_rate": 1.9669324262169234e-05, + "loss": 1.2529, "step": 2802 }, { - "epoch": 0.07942985066167928, + "epoch": 0.1096721183191173, "grad_norm": 0.0, - "learning_rate": 1.9872173323807244e-05, - "loss": 1.0501, + "learning_rate": 1.96690009991465e-05, + "loss": 1.1158, "step": 2803 }, { - "epoch": 0.07945818810394174, + "epoch": 0.10971124501134674, "grad_norm": 0.0, - "learning_rate": 1.987202700510766e-05, - "loss": 1.1715, + "learning_rate": 1.9668677580851723e-05, + "loss": 1.1614, "step": 2804 }, { - "epoch": 0.0794865255462042, + "epoch": 0.10975037170357618, "grad_norm": 0.0, - "learning_rate": 1.9871880603252326e-05, - "loss": 1.2684, + "learning_rate": 1.96683540072901e-05, + "loss": 1.305, "step": 2805 }, { - "epoch": 0.07951486298846666, + "epoch": 0.10978949839580562, "grad_norm": 0.0, - "learning_rate": 1.9871734118242477e-05, - "loss": 1.0096, + "learning_rate": 1.966803027846683e-05, + "loss": 1.1697, "step": 2806 }, { - "epoch": 0.07954320043072913, + "epoch": 0.10982862508803505, "grad_norm": 0.0, - "learning_rate": 1.9871587550079346e-05, - "loss": 1.1006, + "learning_rate": 1.9667706394387107e-05, + "loss": 1.1731, "step": 2807 }, { - "epoch": 0.07957153787299158, + "epoch": 0.10986775178026449, "grad_norm": 0.0, - "learning_rate": 1.9871440898764163e-05, - "loss": 1.1165, + "learning_rate": 1.9667382355056128e-05, + "loss": 1.1976, "step": 2808 }, { - "epoch": 0.07959987531525405, + "epoch": 0.10990687847249393, "grad_norm": 0.0, - "learning_rate": 1.9871294164298175e-05, - "loss": 0.9776, + "learning_rate": 1.9667058160479108e-05, + "loss": 1.3775, "step": 2809 }, { - "epoch": 0.07962821275751651, + "epoch": 0.10994600516472337, "grad_norm": 0.0, - "learning_rate": 1.9871147346682605e-05, - "loss": 1.067, + "learning_rate": 1.9666733810661247e-05, + "loss": 1.1266, "step": 2810 }, { - "epoch": 0.07965655019977896, + "epoch": 0.10998513185695281, "grad_norm": 0.0, - "learning_rate": 1.98710004459187e-05, - "loss": 1.1079, + "learning_rate": 1.9666409305607753e-05, + "loss": 1.2328, "step": 2811 }, { - "epoch": 0.07968488764204143, + "epoch": 0.11002425854918226, "grad_norm": 0.0, - "learning_rate": 1.9870853462007688e-05, - "loss": 1.1216, + "learning_rate": 1.966608464532384e-05, + "loss": 1.215, "step": 2812 }, { - "epoch": 0.0797132250843039, + "epoch": 0.1100633852414117, "grad_norm": 0.0, - "learning_rate": 1.9870706394950815e-05, - "loss": 1.1304, + "learning_rate": 1.9665759829814717e-05, + "loss": 1.2556, "step": 2813 }, { - "epoch": 0.07974156252656635, + "epoch": 0.11010251193364114, "grad_norm": 0.0, - "learning_rate": 1.9870559244749317e-05, - "loss": 1.0374, + "learning_rate": 1.9665434859085602e-05, + "loss": 1.1801, "step": 2814 }, { - "epoch": 0.07976989996882881, + "epoch": 0.11014163862587056, "grad_norm": 0.0, - "learning_rate": 1.987041201140443e-05, - "loss": 1.032, + "learning_rate": 1.9665109733141718e-05, + "loss": 1.2233, "step": 2815 }, { - "epoch": 0.07979823741109128, + "epoch": 0.1101807653181, "grad_norm": 0.0, - "learning_rate": 1.98702646949174e-05, - "loss": 1.1454, + "learning_rate": 1.966478445198828e-05, + "loss": 1.3419, "step": 2816 }, { - "epoch": 0.07982657485335373, + "epoch": 0.11021989201032945, "grad_norm": 0.0, - "learning_rate": 1.9870117295289463e-05, - "loss": 1.2065, + "learning_rate": 1.9664459015630518e-05, + "loss": 1.2139, "step": 2817 }, { - "epoch": 0.0798549122956162, + "epoch": 0.11025901870255889, "grad_norm": 0.0, - "learning_rate": 1.9869969812521867e-05, - "loss": 1.1763, + "learning_rate": 1.9664133424073656e-05, + "loss": 1.0851, "step": 2818 }, { - "epoch": 0.07988324973787866, + "epoch": 0.11029814539478833, "grad_norm": 0.0, - "learning_rate": 1.9869822246615846e-05, - "loss": 1.0221, + "learning_rate": 1.9663807677322916e-05, + "loss": 1.1959, "step": 2819 }, { - "epoch": 0.07991158718014112, + "epoch": 0.11033727208701777, "grad_norm": 0.0, - "learning_rate": 1.9869674597572647e-05, - "loss": 1.1331, + "learning_rate": 1.9663481775383535e-05, + "loss": 1.0554, "step": 2820 }, { - "epoch": 0.07993992462240358, + "epoch": 0.11037639877924721, "grad_norm": 0.0, - "learning_rate": 1.986952686539352e-05, - "loss": 1.117, + "learning_rate": 1.9663155718260746e-05, + "loss": 1.2462, "step": 2821 }, { - "epoch": 0.07996826206466605, + "epoch": 0.11041552547147664, "grad_norm": 0.0, - "learning_rate": 1.9869379050079697e-05, - "loss": 1.1081, + "learning_rate": 1.9662829505959786e-05, + "loss": 1.2999, "step": 2822 }, { - "epoch": 0.0799965995069285, + "epoch": 0.11045465216370608, "grad_norm": 0.0, - "learning_rate": 1.986923115163243e-05, - "loss": 1.0714, + "learning_rate": 1.966250313848589e-05, + "loss": 1.1733, "step": 2823 }, { - "epoch": 0.08002493694919097, + "epoch": 0.11049377885593552, "grad_norm": 0.0, - "learning_rate": 1.9869083170052966e-05, - "loss": 1.0837, + "learning_rate": 1.9662176615844304e-05, + "loss": 1.098, "step": 2824 }, { - "epoch": 0.08005327439145343, + "epoch": 0.11053290554816496, "grad_norm": 0.0, - "learning_rate": 1.9868935105342548e-05, - "loss": 0.9729, + "learning_rate": 1.966184993804027e-05, + "loss": 1.3754, "step": 2825 }, { - "epoch": 0.08008161183371588, + "epoch": 0.1105720322403944, "grad_norm": 0.0, - "learning_rate": 1.9868786957502425e-05, - "loss": 1.1455, + "learning_rate": 1.966152310507903e-05, + "loss": 1.2242, "step": 2826 }, { - "epoch": 0.08010994927597835, + "epoch": 0.11061115893262384, "grad_norm": 0.0, - "learning_rate": 1.9868638726533846e-05, - "loss": 1.1014, + "learning_rate": 1.9661196116965838e-05, + "loss": 1.1589, "step": 2827 }, { - "epoch": 0.08013828671824082, + "epoch": 0.11065028562485328, "grad_norm": 0.0, - "learning_rate": 1.9868490412438056e-05, - "loss": 1.1126, + "learning_rate": 1.9660868973705938e-05, + "loss": 1.2359, "step": 2828 }, { - "epoch": 0.08016662416050327, + "epoch": 0.11068941231708271, "grad_norm": 0.0, - "learning_rate": 1.9868342015216312e-05, - "loss": 1.0302, + "learning_rate": 1.966054167530459e-05, + "loss": 1.2999, "step": 2829 }, { - "epoch": 0.08019496160276574, + "epoch": 0.11072853900931215, "grad_norm": 0.0, - "learning_rate": 1.9868193534869852e-05, - "loss": 0.9951, + "learning_rate": 1.9660214221767053e-05, + "loss": 1.2822, "step": 2830 }, { - "epoch": 0.0802232990450282, + "epoch": 0.11076766570154159, "grad_norm": 0.0, - "learning_rate": 1.986804497139994e-05, - "loss": 0.9446, + "learning_rate": 1.9659886613098574e-05, + "loss": 1.2144, "step": 2831 }, { - "epoch": 0.08025163648729065, + "epoch": 0.11080679239377103, "grad_norm": 0.0, - "learning_rate": 1.9867896324807817e-05, - "loss": 1.1667, + "learning_rate": 1.9659558849304424e-05, + "loss": 1.1837, "step": 2832 }, { - "epoch": 0.08027997392955312, + "epoch": 0.11084591908600047, "grad_norm": 0.0, - "learning_rate": 1.986774759509474e-05, - "loss": 1.1367, + "learning_rate": 1.965923093038986e-05, + "loss": 1.1823, "step": 2833 }, { - "epoch": 0.08030831137181559, + "epoch": 0.11088504577822991, "grad_norm": 0.0, - "learning_rate": 1.9867598782261958e-05, - "loss": 1.1104, + "learning_rate": 1.9658902856360153e-05, + "loss": 1.1815, "step": 2834 }, { - "epoch": 0.08033664881407804, + "epoch": 0.11092417247045935, "grad_norm": 0.0, - "learning_rate": 1.986744988631073e-05, - "loss": 1.0681, + "learning_rate": 1.965857462722057e-05, + "loss": 1.2321, "step": 2835 }, { - "epoch": 0.0803649862563405, + "epoch": 0.11096329916268878, "grad_norm": 0.0, - "learning_rate": 1.9867300907242308e-05, - "loss": 1.1126, + "learning_rate": 1.965824624297638e-05, + "loss": 1.231, "step": 2836 }, { - "epoch": 0.08039332369860297, + "epoch": 0.11100242585491822, "grad_norm": 0.0, - "learning_rate": 1.9867151845057942e-05, - "loss": 1.1549, + "learning_rate": 1.965791770363286e-05, + "loss": 1.2219, "step": 2837 }, { - "epoch": 0.08042166114086542, + "epoch": 0.11104155254714766, "grad_norm": 0.0, - "learning_rate": 1.98670026997589e-05, - "loss": 1.093, + "learning_rate": 1.965758900919528e-05, + "loss": 1.1547, "step": 2838 }, { - "epoch": 0.08044999858312789, + "epoch": 0.1110806792393771, "grad_norm": 0.0, - "learning_rate": 1.986685347134642e-05, - "loss": 1.068, + "learning_rate": 1.965726015966893e-05, + "loss": 1.2701, "step": 2839 }, { - "epoch": 0.08047833602539035, + "epoch": 0.11111980593160654, "grad_norm": 0.0, - "learning_rate": 1.9866704159821776e-05, - "loss": 1.1722, + "learning_rate": 1.9656931155059077e-05, + "loss": 1.1033, "step": 2840 }, { - "epoch": 0.0805066734676528, + "epoch": 0.11115893262383598, "grad_norm": 0.0, - "learning_rate": 1.9866554765186218e-05, - "loss": 1.1248, + "learning_rate": 1.965660199537101e-05, + "loss": 1.3561, "step": 2841 }, { - "epoch": 0.08053501090991527, + "epoch": 0.11119805931606543, "grad_norm": 0.0, - "learning_rate": 1.9866405287441004e-05, - "loss": 1.0738, + "learning_rate": 1.9656272680610015e-05, + "loss": 1.2312, "step": 2842 }, { - "epoch": 0.08056334835217774, + "epoch": 0.11123718600829485, "grad_norm": 0.0, - "learning_rate": 1.9866255726587397e-05, - "loss": 0.9881, + "learning_rate": 1.9655943210781384e-05, + "loss": 1.2881, "step": 2843 }, { - "epoch": 0.08059168579444019, + "epoch": 0.1112763127005243, "grad_norm": 0.0, - "learning_rate": 1.986610608262665e-05, - "loss": 1.0092, + "learning_rate": 1.96556135858904e-05, + "loss": 1.2299, "step": 2844 }, { - "epoch": 0.08062002323670266, + "epoch": 0.11131543939275373, "grad_norm": 0.0, - "learning_rate": 1.986595635556003e-05, - "loss": 1.1278, + "learning_rate": 1.9655283805942364e-05, + "loss": 1.2087, "step": 2845 }, { - "epoch": 0.08064836067896512, + "epoch": 0.11135456608498318, "grad_norm": 0.0, - "learning_rate": 1.9865806545388796e-05, - "loss": 1.0429, + "learning_rate": 1.965495387094257e-05, + "loss": 1.2642, "step": 2846 }, { - "epoch": 0.08067669812122757, + "epoch": 0.11139369277721262, "grad_norm": 0.0, - "learning_rate": 1.986565665211421e-05, - "loss": 1.0396, + "learning_rate": 1.9654623780896313e-05, + "loss": 1.2253, "step": 2847 }, { - "epoch": 0.08070503556349004, + "epoch": 0.11143281946944206, "grad_norm": 0.0, - "learning_rate": 1.9865506675737537e-05, - "loss": 1.0293, + "learning_rate": 1.9654293535808895e-05, + "loss": 1.2693, "step": 2848 }, { - "epoch": 0.0807333730057525, + "epoch": 0.1114719461616715, "grad_norm": 0.0, - "learning_rate": 1.9865356616260035e-05, - "loss": 1.0558, + "learning_rate": 1.9653963135685622e-05, + "loss": 1.169, "step": 2849 }, { - "epoch": 0.08076171044801496, + "epoch": 0.11151107285390092, "grad_norm": 0.0, - "learning_rate": 1.986520647368297e-05, - "loss": 1.0516, + "learning_rate": 1.96536325805318e-05, + "loss": 1.0095, "step": 2850 }, { - "epoch": 0.08079004789027742, + "epoch": 0.11155019954613037, "grad_norm": 0.0, - "learning_rate": 1.986505624800761e-05, - "loss": 1.0892, + "learning_rate": 1.9653301870352733e-05, + "loss": 1.2183, "step": 2851 }, { - "epoch": 0.08081838533253989, + "epoch": 0.1115893262383598, "grad_norm": 0.0, - "learning_rate": 1.9864905939235215e-05, - "loss": 1.052, + "learning_rate": 1.9652971005153735e-05, + "loss": 1.2426, "step": 2852 }, { - "epoch": 0.08084672277480234, + "epoch": 0.11162845293058925, "grad_norm": 0.0, - "learning_rate": 1.9864755547367056e-05, - "loss": 1.0351, + "learning_rate": 1.965263998494012e-05, + "loss": 1.2117, "step": 2853 }, { - "epoch": 0.08087506021706481, + "epoch": 0.11166757962281869, "grad_norm": 0.0, - "learning_rate": 1.9864605072404397e-05, - "loss": 1.0693, + "learning_rate": 1.96523088097172e-05, + "loss": 1.1135, "step": 2854 }, { - "epoch": 0.08090339765932728, + "epoch": 0.11170670631504813, "grad_norm": 0.0, - "learning_rate": 1.986445451434851e-05, - "loss": 1.1118, + "learning_rate": 1.9651977479490293e-05, + "loss": 1.2654, "step": 2855 }, { - "epoch": 0.08093173510158973, + "epoch": 0.11174583300727757, "grad_norm": 0.0, - "learning_rate": 1.9864303873200655e-05, - "loss": 1.1924, + "learning_rate": 1.965164599426473e-05, + "loss": 1.1665, "step": 2856 }, { - "epoch": 0.0809600725438522, + "epoch": 0.111784959699507, "grad_norm": 0.0, - "learning_rate": 1.9864153148962108e-05, - "loss": 1.1679, + "learning_rate": 1.9651314354045817e-05, + "loss": 1.17, "step": 2857 }, { - "epoch": 0.08098840998611466, + "epoch": 0.11182408639173644, "grad_norm": 0.0, - "learning_rate": 1.9864002341634134e-05, - "loss": 1.0466, + "learning_rate": 1.9650982558838897e-05, + "loss": 1.2178, "step": 2858 }, { - "epoch": 0.08101674742837711, + "epoch": 0.11186321308396588, "grad_norm": 0.0, - "learning_rate": 1.9863851451218006e-05, - "loss": 1.0168, + "learning_rate": 1.9650650608649285e-05, + "loss": 1.3123, "step": 2859 }, { - "epoch": 0.08104508487063958, + "epoch": 0.11190233977619532, "grad_norm": 0.0, - "learning_rate": 1.9863700477714992e-05, - "loss": 1.0823, + "learning_rate": 1.9650318503482323e-05, + "loss": 1.1476, "step": 2860 }, { - "epoch": 0.08107342231290204, + "epoch": 0.11194146646842476, "grad_norm": 0.0, - "learning_rate": 1.986354942112637e-05, - "loss": 1.0919, + "learning_rate": 1.9649986243343335e-05, + "loss": 1.2916, "step": 2861 }, { - "epoch": 0.0811017597551645, + "epoch": 0.1119805931606542, "grad_norm": 0.0, - "learning_rate": 1.986339828145341e-05, - "loss": 1.0728, + "learning_rate": 1.964965382823766e-05, + "loss": 1.2771, "step": 2862 }, { - "epoch": 0.08113009719742696, + "epoch": 0.11201971985288364, "grad_norm": 0.0, - "learning_rate": 1.9863247058697383e-05, - "loss": 1.0567, + "learning_rate": 1.9649321258170634e-05, + "loss": 1.2462, "step": 2863 }, { - "epoch": 0.08115843463968943, + "epoch": 0.11205884654511307, "grad_norm": 0.0, - "learning_rate": 1.986309575285956e-05, - "loss": 1.0154, + "learning_rate": 1.96489885331476e-05, + "loss": 1.1943, "step": 2864 }, { - "epoch": 0.08118677208195188, + "epoch": 0.11209797323734251, "grad_norm": 0.0, - "learning_rate": 1.986294436394122e-05, - "loss": 1.115, + "learning_rate": 1.9648655653173906e-05, + "loss": 1.2294, "step": 2865 }, { - "epoch": 0.08121510952421435, + "epoch": 0.11213709992957195, "grad_norm": 0.0, - "learning_rate": 1.9862792891943634e-05, - "loss": 1.0879, + "learning_rate": 1.9648322618254888e-05, + "loss": 1.2223, "step": 2866 }, { - "epoch": 0.08124344696647681, + "epoch": 0.11217622662180139, "grad_norm": 0.0, - "learning_rate": 1.9862641336868087e-05, - "loss": 0.9942, + "learning_rate": 1.96479894283959e-05, + "loss": 1.1136, "step": 2867 }, { - "epoch": 0.08127178440873926, + "epoch": 0.11221535331403083, "grad_norm": 0.0, - "learning_rate": 1.9862489698715847e-05, - "loss": 1.1025, + "learning_rate": 1.9647656083602292e-05, + "loss": 1.37, "step": 2868 }, { - "epoch": 0.08130012185100173, + "epoch": 0.11225448000626027, "grad_norm": 0.0, - "learning_rate": 1.9862337977488194e-05, - "loss": 1.0286, + "learning_rate": 1.964732258387942e-05, + "loss": 1.2302, "step": 2869 }, { - "epoch": 0.0813284592932642, + "epoch": 0.11229360669848971, "grad_norm": 0.0, - "learning_rate": 1.9862186173186406e-05, - "loss": 1.1483, + "learning_rate": 1.964698892923263e-05, + "loss": 1.1993, "step": 2870 }, { - "epoch": 0.08135679673552665, + "epoch": 0.11233273339071916, "grad_norm": 0.0, - "learning_rate": 1.986203428581176e-05, - "loss": 1.1105, + "learning_rate": 1.964665511966729e-05, + "loss": 1.1821, "step": 2871 }, { - "epoch": 0.08138513417778911, + "epoch": 0.11237186008294858, "grad_norm": 0.0, - "learning_rate": 1.986188231536554e-05, - "loss": 1.1034, + "learning_rate": 1.9646321155188755e-05, + "loss": 1.2419, "step": 2872 }, { - "epoch": 0.08141347162005158, + "epoch": 0.11241098677517802, "grad_norm": 0.0, - "learning_rate": 1.986173026184902e-05, - "loss": 1.0736, + "learning_rate": 1.964598703580239e-05, + "loss": 1.1566, "step": 2873 }, { - "epoch": 0.08144180906231403, + "epoch": 0.11245011346740746, "grad_norm": 0.0, - "learning_rate": 1.9861578125263484e-05, - "loss": 1.0719, + "learning_rate": 1.964565276151356e-05, + "loss": 1.2423, "step": 2874 }, { - "epoch": 0.0814701465045765, + "epoch": 0.1124892401596369, "grad_norm": 0.0, - "learning_rate": 1.9861425905610216e-05, - "loss": 1.2083, + "learning_rate": 1.9645318332327633e-05, + "loss": 1.2894, "step": 2875 }, { - "epoch": 0.08149848394683896, + "epoch": 0.11252836685186635, "grad_norm": 0.0, - "learning_rate": 1.9861273602890495e-05, - "loss": 1.0404, + "learning_rate": 1.9644983748249982e-05, + "loss": 1.1785, "step": 2876 }, { - "epoch": 0.08152682138910142, + "epoch": 0.11256749354409579, "grad_norm": 0.0, - "learning_rate": 1.9861121217105603e-05, - "loss": 1.0992, + "learning_rate": 1.9644649009285977e-05, + "loss": 1.1757, "step": 2877 }, { - "epoch": 0.08155515883136388, + "epoch": 0.11260662023632523, "grad_norm": 0.0, - "learning_rate": 1.9860968748256828e-05, - "loss": 1.1324, + "learning_rate": 1.9644314115440995e-05, + "loss": 1.3589, "step": 2878 }, { - "epoch": 0.08158349627362635, + "epoch": 0.11264574692855465, "grad_norm": 0.0, - "learning_rate": 1.986081619634545e-05, - "loss": 1.0368, + "learning_rate": 1.9643979066720412e-05, + "loss": 1.253, "step": 2879 }, { - "epoch": 0.0816118337158888, + "epoch": 0.1126848736207841, "grad_norm": 0.0, - "learning_rate": 1.9860663561372756e-05, - "loss": 0.9447, + "learning_rate": 1.964364386312961e-05, + "loss": 1.2175, "step": 2880 }, { - "epoch": 0.08164017115815127, + "epoch": 0.11272400031301354, "grad_norm": 0.0, - "learning_rate": 1.9860510843340027e-05, - "loss": 1.1104, + "learning_rate": 1.964330850467397e-05, + "loss": 1.2092, "step": 2881 }, { - "epoch": 0.08166850860041373, + "epoch": 0.11276312700524298, "grad_norm": 0.0, - "learning_rate": 1.986035804224856e-05, - "loss": 1.0815, + "learning_rate": 1.9642972991358883e-05, + "loss": 1.2203, "step": 2882 }, { - "epoch": 0.08169684604267619, + "epoch": 0.11280225369747242, "grad_norm": 0.0, - "learning_rate": 1.9860205158099632e-05, - "loss": 1.0205, + "learning_rate": 1.964263732318973e-05, + "loss": 1.1689, "step": 2883 }, { - "epoch": 0.08172518348493865, + "epoch": 0.11284138038970186, "grad_norm": 0.0, - "learning_rate": 1.9860052190894533e-05, - "loss": 1.1959, + "learning_rate": 1.9642301500171904e-05, + "loss": 1.257, "step": 2884 }, { - "epoch": 0.08175352092720112, + "epoch": 0.1128805070819313, "grad_norm": 0.0, - "learning_rate": 1.9859899140634557e-05, - "loss": 1.0907, + "learning_rate": 1.96419655223108e-05, + "loss": 1.0759, "step": 2885 }, { - "epoch": 0.08178185836946357, + "epoch": 0.11291963377416073, "grad_norm": 0.0, - "learning_rate": 1.9859746007320985e-05, - "loss": 1.1218, + "learning_rate": 1.9641629389611813e-05, + "loss": 1.3393, "step": 2886 }, { - "epoch": 0.08181019581172604, + "epoch": 0.11295876046639017, "grad_norm": 0.0, - "learning_rate": 1.9859592790955114e-05, - "loss": 1.1169, + "learning_rate": 1.964129310208034e-05, + "loss": 1.2073, "step": 2887 }, { - "epoch": 0.0818385332539885, + "epoch": 0.11299788715861961, "grad_norm": 0.0, - "learning_rate": 1.9859439491538232e-05, - "loss": 1.0685, + "learning_rate": 1.9640956659721775e-05, + "loss": 1.1423, "step": 2888 }, { - "epoch": 0.08186687069625095, + "epoch": 0.11303701385084905, "grad_norm": 0.0, - "learning_rate": 1.9859286109071626e-05, - "loss": 1.1708, + "learning_rate": 1.9640620062541532e-05, + "loss": 1.2097, "step": 2889 }, { - "epoch": 0.08189520813851342, + "epoch": 0.11307614054307849, "grad_norm": 0.0, - "learning_rate": 1.9859132643556597e-05, - "loss": 1.0984, + "learning_rate": 1.9640283310545012e-05, + "loss": 1.2206, "step": 2890 }, { - "epoch": 0.08192354558077587, + "epoch": 0.11311526723530793, "grad_norm": 0.0, - "learning_rate": 1.985897909499443e-05, - "loss": 1.1219, + "learning_rate": 1.963994640373762e-05, + "loss": 1.2523, "step": 2891 }, { - "epoch": 0.08195188302303834, + "epoch": 0.11315439392753737, "grad_norm": 0.0, - "learning_rate": 1.985882546338642e-05, - "loss": 1.1277, + "learning_rate": 1.9639609342124768e-05, + "loss": 1.2167, "step": 2892 }, { - "epoch": 0.0819802204653008, + "epoch": 0.1131935206197668, "grad_norm": 0.0, - "learning_rate": 1.9858671748733863e-05, - "loss": 1.0952, + "learning_rate": 1.963927212571187e-05, + "loss": 1.324, "step": 2893 }, { - "epoch": 0.08200855790756326, + "epoch": 0.11323264731199624, "grad_norm": 0.0, - "learning_rate": 1.9858517951038056e-05, - "loss": 1.0421, + "learning_rate": 1.963893475450434e-05, + "loss": 1.2341, "step": 2894 }, { - "epoch": 0.08203689534982572, + "epoch": 0.11327177400422568, "grad_norm": 0.0, - "learning_rate": 1.985836407030029e-05, - "loss": 1.0503, + "learning_rate": 1.9638597228507596e-05, + "loss": 1.2193, "step": 2895 }, { - "epoch": 0.08206523279208819, + "epoch": 0.11331090069645512, "grad_norm": 0.0, - "learning_rate": 1.985821010652186e-05, - "loss": 1.1386, + "learning_rate": 1.9638259547727058e-05, + "loss": 1.3411, "step": 2896 }, { - "epoch": 0.08209357023435064, + "epoch": 0.11335002738868456, "grad_norm": 0.0, - "learning_rate": 1.9858056059704068e-05, - "loss": 1.127, + "learning_rate": 1.963792171216815e-05, + "loss": 1.257, "step": 2897 }, { - "epoch": 0.0821219076766131, + "epoch": 0.113389154080914, "grad_norm": 0.0, - "learning_rate": 1.985790192984821e-05, - "loss": 0.9929, + "learning_rate": 1.9637583721836294e-05, + "loss": 1.1802, "step": 2898 }, { - "epoch": 0.08215024511887557, + "epoch": 0.11342828077314344, "grad_norm": 0.0, - "learning_rate": 1.985774771695558e-05, - "loss": 0.9807, + "learning_rate": 1.9637245576736923e-05, + "loss": 1.1543, "step": 2899 }, { - "epoch": 0.08217858256113802, + "epoch": 0.11346740746537287, "grad_norm": 0.0, - "learning_rate": 1.9857593421027483e-05, - "loss": 1.071, + "learning_rate": 1.963690727687546e-05, + "loss": 1.1737, "step": 2900 }, { - "epoch": 0.08220692000340049, + "epoch": 0.11350653415760231, "grad_norm": 0.0, - "learning_rate": 1.9857439042065215e-05, - "loss": 1.1364, + "learning_rate": 1.9636568822257345e-05, + "loss": 1.2693, "step": 2901 }, { - "epoch": 0.08223525744566296, + "epoch": 0.11354566084983175, "grad_norm": 0.0, - "learning_rate": 1.9857284580070077e-05, - "loss": 1.0837, + "learning_rate": 1.963623021288801e-05, + "loss": 1.222, "step": 2902 }, { - "epoch": 0.08226359488792541, + "epoch": 0.1135847875420612, "grad_norm": 0.0, - "learning_rate": 1.985713003504337e-05, - "loss": 1.1477, + "learning_rate": 1.9635891448772894e-05, + "loss": 1.2264, "step": 2903 }, { - "epoch": 0.08229193233018788, + "epoch": 0.11362391423429063, "grad_norm": 0.0, - "learning_rate": 1.98569754069864e-05, - "loss": 1.0889, + "learning_rate": 1.9635552529917433e-05, + "loss": 1.2833, "step": 2904 }, { - "epoch": 0.08232026977245034, + "epoch": 0.11366304092652008, "grad_norm": 0.0, - "learning_rate": 1.985682069590046e-05, - "loss": 1.0888, + "learning_rate": 1.9635213456327074e-05, + "loss": 1.2541, "step": 2905 }, { - "epoch": 0.0823486072147128, + "epoch": 0.11370216761874952, "grad_norm": 0.0, - "learning_rate": 1.985666590178686e-05, - "loss": 1.0876, + "learning_rate": 1.9634874228007262e-05, + "loss": 1.1522, "step": 2906 }, { - "epoch": 0.08237694465697526, + "epoch": 0.11374129431097894, "grad_norm": 0.0, - "learning_rate": 1.985651102464691e-05, - "loss": 1.0665, + "learning_rate": 1.963453484496344e-05, + "loss": 1.2626, "step": 2907 }, { - "epoch": 0.08240528209923773, + "epoch": 0.11378042100320838, "grad_norm": 0.0, - "learning_rate": 1.9856356064481902e-05, - "loss": 1.048, + "learning_rate": 1.963419530720106e-05, + "loss": 1.2087, "step": 2908 }, { - "epoch": 0.08243361954150018, + "epoch": 0.11381954769543783, "grad_norm": 0.0, - "learning_rate": 1.985620102129315e-05, - "loss": 1.1371, + "learning_rate": 1.963385561472558e-05, + "loss": 1.1992, "step": 2909 }, { - "epoch": 0.08246195698376264, + "epoch": 0.11385867438766727, "grad_norm": 0.0, - "learning_rate": 1.9856045895081952e-05, - "loss": 1.1315, + "learning_rate": 1.9633515767542448e-05, + "loss": 1.2354, "step": 2910 }, { - "epoch": 0.08249029442602511, + "epoch": 0.11389780107989671, "grad_norm": 0.0, - "learning_rate": 1.985589068584962e-05, - "loss": 1.0065, + "learning_rate": 1.9633175765657125e-05, + "loss": 1.0726, "step": 2911 }, { - "epoch": 0.08251863186828756, + "epoch": 0.11393692777212615, "grad_norm": 0.0, - "learning_rate": 1.9855735393597463e-05, - "loss": 1.0386, + "learning_rate": 1.9632835609075072e-05, + "loss": 1.2561, "step": 2912 }, { - "epoch": 0.08254696931055003, + "epoch": 0.11397605446435559, "grad_norm": 0.0, - "learning_rate": 1.9855580018326786e-05, - "loss": 1.1522, + "learning_rate": 1.963249529780175e-05, + "loss": 1.1329, "step": 2913 }, { - "epoch": 0.0825753067528125, + "epoch": 0.11401518115658502, "grad_norm": 0.0, - "learning_rate": 1.98554245600389e-05, - "loss": 1.133, + "learning_rate": 1.963215483184262e-05, + "loss": 1.2843, "step": 2914 }, { - "epoch": 0.08260364419507495, + "epoch": 0.11405430784881446, "grad_norm": 0.0, - "learning_rate": 1.9855269018735113e-05, - "loss": 1.0239, + "learning_rate": 1.963181421120315e-05, + "loss": 1.2226, "step": 2915 }, { - "epoch": 0.08263198163733741, + "epoch": 0.1140934345410439, "grad_norm": 0.0, - "learning_rate": 1.9855113394416736e-05, - "loss": 1.0728, + "learning_rate": 1.9631473435888822e-05, + "loss": 1.2336, "step": 2916 }, { - "epoch": 0.08266031907959988, + "epoch": 0.11413256123327334, "grad_norm": 0.0, - "learning_rate": 1.9854957687085078e-05, - "loss": 1.0256, + "learning_rate": 1.9631132505905095e-05, + "loss": 1.1706, "step": 2917 }, { - "epoch": 0.08268865652186233, + "epoch": 0.11417168792550278, "grad_norm": 0.0, - "learning_rate": 1.9854801896741453e-05, - "loss": 1.1686, + "learning_rate": 1.9630791421257447e-05, + "loss": 1.3123, "step": 2918 }, { - "epoch": 0.0827169939641248, + "epoch": 0.11421081461773222, "grad_norm": 0.0, - "learning_rate": 1.9854646023387173e-05, - "loss": 1.1617, + "learning_rate": 1.9630450181951362e-05, + "loss": 1.2655, "step": 2919 }, { - "epoch": 0.08274533140638726, + "epoch": 0.11424994130996166, "grad_norm": 0.0, - "learning_rate": 1.9854490067023546e-05, - "loss": 1.0114, + "learning_rate": 1.963010878799231e-05, + "loss": 1.1207, "step": 2920 }, { - "epoch": 0.08277366884864971, + "epoch": 0.11428906800219109, "grad_norm": 0.0, - "learning_rate": 1.9854334027651892e-05, - "loss": 1.0827, + "learning_rate": 1.962976723938578e-05, + "loss": 1.1572, "step": 2921 }, { - "epoch": 0.08280200629091218, + "epoch": 0.11432819469442053, "grad_norm": 0.0, - "learning_rate": 1.9854177905273524e-05, - "loss": 1.1406, + "learning_rate": 1.9629425536137253e-05, + "loss": 1.2174, "step": 2922 }, { - "epoch": 0.08283034373317465, + "epoch": 0.11436732138664997, "grad_norm": 0.0, - "learning_rate": 1.9854021699889756e-05, - "loss": 1.1166, + "learning_rate": 1.9629083678252222e-05, + "loss": 1.2844, "step": 2923 }, { - "epoch": 0.0828586811754371, + "epoch": 0.11440644807887941, "grad_norm": 0.0, - "learning_rate": 1.98538654115019e-05, - "loss": 1.0906, + "learning_rate": 1.962874166573617e-05, + "loss": 1.2619, "step": 2924 }, { - "epoch": 0.08288701861769956, + "epoch": 0.11444557477110885, "grad_norm": 0.0, - "learning_rate": 1.9853709040111283e-05, - "loss": 1.1975, + "learning_rate": 1.962839949859459e-05, + "loss": 1.2829, "step": 2925 }, { - "epoch": 0.08291535605996203, + "epoch": 0.11448470146333829, "grad_norm": 0.0, - "learning_rate": 1.985355258571921e-05, - "loss": 1.007, + "learning_rate": 1.9628057176832986e-05, + "loss": 1.2003, "step": 2926 }, { - "epoch": 0.08294369350222448, + "epoch": 0.11452382815556773, "grad_norm": 0.0, - "learning_rate": 1.985339604832701e-05, - "loss": 1.1346, + "learning_rate": 1.9627714700456844e-05, + "loss": 1.238, "step": 2927 }, { - "epoch": 0.08297203094448695, + "epoch": 0.11456295484779716, "grad_norm": 0.0, - "learning_rate": 1.985323942793599e-05, - "loss": 1.1358, + "learning_rate": 1.9627372069471668e-05, + "loss": 1.2435, "step": 2928 }, { - "epoch": 0.08300036838674942, + "epoch": 0.1146020815400266, "grad_norm": 0.0, - "learning_rate": 1.9853082724547482e-05, - "loss": 1.0165, + "learning_rate": 1.962702928388296e-05, + "loss": 1.2025, "step": 2929 }, { - "epoch": 0.08302870582901187, + "epoch": 0.11464120823225604, "grad_norm": 0.0, - "learning_rate": 1.9852925938162795e-05, - "loss": 1.1275, + "learning_rate": 1.9626686343696227e-05, + "loss": 1.2721, "step": 2930 }, { - "epoch": 0.08305704327127433, + "epoch": 0.11468033492448548, "grad_norm": 0.0, - "learning_rate": 1.985276906878326e-05, - "loss": 1.1423, + "learning_rate": 1.9626343248916972e-05, + "loss": 1.25, "step": 2931 }, { - "epoch": 0.0830853807135368, + "epoch": 0.11471946161671492, "grad_norm": 0.0, - "learning_rate": 1.9852612116410184e-05, - "loss": 1.0939, + "learning_rate": 1.9625999999550708e-05, + "loss": 1.2314, "step": 2932 }, { - "epoch": 0.08311371815579925, + "epoch": 0.11475858830894436, "grad_norm": 0.0, - "learning_rate": 1.9852455081044902e-05, - "loss": 1.0115, + "learning_rate": 1.9625656595602947e-05, + "loss": 1.2749, "step": 2933 }, { - "epoch": 0.08314205559806172, + "epoch": 0.1147977150011738, "grad_norm": 0.0, - "learning_rate": 1.985229796268873e-05, - "loss": 1.0919, + "learning_rate": 1.96253130370792e-05, + "loss": 1.2615, "step": 2934 }, { - "epoch": 0.08317039304032418, + "epoch": 0.11483684169340325, "grad_norm": 0.0, - "learning_rate": 1.9852140761342997e-05, - "loss": 1.0871, + "learning_rate": 1.9624969323984994e-05, + "loss": 1.2669, "step": 2935 }, { - "epoch": 0.08319873048258664, + "epoch": 0.11487596838563267, "grad_norm": 0.0, - "learning_rate": 1.985198347700902e-05, - "loss": 1.1105, + "learning_rate": 1.962462545632583e-05, + "loss": 1.0994, "step": 2936 }, { - "epoch": 0.0832270679248491, + "epoch": 0.11491509507786211, "grad_norm": 0.0, - "learning_rate": 1.985182610968813e-05, - "loss": 0.9424, + "learning_rate": 1.962428143410725e-05, + "loss": 1.2295, "step": 2937 }, { - "epoch": 0.08325540536711157, + "epoch": 0.11495422177009156, "grad_norm": 0.0, - "learning_rate": 1.985166865938165e-05, - "loss": 1.0992, + "learning_rate": 1.9623937257334767e-05, + "loss": 1.2246, "step": 2938 }, { - "epoch": 0.08328374280937402, + "epoch": 0.114993348462321, "grad_norm": 0.0, - "learning_rate": 1.9851511126090908e-05, - "loss": 1.0424, + "learning_rate": 1.9623592926013915e-05, + "loss": 1.3218, "step": 2939 }, { - "epoch": 0.08331208025163649, + "epoch": 0.11503247515455044, "grad_norm": 0.0, - "learning_rate": 1.9851353509817228e-05, - "loss": 1.2083, + "learning_rate": 1.9623248440150212e-05, + "loss": 1.222, "step": 2940 }, { - "epoch": 0.08334041769389895, + "epoch": 0.11507160184677988, "grad_norm": 0.0, - "learning_rate": 1.9851195810561935e-05, - "loss": 1.105, + "learning_rate": 1.9622903799749203e-05, + "loss": 1.3933, "step": 2941 }, { - "epoch": 0.0833687551361614, + "epoch": 0.11511072853900932, "grad_norm": 0.0, - "learning_rate": 1.9851038028326368e-05, - "loss": 1.0077, + "learning_rate": 1.9622559004816418e-05, + "loss": 1.1322, "step": 2942 }, { - "epoch": 0.08339709257842387, + "epoch": 0.11514985523123875, "grad_norm": 0.0, - "learning_rate": 1.985088016311185e-05, - "loss": 1.203, + "learning_rate": 1.9622214055357393e-05, + "loss": 1.1219, "step": 2943 }, { - "epoch": 0.08342543002068634, + "epoch": 0.11518898192346819, "grad_norm": 0.0, - "learning_rate": 1.9850722214919706e-05, - "loss": 1.0995, + "learning_rate": 1.9621868951377664e-05, + "loss": 1.227, "step": 2944 }, { - "epoch": 0.08345376746294879, + "epoch": 0.11522810861569763, "grad_norm": 0.0, - "learning_rate": 1.985056418375127e-05, - "loss": 1.1053, + "learning_rate": 1.962152369288278e-05, + "loss": 1.1367, "step": 2945 }, { - "epoch": 0.08348210490521125, + "epoch": 0.11526723530792707, "grad_norm": 0.0, - "learning_rate": 1.985040606960788e-05, - "loss": 1.1217, + "learning_rate": 1.9621178279878277e-05, + "loss": 1.2072, "step": 2946 }, { - "epoch": 0.08351044234747372, + "epoch": 0.11530636200015651, "grad_norm": 0.0, - "learning_rate": 1.9850247872490855e-05, - "loss": 1.0989, + "learning_rate": 1.9620832712369712e-05, + "loss": 1.2796, "step": 2947 }, { - "epoch": 0.08353877978973617, + "epoch": 0.11534548869238595, "grad_norm": 0.0, - "learning_rate": 1.985008959240154e-05, - "loss": 1.001, + "learning_rate": 1.962048699036263e-05, + "loss": 1.1904, "step": 2948 }, { - "epoch": 0.08356711723199864, + "epoch": 0.11538461538461539, "grad_norm": 0.0, - "learning_rate": 1.9849931229341258e-05, - "loss": 1.1493, + "learning_rate": 1.9620141113862578e-05, + "loss": 1.1425, "step": 2949 }, { - "epoch": 0.0835954546742611, + "epoch": 0.11542374207684482, "grad_norm": 0.0, - "learning_rate": 1.984977278331135e-05, - "loss": 1.1741, + "learning_rate": 1.9619795082875118e-05, + "loss": 1.2401, "step": 2950 }, { - "epoch": 0.08362379211652356, + "epoch": 0.11546286876907426, "grad_norm": 0.0, - "learning_rate": 1.9849614254313147e-05, - "loss": 1.1618, + "learning_rate": 1.96194488974058e-05, + "loss": 1.2546, "step": 2951 }, { - "epoch": 0.08365212955878602, + "epoch": 0.1155019954613037, "grad_norm": 0.0, - "learning_rate": 1.984945564234799e-05, - "loss": 1.0872, + "learning_rate": 1.9619102557460188e-05, + "loss": 1.2434, "step": 2952 }, { - "epoch": 0.08368046700104849, + "epoch": 0.11554112215353314, "grad_norm": 0.0, - "learning_rate": 1.9849296947417206e-05, - "loss": 1.1373, + "learning_rate": 1.961875606304384e-05, + "loss": 1.1864, "step": 2953 }, { - "epoch": 0.08370880444331094, + "epoch": 0.11558024884576258, "grad_norm": 0.0, - "learning_rate": 1.984913816952214e-05, - "loss": 1.02, + "learning_rate": 1.9618409414162326e-05, + "loss": 1.3434, "step": 2954 }, { - "epoch": 0.08373714188557341, + "epoch": 0.11561937553799202, "grad_norm": 0.0, - "learning_rate": 1.984897930866412e-05, - "loss": 1.0572, + "learning_rate": 1.961806261082121e-05, + "loss": 1.2977, "step": 2955 }, { - "epoch": 0.08376547932783587, + "epoch": 0.11565850223022146, "grad_norm": 0.0, - "learning_rate": 1.9848820364844494e-05, - "loss": 1.1358, + "learning_rate": 1.9617715653026056e-05, + "loss": 1.3497, "step": 2956 }, { - "epoch": 0.08379381677009833, + "epoch": 0.11569762892245089, "grad_norm": 0.0, - "learning_rate": 1.9848661338064597e-05, - "loss": 1.04, + "learning_rate": 1.9617368540782444e-05, + "loss": 1.0754, "step": 2957 }, { - "epoch": 0.08382215421236079, + "epoch": 0.11573675561468033, "grad_norm": 0.0, - "learning_rate": 1.9848502228325764e-05, - "loss": 1.0859, + "learning_rate": 1.9617021274095945e-05, + "loss": 1.261, "step": 2958 }, { - "epoch": 0.08385049165462326, + "epoch": 0.11577588230690977, "grad_norm": 0.0, - "learning_rate": 1.9848343035629345e-05, - "loss": 0.9892, + "learning_rate": 1.9616673852972133e-05, + "loss": 1.265, "step": 2959 }, { - "epoch": 0.08387882909688571, + "epoch": 0.11581500899913921, "grad_norm": 0.0, - "learning_rate": 1.9848183759976674e-05, - "loss": 0.9995, + "learning_rate": 1.9616326277416587e-05, + "loss": 1.1463, "step": 2960 }, { - "epoch": 0.08390716653914818, + "epoch": 0.11585413569136865, "grad_norm": 0.0, - "learning_rate": 1.9848024401369092e-05, - "loss": 1.0896, + "learning_rate": 1.9615978547434896e-05, + "loss": 1.1737, "step": 2961 }, { - "epoch": 0.08393550398141064, + "epoch": 0.1158932623835981, "grad_norm": 0.0, - "learning_rate": 1.9847864959807946e-05, - "loss": 1.0728, + "learning_rate": 1.9615630663032635e-05, + "loss": 1.1252, "step": 2962 }, { - "epoch": 0.0839638414236731, + "epoch": 0.11593238907582754, "grad_norm": 0.0, - "learning_rate": 1.9847705435294575e-05, - "loss": 1.0055, + "learning_rate": 1.9615282624215397e-05, + "loss": 1.2376, "step": 2963 }, { - "epoch": 0.08399217886593556, + "epoch": 0.11597151576805696, "grad_norm": 0.0, - "learning_rate": 1.9847545827830327e-05, - "loss": 1.0578, + "learning_rate": 1.961493443098877e-05, + "loss": 1.1813, "step": 2964 }, { - "epoch": 0.08402051630819803, + "epoch": 0.1160106424602864, "grad_norm": 0.0, - "learning_rate": 1.984738613741654e-05, - "loss": 1.0571, + "learning_rate": 1.961458608335834e-05, + "loss": 1.2247, "step": 2965 }, { - "epoch": 0.08404885375046048, + "epoch": 0.11604976915251584, "grad_norm": 0.0, - "learning_rate": 1.9847226364054567e-05, - "loss": 1.0249, + "learning_rate": 1.9614237581329707e-05, + "loss": 1.2148, "step": 2966 }, { - "epoch": 0.08407719119272294, + "epoch": 0.11608889584474529, "grad_norm": 0.0, - "learning_rate": 1.9847066507745744e-05, - "loss": 1.1222, + "learning_rate": 1.9613888924908468e-05, + "loss": 1.2306, "step": 2967 }, { - "epoch": 0.08410552863498541, + "epoch": 0.11612802253697473, "grad_norm": 0.0, - "learning_rate": 1.984690656849143e-05, - "loss": 1.0893, + "learning_rate": 1.9613540114100214e-05, + "loss": 1.1149, "step": 2968 }, { - "epoch": 0.08413386607724786, + "epoch": 0.11616714922920417, "grad_norm": 0.0, - "learning_rate": 1.9846746546292958e-05, - "loss": 1.032, + "learning_rate": 1.961319114891056e-05, + "loss": 1.322, "step": 2969 }, { - "epoch": 0.08416220351951033, + "epoch": 0.11620627592143361, "grad_norm": 0.0, - "learning_rate": 1.984658644115169e-05, - "loss": 1.0661, + "learning_rate": 1.96128420293451e-05, + "loss": 1.2139, "step": 2970 }, { - "epoch": 0.0841905409617728, + "epoch": 0.11624540261366303, "grad_norm": 0.0, - "learning_rate": 1.9846426253068963e-05, - "loss": 1.1204, + "learning_rate": 1.961249275540944e-05, + "loss": 1.2635, "step": 2971 }, { - "epoch": 0.08421887840403525, + "epoch": 0.11628452930589248, "grad_norm": 0.0, - "learning_rate": 1.9846265982046134e-05, - "loss": 0.9626, + "learning_rate": 1.961214332710919e-05, + "loss": 1.2184, "step": 2972 }, { - "epoch": 0.08424721584629771, + "epoch": 0.11632365599812192, "grad_norm": 0.0, - "learning_rate": 1.9846105628084553e-05, - "loss": 1.1655, + "learning_rate": 1.9611793744449964e-05, + "loss": 1.2052, "step": 2973 }, { - "epoch": 0.08427555328856018, + "epoch": 0.11636278269035136, "grad_norm": 0.0, - "learning_rate": 1.9845945191185564e-05, - "loss": 1.0271, + "learning_rate": 1.961144400743738e-05, + "loss": 1.1665, "step": 2974 }, { - "epoch": 0.08430389073082263, + "epoch": 0.1164019093825808, "grad_norm": 0.0, - "learning_rate": 1.984578467135052e-05, - "loss": 1.152, + "learning_rate": 1.9611094116077042e-05, + "loss": 1.2104, "step": 2975 }, { - "epoch": 0.0843322281730851, + "epoch": 0.11644103607481024, "grad_norm": 0.0, - "learning_rate": 1.984562406858078e-05, - "loss": 1.0253, + "learning_rate": 1.9610744070374583e-05, + "loss": 1.2936, "step": 2976 }, { - "epoch": 0.08436056561534756, + "epoch": 0.11648016276703968, "grad_norm": 0.0, - "learning_rate": 1.984546338287769e-05, - "loss": 1.1982, + "learning_rate": 1.961039387033561e-05, + "loss": 1.1256, "step": 2977 }, { - "epoch": 0.08438890305761002, + "epoch": 0.11651928945926911, "grad_norm": 0.0, - "learning_rate": 1.9845302614242608e-05, - "loss": 1.0363, + "learning_rate": 1.9610043515965757e-05, + "loss": 1.3054, "step": 2978 }, { - "epoch": 0.08441724049987248, + "epoch": 0.11655841615149855, "grad_norm": 0.0, - "learning_rate": 1.9845141762676885e-05, - "loss": 1.0858, + "learning_rate": 1.9609693007270647e-05, + "loss": 1.1966, "step": 2979 }, { - "epoch": 0.08444557794213495, + "epoch": 0.11659754284372799, "grad_norm": 0.0, - "learning_rate": 1.9844980828181876e-05, - "loss": 1.1335, + "learning_rate": 1.960934234425591e-05, + "loss": 1.1532, "step": 2980 }, { - "epoch": 0.0844739153843974, + "epoch": 0.11663666953595743, "grad_norm": 0.0, - "learning_rate": 1.984481981075894e-05, - "loss": 1.199, + "learning_rate": 1.9608991526927176e-05, + "loss": 1.1965, "step": 2981 }, { - "epoch": 0.08450225282665987, + "epoch": 0.11667579622818687, "grad_norm": 0.0, - "learning_rate": 1.9844658710409428e-05, - "loss": 1.0612, + "learning_rate": 1.9608640555290077e-05, + "loss": 1.2074, "step": 2982 }, { - "epoch": 0.08453059026892233, + "epoch": 0.11671492292041631, "grad_norm": 0.0, - "learning_rate": 1.9844497527134703e-05, - "loss": 1.0569, + "learning_rate": 1.960828942935025e-05, + "loss": 1.2655, "step": 2983 }, { - "epoch": 0.08455892771118478, + "epoch": 0.11675404961264575, "grad_norm": 0.0, - "learning_rate": 1.984433626093612e-05, - "loss": 1.1121, + "learning_rate": 1.960793814911334e-05, + "loss": 1.2532, "step": 2984 }, { - "epoch": 0.08458726515344725, + "epoch": 0.11679317630487518, "grad_norm": 0.0, - "learning_rate": 1.9844174911815034e-05, - "loss": 1.1471, + "learning_rate": 1.9607586714584977e-05, + "loss": 1.174, "step": 2985 }, { - "epoch": 0.08461560259570972, + "epoch": 0.11683230299710462, "grad_norm": 0.0, - "learning_rate": 1.9844013479772808e-05, - "loss": 1.0953, + "learning_rate": 1.9607235125770816e-05, + "loss": 1.3483, "step": 2986 }, { - "epoch": 0.08464394003797217, + "epoch": 0.11687142968933406, "grad_norm": 0.0, - "learning_rate": 1.98438519648108e-05, - "loss": 1.0705, + "learning_rate": 1.9606883382676493e-05, + "loss": 1.2077, "step": 2987 }, { - "epoch": 0.08467227748023463, + "epoch": 0.1169105563815635, "grad_norm": 0.0, - "learning_rate": 1.9843690366930374e-05, - "loss": 1.0753, + "learning_rate": 1.9606531485307664e-05, + "loss": 1.2349, "step": 2988 }, { - "epoch": 0.0847006149224971, + "epoch": 0.11694968307379294, "grad_norm": 0.0, - "learning_rate": 1.984352868613289e-05, - "loss": 1.1442, + "learning_rate": 1.960617943366997e-05, + "loss": 1.2697, "step": 2989 }, { - "epoch": 0.08472895236475955, + "epoch": 0.11698880976602238, "grad_norm": 0.0, - "learning_rate": 1.9843366922419704e-05, - "loss": 0.9807, + "learning_rate": 1.960582722776908e-05, + "loss": 1.1312, "step": 2990 }, { - "epoch": 0.08475728980702202, + "epoch": 0.11702793645825182, "grad_norm": 0.0, - "learning_rate": 1.9843205075792187e-05, - "loss": 1.094, + "learning_rate": 1.9605474867610636e-05, + "loss": 1.2018, "step": 2991 }, { - "epoch": 0.08478562724928448, + "epoch": 0.11706706315048125, "grad_norm": 0.0, - "learning_rate": 1.9843043146251698e-05, - "loss": 1.0728, + "learning_rate": 1.9605122353200308e-05, + "loss": 1.2532, "step": 2992 }, { - "epoch": 0.08481396469154694, + "epoch": 0.11710618984271069, "grad_norm": 0.0, - "learning_rate": 1.98428811337996e-05, - "loss": 1.0352, + "learning_rate": 1.9604769684543745e-05, + "loss": 1.1836, "step": 2993 }, { - "epoch": 0.0848423021338094, + "epoch": 0.11714531653494013, "grad_norm": 0.0, - "learning_rate": 1.984271903843726e-05, - "loss": 1.0453, + "learning_rate": 1.960441686164662e-05, + "loss": 0.9823, "step": 2994 }, { - "epoch": 0.08487063957607187, + "epoch": 0.11718444322716957, "grad_norm": 0.0, - "learning_rate": 1.984255686016604e-05, - "loss": 0.979, + "learning_rate": 1.9604063884514592e-05, + "loss": 1.1428, "step": 2995 }, { - "epoch": 0.08489897701833432, + "epoch": 0.11722356991939901, "grad_norm": 0.0, - "learning_rate": 1.9842394598987313e-05, - "loss": 1.1074, + "learning_rate": 1.9603710753153335e-05, + "loss": 1.3118, "step": 2996 }, { - "epoch": 0.08492731446059679, + "epoch": 0.11726269661162846, "grad_norm": 0.0, - "learning_rate": 1.9842232254902435e-05, - "loss": 1.1525, + "learning_rate": 1.9603357467568514e-05, + "loss": 1.2168, "step": 2997 }, { - "epoch": 0.08495565190285925, + "epoch": 0.1173018233038579, "grad_norm": 0.0, - "learning_rate": 1.9842069827912787e-05, - "loss": 1.1325, + "learning_rate": 1.960300402776581e-05, + "loss": 1.1177, "step": 2998 }, { - "epoch": 0.0849839893451217, + "epoch": 0.11734094999608734, "grad_norm": 0.0, - "learning_rate": 1.9841907318019726e-05, - "loss": 1.1185, + "learning_rate": 1.9602650433750893e-05, + "loss": 1.3507, "step": 2999 }, { - "epoch": 0.08501232678738417, + "epoch": 0.11738007668831676, "grad_norm": 0.0, - "learning_rate": 1.9841744725224626e-05, - "loss": 0.9764, + "learning_rate": 1.9602296685529442e-05, + "loss": 1.3184, "step": 3000 }, { - "epoch": 0.08504066422964664, + "epoch": 0.1174192033805462, "grad_norm": 0.0, - "learning_rate": 1.9841582049528856e-05, - "loss": 1.1576, + "learning_rate": 1.9601942783107138e-05, + "loss": 1.23, "step": 3001 }, { - "epoch": 0.08506900167190909, + "epoch": 0.11745833007277565, "grad_norm": 0.0, - "learning_rate": 1.9841419290933786e-05, - "loss": 1.1029, + "learning_rate": 1.960158872648967e-05, + "loss": 1.2313, "step": 3002 }, { - "epoch": 0.08509733911417156, + "epoch": 0.11749745676500509, "grad_norm": 0.0, - "learning_rate": 1.9841256449440783e-05, - "loss": 1.0123, + "learning_rate": 1.9601234515682712e-05, + "loss": 1.0298, "step": 3003 }, { - "epoch": 0.08512567655643402, + "epoch": 0.11753658345723453, "grad_norm": 0.0, - "learning_rate": 1.9841093525051227e-05, - "loss": 1.1049, + "learning_rate": 1.960088015069196e-05, + "loss": 1.2634, "step": 3004 }, { - "epoch": 0.08515401399869647, + "epoch": 0.11757571014946397, "grad_norm": 0.0, - "learning_rate": 1.9840930517766484e-05, - "loss": 1.0391, + "learning_rate": 1.9600525631523108e-05, + "loss": 1.2368, "step": 3005 }, { - "epoch": 0.08518235144095894, + "epoch": 0.11761483684169341, "grad_norm": 0.0, - "learning_rate": 1.9840767427587932e-05, - "loss": 1.0722, + "learning_rate": 1.9600170958181838e-05, + "loss": 1.2236, "step": 3006 }, { - "epoch": 0.0852106888832214, + "epoch": 0.11765396353392284, "grad_norm": 0.0, - "learning_rate": 1.984060425451694e-05, - "loss": 1.0992, + "learning_rate": 1.959981613067386e-05, + "loss": 1.2992, "step": 3007 }, { - "epoch": 0.08523902632548386, + "epoch": 0.11769309022615228, "grad_norm": 0.0, - "learning_rate": 1.9840440998554883e-05, - "loss": 0.9975, + "learning_rate": 1.9599461149004857e-05, + "loss": 1.2113, "step": 3008 }, { - "epoch": 0.08526736376774632, + "epoch": 0.11773221691838172, "grad_norm": 0.0, - "learning_rate": 1.9840277659703138e-05, - "loss": 1.1404, + "learning_rate": 1.959910601318054e-05, + "loss": 1.1097, "step": 3009 }, { - "epoch": 0.08529570121000879, + "epoch": 0.11777134361061116, "grad_norm": 0.0, - "learning_rate": 1.9840114237963076e-05, - "loss": 1.107, + "learning_rate": 1.9598750723206606e-05, + "loss": 1.3312, "step": 3010 }, { - "epoch": 0.08532403865227124, + "epoch": 0.1178104703028406, "grad_norm": 0.0, - "learning_rate": 1.9839950733336084e-05, - "loss": 1.0539, + "learning_rate": 1.9598395279088765e-05, + "loss": 1.1579, "step": 3011 }, { - "epoch": 0.08535237609453371, + "epoch": 0.11784959699507004, "grad_norm": 0.0, - "learning_rate": 1.983978714582353e-05, - "loss": 1.1037, + "learning_rate": 1.9598039680832724e-05, + "loss": 1.2227, "step": 3012 }, { - "epoch": 0.08538071353679617, + "epoch": 0.11788872368729948, "grad_norm": 0.0, - "learning_rate": 1.9839623475426795e-05, - "loss": 1.0165, + "learning_rate": 1.9597683928444195e-05, + "loss": 1.1776, "step": 3013 }, { - "epoch": 0.08540905097905863, + "epoch": 0.11792785037952891, "grad_norm": 0.0, - "learning_rate": 1.983945972214726e-05, - "loss": 1.172, + "learning_rate": 1.9597328021928886e-05, + "loss": 1.1709, "step": 3014 }, { - "epoch": 0.08543738842132109, + "epoch": 0.11796697707175835, "grad_norm": 0.0, - "learning_rate": 1.98392958859863e-05, - "loss": 1.0317, + "learning_rate": 1.9596971961292514e-05, + "loss": 1.1575, "step": 3015 }, { - "epoch": 0.08546572586358356, + "epoch": 0.11800610376398779, "grad_norm": 0.0, - "learning_rate": 1.9839131966945297e-05, - "loss": 1.1346, + "learning_rate": 1.9596615746540798e-05, + "loss": 1.1876, "step": 3016 }, { - "epoch": 0.08549406330584601, + "epoch": 0.11804523045621723, "grad_norm": 0.0, - "learning_rate": 1.9838967965025627e-05, - "loss": 1.1731, + "learning_rate": 1.959625937767946e-05, + "loss": 1.2363, "step": 3017 }, { - "epoch": 0.08552240074810848, + "epoch": 0.11808435714844667, "grad_norm": 0.0, - "learning_rate": 1.9838803880228682e-05, - "loss": 1.0384, + "learning_rate": 1.959590285471422e-05, + "loss": 1.1892, "step": 3018 }, { - "epoch": 0.08555073819037094, + "epoch": 0.11812348384067611, "grad_norm": 0.0, - "learning_rate": 1.9838639712555842e-05, - "loss": 1.1313, + "learning_rate": 1.9595546177650807e-05, + "loss": 1.2354, "step": 3019 }, { - "epoch": 0.0855790756326334, + "epoch": 0.11816261053290555, "grad_norm": 0.0, - "learning_rate": 1.9838475462008478e-05, - "loss": 1.02, + "learning_rate": 1.9595189346494943e-05, + "loss": 1.1965, "step": 3020 }, { - "epoch": 0.08560741307489586, + "epoch": 0.11820173722513498, "grad_norm": 0.0, - "learning_rate": 1.9838311128587987e-05, - "loss": 1.0097, + "learning_rate": 1.9594832361252364e-05, + "loss": 1.1741, "step": 3021 }, { - "epoch": 0.08563575051715833, + "epoch": 0.11824086391736442, "grad_norm": 0.0, - "learning_rate": 1.9838146712295747e-05, - "loss": 1.0354, + "learning_rate": 1.9594475221928797e-05, + "loss": 1.1715, "step": 3022 }, { - "epoch": 0.08566408795942078, + "epoch": 0.11827999060959386, "grad_norm": 0.0, - "learning_rate": 1.9837982213133144e-05, - "loss": 0.9756, + "learning_rate": 1.9594117928529984e-05, + "loss": 1.2479, "step": 3023 }, { - "epoch": 0.08569242540168324, + "epoch": 0.1183191173018233, "grad_norm": 0.0, - "learning_rate": 1.983781763110156e-05, - "loss": 1.0657, + "learning_rate": 1.9593760481061655e-05, + "loss": 1.1636, "step": 3024 }, { - "epoch": 0.08572076284394571, + "epoch": 0.11835824399405274, "grad_norm": 0.0, - "learning_rate": 1.983765296620239e-05, - "loss": 1.0364, + "learning_rate": 1.959340287952956e-05, + "loss": 1.1867, "step": 3025 }, { - "epoch": 0.08574910028620816, + "epoch": 0.11839737068628219, "grad_norm": 0.0, - "learning_rate": 1.9837488218437012e-05, - "loss": 1.0583, + "learning_rate": 1.9593045123939433e-05, + "loss": 1.205, "step": 3026 }, { - "epoch": 0.08577743772847063, + "epoch": 0.11843649737851163, "grad_norm": 0.0, - "learning_rate": 1.983732338780682e-05, - "loss": 1.0968, + "learning_rate": 1.9592687214297022e-05, + "loss": 1.222, "step": 3027 }, { - "epoch": 0.0858057751707331, + "epoch": 0.11847562407074105, "grad_norm": 0.0, - "learning_rate": 1.9837158474313198e-05, - "loss": 1.1124, + "learning_rate": 1.9592329150608074e-05, + "loss": 1.2007, "step": 3028 }, { - "epoch": 0.08583411261299555, + "epoch": 0.1185147507629705, "grad_norm": 0.0, - "learning_rate": 1.983699347795754e-05, - "loss": 1.0727, + "learning_rate": 1.959197093287834e-05, + "loss": 1.2657, "step": 3029 }, { - "epoch": 0.08586245005525801, + "epoch": 0.11855387745519994, "grad_norm": 0.0, - "learning_rate": 1.9836828398741234e-05, - "loss": 1.0477, + "learning_rate": 1.9591612561113574e-05, + "loss": 1.2141, "step": 3030 }, { - "epoch": 0.08589078749752048, + "epoch": 0.11859300414742938, "grad_norm": 0.0, - "learning_rate": 1.9836663236665666e-05, - "loss": 1.1523, + "learning_rate": 1.9591254035319526e-05, + "loss": 1.2358, "step": 3031 }, { - "epoch": 0.08591912493978293, + "epoch": 0.11863213083965882, "grad_norm": 0.0, - "learning_rate": 1.983649799173223e-05, - "loss": 1.0283, + "learning_rate": 1.959089535550196e-05, + "loss": 1.1337, "step": 3032 }, { - "epoch": 0.0859474623820454, + "epoch": 0.11867125753188826, "grad_norm": 0.0, - "learning_rate": 1.9836332663942323e-05, - "loss": 1.2161, + "learning_rate": 1.9590536521666633e-05, + "loss": 1.1425, "step": 3033 }, { - "epoch": 0.08597579982430786, + "epoch": 0.1187103842241177, "grad_norm": 0.0, - "learning_rate": 1.983616725329733e-05, - "loss": 1.047, + "learning_rate": 1.9590177533819304e-05, + "loss": 1.1985, "step": 3034 }, { - "epoch": 0.08600413726657032, + "epoch": 0.11874951091634713, "grad_norm": 0.0, - "learning_rate": 1.9836001759798647e-05, - "loss": 1.1454, + "learning_rate": 1.9589818391965742e-05, + "loss": 1.1699, "step": 3035 }, { - "epoch": 0.08603247470883278, + "epoch": 0.11878863760857657, "grad_norm": 0.0, - "learning_rate": 1.983583618344767e-05, - "loss": 1.0008, + "learning_rate": 1.9589459096111714e-05, + "loss": 1.1105, "step": 3036 }, { - "epoch": 0.08606081215109525, + "epoch": 0.11882776430080601, "grad_norm": 0.0, - "learning_rate": 1.9835670524245793e-05, - "loss": 1.1606, + "learning_rate": 1.958909964626299e-05, + "loss": 1.1913, "step": 3037 }, { - "epoch": 0.0860891495933577, + "epoch": 0.11886689099303545, "grad_norm": 0.0, - "learning_rate": 1.9835504782194413e-05, - "loss": 1.0931, + "learning_rate": 1.958874004242534e-05, + "loss": 1.1781, "step": 3038 }, { - "epoch": 0.08611748703562017, + "epoch": 0.11890601768526489, "grad_norm": 0.0, - "learning_rate": 1.983533895729492e-05, - "loss": 0.9839, + "learning_rate": 1.9588380284604543e-05, + "loss": 1.2075, "step": 3039 }, { - "epoch": 0.08614582447788263, + "epoch": 0.11894514437749433, "grad_norm": 0.0, - "learning_rate": 1.9835173049548716e-05, - "loss": 1.176, + "learning_rate": 1.958802037280637e-05, + "loss": 1.254, "step": 3040 }, { - "epoch": 0.08617416192014508, + "epoch": 0.11898427106972377, "grad_norm": 0.0, - "learning_rate": 1.98350070589572e-05, - "loss": 1.0373, + "learning_rate": 1.9587660307036605e-05, + "loss": 1.1544, "step": 3041 }, { - "epoch": 0.08620249936240755, + "epoch": 0.1190233977619532, "grad_norm": 0.0, - "learning_rate": 1.9834840985521765e-05, - "loss": 1.0749, + "learning_rate": 1.958730008730103e-05, + "loss": 1.2151, "step": 3042 }, { - "epoch": 0.08623083680467002, + "epoch": 0.11906252445418264, "grad_norm": 0.0, - "learning_rate": 1.9834674829243813e-05, - "loss": 1.1319, + "learning_rate": 1.9586939713605428e-05, + "loss": 1.1163, "step": 3043 }, { - "epoch": 0.08625917424693247, + "epoch": 0.11910165114641208, "grad_norm": 0.0, - "learning_rate": 1.983450859012474e-05, - "loss": 1.1582, + "learning_rate": 1.958657918595559e-05, + "loss": 1.3146, "step": 3044 }, { - "epoch": 0.08628751168919493, + "epoch": 0.11914077783864152, "grad_norm": 0.0, - "learning_rate": 1.983434226816595e-05, - "loss": 1.1161, + "learning_rate": 1.95862185043573e-05, + "loss": 1.1325, "step": 3045 }, { - "epoch": 0.0863158491314574, + "epoch": 0.11917990453087096, "grad_norm": 0.0, - "learning_rate": 1.9834175863368847e-05, - "loss": 1.047, + "learning_rate": 1.9585857668816355e-05, + "loss": 1.2328, "step": 3046 }, { - "epoch": 0.08634418657371985, + "epoch": 0.1192190312231004, "grad_norm": 0.0, - "learning_rate": 1.9834009375734825e-05, - "loss": 0.9932, + "learning_rate": 1.9585496679338547e-05, + "loss": 1.2382, "step": 3047 }, { - "epoch": 0.08637252401598232, + "epoch": 0.11925815791532984, "grad_norm": 0.0, - "learning_rate": 1.9833842805265293e-05, - "loss": 0.9427, + "learning_rate": 1.9585135535929674e-05, + "loss": 1.1025, "step": 3048 }, { - "epoch": 0.08640086145824478, + "epoch": 0.11929728460755927, "grad_norm": 0.0, - "learning_rate": 1.9833676151961648e-05, - "loss": 1.0727, + "learning_rate": 1.9584774238595535e-05, + "loss": 1.0647, "step": 3049 }, { - "epoch": 0.08642919890050724, + "epoch": 0.11933641129978871, "grad_norm": 0.0, - "learning_rate": 1.98335094158253e-05, - "loss": 1.0862, + "learning_rate": 1.958441278734193e-05, + "loss": 1.143, "step": 3050 }, { - "epoch": 0.0864575363427697, + "epoch": 0.11937553799201815, "grad_norm": 0.0, - "learning_rate": 1.983334259685765e-05, - "loss": 1.0276, + "learning_rate": 1.958405118217467e-05, + "loss": 1.2031, "step": 3051 }, { - "epoch": 0.08648587378503217, + "epoch": 0.11941466468424759, "grad_norm": 0.0, - "learning_rate": 1.9833175695060102e-05, - "loss": 1.1306, + "learning_rate": 1.958368942309955e-05, + "loss": 1.2022, "step": 3052 }, { - "epoch": 0.08651421122729462, + "epoch": 0.11945379137647703, "grad_norm": 0.0, - "learning_rate": 1.9833008710434068e-05, - "loss": 1.1186, + "learning_rate": 1.9583327510122397e-05, + "loss": 1.2994, "step": 3053 }, { - "epoch": 0.08654254866955709, + "epoch": 0.11949291806870647, "grad_norm": 0.0, - "learning_rate": 1.9832841642980948e-05, - "loss": 1.0201, + "learning_rate": 1.9582965443249007e-05, + "loss": 1.231, "step": 3054 }, { - "epoch": 0.08657088611181955, + "epoch": 0.11953204476093592, "grad_norm": 0.0, - "learning_rate": 1.9832674492702148e-05, - "loss": 1.1393, + "learning_rate": 1.95826032224852e-05, + "loss": 1.2745, "step": 3055 }, { - "epoch": 0.086599223554082, + "epoch": 0.11957117145316536, "grad_norm": 0.0, - "learning_rate": 1.9832507259599084e-05, - "loss": 1.0459, + "learning_rate": 1.9582240847836795e-05, + "loss": 1.1747, "step": 3056 }, { - "epoch": 0.08662756099634447, + "epoch": 0.11961029814539478, "grad_norm": 0.0, - "learning_rate": 1.983233994367316e-05, - "loss": 1.1187, + "learning_rate": 1.9581878319309608e-05, + "loss": 1.1562, "step": 3057 }, { - "epoch": 0.08665589843860694, + "epoch": 0.11964942483762422, "grad_norm": 0.0, - "learning_rate": 1.983217254492578e-05, - "loss": 1.0966, + "learning_rate": 1.958151563690946e-05, + "loss": 1.1288, "step": 3058 }, { - "epoch": 0.08668423588086939, + "epoch": 0.11968855152985367, "grad_norm": 0.0, - "learning_rate": 1.9832005063358366e-05, - "loss": 1.0113, + "learning_rate": 1.958115280064218e-05, + "loss": 1.1932, "step": 3059 }, { - "epoch": 0.08671257332313186, + "epoch": 0.1197276782220831, "grad_norm": 0.0, - "learning_rate": 1.983183749897232e-05, - "loss": 0.9979, + "learning_rate": 1.958078981051359e-05, + "loss": 1.2739, "step": 3060 }, { - "epoch": 0.08674091076539432, + "epoch": 0.11976680491431255, "grad_norm": 0.0, - "learning_rate": 1.9831669851769054e-05, - "loss": 1.0941, + "learning_rate": 1.9580426666529522e-05, + "loss": 1.2226, "step": 3061 }, { - "epoch": 0.08676924820765677, + "epoch": 0.11980593160654199, "grad_norm": 0.0, - "learning_rate": 1.9831502121749985e-05, - "loss": 1.0813, + "learning_rate": 1.9580063368695808e-05, + "loss": 1.2717, "step": 3062 }, { - "epoch": 0.08679758564991924, + "epoch": 0.11984505829877143, "grad_norm": 0.0, - "learning_rate": 1.9831334308916518e-05, - "loss": 1.0978, + "learning_rate": 1.9579699917018278e-05, + "loss": 1.1193, "step": 3063 }, { - "epoch": 0.0868259230921817, + "epoch": 0.11988418499100086, "grad_norm": 0.0, - "learning_rate": 1.9831166413270076e-05, - "loss": 1.1431, + "learning_rate": 1.9579336311502772e-05, + "loss": 1.1057, "step": 3064 }, { - "epoch": 0.08685426053444416, + "epoch": 0.1199233116832303, "grad_norm": 0.0, - "learning_rate": 1.983099843481207e-05, - "loss": 0.961, + "learning_rate": 1.957897255215513e-05, + "loss": 1.1776, "step": 3065 }, { - "epoch": 0.08688259797670662, + "epoch": 0.11996243837545974, "grad_norm": 0.0, - "learning_rate": 1.983083037354391e-05, - "loss": 1.103, + "learning_rate": 1.9578608638981192e-05, + "loss": 1.1598, "step": 3066 }, { - "epoch": 0.08691093541896909, + "epoch": 0.12000156506768918, "grad_norm": 0.0, - "learning_rate": 1.9830662229467015e-05, - "loss": 1.1378, + "learning_rate": 1.95782445719868e-05, + "loss": 1.2907, "step": 3067 }, { - "epoch": 0.08693927286123154, + "epoch": 0.12004069175991862, "grad_norm": 0.0, - "learning_rate": 1.9830494002582803e-05, - "loss": 1.1418, + "learning_rate": 1.9577880351177803e-05, + "loss": 1.2163, "step": 3068 }, { - "epoch": 0.08696761030349401, + "epoch": 0.12007981845214806, "grad_norm": 0.0, - "learning_rate": 1.983032569289269e-05, - "loss": 1.0572, + "learning_rate": 1.957751597656005e-05, + "loss": 1.1404, "step": 3069 }, { - "epoch": 0.08699594774575647, + "epoch": 0.1201189451443775, "grad_norm": 0.0, - "learning_rate": 1.983015730039809e-05, - "loss": 1.1629, + "learning_rate": 1.957715144813939e-05, + "loss": 1.1484, "step": 3070 }, { - "epoch": 0.08702428518801893, + "epoch": 0.12015807183660693, "grad_norm": 0.0, - "learning_rate": 1.9829988825100427e-05, - "loss": 1.1714, + "learning_rate": 1.9576786765921682e-05, + "loss": 1.0851, "step": 3071 }, { - "epoch": 0.08705262263028139, + "epoch": 0.12019719852883637, "grad_norm": 0.0, - "learning_rate": 1.9829820267001118e-05, - "loss": 1.0148, + "learning_rate": 1.9576421929912775e-05, + "loss": 1.3209, "step": 3072 }, { - "epoch": 0.08708096007254386, + "epoch": 0.12023632522106581, "grad_norm": 0.0, - "learning_rate": 1.9829651626101584e-05, - "loss": 1.0718, + "learning_rate": 1.9576056940118534e-05, + "loss": 1.2961, "step": 3073 }, { - "epoch": 0.08710929751480631, + "epoch": 0.12027545191329525, "grad_norm": 0.0, - "learning_rate": 1.982948290240324e-05, - "loss": 1.0874, + "learning_rate": 1.957569179654482e-05, + "loss": 1.1642, "step": 3074 }, { - "epoch": 0.08713763495706878, + "epoch": 0.12031457860552469, "grad_norm": 0.0, - "learning_rate": 1.9829314095907516e-05, - "loss": 1.1415, + "learning_rate": 1.9575326499197492e-05, + "loss": 1.2048, "step": 3075 }, { - "epoch": 0.08716597239933124, + "epoch": 0.12035370529775413, "grad_norm": 0.0, - "learning_rate": 1.9829145206615828e-05, - "loss": 1.0512, + "learning_rate": 1.957496104808242e-05, + "loss": 1.3037, "step": 3076 }, { - "epoch": 0.0871943098415937, + "epoch": 0.12039283198998357, "grad_norm": 0.0, - "learning_rate": 1.98289762345296e-05, - "loss": 1.0142, + "learning_rate": 1.957459544320547e-05, + "loss": 1.0001, "step": 3077 }, { - "epoch": 0.08722264728385616, + "epoch": 0.120431958682213, "grad_norm": 0.0, - "learning_rate": 1.9828807179650255e-05, - "loss": 0.9702, + "learning_rate": 1.9574229684572518e-05, + "loss": 1.1837, "step": 3078 }, { - "epoch": 0.08725098472611863, + "epoch": 0.12047108537444244, "grad_norm": 0.0, - "learning_rate": 1.9828638041979216e-05, - "loss": 1.1006, + "learning_rate": 1.9573863772189432e-05, + "loss": 1.2416, "step": 3079 }, { - "epoch": 0.08727932216838108, + "epoch": 0.12051021206667188, "grad_norm": 0.0, - "learning_rate": 1.9828468821517913e-05, - "loss": 1.0176, + "learning_rate": 1.957349770606209e-05, + "loss": 1.2726, "step": 3080 }, { - "epoch": 0.08730765961064355, + "epoch": 0.12054933875890132, "grad_norm": 0.0, - "learning_rate": 1.9828299518267763e-05, - "loss": 0.9862, + "learning_rate": 1.9573131486196372e-05, + "loss": 1.1475, "step": 3081 }, { - "epoch": 0.08733599705290601, + "epoch": 0.12058846545113076, "grad_norm": 0.0, - "learning_rate": 1.9828130132230198e-05, - "loss": 1.0573, + "learning_rate": 1.9572765112598157e-05, + "loss": 1.058, "step": 3082 }, { - "epoch": 0.08736433449516846, + "epoch": 0.1206275921433602, "grad_norm": 0.0, - "learning_rate": 1.982796066340664e-05, - "loss": 1.1034, + "learning_rate": 1.9572398585273333e-05, + "loss": 1.2417, "step": 3083 }, { - "epoch": 0.08739267193743093, + "epoch": 0.12066671883558965, "grad_norm": 0.0, - "learning_rate": 1.9827791111798526e-05, - "loss": 0.9754, + "learning_rate": 1.957203190422778e-05, + "loss": 1.2473, "step": 3084 }, { - "epoch": 0.0874210093796934, + "epoch": 0.12070584552781907, "grad_norm": 0.0, - "learning_rate": 1.9827621477407275e-05, - "loss": 1.1594, + "learning_rate": 1.957166506946739e-05, + "loss": 1.2734, "step": 3085 }, { - "epoch": 0.08744934682195585, + "epoch": 0.12074497222004851, "grad_norm": 0.0, - "learning_rate": 1.982745176023432e-05, - "loss": 1.2016, + "learning_rate": 1.9571298080998052e-05, + "loss": 1.1303, "step": 3086 }, { - "epoch": 0.08747768426421831, + "epoch": 0.12078409891227795, "grad_norm": 0.0, - "learning_rate": 1.982728196028109e-05, - "loss": 1.1492, + "learning_rate": 1.9570930938825662e-05, + "loss": 1.2042, "step": 3087 }, { - "epoch": 0.08750602170648078, + "epoch": 0.1208232256045074, "grad_norm": 0.0, - "learning_rate": 1.9827112077549012e-05, - "loss": 1.1118, + "learning_rate": 1.9570563642956114e-05, + "loss": 1.2542, "step": 3088 }, { - "epoch": 0.08753435914874323, + "epoch": 0.12086235229673684, "grad_norm": 0.0, - "learning_rate": 1.982694211203952e-05, - "loss": 1.0243, + "learning_rate": 1.9570196193395305e-05, + "loss": 1.2094, "step": 3089 }, { - "epoch": 0.0875626965910057, + "epoch": 0.12090147898896628, "grad_norm": 0.0, - "learning_rate": 1.9826772063754047e-05, - "loss": 1.1502, + "learning_rate": 1.9569828590149135e-05, + "loss": 1.2554, "step": 3090 }, { - "epoch": 0.08759103403326815, + "epoch": 0.12094060568119572, "grad_norm": 0.0, - "learning_rate": 1.9826601932694023e-05, - "loss": 1.063, + "learning_rate": 1.9569460833223512e-05, + "loss": 1.1374, "step": 3091 }, { - "epoch": 0.08761937147553062, + "epoch": 0.12097973237342514, "grad_norm": 0.0, - "learning_rate": 1.9826431718860882e-05, - "loss": 1.1215, + "learning_rate": 1.956909292262434e-05, + "loss": 1.2595, "step": 3092 }, { - "epoch": 0.08764770891779308, + "epoch": 0.12101885906565459, "grad_norm": 0.0, - "learning_rate": 1.982626142225606e-05, - "loss": 1.0782, + "learning_rate": 1.9568724858357527e-05, + "loss": 1.2979, "step": 3093 }, { - "epoch": 0.08767604636005553, + "epoch": 0.12105798575788403, "grad_norm": 0.0, - "learning_rate": 1.9826091042880984e-05, - "loss": 1.181, + "learning_rate": 1.956835664042898e-05, + "loss": 1.3547, "step": 3094 }, { - "epoch": 0.087704383802318, + "epoch": 0.12109711245011347, "grad_norm": 0.0, - "learning_rate": 1.98259205807371e-05, - "loss": 1.0228, + "learning_rate": 1.956798826884462e-05, + "loss": 1.2149, "step": 3095 }, { - "epoch": 0.08773272124458047, + "epoch": 0.12113623914234291, "grad_norm": 0.0, - "learning_rate": 1.9825750035825834e-05, - "loss": 1.0806, + "learning_rate": 1.9567619743610354e-05, + "loss": 1.1578, "step": 3096 }, { - "epoch": 0.08776105868684292, + "epoch": 0.12117536583457235, "grad_norm": 0.0, - "learning_rate": 1.982557940814863e-05, - "loss": 1.1021, + "learning_rate": 1.9567251064732105e-05, + "loss": 1.3519, "step": 3097 }, { - "epoch": 0.08778939612910538, + "epoch": 0.12121449252680179, "grad_norm": 0.0, - "learning_rate": 1.9825408697706917e-05, - "loss": 1.2054, + "learning_rate": 1.9566882232215788e-05, + "loss": 1.2251, "step": 3098 }, { - "epoch": 0.08781773357136785, + "epoch": 0.12125361921903122, "grad_norm": 0.0, - "learning_rate": 1.9825237904502143e-05, - "loss": 1.0743, + "learning_rate": 1.9566513246067335e-05, + "loss": 1.0903, "step": 3099 }, { - "epoch": 0.0878460710136303, + "epoch": 0.12129274591126066, "grad_norm": 0.0, - "learning_rate": 1.982506702853574e-05, - "loss": 1.1223, + "learning_rate": 1.956614410629267e-05, + "loss": 1.2116, "step": 3100 }, { - "epoch": 0.08787440845589277, + "epoch": 0.1213318726034901, "grad_norm": 0.0, - "learning_rate": 1.9824896069809148e-05, - "loss": 1.1786, + "learning_rate": 1.956577481289771e-05, + "loss": 1.2716, "step": 3101 }, { - "epoch": 0.08790274589815524, + "epoch": 0.12137099929571954, "grad_norm": 0.0, - "learning_rate": 1.9824725028323808e-05, - "loss": 1.092, + "learning_rate": 1.95654053658884e-05, + "loss": 1.2904, "step": 3102 }, { - "epoch": 0.08793108334041769, + "epoch": 0.12141012598794898, "grad_norm": 0.0, - "learning_rate": 1.9824553904081163e-05, - "loss": 1.1059, + "learning_rate": 1.956503576527066e-05, + "loss": 1.1221, "step": 3103 }, { - "epoch": 0.08795942078268015, + "epoch": 0.12144925268017842, "grad_norm": 0.0, - "learning_rate": 1.982438269708265e-05, - "loss": 1.06, + "learning_rate": 1.9564666011050435e-05, + "loss": 1.1664, "step": 3104 }, { - "epoch": 0.08798775822494262, + "epoch": 0.12148837937240786, "grad_norm": 0.0, - "learning_rate": 1.9824211407329717e-05, - "loss": 1.1001, + "learning_rate": 1.956429610323366e-05, + "loss": 1.1993, "step": 3105 }, { - "epoch": 0.08801609566720507, + "epoch": 0.12152750606463729, "grad_norm": 0.0, - "learning_rate": 1.9824040034823796e-05, - "loss": 1.1433, + "learning_rate": 1.9563926041826272e-05, + "loss": 1.2843, "step": 3106 }, { - "epoch": 0.08804443310946754, + "epoch": 0.12156663275686673, "grad_norm": 0.0, - "learning_rate": 1.9823868579566344e-05, - "loss": 1.0677, + "learning_rate": 1.9563555826834214e-05, + "loss": 1.1272, "step": 3107 }, { - "epoch": 0.08807277055173, + "epoch": 0.12160575944909617, "grad_norm": 0.0, - "learning_rate": 1.9823697041558797e-05, - "loss": 1.0771, + "learning_rate": 1.9563185458263437e-05, + "loss": 1.2098, "step": 3108 }, { - "epoch": 0.08810110799399246, + "epoch": 0.12164488614132561, "grad_norm": 0.0, - "learning_rate": 1.9823525420802603e-05, - "loss": 1.0831, + "learning_rate": 1.9562814936119885e-05, + "loss": 1.1389, "step": 3109 }, { - "epoch": 0.08812944543625492, + "epoch": 0.12168401283355505, "grad_norm": 0.0, - "learning_rate": 1.9823353717299205e-05, - "loss": 1.0201, + "learning_rate": 1.9562444260409507e-05, + "loss": 1.1987, "step": 3110 }, { - "epoch": 0.08815778287851739, + "epoch": 0.1217231395257845, "grad_norm": 0.0, - "learning_rate": 1.9823181931050052e-05, - "loss": 1.0529, + "learning_rate": 1.9562073431138255e-05, + "loss": 1.1802, "step": 3111 }, { - "epoch": 0.08818612032077984, + "epoch": 0.12176226621801393, "grad_norm": 0.0, - "learning_rate": 1.982301006205659e-05, - "loss": 1.1139, + "learning_rate": 1.9561702448312084e-05, + "loss": 1.1123, "step": 3112 }, { - "epoch": 0.0882144577630423, + "epoch": 0.12180139291024336, "grad_norm": 0.0, - "learning_rate": 1.9822838110320265e-05, - "loss": 1.1873, + "learning_rate": 1.956133131193696e-05, + "loss": 1.1779, "step": 3113 }, { - "epoch": 0.08824279520530477, + "epoch": 0.1218405196024728, "grad_norm": 0.0, - "learning_rate": 1.9822666075842527e-05, - "loss": 1.0791, + "learning_rate": 1.956096002201883e-05, + "loss": 1.3257, "step": 3114 }, { - "epoch": 0.08827113264756722, + "epoch": 0.12187964629470224, "grad_norm": 0.0, - "learning_rate": 1.9822493958624825e-05, - "loss": 1.1232, + "learning_rate": 1.9560588578563667e-05, + "loss": 1.2579, "step": 3115 }, { - "epoch": 0.08829947008982969, + "epoch": 0.12191877298693168, "grad_norm": 0.0, - "learning_rate": 1.982232175866861e-05, - "loss": 1.1282, + "learning_rate": 1.9560216981577426e-05, + "loss": 1.1532, "step": 3116 }, { - "epoch": 0.08832780753209216, + "epoch": 0.12195789967916112, "grad_norm": 0.0, - "learning_rate": 1.982214947597533e-05, - "loss": 1.0048, + "learning_rate": 1.9559845231066084e-05, + "loss": 1.158, "step": 3117 }, { - "epoch": 0.08835614497435461, + "epoch": 0.12199702637139057, "grad_norm": 0.0, - "learning_rate": 1.982197711054644e-05, - "loss": 1.0263, + "learning_rate": 1.9559473327035607e-05, + "loss": 1.0392, "step": 3118 }, { - "epoch": 0.08838448241661707, + "epoch": 0.12203615306362, "grad_norm": 0.0, - "learning_rate": 1.9821804662383388e-05, - "loss": 1.0527, + "learning_rate": 1.9559101269491965e-05, + "loss": 1.2292, "step": 3119 }, { - "epoch": 0.08841281985887954, + "epoch": 0.12207527975584945, "grad_norm": 0.0, - "learning_rate": 1.9821632131487626e-05, - "loss": 1.068, + "learning_rate": 1.9558729058441135e-05, + "loss": 1.1035, "step": 3120 }, { - "epoch": 0.08844115730114199, + "epoch": 0.12211440644807887, "grad_norm": 0.0, - "learning_rate": 1.9821459517860614e-05, - "loss": 1.1339, + "learning_rate": 1.9558356693889098e-05, + "loss": 1.1942, "step": 3121 }, { - "epoch": 0.08846949474340446, + "epoch": 0.12215353314030832, "grad_norm": 0.0, - "learning_rate": 1.98212868215038e-05, - "loss": 1.0897, + "learning_rate": 1.9557984175841825e-05, + "loss": 1.1974, "step": 3122 }, { - "epoch": 0.08849783218566692, + "epoch": 0.12219265983253776, "grad_norm": 0.0, - "learning_rate": 1.9821114042418638e-05, - "loss": 1.105, + "learning_rate": 1.9557611504305305e-05, + "loss": 1.2513, "step": 3123 }, { - "epoch": 0.08852616962792938, + "epoch": 0.1222317865247672, "grad_norm": 0.0, - "learning_rate": 1.982094118060659e-05, - "loss": 1.1649, + "learning_rate": 1.955723867928552e-05, + "loss": 1.1013, "step": 3124 }, { - "epoch": 0.08855450707019184, + "epoch": 0.12227091321699664, "grad_norm": 0.0, - "learning_rate": 1.98207682360691e-05, - "loss": 1.0689, + "learning_rate": 1.9556865700788457e-05, + "loss": 1.2515, "step": 3125 }, { - "epoch": 0.08858284451245431, + "epoch": 0.12231003990922608, "grad_norm": 0.0, - "learning_rate": 1.9820595208807636e-05, - "loss": 1.0516, + "learning_rate": 1.9556492568820107e-05, + "loss": 1.1331, "step": 3126 }, { - "epoch": 0.08861118195471676, + "epoch": 0.12234916660145552, "grad_norm": 0.0, - "learning_rate": 1.9820422098823653e-05, - "loss": 1.066, + "learning_rate": 1.9556119283386463e-05, + "loss": 1.1569, "step": 3127 }, { - "epoch": 0.08863951939697923, + "epoch": 0.12238829329368495, "grad_norm": 0.0, - "learning_rate": 1.9820248906118606e-05, - "loss": 1.107, + "learning_rate": 1.955574584449352e-05, + "loss": 1.1786, "step": 3128 }, { - "epoch": 0.0886678568392417, + "epoch": 0.12242741998591439, "grad_norm": 0.0, - "learning_rate": 1.9820075630693955e-05, - "loss": 1.0604, + "learning_rate": 1.9555372252147264e-05, + "loss": 1.2644, "step": 3129 }, { - "epoch": 0.08869619428150415, + "epoch": 0.12246654667814383, "grad_norm": 0.0, - "learning_rate": 1.9819902272551162e-05, - "loss": 1.0695, + "learning_rate": 1.9554998506353707e-05, + "loss": 1.1954, "step": 3130 }, { - "epoch": 0.08872453172376661, + "epoch": 0.12250567337037327, "grad_norm": 0.0, - "learning_rate": 1.981972883169169e-05, - "loss": 1.0245, + "learning_rate": 1.955462460711885e-05, + "loss": 1.2015, "step": 3131 }, { - "epoch": 0.08875286916602908, + "epoch": 0.12254480006260271, "grad_norm": 0.0, - "learning_rate": 1.981955530811699e-05, - "loss": 1.1393, + "learning_rate": 1.9554250554448692e-05, + "loss": 1.1463, "step": 3132 }, { - "epoch": 0.08878120660829153, + "epoch": 0.12258392675483215, "grad_norm": 0.0, - "learning_rate": 1.9819381701828532e-05, - "loss": 1.102, + "learning_rate": 1.9553876348349242e-05, + "loss": 1.1703, "step": 3133 }, { - "epoch": 0.088809544050554, + "epoch": 0.12262305344706159, "grad_norm": 0.0, - "learning_rate": 1.9819208012827772e-05, - "loss": 1.1015, + "learning_rate": 1.9553501988826514e-05, + "loss": 1.2977, "step": 3134 }, { - "epoch": 0.08883788149281646, + "epoch": 0.12266218013929102, "grad_norm": 0.0, - "learning_rate": 1.981903424111618e-05, - "loss": 1.1519, + "learning_rate": 1.955312747588651e-05, + "loss": 1.2676, "step": 3135 }, { - "epoch": 0.08886621893507891, + "epoch": 0.12270130683152046, "grad_norm": 0.0, - "learning_rate": 1.9818860386695214e-05, - "loss": 1.09, + "learning_rate": 1.955275280953525e-05, + "loss": 1.2216, "step": 3136 }, { - "epoch": 0.08889455637734138, + "epoch": 0.1227404335237499, "grad_norm": 0.0, - "learning_rate": 1.981868644956634e-05, - "loss": 1.0935, + "learning_rate": 1.9552377989778754e-05, + "loss": 1.2294, "step": 3137 }, { - "epoch": 0.08892289381960385, + "epoch": 0.12277956021597934, "grad_norm": 0.0, - "learning_rate": 1.981851242973103e-05, - "loss": 0.9823, + "learning_rate": 1.9552003016623032e-05, + "loss": 1.2809, "step": 3138 }, { - "epoch": 0.0889512312618663, + "epoch": 0.12281868690820878, "grad_norm": 0.0, - "learning_rate": 1.981833832719074e-05, - "loss": 1.0794, + "learning_rate": 1.9551627890074115e-05, + "loss": 1.4247, "step": 3139 }, { - "epoch": 0.08897956870412876, + "epoch": 0.12285781360043822, "grad_norm": 0.0, - "learning_rate": 1.9818164141946938e-05, - "loss": 0.9609, + "learning_rate": 1.955125261013802e-05, + "loss": 1.2419, "step": 3140 }, { - "epoch": 0.08900790614639123, + "epoch": 0.12289694029266766, "grad_norm": 0.0, - "learning_rate": 1.9817989874001096e-05, - "loss": 1.0413, + "learning_rate": 1.955087717682078e-05, + "loss": 1.1672, "step": 3141 }, { - "epoch": 0.08903624358865368, + "epoch": 0.12293606698489709, "grad_norm": 0.0, - "learning_rate": 1.981781552335468e-05, - "loss": 1.1547, + "learning_rate": 1.9550501590128418e-05, + "loss": 1.2898, "step": 3142 }, { - "epoch": 0.08906458103091615, + "epoch": 0.12297519367712653, "grad_norm": 0.0, - "learning_rate": 1.9817641090009157e-05, - "loss": 1.1032, + "learning_rate": 1.955012585006697e-05, + "loss": 1.2273, "step": 3143 }, { - "epoch": 0.08909291847317861, + "epoch": 0.12301432036935597, "grad_norm": 0.0, - "learning_rate": 1.9817466573965996e-05, - "loss": 1.0793, + "learning_rate": 1.9549749956642464e-05, + "loss": 1.2841, "step": 3144 }, { - "epoch": 0.08912125591544107, + "epoch": 0.12305344706158541, "grad_norm": 0.0, - "learning_rate": 1.981729197522667e-05, - "loss": 1.0169, + "learning_rate": 1.9549373909860944e-05, + "loss": 1.3269, "step": 3145 }, { - "epoch": 0.08914959335770353, + "epoch": 0.12309257375381485, "grad_norm": 0.0, - "learning_rate": 1.981711729379265e-05, - "loss": 1.129, + "learning_rate": 1.9548997709728443e-05, + "loss": 1.283, "step": 3146 }, { - "epoch": 0.089177930799966, + "epoch": 0.1231317004460443, "grad_norm": 0.0, - "learning_rate": 1.9816942529665404e-05, - "loss": 1.0429, + "learning_rate": 1.9548621356251004e-05, + "loss": 1.2133, "step": 3147 }, { - "epoch": 0.08920626824222845, + "epoch": 0.12317082713827374, "grad_norm": 0.0, - "learning_rate": 1.9816767682846404e-05, - "loss": 1.1974, + "learning_rate": 1.9548244849434673e-05, + "loss": 1.219, "step": 3148 }, { - "epoch": 0.08923460568449092, + "epoch": 0.12320995383050316, "grad_norm": 0.0, - "learning_rate": 1.9816592753337125e-05, - "loss": 1.027, + "learning_rate": 1.9547868189285493e-05, + "loss": 1.2233, "step": 3149 }, { - "epoch": 0.08926294312675338, + "epoch": 0.1232490805227326, "grad_norm": 0.0, - "learning_rate": 1.9816417741139042e-05, - "loss": 1.0033, + "learning_rate": 1.9547491375809512e-05, + "loss": 1.2968, "step": 3150 }, { - "epoch": 0.08929128056901584, + "epoch": 0.12328820721496205, "grad_norm": 0.0, - "learning_rate": 1.9816242646253626e-05, - "loss": 1.208, + "learning_rate": 1.954711440901279e-05, + "loss": 1.3376, "step": 3151 }, { - "epoch": 0.0893196180112783, + "epoch": 0.12332733390719149, "grad_norm": 0.0, - "learning_rate": 1.9816067468682352e-05, - "loss": 1.1665, + "learning_rate": 1.9546737288901364e-05, + "loss": 1.2108, "step": 3152 }, { - "epoch": 0.08934795545354077, + "epoch": 0.12336646059942093, "grad_norm": 0.0, - "learning_rate": 1.9815892208426696e-05, - "loss": 1.1225, + "learning_rate": 1.9546360015481306e-05, + "loss": 1.167, "step": 3153 }, { - "epoch": 0.08937629289580322, + "epoch": 0.12340558729165037, "grad_norm": 0.0, - "learning_rate": 1.9815716865488136e-05, - "loss": 1.0728, + "learning_rate": 1.954598258875867e-05, + "loss": 1.2242, "step": 3154 }, { - "epoch": 0.08940463033806569, + "epoch": 0.12344471398387981, "grad_norm": 0.0, - "learning_rate": 1.9815541439868152e-05, - "loss": 0.9888, + "learning_rate": 1.954560500873951e-05, + "loss": 1.2247, "step": 3155 }, { - "epoch": 0.08943296778032815, + "epoch": 0.12348384067610924, "grad_norm": 0.0, - "learning_rate": 1.9815365931568212e-05, - "loss": 1.0914, + "learning_rate": 1.9545227275429898e-05, + "loss": 1.3351, "step": 3156 }, { - "epoch": 0.0894613052225906, + "epoch": 0.12352296736833868, "grad_norm": 0.0, - "learning_rate": 1.9815190340589802e-05, - "loss": 1.0446, + "learning_rate": 1.95448493888359e-05, + "loss": 1.1411, "step": 3157 }, { - "epoch": 0.08948964266485307, + "epoch": 0.12356209406056812, "grad_norm": 0.0, - "learning_rate": 1.9815014666934397e-05, - "loss": 1.027, + "learning_rate": 1.9544471348963578e-05, + "loss": 1.2416, "step": 3158 }, { - "epoch": 0.08951798010711554, + "epoch": 0.12360122075279756, "grad_norm": 0.0, - "learning_rate": 1.981483891060348e-05, - "loss": 1.0981, + "learning_rate": 1.9544093155819004e-05, + "loss": 1.2729, "step": 3159 }, { - "epoch": 0.08954631754937799, + "epoch": 0.123640347445027, "grad_norm": 0.0, - "learning_rate": 1.9814663071598534e-05, - "loss": 1.211, + "learning_rate": 1.9543714809408258e-05, + "loss": 1.1623, "step": 3160 }, { - "epoch": 0.08957465499164045, + "epoch": 0.12367947413725644, "grad_norm": 0.0, - "learning_rate": 1.9814487149921033e-05, - "loss": 1.0152, + "learning_rate": 1.9543336309737406e-05, + "loss": 1.1922, "step": 3161 }, { - "epoch": 0.08960299243390292, + "epoch": 0.12371860082948588, "grad_norm": 0.0, - "learning_rate": 1.981431114557246e-05, - "loss": 0.9996, + "learning_rate": 1.9542957656812534e-05, + "loss": 1.194, "step": 3162 }, { - "epoch": 0.08963132987616537, + "epoch": 0.12375772752171531, "grad_norm": 0.0, - "learning_rate": 1.9814135058554303e-05, - "loss": 1.0118, + "learning_rate": 1.9542578850639717e-05, + "loss": 1.1314, "step": 3163 }, { - "epoch": 0.08965966731842784, + "epoch": 0.12379685421394475, "grad_norm": 0.0, - "learning_rate": 1.981395888886804e-05, - "loss": 1.0508, + "learning_rate": 1.9542199891225046e-05, + "loss": 1.1385, "step": 3164 }, { - "epoch": 0.0896880047606903, + "epoch": 0.12383598090617419, "grad_norm": 0.0, - "learning_rate": 1.9813782636515157e-05, - "loss": 0.9637, + "learning_rate": 1.9541820778574597e-05, + "loss": 1.2313, "step": 3165 }, { - "epoch": 0.08971634220295276, + "epoch": 0.12387510759840363, "grad_norm": 0.0, - "learning_rate": 1.9813606301497138e-05, - "loss": 1.0316, + "learning_rate": 1.954144151269447e-05, + "loss": 1.121, "step": 3166 }, { - "epoch": 0.08974467964521522, + "epoch": 0.12391423429063307, "grad_norm": 0.0, - "learning_rate": 1.981342988381547e-05, - "loss": 1.0972, + "learning_rate": 1.954106209359074e-05, + "loss": 1.0292, "step": 3167 }, { - "epoch": 0.08977301708747769, + "epoch": 0.12395336098286251, "grad_norm": 0.0, - "learning_rate": 1.9813253383471643e-05, - "loss": 1.0892, + "learning_rate": 1.9540682521269515e-05, + "loss": 1.2003, "step": 3168 }, { - "epoch": 0.08980135452974014, + "epoch": 0.12399248767509195, "grad_norm": 0.0, - "learning_rate": 1.9813076800467134e-05, - "loss": 1.0829, + "learning_rate": 1.9540302795736878e-05, + "loss": 1.2007, "step": 3169 }, { - "epoch": 0.0898296919720026, + "epoch": 0.12403161436732138, "grad_norm": 0.0, - "learning_rate": 1.981290013480343e-05, - "loss": 1.2329, + "learning_rate": 1.9539922916998935e-05, + "loss": 1.239, "step": 3170 }, { - "epoch": 0.08985802941426507, + "epoch": 0.12407074105955082, "grad_norm": 0.0, - "learning_rate": 1.9812723386482032e-05, - "loss": 1.0954, + "learning_rate": 1.9539542885061785e-05, + "loss": 1.1366, "step": 3171 }, { - "epoch": 0.08988636685652752, + "epoch": 0.12410986775178026, "grad_norm": 0.0, - "learning_rate": 1.981254655550442e-05, - "loss": 0.9765, + "learning_rate": 1.9539162699931534e-05, + "loss": 1.0818, "step": 3172 }, { - "epoch": 0.08991470429878999, + "epoch": 0.1241489944440097, "grad_norm": 0.0, - "learning_rate": 1.9812369641872083e-05, - "loss": 0.9642, + "learning_rate": 1.9538782361614277e-05, + "loss": 1.2155, "step": 3173 }, { - "epoch": 0.08994304174105246, + "epoch": 0.12418812113623914, "grad_norm": 0.0, - "learning_rate": 1.981219264558651e-05, - "loss": 1.0875, + "learning_rate": 1.9538401870116132e-05, + "loss": 1.2468, "step": 3174 }, { - "epoch": 0.08997137918331491, + "epoch": 0.12422724782846858, "grad_norm": 0.0, - "learning_rate": 1.98120155666492e-05, - "loss": 0.9621, + "learning_rate": 1.9538021225443202e-05, + "loss": 1.1047, "step": 3175 }, { - "epoch": 0.08999971662557738, + "epoch": 0.12426637452069803, "grad_norm": 0.0, - "learning_rate": 1.9811838405061638e-05, - "loss": 1.0229, + "learning_rate": 1.9537640427601605e-05, + "loss": 1.2318, "step": 3176 }, { - "epoch": 0.09002805406783984, + "epoch": 0.12430550121292745, "grad_norm": 0.0, - "learning_rate": 1.9811661160825314e-05, - "loss": 1.0797, + "learning_rate": 1.9537259476597455e-05, + "loss": 1.2393, "step": 3177 }, { - "epoch": 0.0900563915101023, + "epoch": 0.1243446279051569, "grad_norm": 0.0, - "learning_rate": 1.9811483833941726e-05, - "loss": 1.0211, + "learning_rate": 1.9536878372436866e-05, + "loss": 1.1534, "step": 3178 }, { - "epoch": 0.09008472895236476, + "epoch": 0.12438375459738633, "grad_norm": 0.0, - "learning_rate": 1.9811306424412368e-05, - "loss": 1.1318, + "learning_rate": 1.953649711512596e-05, + "loss": 1.1396, "step": 3179 }, { - "epoch": 0.09011306639462723, + "epoch": 0.12442288128961577, "grad_norm": 0.0, - "learning_rate": 1.9811128932238733e-05, - "loss": 1.1936, + "learning_rate": 1.9536115704670865e-05, + "loss": 1.1818, "step": 3180 }, { - "epoch": 0.09014140383688968, + "epoch": 0.12446200798184522, "grad_norm": 0.0, - "learning_rate": 1.9810951357422313e-05, - "loss": 1.1418, + "learning_rate": 1.9535734141077694e-05, + "loss": 1.0723, "step": 3181 }, { - "epoch": 0.09016974127915214, + "epoch": 0.12450113467407466, "grad_norm": 0.0, - "learning_rate": 1.981077369996461e-05, - "loss": 1.133, + "learning_rate": 1.9535352424352588e-05, + "loss": 1.1146, "step": 3182 }, { - "epoch": 0.09019807872141461, + "epoch": 0.1245402613663041, "grad_norm": 0.0, - "learning_rate": 1.9810595959867114e-05, - "loss": 1.1461, + "learning_rate": 1.953497055450167e-05, + "loss": 1.079, "step": 3183 }, { - "epoch": 0.09022641616367706, + "epoch": 0.12457938805853354, "grad_norm": 0.0, - "learning_rate": 1.9810418137131326e-05, - "loss": 1.1681, + "learning_rate": 1.953458853153107e-05, + "loss": 1.1658, "step": 3184 }, { - "epoch": 0.09025475360593953, + "epoch": 0.12461851475076297, "grad_norm": 0.0, - "learning_rate": 1.9810240231758743e-05, - "loss": 1.1524, + "learning_rate": 1.9534206355446927e-05, + "loss": 1.2498, "step": 3185 }, { - "epoch": 0.090283091048202, + "epoch": 0.1246576414429924, "grad_norm": 0.0, - "learning_rate": 1.9810062243750866e-05, - "loss": 1.1247, + "learning_rate": 1.953382402625538e-05, + "loss": 1.2148, "step": 3186 }, { - "epoch": 0.09031142849046445, + "epoch": 0.12469676813522185, "grad_norm": 0.0, - "learning_rate": 1.980988417310919e-05, - "loss": 1.112, + "learning_rate": 1.953344154396256e-05, + "loss": 1.2584, "step": 3187 }, { - "epoch": 0.09033976593272691, + "epoch": 0.12473589482745129, "grad_norm": 0.0, - "learning_rate": 1.9809706019835215e-05, - "loss": 1.0931, + "learning_rate": 1.9533058908574617e-05, + "loss": 1.229, "step": 3188 }, { - "epoch": 0.09036810337498938, + "epoch": 0.12477502151968073, "grad_norm": 0.0, - "learning_rate": 1.9809527783930444e-05, - "loss": 1.2107, + "learning_rate": 1.9532676120097696e-05, + "loss": 1.1368, "step": 3189 }, { - "epoch": 0.09039644081725183, + "epoch": 0.12481414821191017, "grad_norm": 0.0, - "learning_rate": 1.980934946539638e-05, - "loss": 1.1019, + "learning_rate": 1.953229317853794e-05, + "loss": 1.2589, "step": 3190 }, { - "epoch": 0.0904247782595143, + "epoch": 0.12485327490413961, "grad_norm": 0.0, - "learning_rate": 1.9809171064234524e-05, - "loss": 1.0751, + "learning_rate": 1.95319100839015e-05, + "loss": 1.3189, "step": 3191 }, { - "epoch": 0.09045311570177676, + "epoch": 0.12489240159636904, "grad_norm": 0.0, - "learning_rate": 1.9808992580446374e-05, - "loss": 1.1934, + "learning_rate": 1.9531526836194526e-05, + "loss": 1.1721, "step": 3192 }, { - "epoch": 0.09048145314403921, + "epoch": 0.12493152828859848, "grad_norm": 0.0, - "learning_rate": 1.9808814014033436e-05, - "loss": 0.9684, + "learning_rate": 1.9531143435423176e-05, + "loss": 1.2721, "step": 3193 }, { - "epoch": 0.09050979058630168, + "epoch": 0.12497065498082792, "grad_norm": 0.0, - "learning_rate": 1.9808635364997218e-05, - "loss": 1.14, + "learning_rate": 1.953075988159361e-05, + "loss": 1.2012, "step": 3194 }, { - "epoch": 0.09053812802856415, + "epoch": 0.12500978167305735, "grad_norm": 0.0, - "learning_rate": 1.980845663333922e-05, - "loss": 0.9881, + "learning_rate": 1.9530376174711977e-05, + "loss": 1.298, "step": 3195 }, { - "epoch": 0.0905664654708266, + "epoch": 0.1250489083652868, "grad_norm": 0.0, - "learning_rate": 1.9808277819060953e-05, - "loss": 1.0559, + "learning_rate": 1.9529992314784446e-05, + "loss": 1.1802, "step": 3196 }, { - "epoch": 0.09059480291308906, + "epoch": 0.12508803505751623, "grad_norm": 0.0, - "learning_rate": 1.9808098922163918e-05, - "loss": 1.0278, + "learning_rate": 1.952960830181718e-05, + "loss": 1.0813, "step": 3197 }, { - "epoch": 0.09062314035535153, + "epoch": 0.12512716174974567, "grad_norm": 0.0, - "learning_rate": 1.980791994264962e-05, - "loss": 1.1498, + "learning_rate": 1.9529224135816348e-05, + "loss": 1.2982, "step": 3198 }, { - "epoch": 0.09065147779761398, + "epoch": 0.1251662884419751, "grad_norm": 0.0, - "learning_rate": 1.9807740880519574e-05, - "loss": 1.1366, + "learning_rate": 1.952883981678812e-05, + "loss": 1.2296, "step": 3199 }, { - "epoch": 0.09067981523987645, + "epoch": 0.12520541513420455, "grad_norm": 0.0, - "learning_rate": 1.9807561735775285e-05, - "loss": 1.1052, + "learning_rate": 1.952845534473866e-05, + "loss": 1.205, "step": 3200 }, { - "epoch": 0.09070815268213892, + "epoch": 0.125244541826434, "grad_norm": 0.0, - "learning_rate": 1.980738250841826e-05, - "loss": 1.0977, + "learning_rate": 1.952807071967415e-05, + "loss": 1.0558, "step": 3201 }, { - "epoch": 0.09073649012440137, + "epoch": 0.12528366851866343, "grad_norm": 0.0, - "learning_rate": 1.980720319845001e-05, - "loss": 1.113, + "learning_rate": 1.9527685941600762e-05, + "loss": 1.0873, "step": 3202 }, { - "epoch": 0.09076482756666383, + "epoch": 0.12532279521089287, "grad_norm": 0.0, - "learning_rate": 1.9807023805872047e-05, - "loss": 1.0482, + "learning_rate": 1.9527301010524677e-05, + "loss": 1.3458, "step": 3203 }, { - "epoch": 0.0907931650089263, + "epoch": 0.12536192190312231, "grad_norm": 0.0, - "learning_rate": 1.980684433068588e-05, - "loss": 1.204, + "learning_rate": 1.9526915926452073e-05, + "loss": 1.2303, "step": 3204 }, { - "epoch": 0.09082150245118875, + "epoch": 0.12540104859535176, "grad_norm": 0.0, - "learning_rate": 1.9806664772893024e-05, - "loss": 1.0219, + "learning_rate": 1.952653068938914e-05, + "loss": 1.1429, "step": 3205 }, { - "epoch": 0.09084983989345122, + "epoch": 0.1254401752875812, "grad_norm": 0.0, - "learning_rate": 1.9806485132494984e-05, - "loss": 1.0367, + "learning_rate": 1.9526145299342063e-05, + "loss": 1.1616, "step": 3206 }, { - "epoch": 0.09087817733571368, + "epoch": 0.12547930197981064, "grad_norm": 0.0, - "learning_rate": 1.980630540949328e-05, - "loss": 1.12, + "learning_rate": 1.9525759756317026e-05, + "loss": 1.2377, "step": 3207 }, { - "epoch": 0.09090651477797614, + "epoch": 0.12551842867204008, "grad_norm": 0.0, - "learning_rate": 1.9806125603889424e-05, - "loss": 0.9457, + "learning_rate": 1.9525374060320228e-05, + "loss": 1.2079, "step": 3208 }, { - "epoch": 0.0909348522202386, + "epoch": 0.1255575553642695, "grad_norm": 0.0, - "learning_rate": 1.9805945715684933e-05, - "loss": 1.182, + "learning_rate": 1.9524988211357855e-05, + "loss": 1.2371, "step": 3209 }, { - "epoch": 0.09096318966250107, + "epoch": 0.12559668205649893, "grad_norm": 0.0, - "learning_rate": 1.980576574488132e-05, - "loss": 1.138, + "learning_rate": 1.952460220943611e-05, + "loss": 1.1642, "step": 3210 }, { - "epoch": 0.09099152710476352, + "epoch": 0.12563580874872837, "grad_norm": 0.0, - "learning_rate": 1.9805585691480098e-05, - "loss": 1.0334, + "learning_rate": 1.9524216054561186e-05, + "loss": 1.1035, "step": 3211 }, { - "epoch": 0.09101986454702599, + "epoch": 0.1256749354409578, "grad_norm": 0.0, - "learning_rate": 1.9805405555482786e-05, - "loss": 1.048, + "learning_rate": 1.9523829746739286e-05, + "loss": 1.1353, "step": 3212 }, { - "epoch": 0.09104820198928845, + "epoch": 0.12571406213318725, "grad_norm": 0.0, - "learning_rate": 1.9805225336890905e-05, - "loss": 1.054, + "learning_rate": 1.9523443285976617e-05, + "loss": 1.2573, "step": 3213 }, { - "epoch": 0.0910765394315509, + "epoch": 0.1257531888254167, "grad_norm": 0.0, - "learning_rate": 1.9805045035705966e-05, - "loss": 1.0702, + "learning_rate": 1.952305667227938e-05, + "loss": 1.0728, "step": 3214 }, { - "epoch": 0.09110487687381337, + "epoch": 0.12579231551764614, "grad_norm": 0.0, - "learning_rate": 1.9804864651929495e-05, - "loss": 1.1541, + "learning_rate": 1.9522669905653787e-05, + "loss": 1.3169, "step": 3215 }, { - "epoch": 0.09113321431607584, + "epoch": 0.12583144220987558, "grad_norm": 0.0, - "learning_rate": 1.980468418556301e-05, - "loss": 1.2114, + "learning_rate": 1.9522282986106045e-05, + "loss": 1.1599, "step": 3216 }, { - "epoch": 0.09116155175833829, + "epoch": 0.12587056890210502, "grad_norm": 0.0, - "learning_rate": 1.9804503636608026e-05, - "loss": 1.1546, + "learning_rate": 1.9521895913642375e-05, + "loss": 1.261, "step": 3217 }, { - "epoch": 0.09118988920060075, + "epoch": 0.12590969559433446, "grad_norm": 0.0, - "learning_rate": 1.9804323005066066e-05, - "loss": 1.0983, + "learning_rate": 1.9521508688268986e-05, + "loss": 1.2161, "step": 3218 }, { - "epoch": 0.09121822664286322, + "epoch": 0.1259488222865639, "grad_norm": 0.0, - "learning_rate": 1.9804142290938654e-05, - "loss": 1.131, + "learning_rate": 1.95211213099921e-05, + "loss": 1.2865, "step": 3219 }, { - "epoch": 0.09124656408512567, + "epoch": 0.12598794897879334, "grad_norm": 0.0, - "learning_rate": 1.980396149422731e-05, - "loss": 1.1437, + "learning_rate": 1.9520733778817936e-05, + "loss": 1.1806, "step": 3220 }, { - "epoch": 0.09127490152738814, + "epoch": 0.12602707567102278, "grad_norm": 0.0, - "learning_rate": 1.980378061493356e-05, - "loss": 0.9281, + "learning_rate": 1.9520346094752716e-05, + "loss": 1.1368, "step": 3221 }, { - "epoch": 0.0913032389696506, + "epoch": 0.12606620236325222, "grad_norm": 0.0, - "learning_rate": 1.980359965305892e-05, - "loss": 1.0454, + "learning_rate": 1.9519958257802668e-05, + "loss": 1.2318, "step": 3222 }, { - "epoch": 0.09133157641191306, + "epoch": 0.12610532905548166, "grad_norm": 0.0, - "learning_rate": 1.9803418608604926e-05, - "loss": 1.1277, + "learning_rate": 1.951957026797402e-05, + "loss": 1.2843, "step": 3223 }, { - "epoch": 0.09135991385417552, + "epoch": 0.12614445574771108, "grad_norm": 0.0, - "learning_rate": 1.9803237481573096e-05, - "loss": 1.091, + "learning_rate": 1.9519182125273e-05, + "loss": 1.0461, "step": 3224 }, { - "epoch": 0.09138825129643799, + "epoch": 0.12618358243994052, "grad_norm": 0.0, - "learning_rate": 1.9803056271964952e-05, - "loss": 1.1185, + "learning_rate": 1.9518793829705846e-05, + "loss": 1.2207, "step": 3225 }, { - "epoch": 0.09141658873870044, + "epoch": 0.12622270913216996, "grad_norm": 0.0, - "learning_rate": 1.980287497978203e-05, - "loss": 1.0383, + "learning_rate": 1.9518405381278793e-05, + "loss": 1.27, "step": 3226 }, { - "epoch": 0.09144492618096291, + "epoch": 0.1262618358243994, "grad_norm": 0.0, - "learning_rate": 1.980269360502585e-05, - "loss": 1.1241, + "learning_rate": 1.951801677999807e-05, + "loss": 1.2505, "step": 3227 }, { - "epoch": 0.09147326362322537, + "epoch": 0.12630096251662884, "grad_norm": 0.0, - "learning_rate": 1.980251214769794e-05, - "loss": 1.0319, + "learning_rate": 1.951762802586993e-05, + "loss": 1.1607, "step": 3228 }, { - "epoch": 0.09150160106548783, + "epoch": 0.12634008920885828, "grad_norm": 0.0, - "learning_rate": 1.9802330607799832e-05, - "loss": 1.0914, + "learning_rate": 1.9517239118900607e-05, + "loss": 1.2053, "step": 3229 }, { - "epoch": 0.09152993850775029, + "epoch": 0.12637921590108772, "grad_norm": 0.0, - "learning_rate": 1.980214898533305e-05, - "loss": 1.1389, + "learning_rate": 1.951685005909635e-05, + "loss": 1.0711, "step": 3230 }, { - "epoch": 0.09155827595001276, + "epoch": 0.12641834259331716, "grad_norm": 0.0, - "learning_rate": 1.980196728029913e-05, - "loss": 1.0461, + "learning_rate": 1.9516460846463408e-05, + "loss": 1.2365, "step": 3231 }, { - "epoch": 0.09158661339227521, + "epoch": 0.1264574692855466, "grad_norm": 0.0, - "learning_rate": 1.98017854926996e-05, - "loss": 1.1457, + "learning_rate": 1.951607148100803e-05, + "loss": 1.2357, "step": 3232 }, { - "epoch": 0.09161495083453768, + "epoch": 0.12649659597777604, "grad_norm": 0.0, - "learning_rate": 1.980160362253599e-05, - "loss": 1.0156, + "learning_rate": 1.9515681962736467e-05, + "loss": 1.3944, "step": 3233 }, { - "epoch": 0.09164328827680014, + "epoch": 0.12653572267000548, "grad_norm": 0.0, - "learning_rate": 1.9801421669809833e-05, - "loss": 0.9429, + "learning_rate": 1.9515292291654976e-05, + "loss": 1.0451, "step": 3234 }, { - "epoch": 0.0916716257190626, + "epoch": 0.12657484936223493, "grad_norm": 0.0, - "learning_rate": 1.980123963452266e-05, - "loss": 1.0626, + "learning_rate": 1.9514902467769812e-05, + "loss": 1.0244, "step": 3235 }, { - "epoch": 0.09169996316132506, + "epoch": 0.12661397605446437, "grad_norm": 0.0, - "learning_rate": 1.9801057516676008e-05, - "loss": 1.068, + "learning_rate": 1.951451249108724e-05, + "loss": 1.2073, "step": 3236 }, { - "epoch": 0.09172830060358753, + "epoch": 0.1266531027466938, "grad_norm": 0.0, - "learning_rate": 1.980087531627141e-05, - "loss": 1.1352, + "learning_rate": 1.951412236161352e-05, + "loss": 1.2225, "step": 3237 }, { - "epoch": 0.09175663804584998, + "epoch": 0.12669222943892322, "grad_norm": 0.0, - "learning_rate": 1.98006930333104e-05, - "loss": 1.0424, + "learning_rate": 1.9513732079354912e-05, + "loss": 1.1441, "step": 3238 }, { - "epoch": 0.09178497548811244, + "epoch": 0.12673135613115266, "grad_norm": 0.0, - "learning_rate": 1.980051066779451e-05, - "loss": 1.1624, + "learning_rate": 1.9513341644317692e-05, + "loss": 1.1394, "step": 3239 }, { - "epoch": 0.09181331293037491, + "epoch": 0.1267704828233821, "grad_norm": 0.0, - "learning_rate": 1.980032821972528e-05, - "loss": 1.061, + "learning_rate": 1.9512951056508126e-05, + "loss": 1.1866, "step": 3240 }, { - "epoch": 0.09184165037263736, + "epoch": 0.12680960951561154, "grad_norm": 0.0, - "learning_rate": 1.9800145689104247e-05, - "loss": 1.0277, + "learning_rate": 1.9512560315932485e-05, + "loss": 1.2719, "step": 3241 }, { - "epoch": 0.09186998781489983, + "epoch": 0.12684873620784098, "grad_norm": 0.0, - "learning_rate": 1.979996307593295e-05, - "loss": 1.1172, + "learning_rate": 1.9512169422597048e-05, + "loss": 1.2017, "step": 3242 }, { - "epoch": 0.0918983252571623, + "epoch": 0.12688786290007043, "grad_norm": 0.0, - "learning_rate": 1.9799780380212922e-05, - "loss": 1.1345, + "learning_rate": 1.9511778376508088e-05, + "loss": 1.2266, "step": 3243 }, { - "epoch": 0.09192666269942475, + "epoch": 0.12692698959229987, "grad_norm": 0.0, - "learning_rate": 1.97995976019457e-05, - "loss": 1.1045, + "learning_rate": 1.9511387177671885e-05, + "loss": 1.181, "step": 3244 }, { - "epoch": 0.09195500014168721, + "epoch": 0.1269661162845293, "grad_norm": 0.0, - "learning_rate": 1.9799414741132836e-05, - "loss": 1.1283, + "learning_rate": 1.9510995826094723e-05, + "loss": 1.2143, "step": 3245 }, { - "epoch": 0.09198333758394968, + "epoch": 0.12700524297675875, "grad_norm": 0.0, - "learning_rate": 1.979923179777586e-05, - "loss": 0.9911, + "learning_rate": 1.9510604321782887e-05, + "loss": 1.2482, "step": 3246 }, { - "epoch": 0.09201167502621213, + "epoch": 0.1270443696689882, "grad_norm": 0.0, - "learning_rate": 1.9799048771876316e-05, - "loss": 1.1567, + "learning_rate": 1.9510212664742663e-05, + "loss": 1.1944, "step": 3247 }, { - "epoch": 0.0920400124684746, + "epoch": 0.12708349636121763, "grad_norm": 0.0, - "learning_rate": 1.9798865663435746e-05, - "loss": 1.1348, + "learning_rate": 1.9509820854980338e-05, + "loss": 1.3411, "step": 3248 }, { - "epoch": 0.09206834991073706, + "epoch": 0.12712262305344707, "grad_norm": 0.0, - "learning_rate": 1.9798682472455694e-05, - "loss": 0.9442, + "learning_rate": 1.9509428892502208e-05, + "loss": 1.1804, "step": 3249 }, { - "epoch": 0.09209668735299952, + "epoch": 0.1271617497456765, "grad_norm": 0.0, - "learning_rate": 1.9798499198937696e-05, - "loss": 1.1203, + "learning_rate": 1.9509036777314568e-05, + "loss": 1.2894, "step": 3250 }, { - "epoch": 0.09212502479526198, + "epoch": 0.12720087643790595, "grad_norm": 0.0, - "learning_rate": 1.9798315842883303e-05, - "loss": 1.0555, + "learning_rate": 1.9508644509423712e-05, + "loss": 1.2301, "step": 3251 }, { - "epoch": 0.09215336223752445, + "epoch": 0.12724000313013537, "grad_norm": 0.0, - "learning_rate": 1.9798132404294057e-05, - "loss": 1.1581, + "learning_rate": 1.9508252088835938e-05, + "loss": 1.2495, "step": 3252 }, { - "epoch": 0.0921816996797869, + "epoch": 0.1272791298223648, "grad_norm": 0.0, - "learning_rate": 1.9797948883171503e-05, - "loss": 1.0685, + "learning_rate": 1.950785951555755e-05, + "loss": 1.256, "step": 3253 }, { - "epoch": 0.09221003712204937, + "epoch": 0.12731825651459425, "grad_norm": 0.0, - "learning_rate": 1.9797765279517186e-05, - "loss": 1.1183, + "learning_rate": 1.9507466789594853e-05, + "loss": 1.155, "step": 3254 }, { - "epoch": 0.09223837456431183, + "epoch": 0.1273573832068237, "grad_norm": 0.0, - "learning_rate": 1.9797581593332657e-05, - "loss": 1.0393, + "learning_rate": 1.9507073910954154e-05, + "loss": 1.1413, "step": 3255 }, { - "epoch": 0.09226671200657428, + "epoch": 0.12739650989905313, "grad_norm": 0.0, - "learning_rate": 1.979739782461946e-05, - "loss": 1.1105, + "learning_rate": 1.950668087964176e-05, + "loss": 1.1444, "step": 3256 }, { - "epoch": 0.09229504944883675, + "epoch": 0.12743563659128257, "grad_norm": 0.0, - "learning_rate": 1.979721397337914e-05, - "loss": 1.0468, + "learning_rate": 1.9506287695663986e-05, + "loss": 1.181, "step": 3257 }, { - "epoch": 0.09232338689109922, + "epoch": 0.127474763283512, "grad_norm": 0.0, - "learning_rate": 1.979703003961325e-05, - "loss": 1.0193, + "learning_rate": 1.950589435902714e-05, + "loss": 1.0276, "step": 3258 }, { - "epoch": 0.09235172433336167, + "epoch": 0.12751388997574145, "grad_norm": 0.0, - "learning_rate": 1.9796846023323336e-05, - "loss": 1.1248, + "learning_rate": 1.9505500869737545e-05, + "loss": 1.2935, "step": 3259 }, { - "epoch": 0.09238006177562413, + "epoch": 0.1275530166679709, "grad_norm": 0.0, - "learning_rate": 1.9796661924510952e-05, - "loss": 1.0962, + "learning_rate": 1.9505107227801515e-05, + "loss": 1.1765, "step": 3260 }, { - "epoch": 0.0924083992178866, + "epoch": 0.12759214336020033, "grad_norm": 0.0, - "learning_rate": 1.9796477743177648e-05, - "loss": 1.1765, + "learning_rate": 1.9504713433225374e-05, + "loss": 1.0912, "step": 3261 }, { - "epoch": 0.09243673666014905, + "epoch": 0.12763127005242977, "grad_norm": 0.0, - "learning_rate": 1.9796293479324974e-05, - "loss": 0.9516, + "learning_rate": 1.9504319486015448e-05, + "loss": 1.143, "step": 3262 }, { - "epoch": 0.09246507410241152, + "epoch": 0.12767039674465921, "grad_norm": 0.0, - "learning_rate": 1.979610913295448e-05, - "loss": 1.0213, + "learning_rate": 1.950392538617806e-05, + "loss": 1.207, "step": 3263 }, { - "epoch": 0.09249341154467398, + "epoch": 0.12770952343688866, "grad_norm": 0.0, - "learning_rate": 1.979592470406772e-05, - "loss": 1.0694, + "learning_rate": 1.9503531133719535e-05, + "loss": 1.225, "step": 3264 }, { - "epoch": 0.09252174898693644, + "epoch": 0.1277486501291181, "grad_norm": 0.0, - "learning_rate": 1.979574019266625e-05, - "loss": 1.1627, + "learning_rate": 1.9503136728646213e-05, + "loss": 1.1548, "step": 3265 }, { - "epoch": 0.0925500864291989, + "epoch": 0.1277877768213475, "grad_norm": 0.0, - "learning_rate": 1.9795555598751623e-05, - "loss": 1.0976, + "learning_rate": 1.9502742170964422e-05, + "loss": 1.1017, "step": 3266 }, { - "epoch": 0.09257842387146137, + "epoch": 0.12782690351357695, "grad_norm": 0.0, - "learning_rate": 1.9795370922325396e-05, - "loss": 1.006, + "learning_rate": 1.9502347460680498e-05, + "loss": 1.2088, "step": 3267 }, { - "epoch": 0.09260676131372382, + "epoch": 0.1278660302058064, "grad_norm": 0.0, - "learning_rate": 1.979518616338912e-05, - "loss": 0.9158, + "learning_rate": 1.9501952597800783e-05, + "loss": 1.2263, "step": 3268 }, { - "epoch": 0.09263509875598629, + "epoch": 0.12790515689803583, "grad_norm": 0.0, - "learning_rate": 1.9795001321944354e-05, - "loss": 0.9979, + "learning_rate": 1.9501557582331613e-05, + "loss": 1.2111, "step": 3269 }, { - "epoch": 0.09266343619824875, + "epoch": 0.12794428359026527, "grad_norm": 0.0, - "learning_rate": 1.9794816397992656e-05, - "loss": 1.1813, + "learning_rate": 1.9501162414279337e-05, + "loss": 1.096, "step": 3270 }, { - "epoch": 0.0926917736405112, + "epoch": 0.12798341028249471, "grad_norm": 0.0, - "learning_rate": 1.9794631391535576e-05, - "loss": 1.1033, + "learning_rate": 1.9500767093650298e-05, + "loss": 1.2023, "step": 3271 }, { - "epoch": 0.09272011108277367, + "epoch": 0.12802253697472415, "grad_norm": 0.0, - "learning_rate": 1.9794446302574687e-05, - "loss": 1.0755, + "learning_rate": 1.9500371620450842e-05, + "loss": 1.2285, "step": 3272 }, { - "epoch": 0.09274844852503614, + "epoch": 0.1280616636669536, "grad_norm": 0.0, - "learning_rate": 1.9794261131111537e-05, - "loss": 1.0145, + "learning_rate": 1.9499975994687322e-05, + "loss": 1.2983, "step": 3273 }, { - "epoch": 0.09277678596729859, + "epoch": 0.12810079035918304, "grad_norm": 0.0, - "learning_rate": 1.9794075877147688e-05, - "loss": 1.0116, + "learning_rate": 1.9499580216366097e-05, + "loss": 1.1591, "step": 3274 }, { - "epoch": 0.09280512340956105, + "epoch": 0.12813991705141248, "grad_norm": 0.0, - "learning_rate": 1.9793890540684698e-05, - "loss": 0.9694, + "learning_rate": 1.9499184285493516e-05, + "loss": 1.2352, "step": 3275 }, { - "epoch": 0.09283346085182352, + "epoch": 0.12817904374364192, "grad_norm": 0.0, - "learning_rate": 1.9793705121724134e-05, - "loss": 1.1041, + "learning_rate": 1.9498788202075936e-05, + "loss": 1.0651, "step": 3276 }, { - "epoch": 0.09286179829408597, + "epoch": 0.12821817043587136, "grad_norm": 0.0, - "learning_rate": 1.9793519620267555e-05, - "loss": 1.2185, + "learning_rate": 1.949839196611972e-05, + "loss": 1.1392, "step": 3277 }, { - "epoch": 0.09289013573634844, + "epoch": 0.1282572971281008, "grad_norm": 0.0, - "learning_rate": 1.9793334036316523e-05, - "loss": 1.1447, + "learning_rate": 1.9497995577631233e-05, + "loss": 1.2328, "step": 3278 }, { - "epoch": 0.0929184731786109, + "epoch": 0.12829642382033024, "grad_norm": 0.0, - "learning_rate": 1.97931483698726e-05, - "loss": 1.0361, + "learning_rate": 1.9497599036616836e-05, + "loss": 1.2552, "step": 3279 }, { - "epoch": 0.09294681062087336, + "epoch": 0.12833555051255968, "grad_norm": 0.0, - "learning_rate": 1.9792962620937354e-05, - "loss": 1.0216, + "learning_rate": 1.9497202343082905e-05, + "loss": 1.2954, "step": 3280 }, { - "epoch": 0.09297514806313582, + "epoch": 0.1283746772047891, "grad_norm": 0.0, - "learning_rate": 1.9792776789512348e-05, - "loss": 1.0876, + "learning_rate": 1.94968054970358e-05, + "loss": 1.2921, "step": 3281 }, { - "epoch": 0.09300348550539829, + "epoch": 0.12841380389701854, "grad_norm": 0.0, - "learning_rate": 1.9792590875599144e-05, - "loss": 1.0812, + "learning_rate": 1.94964084984819e-05, + "loss": 1.2255, "step": 3282 }, { - "epoch": 0.09303182294766074, + "epoch": 0.12845293058924798, "grad_norm": 0.0, - "learning_rate": 1.9792404879199313e-05, - "loss": 0.9881, + "learning_rate": 1.949601134742758e-05, + "loss": 1.2021, "step": 3283 }, { - "epoch": 0.09306016038992321, + "epoch": 0.12849205728147742, "grad_norm": 0.0, - "learning_rate": 1.979221880031442e-05, - "loss": 0.9501, + "learning_rate": 1.9495614043879216e-05, + "loss": 1.2324, "step": 3284 }, { - "epoch": 0.09308849783218567, + "epoch": 0.12853118397370686, "grad_norm": 0.0, - "learning_rate": 1.979203263894603e-05, - "loss": 1.1689, + "learning_rate": 1.949521658784319e-05, + "loss": 1.1784, "step": 3285 }, { - "epoch": 0.09311683527444813, + "epoch": 0.1285703106659363, "grad_norm": 0.0, - "learning_rate": 1.9791846395095715e-05, - "loss": 1.1178, + "learning_rate": 1.949481897932588e-05, + "loss": 1.3462, "step": 3286 }, { - "epoch": 0.09314517271671059, + "epoch": 0.12860943735816574, "grad_norm": 0.0, - "learning_rate": 1.9791660068765038e-05, - "loss": 1.0817, + "learning_rate": 1.949442121833368e-05, + "loss": 1.1408, "step": 3287 }, { - "epoch": 0.09317351015897306, + "epoch": 0.12864856405039518, "grad_norm": 0.0, - "learning_rate": 1.9791473659955575e-05, - "loss": 1.0757, + "learning_rate": 1.9494023304872975e-05, + "loss": 1.2241, "step": 3288 }, { - "epoch": 0.09320184760123551, + "epoch": 0.12868769074262462, "grad_norm": 0.0, - "learning_rate": 1.979128716866889e-05, - "loss": 1.004, + "learning_rate": 1.9493625238950143e-05, + "loss": 1.3329, "step": 3289 }, { - "epoch": 0.09323018504349798, + "epoch": 0.12872681743485406, "grad_norm": 0.0, - "learning_rate": 1.979110059490656e-05, - "loss": 1.0155, + "learning_rate": 1.9493227020571593e-05, + "loss": 1.2157, "step": 3290 }, { - "epoch": 0.09325852248576043, + "epoch": 0.1287659441270835, "grad_norm": 0.0, - "learning_rate": 1.9790913938670155e-05, - "loss": 1.1328, + "learning_rate": 1.949282864974371e-05, + "loss": 1.227, "step": 3291 }, { - "epoch": 0.0932868599280229, + "epoch": 0.12880507081931294, "grad_norm": 0.0, - "learning_rate": 1.9790727199961244e-05, - "loss": 0.9713, + "learning_rate": 1.9492430126472897e-05, + "loss": 1.0506, "step": 3292 }, { - "epoch": 0.09331519737028536, + "epoch": 0.12884419751154239, "grad_norm": 0.0, - "learning_rate": 1.9790540378781403e-05, - "loss": 1.0642, + "learning_rate": 1.9492031450765548e-05, + "loss": 1.2, "step": 3293 }, { - "epoch": 0.09334353481254781, + "epoch": 0.12888332420377183, "grad_norm": 0.0, - "learning_rate": 1.9790353475132206e-05, - "loss": 1.1281, + "learning_rate": 1.9491632622628067e-05, + "loss": 1.1318, "step": 3294 }, { - "epoch": 0.09337187225481028, + "epoch": 0.12892245089600124, "grad_norm": 0.0, - "learning_rate": 1.9790166489015223e-05, - "loss": 1.0574, + "learning_rate": 1.949123364206686e-05, + "loss": 1.1996, "step": 3295 }, { - "epoch": 0.09340020969707274, + "epoch": 0.12896157758823068, "grad_norm": 0.0, - "learning_rate": 1.978997942043203e-05, - "loss": 0.9748, + "learning_rate": 1.9490834509088336e-05, + "loss": 1.2516, "step": 3296 }, { - "epoch": 0.0934285471393352, + "epoch": 0.12900070428046012, "grad_norm": 0.0, - "learning_rate": 1.9789792269384212e-05, - "loss": 1.1106, + "learning_rate": 1.9490435223698902e-05, + "loss": 1.0964, "step": 3297 }, { - "epoch": 0.09345688458159766, + "epoch": 0.12903983097268956, "grad_norm": 0.0, - "learning_rate": 1.9789605035873338e-05, - "loss": 1.0826, + "learning_rate": 1.9490035785904972e-05, + "loss": 1.1927, "step": 3298 }, { - "epoch": 0.09348522202386013, + "epoch": 0.129078957664919, "grad_norm": 0.0, - "learning_rate": 1.978941771990098e-05, - "loss": 1.0788, + "learning_rate": 1.948963619571296e-05, + "loss": 1.0611, "step": 3299 }, { - "epoch": 0.09351355946612258, + "epoch": 0.12911808435714844, "grad_norm": 0.0, - "learning_rate": 1.9789230321468725e-05, - "loss": 1.1229, + "learning_rate": 1.9489236453129276e-05, + "loss": 1.2014, "step": 3300 }, { - "epoch": 0.09354189690838505, + "epoch": 0.12915721104937788, "grad_norm": 0.0, - "learning_rate": 1.9789042840578148e-05, - "loss": 1.0075, + "learning_rate": 1.948883655816035e-05, + "loss": 1.2199, "step": 3301 }, { - "epoch": 0.09357023435064751, + "epoch": 0.12919633774160733, "grad_norm": 0.0, - "learning_rate": 1.9788855277230824e-05, - "loss": 1.15, + "learning_rate": 1.9488436510812594e-05, + "loss": 1.1394, "step": 3302 }, { - "epoch": 0.09359857179290997, + "epoch": 0.12923546443383677, "grad_norm": 0.0, - "learning_rate": 1.978866763142834e-05, - "loss": 0.9116, + "learning_rate": 1.9488036311092442e-05, + "loss": 1.091, "step": 3303 }, { - "epoch": 0.09362690923517243, + "epoch": 0.1292745911260662, "grad_norm": 0.0, - "learning_rate": 1.9788479903172276e-05, - "loss": 1.0754, + "learning_rate": 1.9487635959006314e-05, + "loss": 1.252, "step": 3304 }, { - "epoch": 0.0936552466774349, + "epoch": 0.12931371781829565, "grad_norm": 0.0, - "learning_rate": 1.9788292092464207e-05, - "loss": 1.1451, + "learning_rate": 1.9487235454560642e-05, + "loss": 1.1823, "step": 3305 }, { - "epoch": 0.09368358411969735, + "epoch": 0.1293528445105251, "grad_norm": 0.0, - "learning_rate": 1.978810419930572e-05, - "loss": 0.8193, + "learning_rate": 1.9486834797761855e-05, + "loss": 1.1141, "step": 3306 }, { - "epoch": 0.09371192156195982, + "epoch": 0.12939197120275453, "grad_norm": 0.0, - "learning_rate": 1.9787916223698397e-05, - "loss": 1.0987, + "learning_rate": 1.9486433988616392e-05, + "loss": 1.1714, "step": 3307 }, { - "epoch": 0.09374025900422228, + "epoch": 0.12943109789498397, "grad_norm": 0.0, - "learning_rate": 1.9787728165643822e-05, - "loss": 1.0575, + "learning_rate": 1.9486033027130685e-05, + "loss": 1.2712, "step": 3308 }, { - "epoch": 0.09376859644648473, + "epoch": 0.12947022458721338, "grad_norm": 0.0, - "learning_rate": 1.9787540025143576e-05, - "loss": 1.0443, + "learning_rate": 1.9485631913311175e-05, + "loss": 1.1418, "step": 3309 }, { - "epoch": 0.0937969338887472, + "epoch": 0.12950935127944282, "grad_norm": 0.0, - "learning_rate": 1.978735180219925e-05, - "loss": 1.1397, + "learning_rate": 1.9485230647164298e-05, + "loss": 1.1676, "step": 3310 }, { - "epoch": 0.09382527133100967, + "epoch": 0.12954847797167227, "grad_norm": 0.0, - "learning_rate": 1.978716349681242e-05, - "loss": 1.1447, + "learning_rate": 1.948482922869651e-05, + "loss": 1.215, "step": 3311 }, { - "epoch": 0.09385360877327212, + "epoch": 0.1295876046639017, "grad_norm": 0.0, - "learning_rate": 1.978697510898468e-05, - "loss": 1.0484, + "learning_rate": 1.9484427657914248e-05, + "loss": 1.1418, "step": 3312 }, { - "epoch": 0.09388194621553458, + "epoch": 0.12962673135613115, "grad_norm": 0.0, - "learning_rate": 1.9786786638717615e-05, - "loss": 0.987, + "learning_rate": 1.9484025934823955e-05, + "loss": 1.3107, "step": 3313 }, { - "epoch": 0.09391028365779705, + "epoch": 0.1296658580483606, "grad_norm": 0.0, - "learning_rate": 1.978659808601281e-05, - "loss": 0.9798, + "learning_rate": 1.9483624059432097e-05, + "loss": 1.2852, "step": 3314 }, { - "epoch": 0.0939386211000595, + "epoch": 0.12970498474059003, "grad_norm": 0.0, - "learning_rate": 1.9786409450871855e-05, - "loss": 1.0741, + "learning_rate": 1.9483222031745118e-05, + "loss": 1.3485, "step": 3315 }, { - "epoch": 0.09396695854232197, + "epoch": 0.12974411143281947, "grad_norm": 0.0, - "learning_rate": 1.9786220733296343e-05, - "loss": 1.0911, + "learning_rate": 1.9482819851769475e-05, + "loss": 1.2416, "step": 3316 }, { - "epoch": 0.09399529598458443, + "epoch": 0.1297832381250489, "grad_norm": 0.0, - "learning_rate": 1.9786031933287855e-05, - "loss": 1.0642, + "learning_rate": 1.948241751951163e-05, + "loss": 1.1521, "step": 3317 }, { - "epoch": 0.09402363342684689, + "epoch": 0.12982236481727835, "grad_norm": 0.0, - "learning_rate": 1.9785843050847988e-05, - "loss": 1.0886, + "learning_rate": 1.948201503497804e-05, + "loss": 1.2405, "step": 3318 }, { - "epoch": 0.09405197086910935, + "epoch": 0.1298614915095078, "grad_norm": 0.0, - "learning_rate": 1.9785654085978334e-05, - "loss": 1.1423, + "learning_rate": 1.9481612398175175e-05, + "loss": 1.233, "step": 3319 }, { - "epoch": 0.09408030831137182, + "epoch": 0.12990061820173723, "grad_norm": 0.0, - "learning_rate": 1.9785465038680474e-05, - "loss": 0.9769, + "learning_rate": 1.948120960910949e-05, + "loss": 1.3065, "step": 3320 }, { - "epoch": 0.09410864575363427, + "epoch": 0.12993974489396667, "grad_norm": 0.0, - "learning_rate": 1.9785275908956016e-05, - "loss": 1.0896, + "learning_rate": 1.948080666778746e-05, + "loss": 1.1542, "step": 3321 }, { - "epoch": 0.09413698319589674, + "epoch": 0.12997887158619612, "grad_norm": 0.0, - "learning_rate": 1.9785086696806544e-05, - "loss": 0.9945, + "learning_rate": 1.948040357421556e-05, + "loss": 1.1672, "step": 3322 }, { - "epoch": 0.0941653206381592, + "epoch": 0.13001799827842553, "grad_norm": 0.0, - "learning_rate": 1.9784897402233652e-05, - "loss": 1.0322, + "learning_rate": 1.9480000328400254e-05, + "loss": 1.1623, "step": 3323 }, { - "epoch": 0.09419365808042165, + "epoch": 0.13005712497065497, "grad_norm": 0.0, - "learning_rate": 1.9784708025238935e-05, - "loss": 1.068, + "learning_rate": 1.9479596930348024e-05, + "loss": 1.1709, "step": 3324 }, { - "epoch": 0.09422199552268412, + "epoch": 0.1300962516628844, "grad_norm": 0.0, - "learning_rate": 1.9784518565823988e-05, - "loss": 1.0007, + "learning_rate": 1.9479193380065343e-05, + "loss": 1.1249, "step": 3325 }, { - "epoch": 0.09425033296494659, + "epoch": 0.13013537835511385, "grad_norm": 0.0, - "learning_rate": 1.9784329023990408e-05, - "loss": 1.0566, + "learning_rate": 1.9478789677558697e-05, + "loss": 1.2272, "step": 3326 }, { - "epoch": 0.09427867040720904, + "epoch": 0.1301745050473433, "grad_norm": 0.0, - "learning_rate": 1.9784139399739794e-05, - "loss": 1.0726, + "learning_rate": 1.9478385822834563e-05, + "loss": 1.2231, "step": 3327 }, { - "epoch": 0.0943070078494715, + "epoch": 0.13021363173957273, "grad_norm": 0.0, - "learning_rate": 1.9783949693073738e-05, - "loss": 0.9866, + "learning_rate": 1.9477981815899435e-05, + "loss": 1.2506, "step": 3328 }, { - "epoch": 0.09433534529173397, + "epoch": 0.13025275843180217, "grad_norm": 0.0, - "learning_rate": 1.9783759903993843e-05, - "loss": 1.0289, + "learning_rate": 1.947757765675979e-05, + "loss": 1.2028, "step": 3329 }, { - "epoch": 0.09436368273399642, + "epoch": 0.13029188512403161, "grad_norm": 0.0, - "learning_rate": 1.97835700325017e-05, - "loss": 1.0341, + "learning_rate": 1.9477173345422126e-05, + "loss": 1.1967, "step": 3330 }, { - "epoch": 0.09439202017625889, + "epoch": 0.13033101181626106, "grad_norm": 0.0, - "learning_rate": 1.978338007859892e-05, - "loss": 1.0627, + "learning_rate": 1.947676888189294e-05, + "loss": 1.3072, "step": 3331 }, { - "epoch": 0.09442035761852136, + "epoch": 0.1303701385084905, "grad_norm": 0.0, - "learning_rate": 1.9783190042287093e-05, - "loss": 1.0961, + "learning_rate": 1.947636426617871e-05, + "loss": 1.1506, "step": 3332 }, { - "epoch": 0.09444869506078381, + "epoch": 0.13040926520071994, "grad_norm": 0.0, - "learning_rate": 1.9782999923567826e-05, - "loss": 0.9663, + "learning_rate": 1.947595949828595e-05, + "loss": 1.169, "step": 3333 }, { - "epoch": 0.09447703250304627, + "epoch": 0.13044839189294938, "grad_norm": 0.0, - "learning_rate": 1.9782809722442713e-05, - "loss": 1.0592, + "learning_rate": 1.9475554578221154e-05, + "loss": 1.2239, "step": 3334 }, { - "epoch": 0.09450536994530874, + "epoch": 0.13048751858517882, "grad_norm": 0.0, - "learning_rate": 1.9782619438913365e-05, - "loss": 1.0327, + "learning_rate": 1.9475149505990828e-05, + "loss": 1.2318, "step": 3335 }, { - "epoch": 0.09453370738757119, + "epoch": 0.13052664527740826, "grad_norm": 0.0, - "learning_rate": 1.978242907298138e-05, - "loss": 1.0303, + "learning_rate": 1.947474428160147e-05, + "loss": 1.2253, "step": 3336 }, { - "epoch": 0.09456204482983366, + "epoch": 0.1305657719696377, "grad_norm": 0.0, - "learning_rate": 1.9782238624648363e-05, - "loss": 1.0515, + "learning_rate": 1.947433890505959e-05, + "loss": 1.2246, "step": 3337 }, { - "epoch": 0.09459038227209612, + "epoch": 0.1306048986618671, "grad_norm": 0.0, - "learning_rate": 1.9782048093915916e-05, - "loss": 1.136, + "learning_rate": 1.9473933376371704e-05, + "loss": 1.1895, "step": 3338 }, { - "epoch": 0.09461871971435858, + "epoch": 0.13064402535409655, "grad_norm": 0.0, - "learning_rate": 1.9781857480785645e-05, - "loss": 1.0947, + "learning_rate": 1.9473527695544316e-05, + "loss": 1.1238, "step": 3339 }, { - "epoch": 0.09464705715662104, + "epoch": 0.130683152046326, "grad_norm": 0.0, - "learning_rate": 1.978166678525916e-05, - "loss": 1.0109, + "learning_rate": 1.9473121862583946e-05, + "loss": 1.2136, "step": 3340 }, { - "epoch": 0.09467539459888351, + "epoch": 0.13072227873855544, "grad_norm": 0.0, - "learning_rate": 1.9781476007338058e-05, - "loss": 1.1552, + "learning_rate": 1.947271587749711e-05, + "loss": 1.1461, "step": 3341 }, { - "epoch": 0.09470373204114596, + "epoch": 0.13076140543078488, "grad_norm": 0.0, - "learning_rate": 1.9781285147023953e-05, - "loss": 1.0166, + "learning_rate": 1.9472309740290324e-05, + "loss": 1.2243, "step": 3342 }, { - "epoch": 0.09473206948340843, + "epoch": 0.13080053212301432, "grad_norm": 0.0, - "learning_rate": 1.9781094204318455e-05, - "loss": 1.0925, + "learning_rate": 1.9471903450970116e-05, + "loss": 1.1277, "step": 3343 }, { - "epoch": 0.09476040692567089, + "epoch": 0.13083965881524376, "grad_norm": 0.0, - "learning_rate": 1.978090317922316e-05, - "loss": 1.106, + "learning_rate": 1.9471497009543005e-05, + "loss": 1.1697, "step": 3344 }, { - "epoch": 0.09478874436793334, + "epoch": 0.1308787855074732, "grad_norm": 0.0, - "learning_rate": 1.978071207173969e-05, - "loss": 0.9142, + "learning_rate": 1.9471090416015522e-05, + "loss": 1.1996, "step": 3345 }, { - "epoch": 0.09481708181019581, + "epoch": 0.13091791219970264, "grad_norm": 0.0, - "learning_rate": 1.9780520881869653e-05, - "loss": 1.0904, + "learning_rate": 1.9470683670394194e-05, + "loss": 1.1721, "step": 3346 }, { - "epoch": 0.09484541925245828, + "epoch": 0.13095703889193208, "grad_norm": 0.0, - "learning_rate": 1.9780329609614654e-05, - "loss": 1.1256, + "learning_rate": 1.9470276772685555e-05, + "loss": 1.2899, "step": 3347 }, { - "epoch": 0.09487375669472073, + "epoch": 0.13099616558416152, "grad_norm": 0.0, - "learning_rate": 1.978013825497631e-05, - "loss": 1.1051, + "learning_rate": 1.946986972289614e-05, + "loss": 1.1893, "step": 3348 }, { - "epoch": 0.0949020941369832, + "epoch": 0.13103529227639096, "grad_norm": 0.0, - "learning_rate": 1.9779946817956227e-05, - "loss": 1.1282, + "learning_rate": 1.946946252103248e-05, + "loss": 1.1683, "step": 3349 }, { - "epoch": 0.09493043157924566, + "epoch": 0.1310744189686204, "grad_norm": 0.0, - "learning_rate": 1.977975529855602e-05, - "loss": 1.0733, + "learning_rate": 1.9469055167101115e-05, + "loss": 1.1762, "step": 3350 }, { - "epoch": 0.09495876902150811, + "epoch": 0.13111354566084985, "grad_norm": 0.0, - "learning_rate": 1.9779563696777303e-05, - "loss": 1.0797, + "learning_rate": 1.9468647661108592e-05, + "loss": 1.1758, "step": 3351 }, { - "epoch": 0.09498710646377058, + "epoch": 0.13115267235307926, "grad_norm": 0.0, - "learning_rate": 1.9779372012621688e-05, - "loss": 1.0021, + "learning_rate": 1.9468240003061455e-05, + "loss": 1.2016, "step": 3352 }, { - "epoch": 0.09501544390603305, + "epoch": 0.1311917990453087, "grad_norm": 0.0, - "learning_rate": 1.9779180246090793e-05, - "loss": 1.2443, + "learning_rate": 1.9467832192966246e-05, + "loss": 1.206, "step": 3353 }, { - "epoch": 0.0950437813482955, + "epoch": 0.13123092573753814, "grad_norm": 0.0, - "learning_rate": 1.977898839718623e-05, - "loss": 1.1248, + "learning_rate": 1.9467424230829514e-05, + "loss": 1.3585, "step": 3354 }, { - "epoch": 0.09507211879055796, + "epoch": 0.13127005242976758, "grad_norm": 0.0, - "learning_rate": 1.977879646590962e-05, - "loss": 1.1069, + "learning_rate": 1.9467016116657818e-05, + "loss": 1.2476, "step": 3355 }, { - "epoch": 0.09510045623282043, + "epoch": 0.13130917912199702, "grad_norm": 0.0, - "learning_rate": 1.977860445226257e-05, - "loss": 1.0163, + "learning_rate": 1.94666078504577e-05, + "loss": 1.2427, "step": 3356 }, { - "epoch": 0.09512879367508288, + "epoch": 0.13134830581422646, "grad_norm": 0.0, - "learning_rate": 1.977841235624671e-05, - "loss": 1.1124, + "learning_rate": 1.9466199432235726e-05, + "loss": 1.2397, "step": 3357 }, { - "epoch": 0.09515713111734535, + "epoch": 0.1313874325064559, "grad_norm": 0.0, - "learning_rate": 1.977822017786365e-05, - "loss": 1.0461, + "learning_rate": 1.946579086199845e-05, + "loss": 1.2164, "step": 3358 }, { - "epoch": 0.09518546855960781, + "epoch": 0.13142655919868534, "grad_norm": 0.0, - "learning_rate": 1.9778027917115006e-05, - "loss": 0.9961, + "learning_rate": 1.9465382139752433e-05, + "loss": 1.3044, "step": 3359 }, { - "epoch": 0.09521380600187027, + "epoch": 0.13146568589091479, "grad_norm": 0.0, - "learning_rate": 1.9777835574002405e-05, - "loss": 1.1132, + "learning_rate": 1.9464973265504243e-05, + "loss": 1.1435, "step": 3360 }, { - "epoch": 0.09524214344413273, + "epoch": 0.13150481258314423, "grad_norm": 0.0, - "learning_rate": 1.9777643148527464e-05, - "loss": 1.1483, + "learning_rate": 1.9464564239260436e-05, + "loss": 1.2938, "step": 3361 }, { - "epoch": 0.0952704808863952, + "epoch": 0.13154393927537367, "grad_norm": 0.0, - "learning_rate": 1.97774506406918e-05, - "loss": 0.954, + "learning_rate": 1.946415506102759e-05, + "loss": 1.2368, "step": 3362 }, { - "epoch": 0.09529881832865765, + "epoch": 0.1315830659676031, "grad_norm": 0.0, - "learning_rate": 1.9777258050497044e-05, - "loss": 1.0103, + "learning_rate": 1.9463745730812276e-05, + "loss": 1.1584, "step": 3363 }, { - "epoch": 0.09532715577092012, + "epoch": 0.13162219265983255, "grad_norm": 0.0, - "learning_rate": 1.9777065377944812e-05, - "loss": 1.0309, + "learning_rate": 1.9463336248621062e-05, + "loss": 1.1682, "step": 3364 }, { - "epoch": 0.09535549321318258, + "epoch": 0.131661319352062, "grad_norm": 0.0, - "learning_rate": 1.9776872623036725e-05, - "loss": 1.0269, + "learning_rate": 1.9462926614460527e-05, + "loss": 1.1924, "step": 3365 }, { - "epoch": 0.09538383065544503, + "epoch": 0.1317004460442914, "grad_norm": 0.0, - "learning_rate": 1.9776679785774412e-05, - "loss": 1.0365, + "learning_rate": 1.9462516828337245e-05, + "loss": 1.1755, "step": 3366 }, { - "epoch": 0.0954121680977075, + "epoch": 0.13173957273652084, "grad_norm": 0.0, - "learning_rate": 1.977648686615949e-05, - "loss": 0.9487, + "learning_rate": 1.9462106890257805e-05, + "loss": 1.4261, "step": 3367 }, { - "epoch": 0.09544050553996997, + "epoch": 0.13177869942875028, "grad_norm": 0.0, - "learning_rate": 1.9776293864193594e-05, - "loss": 1.053, + "learning_rate": 1.9461696800228783e-05, + "loss": 1.2324, "step": 3368 }, { - "epoch": 0.09546884298223242, + "epoch": 0.13181782612097973, "grad_norm": 0.0, - "learning_rate": 1.9776100779878344e-05, - "loss": 1.0973, + "learning_rate": 1.9461286558256764e-05, + "loss": 1.1129, "step": 3369 }, { - "epoch": 0.09549718042449488, + "epoch": 0.13185695281320917, "grad_norm": 0.0, - "learning_rate": 1.9775907613215364e-05, - "loss": 1.111, + "learning_rate": 1.9460876164348342e-05, + "loss": 1.1345, "step": 3370 }, { - "epoch": 0.09552551786675735, + "epoch": 0.1318960795054386, "grad_norm": 0.0, - "learning_rate": 1.9775714364206288e-05, - "loss": 1.0477, + "learning_rate": 1.9460465618510104e-05, + "loss": 1.3838, "step": 3371 }, { - "epoch": 0.0955538553090198, + "epoch": 0.13193520619766805, "grad_norm": 0.0, - "learning_rate": 1.9775521032852737e-05, - "loss": 1.1122, + "learning_rate": 1.946005492074864e-05, + "loss": 1.2642, "step": 3372 }, { - "epoch": 0.09558219275128227, + "epoch": 0.1319743328898975, "grad_norm": 0.0, - "learning_rate": 1.977532761915634e-05, - "loss": 1.144, + "learning_rate": 1.945964407107055e-05, + "loss": 1.1796, "step": 3373 }, { - "epoch": 0.09561053019354473, + "epoch": 0.13201345958212693, "grad_norm": 0.0, - "learning_rate": 1.9775134123118732e-05, - "loss": 1.0691, + "learning_rate": 1.945923306948243e-05, + "loss": 1.1628, "step": 3374 }, { - "epoch": 0.09563886763580719, + "epoch": 0.13205258627435637, "grad_norm": 0.0, - "learning_rate": 1.9774940544741537e-05, - "loss": 1.0052, + "learning_rate": 1.9458821915990877e-05, + "loss": 1.2267, "step": 3375 }, { - "epoch": 0.09566720507806965, + "epoch": 0.1320917129665858, "grad_norm": 0.0, - "learning_rate": 1.977474688402639e-05, - "loss": 1.1351, + "learning_rate": 1.94584106106025e-05, + "loss": 1.2283, "step": 3376 }, { - "epoch": 0.09569554252033212, + "epoch": 0.13213083965881525, "grad_norm": 0.0, - "learning_rate": 1.9774553140974922e-05, - "loss": 1.0968, + "learning_rate": 1.94579991533239e-05, + "loss": 1.244, "step": 3377 }, { - "epoch": 0.09572387996259457, + "epoch": 0.1321699663510447, "grad_norm": 0.0, - "learning_rate": 1.9774359315588758e-05, - "loss": 1.0869, + "learning_rate": 1.9457587544161686e-05, + "loss": 1.1121, "step": 3378 }, { - "epoch": 0.09575221740485704, + "epoch": 0.13220909304327413, "grad_norm": 0.0, - "learning_rate": 1.9774165407869538e-05, - "loss": 1.1705, + "learning_rate": 1.9457175783122464e-05, + "loss": 1.2671, "step": 3379 }, { - "epoch": 0.0957805548471195, + "epoch": 0.13224821973550355, "grad_norm": 0.0, - "learning_rate": 1.9773971417818894e-05, - "loss": 1.0578, + "learning_rate": 1.9456763870212853e-05, + "loss": 1.2242, "step": 3380 }, { - "epoch": 0.09580889228938196, + "epoch": 0.132287346427733, "grad_norm": 0.0, - "learning_rate": 1.977377734543846e-05, - "loss": 1.0942, + "learning_rate": 1.945635180543946e-05, + "loss": 1.1901, "step": 3381 }, { - "epoch": 0.09583722973164442, + "epoch": 0.13232647311996243, "grad_norm": 0.0, - "learning_rate": 1.977358319072987e-05, - "loss": 1.1325, + "learning_rate": 1.945593958880891e-05, + "loss": 1.2449, "step": 3382 }, { - "epoch": 0.09586556717390689, + "epoch": 0.13236559981219187, "grad_norm": 0.0, - "learning_rate": 1.9773388953694758e-05, - "loss": 1.1034, + "learning_rate": 1.9455527220327816e-05, + "loss": 1.295, "step": 3383 }, { - "epoch": 0.09589390461616934, + "epoch": 0.1324047265044213, "grad_norm": 0.0, - "learning_rate": 1.9773194634334764e-05, - "loss": 0.907, + "learning_rate": 1.9455114700002808e-05, + "loss": 1.0955, "step": 3384 }, { - "epoch": 0.0959222420584318, + "epoch": 0.13244385319665075, "grad_norm": 0.0, - "learning_rate": 1.977300023265152e-05, - "loss": 1.077, + "learning_rate": 1.9454702027840503e-05, + "loss": 1.2501, "step": 3385 }, { - "epoch": 0.09595057950069427, + "epoch": 0.1324829798888802, "grad_norm": 0.0, - "learning_rate": 1.9772805748646667e-05, - "loss": 1.2027, + "learning_rate": 1.945428920384753e-05, + "loss": 1.2301, "step": 3386 }, { - "epoch": 0.09597891694295672, + "epoch": 0.13252210658110963, "grad_norm": 0.0, - "learning_rate": 1.977261118232184e-05, - "loss": 1.0629, + "learning_rate": 1.945387622803052e-05, + "loss": 1.1758, "step": 3387 }, { - "epoch": 0.09600725438521919, + "epoch": 0.13256123327333907, "grad_norm": 0.0, - "learning_rate": 1.9772416533678683e-05, - "loss": 1.0634, + "learning_rate": 1.9453463100396103e-05, + "loss": 1.169, "step": 3388 }, { - "epoch": 0.09603559182748166, + "epoch": 0.13260035996556852, "grad_norm": 0.0, - "learning_rate": 1.977222180271883e-05, - "loss": 0.9976, + "learning_rate": 1.9453049820950918e-05, + "loss": 1.2303, "step": 3389 }, { - "epoch": 0.09606392926974411, + "epoch": 0.13263948665779796, "grad_norm": 0.0, - "learning_rate": 1.977202698944393e-05, - "loss": 1.0624, + "learning_rate": 1.9452636389701593e-05, + "loss": 1.1628, "step": 3390 }, { - "epoch": 0.09609226671200657, + "epoch": 0.1326786133500274, "grad_norm": 0.0, - "learning_rate": 1.977183209385561e-05, - "loss": 1.0057, + "learning_rate": 1.9452222806654778e-05, + "loss": 1.149, "step": 3391 }, { - "epoch": 0.09612060415426904, + "epoch": 0.13271774004225684, "grad_norm": 0.0, - "learning_rate": 1.9771637115955524e-05, - "loss": 1.0427, + "learning_rate": 1.9451809071817105e-05, + "loss": 1.3072, "step": 3392 }, { - "epoch": 0.09614894159653149, + "epoch": 0.13275686673448628, "grad_norm": 0.0, - "learning_rate": 1.977144205574531e-05, - "loss": 1.1241, + "learning_rate": 1.9451395185195224e-05, + "loss": 1.1671, "step": 3393 }, { - "epoch": 0.09617727903879396, + "epoch": 0.1327959934267157, "grad_norm": 0.0, - "learning_rate": 1.977124691322661e-05, - "loss": 1.1173, + "learning_rate": 1.945098114679578e-05, + "loss": 1.3302, "step": 3394 }, { - "epoch": 0.09620561648105642, + "epoch": 0.13283512011894513, "grad_norm": 0.0, - "learning_rate": 1.9771051688401065e-05, - "loss": 1.0773, + "learning_rate": 1.945056695662542e-05, + "loss": 1.1697, "step": 3395 }, { - "epoch": 0.09623395392331888, + "epoch": 0.13287424681117457, "grad_norm": 0.0, - "learning_rate": 1.977085638127033e-05, - "loss": 0.9946, + "learning_rate": 1.9450152614690798e-05, + "loss": 1.1204, "step": 3396 }, { - "epoch": 0.09626229136558134, + "epoch": 0.13291337350340401, "grad_norm": 0.0, - "learning_rate": 1.977066099183604e-05, - "loss": 1.0383, + "learning_rate": 1.9449738120998563e-05, + "loss": 1.2796, "step": 3397 }, { - "epoch": 0.09629062880784381, + "epoch": 0.13295250019563346, "grad_norm": 0.0, - "learning_rate": 1.9770465520099843e-05, - "loss": 1.2223, + "learning_rate": 1.9449323475555383e-05, + "loss": 1.2104, "step": 3398 }, { - "epoch": 0.09631896625010626, + "epoch": 0.1329916268878629, "grad_norm": 0.0, - "learning_rate": 1.977026996606339e-05, - "loss": 1.0887, + "learning_rate": 1.9448908678367903e-05, + "loss": 1.1573, "step": 3399 }, { - "epoch": 0.09634730369236873, + "epoch": 0.13303075358009234, "grad_norm": 0.0, - "learning_rate": 1.977007432972832e-05, - "loss": 1.0717, + "learning_rate": 1.944849372944279e-05, + "loss": 1.2334, "step": 3400 }, { - "epoch": 0.0963756411346312, + "epoch": 0.13306988027232178, "grad_norm": 0.0, - "learning_rate": 1.976987861109629e-05, - "loss": 0.9318, + "learning_rate": 1.944807862878671e-05, + "loss": 1.2523, "step": 3401 }, { - "epoch": 0.09640397857689365, + "epoch": 0.13310900696455122, "grad_norm": 0.0, - "learning_rate": 1.9769682810168944e-05, - "loss": 0.9876, + "learning_rate": 1.9447663376406323e-05, + "loss": 1.2692, "step": 3402 }, { - "epoch": 0.09643231601915611, + "epoch": 0.13314813365678066, "grad_norm": 0.0, - "learning_rate": 1.976948692694793e-05, - "loss": 1.0123, + "learning_rate": 1.9447247972308305e-05, + "loss": 1.2343, "step": 3403 }, { - "epoch": 0.09646065346141858, + "epoch": 0.1331872603490101, "grad_norm": 0.0, - "learning_rate": 1.9769290961434904e-05, - "loss": 1.1154, + "learning_rate": 1.9446832416499316e-05, + "loss": 1.2901, "step": 3404 }, { - "epoch": 0.09648899090368103, + "epoch": 0.13322638704123954, "grad_norm": 0.0, - "learning_rate": 1.976909491363151e-05, - "loss": 1.0998, + "learning_rate": 1.944641670898604e-05, + "loss": 1.1995, "step": 3405 }, { - "epoch": 0.0965173283459435, + "epoch": 0.13326551373346898, "grad_norm": 0.0, - "learning_rate": 1.97688987835394e-05, - "loss": 1.0666, + "learning_rate": 1.944600084977515e-05, + "loss": 1.2014, "step": 3406 }, { - "epoch": 0.09654566578820596, + "epoch": 0.13330464042569842, "grad_norm": 0.0, - "learning_rate": 1.976870257116023e-05, - "loss": 1.0893, + "learning_rate": 1.9445584838873318e-05, + "loss": 1.0504, "step": 3407 }, { - "epoch": 0.09657400323046841, + "epoch": 0.13334376711792786, "grad_norm": 0.0, - "learning_rate": 1.9768506276495652e-05, - "loss": 1.114, + "learning_rate": 1.9445168676287233e-05, + "loss": 1.2958, "step": 3408 }, { - "epoch": 0.09660234067273088, + "epoch": 0.13338289381015728, "grad_norm": 0.0, - "learning_rate": 1.9768309899547313e-05, - "loss": 1.1288, + "learning_rate": 1.9444752362023575e-05, + "loss": 1.0613, "step": 3409 }, { - "epoch": 0.09663067811499335, + "epoch": 0.13342202050238672, "grad_norm": 0.0, - "learning_rate": 1.9768113440316878e-05, - "loss": 1.0777, + "learning_rate": 1.944433589608903e-05, + "loss": 1.2456, "step": 3410 }, { - "epoch": 0.0966590155572558, + "epoch": 0.13346114719461616, "grad_norm": 0.0, - "learning_rate": 1.9767916898805994e-05, - "loss": 1.0658, + "learning_rate": 1.9443919278490278e-05, + "loss": 1.113, "step": 3411 }, { - "epoch": 0.09668735299951826, + "epoch": 0.1335002738868456, "grad_norm": 0.0, - "learning_rate": 1.976772027501632e-05, - "loss": 1.0935, + "learning_rate": 1.9443502509234026e-05, + "loss": 1.2078, "step": 3412 }, { - "epoch": 0.09671569044178073, + "epoch": 0.13353940057907504, "grad_norm": 0.0, - "learning_rate": 1.9767523568949506e-05, - "loss": 1.116, + "learning_rate": 1.944308558832695e-05, + "loss": 1.2049, "step": 3413 }, { - "epoch": 0.09674402788404318, + "epoch": 0.13357852727130448, "grad_norm": 0.0, - "learning_rate": 1.9767326780607218e-05, - "loss": 0.9515, + "learning_rate": 1.9442668515775755e-05, + "loss": 1.188, "step": 3414 }, { - "epoch": 0.09677236532630565, + "epoch": 0.13361765396353392, "grad_norm": 0.0, - "learning_rate": 1.976712990999111e-05, - "loss": 0.9176, + "learning_rate": 1.9442251291587136e-05, + "loss": 1.3024, "step": 3415 }, { - "epoch": 0.09680070276856811, + "epoch": 0.13365678065576336, "grad_norm": 0.0, - "learning_rate": 1.976693295710284e-05, - "loss": 1.0984, + "learning_rate": 1.9441833915767795e-05, + "loss": 1.186, "step": 3416 }, { - "epoch": 0.09682904021083057, + "epoch": 0.1336959073479928, "grad_norm": 0.0, - "learning_rate": 1.9766735921944064e-05, - "loss": 1.0618, + "learning_rate": 1.9441416388324427e-05, + "loss": 1.3642, "step": 3417 }, { - "epoch": 0.09685737765309303, + "epoch": 0.13373503404022224, "grad_norm": 0.0, - "learning_rate": 1.976653880451645e-05, - "loss": 1.125, + "learning_rate": 1.9440998709263747e-05, + "loss": 1.222, "step": 3418 }, { - "epoch": 0.0968857150953555, + "epoch": 0.13377416073245169, "grad_norm": 0.0, - "learning_rate": 1.9766341604821646e-05, - "loss": 0.9579, + "learning_rate": 1.944058087859246e-05, + "loss": 1.113, "step": 3419 }, { - "epoch": 0.09691405253761795, + "epoch": 0.13381328742468113, "grad_norm": 0.0, - "learning_rate": 1.9766144322861323e-05, - "loss": 1.093, + "learning_rate": 1.9440162896317268e-05, + "loss": 1.0491, "step": 3420 }, { - "epoch": 0.09694238997988042, + "epoch": 0.13385241411691057, "grad_norm": 0.0, - "learning_rate": 1.976594695863714e-05, - "loss": 1.109, + "learning_rate": 1.9439744762444893e-05, + "loss": 1.2544, "step": 3421 }, { - "epoch": 0.09697072742214288, + "epoch": 0.13389154080914, "grad_norm": 0.0, - "learning_rate": 1.976574951215076e-05, - "loss": 1.017, + "learning_rate": 1.9439326476982044e-05, + "loss": 1.321, "step": 3422 }, { - "epoch": 0.09699906486440533, + "epoch": 0.13393066750136942, "grad_norm": 0.0, - "learning_rate": 1.9765551983403844e-05, - "loss": 0.965, + "learning_rate": 1.943890803993544e-05, + "loss": 1.2798, "step": 3423 }, { - "epoch": 0.0970274023066678, + "epoch": 0.13396979419359886, "grad_norm": 0.0, - "learning_rate": 1.976535437239806e-05, - "loss": 1.0113, + "learning_rate": 1.9438489451311802e-05, + "loss": 1.164, "step": 3424 }, { - "epoch": 0.09705573974893027, + "epoch": 0.1340089208858283, "grad_norm": 0.0, - "learning_rate": 1.9765156679135067e-05, - "loss": 1.1984, + "learning_rate": 1.9438070711117848e-05, + "loss": 1.1968, "step": 3425 }, { - "epoch": 0.09708407719119272, + "epoch": 0.13404804757805774, "grad_norm": 0.0, - "learning_rate": 1.9764958903616533e-05, - "loss": 1.0045, + "learning_rate": 1.9437651819360308e-05, + "loss": 1.1601, "step": 3426 }, { - "epoch": 0.09711241463345519, + "epoch": 0.13408717427028718, "grad_norm": 0.0, - "learning_rate": 1.9764761045844128e-05, - "loss": 0.9674, + "learning_rate": 1.9437232776045903e-05, + "loss": 1.0734, "step": 3427 }, { - "epoch": 0.09714075207571765, + "epoch": 0.13412630096251663, "grad_norm": 0.0, - "learning_rate": 1.976456310581951e-05, - "loss": 0.9355, + "learning_rate": 1.9436813581181366e-05, + "loss": 1.2102, "step": 3428 }, { - "epoch": 0.0971690895179801, + "epoch": 0.13416542765474607, "grad_norm": 0.0, - "learning_rate": 1.976436508354435e-05, - "loss": 1.1005, + "learning_rate": 1.943639423477343e-05, + "loss": 1.2148, "step": 3429 }, { - "epoch": 0.09719742696024257, + "epoch": 0.1342045543469755, "grad_norm": 0.0, - "learning_rate": 1.976416697902032e-05, - "loss": 1.1038, + "learning_rate": 1.9435974736828825e-05, + "loss": 1.2086, "step": 3430 }, { - "epoch": 0.09722576440250504, + "epoch": 0.13424368103920495, "grad_norm": 0.0, - "learning_rate": 1.9763968792249087e-05, - "loss": 1.1368, + "learning_rate": 1.943555508735429e-05, + "loss": 1.2175, "step": 3431 }, { - "epoch": 0.09725410184476749, + "epoch": 0.1342828077314344, "grad_norm": 0.0, - "learning_rate": 1.976377052323232e-05, - "loss": 1.1147, + "learning_rate": 1.9435135286356563e-05, + "loss": 1.2202, "step": 3432 }, { - "epoch": 0.09728243928702995, + "epoch": 0.13432193442366383, "grad_norm": 0.0, - "learning_rate": 1.9763572171971685e-05, - "loss": 1.1428, + "learning_rate": 1.9434715333842383e-05, + "loss": 1.3006, "step": 3433 }, { - "epoch": 0.09731077672929242, + "epoch": 0.13436106111589327, "grad_norm": 0.0, - "learning_rate": 1.9763373738468857e-05, - "loss": 1.0717, + "learning_rate": 1.9434295229818505e-05, + "loss": 1.0957, "step": 3434 }, { - "epoch": 0.09733911417155487, + "epoch": 0.1344001878081227, "grad_norm": 0.0, - "learning_rate": 1.9763175222725507e-05, - "loss": 1.066, + "learning_rate": 1.943387497429166e-05, + "loss": 1.1854, "step": 3435 }, { - "epoch": 0.09736745161381734, + "epoch": 0.13443931450035215, "grad_norm": 0.0, - "learning_rate": 1.9762976624743304e-05, - "loss": 1.1079, + "learning_rate": 1.9433454567268607e-05, + "loss": 1.226, "step": 3436 }, { - "epoch": 0.0973957890560798, + "epoch": 0.13447844119258157, "grad_norm": 0.0, - "learning_rate": 1.9762777944523926e-05, - "loss": 1.0825, + "learning_rate": 1.9433034008756096e-05, + "loss": 1.3373, "step": 3437 }, { - "epoch": 0.09742412649834226, + "epoch": 0.134517567884811, "grad_norm": 0.0, - "learning_rate": 1.9762579182069043e-05, - "loss": 1.0645, + "learning_rate": 1.943261329876088e-05, + "loss": 1.2419, "step": 3438 }, { - "epoch": 0.09745246394060472, + "epoch": 0.13455669457704045, "grad_norm": 0.0, - "learning_rate": 1.976238033738033e-05, - "loss": 1.1491, + "learning_rate": 1.9432192437289712e-05, + "loss": 1.1807, "step": 3439 }, { - "epoch": 0.09748080138286719, + "epoch": 0.1345958212692699, "grad_norm": 0.0, - "learning_rate": 1.976218141045946e-05, - "loss": 0.9743, + "learning_rate": 1.9431771424349354e-05, + "loss": 1.1665, "step": 3440 }, { - "epoch": 0.09750913882512964, + "epoch": 0.13463494796149933, "grad_norm": 0.0, - "learning_rate": 1.9761982401308116e-05, - "loss": 1.0506, + "learning_rate": 1.9431350259946563e-05, + "loss": 1.1431, "step": 3441 }, { - "epoch": 0.0975374762673921, + "epoch": 0.13467407465372877, "grad_norm": 0.0, - "learning_rate": 1.9761783309927968e-05, - "loss": 1.0476, + "learning_rate": 1.9430928944088107e-05, + "loss": 1.0773, "step": 3442 }, { - "epoch": 0.09756581370965457, + "epoch": 0.1347132013459582, "grad_norm": 0.0, - "learning_rate": 1.976158413632069e-05, - "loss": 1.1582, + "learning_rate": 1.943050747678075e-05, + "loss": 1.2581, "step": 3443 }, { - "epoch": 0.09759415115191702, + "epoch": 0.13475232803818765, "grad_norm": 0.0, - "learning_rate": 1.9761384880487967e-05, - "loss": 1.0475, + "learning_rate": 1.9430085858031258e-05, + "loss": 1.3192, "step": 3444 }, { - "epoch": 0.09762248859417949, + "epoch": 0.1347914547304171, "grad_norm": 0.0, - "learning_rate": 1.9761185542431475e-05, - "loss": 1.0494, + "learning_rate": 1.9429664087846407e-05, + "loss": 1.2042, "step": 3445 }, { - "epoch": 0.09765082603644196, + "epoch": 0.13483058142264653, "grad_norm": 0.0, - "learning_rate": 1.9760986122152896e-05, - "loss": 1.0636, + "learning_rate": 1.9429242166232966e-05, + "loss": 1.0297, "step": 3446 }, { - "epoch": 0.09767916347870441, + "epoch": 0.13486970811487597, "grad_norm": 0.0, - "learning_rate": 1.97607866196539e-05, - "loss": 1.11, + "learning_rate": 1.9428820093197708e-05, + "loss": 1.1585, "step": 3447 }, { - "epoch": 0.09770750092096687, + "epoch": 0.13490883480710542, "grad_norm": 0.0, - "learning_rate": 1.9760587034936175e-05, - "loss": 1.1228, + "learning_rate": 1.9428397868747416e-05, + "loss": 1.2093, "step": 3448 }, { - "epoch": 0.09773583836322934, + "epoch": 0.13494796149933486, "grad_norm": 0.0, - "learning_rate": 1.97603873680014e-05, - "loss": 1.1406, + "learning_rate": 1.9427975492888868e-05, + "loss": 1.3597, "step": 3449 }, { - "epoch": 0.0977641758054918, + "epoch": 0.1349870881915643, "grad_norm": 0.0, - "learning_rate": 1.9760187618851262e-05, - "loss": 1.1306, + "learning_rate": 1.9427552965628848e-05, + "loss": 1.1675, "step": 3450 }, { - "epoch": 0.09779251324775426, + "epoch": 0.1350262148837937, "grad_norm": 0.0, - "learning_rate": 1.9759987787487437e-05, - "loss": 1.1852, + "learning_rate": 1.9427130286974144e-05, + "loss": 1.1821, "step": 3451 }, { - "epoch": 0.09782085069001673, + "epoch": 0.13506534157602315, "grad_norm": 0.0, - "learning_rate": 1.9759787873911608e-05, - "loss": 0.9895, + "learning_rate": 1.9426707456931534e-05, + "loss": 1.2452, "step": 3452 }, { - "epoch": 0.09784918813227918, + "epoch": 0.1351044682682526, "grad_norm": 0.0, - "learning_rate": 1.9759587878125468e-05, - "loss": 1.1215, + "learning_rate": 1.942628447550782e-05, + "loss": 1.2345, "step": 3453 }, { - "epoch": 0.09787752557454164, + "epoch": 0.13514359496048203, "grad_norm": 0.0, - "learning_rate": 1.975938780013069e-05, - "loss": 1.0945, + "learning_rate": 1.9425861342709788e-05, + "loss": 1.2383, "step": 3454 }, { - "epoch": 0.09790586301680411, + "epoch": 0.13518272165271147, "grad_norm": 0.0, - "learning_rate": 1.9759187639928967e-05, - "loss": 1.166, + "learning_rate": 1.9425438058544233e-05, + "loss": 1.1517, "step": 3455 }, { - "epoch": 0.09793420045906656, + "epoch": 0.13522184834494091, "grad_norm": 0.0, - "learning_rate": 1.9758987397521977e-05, - "loss": 1.0244, + "learning_rate": 1.9425014623017953e-05, + "loss": 1.2261, "step": 3456 }, { - "epoch": 0.09796253790132903, + "epoch": 0.13526097503717036, "grad_norm": 0.0, - "learning_rate": 1.975878707291142e-05, - "loss": 1.0898, + "learning_rate": 1.942459103613775e-05, + "loss": 1.3705, "step": 3457 }, { - "epoch": 0.0979908753435915, + "epoch": 0.1353001017293998, "grad_norm": 0.0, - "learning_rate": 1.975858666609897e-05, - "loss": 1.061, + "learning_rate": 1.9424167297910425e-05, + "loss": 1.2511, "step": 3458 }, { - "epoch": 0.09801921278585395, + "epoch": 0.13533922842162924, "grad_norm": 0.0, - "learning_rate": 1.9758386177086324e-05, - "loss": 1.0441, + "learning_rate": 1.942374340834278e-05, + "loss": 1.2046, "step": 3459 }, { - "epoch": 0.09804755022811641, + "epoch": 0.13537835511385868, "grad_norm": 0.0, - "learning_rate": 1.9758185605875165e-05, - "loss": 1.1163, + "learning_rate": 1.9423319367441625e-05, + "loss": 1.158, "step": 3460 }, { - "epoch": 0.09807588767037888, + "epoch": 0.13541748180608812, "grad_norm": 0.0, - "learning_rate": 1.9757984952467186e-05, - "loss": 1.1481, + "learning_rate": 1.9422895175213772e-05, + "loss": 1.2061, "step": 3461 }, { - "epoch": 0.09810422511264133, + "epoch": 0.13545660849831756, "grad_norm": 0.0, - "learning_rate": 1.975778421686408e-05, - "loss": 1.0272, + "learning_rate": 1.942247083166603e-05, + "loss": 1.1968, "step": 3462 }, { - "epoch": 0.0981325625549038, + "epoch": 0.135495735190547, "grad_norm": 0.0, - "learning_rate": 1.975758339906753e-05, - "loss": 1.0663, + "learning_rate": 1.9422046336805207e-05, + "loss": 1.2328, "step": 3463 }, { - "epoch": 0.09816089999716626, + "epoch": 0.13553486188277644, "grad_norm": 0.0, - "learning_rate": 1.975738249907923e-05, - "loss": 1.101, + "learning_rate": 1.942162169063813e-05, + "loss": 1.1639, "step": 3464 }, { - "epoch": 0.09818923743942871, + "epoch": 0.13557398857500588, "grad_norm": 0.0, - "learning_rate": 1.975718151690088e-05, - "loss": 1.0046, + "learning_rate": 1.9421196893171617e-05, + "loss": 1.1855, "step": 3465 }, { - "epoch": 0.09821757488169118, + "epoch": 0.1356131152672353, "grad_norm": 0.0, - "learning_rate": 1.975698045253416e-05, - "loss": 1.1042, + "learning_rate": 1.9420771944412486e-05, + "loss": 1.3157, "step": 3466 }, { - "epoch": 0.09824591232395365, + "epoch": 0.13565224195946474, "grad_norm": 0.0, - "learning_rate": 1.9756779305980775e-05, - "loss": 1.0837, + "learning_rate": 1.9420346844367562e-05, + "loss": 1.2004, "step": 3467 }, { - "epoch": 0.0982742497662161, + "epoch": 0.13569136865169418, "grad_norm": 0.0, - "learning_rate": 1.975657807724241e-05, - "loss": 0.9861, + "learning_rate": 1.941992159304367e-05, + "loss": 1.187, "step": 3468 }, { - "epoch": 0.09830258720847856, + "epoch": 0.13573049534392362, "grad_norm": 0.0, - "learning_rate": 1.975637676632077e-05, - "loss": 1.0969, + "learning_rate": 1.9419496190447645e-05, + "loss": 1.174, "step": 3469 }, { - "epoch": 0.09833092465074103, + "epoch": 0.13576962203615306, "grad_norm": 0.0, - "learning_rate": 1.9756175373217547e-05, - "loss": 1.0774, + "learning_rate": 1.941907063658631e-05, + "loss": 1.1549, "step": 3470 }, { - "epoch": 0.09835926209300348, + "epoch": 0.1358087487283825, "grad_norm": 0.0, - "learning_rate": 1.975597389793443e-05, - "loss": 1.077, + "learning_rate": 1.9418644931466507e-05, + "loss": 1.1732, "step": 3471 }, { - "epoch": 0.09838759953526595, + "epoch": 0.13584787542061194, "grad_norm": 0.0, - "learning_rate": 1.9755772340473124e-05, - "loss": 1.0545, + "learning_rate": 1.941821907509507e-05, + "loss": 1.1872, "step": 3472 }, { - "epoch": 0.09841593697752841, + "epoch": 0.13588700211284138, "grad_norm": 0.0, - "learning_rate": 1.9755570700835327e-05, - "loss": 1.132, + "learning_rate": 1.9417793067478832e-05, + "loss": 1.3188, "step": 3473 }, { - "epoch": 0.09844427441979087, + "epoch": 0.13592612880507082, "grad_norm": 0.0, - "learning_rate": 1.9755368979022734e-05, - "loss": 1.0358, + "learning_rate": 1.9417366908624638e-05, + "loss": 1.1635, "step": 3474 }, { - "epoch": 0.09847261186205333, + "epoch": 0.13596525549730026, "grad_norm": 0.0, - "learning_rate": 1.9755167175037044e-05, - "loss": 1.0256, + "learning_rate": 1.9416940598539335e-05, + "loss": 1.1415, "step": 3475 }, { - "epoch": 0.0985009493043158, + "epoch": 0.1360043821895297, "grad_norm": 0.0, - "learning_rate": 1.975496528887996e-05, - "loss": 1.1766, + "learning_rate": 1.9416514137229767e-05, + "loss": 1.0001, "step": 3476 }, { - "epoch": 0.09852928674657825, + "epoch": 0.13604350888175915, "grad_norm": 0.0, - "learning_rate": 1.975476332055318e-05, - "loss": 1.1709, + "learning_rate": 1.941608752470278e-05, + "loss": 1.195, "step": 3477 }, { - "epoch": 0.09855762418884072, + "epoch": 0.1360826355739886, "grad_norm": 0.0, - "learning_rate": 1.975456127005841e-05, - "loss": 1.0787, + "learning_rate": 1.9415660760965223e-05, + "loss": 1.2423, "step": 3478 }, { - "epoch": 0.09858596163110318, + "epoch": 0.13612176226621803, "grad_norm": 0.0, - "learning_rate": 1.9754359137397343e-05, - "loss": 1.0323, + "learning_rate": 1.941523384602396e-05, + "loss": 1.2302, "step": 3479 }, { - "epoch": 0.09861429907336564, + "epoch": 0.13616088895844744, "grad_norm": 0.0, - "learning_rate": 1.975415692257169e-05, - "loss": 1.0739, + "learning_rate": 1.9414806779885836e-05, + "loss": 1.2225, "step": 3480 }, { - "epoch": 0.0986426365156281, + "epoch": 0.13620001565067688, "grad_norm": 0.0, - "learning_rate": 1.975395462558315e-05, - "loss": 1.101, + "learning_rate": 1.941437956255771e-05, + "loss": 1.1946, "step": 3481 }, { - "epoch": 0.09867097395789057, + "epoch": 0.13623914234290632, "grad_norm": 0.0, - "learning_rate": 1.9753752246433427e-05, - "loss": 1.1934, + "learning_rate": 1.941395219404645e-05, + "loss": 1.1821, "step": 3482 }, { - "epoch": 0.09869931140015302, + "epoch": 0.13627826903513576, "grad_norm": 0.0, - "learning_rate": 1.975354978512423e-05, - "loss": 1.1924, + "learning_rate": 1.9413524674358907e-05, + "loss": 1.1967, "step": 3483 }, { - "epoch": 0.09872764884241549, + "epoch": 0.1363173957273652, "grad_norm": 0.0, - "learning_rate": 1.9753347241657258e-05, - "loss": 1.2427, + "learning_rate": 1.941309700350196e-05, + "loss": 1.132, "step": 3484 }, { - "epoch": 0.09875598628467795, + "epoch": 0.13635652241959464, "grad_norm": 0.0, - "learning_rate": 1.9753144616034224e-05, - "loss": 1.0785, + "learning_rate": 1.9412669181482467e-05, + "loss": 1.1053, "step": 3485 }, { - "epoch": 0.0987843237269404, + "epoch": 0.13639564911182409, "grad_norm": 0.0, - "learning_rate": 1.9752941908256828e-05, - "loss": 1.0203, + "learning_rate": 1.94122412083073e-05, + "loss": 1.2185, "step": 3486 }, { - "epoch": 0.09881266116920287, + "epoch": 0.13643477580405353, "grad_norm": 0.0, - "learning_rate": 1.975273911832678e-05, - "loss": 0.9946, + "learning_rate": 1.941181308398334e-05, + "loss": 1.1915, "step": 3487 }, { - "epoch": 0.09884099861146534, + "epoch": 0.13647390249628297, "grad_norm": 0.0, - "learning_rate": 1.9752536246245793e-05, - "loss": 1.1077, + "learning_rate": 1.941138480851745e-05, + "loss": 1.1175, "step": 3488 }, { - "epoch": 0.09886933605372779, + "epoch": 0.1365130291885124, "grad_norm": 0.0, - "learning_rate": 1.9752333292015565e-05, - "loss": 1.0039, + "learning_rate": 1.9410956381916514e-05, + "loss": 1.2513, "step": 3489 }, { - "epoch": 0.09889767349599025, + "epoch": 0.13655215588074185, "grad_norm": 0.0, - "learning_rate": 1.975213025563782e-05, - "loss": 0.9673, + "learning_rate": 1.9410527804187412e-05, + "loss": 1.2429, "step": 3490 }, { - "epoch": 0.0989260109382527, + "epoch": 0.1365912825729713, "grad_norm": 0.0, - "learning_rate": 1.9751927137114255e-05, - "loss": 1.0639, + "learning_rate": 1.9410099075337028e-05, + "loss": 1.1631, "step": 3491 }, { - "epoch": 0.09895434838051517, + "epoch": 0.13663040926520073, "grad_norm": 0.0, - "learning_rate": 1.975172393644659e-05, - "loss": 0.9836, + "learning_rate": 1.940967019537224e-05, + "loss": 1.038, "step": 3492 }, { - "epoch": 0.09898268582277764, + "epoch": 0.13666953595743017, "grad_norm": 0.0, - "learning_rate": 1.9751520653636528e-05, - "loss": 1.0858, + "learning_rate": 1.9409241164299942e-05, + "loss": 1.1463, "step": 3493 }, { - "epoch": 0.09901102326504009, + "epoch": 0.13670866264965958, "grad_norm": 0.0, - "learning_rate": 1.975131728868579e-05, - "loss": 1.0818, + "learning_rate": 1.940881198212702e-05, + "loss": 1.2415, "step": 3494 }, { - "epoch": 0.09903936070730256, + "epoch": 0.13674778934188903, "grad_norm": 0.0, - "learning_rate": 1.9751113841596087e-05, - "loss": 1.034, + "learning_rate": 1.940838264886037e-05, + "loss": 1.2462, "step": 3495 }, { - "epoch": 0.09906769814956502, + "epoch": 0.13678691603411847, "grad_norm": 0.0, - "learning_rate": 1.975091031236913e-05, - "loss": 1.0846, + "learning_rate": 1.940795316450688e-05, + "loss": 1.3203, "step": 3496 }, { - "epoch": 0.09909603559182747, + "epoch": 0.1368260427263479, "grad_norm": 0.0, - "learning_rate": 1.9750706701006632e-05, - "loss": 1.0581, + "learning_rate": 1.9407523529073455e-05, + "loss": 1.2718, "step": 3497 }, { - "epoch": 0.09912437303408994, + "epoch": 0.13686516941857735, "grad_norm": 0.0, - "learning_rate": 1.9750503007510314e-05, - "loss": 1.1159, + "learning_rate": 1.9407093742566988e-05, + "loss": 1.155, "step": 3498 }, { - "epoch": 0.09915271047635241, + "epoch": 0.1369042961108068, "grad_norm": 0.0, - "learning_rate": 1.9750299231881887e-05, - "loss": 1.0616, + "learning_rate": 1.9406663804994384e-05, + "loss": 1.1572, "step": 3499 }, { - "epoch": 0.09918104791861486, + "epoch": 0.13694342280303623, "grad_norm": 0.0, - "learning_rate": 1.9750095374123067e-05, - "loss": 0.9562, + "learning_rate": 1.9406233716362544e-05, + "loss": 1.225, "step": 3500 }, { - "epoch": 0.09920938536087733, + "epoch": 0.13698254949526567, "grad_norm": 0.0, - "learning_rate": 1.9749891434235575e-05, - "loss": 1.0468, + "learning_rate": 1.940580347667838e-05, + "loss": 1.1915, "step": 3501 }, { - "epoch": 0.09923772280313979, + "epoch": 0.1370216761874951, "grad_norm": 0.0, - "learning_rate": 1.974968741222113e-05, - "loss": 1.0079, + "learning_rate": 1.94053730859488e-05, + "loss": 1.2568, "step": 3502 }, { - "epoch": 0.09926606024540224, + "epoch": 0.13706080287972455, "grad_norm": 0.0, - "learning_rate": 1.974948330808144e-05, - "loss": 1.0041, + "learning_rate": 1.940494254418071e-05, + "loss": 1.2125, "step": 3503 }, { - "epoch": 0.09929439768766471, + "epoch": 0.137099929571954, "grad_norm": 0.0, - "learning_rate": 1.9749279121818235e-05, - "loss": 1.0911, + "learning_rate": 1.9404511851381032e-05, + "loss": 1.2465, "step": 3504 }, { - "epoch": 0.09932273512992718, + "epoch": 0.13713905626418343, "grad_norm": 0.0, - "learning_rate": 1.9749074853433236e-05, - "loss": 1.0849, + "learning_rate": 1.9404081007556673e-05, + "loss": 1.1589, "step": 3505 }, { - "epoch": 0.09935107257218963, + "epoch": 0.13717818295641288, "grad_norm": 0.0, - "learning_rate": 1.9748870502928155e-05, - "loss": 1.1301, + "learning_rate": 1.9403650012714563e-05, + "loss": 1.0957, "step": 3506 }, { - "epoch": 0.0993794100144521, + "epoch": 0.13721730964864232, "grad_norm": 0.0, - "learning_rate": 1.9748666070304717e-05, - "loss": 0.9775, + "learning_rate": 1.940321886686161e-05, + "loss": 1.0853, "step": 3507 }, { - "epoch": 0.09940774745671456, + "epoch": 0.13725643634087173, "grad_norm": 0.0, - "learning_rate": 1.9748461555564647e-05, - "loss": 0.9924, + "learning_rate": 1.940278757000475e-05, + "loss": 1.2385, "step": 3508 }, { - "epoch": 0.09943608489897701, + "epoch": 0.13729556303310117, "grad_norm": 0.0, - "learning_rate": 1.9748256958709666e-05, - "loss": 1.0114, + "learning_rate": 1.94023561221509e-05, + "loss": 1.294, "step": 3509 }, { - "epoch": 0.09946442234123948, + "epoch": 0.1373346897253306, "grad_norm": 0.0, - "learning_rate": 1.9748052279741494e-05, - "loss": 1.1147, + "learning_rate": 1.9401924523306998e-05, + "loss": 1.251, "step": 3510 }, { - "epoch": 0.09949275978350194, + "epoch": 0.13737381641756005, "grad_norm": 0.0, - "learning_rate": 1.974784751866186e-05, - "loss": 1.1669, + "learning_rate": 1.9401492773479966e-05, + "loss": 1.1728, "step": 3511 }, { - "epoch": 0.0995210972257644, + "epoch": 0.1374129431097895, "grad_norm": 0.0, - "learning_rate": 1.9747642675472484e-05, - "loss": 1.0164, + "learning_rate": 1.940106087267674e-05, + "loss": 1.1326, "step": 3512 }, { - "epoch": 0.09954943466802686, + "epoch": 0.13745206980201893, "grad_norm": 0.0, - "learning_rate": 1.9747437750175097e-05, - "loss": 1.2054, + "learning_rate": 1.940062882090426e-05, + "loss": 1.2356, "step": 3513 }, { - "epoch": 0.09957777211028933, + "epoch": 0.13749119649424837, "grad_norm": 0.0, - "learning_rate": 1.974723274277142e-05, - "loss": 1.0868, + "learning_rate": 1.940019661816946e-05, + "loss": 1.3602, "step": 3514 }, { - "epoch": 0.09960610955255178, + "epoch": 0.13753032318647782, "grad_norm": 0.0, - "learning_rate": 1.9747027653263183e-05, - "loss": 1.1428, + "learning_rate": 1.939976426447928e-05, + "loss": 1.1753, "step": 3515 }, { - "epoch": 0.09963444699481425, + "epoch": 0.13756944987870726, "grad_norm": 0.0, - "learning_rate": 1.974682248165211e-05, - "loss": 1.1701, + "learning_rate": 1.9399331759840664e-05, + "loss": 1.3192, "step": 3516 }, { - "epoch": 0.09966278443707671, + "epoch": 0.1376085765709367, "grad_norm": 0.0, - "learning_rate": 1.9746617227939935e-05, - "loss": 0.983, + "learning_rate": 1.939889910426056e-05, + "loss": 1.2093, "step": 3517 }, { - "epoch": 0.09969112187933916, + "epoch": 0.13764770326316614, "grad_norm": 0.0, - "learning_rate": 1.9746411892128383e-05, - "loss": 0.972, + "learning_rate": 1.939846629774591e-05, + "loss": 1.0891, "step": 3518 }, { - "epoch": 0.09971945932160163, + "epoch": 0.13768682995539558, "grad_norm": 0.0, - "learning_rate": 1.9746206474219182e-05, - "loss": 1.0782, + "learning_rate": 1.939803334030367e-05, + "loss": 1.2791, "step": 3519 }, { - "epoch": 0.0997477967638641, + "epoch": 0.13772595664762502, "grad_norm": 0.0, - "learning_rate": 1.9746000974214067e-05, - "loss": 1.0247, + "learning_rate": 1.9397600231940795e-05, + "loss": 1.1771, "step": 3520 }, { - "epoch": 0.09977613420612655, + "epoch": 0.13776508333985446, "grad_norm": 0.0, - "learning_rate": 1.974579539211477e-05, - "loss": 1.05, + "learning_rate": 1.9397166972664232e-05, + "loss": 1.1584, "step": 3521 }, { - "epoch": 0.09980447164838901, + "epoch": 0.1378042100320839, "grad_norm": 0.0, - "learning_rate": 1.9745589727923014e-05, - "loss": 1.0891, + "learning_rate": 1.9396733562480943e-05, + "loss": 1.2996, "step": 3522 }, { - "epoch": 0.09983280909065148, + "epoch": 0.13784333672431331, "grad_norm": 0.0, - "learning_rate": 1.974538398164054e-05, - "loss": 1.0952, + "learning_rate": 1.9396300001397888e-05, + "loss": 1.2183, "step": 3523 }, { - "epoch": 0.09986114653291393, + "epoch": 0.13788246341654276, "grad_norm": 0.0, - "learning_rate": 1.9745178153269075e-05, - "loss": 1.0352, + "learning_rate": 1.939586628942203e-05, + "loss": 1.1156, "step": 3524 }, { - "epoch": 0.0998894839751764, + "epoch": 0.1379215901087722, "grad_norm": 0.0, - "learning_rate": 1.974497224281036e-05, - "loss": 1.1188, + "learning_rate": 1.9395432426560332e-05, + "loss": 1.2031, "step": 3525 }, { - "epoch": 0.09991782141743887, + "epoch": 0.13796071680100164, "grad_norm": 0.0, - "learning_rate": 1.974476625026612e-05, - "loss": 1.1099, + "learning_rate": 1.9394998412819763e-05, + "loss": 1.2116, "step": 3526 }, { - "epoch": 0.09994615885970132, + "epoch": 0.13799984349323108, "grad_norm": 0.0, - "learning_rate": 1.97445601756381e-05, - "loss": 1.0125, + "learning_rate": 1.939456424820729e-05, + "loss": 1.1798, "step": 3527 }, { - "epoch": 0.09997449630196378, + "epoch": 0.13803897018546052, "grad_norm": 0.0, - "learning_rate": 1.974435401892803e-05, - "loss": 1.0477, + "learning_rate": 1.9394129932729893e-05, + "loss": 1.2037, "step": 3528 }, { - "epoch": 0.10000283374422625, + "epoch": 0.13807809687768996, "grad_norm": 0.0, - "learning_rate": 1.9744147780137644e-05, - "loss": 1.103, + "learning_rate": 1.9393695466394535e-05, + "loss": 1.1572, "step": 3529 }, { - "epoch": 0.1000311711864887, + "epoch": 0.1381172235699194, "grad_norm": 0.0, - "learning_rate": 1.974394145926869e-05, - "loss": 1.2067, + "learning_rate": 1.9393260849208202e-05, + "loss": 1.2089, "step": 3530 }, { - "epoch": 0.10005950862875117, + "epoch": 0.13815635026214884, "grad_norm": 0.0, - "learning_rate": 1.974373505632289e-05, - "loss": 1.1214, + "learning_rate": 1.939282608117787e-05, + "loss": 1.1366, "step": 3531 }, { - "epoch": 0.10008784607101363, + "epoch": 0.13819547695437828, "grad_norm": 0.0, - "learning_rate": 1.9743528571301996e-05, - "loss": 1.0717, + "learning_rate": 1.9392391162310516e-05, + "loss": 1.1314, "step": 3532 }, { - "epoch": 0.10011618351327609, + "epoch": 0.13823460364660772, "grad_norm": 0.0, - "learning_rate": 1.9743322004207743e-05, - "loss": 0.999, + "learning_rate": 1.9391956092613132e-05, + "loss": 1.1837, "step": 3533 }, { - "epoch": 0.10014452095553855, + "epoch": 0.13827373033883716, "grad_norm": 0.0, - "learning_rate": 1.9743115355041868e-05, - "loss": 1.1105, + "learning_rate": 1.9391520872092705e-05, + "loss": 1.2556, "step": 3534 }, { - "epoch": 0.10017285839780102, + "epoch": 0.1383128570310666, "grad_norm": 0.0, - "learning_rate": 1.9742908623806117e-05, - "loss": 1.0576, + "learning_rate": 1.9391085500756223e-05, + "loss": 1.2723, "step": 3535 }, { - "epoch": 0.10020119584006347, + "epoch": 0.13835198372329605, "grad_norm": 0.0, - "learning_rate": 1.9742701810502228e-05, - "loss": 1.1117, + "learning_rate": 1.939064997861067e-05, + "loss": 1.1822, "step": 3536 }, { - "epoch": 0.10022953328232594, + "epoch": 0.13839111041552546, "grad_norm": 0.0, - "learning_rate": 1.9742494915131943e-05, - "loss": 1.0069, + "learning_rate": 1.9390214305663048e-05, + "loss": 1.1912, "step": 3537 }, { - "epoch": 0.1002578707245884, + "epoch": 0.1384302371077549, "grad_norm": 0.0, - "learning_rate": 1.9742287937697006e-05, - "loss": 1.0966, + "learning_rate": 1.938977848192035e-05, + "loss": 1.1299, "step": 3538 }, { - "epoch": 0.10028620816685085, + "epoch": 0.13846936379998434, "grad_norm": 0.0, - "learning_rate": 1.974208087819916e-05, - "loss": 1.0849, + "learning_rate": 1.9389342507389573e-05, + "loss": 1.2942, "step": 3539 }, { - "epoch": 0.10031454560911332, + "epoch": 0.13850849049221378, "grad_norm": 0.0, - "learning_rate": 1.974187373664015e-05, - "loss": 1.1066, + "learning_rate": 1.9388906382077724e-05, + "loss": 1.1631, "step": 3540 }, { - "epoch": 0.10034288305137579, + "epoch": 0.13854761718444322, "grad_norm": 0.0, - "learning_rate": 1.9741666513021718e-05, - "loss": 1.0194, + "learning_rate": 1.9388470105991805e-05, + "loss": 1.1323, "step": 3541 }, { - "epoch": 0.10037122049363824, + "epoch": 0.13858674387667266, "grad_norm": 0.0, - "learning_rate": 1.9741459207345614e-05, - "loss": 1.1118, + "learning_rate": 1.9388033679138818e-05, + "loss": 1.1668, "step": 3542 }, { - "epoch": 0.1003995579359007, + "epoch": 0.1386258705689021, "grad_norm": 0.0, - "learning_rate": 1.974125181961358e-05, - "loss": 1.0517, + "learning_rate": 1.9387597101525775e-05, + "loss": 1.2866, "step": 3543 }, { - "epoch": 0.10042789537816317, + "epoch": 0.13866499726113155, "grad_norm": 0.0, - "learning_rate": 1.9741044349827365e-05, - "loss": 1.138, + "learning_rate": 1.9387160373159684e-05, + "loss": 1.2017, "step": 3544 }, { - "epoch": 0.10045623282042562, + "epoch": 0.13870412395336099, "grad_norm": 0.0, - "learning_rate": 1.9740836797988717e-05, - "loss": 1.0591, + "learning_rate": 1.938672349404756e-05, + "loss": 1.1885, "step": 3545 }, { - "epoch": 0.10048457026268809, + "epoch": 0.13874325064559043, "grad_norm": 0.0, - "learning_rate": 1.9740629164099384e-05, - "loss": 1.0843, + "learning_rate": 1.938628646419642e-05, + "loss": 1.1525, "step": 3546 }, { - "epoch": 0.10051290770495055, + "epoch": 0.13878237733781987, "grad_norm": 0.0, - "learning_rate": 1.974042144816111e-05, - "loss": 1.062, + "learning_rate": 1.9385849283613282e-05, + "loss": 1.2365, "step": 3547 }, { - "epoch": 0.10054124514721301, + "epoch": 0.1388215040300493, "grad_norm": 0.0, - "learning_rate": 1.9740213650175653e-05, - "loss": 1.0266, + "learning_rate": 1.9385411952305166e-05, + "loss": 1.2576, "step": 3548 }, { - "epoch": 0.10056958258947547, + "epoch": 0.13886063072227875, "grad_norm": 0.0, - "learning_rate": 1.9740005770144762e-05, - "loss": 1.2122, + "learning_rate": 1.9384974470279093e-05, + "loss": 1.3671, "step": 3549 }, { - "epoch": 0.10059792003173794, + "epoch": 0.1388997574145082, "grad_norm": 0.0, - "learning_rate": 1.973979780807018e-05, - "loss": 1.0826, + "learning_rate": 1.938453683754209e-05, + "loss": 1.2363, "step": 3550 }, { - "epoch": 0.10062625747400039, + "epoch": 0.1389388841067376, "grad_norm": 0.0, - "learning_rate": 1.9739589763953665e-05, - "loss": 1.0234, + "learning_rate": 1.9384099054101187e-05, + "loss": 1.1158, "step": 3551 }, { - "epoch": 0.10065459491626286, + "epoch": 0.13897801079896704, "grad_norm": 0.0, - "learning_rate": 1.9739381637796972e-05, - "loss": 1.0022, + "learning_rate": 1.938366111996341e-05, + "loss": 1.2565, "step": 3552 }, { - "epoch": 0.10068293235852532, + "epoch": 0.13901713749119649, "grad_norm": 0.0, - "learning_rate": 1.973917342960185e-05, - "loss": 1.0731, + "learning_rate": 1.9383223035135798e-05, + "loss": 1.2094, "step": 3553 }, { - "epoch": 0.10071126980078778, + "epoch": 0.13905626418342593, "grad_norm": 0.0, - "learning_rate": 1.9738965139370052e-05, - "loss": 1.0465, + "learning_rate": 1.9382784799625377e-05, + "loss": 1.1343, "step": 3554 }, { - "epoch": 0.10073960724305024, + "epoch": 0.13909539087565537, "grad_norm": 0.0, - "learning_rate": 1.9738756767103335e-05, - "loss": 1.0931, + "learning_rate": 1.938234641343919e-05, + "loss": 1.1859, "step": 3555 }, { - "epoch": 0.10076794468531271, + "epoch": 0.1391345175678848, "grad_norm": 0.0, - "learning_rate": 1.9738548312803452e-05, - "loss": 1.0911, + "learning_rate": 1.9381907876584277e-05, + "loss": 1.2136, "step": 3556 }, { - "epoch": 0.10079628212757516, + "epoch": 0.13917364426011425, "grad_norm": 0.0, - "learning_rate": 1.9738339776472162e-05, - "loss": 1.1167, + "learning_rate": 1.938146918906768e-05, + "loss": 1.2056, "step": 3557 }, { - "epoch": 0.10082461956983763, + "epoch": 0.1392127709523437, "grad_norm": 0.0, - "learning_rate": 1.973813115811122e-05, - "loss": 1.1978, + "learning_rate": 1.938103035089644e-05, + "loss": 1.146, "step": 3558 }, { - "epoch": 0.10085295701210009, + "epoch": 0.13925189764457313, "grad_norm": 0.0, - "learning_rate": 1.973792245772238e-05, - "loss": 0.9921, + "learning_rate": 1.9380591362077614e-05, + "loss": 1.2881, "step": 3559 }, { - "epoch": 0.10088129445436254, + "epoch": 0.13929102433680257, "grad_norm": 0.0, - "learning_rate": 1.973771367530741e-05, - "loss": 1.117, + "learning_rate": 1.9380152222618243e-05, + "loss": 1.3289, "step": 3560 }, { - "epoch": 0.10090963189662501, + "epoch": 0.139330151029032, "grad_norm": 0.0, - "learning_rate": 1.9737504810868055e-05, - "loss": 1.046, + "learning_rate": 1.937971293252538e-05, + "loss": 1.1838, "step": 3561 }, { - "epoch": 0.10093796933888748, + "epoch": 0.13936927772126145, "grad_norm": 0.0, - "learning_rate": 1.9737295864406082e-05, - "loss": 1.1212, + "learning_rate": 1.937927349180608e-05, + "loss": 1.0883, "step": 3562 }, { - "epoch": 0.10096630678114993, + "epoch": 0.1394084044134909, "grad_norm": 0.0, - "learning_rate": 1.9737086835923252e-05, - "loss": 1.0531, + "learning_rate": 1.9378833900467403e-05, + "loss": 1.2726, "step": 3563 }, { - "epoch": 0.1009946442234124, + "epoch": 0.13944753110572033, "grad_norm": 0.0, - "learning_rate": 1.9736877725421325e-05, - "loss": 1.0668, + "learning_rate": 1.9378394158516406e-05, + "loss": 1.1296, "step": 3564 }, { - "epoch": 0.10102298166567486, + "epoch": 0.13948665779794975, "grad_norm": 0.0, - "learning_rate": 1.973666853290206e-05, - "loss": 1.0375, + "learning_rate": 1.9377954265960147e-05, + "loss": 1.2663, "step": 3565 }, { - "epoch": 0.10105131910793731, + "epoch": 0.1395257844901792, "grad_norm": 0.0, - "learning_rate": 1.973645925836722e-05, - "loss": 1.0373, + "learning_rate": 1.9377514222805698e-05, + "loss": 1.1847, "step": 3566 }, { - "epoch": 0.10107965655019978, + "epoch": 0.13956491118240863, "grad_norm": 0.0, - "learning_rate": 1.9736249901818567e-05, - "loss": 1.0182, + "learning_rate": 1.9377074029060118e-05, + "loss": 1.0638, "step": 3567 }, { - "epoch": 0.10110799399246224, + "epoch": 0.13960403787463807, "grad_norm": 0.0, - "learning_rate": 1.9736040463257865e-05, - "loss": 1.0484, + "learning_rate": 1.937663368473048e-05, + "loss": 1.179, "step": 3568 }, { - "epoch": 0.1011363314347247, + "epoch": 0.1396431645668675, "grad_norm": 0.0, - "learning_rate": 1.973583094268688e-05, - "loss": 1.0696, + "learning_rate": 1.9376193189823858e-05, + "loss": 1.325, "step": 3569 }, { - "epoch": 0.10116466887698716, + "epoch": 0.13968229125909695, "grad_norm": 0.0, - "learning_rate": 1.9735621340107376e-05, - "loss": 1.1539, + "learning_rate": 1.937575254434732e-05, + "loss": 1.2375, "step": 3570 }, { - "epoch": 0.10119300631924963, + "epoch": 0.1397214179513264, "grad_norm": 0.0, - "learning_rate": 1.9735411655521116e-05, - "loss": 1.1487, + "learning_rate": 1.9375311748307944e-05, + "loss": 1.1657, "step": 3571 }, { - "epoch": 0.10122134376151208, + "epoch": 0.13976054464355583, "grad_norm": 0.0, - "learning_rate": 1.9735201888929873e-05, - "loss": 0.9521, + "learning_rate": 1.937487080171281e-05, + "loss": 1.1071, "step": 3572 }, { - "epoch": 0.10124968120377455, + "epoch": 0.13979967133578527, "grad_norm": 0.0, - "learning_rate": 1.973499204033541e-05, - "loss": 1.0187, + "learning_rate": 1.9374429704568997e-05, + "loss": 1.1248, "step": 3573 }, { - "epoch": 0.10127801864603701, + "epoch": 0.13983879802801472, "grad_norm": 0.0, - "learning_rate": 1.9734782109739488e-05, - "loss": 1.0448, + "learning_rate": 1.937398845688359e-05, + "loss": 1.1899, "step": 3574 }, { - "epoch": 0.10130635608829947, + "epoch": 0.13987792472024416, "grad_norm": 0.0, - "learning_rate": 1.9734572097143884e-05, - "loss": 1.0934, + "learning_rate": 1.9373547058663674e-05, + "loss": 1.2156, "step": 3575 }, { - "epoch": 0.10133469353056193, + "epoch": 0.1399170514124736, "grad_norm": 0.0, - "learning_rate": 1.9734362002550363e-05, - "loss": 0.9802, + "learning_rate": 1.9373105509916338e-05, + "loss": 1.178, "step": 3576 }, { - "epoch": 0.1013630309728244, + "epoch": 0.13995617810470304, "grad_norm": 0.0, - "learning_rate": 1.97341518259607e-05, - "loss": 1.0839, + "learning_rate": 1.9372663810648675e-05, + "loss": 1.1644, "step": 3577 }, { - "epoch": 0.10139136841508685, + "epoch": 0.13999530479693248, "grad_norm": 0.0, - "learning_rate": 1.973394156737666e-05, - "loss": 1.1013, + "learning_rate": 1.9372221960867773e-05, + "loss": 1.2004, "step": 3578 }, { - "epoch": 0.10141970585734932, + "epoch": 0.1400344314891619, "grad_norm": 0.0, - "learning_rate": 1.9733731226800016e-05, - "loss": 1.0564, + "learning_rate": 1.9371779960580735e-05, + "loss": 1.0425, "step": 3579 }, { - "epoch": 0.10144804329961178, + "epoch": 0.14007355818139133, "grad_norm": 0.0, - "learning_rate": 1.9733520804232536e-05, - "loss": 1.1415, + "learning_rate": 1.937133780979465e-05, + "loss": 1.1696, "step": 3580 }, { - "epoch": 0.10147638074187423, + "epoch": 0.14011268487362077, "grad_norm": 0.0, - "learning_rate": 1.9733310299675995e-05, - "loss": 1.0121, + "learning_rate": 1.9370895508516624e-05, + "loss": 1.1192, "step": 3581 }, { - "epoch": 0.1015047181841367, + "epoch": 0.14015181156585022, "grad_norm": 0.0, - "learning_rate": 1.973309971313217e-05, - "loss": 1.0988, + "learning_rate": 1.9370453056753757e-05, + "loss": 1.075, "step": 3582 }, { - "epoch": 0.10153305562639917, + "epoch": 0.14019093825807966, "grad_norm": 0.0, - "learning_rate": 1.9732889044602836e-05, - "loss": 1.1119, + "learning_rate": 1.9370010454513156e-05, + "loss": 1.2256, "step": 3583 }, { - "epoch": 0.10156139306866162, + "epoch": 0.1402300649503091, "grad_norm": 0.0, - "learning_rate": 1.9732678294089756e-05, - "loss": 1.1064, + "learning_rate": 1.9369567701801933e-05, + "loss": 1.2134, "step": 3584 }, { - "epoch": 0.10158973051092408, + "epoch": 0.14026919164253854, "grad_norm": 0.0, - "learning_rate": 1.973246746159472e-05, - "loss": 1.1348, + "learning_rate": 1.936912479862719e-05, + "loss": 1.1851, "step": 3585 }, { - "epoch": 0.10161806795318655, + "epoch": 0.14030831833476798, "grad_norm": 0.0, - "learning_rate": 1.973225654711949e-05, - "loss": 1.0573, + "learning_rate": 1.9368681744996043e-05, + "loss": 1.1244, "step": 3586 }, { - "epoch": 0.101646405395449, + "epoch": 0.14034744502699742, "grad_norm": 0.0, - "learning_rate": 1.9732045550665853e-05, - "loss": 1.1024, + "learning_rate": 1.936823854091561e-05, + "loss": 1.0472, "step": 3587 }, { - "epoch": 0.10167474283771147, + "epoch": 0.14038657171922686, "grad_norm": 0.0, - "learning_rate": 1.973183447223558e-05, - "loss": 1.0648, + "learning_rate": 1.9367795186392996e-05, + "loss": 1.2226, "step": 3588 }, { - "epoch": 0.10170308027997393, + "epoch": 0.1404256984114563, "grad_norm": 0.0, - "learning_rate": 1.973162331183045e-05, - "loss": 1.0344, + "learning_rate": 1.936735168143534e-05, + "loss": 1.186, "step": 3589 }, { - "epoch": 0.10173141772223639, + "epoch": 0.14046482510368574, "grad_norm": 0.0, - "learning_rate": 1.9731412069452248e-05, - "loss": 1.1367, + "learning_rate": 1.9366908026049747e-05, + "loss": 1.2861, "step": 3590 }, { - "epoch": 0.10175975516449885, + "epoch": 0.14050395179591518, "grad_norm": 0.0, - "learning_rate": 1.9731200745102742e-05, - "loss": 0.9938, + "learning_rate": 1.9366464220243352e-05, + "loss": 1.2133, "step": 3591 }, { - "epoch": 0.10178809260676132, + "epoch": 0.14054307848814462, "grad_norm": 0.0, - "learning_rate": 1.9730989338783724e-05, - "loss": 1.0586, + "learning_rate": 1.936602026402328e-05, + "loss": 1.1085, "step": 3592 }, { - "epoch": 0.10181643004902377, + "epoch": 0.14058220518037406, "grad_norm": 0.0, - "learning_rate": 1.9730777850496968e-05, - "loss": 0.972, + "learning_rate": 1.9365576157396652e-05, + "loss": 1.1426, "step": 3593 }, { - "epoch": 0.10184476749128624, + "epoch": 0.14062133187260348, "grad_norm": 0.0, - "learning_rate": 1.9730566280244256e-05, - "loss": 0.9592, + "learning_rate": 1.9365131900370612e-05, + "loss": 1.1984, "step": 3594 }, { - "epoch": 0.1018731049335487, + "epoch": 0.14066045856483292, "grad_norm": 0.0, - "learning_rate": 1.9730354628027372e-05, - "loss": 1.1676, + "learning_rate": 1.9364687492952286e-05, + "loss": 1.1729, "step": 3595 }, { - "epoch": 0.10190144237581115, + "epoch": 0.14069958525706236, "grad_norm": 0.0, - "learning_rate": 1.9730142893848097e-05, - "loss": 1.1302, + "learning_rate": 1.9364242935148817e-05, + "loss": 1.2335, "step": 3596 }, { - "epoch": 0.10192977981807362, + "epoch": 0.1407387119492918, "grad_norm": 0.0, - "learning_rate": 1.9729931077708216e-05, - "loss": 1.0425, + "learning_rate": 1.936379822696734e-05, + "loss": 1.1885, "step": 3597 }, { - "epoch": 0.10195811726033609, + "epoch": 0.14077783864152124, "grad_norm": 0.0, - "learning_rate": 1.972971917960951e-05, - "loss": 1.0483, + "learning_rate": 1.9363353368414995e-05, + "loss": 1.2887, "step": 3598 }, { - "epoch": 0.10198645470259854, + "epoch": 0.14081696533375068, "grad_norm": 0.0, - "learning_rate": 1.9729507199553767e-05, - "loss": 1.1092, + "learning_rate": 1.9362908359498932e-05, + "loss": 1.0678, "step": 3599 }, { - "epoch": 0.102014792144861, + "epoch": 0.14085609202598012, "grad_norm": 0.0, - "learning_rate": 1.9729295137542773e-05, - "loss": 1.0671, + "learning_rate": 1.9362463200226288e-05, + "loss": 1.1898, "step": 3600 }, { - "epoch": 0.10204312958712347, + "epoch": 0.14089521871820956, "grad_norm": 0.0, - "learning_rate": 1.9729082993578315e-05, - "loss": 1.1135, + "learning_rate": 1.9362017890604215e-05, + "loss": 1.1555, "step": 3601 }, { - "epoch": 0.10207146702938592, + "epoch": 0.140934345410439, "grad_norm": 0.0, - "learning_rate": 1.9728870767662177e-05, - "loss": 1.0196, + "learning_rate": 1.9361572430639873e-05, + "loss": 1.1439, "step": 3602 }, { - "epoch": 0.10209980447164839, + "epoch": 0.14097347210266845, "grad_norm": 0.0, - "learning_rate": 1.9728658459796143e-05, - "loss": 1.1172, + "learning_rate": 1.9361126820340406e-05, + "loss": 1.2336, "step": 3603 }, { - "epoch": 0.10212814191391086, + "epoch": 0.1410125987948979, "grad_norm": 0.0, - "learning_rate": 1.972844606998201e-05, - "loss": 1.0339, + "learning_rate": 1.936068105971297e-05, + "loss": 1.304, "step": 3604 }, { - "epoch": 0.10215647935617331, + "epoch": 0.14105172548712733, "grad_norm": 0.0, - "learning_rate": 1.9728233598221565e-05, - "loss": 1.0825, + "learning_rate": 1.936023514876473e-05, + "loss": 1.2192, "step": 3605 }, { - "epoch": 0.10218481679843577, + "epoch": 0.14109085217935677, "grad_norm": 0.0, - "learning_rate": 1.972802104451659e-05, - "loss": 1.0265, + "learning_rate": 1.9359789087502837e-05, + "loss": 1.1263, "step": 3606 }, { - "epoch": 0.10221315424069824, + "epoch": 0.1411299788715862, "grad_norm": 0.0, - "learning_rate": 1.972780840886889e-05, - "loss": 1.0859, + "learning_rate": 1.935934287593446e-05, + "loss": 1.1036, "step": 3607 }, { - "epoch": 0.10224149168296069, + "epoch": 0.14116910556381562, "grad_norm": 0.0, - "learning_rate": 1.972759569128024e-05, - "loss": 1.1656, + "learning_rate": 1.935889651406677e-05, + "loss": 1.2493, "step": 3608 }, { - "epoch": 0.10226982912522316, + "epoch": 0.14120823225604506, "grad_norm": 0.0, - "learning_rate": 1.9727382891752446e-05, - "loss": 1.0925, + "learning_rate": 1.9358450001906926e-05, + "loss": 1.1955, "step": 3609 }, { - "epoch": 0.10229816656748562, + "epoch": 0.1412473589482745, "grad_norm": 0.0, - "learning_rate": 1.9727170010287287e-05, - "loss": 1.202, + "learning_rate": 1.9358003339462103e-05, + "loss": 1.219, "step": 3610 }, { - "epoch": 0.10232650400974808, + "epoch": 0.14128648564050394, "grad_norm": 0.0, - "learning_rate": 1.9726957046886565e-05, - "loss": 1.0642, + "learning_rate": 1.9357556526739468e-05, + "loss": 1.2116, "step": 3611 }, { - "epoch": 0.10235484145201054, + "epoch": 0.14132561233273339, "grad_norm": 0.0, - "learning_rate": 1.9726744001552075e-05, - "loss": 1.0254, + "learning_rate": 1.935710956374621e-05, + "loss": 1.2627, "step": 3612 }, { - "epoch": 0.10238317889427301, + "epoch": 0.14136473902496283, "grad_norm": 0.0, - "learning_rate": 1.9726530874285602e-05, - "loss": 0.9982, + "learning_rate": 1.935666245048949e-05, + "loss": 1.2943, "step": 3613 }, { - "epoch": 0.10241151633653546, + "epoch": 0.14140386571719227, "grad_norm": 0.0, - "learning_rate": 1.9726317665088953e-05, - "loss": 1.062, + "learning_rate": 1.9356215186976496e-05, + "loss": 1.1857, "step": 3614 }, { - "epoch": 0.10243985377879793, + "epoch": 0.1414429924094217, "grad_norm": 0.0, - "learning_rate": 1.9726104373963916e-05, - "loss": 0.9914, + "learning_rate": 1.9355767773214414e-05, + "loss": 1.2057, "step": 3615 }, { - "epoch": 0.10246819122106039, + "epoch": 0.14148211910165115, "grad_norm": 0.0, - "learning_rate": 1.9725891000912294e-05, - "loss": 1.1086, + "learning_rate": 1.935532020921042e-05, + "loss": 1.2388, "step": 3616 }, { - "epoch": 0.10249652866332284, + "epoch": 0.1415212457938806, "grad_norm": 0.0, - "learning_rate": 1.9725677545935876e-05, - "loss": 1.1443, + "learning_rate": 1.935487249497171e-05, + "loss": 1.229, "step": 3617 }, { - "epoch": 0.10252486610558531, + "epoch": 0.14156037248611003, "grad_norm": 0.0, - "learning_rate": 1.9725464009036467e-05, - "loss": 1.0017, + "learning_rate": 1.935442463050547e-05, + "loss": 1.0476, "step": 3618 }, { - "epoch": 0.10255320354784778, + "epoch": 0.14159949917833947, "grad_norm": 0.0, - "learning_rate": 1.9725250390215863e-05, - "loss": 0.964, + "learning_rate": 1.935397661581889e-05, + "loss": 1.246, "step": 3619 }, { - "epoch": 0.10258154099011023, + "epoch": 0.1416386258705689, "grad_norm": 0.0, - "learning_rate": 1.972503668947586e-05, - "loss": 0.9787, + "learning_rate": 1.935352845091917e-05, + "loss": 1.2667, "step": 3620 }, { - "epoch": 0.1026098784323727, + "epoch": 0.14167775256279835, "grad_norm": 0.0, - "learning_rate": 1.9724822906818265e-05, - "loss": 1.1158, + "learning_rate": 1.93530801358135e-05, + "loss": 1.1583, "step": 3621 }, { - "epoch": 0.10263821587463516, + "epoch": 0.14171687925502777, "grad_norm": 0.0, - "learning_rate": 1.9724609042244876e-05, - "loss": 1.1319, + "learning_rate": 1.9352631670509082e-05, + "loss": 1.2857, "step": 3622 }, { - "epoch": 0.10266655331689761, + "epoch": 0.1417560059472572, "grad_norm": 0.0, - "learning_rate": 1.9724395095757495e-05, - "loss": 1.0278, + "learning_rate": 1.9352183055013124e-05, + "loss": 1.2337, "step": 3623 }, { - "epoch": 0.10269489075916008, + "epoch": 0.14179513263948665, "grad_norm": 0.0, - "learning_rate": 1.9724181067357918e-05, - "loss": 1.0162, + "learning_rate": 1.935173428933282e-05, + "loss": 1.1395, "step": 3624 }, { - "epoch": 0.10272322820142255, + "epoch": 0.1418342593317161, "grad_norm": 0.0, - "learning_rate": 1.9723966957047955e-05, - "loss": 1.0199, + "learning_rate": 1.9351285373475388e-05, + "loss": 1.192, "step": 3625 }, { - "epoch": 0.102751565643685, + "epoch": 0.14187338602394553, "grad_norm": 0.0, - "learning_rate": 1.972375276482941e-05, - "loss": 1.0909, + "learning_rate": 1.9350836307448023e-05, + "loss": 1.1307, "step": 3626 }, { - "epoch": 0.10277990308594746, + "epoch": 0.14191251271617497, "grad_norm": 0.0, - "learning_rate": 1.972353849070408e-05, - "loss": 1.0604, + "learning_rate": 1.9350387091257952e-05, + "loss": 1.251, "step": 3627 }, { - "epoch": 0.10280824052820993, + "epoch": 0.1419516394084044, "grad_norm": 0.0, - "learning_rate": 1.972332413467378e-05, - "loss": 0.9317, + "learning_rate": 1.934993772491238e-05, + "loss": 1.1837, "step": 3628 }, { - "epoch": 0.10283657797047238, + "epoch": 0.14199076610063385, "grad_norm": 0.0, - "learning_rate": 1.972310969674031e-05, - "loss": 1.0945, + "learning_rate": 1.934948820841852e-05, + "loss": 1.2009, "step": 3629 }, { - "epoch": 0.10286491541273485, + "epoch": 0.1420298927928633, "grad_norm": 0.0, - "learning_rate": 1.972289517690547e-05, - "loss": 1.052, + "learning_rate": 1.9349038541783592e-05, + "loss": 1.2358, "step": 3630 }, { - "epoch": 0.10289325285499731, + "epoch": 0.14206901948509273, "grad_norm": 0.0, - "learning_rate": 1.9722680575171077e-05, - "loss": 1.1219, + "learning_rate": 1.9348588725014826e-05, + "loss": 1.1697, "step": 3631 }, { - "epoch": 0.10292159029725977, + "epoch": 0.14210814617732218, "grad_norm": 0.0, - "learning_rate": 1.9722465891538935e-05, - "loss": 1.16, + "learning_rate": 1.9348138758119437e-05, + "loss": 1.2476, "step": 3632 }, { - "epoch": 0.10294992773952223, + "epoch": 0.14214727286955162, "grad_norm": 0.0, - "learning_rate": 1.9722251126010854e-05, - "loss": 1.1703, + "learning_rate": 1.9347688641104657e-05, + "loss": 1.2087, "step": 3633 }, { - "epoch": 0.1029782651817847, + "epoch": 0.14218639956178106, "grad_norm": 0.0, - "learning_rate": 1.9722036278588642e-05, - "loss": 1.0697, + "learning_rate": 1.934723837397771e-05, + "loss": 1.2316, "step": 3634 }, { - "epoch": 0.10300660262404715, + "epoch": 0.1422255262540105, "grad_norm": 0.0, - "learning_rate": 1.9721821349274102e-05, - "loss": 1.0873, + "learning_rate": 1.9346787956745822e-05, + "loss": 1.1872, "step": 3635 }, { - "epoch": 0.10303494006630962, + "epoch": 0.1422646529462399, "grad_norm": 0.0, - "learning_rate": 1.9721606338069058e-05, - "loss": 1.1411, + "learning_rate": 1.9346337389416234e-05, + "loss": 1.1292, "step": 3636 }, { - "epoch": 0.10306327750857208, + "epoch": 0.14230377963846935, "grad_norm": 0.0, - "learning_rate": 1.972139124497531e-05, - "loss": 1.1768, + "learning_rate": 1.9345886671996183e-05, + "loss": 1.3246, "step": 3637 }, { - "epoch": 0.10309161495083453, + "epoch": 0.1423429063306988, "grad_norm": 0.0, - "learning_rate": 1.9721176069994677e-05, - "loss": 1.1022, + "learning_rate": 1.9345435804492898e-05, + "loss": 1.1966, "step": 3638 }, { - "epoch": 0.103119952393097, + "epoch": 0.14238203302292823, "grad_norm": 0.0, - "learning_rate": 1.9720960813128966e-05, - "loss": 1.0727, + "learning_rate": 1.9344984786913627e-05, + "loss": 1.3227, "step": 3639 }, { - "epoch": 0.10314828983535947, + "epoch": 0.14242115971515767, "grad_norm": 0.0, - "learning_rate": 1.972074547437999e-05, - "loss": 1.0517, + "learning_rate": 1.934453361926561e-05, + "loss": 1.2079, "step": 3640 }, { - "epoch": 0.10317662727762192, + "epoch": 0.14246028640738712, "grad_norm": 0.0, - "learning_rate": 1.972053005374957e-05, - "loss": 1.0936, + "learning_rate": 1.9344082301556093e-05, + "loss": 1.3097, "step": 3641 }, { - "epoch": 0.10320496471988438, + "epoch": 0.14249941309961656, "grad_norm": 0.0, - "learning_rate": 1.972031455123951e-05, - "loss": 1.0961, + "learning_rate": 1.9343630833792322e-05, + "loss": 1.0661, "step": 3642 }, { - "epoch": 0.10323330216214685, + "epoch": 0.142538539791846, "grad_norm": 0.0, - "learning_rate": 1.9720098966851635e-05, - "loss": 1.032, + "learning_rate": 1.934317921598155e-05, + "loss": 1.0558, "step": 3643 }, { - "epoch": 0.1032616396044093, + "epoch": 0.14257766648407544, "grad_norm": 0.0, - "learning_rate": 1.9719883300587755e-05, - "loss": 1.0193, + "learning_rate": 1.9342727448131025e-05, + "loss": 1.2701, "step": 3644 }, { - "epoch": 0.10328997704667177, + "epoch": 0.14261679317630488, "grad_norm": 0.0, - "learning_rate": 1.971966755244969e-05, - "loss": 1.0495, + "learning_rate": 1.9342275530248006e-05, + "loss": 1.2429, "step": 3645 }, { - "epoch": 0.10331831448893423, + "epoch": 0.14265591986853432, "grad_norm": 0.0, - "learning_rate": 1.9719451722439255e-05, - "loss": 0.9925, + "learning_rate": 1.934182346233975e-05, + "loss": 1.2597, "step": 3646 }, { - "epoch": 0.10334665193119669, + "epoch": 0.14269504656076376, "grad_norm": 0.0, - "learning_rate": 1.971923581055827e-05, - "loss": 1.1706, + "learning_rate": 1.934137124441351e-05, + "loss": 1.2032, "step": 3647 }, { - "epoch": 0.10337498937345915, + "epoch": 0.1427341732529932, "grad_norm": 0.0, - "learning_rate": 1.9719019816808553e-05, - "loss": 1.0163, + "learning_rate": 1.9340918876476557e-05, + "loss": 1.2709, "step": 3648 }, { - "epoch": 0.10340332681572162, + "epoch": 0.14277329994522264, "grad_norm": 0.0, - "learning_rate": 1.9718803741191918e-05, - "loss": 1.0653, + "learning_rate": 1.934046635853615e-05, + "loss": 1.2425, "step": 3649 }, { - "epoch": 0.10343166425798407, + "epoch": 0.14281242663745208, "grad_norm": 0.0, - "learning_rate": 1.9718587583710196e-05, - "loss": 1.0804, + "learning_rate": 1.934001369059956e-05, + "loss": 1.2718, "step": 3650 }, { - "epoch": 0.10346000170024654, + "epoch": 0.1428515533296815, "grad_norm": 0.0, - "learning_rate": 1.97183713443652e-05, - "loss": 1.0573, + "learning_rate": 1.9339560872674052e-05, + "loss": 1.214, "step": 3651 }, { - "epoch": 0.103488339142509, + "epoch": 0.14289068002191094, "grad_norm": 0.0, - "learning_rate": 1.9718155023158752e-05, - "loss": 0.9924, + "learning_rate": 1.93391079047669e-05, + "loss": 1.2889, "step": 3652 }, { - "epoch": 0.10351667658477146, + "epoch": 0.14292980671414038, "grad_norm": 0.0, - "learning_rate": 1.971793862009268e-05, - "loss": 1.066, + "learning_rate": 1.9338654786885377e-05, + "loss": 1.1192, "step": 3653 }, { - "epoch": 0.10354501402703392, + "epoch": 0.14296893340636982, "grad_norm": 0.0, - "learning_rate": 1.9717722135168796e-05, - "loss": 1.1921, + "learning_rate": 1.933820151903676e-05, + "loss": 1.0825, "step": 3654 }, { - "epoch": 0.10357335146929639, + "epoch": 0.14300806009859926, "grad_norm": 0.0, - "learning_rate": 1.9717505568388936e-05, - "loss": 1.1383, + "learning_rate": 1.933774810122833e-05, + "loss": 1.2767, "step": 3655 }, { - "epoch": 0.10360168891155884, + "epoch": 0.1430471867908287, "grad_norm": 0.0, - "learning_rate": 1.9717288919754912e-05, - "loss": 1.0003, + "learning_rate": 1.9337294533467363e-05, + "loss": 1.2165, "step": 3656 }, { - "epoch": 0.1036300263538213, + "epoch": 0.14308631348305814, "grad_norm": 0.0, - "learning_rate": 1.9717072189268558e-05, - "loss": 1.1339, + "learning_rate": 1.933684081576115e-05, + "loss": 1.277, "step": 3657 }, { - "epoch": 0.10365836379608377, + "epoch": 0.14312544017528758, "grad_norm": 0.0, - "learning_rate": 1.9716855376931696e-05, - "loss": 1.0485, + "learning_rate": 1.933638694811697e-05, + "loss": 1.2098, "step": 3658 }, { - "epoch": 0.10368670123834622, + "epoch": 0.14316456686751702, "grad_norm": 0.0, - "learning_rate": 1.9716638482746155e-05, - "loss": 0.9912, + "learning_rate": 1.9335932930542115e-05, + "loss": 1.1779, "step": 3659 }, { - "epoch": 0.10371503868060869, + "epoch": 0.14320369355974646, "grad_norm": 0.0, - "learning_rate": 1.9716421506713758e-05, - "loss": 1.1128, + "learning_rate": 1.933547876304388e-05, + "loss": 1.1437, "step": 3660 }, { - "epoch": 0.10374337612287116, + "epoch": 0.1432428202519759, "grad_norm": 0.0, - "learning_rate": 1.9716204448836335e-05, - "loss": 1.0812, + "learning_rate": 1.933502444562955e-05, + "loss": 1.1221, "step": 3661 }, { - "epoch": 0.10377171356513361, + "epoch": 0.14328194694420535, "grad_norm": 0.0, - "learning_rate": 1.9715987309115713e-05, - "loss": 1.0742, + "learning_rate": 1.9334569978306423e-05, + "loss": 1.2159, "step": 3662 }, { - "epoch": 0.10380005100739607, + "epoch": 0.1433210736364348, "grad_norm": 0.0, - "learning_rate": 1.9715770087553723e-05, - "loss": 1.1678, + "learning_rate": 1.9334115361081803e-05, + "loss": 1.3313, "step": 3663 }, { - "epoch": 0.10382838844965854, + "epoch": 0.14336020032866423, "grad_norm": 0.0, - "learning_rate": 1.9715552784152193e-05, - "loss": 1.0779, + "learning_rate": 1.9333660593962984e-05, + "loss": 1.0428, "step": 3664 }, { - "epoch": 0.10385672589192099, + "epoch": 0.14339932702089364, "grad_norm": 0.0, - "learning_rate": 1.9715335398912955e-05, - "loss": 1.0699, + "learning_rate": 1.9333205676957273e-05, + "loss": 1.0949, "step": 3665 }, { - "epoch": 0.10388506333418346, + "epoch": 0.14343845371312308, "grad_norm": 0.0, - "learning_rate": 1.971511793183784e-05, - "loss": 1.0783, + "learning_rate": 1.9332750610071972e-05, + "loss": 1.1753, "step": 3666 }, { - "epoch": 0.10391340077644592, + "epoch": 0.14347758040535252, "grad_norm": 0.0, - "learning_rate": 1.9714900382928674e-05, - "loss": 1.0643, + "learning_rate": 1.9332295393314397e-05, + "loss": 1.2275, "step": 3667 }, { - "epoch": 0.10394173821870838, + "epoch": 0.14351670709758196, "grad_norm": 0.0, - "learning_rate": 1.97146827521873e-05, - "loss": 1.0194, + "learning_rate": 1.9331840026691844e-05, + "loss": 1.15, "step": 3668 }, { - "epoch": 0.10397007566097084, + "epoch": 0.1435558337898114, "grad_norm": 0.0, - "learning_rate": 1.9714465039615545e-05, - "loss": 1.2052, + "learning_rate": 1.9331384510211636e-05, + "loss": 1.1489, "step": 3669 }, { - "epoch": 0.10399841310323331, + "epoch": 0.14359496048204085, "grad_norm": 0.0, - "learning_rate": 1.9714247245215242e-05, - "loss": 1.158, + "learning_rate": 1.933092884388109e-05, + "loss": 1.1802, "step": 3670 }, { - "epoch": 0.10402675054549576, + "epoch": 0.1436340871742703, "grad_norm": 0.0, - "learning_rate": 1.9714029368988227e-05, - "loss": 1.0572, + "learning_rate": 1.9330473027707515e-05, + "loss": 1.2457, "step": 3671 }, { - "epoch": 0.10405508798775823, + "epoch": 0.14367321386649973, "grad_norm": 0.0, - "learning_rate": 1.9713811410936338e-05, - "loss": 1.0448, + "learning_rate": 1.9330017061698235e-05, + "loss": 1.251, "step": 3672 }, { - "epoch": 0.10408342543002069, + "epoch": 0.14371234055872917, "grad_norm": 0.0, - "learning_rate": 1.9713593371061405e-05, - "loss": 1.1029, + "learning_rate": 1.9329560945860573e-05, + "loss": 1.1429, "step": 3673 }, { - "epoch": 0.10411176287228315, + "epoch": 0.1437514672509586, "grad_norm": 0.0, - "learning_rate": 1.9713375249365268e-05, - "loss": 1.1209, + "learning_rate": 1.932910468020185e-05, + "loss": 1.1658, "step": 3674 }, { - "epoch": 0.10414010031454561, + "epoch": 0.14379059394318805, "grad_norm": 0.0, - "learning_rate": 1.971315704584977e-05, - "loss": 0.8932, + "learning_rate": 1.93286482647294e-05, + "loss": 1.0803, "step": 3675 }, { - "epoch": 0.10416843775680808, + "epoch": 0.1438297206354175, "grad_norm": 0.0, - "learning_rate": 1.9712938760516737e-05, - "loss": 1.032, + "learning_rate": 1.932819169945055e-05, + "loss": 1.2377, "step": 3676 }, { - "epoch": 0.10419677519907053, + "epoch": 0.14386884732764693, "grad_norm": 0.0, - "learning_rate": 1.9712720393368013e-05, - "loss": 1.1116, + "learning_rate": 1.9327734984372622e-05, + "loss": 1.2753, "step": 3677 }, { - "epoch": 0.104225112641333, + "epoch": 0.14390797401987637, "grad_norm": 0.0, - "learning_rate": 1.9712501944405443e-05, - "loss": 1.1773, + "learning_rate": 1.932727811950297e-05, + "loss": 1.2803, "step": 3678 }, { - "epoch": 0.10425345008359546, + "epoch": 0.14394710071210579, "grad_norm": 0.0, - "learning_rate": 1.9712283413630865e-05, - "loss": 1.0518, + "learning_rate": 1.932682110484891e-05, + "loss": 1.1581, "step": 3679 }, { - "epoch": 0.10428178752585791, + "epoch": 0.14398622740433523, "grad_norm": 0.0, - "learning_rate": 1.9712064801046108e-05, - "loss": 1.0656, + "learning_rate": 1.932636394041779e-05, + "loss": 1.1617, "step": 3680 }, { - "epoch": 0.10431012496812038, + "epoch": 0.14402535409656467, "grad_norm": 0.0, - "learning_rate": 1.971184610665303e-05, - "loss": 1.0214, + "learning_rate": 1.9325906626216957e-05, + "loss": 1.3583, "step": 3681 }, { - "epoch": 0.10433846241038285, + "epoch": 0.1440644807887941, "grad_norm": 0.0, - "learning_rate": 1.9711627330453465e-05, - "loss": 1.0892, + "learning_rate": 1.9325449162253746e-05, + "loss": 1.1524, "step": 3682 }, { - "epoch": 0.1043667998526453, + "epoch": 0.14410360748102355, "grad_norm": 0.0, - "learning_rate": 1.9711408472449256e-05, - "loss": 1.0183, + "learning_rate": 1.932499154853551e-05, + "loss": 1.2338, "step": 3683 }, { - "epoch": 0.10439513729490776, + "epoch": 0.144142734173253, "grad_norm": 0.0, - "learning_rate": 1.9711189532642244e-05, - "loss": 0.9626, + "learning_rate": 1.932453378506959e-05, + "loss": 1.0989, "step": 3684 }, { - "epoch": 0.10442347473717023, + "epoch": 0.14418186086548243, "grad_norm": 0.0, - "learning_rate": 1.9710970511034282e-05, - "loss": 1.2065, + "learning_rate": 1.9324075871863347e-05, + "loss": 1.1981, "step": 3685 }, { - "epoch": 0.10445181217943268, + "epoch": 0.14422098755771187, "grad_norm": 0.0, - "learning_rate": 1.9710751407627203e-05, - "loss": 1.0839, + "learning_rate": 1.9323617808924124e-05, + "loss": 1.1583, "step": 3686 }, { - "epoch": 0.10448014962169515, + "epoch": 0.1442601142499413, "grad_norm": 0.0, - "learning_rate": 1.9710532222422864e-05, - "loss": 1.0734, + "learning_rate": 1.9323159596259288e-05, + "loss": 1.2072, "step": 3687 }, { - "epoch": 0.10450848706395761, + "epoch": 0.14429924094217075, "grad_norm": 0.0, - "learning_rate": 1.9710312955423104e-05, - "loss": 1.1335, + "learning_rate": 1.9322701233876188e-05, + "loss": 1.1793, "step": 3688 }, { - "epoch": 0.10453682450622007, + "epoch": 0.1443383676344002, "grad_norm": 0.0, - "learning_rate": 1.9710093606629774e-05, - "loss": 1.0705, + "learning_rate": 1.9322242721782186e-05, + "loss": 1.2598, "step": 3689 }, { - "epoch": 0.10456516194848253, + "epoch": 0.14437749432662964, "grad_norm": 0.0, - "learning_rate": 1.9709874176044717e-05, - "loss": 1.0575, + "learning_rate": 1.9321784059984652e-05, + "loss": 1.243, "step": 3690 }, { - "epoch": 0.10459349939074498, + "epoch": 0.14441662101885908, "grad_norm": 0.0, - "learning_rate": 1.9709654663669785e-05, - "loss": 1.1032, + "learning_rate": 1.9321325248490945e-05, + "loss": 1.0851, "step": 3691 }, { - "epoch": 0.10462183683300745, + "epoch": 0.14445574771108852, "grad_norm": 0.0, - "learning_rate": 1.970943506950683e-05, - "loss": 1.0441, + "learning_rate": 1.9320866287308433e-05, + "loss": 1.3304, "step": 3692 }, { - "epoch": 0.10465017427526992, + "epoch": 0.14449487440331793, "grad_norm": 0.0, - "learning_rate": 1.9709215393557693e-05, - "loss": 1.006, + "learning_rate": 1.932040717644449e-05, + "loss": 1.1573, "step": 3693 }, { - "epoch": 0.10467851171753237, + "epoch": 0.14453400109554737, "grad_norm": 0.0, - "learning_rate": 1.970899563582423e-05, - "loss": 0.9965, + "learning_rate": 1.9319947915906487e-05, + "loss": 1.1022, "step": 3694 }, { - "epoch": 0.10470684915979483, + "epoch": 0.1445731277877768, "grad_norm": 0.0, - "learning_rate": 1.9708775796308293e-05, - "loss": 1.0967, + "learning_rate": 1.9319488505701794e-05, + "loss": 1.2855, "step": 3695 }, { - "epoch": 0.1047351866020573, + "epoch": 0.14461225448000625, "grad_norm": 0.0, - "learning_rate": 1.970855587501173e-05, - "loss": 0.968, + "learning_rate": 1.93190289458378e-05, + "loss": 1.3092, "step": 3696 }, { - "epoch": 0.10476352404431975, + "epoch": 0.1446513811722357, "grad_norm": 0.0, - "learning_rate": 1.97083358719364e-05, - "loss": 1.0287, + "learning_rate": 1.9318569236321877e-05, + "loss": 1.2092, "step": 3697 }, { - "epoch": 0.10479186148658222, + "epoch": 0.14469050786446513, "grad_norm": 0.0, - "learning_rate": 1.970811578708415e-05, - "loss": 1.0994, + "learning_rate": 1.9318109377161408e-05, + "loss": 1.1852, "step": 3698 }, { - "epoch": 0.10482019892884469, + "epoch": 0.14472963455669458, "grad_norm": 0.0, - "learning_rate": 1.9707895620456832e-05, - "loss": 1.1656, + "learning_rate": 1.931764936836378e-05, + "loss": 1.1559, "step": 3699 }, { - "epoch": 0.10484853637110714, + "epoch": 0.14476876124892402, "grad_norm": 0.0, - "learning_rate": 1.9707675372056308e-05, - "loss": 1.0966, + "learning_rate": 1.931718920993638e-05, + "loss": 1.2532, "step": 3700 }, { - "epoch": 0.1048768738133696, + "epoch": 0.14480788794115346, "grad_norm": 0.0, - "learning_rate": 1.9707455041884428e-05, - "loss": 1.0413, + "learning_rate": 1.931672890188659e-05, + "loss": 1.319, "step": 3701 }, { - "epoch": 0.10490521125563207, + "epoch": 0.1448470146333829, "grad_norm": 0.0, - "learning_rate": 1.9707234629943048e-05, - "loss": 1.0631, + "learning_rate": 1.9316268444221814e-05, + "loss": 1.2281, "step": 3702 }, { - "epoch": 0.10493354869789452, + "epoch": 0.14488614132561234, "grad_norm": 0.0, - "learning_rate": 1.970701413623403e-05, - "loss": 1.0288, + "learning_rate": 1.931580783694944e-05, + "loss": 1.3149, "step": 3703 }, { - "epoch": 0.10496188614015699, + "epoch": 0.14492526801784178, "grad_norm": 0.0, - "learning_rate": 1.9706793560759223e-05, - "loss": 0.9742, + "learning_rate": 1.9315347080076865e-05, + "loss": 1.1848, "step": 3704 }, { - "epoch": 0.10499022358241945, + "epoch": 0.14496439471007122, "grad_norm": 0.0, - "learning_rate": 1.9706572903520492e-05, - "loss": 1.113, + "learning_rate": 1.9314886173611487e-05, + "loss": 1.2897, "step": 3705 }, { - "epoch": 0.1050185610246819, + "epoch": 0.14500352140230066, "grad_norm": 0.0, - "learning_rate": 1.9706352164519694e-05, - "loss": 1.0184, + "learning_rate": 1.931442511756071e-05, + "loss": 1.247, "step": 3706 }, { - "epoch": 0.10504689846694437, + "epoch": 0.1450426480945301, "grad_norm": 0.0, - "learning_rate": 1.9706131343758685e-05, - "loss": 0.962, + "learning_rate": 1.9313963911931938e-05, + "loss": 1.1266, "step": 3707 }, { - "epoch": 0.10507523590920684, + "epoch": 0.14508177478675952, "grad_norm": 0.0, - "learning_rate": 1.9705910441239328e-05, - "loss": 1.0219, + "learning_rate": 1.9313502556732577e-05, + "loss": 1.2671, "step": 3708 }, { - "epoch": 0.10510357335146929, + "epoch": 0.14512090147898896, "grad_norm": 0.0, - "learning_rate": 1.9705689456963484e-05, - "loss": 1.0188, + "learning_rate": 1.9313041051970037e-05, + "loss": 1.0233, "step": 3709 }, { - "epoch": 0.10513191079373176, + "epoch": 0.1451600281712184, "grad_norm": 0.0, - "learning_rate": 1.9705468390933012e-05, - "loss": 1.013, + "learning_rate": 1.9312579397651724e-05, + "loss": 1.244, "step": 3710 }, { - "epoch": 0.10516024823599422, + "epoch": 0.14519915486344784, "grad_norm": 0.0, - "learning_rate": 1.9705247243149777e-05, - "loss": 1.0734, + "learning_rate": 1.931211759378506e-05, + "loss": 1.1212, "step": 3711 }, { - "epoch": 0.10518858567825667, + "epoch": 0.14523828155567728, "grad_norm": 0.0, - "learning_rate": 1.970502601361564e-05, - "loss": 1.0881, + "learning_rate": 1.931165564037745e-05, + "loss": 1.2619, "step": 3712 }, { - "epoch": 0.10521692312051914, + "epoch": 0.14527740824790672, "grad_norm": 0.0, - "learning_rate": 1.9704804702332465e-05, - "loss": 1.1523, + "learning_rate": 1.931119353743632e-05, + "loss": 1.2358, "step": 3713 }, { - "epoch": 0.1052452605627816, + "epoch": 0.14531653494013616, "grad_norm": 0.0, - "learning_rate": 1.9704583309302115e-05, - "loss": 1.0443, + "learning_rate": 1.9310731284969093e-05, + "loss": 1.2182, "step": 3714 }, { - "epoch": 0.10527359800504406, + "epoch": 0.1453556616323656, "grad_norm": 0.0, - "learning_rate": 1.970436183452646e-05, - "loss": 1.1552, + "learning_rate": 1.9310268882983187e-05, + "loss": 1.2907, "step": 3715 }, { - "epoch": 0.10530193544730652, + "epoch": 0.14539478832459504, "grad_norm": 0.0, - "learning_rate": 1.970414027800736e-05, - "loss": 1.0198, + "learning_rate": 1.9309806331486028e-05, + "loss": 1.1672, "step": 3716 }, { - "epoch": 0.10533027288956899, + "epoch": 0.14543391501682448, "grad_norm": 0.0, - "learning_rate": 1.970391863974668e-05, - "loss": 1.1173, + "learning_rate": 1.9309343630485043e-05, + "loss": 1.0732, "step": 3717 }, { - "epoch": 0.10535861033183144, + "epoch": 0.14547304170905392, "grad_norm": 0.0, - "learning_rate": 1.9703696919746292e-05, - "loss": 1.0869, + "learning_rate": 1.930888077998767e-05, + "loss": 1.1106, "step": 3718 }, { - "epoch": 0.10538694777409391, + "epoch": 0.14551216840128336, "grad_norm": 0.0, - "learning_rate": 1.970347511800806e-05, - "loss": 1.0507, + "learning_rate": 1.9308417780001334e-05, + "loss": 1.2704, "step": 3719 }, { - "epoch": 0.10541528521635637, + "epoch": 0.1455512950935128, "grad_norm": 0.0, - "learning_rate": 1.9703253234533858e-05, - "loss": 1.0537, + "learning_rate": 1.9307954630533474e-05, + "loss": 1.1747, "step": 3720 }, { - "epoch": 0.10544362265861883, + "epoch": 0.14559042178574225, "grad_norm": 0.0, - "learning_rate": 1.9703031269325546e-05, - "loss": 0.9951, + "learning_rate": 1.930749133159152e-05, + "loss": 1.0312, "step": 3721 }, { - "epoch": 0.10547196010088129, + "epoch": 0.14562954847797166, "grad_norm": 0.0, - "learning_rate": 1.9702809222385002e-05, - "loss": 1.0143, + "learning_rate": 1.930702788318292e-05, + "loss": 1.2213, "step": 3722 }, { - "epoch": 0.10550029754314376, + "epoch": 0.1456686751702011, "grad_norm": 0.0, - "learning_rate": 1.9702587093714093e-05, - "loss": 1.2041, + "learning_rate": 1.930656428531512e-05, + "loss": 1.2408, "step": 3723 }, { - "epoch": 0.10552863498540621, + "epoch": 0.14570780186243054, "grad_norm": 0.0, - "learning_rate": 1.9702364883314687e-05, - "loss": 1.0989, + "learning_rate": 1.930610053799556e-05, + "loss": 1.1646, "step": 3724 }, { - "epoch": 0.10555697242766868, + "epoch": 0.14574692855465998, "grad_norm": 0.0, - "learning_rate": 1.970214259118866e-05, - "loss": 1.1199, + "learning_rate": 1.930563664123168e-05, + "loss": 1.2969, "step": 3725 }, { - "epoch": 0.10558530986993114, + "epoch": 0.14578605524688942, "grad_norm": 0.0, - "learning_rate": 1.9701920217337883e-05, - "loss": 1.0898, + "learning_rate": 1.9305172595030938e-05, + "loss": 1.1553, "step": 3726 }, { - "epoch": 0.1056136473121936, + "epoch": 0.14582518193911886, "grad_norm": 0.0, - "learning_rate": 1.9701697761764227e-05, - "loss": 1.1368, + "learning_rate": 1.9304708399400785e-05, + "loss": 1.2579, "step": 3727 }, { - "epoch": 0.10564198475445606, + "epoch": 0.1458643086313483, "grad_norm": 0.0, - "learning_rate": 1.970147522446957e-05, - "loss": 0.9589, + "learning_rate": 1.9304244054348674e-05, + "loss": 1.2567, "step": 3728 }, { - "epoch": 0.10567032219671853, + "epoch": 0.14590343532357775, "grad_norm": 0.0, - "learning_rate": 1.970125260545579e-05, - "loss": 1.1004, + "learning_rate": 1.9303779559882065e-05, + "loss": 1.1833, "step": 3729 }, { - "epoch": 0.10569865963898098, + "epoch": 0.1459425620158072, "grad_norm": 0.0, - "learning_rate": 1.970102990472475e-05, - "loss": 1.129, + "learning_rate": 1.9303314916008413e-05, + "loss": 1.1748, "step": 3730 }, { - "epoch": 0.10572699708124345, + "epoch": 0.14598168870803663, "grad_norm": 0.0, - "learning_rate": 1.9700807122278336e-05, - "loss": 1.0615, + "learning_rate": 1.930285012273518e-05, + "loss": 1.2799, "step": 3731 }, { - "epoch": 0.10575533452350591, + "epoch": 0.14602081540026607, "grad_norm": 0.0, - "learning_rate": 1.970058425811842e-05, - "loss": 1.031, + "learning_rate": 1.9302385180069832e-05, + "loss": 1.1276, "step": 3732 }, { - "epoch": 0.10578367196576836, + "epoch": 0.1460599420924955, "grad_norm": 0.0, - "learning_rate": 1.9700361312246877e-05, - "loss": 1.0915, + "learning_rate": 1.9301920088019836e-05, + "loss": 1.2288, "step": 3733 }, { - "epoch": 0.10581200940803083, + "epoch": 0.14609906878472495, "grad_norm": 0.0, - "learning_rate": 1.9700138284665593e-05, - "loss": 0.9732, + "learning_rate": 1.930145484659266e-05, + "loss": 1.1057, "step": 3734 }, { - "epoch": 0.1058403468502933, + "epoch": 0.1461381954769544, "grad_norm": 0.0, - "learning_rate": 1.9699915175376442e-05, - "loss": 1.1599, + "learning_rate": 1.930098945579577e-05, + "loss": 1.2768, "step": 3735 }, { - "epoch": 0.10586868429255575, + "epoch": 0.1461773221691838, "grad_norm": 0.0, - "learning_rate": 1.9699691984381304e-05, - "loss": 1.0504, + "learning_rate": 1.9300523915636645e-05, + "loss": 1.2765, "step": 3736 }, { - "epoch": 0.10589702173481821, + "epoch": 0.14621644886141325, "grad_norm": 0.0, - "learning_rate": 1.9699468711682055e-05, - "loss": 1.1015, + "learning_rate": 1.9300058226122764e-05, + "loss": 1.2313, "step": 3737 }, { - "epoch": 0.10592535917708068, + "epoch": 0.14625557555364269, "grad_norm": 0.0, - "learning_rate": 1.9699245357280583e-05, - "loss": 1.012, + "learning_rate": 1.92995923872616e-05, + "loss": 1.261, "step": 3738 }, { - "epoch": 0.10595369661934313, + "epoch": 0.14629470224587213, "grad_norm": 0.0, - "learning_rate": 1.9699021921178762e-05, - "loss": 0.9917, + "learning_rate": 1.9299126399060634e-05, + "loss": 1.1788, "step": 3739 }, { - "epoch": 0.1059820340616056, + "epoch": 0.14633382893810157, "grad_norm": 0.0, - "learning_rate": 1.9698798403378482e-05, - "loss": 1.1673, + "learning_rate": 1.929866026152735e-05, + "loss": 1.2869, "step": 3740 }, { - "epoch": 0.10601037150386806, + "epoch": 0.146372955630331, "grad_norm": 0.0, - "learning_rate": 1.969857480388162e-05, - "loss": 1.1025, + "learning_rate": 1.9298193974669237e-05, + "loss": 1.1149, "step": 3741 }, { - "epoch": 0.10603870894613052, + "epoch": 0.14641208232256045, "grad_norm": 0.0, - "learning_rate": 1.969835112269006e-05, - "loss": 0.9856, + "learning_rate": 1.9297727538493777e-05, + "loss": 1.0993, "step": 3742 }, { - "epoch": 0.10606704638839298, + "epoch": 0.1464512090147899, "grad_norm": 0.0, - "learning_rate": 1.969812735980569e-05, - "loss": 0.9996, + "learning_rate": 1.9297260953008464e-05, + "loss": 1.115, "step": 3743 }, { - "epoch": 0.10609538383065545, + "epoch": 0.14649033570701933, "grad_norm": 0.0, - "learning_rate": 1.9697903515230387e-05, - "loss": 0.9551, + "learning_rate": 1.929679421822079e-05, + "loss": 1.2657, "step": 3744 }, { - "epoch": 0.1061237212729179, + "epoch": 0.14652946239924877, "grad_norm": 0.0, - "learning_rate": 1.9697679588966042e-05, - "loss": 1.0164, + "learning_rate": 1.9296327334138252e-05, + "loss": 1.1631, "step": 3745 }, { - "epoch": 0.10615205871518037, + "epoch": 0.1465685890914782, "grad_norm": 0.0, - "learning_rate": 1.969745558101454e-05, - "loss": 1.0359, + "learning_rate": 1.9295860300768345e-05, + "loss": 1.1642, "step": 3746 }, { - "epoch": 0.10618039615744283, + "epoch": 0.14660771578370765, "grad_norm": 0.0, - "learning_rate": 1.9697231491377775e-05, - "loss": 1.0122, + "learning_rate": 1.929539311811857e-05, + "loss": 1.0728, "step": 3747 }, { - "epoch": 0.10620873359970529, + "epoch": 0.1466468424759371, "grad_norm": 0.0, - "learning_rate": 1.9697007320057624e-05, - "loss": 0.9777, + "learning_rate": 1.929492578619643e-05, + "loss": 1.1979, "step": 3748 }, { - "epoch": 0.10623707104196775, + "epoch": 0.14668596916816654, "grad_norm": 0.0, - "learning_rate": 1.969678306705598e-05, - "loss": 1.0326, + "learning_rate": 1.929445830500943e-05, + "loss": 1.177, "step": 3749 }, { - "epoch": 0.10626540848423022, + "epoch": 0.14672509586039595, "grad_norm": 0.0, - "learning_rate": 1.969655873237473e-05, - "loss": 1.0152, + "learning_rate": 1.9293990674565074e-05, + "loss": 1.3344, "step": 3750 }, { - "epoch": 0.10629374592649267, + "epoch": 0.1467642225526254, "grad_norm": 0.0, - "learning_rate": 1.9696334316015768e-05, - "loss": 1.127, + "learning_rate": 1.9293522894870875e-05, + "loss": 1.1491, "step": 3751 }, { - "epoch": 0.10632208336875514, + "epoch": 0.14680334924485483, "grad_norm": 0.0, - "learning_rate": 1.9696109817980978e-05, - "loss": 1.0287, + "learning_rate": 1.9293054965934344e-05, + "loss": 1.0909, "step": 3752 }, { - "epoch": 0.1063504208110176, + "epoch": 0.14684247593708427, "grad_norm": 0.0, - "learning_rate": 1.9695885238272256e-05, - "loss": 0.986, + "learning_rate": 1.929258688776299e-05, + "loss": 1.1918, "step": 3753 }, { - "epoch": 0.10637875825328005, + "epoch": 0.1468816026293137, "grad_norm": 0.0, - "learning_rate": 1.969566057689149e-05, - "loss": 1.0052, + "learning_rate": 1.9292118660364342e-05, + "loss": 1.2003, "step": 3754 }, { - "epoch": 0.10640709569554252, + "epoch": 0.14692072932154315, "grad_norm": 0.0, - "learning_rate": 1.969543583384058e-05, - "loss": 1.141, + "learning_rate": 1.929165028374591e-05, + "loss": 1.1227, "step": 3755 }, { - "epoch": 0.10643543313780499, + "epoch": 0.1469598560137726, "grad_norm": 0.0, - "learning_rate": 1.9695211009121407e-05, - "loss": 1.0683, + "learning_rate": 1.9291181757915216e-05, + "loss": 1.0366, "step": 3756 }, { - "epoch": 0.10646377058006744, + "epoch": 0.14699898270600203, "grad_norm": 0.0, - "learning_rate": 1.9694986102735876e-05, - "loss": 0.9666, + "learning_rate": 1.9290713082879786e-05, + "loss": 1.2307, "step": 3757 }, { - "epoch": 0.1064921080223299, + "epoch": 0.14703810939823148, "grad_norm": 0.0, - "learning_rate": 1.9694761114685876e-05, - "loss": 1.0638, + "learning_rate": 1.9290244258647146e-05, + "loss": 1.2132, "step": 3758 }, { - "epoch": 0.10652044546459237, + "epoch": 0.14707723609046092, "grad_norm": 0.0, - "learning_rate": 1.9694536044973303e-05, - "loss": 1.0857, + "learning_rate": 1.9289775285224824e-05, + "loss": 1.2889, "step": 3759 }, { - "epoch": 0.10654878290685482, + "epoch": 0.14711636278269036, "grad_norm": 0.0, - "learning_rate": 1.9694310893600053e-05, - "loss": 1.011, + "learning_rate": 1.9289306162620353e-05, + "loss": 1.1815, "step": 3760 }, { - "epoch": 0.10657712034911729, + "epoch": 0.1471554894749198, "grad_norm": 0.0, - "learning_rate": 1.9694085660568023e-05, - "loss": 1.1065, + "learning_rate": 1.9288836890841263e-05, + "loss": 1.1805, "step": 3761 }, { - "epoch": 0.10660545779137975, + "epoch": 0.14719461616714924, "grad_norm": 0.0, - "learning_rate": 1.969386034587911e-05, - "loss": 1.0303, + "learning_rate": 1.9288367469895095e-05, + "loss": 1.2657, "step": 3762 }, { - "epoch": 0.1066337952336422, + "epoch": 0.14723374285937868, "grad_norm": 0.0, - "learning_rate": 1.969363494953521e-05, - "loss": 1.0221, + "learning_rate": 1.9287897899789382e-05, + "loss": 1.1929, "step": 3763 }, { - "epoch": 0.10666213267590467, + "epoch": 0.1472728695516081, "grad_norm": 0.0, - "learning_rate": 1.9693409471538222e-05, - "loss": 0.9822, + "learning_rate": 1.928742818053167e-05, + "loss": 1.1764, "step": 3764 }, { - "epoch": 0.10669047011816714, + "epoch": 0.14731199624383753, "grad_norm": 0.0, - "learning_rate": 1.9693183911890047e-05, - "loss": 1.1811, + "learning_rate": 1.9286958312129495e-05, + "loss": 1.3082, "step": 3765 }, { - "epoch": 0.10671880756042959, + "epoch": 0.14735112293606697, "grad_norm": 0.0, - "learning_rate": 1.9692958270592587e-05, - "loss": 1.1432, + "learning_rate": 1.928648829459041e-05, + "loss": 1.0905, "step": 3766 }, { - "epoch": 0.10674714500269206, + "epoch": 0.14739024962829642, "grad_norm": 0.0, - "learning_rate": 1.969273254764774e-05, - "loss": 1.0012, + "learning_rate": 1.928601812792196e-05, + "loss": 1.2421, "step": 3767 }, { - "epoch": 0.10677548244495452, + "epoch": 0.14742937632052586, "grad_norm": 0.0, - "learning_rate": 1.9692506743057405e-05, - "loss": 1.1021, + "learning_rate": 1.9285547812131692e-05, + "loss": 1.1182, "step": 3768 }, { - "epoch": 0.10680381988721697, + "epoch": 0.1474685030127553, "grad_norm": 0.0, - "learning_rate": 1.9692280856823486e-05, - "loss": 1.0934, + "learning_rate": 1.9285077347227164e-05, + "loss": 1.238, "step": 3769 }, { - "epoch": 0.10683215732947944, + "epoch": 0.14750762970498474, "grad_norm": 0.0, - "learning_rate": 1.969205488894789e-05, - "loss": 1.069, + "learning_rate": 1.9284606733215925e-05, + "loss": 1.2739, "step": 3770 }, { - "epoch": 0.1068604947717419, + "epoch": 0.14754675639721418, "grad_norm": 0.0, - "learning_rate": 1.969182883943251e-05, - "loss": 1.1023, + "learning_rate": 1.928413597010554e-05, + "loss": 1.1909, "step": 3771 }, { - "epoch": 0.10688883221400436, + "epoch": 0.14758588308944362, "grad_norm": 0.0, - "learning_rate": 1.9691602708279266e-05, - "loss": 1.0707, + "learning_rate": 1.9283665057903566e-05, + "loss": 1.2244, "step": 3772 }, { - "epoch": 0.10691716965626683, + "epoch": 0.14762500978167306, "grad_norm": 0.0, - "learning_rate": 1.9691376495490046e-05, - "loss": 1.1901, + "learning_rate": 1.928319399661756e-05, + "loss": 1.1545, "step": 3773 }, { - "epoch": 0.10694550709852929, + "epoch": 0.1476641364739025, "grad_norm": 0.0, - "learning_rate": 1.9691150201066765e-05, - "loss": 1.0557, + "learning_rate": 1.9282722786255092e-05, + "loss": 1.1113, "step": 3774 }, { - "epoch": 0.10697384454079174, + "epoch": 0.14770326316613194, "grad_norm": 0.0, - "learning_rate": 1.969092382501133e-05, - "loss": 0.9284, + "learning_rate": 1.928225142682373e-05, + "loss": 1.1992, "step": 3775 }, { - "epoch": 0.10700218198305421, + "epoch": 0.14774238985836138, "grad_norm": 0.0, - "learning_rate": 1.9690697367325642e-05, - "loss": 1.0519, + "learning_rate": 1.928177991833104e-05, + "loss": 1.2185, "step": 3776 }, { - "epoch": 0.10703051942531668, + "epoch": 0.14778151655059082, "grad_norm": 0.0, - "learning_rate": 1.969047082801161e-05, - "loss": 1.1081, + "learning_rate": 1.9281308260784595e-05, + "loss": 1.2766, "step": 3777 }, { - "epoch": 0.10705885686757913, + "epoch": 0.14782064324282027, "grad_norm": 0.0, - "learning_rate": 1.9690244207071146e-05, - "loss": 1.1674, + "learning_rate": 1.928083645419197e-05, + "loss": 1.2495, "step": 3778 }, { - "epoch": 0.1070871943098416, + "epoch": 0.14785976993504968, "grad_norm": 0.0, - "learning_rate": 1.969001750450616e-05, - "loss": 1.0648, + "learning_rate": 1.928036449856074e-05, + "loss": 1.1443, "step": 3779 }, { - "epoch": 0.10711553175210406, + "epoch": 0.14789889662727912, "grad_norm": 0.0, - "learning_rate": 1.9689790720318555e-05, - "loss": 0.9986, + "learning_rate": 1.9279892393898486e-05, + "loss": 1.2354, "step": 3780 }, { - "epoch": 0.10714386919436651, + "epoch": 0.14793802331950856, "grad_norm": 0.0, - "learning_rate": 1.9689563854510243e-05, - "loss": 1.0518, + "learning_rate": 1.927942014021279e-05, + "loss": 1.1064, "step": 3781 }, { - "epoch": 0.10717220663662898, + "epoch": 0.147977150011738, "grad_norm": 0.0, - "learning_rate": 1.9689336907083138e-05, - "loss": 1.0427, + "learning_rate": 1.9278947737511233e-05, + "loss": 1.2108, "step": 3782 }, { - "epoch": 0.10720054407889144, + "epoch": 0.14801627670396744, "grad_norm": 0.0, - "learning_rate": 1.968910987803915e-05, - "loss": 1.1095, + "learning_rate": 1.9278475185801404e-05, + "loss": 1.25, "step": 3783 }, { - "epoch": 0.1072288815211539, + "epoch": 0.14805540339619688, "grad_norm": 0.0, - "learning_rate": 1.968888276738019e-05, - "loss": 1.1271, + "learning_rate": 1.9278002485090886e-05, + "loss": 1.2753, "step": 3784 }, { - "epoch": 0.10725721896341636, + "epoch": 0.14809453008842632, "grad_norm": 0.0, - "learning_rate": 1.9688655575108175e-05, - "loss": 1.1136, + "learning_rate": 1.927752963538728e-05, + "loss": 1.1716, "step": 3785 }, { - "epoch": 0.10728555640567883, + "epoch": 0.14813365678065576, "grad_norm": 0.0, - "learning_rate": 1.9688428301225017e-05, - "loss": 1.1278, + "learning_rate": 1.9277056636698168e-05, + "loss": 1.1311, "step": 3786 }, { - "epoch": 0.10731389384794128, + "epoch": 0.1481727834728852, "grad_norm": 0.0, - "learning_rate": 1.9688200945732627e-05, - "loss": 1.0349, + "learning_rate": 1.9276583489031156e-05, + "loss": 1.269, "step": 3787 }, { - "epoch": 0.10734223129020375, + "epoch": 0.14821191016511465, "grad_norm": 0.0, - "learning_rate": 1.9687973508632925e-05, - "loss": 1.0635, + "learning_rate": 1.9276110192393834e-05, + "loss": 1.181, "step": 3788 }, { - "epoch": 0.10737056873246621, + "epoch": 0.1482510368573441, "grad_norm": 0.0, - "learning_rate": 1.9687745989927823e-05, - "loss": 1.0506, + "learning_rate": 1.9275636746793805e-05, + "loss": 1.091, "step": 3789 }, { - "epoch": 0.10739890617472866, + "epoch": 0.14829016354957353, "grad_norm": 0.0, - "learning_rate": 1.9687518389619242e-05, - "loss": 1.0949, + "learning_rate": 1.9275163152238675e-05, + "loss": 1.1265, "step": 3790 }, { - "epoch": 0.10742724361699113, + "epoch": 0.14832929024180297, "grad_norm": 0.0, - "learning_rate": 1.9687290707709092e-05, - "loss": 1.1547, + "learning_rate": 1.9274689408736047e-05, + "loss": 1.2689, "step": 3791 }, { - "epoch": 0.1074555810592536, + "epoch": 0.1483684169340324, "grad_norm": 0.0, - "learning_rate": 1.9687062944199294e-05, - "loss": 1.106, + "learning_rate": 1.9274215516293527e-05, + "loss": 1.1711, "step": 3792 }, { - "epoch": 0.10748391850151605, + "epoch": 0.14840754362626182, "grad_norm": 0.0, - "learning_rate": 1.968683509909177e-05, - "loss": 1.0335, + "learning_rate": 1.927374147491873e-05, + "loss": 1.249, "step": 3793 }, { - "epoch": 0.10751225594377851, + "epoch": 0.14844667031849126, "grad_norm": 0.0, - "learning_rate": 1.968660717238844e-05, - "loss": 1.0708, + "learning_rate": 1.9273267284619262e-05, + "loss": 1.3002, "step": 3794 }, { - "epoch": 0.10754059338604098, + "epoch": 0.1484857970107207, "grad_norm": 0.0, - "learning_rate": 1.9686379164091218e-05, - "loss": 1.1041, + "learning_rate": 1.9272792945402743e-05, + "loss": 1.1331, "step": 3795 }, { - "epoch": 0.10756893082830343, + "epoch": 0.14852492370295015, "grad_norm": 0.0, - "learning_rate": 1.9686151074202028e-05, - "loss": 1.0373, + "learning_rate": 1.9272318457276792e-05, + "loss": 1.1917, "step": 3796 }, { - "epoch": 0.1075972682705659, + "epoch": 0.1485640503951796, "grad_norm": 0.0, - "learning_rate": 1.968592290272279e-05, - "loss": 1.0667, + "learning_rate": 1.9271843820249022e-05, + "loss": 1.0818, "step": 3797 }, { - "epoch": 0.10762560571282836, + "epoch": 0.14860317708740903, "grad_norm": 0.0, - "learning_rate": 1.9685694649655426e-05, - "loss": 1.064, + "learning_rate": 1.9271369034327062e-05, + "loss": 1.2186, "step": 3798 }, { - "epoch": 0.10765394315509082, + "epoch": 0.14864230377963847, "grad_norm": 0.0, - "learning_rate": 1.9685466315001863e-05, - "loss": 1.0619, + "learning_rate": 1.927089409951853e-05, + "loss": 1.0901, "step": 3799 }, { - "epoch": 0.10768228059735328, + "epoch": 0.1486814304718679, "grad_norm": 0.0, - "learning_rate": 1.9685237898764014e-05, - "loss": 1.0029, + "learning_rate": 1.9270419015831056e-05, + "loss": 1.0841, "step": 3800 }, { - "epoch": 0.10771061803961575, + "epoch": 0.14872055716409735, "grad_norm": 0.0, - "learning_rate": 1.9685009400943815e-05, - "loss": 1.1421, + "learning_rate": 1.926994378327227e-05, + "loss": 1.2695, "step": 3801 }, { - "epoch": 0.1077389554818782, + "epoch": 0.1487596838563268, "grad_norm": 0.0, - "learning_rate": 1.9684780821543185e-05, - "loss": 1.2068, + "learning_rate": 1.92694684018498e-05, + "loss": 1.1721, "step": 3802 }, { - "epoch": 0.10776729292414067, + "epoch": 0.14879881054855623, "grad_norm": 0.0, - "learning_rate": 1.9684552160564047e-05, - "loss": 0.9427, + "learning_rate": 1.9268992871571284e-05, + "loss": 1.1616, "step": 3803 }, { - "epoch": 0.10779563036640313, + "epoch": 0.14883793724078567, "grad_norm": 0.0, - "learning_rate": 1.968432341800833e-05, - "loss": 0.9499, + "learning_rate": 1.926851719244436e-05, + "loss": 1.2693, "step": 3804 }, { - "epoch": 0.10782396780866559, + "epoch": 0.1488770639330151, "grad_norm": 0.0, - "learning_rate": 1.9684094593877964e-05, - "loss": 1.1516, + "learning_rate": 1.9268041364476662e-05, + "loss": 1.1821, "step": 3805 }, { - "epoch": 0.10785230525092805, + "epoch": 0.14891619062524455, "grad_norm": 0.0, - "learning_rate": 1.9683865688174873e-05, - "loss": 1.1173, + "learning_rate": 1.9267565387675832e-05, + "loss": 1.0999, "step": 3806 }, { - "epoch": 0.10788064269319052, + "epoch": 0.14895531731747397, "grad_norm": 0.0, - "learning_rate": 1.9683636700900984e-05, - "loss": 1.1164, + "learning_rate": 1.9267089262049517e-05, + "loss": 1.2307, "step": 3807 }, { - "epoch": 0.10790898013545297, + "epoch": 0.1489944440097034, "grad_norm": 0.0, - "learning_rate": 1.9683407632058226e-05, - "loss": 0.9258, + "learning_rate": 1.926661298760536e-05, + "loss": 1.1554, "step": 3808 }, { - "epoch": 0.10793731757771544, + "epoch": 0.14903357070193285, "grad_norm": 0.0, - "learning_rate": 1.968317848164853e-05, - "loss": 1.0388, + "learning_rate": 1.926613656435101e-05, + "loss": 1.2624, "step": 3809 }, { - "epoch": 0.1079656550199779, + "epoch": 0.1490726973941623, "grad_norm": 0.0, - "learning_rate": 1.9682949249673825e-05, - "loss": 1.0627, + "learning_rate": 1.926565999229412e-05, + "loss": 1.1295, "step": 3810 }, { - "epoch": 0.10799399246224035, + "epoch": 0.14911182408639173, "grad_norm": 0.0, - "learning_rate": 1.9682719936136045e-05, - "loss": 1.0964, + "learning_rate": 1.9265183271442336e-05, + "loss": 1.2524, "step": 3811 }, { - "epoch": 0.10802232990450282, + "epoch": 0.14915095077862117, "grad_norm": 0.0, - "learning_rate": 1.9682490541037117e-05, - "loss": 1.014, + "learning_rate": 1.926470640180332e-05, + "loss": 1.2985, "step": 3812 }, { - "epoch": 0.10805066734676529, + "epoch": 0.1491900774708506, "grad_norm": 0.0, - "learning_rate": 1.968226106437898e-05, - "loss": 0.9158, + "learning_rate": 1.9264229383384734e-05, + "loss": 1.0598, "step": 3813 }, { - "epoch": 0.10807900478902774, + "epoch": 0.14922920416308005, "grad_norm": 0.0, - "learning_rate": 1.9682031506163556e-05, - "loss": 1.0662, + "learning_rate": 1.926375221619423e-05, + "loss": 1.2234, "step": 3814 }, { - "epoch": 0.1081073422312902, + "epoch": 0.1492683308553095, "grad_norm": 0.0, - "learning_rate": 1.968180186639279e-05, - "loss": 1.1237, + "learning_rate": 1.926327490023947e-05, + "loss": 1.2546, "step": 3815 }, { - "epoch": 0.10813567967355267, + "epoch": 0.14930745754753894, "grad_norm": 0.0, - "learning_rate": 1.968157214506861e-05, - "loss": 0.9884, + "learning_rate": 1.9262797435528126e-05, + "loss": 1.2014, "step": 3816 }, { - "epoch": 0.10816401711581512, + "epoch": 0.14934658423976838, "grad_norm": 0.0, - "learning_rate": 1.968134234219295e-05, - "loss": 1.0782, + "learning_rate": 1.926231982206786e-05, + "loss": 1.1225, "step": 3817 }, { - "epoch": 0.10819235455807759, + "epoch": 0.14938571093199782, "grad_norm": 0.0, - "learning_rate": 1.9681112457767755e-05, - "loss": 1.0177, + "learning_rate": 1.9261842059866345e-05, + "loss": 1.1714, "step": 3818 }, { - "epoch": 0.10822069200034005, + "epoch": 0.14942483762422726, "grad_norm": 0.0, - "learning_rate": 1.9680882491794953e-05, - "loss": 1.0002, + "learning_rate": 1.926136414893125e-05, + "loss": 1.2579, "step": 3819 }, { - "epoch": 0.1082490294426025, + "epoch": 0.1494639643164567, "grad_norm": 0.0, - "learning_rate": 1.968065244427648e-05, - "loss": 0.9186, + "learning_rate": 1.9260886089270258e-05, + "loss": 1.0762, "step": 3820 }, { - "epoch": 0.10827736688486497, + "epoch": 0.1495030910086861, "grad_norm": 0.0, - "learning_rate": 1.9680422315214278e-05, - "loss": 1.0743, + "learning_rate": 1.9260407880891035e-05, + "loss": 1.2043, "step": 3821 }, { - "epoch": 0.10830570432712744, + "epoch": 0.14954221770091555, "grad_norm": 0.0, - "learning_rate": 1.9680192104610283e-05, - "loss": 0.9785, + "learning_rate": 1.9259929523801266e-05, + "loss": 1.097, "step": 3822 }, { - "epoch": 0.10833404176938989, + "epoch": 0.149581344393145, "grad_norm": 0.0, - "learning_rate": 1.967996181246644e-05, - "loss": 1.1013, + "learning_rate": 1.9259451018008632e-05, + "loss": 1.1255, "step": 3823 }, { - "epoch": 0.10836237921165236, + "epoch": 0.14962047108537443, "grad_norm": 0.0, - "learning_rate": 1.967973143878468e-05, - "loss": 1.0764, + "learning_rate": 1.925897236352082e-05, + "loss": 1.1549, "step": 3824 }, { - "epoch": 0.10839071665391482, + "epoch": 0.14965959777760388, "grad_norm": 0.0, - "learning_rate": 1.9679500983566947e-05, - "loss": 1.0648, + "learning_rate": 1.925849356034551e-05, + "loss": 1.2953, "step": 3825 }, { - "epoch": 0.10841905409617728, + "epoch": 0.14969872446983332, "grad_norm": 0.0, - "learning_rate": 1.9679270446815183e-05, - "loss": 1.0867, + "learning_rate": 1.92580146084904e-05, + "loss": 1.2635, "step": 3826 }, { - "epoch": 0.10844739153843974, + "epoch": 0.14973785116206276, "grad_norm": 0.0, - "learning_rate": 1.967903982853133e-05, - "loss": 1.0099, + "learning_rate": 1.925753550796317e-05, + "loss": 1.2186, "step": 3827 }, { - "epoch": 0.10847572898070221, + "epoch": 0.1497769778542922, "grad_norm": 0.0, - "learning_rate": 1.9678809128717332e-05, - "loss": 1.083, + "learning_rate": 1.9257056258771525e-05, + "loss": 1.1989, "step": 3828 }, { - "epoch": 0.10850406642296466, + "epoch": 0.14981610454652164, "grad_norm": 0.0, - "learning_rate": 1.967857834737513e-05, - "loss": 1.0731, + "learning_rate": 1.925657686092315e-05, + "loss": 1.1406, "step": 3829 }, { - "epoch": 0.10853240386522713, + "epoch": 0.14985523123875108, "grad_norm": 0.0, - "learning_rate": 1.9678347484506667e-05, - "loss": 1.1437, + "learning_rate": 1.925609731442576e-05, + "loss": 1.2759, "step": 3830 }, { - "epoch": 0.10856074130748959, + "epoch": 0.14989435793098052, "grad_norm": 0.0, - "learning_rate": 1.967811654011389e-05, - "loss": 0.9666, + "learning_rate": 1.925561761928704e-05, + "loss": 1.1598, "step": 3831 }, { - "epoch": 0.10858907874975204, + "epoch": 0.14993348462320996, "grad_norm": 0.0, - "learning_rate": 1.9677885514198746e-05, - "loss": 1.0704, + "learning_rate": 1.92551377755147e-05, + "loss": 1.2648, "step": 3832 }, { - "epoch": 0.10861741619201451, + "epoch": 0.1499726113154394, "grad_norm": 0.0, - "learning_rate": 1.9677654406763172e-05, - "loss": 1.1684, + "learning_rate": 1.925465778311644e-05, + "loss": 1.2142, "step": 3833 }, { - "epoch": 0.10864575363427698, + "epoch": 0.15001173800766884, "grad_norm": 0.0, - "learning_rate": 1.9677423217809127e-05, - "loss": 1.0497, + "learning_rate": 1.925417764209998e-05, + "loss": 1.2515, "step": 3834 }, { - "epoch": 0.10867409107653943, + "epoch": 0.15005086469989828, "grad_norm": 0.0, - "learning_rate": 1.9677191947338552e-05, - "loss": 1.0291, + "learning_rate": 1.925369735247302e-05, + "loss": 1.1108, "step": 3835 }, { - "epoch": 0.1087024285188019, + "epoch": 0.1500899913921277, "grad_norm": 0.0, - "learning_rate": 1.9676960595353392e-05, - "loss": 1.1045, + "learning_rate": 1.9253216914243275e-05, + "loss": 1.1712, "step": 3836 }, { - "epoch": 0.10873076596106436, + "epoch": 0.15012911808435714, "grad_norm": 0.0, - "learning_rate": 1.9676729161855603e-05, - "loss": 1.1013, + "learning_rate": 1.925273632741846e-05, + "loss": 1.2311, "step": 3837 }, { - "epoch": 0.10875910340332681, + "epoch": 0.15016824477658658, "grad_norm": 0.0, - "learning_rate": 1.967649764684713e-05, - "loss": 1.0714, + "learning_rate": 1.9252255592006298e-05, + "loss": 1.2065, "step": 3838 }, { - "epoch": 0.10878744084558928, + "epoch": 0.15020737146881602, "grad_norm": 0.0, - "learning_rate": 1.9676266050329926e-05, - "loss": 1.0748, + "learning_rate": 1.92517747080145e-05, + "loss": 1.1738, "step": 3839 }, { - "epoch": 0.10881577828785174, + "epoch": 0.15024649816104546, "grad_norm": 0.0, - "learning_rate": 1.9676034372305938e-05, - "loss": 1.0925, + "learning_rate": 1.92512936754508e-05, + "loss": 1.1854, "step": 3840 }, { - "epoch": 0.1088441157301142, + "epoch": 0.1502856248532749, "grad_norm": 0.0, - "learning_rate": 1.9675802612777117e-05, - "loss": 1.1287, + "learning_rate": 1.925081249432291e-05, + "loss": 1.1276, "step": 3841 }, { - "epoch": 0.10887245317237666, + "epoch": 0.15032475154550434, "grad_norm": 0.0, - "learning_rate": 1.967557077174542e-05, - "loss": 0.9717, + "learning_rate": 1.9250331164638566e-05, + "loss": 1.1388, "step": 3842 }, { - "epoch": 0.10890079061463913, + "epoch": 0.15036387823773378, "grad_norm": 0.0, - "learning_rate": 1.96753388492128e-05, - "loss": 1.1369, + "learning_rate": 1.9249849686405496e-05, + "loss": 1.2111, "step": 3843 }, { - "epoch": 0.10892912805690158, + "epoch": 0.15040300492996322, "grad_norm": 0.0, - "learning_rate": 1.9675106845181205e-05, - "loss": 1.0757, + "learning_rate": 1.924936805963143e-05, + "loss": 1.2115, "step": 3844 }, { - "epoch": 0.10895746549916405, + "epoch": 0.15044213162219267, "grad_norm": 0.0, - "learning_rate": 1.967487475965259e-05, - "loss": 1.1063, + "learning_rate": 1.92488862843241e-05, + "loss": 1.1249, "step": 3845 }, { - "epoch": 0.10898580294142651, + "epoch": 0.1504812583144221, "grad_norm": 0.0, - "learning_rate": 1.9674642592628913e-05, - "loss": 1.0832, + "learning_rate": 1.9248404360491247e-05, + "loss": 1.2242, "step": 3846 }, { - "epoch": 0.10901414038368896, + "epoch": 0.15052038500665155, "grad_norm": 0.0, - "learning_rate": 1.9674410344112132e-05, - "loss": 1.0984, + "learning_rate": 1.924792228814061e-05, + "loss": 1.1333, "step": 3847 }, { - "epoch": 0.10904247782595143, + "epoch": 0.150559511698881, "grad_norm": 0.0, - "learning_rate": 1.9674178014104198e-05, - "loss": 1.0336, + "learning_rate": 1.924744006727993e-05, + "loss": 1.2502, "step": 3848 }, { - "epoch": 0.1090708152682139, + "epoch": 0.15059863839111043, "grad_norm": 0.0, - "learning_rate": 1.9673945602607073e-05, - "loss": 1.0465, + "learning_rate": 1.9246957697916947e-05, + "loss": 1.1507, "step": 3849 }, { - "epoch": 0.10909915271047635, + "epoch": 0.15063776508333984, "grad_norm": 0.0, - "learning_rate": 1.967371310962271e-05, - "loss": 1.1512, + "learning_rate": 1.9246475180059416e-05, + "loss": 1.2394, "step": 3850 }, { - "epoch": 0.10912749015273882, + "epoch": 0.15067689177556928, "grad_norm": 0.0, - "learning_rate": 1.9673480535153067e-05, - "loss": 1.0382, + "learning_rate": 1.9245992513715074e-05, + "loss": 1.034, "step": 3851 }, { - "epoch": 0.10915582759500128, + "epoch": 0.15071601846779872, "grad_norm": 0.0, - "learning_rate": 1.967324787920011e-05, - "loss": 1.0207, + "learning_rate": 1.924550969889168e-05, + "loss": 1.0717, "step": 3852 }, { - "epoch": 0.10918416503726373, + "epoch": 0.15075514516002816, "grad_norm": 0.0, - "learning_rate": 1.967301514176579e-05, - "loss": 0.9619, + "learning_rate": 1.924502673559699e-05, + "loss": 1.2034, "step": 3853 }, { - "epoch": 0.1092125024795262, + "epoch": 0.1507942718522576, "grad_norm": 0.0, - "learning_rate": 1.9672782322852073e-05, - "loss": 1.0927, + "learning_rate": 1.924454362383875e-05, + "loss": 1.2368, "step": 3854 }, { - "epoch": 0.10924083992178867, + "epoch": 0.15083339854448705, "grad_norm": 0.0, - "learning_rate": 1.967254942246092e-05, - "loss": 1.03, + "learning_rate": 1.9244060363624727e-05, + "loss": 1.186, "step": 3855 }, { - "epoch": 0.10926917736405112, + "epoch": 0.1508725252367165, "grad_norm": 0.0, - "learning_rate": 1.967231644059429e-05, - "loss": 0.9898, + "learning_rate": 1.9243576954962676e-05, + "loss": 1.3087, "step": 3856 }, { - "epoch": 0.10929751480631358, + "epoch": 0.15091165192894593, "grad_norm": 0.0, - "learning_rate": 1.9672083377254144e-05, - "loss": 1.0674, + "learning_rate": 1.924309339786036e-05, + "loss": 1.1306, "step": 3857 }, { - "epoch": 0.10932585224857605, + "epoch": 0.15095077862117537, "grad_norm": 0.0, - "learning_rate": 1.9671850232442453e-05, - "loss": 1.0373, + "learning_rate": 1.924260969232555e-05, + "loss": 1.2231, "step": 3858 }, { - "epoch": 0.1093541896908385, + "epoch": 0.1509899053134048, "grad_norm": 0.0, - "learning_rate": 1.967161700616117e-05, - "loss": 0.9546, + "learning_rate": 1.924212583836601e-05, + "loss": 1.2237, "step": 3859 }, { - "epoch": 0.10938252713310097, + "epoch": 0.15102903200563425, "grad_norm": 0.0, - "learning_rate": 1.9671383698412275e-05, - "loss": 1.1603, + "learning_rate": 1.9241641835989506e-05, + "loss": 1.2693, "step": 3860 }, { - "epoch": 0.10941086457536343, + "epoch": 0.1510681586978637, "grad_norm": 0.0, - "learning_rate": 1.9671150309197714e-05, - "loss": 0.9996, + "learning_rate": 1.9241157685203817e-05, + "loss": 1.2078, "step": 3861 }, { - "epoch": 0.10943920201762589, + "epoch": 0.15110728539009313, "grad_norm": 0.0, - "learning_rate": 1.967091683851947e-05, - "loss": 0.9416, + "learning_rate": 1.924067338601672e-05, + "loss": 1.2562, "step": 3862 }, { - "epoch": 0.10946753945988835, + "epoch": 0.15114641208232257, "grad_norm": 0.0, - "learning_rate": 1.9670683286379496e-05, - "loss": 1.0798, + "learning_rate": 1.924018893843598e-05, + "loss": 1.2223, "step": 3863 }, { - "epoch": 0.10949587690215082, + "epoch": 0.151185538774552, "grad_norm": 0.0, - "learning_rate": 1.967044965277977e-05, - "loss": 0.9496, + "learning_rate": 1.923970434246939e-05, + "loss": 1.2868, "step": 3864 }, { - "epoch": 0.10952421434441327, + "epoch": 0.15122466546678143, "grad_norm": 0.0, - "learning_rate": 1.9670215937722256e-05, - "loss": 0.9979, + "learning_rate": 1.9239219598124722e-05, + "loss": 1.2565, "step": 3865 }, { - "epoch": 0.10955255178667574, + "epoch": 0.15126379215901087, "grad_norm": 0.0, - "learning_rate": 1.9669982141208917e-05, - "loss": 1.0492, + "learning_rate": 1.9238734705409766e-05, + "loss": 1.0752, "step": 3866 }, { - "epoch": 0.1095808892289382, + "epoch": 0.1513029188512403, "grad_norm": 0.0, - "learning_rate": 1.9669748263241733e-05, - "loss": 1.1324, + "learning_rate": 1.9238249664332307e-05, + "loss": 1.2852, "step": 3867 }, { - "epoch": 0.10960922667120065, + "epoch": 0.15134204554346975, "grad_norm": 0.0, - "learning_rate": 1.9669514303822665e-05, - "loss": 1.0161, + "learning_rate": 1.9237764474900137e-05, + "loss": 1.1938, "step": 3868 }, { - "epoch": 0.10963756411346312, + "epoch": 0.1513811722356992, "grad_norm": 0.0, - "learning_rate": 1.966928026295369e-05, - "loss": 1.1542, + "learning_rate": 1.9237279137121043e-05, + "loss": 1.2128, "step": 3869 }, { - "epoch": 0.10966590155572559, + "epoch": 0.15142029892792863, "grad_norm": 0.0, - "learning_rate": 1.9669046140636773e-05, - "loss": 1.1465, + "learning_rate": 1.923679365100282e-05, + "loss": 1.0651, "step": 3870 }, { - "epoch": 0.10969423899798804, + "epoch": 0.15145942562015807, "grad_norm": 0.0, - "learning_rate": 1.9668811936873894e-05, - "loss": 1.0824, + "learning_rate": 1.9236308016553266e-05, + "loss": 1.104, "step": 3871 }, { - "epoch": 0.1097225764402505, + "epoch": 0.1514985523123875, "grad_norm": 0.0, - "learning_rate": 1.966857765166702e-05, - "loss": 1.0033, + "learning_rate": 1.9235822233780182e-05, + "loss": 1.1729, "step": 3872 }, { - "epoch": 0.10975091388251297, + "epoch": 0.15153767900461695, "grad_norm": 0.0, - "learning_rate": 1.9668343285018127e-05, - "loss": 1.0851, + "learning_rate": 1.923533630269136e-05, + "loss": 1.2643, "step": 3873 }, { - "epoch": 0.10977925132477542, + "epoch": 0.1515768056968464, "grad_norm": 0.0, - "learning_rate": 1.9668108836929187e-05, - "loss": 1.071, + "learning_rate": 1.9234850223294613e-05, + "loss": 1.1589, "step": 3874 }, { - "epoch": 0.10980758876703789, + "epoch": 0.15161593238907584, "grad_norm": 0.0, - "learning_rate": 1.9667874307402176e-05, - "loss": 1.2281, + "learning_rate": 1.9234363995597748e-05, + "loss": 1.1191, "step": 3875 }, { - "epoch": 0.10983592620930036, + "epoch": 0.15165505908130528, "grad_norm": 0.0, - "learning_rate": 1.966763969643907e-05, - "loss": 1.0918, + "learning_rate": 1.9233877619608564e-05, + "loss": 1.0532, "step": 3876 }, { - "epoch": 0.10986426365156281, + "epoch": 0.15169418577353472, "grad_norm": 0.0, - "learning_rate": 1.9667405004041846e-05, - "loss": 1.1279, + "learning_rate": 1.9233391095334876e-05, + "loss": 1.2513, "step": 3877 }, { - "epoch": 0.10989260109382527, + "epoch": 0.15173331246576413, "grad_norm": 0.0, - "learning_rate": 1.9667170230212478e-05, - "loss": 1.0705, + "learning_rate": 1.9232904422784496e-05, + "loss": 1.2178, "step": 3878 }, { - "epoch": 0.10992093853608774, + "epoch": 0.15177243915799357, "grad_norm": 0.0, - "learning_rate": 1.9666935374952946e-05, - "loss": 0.917, + "learning_rate": 1.923241760196524e-05, + "loss": 1.2234, "step": 3879 }, { - "epoch": 0.10994927597835019, + "epoch": 0.151811565850223, "grad_norm": 0.0, - "learning_rate": 1.9666700438265227e-05, - "loss": 1.06, + "learning_rate": 1.923193063288493e-05, + "loss": 1.1938, "step": 3880 }, { - "epoch": 0.10997761342061266, + "epoch": 0.15185069254245245, "grad_norm": 0.0, - "learning_rate": 1.9666465420151303e-05, - "loss": 1.075, + "learning_rate": 1.9231443515551382e-05, + "loss": 1.1136, "step": 3881 }, { - "epoch": 0.11000595086287512, + "epoch": 0.1518898192346819, "grad_norm": 0.0, - "learning_rate": 1.9666230320613148e-05, - "loss": 1.0129, + "learning_rate": 1.9230956249972415e-05, + "loss": 1.1674, "step": 3882 }, { - "epoch": 0.11003428830513758, + "epoch": 0.15192894592691134, "grad_norm": 0.0, - "learning_rate": 1.966599513965275e-05, - "loss": 0.9809, + "learning_rate": 1.923046883615586e-05, + "loss": 1.0928, "step": 3883 }, { - "epoch": 0.11006262574740004, + "epoch": 0.15196807261914078, "grad_norm": 0.0, - "learning_rate": 1.966575987727208e-05, - "loss": 1.0945, + "learning_rate": 1.922998127410954e-05, + "loss": 1.1082, "step": 3884 }, { - "epoch": 0.11009096318966251, + "epoch": 0.15200719931137022, "grad_norm": 0.0, - "learning_rate": 1.966552453347313e-05, - "loss": 1.1167, + "learning_rate": 1.922949356384129e-05, + "loss": 1.2875, "step": 3885 }, { - "epoch": 0.11011930063192496, + "epoch": 0.15204632600359966, "grad_norm": 0.0, - "learning_rate": 1.966528910825787e-05, - "loss": 1.0105, + "learning_rate": 1.9229005705358937e-05, + "loss": 1.1799, "step": 3886 }, { - "epoch": 0.11014763807418743, + "epoch": 0.1520854526958291, "grad_norm": 0.0, - "learning_rate": 1.96650536016283e-05, - "loss": 0.9327, + "learning_rate": 1.9228517698670316e-05, + "loss": 1.1807, "step": 3887 }, { - "epoch": 0.11017597551644988, + "epoch": 0.15212457938805854, "grad_norm": 0.0, - "learning_rate": 1.966481801358639e-05, - "loss": 1.0777, + "learning_rate": 1.9228029543783265e-05, + "loss": 1.1824, "step": 3888 }, { - "epoch": 0.11020431295871234, + "epoch": 0.15216370608028798, "grad_norm": 0.0, - "learning_rate": 1.966458234413413e-05, - "loss": 1.1112, + "learning_rate": 1.9227541240705622e-05, + "loss": 1.264, "step": 3889 }, { - "epoch": 0.11023265040097481, + "epoch": 0.15220283277251742, "grad_norm": 0.0, - "learning_rate": 1.96643465932735e-05, - "loss": 1.0327, + "learning_rate": 1.9227052789445233e-05, + "loss": 1.2039, "step": 3890 }, { - "epoch": 0.11026098784323726, + "epoch": 0.15224195946474686, "grad_norm": 0.0, - "learning_rate": 1.9664110761006497e-05, - "loss": 1.0424, + "learning_rate": 1.9226564190009935e-05, + "loss": 1.3123, "step": 3891 }, { - "epoch": 0.11028932528549973, + "epoch": 0.1522810861569763, "grad_norm": 0.0, - "learning_rate": 1.9663874847335096e-05, - "loss": 1.0366, + "learning_rate": 1.9226075442407582e-05, + "loss": 1.1701, "step": 3892 }, { - "epoch": 0.1103176627277622, + "epoch": 0.15232021284920572, "grad_norm": 0.0, - "learning_rate": 1.966363885226129e-05, - "loss": 0.9711, + "learning_rate": 1.9225586546646014e-05, + "loss": 1.2052, "step": 3893 }, { - "epoch": 0.11034600017002465, + "epoch": 0.15235933954143516, "grad_norm": 0.0, - "learning_rate": 1.9663402775787066e-05, - "loss": 1.115, + "learning_rate": 1.9225097502733087e-05, + "loss": 1.418, "step": 3894 }, { - "epoch": 0.11037433761228711, + "epoch": 0.1523984662336646, "grad_norm": 0.0, - "learning_rate": 1.966316661791441e-05, - "loss": 1.0555, + "learning_rate": 1.9224608310676653e-05, + "loss": 1.2289, "step": 3895 }, { - "epoch": 0.11040267505454958, + "epoch": 0.15243759292589404, "grad_norm": 0.0, - "learning_rate": 1.9662930378645313e-05, - "loss": 1.1126, + "learning_rate": 1.9224118970484568e-05, + "loss": 1.2732, "step": 3896 }, { - "epoch": 0.11043101249681203, + "epoch": 0.15247671961812348, "grad_norm": 0.0, - "learning_rate": 1.9662694057981768e-05, - "loss": 0.9509, + "learning_rate": 1.9223629482164695e-05, + "loss": 1.2563, "step": 3897 }, { - "epoch": 0.1104593499390745, + "epoch": 0.15251584631035292, "grad_norm": 0.0, - "learning_rate": 1.966245765592576e-05, - "loss": 1.0611, + "learning_rate": 1.922313984572489e-05, + "loss": 1.2404, "step": 3898 }, { - "epoch": 0.11048768738133696, + "epoch": 0.15255497300258236, "grad_norm": 0.0, - "learning_rate": 1.9662221172479287e-05, - "loss": 1.0869, + "learning_rate": 1.9222650061173012e-05, + "loss": 1.1989, "step": 3899 }, { - "epoch": 0.11051602482359942, + "epoch": 0.1525940996948118, "grad_norm": 0.0, - "learning_rate": 1.9661984607644332e-05, - "loss": 1.1232, + "learning_rate": 1.9222160128516932e-05, + "loss": 1.1565, "step": 3900 }, { - "epoch": 0.11054436226586188, + "epoch": 0.15263322638704124, "grad_norm": 0.0, - "learning_rate": 1.9661747961422894e-05, - "loss": 1.0048, + "learning_rate": 1.9221670047764517e-05, + "loss": 1.2963, "step": 3901 }, { - "epoch": 0.11057269970812435, + "epoch": 0.15267235307927068, "grad_norm": 0.0, - "learning_rate": 1.9661511233816965e-05, - "loss": 1.0612, + "learning_rate": 1.9221179818923638e-05, + "loss": 1.209, "step": 3902 }, { - "epoch": 0.1106010371503868, + "epoch": 0.15271147977150012, "grad_norm": 0.0, - "learning_rate": 1.966127442482854e-05, - "loss": 1.0187, + "learning_rate": 1.9220689442002165e-05, + "loss": 1.2054, "step": 3903 }, { - "epoch": 0.11062937459264927, + "epoch": 0.15275060646372957, "grad_norm": 0.0, - "learning_rate": 1.9661037534459614e-05, - "loss": 0.9816, + "learning_rate": 1.9220198917007976e-05, + "loss": 1.1794, "step": 3904 }, { - "epoch": 0.11065771203491173, + "epoch": 0.152789733155959, "grad_norm": 0.0, - "learning_rate": 1.9660800562712183e-05, - "loss": 0.9462, + "learning_rate": 1.921970824394894e-05, + "loss": 1.1905, "step": 3905 }, { - "epoch": 0.11068604947717418, + "epoch": 0.15282885984818845, "grad_norm": 0.0, - "learning_rate": 1.9660563509588236e-05, - "loss": 0.9537, + "learning_rate": 1.921921742283295e-05, + "loss": 1.2788, "step": 3906 }, { - "epoch": 0.11071438691943665, + "epoch": 0.15286798654041786, "grad_norm": 0.0, - "learning_rate": 1.966032637508978e-05, - "loss": 1.1274, + "learning_rate": 1.9218726453667872e-05, + "loss": 1.2379, "step": 3907 }, { - "epoch": 0.11074272436169912, + "epoch": 0.1529071132326473, "grad_norm": 0.0, - "learning_rate": 1.9660089159218807e-05, - "loss": 1.0406, + "learning_rate": 1.9218235336461606e-05, + "loss": 1.2142, "step": 3908 }, { - "epoch": 0.11077106180396157, + "epoch": 0.15294623992487674, "grad_norm": 0.0, - "learning_rate": 1.9659851861977316e-05, - "loss": 1.1923, + "learning_rate": 1.9217744071222028e-05, + "loss": 1.0614, "step": 3909 }, { - "epoch": 0.11079939924622403, + "epoch": 0.15298536661710618, "grad_norm": 0.0, - "learning_rate": 1.9659614483367302e-05, - "loss": 1.0758, + "learning_rate": 1.921725265795703e-05, + "loss": 1.1804, "step": 3910 }, { - "epoch": 0.1108277366884865, + "epoch": 0.15302449330933562, "grad_norm": 0.0, - "learning_rate": 1.965937702339077e-05, - "loss": 1.1283, + "learning_rate": 1.9216761096674505e-05, + "loss": 1.2074, "step": 3911 }, { - "epoch": 0.11085607413074895, + "epoch": 0.15306362000156506, "grad_norm": 0.0, - "learning_rate": 1.965913948204972e-05, - "loss": 1.0097, + "learning_rate": 1.921626938738235e-05, + "loss": 1.1329, "step": 3912 }, { - "epoch": 0.11088441157301142, + "epoch": 0.1531027466937945, "grad_norm": 0.0, - "learning_rate": 1.965890185934615e-05, - "loss": 0.9638, + "learning_rate": 1.9215777530088452e-05, + "loss": 1.1299, "step": 3913 }, { - "epoch": 0.11091274901527388, + "epoch": 0.15314187338602395, "grad_norm": 0.0, - "learning_rate": 1.9658664155282065e-05, - "loss": 1.0465, + "learning_rate": 1.9215285524800717e-05, + "loss": 1.1452, "step": 3914 }, { - "epoch": 0.11094108645753634, + "epoch": 0.1531810000782534, "grad_norm": 0.0, - "learning_rate": 1.965842636985946e-05, - "loss": 0.9667, + "learning_rate": 1.9214793371527043e-05, + "loss": 1.1453, "step": 3915 }, { - "epoch": 0.1109694238997988, + "epoch": 0.15322012677048283, "grad_norm": 0.0, - "learning_rate": 1.9658188503080347e-05, - "loss": 1.0955, + "learning_rate": 1.9214301070275335e-05, + "loss": 1.1331, "step": 3916 }, { - "epoch": 0.11099776134206127, + "epoch": 0.15325925346271227, "grad_norm": 0.0, - "learning_rate": 1.9657950554946724e-05, - "loss": 1.0849, + "learning_rate": 1.92138086210535e-05, + "loss": 1.2079, "step": 3917 }, { - "epoch": 0.11102609878432372, + "epoch": 0.1532983801549417, "grad_norm": 0.0, - "learning_rate": 1.9657712525460598e-05, - "loss": 1.0488, + "learning_rate": 1.921331602386944e-05, + "loss": 1.2784, "step": 3918 }, { - "epoch": 0.11105443622658619, + "epoch": 0.15333750684717115, "grad_norm": 0.0, - "learning_rate": 1.9657474414623974e-05, - "loss": 1.0172, + "learning_rate": 1.921282327873107e-05, + "loss": 1.2344, "step": 3919 }, { - "epoch": 0.11108277366884865, + "epoch": 0.1533766335394006, "grad_norm": 0.0, - "learning_rate": 1.9657236222438855e-05, - "loss": 0.9377, + "learning_rate": 1.9212330385646306e-05, + "loss": 1.1358, "step": 3920 }, { - "epoch": 0.1111111111111111, + "epoch": 0.15341576023163, "grad_norm": 0.0, - "learning_rate": 1.9656997948907253e-05, - "loss": 0.962, + "learning_rate": 1.921183734462306e-05, + "loss": 1.1962, "step": 3921 }, { - "epoch": 0.11113944855337357, + "epoch": 0.15345488692385945, "grad_norm": 0.0, - "learning_rate": 1.9656759594031168e-05, - "loss": 1.1068, + "learning_rate": 1.9211344155669244e-05, + "loss": 1.2269, "step": 3922 }, { - "epoch": 0.11116778599563604, + "epoch": 0.1534940136160889, "grad_norm": 0.0, - "learning_rate": 1.965652115781261e-05, - "loss": 0.9587, + "learning_rate": 1.9210850818792787e-05, + "loss": 1.2086, "step": 3923 }, { - "epoch": 0.11119612343789849, + "epoch": 0.15353314030831833, "grad_norm": 0.0, - "learning_rate": 1.965628264025359e-05, - "loss": 1.05, + "learning_rate": 1.921035733400161e-05, + "loss": 1.0941, "step": 3924 }, { - "epoch": 0.11122446088016096, + "epoch": 0.15357226700054777, "grad_norm": 0.0, - "learning_rate": 1.9656044041356116e-05, - "loss": 1.0749, + "learning_rate": 1.920986370130363e-05, + "loss": 1.2756, "step": 3925 }, { - "epoch": 0.11125279832242342, + "epoch": 0.1536113936927772, "grad_norm": 0.0, - "learning_rate": 1.9655805361122197e-05, - "loss": 1.1262, + "learning_rate": 1.9209369920706783e-05, + "loss": 1.2045, "step": 3926 }, { - "epoch": 0.11128113576468587, + "epoch": 0.15365052038500665, "grad_norm": 0.0, - "learning_rate": 1.9655566599553846e-05, - "loss": 1.0028, + "learning_rate": 1.9208875992218995e-05, + "loss": 1.2863, "step": 3927 }, { - "epoch": 0.11130947320694834, + "epoch": 0.1536896470772361, "grad_norm": 0.0, - "learning_rate": 1.965532775665307e-05, - "loss": 1.085, + "learning_rate": 1.9208381915848196e-05, + "loss": 1.1957, "step": 3928 }, { - "epoch": 0.1113378106492108, + "epoch": 0.15372877376946553, "grad_norm": 0.0, - "learning_rate": 1.965508883242188e-05, - "loss": 1.1808, + "learning_rate": 1.9207887691602324e-05, + "loss": 1.0756, "step": 3929 }, { - "epoch": 0.11136614809147326, + "epoch": 0.15376790046169497, "grad_norm": 0.0, - "learning_rate": 1.9654849826862295e-05, - "loss": 0.9226, + "learning_rate": 1.9207393319489314e-05, + "loss": 1.1996, "step": 3930 }, { - "epoch": 0.11139448553373572, + "epoch": 0.1538070271539244, "grad_norm": 0.0, - "learning_rate": 1.9654610739976325e-05, - "loss": 1.0503, + "learning_rate": 1.9206898799517105e-05, + "loss": 1.2125, "step": 3931 }, { - "epoch": 0.11142282297599819, + "epoch": 0.15384615384615385, "grad_norm": 0.0, - "learning_rate": 1.9654371571765983e-05, - "loss": 1.0806, + "learning_rate": 1.9206404131693634e-05, + "loss": 1.167, "step": 3932 }, { - "epoch": 0.11145116041826064, + "epoch": 0.1538852805383833, "grad_norm": 0.0, - "learning_rate": 1.965413232223328e-05, - "loss": 1.0605, + "learning_rate": 1.9205909316026854e-05, + "loss": 1.2745, "step": 3933 }, { - "epoch": 0.11147949786052311, + "epoch": 0.15392440723061274, "grad_norm": 0.0, - "learning_rate": 1.965389299138024e-05, - "loss": 1.0646, + "learning_rate": 1.9205414352524703e-05, + "loss": 1.231, "step": 3934 }, { - "epoch": 0.11150783530278557, + "epoch": 0.15396353392284215, "grad_norm": 0.0, - "learning_rate": 1.9653653579208877e-05, - "loss": 0.9535, + "learning_rate": 1.9204919241195134e-05, + "loss": 1.1247, "step": 3935 }, { - "epoch": 0.11153617274504803, + "epoch": 0.1540026606150716, "grad_norm": 0.0, - "learning_rate": 1.96534140857212e-05, - "loss": 1.1627, + "learning_rate": 1.9204423982046097e-05, + "loss": 1.2279, "step": 3936 }, { - "epoch": 0.11156451018731049, + "epoch": 0.15404178730730103, "grad_norm": 0.0, - "learning_rate": 1.9653174510919234e-05, - "loss": 1.0692, + "learning_rate": 1.9203928575085543e-05, + "loss": 1.0684, "step": 3937 }, { - "epoch": 0.11159284762957296, + "epoch": 0.15408091399953047, "grad_norm": 0.0, - "learning_rate": 1.965293485480499e-05, - "loss": 1.0664, + "learning_rate": 1.920343302032143e-05, + "loss": 1.1102, "step": 3938 }, { - "epoch": 0.11162118507183541, + "epoch": 0.1541200406917599, "grad_norm": 0.0, - "learning_rate": 1.9652695117380496e-05, - "loss": 1.1005, + "learning_rate": 1.9202937317761713e-05, + "loss": 1.22, "step": 3939 }, { - "epoch": 0.11164952251409788, + "epoch": 0.15415916738398935, "grad_norm": 0.0, - "learning_rate": 1.9652455298647766e-05, - "loss": 1.0341, + "learning_rate": 1.920244146741436e-05, + "loss": 1.061, "step": 3940 }, { - "epoch": 0.11167785995636034, + "epoch": 0.1541982940762188, "grad_norm": 0.0, - "learning_rate": 1.9652215398608818e-05, - "loss": 1.1476, + "learning_rate": 1.9201945469287325e-05, + "loss": 1.2156, "step": 3941 }, { - "epoch": 0.1117061973986228, + "epoch": 0.15423742076844824, "grad_norm": 0.0, - "learning_rate": 1.9651975417265678e-05, - "loss": 1.0666, + "learning_rate": 1.9201449323388573e-05, + "loss": 1.3179, "step": 3942 }, { - "epoch": 0.11173453484088526, + "epoch": 0.15427654746067768, "grad_norm": 0.0, - "learning_rate": 1.965173535462036e-05, - "loss": 0.9252, + "learning_rate": 1.9200953029726082e-05, + "loss": 1.2757, "step": 3943 }, { - "epoch": 0.11176287228314773, + "epoch": 0.15431567415290712, "grad_norm": 0.0, - "learning_rate": 1.96514952106749e-05, - "loss": 0.9138, + "learning_rate": 1.920045658830781e-05, + "loss": 1.1016, "step": 3944 }, { - "epoch": 0.11179120972541018, + "epoch": 0.15435480084513656, "grad_norm": 0.0, - "learning_rate": 1.9651254985431304e-05, - "loss": 1.1385, + "learning_rate": 1.9199959999141735e-05, + "loss": 1.2632, "step": 3945 }, { - "epoch": 0.11181954716767264, + "epoch": 0.154393927537366, "grad_norm": 0.0, - "learning_rate": 1.965101467889161e-05, - "loss": 1.0026, + "learning_rate": 1.919946326223583e-05, + "loss": 0.9958, "step": 3946 }, { - "epoch": 0.11184788460993511, + "epoch": 0.15443305422959544, "grad_norm": 0.0, - "learning_rate": 1.965077429105783e-05, - "loss": 1.0312, + "learning_rate": 1.9198966377598073e-05, + "loss": 1.1239, "step": 3947 }, { - "epoch": 0.11187622205219756, + "epoch": 0.15447218092182488, "grad_norm": 0.0, - "learning_rate": 1.9650533821931998e-05, - "loss": 1.0749, + "learning_rate": 1.9198469345236444e-05, + "loss": 1.2368, "step": 3948 }, { - "epoch": 0.11190455949446003, + "epoch": 0.1545113076140543, "grad_norm": 0.0, - "learning_rate": 1.9650293271516135e-05, - "loss": 1.076, + "learning_rate": 1.9197972165158924e-05, + "loss": 1.2322, "step": 3949 }, { - "epoch": 0.1119328969367225, + "epoch": 0.15455043430628373, "grad_norm": 0.0, - "learning_rate": 1.9650052639812268e-05, - "loss": 1.1203, + "learning_rate": 1.9197474837373495e-05, + "loss": 1.0769, "step": 3950 }, { - "epoch": 0.11196123437898495, + "epoch": 0.15458956099851318, "grad_norm": 0.0, - "learning_rate": 1.964981192682242e-05, - "loss": 1.1374, + "learning_rate": 1.919697736188815e-05, + "loss": 1.1706, "step": 3951 }, { - "epoch": 0.11198957182124741, + "epoch": 0.15462868769074262, "grad_norm": 0.0, - "learning_rate": 1.964957113254863e-05, - "loss": 1.1822, + "learning_rate": 1.9196479738710865e-05, + "loss": 1.123, "step": 3952 }, { - "epoch": 0.11201790926350988, + "epoch": 0.15466781438297206, "grad_norm": 0.0, - "learning_rate": 1.9649330256992917e-05, - "loss": 1.0614, + "learning_rate": 1.9195981967849643e-05, + "loss": 1.1554, "step": 3953 }, { - "epoch": 0.11204624670577233, + "epoch": 0.1547069410752015, "grad_norm": 0.0, - "learning_rate": 1.9649089300157307e-05, - "loss": 1.0721, + "learning_rate": 1.9195484049312475e-05, + "loss": 1.1396, "step": 3954 }, { - "epoch": 0.1120745841480348, + "epoch": 0.15474606776743094, "grad_norm": 0.0, - "learning_rate": 1.9648848262043837e-05, - "loss": 1.0522, + "learning_rate": 1.9194985983107354e-05, + "loss": 1.2631, "step": 3955 }, { - "epoch": 0.11210292159029726, + "epoch": 0.15478519445966038, "grad_norm": 0.0, - "learning_rate": 1.9648607142654537e-05, - "loss": 1.139, + "learning_rate": 1.9194487769242283e-05, + "loss": 1.1077, "step": 3956 }, { - "epoch": 0.11213125903255972, + "epoch": 0.15482432115188982, "grad_norm": 0.0, - "learning_rate": 1.964836594199144e-05, - "loss": 0.9546, + "learning_rate": 1.9193989407725255e-05, + "loss": 1.1094, "step": 3957 }, { - "epoch": 0.11215959647482218, + "epoch": 0.15486344784411926, "grad_norm": 0.0, - "learning_rate": 1.964812466005657e-05, - "loss": 1.0715, + "learning_rate": 1.919349089856428e-05, + "loss": 1.176, "step": 3958 }, { - "epoch": 0.11218793391708465, + "epoch": 0.1549025745363487, "grad_norm": 0.0, - "learning_rate": 1.964788329685196e-05, - "loss": 0.953, + "learning_rate": 1.9192992241767363e-05, + "loss": 1.2145, "step": 3959 }, { - "epoch": 0.1122162713593471, + "epoch": 0.15494170122857814, "grad_norm": 0.0, - "learning_rate": 1.964764185237965e-05, - "loss": 1.2307, + "learning_rate": 1.9192493437342508e-05, + "loss": 1.2012, "step": 3960 }, { - "epoch": 0.11224460880160957, + "epoch": 0.15498082792080758, "grad_norm": 0.0, - "learning_rate": 1.964740032664167e-05, - "loss": 1.0446, + "learning_rate": 1.9191994485297727e-05, + "loss": 1.1234, "step": 3961 }, { - "epoch": 0.11227294624387203, + "epoch": 0.15501995461303703, "grad_norm": 0.0, - "learning_rate": 1.964715871964005e-05, - "loss": 1.1701, + "learning_rate": 1.9191495385641037e-05, + "loss": 1.2312, "step": 3962 }, { - "epoch": 0.11230128368613448, + "epoch": 0.15505908130526647, "grad_norm": 0.0, - "learning_rate": 1.9646917031376834e-05, - "loss": 0.9512, + "learning_rate": 1.9190996138380446e-05, + "loss": 1.0781, "step": 3963 }, { - "epoch": 0.11232962112839695, + "epoch": 0.15509820799749588, "grad_norm": 0.0, - "learning_rate": 1.9646675261854053e-05, - "loss": 0.972, + "learning_rate": 1.919049674352397e-05, + "loss": 1.1855, "step": 3964 }, { - "epoch": 0.11235795857065942, + "epoch": 0.15513733468972532, "grad_norm": 0.0, - "learning_rate": 1.9646433411073745e-05, - "loss": 1.1536, + "learning_rate": 1.9189997201079638e-05, + "loss": 1.1898, "step": 3965 }, { - "epoch": 0.11238629601292187, + "epoch": 0.15517646138195476, "grad_norm": 0.0, - "learning_rate": 1.9646191479037946e-05, - "loss": 1.1202, + "learning_rate": 1.9189497511055464e-05, + "loss": 1.145, "step": 3966 }, { - "epoch": 0.11241463345518433, + "epoch": 0.1552155880741842, "grad_norm": 0.0, - "learning_rate": 1.964594946574869e-05, - "loss": 1.0866, + "learning_rate": 1.9188997673459475e-05, + "loss": 1.1259, "step": 3967 }, { - "epoch": 0.1124429708974468, + "epoch": 0.15525471476641364, "grad_norm": 0.0, - "learning_rate": 1.9645707371208025e-05, - "loss": 1.0669, + "learning_rate": 1.9188497688299697e-05, + "loss": 1.2398, "step": 3968 }, { - "epoch": 0.11247130833970925, + "epoch": 0.15529384145864308, "grad_norm": 0.0, - "learning_rate": 1.9645465195417986e-05, - "loss": 1.0544, + "learning_rate": 1.9187997555584165e-05, + "loss": 1.1325, "step": 3969 }, { - "epoch": 0.11249964578197172, + "epoch": 0.15533296815087252, "grad_norm": 0.0, - "learning_rate": 1.964522293838061e-05, - "loss": 1.0602, + "learning_rate": 1.91874972753209e-05, + "loss": 1.1984, "step": 3970 }, { - "epoch": 0.11252798322423418, + "epoch": 0.15537209484310197, "grad_norm": 0.0, - "learning_rate": 1.9644980600097935e-05, - "loss": 1.0404, + "learning_rate": 1.9186996847517945e-05, + "loss": 1.2748, "step": 3971 }, { - "epoch": 0.11255632066649664, + "epoch": 0.1554112215353314, "grad_norm": 0.0, - "learning_rate": 1.964473818057201e-05, - "loss": 1.0007, + "learning_rate": 1.918649627218333e-05, + "loss": 1.1505, "step": 3972 }, { - "epoch": 0.1125846581087591, + "epoch": 0.15545034822756085, "grad_norm": 0.0, - "learning_rate": 1.9644495679804873e-05, - "loss": 0.9317, + "learning_rate": 1.9185995549325097e-05, + "loss": 1.2668, "step": 3973 }, { - "epoch": 0.11261299555102157, + "epoch": 0.1554894749197903, "grad_norm": 0.0, - "learning_rate": 1.9644253097798572e-05, - "loss": 1.1327, + "learning_rate": 1.9185494678951285e-05, + "loss": 1.124, "step": 3974 }, { - "epoch": 0.11264133299328402, + "epoch": 0.15552860161201973, "grad_norm": 0.0, - "learning_rate": 1.964401043455514e-05, - "loss": 1.058, + "learning_rate": 1.918499366106994e-05, + "loss": 1.3634, "step": 3975 }, { - "epoch": 0.11266967043554649, + "epoch": 0.15556772830424917, "grad_norm": 0.0, - "learning_rate": 1.964376769007663e-05, - "loss": 1.0316, + "learning_rate": 1.918449249568911e-05, + "loss": 1.2705, "step": 3976 }, { - "epoch": 0.11269800787780895, + "epoch": 0.1556068549964786, "grad_norm": 0.0, - "learning_rate": 1.9643524864365086e-05, - "loss": 1.1106, + "learning_rate": 1.9183991182816832e-05, + "loss": 1.0793, "step": 3977 }, { - "epoch": 0.1127263453200714, + "epoch": 0.15564598168870802, "grad_norm": 0.0, - "learning_rate": 1.9643281957422547e-05, - "loss": 1.1522, + "learning_rate": 1.9183489722461167e-05, + "loss": 1.0814, "step": 3978 }, { - "epoch": 0.11275468276233387, + "epoch": 0.15568510838093746, "grad_norm": 0.0, - "learning_rate": 1.9643038969251066e-05, - "loss": 1.01, + "learning_rate": 1.9182988114630164e-05, + "loss": 1.1106, "step": 3979 }, { - "epoch": 0.11278302020459634, + "epoch": 0.1557242350731669, "grad_norm": 0.0, - "learning_rate": 1.9642795899852682e-05, - "loss": 1.0763, + "learning_rate": 1.918248635933188e-05, + "loss": 1.1316, "step": 3980 }, { - "epoch": 0.11281135764685879, + "epoch": 0.15576336176539635, "grad_norm": 0.0, - "learning_rate": 1.964255274922945e-05, - "loss": 1.0755, + "learning_rate": 1.918198445657437e-05, + "loss": 1.1895, "step": 3981 }, { - "epoch": 0.11283969508912126, + "epoch": 0.1558024884576258, "grad_norm": 0.0, - "learning_rate": 1.964230951738342e-05, - "loss": 0.9864, + "learning_rate": 1.91814824063657e-05, + "loss": 1.1743, "step": 3982 }, { - "epoch": 0.11286803253138372, + "epoch": 0.15584161514985523, "grad_norm": 0.0, - "learning_rate": 1.964206620431663e-05, - "loss": 1.131, + "learning_rate": 1.9180980208713923e-05, + "loss": 1.2478, "step": 3983 }, { - "epoch": 0.11289636997364617, + "epoch": 0.15588074184208467, "grad_norm": 0.0, - "learning_rate": 1.9641822810031135e-05, - "loss": 1.0988, + "learning_rate": 1.918047786362711e-05, + "loss": 1.2704, "step": 3984 }, { - "epoch": 0.11292470741590864, + "epoch": 0.1559198685343141, "grad_norm": 0.0, - "learning_rate": 1.9641579334528992e-05, - "loss": 1.1697, + "learning_rate": 1.9179975371113324e-05, + "loss": 1.2409, "step": 3985 }, { - "epoch": 0.1129530448581711, + "epoch": 0.15595899522654355, "grad_norm": 0.0, - "learning_rate": 1.9641335777812243e-05, - "loss": 1.0087, + "learning_rate": 1.9179472731180638e-05, + "loss": 1.2106, "step": 3986 }, { - "epoch": 0.11298138230043356, + "epoch": 0.155998121918773, "grad_norm": 0.0, - "learning_rate": 1.9641092139882943e-05, - "loss": 1.1896, + "learning_rate": 1.9178969943837123e-05, + "loss": 1.078, "step": 3987 }, { - "epoch": 0.11300971974269602, + "epoch": 0.15603724861100243, "grad_norm": 0.0, - "learning_rate": 1.9640848420743143e-05, - "loss": 1.1455, + "learning_rate": 1.917846700909085e-05, + "loss": 1.1657, "step": 3988 }, { - "epoch": 0.11303805718495849, + "epoch": 0.15607637530323187, "grad_norm": 0.0, - "learning_rate": 1.96406046203949e-05, - "loss": 1.1293, + "learning_rate": 1.9177963926949903e-05, + "loss": 1.1831, "step": 3989 }, { - "epoch": 0.11306639462722094, + "epoch": 0.15611550199546131, "grad_norm": 0.0, - "learning_rate": 1.964036073884026e-05, - "loss": 1.0776, + "learning_rate": 1.9177460697422356e-05, + "loss": 1.1744, "step": 3990 }, { - "epoch": 0.11309473206948341, + "epoch": 0.15615462868769076, "grad_norm": 0.0, - "learning_rate": 1.9640116776081282e-05, - "loss": 1.1054, + "learning_rate": 1.9176957320516287e-05, + "loss": 1.1015, "step": 3991 }, { - "epoch": 0.11312306951174587, + "epoch": 0.15619375537992017, "grad_norm": 0.0, - "learning_rate": 1.963987273212002e-05, - "loss": 1.1144, + "learning_rate": 1.917645379623978e-05, + "loss": 1.1522, "step": 3992 }, { - "epoch": 0.11315140695400833, + "epoch": 0.1562328820721496, "grad_norm": 0.0, - "learning_rate": 1.9639628606958535e-05, - "loss": 1.0769, + "learning_rate": 1.9175950124600926e-05, + "loss": 1.2601, "step": 3993 }, { - "epoch": 0.11317974439627079, + "epoch": 0.15627200876437905, "grad_norm": 0.0, - "learning_rate": 1.9639384400598876e-05, - "loss": 0.9577, + "learning_rate": 1.9175446305607816e-05, + "loss": 1.2673, "step": 3994 }, { - "epoch": 0.11320808183853326, + "epoch": 0.1563111354566085, "grad_norm": 0.0, - "learning_rate": 1.9639140113043102e-05, - "loss": 1.0865, + "learning_rate": 1.917494233926853e-05, + "loss": 1.1482, "step": 3995 }, { - "epoch": 0.11323641928079571, + "epoch": 0.15635026214883793, "grad_norm": 0.0, - "learning_rate": 1.963889574429327e-05, - "loss": 1.0339, + "learning_rate": 1.917443822559117e-05, + "loss": 1.2053, "step": 3996 }, { - "epoch": 0.11326475672305818, + "epoch": 0.15638938884106737, "grad_norm": 0.0, - "learning_rate": 1.9638651294351442e-05, - "loss": 1.1285, + "learning_rate": 1.9173933964583824e-05, + "loss": 1.2182, "step": 3997 }, { - "epoch": 0.11329309416532064, + "epoch": 0.1564285155332968, "grad_norm": 0.0, - "learning_rate": 1.963840676321968e-05, - "loss": 1.0587, + "learning_rate": 1.9173429556254598e-05, + "loss": 1.0339, "step": 3998 }, { - "epoch": 0.1133214316075831, + "epoch": 0.15646764222552625, "grad_norm": 0.0, - "learning_rate": 1.9638162150900028e-05, - "loss": 1.1453, + "learning_rate": 1.9172925000611587e-05, + "loss": 1.1821, "step": 3999 }, { - "epoch": 0.11334976904984556, + "epoch": 0.1565067689177557, "grad_norm": 0.0, - "learning_rate": 1.9637917457394563e-05, - "loss": 1.173, + "learning_rate": 1.9172420297662896e-05, + "loss": 1.0907, "step": 4000 }, { - "epoch": 0.11337810649210803, + "epoch": 0.15654589560998514, "grad_norm": 0.0, - "learning_rate": 1.9637672682705344e-05, - "loss": 1.0817, + "learning_rate": 1.917191544741663e-05, + "loss": 1.1224, "step": 4001 }, { - "epoch": 0.11340644393437048, + "epoch": 0.15658502230221458, "grad_norm": 0.0, - "learning_rate": 1.963742782683442e-05, - "loss": 1.0807, + "learning_rate": 1.917141044988089e-05, + "loss": 1.1597, "step": 4002 }, { - "epoch": 0.11343478137663295, + "epoch": 0.15662414899444402, "grad_norm": 0.0, - "learning_rate": 1.963718288978387e-05, - "loss": 1.0661, + "learning_rate": 1.9170905305063795e-05, + "loss": 1.1562, "step": 4003 }, { - "epoch": 0.11346311881889541, + "epoch": 0.15666327568667346, "grad_norm": 0.0, - "learning_rate": 1.9636937871555747e-05, - "loss": 1.0477, + "learning_rate": 1.917040001297345e-05, + "loss": 1.0906, "step": 4004 }, { - "epoch": 0.11349145626115786, + "epoch": 0.1567024023789029, "grad_norm": 0.0, - "learning_rate": 1.9636692772152117e-05, - "loss": 1.0582, + "learning_rate": 1.9169894573617974e-05, + "loss": 1.1159, "step": 4005 }, { - "epoch": 0.11351979370342033, + "epoch": 0.1567415290711323, "grad_norm": 0.0, - "learning_rate": 1.9636447591575047e-05, - "loss": 1.1155, + "learning_rate": 1.916938898700548e-05, + "loss": 1.0933, "step": 4006 }, { - "epoch": 0.1135481311456828, + "epoch": 0.15678065576336175, "grad_norm": 0.0, - "learning_rate": 1.9636202329826602e-05, - "loss": 1.0857, + "learning_rate": 1.916888325314409e-05, + "loss": 1.3438, "step": 4007 }, { - "epoch": 0.11357646858794525, + "epoch": 0.1568197824555912, "grad_norm": 0.0, - "learning_rate": 1.963595698690884e-05, - "loss": 1.0361, + "learning_rate": 1.9168377372041924e-05, + "loss": 1.1796, "step": 4008 }, { - "epoch": 0.11360480603020771, + "epoch": 0.15685890914782064, "grad_norm": 0.0, - "learning_rate": 1.963571156282384e-05, - "loss": 1.0185, + "learning_rate": 1.9167871343707105e-05, + "loss": 1.1618, "step": 4009 }, { - "epoch": 0.11363314347247018, + "epoch": 0.15689803584005008, "grad_norm": 0.0, - "learning_rate": 1.9635466057573662e-05, - "loss": 1.0672, + "learning_rate": 1.9167365168147763e-05, + "loss": 1.1956, "step": 4010 }, { - "epoch": 0.11366148091473263, + "epoch": 0.15693716253227952, "grad_norm": 0.0, - "learning_rate": 1.9635220471160375e-05, - "loss": 1.1373, + "learning_rate": 1.916685884537202e-05, + "loss": 1.221, "step": 4011 }, { - "epoch": 0.1136898183569951, + "epoch": 0.15697628922450896, "grad_norm": 0.0, - "learning_rate": 1.963497480358605e-05, - "loss": 1.0185, + "learning_rate": 1.9166352375388013e-05, + "loss": 1.1706, "step": 4012 }, { - "epoch": 0.11371815579925756, + "epoch": 0.1570154159167384, "grad_norm": 0.0, - "learning_rate": 1.9634729054852752e-05, - "loss": 1.0482, + "learning_rate": 1.9165845758203872e-05, + "loss": 1.1896, "step": 4013 }, { - "epoch": 0.11374649324152002, + "epoch": 0.15705454260896784, "grad_norm": 0.0, - "learning_rate": 1.9634483224962555e-05, - "loss": 1.0915, + "learning_rate": 1.9165338993827736e-05, + "loss": 1.2356, "step": 4014 }, { - "epoch": 0.11377483068378248, + "epoch": 0.15709366930119728, "grad_norm": 0.0, - "learning_rate": 1.963423731391753e-05, - "loss": 1.0652, + "learning_rate": 1.9164832082267737e-05, + "loss": 1.191, "step": 4015 }, { - "epoch": 0.11380316812604495, + "epoch": 0.15713279599342672, "grad_norm": 0.0, - "learning_rate": 1.963399132171974e-05, - "loss": 1.0959, + "learning_rate": 1.916432502353202e-05, + "loss": 1.2, "step": 4016 }, { - "epoch": 0.1138315055683074, + "epoch": 0.15717192268565616, "grad_norm": 0.0, - "learning_rate": 1.9633745248371268e-05, - "loss": 1.0526, + "learning_rate": 1.9163817817628728e-05, + "loss": 1.1403, "step": 4017 }, { - "epoch": 0.11385984301056987, + "epoch": 0.1572110493778856, "grad_norm": 0.0, - "learning_rate": 1.9633499093874183e-05, - "loss": 1.0917, + "learning_rate": 1.9163310464566e-05, + "loss": 1.1756, "step": 4018 }, { - "epoch": 0.11388818045283233, + "epoch": 0.15725017607011504, "grad_norm": 0.0, - "learning_rate": 1.9633252858230553e-05, - "loss": 0.9306, + "learning_rate": 1.9162802964351995e-05, + "loss": 1.2119, "step": 4019 }, { - "epoch": 0.11391651789509478, + "epoch": 0.15728930276234449, "grad_norm": 0.0, - "learning_rate": 1.9633006541442464e-05, - "loss": 1.1345, + "learning_rate": 1.9162295316994854e-05, + "loss": 1.1672, "step": 4020 }, { - "epoch": 0.11394485533735725, + "epoch": 0.1573284294545739, "grad_norm": 0.0, - "learning_rate": 1.9632760143511976e-05, - "loss": 1.009, + "learning_rate": 1.9161787522502727e-05, + "loss": 1.199, "step": 4021 }, { - "epoch": 0.11397319277961972, + "epoch": 0.15736755614680334, "grad_norm": 0.0, - "learning_rate": 1.963251366444118e-05, - "loss": 1.2195, + "learning_rate": 1.916127958088378e-05, + "loss": 1.1951, "step": 4022 }, { - "epoch": 0.11400153022188217, + "epoch": 0.15740668283903278, "grad_norm": 0.0, - "learning_rate": 1.963226710423214e-05, - "loss": 1.184, + "learning_rate": 1.9160771492146158e-05, + "loss": 1.1595, "step": 4023 }, { - "epoch": 0.11402986766414464, + "epoch": 0.15744580953126222, "grad_norm": 0.0, - "learning_rate": 1.9632020462886937e-05, - "loss": 1.1285, + "learning_rate": 1.9160263256298023e-05, + "loss": 1.1675, "step": 4024 }, { - "epoch": 0.1140582051064071, + "epoch": 0.15748493622349166, "grad_norm": 0.0, - "learning_rate": 1.963177374040765e-05, - "loss": 0.9666, + "learning_rate": 1.9159754873347544e-05, + "loss": 1.2214, "step": 4025 }, { - "epoch": 0.11408654254866955, + "epoch": 0.1575240629157211, "grad_norm": 0.0, - "learning_rate": 1.9631526936796357e-05, - "loss": 1.0893, + "learning_rate": 1.9159246343302874e-05, + "loss": 1.1345, "step": 4026 }, { - "epoch": 0.11411487999093202, + "epoch": 0.15756318960795054, "grad_norm": 0.0, - "learning_rate": 1.9631280052055136e-05, - "loss": 1.1316, + "learning_rate": 1.915873766617219e-05, + "loss": 1.1758, "step": 4027 }, { - "epoch": 0.11414321743319449, + "epoch": 0.15760231630017998, "grad_norm": 0.0, - "learning_rate": 1.9631033086186066e-05, - "loss": 1.0245, + "learning_rate": 1.915822884196365e-05, + "loss": 1.2819, "step": 4028 }, { - "epoch": 0.11417155487545694, + "epoch": 0.15764144299240943, "grad_norm": 0.0, - "learning_rate": 1.963078603919123e-05, - "loss": 1.0496, + "learning_rate": 1.9157719870685438e-05, + "loss": 1.2749, "step": 4029 }, { - "epoch": 0.1141998923177194, + "epoch": 0.15768056968463887, "grad_norm": 0.0, - "learning_rate": 1.9630538911072702e-05, - "loss": 1.0946, + "learning_rate": 1.9157210752345713e-05, + "loss": 1.1726, "step": 4030 }, { - "epoch": 0.11422822975998187, + "epoch": 0.1577196963768683, "grad_norm": 0.0, - "learning_rate": 1.963029170183257e-05, - "loss": 1.074, + "learning_rate": 1.9156701486952663e-05, + "loss": 1.1194, "step": 4031 }, { - "epoch": 0.11425656720224432, + "epoch": 0.15775882306909775, "grad_norm": 0.0, - "learning_rate": 1.963004441147292e-05, - "loss": 1.1071, + "learning_rate": 1.915619207451446e-05, + "loss": 1.1808, "step": 4032 }, { - "epoch": 0.11428490464450679, + "epoch": 0.1577979497613272, "grad_norm": 0.0, - "learning_rate": 1.9629797039995823e-05, - "loss": 1.1352, + "learning_rate": 1.9155682515039286e-05, + "loss": 1.3103, "step": 4033 }, { - "epoch": 0.11431324208676925, + "epoch": 0.15783707645355663, "grad_norm": 0.0, - "learning_rate": 1.9629549587403373e-05, - "loss": 1.1239, + "learning_rate": 1.915517280853532e-05, + "loss": 1.2291, "step": 4034 }, { - "epoch": 0.1143415795290317, + "epoch": 0.15787620314578604, "grad_norm": 0.0, - "learning_rate": 1.962930205369765e-05, - "loss": 1.0708, + "learning_rate": 1.9154662955010753e-05, + "loss": 1.1607, "step": 4035 }, { - "epoch": 0.11436991697129417, + "epoch": 0.15791532983801548, "grad_norm": 0.0, - "learning_rate": 1.9629054438880742e-05, - "loss": 1.0578, + "learning_rate": 1.915415295447377e-05, + "loss": 1.2983, "step": 4036 }, { - "epoch": 0.11439825441355664, + "epoch": 0.15795445653024492, "grad_norm": 0.0, - "learning_rate": 1.962880674295473e-05, - "loss": 1.0218, + "learning_rate": 1.915364280693256e-05, + "loss": 1.201, "step": 4037 }, { - "epoch": 0.11442659185581909, + "epoch": 0.15799358322247437, "grad_norm": 0.0, - "learning_rate": 1.9628558965921708e-05, - "loss": 1.1422, + "learning_rate": 1.915313251239532e-05, + "loss": 1.162, "step": 4038 }, { - "epoch": 0.11445492929808156, + "epoch": 0.1580327099147038, "grad_norm": 0.0, - "learning_rate": 1.9628311107783753e-05, - "loss": 1.0855, + "learning_rate": 1.915262207087024e-05, + "loss": 1.0977, "step": 4039 }, { - "epoch": 0.11448326674034402, + "epoch": 0.15807183660693325, "grad_norm": 0.0, - "learning_rate": 1.962806316854296e-05, - "loss": 1.036, + "learning_rate": 1.915211148236552e-05, + "loss": 1.1366, "step": 4040 }, { - "epoch": 0.11451160418260647, + "epoch": 0.1581109632991627, "grad_norm": 0.0, - "learning_rate": 1.9627815148201417e-05, - "loss": 1.0996, + "learning_rate": 1.9151600746889355e-05, + "loss": 1.0785, "step": 4041 }, { - "epoch": 0.11453994162486894, + "epoch": 0.15815008999139213, "grad_norm": 0.0, - "learning_rate": 1.9627567046761207e-05, - "loss": 1.0572, + "learning_rate": 1.915108986444995e-05, + "loss": 1.239, "step": 4042 }, { - "epoch": 0.1145682790671314, + "epoch": 0.15818921668362157, "grad_norm": 0.0, - "learning_rate": 1.9627318864224433e-05, - "loss": 0.9373, + "learning_rate": 1.9150578835055507e-05, + "loss": 1.2688, "step": 4043 }, { - "epoch": 0.11459661650939386, + "epoch": 0.158228343375851, "grad_norm": 0.0, - "learning_rate": 1.9627070600593172e-05, - "loss": 1.0891, + "learning_rate": 1.9150067658714238e-05, + "loss": 1.1685, "step": 4044 }, { - "epoch": 0.11462495395165632, + "epoch": 0.15826747006808045, "grad_norm": 0.0, - "learning_rate": 1.962682225586952e-05, - "loss": 1.0108, + "learning_rate": 1.9149556335434348e-05, + "loss": 1.1905, "step": 4045 }, { - "epoch": 0.11465329139391879, + "epoch": 0.1583065967603099, "grad_norm": 0.0, - "learning_rate": 1.962657383005557e-05, - "loss": 1.1385, + "learning_rate": 1.9149044865224047e-05, + "loss": 1.1472, "step": 4046 }, { - "epoch": 0.11468162883618124, + "epoch": 0.15834572345253933, "grad_norm": 0.0, - "learning_rate": 1.9626325323153414e-05, - "loss": 1.0674, + "learning_rate": 1.914853324809155e-05, + "loss": 1.1653, "step": 4047 }, { - "epoch": 0.11470996627844371, + "epoch": 0.15838485014476877, "grad_norm": 0.0, - "learning_rate": 1.9626076735165146e-05, - "loss": 1.2121, + "learning_rate": 1.9148021484045073e-05, + "loss": 1.1768, "step": 4048 }, { - "epoch": 0.11473830372070618, + "epoch": 0.1584239768369982, "grad_norm": 0.0, - "learning_rate": 1.9625828066092857e-05, - "loss": 0.997, + "learning_rate": 1.9147509573092833e-05, + "loss": 1.0132, "step": 4049 }, { - "epoch": 0.11476664116296863, + "epoch": 0.15846310352922763, "grad_norm": 0.0, - "learning_rate": 1.9625579315938644e-05, - "loss": 1.1561, + "learning_rate": 1.9146997515243054e-05, + "loss": 1.3075, "step": 4050 }, { - "epoch": 0.1147949786052311, + "epoch": 0.15850223022145707, "grad_norm": 0.0, - "learning_rate": 1.9625330484704604e-05, - "loss": 1.0475, + "learning_rate": 1.9146485310503955e-05, + "loss": 1.2378, "step": 4051 }, { - "epoch": 0.11482331604749356, + "epoch": 0.1585413569136865, "grad_norm": 0.0, - "learning_rate": 1.9625081572392832e-05, - "loss": 1.1797, + "learning_rate": 1.9145972958883763e-05, + "loss": 1.1958, "step": 4052 }, { - "epoch": 0.11485165348975601, + "epoch": 0.15858048360591595, "grad_norm": 0.0, - "learning_rate": 1.962483257900542e-05, - "loss": 0.9171, + "learning_rate": 1.9145460460390707e-05, + "loss": 1.1798, "step": 4053 }, { - "epoch": 0.11487999093201848, + "epoch": 0.1586196102981454, "grad_norm": 0.0, - "learning_rate": 1.962458350454447e-05, - "loss": 1.0263, + "learning_rate": 1.9144947815033014e-05, + "loss": 1.1157, "step": 4054 }, { - "epoch": 0.11490832837428094, + "epoch": 0.15865873699037483, "grad_norm": 0.0, - "learning_rate": 1.9624334349012083e-05, - "loss": 1.0508, + "learning_rate": 1.9144435022818917e-05, + "loss": 1.0374, "step": 4055 }, { - "epoch": 0.1149366658165434, + "epoch": 0.15869786368260427, "grad_norm": 0.0, - "learning_rate": 1.962408511241035e-05, - "loss": 1.1013, + "learning_rate": 1.9143922083756656e-05, + "loss": 1.2153, "step": 4056 }, { - "epoch": 0.11496500325880586, + "epoch": 0.15873699037483371, "grad_norm": 0.0, - "learning_rate": 1.9623835794741374e-05, - "loss": 1.1262, + "learning_rate": 1.9143408997854464e-05, + "loss": 1.0899, "step": 4057 }, { - "epoch": 0.11499334070106833, + "epoch": 0.15877611706706315, "grad_norm": 0.0, - "learning_rate": 1.962358639600726e-05, - "loss": 1.1195, + "learning_rate": 1.914289576512058e-05, + "loss": 1.2448, "step": 4058 }, { - "epoch": 0.11502167814333078, + "epoch": 0.1588152437592926, "grad_norm": 0.0, - "learning_rate": 1.96233369162101e-05, - "loss": 0.9871, + "learning_rate": 1.9142382385563244e-05, + "loss": 1.3151, "step": 4059 }, { - "epoch": 0.11505001558559325, + "epoch": 0.15885437045152204, "grad_norm": 0.0, - "learning_rate": 1.9623087355351998e-05, - "loss": 1.0822, + "learning_rate": 1.9141868859190706e-05, + "loss": 1.2283, "step": 4060 }, { - "epoch": 0.11507835302785571, + "epoch": 0.15889349714375148, "grad_norm": 0.0, - "learning_rate": 1.962283771343506e-05, - "loss": 1.0845, + "learning_rate": 1.9141355186011206e-05, + "loss": 1.1511, "step": 4061 }, { - "epoch": 0.11510669047011816, + "epoch": 0.15893262383598092, "grad_norm": 0.0, - "learning_rate": 1.9622587990461387e-05, - "loss": 1.0824, + "learning_rate": 1.9140841366033003e-05, + "loss": 1.1904, "step": 4062 }, { - "epoch": 0.11513502791238063, + "epoch": 0.15897175052821033, "grad_norm": 0.0, - "learning_rate": 1.9622338186433084e-05, - "loss": 1.098, + "learning_rate": 1.914032739926434e-05, + "loss": 1.2993, "step": 4063 }, { - "epoch": 0.1151633653546431, + "epoch": 0.15901087722043977, "grad_norm": 0.0, - "learning_rate": 1.9622088301352253e-05, - "loss": 1.0461, + "learning_rate": 1.9139813285713468e-05, + "loss": 1.2037, "step": 4064 }, { - "epoch": 0.11519170279690555, + "epoch": 0.1590500039126692, "grad_norm": 0.0, - "learning_rate": 1.9621838335220997e-05, - "loss": 1.0172, + "learning_rate": 1.913929902538865e-05, + "loss": 1.2281, "step": 4065 }, { - "epoch": 0.11522004023916801, + "epoch": 0.15908913060489865, "grad_norm": 0.0, - "learning_rate": 1.9621588288041423e-05, - "loss": 0.9963, + "learning_rate": 1.913878461829814e-05, + "loss": 1.2894, "step": 4066 }, { - "epoch": 0.11524837768143048, + "epoch": 0.1591282572971281, "grad_norm": 0.0, - "learning_rate": 1.962133815981564e-05, - "loss": 1.0089, + "learning_rate": 1.9138270064450202e-05, + "loss": 1.1588, "step": 4067 }, { - "epoch": 0.11527671512369293, + "epoch": 0.15916738398935754, "grad_norm": 0.0, - "learning_rate": 1.962108795054575e-05, - "loss": 1.0443, + "learning_rate": 1.9137755363853098e-05, + "loss": 1.3121, "step": 4068 }, { - "epoch": 0.1153050525659554, + "epoch": 0.15920651068158698, "grad_norm": 0.0, - "learning_rate": 1.9620837660233866e-05, - "loss": 1.0892, + "learning_rate": 1.9137240516515094e-05, + "loss": 1.2424, "step": 4069 }, { - "epoch": 0.11533339000821786, + "epoch": 0.15924563737381642, "grad_norm": 0.0, - "learning_rate": 1.9620587288882095e-05, - "loss": 1.1014, + "learning_rate": 1.9136725522444458e-05, + "loss": 1.2384, "step": 4070 }, { - "epoch": 0.11536172745048032, + "epoch": 0.15928476406604586, "grad_norm": 0.0, - "learning_rate": 1.962033683649254e-05, - "loss": 1.1119, + "learning_rate": 1.9136210381649457e-05, + "loss": 1.071, "step": 4071 }, { - "epoch": 0.11539006489274278, + "epoch": 0.1593238907582753, "grad_norm": 0.0, - "learning_rate": 1.962008630306732e-05, - "loss": 0.8849, + "learning_rate": 1.9135695094138364e-05, + "loss": 1.1059, "step": 4072 }, { - "epoch": 0.11541840233500525, + "epoch": 0.15936301745050474, "grad_norm": 0.0, - "learning_rate": 1.9619835688608537e-05, - "loss": 0.9921, + "learning_rate": 1.9135179659919458e-05, + "loss": 1.2036, "step": 4073 }, { - "epoch": 0.1154467397772677, + "epoch": 0.15940214414273418, "grad_norm": 0.0, - "learning_rate": 1.9619584993118308e-05, - "loss": 1.1702, + "learning_rate": 1.9134664079001015e-05, + "loss": 1.2382, "step": 4074 }, { - "epoch": 0.11547507721953017, + "epoch": 0.15944127083496362, "grad_norm": 0.0, - "learning_rate": 1.9619334216598746e-05, - "loss": 1.0501, + "learning_rate": 1.9134148351391308e-05, + "loss": 1.2312, "step": 4075 }, { - "epoch": 0.11550341466179263, + "epoch": 0.15948039752719306, "grad_norm": 0.0, - "learning_rate": 1.961908335905195e-05, - "loss": 1.0195, + "learning_rate": 1.913363247709863e-05, + "loss": 1.2003, "step": 4076 }, { - "epoch": 0.11553175210405509, + "epoch": 0.1595195242194225, "grad_norm": 0.0, - "learning_rate": 1.961883242048005e-05, - "loss": 1.0481, + "learning_rate": 1.9133116456131256e-05, + "loss": 1.1519, "step": 4077 }, { - "epoch": 0.11556008954631755, + "epoch": 0.15955865091165192, "grad_norm": 0.0, - "learning_rate": 1.9618581400885156e-05, - "loss": 0.9608, + "learning_rate": 1.9132600288497477e-05, + "loss": 1.0596, "step": 4078 }, { - "epoch": 0.11558842698858002, + "epoch": 0.15959777760388136, "grad_norm": 0.0, - "learning_rate": 1.9618330300269372e-05, - "loss": 1.1041, + "learning_rate": 1.913208397420558e-05, + "loss": 1.1978, "step": 4079 }, { - "epoch": 0.11561676443084247, + "epoch": 0.1596369042961108, "grad_norm": 0.0, - "learning_rate": 1.961807911863482e-05, - "loss": 1.1039, + "learning_rate": 1.913156751326386e-05, + "loss": 1.1387, "step": 4080 }, { - "epoch": 0.11564510187310494, + "epoch": 0.15967603098834024, "grad_norm": 0.0, - "learning_rate": 1.9617827855983623e-05, - "loss": 1.0827, + "learning_rate": 1.9131050905680604e-05, + "loss": 1.1583, "step": 4081 }, { - "epoch": 0.1156734393153674, + "epoch": 0.15971515768056968, "grad_norm": 0.0, - "learning_rate": 1.9617576512317888e-05, - "loss": 1.0258, + "learning_rate": 1.9130534151464116e-05, + "loss": 1.2946, "step": 4082 }, { - "epoch": 0.11570177675762985, + "epoch": 0.15975428437279912, "grad_norm": 0.0, - "learning_rate": 1.961732508763973e-05, - "loss": 1.021, + "learning_rate": 1.913001725062269e-05, + "loss": 1.0678, "step": 4083 }, { - "epoch": 0.11573011419989232, + "epoch": 0.15979341106502856, "grad_norm": 0.0, - "learning_rate": 1.9617073581951274e-05, - "loss": 1.1032, + "learning_rate": 1.9129500203164627e-05, + "loss": 1.1598, "step": 4084 }, { - "epoch": 0.11575845164215479, + "epoch": 0.159832537757258, "grad_norm": 0.0, - "learning_rate": 1.961682199525464e-05, - "loss": 0.9996, + "learning_rate": 1.912898300909823e-05, + "loss": 1.1545, "step": 4085 }, { - "epoch": 0.11578678908441724, + "epoch": 0.15987166444948744, "grad_norm": 0.0, - "learning_rate": 1.961657032755194e-05, - "loss": 1.0795, + "learning_rate": 1.9128465668431808e-05, + "loss": 1.252, "step": 4086 }, { - "epoch": 0.1158151265266797, + "epoch": 0.15991079114171688, "grad_norm": 0.0, - "learning_rate": 1.9616318578845294e-05, - "loss": 1.0575, + "learning_rate": 1.9127948181173664e-05, + "loss": 1.0614, "step": 4087 }, { - "epoch": 0.11584346396894216, + "epoch": 0.15994991783394633, "grad_norm": 0.0, - "learning_rate": 1.961606674913683e-05, - "loss": 1.0825, + "learning_rate": 1.912743054733211e-05, + "loss": 1.2971, "step": 4088 }, { - "epoch": 0.11587180141120462, + "epoch": 0.15998904452617577, "grad_norm": 0.0, - "learning_rate": 1.9615814838428662e-05, - "loss": 1.0261, + "learning_rate": 1.912691276691546e-05, + "loss": 1.0942, "step": 4089 }, { - "epoch": 0.11590013885346709, + "epoch": 0.1600281712184052, "grad_norm": 0.0, - "learning_rate": 1.9615562846722915e-05, - "loss": 1.1195, + "learning_rate": 1.9126394839932027e-05, + "loss": 1.2647, "step": 4090 }, { - "epoch": 0.11592847629572954, + "epoch": 0.16006729791063465, "grad_norm": 0.0, - "learning_rate": 1.9615310774021715e-05, - "loss": 1.0634, + "learning_rate": 1.912587676639013e-05, + "loss": 1.1896, "step": 4091 }, { - "epoch": 0.115956813737992, + "epoch": 0.16010642460286406, "grad_norm": 0.0, - "learning_rate": 1.961505862032718e-05, - "loss": 1.0347, + "learning_rate": 1.9125358546298084e-05, + "loss": 1.1624, "step": 4092 }, { - "epoch": 0.11598515118025447, + "epoch": 0.1601455512950935, "grad_norm": 0.0, - "learning_rate": 1.9614806385641433e-05, - "loss": 1.0684, + "learning_rate": 1.9124840179664216e-05, + "loss": 1.1523, "step": 4093 }, { - "epoch": 0.11601348862251692, + "epoch": 0.16018467798732294, "grad_norm": 0.0, - "learning_rate": 1.9614554069966606e-05, - "loss": 1.1111, + "learning_rate": 1.9124321666496846e-05, + "loss": 1.2122, "step": 4094 }, { - "epoch": 0.11604182606477939, + "epoch": 0.16022380467955238, "grad_norm": 0.0, - "learning_rate": 1.9614301673304815e-05, - "loss": 0.9959, + "learning_rate": 1.912380300680431e-05, + "loss": 1.2114, "step": 4095 }, { - "epoch": 0.11607016350704186, + "epoch": 0.16026293137178182, "grad_norm": 0.0, - "learning_rate": 1.9614049195658197e-05, - "loss": 1.0498, + "learning_rate": 1.912328420059493e-05, + "loss": 1.1309, "step": 4096 }, { - "epoch": 0.11609850094930431, + "epoch": 0.16030205806401127, "grad_norm": 0.0, - "learning_rate": 1.961379663702887e-05, - "loss": 1.0192, + "learning_rate": 1.912276524787703e-05, + "loss": 1.0958, "step": 4097 }, { - "epoch": 0.11612683839156678, + "epoch": 0.1603411847562407, "grad_norm": 0.0, - "learning_rate": 1.9613543997418963e-05, - "loss": 1.0269, + "learning_rate": 1.912224614865896e-05, + "loss": 1.0923, "step": 4098 }, { - "epoch": 0.11615517583382924, + "epoch": 0.16038031144847015, "grad_norm": 0.0, - "learning_rate": 1.9613291276830604e-05, - "loss": 1.092, + "learning_rate": 1.9121726902949043e-05, + "loss": 1.1484, "step": 4099 }, { - "epoch": 0.1161835132760917, + "epoch": 0.1604194381406996, "grad_norm": 0.0, - "learning_rate": 1.9613038475265922e-05, - "loss": 1.1691, + "learning_rate": 1.9121207510755624e-05, + "loss": 1.1593, "step": 4100 }, { - "epoch": 0.11621185071835416, + "epoch": 0.16045856483292903, "grad_norm": 0.0, - "learning_rate": 1.9612785592727048e-05, - "loss": 1.0387, + "learning_rate": 1.912068797208704e-05, + "loss": 1.1688, "step": 4101 }, { - "epoch": 0.11624018816061663, + "epoch": 0.16049769152515847, "grad_norm": 0.0, - "learning_rate": 1.9612532629216114e-05, - "loss": 0.9898, + "learning_rate": 1.9120168286951638e-05, + "loss": 1.2585, "step": 4102 }, { - "epoch": 0.11626852560287908, + "epoch": 0.1605368182173879, "grad_norm": 0.0, - "learning_rate": 1.9612279584735247e-05, - "loss": 1.0219, + "learning_rate": 1.911964845535776e-05, + "loss": 1.2526, "step": 4103 }, { - "epoch": 0.11629686304514154, + "epoch": 0.16057594490961735, "grad_norm": 0.0, - "learning_rate": 1.961202645928658e-05, - "loss": 1.1041, + "learning_rate": 1.9119128477313757e-05, + "loss": 1.1746, "step": 4104 }, { - "epoch": 0.11632520048740401, + "epoch": 0.1606150716018468, "grad_norm": 0.0, - "learning_rate": 1.961177325287224e-05, - "loss": 1.1206, + "learning_rate": 1.9118608352827978e-05, + "loss": 1.1797, "step": 4105 }, { - "epoch": 0.11635353792966646, + "epoch": 0.1606541982940762, "grad_norm": 0.0, - "learning_rate": 1.961151996549437e-05, - "loss": 1.024, + "learning_rate": 1.9118088081908773e-05, + "loss": 1.3448, "step": 4106 }, { - "epoch": 0.11638187537192893, + "epoch": 0.16069332498630565, "grad_norm": 0.0, - "learning_rate": 1.9611266597155097e-05, - "loss": 1.1184, + "learning_rate": 1.91175676645645e-05, + "loss": 1.1989, "step": 4107 }, { - "epoch": 0.1164102128141914, + "epoch": 0.1607324516785351, "grad_norm": 0.0, - "learning_rate": 1.9611013147856558e-05, - "loss": 1.1423, + "learning_rate": 1.9117047100803513e-05, + "loss": 1.2637, "step": 4108 }, { - "epoch": 0.11643855025645385, + "epoch": 0.16077157837076453, "grad_norm": 0.0, - "learning_rate": 1.9610759617600883e-05, - "loss": 1.0784, + "learning_rate": 1.9116526390634177e-05, + "loss": 1.0911, "step": 4109 }, { - "epoch": 0.11646688769871631, + "epoch": 0.16081070506299397, "grad_norm": 0.0, - "learning_rate": 1.961050600639021e-05, - "loss": 1.0687, + "learning_rate": 1.9116005534064853e-05, + "loss": 1.1855, "step": 4110 }, { - "epoch": 0.11649522514097878, + "epoch": 0.1608498317552234, "grad_norm": 0.0, - "learning_rate": 1.9610252314226682e-05, - "loss": 1.1033, + "learning_rate": 1.9115484531103895e-05, + "loss": 1.2479, "step": 4111 }, { - "epoch": 0.11652356258324123, + "epoch": 0.16088895844745285, "grad_norm": 0.0, - "learning_rate": 1.9609998541112424e-05, - "loss": 1.051, + "learning_rate": 1.9114963381759685e-05, + "loss": 1.1398, "step": 4112 }, { - "epoch": 0.1165519000255037, + "epoch": 0.1609280851396823, "grad_norm": 0.0, - "learning_rate": 1.9609744687049582e-05, - "loss": 1.0123, + "learning_rate": 1.9114442086040583e-05, + "loss": 1.134, "step": 4113 }, { - "epoch": 0.11658023746776616, + "epoch": 0.16096721183191173, "grad_norm": 0.0, - "learning_rate": 1.960949075204029e-05, - "loss": 1.1591, + "learning_rate": 1.9113920643954963e-05, + "loss": 1.0888, "step": 4114 }, { - "epoch": 0.11660857491002861, + "epoch": 0.16100633852414117, "grad_norm": 0.0, - "learning_rate": 1.9609236736086695e-05, - "loss": 1.0113, + "learning_rate": 1.9113399055511195e-05, + "loss": 1.2395, "step": 4115 }, { - "epoch": 0.11663691235229108, + "epoch": 0.16104546521637061, "grad_norm": 0.0, - "learning_rate": 1.9608982639190925e-05, - "loss": 1.0366, + "learning_rate": 1.9112877320717657e-05, + "loss": 1.2004, "step": 4116 }, { - "epoch": 0.11666524979455355, + "epoch": 0.16108459190860006, "grad_norm": 0.0, - "learning_rate": 1.960872846135513e-05, - "loss": 1.0426, + "learning_rate": 1.9112355439582734e-05, + "loss": 1.2437, "step": 4117 }, { - "epoch": 0.116693587236816, + "epoch": 0.1611237186008295, "grad_norm": 0.0, - "learning_rate": 1.9608474202581444e-05, - "loss": 0.9921, + "learning_rate": 1.9111833412114796e-05, + "loss": 1.1625, "step": 4118 }, { - "epoch": 0.11672192467907846, + "epoch": 0.16116284529305894, "grad_norm": 0.0, - "learning_rate": 1.960821986287201e-05, - "loss": 0.9918, + "learning_rate": 1.9111311238322235e-05, + "loss": 1.1535, "step": 4119 }, { - "epoch": 0.11675026212134093, + "epoch": 0.16120197198528835, "grad_norm": 0.0, - "learning_rate": 1.9607965442228977e-05, - "loss": 0.9914, + "learning_rate": 1.911078891821343e-05, + "loss": 1.08, "step": 4120 }, { - "epoch": 0.11677859956360338, + "epoch": 0.1612410986775178, "grad_norm": 0.0, - "learning_rate": 1.9607710940654482e-05, - "loss": 1.1129, + "learning_rate": 1.9110266451796772e-05, + "loss": 1.2352, "step": 4121 }, { - "epoch": 0.11680693700586585, + "epoch": 0.16128022536974723, "grad_norm": 0.0, - "learning_rate": 1.9607456358150668e-05, - "loss": 1.0615, + "learning_rate": 1.910974383908065e-05, + "loss": 1.2636, "step": 4122 }, { - "epoch": 0.11683527444812832, + "epoch": 0.16131935206197667, "grad_norm": 0.0, - "learning_rate": 1.9607201694719683e-05, - "loss": 1.0159, + "learning_rate": 1.9109221080073457e-05, + "loss": 1.2855, "step": 4123 }, { - "epoch": 0.11686361189039077, + "epoch": 0.1613584787542061, "grad_norm": 0.0, - "learning_rate": 1.960694695036367e-05, - "loss": 1.112, + "learning_rate": 1.910869817478359e-05, + "loss": 1.0542, "step": 4124 }, { - "epoch": 0.11689194933265323, + "epoch": 0.16139760544643555, "grad_norm": 0.0, - "learning_rate": 1.9606692125084775e-05, - "loss": 1.005, + "learning_rate": 1.910817512321944e-05, + "loss": 1.1822, "step": 4125 }, { - "epoch": 0.1169202867749157, + "epoch": 0.161436732138665, "grad_norm": 0.0, - "learning_rate": 1.9606437218885145e-05, - "loss": 1.0249, + "learning_rate": 1.9107651925389413e-05, + "loss": 1.25, "step": 4126 }, { - "epoch": 0.11694862421717815, + "epoch": 0.16147585883089444, "grad_norm": 0.0, - "learning_rate": 1.960618223176693e-05, - "loss": 1.0597, + "learning_rate": 1.910712858130191e-05, + "loss": 1.1656, "step": 4127 }, { - "epoch": 0.11697696165944062, + "epoch": 0.16151498552312388, "grad_norm": 0.0, - "learning_rate": 1.9605927163732274e-05, - "loss": 1.0108, + "learning_rate": 1.9106605090965333e-05, + "loss": 1.1891, "step": 4128 }, { - "epoch": 0.11700529910170308, + "epoch": 0.16155411221535332, "grad_norm": 0.0, - "learning_rate": 1.960567201478332e-05, - "loss": 1.1323, + "learning_rate": 1.910608145438809e-05, + "loss": 1.2762, "step": 4129 }, { - "epoch": 0.11703363654396554, + "epoch": 0.16159323890758276, "grad_norm": 0.0, - "learning_rate": 1.960541678492223e-05, - "loss": 1.0828, + "learning_rate": 1.9105557671578588e-05, + "loss": 1.1245, "step": 4130 }, { - "epoch": 0.117061973986228, + "epoch": 0.1616323655998122, "grad_norm": 0.0, - "learning_rate": 1.9605161474151148e-05, - "loss": 1.0895, + "learning_rate": 1.9105033742545244e-05, + "loss": 1.2416, "step": 4131 }, { - "epoch": 0.11709031142849047, + "epoch": 0.16167149229204164, "grad_norm": 0.0, - "learning_rate": 1.9604906082472223e-05, - "loss": 0.9819, + "learning_rate": 1.910450966729646e-05, + "loss": 1.247, "step": 4132 }, { - "epoch": 0.11711864887075292, + "epoch": 0.16171061898427108, "grad_norm": 0.0, - "learning_rate": 1.9604650609887604e-05, - "loss": 1.0667, + "learning_rate": 1.910398544584066e-05, + "loss": 1.1813, "step": 4133 }, { - "epoch": 0.11714698631301539, + "epoch": 0.1617497456765005, "grad_norm": 0.0, - "learning_rate": 1.960439505639945e-05, - "loss": 1.0174, + "learning_rate": 1.9103461078186268e-05, + "loss": 1.1754, "step": 4134 }, { - "epoch": 0.11717532375527785, + "epoch": 0.16178887236872994, "grad_norm": 0.0, - "learning_rate": 1.9604139422009908e-05, - "loss": 0.9725, + "learning_rate": 1.9102936564341696e-05, + "loss": 1.242, "step": 4135 }, { - "epoch": 0.1172036611975403, + "epoch": 0.16182799906095938, "grad_norm": 0.0, - "learning_rate": 1.960388370672113e-05, - "loss": 1.0257, + "learning_rate": 1.910241190431537e-05, + "loss": 1.319, "step": 4136 }, { - "epoch": 0.11723199863980277, + "epoch": 0.16186712575318882, "grad_norm": 0.0, - "learning_rate": 1.9603627910535282e-05, - "loss": 0.9785, + "learning_rate": 1.910188709811571e-05, + "loss": 1.2273, "step": 4137 }, { - "epoch": 0.11726033608206524, + "epoch": 0.16190625244541826, "grad_norm": 0.0, - "learning_rate": 1.9603372033454504e-05, - "loss": 1.2129, + "learning_rate": 1.9101362145751154e-05, + "loss": 1.1494, "step": 4138 }, { - "epoch": 0.11728867352432769, + "epoch": 0.1619453791376477, "grad_norm": 0.0, - "learning_rate": 1.960311607548096e-05, - "loss": 1.0883, + "learning_rate": 1.9100837047230123e-05, + "loss": 1.1727, "step": 4139 }, { - "epoch": 0.11731701096659015, + "epoch": 0.16198450582987714, "grad_norm": 0.0, - "learning_rate": 1.96028600366168e-05, - "loss": 1.025, + "learning_rate": 1.9100311802561055e-05, + "loss": 1.2802, "step": 4140 }, { - "epoch": 0.11734534840885262, + "epoch": 0.16202363252210658, "grad_norm": 0.0, - "learning_rate": 1.9602603916864186e-05, - "loss": 1.0925, + "learning_rate": 1.909978641175238e-05, + "loss": 1.2495, "step": 4141 }, { - "epoch": 0.11737368585111507, + "epoch": 0.16206275921433602, "grad_norm": 0.0, - "learning_rate": 1.9602347716225272e-05, - "loss": 1.1383, + "learning_rate": 1.909926087481254e-05, + "loss": 1.2982, "step": 4142 }, { - "epoch": 0.11740202329337754, + "epoch": 0.16210188590656546, "grad_norm": 0.0, - "learning_rate": 1.9602091434702217e-05, - "loss": 1.0051, + "learning_rate": 1.9098735191749977e-05, + "loss": 1.1136, "step": 4143 }, { - "epoch": 0.11743036073564, + "epoch": 0.1621410125987949, "grad_norm": 0.0, - "learning_rate": 1.960183507229718e-05, - "loss": 1.0997, + "learning_rate": 1.909820936257312e-05, + "loss": 1.1752, "step": 4144 }, { - "epoch": 0.11745869817790246, + "epoch": 0.16218013929102434, "grad_norm": 0.0, - "learning_rate": 1.9601578629012327e-05, - "loss": 1.1526, + "learning_rate": 1.9097683387290428e-05, + "loss": 1.231, "step": 4145 }, { - "epoch": 0.11748703562016492, + "epoch": 0.16221926598325379, "grad_norm": 0.0, - "learning_rate": 1.9601322104849806e-05, - "loss": 1.1073, + "learning_rate": 1.9097157265910337e-05, + "loss": 1.2205, "step": 4146 }, { - "epoch": 0.11751537306242739, + "epoch": 0.16225839267548323, "grad_norm": 0.0, - "learning_rate": 1.9601065499811783e-05, - "loss": 0.9791, + "learning_rate": 1.9096630998441298e-05, + "loss": 1.1556, "step": 4147 }, { - "epoch": 0.11754371050468984, + "epoch": 0.16229751936771267, "grad_norm": 0.0, - "learning_rate": 1.960080881390042e-05, - "loss": 1.1336, + "learning_rate": 1.9096104584891767e-05, + "loss": 1.1721, "step": 4148 }, { - "epoch": 0.11757204794695231, + "epoch": 0.16233664605994208, "grad_norm": 0.0, - "learning_rate": 1.9600552047117883e-05, - "loss": 1.0738, + "learning_rate": 1.9095578025270195e-05, + "loss": 1.167, "step": 4149 }, { - "epoch": 0.11760038538921477, + "epoch": 0.16237577275217152, "grad_norm": 0.0, - "learning_rate": 1.9600295199466327e-05, - "loss": 1.109, + "learning_rate": 1.9095051319585035e-05, + "loss": 1.2411, "step": 4150 }, { - "epoch": 0.11762872283147723, + "epoch": 0.16241489944440096, "grad_norm": 0.0, - "learning_rate": 1.9600038270947923e-05, - "loss": 0.9592, + "learning_rate": 1.9094524467844747e-05, + "loss": 1.2506, "step": 4151 }, { - "epoch": 0.11765706027373969, + "epoch": 0.1624540261366304, "grad_norm": 0.0, - "learning_rate": 1.9599781261564827e-05, - "loss": 1.0819, + "learning_rate": 1.9093997470057796e-05, + "loss": 1.1369, "step": 4152 }, { - "epoch": 0.11768539771600216, + "epoch": 0.16249315282885984, "grad_norm": 0.0, - "learning_rate": 1.9599524171319214e-05, - "loss": 1.0542, + "learning_rate": 1.909347032623264e-05, + "loss": 1.2423, "step": 4153 }, { - "epoch": 0.11771373515826461, + "epoch": 0.16253227952108928, "grad_norm": 0.0, - "learning_rate": 1.959926700021324e-05, - "loss": 1.1537, + "learning_rate": 1.9092943036377743e-05, + "loss": 1.1349, "step": 4154 }, { - "epoch": 0.11774207260052708, + "epoch": 0.16257140621331873, "grad_norm": 0.0, - "learning_rate": 1.959900974824908e-05, - "loss": 1.005, + "learning_rate": 1.9092415600501577e-05, + "loss": 1.2032, "step": 4155 }, { - "epoch": 0.11777041004278954, + "epoch": 0.16261053290554817, "grad_norm": 0.0, - "learning_rate": 1.9598752415428893e-05, - "loss": 1.0484, + "learning_rate": 1.9091888018612607e-05, + "loss": 1.2603, "step": 4156 }, { - "epoch": 0.117798747485052, + "epoch": 0.1626496595977776, "grad_norm": 0.0, - "learning_rate": 1.959849500175485e-05, - "loss": 1.0725, + "learning_rate": 1.909136029071931e-05, + "loss": 1.2834, "step": 4157 }, { - "epoch": 0.11782708492731446, + "epoch": 0.16268878629000705, "grad_norm": 0.0, - "learning_rate": 1.9598237507229122e-05, - "loss": 1.2118, + "learning_rate": 1.909083241683016e-05, + "loss": 1.2194, "step": 4158 }, { - "epoch": 0.11785542236957693, + "epoch": 0.1627279129822365, "grad_norm": 0.0, - "learning_rate": 1.959797993185387e-05, - "loss": 0.9785, + "learning_rate": 1.9090304396953632e-05, + "loss": 1.0561, "step": 4159 }, { - "epoch": 0.11788375981183938, + "epoch": 0.16276703967446593, "grad_norm": 0.0, - "learning_rate": 1.959772227563127e-05, - "loss": 1.0134, + "learning_rate": 1.9089776231098204e-05, + "loss": 1.0687, "step": 4160 }, { - "epoch": 0.11791209725410184, + "epoch": 0.16280616636669537, "grad_norm": 0.0, - "learning_rate": 1.9597464538563495e-05, - "loss": 0.9993, + "learning_rate": 1.908924791927236e-05, + "loss": 1.3264, "step": 4161 }, { - "epoch": 0.11794043469636431, + "epoch": 0.1628452930589248, "grad_norm": 0.0, - "learning_rate": 1.959720672065271e-05, - "loss": 1.1471, + "learning_rate": 1.9088719461484588e-05, + "loss": 1.1408, "step": 4162 }, { - "epoch": 0.11796877213862676, + "epoch": 0.16288441975115422, "grad_norm": 0.0, - "learning_rate": 1.959694882190109e-05, - "loss": 1.1329, + "learning_rate": 1.9088190857743368e-05, + "loss": 1.2086, "step": 4163 }, { - "epoch": 0.11799710958088923, + "epoch": 0.16292354644338367, "grad_norm": 0.0, - "learning_rate": 1.9596690842310807e-05, - "loss": 1.0154, + "learning_rate": 1.9087662108057192e-05, + "loss": 1.1926, "step": 4164 }, { - "epoch": 0.1180254470231517, + "epoch": 0.1629626731356131, "grad_norm": 0.0, - "learning_rate": 1.9596432781884033e-05, - "loss": 1.0413, + "learning_rate": 1.908713321243455e-05, + "loss": 1.1262, "step": 4165 }, { - "epoch": 0.11805378446541415, + "epoch": 0.16300179982784255, "grad_norm": 0.0, - "learning_rate": 1.9596174640622944e-05, - "loss": 0.9599, + "learning_rate": 1.9086604170883932e-05, + "loss": 1.262, "step": 4166 }, { - "epoch": 0.11808212190767661, + "epoch": 0.163040926520072, "grad_norm": 0.0, - "learning_rate": 1.9595916418529706e-05, - "loss": 0.9699, + "learning_rate": 1.9086074983413843e-05, + "loss": 1.1426, "step": 4167 }, { - "epoch": 0.11811045934993908, + "epoch": 0.16308005321230143, "grad_norm": 0.0, - "learning_rate": 1.9595658115606507e-05, - "loss": 0.9862, + "learning_rate": 1.908554565003277e-05, + "loss": 1.3193, "step": 4168 }, { - "epoch": 0.11813879679220153, + "epoch": 0.16311917990453087, "grad_norm": 0.0, - "learning_rate": 1.9595399731855514e-05, - "loss": 0.9853, + "learning_rate": 1.9085016170749223e-05, + "loss": 1.1625, "step": 4169 }, { - "epoch": 0.118167134234464, + "epoch": 0.1631583065967603, "grad_norm": 0.0, - "learning_rate": 1.9595141267278906e-05, - "loss": 1.1548, + "learning_rate": 1.9084486545571694e-05, + "loss": 1.1174, "step": 4170 }, { - "epoch": 0.11819547167672646, + "epoch": 0.16319743328898975, "grad_norm": 0.0, - "learning_rate": 1.959488272187886e-05, - "loss": 1.081, + "learning_rate": 1.90839567745087e-05, + "loss": 1.1218, "step": 4171 }, { - "epoch": 0.11822380911898892, + "epoch": 0.1632365599812192, "grad_norm": 0.0, - "learning_rate": 1.9594624095657554e-05, - "loss": 0.9264, + "learning_rate": 1.908342685756874e-05, + "loss": 1.2762, "step": 4172 }, { - "epoch": 0.11825214656125138, + "epoch": 0.16327568667344863, "grad_norm": 0.0, - "learning_rate": 1.9594365388617165e-05, - "loss": 1.0841, + "learning_rate": 1.9082896794760327e-05, + "loss": 1.1449, "step": 4173 }, { - "epoch": 0.11828048400351385, + "epoch": 0.16331481336567807, "grad_norm": 0.0, - "learning_rate": 1.9594106600759875e-05, - "loss": 0.968, + "learning_rate": 1.9082366586091975e-05, + "loss": 1.1412, "step": 4174 }, { - "epoch": 0.1183088214457763, + "epoch": 0.16335394005790752, "grad_norm": 0.0, - "learning_rate": 1.9593847732087863e-05, - "loss": 0.9964, + "learning_rate": 1.90818362315722e-05, + "loss": 1.2247, "step": 4175 }, { - "epoch": 0.11833715888803877, + "epoch": 0.16339306675013696, "grad_norm": 0.0, - "learning_rate": 1.9593588782603307e-05, - "loss": 1.0685, + "learning_rate": 1.908130573120951e-05, + "loss": 1.2349, "step": 4176 }, { - "epoch": 0.11836549633030123, + "epoch": 0.16343219344236637, "grad_norm": 0.0, - "learning_rate": 1.9593329752308392e-05, - "loss": 0.9598, + "learning_rate": 1.908077508501243e-05, + "loss": 1.2173, "step": 4177 }, { - "epoch": 0.11839383377256368, + "epoch": 0.1634713201345958, "grad_norm": 0.0, - "learning_rate": 1.9593070641205298e-05, - "loss": 1.0769, + "learning_rate": 1.9080244292989484e-05, + "loss": 1.3192, "step": 4178 }, { - "epoch": 0.11842217121482615, + "epoch": 0.16351044682682525, "grad_norm": 0.0, - "learning_rate": 1.9592811449296206e-05, - "loss": 1.0693, + "learning_rate": 1.907971335514919e-05, + "loss": 1.2048, "step": 4179 }, { - "epoch": 0.11845050865708862, + "epoch": 0.1635495735190547, "grad_norm": 0.0, - "learning_rate": 1.9592552176583305e-05, - "loss": 1.09, + "learning_rate": 1.9079182271500073e-05, + "loss": 1.1899, "step": 4180 }, { - "epoch": 0.11847884609935107, + "epoch": 0.16358870021128413, "grad_norm": 0.0, - "learning_rate": 1.959229282306877e-05, - "loss": 1.2339, + "learning_rate": 1.9078651042050673e-05, + "loss": 1.1673, "step": 4181 }, { - "epoch": 0.11850718354161353, + "epoch": 0.16362782690351357, "grad_norm": 0.0, - "learning_rate": 1.9592033388754792e-05, - "loss": 1.0872, + "learning_rate": 1.907811966680951e-05, + "loss": 1.2289, "step": 4182 }, { - "epoch": 0.118535520983876, + "epoch": 0.16366695359574301, "grad_norm": 0.0, - "learning_rate": 1.9591773873643555e-05, - "loss": 1.038, + "learning_rate": 1.9077588145785123e-05, + "loss": 1.0131, "step": 4183 }, { - "epoch": 0.11856385842613845, + "epoch": 0.16370608028797246, "grad_norm": 0.0, - "learning_rate": 1.9591514277737243e-05, - "loss": 1.0357, + "learning_rate": 1.9077056478986043e-05, + "loss": 1.2811, "step": 4184 }, { - "epoch": 0.11859219586840092, + "epoch": 0.1637452069802019, "grad_norm": 0.0, - "learning_rate": 1.959125460103805e-05, - "loss": 0.996, + "learning_rate": 1.9076524666420808e-05, + "loss": 1.2052, "step": 4185 }, { - "epoch": 0.11862053331066338, + "epoch": 0.16378433367243134, "grad_norm": 0.0, - "learning_rate": 1.9590994843548155e-05, - "loss": 1.136, + "learning_rate": 1.9075992708097965e-05, + "loss": 1.1334, "step": 4186 }, { - "epoch": 0.11864887075292584, + "epoch": 0.16382346036466078, "grad_norm": 0.0, - "learning_rate": 1.959073500526975e-05, - "loss": 1.0598, + "learning_rate": 1.9075460604026047e-05, + "loss": 1.2309, "step": 4187 }, { - "epoch": 0.1186772081951883, + "epoch": 0.16386258705689022, "grad_norm": 0.0, - "learning_rate": 1.959047508620502e-05, - "loss": 1.0997, + "learning_rate": 1.907492835421361e-05, + "loss": 1.1668, "step": 4188 }, { - "epoch": 0.11870554563745077, + "epoch": 0.16390171374911966, "grad_norm": 0.0, - "learning_rate": 1.959021508635616e-05, - "loss": 1.1249, + "learning_rate": 1.907439595866919e-05, + "loss": 1.0172, "step": 4189 }, { - "epoch": 0.11873388307971322, + "epoch": 0.1639408404413491, "grad_norm": 0.0, - "learning_rate": 1.9589955005725354e-05, - "loss": 0.9071, + "learning_rate": 1.9073863417401342e-05, + "loss": 1.1213, "step": 4190 }, { - "epoch": 0.11876222052197569, + "epoch": 0.1639799671335785, "grad_norm": 0.0, - "learning_rate": 1.9589694844314798e-05, - "loss": 1.0572, + "learning_rate": 1.9073330730418623e-05, + "loss": 1.1378, "step": 4191 }, { - "epoch": 0.11879055796423815, + "epoch": 0.16401909382580795, "grad_norm": 0.0, - "learning_rate": 1.9589434602126678e-05, - "loss": 0.9386, + "learning_rate": 1.9072797897729577e-05, + "loss": 0.9984, "step": 4192 }, { - "epoch": 0.1188188954065006, + "epoch": 0.1640582205180374, "grad_norm": 0.0, - "learning_rate": 1.9589174279163192e-05, - "loss": 1.0992, + "learning_rate": 1.9072264919342766e-05, + "loss": 1.1155, "step": 4193 }, { - "epoch": 0.11884723284876307, + "epoch": 0.16409734721026684, "grad_norm": 0.0, - "learning_rate": 1.9588913875426532e-05, - "loss": 0.9205, + "learning_rate": 1.907173179526675e-05, + "loss": 1.2513, "step": 4194 }, { - "epoch": 0.11887557029102554, + "epoch": 0.16413647390249628, "grad_norm": 0.0, - "learning_rate": 1.9588653390918887e-05, - "loss": 1.0013, + "learning_rate": 1.907119852551009e-05, + "loss": 1.0969, "step": 4195 }, { - "epoch": 0.11890390773328799, + "epoch": 0.16417560059472572, "grad_norm": 0.0, - "learning_rate": 1.9588392825642453e-05, - "loss": 1.0682, + "learning_rate": 1.9070665110081345e-05, + "loss": 1.2139, "step": 4196 }, { - "epoch": 0.11893224517555046, + "epoch": 0.16421472728695516, "grad_norm": 0.0, - "learning_rate": 1.958813217959943e-05, - "loss": 1.0526, + "learning_rate": 1.9070131548989087e-05, + "loss": 1.2436, "step": 4197 }, { - "epoch": 0.11896058261781292, + "epoch": 0.1642538539791846, "grad_norm": 0.0, - "learning_rate": 1.9587871452792006e-05, - "loss": 1.1177, + "learning_rate": 1.906959784224188e-05, + "loss": 1.2196, "step": 4198 }, { - "epoch": 0.11898892006007537, + "epoch": 0.16429298067141404, "grad_norm": 0.0, - "learning_rate": 1.958761064522238e-05, - "loss": 0.9989, + "learning_rate": 1.9069063989848298e-05, + "loss": 1.2025, "step": 4199 }, { - "epoch": 0.11901725750233784, + "epoch": 0.16433210736364348, "grad_norm": 0.0, - "learning_rate": 1.958734975689275e-05, - "loss": 1.0394, + "learning_rate": 1.9068529991816914e-05, + "loss": 1.2258, "step": 4200 }, { - "epoch": 0.1190455949446003, + "epoch": 0.16437123405587292, "grad_norm": 0.0, - "learning_rate": 1.958708878780531e-05, - "loss": 1.0621, + "learning_rate": 1.90679958481563e-05, + "loss": 1.1583, "step": 4201 }, { - "epoch": 0.11907393238686276, + "epoch": 0.16441036074810236, "grad_norm": 0.0, - "learning_rate": 1.9586827737962264e-05, - "loss": 1.0398, + "learning_rate": 1.9067461558875036e-05, + "loss": 1.1564, "step": 4202 }, { - "epoch": 0.11910226982912522, + "epoch": 0.1644494874403318, "grad_norm": 0.0, - "learning_rate": 1.958656660736581e-05, - "loss": 1.0603, + "learning_rate": 1.90669271239817e-05, + "loss": 1.1551, "step": 4203 }, { - "epoch": 0.11913060727138769, + "epoch": 0.16448861413256124, "grad_norm": 0.0, - "learning_rate": 1.9586305396018145e-05, - "loss": 1.0001, + "learning_rate": 1.906639254348488e-05, + "loss": 1.2021, "step": 4204 }, { - "epoch": 0.11915894471365014, + "epoch": 0.16452774082479069, "grad_norm": 0.0, - "learning_rate": 1.9586044103921468e-05, - "loss": 1.1084, + "learning_rate": 1.9065857817393153e-05, + "loss": 1.2073, "step": 4205 }, { - "epoch": 0.11918728215591261, + "epoch": 0.1645668675170201, "grad_norm": 0.0, - "learning_rate": 1.958578273107798e-05, - "loss": 1.0641, + "learning_rate": 1.9065322945715107e-05, + "loss": 1.3113, "step": 4206 }, { - "epoch": 0.11921561959817507, + "epoch": 0.16460599420924954, "grad_norm": 0.0, - "learning_rate": 1.9585521277489886e-05, - "loss": 1.0673, + "learning_rate": 1.9064787928459338e-05, + "loss": 1.3683, "step": 4207 }, { - "epoch": 0.11924395704043753, + "epoch": 0.16464512090147898, "grad_norm": 0.0, - "learning_rate": 1.9585259743159388e-05, - "loss": 1.1565, + "learning_rate": 1.9064252765634435e-05, + "loss": 1.1498, "step": 4208 }, { - "epoch": 0.11927229448269999, + "epoch": 0.16468424759370842, "grad_norm": 0.0, - "learning_rate": 1.9584998128088686e-05, - "loss": 1.0882, + "learning_rate": 1.906371745724899e-05, + "loss": 1.1281, "step": 4209 }, { - "epoch": 0.11930063192496246, + "epoch": 0.16472337428593786, "grad_norm": 0.0, - "learning_rate": 1.9584736432279986e-05, - "loss": 1.0242, + "learning_rate": 1.9063182003311595e-05, + "loss": 1.1297, "step": 4210 }, { - "epoch": 0.11932896936722491, + "epoch": 0.1647625009781673, "grad_norm": 0.0, - "learning_rate": 1.9584474655735493e-05, - "loss": 1.0747, + "learning_rate": 1.906264640383086e-05, + "loss": 1.0109, "step": 4211 }, { - "epoch": 0.11935730680948738, + "epoch": 0.16480162767039674, "grad_norm": 0.0, - "learning_rate": 1.958421279845741e-05, - "loss": 1.1447, + "learning_rate": 1.9062110658815375e-05, + "loss": 1.1478, "step": 4212 }, { - "epoch": 0.11938564425174984, + "epoch": 0.16484075436262619, "grad_norm": 0.0, - "learning_rate": 1.9583950860447944e-05, - "loss": 1.0988, + "learning_rate": 1.9061574768273753e-05, + "loss": 1.1984, "step": 4213 }, { - "epoch": 0.1194139816940123, + "epoch": 0.16487988105485563, "grad_norm": 0.0, - "learning_rate": 1.9583688841709302e-05, - "loss": 1.1292, + "learning_rate": 1.9061038732214592e-05, + "loss": 1.1606, "step": 4214 }, { - "epoch": 0.11944231913627476, + "epoch": 0.16491900774708507, "grad_norm": 0.0, - "learning_rate": 1.958342674224369e-05, - "loss": 1.131, + "learning_rate": 1.9060502550646504e-05, + "loss": 1.2404, "step": 4215 }, { - "epoch": 0.11947065657853723, + "epoch": 0.1649581344393145, "grad_norm": 0.0, - "learning_rate": 1.9583164562053314e-05, - "loss": 1.1292, + "learning_rate": 1.9059966223578098e-05, + "loss": 1.1774, "step": 4216 }, { - "epoch": 0.11949899402079968, + "epoch": 0.16499726113154395, "grad_norm": 0.0, - "learning_rate": 1.9582902301140386e-05, - "loss": 1.0299, + "learning_rate": 1.9059429751017987e-05, + "loss": 1.228, "step": 4217 }, { - "epoch": 0.11952733146306214, + "epoch": 0.1650363878237734, "grad_norm": 0.0, - "learning_rate": 1.9582639959507113e-05, - "loss": 1.0387, + "learning_rate": 1.905889313297479e-05, + "loss": 1.1737, "step": 4218 }, { - "epoch": 0.11955566890532461, + "epoch": 0.16507551451600283, "grad_norm": 0.0, - "learning_rate": 1.9582377537155703e-05, - "loss": 1.0843, + "learning_rate": 1.9058356369457117e-05, + "loss": 1.075, "step": 4219 }, { - "epoch": 0.11958400634758706, + "epoch": 0.16511464120823224, "grad_norm": 0.0, - "learning_rate": 1.958211503408837e-05, - "loss": 1.0, + "learning_rate": 1.905781946047359e-05, + "loss": 1.2129, "step": 4220 }, { - "epoch": 0.11961234378984953, + "epoch": 0.16515376790046168, "grad_norm": 0.0, - "learning_rate": 1.9581852450307324e-05, - "loss": 0.9897, + "learning_rate": 1.9057282406032835e-05, + "loss": 1.0834, "step": 4221 }, { - "epoch": 0.119640681232112, + "epoch": 0.16519289459269113, "grad_norm": 0.0, - "learning_rate": 1.958158978581478e-05, - "loss": 1.1543, + "learning_rate": 1.9056745206143472e-05, + "loss": 1.1469, "step": 4222 }, { - "epoch": 0.11966901867437445, + "epoch": 0.16523202128492057, "grad_norm": 0.0, - "learning_rate": 1.9581327040612942e-05, - "loss": 1.0612, + "learning_rate": 1.9056207860814134e-05, + "loss": 1.2673, "step": 4223 }, { - "epoch": 0.11969735611663691, + "epoch": 0.16527114797715, "grad_norm": 0.0, - "learning_rate": 1.958106421470403e-05, - "loss": 1.0738, + "learning_rate": 1.9055670370053444e-05, + "loss": 1.0922, "step": 4224 }, { - "epoch": 0.11972569355889938, + "epoch": 0.16531027466937945, "grad_norm": 0.0, - "learning_rate": 1.9580801308090257e-05, - "loss": 1.0492, + "learning_rate": 1.905513273387003e-05, + "loss": 1.0136, "step": 4225 }, { - "epoch": 0.11975403100116183, + "epoch": 0.1653494013616089, "grad_norm": 0.0, - "learning_rate": 1.9580538320773838e-05, - "loss": 0.9818, + "learning_rate": 1.9054594952272538e-05, + "loss": 1.1691, "step": 4226 }, { - "epoch": 0.1197823684434243, + "epoch": 0.16538852805383833, "grad_norm": 0.0, - "learning_rate": 1.9580275252756987e-05, - "loss": 1.071, + "learning_rate": 1.9054057025269595e-05, + "loss": 1.2629, "step": 4227 }, { - "epoch": 0.11981070588568676, + "epoch": 0.16542765474606777, "grad_norm": 0.0, - "learning_rate": 1.9580012104041916e-05, - "loss": 1.1495, + "learning_rate": 1.9053518952869842e-05, + "loss": 1.2699, "step": 4228 }, { - "epoch": 0.11983904332794922, + "epoch": 0.1654667814382972, "grad_norm": 0.0, - "learning_rate": 1.957974887463085e-05, - "loss": 1.0062, + "learning_rate": 1.9052980735081918e-05, + "loss": 1.2357, "step": 4229 }, { - "epoch": 0.11986738077021168, + "epoch": 0.16550590813052665, "grad_norm": 0.0, - "learning_rate": 1.9579485564526e-05, - "loss": 1.0847, + "learning_rate": 1.9052442371914466e-05, + "loss": 1.2788, "step": 4230 }, { - "epoch": 0.11989571821247415, + "epoch": 0.1655450348227561, "grad_norm": 0.0, - "learning_rate": 1.9579222173729585e-05, - "loss": 1.1503, + "learning_rate": 1.9051903863376134e-05, + "loss": 1.1591, "step": 4231 }, { - "epoch": 0.1199240556547366, + "epoch": 0.16558416151498553, "grad_norm": 0.0, - "learning_rate": 1.9578958702243827e-05, - "loss": 1.0822, + "learning_rate": 1.905136520947557e-05, + "loss": 1.2725, "step": 4232 }, { - "epoch": 0.11995239309699907, + "epoch": 0.16562328820721497, "grad_norm": 0.0, - "learning_rate": 1.957869515007094e-05, - "loss": 1.0073, + "learning_rate": 1.9050826410221423e-05, + "loss": 1.2554, "step": 4233 }, { - "epoch": 0.11998073053926153, + "epoch": 0.1656624148994444, "grad_norm": 0.0, - "learning_rate": 1.9578431517213148e-05, - "loss": 1.0863, + "learning_rate": 1.905028746562234e-05, + "loss": 1.1738, "step": 4234 }, { - "epoch": 0.12000906798152398, + "epoch": 0.16570154159167383, "grad_norm": 0.0, - "learning_rate": 1.9578167803672668e-05, - "loss": 1.1052, + "learning_rate": 1.9049748375686986e-05, + "loss": 1.1126, "step": 4235 }, { - "epoch": 0.12003740542378645, + "epoch": 0.16574066828390327, "grad_norm": 0.0, - "learning_rate": 1.9577904009451728e-05, - "loss": 1.0472, + "learning_rate": 1.9049209140424014e-05, + "loss": 1.222, "step": 4236 }, { - "epoch": 0.12006574286604892, + "epoch": 0.1657797949761327, "grad_norm": 0.0, - "learning_rate": 1.9577640134552546e-05, - "loss": 1.1091, + "learning_rate": 1.904866975984208e-05, + "loss": 1.2964, "step": 4237 }, { - "epoch": 0.12009408030831137, + "epoch": 0.16581892166836215, "grad_norm": 0.0, - "learning_rate": 1.957737617897734e-05, - "loss": 0.9411, + "learning_rate": 1.904813023394985e-05, + "loss": 1.3085, "step": 4238 }, { - "epoch": 0.12012241775057383, + "epoch": 0.1658580483605916, "grad_norm": 0.0, - "learning_rate": 1.957711214272834e-05, - "loss": 1.1331, + "learning_rate": 1.9047590562755984e-05, + "loss": 1.1964, "step": 4239 }, { - "epoch": 0.1201507551928363, + "epoch": 0.16589717505282103, "grad_norm": 0.0, - "learning_rate": 1.9576848025807772e-05, - "loss": 1.0207, + "learning_rate": 1.9047050746269155e-05, + "loss": 1.1931, "step": 4240 }, { - "epoch": 0.12017909263509875, + "epoch": 0.16593630174505047, "grad_norm": 0.0, - "learning_rate": 1.9576583828217854e-05, - "loss": 0.9914, + "learning_rate": 1.904651078449802e-05, + "loss": 1.1885, "step": 4241 }, { - "epoch": 0.12020743007736122, + "epoch": 0.16597542843727991, "grad_norm": 0.0, - "learning_rate": 1.9576319549960813e-05, - "loss": 0.953, + "learning_rate": 1.9045970677451264e-05, + "loss": 1.2438, "step": 4242 }, { - "epoch": 0.12023576751962368, + "epoch": 0.16601455512950936, "grad_norm": 0.0, - "learning_rate": 1.9576055191038877e-05, - "loss": 1.1521, + "learning_rate": 1.9045430425137553e-05, + "loss": 1.2953, "step": 4243 }, { - "epoch": 0.12026410496188614, + "epoch": 0.1660536818217388, "grad_norm": 0.0, - "learning_rate": 1.9575790751454274e-05, - "loss": 0.9651, + "learning_rate": 1.9044890027565563e-05, + "loss": 1.0874, "step": 4244 }, { - "epoch": 0.1202924424041486, + "epoch": 0.16609280851396824, "grad_norm": 0.0, - "learning_rate": 1.9575526231209224e-05, - "loss": 1.0564, + "learning_rate": 1.9044349484743976e-05, + "loss": 1.2646, "step": 4245 }, { - "epoch": 0.12032077984641107, + "epoch": 0.16613193520619768, "grad_norm": 0.0, - "learning_rate": 1.9575261630305966e-05, - "loss": 0.9615, + "learning_rate": 1.9043808796681467e-05, + "loss": 1.2135, "step": 4246 }, { - "epoch": 0.12034911728867352, + "epoch": 0.16617106189842712, "grad_norm": 0.0, - "learning_rate": 1.9574996948746724e-05, - "loss": 1.0353, + "learning_rate": 1.904326796338672e-05, + "loss": 1.2799, "step": 4247 }, { - "epoch": 0.12037745473093599, + "epoch": 0.16621018859065653, "grad_norm": 0.0, - "learning_rate": 1.9574732186533723e-05, - "loss": 1.0507, + "learning_rate": 1.9042726984868424e-05, + "loss": 1.1631, "step": 4248 }, { - "epoch": 0.12040579217319845, + "epoch": 0.16624931528288597, "grad_norm": 0.0, - "learning_rate": 1.9574467343669202e-05, - "loss": 0.9071, + "learning_rate": 1.904218586113526e-05, + "loss": 1.2445, "step": 4249 }, { - "epoch": 0.1204341296154609, + "epoch": 0.16628844197511541, "grad_norm": 0.0, - "learning_rate": 1.9574202420155384e-05, - "loss": 1.1193, + "learning_rate": 1.9041644592195924e-05, + "loss": 1.1387, "step": 4250 }, { - "epoch": 0.12046246705772337, + "epoch": 0.16632756866734486, "grad_norm": 0.0, - "learning_rate": 1.9573937415994506e-05, - "loss": 1.1124, + "learning_rate": 1.9041103178059107e-05, + "loss": 1.0436, "step": 4251 }, { - "epoch": 0.12049080449998584, + "epoch": 0.1663666953595743, "grad_norm": 0.0, - "learning_rate": 1.9573672331188797e-05, - "loss": 0.9905, + "learning_rate": 1.90405616187335e-05, + "loss": 1.1896, "step": 4252 }, { - "epoch": 0.12051914194224829, + "epoch": 0.16640582205180374, "grad_norm": 0.0, - "learning_rate": 1.957340716574049e-05, - "loss": 1.0326, + "learning_rate": 1.9040019914227803e-05, + "loss": 1.2952, "step": 4253 }, { - "epoch": 0.12054747938451076, + "epoch": 0.16644494874403318, "grad_norm": 0.0, - "learning_rate": 1.957314191965182e-05, - "loss": 1.0075, + "learning_rate": 1.9039478064550713e-05, + "loss": 1.1815, "step": 4254 }, { - "epoch": 0.12057581682677322, + "epoch": 0.16648407543626262, "grad_norm": 0.0, - "learning_rate": 1.957287659292502e-05, - "loss": 1.0006, + "learning_rate": 1.9038936069710934e-05, + "loss": 1.1314, "step": 4255 }, { - "epoch": 0.12060415426903567, + "epoch": 0.16652320212849206, "grad_norm": 0.0, - "learning_rate": 1.9572611185562323e-05, - "loss": 1.0345, + "learning_rate": 1.9038393929717168e-05, + "loss": 1.0931, "step": 4256 }, { - "epoch": 0.12063249171129814, + "epoch": 0.1665623288207215, "grad_norm": 0.0, - "learning_rate": 1.9572345697565973e-05, - "loss": 1.0372, + "learning_rate": 1.903785164457812e-05, + "loss": 1.1885, "step": 4257 }, { - "epoch": 0.1206608291535606, + "epoch": 0.16660145551295094, "grad_norm": 0.0, - "learning_rate": 1.95720801289382e-05, - "loss": 1.0657, + "learning_rate": 1.90373092143025e-05, + "loss": 1.0405, "step": 4258 }, { - "epoch": 0.12068916659582306, + "epoch": 0.16664058220518038, "grad_norm": 0.0, - "learning_rate": 1.9571814479681236e-05, - "loss": 1.0475, + "learning_rate": 1.903676663889902e-05, + "loss": 1.1608, "step": 4259 }, { - "epoch": 0.12071750403808552, + "epoch": 0.16667970889740982, "grad_norm": 0.0, - "learning_rate": 1.9571548749797328e-05, - "loss": 1.0995, + "learning_rate": 1.9036223918376386e-05, + "loss": 1.0435, "step": 4260 }, { - "epoch": 0.12074584148034799, + "epoch": 0.16671883558963926, "grad_norm": 0.0, - "learning_rate": 1.957128293928871e-05, - "loss": 1.0169, + "learning_rate": 1.9035681052743325e-05, + "loss": 1.1837, "step": 4261 }, { - "epoch": 0.12077417892261044, + "epoch": 0.1667579622818687, "grad_norm": 0.0, - "learning_rate": 1.9571017048157627e-05, - "loss": 1.0509, + "learning_rate": 1.9035138042008546e-05, + "loss": 1.1472, "step": 4262 }, { - "epoch": 0.12080251636487291, + "epoch": 0.16679708897409812, "grad_norm": 0.0, - "learning_rate": 1.9570751076406304e-05, - "loss": 0.99, + "learning_rate": 1.903459488618077e-05, + "loss": 1.2695, "step": 4263 }, { - "epoch": 0.12083085380713537, + "epoch": 0.16683621566632756, "grad_norm": 0.0, - "learning_rate": 1.9570485024036997e-05, - "loss": 1.0786, + "learning_rate": 1.9034051585268725e-05, + "loss": 1.2701, "step": 4264 }, { - "epoch": 0.12085919124939783, + "epoch": 0.166875342358557, "grad_norm": 0.0, - "learning_rate": 1.957021889105194e-05, - "loss": 1.0226, + "learning_rate": 1.903350813928113e-05, + "loss": 1.1107, "step": 4265 }, { - "epoch": 0.12088752869166029, + "epoch": 0.16691446905078644, "grad_norm": 0.0, - "learning_rate": 1.9569952677453373e-05, - "loss": 1.0514, + "learning_rate": 1.9032964548226714e-05, + "loss": 1.2721, "step": 4266 }, { - "epoch": 0.12091586613392276, + "epoch": 0.16695359574301588, "grad_norm": 0.0, - "learning_rate": 1.9569686383243542e-05, - "loss": 1.1209, + "learning_rate": 1.9032420812114206e-05, + "loss": 1.2367, "step": 4267 }, { - "epoch": 0.12094420357618521, + "epoch": 0.16699272243524532, "grad_norm": 0.0, - "learning_rate": 1.956942000842469e-05, - "loss": 1.0201, + "learning_rate": 1.9031876930952338e-05, + "loss": 1.2262, "step": 4268 }, { - "epoch": 0.12097254101844768, + "epoch": 0.16703184912747476, "grad_norm": 0.0, - "learning_rate": 1.9569153552999057e-05, - "loss": 1.0098, + "learning_rate": 1.9031332904749843e-05, + "loss": 1.165, "step": 4269 }, { - "epoch": 0.12100087846071014, + "epoch": 0.1670709758197042, "grad_norm": 0.0, - "learning_rate": 1.9568887016968895e-05, - "loss": 1.0786, + "learning_rate": 1.9030788733515463e-05, + "loss": 1.1967, "step": 4270 }, { - "epoch": 0.1210292159029726, + "epoch": 0.16711010251193364, "grad_norm": 0.0, - "learning_rate": 1.9568620400336437e-05, - "loss": 1.0023, + "learning_rate": 1.9030244417257927e-05, + "loss": 1.1743, "step": 4271 }, { - "epoch": 0.12105755334523506, + "epoch": 0.16714922920416309, "grad_norm": 0.0, - "learning_rate": 1.9568353703103945e-05, - "loss": 0.9704, + "learning_rate": 1.9029699955985985e-05, + "loss": 1.2125, "step": 4272 }, { - "epoch": 0.12108589078749753, + "epoch": 0.16718835589639253, "grad_norm": 0.0, - "learning_rate": 1.956808692527365e-05, - "loss": 0.966, + "learning_rate": 1.902915534970837e-05, + "loss": 1.1526, "step": 4273 }, { - "epoch": 0.12111422822975998, + "epoch": 0.16722748258862197, "grad_norm": 0.0, - "learning_rate": 1.956782006684781e-05, - "loss": 1.0891, + "learning_rate": 1.9028610598433843e-05, + "loss": 1.2156, "step": 4274 }, { - "epoch": 0.12114256567202245, + "epoch": 0.1672666092808514, "grad_norm": 0.0, - "learning_rate": 1.9567553127828663e-05, - "loss": 1.034, + "learning_rate": 1.9028065702171136e-05, + "loss": 1.1783, "step": 4275 }, { - "epoch": 0.12117090311428491, + "epoch": 0.16730573597308085, "grad_norm": 0.0, - "learning_rate": 1.9567286108218467e-05, - "loss": 1.1286, + "learning_rate": 1.9027520660929013e-05, + "loss": 1.1661, "step": 4276 }, { - "epoch": 0.12119924055654736, + "epoch": 0.16734486266531026, "grad_norm": 0.0, - "learning_rate": 1.9567019008019467e-05, - "loss": 1.0532, + "learning_rate": 1.9026975474716215e-05, + "loss": 1.1166, "step": 4277 }, { - "epoch": 0.12122757799880983, + "epoch": 0.1673839893575397, "grad_norm": 0.0, - "learning_rate": 1.9566751827233914e-05, - "loss": 1.0286, + "learning_rate": 1.9026430143541503e-05, + "loss": 1.1608, "step": 4278 }, { - "epoch": 0.1212559154410723, + "epoch": 0.16742311604976914, "grad_norm": 0.0, - "learning_rate": 1.9566484565864056e-05, - "loss": 1.0945, + "learning_rate": 1.9025884667413637e-05, + "loss": 1.0762, "step": 4279 }, { - "epoch": 0.12128425288333475, + "epoch": 0.16746224274199858, "grad_norm": 0.0, - "learning_rate": 1.956621722391215e-05, - "loss": 1.006, + "learning_rate": 1.902533904634137e-05, + "loss": 1.1051, "step": 4280 }, { - "epoch": 0.12131259032559721, + "epoch": 0.16750136943422803, "grad_norm": 0.0, - "learning_rate": 1.9565949801380442e-05, - "loss": 1.1116, + "learning_rate": 1.9024793280333467e-05, + "loss": 1.199, "step": 4281 }, { - "epoch": 0.12134092776785968, + "epoch": 0.16754049612645747, "grad_norm": 0.0, - "learning_rate": 1.9565682298271186e-05, - "loss": 1.1235, + "learning_rate": 1.9024247369398697e-05, + "loss": 1.151, "step": 4282 }, { - "epoch": 0.12136926521012213, + "epoch": 0.1675796228186869, "grad_norm": 0.0, - "learning_rate": 1.9565414714586636e-05, - "loss": 1.1007, + "learning_rate": 1.902370131354582e-05, + "loss": 1.1334, "step": 4283 }, { - "epoch": 0.1213976026523846, + "epoch": 0.16761874951091635, "grad_norm": 0.0, - "learning_rate": 1.9565147050329046e-05, - "loss": 1.133, + "learning_rate": 1.9023155112783607e-05, + "loss": 1.303, "step": 4284 }, { - "epoch": 0.12142594009464706, + "epoch": 0.1676578762031458, "grad_norm": 0.0, - "learning_rate": 1.956487930550067e-05, - "loss": 1.0222, + "learning_rate": 1.902260876712083e-05, + "loss": 1.3567, "step": 4285 }, { - "epoch": 0.12145427753690952, + "epoch": 0.16769700289537523, "grad_norm": 0.0, - "learning_rate": 1.9564611480103762e-05, - "loss": 0.9623, + "learning_rate": 1.902206227656626e-05, + "loss": 1.2795, "step": 4286 }, { - "epoch": 0.12148261497917198, + "epoch": 0.16773612958760467, "grad_norm": 0.0, - "learning_rate": 1.9564343574140585e-05, - "loss": 1.0948, + "learning_rate": 1.9021515641128676e-05, + "loss": 1.1378, "step": 4287 }, { - "epoch": 0.12151095242143443, + "epoch": 0.1677752562798341, "grad_norm": 0.0, - "learning_rate": 1.9564075587613386e-05, - "loss": 1.1006, + "learning_rate": 1.902096886081686e-05, + "loss": 1.1225, "step": 4288 }, { - "epoch": 0.1215392898636969, + "epoch": 0.16781438297206355, "grad_norm": 0.0, - "learning_rate": 1.9563807520524426e-05, - "loss": 1.0616, + "learning_rate": 1.9020421935639582e-05, + "loss": 1.1781, "step": 4289 }, { - "epoch": 0.12156762730595937, + "epoch": 0.167853509664293, "grad_norm": 0.0, - "learning_rate": 1.9563539372875965e-05, - "loss": 1.0571, + "learning_rate": 1.901987486560563e-05, + "loss": 1.1588, "step": 4290 }, { - "epoch": 0.12159596474822182, + "epoch": 0.1678926363565224, "grad_norm": 0.0, - "learning_rate": 1.9563271144670263e-05, - "loss": 1.0563, + "learning_rate": 1.9019327650723795e-05, + "loss": 1.2261, "step": 4291 }, { - "epoch": 0.12162430219048428, + "epoch": 0.16793176304875185, "grad_norm": 0.0, - "learning_rate": 1.9563002835909576e-05, - "loss": 1.092, + "learning_rate": 1.901878029100286e-05, + "loss": 1.2667, "step": 4292 }, { - "epoch": 0.12165263963274675, + "epoch": 0.1679708897409813, "grad_norm": 0.0, - "learning_rate": 1.9562734446596164e-05, - "loss": 1.0033, + "learning_rate": 1.901823278645161e-05, + "loss": 1.2495, "step": 4293 }, { - "epoch": 0.1216809770750092, + "epoch": 0.16801001643321073, "grad_norm": 0.0, - "learning_rate": 1.9562465976732288e-05, - "loss": 1.0633, + "learning_rate": 1.9017685137078847e-05, + "loss": 1.064, "step": 4294 }, { - "epoch": 0.12170931451727167, + "epoch": 0.16804914312544017, "grad_norm": 0.0, - "learning_rate": 1.956219742632021e-05, - "loss": 1.0688, + "learning_rate": 1.9017137342893356e-05, + "loss": 1.1983, "step": 4295 }, { - "epoch": 0.12173765195953413, + "epoch": 0.1680882698176696, "grad_norm": 0.0, - "learning_rate": 1.9561928795362192e-05, - "loss": 0.9957, + "learning_rate": 1.901658940390394e-05, + "loss": 1.1038, "step": 4296 }, { - "epoch": 0.12176598940179659, + "epoch": 0.16812739650989905, "grad_norm": 0.0, - "learning_rate": 1.95616600838605e-05, - "loss": 0.9962, + "learning_rate": 1.9016041320119397e-05, + "loss": 1.0071, "step": 4297 }, { - "epoch": 0.12179432684405905, + "epoch": 0.1681665232021285, "grad_norm": 0.0, - "learning_rate": 1.9561391291817393e-05, - "loss": 1.0172, + "learning_rate": 1.9015493091548526e-05, + "loss": 1.1726, "step": 4298 }, { - "epoch": 0.12182266428632152, + "epoch": 0.16820564989435793, "grad_norm": 0.0, - "learning_rate": 1.9561122419235137e-05, - "loss": 1.0538, + "learning_rate": 1.9014944718200137e-05, + "loss": 1.3197, "step": 4299 }, { - "epoch": 0.12185100172858397, + "epoch": 0.16824477658658737, "grad_norm": 0.0, - "learning_rate": 1.9560853466115996e-05, - "loss": 1.0224, + "learning_rate": 1.901439620008303e-05, + "loss": 1.1791, "step": 4300 }, { - "epoch": 0.12187933917084644, + "epoch": 0.16828390327881682, "grad_norm": 0.0, - "learning_rate": 1.9560584432462236e-05, - "loss": 0.9797, + "learning_rate": 1.9013847537206014e-05, + "loss": 1.2303, "step": 4301 }, { - "epoch": 0.1219076766131089, + "epoch": 0.16832302997104626, "grad_norm": 0.0, - "learning_rate": 1.956031531827612e-05, - "loss": 1.0812, + "learning_rate": 1.9013298729577905e-05, + "loss": 1.1153, "step": 4302 }, { - "epoch": 0.12193601405537136, + "epoch": 0.1683621566632757, "grad_norm": 0.0, - "learning_rate": 1.9560046123559923e-05, - "loss": 1.0459, + "learning_rate": 1.901274977720751e-05, + "loss": 1.1785, "step": 4303 }, { - "epoch": 0.12196435149763382, + "epoch": 0.16840128335550514, "grad_norm": 0.0, - "learning_rate": 1.9559776848315907e-05, - "loss": 0.958, + "learning_rate": 1.901220068010365e-05, + "loss": 1.3015, "step": 4304 }, { - "epoch": 0.12199268893989629, + "epoch": 0.16844041004773455, "grad_norm": 0.0, - "learning_rate": 1.9559507492546336e-05, - "loss": 1.0086, + "learning_rate": 1.9011651438275137e-05, + "loss": 1.2987, "step": 4305 }, { - "epoch": 0.12202102638215874, + "epoch": 0.168479536739964, "grad_norm": 0.0, - "learning_rate": 1.9559238056253485e-05, - "loss": 1.114, + "learning_rate": 1.9011102051730796e-05, + "loss": 1.0646, "step": 4306 }, { - "epoch": 0.1220493638244212, + "epoch": 0.16851866343219343, "grad_norm": 0.0, - "learning_rate": 1.9558968539439627e-05, - "loss": 1.0049, + "learning_rate": 1.9010552520479448e-05, + "loss": 1.1924, "step": 4307 }, { - "epoch": 0.12207770126668367, + "epoch": 0.16855779012442287, "grad_norm": 0.0, - "learning_rate": 1.9558698942107023e-05, - "loss": 1.0605, + "learning_rate": 1.901000284452992e-05, + "loss": 1.2032, "step": 4308 }, { - "epoch": 0.12210603870894612, + "epoch": 0.16859691681665231, "grad_norm": 0.0, - "learning_rate": 1.955842926425795e-05, - "loss": 1.1237, + "learning_rate": 1.9009453023891028e-05, + "loss": 1.1331, "step": 4309 }, { - "epoch": 0.12213437615120859, + "epoch": 0.16863604350888176, "grad_norm": 0.0, - "learning_rate": 1.9558159505894676e-05, - "loss": 0.9602, + "learning_rate": 1.9008903058571615e-05, + "loss": 1.2094, "step": 4310 }, { - "epoch": 0.12216271359347106, + "epoch": 0.1686751702011112, "grad_norm": 0.0, - "learning_rate": 1.9557889667019477e-05, - "loss": 1.076, + "learning_rate": 1.900835294858051e-05, + "loss": 1.2139, "step": 4311 }, { - "epoch": 0.12219105103573351, + "epoch": 0.16871429689334064, "grad_norm": 0.0, - "learning_rate": 1.9557619747634623e-05, - "loss": 1.153, + "learning_rate": 1.900780269392654e-05, + "loss": 1.1286, "step": 4312 }, { - "epoch": 0.12221938847799597, + "epoch": 0.16875342358557008, "grad_norm": 0.0, - "learning_rate": 1.955734974774239e-05, - "loss": 1.2156, + "learning_rate": 1.900725229461855e-05, + "loss": 1.2438, "step": 4313 }, { - "epoch": 0.12224772592025844, + "epoch": 0.16879255027779952, "grad_norm": 0.0, - "learning_rate": 1.955707966734505e-05, - "loss": 0.9401, + "learning_rate": 1.9006701750665373e-05, + "loss": 1.2408, "step": 4314 }, { - "epoch": 0.12227606336252089, + "epoch": 0.16883167697002896, "grad_norm": 0.0, - "learning_rate": 1.955680950644488e-05, - "loss": 1.04, + "learning_rate": 1.9006151062075853e-05, + "loss": 1.1401, "step": 4315 }, { - "epoch": 0.12230440080478336, + "epoch": 0.1688708036622584, "grad_norm": 0.0, - "learning_rate": 1.9556539265044153e-05, - "loss": 0.9478, + "learning_rate": 1.9005600228858832e-05, + "loss": 1.2613, "step": 4316 }, { - "epoch": 0.12233273824704582, + "epoch": 0.16890993035448784, "grad_norm": 0.0, - "learning_rate": 1.9556268943145146e-05, - "loss": 1.1146, + "learning_rate": 1.9005049251023156e-05, + "loss": 1.143, "step": 4317 }, { - "epoch": 0.12236107568930828, + "epoch": 0.16894905704671728, "grad_norm": 0.0, - "learning_rate": 1.955599854075014e-05, - "loss": 1.1389, + "learning_rate": 1.9004498128577672e-05, + "loss": 1.2166, "step": 4318 }, { - "epoch": 0.12238941313157074, + "epoch": 0.1689881837389467, "grad_norm": 0.0, - "learning_rate": 1.955572805786141e-05, - "loss": 1.0105, + "learning_rate": 1.900394686153123e-05, + "loss": 1.141, "step": 4319 }, { - "epoch": 0.12241775057383321, + "epoch": 0.16902731043117614, "grad_norm": 0.0, - "learning_rate": 1.955545749448123e-05, - "loss": 1.059, + "learning_rate": 1.9003395449892685e-05, + "loss": 1.0888, "step": 4320 }, { - "epoch": 0.12244608801609566, + "epoch": 0.16906643712340558, "grad_norm": 0.0, - "learning_rate": 1.955518685061189e-05, - "loss": 1.0322, + "learning_rate": 1.900284389367089e-05, + "loss": 1.2271, "step": 4321 }, { - "epoch": 0.12247442545835813, + "epoch": 0.16910556381563502, "grad_norm": 0.0, - "learning_rate": 1.9554916126255657e-05, - "loss": 1.1997, + "learning_rate": 1.900229219287471e-05, + "loss": 1.1249, "step": 4322 }, { - "epoch": 0.1225027629006206, + "epoch": 0.16914469050786446, "grad_norm": 0.0, - "learning_rate": 1.9554645321414818e-05, - "loss": 0.9837, + "learning_rate": 1.900174034751299e-05, + "loss": 1.1157, "step": 4323 }, { - "epoch": 0.12253110034288305, + "epoch": 0.1691838172000939, "grad_norm": 0.0, - "learning_rate": 1.9554374436091653e-05, - "loss": 1.0022, + "learning_rate": 1.9001188357594604e-05, + "loss": 1.22, "step": 4324 }, { - "epoch": 0.12255943778514551, + "epoch": 0.16922294389232334, "grad_norm": 0.0, - "learning_rate": 1.9554103470288452e-05, - "loss": 1.0347, + "learning_rate": 1.900063622312841e-05, + "loss": 1.2192, "step": 4325 }, { - "epoch": 0.12258777522740798, + "epoch": 0.16926207058455278, "grad_norm": 0.0, - "learning_rate": 1.9553832424007478e-05, - "loss": 1.1321, + "learning_rate": 1.9000083944123277e-05, + "loss": 1.1087, "step": 4326 }, { - "epoch": 0.12261611266967043, + "epoch": 0.16930119727678222, "grad_norm": 0.0, - "learning_rate": 1.9553561297251033e-05, - "loss": 0.9622, + "learning_rate": 1.8999531520588074e-05, + "loss": 1.1699, "step": 4327 }, { - "epoch": 0.1226444501119329, + "epoch": 0.16934032396901166, "grad_norm": 0.0, - "learning_rate": 1.9553290090021392e-05, - "loss": 1.1105, + "learning_rate": 1.8998978952531672e-05, + "loss": 1.2156, "step": 4328 }, { - "epoch": 0.12267278755419536, + "epoch": 0.1693794506612411, "grad_norm": 0.0, - "learning_rate": 1.9553018802320843e-05, - "loss": 1.1274, + "learning_rate": 1.8998426239962945e-05, + "loss": 1.0555, "step": 4329 }, { - "epoch": 0.12270112499645781, + "epoch": 0.16941857735347055, "grad_norm": 0.0, - "learning_rate": 1.9552747434151665e-05, - "loss": 1.0273, + "learning_rate": 1.8997873382890767e-05, + "loss": 1.1249, "step": 4330 }, { - "epoch": 0.12272946243872028, + "epoch": 0.1694577040457, "grad_norm": 0.0, - "learning_rate": 1.955247598551615e-05, - "loss": 1.0858, + "learning_rate": 1.8997320381324018e-05, + "loss": 1.1191, "step": 4331 }, { - "epoch": 0.12275779988098275, + "epoch": 0.16949683073792943, "grad_norm": 0.0, - "learning_rate": 1.9552204456416585e-05, - "loss": 0.9539, + "learning_rate": 1.8996767235271577e-05, + "loss": 1.0623, "step": 4332 }, { - "epoch": 0.1227861373232452, + "epoch": 0.16953595743015887, "grad_norm": 0.0, - "learning_rate": 1.955193284685525e-05, - "loss": 0.9835, + "learning_rate": 1.899621394474233e-05, + "loss": 1.0843, "step": 4333 }, { - "epoch": 0.12281447476550766, + "epoch": 0.16957508412238828, "grad_norm": 0.0, - "learning_rate": 1.955166115683444e-05, - "loss": 1.1348, + "learning_rate": 1.899566050974516e-05, + "loss": 1.0079, "step": 4334 }, { - "epoch": 0.12284281220777013, + "epoch": 0.16961421081461772, "grad_norm": 0.0, - "learning_rate": 1.9551389386356444e-05, - "loss": 0.9284, + "learning_rate": 1.899510693028895e-05, + "loss": 1.2097, "step": 4335 }, { - "epoch": 0.12287114965003258, + "epoch": 0.16965333750684716, "grad_norm": 0.0, - "learning_rate": 1.9551117535423546e-05, - "loss": 1.1009, + "learning_rate": 1.8994553206382597e-05, + "loss": 1.0765, "step": 4336 }, { - "epoch": 0.12289948709229505, + "epoch": 0.1696924641990766, "grad_norm": 0.0, - "learning_rate": 1.9550845604038035e-05, - "loss": 0.9857, + "learning_rate": 1.899399933803499e-05, + "loss": 1.1555, "step": 4337 }, { - "epoch": 0.12292782453455751, + "epoch": 0.16973159089130604, "grad_norm": 0.0, - "learning_rate": 1.955057359220221e-05, - "loss": 1.0808, + "learning_rate": 1.8993445325255027e-05, + "loss": 1.2379, "step": 4338 }, { - "epoch": 0.12295616197681997, + "epoch": 0.16977071758353549, "grad_norm": 0.0, - "learning_rate": 1.9550301499918355e-05, - "loss": 1.1275, + "learning_rate": 1.89928911680516e-05, + "loss": 1.1472, "step": 4339 }, { - "epoch": 0.12298449941908243, + "epoch": 0.16980984427576493, "grad_norm": 0.0, - "learning_rate": 1.9550029327188763e-05, - "loss": 1.0529, + "learning_rate": 1.8992336866433606e-05, + "loss": 1.2579, "step": 4340 }, { - "epoch": 0.1230128368613449, + "epoch": 0.16984897096799437, "grad_norm": 0.0, - "learning_rate": 1.9549757074015727e-05, - "loss": 1.0574, + "learning_rate": 1.899178242040995e-05, + "loss": 1.1705, "step": 4341 }, { - "epoch": 0.12304117430360735, + "epoch": 0.1698880976602238, "grad_norm": 0.0, - "learning_rate": 1.9549484740401544e-05, - "loss": 1.0168, + "learning_rate": 1.899122782998954e-05, + "loss": 1.1254, "step": 4342 }, { - "epoch": 0.12306951174586982, + "epoch": 0.16992722435245325, "grad_norm": 0.0, - "learning_rate": 1.9549212326348505e-05, - "loss": 1.0681, + "learning_rate": 1.8990673095181276e-05, + "loss": 1.2457, "step": 4343 }, { - "epoch": 0.12309784918813228, + "epoch": 0.1699663510446827, "grad_norm": 0.0, - "learning_rate": 1.95489398318589e-05, - "loss": 1.1624, + "learning_rate": 1.8990118215994068e-05, + "loss": 1.1609, "step": 4344 }, { - "epoch": 0.12312618663039473, + "epoch": 0.17000547773691213, "grad_norm": 0.0, - "learning_rate": 1.954866725693503e-05, - "loss": 0.9582, + "learning_rate": 1.8989563192436826e-05, + "loss": 1.1854, "step": 4345 }, { - "epoch": 0.1231545240726572, + "epoch": 0.17004460442914157, "grad_norm": 0.0, - "learning_rate": 1.9548394601579192e-05, - "loss": 1.0088, + "learning_rate": 1.8989008024518462e-05, + "loss": 1.1246, "step": 4346 }, { - "epoch": 0.12318286151491967, + "epoch": 0.170083731121371, "grad_norm": 0.0, - "learning_rate": 1.954812186579368e-05, - "loss": 0.9567, + "learning_rate": 1.8988452712247896e-05, + "loss": 1.2095, "step": 4347 }, { - "epoch": 0.12321119895718212, + "epoch": 0.17012285781360043, "grad_norm": 0.0, - "learning_rate": 1.9547849049580792e-05, - "loss": 1.0817, + "learning_rate": 1.8987897255634042e-05, + "loss": 1.1484, "step": 4348 }, { - "epoch": 0.12323953639944459, + "epoch": 0.17016198450582987, "grad_norm": 0.0, - "learning_rate": 1.954757615294283e-05, - "loss": 1.0155, + "learning_rate": 1.898734165468582e-05, + "loss": 1.2528, "step": 4349 }, { - "epoch": 0.12326787384170705, + "epoch": 0.1702011111980593, "grad_norm": 0.0, - "learning_rate": 1.9547303175882085e-05, - "loss": 1.0637, + "learning_rate": 1.898678590941215e-05, + "loss": 1.2349, "step": 4350 }, { - "epoch": 0.1232962112839695, + "epoch": 0.17024023789028875, "grad_norm": 0.0, - "learning_rate": 1.9547030118400857e-05, - "loss": 1.1226, + "learning_rate": 1.8986230019821963e-05, + "loss": 1.2042, "step": 4351 }, { - "epoch": 0.12332454872623197, + "epoch": 0.1702793645825182, "grad_norm": 0.0, - "learning_rate": 1.9546756980501454e-05, - "loss": 1.0872, + "learning_rate": 1.8985673985924185e-05, + "loss": 1.0508, "step": 4352 }, { - "epoch": 0.12335288616849444, + "epoch": 0.17031849127474763, "grad_norm": 0.0, - "learning_rate": 1.954648376218617e-05, - "loss": 1.0201, + "learning_rate": 1.8985117807727737e-05, + "loss": 1.316, "step": 4353 }, { - "epoch": 0.12338122361075689, + "epoch": 0.17035761796697707, "grad_norm": 0.0, - "learning_rate": 1.9546210463457306e-05, - "loss": 1.0634, + "learning_rate": 1.8984561485241558e-05, + "loss": 1.2086, "step": 4354 }, { - "epoch": 0.12340956105301935, + "epoch": 0.1703967446592065, "grad_norm": 0.0, - "learning_rate": 1.954593708431717e-05, - "loss": 0.9336, + "learning_rate": 1.898400501847458e-05, + "loss": 1.1855, "step": 4355 }, { - "epoch": 0.12343789849528182, + "epoch": 0.17043587135143595, "grad_norm": 0.0, - "learning_rate": 1.954566362476806e-05, - "loss": 0.9696, + "learning_rate": 1.898344840743574e-05, + "loss": 1.1801, "step": 4356 }, { - "epoch": 0.12346623593754427, + "epoch": 0.1704749980436654, "grad_norm": 0.0, - "learning_rate": 1.9545390084812278e-05, - "loss": 1.1631, + "learning_rate": 1.8982891652133976e-05, + "loss": 1.1839, "step": 4357 }, { - "epoch": 0.12349457337980674, + "epoch": 0.17051412473589483, "grad_norm": 0.0, - "learning_rate": 1.9545116464452133e-05, - "loss": 1.0892, + "learning_rate": 1.8982334752578226e-05, + "loss": 1.1744, "step": 4358 }, { - "epoch": 0.1235229108220692, + "epoch": 0.17055325142812428, "grad_norm": 0.0, - "learning_rate": 1.9544842763689928e-05, - "loss": 1.0468, + "learning_rate": 1.8981777708777437e-05, + "loss": 1.2429, "step": 4359 }, { - "epoch": 0.12355124826433166, + "epoch": 0.17059237812035372, "grad_norm": 0.0, - "learning_rate": 1.9544568982527968e-05, - "loss": 1.0581, + "learning_rate": 1.898122052074055e-05, + "loss": 1.1681, "step": 4360 }, { - "epoch": 0.12357958570659412, + "epoch": 0.17063150481258316, "grad_norm": 0.0, - "learning_rate": 1.9544295120968558e-05, - "loss": 1.053, + "learning_rate": 1.8980663188476518e-05, + "loss": 1.0998, "step": 4361 }, { - "epoch": 0.12360792314885659, + "epoch": 0.17067063150481257, "grad_norm": 0.0, - "learning_rate": 1.954402117901401e-05, - "loss": 0.9168, + "learning_rate": 1.8980105711994288e-05, + "loss": 1.0024, "step": 4362 }, { - "epoch": 0.12363626059111904, + "epoch": 0.170709758197042, "grad_norm": 0.0, - "learning_rate": 1.954374715666662e-05, - "loss": 1.072, + "learning_rate": 1.8979548091302814e-05, + "loss": 1.2431, "step": 4363 }, { - "epoch": 0.1236645980333815, + "epoch": 0.17074888488927145, "grad_norm": 0.0, - "learning_rate": 1.954347305392871e-05, - "loss": 0.9448, + "learning_rate": 1.8978990326411046e-05, + "loss": 1.2228, "step": 4364 }, { - "epoch": 0.12369293547564397, + "epoch": 0.1707880115815009, "grad_norm": 0.0, - "learning_rate": 1.954319887080258e-05, - "loss": 1.1364, + "learning_rate": 1.8978432417327948e-05, + "loss": 1.2422, "step": 4365 }, { - "epoch": 0.12372127291790642, + "epoch": 0.17082713827373033, "grad_norm": 0.0, - "learning_rate": 1.9542924607290542e-05, - "loss": 0.9094, + "learning_rate": 1.8977874364062474e-05, + "loss": 0.9973, "step": 4366 }, { - "epoch": 0.12374961036016889, + "epoch": 0.17086626496595977, "grad_norm": 0.0, - "learning_rate": 1.9542650263394905e-05, - "loss": 1.0017, + "learning_rate": 1.897731616662359e-05, + "loss": 1.2368, "step": 4367 }, { - "epoch": 0.12377794780243136, + "epoch": 0.17090539165818922, "grad_norm": 0.0, - "learning_rate": 1.9542375839117986e-05, - "loss": 1.0598, + "learning_rate": 1.8976757825020255e-05, + "loss": 1.1647, "step": 4368 }, { - "epoch": 0.12380628524469381, + "epoch": 0.17094451835041866, "grad_norm": 0.0, - "learning_rate": 1.954210133446209e-05, - "loss": 0.9596, + "learning_rate": 1.897619933926144e-05, + "loss": 1.2939, "step": 4369 }, { - "epoch": 0.12383462268695627, + "epoch": 0.1709836450426481, "grad_norm": 0.0, - "learning_rate": 1.9541826749429528e-05, - "loss": 1.0294, + "learning_rate": 1.897564070935611e-05, + "loss": 1.1894, "step": 4370 }, { - "epoch": 0.12386296012921874, + "epoch": 0.17102277173487754, "grad_norm": 0.0, - "learning_rate": 1.954155208402262e-05, - "loss": 1.0665, + "learning_rate": 1.897508193531324e-05, + "loss": 1.1304, "step": 4371 }, { - "epoch": 0.1238912975714812, + "epoch": 0.17106189842710698, "grad_norm": 0.0, - "learning_rate": 1.9541277338243674e-05, - "loss": 1.0164, + "learning_rate": 1.8974523017141798e-05, + "loss": 1.2377, "step": 4372 }, { - "epoch": 0.12391963501374366, + "epoch": 0.17110102511933642, "grad_norm": 0.0, - "learning_rate": 1.9541002512095005e-05, - "loss": 0.9555, + "learning_rate": 1.8973963954850762e-05, + "loss": 1.1261, "step": 4373 }, { - "epoch": 0.12394797245600613, + "epoch": 0.17114015181156586, "grad_norm": 0.0, - "learning_rate": 1.954072760557893e-05, - "loss": 1.0597, + "learning_rate": 1.8973404748449108e-05, + "loss": 1.1989, "step": 4374 }, { - "epoch": 0.12397630989826858, + "epoch": 0.1711792785037953, "grad_norm": 0.0, - "learning_rate": 1.9540452618697763e-05, - "loss": 1.0051, + "learning_rate": 1.897284539794582e-05, + "loss": 1.1217, "step": 4375 }, { - "epoch": 0.12400464734053104, + "epoch": 0.17121840519602471, "grad_norm": 0.0, - "learning_rate": 1.954017755145382e-05, - "loss": 1.0593, + "learning_rate": 1.8972285903349885e-05, + "loss": 1.2041, "step": 4376 }, { - "epoch": 0.12403298478279351, + "epoch": 0.17125753188825416, "grad_norm": 0.0, - "learning_rate": 1.953990240384942e-05, - "loss": 0.9648, + "learning_rate": 1.8971726264670272e-05, + "loss": 1.2497, "step": 4377 }, { - "epoch": 0.12406132222505596, + "epoch": 0.1712966585804836, "grad_norm": 0.0, - "learning_rate": 1.953962717588688e-05, - "loss": 0.9456, + "learning_rate": 1.8971166481915983e-05, + "loss": 1.1281, "step": 4378 }, { - "epoch": 0.12408965966731843, + "epoch": 0.17133578527271304, "grad_norm": 0.0, - "learning_rate": 1.9539351867568516e-05, - "loss": 1.0707, + "learning_rate": 1.8970606555096e-05, + "loss": 1.2095, "step": 4379 }, { - "epoch": 0.1241179971095809, + "epoch": 0.17137491196494248, "grad_norm": 0.0, - "learning_rate": 1.953907647889665e-05, - "loss": 0.9787, + "learning_rate": 1.8970046484219312e-05, + "loss": 1.0728, "step": 4380 }, { - "epoch": 0.12414633455184335, + "epoch": 0.17141403865717192, "grad_norm": 0.0, - "learning_rate": 1.95388010098736e-05, - "loss": 1.1275, + "learning_rate": 1.8969486269294922e-05, + "loss": 1.1997, "step": 4381 }, { - "epoch": 0.12417467199410581, + "epoch": 0.17145316534940136, "grad_norm": 0.0, - "learning_rate": 1.9538525460501687e-05, - "loss": 1.0367, + "learning_rate": 1.8968925910331823e-05, + "loss": 1.2479, "step": 4382 }, { - "epoch": 0.12420300943636828, + "epoch": 0.1714922920416308, "grad_norm": 0.0, - "learning_rate": 1.9538249830783233e-05, - "loss": 1.0437, + "learning_rate": 1.896836540733901e-05, + "loss": 1.0992, "step": 4383 }, { - "epoch": 0.12423134687863073, + "epoch": 0.17153141873386024, "grad_norm": 0.0, - "learning_rate": 1.953797412072056e-05, - "loss": 1.0373, + "learning_rate": 1.8967804760325487e-05, + "loss": 1.2778, "step": 4384 }, { - "epoch": 0.1242596843208932, + "epoch": 0.17157054542608968, "grad_norm": 0.0, - "learning_rate": 1.9537698330315987e-05, - "loss": 1.1073, + "learning_rate": 1.8967243969300257e-05, + "loss": 0.9611, "step": 4385 }, { - "epoch": 0.12428802176315566, + "epoch": 0.17160967211831912, "grad_norm": 0.0, - "learning_rate": 1.953742245957184e-05, - "loss": 0.9616, + "learning_rate": 1.8966683034272328e-05, + "loss": 1.2111, "step": 4386 }, { - "epoch": 0.12431635920541811, + "epoch": 0.17164879881054856, "grad_norm": 0.0, - "learning_rate": 1.953714650849044e-05, - "loss": 0.9033, + "learning_rate": 1.8966121955250702e-05, + "loss": 1.1711, "step": 4387 }, { - "epoch": 0.12434469664768058, + "epoch": 0.171687925502778, "grad_norm": 0.0, - "learning_rate": 1.9536870477074113e-05, - "loss": 1.0858, + "learning_rate": 1.8965560732244395e-05, + "loss": 1.121, "step": 4388 }, { - "epoch": 0.12437303408994305, + "epoch": 0.17172705219500745, "grad_norm": 0.0, - "learning_rate": 1.953659436532519e-05, - "loss": 0.9793, + "learning_rate": 1.8964999365262416e-05, + "loss": 1.1506, "step": 4389 }, { - "epoch": 0.1244013715322055, + "epoch": 0.1717661788872369, "grad_norm": 0.0, - "learning_rate": 1.9536318173245987e-05, - "loss": 1.0472, + "learning_rate": 1.896443785431378e-05, + "loss": 1.1005, "step": 4390 }, { - "epoch": 0.12442970897446796, + "epoch": 0.1718053055794663, "grad_norm": 0.0, - "learning_rate": 1.9536041900838834e-05, - "loss": 1.1064, + "learning_rate": 1.8963876199407508e-05, + "loss": 1.2621, "step": 4391 }, { - "epoch": 0.12445804641673043, + "epoch": 0.17184443227169574, "grad_norm": 0.0, - "learning_rate": 1.9535765548106063e-05, - "loss": 1.0766, + "learning_rate": 1.896331440055261e-05, + "loss": 1.2086, "step": 4392 }, { - "epoch": 0.12448638385899288, + "epoch": 0.17188355896392518, "grad_norm": 0.0, - "learning_rate": 1.9535489115049994e-05, - "loss": 1.0405, + "learning_rate": 1.896275245775812e-05, + "loss": 1.1672, "step": 4393 }, { - "epoch": 0.12451472130125535, + "epoch": 0.17192268565615462, "grad_norm": 0.0, - "learning_rate": 1.9535212601672963e-05, - "loss": 0.9424, + "learning_rate": 1.8962190371033057e-05, + "loss": 1.1758, "step": 4394 }, { - "epoch": 0.12454305874351781, + "epoch": 0.17196181234838406, "grad_norm": 0.0, - "learning_rate": 1.9534936007977292e-05, - "loss": 1.1571, + "learning_rate": 1.8961628140386446e-05, + "loss": 1.0793, "step": 4395 }, { - "epoch": 0.12457139618578027, + "epoch": 0.1720009390406135, "grad_norm": 0.0, - "learning_rate": 1.9534659333965317e-05, - "loss": 0.9901, + "learning_rate": 1.8961065765827317e-05, + "loss": 1.1247, "step": 4396 }, { - "epoch": 0.12459973362804273, + "epoch": 0.17204006573284295, "grad_norm": 0.0, - "learning_rate": 1.9534382579639365e-05, - "loss": 1.0647, + "learning_rate": 1.8960503247364697e-05, + "loss": 1.1357, "step": 4397 }, { - "epoch": 0.1246280710703052, + "epoch": 0.17207919242507239, "grad_norm": 0.0, - "learning_rate": 1.9534105745001764e-05, - "loss": 1.1598, + "learning_rate": 1.8959940585007626e-05, + "loss": 1.0712, "step": 4398 }, { - "epoch": 0.12465640851256765, + "epoch": 0.17211831911730183, "grad_norm": 0.0, - "learning_rate": 1.9533828830054852e-05, - "loss": 1.0691, + "learning_rate": 1.8959377778765135e-05, + "loss": 1.0366, "step": 4399 }, { - "epoch": 0.12468474595483012, + "epoch": 0.17215744580953127, "grad_norm": 0.0, - "learning_rate": 1.9533551834800963e-05, - "loss": 1.0956, + "learning_rate": 1.8958814828646268e-05, + "loss": 1.1201, "step": 4400 }, { - "epoch": 0.12471308339709258, + "epoch": 0.1721965725017607, "grad_norm": 0.0, - "learning_rate": 1.9533274759242424e-05, - "loss": 1.0365, + "learning_rate": 1.8958251734660055e-05, + "loss": 1.1088, "step": 4401 }, { - "epoch": 0.12474142083935504, + "epoch": 0.17223569919399015, "grad_norm": 0.0, - "learning_rate": 1.953299760338157e-05, - "loss": 1.0961, + "learning_rate": 1.895768849681555e-05, + "loss": 1.2289, "step": 4402 }, { - "epoch": 0.1247697582816175, + "epoch": 0.1722748258862196, "grad_norm": 0.0, - "learning_rate": 1.953272036722074e-05, - "loss": 0.9999, + "learning_rate": 1.8957125115121786e-05, + "loss": 1.2599, "step": 4403 }, { - "epoch": 0.12479809572387997, + "epoch": 0.17231395257844903, "grad_norm": 0.0, - "learning_rate": 1.9532443050762265e-05, - "loss": 1.0242, + "learning_rate": 1.895656158958782e-05, + "loss": 1.1932, "step": 4404 }, { - "epoch": 0.12482643316614242, + "epoch": 0.17235307927067844, "grad_norm": 0.0, - "learning_rate": 1.9532165654008484e-05, - "loss": 1.1601, + "learning_rate": 1.89559979202227e-05, + "loss": 1.2019, "step": 4405 }, { - "epoch": 0.12485477060840489, + "epoch": 0.17239220596290789, "grad_norm": 0.0, - "learning_rate": 1.953188817696173e-05, - "loss": 1.0236, + "learning_rate": 1.8955434107035472e-05, + "loss": 1.1969, "step": 4406 }, { - "epoch": 0.12488310805066735, + "epoch": 0.17243133265513733, "grad_norm": 0.0, - "learning_rate": 1.9531610619624345e-05, - "loss": 1.1, + "learning_rate": 1.8954870150035195e-05, + "loss": 1.1476, "step": 4407 }, { - "epoch": 0.1249114454929298, + "epoch": 0.17247045934736677, "grad_norm": 0.0, - "learning_rate": 1.953133298199866e-05, - "loss": 1.0885, + "learning_rate": 1.895430604923093e-05, + "loss": 1.2125, "step": 4408 }, { - "epoch": 0.12493978293519227, + "epoch": 0.1725095860395962, "grad_norm": 0.0, - "learning_rate": 1.9531055264087025e-05, - "loss": 1.1245, + "learning_rate": 1.8953741804631725e-05, + "loss": 1.0639, "step": 4409 }, { - "epoch": 0.12496812037745474, + "epoch": 0.17254871273182565, "grad_norm": 0.0, - "learning_rate": 1.9530777465891767e-05, - "loss": 0.9862, + "learning_rate": 1.8953177416246648e-05, + "loss": 1.1388, "step": 4410 }, { - "epoch": 0.12499645781971719, + "epoch": 0.1725878394240551, "grad_norm": 0.0, - "learning_rate": 1.953049958741523e-05, - "loss": 0.9769, + "learning_rate": 1.895261288408476e-05, + "loss": 1.1716, "step": 4411 }, { - "epoch": 0.12502479526197965, + "epoch": 0.17262696611628453, "grad_norm": 0.0, - "learning_rate": 1.9530221628659758e-05, - "loss": 0.8865, + "learning_rate": 1.8952048208155123e-05, + "loss": 1.1887, "step": 4412 }, { - "epoch": 0.12505313270424212, + "epoch": 0.17266609280851397, "grad_norm": 0.0, - "learning_rate": 1.952994358962769e-05, - "loss": 1.2537, + "learning_rate": 1.8951483388466816e-05, + "loss": 1.1619, "step": 4413 }, { - "epoch": 0.1250814701465046, + "epoch": 0.1727052195007434, "grad_norm": 0.0, - "learning_rate": 1.9529665470321368e-05, - "loss": 1.0131, + "learning_rate": 1.89509184250289e-05, + "loss": 1.1846, "step": 4414 }, { - "epoch": 0.12510980758876702, + "epoch": 0.17274434619297285, "grad_norm": 0.0, - "learning_rate": 1.9529387270743135e-05, - "loss": 1.1429, + "learning_rate": 1.895035331785045e-05, + "loss": 1.2076, "step": 4415 }, { - "epoch": 0.1251381450310295, + "epoch": 0.1727834728852023, "grad_norm": 0.0, - "learning_rate": 1.9529108990895335e-05, - "loss": 1.0844, + "learning_rate": 1.894978806694054e-05, + "loss": 1.3023, "step": 4416 }, { - "epoch": 0.12516648247329196, + "epoch": 0.17282259957743173, "grad_norm": 0.0, - "learning_rate": 1.9528830630780313e-05, - "loss": 1.0756, + "learning_rate": 1.894922267230825e-05, + "loss": 1.096, "step": 4417 }, { - "epoch": 0.12519481991555442, + "epoch": 0.17286172626966118, "grad_norm": 0.0, - "learning_rate": 1.952855219040041e-05, - "loss": 1.0741, + "learning_rate": 1.8948657133962658e-05, + "loss": 1.2277, "step": 4418 }, { - "epoch": 0.1252231573578169, + "epoch": 0.1729008529618906, "grad_norm": 0.0, - "learning_rate": 1.9528273669757974e-05, - "loss": 1.0858, + "learning_rate": 1.8948091451912844e-05, + "loss": 1.3237, "step": 4419 }, { - "epoch": 0.12525149480007935, + "epoch": 0.17293997965412003, "grad_norm": 0.0, - "learning_rate": 1.952799506885535e-05, - "loss": 1.067, + "learning_rate": 1.8947525626167896e-05, + "loss": 1.1808, "step": 4420 }, { - "epoch": 0.1252798322423418, + "epoch": 0.17297910634634947, "grad_norm": 0.0, - "learning_rate": 1.9527716387694888e-05, - "loss": 1.0264, + "learning_rate": 1.8946959656736897e-05, + "loss": 1.3514, "step": 4421 }, { - "epoch": 0.12530816968460426, + "epoch": 0.1730182330385789, "grad_norm": 0.0, - "learning_rate": 1.952743762627893e-05, - "loss": 0.9845, + "learning_rate": 1.894639354362894e-05, + "loss": 1.2404, "step": 4422 }, { - "epoch": 0.12533650712686673, + "epoch": 0.17305735973080835, "grad_norm": 0.0, - "learning_rate": 1.9527158784609828e-05, - "loss": 1.1068, + "learning_rate": 1.894582728685311e-05, + "loss": 1.093, "step": 4423 }, { - "epoch": 0.1253648445691292, + "epoch": 0.1730964864230378, "grad_norm": 0.0, - "learning_rate": 1.9526879862689928e-05, - "loss": 1.0898, + "learning_rate": 1.8945260886418502e-05, + "loss": 1.1303, "step": 4424 }, { - "epoch": 0.12539318201139166, + "epoch": 0.17313561311526723, "grad_norm": 0.0, - "learning_rate": 1.952660086052158e-05, - "loss": 0.9305, + "learning_rate": 1.894469434233422e-05, + "loss": 1.1951, "step": 4425 }, { - "epoch": 0.12542151945365412, + "epoch": 0.17317473980749667, "grad_norm": 0.0, - "learning_rate": 1.9526321778107143e-05, - "loss": 0.9955, + "learning_rate": 1.8944127654609346e-05, + "loss": 1.0833, "step": 4426 }, { - "epoch": 0.12544985689591656, + "epoch": 0.17321386649972612, "grad_norm": 0.0, - "learning_rate": 1.9526042615448953e-05, - "loss": 1.0534, + "learning_rate": 1.8943560823252995e-05, + "loss": 1.1138, "step": 4427 }, { - "epoch": 0.12547819433817903, + "epoch": 0.17325299319195556, "grad_norm": 0.0, - "learning_rate": 1.952576337254937e-05, - "loss": 0.9487, + "learning_rate": 1.8942993848274263e-05, + "loss": 1.1971, "step": 4428 }, { - "epoch": 0.1255065317804415, + "epoch": 0.173292119884185, "grad_norm": 0.0, - "learning_rate": 1.9525484049410746e-05, - "loss": 1.0438, + "learning_rate": 1.8942426729682256e-05, + "loss": 1.162, "step": 4429 }, { - "epoch": 0.12553486922270396, + "epoch": 0.17333124657641444, "grad_norm": 0.0, - "learning_rate": 1.9525204646035432e-05, - "loss": 1.0465, + "learning_rate": 1.8941859467486083e-05, + "loss": 1.1793, "step": 4430 }, { - "epoch": 0.12556320666496643, + "epoch": 0.17337037326864388, "grad_norm": 0.0, - "learning_rate": 1.9524925162425783e-05, - "loss": 1.0084, + "learning_rate": 1.894129206169485e-05, + "loss": 1.1385, "step": 4431 }, { - "epoch": 0.1255915441072289, + "epoch": 0.17340949996087332, "grad_norm": 0.0, - "learning_rate": 1.9524645598584153e-05, - "loss": 0.9922, + "learning_rate": 1.8940724512317672e-05, + "loss": 1.2037, "step": 4432 }, { - "epoch": 0.12561988154949133, + "epoch": 0.17344862665310273, "grad_norm": 0.0, - "learning_rate": 1.9524365954512893e-05, - "loss": 1.0709, + "learning_rate": 1.894015681936366e-05, + "loss": 1.0979, "step": 4433 }, { - "epoch": 0.1256482189917538, + "epoch": 0.17348775334533217, "grad_norm": 0.0, - "learning_rate": 1.9524086230214366e-05, - "loss": 1.0356, + "learning_rate": 1.893958898284193e-05, + "loss": 1.1006, "step": 4434 }, { - "epoch": 0.12567655643401626, + "epoch": 0.17352688003756161, "grad_norm": 0.0, - "learning_rate": 1.952380642569092e-05, - "loss": 1.1378, + "learning_rate": 1.8939021002761605e-05, + "loss": 1.1134, "step": 4435 }, { - "epoch": 0.12570489387627873, + "epoch": 0.17356600672979106, "grad_norm": 0.0, - "learning_rate": 1.9523526540944918e-05, - "loss": 1.0973, + "learning_rate": 1.8938452879131807e-05, + "loss": 1.0721, "step": 4436 }, { - "epoch": 0.1257332313185412, + "epoch": 0.1736051334220205, "grad_norm": 0.0, - "learning_rate": 1.9523246575978713e-05, - "loss": 1.0725, + "learning_rate": 1.8937884611961653e-05, + "loss": 1.3123, "step": 4437 }, { - "epoch": 0.12576156876080366, + "epoch": 0.17364426011424994, "grad_norm": 0.0, - "learning_rate": 1.952296653079467e-05, - "loss": 1.0305, + "learning_rate": 1.8937316201260273e-05, + "loss": 1.1208, "step": 4438 }, { - "epoch": 0.1257899062030661, + "epoch": 0.17368338680647938, "grad_norm": 0.0, - "learning_rate": 1.9522686405395143e-05, - "loss": 1.1002, + "learning_rate": 1.8936747647036793e-05, + "loss": 1.0829, "step": 4439 }, { - "epoch": 0.12581824364532856, + "epoch": 0.17372251349870882, "grad_norm": 0.0, - "learning_rate": 1.952240619978249e-05, - "loss": 1.0342, + "learning_rate": 1.8936178949300344e-05, + "loss": 1.1677, "step": 4440 }, { - "epoch": 0.12584658108759103, + "epoch": 0.17376164019093826, "grad_norm": 0.0, - "learning_rate": 1.9522125913959073e-05, - "loss": 0.9961, + "learning_rate": 1.893561010806006e-05, + "loss": 1.1608, "step": 4441 }, { - "epoch": 0.1258749185298535, + "epoch": 0.1738007668831677, "grad_norm": 0.0, - "learning_rate": 1.9521845547927256e-05, - "loss": 1.1303, + "learning_rate": 1.8935041123325073e-05, + "loss": 1.1776, "step": 4442 }, { - "epoch": 0.12590325597211596, + "epoch": 0.17383989357539714, "grad_norm": 0.0, - "learning_rate": 1.9521565101689396e-05, - "loss": 1.1036, + "learning_rate": 1.8934471995104525e-05, + "loss": 1.3594, "step": 4443 }, { - "epoch": 0.12593159341437843, + "epoch": 0.17387902026762658, "grad_norm": 0.0, - "learning_rate": 1.952128457524786e-05, - "loss": 1.068, + "learning_rate": 1.8933902723407547e-05, + "loss": 1.0615, "step": 4444 }, { - "epoch": 0.12595993085664087, + "epoch": 0.17391814695985602, "grad_norm": 0.0, - "learning_rate": 1.9521003968605004e-05, - "loss": 1.0619, + "learning_rate": 1.893333330824329e-05, + "loss": 1.2118, "step": 4445 }, { - "epoch": 0.12598826829890333, + "epoch": 0.17395727365208546, "grad_norm": 0.0, - "learning_rate": 1.95207232817632e-05, - "loss": 1.0762, + "learning_rate": 1.8932763749620894e-05, + "loss": 1.3845, "step": 4446 }, { - "epoch": 0.1260166057411658, + "epoch": 0.1739964003443149, "grad_norm": 0.0, - "learning_rate": 1.9520442514724807e-05, - "loss": 0.9556, + "learning_rate": 1.8932194047549504e-05, + "loss": 1.132, "step": 4447 }, { - "epoch": 0.12604494318342827, + "epoch": 0.17403552703654432, "grad_norm": 0.0, - "learning_rate": 1.952016166749219e-05, - "loss": 1.0985, + "learning_rate": 1.8931624202038272e-05, + "loss": 1.1263, "step": 4448 }, { - "epoch": 0.12607328062569073, + "epoch": 0.17407465372877376, "grad_norm": 0.0, - "learning_rate": 1.951988074006772e-05, - "loss": 1.0236, + "learning_rate": 1.8931054213096345e-05, + "loss": 1.1619, "step": 4449 }, { - "epoch": 0.1261016180679532, + "epoch": 0.1741137804210032, "grad_norm": 0.0, - "learning_rate": 1.9519599732453756e-05, - "loss": 1.0961, + "learning_rate": 1.893048408073288e-05, + "loss": 1.1931, "step": 4450 }, { - "epoch": 0.12612995551021564, + "epoch": 0.17415290711323264, "grad_norm": 0.0, - "learning_rate": 1.951931864465267e-05, - "loss": 1.1011, + "learning_rate": 1.8929913804957028e-05, + "loss": 1.1163, "step": 4451 }, { - "epoch": 0.1261582929524781, + "epoch": 0.17419203380546208, "grad_norm": 0.0, - "learning_rate": 1.9519037476666827e-05, - "loss": 1.1328, + "learning_rate": 1.8929343385777957e-05, + "loss": 1.1378, "step": 4452 }, { - "epoch": 0.12618663039474057, + "epoch": 0.17423116049769152, "grad_norm": 0.0, - "learning_rate": 1.9518756228498596e-05, - "loss": 1.0188, + "learning_rate": 1.8928772823204814e-05, + "loss": 1.2235, "step": 4453 }, { - "epoch": 0.12621496783700303, + "epoch": 0.17427028718992096, "grad_norm": 0.0, - "learning_rate": 1.9518474900150348e-05, - "loss": 0.9207, + "learning_rate": 1.8928202117246773e-05, + "loss": 1.2335, "step": 4454 }, { - "epoch": 0.1262433052792655, + "epoch": 0.1743094138821504, "grad_norm": 0.0, - "learning_rate": 1.951819349162445e-05, - "loss": 1.0002, + "learning_rate": 1.892763126791299e-05, + "loss": 1.3854, "step": 4455 }, { - "epoch": 0.12627164272152797, + "epoch": 0.17434854057437985, "grad_norm": 0.0, - "learning_rate": 1.9517912002923274e-05, - "loss": 1.0344, + "learning_rate": 1.8927060275212637e-05, + "loss": 1.1325, "step": 4456 }, { - "epoch": 0.1262999801637904, + "epoch": 0.1743876672666093, "grad_norm": 0.0, - "learning_rate": 1.9517630434049192e-05, - "loss": 0.9601, + "learning_rate": 1.892648913915488e-05, + "loss": 1.1536, "step": 4457 }, { - "epoch": 0.12632831760605287, + "epoch": 0.17442679395883873, "grad_norm": 0.0, - "learning_rate": 1.9517348785004574e-05, - "loss": 1.1409, + "learning_rate": 1.8925917859748898e-05, + "loss": 1.2169, "step": 4458 }, { - "epoch": 0.12635665504831534, + "epoch": 0.17446592065106817, "grad_norm": 0.0, - "learning_rate": 1.9517067055791793e-05, - "loss": 1.0319, + "learning_rate": 1.8925346437003856e-05, + "loss": 1.1578, "step": 4459 }, { - "epoch": 0.1263849924905778, + "epoch": 0.1745050473432976, "grad_norm": 0.0, - "learning_rate": 1.9516785246413223e-05, - "loss": 1.0321, + "learning_rate": 1.8924774870928934e-05, + "loss": 1.0969, "step": 4460 }, { - "epoch": 0.12641332993284027, + "epoch": 0.17454417403552705, "grad_norm": 0.0, - "learning_rate": 1.9516503356871234e-05, - "loss": 0.9095, + "learning_rate": 1.8924203161533312e-05, + "loss": 1.2028, "step": 4461 }, { - "epoch": 0.12644166737510273, + "epoch": 0.17458330072775646, "grad_norm": 0.0, - "learning_rate": 1.9516221387168208e-05, - "loss": 1.0684, + "learning_rate": 1.892363130882617e-05, + "loss": 1.247, "step": 4462 }, { - "epoch": 0.12647000481736517, + "epoch": 0.1746224274199859, "grad_norm": 0.0, - "learning_rate": 1.951593933730651e-05, - "loss": 1.1306, + "learning_rate": 1.892305931281669e-05, + "loss": 1.1283, "step": 4463 }, { - "epoch": 0.12649834225962764, + "epoch": 0.17466155411221534, "grad_norm": 0.0, - "learning_rate": 1.9515657207288528e-05, - "loss": 1.1523, + "learning_rate": 1.892248717351406e-05, + "loss": 1.0954, "step": 4464 }, { - "epoch": 0.1265266797018901, + "epoch": 0.17470068080444479, "grad_norm": 0.0, - "learning_rate": 1.9515374997116626e-05, - "loss": 1.0114, + "learning_rate": 1.892191489092747e-05, + "loss": 1.1097, "step": 4465 }, { - "epoch": 0.12655501714415257, + "epoch": 0.17473980749667423, "grad_norm": 0.0, - "learning_rate": 1.951509270679319e-05, - "loss": 0.9854, + "learning_rate": 1.89213424650661e-05, + "loss": 1.0547, "step": 4466 }, { - "epoch": 0.12658335458641504, + "epoch": 0.17477893418890367, "grad_norm": 0.0, - "learning_rate": 1.9514810336320594e-05, - "loss": 0.9987, + "learning_rate": 1.8920769895939154e-05, + "loss": 1.2075, "step": 4467 }, { - "epoch": 0.1266116920286775, + "epoch": 0.1748180608811331, "grad_norm": 0.0, - "learning_rate": 1.9514527885701216e-05, - "loss": 1.0687, + "learning_rate": 1.8920197183555818e-05, + "loss": 1.2498, "step": 4468 }, { - "epoch": 0.12664002947093994, + "epoch": 0.17485718757336255, "grad_norm": 0.0, - "learning_rate": 1.9514245354937437e-05, - "loss": 1.0567, + "learning_rate": 1.8919624327925295e-05, + "loss": 1.1829, "step": 4469 }, { - "epoch": 0.1266683669132024, + "epoch": 0.174896314265592, "grad_norm": 0.0, - "learning_rate": 1.951396274403164e-05, - "loss": 1.0525, + "learning_rate": 1.891905132905678e-05, + "loss": 1.1215, "step": 4470 }, { - "epoch": 0.12669670435546487, + "epoch": 0.17493544095782143, "grad_norm": 0.0, - "learning_rate": 1.9513680052986195e-05, - "loss": 0.9528, + "learning_rate": 1.891847818695948e-05, + "loss": 1.1792, "step": 4471 }, { - "epoch": 0.12672504179772734, + "epoch": 0.17497456765005087, "grad_norm": 0.0, - "learning_rate": 1.9513397281803495e-05, - "loss": 1.1627, + "learning_rate": 1.8917904901642593e-05, + "loss": 1.1142, "step": 4472 }, { - "epoch": 0.1267533792399898, + "epoch": 0.1750136943422803, "grad_norm": 0.0, - "learning_rate": 1.9513114430485914e-05, - "loss": 1.0556, + "learning_rate": 1.8917331473115332e-05, + "loss": 1.1709, "step": 4473 }, { - "epoch": 0.12678171668225227, + "epoch": 0.17505282103450975, "grad_norm": 0.0, - "learning_rate": 1.951283149903584e-05, - "loss": 1.0919, + "learning_rate": 1.89167579013869e-05, + "loss": 1.2002, "step": 4474 }, { - "epoch": 0.1268100541245147, + "epoch": 0.1750919477267392, "grad_norm": 0.0, - "learning_rate": 1.9512548487455655e-05, - "loss": 1.0822, + "learning_rate": 1.8916184186466507e-05, + "loss": 1.1753, "step": 4475 }, { - "epoch": 0.12683839156677718, + "epoch": 0.1751310744189686, "grad_norm": 0.0, - "learning_rate": 1.951226539574774e-05, - "loss": 1.09, + "learning_rate": 1.891561032836337e-05, + "loss": 1.1944, "step": 4476 }, { - "epoch": 0.12686672900903964, + "epoch": 0.17517020111119805, "grad_norm": 0.0, - "learning_rate": 1.9511982223914477e-05, - "loss": 1.065, + "learning_rate": 1.8915036327086702e-05, + "loss": 0.8831, "step": 4477 }, { - "epoch": 0.1268950664513021, + "epoch": 0.1752093278034275, "grad_norm": 0.0, - "learning_rate": 1.9511698971958265e-05, - "loss": 1.1078, + "learning_rate": 1.8914462182645724e-05, + "loss": 1.235, "step": 4478 }, { - "epoch": 0.12692340389356457, + "epoch": 0.17524845449565693, "grad_norm": 0.0, - "learning_rate": 1.9511415639881474e-05, - "loss": 1.1195, + "learning_rate": 1.8913887895049652e-05, + "loss": 1.1368, "step": 4479 }, { - "epoch": 0.12695174133582704, + "epoch": 0.17528758118788637, "grad_norm": 0.0, - "learning_rate": 1.95111322276865e-05, - "loss": 0.8486, + "learning_rate": 1.8913313464307713e-05, + "loss": 1.0982, "step": 4480 }, { - "epoch": 0.12698007877808948, + "epoch": 0.1753267078801158, "grad_norm": 0.0, - "learning_rate": 1.9510848735375723e-05, - "loss": 1.0122, + "learning_rate": 1.8912738890429125e-05, + "loss": 1.1565, "step": 4481 }, { - "epoch": 0.12700841622035194, + "epoch": 0.17536583457234525, "grad_norm": 0.0, - "learning_rate": 1.9510565162951538e-05, - "loss": 1.06, + "learning_rate": 1.891216417342312e-05, + "loss": 1.1837, "step": 4482 }, { - "epoch": 0.1270367536626144, + "epoch": 0.1754049612645747, "grad_norm": 0.0, - "learning_rate": 1.951028151041633e-05, - "loss": 1.0542, + "learning_rate": 1.8911589313298927e-05, + "loss": 1.3203, "step": 4483 }, { - "epoch": 0.12706509110487688, + "epoch": 0.17544408795680413, "grad_norm": 0.0, - "learning_rate": 1.950999777777249e-05, - "loss": 0.9537, + "learning_rate": 1.8911014310065776e-05, + "loss": 1.274, "step": 4484 }, { - "epoch": 0.12709342854713934, + "epoch": 0.17548321464903358, "grad_norm": 0.0, - "learning_rate": 1.9509713965022407e-05, - "loss": 1.1105, + "learning_rate": 1.89104391637329e-05, + "loss": 1.1756, "step": 4485 }, { - "epoch": 0.1271217659894018, + "epoch": 0.17552234134126302, "grad_norm": 0.0, - "learning_rate": 1.9509430072168473e-05, - "loss": 1.0636, + "learning_rate": 1.8909863874309538e-05, + "loss": 1.2891, "step": 4486 }, { - "epoch": 0.12715010343166425, + "epoch": 0.17556146803349246, "grad_norm": 0.0, - "learning_rate": 1.950914609921308e-05, - "loss": 1.1702, + "learning_rate": 1.8909288441804927e-05, + "loss": 1.0999, "step": 4487 }, { - "epoch": 0.1271784408739267, + "epoch": 0.1756005947257219, "grad_norm": 0.0, - "learning_rate": 1.9508862046158615e-05, - "loss": 1.0099, + "learning_rate": 1.8908712866228306e-05, + "loss": 1.2434, "step": 4488 }, { - "epoch": 0.12720677831618918, + "epoch": 0.17563972141795134, "grad_norm": 0.0, - "learning_rate": 1.9508577913007475e-05, - "loss": 0.996, + "learning_rate": 1.8908137147588922e-05, + "loss": 1.1646, "step": 4489 }, { - "epoch": 0.12723511575845164, + "epoch": 0.17567884811018075, "grad_norm": 0.0, - "learning_rate": 1.950829369976205e-05, - "loss": 1.0102, + "learning_rate": 1.8907561285896014e-05, + "loss": 1.2185, "step": 4490 }, { - "epoch": 0.1272634532007141, + "epoch": 0.1757179748024102, "grad_norm": 0.0, - "learning_rate": 1.950800940642474e-05, - "loss": 1.0744, + "learning_rate": 1.8906985281158837e-05, + "loss": 1.0901, "step": 4491 }, { - "epoch": 0.12729179064297658, + "epoch": 0.17575710149463963, "grad_norm": 0.0, - "learning_rate": 1.950772503299793e-05, - "loss": 1.0699, + "learning_rate": 1.890640913338664e-05, + "loss": 1.2159, "step": 4492 }, { - "epoch": 0.12732012808523901, + "epoch": 0.17579622818686907, "grad_norm": 0.0, - "learning_rate": 1.950744057948403e-05, - "loss": 1.128, + "learning_rate": 1.8905832842588666e-05, + "loss": 1.0787, "step": 4493 }, { - "epoch": 0.12734846552750148, + "epoch": 0.17583535487909852, "grad_norm": 0.0, - "learning_rate": 1.9507156045885423e-05, - "loss": 0.9782, + "learning_rate": 1.890525640877418e-05, + "loss": 1.1685, "step": 4494 }, { - "epoch": 0.12737680296976395, + "epoch": 0.17587448157132796, "grad_norm": 0.0, - "learning_rate": 1.950687143220451e-05, - "loss": 0.9579, + "learning_rate": 1.890467983195243e-05, + "loss": 1.1062, "step": 4495 }, { - "epoch": 0.1274051404120264, + "epoch": 0.1759136082635574, "grad_norm": 0.0, - "learning_rate": 1.950658673844369e-05, - "loss": 1.2316, + "learning_rate": 1.8904103112132687e-05, + "loss": 1.192, "step": 4496 }, { - "epoch": 0.12743347785428888, + "epoch": 0.17595273495578684, "grad_norm": 0.0, - "learning_rate": 1.9506301964605358e-05, - "loss": 1.1133, + "learning_rate": 1.8903526249324203e-05, + "loss": 1.1556, "step": 4497 }, { - "epoch": 0.12746181529655135, + "epoch": 0.17599186164801628, "grad_norm": 0.0, - "learning_rate": 1.9506017110691918e-05, - "loss": 1.0392, + "learning_rate": 1.8902949243536245e-05, + "loss": 1.0969, "step": 4498 }, { - "epoch": 0.12749015273881378, + "epoch": 0.17603098834024572, "grad_norm": 0.0, - "learning_rate": 1.9505732176705763e-05, - "loss": 1.0527, + "learning_rate": 1.8902372094778078e-05, + "loss": 1.0747, "step": 4499 }, { - "epoch": 0.12751849018107625, + "epoch": 0.17607011503247516, "grad_norm": 0.0, - "learning_rate": 1.9505447162649296e-05, - "loss": 1.1156, + "learning_rate": 1.8901794803058967e-05, + "loss": 1.1879, "step": 4500 }, { - "epoch": 0.12754682762333872, + "epoch": 0.1761092417247046, "grad_norm": 0.0, - "learning_rate": 1.9505162068524915e-05, - "loss": 0.9157, + "learning_rate": 1.8901217368388193e-05, + "loss": 1.0059, "step": 4501 }, { - "epoch": 0.12757516506560118, + "epoch": 0.17614836841693404, "grad_norm": 0.0, - "learning_rate": 1.9504876894335033e-05, - "loss": 0.9828, + "learning_rate": 1.8900639790775014e-05, + "loss": 1.3317, "step": 4502 }, { - "epoch": 0.12760350250786365, + "epoch": 0.17618749510916348, "grad_norm": 0.0, - "learning_rate": 1.9504591640082034e-05, - "loss": 1.0687, + "learning_rate": 1.8900062070228716e-05, + "loss": 1.2022, "step": 4503 }, { - "epoch": 0.1276318399501261, + "epoch": 0.1762266218013929, "grad_norm": 0.0, - "learning_rate": 1.9504306305768335e-05, - "loss": 1.1111, + "learning_rate": 1.8899484206758574e-05, + "loss": 1.1211, "step": 4504 }, { - "epoch": 0.12766017739238855, + "epoch": 0.17626574849362234, "grad_norm": 0.0, - "learning_rate": 1.9504020891396335e-05, - "loss": 1.0517, + "learning_rate": 1.889890620037387e-05, + "loss": 1.2247, "step": 4505 }, { - "epoch": 0.12768851483465102, + "epoch": 0.17630487518585178, "grad_norm": 0.0, - "learning_rate": 1.9503735396968435e-05, - "loss": 1.0095, + "learning_rate": 1.889832805108388e-05, + "loss": 1.1786, "step": 4506 }, { - "epoch": 0.12771685227691348, + "epoch": 0.17634400187808122, "grad_norm": 0.0, - "learning_rate": 1.9503449822487045e-05, - "loss": 1.1511, + "learning_rate": 1.8897749758897893e-05, + "loss": 1.0692, "step": 4507 }, { - "epoch": 0.12774518971917595, + "epoch": 0.17638312857031066, "grad_norm": 0.0, - "learning_rate": 1.9503164167954566e-05, - "loss": 1.0551, + "learning_rate": 1.8897171323825195e-05, + "loss": 1.1556, "step": 4508 }, { - "epoch": 0.12777352716143842, + "epoch": 0.1764222552625401, "grad_norm": 0.0, - "learning_rate": 1.950287843337341e-05, - "loss": 1.0288, + "learning_rate": 1.8896592745875075e-05, + "loss": 1.1383, "step": 4509 }, { - "epoch": 0.12780186460370088, + "epoch": 0.17646138195476954, "grad_norm": 0.0, - "learning_rate": 1.9502592618745977e-05, - "loss": 0.9376, + "learning_rate": 1.8896014025056822e-05, + "loss": 1.1144, "step": 4510 }, { - "epoch": 0.12783020204596332, + "epoch": 0.17650050864699898, "grad_norm": 0.0, - "learning_rate": 1.9502306724074678e-05, - "loss": 1.0973, + "learning_rate": 1.889543516137973e-05, + "loss": 1.2275, "step": 4511 }, { - "epoch": 0.1278585394882258, + "epoch": 0.17653963533922842, "grad_norm": 0.0, - "learning_rate": 1.9502020749361922e-05, - "loss": 1.0674, + "learning_rate": 1.8894856154853097e-05, + "loss": 1.1922, "step": 4512 }, { - "epoch": 0.12788687693048825, + "epoch": 0.17657876203145786, "grad_norm": 0.0, - "learning_rate": 1.9501734694610113e-05, - "loss": 1.0331, + "learning_rate": 1.889427700548622e-05, + "loss": 1.2329, "step": 4513 }, { - "epoch": 0.12791521437275072, + "epoch": 0.1766178887236873, "grad_norm": 0.0, - "learning_rate": 1.950144855982167e-05, - "loss": 1.077, + "learning_rate": 1.88936977132884e-05, + "loss": 1.2418, "step": 4514 }, { - "epoch": 0.12794355181501318, + "epoch": 0.17665701541591675, "grad_norm": 0.0, - "learning_rate": 1.9501162344998994e-05, - "loss": 0.9508, + "learning_rate": 1.8893118278268937e-05, + "loss": 1.1539, "step": 4515 }, { - "epoch": 0.12797188925727565, + "epoch": 0.1766961421081462, "grad_norm": 0.0, - "learning_rate": 1.95008760501445e-05, - "loss": 1.0273, + "learning_rate": 1.889253870043714e-05, + "loss": 1.319, "step": 4516 }, { - "epoch": 0.1280002266995381, + "epoch": 0.17673526880037563, "grad_norm": 0.0, - "learning_rate": 1.9500589675260596e-05, - "loss": 0.9225, + "learning_rate": 1.8891958979802316e-05, + "loss": 1.1099, "step": 4517 }, { - "epoch": 0.12802856414180055, + "epoch": 0.17677439549260507, "grad_norm": 0.0, - "learning_rate": 1.9500303220349704e-05, - "loss": 1.0229, + "learning_rate": 1.8891379116373768e-05, + "loss": 1.3125, "step": 4518 }, { - "epoch": 0.12805690158406302, + "epoch": 0.17681352218483448, "grad_norm": 0.0, - "learning_rate": 1.9500016685414226e-05, - "loss": 0.9283, + "learning_rate": 1.8890799110160815e-05, + "loss": 1.1499, "step": 4519 }, { - "epoch": 0.1280852390263255, + "epoch": 0.17685264887706392, "grad_norm": 0.0, - "learning_rate": 1.949973007045658e-05, - "loss": 0.9649, + "learning_rate": 1.889021896117277e-05, + "loss": 1.0594, "step": 4520 }, { - "epoch": 0.12811357646858795, + "epoch": 0.17689177556929336, "grad_norm": 0.0, - "learning_rate": 1.949944337547918e-05, - "loss": 0.9264, + "learning_rate": 1.888963866941895e-05, + "loss": 1.226, "step": 4521 }, { - "epoch": 0.12814191391085042, + "epoch": 0.1769309022615228, "grad_norm": 0.0, - "learning_rate": 1.9499156600484442e-05, - "loss": 0.9994, + "learning_rate": 1.888905823490867e-05, + "loss": 1.0658, "step": 4522 }, { - "epoch": 0.12817025135311286, + "epoch": 0.17697002895375225, "grad_norm": 0.0, - "learning_rate": 1.9498869745474777e-05, - "loss": 0.9974, + "learning_rate": 1.8888477657651255e-05, + "loss": 1.1181, "step": 4523 }, { - "epoch": 0.12819858879537532, + "epoch": 0.1770091556459817, "grad_norm": 0.0, - "learning_rate": 1.9498582810452607e-05, - "loss": 0.9706, + "learning_rate": 1.8887896937656028e-05, + "loss": 1.3084, "step": 4524 }, { - "epoch": 0.1282269262376378, + "epoch": 0.17704828233821113, "grad_norm": 0.0, - "learning_rate": 1.949829579542035e-05, - "loss": 1.1072, + "learning_rate": 1.888731607493231e-05, + "loss": 1.187, "step": 4525 }, { - "epoch": 0.12825526367990026, + "epoch": 0.17708740903044057, "grad_norm": 0.0, - "learning_rate": 1.9498008700380414e-05, - "loss": 1.0061, + "learning_rate": 1.8886735069489433e-05, + "loss": 1.0996, "step": 4526 }, { - "epoch": 0.12828360112216272, + "epoch": 0.17712653572267, "grad_norm": 0.0, - "learning_rate": 1.949772152533523e-05, - "loss": 1.0603, + "learning_rate": 1.888615392133673e-05, + "loss": 1.216, "step": 4527 }, { - "epoch": 0.1283119385644252, + "epoch": 0.17716566241489945, "grad_norm": 0.0, - "learning_rate": 1.9497434270287208e-05, - "loss": 1.04, + "learning_rate": 1.8885572630483527e-05, + "loss": 1.2082, "step": 4528 }, { - "epoch": 0.12834027600668763, + "epoch": 0.1772047891071289, "grad_norm": 0.0, - "learning_rate": 1.949714693523877e-05, - "loss": 1.0161, + "learning_rate": 1.8884991196939163e-05, + "loss": 1.1439, "step": 4529 }, { - "epoch": 0.1283686134489501, + "epoch": 0.17724391579935833, "grad_norm": 0.0, - "learning_rate": 1.9496859520192336e-05, - "loss": 1.1608, + "learning_rate": 1.888440962071297e-05, + "loss": 1.158, "step": 4530 }, { - "epoch": 0.12839695089121256, + "epoch": 0.17728304249158777, "grad_norm": 0.0, - "learning_rate": 1.9496572025150332e-05, - "loss": 1.05, + "learning_rate": 1.8883827901814296e-05, + "loss": 1.1774, "step": 4531 }, { - "epoch": 0.12842528833347502, + "epoch": 0.1773221691838172, "grad_norm": 0.0, - "learning_rate": 1.949628445011517e-05, - "loss": 0.9913, + "learning_rate": 1.8883246040252478e-05, + "loss": 1.1643, "step": 4532 }, { - "epoch": 0.1284536257757375, + "epoch": 0.17736129587604663, "grad_norm": 0.0, - "learning_rate": 1.949599679508928e-05, - "loss": 0.9306, + "learning_rate": 1.8882664036036854e-05, + "loss": 1.1993, "step": 4533 }, { - "epoch": 0.12848196321799996, + "epoch": 0.17740042256827607, "grad_norm": 0.0, - "learning_rate": 1.949570906007508e-05, - "loss": 1.0245, + "learning_rate": 1.888208188917678e-05, + "loss": 1.1875, "step": 4534 }, { - "epoch": 0.1285103006602624, + "epoch": 0.1774395492605055, "grad_norm": 0.0, - "learning_rate": 1.9495421245075e-05, - "loss": 1.0982, + "learning_rate": 1.8881499599681603e-05, + "loss": 1.1647, "step": 4535 }, { - "epoch": 0.12853863810252486, + "epoch": 0.17747867595273495, "grad_norm": 0.0, - "learning_rate": 1.9495133350091463e-05, - "loss": 1.0351, + "learning_rate": 1.8880917167560668e-05, + "loss": 1.218, "step": 4536 }, { - "epoch": 0.12856697554478733, + "epoch": 0.1775178026449644, "grad_norm": 0.0, - "learning_rate": 1.949484537512689e-05, - "loss": 1.0529, + "learning_rate": 1.8880334592823333e-05, + "loss": 1.1763, "step": 4537 }, { - "epoch": 0.1285953129870498, + "epoch": 0.17755692933719383, "grad_norm": 0.0, - "learning_rate": 1.9494557320183706e-05, - "loss": 0.8805, + "learning_rate": 1.8879751875478954e-05, + "loss": 1.0676, "step": 4538 }, { - "epoch": 0.12862365042931226, + "epoch": 0.17759605602942327, "grad_norm": 0.0, - "learning_rate": 1.949426918526434e-05, - "loss": 1.061, + "learning_rate": 1.8879169015536884e-05, + "loss": 1.1329, "step": 4539 }, { - "epoch": 0.12865198787157472, + "epoch": 0.1776351827216527, "grad_norm": 0.0, - "learning_rate": 1.949398097037122e-05, - "loss": 1.0483, + "learning_rate": 1.8878586013006487e-05, + "loss": 1.1038, "step": 4540 }, { - "epoch": 0.12868032531383716, + "epoch": 0.17767430941388215, "grad_norm": 0.0, - "learning_rate": 1.9493692675506774e-05, - "loss": 1.0183, + "learning_rate": 1.8878002867897124e-05, + "loss": 0.9558, "step": 4541 }, { - "epoch": 0.12870866275609963, + "epoch": 0.1777134361061116, "grad_norm": 0.0, - "learning_rate": 1.9493404300673426e-05, - "loss": 1.009, + "learning_rate": 1.887741958021816e-05, + "loss": 1.2072, "step": 4542 }, { - "epoch": 0.1287370001983621, + "epoch": 0.17775256279834104, "grad_norm": 0.0, - "learning_rate": 1.9493115845873612e-05, - "loss": 1.1425, + "learning_rate": 1.8876836149978964e-05, + "loss": 1.1981, "step": 4543 }, { - "epoch": 0.12876533764062456, + "epoch": 0.17779168949057048, "grad_norm": 0.0, - "learning_rate": 1.9492827311109758e-05, - "loss": 1.0509, + "learning_rate": 1.8876252577188897e-05, + "loss": 1.2228, "step": 4544 }, { - "epoch": 0.12879367508288703, + "epoch": 0.17783081618279992, "grad_norm": 0.0, - "learning_rate": 1.949253869638429e-05, - "loss": 1.0232, + "learning_rate": 1.887566886185734e-05, + "loss": 1.1869, "step": 4545 }, { - "epoch": 0.1288220125251495, + "epoch": 0.17786994287502936, "grad_norm": 0.0, - "learning_rate": 1.949225000169965e-05, - "loss": 1.0918, + "learning_rate": 1.8875085003993663e-05, + "loss": 1.1678, "step": 4546 }, { - "epoch": 0.12885034996741193, + "epoch": 0.17790906956725877, "grad_norm": 0.0, - "learning_rate": 1.949196122705826e-05, - "loss": 1.0953, + "learning_rate": 1.8874501003607242e-05, + "loss": 1.161, "step": 4547 }, { - "epoch": 0.1288786874096744, + "epoch": 0.1779481962594882, "grad_norm": 0.0, - "learning_rate": 1.9491672372462554e-05, - "loss": 0.9294, + "learning_rate": 1.8873916860707453e-05, + "loss": 1.1193, "step": 4548 }, { - "epoch": 0.12890702485193686, + "epoch": 0.17798732295171765, "grad_norm": 0.0, - "learning_rate": 1.9491383437914968e-05, - "loss": 1.0046, + "learning_rate": 1.8873332575303682e-05, + "loss": 1.1425, "step": 4549 }, { - "epoch": 0.12893536229419933, + "epoch": 0.1780264496439471, "grad_norm": 0.0, - "learning_rate": 1.9491094423417934e-05, - "loss": 0.9948, + "learning_rate": 1.8872748147405303e-05, + "loss": 1.1277, "step": 4550 }, { - "epoch": 0.1289636997364618, + "epoch": 0.17806557633617653, "grad_norm": 0.0, - "learning_rate": 1.949080532897389e-05, - "loss": 1.0031, + "learning_rate": 1.8872163577021714e-05, + "loss": 1.1665, "step": 4551 }, { - "epoch": 0.12899203717872426, + "epoch": 0.17810470302840598, "grad_norm": 0.0, - "learning_rate": 1.9490516154585268e-05, - "loss": 0.9948, + "learning_rate": 1.8871578864162292e-05, + "loss": 1.3033, "step": 4552 }, { - "epoch": 0.1290203746209867, + "epoch": 0.17814382972063542, "grad_norm": 0.0, - "learning_rate": 1.9490226900254504e-05, - "loss": 1.0824, + "learning_rate": 1.887099400883643e-05, + "loss": 1.2838, "step": 4553 }, { - "epoch": 0.12904871206324917, + "epoch": 0.17818295641286486, "grad_norm": 0.0, - "learning_rate": 1.9489937565984033e-05, - "loss": 0.9629, + "learning_rate": 1.8870409011053522e-05, + "loss": 1.0154, "step": 4554 }, { - "epoch": 0.12907704950551163, + "epoch": 0.1782220831050943, "grad_norm": 0.0, - "learning_rate": 1.94896481517763e-05, - "loss": 1.0003, + "learning_rate": 1.8869823870822958e-05, + "loss": 1.0679, "step": 4555 }, { - "epoch": 0.1291053869477741, + "epoch": 0.17826120979732374, "grad_norm": 0.0, - "learning_rate": 1.948935865763373e-05, - "loss": 0.9133, + "learning_rate": 1.8869238588154138e-05, + "loss": 1.1614, "step": 4556 }, { - "epoch": 0.12913372439003656, + "epoch": 0.17830033648955318, "grad_norm": 0.0, - "learning_rate": 1.9489069083558768e-05, - "loss": 0.977, + "learning_rate": 1.886865316305646e-05, + "loss": 1.2178, "step": 4557 }, { - "epoch": 0.12916206183229903, + "epoch": 0.17833946318178262, "grad_norm": 0.0, - "learning_rate": 1.9488779429553855e-05, - "loss": 1.002, + "learning_rate": 1.8868067595539327e-05, + "loss": 1.166, "step": 4558 }, { - "epoch": 0.12919039927456147, + "epoch": 0.17837858987401206, "grad_norm": 0.0, - "learning_rate": 1.9488489695621432e-05, - "loss": 1.1081, + "learning_rate": 1.886748188561214e-05, + "loss": 1.1086, "step": 4559 }, { - "epoch": 0.12921873671682393, + "epoch": 0.1784177165662415, "grad_norm": 0.0, - "learning_rate": 1.9488199881763932e-05, - "loss": 1.0138, + "learning_rate": 1.8866896033284305e-05, + "loss": 1.0898, "step": 4560 }, { - "epoch": 0.1292470741590864, + "epoch": 0.17845684325847092, "grad_norm": 0.0, - "learning_rate": 1.9487909987983805e-05, - "loss": 1.0312, + "learning_rate": 1.886631003856523e-05, + "loss": 1.0368, "step": 4561 }, { - "epoch": 0.12927541160134887, + "epoch": 0.17849596995070036, "grad_norm": 0.0, - "learning_rate": 1.9487620014283487e-05, - "loss": 0.9723, + "learning_rate": 1.8865723901464326e-05, + "loss": 1.152, "step": 4562 }, { - "epoch": 0.12930374904361133, + "epoch": 0.1785350966429298, "grad_norm": 0.0, - "learning_rate": 1.9487329960665424e-05, - "loss": 1.0182, + "learning_rate": 1.886513762199101e-05, + "loss": 1.2094, "step": 4563 }, { - "epoch": 0.1293320864858738, + "epoch": 0.17857422333515924, "grad_norm": 0.0, - "learning_rate": 1.9487039827132056e-05, - "loss": 1.0638, + "learning_rate": 1.8864551200154687e-05, + "loss": 1.202, "step": 4564 }, { - "epoch": 0.12936042392813624, + "epoch": 0.17861335002738868, "grad_norm": 0.0, - "learning_rate": 1.9486749613685828e-05, - "loss": 1.1133, + "learning_rate": 1.8863964635964777e-05, + "loss": 1.1681, "step": 4565 }, { - "epoch": 0.1293887613703987, + "epoch": 0.17865247671961812, "grad_norm": 0.0, - "learning_rate": 1.9486459320329187e-05, - "loss": 1.1229, + "learning_rate": 1.8863377929430706e-05, + "loss": 1.2178, "step": 4566 }, { - "epoch": 0.12941709881266117, + "epoch": 0.17869160341184756, "grad_norm": 0.0, - "learning_rate": 1.9486168947064576e-05, - "loss": 1.2549, + "learning_rate": 1.8862791080561894e-05, + "loss": 1.2032, "step": 4567 }, { - "epoch": 0.12944543625492363, + "epoch": 0.178730730104077, "grad_norm": 0.0, - "learning_rate": 1.948587849389444e-05, - "loss": 1.1534, + "learning_rate": 1.886220408936776e-05, + "loss": 1.2034, "step": 4568 }, { - "epoch": 0.1294737736971861, + "epoch": 0.17876985679630644, "grad_norm": 0.0, - "learning_rate": 1.948558796082123e-05, - "loss": 1.0073, + "learning_rate": 1.8861616955857734e-05, + "loss": 1.1194, "step": 4569 }, { - "epoch": 0.12950211113944857, + "epoch": 0.17880898348853588, "grad_norm": 0.0, - "learning_rate": 1.9485297347847388e-05, - "loss": 1.0334, + "learning_rate": 1.8861029680041242e-05, + "loss": 1.2424, "step": 4570 }, { - "epoch": 0.129530448581711, + "epoch": 0.17884811018076532, "grad_norm": 0.0, - "learning_rate": 1.9485006654975366e-05, - "loss": 0.9386, + "learning_rate": 1.8860442261927722e-05, + "loss": 1.0947, "step": 4571 }, { - "epoch": 0.12955878602397347, + "epoch": 0.17888723687299476, "grad_norm": 0.0, - "learning_rate": 1.9484715882207608e-05, - "loss": 1.0567, + "learning_rate": 1.8859854701526598e-05, + "loss": 1.1826, "step": 4572 }, { - "epoch": 0.12958712346623594, + "epoch": 0.1789263635652242, "grad_norm": 0.0, - "learning_rate": 1.9484425029546567e-05, - "loss": 1.0632, + "learning_rate": 1.885926699884731e-05, + "loss": 1.0986, "step": 4573 }, { - "epoch": 0.1296154609084984, + "epoch": 0.17896549025745365, "grad_norm": 0.0, - "learning_rate": 1.9484134096994693e-05, - "loss": 1.1605, + "learning_rate": 1.8858679153899295e-05, + "loss": 1.2298, "step": 4574 }, { - "epoch": 0.12964379835076087, + "epoch": 0.1790046169496831, "grad_norm": 0.0, - "learning_rate": 1.9483843084554436e-05, - "loss": 1.1536, + "learning_rate": 1.8858091166691995e-05, + "loss": 1.2611, "step": 4575 }, { - "epoch": 0.12967213579302334, + "epoch": 0.1790437436419125, "grad_norm": 0.0, - "learning_rate": 1.9483551992228245e-05, - "loss": 1.0774, + "learning_rate": 1.885750303723485e-05, + "loss": 1.2273, "step": 4576 }, { - "epoch": 0.12970047323528577, + "epoch": 0.17908287033414194, "grad_norm": 0.0, - "learning_rate": 1.9483260820018577e-05, - "loss": 1.0396, + "learning_rate": 1.8856914765537303e-05, + "loss": 1.2407, "step": 4577 }, { - "epoch": 0.12972881067754824, + "epoch": 0.17912199702637138, "grad_norm": 0.0, - "learning_rate": 1.9482969567927878e-05, - "loss": 1.098, + "learning_rate": 1.8856326351608806e-05, + "loss": 0.9752, "step": 4578 }, { - "epoch": 0.1297571481198107, + "epoch": 0.17916112371860082, "grad_norm": 0.0, - "learning_rate": 1.9482678235958604e-05, - "loss": 1.0426, + "learning_rate": 1.8855737795458806e-05, + "loss": 1.3062, "step": 4579 }, { - "epoch": 0.12978548556207317, + "epoch": 0.17920025041083026, "grad_norm": 0.0, - "learning_rate": 1.9482386824113215e-05, - "loss": 1.0706, + "learning_rate": 1.885514909709675e-05, + "loss": 1.2061, "step": 4580 }, { - "epoch": 0.12981382300433564, + "epoch": 0.1792393771030597, "grad_norm": 0.0, - "learning_rate": 1.9482095332394157e-05, - "loss": 0.9672, + "learning_rate": 1.8854560256532098e-05, + "loss": 1.1608, "step": 4581 }, { - "epoch": 0.1298421604465981, + "epoch": 0.17927850379528915, "grad_norm": 0.0, - "learning_rate": 1.948180376080389e-05, - "loss": 0.9828, + "learning_rate": 1.8853971273774306e-05, + "loss": 1.1391, "step": 4582 }, { - "epoch": 0.12987049788886054, + "epoch": 0.1793176304875186, "grad_norm": 0.0, - "learning_rate": 1.948151210934487e-05, - "loss": 1.0029, + "learning_rate": 1.885338214883283e-05, + "loss": 1.2205, "step": 4583 }, { - "epoch": 0.129898835331123, + "epoch": 0.17935675717974803, "grad_norm": 0.0, - "learning_rate": 1.9481220378019553e-05, - "loss": 1.1037, + "learning_rate": 1.8852792881717125e-05, + "loss": 1.2137, "step": 4584 }, { - "epoch": 0.12992717277338547, + "epoch": 0.17939588387197747, "grad_norm": 0.0, - "learning_rate": 1.948092856683039e-05, - "loss": 1.075, + "learning_rate": 1.8852203472436662e-05, + "loss": 1.1208, "step": 4585 }, { - "epoch": 0.12995551021564794, + "epoch": 0.1794350105642069, "grad_norm": 0.0, - "learning_rate": 1.9480636675779853e-05, - "loss": 1.0903, + "learning_rate": 1.8851613921000906e-05, + "loss": 1.1845, "step": 4586 }, { - "epoch": 0.1299838476579104, + "epoch": 0.17947413725643635, "grad_norm": 0.0, - "learning_rate": 1.9480344704870387e-05, - "loss": 1.1167, + "learning_rate": 1.8851024227419322e-05, + "loss": 1.3884, "step": 4587 }, { - "epoch": 0.13001218510017284, + "epoch": 0.1795132639486658, "grad_norm": 0.0, - "learning_rate": 1.9480052654104458e-05, - "loss": 1.0914, + "learning_rate": 1.885043439170138e-05, + "loss": 1.1501, "step": 4588 }, { - "epoch": 0.1300405225424353, + "epoch": 0.17955239064089523, "grad_norm": 0.0, - "learning_rate": 1.947976052348453e-05, - "loss": 1.0388, + "learning_rate": 1.8849844413856548e-05, + "loss": 1.2246, "step": 4589 }, { - "epoch": 0.13006885998469778, + "epoch": 0.17959151733312465, "grad_norm": 0.0, - "learning_rate": 1.9479468313013055e-05, - "loss": 1.0937, + "learning_rate": 1.8849254293894307e-05, + "loss": 1.0839, "step": 4590 }, { - "epoch": 0.13009719742696024, + "epoch": 0.17963064402535409, "grad_norm": 0.0, - "learning_rate": 1.94791760226925e-05, - "loss": 1.0296, + "learning_rate": 1.8848664031824132e-05, + "loss": 1.2349, "step": 4591 }, { - "epoch": 0.1301255348692227, + "epoch": 0.17966977071758353, "grad_norm": 0.0, - "learning_rate": 1.9478883652525323e-05, - "loss": 1.0507, + "learning_rate": 1.8848073627655496e-05, + "loss": 1.2089, "step": 4592 }, { - "epoch": 0.13015387231148517, + "epoch": 0.17970889740981297, "grad_norm": 0.0, - "learning_rate": 1.947859120251399e-05, - "loss": 1.0308, + "learning_rate": 1.884748308139789e-05, + "loss": 1.1687, "step": 4593 }, { - "epoch": 0.1301822097537476, + "epoch": 0.1797480241020424, "grad_norm": 0.0, - "learning_rate": 1.9478298672660963e-05, - "loss": 1.0605, + "learning_rate": 1.884689239306079e-05, + "loss": 1.2243, "step": 4594 }, { - "epoch": 0.13021054719601008, + "epoch": 0.17978715079427185, "grad_norm": 0.0, - "learning_rate": 1.947800606296871e-05, - "loss": 1.0507, + "learning_rate": 1.8846301562653682e-05, + "loss": 1.122, "step": 4595 }, { - "epoch": 0.13023888463827255, + "epoch": 0.1798262774865013, "grad_norm": 0.0, - "learning_rate": 1.947771337343969e-05, - "loss": 1.1042, + "learning_rate": 1.8845710590186058e-05, + "loss": 1.1019, "step": 4596 }, { - "epoch": 0.130267222080535, + "epoch": 0.17986540417873073, "grad_norm": 0.0, - "learning_rate": 1.947742060407637e-05, - "loss": 0.9458, + "learning_rate": 1.8845119475667407e-05, + "loss": 1.178, "step": 4597 }, { - "epoch": 0.13029555952279748, + "epoch": 0.17990453087096017, "grad_norm": 0.0, - "learning_rate": 1.9477127754881215e-05, - "loss": 1.096, + "learning_rate": 1.884452821910722e-05, + "loss": 1.2399, "step": 4598 }, { - "epoch": 0.13032389696505994, + "epoch": 0.1799436575631896, "grad_norm": 0.0, - "learning_rate": 1.9476834825856696e-05, - "loss": 0.9566, + "learning_rate": 1.884393682051499e-05, + "loss": 1.2457, "step": 4599 }, { - "epoch": 0.13035223440732238, + "epoch": 0.17998278425541905, "grad_norm": 0.0, - "learning_rate": 1.9476541817005278e-05, - "loss": 1.0083, + "learning_rate": 1.884334527990022e-05, + "loss": 1.1498, "step": 4600 }, { - "epoch": 0.13038057184958485, + "epoch": 0.1800219109476485, "grad_norm": 0.0, - "learning_rate": 1.947624872832943e-05, - "loss": 1.0954, + "learning_rate": 1.8842753597272408e-05, + "loss": 1.2216, "step": 4601 }, { - "epoch": 0.1304089092918473, + "epoch": 0.18006103763987794, "grad_norm": 0.0, - "learning_rate": 1.9475955559831622e-05, - "loss": 1.0762, + "learning_rate": 1.884216177264105e-05, + "loss": 1.1417, "step": 4602 }, { - "epoch": 0.13043724673410978, + "epoch": 0.18010016433210738, "grad_norm": 0.0, - "learning_rate": 1.9475662311514317e-05, - "loss": 0.9703, + "learning_rate": 1.8841569806015652e-05, + "loss": 1.1022, "step": 4603 }, { - "epoch": 0.13046558417637225, + "epoch": 0.1801392910243368, "grad_norm": 0.0, - "learning_rate": 1.9475368983379992e-05, - "loss": 1.0342, + "learning_rate": 1.8840977697405728e-05, + "loss": 1.1118, "step": 4604 }, { - "epoch": 0.1304939216186347, + "epoch": 0.18017841771656623, "grad_norm": 0.0, - "learning_rate": 1.9475075575431112e-05, - "loss": 0.9788, + "learning_rate": 1.884038544682078e-05, + "loss": 1.1564, "step": 4605 }, { - "epoch": 0.13052225906089715, + "epoch": 0.18021754440879567, "grad_norm": 0.0, - "learning_rate": 1.9474782087670156e-05, - "loss": 1.1095, + "learning_rate": 1.8839793054270315e-05, + "loss": 1.238, "step": 4606 }, { - "epoch": 0.13055059650315962, + "epoch": 0.1802566711010251, "grad_norm": 0.0, - "learning_rate": 1.9474488520099594e-05, - "loss": 1.0567, + "learning_rate": 1.8839200519763852e-05, + "loss": 1.2336, "step": 4607 }, { - "epoch": 0.13057893394542208, + "epoch": 0.18029579779325455, "grad_norm": 0.0, - "learning_rate": 1.9474194872721892e-05, - "loss": 1.1384, + "learning_rate": 1.8838607843310907e-05, + "loss": 1.0875, "step": 4608 }, { - "epoch": 0.13060727138768455, + "epoch": 0.180334924485484, "grad_norm": 0.0, - "learning_rate": 1.947390114553953e-05, - "loss": 1.0814, + "learning_rate": 1.8838015024920993e-05, + "loss": 1.166, "step": 4609 }, { - "epoch": 0.13063560882994701, + "epoch": 0.18037405117771343, "grad_norm": 0.0, - "learning_rate": 1.947360733855498e-05, - "loss": 1.104, + "learning_rate": 1.883742206460363e-05, + "loss": 1.1813, "step": 4610 }, { - "epoch": 0.13066394627220948, + "epoch": 0.18041317786994288, "grad_norm": 0.0, - "learning_rate": 1.9473313451770722e-05, - "loss": 1.0585, + "learning_rate": 1.8836828962368344e-05, + "loss": 1.2397, "step": 4611 }, { - "epoch": 0.13069228371447192, + "epoch": 0.18045230456217232, "grad_norm": 0.0, - "learning_rate": 1.947301948518922e-05, - "loss": 1.079, + "learning_rate": 1.8836235718224664e-05, + "loss": 1.1896, "step": 4612 }, { - "epoch": 0.13072062115673438, + "epoch": 0.18049143125440176, "grad_norm": 0.0, - "learning_rate": 1.9472725438812963e-05, - "loss": 1.0851, + "learning_rate": 1.8835642332182105e-05, + "loss": 1.1949, "step": 4613 }, { - "epoch": 0.13074895859899685, + "epoch": 0.1805305579466312, "grad_norm": 0.0, - "learning_rate": 1.947243131264442e-05, - "loss": 1.0772, + "learning_rate": 1.8835048804250204e-05, + "loss": 1.218, "step": 4614 }, { - "epoch": 0.13077729604125932, + "epoch": 0.18056968463886064, "grad_norm": 0.0, - "learning_rate": 1.9472137106686067e-05, - "loss": 1.0546, + "learning_rate": 1.883445513443849e-05, + "loss": 1.0898, "step": 4615 }, { - "epoch": 0.13080563348352178, + "epoch": 0.18060881133109008, "grad_norm": 0.0, - "learning_rate": 1.947184282094039e-05, - "loss": 1.0691, + "learning_rate": 1.8833861322756496e-05, + "loss": 1.0529, "step": 4616 }, { - "epoch": 0.13083397092578425, + "epoch": 0.18064793802331952, "grad_norm": 0.0, - "learning_rate": 1.9471548455409866e-05, - "loss": 0.9499, + "learning_rate": 1.883326736921376e-05, + "loss": 1.1663, "step": 4617 }, { - "epoch": 0.1308623083680467, + "epoch": 0.18068706471554893, "grad_norm": 0.0, - "learning_rate": 1.9471254010096967e-05, - "loss": 1.1187, + "learning_rate": 1.8832673273819812e-05, + "loss": 1.1663, "step": 4618 }, { - "epoch": 0.13089064581030915, + "epoch": 0.18072619140777837, "grad_norm": 0.0, - "learning_rate": 1.947095948500418e-05, - "loss": 1.1072, + "learning_rate": 1.8832079036584208e-05, + "loss": 1.208, "step": 4619 }, { - "epoch": 0.13091898325257162, + "epoch": 0.18076531810000782, "grad_norm": 0.0, - "learning_rate": 1.9470664880133986e-05, - "loss": 0.9824, + "learning_rate": 1.8831484657516478e-05, + "loss": 1.17, "step": 4620 }, { - "epoch": 0.13094732069483409, + "epoch": 0.18080444479223726, "grad_norm": 0.0, - "learning_rate": 1.9470370195488862e-05, - "loss": 0.9617, + "learning_rate": 1.883089013662617e-05, + "loss": 1.1344, "step": 4621 }, { - "epoch": 0.13097565813709655, + "epoch": 0.1808435714844667, "grad_norm": 0.0, - "learning_rate": 1.9470075431071293e-05, - "loss": 1.0363, + "learning_rate": 1.8830295473922833e-05, + "loss": 1.1466, "step": 4622 }, { - "epoch": 0.13100399557935902, + "epoch": 0.18088269817669614, "grad_norm": 0.0, - "learning_rate": 1.9469780586883765e-05, - "loss": 1.0337, + "learning_rate": 1.8829700669416017e-05, + "loss": 1.1217, "step": 4623 }, { - "epoch": 0.13103233302162146, + "epoch": 0.18092182486892558, "grad_norm": 0.0, - "learning_rate": 1.9469485662928757e-05, - "loss": 0.9731, + "learning_rate": 1.8829105723115272e-05, + "loss": 1.2742, "step": 4624 }, { - "epoch": 0.13106067046388392, + "epoch": 0.18096095156115502, "grad_norm": 0.0, - "learning_rate": 1.9469190659208754e-05, - "loss": 1.1471, + "learning_rate": 1.882851063503015e-05, + "loss": 1.2891, "step": 4625 }, { - "epoch": 0.1310890079061464, + "epoch": 0.18100007825338446, "grad_norm": 0.0, - "learning_rate": 1.9468895575726243e-05, - "loss": 1.0154, + "learning_rate": 1.8827915405170212e-05, + "loss": 1.1975, "step": 4626 }, { - "epoch": 0.13111734534840885, + "epoch": 0.1810392049456139, "grad_norm": 0.0, - "learning_rate": 1.946860041248371e-05, - "loss": 1.0814, + "learning_rate": 1.8827320033545015e-05, + "loss": 1.2396, "step": 4627 }, { - "epoch": 0.13114568279067132, + "epoch": 0.18107833163784334, "grad_norm": 0.0, - "learning_rate": 1.9468305169483637e-05, - "loss": 1.1037, + "learning_rate": 1.8826724520164118e-05, + "loss": 1.0533, "step": 4628 }, { - "epoch": 0.13117402023293379, + "epoch": 0.18111745833007278, "grad_norm": 0.0, - "learning_rate": 1.9468009846728515e-05, - "loss": 0.9931, + "learning_rate": 1.882612886503709e-05, + "loss": 1.0398, "step": 4629 }, { - "epoch": 0.13120235767519622, + "epoch": 0.18115658502230222, "grad_norm": 0.0, - "learning_rate": 1.946771444422083e-05, - "loss": 1.0208, + "learning_rate": 1.8825533068173486e-05, + "loss": 1.1506, "step": 4630 }, { - "epoch": 0.1312306951174587, + "epoch": 0.18119571171453167, "grad_norm": 0.0, - "learning_rate": 1.946741896196307e-05, - "loss": 1.0169, + "learning_rate": 1.8824937129582886e-05, + "loss": 1.2109, "step": 4631 }, { - "epoch": 0.13125903255972116, + "epoch": 0.1812348384067611, "grad_norm": 0.0, - "learning_rate": 1.9467123399957724e-05, - "loss": 1.0142, + "learning_rate": 1.882434104927485e-05, + "loss": 1.2931, "step": 4632 }, { - "epoch": 0.13128737000198362, + "epoch": 0.18127396509899052, "grad_norm": 0.0, - "learning_rate": 1.9466827758207284e-05, - "loss": 1.0432, + "learning_rate": 1.8823744827258954e-05, + "loss": 1.145, "step": 4633 }, { - "epoch": 0.1313157074442461, + "epoch": 0.18131309179121996, "grad_norm": 0.0, - "learning_rate": 1.9466532036714235e-05, - "loss": 1.1512, + "learning_rate": 1.8823148463544775e-05, + "loss": 1.0826, "step": 4634 }, { - "epoch": 0.13134404488650855, + "epoch": 0.1813522184834494, "grad_norm": 0.0, - "learning_rate": 1.9466236235481074e-05, - "loss": 1.0091, + "learning_rate": 1.882255195814189e-05, + "loss": 1.2265, "step": 4635 }, { - "epoch": 0.131372382328771, + "epoch": 0.18139134517567884, "grad_norm": 0.0, - "learning_rate": 1.9465940354510287e-05, - "loss": 1.0558, + "learning_rate": 1.882195531105987e-05, + "loss": 1.1564, "step": 4636 }, { - "epoch": 0.13140071977103346, + "epoch": 0.18143047186790828, "grad_norm": 0.0, - "learning_rate": 1.9465644393804373e-05, - "loss": 1.0006, + "learning_rate": 1.8821358522308306e-05, + "loss": 1.2659, "step": 4637 }, { - "epoch": 0.13142905721329592, + "epoch": 0.18146959856013772, "grad_norm": 0.0, - "learning_rate": 1.946534835336582e-05, - "loss": 1.0156, + "learning_rate": 1.8820761591896775e-05, + "loss": 1.0577, "step": 4638 }, { - "epoch": 0.1314573946555584, + "epoch": 0.18150872525236716, "grad_norm": 0.0, - "learning_rate": 1.9465052233197125e-05, - "loss": 1.0824, + "learning_rate": 1.8820164519834868e-05, + "loss": 1.1875, "step": 4639 }, { - "epoch": 0.13148573209782086, + "epoch": 0.1815478519445966, "grad_norm": 0.0, - "learning_rate": 1.9464756033300775e-05, - "loss": 1.0379, + "learning_rate": 1.881956730613217e-05, + "loss": 1.1053, "step": 4640 }, { - "epoch": 0.13151406954008332, + "epoch": 0.18158697863682605, "grad_norm": 0.0, - "learning_rate": 1.9464459753679272e-05, - "loss": 0.9449, + "learning_rate": 1.8818969950798274e-05, + "loss": 1.2126, "step": 4641 }, { - "epoch": 0.13154240698234576, + "epoch": 0.1816261053290555, "grad_norm": 0.0, - "learning_rate": 1.9464163394335112e-05, - "loss": 1.0157, + "learning_rate": 1.881837245384277e-05, + "loss": 1.1569, "step": 4642 }, { - "epoch": 0.13157074442460823, + "epoch": 0.18166523202128493, "grad_norm": 0.0, - "learning_rate": 1.946386695527079e-05, - "loss": 1.0304, + "learning_rate": 1.8817774815275256e-05, + "loss": 1.1471, "step": 4643 }, { - "epoch": 0.1315990818668707, + "epoch": 0.18170435871351437, "grad_norm": 0.0, - "learning_rate": 1.9463570436488803e-05, - "loss": 0.9855, + "learning_rate": 1.881717703510533e-05, + "loss": 1.1234, "step": 4644 }, { - "epoch": 0.13162741930913316, + "epoch": 0.1817434854057438, "grad_norm": 0.0, - "learning_rate": 1.9463273837991643e-05, - "loss": 0.9715, + "learning_rate": 1.881657911334258e-05, + "loss": 1.2017, "step": 4645 }, { - "epoch": 0.13165575675139563, + "epoch": 0.18178261209797325, "grad_norm": 0.0, - "learning_rate": 1.946297715978182e-05, - "loss": 0.9922, + "learning_rate": 1.8815981049996627e-05, + "loss": 1.1543, "step": 4646 }, { - "epoch": 0.1316840941936581, + "epoch": 0.18182173879020266, "grad_norm": 0.0, - "learning_rate": 1.946268040186182e-05, - "loss": 1.0871, + "learning_rate": 1.881538284507706e-05, + "loss": 1.0834, "step": 4647 }, { - "epoch": 0.13171243163592053, + "epoch": 0.1818608654824321, "grad_norm": 0.0, - "learning_rate": 1.946238356423415e-05, - "loss": 1.1496, + "learning_rate": 1.8814784498593494e-05, + "loss": 1.1343, "step": 4648 }, { - "epoch": 0.131740769078183, + "epoch": 0.18189999217466155, "grad_norm": 0.0, - "learning_rate": 1.946208664690131e-05, - "loss": 0.9843, + "learning_rate": 1.8814186010555533e-05, + "loss": 1.2212, "step": 4649 }, { - "epoch": 0.13176910652044546, + "epoch": 0.181939118866891, "grad_norm": 0.0, - "learning_rate": 1.9461789649865802e-05, - "loss": 1.0144, + "learning_rate": 1.881358738097279e-05, + "loss": 1.2234, "step": 4650 }, { - "epoch": 0.13179744396270793, + "epoch": 0.18197824555912043, "grad_norm": 0.0, - "learning_rate": 1.946149257313013e-05, - "loss": 1.1156, + "learning_rate": 1.8812988609854877e-05, + "loss": 1.1791, "step": 4651 }, { - "epoch": 0.1318257814049704, + "epoch": 0.18201737225134987, "grad_norm": 0.0, - "learning_rate": 1.9461195416696787e-05, - "loss": 0.9905, + "learning_rate": 1.881238969721141e-05, + "loss": 1.1073, "step": 4652 }, { - "epoch": 0.13185411884723286, + "epoch": 0.1820564989435793, "grad_norm": 0.0, - "learning_rate": 1.9460898180568285e-05, - "loss": 1.0906, + "learning_rate": 1.881179064305201e-05, + "loss": 1.1268, "step": 4653 }, { - "epoch": 0.1318824562894953, + "epoch": 0.18209562563580875, "grad_norm": 0.0, - "learning_rate": 1.946060086474712e-05, - "loss": 1.1064, + "learning_rate": 1.881119144738629e-05, + "loss": 1.1137, "step": 4654 }, { - "epoch": 0.13191079373175776, + "epoch": 0.1821347523280382, "grad_norm": 0.0, - "learning_rate": 1.9460303469235808e-05, - "loss": 1.0056, + "learning_rate": 1.8810592110223876e-05, + "loss": 1.185, "step": 4655 }, { - "epoch": 0.13193913117402023, + "epoch": 0.18217387902026763, "grad_norm": 0.0, - "learning_rate": 1.946000599403684e-05, - "loss": 1.1154, + "learning_rate": 1.8809992631574395e-05, + "loss": 1.0962, "step": 4656 }, { - "epoch": 0.1319674686162827, + "epoch": 0.18221300571249707, "grad_norm": 0.0, - "learning_rate": 1.945970843915273e-05, - "loss": 1.0304, + "learning_rate": 1.880939301144747e-05, + "loss": 1.2541, "step": 4657 }, { - "epoch": 0.13199580605854516, + "epoch": 0.1822521324047265, "grad_norm": 0.0, - "learning_rate": 1.9459410804585984e-05, - "loss": 0.9067, + "learning_rate": 1.8808793249852737e-05, + "loss": 1.2389, "step": 4658 }, { - "epoch": 0.13202414350080763, + "epoch": 0.18229125909695595, "grad_norm": 0.0, - "learning_rate": 1.9459113090339107e-05, - "loss": 1.1121, + "learning_rate": 1.880819334679982e-05, + "loss": 1.1641, "step": 4659 }, { - "epoch": 0.13205248094307007, + "epoch": 0.1823303857891854, "grad_norm": 0.0, - "learning_rate": 1.945881529641461e-05, - "loss": 1.0396, + "learning_rate": 1.8807593302298354e-05, + "loss": 1.1259, "step": 4660 }, { - "epoch": 0.13208081838533253, + "epoch": 0.1823695124814148, "grad_norm": 0.0, - "learning_rate": 1.9458517422814998e-05, - "loss": 1.0558, + "learning_rate": 1.8806993116357975e-05, + "loss": 1.1758, "step": 4661 }, { - "epoch": 0.132109155827595, + "epoch": 0.18240863917364425, "grad_norm": 0.0, - "learning_rate": 1.9458219469542782e-05, - "loss": 0.9856, + "learning_rate": 1.8806392788988325e-05, + "loss": 1.1133, "step": 4662 }, { - "epoch": 0.13213749326985746, + "epoch": 0.1824477658658737, "grad_norm": 0.0, - "learning_rate": 1.9457921436600473e-05, - "loss": 1.044, + "learning_rate": 1.880579232019904e-05, + "loss": 1.0837, "step": 4663 }, { - "epoch": 0.13216583071211993, + "epoch": 0.18248689255810313, "grad_norm": 0.0, - "learning_rate": 1.9457623323990574e-05, - "loss": 1.0454, + "learning_rate": 1.8805191709999767e-05, + "loss": 1.1856, "step": 4664 }, { - "epoch": 0.1321941681543824, + "epoch": 0.18252601925033257, "grad_norm": 0.0, - "learning_rate": 1.9457325131715608e-05, - "loss": 1.078, + "learning_rate": 1.8804590958400147e-05, + "loss": 1.2021, "step": 4665 }, { - "epoch": 0.13222250559664483, + "epoch": 0.182565145942562, "grad_norm": 0.0, - "learning_rate": 1.9457026859778077e-05, - "loss": 0.9882, + "learning_rate": 1.8803990065409826e-05, + "loss": 1.2362, "step": 4666 }, { - "epoch": 0.1322508430389073, + "epoch": 0.18260427263479145, "grad_norm": 0.0, - "learning_rate": 1.94567285081805e-05, - "loss": 0.9367, + "learning_rate": 1.8803389031038462e-05, + "loss": 1.0806, "step": 4667 }, { - "epoch": 0.13227918048116977, + "epoch": 0.1826433993270209, "grad_norm": 0.0, - "learning_rate": 1.9456430076925382e-05, - "loss": 1.0106, + "learning_rate": 1.88027878552957e-05, + "loss": 1.3218, "step": 4668 }, { - "epoch": 0.13230751792343223, + "epoch": 0.18268252601925034, "grad_norm": 0.0, - "learning_rate": 1.9456131566015245e-05, - "loss": 1.1138, + "learning_rate": 1.880218653819119e-05, + "loss": 1.0877, "step": 4669 }, { - "epoch": 0.1323358553656947, + "epoch": 0.18272165271147978, "grad_norm": 0.0, - "learning_rate": 1.9455832975452604e-05, - "loss": 1.0162, + "learning_rate": 1.88015850797346e-05, + "loss": 0.9909, "step": 4670 }, { - "epoch": 0.13236419280795717, + "epoch": 0.18276077940370922, "grad_norm": 0.0, - "learning_rate": 1.9455534305239964e-05, - "loss": 0.9405, + "learning_rate": 1.8800983479935585e-05, + "loss": 1.1028, "step": 4671 }, { - "epoch": 0.1323925302502196, + "epoch": 0.18279990609593866, "grad_norm": 0.0, - "learning_rate": 1.945523555537985e-05, - "loss": 0.9561, + "learning_rate": 1.88003817388038e-05, + "loss": 1.2654, "step": 4672 }, { - "epoch": 0.13242086769248207, + "epoch": 0.1828390327881681, "grad_norm": 0.0, - "learning_rate": 1.9454936725874775e-05, - "loss": 1.019, + "learning_rate": 1.879977985634891e-05, + "loss": 1.1433, "step": 4673 }, { - "epoch": 0.13244920513474454, + "epoch": 0.18287815948039754, "grad_norm": 0.0, - "learning_rate": 1.945463781672726e-05, - "loss": 1.1854, + "learning_rate": 1.8799177832580585e-05, + "loss": 1.2122, "step": 4674 }, { - "epoch": 0.132477542577007, + "epoch": 0.18291728617262695, "grad_norm": 0.0, - "learning_rate": 1.9454338827939817e-05, - "loss": 1.0361, + "learning_rate": 1.8798575667508486e-05, + "loss": 1.147, "step": 4675 }, { - "epoch": 0.13250588001926947, + "epoch": 0.1829564128648564, "grad_norm": 0.0, - "learning_rate": 1.945403975951497e-05, - "loss": 1.073, + "learning_rate": 1.879797336114229e-05, + "loss": 1.2271, "step": 4676 }, { - "epoch": 0.13253421746153193, + "epoch": 0.18299553955708583, "grad_norm": 0.0, - "learning_rate": 1.9453740611455232e-05, - "loss": 1.0313, + "learning_rate": 1.8797370913491666e-05, + "loss": 1.1489, "step": 4677 }, { - "epoch": 0.13256255490379437, + "epoch": 0.18303466624931528, "grad_norm": 0.0, - "learning_rate": 1.9453441383763128e-05, - "loss": 0.9621, + "learning_rate": 1.879676832456629e-05, + "loss": 1.1574, "step": 4678 }, { - "epoch": 0.13259089234605684, + "epoch": 0.18307379294154472, "grad_norm": 0.0, - "learning_rate": 1.9453142076441173e-05, - "loss": 1.0381, + "learning_rate": 1.8796165594375835e-05, + "loss": 1.0451, "step": 4679 }, { - "epoch": 0.1326192297883193, + "epoch": 0.18311291963377416, "grad_norm": 0.0, - "learning_rate": 1.9452842689491896e-05, - "loss": 1.1113, + "learning_rate": 1.8795562722929986e-05, + "loss": 1.2725, "step": 4680 }, { - "epoch": 0.13264756723058177, + "epoch": 0.1831520463260036, "grad_norm": 0.0, - "learning_rate": 1.9452543222917816e-05, - "loss": 1.0222, + "learning_rate": 1.8794959710238417e-05, + "loss": 1.0546, "step": 4681 }, { - "epoch": 0.13267590467284424, + "epoch": 0.18319117301823304, "grad_norm": 0.0, - "learning_rate": 1.945224367672145e-05, - "loss": 1.1239, + "learning_rate": 1.879435655631082e-05, + "loss": 1.1812, "step": 4682 }, { - "epoch": 0.1327042421151067, + "epoch": 0.18323029971046248, "grad_norm": 0.0, - "learning_rate": 1.9451944050905328e-05, - "loss": 1.0332, + "learning_rate": 1.8793753261156874e-05, + "loss": 1.1942, "step": 4683 }, { - "epoch": 0.13273257955736914, + "epoch": 0.18326942640269192, "grad_norm": 0.0, - "learning_rate": 1.945164434547197e-05, - "loss": 1.0343, + "learning_rate": 1.879314982478627e-05, + "loss": 1.0081, "step": 4684 }, { - "epoch": 0.1327609169996316, + "epoch": 0.18330855309492136, "grad_norm": 0.0, - "learning_rate": 1.9451344560423905e-05, - "loss": 1.0853, + "learning_rate": 1.87925462472087e-05, + "loss": 1.2726, "step": 4685 }, { - "epoch": 0.13278925444189407, + "epoch": 0.1833476797871508, "grad_norm": 0.0, - "learning_rate": 1.945104469576365e-05, - "loss": 0.9388, + "learning_rate": 1.8791942528433854e-05, + "loss": 1.2358, "step": 4686 }, { - "epoch": 0.13281759188415654, + "epoch": 0.18338680647938024, "grad_norm": 0.0, - "learning_rate": 1.9450744751493743e-05, - "loss": 0.9674, + "learning_rate": 1.8791338668471427e-05, + "loss": 1.158, "step": 4687 }, { - "epoch": 0.132845929326419, + "epoch": 0.18342593317160968, "grad_norm": 0.0, - "learning_rate": 1.94504447276167e-05, - "loss": 1.1224, + "learning_rate": 1.879073466733112e-05, + "loss": 1.1735, "step": 4688 }, { - "epoch": 0.13287426676868147, + "epoch": 0.1834650598638391, "grad_norm": 0.0, - "learning_rate": 1.945014462413505e-05, - "loss": 0.8661, + "learning_rate": 1.8790130525022625e-05, + "loss": 1.1477, "step": 4689 }, { - "epoch": 0.1329026042109439, + "epoch": 0.18350418655606854, "grad_norm": 0.0, - "learning_rate": 1.9449844441051328e-05, - "loss": 1.0561, + "learning_rate": 1.878952624155565e-05, + "loss": 1.1797, "step": 4690 }, { - "epoch": 0.13293094165320637, + "epoch": 0.18354331324829798, "grad_norm": 0.0, - "learning_rate": 1.944954417836805e-05, - "loss": 1.0734, + "learning_rate": 1.87889218169399e-05, + "loss": 1.25, "step": 4691 }, { - "epoch": 0.13295927909546884, + "epoch": 0.18358243994052742, "grad_norm": 0.0, - "learning_rate": 1.9449243836087758e-05, - "loss": 1.0469, + "learning_rate": 1.8788317251185077e-05, + "loss": 1.2106, "step": 4692 }, { - "epoch": 0.1329876165377313, + "epoch": 0.18362156663275686, "grad_norm": 0.0, - "learning_rate": 1.9448943414212972e-05, - "loss": 1.105, + "learning_rate": 1.878771254430089e-05, + "loss": 1.1262, "step": 4693 }, { - "epoch": 0.13301595397999377, + "epoch": 0.1836606933249863, "grad_norm": 0.0, - "learning_rate": 1.944864291274623e-05, - "loss": 1.1712, + "learning_rate": 1.8787107696297052e-05, + "loss": 1.1697, "step": 4694 }, { - "epoch": 0.13304429142225624, + "epoch": 0.18369982001721574, "grad_norm": 0.0, - "learning_rate": 1.944834233169006e-05, - "loss": 1.1055, + "learning_rate": 1.8786502707183277e-05, + "loss": 1.2346, "step": 4695 }, { - "epoch": 0.13307262886451868, + "epoch": 0.18373894670944518, "grad_norm": 0.0, - "learning_rate": 1.9448041671046992e-05, - "loss": 0.8981, + "learning_rate": 1.878589757696928e-05, + "loss": 1.1523, "step": 4696 }, { - "epoch": 0.13310096630678114, + "epoch": 0.18377807340167462, "grad_norm": 0.0, - "learning_rate": 1.944774093081956e-05, - "loss": 0.9945, + "learning_rate": 1.8785292305664774e-05, + "loss": 1.1196, "step": 4697 }, { - "epoch": 0.1331293037490436, + "epoch": 0.18381720009390407, "grad_norm": 0.0, - "learning_rate": 1.94474401110103e-05, - "loss": 0.9541, + "learning_rate": 1.878468689327948e-05, + "loss": 1.1682, "step": 4698 }, { - "epoch": 0.13315764119130608, + "epoch": 0.1838563267861335, "grad_norm": 0.0, - "learning_rate": 1.944713921162174e-05, - "loss": 1.0107, + "learning_rate": 1.8784081339823127e-05, + "loss": 1.1339, "step": 4699 }, { - "epoch": 0.13318597863356854, + "epoch": 0.18389545347836295, "grad_norm": 0.0, - "learning_rate": 1.9446838232656426e-05, - "loss": 1.0943, + "learning_rate": 1.8783475645305436e-05, + "loss": 1.1525, "step": 4700 }, { - "epoch": 0.133214316075831, + "epoch": 0.1839345801705924, "grad_norm": 0.0, - "learning_rate": 1.9446537174116877e-05, - "loss": 0.9699, + "learning_rate": 1.878286980973613e-05, + "loss": 1.1415, "step": 4701 }, { - "epoch": 0.13324265351809345, + "epoch": 0.18397370686282183, "grad_norm": 0.0, - "learning_rate": 1.9446236036005645e-05, - "loss": 1.0504, + "learning_rate": 1.8782263833124937e-05, + "loss": 1.1614, "step": 4702 }, { - "epoch": 0.1332709909603559, + "epoch": 0.18401283355505127, "grad_norm": 0.0, - "learning_rate": 1.9445934818325255e-05, - "loss": 1.094, + "learning_rate": 1.8781657715481594e-05, + "loss": 1.0883, "step": 4703 }, { - "epoch": 0.13329932840261838, + "epoch": 0.18405196024728068, "grad_norm": 0.0, - "learning_rate": 1.9445633521078246e-05, - "loss": 1.0255, + "learning_rate": 1.8781051456815834e-05, + "loss": 1.1976, "step": 4704 }, { - "epoch": 0.13332766584488084, + "epoch": 0.18409108693951012, "grad_norm": 0.0, - "learning_rate": 1.9445332144267162e-05, - "loss": 1.0834, + "learning_rate": 1.8780445057137387e-05, + "loss": 1.1927, "step": 4705 }, { - "epoch": 0.1333560032871433, + "epoch": 0.18413021363173956, "grad_norm": 0.0, - "learning_rate": 1.9445030687894535e-05, - "loss": 1.1628, + "learning_rate": 1.8779838516455998e-05, + "loss": 1.3055, "step": 4706 }, { - "epoch": 0.13338434072940578, + "epoch": 0.184169340323969, "grad_norm": 0.0, - "learning_rate": 1.944472915196291e-05, - "loss": 1.0743, + "learning_rate": 1.8779231834781405e-05, + "loss": 1.1566, "step": 4707 }, { - "epoch": 0.13341267817166821, + "epoch": 0.18420846701619845, "grad_norm": 0.0, - "learning_rate": 1.9444427536474823e-05, - "loss": 1.0686, + "learning_rate": 1.8778625012123347e-05, + "loss": 1.1536, "step": 4708 }, { - "epoch": 0.13344101561393068, + "epoch": 0.1842475937084279, "grad_norm": 0.0, - "learning_rate": 1.9444125841432817e-05, - "loss": 1.0804, + "learning_rate": 1.8778018048491574e-05, + "loss": 1.1071, "step": 4709 }, { - "epoch": 0.13346935305619315, + "epoch": 0.18428672040065733, "grad_norm": 0.0, - "learning_rate": 1.944382406683943e-05, - "loss": 1.0238, + "learning_rate": 1.877741094389583e-05, + "loss": 1.2478, "step": 4710 }, { - "epoch": 0.1334976904984556, + "epoch": 0.18432584709288677, "grad_norm": 0.0, - "learning_rate": 1.9443522212697208e-05, - "loss": 1.0595, + "learning_rate": 1.8776803698345866e-05, + "loss": 1.113, "step": 4711 }, { - "epoch": 0.13352602794071808, + "epoch": 0.1843649737851162, "grad_norm": 0.0, - "learning_rate": 1.944322027900869e-05, - "loss": 1.0039, + "learning_rate": 1.877619631185143e-05, + "loss": 1.1661, "step": 4712 }, { - "epoch": 0.13355436538298054, + "epoch": 0.18440410047734565, "grad_norm": 0.0, - "learning_rate": 1.9442918265776424e-05, - "loss": 1.0212, + "learning_rate": 1.877558878442228e-05, + "loss": 0.9972, "step": 4713 }, { - "epoch": 0.13358270282524298, + "epoch": 0.1844432271695751, "grad_norm": 0.0, - "learning_rate": 1.9442616173002945e-05, - "loss": 1.0783, + "learning_rate": 1.877498111606817e-05, + "loss": 1.22, "step": 4714 }, { - "epoch": 0.13361104026750545, + "epoch": 0.18448235386180453, "grad_norm": 0.0, - "learning_rate": 1.944231400069081e-05, - "loss": 1.0482, + "learning_rate": 1.8774373306798858e-05, + "loss": 1.0317, "step": 4715 }, { - "epoch": 0.13363937770976791, + "epoch": 0.18452148055403397, "grad_norm": 0.0, - "learning_rate": 1.944201174884255e-05, - "loss": 1.0509, + "learning_rate": 1.8773765356624104e-05, + "loss": 1.1992, "step": 4716 }, { - "epoch": 0.13366771515203038, + "epoch": 0.18456060724626341, "grad_norm": 0.0, - "learning_rate": 1.944170941746073e-05, - "loss": 1.1552, + "learning_rate": 1.8773157265553676e-05, + "loss": 1.1731, "step": 4717 }, { - "epoch": 0.13369605259429285, + "epoch": 0.18459973393849283, "grad_norm": 0.0, - "learning_rate": 1.9441407006547875e-05, - "loss": 1.1037, + "learning_rate": 1.8772549033597336e-05, + "loss": 1.2013, "step": 4718 }, { - "epoch": 0.1337243900365553, + "epoch": 0.18463886063072227, "grad_norm": 0.0, - "learning_rate": 1.944110451610655e-05, - "loss": 1.0044, + "learning_rate": 1.877194066076485e-05, + "loss": 1.1014, "step": 4719 }, { - "epoch": 0.13375272747881775, + "epoch": 0.1846779873229517, "grad_norm": 0.0, - "learning_rate": 1.9440801946139293e-05, - "loss": 0.9925, + "learning_rate": 1.877133214706599e-05, + "loss": 1.1069, "step": 4720 }, { - "epoch": 0.13378106492108022, + "epoch": 0.18471711401518115, "grad_norm": 0.0, - "learning_rate": 1.9440499296648653e-05, - "loss": 1.0305, + "learning_rate": 1.8770723492510522e-05, + "loss": 1.1208, "step": 4721 }, { - "epoch": 0.13380940236334268, + "epoch": 0.1847562407074106, "grad_norm": 0.0, - "learning_rate": 1.9440196567637188e-05, - "loss": 1.1067, + "learning_rate": 1.8770114697108227e-05, + "loss": 1.2175, "step": 4722 }, { - "epoch": 0.13383773980560515, + "epoch": 0.18479536739964003, "grad_norm": 0.0, - "learning_rate": 1.9439893759107435e-05, - "loss": 1.0142, + "learning_rate": 1.8769505760868884e-05, + "loss": 1.1973, "step": 4723 }, { - "epoch": 0.13386607724786762, + "epoch": 0.18483449409186947, "grad_norm": 0.0, - "learning_rate": 1.9439590871061956e-05, - "loss": 0.9807, + "learning_rate": 1.8768896683802263e-05, + "loss": 1.1824, "step": 4724 }, { - "epoch": 0.13389441469013008, + "epoch": 0.1848736207840989, "grad_norm": 0.0, - "learning_rate": 1.9439287903503295e-05, - "loss": 0.9465, + "learning_rate": 1.8768287465918152e-05, + "loss": 1.1668, "step": 4725 }, { - "epoch": 0.13392275213239252, + "epoch": 0.18491274747632835, "grad_norm": 0.0, - "learning_rate": 1.9438984856434008e-05, - "loss": 1.0223, + "learning_rate": 1.876767810722633e-05, + "loss": 1.1393, "step": 4726 }, { - "epoch": 0.13395108957465499, + "epoch": 0.1849518741685578, "grad_norm": 0.0, - "learning_rate": 1.9438681729856648e-05, - "loss": 1.0444, + "learning_rate": 1.8767068607736586e-05, + "loss": 1.2121, "step": 4727 }, { - "epoch": 0.13397942701691745, + "epoch": 0.18499100086078724, "grad_norm": 0.0, - "learning_rate": 1.9438378523773763e-05, - "loss": 0.9922, + "learning_rate": 1.8766458967458704e-05, + "loss": 1.1216, "step": 4728 }, { - "epoch": 0.13400776445917992, + "epoch": 0.18503012755301668, "grad_norm": 0.0, - "learning_rate": 1.9438075238187916e-05, - "loss": 1.1235, + "learning_rate": 1.8765849186402475e-05, + "loss": 1.2924, "step": 4729 }, { - "epoch": 0.13403610190144238, + "epoch": 0.18506925424524612, "grad_norm": 0.0, - "learning_rate": 1.9437771873101653e-05, - "loss": 1.0863, + "learning_rate": 1.8765239264577695e-05, + "loss": 1.1535, "step": 4730 }, { - "epoch": 0.13406443934370485, + "epoch": 0.18510838093747556, "grad_norm": 0.0, - "learning_rate": 1.9437468428517533e-05, - "loss": 1.0628, + "learning_rate": 1.8764629201994152e-05, + "loss": 1.2347, "step": 4731 }, { - "epoch": 0.1340927767859673, + "epoch": 0.18514750762970497, "grad_norm": 0.0, - "learning_rate": 1.9437164904438114e-05, - "loss": 1.0248, + "learning_rate": 1.876401899866165e-05, + "loss": 1.1031, "step": 4732 }, { - "epoch": 0.13412111422822975, + "epoch": 0.1851866343219344, "grad_norm": 0.0, - "learning_rate": 1.9436861300865947e-05, - "loss": 1.1146, + "learning_rate": 1.876340865458998e-05, + "loss": 1.1432, "step": 4733 }, { - "epoch": 0.13414945167049222, + "epoch": 0.18522576101416385, "grad_norm": 0.0, - "learning_rate": 1.9436557617803594e-05, - "loss": 1.1315, + "learning_rate": 1.8762798169788958e-05, + "loss": 1.0842, "step": 4734 }, { - "epoch": 0.1341777891127547, + "epoch": 0.1852648877063933, "grad_norm": 0.0, - "learning_rate": 1.9436253855253612e-05, - "loss": 1.0934, + "learning_rate": 1.8762187544268368e-05, + "loss": 1.2186, "step": 4735 }, { - "epoch": 0.13420612655501715, + "epoch": 0.18530401439862274, "grad_norm": 0.0, - "learning_rate": 1.9435950013218564e-05, - "loss": 1.0674, + "learning_rate": 1.8761576778038032e-05, + "loss": 1.1904, "step": 4736 }, { - "epoch": 0.13423446399727962, + "epoch": 0.18534314109085218, "grad_norm": 0.0, - "learning_rate": 1.9435646091701e-05, - "loss": 1.0822, + "learning_rate": 1.8760965871107748e-05, + "loss": 1.1292, "step": 4737 }, { - "epoch": 0.13426280143954206, + "epoch": 0.18538226778308162, "grad_norm": 0.0, - "learning_rate": 1.9435342090703485e-05, - "loss": 0.981, + "learning_rate": 1.8760354823487334e-05, + "loss": 1.1524, "step": 4738 }, { - "epoch": 0.13429113888180452, + "epoch": 0.18542139447531106, "grad_norm": 0.0, - "learning_rate": 1.9435038010228584e-05, - "loss": 1.0672, + "learning_rate": 1.8759743635186596e-05, + "loss": 1.1739, "step": 4739 }, { - "epoch": 0.134319476324067, + "epoch": 0.1854605211675405, "grad_norm": 0.0, - "learning_rate": 1.9434733850278854e-05, - "loss": 1.1739, + "learning_rate": 1.8759132306215352e-05, + "loss": 1.0596, "step": 4740 }, { - "epoch": 0.13434781376632945, + "epoch": 0.18549964785976994, "grad_norm": 0.0, - "learning_rate": 1.9434429610856852e-05, - "loss": 1.077, + "learning_rate": 1.875852083658342e-05, + "loss": 1.1732, "step": 4741 }, { - "epoch": 0.13437615120859192, + "epoch": 0.18553877455199938, "grad_norm": 0.0, - "learning_rate": 1.943412529196515e-05, - "loss": 1.0451, + "learning_rate": 1.8757909226300617e-05, + "loss": 1.03, "step": 4742 }, { - "epoch": 0.1344044886508544, + "epoch": 0.18557790124422882, "grad_norm": 0.0, - "learning_rate": 1.9433820893606307e-05, - "loss": 0.9973, + "learning_rate": 1.8757297475376766e-05, + "loss": 1.1812, "step": 4743 }, { - "epoch": 0.13443282609311683, + "epoch": 0.18561702793645826, "grad_norm": 0.0, - "learning_rate": 1.9433516415782887e-05, - "loss": 1.0385, + "learning_rate": 1.8756685583821693e-05, + "loss": 1.233, "step": 4744 }, { - "epoch": 0.1344611635353793, + "epoch": 0.1856561546286877, "grad_norm": 0.0, - "learning_rate": 1.9433211858497456e-05, - "loss": 1.1069, + "learning_rate": 1.875607355164522e-05, + "loss": 1.1719, "step": 4745 }, { - "epoch": 0.13448950097764176, + "epoch": 0.18569528132091712, "grad_norm": 0.0, - "learning_rate": 1.9432907221752576e-05, - "loss": 1.0607, + "learning_rate": 1.8755461378857177e-05, + "loss": 1.1515, "step": 4746 }, { - "epoch": 0.13451783841990422, + "epoch": 0.18573440801314656, "grad_norm": 0.0, - "learning_rate": 1.9432602505550818e-05, - "loss": 1.088, + "learning_rate": 1.8754849065467396e-05, + "loss": 1.0269, "step": 4747 }, { - "epoch": 0.1345461758621667, + "epoch": 0.185773534705376, "grad_norm": 0.0, - "learning_rate": 1.9432297709894747e-05, - "loss": 0.9692, + "learning_rate": 1.875423661148571e-05, + "loss": 1.1702, "step": 4748 }, { - "epoch": 0.13457451330442916, + "epoch": 0.18581266139760544, "grad_norm": 0.0, - "learning_rate": 1.943199283478693e-05, - "loss": 0.9708, + "learning_rate": 1.8753624016921955e-05, + "loss": 1.314, "step": 4749 }, { - "epoch": 0.1346028507466916, + "epoch": 0.18585178808983488, "grad_norm": 0.0, - "learning_rate": 1.9431687880229934e-05, - "loss": 1.0364, + "learning_rate": 1.875301128178597e-05, + "loss": 1.1193, "step": 4750 }, { - "epoch": 0.13463118818895406, + "epoch": 0.18589091478206432, "grad_norm": 0.0, - "learning_rate": 1.9431382846226327e-05, - "loss": 1.1621, + "learning_rate": 1.875239840608759e-05, + "loss": 1.0579, "step": 4751 }, { - "epoch": 0.13465952563121653, + "epoch": 0.18593004147429376, "grad_norm": 0.0, - "learning_rate": 1.943107773277868e-05, - "loss": 1.0831, + "learning_rate": 1.8751785389836653e-05, + "loss": 1.1774, "step": 4752 }, { - "epoch": 0.134687863073479, + "epoch": 0.1859691681665232, "grad_norm": 0.0, - "learning_rate": 1.9430772539889565e-05, - "loss": 1.0425, + "learning_rate": 1.8751172233043017e-05, + "loss": 1.2139, "step": 4753 }, { - "epoch": 0.13471620051574146, + "epoch": 0.18600829485875264, "grad_norm": 0.0, - "learning_rate": 1.943046726756155e-05, - "loss": 0.9913, + "learning_rate": 1.8750558935716516e-05, + "loss": 1.1559, "step": 4754 }, { - "epoch": 0.13474453795800392, + "epoch": 0.18604742155098208, "grad_norm": 0.0, - "learning_rate": 1.943016191579721e-05, - "loss": 1.0685, + "learning_rate": 1.8749945497867004e-05, + "loss": 1.1322, "step": 4755 }, { - "epoch": 0.13477287540026636, + "epoch": 0.18608654824321152, "grad_norm": 0.0, - "learning_rate": 1.9429856484599107e-05, - "loss": 1.1194, + "learning_rate": 1.8749331919504336e-05, + "loss": 1.2776, "step": 4756 }, { - "epoch": 0.13480121284252883, + "epoch": 0.18612567493544097, "grad_norm": 0.0, - "learning_rate": 1.9429550973969828e-05, - "loss": 1.0394, + "learning_rate": 1.8748718200638356e-05, + "loss": 1.1322, "step": 4757 }, { - "epoch": 0.1348295502847913, + "epoch": 0.1861648016276704, "grad_norm": 0.0, - "learning_rate": 1.9429245383911937e-05, - "loss": 1.0578, + "learning_rate": 1.8748104341278924e-05, + "loss": 1.2327, "step": 4758 }, { - "epoch": 0.13485788772705376, + "epoch": 0.18620392831989985, "grad_norm": 0.0, - "learning_rate": 1.942893971442801e-05, - "loss": 0.9493, + "learning_rate": 1.8747490341435904e-05, + "loss": 1.1874, "step": 4759 }, { - "epoch": 0.13488622516931623, + "epoch": 0.1862430550121293, "grad_norm": 0.0, - "learning_rate": 1.9428633965520625e-05, - "loss": 1.1262, + "learning_rate": 1.8746876201119143e-05, + "loss": 1.0848, "step": 4760 }, { - "epoch": 0.1349145626115787, + "epoch": 0.1862821817043587, "grad_norm": 0.0, - "learning_rate": 1.9428328137192353e-05, - "loss": 1.06, + "learning_rate": 1.8746261920338516e-05, + "loss": 1.1719, "step": 4761 }, { - "epoch": 0.13494290005384113, + "epoch": 0.18632130839658814, "grad_norm": 0.0, - "learning_rate": 1.942802222944577e-05, - "loss": 1.0501, + "learning_rate": 1.8745647499103882e-05, + "loss": 1.259, "step": 4762 }, { - "epoch": 0.1349712374961036, + "epoch": 0.18636043508881758, "grad_norm": 0.0, - "learning_rate": 1.9427716242283462e-05, - "loss": 1.1493, + "learning_rate": 1.8745032937425103e-05, + "loss": 1.0412, "step": 4763 }, { - "epoch": 0.13499957493836606, + "epoch": 0.18639956178104702, "grad_norm": 0.0, - "learning_rate": 1.9427410175707993e-05, - "loss": 1.0988, + "learning_rate": 1.8744418235312057e-05, + "loss": 1.1843, "step": 4764 }, { - "epoch": 0.13502791238062853, + "epoch": 0.18643868847327646, "grad_norm": 0.0, - "learning_rate": 1.942710402972195e-05, - "loss": 0.9501, + "learning_rate": 1.8743803392774612e-05, + "loss": 1.2305, "step": 4765 }, { - "epoch": 0.135056249822891, + "epoch": 0.1864778151655059, "grad_norm": 0.0, - "learning_rate": 1.9426797804327904e-05, - "loss": 1.0347, + "learning_rate": 1.8743188409822642e-05, + "loss": 1.225, "step": 4766 }, { - "epoch": 0.13508458726515346, + "epoch": 0.18651694185773535, "grad_norm": 0.0, - "learning_rate": 1.9426491499528444e-05, - "loss": 1.0133, + "learning_rate": 1.874257328646602e-05, + "loss": 1.1282, "step": 4767 }, { - "epoch": 0.1351129247074159, + "epoch": 0.1865560685499648, "grad_norm": 0.0, - "learning_rate": 1.9426185115326147e-05, - "loss": 0.9282, + "learning_rate": 1.8741958022714625e-05, + "loss": 1.149, "step": 4768 }, { - "epoch": 0.13514126214967837, + "epoch": 0.18659519524219423, "grad_norm": 0.0, - "learning_rate": 1.942587865172359e-05, - "loss": 0.9998, + "learning_rate": 1.874134261857834e-05, + "loss": 1.0873, "step": 4769 }, { - "epoch": 0.13516959959194083, + "epoch": 0.18663432193442367, "grad_norm": 0.0, - "learning_rate": 1.9425572108723356e-05, - "loss": 1.0591, + "learning_rate": 1.8740727074067047e-05, + "loss": 1.1022, "step": 4770 }, { - "epoch": 0.1351979370342033, + "epoch": 0.1866734486266531, "grad_norm": 0.0, - "learning_rate": 1.942526548632803e-05, - "loss": 1.0075, + "learning_rate": 1.874011138919063e-05, + "loss": 1.2581, "step": 4771 }, { - "epoch": 0.13522627447646576, + "epoch": 0.18671257531888255, "grad_norm": 0.0, - "learning_rate": 1.942495878454019e-05, - "loss": 1.0555, + "learning_rate": 1.8739495563958973e-05, + "loss": 1.1657, "step": 4772 }, { - "epoch": 0.13525461191872823, + "epoch": 0.186751702011112, "grad_norm": 0.0, - "learning_rate": 1.942465200336243e-05, - "loss": 1.0147, + "learning_rate": 1.873887959838197e-05, + "loss": 1.2218, "step": 4773 }, { - "epoch": 0.13528294936099067, + "epoch": 0.18679082870334143, "grad_norm": 0.0, - "learning_rate": 1.942434514279732e-05, - "loss": 1.1539, + "learning_rate": 1.873826349246951e-05, + "loss": 1.1769, "step": 4774 }, { - "epoch": 0.13531128680325313, + "epoch": 0.18682995539557085, "grad_norm": 0.0, - "learning_rate": 1.942403820284745e-05, - "loss": 1.1456, + "learning_rate": 1.8737647246231492e-05, + "loss": 1.1701, "step": 4775 }, { - "epoch": 0.1353396242455156, + "epoch": 0.1868690820878003, "grad_norm": 0.0, - "learning_rate": 1.9423731183515407e-05, - "loss": 1.1013, + "learning_rate": 1.8737030859677807e-05, + "loss": 1.086, "step": 4776 }, { - "epoch": 0.13536796168777807, + "epoch": 0.18690820878002973, "grad_norm": 0.0, - "learning_rate": 1.942342408480378e-05, - "loss": 1.1077, + "learning_rate": 1.873641433281835e-05, + "loss": 1.2136, "step": 4777 }, { - "epoch": 0.13539629913004053, + "epoch": 0.18694733547225917, "grad_norm": 0.0, - "learning_rate": 1.942311690671515e-05, - "loss": 1.0925, + "learning_rate": 1.873579766566303e-05, + "loss": 1.277, "step": 4778 }, { - "epoch": 0.135424636572303, + "epoch": 0.1869864621644886, "grad_norm": 0.0, - "learning_rate": 1.942280964925211e-05, - "loss": 0.9397, + "learning_rate": 1.8735180858221746e-05, + "loss": 1.1143, "step": 4779 }, { - "epoch": 0.13545297401456544, + "epoch": 0.18702558885671805, "grad_norm": 0.0, - "learning_rate": 1.9422502312417245e-05, - "loss": 0.9995, + "learning_rate": 1.8734563910504403e-05, + "loss": 1.2751, "step": 4780 }, { - "epoch": 0.1354813114568279, + "epoch": 0.1870647155489475, "grad_norm": 0.0, - "learning_rate": 1.942219489621314e-05, - "loss": 1.0461, + "learning_rate": 1.8733946822520908e-05, + "loss": 1.0901, "step": 4781 }, { - "epoch": 0.13550964889909037, + "epoch": 0.18710384224117693, "grad_norm": 0.0, - "learning_rate": 1.9421887400642392e-05, - "loss": 0.977, + "learning_rate": 1.873332959428117e-05, + "loss": 1.2755, "step": 4782 }, { - "epoch": 0.13553798634135283, + "epoch": 0.18714296893340637, "grad_norm": 0.0, - "learning_rate": 1.9421579825707585e-05, - "loss": 1.0617, + "learning_rate": 1.8732712225795105e-05, + "loss": 1.1727, "step": 4783 }, { - "epoch": 0.1355663237836153, + "epoch": 0.1871820956256358, "grad_norm": 0.0, - "learning_rate": 1.9421272171411316e-05, - "loss": 1.0804, + "learning_rate": 1.873209471707262e-05, + "loss": 1.1812, "step": 4784 }, { - "epoch": 0.13559466122587774, + "epoch": 0.18722122231786525, "grad_norm": 0.0, - "learning_rate": 1.9420964437756172e-05, - "loss": 1.0134, + "learning_rate": 1.873147706812364e-05, + "loss": 1.1461, "step": 4785 }, { - "epoch": 0.1356229986681402, + "epoch": 0.1872603490100947, "grad_norm": 0.0, - "learning_rate": 1.9420656624744744e-05, - "loss": 1.0878, + "learning_rate": 1.873085927895808e-05, + "loss": 1.2124, "step": 4786 }, { - "epoch": 0.13565133611040267, + "epoch": 0.18729947570232414, "grad_norm": 0.0, - "learning_rate": 1.942034873237963e-05, - "loss": 1.0418, + "learning_rate": 1.8730241349585857e-05, + "loss": 1.1506, "step": 4787 }, { - "epoch": 0.13567967355266514, + "epoch": 0.18733860239455358, "grad_norm": 0.0, - "learning_rate": 1.942004076066342e-05, - "loss": 1.0193, + "learning_rate": 1.87296232800169e-05, + "loss": 1.2932, "step": 4788 }, { - "epoch": 0.1357080109949276, + "epoch": 0.187377729086783, "grad_norm": 0.0, - "learning_rate": 1.9419732709598708e-05, - "loss": 1.0612, + "learning_rate": 1.872900507026113e-05, + "loss": 1.2691, "step": 4789 }, { - "epoch": 0.13573634843719007, + "epoch": 0.18741685577901243, "grad_norm": 0.0, - "learning_rate": 1.941942457918809e-05, - "loss": 1.0593, + "learning_rate": 1.8728386720328477e-05, + "loss": 1.164, "step": 4790 }, { - "epoch": 0.1357646858794525, + "epoch": 0.18745598247124187, "grad_norm": 0.0, - "learning_rate": 1.9419116369434157e-05, - "loss": 1.1146, + "learning_rate": 1.872776823022887e-05, + "loss": 1.2066, "step": 4791 }, { - "epoch": 0.13579302332171497, + "epoch": 0.1874951091634713, "grad_norm": 0.0, - "learning_rate": 1.9418808080339513e-05, - "loss": 1.0151, + "learning_rate": 1.8727149599972244e-05, + "loss": 1.0396, "step": 4792 }, { - "epoch": 0.13582136076397744, + "epoch": 0.18753423585570075, "grad_norm": 0.0, - "learning_rate": 1.941849971190675e-05, - "loss": 1.0741, + "learning_rate": 1.872653082956853e-05, + "loss": 1.1707, "step": 4793 }, { - "epoch": 0.1358496982062399, + "epoch": 0.1875733625479302, "grad_norm": 0.0, - "learning_rate": 1.9418191264138468e-05, - "loss": 1.0916, + "learning_rate": 1.8725911919027668e-05, + "loss": 1.259, "step": 4794 }, { - "epoch": 0.13587803564850237, + "epoch": 0.18761248924015964, "grad_norm": 0.0, - "learning_rate": 1.9417882737037262e-05, - "loss": 1.0094, + "learning_rate": 1.872529286835959e-05, + "loss": 1.0987, "step": 4795 }, { - "epoch": 0.13590637309076484, + "epoch": 0.18765161593238908, "grad_norm": 0.0, - "learning_rate": 1.9417574130605732e-05, - "loss": 1.0872, + "learning_rate": 1.8724673677574245e-05, + "loss": 1.1081, "step": 4796 }, { - "epoch": 0.13593471053302728, + "epoch": 0.18769074262461852, "grad_norm": 0.0, - "learning_rate": 1.9417265444846476e-05, - "loss": 0.9607, + "learning_rate": 1.8724054346681573e-05, + "loss": 1.2079, "step": 4797 }, { - "epoch": 0.13596304797528974, + "epoch": 0.18772986931684796, "grad_norm": 0.0, - "learning_rate": 1.94169566797621e-05, - "loss": 0.9902, + "learning_rate": 1.8723434875691518e-05, + "loss": 1.1483, "step": 4798 }, { - "epoch": 0.1359913854175522, + "epoch": 0.1877689960090774, "grad_norm": 0.0, - "learning_rate": 1.94166478353552e-05, - "loss": 1.0333, + "learning_rate": 1.8722815264614035e-05, + "loss": 1.186, "step": 4799 }, { - "epoch": 0.13601972285981467, + "epoch": 0.18780812270130684, "grad_norm": 0.0, - "learning_rate": 1.9416338911628377e-05, - "loss": 0.9603, + "learning_rate": 1.8722195513459065e-05, + "loss": 1.1804, "step": 4800 }, { - "epoch": 0.13604806030207714, + "epoch": 0.18784724939353628, "grad_norm": 0.0, - "learning_rate": 1.941602990858424e-05, - "loss": 1.0323, + "learning_rate": 1.8721575622236565e-05, + "loss": 1.1679, "step": 4801 }, { - "epoch": 0.1360763977443396, + "epoch": 0.18788637608576572, "grad_norm": 0.0, - "learning_rate": 1.9415720826225382e-05, - "loss": 1.0159, + "learning_rate": 1.872095559095649e-05, + "loss": 1.3154, "step": 4802 }, { - "epoch": 0.13610473518660204, + "epoch": 0.18792550277799513, "grad_norm": 0.0, - "learning_rate": 1.941541166455441e-05, - "loss": 1.0603, + "learning_rate": 1.8720335419628796e-05, + "loss": 1.0707, "step": 4803 }, { - "epoch": 0.1361330726288645, + "epoch": 0.18796462947022458, "grad_norm": 0.0, - "learning_rate": 1.941510242357393e-05, - "loss": 1.0528, + "learning_rate": 1.8719715108263442e-05, + "loss": 1.1655, "step": 4804 }, { - "epoch": 0.13616141007112698, + "epoch": 0.18800375616245402, "grad_norm": 0.0, - "learning_rate": 1.9414793103286547e-05, - "loss": 1.2096, + "learning_rate": 1.871909465687039e-05, + "loss": 1.1021, "step": 4805 }, { - "epoch": 0.13618974751338944, + "epoch": 0.18804288285468346, "grad_norm": 0.0, - "learning_rate": 1.9414483703694866e-05, - "loss": 1.1096, + "learning_rate": 1.8718474065459603e-05, + "loss": 1.2336, "step": 4806 }, { - "epoch": 0.1362180849556519, + "epoch": 0.1880820095469129, "grad_norm": 0.0, - "learning_rate": 1.941417422480149e-05, - "loss": 1.0992, + "learning_rate": 1.871785333404105e-05, + "loss": 1.05, "step": 4807 }, { - "epoch": 0.13624642239791437, + "epoch": 0.18812113623914234, "grad_norm": 0.0, - "learning_rate": 1.9413864666609036e-05, - "loss": 1.0316, + "learning_rate": 1.8717232462624695e-05, + "loss": 1.1861, "step": 4808 }, { - "epoch": 0.1362747598401768, + "epoch": 0.18816026293137178, "grad_norm": 0.0, - "learning_rate": 1.94135550291201e-05, - "loss": 0.9832, + "learning_rate": 1.871661145122051e-05, + "loss": 1.1608, "step": 4809 }, { - "epoch": 0.13630309728243928, + "epoch": 0.18819938962360122, "grad_norm": 0.0, - "learning_rate": 1.941324531233729e-05, - "loss": 1.0151, + "learning_rate": 1.8715990299838463e-05, + "loss": 1.1978, "step": 4810 }, { - "epoch": 0.13633143472470174, + "epoch": 0.18823851631583066, "grad_norm": 0.0, - "learning_rate": 1.941293551626322e-05, - "loss": 1.1024, + "learning_rate": 1.871536900848854e-05, + "loss": 1.2668, "step": 4811 }, { - "epoch": 0.1363597721669642, + "epoch": 0.1882776430080601, "grad_norm": 0.0, - "learning_rate": 1.9412625640900503e-05, - "loss": 0.9457, + "learning_rate": 1.871474757718071e-05, + "loss": 1.1837, "step": 4812 }, { - "epoch": 0.13638810960922668, + "epoch": 0.18831676970028954, "grad_norm": 0.0, - "learning_rate": 1.9412315686251743e-05, - "loss": 1.0588, + "learning_rate": 1.8714126005924954e-05, + "loss": 1.1173, "step": 4813 }, { - "epoch": 0.13641644705148914, + "epoch": 0.18835589639251898, "grad_norm": 0.0, - "learning_rate": 1.9412005652319555e-05, - "loss": 1.0482, + "learning_rate": 1.8713504294731254e-05, + "loss": 1.1624, "step": 4814 }, { - "epoch": 0.13644478449375158, + "epoch": 0.18839502308474843, "grad_norm": 0.0, - "learning_rate": 1.9411695539106546e-05, - "loss": 1.1101, + "learning_rate": 1.8712882443609592e-05, + "loss": 1.2413, "step": 4815 }, { - "epoch": 0.13647312193601405, + "epoch": 0.18843414977697787, "grad_norm": 0.0, - "learning_rate": 1.941138534661533e-05, - "loss": 1.0817, + "learning_rate": 1.8712260452569955e-05, + "loss": 1.1162, "step": 4816 }, { - "epoch": 0.1365014593782765, + "epoch": 0.1884732764692073, "grad_norm": 0.0, - "learning_rate": 1.9411075074848523e-05, - "loss": 0.9899, + "learning_rate": 1.8711638321622336e-05, + "loss": 1.2459, "step": 4817 }, { - "epoch": 0.13652979682053898, + "epoch": 0.18851240316143672, "grad_norm": 0.0, - "learning_rate": 1.941076472380873e-05, - "loss": 1.0656, + "learning_rate": 1.871101605077672e-05, + "loss": 1.0581, "step": 4818 }, { - "epoch": 0.13655813426280144, + "epoch": 0.18855152985366616, "grad_norm": 0.0, - "learning_rate": 1.9410454293498577e-05, - "loss": 1.1714, + "learning_rate": 1.87103936400431e-05, + "loss": 1.1141, "step": 4819 }, { - "epoch": 0.1365864717050639, + "epoch": 0.1885906565458956, "grad_norm": 0.0, - "learning_rate": 1.941014378392067e-05, - "loss": 0.9513, + "learning_rate": 1.8709771089431476e-05, + "loss": 1.1984, "step": 4820 }, { - "epoch": 0.13661480914732635, + "epoch": 0.18862978323812504, "grad_norm": 0.0, - "learning_rate": 1.9409833195077633e-05, - "loss": 1.02, + "learning_rate": 1.870914839895184e-05, + "loss": 1.1514, "step": 4821 }, { - "epoch": 0.13664314658958882, + "epoch": 0.18866890993035448, "grad_norm": 0.0, - "learning_rate": 1.940952252697207e-05, - "loss": 1.036, + "learning_rate": 1.8708525568614194e-05, + "loss": 1.3022, "step": 4822 }, { - "epoch": 0.13667148403185128, + "epoch": 0.18870803662258392, "grad_norm": 0.0, - "learning_rate": 1.9409211779606608e-05, - "loss": 1.0063, + "learning_rate": 1.870790259842854e-05, + "loss": 1.006, "step": 4823 }, { - "epoch": 0.13669982147411375, + "epoch": 0.18874716331481337, "grad_norm": 0.0, - "learning_rate": 1.940890095298386e-05, - "loss": 0.9593, + "learning_rate": 1.870727948840488e-05, + "loss": 1.1426, "step": 4824 }, { - "epoch": 0.1367281589163762, + "epoch": 0.1887862900070428, "grad_norm": 0.0, - "learning_rate": 1.9408590047106445e-05, - "loss": 1.0089, + "learning_rate": 1.8706656238553224e-05, + "loss": 1.1791, "step": 4825 }, { - "epoch": 0.13675649635863868, + "epoch": 0.18882541669927225, "grad_norm": 0.0, - "learning_rate": 1.9408279061976985e-05, - "loss": 1.1095, + "learning_rate": 1.8706032848883583e-05, + "loss": 1.1967, "step": 4826 }, { - "epoch": 0.13678483380090112, + "epoch": 0.1888645433915017, "grad_norm": 0.0, - "learning_rate": 1.9407967997598093e-05, - "loss": 1.0142, + "learning_rate": 1.8705409319405957e-05, + "loss": 1.1309, "step": 4827 }, { - "epoch": 0.13681317124316358, + "epoch": 0.18890367008373113, "grad_norm": 0.0, - "learning_rate": 1.9407656853972394e-05, - "loss": 1.0803, + "learning_rate": 1.870478565013037e-05, + "loss": 1.1087, "step": 4828 }, { - "epoch": 0.13684150868542605, + "epoch": 0.18894279677596057, "grad_norm": 0.0, - "learning_rate": 1.940734563110251e-05, - "loss": 1.0058, + "learning_rate": 1.870416184106683e-05, + "loss": 1.1532, "step": 4829 }, { - "epoch": 0.13686984612768852, + "epoch": 0.18898192346819, "grad_norm": 0.0, - "learning_rate": 1.9407034328991058e-05, - "loss": 1.2018, + "learning_rate": 1.870353789222536e-05, + "loss": 1.1774, "step": 4830 }, { - "epoch": 0.13689818356995098, + "epoch": 0.18902105016041945, "grad_norm": 0.0, - "learning_rate": 1.9406722947640663e-05, - "loss": 1.0748, + "learning_rate": 1.870291380361598e-05, + "loss": 1.2236, "step": 4831 }, { - "epoch": 0.13692652101221345, + "epoch": 0.18906017685264886, "grad_norm": 0.0, - "learning_rate": 1.940641148705395e-05, - "loss": 0.9988, + "learning_rate": 1.8702289575248706e-05, + "loss": 1.2892, "step": 4832 }, { - "epoch": 0.1369548584544759, + "epoch": 0.1890993035448783, "grad_norm": 0.0, - "learning_rate": 1.9406099947233537e-05, - "loss": 1.0222, + "learning_rate": 1.8701665207133567e-05, + "loss": 0.9876, "step": 4833 }, { - "epoch": 0.13698319589673835, + "epoch": 0.18913843023710775, "grad_norm": 0.0, - "learning_rate": 1.9405788328182052e-05, - "loss": 0.9873, + "learning_rate": 1.8701040699280588e-05, + "loss": 1.1888, "step": 4834 }, { - "epoch": 0.13701153333900082, + "epoch": 0.1891775569293372, "grad_norm": 0.0, - "learning_rate": 1.9405476629902123e-05, - "loss": 1.027, + "learning_rate": 1.87004160516998e-05, + "loss": 1.273, "step": 4835 }, { - "epoch": 0.13703987078126328, + "epoch": 0.18921668362156663, "grad_norm": 0.0, - "learning_rate": 1.9405164852396367e-05, - "loss": 0.9516, + "learning_rate": 1.869979126440123e-05, + "loss": 1.0736, "step": 4836 }, { - "epoch": 0.13706820822352575, + "epoch": 0.18925581031379607, "grad_norm": 0.0, - "learning_rate": 1.9404852995667416e-05, - "loss": 1.0952, + "learning_rate": 1.8699166337394916e-05, + "loss": 1.0906, "step": 4837 }, { - "epoch": 0.13709654566578822, + "epoch": 0.1892949370060255, "grad_norm": 0.0, - "learning_rate": 1.9404541059717895e-05, - "loss": 0.9631, + "learning_rate": 1.869854127069089e-05, + "loss": 1.1613, "step": 4838 }, { - "epoch": 0.13712488310805065, + "epoch": 0.18933406369825495, "grad_norm": 0.0, - "learning_rate": 1.9404229044550432e-05, - "loss": 1.1146, + "learning_rate": 1.8697916064299192e-05, + "loss": 1.1329, "step": 4839 }, { - "epoch": 0.13715322055031312, + "epoch": 0.1893731903904844, "grad_norm": 0.0, - "learning_rate": 1.940391695016766e-05, - "loss": 1.1016, + "learning_rate": 1.869729071822986e-05, + "loss": 1.1606, "step": 4840 }, { - "epoch": 0.1371815579925756, + "epoch": 0.18941231708271383, "grad_norm": 0.0, - "learning_rate": 1.94036047765722e-05, - "loss": 1.1556, + "learning_rate": 1.869666523249294e-05, + "loss": 1.1789, "step": 4841 }, { - "epoch": 0.13720989543483805, + "epoch": 0.18945144377494327, "grad_norm": 0.0, - "learning_rate": 1.9403292523766685e-05, - "loss": 1.0116, + "learning_rate": 1.869603960709847e-05, + "loss": 1.0372, "step": 4842 }, { - "epoch": 0.13723823287710052, + "epoch": 0.18949057046717271, "grad_norm": 0.0, - "learning_rate": 1.9402980191753747e-05, - "loss": 1.0494, + "learning_rate": 1.8695413842056503e-05, + "loss": 1.205, "step": 4843 }, { - "epoch": 0.13726657031936298, + "epoch": 0.18952969715940216, "grad_norm": 0.0, - "learning_rate": 1.9402667780536012e-05, - "loss": 1.0347, + "learning_rate": 1.8694787937377085e-05, + "loss": 1.2, "step": 4844 }, { - "epoch": 0.13729490776162542, + "epoch": 0.1895688238516316, "grad_norm": 0.0, - "learning_rate": 1.9402355290116116e-05, - "loss": 1.0217, + "learning_rate": 1.869416189307027e-05, + "loss": 1.0679, "step": 4845 }, { - "epoch": 0.1373232452038879, + "epoch": 0.189607950543861, "grad_norm": 0.0, - "learning_rate": 1.940204272049669e-05, - "loss": 1.1601, + "learning_rate": 1.8693535709146106e-05, + "loss": 1.1426, "step": 4846 }, { - "epoch": 0.13735158264615036, + "epoch": 0.18964707723609045, "grad_norm": 0.0, - "learning_rate": 1.940173007168037e-05, - "loss": 0.9704, + "learning_rate": 1.8692909385614653e-05, + "loss": 1.1482, "step": 4847 }, { - "epoch": 0.13737992008841282, + "epoch": 0.1896862039283199, "grad_norm": 0.0, - "learning_rate": 1.940141734366978e-05, - "loss": 0.9824, + "learning_rate": 1.8692282922485966e-05, + "loss": 1.1486, "step": 4848 }, { - "epoch": 0.1374082575306753, + "epoch": 0.18972533062054933, "grad_norm": 0.0, - "learning_rate": 1.9401104536467566e-05, - "loss": 1.0363, + "learning_rate": 1.8691656319770112e-05, + "loss": 1.1294, "step": 4849 }, { - "epoch": 0.13743659497293775, + "epoch": 0.18976445731277877, "grad_norm": 0.0, - "learning_rate": 1.9400791650076355e-05, - "loss": 1.0278, + "learning_rate": 1.8691029577477147e-05, + "loss": 1.1191, "step": 4850 }, { - "epoch": 0.1374649324152002, + "epoch": 0.1898035840050082, "grad_norm": 0.0, - "learning_rate": 1.9400478684498788e-05, - "loss": 1.0362, + "learning_rate": 1.8690402695617136e-05, + "loss": 1.1516, "step": 4851 }, { - "epoch": 0.13749326985746266, + "epoch": 0.18984271069723765, "grad_norm": 0.0, - "learning_rate": 1.9400165639737495e-05, - "loss": 1.0326, + "learning_rate": 1.8689775674200147e-05, + "loss": 1.1668, "step": 4852 }, { - "epoch": 0.13752160729972512, + "epoch": 0.1898818373894671, "grad_norm": 0.0, - "learning_rate": 1.9399852515795115e-05, - "loss": 0.9615, + "learning_rate": 1.868914851323625e-05, + "loss": 1.2462, "step": 4853 }, { - "epoch": 0.1375499447419876, + "epoch": 0.18992096408169654, "grad_norm": 0.0, - "learning_rate": 1.939953931267429e-05, - "loss": 1.1088, + "learning_rate": 1.8688521212735523e-05, + "loss": 1.3203, "step": 4854 }, { - "epoch": 0.13757828218425006, + "epoch": 0.18996009077392598, "grad_norm": 0.0, - "learning_rate": 1.9399226030377654e-05, - "loss": 1.0537, + "learning_rate": 1.8687893772708024e-05, + "loss": 1.2341, "step": 4855 }, { - "epoch": 0.13760661962651252, + "epoch": 0.18999921746615542, "grad_norm": 0.0, - "learning_rate": 1.939891266890785e-05, - "loss": 0.9786, + "learning_rate": 1.868726619316384e-05, + "loss": 1.2203, "step": 4856 }, { - "epoch": 0.13763495706877496, + "epoch": 0.19003834415838486, "grad_norm": 0.0, - "learning_rate": 1.939859922826751e-05, - "loss": 0.8695, + "learning_rate": 1.8686638474113047e-05, + "loss": 1.208, "step": 4857 }, { - "epoch": 0.13766329451103743, + "epoch": 0.1900774708506143, "grad_norm": 0.0, - "learning_rate": 1.9398285708459278e-05, - "loss": 1.0629, + "learning_rate": 1.8686010615565725e-05, + "loss": 1.1065, "step": 4858 }, { - "epoch": 0.1376916319532999, + "epoch": 0.19011659754284374, "grad_norm": 0.0, - "learning_rate": 1.9397972109485798e-05, - "loss": 1.0273, + "learning_rate": 1.868538261753196e-05, + "loss": 1.1613, "step": 4859 }, { - "epoch": 0.13771996939556236, + "epoch": 0.19015572423507315, "grad_norm": 0.0, - "learning_rate": 1.939765843134971e-05, - "loss": 1.0376, + "learning_rate": 1.8684754480021827e-05, + "loss": 1.0647, "step": 4860 }, { - "epoch": 0.13774830683782482, + "epoch": 0.1901948509273026, "grad_norm": 0.0, - "learning_rate": 1.9397344674053653e-05, - "loss": 0.9173, + "learning_rate": 1.8684126203045423e-05, + "loss": 1.1315, "step": 4861 }, { - "epoch": 0.1377766442800873, + "epoch": 0.19023397761953204, "grad_norm": 0.0, - "learning_rate": 1.9397030837600273e-05, - "loss": 1.0525, + "learning_rate": 1.8683497786612834e-05, + "loss": 1.1204, "step": 4862 }, { - "epoch": 0.13780498172234973, + "epoch": 0.19027310431176148, "grad_norm": 0.0, - "learning_rate": 1.9396716921992213e-05, - "loss": 1.1209, + "learning_rate": 1.8682869230734152e-05, + "loss": 1.1109, "step": 4863 }, { - "epoch": 0.1378333191646122, + "epoch": 0.19031223100399092, "grad_norm": 0.0, - "learning_rate": 1.9396402927232115e-05, - "loss": 0.9268, + "learning_rate": 1.868224053541947e-05, + "loss": 1.0919, "step": 4864 }, { - "epoch": 0.13786165660687466, + "epoch": 0.19035135769622036, "grad_norm": 0.0, - "learning_rate": 1.9396088853322627e-05, - "loss": 1.066, + "learning_rate": 1.8681611700678886e-05, + "loss": 1.0939, "step": 4865 }, { - "epoch": 0.13788999404913713, + "epoch": 0.1903904843884498, "grad_norm": 0.0, - "learning_rate": 1.9395774700266394e-05, - "loss": 0.9684, + "learning_rate": 1.8680982726522494e-05, + "loss": 1.1818, "step": 4866 }, { - "epoch": 0.1379183314913996, + "epoch": 0.19042961108067924, "grad_norm": 0.0, - "learning_rate": 1.939546046806606e-05, - "loss": 0.9358, + "learning_rate": 1.86803536129604e-05, + "loss": 1.2216, "step": 4867 }, { - "epoch": 0.13794666893366206, + "epoch": 0.19046873777290868, "grad_norm": 0.0, - "learning_rate": 1.9395146156724276e-05, - "loss": 1.1264, + "learning_rate": 1.8679724360002703e-05, + "loss": 1.1603, "step": 4868 }, { - "epoch": 0.1379750063759245, + "epoch": 0.19050786446513812, "grad_norm": 0.0, - "learning_rate": 1.9394831766243688e-05, - "loss": 1.0161, + "learning_rate": 1.8679094967659508e-05, + "loss": 1.1827, "step": 4869 }, { - "epoch": 0.13800334381818696, + "epoch": 0.19054699115736756, "grad_norm": 0.0, - "learning_rate": 1.939451729662694e-05, - "loss": 1.0804, + "learning_rate": 1.867846543594092e-05, + "loss": 1.1156, "step": 4870 }, { - "epoch": 0.13803168126044943, + "epoch": 0.190586117849597, "grad_norm": 0.0, - "learning_rate": 1.9394202747876686e-05, - "loss": 1.0587, + "learning_rate": 1.8677835764857058e-05, + "loss": 1.2535, "step": 4871 }, { - "epoch": 0.1380600187027119, + "epoch": 0.19062524454182644, "grad_norm": 0.0, - "learning_rate": 1.939388811999557e-05, - "loss": 1.0404, + "learning_rate": 1.8677205954418024e-05, + "loss": 1.0573, "step": 4872 }, { - "epoch": 0.13808835614497436, + "epoch": 0.19066437123405588, "grad_norm": 0.0, - "learning_rate": 1.9393573412986254e-05, - "loss": 1.0667, + "learning_rate": 1.867657600463394e-05, + "loss": 1.1215, "step": 4873 }, { - "epoch": 0.13811669358723683, + "epoch": 0.1907034979262853, "grad_norm": 0.0, - "learning_rate": 1.9393258626851376e-05, - "loss": 1.0203, + "learning_rate": 1.8675945915514916e-05, + "loss": 1.1063, "step": 4874 }, { - "epoch": 0.13814503102949927, + "epoch": 0.19074262461851474, "grad_norm": 0.0, - "learning_rate": 1.939294376159359e-05, - "loss": 0.9898, + "learning_rate": 1.8675315687071068e-05, + "loss": 1.302, "step": 4875 }, { - "epoch": 0.13817336847176173, + "epoch": 0.19078175131074418, "grad_norm": 0.0, - "learning_rate": 1.9392628817215556e-05, - "loss": 1.1429, + "learning_rate": 1.8674685319312524e-05, + "loss": 1.124, "step": 4876 }, { - "epoch": 0.1382017059140242, + "epoch": 0.19082087800297362, "grad_norm": 0.0, - "learning_rate": 1.939231379371992e-05, - "loss": 1.0543, + "learning_rate": 1.8674054812249403e-05, + "loss": 1.1245, "step": 4877 }, { - "epoch": 0.13823004335628666, + "epoch": 0.19086000469520306, "grad_norm": 0.0, - "learning_rate": 1.9391998691109335e-05, - "loss": 1.0308, + "learning_rate": 1.8673424165891832e-05, + "loss": 1.2272, "step": 4878 }, { - "epoch": 0.13825838079854913, + "epoch": 0.1908991313874325, "grad_norm": 0.0, - "learning_rate": 1.9391683509386457e-05, - "loss": 0.9559, + "learning_rate": 1.8672793380249935e-05, + "loss": 1.2534, "step": 4879 }, { - "epoch": 0.1382867182408116, + "epoch": 0.19093825807966194, "grad_norm": 0.0, - "learning_rate": 1.9391368248553946e-05, - "loss": 1.0622, + "learning_rate": 1.8672162455333847e-05, + "loss": 1.0871, "step": 4880 }, { - "epoch": 0.13831505568307403, + "epoch": 0.19097738477189138, "grad_norm": 0.0, - "learning_rate": 1.9391052908614448e-05, - "loss": 1.0405, + "learning_rate": 1.8671531391153697e-05, + "loss": 1.2665, "step": 4881 }, { - "epoch": 0.1383433931253365, + "epoch": 0.19101651146412083, "grad_norm": 0.0, - "learning_rate": 1.939073748957063e-05, - "loss": 1.0959, + "learning_rate": 1.8670900187719616e-05, + "loss": 1.2513, "step": 4882 }, { - "epoch": 0.13837173056759897, + "epoch": 0.19105563815635027, "grad_norm": 0.0, - "learning_rate": 1.9390421991425137e-05, - "loss": 0.9074, + "learning_rate": 1.8670268845041744e-05, + "loss": 1.2701, "step": 4883 }, { - "epoch": 0.13840006800986143, + "epoch": 0.1910947648485797, "grad_norm": 0.0, - "learning_rate": 1.9390106414180635e-05, - "loss": 0.9335, + "learning_rate": 1.866963736313022e-05, + "loss": 1.301, "step": 4884 }, { - "epoch": 0.1384284054521239, + "epoch": 0.19113389154080915, "grad_norm": 0.0, - "learning_rate": 1.9389790757839776e-05, - "loss": 1.1161, + "learning_rate": 1.866900574199518e-05, + "loss": 1.1941, "step": 4885 }, { - "epoch": 0.13845674289438636, + "epoch": 0.1911730182330386, "grad_norm": 0.0, - "learning_rate": 1.9389475022405227e-05, - "loss": 0.9531, + "learning_rate": 1.8668373981646776e-05, + "loss": 1.2292, "step": 4886 }, { - "epoch": 0.1384850803366488, + "epoch": 0.19121214492526803, "grad_norm": 0.0, - "learning_rate": 1.9389159207879644e-05, - "loss": 0.9588, + "learning_rate": 1.8667742082095147e-05, + "loss": 1.2769, "step": 4887 }, { - "epoch": 0.13851341777891127, + "epoch": 0.19125127161749747, "grad_norm": 0.0, - "learning_rate": 1.9388843314265684e-05, - "loss": 0.9849, + "learning_rate": 1.8667110043350435e-05, + "loss": 1.1753, "step": 4888 }, { - "epoch": 0.13854175522117373, + "epoch": 0.19129039830972688, "grad_norm": 0.0, - "learning_rate": 1.9388527341566012e-05, - "loss": 1.0717, + "learning_rate": 1.8666477865422804e-05, + "loss": 1.1248, "step": 4889 }, { - "epoch": 0.1385700926634362, + "epoch": 0.19132952500195632, "grad_norm": 0.0, - "learning_rate": 1.9388211289783285e-05, - "loss": 1.0554, + "learning_rate": 1.8665845548322394e-05, + "loss": 1.1966, "step": 4890 }, { - "epoch": 0.13859843010569867, + "epoch": 0.19136865169418577, "grad_norm": 0.0, - "learning_rate": 1.938789515892017e-05, - "loss": 1.0407, + "learning_rate": 1.8665213092059367e-05, + "loss": 1.1643, "step": 4891 }, { - "epoch": 0.13862676754796113, + "epoch": 0.1914077783864152, "grad_norm": 0.0, - "learning_rate": 1.9387578948979326e-05, - "loss": 0.9103, + "learning_rate": 1.866458049664387e-05, + "loss": 1.0357, "step": 4892 }, { - "epoch": 0.13865510499022357, + "epoch": 0.19144690507864465, "grad_norm": 0.0, - "learning_rate": 1.9387262659963423e-05, - "loss": 1.099, + "learning_rate": 1.8663947762086074e-05, + "loss": 1.1673, "step": 4893 }, { - "epoch": 0.13868344243248604, + "epoch": 0.1914860317708741, "grad_norm": 0.0, - "learning_rate": 1.9386946291875117e-05, - "loss": 0.9285, + "learning_rate": 1.866331488839613e-05, + "loss": 1.296, "step": 4894 }, { - "epoch": 0.1387117798747485, + "epoch": 0.19152515846310353, "grad_norm": 0.0, - "learning_rate": 1.938662984471708e-05, - "loss": 1.1174, + "learning_rate": 1.8662681875584207e-05, + "loss": 1.1765, "step": 4895 }, { - "epoch": 0.13874011731701097, + "epoch": 0.19156428515533297, "grad_norm": 0.0, - "learning_rate": 1.938631331849197e-05, - "loss": 1.0594, + "learning_rate": 1.8662048723660464e-05, + "loss": 1.1964, "step": 4896 }, { - "epoch": 0.13876845475927344, + "epoch": 0.1916034118475624, "grad_norm": 0.0, - "learning_rate": 1.9385996713202456e-05, - "loss": 1.106, + "learning_rate": 1.8661415432635076e-05, + "loss": 1.1683, "step": 4897 }, { - "epoch": 0.1387967922015359, + "epoch": 0.19164253853979185, "grad_norm": 0.0, - "learning_rate": 1.938568002885121e-05, - "loss": 1.0217, + "learning_rate": 1.866078200251821e-05, + "loss": 1.0937, "step": 4898 }, { - "epoch": 0.13882512964379834, + "epoch": 0.1916816652320213, "grad_norm": 0.0, - "learning_rate": 1.9385363265440896e-05, - "loss": 1.079, + "learning_rate": 1.8660148433320032e-05, + "loss": 1.2487, "step": 4899 }, { - "epoch": 0.1388534670860608, + "epoch": 0.19172079192425073, "grad_norm": 0.0, - "learning_rate": 1.938504642297418e-05, - "loss": 1.0664, + "learning_rate": 1.8659514725050728e-05, + "loss": 1.2235, "step": 4900 }, { - "epoch": 0.13888180452832327, + "epoch": 0.19175991861648017, "grad_norm": 0.0, - "learning_rate": 1.9384729501453737e-05, - "loss": 0.9895, + "learning_rate": 1.8658880877720467e-05, + "loss": 1.0493, "step": 4901 }, { - "epoch": 0.13891014197058574, + "epoch": 0.19179904530870961, "grad_norm": 0.0, - "learning_rate": 1.9384412500882227e-05, - "loss": 0.9659, + "learning_rate": 1.8658246891339428e-05, + "loss": 1.1022, "step": 4902 }, { - "epoch": 0.1389384794128482, + "epoch": 0.19183817200093903, "grad_norm": 0.0, - "learning_rate": 1.938409542126233e-05, - "loss": 0.9787, + "learning_rate": 1.8657612765917794e-05, + "loss": 1.1521, "step": 4903 }, { - "epoch": 0.13896681685511067, + "epoch": 0.19187729869316847, "grad_norm": 0.0, - "learning_rate": 1.938377826259671e-05, - "loss": 1.1391, + "learning_rate": 1.865697850146575e-05, + "loss": 1.2854, "step": 4904 }, { - "epoch": 0.1389951542973731, + "epoch": 0.1919164253853979, "grad_norm": 0.0, - "learning_rate": 1.9383461024888046e-05, - "loss": 0.9174, + "learning_rate": 1.8656344097993475e-05, + "loss": 1.2386, "step": 4905 }, { - "epoch": 0.13902349173963557, + "epoch": 0.19195555207762735, "grad_norm": 0.0, - "learning_rate": 1.9383143708138997e-05, - "loss": 1.0374, + "learning_rate": 1.8655709555511162e-05, + "loss": 1.2449, "step": 4906 }, { - "epoch": 0.13905182918189804, + "epoch": 0.1919946787698568, "grad_norm": 0.0, - "learning_rate": 1.938282631235225e-05, - "loss": 1.0577, + "learning_rate": 1.8655074874029e-05, + "loss": 1.0416, "step": 4907 }, { - "epoch": 0.1390801666241605, + "epoch": 0.19203380546208623, "grad_norm": 0.0, - "learning_rate": 1.9382508837530472e-05, - "loss": 0.9452, + "learning_rate": 1.865444005355718e-05, + "loss": 1.0896, "step": 4908 }, { - "epoch": 0.13910850406642297, + "epoch": 0.19207293215431567, "grad_norm": 0.0, - "learning_rate": 1.9382191283676336e-05, - "loss": 1.037, + "learning_rate": 1.86538050941059e-05, + "loss": 1.1508, "step": 4909 }, { - "epoch": 0.13913684150868544, + "epoch": 0.19211205884654511, "grad_norm": 0.0, - "learning_rate": 1.938187365079252e-05, - "loss": 1.0239, + "learning_rate": 1.865316999568535e-05, + "loss": 1.1533, "step": 4910 }, { - "epoch": 0.13916517895094788, + "epoch": 0.19215118553877455, "grad_norm": 0.0, - "learning_rate": 1.93815559388817e-05, - "loss": 1.1036, + "learning_rate": 1.865253475830574e-05, + "loss": 1.1821, "step": 4911 }, { - "epoch": 0.13919351639321034, + "epoch": 0.192190312231004, "grad_norm": 0.0, - "learning_rate": 1.938123814794655e-05, - "loss": 0.9662, + "learning_rate": 1.8651899381977257e-05, + "loss": 1.2946, "step": 4912 }, { - "epoch": 0.1392218538354728, + "epoch": 0.19222943892323344, "grad_norm": 0.0, - "learning_rate": 1.9380920277989746e-05, - "loss": 1.0307, + "learning_rate": 1.8651263866710113e-05, + "loss": 1.2534, "step": 4913 }, { - "epoch": 0.13925019127773527, + "epoch": 0.19226856561546288, "grad_norm": 0.0, - "learning_rate": 1.9380602329013967e-05, - "loss": 1.021, + "learning_rate": 1.8650628212514516e-05, + "loss": 1.1993, "step": 4914 }, { - "epoch": 0.13927852871999774, + "epoch": 0.19230769230769232, "grad_norm": 0.0, - "learning_rate": 1.9380284301021894e-05, - "loss": 1.0137, + "learning_rate": 1.8649992419400663e-05, + "loss": 1.1555, "step": 4915 }, { - "epoch": 0.1393068661622602, + "epoch": 0.19234681899992176, "grad_norm": 0.0, - "learning_rate": 1.93799661940162e-05, - "loss": 1.0266, + "learning_rate": 1.8649356487378774e-05, + "loss": 1.2495, "step": 4916 }, { - "epoch": 0.13933520360452264, + "epoch": 0.19238594569215117, "grad_norm": 0.0, - "learning_rate": 1.9379648007999567e-05, - "loss": 1.015, + "learning_rate": 1.864872041645906e-05, + "loss": 1.1823, "step": 4917 }, { - "epoch": 0.1393635410467851, + "epoch": 0.1924250723843806, "grad_norm": 0.0, - "learning_rate": 1.9379329742974677e-05, - "loss": 0.973, + "learning_rate": 1.864808420665173e-05, + "loss": 1.0782, "step": 4918 }, { - "epoch": 0.13939187848904758, + "epoch": 0.19246419907661005, "grad_norm": 0.0, - "learning_rate": 1.937901139894421e-05, - "loss": 1.0498, + "learning_rate": 1.8647447857967007e-05, + "loss": 1.2419, "step": 4919 }, { - "epoch": 0.13942021593131004, + "epoch": 0.1925033257688395, "grad_norm": 0.0, - "learning_rate": 1.9378692975910846e-05, - "loss": 1.0745, + "learning_rate": 1.8646811370415107e-05, + "loss": 1.1087, "step": 4920 }, { - "epoch": 0.1394485533735725, + "epoch": 0.19254245246106894, "grad_norm": 0.0, - "learning_rate": 1.937837447387727e-05, - "loss": 1.0009, + "learning_rate": 1.8646174744006253e-05, + "loss": 1.1345, "step": 4921 }, { - "epoch": 0.13947689081583498, + "epoch": 0.19258157915329838, "grad_norm": 0.0, - "learning_rate": 1.9378055892846164e-05, - "loss": 1.1076, + "learning_rate": 1.8645537978750664e-05, + "loss": 1.0924, "step": 4922 }, { - "epoch": 0.1395052282580974, + "epoch": 0.19262070584552782, "grad_norm": 0.0, - "learning_rate": 1.937773723282021e-05, - "loss": 1.062, + "learning_rate": 1.864490107465857e-05, + "loss": 1.1898, "step": 4923 }, { - "epoch": 0.13953356570035988, + "epoch": 0.19265983253775726, "grad_norm": 0.0, - "learning_rate": 1.937741849380209e-05, - "loss": 0.9747, + "learning_rate": 1.8644264031740197e-05, + "loss": 1.136, "step": 4924 }, { - "epoch": 0.13956190314262235, + "epoch": 0.1926989592299867, "grad_norm": 0.0, - "learning_rate": 1.9377099675794495e-05, - "loss": 1.0366, + "learning_rate": 1.8643626850005772e-05, + "loss": 1.2061, "step": 4925 }, { - "epoch": 0.1395902405848848, + "epoch": 0.19273808592221614, "grad_norm": 0.0, - "learning_rate": 1.9376780778800106e-05, - "loss": 1.0718, + "learning_rate": 1.8642989529465537e-05, + "loss": 1.1994, "step": 4926 }, { - "epoch": 0.13961857802714728, + "epoch": 0.19277721261444558, "grad_norm": 0.0, - "learning_rate": 1.937646180282161e-05, - "loss": 1.0565, + "learning_rate": 1.8642352070129715e-05, + "loss": 1.2907, "step": 4927 }, { - "epoch": 0.13964691546940974, + "epoch": 0.19281633930667502, "grad_norm": 0.0, - "learning_rate": 1.9376142747861693e-05, - "loss": 1.1297, + "learning_rate": 1.864171447200855e-05, + "loss": 1.1562, "step": 4928 }, { - "epoch": 0.13967525291167218, + "epoch": 0.19285546599890446, "grad_norm": 0.0, - "learning_rate": 1.937582361392305e-05, - "loss": 1.1423, + "learning_rate": 1.864107673511228e-05, + "loss": 1.191, "step": 4929 }, { - "epoch": 0.13970359035393465, + "epoch": 0.1928945926911339, "grad_norm": 0.0, - "learning_rate": 1.9375504401008357e-05, - "loss": 1.0558, + "learning_rate": 1.8640438859451147e-05, + "loss": 1.1668, "step": 4930 }, { - "epoch": 0.13973192779619711, + "epoch": 0.19293371938336332, "grad_norm": 0.0, - "learning_rate": 1.9375185109120313e-05, - "loss": 1.0933, + "learning_rate": 1.863980084503539e-05, + "loss": 1.1561, "step": 4931 }, { - "epoch": 0.13976026523845958, + "epoch": 0.19297284607559276, "grad_norm": 0.0, - "learning_rate": 1.93748657382616e-05, - "loss": 1.009, + "learning_rate": 1.863916269187526e-05, + "loss": 1.2158, "step": 4932 }, { - "epoch": 0.13978860268072205, + "epoch": 0.1930119727678222, "grad_norm": 0.0, - "learning_rate": 1.9374546288434913e-05, - "loss": 0.9305, + "learning_rate": 1.8638524399981003e-05, + "loss": 1.1563, "step": 4933 }, { - "epoch": 0.1398169401229845, + "epoch": 0.19305109946005164, "grad_norm": 0.0, - "learning_rate": 1.937422675964294e-05, - "loss": 0.9058, + "learning_rate": 1.8637885969362867e-05, + "loss": 0.966, "step": 4934 }, { - "epoch": 0.13984527756524695, + "epoch": 0.19309022615228108, "grad_norm": 0.0, - "learning_rate": 1.937390715188838e-05, - "loss": 1.005, + "learning_rate": 1.8637247400031106e-05, + "loss": 1.2153, "step": 4935 }, { - "epoch": 0.13987361500750942, + "epoch": 0.19312935284451052, "grad_norm": 0.0, - "learning_rate": 1.9373587465173915e-05, - "loss": 1.0244, + "learning_rate": 1.8636608691995976e-05, + "loss": 1.2854, "step": 4936 }, { - "epoch": 0.13990195244977188, + "epoch": 0.19316847953673996, "grad_norm": 0.0, - "learning_rate": 1.937326769950224e-05, - "loss": 1.1044, + "learning_rate": 1.8635969845267735e-05, + "loss": 1.132, "step": 4937 }, { - "epoch": 0.13993028989203435, + "epoch": 0.1932076062289694, "grad_norm": 0.0, - "learning_rate": 1.9372947854876053e-05, - "loss": 0.9014, + "learning_rate": 1.8635330859856636e-05, + "loss": 1.1449, "step": 4938 }, { - "epoch": 0.13995862733429681, + "epoch": 0.19324673292119884, "grad_norm": 0.0, - "learning_rate": 1.9372627931298043e-05, - "loss": 0.9499, + "learning_rate": 1.8634691735772942e-05, + "loss": 1.1192, "step": 4939 }, { - "epoch": 0.13998696477655928, + "epoch": 0.19328585961342828, "grad_norm": 0.0, - "learning_rate": 1.9372307928770912e-05, - "loss": 1.1589, + "learning_rate": 1.8634052473026925e-05, + "loss": 1.0576, "step": 4940 }, { - "epoch": 0.14001530221882172, + "epoch": 0.19332498630565773, "grad_norm": 0.0, - "learning_rate": 1.9371987847297348e-05, - "loss": 0.9515, + "learning_rate": 1.8633413071628842e-05, + "loss": 1.1893, "step": 4941 }, { - "epoch": 0.14004363966108418, + "epoch": 0.19336411299788717, "grad_norm": 0.0, - "learning_rate": 1.937166768688005e-05, - "loss": 1.0111, + "learning_rate": 1.8632773531588962e-05, + "loss": 1.2005, "step": 4942 }, { - "epoch": 0.14007197710334665, + "epoch": 0.1934032396901166, "grad_norm": 0.0, - "learning_rate": 1.9371347447521717e-05, - "loss": 1.0436, + "learning_rate": 1.8632133852917557e-05, + "loss": 1.1102, "step": 4943 }, { - "epoch": 0.14010031454560912, + "epoch": 0.19344236638234605, "grad_norm": 0.0, - "learning_rate": 1.9371027129225042e-05, - "loss": 1.0895, + "learning_rate": 1.8631494035624898e-05, + "loss": 1.2141, "step": 4944 }, { - "epoch": 0.14012865198787158, + "epoch": 0.1934814930745755, "grad_norm": 0.0, - "learning_rate": 1.9370706731992724e-05, - "loss": 0.9957, + "learning_rate": 1.8630854079721263e-05, + "loss": 1.1364, "step": 4945 }, { - "epoch": 0.14015698943013405, + "epoch": 0.1935206197668049, "grad_norm": 0.0, - "learning_rate": 1.937038625582747e-05, - "loss": 1.0206, + "learning_rate": 1.8630213985216925e-05, + "loss": 1.2272, "step": 4946 }, { - "epoch": 0.1401853268723965, + "epoch": 0.19355974645903434, "grad_norm": 0.0, - "learning_rate": 1.9370065700731967e-05, - "loss": 1.0182, + "learning_rate": 1.862957375212216e-05, + "loss": 1.1326, "step": 4947 }, { - "epoch": 0.14021366431465895, + "epoch": 0.19359887315126378, "grad_norm": 0.0, - "learning_rate": 1.9369745066708922e-05, - "loss": 1.0513, + "learning_rate": 1.862893338044726e-05, + "loss": 1.0977, "step": 4948 }, { - "epoch": 0.14024200175692142, + "epoch": 0.19363799984349322, "grad_norm": 0.0, - "learning_rate": 1.9369424353761037e-05, - "loss": 1.0627, + "learning_rate": 1.8628292870202504e-05, + "loss": 1.1838, "step": 4949 }, { - "epoch": 0.14027033919918389, + "epoch": 0.19367712653572267, "grad_norm": 0.0, - "learning_rate": 1.936910356189101e-05, - "loss": 1.0938, + "learning_rate": 1.8627652221398173e-05, + "loss": 1.1749, "step": 4950 }, { - "epoch": 0.14029867664144635, + "epoch": 0.1937162532279521, "grad_norm": 0.0, - "learning_rate": 1.9368782691101545e-05, - "loss": 1.0641, + "learning_rate": 1.862701143404456e-05, + "loss": 1.1948, "step": 4951 }, { - "epoch": 0.14032701408370882, + "epoch": 0.19375537992018155, "grad_norm": 0.0, - "learning_rate": 1.9368461741395344e-05, - "loss": 0.9559, + "learning_rate": 1.8626370508151955e-05, + "loss": 1.1544, "step": 4952 }, { - "epoch": 0.14035535152597126, + "epoch": 0.193794506612411, "grad_norm": 0.0, - "learning_rate": 1.9368140712775113e-05, - "loss": 1.0709, + "learning_rate": 1.8625729443730643e-05, + "loss": 1.1938, "step": 4953 }, { - "epoch": 0.14038368896823372, + "epoch": 0.19383363330464043, "grad_norm": 0.0, - "learning_rate": 1.9367819605243553e-05, - "loss": 1.0268, + "learning_rate": 1.862508824079093e-05, + "loss": 1.0456, "step": 4954 }, { - "epoch": 0.1404120264104962, + "epoch": 0.19387275999686987, "grad_norm": 0.0, - "learning_rate": 1.936749841880337e-05, - "loss": 0.9638, + "learning_rate": 1.8624446899343107e-05, + "loss": 1.1425, "step": 4955 }, { - "epoch": 0.14044036385275865, + "epoch": 0.1939118866890993, "grad_norm": 0.0, - "learning_rate": 1.9367177153457268e-05, - "loss": 0.9385, + "learning_rate": 1.8623805419397474e-05, + "loss": 1.1872, "step": 4956 }, { - "epoch": 0.14046870129502112, + "epoch": 0.19395101338132875, "grad_norm": 0.0, - "learning_rate": 1.9366855809207955e-05, - "loss": 1.0925, + "learning_rate": 1.862316380096433e-05, + "loss": 1.1149, "step": 4957 }, { - "epoch": 0.14049703873728359, + "epoch": 0.1939901400735582, "grad_norm": 0.0, - "learning_rate": 1.9366534386058137e-05, - "loss": 1.0483, + "learning_rate": 1.8622522044053984e-05, + "loss": 1.1206, "step": 4958 }, { - "epoch": 0.14052537617954602, + "epoch": 0.19402926676578763, "grad_norm": 0.0, - "learning_rate": 1.9366212884010523e-05, - "loss": 1.149, + "learning_rate": 1.8621880148676736e-05, + "loss": 1.2714, "step": 4959 }, { - "epoch": 0.1405537136218085, + "epoch": 0.19406839345801705, "grad_norm": 0.0, - "learning_rate": 1.9365891303067815e-05, - "loss": 0.986, + "learning_rate": 1.8621238114842897e-05, + "loss": 1.153, "step": 4960 }, { - "epoch": 0.14058205106407096, + "epoch": 0.1941075201502465, "grad_norm": 0.0, - "learning_rate": 1.9365569643232732e-05, - "loss": 1.0217, + "learning_rate": 1.862059594256278e-05, + "loss": 1.194, "step": 4961 }, { - "epoch": 0.14061038850633342, + "epoch": 0.19414664684247593, "grad_norm": 0.0, - "learning_rate": 1.936524790450798e-05, - "loss": 1.0563, + "learning_rate": 1.861995363184669e-05, + "loss": 1.1339, "step": 4962 }, { - "epoch": 0.1406387259485959, + "epoch": 0.19418577353470537, "grad_norm": 0.0, - "learning_rate": 1.936492608689626e-05, - "loss": 0.9452, + "learning_rate": 1.8619311182704946e-05, + "loss": 1.0961, "step": 4963 }, { - "epoch": 0.14066706339085835, + "epoch": 0.1942249002269348, "grad_norm": 0.0, - "learning_rate": 1.9364604190400293e-05, - "loss": 1.0178, + "learning_rate": 1.8618668595147867e-05, + "loss": 1.1907, "step": 4964 }, { - "epoch": 0.1406954008331208, + "epoch": 0.19426402691916425, "grad_norm": 0.0, - "learning_rate": 1.9364282215022788e-05, - "loss": 1.0687, + "learning_rate": 1.861802586918577e-05, + "loss": 1.1104, "step": 4965 }, { - "epoch": 0.14072373827538326, + "epoch": 0.1943031536113937, "grad_norm": 0.0, - "learning_rate": 1.9363960160766457e-05, - "loss": 1.1012, + "learning_rate": 1.8617383004828978e-05, + "loss": 1.2413, "step": 4966 }, { - "epoch": 0.14075207571764572, + "epoch": 0.19434228030362313, "grad_norm": 0.0, - "learning_rate": 1.9363638027634012e-05, - "loss": 1.0314, + "learning_rate": 1.861674000208781e-05, + "loss": 1.2216, "step": 4967 }, { - "epoch": 0.1407804131599082, + "epoch": 0.19438140699585257, "grad_norm": 0.0, - "learning_rate": 1.9363315815628166e-05, - "loss": 0.8916, + "learning_rate": 1.8616096860972598e-05, + "loss": 1.1856, "step": 4968 }, { - "epoch": 0.14080875060217066, + "epoch": 0.19442053368808201, "grad_norm": 0.0, - "learning_rate": 1.9362993524751634e-05, - "loss": 0.9559, + "learning_rate": 1.8615453581493664e-05, + "loss": 1.0993, "step": 4969 }, { - "epoch": 0.14083708804443312, + "epoch": 0.19445966038031146, "grad_norm": 0.0, - "learning_rate": 1.9362671155007132e-05, - "loss": 1.1125, + "learning_rate": 1.8614810163661342e-05, + "loss": 1.0587, "step": 4970 }, { - "epoch": 0.14086542548669556, + "epoch": 0.1944987870725409, "grad_norm": 0.0, - "learning_rate": 1.9362348706397374e-05, - "loss": 1.0007, + "learning_rate": 1.8614166607485967e-05, + "loss": 1.0466, "step": 4971 }, { - "epoch": 0.14089376292895803, + "epoch": 0.19453791376477034, "grad_norm": 0.0, - "learning_rate": 1.9362026178925074e-05, - "loss": 1.0279, + "learning_rate": 1.8613522912977868e-05, + "loss": 1.1266, "step": 4972 }, { - "epoch": 0.1409221003712205, + "epoch": 0.19457704045699978, "grad_norm": 0.0, - "learning_rate": 1.9361703572592954e-05, - "loss": 1.1118, + "learning_rate": 1.8612879080147384e-05, + "loss": 1.1619, "step": 4973 }, { - "epoch": 0.14095043781348296, + "epoch": 0.1946161671492292, "grad_norm": 0.0, - "learning_rate": 1.9361380887403726e-05, - "loss": 1.0617, + "learning_rate": 1.8612235109004853e-05, + "loss": 1.1503, "step": 4974 }, { - "epoch": 0.14097877525574543, + "epoch": 0.19465529384145863, "grad_norm": 0.0, - "learning_rate": 1.936105812336011e-05, - "loss": 1.0382, + "learning_rate": 1.8611590999560622e-05, + "loss": 1.038, "step": 4975 }, { - "epoch": 0.1410071126980079, + "epoch": 0.19469442053368807, "grad_norm": 0.0, - "learning_rate": 1.936073528046483e-05, - "loss": 1.1312, + "learning_rate": 1.861094675182503e-05, + "loss": 1.1593, "step": 4976 }, { - "epoch": 0.14103545014027033, + "epoch": 0.1947335472259175, "grad_norm": 0.0, - "learning_rate": 1.9360412358720596e-05, - "loss": 1.0011, + "learning_rate": 1.8610302365808423e-05, + "loss": 1.2377, "step": 4977 }, { - "epoch": 0.1410637875825328, + "epoch": 0.19477267391814695, "grad_norm": 0.0, - "learning_rate": 1.9360089358130136e-05, - "loss": 1.0137, + "learning_rate": 1.8609657841521146e-05, + "loss": 1.1926, "step": 4978 }, { - "epoch": 0.14109212502479526, + "epoch": 0.1948118006103764, "grad_norm": 0.0, - "learning_rate": 1.9359766278696165e-05, - "loss": 1.0344, + "learning_rate": 1.8609013178973555e-05, + "loss": 1.1805, "step": 4979 }, { - "epoch": 0.14112046246705773, + "epoch": 0.19485092730260584, "grad_norm": 0.0, - "learning_rate": 1.9359443120421408e-05, - "loss": 1.0462, + "learning_rate": 1.8608368378175998e-05, + "loss": 1.0035, "step": 4980 }, { - "epoch": 0.1411487999093202, + "epoch": 0.19489005399483528, "grad_norm": 0.0, - "learning_rate": 1.9359119883308584e-05, - "loss": 0.858, + "learning_rate": 1.8607723439138832e-05, + "loss": 1.1014, "step": 4981 }, { - "epoch": 0.14117713735158266, + "epoch": 0.19492918068706472, "grad_norm": 0.0, - "learning_rate": 1.935879656736042e-05, - "loss": 0.9902, + "learning_rate": 1.8607078361872414e-05, + "loss": 1.1911, "step": 4982 }, { - "epoch": 0.1412054747938451, + "epoch": 0.19496830737929416, "grad_norm": 0.0, - "learning_rate": 1.935847317257964e-05, - "loss": 1.1787, + "learning_rate": 1.8606433146387103e-05, + "loss": 1.1451, "step": 4983 }, { - "epoch": 0.14123381223610756, + "epoch": 0.1950074340715236, "grad_norm": 0.0, - "learning_rate": 1.935814969896896e-05, - "loss": 1.1081, + "learning_rate": 1.8605787792693265e-05, + "loss": 1.0843, "step": 4984 }, { - "epoch": 0.14126214967837003, + "epoch": 0.19504656076375304, "grad_norm": 0.0, - "learning_rate": 1.9357826146531114e-05, - "loss": 0.9622, + "learning_rate": 1.860514230080125e-05, + "loss": 1.268, "step": 4985 }, { - "epoch": 0.1412904871206325, + "epoch": 0.19508568745598248, "grad_norm": 0.0, - "learning_rate": 1.935750251526882e-05, - "loss": 1.0115, + "learning_rate": 1.860449667072144e-05, + "loss": 1.2591, "step": 4986 }, { - "epoch": 0.14131882456289496, + "epoch": 0.19512481414821192, "grad_norm": 0.0, - "learning_rate": 1.9357178805184808e-05, - "loss": 0.984, + "learning_rate": 1.8603850902464193e-05, + "loss": 1.1492, "step": 4987 }, { - "epoch": 0.1413471620051574, + "epoch": 0.19516394084044134, "grad_norm": 0.0, - "learning_rate": 1.9356855016281807e-05, - "loss": 1.0161, + "learning_rate": 1.860320499603988e-05, + "loss": 1.0651, "step": 4988 }, { - "epoch": 0.14137549944741987, + "epoch": 0.19520306753267078, "grad_norm": 0.0, - "learning_rate": 1.935653114856254e-05, - "loss": 1.0779, + "learning_rate": 1.8602558951458878e-05, + "loss": 1.1608, "step": 4989 }, { - "epoch": 0.14140383688968233, + "epoch": 0.19524219422490022, "grad_norm": 0.0, - "learning_rate": 1.9356207202029737e-05, - "loss": 1.129, + "learning_rate": 1.8601912768731556e-05, + "loss": 1.1511, "step": 4990 }, { - "epoch": 0.1414321743319448, + "epoch": 0.19528132091712966, "grad_norm": 0.0, - "learning_rate": 1.9355883176686125e-05, - "loss": 1.0579, + "learning_rate": 1.8601266447868296e-05, + "loss": 1.0543, "step": 4991 }, { - "epoch": 0.14146051177420726, + "epoch": 0.1953204476093591, "grad_norm": 0.0, - "learning_rate": 1.9355559072534436e-05, - "loss": 0.9916, + "learning_rate": 1.860061998887947e-05, + "loss": 1.2479, "step": 4992 }, { - "epoch": 0.14148884921646973, + "epoch": 0.19535957430158854, "grad_norm": 0.0, - "learning_rate": 1.93552348895774e-05, - "loss": 0.9326, + "learning_rate": 1.859997339177547e-05, + "loss": 1.0458, "step": 4993 }, { - "epoch": 0.14151718665873217, + "epoch": 0.19539870099381798, "grad_norm": 0.0, - "learning_rate": 1.9354910627817745e-05, - "loss": 1.0701, + "learning_rate": 1.8599326656566674e-05, + "loss": 1.0219, "step": 4994 }, { - "epoch": 0.14154552410099464, + "epoch": 0.19543782768604742, "grad_norm": 0.0, - "learning_rate": 1.9354586287258205e-05, - "loss": 1.0681, + "learning_rate": 1.8598679783263463e-05, + "loss": 1.0874, "step": 4995 }, { - "epoch": 0.1415738615432571, + "epoch": 0.19547695437827686, "grad_norm": 0.0, - "learning_rate": 1.9354261867901507e-05, - "loss": 1.0433, + "learning_rate": 1.859803277187623e-05, + "loss": 1.1345, "step": 4996 }, { - "epoch": 0.14160219898551957, + "epoch": 0.1955160810705063, "grad_norm": 0.0, - "learning_rate": 1.9353937369750392e-05, - "loss": 1.054, + "learning_rate": 1.8597385622415366e-05, + "loss": 1.118, "step": 4997 }, { - "epoch": 0.14163053642778203, + "epoch": 0.19555520776273574, "grad_norm": 0.0, - "learning_rate": 1.935361279280759e-05, - "loss": 1.0787, + "learning_rate": 1.8596738334891262e-05, + "loss": 1.1445, "step": 4998 }, { - "epoch": 0.1416588738700445, + "epoch": 0.19559433445496519, "grad_norm": 0.0, - "learning_rate": 1.935328813707583e-05, - "loss": 1.152, + "learning_rate": 1.8596090909314313e-05, + "loss": 1.2452, "step": 4999 }, { - "epoch": 0.14168721131230694, + "epoch": 0.19563346114719463, "grad_norm": 0.0, - "learning_rate": 1.935296340255785e-05, - "loss": 1.0898, + "learning_rate": 1.8595443345694918e-05, + "loss": 1.103, "step": 5000 }, { - "epoch": 0.1417155487545694, + "epoch": 0.19567258783942407, "grad_norm": 0.0, - "learning_rate": 1.9352638589256392e-05, - "loss": 1.1025, + "learning_rate": 1.859479564404347e-05, + "loss": 1.1436, "step": 5001 }, { - "epoch": 0.14174388619683187, + "epoch": 0.1957117145316535, "grad_norm": 0.0, - "learning_rate": 1.935231369717418e-05, - "loss": 1.2367, + "learning_rate": 1.8594147804370375e-05, + "loss": 1.0621, "step": 5002 }, { - "epoch": 0.14177222363909434, + "epoch": 0.19575084122388292, "grad_norm": 0.0, - "learning_rate": 1.935198872631396e-05, - "loss": 1.0208, + "learning_rate": 1.8593499826686035e-05, + "loss": 1.2169, "step": 5003 }, { - "epoch": 0.1418005610813568, + "epoch": 0.19578996791611236, "grad_norm": 0.0, - "learning_rate": 1.9351663676678465e-05, - "loss": 1.0433, + "learning_rate": 1.8592851711000857e-05, + "loss": 1.1651, "step": 5004 }, { - "epoch": 0.14182889852361927, + "epoch": 0.1958290946083418, "grad_norm": 0.0, - "learning_rate": 1.9351338548270437e-05, - "loss": 1.028, + "learning_rate": 1.8592203457325248e-05, + "loss": 1.1946, "step": 5005 }, { - "epoch": 0.1418572359658817, + "epoch": 0.19586822130057124, "grad_norm": 0.0, - "learning_rate": 1.935101334109261e-05, - "loss": 1.0619, + "learning_rate": 1.8591555065669614e-05, + "loss": 1.1144, "step": 5006 }, { - "epoch": 0.14188557340814417, + "epoch": 0.19590734799280068, "grad_norm": 0.0, - "learning_rate": 1.9350688055147725e-05, - "loss": 1.0682, + "learning_rate": 1.8590906536044375e-05, + "loss": 1.23, "step": 5007 }, { - "epoch": 0.14191391085040664, + "epoch": 0.19594647468503013, "grad_norm": 0.0, - "learning_rate": 1.9350362690438524e-05, - "loss": 1.1476, + "learning_rate": 1.8590257868459943e-05, + "loss": 1.2425, "step": 5008 }, { - "epoch": 0.1419422482926691, + "epoch": 0.19598560137725957, "grad_norm": 0.0, - "learning_rate": 1.9350037246967744e-05, - "loss": 1.0928, + "learning_rate": 1.8589609062926732e-05, + "loss": 1.105, "step": 5009 }, { - "epoch": 0.14197058573493157, + "epoch": 0.196024728069489, "grad_norm": 0.0, - "learning_rate": 1.9349711724738126e-05, - "loss": 0.9533, + "learning_rate": 1.8588960119455164e-05, + "loss": 1.217, "step": 5010 }, { - "epoch": 0.14199892317719404, + "epoch": 0.19606385476171845, "grad_norm": 0.0, - "learning_rate": 1.9349386123752418e-05, - "loss": 1.0667, + "learning_rate": 1.858831103805566e-05, + "loss": 1.0924, "step": 5011 }, { - "epoch": 0.14202726061945647, + "epoch": 0.1961029814539479, "grad_norm": 0.0, - "learning_rate": 1.9349060444013358e-05, - "loss": 1.0368, + "learning_rate": 1.8587661818738637e-05, + "loss": 1.1385, "step": 5012 }, { - "epoch": 0.14205559806171894, + "epoch": 0.19614210814617733, "grad_norm": 0.0, - "learning_rate": 1.9348734685523693e-05, - "loss": 1.0836, + "learning_rate": 1.858701246151453e-05, + "loss": 1.2245, "step": 5013 }, { - "epoch": 0.1420839355039814, + "epoch": 0.19618123483840677, "grad_norm": 0.0, - "learning_rate": 1.9348408848286157e-05, - "loss": 1.0088, + "learning_rate": 1.8586362966393765e-05, + "loss": 1.1335, "step": 5014 }, { - "epoch": 0.14211227294624387, + "epoch": 0.1962203615306362, "grad_norm": 0.0, - "learning_rate": 1.9348082932303506e-05, - "loss": 1.0046, + "learning_rate": 1.8585713333386767e-05, + "loss": 1.2324, "step": 5015 }, { - "epoch": 0.14214061038850634, + "epoch": 0.19625948822286565, "grad_norm": 0.0, - "learning_rate": 1.934775693757848e-05, - "loss": 1.0002, + "learning_rate": 1.8585063562503973e-05, + "loss": 1.2714, "step": 5016 }, { - "epoch": 0.1421689478307688, + "epoch": 0.19629861491509507, "grad_norm": 0.0, - "learning_rate": 1.9347430864113827e-05, - "loss": 0.9787, + "learning_rate": 1.8584413653755814e-05, + "loss": 1.0894, "step": 5017 }, { - "epoch": 0.14219728527303124, + "epoch": 0.1963377416073245, "grad_norm": 0.0, - "learning_rate": 1.9347104711912294e-05, - "loss": 1.1114, + "learning_rate": 1.858376360715273e-05, + "loss": 1.1689, "step": 5018 }, { - "epoch": 0.1422256227152937, + "epoch": 0.19637686829955395, "grad_norm": 0.0, - "learning_rate": 1.9346778480976626e-05, - "loss": 1.1155, + "learning_rate": 1.8583113422705162e-05, + "loss": 1.2944, "step": 5019 }, { - "epoch": 0.14225396015755618, + "epoch": 0.1964159949917834, "grad_norm": 0.0, - "learning_rate": 1.934645217130957e-05, - "loss": 1.1321, + "learning_rate": 1.858246310042354e-05, + "loss": 1.0884, "step": 5020 }, { - "epoch": 0.14228229759981864, + "epoch": 0.19645512168401283, "grad_norm": 0.0, - "learning_rate": 1.934612578291388e-05, - "loss": 0.9609, + "learning_rate": 1.8581812640318322e-05, + "loss": 1.1034, "step": 5021 }, { - "epoch": 0.1423106350420811, + "epoch": 0.19649424837624227, "grad_norm": 0.0, - "learning_rate": 1.93457993157923e-05, - "loss": 1.0507, + "learning_rate": 1.8581162042399942e-05, + "loss": 1.1151, "step": 5022 }, { - "epoch": 0.14233897248434357, + "epoch": 0.1965333750684717, "grad_norm": 0.0, - "learning_rate": 1.9345472769947582e-05, - "loss": 1.0177, + "learning_rate": 1.8580511306678856e-05, + "loss": 1.1658, "step": 5023 }, { - "epoch": 0.142367309926606, + "epoch": 0.19657250176070115, "grad_norm": 0.0, - "learning_rate": 1.934514614538248e-05, - "loss": 1.139, + "learning_rate": 1.857986043316551e-05, + "loss": 1.152, "step": 5024 }, { - "epoch": 0.14239564736886848, + "epoch": 0.1966116284529306, "grad_norm": 0.0, - "learning_rate": 1.934481944209974e-05, - "loss": 1.1845, + "learning_rate": 1.857920942187035e-05, + "loss": 1.1757, "step": 5025 }, { - "epoch": 0.14242398481113094, + "epoch": 0.19665075514516003, "grad_norm": 0.0, - "learning_rate": 1.9344492660102112e-05, - "loss": 0.9887, + "learning_rate": 1.8578558272803848e-05, + "loss": 1.1246, "step": 5026 }, { - "epoch": 0.1424523222533934, + "epoch": 0.19668988183738947, "grad_norm": 0.0, - "learning_rate": 1.9344165799392353e-05, - "loss": 1.0306, + "learning_rate": 1.857790698597644e-05, + "loss": 1.2351, "step": 5027 }, { - "epoch": 0.14248065969565588, + "epoch": 0.19672900852961892, "grad_norm": 0.0, - "learning_rate": 1.934383885997322e-05, - "loss": 1.0428, + "learning_rate": 1.85772555613986e-05, + "loss": 1.054, "step": 5028 }, { - "epoch": 0.14250899713791834, + "epoch": 0.19676813522184836, "grad_norm": 0.0, - "learning_rate": 1.9343511841847458e-05, - "loss": 1.0525, + "learning_rate": 1.857660399908078e-05, + "loss": 1.1986, "step": 5029 }, { - "epoch": 0.14253733458018078, + "epoch": 0.1968072619140778, "grad_norm": 0.0, - "learning_rate": 1.9343184745017828e-05, - "loss": 1.0434, + "learning_rate": 1.857595229903345e-05, + "loss": 1.2903, "step": 5030 }, { - "epoch": 0.14256567202244325, + "epoch": 0.1968463886063072, "grad_norm": 0.0, - "learning_rate": 1.9342857569487084e-05, - "loss": 1.1819, + "learning_rate": 1.8575300461267073e-05, + "loss": 1.1719, "step": 5031 }, { - "epoch": 0.1425940094647057, + "epoch": 0.19688551529853665, "grad_norm": 0.0, - "learning_rate": 1.934253031525798e-05, - "loss": 0.9885, + "learning_rate": 1.8574648485792113e-05, + "loss": 1.1801, "step": 5032 }, { - "epoch": 0.14262234690696818, + "epoch": 0.1969246419907661, "grad_norm": 0.0, - "learning_rate": 1.9342202982333272e-05, - "loss": 0.9866, + "learning_rate": 1.8573996372619044e-05, + "loss": 1.239, "step": 5033 }, { - "epoch": 0.14265068434923064, + "epoch": 0.19696376868299553, "grad_norm": 0.0, - "learning_rate": 1.9341875570715723e-05, - "loss": 1.0819, + "learning_rate": 1.8573344121758338e-05, + "loss": 1.2752, "step": 5034 }, { - "epoch": 0.1426790217914931, + "epoch": 0.19700289537522497, "grad_norm": 0.0, - "learning_rate": 1.9341548080408085e-05, - "loss": 1.1271, + "learning_rate": 1.8572691733220465e-05, + "loss": 1.157, "step": 5035 }, { - "epoch": 0.14270735923375555, + "epoch": 0.19704202206745441, "grad_norm": 0.0, - "learning_rate": 1.934122051141312e-05, - "loss": 0.9518, + "learning_rate": 1.8572039207015907e-05, + "loss": 1.1685, "step": 5036 }, { - "epoch": 0.14273569667601801, + "epoch": 0.19708114875968386, "grad_norm": 0.0, - "learning_rate": 1.9340892863733585e-05, - "loss": 1.0082, + "learning_rate": 1.8571386543155142e-05, + "loss": 1.1099, "step": 5037 }, { - "epoch": 0.14276403411828048, + "epoch": 0.1971202754519133, "grad_norm": 0.0, - "learning_rate": 1.934056513737224e-05, - "loss": 1.0202, + "learning_rate": 1.8570733741648654e-05, + "loss": 1.1664, "step": 5038 }, { - "epoch": 0.14279237156054295, + "epoch": 0.19715940214414274, "grad_norm": 0.0, - "learning_rate": 1.9340237332331848e-05, - "loss": 1.1248, + "learning_rate": 1.8570080802506914e-05, + "loss": 1.1171, "step": 5039 }, { - "epoch": 0.1428207090028054, + "epoch": 0.19719852883637218, "grad_norm": 0.0, - "learning_rate": 1.933990944861517e-05, - "loss": 0.9853, + "learning_rate": 1.856942772574042e-05, + "loss": 1.1801, "step": 5040 }, { - "epoch": 0.14284904644506788, + "epoch": 0.19723765552860162, "grad_norm": 0.0, - "learning_rate": 1.9339581486224964e-05, - "loss": 0.9731, + "learning_rate": 1.856877451135965e-05, + "loss": 1.1452, "step": 5041 }, { - "epoch": 0.14287738388733032, + "epoch": 0.19727678222083106, "grad_norm": 0.0, - "learning_rate": 1.9339253445163994e-05, - "loss": 0.9987, + "learning_rate": 1.85681211593751e-05, + "loss": 1.204, "step": 5042 }, { - "epoch": 0.14290572132959278, + "epoch": 0.1973159089130605, "grad_norm": 0.0, - "learning_rate": 1.9338925325435026e-05, - "loss": 1.0623, + "learning_rate": 1.8567467669797264e-05, + "loss": 1.1023, "step": 5043 }, { - "epoch": 0.14293405877185525, + "epoch": 0.19735503560528994, "grad_norm": 0.0, - "learning_rate": 1.933859712704082e-05, - "loss": 0.8969, + "learning_rate": 1.856681404263663e-05, + "loss": 1.118, "step": 5044 }, { - "epoch": 0.14296239621411772, + "epoch": 0.19739416229751935, "grad_norm": 0.0, - "learning_rate": 1.9338268849984146e-05, - "loss": 1.0466, + "learning_rate": 1.85661602779037e-05, + "loss": 1.1824, "step": 5045 }, { - "epoch": 0.14299073365638018, + "epoch": 0.1974332889897488, "grad_norm": 0.0, - "learning_rate": 1.9337940494267763e-05, - "loss": 1.0476, + "learning_rate": 1.8565506375608964e-05, + "loss": 1.1868, "step": 5046 }, { - "epoch": 0.14301907109864265, + "epoch": 0.19747241568197824, "grad_norm": 0.0, - "learning_rate": 1.933761205989444e-05, - "loss": 1.1043, + "learning_rate": 1.8564852335762936e-05, + "loss": 1.0314, "step": 5047 }, { - "epoch": 0.14304740854090509, + "epoch": 0.19751154237420768, "grad_norm": 0.0, - "learning_rate": 1.9337283546866943e-05, - "loss": 1.056, + "learning_rate": 1.856419815837611e-05, + "loss": 1.1881, "step": 5048 }, { - "epoch": 0.14307574598316755, + "epoch": 0.19755066906643712, "grad_norm": 0.0, - "learning_rate": 1.9336954955188042e-05, - "loss": 1.015, + "learning_rate": 1.8563543843458994e-05, + "loss": 1.1951, "step": 5049 }, { - "epoch": 0.14310408342543002, + "epoch": 0.19758979575866656, "grad_norm": 0.0, - "learning_rate": 1.93366262848605e-05, - "loss": 0.9691, + "learning_rate": 1.856288939102209e-05, + "loss": 1.1606, "step": 5050 }, { - "epoch": 0.14313242086769248, + "epoch": 0.197628922450896, "grad_norm": 0.0, - "learning_rate": 1.9336297535887088e-05, - "loss": 1.0723, + "learning_rate": 1.8562234801075917e-05, + "loss": 1.1855, "step": 5051 }, { - "epoch": 0.14316075830995495, + "epoch": 0.19766804914312544, "grad_norm": 0.0, - "learning_rate": 1.9335968708270575e-05, - "loss": 0.9494, + "learning_rate": 1.856158007363098e-05, + "loss": 1.1567, "step": 5052 }, { - "epoch": 0.14318909575221742, + "epoch": 0.19770717583535488, "grad_norm": 0.0, - "learning_rate": 1.933563980201373e-05, - "loss": 1.1152, + "learning_rate": 1.8560925208697794e-05, + "loss": 1.2124, "step": 5053 }, { - "epoch": 0.14321743319447985, + "epoch": 0.19774630252758432, "grad_norm": 0.0, - "learning_rate": 1.9335310817119323e-05, - "loss": 0.9953, + "learning_rate": 1.8560270206286875e-05, + "loss": 1.1852, "step": 5054 }, { - "epoch": 0.14324577063674232, + "epoch": 0.19778542921981376, "grad_norm": 0.0, - "learning_rate": 1.933498175359013e-05, - "loss": 1.1834, + "learning_rate": 1.855961506640875e-05, + "loss": 1.1086, "step": 5055 }, { - "epoch": 0.14327410807900479, + "epoch": 0.1978245559120432, "grad_norm": 0.0, - "learning_rate": 1.9334652611428915e-05, - "loss": 1.0, + "learning_rate": 1.8558959789073926e-05, + "loss": 1.1141, "step": 5056 }, { - "epoch": 0.14330244552126725, + "epoch": 0.19786368260427264, "grad_norm": 0.0, - "learning_rate": 1.9334323390638458e-05, - "loss": 1.0226, + "learning_rate": 1.855830437429294e-05, + "loss": 1.1627, "step": 5057 }, { - "epoch": 0.14333078296352972, + "epoch": 0.19790280929650209, "grad_norm": 0.0, - "learning_rate": 1.9333994091221527e-05, - "loss": 1.033, + "learning_rate": 1.8557648822076302e-05, + "loss": 1.244, "step": 5058 }, { - "epoch": 0.14335912040579218, + "epoch": 0.1979419359887315, "grad_norm": 0.0, - "learning_rate": 1.93336647131809e-05, - "loss": 1.0246, + "learning_rate": 1.855699313243455e-05, + "loss": 1.0648, "step": 5059 }, { - "epoch": 0.14338745784805462, + "epoch": 0.19798106268096094, "grad_norm": 0.0, - "learning_rate": 1.9333335256519346e-05, - "loss": 1.0487, + "learning_rate": 1.8556337305378212e-05, + "loss": 1.1667, "step": 5060 }, { - "epoch": 0.1434157952903171, + "epoch": 0.19802018937319038, "grad_norm": 0.0, - "learning_rate": 1.9333005721239647e-05, - "loss": 0.9613, + "learning_rate": 1.855568134091782e-05, + "loss": 1.2961, "step": 5061 }, { - "epoch": 0.14344413273257955, + "epoch": 0.19805931606541982, "grad_norm": 0.0, - "learning_rate": 1.9332676107344573e-05, - "loss": 1.0637, + "learning_rate": 1.8555025239063905e-05, + "loss": 1.2082, "step": 5062 }, { - "epoch": 0.14347247017484202, + "epoch": 0.19809844275764926, "grad_norm": 0.0, - "learning_rate": 1.9332346414836904e-05, - "loss": 1.0806, + "learning_rate": 1.8554368999827e-05, + "loss": 1.2007, "step": 5063 }, { - "epoch": 0.1435008076171045, + "epoch": 0.1981375694498787, "grad_norm": 0.0, - "learning_rate": 1.9332016643719413e-05, - "loss": 1.1983, + "learning_rate": 1.855371262321766e-05, + "loss": 1.257, "step": 5064 }, { - "epoch": 0.14352914505936695, + "epoch": 0.19817669614210814, "grad_norm": 0.0, - "learning_rate": 1.9331686793994883e-05, - "loss": 0.9409, + "learning_rate": 1.85530561092464e-05, + "loss": 1.2158, "step": 5065 }, { - "epoch": 0.1435574825016294, + "epoch": 0.19821582283433758, "grad_norm": 0.0, - "learning_rate": 1.933135686566609e-05, - "loss": 0.9726, + "learning_rate": 1.8552399457923785e-05, + "loss": 1.1749, "step": 5066 }, { - "epoch": 0.14358581994389186, + "epoch": 0.19825494952656703, "grad_norm": 0.0, - "learning_rate": 1.933102685873581e-05, - "loss": 1.1324, + "learning_rate": 1.8551742669260348e-05, + "loss": 1.0994, "step": 5067 }, { - "epoch": 0.14361415738615432, + "epoch": 0.19829407621879647, "grad_norm": 0.0, - "learning_rate": 1.9330696773206826e-05, - "loss": 1.0843, + "learning_rate": 1.8551085743266642e-05, + "loss": 1.0919, "step": 5068 }, { - "epoch": 0.1436424948284168, + "epoch": 0.1983332029110259, "grad_norm": 0.0, - "learning_rate": 1.933036660908192e-05, - "loss": 1.0474, + "learning_rate": 1.8550428679953213e-05, + "loss": 1.1791, "step": 5069 }, { - "epoch": 0.14367083227067926, + "epoch": 0.19837232960325535, "grad_norm": 0.0, - "learning_rate": 1.9330036366363872e-05, - "loss": 1.0878, + "learning_rate": 1.8549771479330612e-05, + "loss": 1.0764, "step": 5070 }, { - "epoch": 0.14369916971294172, + "epoch": 0.1984114562954848, "grad_norm": 0.0, - "learning_rate": 1.9329706045055463e-05, - "loss": 1.0675, + "learning_rate": 1.8549114141409396e-05, + "loss": 1.1371, "step": 5071 }, { - "epoch": 0.14372750715520416, + "epoch": 0.19845058298771423, "grad_norm": 0.0, - "learning_rate": 1.9329375645159473e-05, - "loss": 0.9851, + "learning_rate": 1.8548456666200117e-05, + "loss": 1.1597, "step": 5072 }, { - "epoch": 0.14375584459746663, + "epoch": 0.19848970967994367, "grad_norm": 0.0, - "learning_rate": 1.932904516667869e-05, - "loss": 1.0748, + "learning_rate": 1.854779905371334e-05, + "loss": 1.1127, "step": 5073 }, { - "epoch": 0.1437841820397291, + "epoch": 0.19852883637217308, "grad_norm": 0.0, - "learning_rate": 1.9328714609615896e-05, - "loss": 1.048, + "learning_rate": 1.8547141303959623e-05, + "loss": 1.2726, "step": 5074 }, { - "epoch": 0.14381251948199156, + "epoch": 0.19856796306440253, "grad_norm": 0.0, - "learning_rate": 1.932838397397387e-05, - "loss": 0.9675, + "learning_rate": 1.854648341694952e-05, + "loss": 1.0474, "step": 5075 }, { - "epoch": 0.14384085692425402, + "epoch": 0.19860708975663197, "grad_norm": 0.0, - "learning_rate": 1.9328053259755406e-05, - "loss": 1.0243, + "learning_rate": 1.8545825392693605e-05, + "loss": 1.0779, "step": 5076 }, { - "epoch": 0.1438691943665165, + "epoch": 0.1986462164488614, "grad_norm": 0.0, - "learning_rate": 1.9327722466963285e-05, - "loss": 0.9883, + "learning_rate": 1.8545167231202447e-05, + "loss": 1.2449, "step": 5077 }, { - "epoch": 0.14389753180877893, + "epoch": 0.19868534314109085, "grad_norm": 0.0, - "learning_rate": 1.9327391595600296e-05, - "loss": 0.9968, + "learning_rate": 1.8544508932486605e-05, + "loss": 1.1439, "step": 5078 }, { - "epoch": 0.1439258692510414, + "epoch": 0.1987244698333203, "grad_norm": 0.0, - "learning_rate": 1.932706064566922e-05, - "loss": 1.0788, + "learning_rate": 1.854385049655666e-05, + "loss": 1.1512, "step": 5079 }, { - "epoch": 0.14395420669330386, + "epoch": 0.19876359652554973, "grad_norm": 0.0, - "learning_rate": 1.932672961717285e-05, - "loss": 1.0965, + "learning_rate": 1.854319192342318e-05, + "loss": 1.1863, "step": 5080 }, { - "epoch": 0.14398254413556633, + "epoch": 0.19880272321777917, "grad_norm": 0.0, - "learning_rate": 1.9326398510113974e-05, - "loss": 1.0922, + "learning_rate": 1.8542533213096746e-05, + "loss": 1.2068, "step": 5081 }, { - "epoch": 0.1440108815778288, + "epoch": 0.1988418499100086, "grad_norm": 0.0, - "learning_rate": 1.932606732449538e-05, - "loss": 0.9897, + "learning_rate": 1.854187436558793e-05, + "loss": 1.2245, "step": 5082 }, { - "epoch": 0.14403921902009126, + "epoch": 0.19888097660223805, "grad_norm": 0.0, - "learning_rate": 1.9325736060319854e-05, - "loss": 1.009, + "learning_rate": 1.8541215380907317e-05, + "loss": 1.1605, "step": 5083 }, { - "epoch": 0.1440675564623537, + "epoch": 0.1989201032944675, "grad_norm": 0.0, - "learning_rate": 1.9325404717590196e-05, - "loss": 0.9534, + "learning_rate": 1.8540556259065487e-05, + "loss": 1.0871, "step": 5084 }, { - "epoch": 0.14409589390461616, + "epoch": 0.19895922998669693, "grad_norm": 0.0, - "learning_rate": 1.9325073296309186e-05, - "loss": 1.0941, + "learning_rate": 1.8539897000073025e-05, + "loss": 1.1163, "step": 5085 }, { - "epoch": 0.14412423134687863, + "epoch": 0.19899835667892637, "grad_norm": 0.0, - "learning_rate": 1.9324741796479626e-05, - "loss": 1.0465, + "learning_rate": 1.8539237603940517e-05, + "loss": 1.154, "step": 5086 }, { - "epoch": 0.1441525687891411, + "epoch": 0.19903748337115582, "grad_norm": 0.0, - "learning_rate": 1.9324410218104297e-05, - "loss": 0.9838, + "learning_rate": 1.8538578070678556e-05, + "loss": 1.179, "step": 5087 }, { - "epoch": 0.14418090623140356, + "epoch": 0.19907661006338523, "grad_norm": 0.0, - "learning_rate": 1.9324078561186002e-05, - "loss": 1.0158, + "learning_rate": 1.853791840029773e-05, + "loss": 1.1596, "step": 5088 }, { - "epoch": 0.14420924367366603, + "epoch": 0.19911573675561467, "grad_norm": 0.0, - "learning_rate": 1.932374682572753e-05, - "loss": 1.1004, + "learning_rate": 1.8537258592808634e-05, + "loss": 1.2094, "step": 5089 }, { - "epoch": 0.14423758111592846, + "epoch": 0.1991548634478441, "grad_norm": 0.0, - "learning_rate": 1.9323415011731677e-05, - "loss": 1.1366, + "learning_rate": 1.853659864822186e-05, + "loss": 1.0394, "step": 5090 }, { - "epoch": 0.14426591855819093, + "epoch": 0.19919399014007355, "grad_norm": 0.0, - "learning_rate": 1.9323083119201237e-05, - "loss": 1.0843, + "learning_rate": 1.853593856654801e-05, + "loss": 1.1572, "step": 5091 }, { - "epoch": 0.1442942560004534, + "epoch": 0.199233116832303, "grad_norm": 0.0, - "learning_rate": 1.9322751148139005e-05, - "loss": 1.079, + "learning_rate": 1.8535278347797683e-05, + "loss": 1.0742, "step": 5092 }, { - "epoch": 0.14432259344271586, + "epoch": 0.19927224352453243, "grad_norm": 0.0, - "learning_rate": 1.932241909854778e-05, - "loss": 0.9724, + "learning_rate": 1.853461799198148e-05, + "loss": 1.1736, "step": 5093 }, { - "epoch": 0.14435093088497833, + "epoch": 0.19931137021676187, "grad_norm": 0.0, - "learning_rate": 1.9322086970430355e-05, - "loss": 1.0335, + "learning_rate": 1.853395749911001e-05, + "loss": 1.2209, "step": 5094 }, { - "epoch": 0.1443792683272408, + "epoch": 0.19935049690899131, "grad_norm": 0.0, - "learning_rate": 1.932175476378953e-05, - "loss": 1.0292, + "learning_rate": 1.8533296869193874e-05, + "loss": 1.1335, "step": 5095 }, { - "epoch": 0.14440760576950323, + "epoch": 0.19938962360122076, "grad_norm": 0.0, - "learning_rate": 1.93214224786281e-05, - "loss": 1.0502, + "learning_rate": 1.853263610224368e-05, + "loss": 1.2036, "step": 5096 }, { - "epoch": 0.1444359432117657, + "epoch": 0.1994287502934502, "grad_norm": 0.0, - "learning_rate": 1.9321090114948876e-05, - "loss": 1.0666, + "learning_rate": 1.8531975198270047e-05, + "loss": 1.2068, "step": 5097 }, { - "epoch": 0.14446428065402817, + "epoch": 0.19946787698567964, "grad_norm": 0.0, - "learning_rate": 1.9320757672754643e-05, - "loss": 1.08, + "learning_rate": 1.853131415728358e-05, + "loss": 1.1822, "step": 5098 }, { - "epoch": 0.14449261809629063, + "epoch": 0.19950700367790908, "grad_norm": 0.0, - "learning_rate": 1.9320425152048205e-05, - "loss": 0.9866, + "learning_rate": 1.85306529792949e-05, + "loss": 1.013, "step": 5099 }, { - "epoch": 0.1445209555385531, + "epoch": 0.19954613037013852, "grad_norm": 0.0, - "learning_rate": 1.9320092552832367e-05, - "loss": 1.1024, + "learning_rate": 1.852999166431462e-05, + "loss": 1.1915, "step": 5100 }, { - "epoch": 0.14454929298081556, + "epoch": 0.19958525706236796, "grad_norm": 0.0, - "learning_rate": 1.931975987510993e-05, - "loss": 1.0296, + "learning_rate": 1.8529330212353367e-05, + "loss": 1.201, "step": 5101 }, { - "epoch": 0.144577630423078, + "epoch": 0.19962438375459737, "grad_norm": 0.0, - "learning_rate": 1.9319427118883694e-05, - "loss": 0.9473, + "learning_rate": 1.852866862342176e-05, + "loss": 1.2386, "step": 5102 }, { - "epoch": 0.14460596786534047, + "epoch": 0.19966351044682681, "grad_norm": 0.0, - "learning_rate": 1.931909428415646e-05, - "loss": 1.1321, + "learning_rate": 1.8528006897530415e-05, + "loss": 1.0876, "step": 5103 }, { - "epoch": 0.14463430530760293, + "epoch": 0.19970263713905625, "grad_norm": 0.0, - "learning_rate": 1.9318761370931037e-05, - "loss": 1.0755, + "learning_rate": 1.8527345034689968e-05, + "loss": 1.1583, "step": 5104 }, { - "epoch": 0.1446626427498654, + "epoch": 0.1997417638312857, "grad_norm": 0.0, - "learning_rate": 1.9318428379210224e-05, - "loss": 1.0693, + "learning_rate": 1.8526683034911046e-05, + "loss": 1.2076, "step": 5105 }, { - "epoch": 0.14469098019212787, + "epoch": 0.19978089052351514, "grad_norm": 0.0, - "learning_rate": 1.9318095308996833e-05, - "loss": 0.9922, + "learning_rate": 1.8526020898204278e-05, + "loss": 1.1432, "step": 5106 }, { - "epoch": 0.14471931763439033, + "epoch": 0.19982001721574458, "grad_norm": 0.0, - "learning_rate": 1.9317762160293663e-05, - "loss": 0.9857, + "learning_rate": 1.8525358624580302e-05, + "loss": 1.0949, "step": 5107 }, { - "epoch": 0.14474765507665277, + "epoch": 0.19985914390797402, "grad_norm": 0.0, - "learning_rate": 1.931742893310352e-05, - "loss": 1.1522, + "learning_rate": 1.8524696214049744e-05, + "loss": 0.9551, "step": 5108 }, { - "epoch": 0.14477599251891524, + "epoch": 0.19989827060020346, "grad_norm": 0.0, - "learning_rate": 1.9317095627429215e-05, - "loss": 1.0617, + "learning_rate": 1.852403366662325e-05, + "loss": 1.1982, "step": 5109 }, { - "epoch": 0.1448043299611777, + "epoch": 0.1999373972924329, "grad_norm": 0.0, - "learning_rate": 1.9316762243273555e-05, - "loss": 1.2104, + "learning_rate": 1.8523370982311455e-05, + "loss": 1.1293, "step": 5110 }, { - "epoch": 0.14483266740344017, + "epoch": 0.19997652398466234, "grad_norm": 0.0, - "learning_rate": 1.9316428780639347e-05, - "loss": 0.9292, + "learning_rate": 1.8522708161125003e-05, + "loss": 1.1858, "step": 5111 }, { - "epoch": 0.14486100484570263, + "epoch": 0.20001565067689178, "grad_norm": 0.0, - "learning_rate": 1.9316095239529397e-05, - "loss": 1.0341, + "learning_rate": 1.8522045203074536e-05, + "loss": 1.1689, "step": 5112 }, { - "epoch": 0.1448893422879651, + "epoch": 0.20005477736912122, "grad_norm": 0.0, - "learning_rate": 1.9315761619946517e-05, - "loss": 0.9943, + "learning_rate": 1.85213821081707e-05, + "loss": 1.2134, "step": 5113 }, { - "epoch": 0.14491767973022754, + "epoch": 0.20009390406135066, "grad_norm": 0.0, - "learning_rate": 1.9315427921893518e-05, - "loss": 1.0541, + "learning_rate": 1.8520718876424153e-05, + "loss": 1.186, "step": 5114 }, { - "epoch": 0.14494601717249, + "epoch": 0.2001330307535801, "grad_norm": 0.0, - "learning_rate": 1.9315094145373213e-05, - "loss": 1.0287, + "learning_rate": 1.8520055507845535e-05, + "loss": 1.1639, "step": 5115 }, { - "epoch": 0.14497435461475247, + "epoch": 0.20017215744580952, "grad_norm": 0.0, - "learning_rate": 1.931476029038841e-05, - "loss": 1.1627, + "learning_rate": 1.85193920024455e-05, + "loss": 1.2792, "step": 5116 }, { - "epoch": 0.14500269205701494, + "epoch": 0.20021128413803896, "grad_norm": 0.0, - "learning_rate": 1.931442635694192e-05, - "loss": 1.0297, + "learning_rate": 1.8518728360234702e-05, + "loss": 1.1854, "step": 5117 }, { - "epoch": 0.1450310294992774, + "epoch": 0.2002504108302684, "grad_norm": 0.0, - "learning_rate": 1.931409234503656e-05, - "loss": 1.0254, + "learning_rate": 1.8518064581223803e-05, + "loss": 1.2462, "step": 5118 }, { - "epoch": 0.14505936694153987, + "epoch": 0.20028953752249784, "grad_norm": 0.0, - "learning_rate": 1.9313758254675143e-05, - "loss": 0.9158, + "learning_rate": 1.8517400665423462e-05, + "loss": 1.1605, "step": 5119 }, { - "epoch": 0.1450877043838023, + "epoch": 0.20032866421472728, "grad_norm": 0.0, - "learning_rate": 1.931342408586048e-05, - "loss": 1.0472, + "learning_rate": 1.851673661284434e-05, + "loss": 1.2087, "step": 5120 }, { - "epoch": 0.14511604182606477, + "epoch": 0.20036779090695672, "grad_norm": 0.0, - "learning_rate": 1.9313089838595387e-05, - "loss": 1.0114, + "learning_rate": 1.8516072423497097e-05, + "loss": 1.184, "step": 5121 }, { - "epoch": 0.14514437926832724, + "epoch": 0.20040691759918616, "grad_norm": 0.0, - "learning_rate": 1.931275551288268e-05, - "loss": 1.0093, + "learning_rate": 1.8515408097392408e-05, + "loss": 1.2777, "step": 5122 }, { - "epoch": 0.1451727167105897, + "epoch": 0.2004460442914156, "grad_norm": 0.0, - "learning_rate": 1.9312421108725175e-05, - "loss": 1.1423, + "learning_rate": 1.8514743634540928e-05, + "loss": 1.0637, "step": 5123 }, { - "epoch": 0.14520105415285217, + "epoch": 0.20048517098364504, "grad_norm": 0.0, - "learning_rate": 1.931208662612569e-05, - "loss": 1.0486, + "learning_rate": 1.851407903495334e-05, + "loss": 1.2133, "step": 5124 }, { - "epoch": 0.14522939159511464, + "epoch": 0.20052429767587449, "grad_norm": 0.0, - "learning_rate": 1.931175206508704e-05, - "loss": 1.0709, + "learning_rate": 1.851341429864031e-05, + "loss": 1.2323, "step": 5125 }, { - "epoch": 0.14525772903737708, + "epoch": 0.20056342436810393, "grad_norm": 0.0, - "learning_rate": 1.9311417425612046e-05, - "loss": 0.9812, + "learning_rate": 1.851274942561251e-05, + "loss": 1.1607, "step": 5126 }, { - "epoch": 0.14528606647963954, + "epoch": 0.20060255106033337, "grad_norm": 0.0, - "learning_rate": 1.9311082707703525e-05, - "loss": 0.9792, + "learning_rate": 1.851208441588063e-05, + "loss": 1.2037, "step": 5127 }, { - "epoch": 0.145314403921902, + "epoch": 0.2006416777525628, "grad_norm": 0.0, - "learning_rate": 1.9310747911364296e-05, - "loss": 1.0642, + "learning_rate": 1.8511419269455335e-05, + "loss": 1.1359, "step": 5128 }, { - "epoch": 0.14534274136416447, + "epoch": 0.20068080444479225, "grad_norm": 0.0, - "learning_rate": 1.931041303659718e-05, - "loss": 1.0377, + "learning_rate": 1.8510753986347313e-05, + "loss": 1.1929, "step": 5129 }, { - "epoch": 0.14537107880642694, + "epoch": 0.2007199311370217, "grad_norm": 0.0, - "learning_rate": 1.9310078083404996e-05, - "loss": 0.9911, + "learning_rate": 1.8510088566567245e-05, + "loss": 1.2126, "step": 5130 }, { - "epoch": 0.1453994162486894, + "epoch": 0.2007590578292511, "grad_norm": 0.0, - "learning_rate": 1.930974305179057e-05, - "loss": 1.0757, + "learning_rate": 1.850942301012582e-05, + "loss": 1.1202, "step": 5131 }, { - "epoch": 0.14542775369095184, + "epoch": 0.20079818452148054, "grad_norm": 0.0, - "learning_rate": 1.930940794175672e-05, - "loss": 1.0221, + "learning_rate": 1.8508757317033723e-05, + "loss": 1.1265, "step": 5132 }, { - "epoch": 0.1454560911332143, + "epoch": 0.20083731121370998, "grad_norm": 0.0, - "learning_rate": 1.930907275330627e-05, - "loss": 0.9821, + "learning_rate": 1.8508091487301648e-05, + "loss": 1.2551, "step": 5133 }, { - "epoch": 0.14548442857547678, + "epoch": 0.20087643790593943, "grad_norm": 0.0, - "learning_rate": 1.9308737486442045e-05, - "loss": 1.0501, + "learning_rate": 1.8507425520940282e-05, + "loss": 1.0681, "step": 5134 }, { - "epoch": 0.14551276601773924, + "epoch": 0.20091556459816887, "grad_norm": 0.0, - "learning_rate": 1.9308402141166865e-05, - "loss": 1.0628, + "learning_rate": 1.8506759417960322e-05, + "loss": 1.1451, "step": 5135 }, { - "epoch": 0.1455411034600017, + "epoch": 0.2009546912903983, "grad_norm": 0.0, - "learning_rate": 1.9308066717483557e-05, - "loss": 1.0022, + "learning_rate": 1.8506093178372467e-05, + "loss": 1.0831, "step": 5136 }, { - "epoch": 0.14556944090226417, + "epoch": 0.20099381798262775, "grad_norm": 0.0, - "learning_rate": 1.930773121539495e-05, - "loss": 1.0623, + "learning_rate": 1.8505426802187417e-05, + "loss": 1.1379, "step": 5137 }, { - "epoch": 0.1455977783445266, + "epoch": 0.2010329446748572, "grad_norm": 0.0, - "learning_rate": 1.9307395634903863e-05, - "loss": 0.9938, + "learning_rate": 1.8504760289415867e-05, + "loss": 1.2077, "step": 5138 }, { - "epoch": 0.14562611578678908, + "epoch": 0.20107207136708663, "grad_norm": 0.0, - "learning_rate": 1.930705997601313e-05, - "loss": 1.0502, + "learning_rate": 1.8504093640068523e-05, + "loss": 1.1947, "step": 5139 }, { - "epoch": 0.14565445322905154, + "epoch": 0.20111119805931607, "grad_norm": 0.0, - "learning_rate": 1.930672423872557e-05, - "loss": 1.0925, + "learning_rate": 1.8503426854156095e-05, + "loss": 1.1537, "step": 5140 }, { - "epoch": 0.145682790671314, + "epoch": 0.2011503247515455, "grad_norm": 0.0, - "learning_rate": 1.9306388423044018e-05, - "loss": 1.0063, + "learning_rate": 1.8502759931689285e-05, + "loss": 1.2084, "step": 5141 }, { - "epoch": 0.14571112811357648, + "epoch": 0.20118945144377495, "grad_norm": 0.0, - "learning_rate": 1.93060525289713e-05, - "loss": 1.1428, + "learning_rate": 1.8502092872678806e-05, + "loss": 1.1907, "step": 5142 }, { - "epoch": 0.14573946555583894, + "epoch": 0.2012285781360044, "grad_norm": 0.0, - "learning_rate": 1.930571655651025e-05, - "loss": 1.0933, + "learning_rate": 1.8501425677135367e-05, + "loss": 1.285, "step": 5143 }, { - "epoch": 0.14576780299810138, + "epoch": 0.20126770482823383, "grad_norm": 0.0, - "learning_rate": 1.9305380505663686e-05, - "loss": 1.0083, + "learning_rate": 1.8500758345069685e-05, + "loss": 1.22, "step": 5144 }, { - "epoch": 0.14579614044036385, + "epoch": 0.20130683152046325, "grad_norm": 0.0, - "learning_rate": 1.930504437643445e-05, - "loss": 0.9161, + "learning_rate": 1.850009087649248e-05, + "loss": 1.2628, "step": 5145 }, { - "epoch": 0.1458244778826263, + "epoch": 0.2013459582126927, "grad_norm": 0.0, - "learning_rate": 1.9304708168825373e-05, - "loss": 1.0296, + "learning_rate": 1.8499423271414463e-05, + "loss": 1.116, "step": 5146 }, { - "epoch": 0.14585281532488878, + "epoch": 0.20138508490492213, "grad_norm": 0.0, - "learning_rate": 1.930437188283928e-05, - "loss": 0.8614, + "learning_rate": 1.849875552984636e-05, + "loss": 1.1306, "step": 5147 }, { - "epoch": 0.14588115276715125, + "epoch": 0.20142421159715157, "grad_norm": 0.0, - "learning_rate": 1.930403551847901e-05, - "loss": 1.03, + "learning_rate": 1.8498087651798893e-05, + "loss": 1.1987, "step": 5148 }, { - "epoch": 0.1459094902094137, + "epoch": 0.201463338289381, "grad_norm": 0.0, - "learning_rate": 1.9303699075747392e-05, - "loss": 1.0811, + "learning_rate": 1.8497419637282784e-05, + "loss": 1.2489, "step": 5149 }, { - "epoch": 0.14593782765167615, + "epoch": 0.20150246498161045, "grad_norm": 0.0, - "learning_rate": 1.9303362554647262e-05, - "loss": 1.0387, + "learning_rate": 1.8496751486308766e-05, + "loss": 1.1843, "step": 5150 }, { - "epoch": 0.14596616509393862, + "epoch": 0.2015415916738399, "grad_norm": 0.0, - "learning_rate": 1.9303025955181456e-05, - "loss": 1.0952, + "learning_rate": 1.8496083198887565e-05, + "loss": 1.078, "step": 5151 }, { - "epoch": 0.14599450253620108, + "epoch": 0.20158071836606933, "grad_norm": 0.0, - "learning_rate": 1.9302689277352808e-05, - "loss": 1.0975, + "learning_rate": 1.8495414775029916e-05, + "loss": 1.1263, "step": 5152 }, { - "epoch": 0.14602283997846355, + "epoch": 0.20161984505829877, "grad_norm": 0.0, - "learning_rate": 1.930235252116415e-05, - "loss": 1.0776, + "learning_rate": 1.849474621474655e-05, + "loss": 1.2113, "step": 5153 }, { - "epoch": 0.146051177420726, + "epoch": 0.20165897175052822, "grad_norm": 0.0, - "learning_rate": 1.9302015686618328e-05, - "loss": 1.1394, + "learning_rate": 1.8494077518048207e-05, + "loss": 1.1501, "step": 5154 }, { - "epoch": 0.14607951486298848, + "epoch": 0.20169809844275766, "grad_norm": 0.0, - "learning_rate": 1.930167877371817e-05, - "loss": 1.1206, + "learning_rate": 1.8493408684945617e-05, + "loss": 1.1494, "step": 5155 }, { - "epoch": 0.14610785230525092, + "epoch": 0.2017372251349871, "grad_norm": 0.0, - "learning_rate": 1.930134178246652e-05, - "loss": 1.1226, + "learning_rate": 1.8492739715449526e-05, + "loss": 1.1282, "step": 5156 }, { - "epoch": 0.14613618974751338, + "epoch": 0.20177635182721654, "grad_norm": 0.0, - "learning_rate": 1.930100471286621e-05, - "loss": 1.0291, + "learning_rate": 1.8492070609570682e-05, + "loss": 1.2301, "step": 5157 }, { - "epoch": 0.14616452718977585, + "epoch": 0.20181547851944598, "grad_norm": 0.0, - "learning_rate": 1.9300667564920086e-05, - "loss": 1.098, + "learning_rate": 1.8491401367319825e-05, + "loss": 1.0168, "step": 5158 }, { - "epoch": 0.14619286463203832, + "epoch": 0.2018546052116754, "grad_norm": 0.0, - "learning_rate": 1.9300330338630986e-05, - "loss": 0.962, + "learning_rate": 1.84907319887077e-05, + "loss": 1.1561, "step": 5159 }, { - "epoch": 0.14622120207430078, + "epoch": 0.20189373190390483, "grad_norm": 0.0, - "learning_rate": 1.9299993034001748e-05, - "loss": 1.0629, + "learning_rate": 1.849006247374506e-05, + "loss": 1.1655, "step": 5160 }, { - "epoch": 0.14624953951656325, + "epoch": 0.20193285859613427, "grad_norm": 0.0, - "learning_rate": 1.9299655651035216e-05, - "loss": 1.0534, + "learning_rate": 1.8489392822442657e-05, + "loss": 1.1084, "step": 5161 }, { - "epoch": 0.1462778769588257, + "epoch": 0.20197198528836371, "grad_norm": 0.0, - "learning_rate": 1.9299318189734234e-05, - "loss": 1.123, + "learning_rate": 1.848872303481124e-05, + "loss": 1.1297, "step": 5162 }, { - "epoch": 0.14630621440108815, + "epoch": 0.20201111198059316, "grad_norm": 0.0, - "learning_rate": 1.9298980650101637e-05, - "loss": 0.9302, + "learning_rate": 1.8488053110861568e-05, + "loss": 1.1383, "step": 5163 }, { - "epoch": 0.14633455184335062, + "epoch": 0.2020502386728226, "grad_norm": 0.0, - "learning_rate": 1.9298643032140275e-05, - "loss": 1.1119, + "learning_rate": 1.8487383050604403e-05, + "loss": 1.2009, "step": 5164 }, { - "epoch": 0.14636288928561308, + "epoch": 0.20208936536505204, "grad_norm": 0.0, - "learning_rate": 1.929830533585299e-05, - "loss": 1.0373, + "learning_rate": 1.8486712854050497e-05, + "loss": 1.2283, "step": 5165 }, { - "epoch": 0.14639122672787555, + "epoch": 0.20212849205728148, "grad_norm": 0.0, - "learning_rate": 1.929796756124263e-05, - "loss": 0.9384, + "learning_rate": 1.8486042521210618e-05, + "loss": 1.2525, "step": 5166 }, { - "epoch": 0.14641956417013802, + "epoch": 0.20216761874951092, "grad_norm": 0.0, - "learning_rate": 1.9297629708312033e-05, - "loss": 1.1293, + "learning_rate": 1.848537205209553e-05, + "loss": 1.1129, "step": 5167 }, { - "epoch": 0.14644790161240046, + "epoch": 0.20220674544174036, "grad_norm": 0.0, - "learning_rate": 1.9297291777064046e-05, - "loss": 0.8747, + "learning_rate": 1.8484701446716e-05, + "loss": 1.1375, "step": 5168 }, { - "epoch": 0.14647623905466292, + "epoch": 0.2022458721339698, "grad_norm": 0.0, - "learning_rate": 1.929695376750152e-05, - "loss": 1.0037, + "learning_rate": 1.84840307050828e-05, + "loss": 1.2878, "step": 5169 }, { - "epoch": 0.1465045764969254, + "epoch": 0.20228499882619924, "grad_norm": 0.0, - "learning_rate": 1.9296615679627302e-05, - "loss": 1.0549, + "learning_rate": 1.8483359827206692e-05, + "loss": 1.1272, "step": 5170 }, { - "epoch": 0.14653291393918785, + "epoch": 0.20232412551842868, "grad_norm": 0.0, - "learning_rate": 1.9296277513444234e-05, - "loss": 1.0757, + "learning_rate": 1.848268881309846e-05, + "loss": 1.1736, "step": 5171 }, { - "epoch": 0.14656125138145032, + "epoch": 0.20236325221065812, "grad_norm": 0.0, - "learning_rate": 1.929593926895517e-05, - "loss": 1.075, + "learning_rate": 1.848201766276887e-05, + "loss": 1.068, "step": 5172 }, { - "epoch": 0.14658958882371279, + "epoch": 0.20240237890288754, "grad_norm": 0.0, - "learning_rate": 1.9295600946162957e-05, - "loss": 1.0769, + "learning_rate": 1.848134637622871e-05, + "loss": 1.1786, "step": 5173 }, { - "epoch": 0.14661792626597522, + "epoch": 0.20244150559511698, "grad_norm": 0.0, - "learning_rate": 1.929526254507045e-05, - "loss": 1.1351, + "learning_rate": 1.8480674953488752e-05, + "loss": 1.2307, "step": 5174 }, { - "epoch": 0.1466462637082377, + "epoch": 0.20248063228734642, "grad_norm": 0.0, - "learning_rate": 1.929492406568049e-05, - "loss": 1.0921, + "learning_rate": 1.8480003394559786e-05, + "loss": 1.1847, "step": 5175 }, { - "epoch": 0.14667460115050016, + "epoch": 0.20251975897957586, "grad_norm": 0.0, - "learning_rate": 1.9294585507995937e-05, - "loss": 0.9626, + "learning_rate": 1.8479331699452583e-05, + "loss": 1.2589, "step": 5176 }, { - "epoch": 0.14670293859276262, + "epoch": 0.2025588856718053, "grad_norm": 0.0, - "learning_rate": 1.929424687201963e-05, - "loss": 1.0929, + "learning_rate": 1.8478659868177943e-05, + "loss": 1.1497, "step": 5177 }, { - "epoch": 0.1467312760350251, + "epoch": 0.20259801236403474, "grad_norm": 0.0, - "learning_rate": 1.929390815775444e-05, - "loss": 0.9055, + "learning_rate": 1.8477987900746653e-05, + "loss": 1.1389, "step": 5178 }, { - "epoch": 0.14675961347728755, + "epoch": 0.20263713905626418, "grad_norm": 0.0, - "learning_rate": 1.9293569365203205e-05, - "loss": 0.9868, + "learning_rate": 1.8477315797169498e-05, + "loss": 1.1169, "step": 5179 }, { - "epoch": 0.14678795091955, + "epoch": 0.20267626574849362, "grad_norm": 0.0, - "learning_rate": 1.929323049436879e-05, - "loss": 0.9831, + "learning_rate": 1.847664355745727e-05, + "loss": 1.1722, "step": 5180 }, { - "epoch": 0.14681628836181246, + "epoch": 0.20271539244072306, "grad_norm": 0.0, - "learning_rate": 1.9292891545254036e-05, - "loss": 1.0956, + "learning_rate": 1.8475971181620772e-05, + "loss": 1.2057, "step": 5181 }, { - "epoch": 0.14684462580407492, + "epoch": 0.2027545191329525, "grad_norm": 0.0, - "learning_rate": 1.929255251786181e-05, - "loss": 0.9792, + "learning_rate": 1.8475298669670798e-05, + "loss": 1.1297, "step": 5182 }, { - "epoch": 0.1468729632463374, + "epoch": 0.20279364582518195, "grad_norm": 0.0, - "learning_rate": 1.9292213412194964e-05, - "loss": 1.0648, + "learning_rate": 1.847462602161815e-05, + "loss": 1.3037, "step": 5183 }, { - "epoch": 0.14690130068859986, + "epoch": 0.20283277251741139, "grad_norm": 0.0, - "learning_rate": 1.9291874228256355e-05, - "loss": 0.9282, + "learning_rate": 1.8473953237473623e-05, + "loss": 1.1487, "step": 5184 }, { - "epoch": 0.1469296381308623, + "epoch": 0.20287189920964083, "grad_norm": 0.0, - "learning_rate": 1.929153496604883e-05, - "loss": 0.9371, + "learning_rate": 1.8473280317248025e-05, + "loss": 1.0319, "step": 5185 }, { - "epoch": 0.14695797557312476, + "epoch": 0.20291102590187027, "grad_norm": 0.0, - "learning_rate": 1.9291195625575264e-05, - "loss": 1.0822, + "learning_rate": 1.8472607260952168e-05, + "loss": 1.1646, "step": 5186 }, { - "epoch": 0.14698631301538723, + "epoch": 0.2029501525940997, "grad_norm": 0.0, - "learning_rate": 1.9290856206838505e-05, - "loss": 1.0179, + "learning_rate": 1.847193406859685e-05, + "loss": 1.1278, "step": 5187 }, { - "epoch": 0.1470146504576497, + "epoch": 0.20298927928632912, "grad_norm": 0.0, - "learning_rate": 1.9290516709841414e-05, - "loss": 1.002, + "learning_rate": 1.8471260740192888e-05, + "loss": 1.0136, "step": 5188 }, { - "epoch": 0.14704298789991216, + "epoch": 0.20302840597855856, "grad_norm": 0.0, - "learning_rate": 1.929017713458685e-05, - "loss": 1.0169, + "learning_rate": 1.8470587275751095e-05, + "loss": 1.2085, "step": 5189 }, { - "epoch": 0.14707132534217462, + "epoch": 0.203067532670788, "grad_norm": 0.0, - "learning_rate": 1.928983748107768e-05, - "loss": 1.1104, + "learning_rate": 1.8469913675282285e-05, + "loss": 1.15, "step": 5190 }, { - "epoch": 0.14709966278443706, + "epoch": 0.20310665936301744, "grad_norm": 0.0, - "learning_rate": 1.9289497749316754e-05, - "loss": 1.0301, + "learning_rate": 1.846923993879727e-05, + "loss": 1.2373, "step": 5191 }, { - "epoch": 0.14712800022669953, + "epoch": 0.20314578605524689, "grad_norm": 0.0, - "learning_rate": 1.928915793930694e-05, - "loss": 0.9225, + "learning_rate": 1.846856606630688e-05, + "loss": 1.2498, "step": 5192 }, { - "epoch": 0.147156337668962, + "epoch": 0.20318491274747633, "grad_norm": 0.0, - "learning_rate": 1.9288818051051096e-05, - "loss": 1.0587, + "learning_rate": 1.846789205782193e-05, + "loss": 1.1473, "step": 5193 }, { - "epoch": 0.14718467511122446, + "epoch": 0.20322403943970577, "grad_norm": 0.0, - "learning_rate": 1.9288478084552092e-05, - "loss": 0.9847, + "learning_rate": 1.846721791335324e-05, + "loss": 1.2819, "step": 5194 }, { - "epoch": 0.14721301255348693, + "epoch": 0.2032631661319352, "grad_norm": 0.0, - "learning_rate": 1.9288138039812784e-05, - "loss": 0.9653, + "learning_rate": 1.8466543632911645e-05, + "loss": 1.1609, "step": 5195 }, { - "epoch": 0.1472413499957494, + "epoch": 0.20330229282416465, "grad_norm": 0.0, - "learning_rate": 1.9287797916836045e-05, - "loss": 0.9429, + "learning_rate": 1.8465869216507965e-05, + "loss": 1.0301, "step": 5196 }, { - "epoch": 0.14726968743801183, + "epoch": 0.2033414195163941, "grad_norm": 0.0, - "learning_rate": 1.9287457715624734e-05, - "loss": 0.9357, + "learning_rate": 1.8465194664153036e-05, + "loss": 1.2109, "step": 5197 }, { - "epoch": 0.1472980248802743, + "epoch": 0.20338054620862353, "grad_norm": 0.0, - "learning_rate": 1.9287117436181714e-05, - "loss": 0.9865, + "learning_rate": 1.8464519975857688e-05, + "loss": 1.122, "step": 5198 }, { - "epoch": 0.14732636232253676, + "epoch": 0.20341967290085297, "grad_norm": 0.0, - "learning_rate": 1.928677707850986e-05, - "loss": 1.0485, + "learning_rate": 1.8463845151632756e-05, + "loss": 1.0578, "step": 5199 }, { - "epoch": 0.14735469976479923, + "epoch": 0.2034587995930824, "grad_norm": 0.0, - "learning_rate": 1.9286436642612032e-05, - "loss": 1.0276, + "learning_rate": 1.8463170191489075e-05, + "loss": 1.1986, "step": 5200 }, { - "epoch": 0.1473830372070617, + "epoch": 0.20349792628531185, "grad_norm": 0.0, - "learning_rate": 1.92860961284911e-05, - "loss": 1.02, + "learning_rate": 1.8462495095437484e-05, + "loss": 1.1725, "step": 5201 }, { - "epoch": 0.14741137464932416, + "epoch": 0.20353705297754127, "grad_norm": 0.0, - "learning_rate": 1.928575553614993e-05, - "loss": 1.1007, + "learning_rate": 1.846181986348883e-05, + "loss": 1.0909, "step": 5202 }, { - "epoch": 0.1474397120915866, + "epoch": 0.2035761796697707, "grad_norm": 0.0, - "learning_rate": 1.9285414865591397e-05, - "loss": 1.0864, + "learning_rate": 1.8461144495653952e-05, + "loss": 1.1731, "step": 5203 }, { - "epoch": 0.14746804953384907, + "epoch": 0.20361530636200015, "grad_norm": 0.0, - "learning_rate": 1.9285074116818364e-05, - "loss": 1.0054, + "learning_rate": 1.8460468991943693e-05, + "loss": 1.1324, "step": 5204 }, { - "epoch": 0.14749638697611153, + "epoch": 0.2036544330542296, "grad_norm": 0.0, - "learning_rate": 1.9284733289833704e-05, - "loss": 1.0312, + "learning_rate": 1.8459793352368908e-05, + "loss": 1.1091, "step": 5205 }, { - "epoch": 0.147524724418374, + "epoch": 0.20369355974645903, "grad_norm": 0.0, - "learning_rate": 1.9284392384640286e-05, - "loss": 1.0549, + "learning_rate": 1.8459117576940436e-05, + "loss": 1.0029, "step": 5206 }, { - "epoch": 0.14755306186063646, + "epoch": 0.20373268643868847, "grad_norm": 0.0, - "learning_rate": 1.9284051401240988e-05, - "loss": 1.108, + "learning_rate": 1.845844166566914e-05, + "loss": 1.2093, "step": 5207 }, { - "epoch": 0.14758139930289893, + "epoch": 0.2037718131309179, "grad_norm": 0.0, - "learning_rate": 1.9283710339638674e-05, - "loss": 1.1874, + "learning_rate": 1.845776561856587e-05, + "loss": 1.0176, "step": 5208 }, { - "epoch": 0.14760973674516137, + "epoch": 0.20381093982314735, "grad_norm": 0.0, - "learning_rate": 1.9283369199836222e-05, - "loss": 1.1778, + "learning_rate": 1.845708943564148e-05, + "loss": 1.127, "step": 5209 }, { - "epoch": 0.14763807418742383, + "epoch": 0.2038500665153768, "grad_norm": 0.0, - "learning_rate": 1.92830279818365e-05, - "loss": 1.0814, + "learning_rate": 1.8456413116906834e-05, + "loss": 1.2526, "step": 5210 }, { - "epoch": 0.1476664116296863, + "epoch": 0.20388919320760623, "grad_norm": 0.0, - "learning_rate": 1.9282686685642392e-05, - "loss": 0.9918, + "learning_rate": 1.8455736662372787e-05, + "loss": 1.1232, "step": 5211 }, { - "epoch": 0.14769474907194877, + "epoch": 0.20392831989983567, "grad_norm": 0.0, - "learning_rate": 1.9282345311256764e-05, - "loss": 1.1157, + "learning_rate": 1.8455060072050208e-05, + "loss": 1.2657, "step": 5212 }, { - "epoch": 0.14772308651421123, + "epoch": 0.20396744659206512, "grad_norm": 0.0, - "learning_rate": 1.9282003858682494e-05, - "loss": 1.0583, + "learning_rate": 1.8454383345949954e-05, + "loss": 1.2794, "step": 5213 }, { - "epoch": 0.1477514239564737, + "epoch": 0.20400657328429456, "grad_norm": 0.0, - "learning_rate": 1.9281662327922458e-05, - "loss": 1.0309, + "learning_rate": 1.8453706484082903e-05, + "loss": 1.2028, "step": 5214 }, { - "epoch": 0.14777976139873614, + "epoch": 0.204045699976524, "grad_norm": 0.0, - "learning_rate": 1.9281320718979536e-05, - "loss": 1.0507, + "learning_rate": 1.8453029486459916e-05, + "loss": 1.2454, "step": 5215 }, { - "epoch": 0.1478080988409986, + "epoch": 0.2040848266687534, "grad_norm": 0.0, - "learning_rate": 1.9280979031856604e-05, - "loss": 1.0001, + "learning_rate": 1.8452352353091866e-05, + "loss": 1.1829, "step": 5216 }, { - "epoch": 0.14783643628326107, + "epoch": 0.20412395336098285, "grad_norm": 0.0, - "learning_rate": 1.9280637266556533e-05, - "loss": 1.1559, + "learning_rate": 1.8451675083989632e-05, + "loss": 1.1609, "step": 5217 }, { - "epoch": 0.14786477372552354, + "epoch": 0.2041630800532123, "grad_norm": 0.0, - "learning_rate": 1.9280295423082215e-05, - "loss": 1.0253, + "learning_rate": 1.8450997679164087e-05, + "loss": 1.0489, "step": 5218 }, { - "epoch": 0.147893111167786, + "epoch": 0.20420220674544173, "grad_norm": 0.0, - "learning_rate": 1.9279953501436518e-05, - "loss": 0.9654, + "learning_rate": 1.8450320138626107e-05, + "loss": 1.0876, "step": 5219 }, { - "epoch": 0.14792144861004847, + "epoch": 0.20424133343767117, "grad_norm": 0.0, - "learning_rate": 1.9279611501622328e-05, - "loss": 1.1602, + "learning_rate": 1.8449642462386574e-05, + "loss": 1.0706, "step": 5220 }, { - "epoch": 0.1479497860523109, + "epoch": 0.20428046012990062, "grad_norm": 0.0, - "learning_rate": 1.9279269423642528e-05, - "loss": 1.0548, + "learning_rate": 1.844896465045637e-05, + "loss": 1.1439, "step": 5221 }, { - "epoch": 0.14797812349457337, + "epoch": 0.20431958682213006, "grad_norm": 0.0, - "learning_rate": 1.927892726749999e-05, - "loss": 1.11, + "learning_rate": 1.8448286702846384e-05, + "loss": 1.2552, "step": 5222 }, { - "epoch": 0.14800646093683584, + "epoch": 0.2043587135143595, "grad_norm": 0.0, - "learning_rate": 1.9278585033197604e-05, - "loss": 0.9725, + "learning_rate": 1.84476086195675e-05, + "loss": 1.1, "step": 5223 }, { - "epoch": 0.1480347983790983, + "epoch": 0.20439784020658894, "grad_norm": 0.0, - "learning_rate": 1.9278242720738254e-05, - "loss": 1.0844, + "learning_rate": 1.8446930400630606e-05, + "loss": 1.2148, "step": 5224 }, { - "epoch": 0.14806313582136077, + "epoch": 0.20443696689881838, "grad_norm": 0.0, - "learning_rate": 1.9277900330124817e-05, - "loss": 1.0513, + "learning_rate": 1.8446252046046593e-05, + "loss": 1.0359, "step": 5225 }, { - "epoch": 0.14809147326362324, + "epoch": 0.20447609359104782, "grad_norm": 0.0, - "learning_rate": 1.927755786136018e-05, - "loss": 0.9662, + "learning_rate": 1.8445573555826355e-05, + "loss": 1.19, "step": 5226 }, { - "epoch": 0.14811981070588567, + "epoch": 0.20451522028327726, "grad_norm": 0.0, - "learning_rate": 1.927721531444723e-05, - "loss": 1.0916, + "learning_rate": 1.8444894929980794e-05, + "loss": 1.0209, "step": 5227 }, { - "epoch": 0.14814814814814814, + "epoch": 0.2045543469755067, "grad_norm": 0.0, - "learning_rate": 1.927687268938885e-05, - "loss": 0.9832, + "learning_rate": 1.8444216168520795e-05, + "loss": 1.1136, "step": 5228 }, { - "epoch": 0.1481764855904106, + "epoch": 0.20459347366773614, "grad_norm": 0.0, - "learning_rate": 1.9276529986187925e-05, - "loss": 0.9731, + "learning_rate": 1.844353727145727e-05, + "loss": 1.1149, "step": 5229 }, { - "epoch": 0.14820482303267307, + "epoch": 0.20463260035996556, "grad_norm": 0.0, - "learning_rate": 1.9276187204847344e-05, - "loss": 1.0659, + "learning_rate": 1.8442858238801117e-05, + "loss": 1.0188, "step": 5230 }, { - "epoch": 0.14823316047493554, + "epoch": 0.204671727052195, "grad_norm": 0.0, - "learning_rate": 1.9275844345369993e-05, - "loss": 1.0667, + "learning_rate": 1.844217907056324e-05, + "loss": 1.1853, "step": 5231 }, { - "epoch": 0.148261497917198, + "epoch": 0.20471085374442444, "grad_norm": 0.0, - "learning_rate": 1.9275501407758762e-05, - "loss": 1.0327, + "learning_rate": 1.8441499766754543e-05, + "loss": 1.0648, "step": 5232 }, { - "epoch": 0.14828983535946044, + "epoch": 0.20474998043665388, "grad_norm": 0.0, - "learning_rate": 1.9275158392016534e-05, - "loss": 1.1037, + "learning_rate": 1.844082032738594e-05, + "loss": 1.0836, "step": 5233 }, { - "epoch": 0.1483181728017229, + "epoch": 0.20478910712888332, "grad_norm": 0.0, - "learning_rate": 1.9274815298146207e-05, - "loss": 1.0331, + "learning_rate": 1.844014075246834e-05, + "loss": 1.1452, "step": 5234 }, { - "epoch": 0.14834651024398537, + "epoch": 0.20482823382111276, "grad_norm": 0.0, - "learning_rate": 1.9274472126150667e-05, - "loss": 0.9513, + "learning_rate": 1.8439461042012653e-05, + "loss": 1.2335, "step": 5235 }, { - "epoch": 0.14837484768624784, + "epoch": 0.2048673605133422, "grad_norm": 0.0, - "learning_rate": 1.9274128876032803e-05, - "loss": 0.9612, + "learning_rate": 1.84387811960298e-05, + "loss": 1.0446, "step": 5236 }, { - "epoch": 0.1484031851285103, + "epoch": 0.20490648720557164, "grad_norm": 0.0, - "learning_rate": 1.9273785547795506e-05, - "loss": 0.9229, + "learning_rate": 1.8438101214530696e-05, + "loss": 1.2932, "step": 5237 }, { - "epoch": 0.14843152257077277, + "epoch": 0.20494561389780108, "grad_norm": 0.0, - "learning_rate": 1.927344214144167e-05, - "loss": 1.0988, + "learning_rate": 1.8437421097526257e-05, + "loss": 1.1023, "step": 5238 }, { - "epoch": 0.1484598600130352, + "epoch": 0.20498474059003052, "grad_norm": 0.0, - "learning_rate": 1.9273098656974188e-05, - "loss": 0.9547, + "learning_rate": 1.843674084502741e-05, + "loss": 1.2858, "step": 5239 }, { - "epoch": 0.14848819745529768, + "epoch": 0.20502386728225996, "grad_norm": 0.0, - "learning_rate": 1.9272755094395953e-05, - "loss": 1.0651, + "learning_rate": 1.8436060457045072e-05, + "loss": 1.1657, "step": 5240 }, { - "epoch": 0.14851653489756014, + "epoch": 0.2050629939744894, "grad_norm": 0.0, - "learning_rate": 1.927241145370986e-05, - "loss": 1.0239, + "learning_rate": 1.843537993359018e-05, + "loss": 1.2913, "step": 5241 }, { - "epoch": 0.1485448723398226, + "epoch": 0.20510212066671885, "grad_norm": 0.0, - "learning_rate": 1.92720677349188e-05, - "loss": 1.0344, + "learning_rate": 1.8434699274673654e-05, + "loss": 1.0997, "step": 5242 }, { - "epoch": 0.14857320978208508, + "epoch": 0.2051412473589483, "grad_norm": 0.0, - "learning_rate": 1.9271723938025672e-05, - "loss": 1.0499, + "learning_rate": 1.8434018480306427e-05, + "loss": 1.2256, "step": 5243 }, { - "epoch": 0.14860154722434754, + "epoch": 0.2051803740511777, "grad_norm": 0.0, - "learning_rate": 1.9271380063033368e-05, - "loss": 1.1839, + "learning_rate": 1.843333755049943e-05, + "loss": 1.1511, "step": 5244 }, { - "epoch": 0.14862988466660998, + "epoch": 0.20521950074340714, "grad_norm": 0.0, - "learning_rate": 1.9271036109944786e-05, - "loss": 1.1143, + "learning_rate": 1.84326564852636e-05, + "loss": 1.1292, "step": 5245 }, { - "epoch": 0.14865822210887245, + "epoch": 0.20525862743563658, "grad_norm": 0.0, - "learning_rate": 1.9270692078762825e-05, - "loss": 1.0966, + "learning_rate": 1.8431975284609874e-05, + "loss": 1.1442, "step": 5246 }, { - "epoch": 0.1486865595511349, + "epoch": 0.20529775412786602, "grad_norm": 0.0, - "learning_rate": 1.9270347969490384e-05, - "loss": 0.9098, + "learning_rate": 1.8431293948549194e-05, + "loss": 1.1154, "step": 5247 }, { - "epoch": 0.14871489699339738, + "epoch": 0.20533688082009546, "grad_norm": 0.0, - "learning_rate": 1.927000378213036e-05, - "loss": 0.9424, + "learning_rate": 1.8430612477092496e-05, + "loss": 1.1607, "step": 5248 }, { - "epoch": 0.14874323443565984, + "epoch": 0.2053760075123249, "grad_norm": 0.0, - "learning_rate": 1.926965951668565e-05, - "loss": 0.9794, + "learning_rate": 1.8429930870250727e-05, + "loss": 0.9913, "step": 5249 }, { - "epoch": 0.1487715718779223, + "epoch": 0.20541513420455434, "grad_norm": 0.0, - "learning_rate": 1.926931517315916e-05, - "loss": 0.993, + "learning_rate": 1.8429249128034827e-05, + "loss": 1.1864, "step": 5250 }, { - "epoch": 0.14879990932018475, + "epoch": 0.20545426089678379, "grad_norm": 0.0, - "learning_rate": 1.926897075155378e-05, - "loss": 1.1175, + "learning_rate": 1.842856725045575e-05, + "loss": 1.2151, "step": 5251 }, { - "epoch": 0.1488282467624472, + "epoch": 0.20549338758901323, "grad_norm": 0.0, - "learning_rate": 1.926862625187242e-05, - "loss": 0.9291, + "learning_rate": 1.8427885237524446e-05, + "loss": 0.9781, "step": 5252 }, { - "epoch": 0.14885658420470968, + "epoch": 0.20553251428124267, "grad_norm": 0.0, - "learning_rate": 1.926828167411798e-05, - "loss": 1.1128, + "learning_rate": 1.842720308925187e-05, + "loss": 1.2333, "step": 5253 }, { - "epoch": 0.14888492164697215, + "epoch": 0.2055716409734721, "grad_norm": 0.0, - "learning_rate": 1.9267937018293357e-05, - "loss": 1.0891, + "learning_rate": 1.8426520805648966e-05, + "loss": 1.285, "step": 5254 }, { - "epoch": 0.1489132590892346, + "epoch": 0.20561076766570155, "grad_norm": 0.0, - "learning_rate": 1.9267592284401463e-05, - "loss": 1.02, + "learning_rate": 1.84258383867267e-05, + "loss": 1.2858, "step": 5255 }, { - "epoch": 0.14894159653149708, + "epoch": 0.205649894357931, "grad_norm": 0.0, - "learning_rate": 1.92672474724452e-05, - "loss": 1.109, + "learning_rate": 1.8425155832496027e-05, + "loss": 1.2271, "step": 5256 }, { - "epoch": 0.14896993397375952, + "epoch": 0.20568902105016043, "grad_norm": 0.0, - "learning_rate": 1.9266902582427467e-05, - "loss": 0.9575, + "learning_rate": 1.842447314296791e-05, + "loss": 1.3264, "step": 5257 }, { - "epoch": 0.14899827141602198, + "epoch": 0.20572814774238987, "grad_norm": 0.0, - "learning_rate": 1.9266557614351172e-05, - "loss": 1.0404, + "learning_rate": 1.8423790318153307e-05, + "loss": 1.0729, "step": 5258 }, { - "epoch": 0.14902660885828445, + "epoch": 0.20576727443461929, "grad_norm": 0.0, - "learning_rate": 1.9266212568219223e-05, - "loss": 0.883, + "learning_rate": 1.8423107358063187e-05, + "loss": 1.1599, "step": 5259 }, { - "epoch": 0.14905494630054691, + "epoch": 0.20580640112684873, "grad_norm": 0.0, - "learning_rate": 1.9265867444034523e-05, - "loss": 1.0392, + "learning_rate": 1.8422424262708523e-05, + "loss": 1.1494, "step": 5260 }, { - "epoch": 0.14908328374280938, + "epoch": 0.20584552781907817, "grad_norm": 0.0, - "learning_rate": 1.9265522241799982e-05, - "loss": 0.9426, + "learning_rate": 1.8421741032100274e-05, + "loss": 1.0458, "step": 5261 }, { - "epoch": 0.14911162118507185, + "epoch": 0.2058846545113076, "grad_norm": 0.0, - "learning_rate": 1.9265176961518507e-05, - "loss": 0.9774, + "learning_rate": 1.842105766624942e-05, + "loss": 1.0864, "step": 5262 }, { - "epoch": 0.14913995862733428, + "epoch": 0.20592378120353705, "grad_norm": 0.0, - "learning_rate": 1.9264831603193003e-05, - "loss": 0.9554, + "learning_rate": 1.8420374165166928e-05, + "loss": 1.2937, "step": 5263 }, { - "epoch": 0.14916829606959675, + "epoch": 0.2059629078957665, "grad_norm": 0.0, - "learning_rate": 1.9264486166826385e-05, - "loss": 1.0155, + "learning_rate": 1.8419690528863783e-05, + "loss": 1.1837, "step": 5264 }, { - "epoch": 0.14919663351185922, + "epoch": 0.20600203458799593, "grad_norm": 0.0, - "learning_rate": 1.9264140652421558e-05, - "loss": 1.0124, + "learning_rate": 1.8419006757350956e-05, + "loss": 1.2469, "step": 5265 }, { - "epoch": 0.14922497095412168, + "epoch": 0.20604116128022537, "grad_norm": 0.0, - "learning_rate": 1.9263795059981435e-05, - "loss": 0.9594, + "learning_rate": 1.841832285063943e-05, + "loss": 1.191, "step": 5266 }, { - "epoch": 0.14925330839638415, + "epoch": 0.2060802879724548, "grad_norm": 0.0, - "learning_rate": 1.9263449389508927e-05, - "loss": 1.1641, + "learning_rate": 1.841763880874019e-05, + "loss": 1.2017, "step": 5267 }, { - "epoch": 0.14928164583864661, + "epoch": 0.20611941466468425, "grad_norm": 0.0, - "learning_rate": 1.9263103641006945e-05, - "loss": 1.0697, + "learning_rate": 1.8416954631664216e-05, + "loss": 1.2155, "step": 5268 }, { - "epoch": 0.14930998328090905, + "epoch": 0.2061585413569137, "grad_norm": 0.0, - "learning_rate": 1.92627578144784e-05, - "loss": 1.1202, + "learning_rate": 1.84162703194225e-05, + "loss": 1.2429, "step": 5269 }, { - "epoch": 0.14933832072317152, + "epoch": 0.20619766804914313, "grad_norm": 0.0, - "learning_rate": 1.9262411909926208e-05, - "loss": 1.0054, + "learning_rate": 1.8415585872026026e-05, + "loss": 1.0596, "step": 5270 }, { - "epoch": 0.14936665816543399, + "epoch": 0.20623679474137258, "grad_norm": 0.0, - "learning_rate": 1.9262065927353277e-05, - "loss": 1.0481, + "learning_rate": 1.8414901289485793e-05, + "loss": 1.2954, "step": 5271 }, { - "epoch": 0.14939499560769645, + "epoch": 0.20627592143360202, "grad_norm": 0.0, - "learning_rate": 1.9261719866762527e-05, - "loss": 0.988, + "learning_rate": 1.8414216571812787e-05, + "loss": 1.0694, "step": 5272 }, { - "epoch": 0.14942333304995892, + "epoch": 0.20631504812583143, "grad_norm": 0.0, - "learning_rate": 1.9261373728156872e-05, - "loss": 1.0928, + "learning_rate": 1.8413531719018006e-05, + "loss": 1.1062, "step": 5273 }, { - "epoch": 0.14945167049222138, + "epoch": 0.20635417481806087, "grad_norm": 0.0, - "learning_rate": 1.9261027511539227e-05, - "loss": 1.1065, + "learning_rate": 1.8412846731112447e-05, + "loss": 1.2136, "step": 5274 }, { - "epoch": 0.14948000793448382, + "epoch": 0.2063933015102903, "grad_norm": 0.0, - "learning_rate": 1.926068121691251e-05, - "loss": 0.9579, + "learning_rate": 1.8412161608107114e-05, + "loss": 1.1917, "step": 5275 }, { - "epoch": 0.1495083453767463, + "epoch": 0.20643242820251975, "grad_norm": 0.0, - "learning_rate": 1.9260334844279635e-05, - "loss": 1.0217, + "learning_rate": 1.8411476350013003e-05, + "loss": 1.2411, "step": 5276 }, { - "epoch": 0.14953668281900875, + "epoch": 0.2064715548947492, "grad_norm": 0.0, - "learning_rate": 1.9259988393643518e-05, - "loss": 1.0446, + "learning_rate": 1.8410790956841126e-05, + "loss": 1.2133, "step": 5277 }, { - "epoch": 0.14956502026127122, + "epoch": 0.20651068158697863, "grad_norm": 0.0, - "learning_rate": 1.9259641865007085e-05, - "loss": 1.23, + "learning_rate": 1.8410105428602485e-05, + "loss": 1.1594, "step": 5278 }, { - "epoch": 0.14959335770353369, + "epoch": 0.20654980827920807, "grad_norm": 0.0, - "learning_rate": 1.9259295258373245e-05, - "loss": 1.1063, + "learning_rate": 1.8409419765308087e-05, + "loss": 1.2021, "step": 5279 }, { - "epoch": 0.14962169514579615, + "epoch": 0.20658893497143752, "grad_norm": 0.0, - "learning_rate": 1.9258948573744927e-05, - "loss": 1.0261, + "learning_rate": 1.840873396696895e-05, + "loss": 1.0812, "step": 5280 }, { - "epoch": 0.1496500325880586, + "epoch": 0.20662806166366696, "grad_norm": 0.0, - "learning_rate": 1.9258601811125044e-05, - "loss": 0.8167, + "learning_rate": 1.8408048033596076e-05, + "loss": 1.2158, "step": 5281 }, { - "epoch": 0.14967837003032106, + "epoch": 0.2066671883558964, "grad_norm": 0.0, - "learning_rate": 1.925825497051652e-05, - "loss": 1.0702, + "learning_rate": 1.840736196520049e-05, + "loss": 1.048, "step": 5282 }, { - "epoch": 0.14970670747258352, + "epoch": 0.20670631504812584, "grad_norm": 0.0, - "learning_rate": 1.925790805192228e-05, - "loss": 1.0055, + "learning_rate": 1.8406675761793206e-05, + "loss": 1.1545, "step": 5283 }, { - "epoch": 0.149735044914846, + "epoch": 0.20674544174035528, "grad_norm": 0.0, - "learning_rate": 1.925756105534524e-05, - "loss": 1.0053, + "learning_rate": 1.840598942338524e-05, + "loss": 1.1792, "step": 5284 }, { - "epoch": 0.14976338235710845, + "epoch": 0.20678456843258472, "grad_norm": 0.0, - "learning_rate": 1.9257213980788325e-05, - "loss": 0.9784, + "learning_rate": 1.840530294998762e-05, + "loss": 1.1764, "step": 5285 }, { - "epoch": 0.14979171979937092, + "epoch": 0.20682369512481416, "grad_norm": 0.0, - "learning_rate": 1.925686682825446e-05, - "loss": 0.969, + "learning_rate": 1.8404616341611366e-05, + "loss": 1.1633, "step": 5286 }, { - "epoch": 0.14982005724163336, + "epoch": 0.20686282181704357, "grad_norm": 0.0, - "learning_rate": 1.9256519597746566e-05, - "loss": 1.0144, + "learning_rate": 1.8403929598267507e-05, + "loss": 1.1976, "step": 5287 }, { - "epoch": 0.14984839468389582, + "epoch": 0.20690194850927301, "grad_norm": 0.0, - "learning_rate": 1.9256172289267573e-05, - "loss": 1.0756, + "learning_rate": 1.840324271996707e-05, + "loss": 1.1215, "step": 5288 }, { - "epoch": 0.1498767321261583, + "epoch": 0.20694107520150246, "grad_norm": 0.0, - "learning_rate": 1.9255824902820403e-05, - "loss": 1.0112, + "learning_rate": 1.840255570672108e-05, + "loss": 1.1368, "step": 5289 }, { - "epoch": 0.14990506956842076, + "epoch": 0.2069802018937319, "grad_norm": 0.0, - "learning_rate": 1.9255477438407983e-05, - "loss": 1.1233, + "learning_rate": 1.840186855854058e-05, + "loss": 1.2061, "step": 5290 }, { - "epoch": 0.14993340701068322, + "epoch": 0.20701932858596134, "grad_norm": 0.0, - "learning_rate": 1.9255129896033238e-05, - "loss": 1.0126, + "learning_rate": 1.8401181275436596e-05, + "loss": 1.0252, "step": 5291 }, { - "epoch": 0.1499617444529457, + "epoch": 0.20705845527819078, "grad_norm": 0.0, - "learning_rate": 1.92547822756991e-05, - "loss": 0.9232, + "learning_rate": 1.840049385742017e-05, + "loss": 1.2101, "step": 5292 }, { - "epoch": 0.14999008189520813, + "epoch": 0.20709758197042022, "grad_norm": 0.0, - "learning_rate": 1.9254434577408492e-05, - "loss": 1.0997, + "learning_rate": 1.8399806304502333e-05, + "loss": 1.1556, "step": 5293 }, { - "epoch": 0.1500184193374706, + "epoch": 0.20713670866264966, "grad_norm": 0.0, - "learning_rate": 1.9254086801164345e-05, - "loss": 1.0846, + "learning_rate": 1.839911861669414e-05, + "loss": 1.1128, "step": 5294 }, { - "epoch": 0.15004675677973306, + "epoch": 0.2071758353548791, "grad_norm": 0.0, - "learning_rate": 1.925373894696959e-05, - "loss": 1.0702, + "learning_rate": 1.839843079400662e-05, + "loss": 1.1604, "step": 5295 }, { - "epoch": 0.15007509422199553, + "epoch": 0.20721496204710854, "grad_norm": 0.0, - "learning_rate": 1.9253391014827156e-05, - "loss": 0.9791, + "learning_rate": 1.839774283645083e-05, + "loss": 1.0905, "step": 5296 }, { - "epoch": 0.150103431664258, + "epoch": 0.20725408873933798, "grad_norm": 0.0, - "learning_rate": 1.9253043004739967e-05, - "loss": 0.9451, + "learning_rate": 1.839705474403781e-05, + "loss": 1.231, "step": 5297 }, { - "epoch": 0.15013176910652046, + "epoch": 0.20729321543156742, "grad_norm": 0.0, - "learning_rate": 1.9252694916710965e-05, - "loss": 0.9556, + "learning_rate": 1.8396366516778615e-05, + "loss": 1.1814, "step": 5298 }, { - "epoch": 0.1501601065487829, + "epoch": 0.20733234212379686, "grad_norm": 0.0, - "learning_rate": 1.925234675074308e-05, - "loss": 1.0275, + "learning_rate": 1.8395678154684297e-05, + "loss": 1.2345, "step": 5299 }, { - "epoch": 0.15018844399104536, + "epoch": 0.2073714688160263, "grad_norm": 0.0, - "learning_rate": 1.925199850683924e-05, - "loss": 1.0872, + "learning_rate": 1.8394989657765904e-05, + "loss": 1.1711, "step": 5300 }, { - "epoch": 0.15021678143330783, + "epoch": 0.20741059550825572, "grad_norm": 0.0, - "learning_rate": 1.925165018500238e-05, - "loss": 0.8752, + "learning_rate": 1.83943010260345e-05, + "loss": 1.0597, "step": 5301 }, { - "epoch": 0.1502451188755703, + "epoch": 0.20744972220048516, "grad_norm": 0.0, - "learning_rate": 1.925130178523544e-05, - "loss": 1.0572, + "learning_rate": 1.8393612259501136e-05, + "loss": 1.1815, "step": 5302 }, { - "epoch": 0.15027345631783276, + "epoch": 0.2074888488927146, "grad_norm": 0.0, - "learning_rate": 1.9250953307541347e-05, - "loss": 0.9779, + "learning_rate": 1.8392923358176878e-05, + "loss": 1.1185, "step": 5303 }, { - "epoch": 0.15030179376009523, + "epoch": 0.20752797558494404, "grad_norm": 0.0, - "learning_rate": 1.9250604751923035e-05, - "loss": 1.0674, + "learning_rate": 1.8392234322072792e-05, + "loss": 1.1445, "step": 5304 }, { - "epoch": 0.15033013120235766, + "epoch": 0.20756710227717348, "grad_norm": 0.0, - "learning_rate": 1.925025611838345e-05, - "loss": 0.9418, + "learning_rate": 1.8391545151199936e-05, + "loss": 1.1072, "step": 5305 }, { - "epoch": 0.15035846864462013, + "epoch": 0.20760622896940292, "grad_norm": 0.0, - "learning_rate": 1.924990740692552e-05, - "loss": 0.9993, + "learning_rate": 1.839085584556938e-05, + "loss": 1.0608, "step": 5306 }, { - "epoch": 0.1503868060868826, + "epoch": 0.20764535566163236, "grad_norm": 0.0, - "learning_rate": 1.9249558617552187e-05, - "loss": 0.9476, + "learning_rate": 1.8390166405192192e-05, + "loss": 1.1322, "step": 5307 }, { - "epoch": 0.15041514352914506, + "epoch": 0.2076844823538618, "grad_norm": 0.0, - "learning_rate": 1.9249209750266385e-05, - "loss": 1.1124, + "learning_rate": 1.8389476830079446e-05, + "loss": 1.153, "step": 5308 }, { - "epoch": 0.15044348097140753, + "epoch": 0.20772360904609125, "grad_norm": 0.0, - "learning_rate": 1.9248860805071056e-05, - "loss": 1.0731, + "learning_rate": 1.838878712024222e-05, + "loss": 1.0927, "step": 5309 }, { - "epoch": 0.15047181841367, + "epoch": 0.2077627357383207, "grad_norm": 0.0, - "learning_rate": 1.9248511781969135e-05, - "loss": 1.0334, + "learning_rate": 1.838809727569158e-05, + "loss": 1.0849, "step": 5310 }, { - "epoch": 0.15050015585593243, + "epoch": 0.20780186243055013, "grad_norm": 0.0, - "learning_rate": 1.924816268096357e-05, - "loss": 1.0759, + "learning_rate": 1.8387407296438606e-05, + "loss": 1.1188, "step": 5311 }, { - "epoch": 0.1505284932981949, + "epoch": 0.20784098912277957, "grad_norm": 0.0, - "learning_rate": 1.924781350205729e-05, - "loss": 1.0407, + "learning_rate": 1.8386717182494385e-05, + "loss": 1.3183, "step": 5312 }, { - "epoch": 0.15055683074045736, + "epoch": 0.207880115815009, "grad_norm": 0.0, - "learning_rate": 1.9247464245253246e-05, - "loss": 1.0833, + "learning_rate": 1.838602693386999e-05, + "loss": 1.1824, "step": 5313 }, { - "epoch": 0.15058516818271983, + "epoch": 0.20791924250723845, "grad_norm": 0.0, - "learning_rate": 1.9247114910554376e-05, - "loss": 1.0208, + "learning_rate": 1.8385336550576515e-05, + "loss": 1.2272, "step": 5314 }, { - "epoch": 0.1506135056249823, + "epoch": 0.2079583691994679, "grad_norm": 0.0, - "learning_rate": 1.9246765497963623e-05, - "loss": 1.0484, + "learning_rate": 1.8384646032625042e-05, + "loss": 1.2067, "step": 5315 }, { - "epoch": 0.15064184306724476, + "epoch": 0.2079974958916973, "grad_norm": 0.0, - "learning_rate": 1.9246416007483932e-05, - "loss": 1.0516, + "learning_rate": 1.838395538002666e-05, + "loss": 1.3491, "step": 5316 }, { - "epoch": 0.1506701805095072, + "epoch": 0.20803662258392674, "grad_norm": 0.0, - "learning_rate": 1.9246066439118247e-05, - "loss": 1.0844, + "learning_rate": 1.838326459279246e-05, + "loss": 1.1464, "step": 5317 }, { - "epoch": 0.15069851795176967, + "epoch": 0.20807574927615619, "grad_norm": 0.0, - "learning_rate": 1.9245716792869505e-05, - "loss": 1.0308, + "learning_rate": 1.8382573670933536e-05, + "loss": 1.1032, "step": 5318 }, { - "epoch": 0.15072685539403213, + "epoch": 0.20811487596838563, "grad_norm": 0.0, - "learning_rate": 1.9245367068740664e-05, - "loss": 1.0306, + "learning_rate": 1.8381882614460985e-05, + "loss": 1.2354, "step": 5319 }, { - "epoch": 0.1507551928362946, + "epoch": 0.20815400266061507, "grad_norm": 0.0, - "learning_rate": 1.924501726673466e-05, - "loss": 1.0759, + "learning_rate": 1.8381191423385897e-05, + "loss": 1.1711, "step": 5320 }, { - "epoch": 0.15078353027855707, + "epoch": 0.2081931293528445, "grad_norm": 0.0, - "learning_rate": 1.9244667386854443e-05, - "loss": 1.0395, + "learning_rate": 1.838050009771938e-05, + "loss": 1.166, "step": 5321 }, { - "epoch": 0.15081186772081953, + "epoch": 0.20823225604507395, "grad_norm": 0.0, - "learning_rate": 1.924431742910296e-05, - "loss": 0.9441, + "learning_rate": 1.8379808637472532e-05, + "loss": 1.1409, "step": 5322 }, { - "epoch": 0.15084020516308197, + "epoch": 0.2082713827373034, "grad_norm": 0.0, - "learning_rate": 1.9243967393483156e-05, - "loss": 1.0807, + "learning_rate": 1.837911704265646e-05, + "loss": 1.2248, "step": 5323 }, { - "epoch": 0.15086854260534444, + "epoch": 0.20831050942953283, "grad_norm": 0.0, - "learning_rate": 1.9243617279997986e-05, - "loss": 1.0075, + "learning_rate": 1.8378425313282266e-05, + "loss": 1.0572, "step": 5324 }, { - "epoch": 0.1508968800476069, + "epoch": 0.20834963612176227, "grad_norm": 0.0, - "learning_rate": 1.924326708865039e-05, - "loss": 1.0056, + "learning_rate": 1.837773344936106e-05, + "loss": 1.133, "step": 5325 }, { - "epoch": 0.15092521748986937, + "epoch": 0.2083887628139917, "grad_norm": 0.0, - "learning_rate": 1.9242916819443328e-05, - "loss": 1.062, + "learning_rate": 1.8377041450903955e-05, + "loss": 1.1219, "step": 5326 }, { - "epoch": 0.15095355493213183, + "epoch": 0.20842788950622115, "grad_norm": 0.0, - "learning_rate": 1.9242566472379742e-05, - "loss": 0.9969, + "learning_rate": 1.8376349317922056e-05, + "loss": 1.2594, "step": 5327 }, { - "epoch": 0.1509818923743943, + "epoch": 0.2084670161984506, "grad_norm": 0.0, - "learning_rate": 1.9242216047462587e-05, - "loss": 0.9579, + "learning_rate": 1.837565705042649e-05, + "loss": 1.0265, "step": 5328 }, { - "epoch": 0.15101022981665674, + "epoch": 0.20850614289068004, "grad_norm": 0.0, - "learning_rate": 1.9241865544694817e-05, - "loss": 1.1995, + "learning_rate": 1.837496464842836e-05, + "loss": 1.0501, "step": 5329 }, { - "epoch": 0.1510385672589192, + "epoch": 0.20854526958290945, "grad_norm": 0.0, - "learning_rate": 1.924151496407938e-05, - "loss": 0.9552, + "learning_rate": 1.8374272111938797e-05, + "loss": 1.1177, "step": 5330 }, { - "epoch": 0.15106690470118167, + "epoch": 0.2085843962751389, "grad_norm": 0.0, - "learning_rate": 1.9241164305619228e-05, - "loss": 0.9434, + "learning_rate": 1.837357944096892e-05, + "loss": 1.1179, "step": 5331 }, { - "epoch": 0.15109524214344414, + "epoch": 0.20862352296736833, "grad_norm": 0.0, - "learning_rate": 1.924081356931732e-05, - "loss": 1.1095, + "learning_rate": 1.8372886635529843e-05, + "loss": 1.1175, "step": 5332 }, { - "epoch": 0.1511235795857066, + "epoch": 0.20866264965959777, "grad_norm": 0.0, - "learning_rate": 1.9240462755176604e-05, - "loss": 1.0505, + "learning_rate": 1.83721936956327e-05, + "loss": 1.0392, "step": 5333 }, { - "epoch": 0.15115191702796907, + "epoch": 0.2087017763518272, "grad_norm": 0.0, - "learning_rate": 1.9240111863200047e-05, - "loss": 1.0067, + "learning_rate": 1.837150062128862e-05, + "loss": 1.1451, "step": 5334 }, { - "epoch": 0.1511802544702315, + "epoch": 0.20874090304405665, "grad_norm": 0.0, - "learning_rate": 1.9239760893390592e-05, - "loss": 1.0935, + "learning_rate": 1.8370807412508728e-05, + "loss": 1.2771, "step": 5335 }, { - "epoch": 0.15120859191249397, + "epoch": 0.2087800297362861, "grad_norm": 0.0, - "learning_rate": 1.92394098457512e-05, - "loss": 1.0358, + "learning_rate": 1.8370114069304158e-05, + "loss": 1.1506, "step": 5336 }, { - "epoch": 0.15123692935475644, + "epoch": 0.20881915642851553, "grad_norm": 0.0, - "learning_rate": 1.9239058720284823e-05, - "loss": 1.0332, + "learning_rate": 1.8369420591686048e-05, + "loss": 1.1258, "step": 5337 }, { - "epoch": 0.1512652667970189, + "epoch": 0.20885828312074498, "grad_norm": 0.0, - "learning_rate": 1.9238707516994432e-05, - "loss": 1.0549, + "learning_rate": 1.8368726979665527e-05, + "loss": 1.1887, "step": 5338 }, { - "epoch": 0.15129360423928137, + "epoch": 0.20889740981297442, "grad_norm": 0.0, - "learning_rate": 1.923835623588297e-05, - "loss": 0.8948, + "learning_rate": 1.836803323325374e-05, + "loss": 0.999, "step": 5339 }, { - "epoch": 0.15132194168154384, + "epoch": 0.20893653650520386, "grad_norm": 0.0, - "learning_rate": 1.9238004876953406e-05, - "loss": 0.9906, + "learning_rate": 1.836733935246182e-05, + "loss": 1.178, "step": 5340 }, { - "epoch": 0.15135027912380628, + "epoch": 0.2089756631974333, "grad_norm": 0.0, - "learning_rate": 1.9237653440208696e-05, - "loss": 1.1693, + "learning_rate": 1.836664533730092e-05, + "loss": 1.1127, "step": 5341 }, { - "epoch": 0.15137861656606874, + "epoch": 0.20901478988966274, "grad_norm": 0.0, - "learning_rate": 1.9237301925651803e-05, - "loss": 1.0739, + "learning_rate": 1.836595118778218e-05, + "loss": 1.1465, "step": 5342 }, { - "epoch": 0.1514069540083312, + "epoch": 0.20905391658189218, "grad_norm": 0.0, - "learning_rate": 1.9236950333285685e-05, - "loss": 1.0728, + "learning_rate": 1.836525690391674e-05, + "loss": 1.1562, "step": 5343 }, { - "epoch": 0.15143529145059367, + "epoch": 0.2090930432741216, "grad_norm": 0.0, - "learning_rate": 1.92365986631133e-05, - "loss": 1.0453, + "learning_rate": 1.8364562485715763e-05, + "loss": 1.2314, "step": 5344 }, { - "epoch": 0.15146362889285614, + "epoch": 0.20913216996635103, "grad_norm": 0.0, - "learning_rate": 1.9236246915137618e-05, - "loss": 0.9763, + "learning_rate": 1.8363867933190396e-05, + "loss": 1.2122, "step": 5345 }, { - "epoch": 0.1514919663351186, + "epoch": 0.20917129665858047, "grad_norm": 0.0, - "learning_rate": 1.92358950893616e-05, - "loss": 1.0194, + "learning_rate": 1.8363173246351788e-05, + "loss": 1.1837, "step": 5346 }, { - "epoch": 0.15152030377738104, + "epoch": 0.20921042335080992, "grad_norm": 0.0, - "learning_rate": 1.9235543185788207e-05, - "loss": 1.1258, + "learning_rate": 1.836247842521109e-05, + "loss": 1.1593, "step": 5347 }, { - "epoch": 0.1515486412196435, + "epoch": 0.20924955004303936, "grad_norm": 0.0, - "learning_rate": 1.92351912044204e-05, - "loss": 1.0688, + "learning_rate": 1.8361783469779476e-05, + "loss": 1.1359, "step": 5348 }, { - "epoch": 0.15157697866190598, + "epoch": 0.2092886767352688, "grad_norm": 0.0, - "learning_rate": 1.9234839145261154e-05, - "loss": 0.9801, + "learning_rate": 1.8361088380068098e-05, + "loss": 1.162, "step": 5349 }, { - "epoch": 0.15160531610416844, + "epoch": 0.20932780342749824, "grad_norm": 0.0, - "learning_rate": 1.9234487008313426e-05, - "loss": 1.0463, + "learning_rate": 1.8360393156088113e-05, + "loss": 1.0151, "step": 5350 }, { - "epoch": 0.1516336535464309, + "epoch": 0.20936693011972768, "grad_norm": 0.0, - "learning_rate": 1.9234134793580183e-05, - "loss": 1.1568, + "learning_rate": 1.8359697797850692e-05, + "loss": 1.2221, "step": 5351 }, { - "epoch": 0.15166199098869337, + "epoch": 0.20940605681195712, "grad_norm": 0.0, - "learning_rate": 1.9233782501064396e-05, - "loss": 0.8671, + "learning_rate": 1.8359002305366996e-05, + "loss": 1.231, "step": 5352 }, { - "epoch": 0.1516903284309558, + "epoch": 0.20944518350418656, "grad_norm": 0.0, - "learning_rate": 1.923343013076903e-05, - "loss": 1.0039, + "learning_rate": 1.8358306678648205e-05, + "loss": 1.2227, "step": 5353 }, { - "epoch": 0.15171866587321828, + "epoch": 0.209484310196416, "grad_norm": 0.0, - "learning_rate": 1.9233077682697054e-05, - "loss": 0.9788, + "learning_rate": 1.8357610917705476e-05, + "loss": 1.1533, "step": 5354 }, { - "epoch": 0.15174700331548074, + "epoch": 0.20952343688864544, "grad_norm": 0.0, - "learning_rate": 1.923272515685143e-05, - "loss": 1.0721, + "learning_rate": 1.835691502254999e-05, + "loss": 1.2803, "step": 5355 }, { - "epoch": 0.1517753407577432, + "epoch": 0.20956256358087488, "grad_norm": 0.0, - "learning_rate": 1.923237255323514e-05, - "loss": 0.966, + "learning_rate": 1.8356218993192922e-05, + "loss": 1.1396, "step": 5356 }, { - "epoch": 0.15180367820000568, + "epoch": 0.20960169027310432, "grad_norm": 0.0, - "learning_rate": 1.9232019871851145e-05, - "loss": 1.0082, + "learning_rate": 1.8355522829645445e-05, + "loss": 1.2576, "step": 5357 }, { - "epoch": 0.15183201564226814, + "epoch": 0.20964081696533374, "grad_norm": 0.0, - "learning_rate": 1.923166711270242e-05, - "loss": 1.0035, + "learning_rate": 1.8354826531918744e-05, + "loss": 1.1162, "step": 5358 }, { - "epoch": 0.15186035308453058, + "epoch": 0.20967994365756318, "grad_norm": 0.0, - "learning_rate": 1.9231314275791934e-05, - "loss": 1.1139, + "learning_rate": 1.8354130100024e-05, + "loss": 0.9481, "step": 5359 }, { - "epoch": 0.15188869052679305, + "epoch": 0.20971907034979262, "grad_norm": 0.0, - "learning_rate": 1.923096136112266e-05, - "loss": 0.9791, + "learning_rate": 1.8353433533972392e-05, + "loss": 1.1005, "step": 5360 }, { - "epoch": 0.1519170279690555, + "epoch": 0.20975819704202206, "grad_norm": 0.0, - "learning_rate": 1.9230608368697572e-05, - "loss": 1.1144, + "learning_rate": 1.8352736833775106e-05, + "loss": 1.1344, "step": 5361 }, { - "epoch": 0.15194536541131798, + "epoch": 0.2097973237342515, "grad_norm": 0.0, - "learning_rate": 1.923025529851964e-05, - "loss": 1.0527, + "learning_rate": 1.8352039999443336e-05, + "loss": 1.2896, "step": 5362 }, { - "epoch": 0.15197370285358044, + "epoch": 0.20983645042648094, "grad_norm": 0.0, - "learning_rate": 1.922990215059184e-05, - "loss": 1.0193, + "learning_rate": 1.8351343030988273e-05, + "loss": 1.0726, "step": 5363 }, { - "epoch": 0.1520020402958429, + "epoch": 0.20987557711871038, "grad_norm": 0.0, - "learning_rate": 1.9229548924917146e-05, - "loss": 1.0773, + "learning_rate": 1.8350645928421097e-05, + "loss": 1.121, "step": 5364 }, { - "epoch": 0.15203037773810535, + "epoch": 0.20991470381093982, "grad_norm": 0.0, - "learning_rate": 1.9229195621498538e-05, - "loss": 1.0269, + "learning_rate": 1.8349948691753017e-05, + "loss": 1.2027, "step": 5365 }, { - "epoch": 0.15205871518036781, + "epoch": 0.20995383050316926, "grad_norm": 0.0, - "learning_rate": 1.9228842240338985e-05, - "loss": 1.0449, + "learning_rate": 1.8349251320995224e-05, + "loss": 1.0792, "step": 5366 }, { - "epoch": 0.15208705262263028, + "epoch": 0.2099929571953987, "grad_norm": 0.0, - "learning_rate": 1.9228488781441468e-05, - "loss": 0.8657, + "learning_rate": 1.8348553816158915e-05, + "loss": 1.2187, "step": 5367 }, { - "epoch": 0.15211539006489275, + "epoch": 0.21003208388762815, "grad_norm": 0.0, - "learning_rate": 1.9228135244808963e-05, - "loss": 1.026, + "learning_rate": 1.8347856177255296e-05, + "loss": 1.1929, "step": 5368 }, { - "epoch": 0.1521437275071552, + "epoch": 0.2100712105798576, "grad_norm": 0.0, - "learning_rate": 1.9227781630444448e-05, - "loss": 0.9687, + "learning_rate": 1.8347158404295566e-05, + "loss": 1.3102, "step": 5369 }, { - "epoch": 0.15217206494941768, + "epoch": 0.21011033727208703, "grad_norm": 0.0, - "learning_rate": 1.92274279383509e-05, - "loss": 0.9401, + "learning_rate": 1.834646049729093e-05, + "loss": 1.161, "step": 5370 }, { - "epoch": 0.15220040239168012, + "epoch": 0.21014946396431647, "grad_norm": 0.0, - "learning_rate": 1.9227074168531303e-05, - "loss": 0.9589, + "learning_rate": 1.8345762456252602e-05, + "loss": 1.1677, "step": 5371 }, { - "epoch": 0.15222873983394258, + "epoch": 0.2101885906565459, "grad_norm": 0.0, - "learning_rate": 1.922672032098863e-05, - "loss": 1.0332, + "learning_rate": 1.8345064281191784e-05, + "loss": 1.1634, "step": 5372 }, { - "epoch": 0.15225707727620505, + "epoch": 0.21022771734877532, "grad_norm": 0.0, - "learning_rate": 1.9226366395725868e-05, - "loss": 0.9277, + "learning_rate": 1.8344365972119687e-05, + "loss": 1.2948, "step": 5373 }, { - "epoch": 0.15228541471846752, + "epoch": 0.21026684404100476, "grad_norm": 0.0, - "learning_rate": 1.9226012392745994e-05, - "loss": 0.9771, + "learning_rate": 1.834366752904753e-05, + "loss": 1.3063, "step": 5374 }, { - "epoch": 0.15231375216072998, + "epoch": 0.2103059707332342, "grad_norm": 0.0, - "learning_rate": 1.9225658312051993e-05, - "loss": 1.0256, + "learning_rate": 1.834296895198653e-05, + "loss": 0.9497, "step": 5375 }, { - "epoch": 0.15234208960299245, + "epoch": 0.21034509742546365, "grad_norm": 0.0, - "learning_rate": 1.9225304153646845e-05, - "loss": 1.0983, + "learning_rate": 1.83422702409479e-05, + "loss": 1.1331, "step": 5376 }, { - "epoch": 0.15237042704525489, + "epoch": 0.21038422411769309, "grad_norm": 0.0, - "learning_rate": 1.9224949917533536e-05, - "loss": 0.9704, + "learning_rate": 1.834157139594287e-05, + "loss": 1.2402, "step": 5377 }, { - "epoch": 0.15239876448751735, + "epoch": 0.21042335080992253, "grad_norm": 0.0, - "learning_rate": 1.9224595603715047e-05, - "loss": 1.1756, + "learning_rate": 1.834087241698265e-05, + "loss": 1.2138, "step": 5378 }, { - "epoch": 0.15242710192977982, + "epoch": 0.21046247750215197, "grad_norm": 0.0, - "learning_rate": 1.9224241212194364e-05, - "loss": 1.0408, + "learning_rate": 1.8340173304078472e-05, + "loss": 1.19, "step": 5379 }, { - "epoch": 0.15245543937204228, + "epoch": 0.2105016041943814, "grad_norm": 0.0, - "learning_rate": 1.9223886742974474e-05, - "loss": 0.9622, + "learning_rate": 1.833947405724156e-05, + "loss": 1.1542, "step": 5380 }, { - "epoch": 0.15248377681430475, + "epoch": 0.21054073088661085, "grad_norm": 0.0, - "learning_rate": 1.922353219605836e-05, - "loss": 1.0819, + "learning_rate": 1.8338774676483148e-05, + "loss": 1.0744, "step": 5381 }, { - "epoch": 0.15251211425656722, + "epoch": 0.2105798575788403, "grad_norm": 0.0, - "learning_rate": 1.922317757144901e-05, - "loss": 1.0006, + "learning_rate": 1.8338075161814462e-05, + "loss": 1.076, "step": 5382 }, { - "epoch": 0.15254045169882965, + "epoch": 0.21061898427106973, "grad_norm": 0.0, - "learning_rate": 1.9222822869149406e-05, - "loss": 0.9516, + "learning_rate": 1.8337375513246736e-05, + "loss": 1.1885, "step": 5383 }, { - "epoch": 0.15256878914109212, + "epoch": 0.21065811096329917, "grad_norm": 0.0, - "learning_rate": 1.9222468089162544e-05, - "loss": 0.9136, + "learning_rate": 1.833667573079121e-05, + "loss": 1.2914, "step": 5384 }, { - "epoch": 0.1525971265833546, + "epoch": 0.2106972376555286, "grad_norm": 0.0, - "learning_rate": 1.922211323149141e-05, - "loss": 1.0663, + "learning_rate": 1.8335975814459113e-05, + "loss": 1.1824, "step": 5385 }, { - "epoch": 0.15262546402561705, + "epoch": 0.21073636434775805, "grad_norm": 0.0, - "learning_rate": 1.9221758296138986e-05, - "loss": 1.0076, + "learning_rate": 1.8335275764261693e-05, + "loss": 1.1133, "step": 5386 }, { - "epoch": 0.15265380146787952, + "epoch": 0.21077549103998747, "grad_norm": 0.0, - "learning_rate": 1.922140328310827e-05, - "loss": 1.0428, + "learning_rate": 1.833457558021019e-05, + "loss": 1.1246, "step": 5387 }, { - "epoch": 0.15268213891014196, + "epoch": 0.2108146177322169, "grad_norm": 0.0, - "learning_rate": 1.9221048192402252e-05, - "loss": 1.1583, + "learning_rate": 1.8333875262315844e-05, + "loss": 1.2311, "step": 5388 }, { - "epoch": 0.15271047635240442, + "epoch": 0.21085374442444635, "grad_norm": 0.0, - "learning_rate": 1.922069302402392e-05, - "loss": 1.1094, + "learning_rate": 1.8333174810589905e-05, + "loss": 1.1649, "step": 5389 }, { - "epoch": 0.1527388137946669, + "epoch": 0.2108928711166758, "grad_norm": 0.0, - "learning_rate": 1.9220337777976263e-05, - "loss": 1.1432, + "learning_rate": 1.8332474225043623e-05, + "loss": 1.1425, "step": 5390 }, { - "epoch": 0.15276715123692935, + "epoch": 0.21093199780890523, "grad_norm": 0.0, - "learning_rate": 1.9219982454262282e-05, - "loss": 1.0802, + "learning_rate": 1.8331773505688243e-05, + "loss": 1.2617, "step": 5391 }, { - "epoch": 0.15279548867919182, + "epoch": 0.21097112450113467, "grad_norm": 0.0, - "learning_rate": 1.9219627052884965e-05, - "loss": 1.0251, + "learning_rate": 1.8331072652535024e-05, + "loss": 1.0261, "step": 5392 }, { - "epoch": 0.1528238261214543, + "epoch": 0.2110102511933641, "grad_norm": 0.0, - "learning_rate": 1.92192715738473e-05, - "loss": 1.057, + "learning_rate": 1.8330371665595218e-05, + "loss": 1.2296, "step": 5393 }, { - "epoch": 0.15285216356371673, + "epoch": 0.21104937788559355, "grad_norm": 0.0, - "learning_rate": 1.9218916017152292e-05, - "loss": 0.9666, + "learning_rate": 1.832967054488008e-05, + "loss": 1.0838, "step": 5394 }, { - "epoch": 0.1528805010059792, + "epoch": 0.211088504577823, "grad_norm": 0.0, - "learning_rate": 1.921856038280293e-05, - "loss": 1.0613, + "learning_rate": 1.8328969290400867e-05, + "loss": 1.11, "step": 5395 }, { - "epoch": 0.15290883844824166, + "epoch": 0.21112763127005243, "grad_norm": 0.0, - "learning_rate": 1.9218204670802212e-05, - "loss": 1.1712, + "learning_rate": 1.8328267902168848e-05, + "loss": 1.1964, "step": 5396 }, { - "epoch": 0.15293717589050412, + "epoch": 0.21116675796228188, "grad_norm": 0.0, - "learning_rate": 1.921784888115313e-05, - "loss": 1.1611, + "learning_rate": 1.8327566380195283e-05, + "loss": 1.1044, "step": 5397 }, { - "epoch": 0.1529655133327666, + "epoch": 0.21120588465451132, "grad_norm": 0.0, - "learning_rate": 1.9217493013858687e-05, - "loss": 1.1115, + "learning_rate": 1.8326864724491434e-05, + "loss": 1.1432, "step": 5398 }, { - "epoch": 0.15299385077502906, + "epoch": 0.21124501134674076, "grad_norm": 0.0, - "learning_rate": 1.9217137068921875e-05, - "loss": 0.8784, + "learning_rate": 1.8326162935068575e-05, + "loss": 1.22, "step": 5399 }, { - "epoch": 0.1530221882172915, + "epoch": 0.2112841380389702, "grad_norm": 0.0, - "learning_rate": 1.9216781046345696e-05, - "loss": 0.9809, + "learning_rate": 1.832546101193797e-05, + "loss": 1.2318, "step": 5400 }, { - "epoch": 0.15305052565955396, + "epoch": 0.2113232647311996, "grad_norm": 0.0, - "learning_rate": 1.9216424946133146e-05, - "loss": 0.9957, + "learning_rate": 1.8324758955110895e-05, + "loss": 1.1873, "step": 5401 }, { - "epoch": 0.15307886310181643, + "epoch": 0.21136239142342905, "grad_norm": 0.0, - "learning_rate": 1.9216068768287228e-05, - "loss": 1.0648, + "learning_rate": 1.8324056764598623e-05, + "loss": 1.2614, "step": 5402 }, { - "epoch": 0.1531072005440789, + "epoch": 0.2114015181156585, "grad_norm": 0.0, - "learning_rate": 1.921571251281094e-05, - "loss": 0.9193, + "learning_rate": 1.8323354440412425e-05, + "loss": 1.1061, "step": 5403 }, { - "epoch": 0.15313553798634136, + "epoch": 0.21144064480788793, "grad_norm": 0.0, - "learning_rate": 1.9215356179707285e-05, - "loss": 1.0183, + "learning_rate": 1.832265198256359e-05, + "loss": 1.1392, "step": 5404 }, { - "epoch": 0.15316387542860382, + "epoch": 0.21147977150011738, "grad_norm": 0.0, - "learning_rate": 1.921499976897926e-05, - "loss": 1.0724, + "learning_rate": 1.8321949391063394e-05, + "loss": 1.0645, "step": 5405 }, { - "epoch": 0.15319221287086626, + "epoch": 0.21151889819234682, "grad_norm": 0.0, - "learning_rate": 1.921464328062987e-05, - "loss": 0.9265, + "learning_rate": 1.8321246665923116e-05, + "loss": 1.1523, "step": 5406 }, { - "epoch": 0.15322055031312873, + "epoch": 0.21155802488457626, "grad_norm": 0.0, - "learning_rate": 1.921428671466212e-05, - "loss": 1.0841, + "learning_rate": 1.8320543807154043e-05, + "loss": 1.2289, "step": 5407 }, { - "epoch": 0.1532488877553912, + "epoch": 0.2115971515768057, "grad_norm": 0.0, - "learning_rate": 1.921393007107901e-05, - "loss": 1.0184, + "learning_rate": 1.8319840814767463e-05, + "loss": 1.1923, "step": 5408 }, { - "epoch": 0.15327722519765366, + "epoch": 0.21163627826903514, "grad_norm": 0.0, - "learning_rate": 1.9213573349883545e-05, - "loss": 0.9654, + "learning_rate": 1.8319137688774667e-05, + "loss": 1.1721, "step": 5409 }, { - "epoch": 0.15330556263991613, + "epoch": 0.21167540496126458, "grad_norm": 0.0, - "learning_rate": 1.9213216551078732e-05, - "loss": 1.1358, + "learning_rate": 1.8318434429186943e-05, + "loss": 1.0993, "step": 5410 }, { - "epoch": 0.1533339000821786, + "epoch": 0.21171453165349402, "grad_norm": 0.0, - "learning_rate": 1.9212859674667575e-05, - "loss": 1.0541, + "learning_rate": 1.8317731036015584e-05, + "loss": 1.1623, "step": 5411 }, { - "epoch": 0.15336223752444103, + "epoch": 0.21175365834572346, "grad_norm": 0.0, - "learning_rate": 1.921250272065308e-05, - "loss": 1.0967, + "learning_rate": 1.831702750927189e-05, + "loss": 1.2911, "step": 5412 }, { - "epoch": 0.1533905749667035, + "epoch": 0.2117927850379529, "grad_norm": 0.0, - "learning_rate": 1.921214568903825e-05, - "loss": 1.1958, + "learning_rate": 1.8316323848967156e-05, + "loss": 1.1111, "step": 5413 }, { - "epoch": 0.15341891240896596, + "epoch": 0.21183191173018234, "grad_norm": 0.0, - "learning_rate": 1.92117885798261e-05, - "loss": 1.0955, + "learning_rate": 1.831562005511268e-05, + "loss": 1.139, "step": 5414 }, { - "epoch": 0.15344724985122843, + "epoch": 0.21187103842241176, "grad_norm": 0.0, - "learning_rate": 1.9211431393019634e-05, - "loss": 1.0726, + "learning_rate": 1.8314916127719765e-05, + "loss": 1.0829, "step": 5415 }, { - "epoch": 0.1534755872934909, + "epoch": 0.2119101651146412, "grad_norm": 0.0, - "learning_rate": 1.9211074128621857e-05, - "loss": 0.9403, + "learning_rate": 1.8314212066799716e-05, + "loss": 1.1694, "step": 5416 }, { - "epoch": 0.15350392473575336, + "epoch": 0.21194929180687064, "grad_norm": 0.0, - "learning_rate": 1.9210716786635787e-05, - "loss": 1.1376, + "learning_rate": 1.8313507872363843e-05, + "loss": 1.0843, "step": 5417 }, { - "epoch": 0.1535322621780158, + "epoch": 0.21198841849910008, "grad_norm": 0.0, - "learning_rate": 1.9210359367064427e-05, - "loss": 1.0538, + "learning_rate": 1.831280354442345e-05, + "loss": 1.191, "step": 5418 }, { - "epoch": 0.15356059962027827, + "epoch": 0.21202754519132952, "grad_norm": 0.0, - "learning_rate": 1.921000186991079e-05, - "loss": 1.0605, + "learning_rate": 1.8312099082989846e-05, + "loss": 1.1664, "step": 5419 }, { - "epoch": 0.15358893706254073, + "epoch": 0.21206667188355896, "grad_norm": 0.0, - "learning_rate": 1.9209644295177884e-05, - "loss": 1.0123, + "learning_rate": 1.831139448807435e-05, + "loss": 1.1023, "step": 5420 }, { - "epoch": 0.1536172745048032, + "epoch": 0.2121057985757884, "grad_norm": 0.0, - "learning_rate": 1.9209286642868728e-05, - "loss": 1.1016, + "learning_rate": 1.831068975968827e-05, + "loss": 1.1467, "step": 5421 }, { - "epoch": 0.15364561194706566, + "epoch": 0.21214492526801784, "grad_norm": 0.0, - "learning_rate": 1.9208928912986332e-05, - "loss": 1.0557, + "learning_rate": 1.830998489784293e-05, + "loss": 1.1061, "step": 5422 }, { - "epoch": 0.15367394938932813, + "epoch": 0.21218405196024728, "grad_norm": 0.0, - "learning_rate": 1.9208571105533703e-05, - "loss": 0.9802, + "learning_rate": 1.8309279902549642e-05, + "loss": 1.2316, "step": 5423 }, { - "epoch": 0.15370228683159057, + "epoch": 0.21222317865247672, "grad_norm": 0.0, - "learning_rate": 1.9208213220513866e-05, - "loss": 0.9862, + "learning_rate": 1.830857477381973e-05, + "loss": 1.1539, "step": 5424 }, { - "epoch": 0.15373062427385303, + "epoch": 0.21226230534470616, "grad_norm": 0.0, - "learning_rate": 1.9207855257929826e-05, - "loss": 0.9855, + "learning_rate": 1.8307869511664522e-05, + "loss": 1.1017, "step": 5425 }, { - "epoch": 0.1537589617161155, + "epoch": 0.2123014320369356, "grad_norm": 0.0, - "learning_rate": 1.9207497217784602e-05, - "loss": 0.9899, + "learning_rate": 1.830716411609534e-05, + "loss": 1.2296, "step": 5426 }, { - "epoch": 0.15378729915837797, + "epoch": 0.21234055872916505, "grad_norm": 0.0, - "learning_rate": 1.9207139100081213e-05, - "loss": 1.0414, + "learning_rate": 1.8306458587123507e-05, + "loss": 1.2854, "step": 5427 }, { - "epoch": 0.15381563660064043, + "epoch": 0.2123796854213945, "grad_norm": 0.0, - "learning_rate": 1.9206780904822667e-05, - "loss": 0.9465, + "learning_rate": 1.8305752924760362e-05, + "loss": 1.2113, "step": 5428 }, { - "epoch": 0.1538439740429029, + "epoch": 0.2124188121136239, "grad_norm": 0.0, - "learning_rate": 1.920642263201199e-05, - "loss": 1.0266, + "learning_rate": 1.8305047129017233e-05, + "loss": 1.1359, "step": 5429 }, { - "epoch": 0.15387231148516534, + "epoch": 0.21245793880585334, "grad_norm": 0.0, - "learning_rate": 1.9206064281652194e-05, - "loss": 1.1734, + "learning_rate": 1.830434119990545e-05, + "loss": 1.2902, "step": 5430 }, { - "epoch": 0.1539006489274278, + "epoch": 0.21249706549808278, "grad_norm": 0.0, - "learning_rate": 1.9205705853746305e-05, - "loss": 1.0392, + "learning_rate": 1.830363513743636e-05, + "loss": 1.1726, "step": 5431 }, { - "epoch": 0.15392898636969027, + "epoch": 0.21253619219031222, "grad_norm": 0.0, - "learning_rate": 1.920534734829733e-05, - "loss": 1.133, + "learning_rate": 1.8302928941621286e-05, + "loss": 1.2181, "step": 5432 }, { - "epoch": 0.15395732381195273, + "epoch": 0.21257531888254166, "grad_norm": 0.0, - "learning_rate": 1.9204988765308302e-05, - "loss": 1.0321, + "learning_rate": 1.8302222612471583e-05, + "loss": 1.2133, "step": 5433 }, { - "epoch": 0.1539856612542152, + "epoch": 0.2126144455747711, "grad_norm": 0.0, - "learning_rate": 1.9204630104782232e-05, - "loss": 1.1128, + "learning_rate": 1.830151614999859e-05, + "loss": 1.1968, "step": 5434 }, { - "epoch": 0.15401399869647767, + "epoch": 0.21265357226700055, "grad_norm": 0.0, - "learning_rate": 1.9204271366722148e-05, - "loss": 1.0178, + "learning_rate": 1.8300809554213644e-05, + "loss": 1.2075, "step": 5435 }, { - "epoch": 0.1540423361387401, + "epoch": 0.21269269895923, "grad_norm": 0.0, - "learning_rate": 1.9203912551131064e-05, - "loss": 0.9191, + "learning_rate": 1.8300102825128103e-05, + "loss": 1.0816, "step": 5436 }, { - "epoch": 0.15407067358100257, + "epoch": 0.21273182565145943, "grad_norm": 0.0, - "learning_rate": 1.920355365801201e-05, - "loss": 1.0386, + "learning_rate": 1.829939596275331e-05, + "loss": 1.0805, "step": 5437 }, { - "epoch": 0.15409901102326504, + "epoch": 0.21277095234368887, "grad_norm": 0.0, - "learning_rate": 1.9203194687368005e-05, - "loss": 0.9871, + "learning_rate": 1.8298688967100615e-05, + "loss": 1.1625, "step": 5438 }, { - "epoch": 0.1541273484655275, + "epoch": 0.2128100790359183, "grad_norm": 0.0, - "learning_rate": 1.9202835639202075e-05, - "loss": 1.0822, + "learning_rate": 1.8297981838181376e-05, + "loss": 1.1884, "step": 5439 }, { - "epoch": 0.15415568590778997, + "epoch": 0.21284920572814775, "grad_norm": 0.0, - "learning_rate": 1.920247651351724e-05, - "loss": 1.0708, + "learning_rate": 1.829727457600695e-05, + "loss": 1.2435, "step": 5440 }, { - "epoch": 0.15418402335005243, + "epoch": 0.2128883324203772, "grad_norm": 0.0, - "learning_rate": 1.920211731031653e-05, - "loss": 0.9813, + "learning_rate": 1.8296567180588685e-05, + "loss": 1.1755, "step": 5441 }, { - "epoch": 0.15421236079231487, + "epoch": 0.21292745911260663, "grad_norm": 0.0, - "learning_rate": 1.920175802960297e-05, - "loss": 1.0366, + "learning_rate": 1.829585965193795e-05, + "loss": 1.2415, "step": 5442 }, { - "epoch": 0.15424069823457734, + "epoch": 0.21296658580483607, "grad_norm": 0.0, - "learning_rate": 1.9201398671379585e-05, - "loss": 1.1654, + "learning_rate": 1.8295151990066105e-05, + "loss": 1.134, "step": 5443 }, { - "epoch": 0.1542690356768398, + "epoch": 0.21300571249706549, "grad_norm": 0.0, - "learning_rate": 1.92010392356494e-05, - "loss": 1.0215, + "learning_rate": 1.8294444194984514e-05, + "loss": 1.0706, "step": 5444 }, { - "epoch": 0.15429737311910227, + "epoch": 0.21304483918929493, "grad_norm": 0.0, - "learning_rate": 1.9200679722415444e-05, - "loss": 1.0978, + "learning_rate": 1.829373626670454e-05, + "loss": 1.2309, "step": 5445 }, { - "epoch": 0.15432571056136474, + "epoch": 0.21308396588152437, "grad_norm": 0.0, - "learning_rate": 1.9200320131680746e-05, - "loss": 0.9949, + "learning_rate": 1.8293028205237556e-05, + "loss": 1.1263, "step": 5446 }, { - "epoch": 0.1543540480036272, + "epoch": 0.2131230925737538, "grad_norm": 0.0, - "learning_rate": 1.9199960463448337e-05, - "loss": 0.9692, + "learning_rate": 1.829232001059493e-05, + "loss": 1.2143, "step": 5447 }, { - "epoch": 0.15438238544588964, + "epoch": 0.21316221926598325, "grad_norm": 0.0, - "learning_rate": 1.9199600717721247e-05, - "loss": 0.9975, + "learning_rate": 1.8291611682788034e-05, + "loss": 1.0906, "step": 5448 }, { - "epoch": 0.1544107228881521, + "epoch": 0.2132013459582127, "grad_norm": 0.0, - "learning_rate": 1.91992408945025e-05, - "loss": 0.8808, + "learning_rate": 1.829090322182825e-05, + "loss": 1.1034, "step": 5449 }, { - "epoch": 0.15443906033041457, + "epoch": 0.21324047265044213, "grad_norm": 0.0, - "learning_rate": 1.919888099379513e-05, - "loss": 0.9665, + "learning_rate": 1.8290194627726942e-05, + "loss": 1.0661, "step": 5450 }, { - "epoch": 0.15446739777267704, + "epoch": 0.21327959934267157, "grad_norm": 0.0, - "learning_rate": 1.9198521015602174e-05, - "loss": 1.049, + "learning_rate": 1.8289485900495502e-05, + "loss": 1.2627, "step": 5451 }, { - "epoch": 0.1544957352149395, + "epoch": 0.213318726034901, "grad_norm": 0.0, - "learning_rate": 1.9198160959926656e-05, - "loss": 1.006, + "learning_rate": 1.8288777040145303e-05, + "loss": 1.0492, "step": 5452 }, { - "epoch": 0.15452407265720197, + "epoch": 0.21335785272713045, "grad_norm": 0.0, - "learning_rate": 1.9197800826771615e-05, - "loss": 1.1211, + "learning_rate": 1.8288068046687732e-05, + "loss": 1.2667, "step": 5453 }, { - "epoch": 0.1545524100994644, + "epoch": 0.2133969794193599, "grad_norm": 0.0, - "learning_rate": 1.919744061614008e-05, - "loss": 1.0723, + "learning_rate": 1.8287358920134176e-05, + "loss": 1.2174, "step": 5454 }, { - "epoch": 0.15458074754172688, + "epoch": 0.21343610611158934, "grad_norm": 0.0, - "learning_rate": 1.919708032803509e-05, - "loss": 0.983, + "learning_rate": 1.8286649660496017e-05, + "loss": 1.1912, "step": 5455 }, { - "epoch": 0.15460908498398934, + "epoch": 0.21347523280381878, "grad_norm": 0.0, - "learning_rate": 1.9196719962459673e-05, - "loss": 0.8029, + "learning_rate": 1.828594026778465e-05, + "loss": 1.1625, "step": 5456 }, { - "epoch": 0.1546374224262518, + "epoch": 0.21351435949604822, "grad_norm": 0.0, - "learning_rate": 1.9196359519416872e-05, - "loss": 1.0929, + "learning_rate": 1.8285230742011464e-05, + "loss": 1.2369, "step": 5457 }, { - "epoch": 0.15466575986851427, + "epoch": 0.21355348618827763, "grad_norm": 0.0, - "learning_rate": 1.919599899890972e-05, - "loss": 0.971, + "learning_rate": 1.8284521083187856e-05, + "loss": 1.195, "step": 5458 }, { - "epoch": 0.15469409731077674, + "epoch": 0.21359261288050707, "grad_norm": 0.0, - "learning_rate": 1.9195638400941254e-05, - "loss": 1.0446, + "learning_rate": 1.8283811291325218e-05, + "loss": 1.2427, "step": 5459 }, { - "epoch": 0.15472243475303918, + "epoch": 0.2136317395727365, "grad_norm": 0.0, - "learning_rate": 1.919527772551451e-05, - "loss": 1.0303, + "learning_rate": 1.8283101366434954e-05, + "loss": 1.1789, "step": 5460 }, { - "epoch": 0.15475077219530164, + "epoch": 0.21367086626496595, "grad_norm": 0.0, - "learning_rate": 1.9194916972632526e-05, - "loss": 1.0448, + "learning_rate": 1.828239130852846e-05, + "loss": 1.1638, "step": 5461 }, { - "epoch": 0.1547791096375641, + "epoch": 0.2137099929571954, "grad_norm": 0.0, - "learning_rate": 1.919455614229834e-05, - "loss": 1.0995, + "learning_rate": 1.8281681117617138e-05, + "loss": 1.0125, "step": 5462 }, { - "epoch": 0.15480744707982658, + "epoch": 0.21374911964942483, "grad_norm": 0.0, - "learning_rate": 1.9194195234514996e-05, - "loss": 0.9959, + "learning_rate": 1.8280970793712397e-05, + "loss": 1.0185, "step": 5463 }, { - "epoch": 0.15483578452208904, + "epoch": 0.21378824634165428, "grad_norm": 0.0, - "learning_rate": 1.9193834249285532e-05, - "loss": 1.1154, + "learning_rate": 1.8280260336825642e-05, + "loss": 1.1448, "step": 5464 }, { - "epoch": 0.1548641219643515, + "epoch": 0.21382737303388372, "grad_norm": 0.0, - "learning_rate": 1.9193473186612988e-05, - "loss": 1.0529, + "learning_rate": 1.827954974696828e-05, + "loss": 1.3115, "step": 5465 }, { - "epoch": 0.15489245940661395, + "epoch": 0.21386649972611316, "grad_norm": 0.0, - "learning_rate": 1.9193112046500405e-05, - "loss": 1.0073, + "learning_rate": 1.8278839024151723e-05, + "loss": 1.146, "step": 5466 }, { - "epoch": 0.1549207968488764, + "epoch": 0.2139056264183426, "grad_norm": 0.0, - "learning_rate": 1.9192750828950823e-05, - "loss": 1.165, + "learning_rate": 1.827812816838739e-05, + "loss": 1.2011, "step": 5467 }, { - "epoch": 0.15494913429113888, + "epoch": 0.21394475311057204, "grad_norm": 0.0, - "learning_rate": 1.9192389533967292e-05, - "loss": 1.0884, + "learning_rate": 1.8277417179686688e-05, + "loss": 1.1286, "step": 5468 }, { - "epoch": 0.15497747173340135, + "epoch": 0.21398387980280148, "grad_norm": 0.0, - "learning_rate": 1.9192028161552848e-05, - "loss": 0.9756, + "learning_rate": 1.827670605806104e-05, + "loss": 1.107, "step": 5469 }, { - "epoch": 0.1550058091756638, + "epoch": 0.21402300649503092, "grad_norm": 0.0, - "learning_rate": 1.9191666711710538e-05, - "loss": 0.9936, + "learning_rate": 1.827599480352186e-05, + "loss": 1.2267, "step": 5470 }, { - "epoch": 0.15503414661792628, + "epoch": 0.21406213318726036, "grad_norm": 0.0, - "learning_rate": 1.9191305184443404e-05, - "loss": 1.0948, + "learning_rate": 1.8275283416080576e-05, + "loss": 1.2112, "step": 5471 }, { - "epoch": 0.15506248406018872, + "epoch": 0.21410125987948977, "grad_norm": 0.0, - "learning_rate": 1.9190943579754493e-05, - "loss": 0.966, + "learning_rate": 1.827457189574861e-05, + "loss": 1.1436, "step": 5472 }, { - "epoch": 0.15509082150245118, + "epoch": 0.21414038657171922, "grad_norm": 0.0, - "learning_rate": 1.9190581897646852e-05, - "loss": 0.9611, + "learning_rate": 1.827386024253739e-05, + "loss": 1.203, "step": 5473 }, { - "epoch": 0.15511915894471365, + "epoch": 0.21417951326394866, "grad_norm": 0.0, - "learning_rate": 1.919022013812353e-05, - "loss": 0.9972, + "learning_rate": 1.827314845645834e-05, + "loss": 1.2644, "step": 5474 }, { - "epoch": 0.1551474963869761, + "epoch": 0.2142186399561781, "grad_norm": 0.0, - "learning_rate": 1.9189858301187568e-05, - "loss": 0.9455, + "learning_rate": 1.8272436537522897e-05, + "loss": 1.2243, "step": 5475 }, { - "epoch": 0.15517583382923858, + "epoch": 0.21425776664840754, "grad_norm": 0.0, - "learning_rate": 1.9189496386842016e-05, - "loss": 1.0348, + "learning_rate": 1.8271724485742484e-05, + "loss": 1.2451, "step": 5476 }, { - "epoch": 0.15520417127150105, + "epoch": 0.21429689334063698, "grad_norm": 0.0, - "learning_rate": 1.9189134395089928e-05, - "loss": 1.0724, + "learning_rate": 1.8271012301128542e-05, + "loss": 1.1591, "step": 5477 }, { - "epoch": 0.15523250871376348, + "epoch": 0.21433602003286642, "grad_norm": 0.0, - "learning_rate": 1.9188772325934346e-05, - "loss": 1.0491, + "learning_rate": 1.8270299983692505e-05, + "loss": 1.1136, "step": 5478 }, { - "epoch": 0.15526084615602595, + "epoch": 0.21437514672509586, "grad_norm": 0.0, - "learning_rate": 1.9188410179378324e-05, - "loss": 0.9927, + "learning_rate": 1.826958753344582e-05, + "loss": 1.1514, "step": 5479 }, { - "epoch": 0.15528918359828842, + "epoch": 0.2144142734173253, "grad_norm": 0.0, - "learning_rate": 1.918804795542491e-05, - "loss": 0.9594, + "learning_rate": 1.8268874950399914e-05, + "loss": 1.1816, "step": 5480 }, { - "epoch": 0.15531752104055088, + "epoch": 0.21445340010955474, "grad_norm": 0.0, - "learning_rate": 1.9187685654077153e-05, - "loss": 1.0494, + "learning_rate": 1.826816223456624e-05, + "loss": 1.1317, "step": 5481 }, { - "epoch": 0.15534585848281335, + "epoch": 0.21449252680178418, "grad_norm": 0.0, - "learning_rate": 1.918732327533811e-05, - "loss": 1.0279, + "learning_rate": 1.8267449385956242e-05, + "loss": 1.1243, "step": 5482 }, { - "epoch": 0.15537419592507581, + "epoch": 0.21453165349401362, "grad_norm": 0.0, - "learning_rate": 1.918696081921083e-05, - "loss": 1.0206, + "learning_rate": 1.8266736404581368e-05, + "loss": 1.065, "step": 5483 }, { - "epoch": 0.15540253336733825, + "epoch": 0.21457078018624307, "grad_norm": 0.0, - "learning_rate": 1.9186598285698373e-05, - "loss": 1.0735, + "learning_rate": 1.8266023290453067e-05, + "loss": 1.1809, "step": 5484 }, { - "epoch": 0.15543087080960072, + "epoch": 0.2146099068784725, "grad_norm": 0.0, - "learning_rate": 1.918623567480378e-05, - "loss": 0.9427, + "learning_rate": 1.8265310043582786e-05, + "loss": 1.0837, "step": 5485 }, { - "epoch": 0.15545920825186318, + "epoch": 0.21464903357070192, "grad_norm": 0.0, - "learning_rate": 1.9185872986530118e-05, - "loss": 0.9503, + "learning_rate": 1.8264596663981985e-05, + "loss": 1.1622, "step": 5486 }, { - "epoch": 0.15548754569412565, + "epoch": 0.21468816026293136, "grad_norm": 0.0, - "learning_rate": 1.9185510220880438e-05, - "loss": 1.0816, + "learning_rate": 1.8263883151662116e-05, + "loss": 1.3585, "step": 5487 }, { - "epoch": 0.15551588313638812, + "epoch": 0.2147272869551608, "grad_norm": 0.0, - "learning_rate": 1.9185147377857788e-05, - "loss": 1.0654, + "learning_rate": 1.8263169506634638e-05, + "loss": 1.1392, "step": 5488 }, { - "epoch": 0.15554422057865058, + "epoch": 0.21476641364739024, "grad_norm": 0.0, - "learning_rate": 1.9184784457465238e-05, - "loss": 0.9161, + "learning_rate": 1.8262455728911015e-05, + "loss": 1.2092, "step": 5489 }, { - "epoch": 0.15557255802091302, + "epoch": 0.21480554033961968, "grad_norm": 0.0, - "learning_rate": 1.9184421459705834e-05, - "loss": 1.076, + "learning_rate": 1.8261741818502706e-05, + "loss": 1.2592, "step": 5490 }, { - "epoch": 0.1556008954631755, + "epoch": 0.21484466703184912, "grad_norm": 0.0, - "learning_rate": 1.9184058384582638e-05, - "loss": 0.9947, + "learning_rate": 1.8261027775421174e-05, + "loss": 1.0802, "step": 5491 }, { - "epoch": 0.15562923290543795, + "epoch": 0.21488379372407856, "grad_norm": 0.0, - "learning_rate": 1.9183695232098707e-05, - "loss": 1.1385, + "learning_rate": 1.826031359967789e-05, + "loss": 1.1573, "step": 5492 }, { - "epoch": 0.15565757034770042, + "epoch": 0.214922920416308, "grad_norm": 0.0, - "learning_rate": 1.91833320022571e-05, - "loss": 0.9647, + "learning_rate": 1.8259599291284318e-05, + "loss": 1.1092, "step": 5493 }, { - "epoch": 0.15568590778996289, + "epoch": 0.21496204710853745, "grad_norm": 0.0, - "learning_rate": 1.918296869506088e-05, - "loss": 1.2305, + "learning_rate": 1.8258884850251932e-05, + "loss": 1.2002, "step": 5494 }, { - "epoch": 0.15571424523222535, + "epoch": 0.2150011738007669, "grad_norm": 0.0, - "learning_rate": 1.9182605310513102e-05, - "loss": 1.001, + "learning_rate": 1.8258170276592207e-05, + "loss": 1.18, "step": 5495 }, { - "epoch": 0.1557425826744878, + "epoch": 0.21504030049299633, "grad_norm": 0.0, - "learning_rate": 1.9182241848616834e-05, - "loss": 1.1171, + "learning_rate": 1.825745557031661e-05, + "loss": 1.1275, "step": 5496 }, { - "epoch": 0.15577092011675026, + "epoch": 0.21507942718522577, "grad_norm": 0.0, - "learning_rate": 1.9181878309375128e-05, - "loss": 1.0679, + "learning_rate": 1.8256740731436627e-05, + "loss": 1.1365, "step": 5497 }, { - "epoch": 0.15579925755901272, + "epoch": 0.2151185538774552, "grad_norm": 0.0, - "learning_rate": 1.9181514692791054e-05, - "loss": 0.9892, + "learning_rate": 1.8256025759963735e-05, + "loss": 1.1805, "step": 5498 }, { - "epoch": 0.1558275950012752, + "epoch": 0.21515768056968465, "grad_norm": 0.0, - "learning_rate": 1.9181150998867674e-05, - "loss": 1.0279, + "learning_rate": 1.8255310655909414e-05, + "loss": 1.1419, "step": 5499 }, { - "epoch": 0.15585593244353765, + "epoch": 0.2151968072619141, "grad_norm": 0.0, - "learning_rate": 1.9180787227608045e-05, - "loss": 1.0259, + "learning_rate": 1.8254595419285147e-05, + "loss": 1.2194, "step": 5500 }, { - "epoch": 0.15588426988580012, + "epoch": 0.2152359339541435, "grad_norm": 0.0, - "learning_rate": 1.918042337901524e-05, - "loss": 1.0509, + "learning_rate": 1.8253880050102422e-05, + "loss": 1.2761, "step": 5501 }, { - "epoch": 0.15591260732806256, + "epoch": 0.21527506064637295, "grad_norm": 0.0, - "learning_rate": 1.918005945309232e-05, - "loss": 0.9924, + "learning_rate": 1.8253164548372726e-05, + "loss": 1.2208, "step": 5502 }, { - "epoch": 0.15594094477032502, + "epoch": 0.2153141873386024, "grad_norm": 0.0, - "learning_rate": 1.9179695449842347e-05, - "loss": 1.021, + "learning_rate": 1.825244891410755e-05, + "loss": 1.1796, "step": 5503 }, { - "epoch": 0.1559692822125875, + "epoch": 0.21535331403083183, "grad_norm": 0.0, - "learning_rate": 1.9179331369268393e-05, - "loss": 1.0956, + "learning_rate": 1.8251733147318383e-05, + "loss": 1.1328, "step": 5504 }, { - "epoch": 0.15599761965484996, + "epoch": 0.21539244072306127, "grad_norm": 0.0, - "learning_rate": 1.917896721137352e-05, - "loss": 1.0513, + "learning_rate": 1.8251017248016724e-05, + "loss": 1.1085, "step": 5505 }, { - "epoch": 0.15602595709711242, + "epoch": 0.2154315674152907, "grad_norm": 0.0, - "learning_rate": 1.91786029761608e-05, - "loss": 1.1511, + "learning_rate": 1.8250301216214067e-05, + "loss": 1.15, "step": 5506 }, { - "epoch": 0.1560542945393749, + "epoch": 0.21547069410752015, "grad_norm": 0.0, - "learning_rate": 1.91782386636333e-05, - "loss": 1.0559, + "learning_rate": 1.824958505192191e-05, + "loss": 1.1351, "step": 5507 }, { - "epoch": 0.15608263198163733, + "epoch": 0.2155098207997496, "grad_norm": 0.0, - "learning_rate": 1.9177874273794083e-05, - "loss": 1.1598, + "learning_rate": 1.8248868755151753e-05, + "loss": 1.2754, "step": 5508 }, { - "epoch": 0.1561109694238998, + "epoch": 0.21554894749197903, "grad_norm": 0.0, - "learning_rate": 1.9177509806646225e-05, - "loss": 1.0471, + "learning_rate": 1.82481523259151e-05, + "loss": 1.156, "step": 5509 }, { - "epoch": 0.15613930686616226, + "epoch": 0.21558807418420847, "grad_norm": 0.0, - "learning_rate": 1.9177145262192797e-05, - "loss": 1.0448, + "learning_rate": 1.8247435764223462e-05, + "loss": 1.1018, "step": 5510 }, { - "epoch": 0.15616764430842472, + "epoch": 0.2156272008764379, "grad_norm": 0.0, - "learning_rate": 1.917678064043686e-05, - "loss": 0.9521, + "learning_rate": 1.8246719070088335e-05, + "loss": 1.2772, "step": 5511 }, { - "epoch": 0.1561959817506872, + "epoch": 0.21566632756866735, "grad_norm": 0.0, - "learning_rate": 1.9176415941381497e-05, - "loss": 1.0328, + "learning_rate": 1.8246002243521234e-05, + "loss": 1.1613, "step": 5512 }, { - "epoch": 0.15622431919294966, + "epoch": 0.2157054542608968, "grad_norm": 0.0, - "learning_rate": 1.9176051165029774e-05, - "loss": 1.1244, + "learning_rate": 1.824528528453367e-05, + "loss": 1.096, "step": 5513 }, { - "epoch": 0.1562526566352121, + "epoch": 0.21574458095312624, "grad_norm": 0.0, - "learning_rate": 1.9175686311384763e-05, - "loss": 1.082, + "learning_rate": 1.8244568193137157e-05, + "loss": 1.2298, "step": 5514 }, { - "epoch": 0.15628099407747456, + "epoch": 0.21578370764535565, "grad_norm": 0.0, - "learning_rate": 1.917532138044954e-05, - "loss": 1.0008, + "learning_rate": 1.824385096934321e-05, + "loss": 1.2289, "step": 5515 }, { - "epoch": 0.15630933151973703, + "epoch": 0.2158228343375851, "grad_norm": 0.0, - "learning_rate": 1.917495637222718e-05, - "loss": 0.999, + "learning_rate": 1.8243133613163344e-05, + "loss": 1.2034, "step": 5516 }, { - "epoch": 0.1563376689619995, + "epoch": 0.21586196102981453, "grad_norm": 0.0, - "learning_rate": 1.9174591286720754e-05, - "loss": 1.0096, + "learning_rate": 1.8242416124609087e-05, + "loss": 1.0987, "step": 5517 }, { - "epoch": 0.15636600640426196, + "epoch": 0.21590108772204397, "grad_norm": 0.0, - "learning_rate": 1.9174226123933336e-05, - "loss": 1.0409, + "learning_rate": 1.824169850369195e-05, + "loss": 1.2416, "step": 5518 }, { - "epoch": 0.15639434384652443, + "epoch": 0.2159402144142734, "grad_norm": 0.0, - "learning_rate": 1.9173860883868008e-05, - "loss": 1.0271, + "learning_rate": 1.8240980750423465e-05, + "loss": 1.1747, "step": 5519 }, { - "epoch": 0.15642268128878686, + "epoch": 0.21597934110650285, "grad_norm": 0.0, - "learning_rate": 1.917349556652784e-05, - "loss": 1.025, + "learning_rate": 1.8240262864815152e-05, + "loss": 0.9843, "step": 5520 }, { - "epoch": 0.15645101873104933, + "epoch": 0.2160184677987323, "grad_norm": 0.0, - "learning_rate": 1.9173130171915914e-05, - "loss": 0.9559, + "learning_rate": 1.8239544846878544e-05, + "loss": 1.2889, "step": 5521 }, { - "epoch": 0.1564793561733118, + "epoch": 0.21605759449096174, "grad_norm": 0.0, - "learning_rate": 1.9172764700035308e-05, - "loss": 0.9542, + "learning_rate": 1.8238826696625175e-05, + "loss": 1.2194, "step": 5522 }, { - "epoch": 0.15650769361557426, + "epoch": 0.21609672118319118, "grad_norm": 0.0, - "learning_rate": 1.9172399150889098e-05, - "loss": 0.9872, + "learning_rate": 1.823810841406657e-05, + "loss": 1.1752, "step": 5523 }, { - "epoch": 0.15653603105783673, + "epoch": 0.21613584787542062, "grad_norm": 0.0, - "learning_rate": 1.9172033524480364e-05, - "loss": 1.0348, + "learning_rate": 1.8237389999214267e-05, + "loss": 1.1916, "step": 5524 }, { - "epoch": 0.1565643685000992, + "epoch": 0.21617497456765006, "grad_norm": 0.0, - "learning_rate": 1.9171667820812183e-05, - "loss": 0.9706, + "learning_rate": 1.8236671452079805e-05, + "loss": 1.1242, "step": 5525 }, { - "epoch": 0.15659270594236163, + "epoch": 0.2162141012598795, "grad_norm": 0.0, - "learning_rate": 1.917130203988764e-05, - "loss": 1.0136, + "learning_rate": 1.8235952772674718e-05, + "loss": 1.1077, "step": 5526 }, { - "epoch": 0.1566210433846241, + "epoch": 0.21625322795210894, "grad_norm": 0.0, - "learning_rate": 1.9170936181709812e-05, - "loss": 0.9193, + "learning_rate": 1.823523396101055e-05, + "loss": 1.2402, "step": 5527 }, { - "epoch": 0.15664938082688656, + "epoch": 0.21629235464433838, "grad_norm": 0.0, - "learning_rate": 1.9170570246281786e-05, - "loss": 0.9467, + "learning_rate": 1.8234515017098845e-05, + "loss": 1.0641, "step": 5528 }, { - "epoch": 0.15667771826914903, + "epoch": 0.2163314813365678, "grad_norm": 0.0, - "learning_rate": 1.917020423360664e-05, - "loss": 1.0235, + "learning_rate": 1.8233795940951145e-05, + "loss": 1.1788, "step": 5529 }, { - "epoch": 0.1567060557114115, + "epoch": 0.21637060802879723, "grad_norm": 0.0, - "learning_rate": 1.9169838143687462e-05, - "loss": 1.1071, + "learning_rate": 1.8233076732579e-05, + "loss": 1.1405, "step": 5530 }, { - "epoch": 0.15673439315367396, + "epoch": 0.21640973472102668, "grad_norm": 0.0, - "learning_rate": 1.9169471976527325e-05, - "loss": 1.0166, + "learning_rate": 1.823235739199396e-05, + "loss": 1.033, "step": 5531 }, { - "epoch": 0.1567627305959364, + "epoch": 0.21644886141325612, "grad_norm": 0.0, - "learning_rate": 1.9169105732129326e-05, - "loss": 1.0003, + "learning_rate": 1.8231637919207576e-05, + "loss": 1.2212, "step": 5532 }, { - "epoch": 0.15679106803819887, + "epoch": 0.21648798810548556, "grad_norm": 0.0, - "learning_rate": 1.9168739410496546e-05, - "loss": 1.011, + "learning_rate": 1.8230918314231406e-05, + "loss": 1.1176, "step": 5533 }, { - "epoch": 0.15681940548046133, + "epoch": 0.216527114797715, "grad_norm": 0.0, - "learning_rate": 1.9168373011632063e-05, - "loss": 0.9784, + "learning_rate": 1.8230198577076996e-05, + "loss": 1.1545, "step": 5534 }, { - "epoch": 0.1568477429227238, + "epoch": 0.21656624148994444, "grad_norm": 0.0, - "learning_rate": 1.9168006535538973e-05, - "loss": 1.0699, + "learning_rate": 1.822947870775591e-05, + "loss": 1.0468, "step": 5535 }, { - "epoch": 0.15687608036498626, + "epoch": 0.21660536818217388, "grad_norm": 0.0, - "learning_rate": 1.916763998222036e-05, - "loss": 1.073, + "learning_rate": 1.822875870627971e-05, + "loss": 1.1628, "step": 5536 }, { - "epoch": 0.15690441780724873, + "epoch": 0.21664449487440332, "grad_norm": 0.0, - "learning_rate": 1.9167273351679313e-05, - "loss": 1.0061, + "learning_rate": 1.8228038572659958e-05, + "loss": 1.2669, "step": 5537 }, { - "epoch": 0.15693275524951117, + "epoch": 0.21668362156663276, "grad_norm": 0.0, - "learning_rate": 1.9166906643918913e-05, - "loss": 1.0499, + "learning_rate": 1.8227318306908216e-05, + "loss": 1.0937, "step": 5538 }, { - "epoch": 0.15696109269177363, + "epoch": 0.2167227482588622, "grad_norm": 0.0, - "learning_rate": 1.9166539858942258e-05, - "loss": 1.0955, + "learning_rate": 1.8226597909036048e-05, + "loss": 1.1727, "step": 5539 }, { - "epoch": 0.1569894301340361, + "epoch": 0.21676187495109164, "grad_norm": 0.0, - "learning_rate": 1.9166172996752434e-05, - "loss": 1.0278, + "learning_rate": 1.822587737905503e-05, + "loss": 1.0814, "step": 5540 }, { - "epoch": 0.15701776757629857, + "epoch": 0.21680100164332108, "grad_norm": 0.0, - "learning_rate": 1.9165806057352528e-05, - "loss": 0.9838, + "learning_rate": 1.822515671697673e-05, + "loss": 1.1798, "step": 5541 }, { - "epoch": 0.15704610501856103, + "epoch": 0.21684012833555052, "grad_norm": 0.0, - "learning_rate": 1.9165439040745637e-05, - "loss": 1.0762, + "learning_rate": 1.8224435922812716e-05, + "loss": 1.1748, "step": 5542 }, { - "epoch": 0.1570744424608235, + "epoch": 0.21687925502777994, "grad_norm": 0.0, - "learning_rate": 1.9165071946934847e-05, - "loss": 0.9809, + "learning_rate": 1.8223714996574566e-05, + "loss": 1.0745, "step": 5543 }, { - "epoch": 0.15710277990308594, + "epoch": 0.21691838172000938, "grad_norm": 0.0, - "learning_rate": 1.9164704775923258e-05, - "loss": 1.0854, + "learning_rate": 1.822299393827386e-05, + "loss": 1.2459, "step": 5544 }, { - "epoch": 0.1571311173453484, + "epoch": 0.21695750841223882, "grad_norm": 0.0, - "learning_rate": 1.916433752771395e-05, - "loss": 0.9601, + "learning_rate": 1.8222272747922178e-05, + "loss": 1.1013, "step": 5545 }, { - "epoch": 0.15715945478761087, + "epoch": 0.21699663510446826, "grad_norm": 0.0, - "learning_rate": 1.916397020231003e-05, - "loss": 0.926, + "learning_rate": 1.8221551425531095e-05, + "loss": 1.1343, "step": 5546 }, { - "epoch": 0.15718779222987334, + "epoch": 0.2170357617966977, "grad_norm": 0.0, - "learning_rate": 1.9163602799714583e-05, - "loss": 1.0041, + "learning_rate": 1.82208299711122e-05, + "loss": 1.2205, "step": 5547 }, { - "epoch": 0.1572161296721358, + "epoch": 0.21707488848892714, "grad_norm": 0.0, - "learning_rate": 1.9163235319930706e-05, - "loss": 1.1034, + "learning_rate": 1.8220108384677076e-05, + "loss": 1.1807, "step": 5548 }, { - "epoch": 0.15724446711439827, + "epoch": 0.21711401518115658, "grad_norm": 0.0, - "learning_rate": 1.9162867762961497e-05, - "loss": 1.1148, + "learning_rate": 1.8219386666237314e-05, + "loss": 1.0995, "step": 5549 }, { - "epoch": 0.1572728045566607, + "epoch": 0.21715314187338602, "grad_norm": 0.0, - "learning_rate": 1.916250012881005e-05, - "loss": 1.1121, + "learning_rate": 1.82186648158045e-05, + "loss": 1.113, "step": 5550 }, { - "epoch": 0.15730114199892317, + "epoch": 0.21719226856561547, "grad_norm": 0.0, - "learning_rate": 1.916213241747946e-05, - "loss": 1.0874, + "learning_rate": 1.8217942833390227e-05, + "loss": 1.3029, "step": 5551 }, { - "epoch": 0.15732947944118564, + "epoch": 0.2172313952578449, "grad_norm": 0.0, - "learning_rate": 1.916176462897283e-05, - "loss": 0.9816, + "learning_rate": 1.821722071900609e-05, + "loss": 1.1521, "step": 5552 }, { - "epoch": 0.1573578168834481, + "epoch": 0.21727052195007435, "grad_norm": 0.0, - "learning_rate": 1.9161396763293252e-05, - "loss": 0.9885, + "learning_rate": 1.8216498472663685e-05, + "loss": 1.1888, "step": 5553 }, { - "epoch": 0.15738615432571057, + "epoch": 0.2173096486423038, "grad_norm": 0.0, - "learning_rate": 1.916102882044383e-05, - "loss": 0.9675, + "learning_rate": 1.8215776094374612e-05, + "loss": 1.1631, "step": 5554 }, { - "epoch": 0.15741449176797304, + "epoch": 0.21734877533453323, "grad_norm": 0.0, - "learning_rate": 1.9160660800427658e-05, - "loss": 0.9852, + "learning_rate": 1.8215053584150467e-05, + "loss": 1.1355, "step": 5555 }, { - "epoch": 0.15744282921023547, + "epoch": 0.21738790202676267, "grad_norm": 0.0, - "learning_rate": 1.9160292703247836e-05, - "loss": 1.0324, + "learning_rate": 1.8214330942002855e-05, + "loss": 1.1631, "step": 5556 }, { - "epoch": 0.15747116665249794, + "epoch": 0.2174270287189921, "grad_norm": 0.0, - "learning_rate": 1.915992452890747e-05, - "loss": 1.0515, + "learning_rate": 1.8213608167943384e-05, + "loss": 1.2104, "step": 5557 }, { - "epoch": 0.1574995040947604, + "epoch": 0.21746615541122152, "grad_norm": 0.0, - "learning_rate": 1.9159556277409658e-05, - "loss": 0.9077, + "learning_rate": 1.8212885261983657e-05, + "loss": 1.1777, "step": 5558 }, { - "epoch": 0.15752784153702287, + "epoch": 0.21750528210345096, "grad_norm": 0.0, - "learning_rate": 1.9159187948757503e-05, - "loss": 0.9853, + "learning_rate": 1.8212162224135283e-05, + "loss": 1.2266, "step": 5559 }, { - "epoch": 0.15755617897928534, + "epoch": 0.2175444087956804, "grad_norm": 0.0, - "learning_rate": 1.9158819542954105e-05, - "loss": 1.0255, + "learning_rate": 1.821143905440988e-05, + "loss": 1.1328, "step": 5560 }, { - "epoch": 0.1575845164215478, + "epoch": 0.21758353548790985, "grad_norm": 0.0, - "learning_rate": 1.9158451060002566e-05, - "loss": 0.853, + "learning_rate": 1.8210715752819047e-05, + "loss": 1.1327, "step": 5561 }, { - "epoch": 0.15761285386381024, + "epoch": 0.2176226621801393, "grad_norm": 0.0, - "learning_rate": 1.9158082499906e-05, - "loss": 0.9817, + "learning_rate": 1.820999231937441e-05, + "loss": 1.007, "step": 5562 }, { - "epoch": 0.1576411913060727, + "epoch": 0.21766178887236873, "grad_norm": 0.0, - "learning_rate": 1.91577138626675e-05, - "loss": 1.026, + "learning_rate": 1.8209268754087586e-05, + "loss": 1.1601, "step": 5563 }, { - "epoch": 0.15766952874833517, + "epoch": 0.21770091556459817, "grad_norm": 0.0, - "learning_rate": 1.9157345148290173e-05, - "loss": 1.0953, + "learning_rate": 1.8208545056970193e-05, + "loss": 1.2724, "step": 5564 }, { - "epoch": 0.15769786619059764, + "epoch": 0.2177400422568276, "grad_norm": 0.0, - "learning_rate": 1.9156976356777132e-05, - "loss": 0.9912, + "learning_rate": 1.820782122803385e-05, + "loss": 1.266, "step": 5565 }, { - "epoch": 0.1577262036328601, + "epoch": 0.21777916894905705, "grad_norm": 0.0, - "learning_rate": 1.915660748813148e-05, - "loss": 0.9261, + "learning_rate": 1.8207097267290183e-05, + "loss": 1.0701, "step": 5566 }, { - "epoch": 0.15775454107512257, + "epoch": 0.2178182956412865, "grad_norm": 0.0, - "learning_rate": 1.915623854235632e-05, - "loss": 1.1058, + "learning_rate": 1.8206373174750824e-05, + "loss": 1.1523, "step": 5567 }, { - "epoch": 0.157782878517385, + "epoch": 0.21785742233351593, "grad_norm": 0.0, - "learning_rate": 1.9155869519454762e-05, - "loss": 1.0601, + "learning_rate": 1.8205648950427388e-05, + "loss": 1.2166, "step": 5568 }, { - "epoch": 0.15781121595964748, + "epoch": 0.21789654902574537, "grad_norm": 0.0, - "learning_rate": 1.9155500419429916e-05, - "loss": 1.0006, + "learning_rate": 1.8204924594331514e-05, + "loss": 1.1706, "step": 5569 }, { - "epoch": 0.15783955340190994, + "epoch": 0.2179356757179748, "grad_norm": 0.0, - "learning_rate": 1.9155131242284888e-05, - "loss": 1.0842, + "learning_rate": 1.8204200106474834e-05, + "loss": 1.191, "step": 5570 }, { - "epoch": 0.1578678908441724, + "epoch": 0.21797480241020425, "grad_norm": 0.0, - "learning_rate": 1.9154761988022793e-05, - "loss": 1.1093, + "learning_rate": 1.820347548686898e-05, + "loss": 1.1541, "step": 5571 }, { - "epoch": 0.15789622828643488, + "epoch": 0.21801392910243367, "grad_norm": 0.0, - "learning_rate": 1.9154392656646736e-05, - "loss": 1.0647, + "learning_rate": 1.8202750735525584e-05, + "loss": 1.0621, "step": 5572 }, { - "epoch": 0.15792456572869734, + "epoch": 0.2180530557946631, "grad_norm": 0.0, - "learning_rate": 1.9154023248159833e-05, - "loss": 1.0285, + "learning_rate": 1.8202025852456294e-05, + "loss": 1.1832, "step": 5573 }, { - "epoch": 0.15795290317095978, + "epoch": 0.21809218248689255, "grad_norm": 0.0, - "learning_rate": 1.915365376256519e-05, - "loss": 1.1139, + "learning_rate": 1.820130083767275e-05, + "loss": 1.2719, "step": 5574 }, { - "epoch": 0.15798124061322225, + "epoch": 0.218131309179122, "grad_norm": 0.0, - "learning_rate": 1.9153284199865926e-05, - "loss": 1.0099, + "learning_rate": 1.8200575691186588e-05, + "loss": 1.2143, "step": 5575 }, { - "epoch": 0.1580095780554847, + "epoch": 0.21817043587135143, "grad_norm": 0.0, - "learning_rate": 1.9152914560065146e-05, - "loss": 1.0744, + "learning_rate": 1.8199850413009455e-05, + "loss": 1.256, "step": 5576 }, { - "epoch": 0.15803791549774718, + "epoch": 0.21820956256358087, "grad_norm": 0.0, - "learning_rate": 1.9152544843165968e-05, - "loss": 1.0511, + "learning_rate": 1.8199125003153e-05, + "loss": 1.4114, "step": 5577 }, { - "epoch": 0.15806625294000964, + "epoch": 0.2182486892558103, "grad_norm": 0.0, - "learning_rate": 1.9152175049171507e-05, - "loss": 0.9361, + "learning_rate": 1.8198399461628873e-05, + "loss": 1.2153, "step": 5578 }, { - "epoch": 0.1580945903822721, + "epoch": 0.21828781594803975, "grad_norm": 0.0, - "learning_rate": 1.9151805178084878e-05, - "loss": 0.9895, + "learning_rate": 1.8197673788448723e-05, + "loss": 1.0945, "step": 5579 }, { - "epoch": 0.15812292782453455, + "epoch": 0.2183269426402692, "grad_norm": 0.0, - "learning_rate": 1.9151435229909197e-05, - "loss": 0.987, + "learning_rate": 1.8196947983624207e-05, + "loss": 1.1382, "step": 5580 }, { - "epoch": 0.15815126526679701, + "epoch": 0.21836606933249864, "grad_norm": 0.0, - "learning_rate": 1.9151065204647576e-05, - "loss": 1.1356, + "learning_rate": 1.8196222047166973e-05, + "loss": 1.0108, "step": 5581 }, { - "epoch": 0.15817960270905948, + "epoch": 0.21840519602472808, "grad_norm": 0.0, - "learning_rate": 1.9150695102303138e-05, - "loss": 0.908, + "learning_rate": 1.8195495979088686e-05, + "loss": 1.0105, "step": 5582 }, { - "epoch": 0.15820794015132195, + "epoch": 0.21844432271695752, "grad_norm": 0.0, - "learning_rate": 1.9150324922878992e-05, - "loss": 1.1281, + "learning_rate": 1.8194769779401004e-05, + "loss": 1.1564, "step": 5583 }, { - "epoch": 0.1582362775935844, + "epoch": 0.21848344940918696, "grad_norm": 0.0, - "learning_rate": 1.9149954666378264e-05, - "loss": 1.0842, + "learning_rate": 1.8194043448115584e-05, + "loss": 1.2119, "step": 5584 }, { - "epoch": 0.15826461503584685, + "epoch": 0.2185225761014164, "grad_norm": 0.0, - "learning_rate": 1.914958433280407e-05, - "loss": 1.0615, + "learning_rate": 1.8193316985244097e-05, + "loss": 1.1299, "step": 5585 }, { - "epoch": 0.15829295247810932, + "epoch": 0.2185617027936458, "grad_norm": 0.0, - "learning_rate": 1.914921392215953e-05, - "loss": 1.0398, + "learning_rate": 1.8192590390798205e-05, + "loss": 1.1947, "step": 5586 }, { - "epoch": 0.15832128992037178, + "epoch": 0.21860082948587525, "grad_norm": 0.0, - "learning_rate": 1.9148843434447762e-05, - "loss": 0.95, + "learning_rate": 1.819186366478958e-05, + "loss": 1.1915, "step": 5587 }, { - "epoch": 0.15834962736263425, + "epoch": 0.2186399561781047, "grad_norm": 0.0, - "learning_rate": 1.914847286967189e-05, - "loss": 1.0889, + "learning_rate": 1.8191136807229888e-05, + "loss": 1.1701, "step": 5588 }, { - "epoch": 0.15837796480489671, + "epoch": 0.21867908287033413, "grad_norm": 0.0, - "learning_rate": 1.9148102227835033e-05, - "loss": 1.0088, + "learning_rate": 1.81904098181308e-05, + "loss": 1.1154, "step": 5589 }, { - "epoch": 0.15840630224715918, + "epoch": 0.21871820956256358, "grad_norm": 0.0, - "learning_rate": 1.9147731508940313e-05, - "loss": 1.0147, + "learning_rate": 1.8189682697504e-05, + "loss": 1.199, "step": 5590 }, { - "epoch": 0.15843463968942162, + "epoch": 0.21875733625479302, "grad_norm": 0.0, - "learning_rate": 1.9147360712990857e-05, - "loss": 1.1193, + "learning_rate": 1.818895544536115e-05, + "loss": 1.1815, "step": 5591 }, { - "epoch": 0.15846297713168409, + "epoch": 0.21879646294702246, "grad_norm": 0.0, - "learning_rate": 1.9146989839989785e-05, - "loss": 1.0368, + "learning_rate": 1.8188228061713943e-05, + "loss": 1.1929, "step": 5592 }, { - "epoch": 0.15849131457394655, + "epoch": 0.2188355896392519, "grad_norm": 0.0, - "learning_rate": 1.9146618889940218e-05, - "loss": 0.9675, + "learning_rate": 1.8187500546574052e-05, + "loss": 1.1071, "step": 5593 }, { - "epoch": 0.15851965201620902, + "epoch": 0.21887471633148134, "grad_norm": 0.0, - "learning_rate": 1.9146247862845282e-05, - "loss": 1.0591, + "learning_rate": 1.818677289995316e-05, + "loss": 1.1788, "step": 5594 }, { - "epoch": 0.15854798945847148, + "epoch": 0.21891384302371078, "grad_norm": 0.0, - "learning_rate": 1.9145876758708106e-05, - "loss": 0.9498, + "learning_rate": 1.8186045121862953e-05, + "loss": 1.0587, "step": 5595 }, { - "epoch": 0.15857632690073395, + "epoch": 0.21895296971594022, "grad_norm": 0.0, - "learning_rate": 1.9145505577531816e-05, - "loss": 0.915, + "learning_rate": 1.818531721231512e-05, + "loss": 1.11, "step": 5596 }, { - "epoch": 0.1586046643429964, + "epoch": 0.21899209640816966, "grad_norm": 0.0, - "learning_rate": 1.9145134319319533e-05, - "loss": 1.0729, + "learning_rate": 1.8184589171321353e-05, + "loss": 1.0551, "step": 5597 }, { - "epoch": 0.15863300178525885, + "epoch": 0.2190312231003991, "grad_norm": 0.0, - "learning_rate": 1.914476298407439e-05, - "loss": 1.0662, + "learning_rate": 1.818386099889334e-05, + "loss": 1.018, "step": 5598 }, { - "epoch": 0.15866133922752132, + "epoch": 0.21907034979262854, "grad_norm": 0.0, - "learning_rate": 1.9144391571799513e-05, - "loss": 0.9565, + "learning_rate": 1.8183132695042768e-05, + "loss": 0.993, "step": 5599 }, { - "epoch": 0.15868967666978379, + "epoch": 0.21910947648485796, "grad_norm": 0.0, - "learning_rate": 1.9144020082498027e-05, - "loss": 0.963, + "learning_rate": 1.8182404259781344e-05, + "loss": 1.1147, "step": 5600 }, { - "epoch": 0.15871801411204625, + "epoch": 0.2191486031770874, "grad_norm": 0.0, - "learning_rate": 1.9143648516173064e-05, - "loss": 1.0202, + "learning_rate": 1.8181675693120756e-05, + "loss": 1.2205, "step": 5601 }, { - "epoch": 0.15874635155430872, + "epoch": 0.21918772986931684, "grad_norm": 0.0, - "learning_rate": 1.9143276872827758e-05, - "loss": 0.9678, + "learning_rate": 1.8180946995072714e-05, + "loss": 1.1599, "step": 5602 }, { - "epoch": 0.15877468899657116, + "epoch": 0.21922685656154628, "grad_norm": 0.0, - "learning_rate": 1.9142905152465236e-05, - "loss": 1.0597, + "learning_rate": 1.8180218165648913e-05, + "loss": 1.1646, "step": 5603 }, { - "epoch": 0.15880302643883362, + "epoch": 0.21926598325377572, "grad_norm": 0.0, - "learning_rate": 1.9142533355088628e-05, - "loss": 1.0564, + "learning_rate": 1.8179489204861055e-05, + "loss": 1.2142, "step": 5604 }, { - "epoch": 0.1588313638810961, + "epoch": 0.21930510994600516, "grad_norm": 0.0, - "learning_rate": 1.914216148070106e-05, - "loss": 1.0712, + "learning_rate": 1.8178760112720854e-05, + "loss": 1.2047, "step": 5605 }, { - "epoch": 0.15885970132335855, + "epoch": 0.2193442366382346, "grad_norm": 0.0, - "learning_rate": 1.9141789529305678e-05, - "loss": 0.9436, + "learning_rate": 1.8178030889240013e-05, + "loss": 1.3321, "step": 5606 }, { - "epoch": 0.15888803876562102, + "epoch": 0.21938336333046404, "grad_norm": 0.0, - "learning_rate": 1.9141417500905604e-05, - "loss": 1.0045, + "learning_rate": 1.8177301534430243e-05, + "loss": 1.1721, "step": 5607 }, { - "epoch": 0.1589163762078835, + "epoch": 0.21942249002269348, "grad_norm": 0.0, - "learning_rate": 1.9141045395503978e-05, - "loss": 1.0218, + "learning_rate": 1.8176572048303258e-05, + "loss": 1.2519, "step": 5608 }, { - "epoch": 0.15894471365014592, + "epoch": 0.21946161671492292, "grad_norm": 0.0, - "learning_rate": 1.9140673213103932e-05, - "loss": 0.9695, + "learning_rate": 1.8175842430870774e-05, + "loss": 1.0932, "step": 5609 }, { - "epoch": 0.1589730510924084, + "epoch": 0.21950074340715237, "grad_norm": 0.0, - "learning_rate": 1.9140300953708602e-05, - "loss": 1.0079, + "learning_rate": 1.81751126821445e-05, + "loss": 1.1035, "step": 5610 }, { - "epoch": 0.15900138853467086, + "epoch": 0.2195398700993818, "grad_norm": 0.0, - "learning_rate": 1.9139928617321125e-05, - "loss": 1.0396, + "learning_rate": 1.8174382802136167e-05, + "loss": 1.132, "step": 5611 }, { - "epoch": 0.15902972597693332, + "epoch": 0.21957899679161125, "grad_norm": 0.0, - "learning_rate": 1.913955620394463e-05, - "loss": 1.0216, + "learning_rate": 1.8173652790857482e-05, + "loss": 1.0948, "step": 5612 }, { - "epoch": 0.1590580634191958, + "epoch": 0.2196181234838407, "grad_norm": 0.0, - "learning_rate": 1.9139183713582264e-05, - "loss": 0.9861, + "learning_rate": 1.817292264832018e-05, + "loss": 1.1893, "step": 5613 }, { - "epoch": 0.15908640086145825, + "epoch": 0.2196572501760701, "grad_norm": 0.0, - "learning_rate": 1.9138811146237156e-05, - "loss": 1.0739, + "learning_rate": 1.817219237453598e-05, + "loss": 1.152, "step": 5614 }, { - "epoch": 0.1591147383037207, + "epoch": 0.21969637686829954, "grad_norm": 0.0, - "learning_rate": 1.913843850191245e-05, - "loss": 1.0687, + "learning_rate": 1.8171461969516612e-05, + "loss": 1.303, "step": 5615 }, { - "epoch": 0.15914307574598316, + "epoch": 0.21973550356052898, "grad_norm": 0.0, - "learning_rate": 1.9138065780611283e-05, - "loss": 1.0704, + "learning_rate": 1.8170731433273802e-05, + "loss": 1.2507, "step": 5616 }, { - "epoch": 0.15917141318824563, + "epoch": 0.21977463025275842, "grad_norm": 0.0, - "learning_rate": 1.9137692982336794e-05, - "loss": 0.955, + "learning_rate": 1.8170000765819283e-05, + "loss": 1.2578, "step": 5617 }, { - "epoch": 0.1591997506305081, + "epoch": 0.21981375694498786, "grad_norm": 0.0, - "learning_rate": 1.9137320107092122e-05, - "loss": 1.0774, + "learning_rate": 1.8169269967164792e-05, + "loss": 1.1086, "step": 5618 }, { - "epoch": 0.15922808807277056, + "epoch": 0.2198528836372173, "grad_norm": 0.0, - "learning_rate": 1.9136947154880413e-05, - "loss": 0.9783, + "learning_rate": 1.816853903732206e-05, + "loss": 1.3674, "step": 5619 }, { - "epoch": 0.15925642551503302, + "epoch": 0.21989201032944675, "grad_norm": 0.0, - "learning_rate": 1.9136574125704807e-05, - "loss": 1.013, + "learning_rate": 1.8167807976302828e-05, + "loss": 1.0291, "step": 5620 }, { - "epoch": 0.15928476295729546, + "epoch": 0.2199311370216762, "grad_norm": 0.0, - "learning_rate": 1.9136201019568437e-05, - "loss": 1.0444, + "learning_rate": 1.816707678411884e-05, + "loss": 1.1287, "step": 5621 }, { - "epoch": 0.15931310039955793, + "epoch": 0.21997026371390563, "grad_norm": 0.0, - "learning_rate": 1.9135827836474463e-05, - "loss": 1.109, + "learning_rate": 1.8166345460781826e-05, + "loss": 1.1454, "step": 5622 }, { - "epoch": 0.1593414378418204, + "epoch": 0.22000939040613507, "grad_norm": 0.0, - "learning_rate": 1.913545457642601e-05, - "loss": 1.0259, + "learning_rate": 1.8165614006303537e-05, + "loss": 1.1345, "step": 5623 }, { - "epoch": 0.15936977528408286, + "epoch": 0.2200485170983645, "grad_norm": 0.0, - "learning_rate": 1.9135081239426233e-05, - "loss": 0.8654, + "learning_rate": 1.816488242069572e-05, + "loss": 1.1262, "step": 5624 }, { - "epoch": 0.15939811272634533, + "epoch": 0.22008764379059395, "grad_norm": 0.0, - "learning_rate": 1.9134707825478277e-05, - "loss": 1.0417, + "learning_rate": 1.8164150703970124e-05, + "loss": 1.0819, "step": 5625 }, { - "epoch": 0.1594264501686078, + "epoch": 0.2201267704828234, "grad_norm": 0.0, - "learning_rate": 1.9134334334585282e-05, - "loss": 0.967, + "learning_rate": 1.8163418856138496e-05, + "loss": 1.0975, "step": 5626 }, { - "epoch": 0.15945478761087023, + "epoch": 0.22016589717505283, "grad_norm": 0.0, - "learning_rate": 1.91339607667504e-05, - "loss": 1.1558, + "learning_rate": 1.8162686877212592e-05, + "loss": 1.1521, "step": 5627 }, { - "epoch": 0.1594831250531327, + "epoch": 0.22020502386728227, "grad_norm": 0.0, - "learning_rate": 1.913358712197677e-05, - "loss": 1.0399, + "learning_rate": 1.8161954767204165e-05, + "loss": 1.1283, "step": 5628 }, { - "epoch": 0.15951146249539516, + "epoch": 0.2202441505595117, "grad_norm": 0.0, - "learning_rate": 1.9133213400267548e-05, - "loss": 0.9448, + "learning_rate": 1.816122252612497e-05, + "loss": 1.2858, "step": 5629 }, { - "epoch": 0.15953979993765763, + "epoch": 0.22028327725174113, "grad_norm": 0.0, - "learning_rate": 1.9132839601625877e-05, - "loss": 1.1023, + "learning_rate": 1.816049015398677e-05, + "loss": 1.0851, "step": 5630 }, { - "epoch": 0.1595681373799201, + "epoch": 0.22032240394397057, "grad_norm": 0.0, - "learning_rate": 1.913246572605491e-05, - "loss": 1.0424, + "learning_rate": 1.8159757650801323e-05, + "loss": 1.0947, "step": 5631 }, { - "epoch": 0.15959647482218256, + "epoch": 0.2203615306362, "grad_norm": 0.0, - "learning_rate": 1.9132091773557787e-05, - "loss": 1.0316, + "learning_rate": 1.8159025016580394e-05, + "loss": 1.2516, "step": 5632 }, { - "epoch": 0.159624812264445, + "epoch": 0.22040065732842945, "grad_norm": 0.0, - "learning_rate": 1.9131717744137667e-05, - "loss": 1.0589, + "learning_rate": 1.8158292251335746e-05, + "loss": 1.1412, "step": 5633 }, { - "epoch": 0.15965314970670746, + "epoch": 0.2204397840206589, "grad_norm": 0.0, - "learning_rate": 1.9131343637797695e-05, - "loss": 1.0898, + "learning_rate": 1.8157559355079147e-05, + "loss": 1.2073, "step": 5634 }, { - "epoch": 0.15968148714896993, + "epoch": 0.22047891071288833, "grad_norm": 0.0, - "learning_rate": 1.9130969454541026e-05, - "loss": 1.1714, + "learning_rate": 1.8156826327822367e-05, + "loss": 1.266, "step": 5635 }, { - "epoch": 0.1597098245912324, + "epoch": 0.22051803740511777, "grad_norm": 0.0, - "learning_rate": 1.913059519437081e-05, - "loss": 1.0185, + "learning_rate": 1.8156093169577177e-05, + "loss": 1.0803, "step": 5636 }, { - "epoch": 0.15973816203349486, + "epoch": 0.2205571640973472, "grad_norm": 0.0, - "learning_rate": 1.91302208572902e-05, - "loss": 1.0181, + "learning_rate": 1.8155359880355352e-05, + "loss": 1.1016, "step": 5637 }, { - "epoch": 0.15976649947575733, + "epoch": 0.22059629078957665, "grad_norm": 0.0, - "learning_rate": 1.9129846443302354e-05, - "loss": 1.0666, + "learning_rate": 1.815462646016867e-05, + "loss": 0.9656, "step": 5638 }, { - "epoch": 0.15979483691801977, + "epoch": 0.2206354174818061, "grad_norm": 0.0, - "learning_rate": 1.9129471952410417e-05, - "loss": 0.9843, + "learning_rate": 1.81538929090289e-05, + "loss": 1.0507, "step": 5639 }, { - "epoch": 0.15982317436028223, + "epoch": 0.22067454417403554, "grad_norm": 0.0, - "learning_rate": 1.912909738461755e-05, - "loss": 1.0715, + "learning_rate": 1.8153159226947827e-05, + "loss": 1.1124, "step": 5640 }, { - "epoch": 0.1598515118025447, + "epoch": 0.22071367086626498, "grad_norm": 0.0, - "learning_rate": 1.91287227399269e-05, - "loss": 1.1478, + "learning_rate": 1.8152425413937233e-05, + "loss": 1.298, "step": 5641 }, { - "epoch": 0.15987984924480717, + "epoch": 0.22075279755849442, "grad_norm": 0.0, - "learning_rate": 1.9128348018341634e-05, - "loss": 1.1121, + "learning_rate": 1.8151691470008906e-05, + "loss": 1.1498, "step": 5642 }, { - "epoch": 0.15990818668706963, + "epoch": 0.22079192425072383, "grad_norm": 0.0, - "learning_rate": 1.91279732198649e-05, - "loss": 0.9633, + "learning_rate": 1.8150957395174628e-05, + "loss": 1.1945, "step": 5643 }, { - "epoch": 0.1599365241293321, + "epoch": 0.22083105094295327, "grad_norm": 0.0, - "learning_rate": 1.912759834449986e-05, - "loss": 1.0357, + "learning_rate": 1.8150223189446184e-05, + "loss": 1.1793, "step": 5644 }, { - "epoch": 0.15996486157159454, + "epoch": 0.2208701776351827, "grad_norm": 0.0, - "learning_rate": 1.912722339224967e-05, - "loss": 1.0605, + "learning_rate": 1.814948885283537e-05, + "loss": 1.151, "step": 5645 }, { - "epoch": 0.159993199013857, + "epoch": 0.22090930432741215, "grad_norm": 0.0, - "learning_rate": 1.9126848363117487e-05, - "loss": 0.9487, + "learning_rate": 1.8148754385353982e-05, + "loss": 1.1034, "step": 5646 }, { - "epoch": 0.16002153645611947, + "epoch": 0.2209484310196416, "grad_norm": 0.0, - "learning_rate": 1.9126473257106473e-05, - "loss": 0.9675, + "learning_rate": 1.8148019787013804e-05, + "loss": 1.121, "step": 5647 }, { - "epoch": 0.16004987389838193, + "epoch": 0.22098755771187104, "grad_norm": 0.0, - "learning_rate": 1.9126098074219782e-05, - "loss": 1.0866, + "learning_rate": 1.814728505782664e-05, + "loss": 1.0861, "step": 5648 }, { - "epoch": 0.1600782113406444, + "epoch": 0.22102668440410048, "grad_norm": 0.0, - "learning_rate": 1.9125722814460582e-05, - "loss": 1.0653, + "learning_rate": 1.8146550197804287e-05, + "loss": 1.0995, "step": 5649 }, { - "epoch": 0.16010654878290687, + "epoch": 0.22106581109632992, "grad_norm": 0.0, - "learning_rate": 1.9125347477832024e-05, - "loss": 0.9451, + "learning_rate": 1.8145815206958544e-05, + "loss": 1.2747, "step": 5650 }, { - "epoch": 0.1601348862251693, + "epoch": 0.22110493778855936, "grad_norm": 0.0, - "learning_rate": 1.9124972064337283e-05, - "loss": 1.0361, + "learning_rate": 1.814508008530122e-05, + "loss": 1.1134, "step": 5651 }, { - "epoch": 0.16016322366743177, + "epoch": 0.2211440644807888, "grad_norm": 0.0, - "learning_rate": 1.912459657397951e-05, - "loss": 1.0349, + "learning_rate": 1.8144344832844114e-05, + "loss": 1.1548, "step": 5652 }, { - "epoch": 0.16019156110969424, + "epoch": 0.22118319117301824, "grad_norm": 0.0, - "learning_rate": 1.912422100676187e-05, - "loss": 0.9184, + "learning_rate": 1.8143609449599035e-05, + "loss": 1.0919, "step": 5653 }, { - "epoch": 0.1602198985519567, + "epoch": 0.22122231786524768, "grad_norm": 0.0, - "learning_rate": 1.912384536268753e-05, - "loss": 0.9487, + "learning_rate": 1.8142873935577795e-05, + "loss": 1.2157, "step": 5654 }, { - "epoch": 0.16024823599421917, + "epoch": 0.22126144455747712, "grad_norm": 0.0, - "learning_rate": 1.912346964175965e-05, - "loss": 1.0714, + "learning_rate": 1.8142138290792202e-05, + "loss": 1.2334, "step": 5655 }, { - "epoch": 0.16027657343648163, + "epoch": 0.22130057124970656, "grad_norm": 0.0, - "learning_rate": 1.9123093843981403e-05, - "loss": 0.8867, + "learning_rate": 1.814140251525407e-05, + "loss": 1.0992, "step": 5656 }, { - "epoch": 0.16030491087874407, + "epoch": 0.22133969794193598, "grad_norm": 0.0, - "learning_rate": 1.9122717969355945e-05, - "loss": 1.0857, + "learning_rate": 1.8140666608975216e-05, + "loss": 1.1585, "step": 5657 }, { - "epoch": 0.16033324832100654, + "epoch": 0.22137882463416542, "grad_norm": 0.0, - "learning_rate": 1.912234201788645e-05, - "loss": 0.9776, + "learning_rate": 1.8139930571967455e-05, + "loss": 1.2488, "step": 5658 }, { - "epoch": 0.160361585763269, + "epoch": 0.22141795132639486, "grad_norm": 0.0, - "learning_rate": 1.9121965989576075e-05, - "loss": 1.0491, + "learning_rate": 1.8139194404242613e-05, + "loss": 1.2367, "step": 5659 }, { - "epoch": 0.16038992320553147, + "epoch": 0.2214570780186243, "grad_norm": 0.0, - "learning_rate": 1.9121589884427995e-05, - "loss": 1.161, + "learning_rate": 1.8138458105812505e-05, + "loss": 1.2625, "step": 5660 }, { - "epoch": 0.16041826064779394, + "epoch": 0.22149620471085374, "grad_norm": 0.0, - "learning_rate": 1.9121213702445377e-05, - "loss": 1.0666, + "learning_rate": 1.8137721676688958e-05, + "loss": 1.2211, "step": 5661 }, { - "epoch": 0.1604465980900564, + "epoch": 0.22153533140308318, "grad_norm": 0.0, - "learning_rate": 1.9120837443631388e-05, - "loss": 1.0264, + "learning_rate": 1.81369851168838e-05, + "loss": 1.0512, "step": 5662 }, { - "epoch": 0.16047493553231884, + "epoch": 0.22157445809531262, "grad_norm": 0.0, - "learning_rate": 1.91204611079892e-05, - "loss": 1.1456, + "learning_rate": 1.8136248426408856e-05, + "loss": 1.1733, "step": 5663 }, { - "epoch": 0.1605032729745813, + "epoch": 0.22161358478754206, "grad_norm": 0.0, - "learning_rate": 1.912008469552198e-05, - "loss": 1.0226, + "learning_rate": 1.8135511605275954e-05, + "loss": 1.2429, "step": 5664 }, { - "epoch": 0.16053161041684377, + "epoch": 0.2216527114797715, "grad_norm": 0.0, - "learning_rate": 1.91197082062329e-05, - "loss": 1.0437, + "learning_rate": 1.8134774653496934e-05, + "loss": 1.1571, "step": 5665 }, { - "epoch": 0.16055994785910624, + "epoch": 0.22169183817200094, "grad_norm": 0.0, - "learning_rate": 1.911933164012513e-05, - "loss": 1.0547, + "learning_rate": 1.8134037571083624e-05, + "loss": 1.1386, "step": 5666 }, { - "epoch": 0.1605882853013687, + "epoch": 0.22173096486423038, "grad_norm": 0.0, - "learning_rate": 1.9118954997201845e-05, - "loss": 1.1693, + "learning_rate": 1.813330035804787e-05, + "loss": 1.2353, "step": 5667 }, { - "epoch": 0.16061662274363117, + "epoch": 0.22177009155645983, "grad_norm": 0.0, - "learning_rate": 1.9118578277466215e-05, - "loss": 1.0314, + "learning_rate": 1.8132563014401497e-05, + "loss": 1.1344, "step": 5668 }, { - "epoch": 0.1606449601858936, + "epoch": 0.22180921824868927, "grad_norm": 0.0, - "learning_rate": 1.9118201480921413e-05, - "loss": 1.061, + "learning_rate": 1.8131825540156353e-05, + "loss": 1.121, "step": 5669 }, { - "epoch": 0.16067329762815608, + "epoch": 0.2218483449409187, "grad_norm": 0.0, - "learning_rate": 1.9117824607570615e-05, - "loss": 1.0734, + "learning_rate": 1.813108793532428e-05, + "loss": 1.0997, "step": 5670 }, { - "epoch": 0.16070163507041854, + "epoch": 0.22188747163314812, "grad_norm": 0.0, - "learning_rate": 1.9117447657416995e-05, - "loss": 1.0465, + "learning_rate": 1.8130350199917124e-05, + "loss": 1.1341, "step": 5671 }, { - "epoch": 0.160729972512681, + "epoch": 0.22192659832537756, "grad_norm": 0.0, - "learning_rate": 1.911707063046373e-05, - "loss": 1.04, + "learning_rate": 1.8129612333946737e-05, + "loss": 1.0776, "step": 5672 }, { - "epoch": 0.16075830995494347, + "epoch": 0.221965725017607, "grad_norm": 0.0, - "learning_rate": 1.911669352671399e-05, - "loss": 1.0667, + "learning_rate": 1.8128874337424957e-05, + "loss": 1.0715, "step": 5673 }, { - "epoch": 0.16078664739720594, + "epoch": 0.22200485170983644, "grad_norm": 0.0, - "learning_rate": 1.9116316346170957e-05, - "loss": 1.0908, + "learning_rate": 1.8128136210363646e-05, + "loss": 1.1542, "step": 5674 }, { - "epoch": 0.16081498483946838, + "epoch": 0.22204397840206588, "grad_norm": 0.0, - "learning_rate": 1.9115939088837806e-05, - "loss": 1.022, + "learning_rate": 1.812739795277465e-05, + "loss": 1.124, "step": 5675 }, { - "epoch": 0.16084332228173084, + "epoch": 0.22208310509429532, "grad_norm": 0.0, - "learning_rate": 1.9115561754717713e-05, - "loss": 1.0347, + "learning_rate": 1.8126659564669827e-05, + "loss": 1.0048, "step": 5676 }, { - "epoch": 0.1608716597239933, + "epoch": 0.22212223178652477, "grad_norm": 0.0, - "learning_rate": 1.911518434381386e-05, - "loss": 1.1365, + "learning_rate": 1.8125921046061035e-05, + "loss": 1.1614, "step": 5677 }, { - "epoch": 0.16089999716625578, + "epoch": 0.2221613584787542, "grad_norm": 0.0, - "learning_rate": 1.9114806856129422e-05, - "loss": 1.0944, + "learning_rate": 1.8125182396960132e-05, + "loss": 1.2309, "step": 5678 }, { - "epoch": 0.16092833460851824, + "epoch": 0.22220048517098365, "grad_norm": 0.0, - "learning_rate": 1.9114429291667583e-05, - "loss": 1.0819, + "learning_rate": 1.812444361737898e-05, + "loss": 0.948, "step": 5679 }, { - "epoch": 0.1609566720507807, + "epoch": 0.2222396118632131, "grad_norm": 0.0, - "learning_rate": 1.9114051650431525e-05, - "loss": 0.9855, + "learning_rate": 1.8123704707329447e-05, + "loss": 1.158, "step": 5680 }, { - "epoch": 0.16098500949304315, + "epoch": 0.22227873855544253, "grad_norm": 0.0, - "learning_rate": 1.911367393242442e-05, - "loss": 1.0491, + "learning_rate": 1.8122965666823398e-05, + "loss": 1.2961, "step": 5681 }, { - "epoch": 0.1610133469353056, + "epoch": 0.22231786524767197, "grad_norm": 0.0, - "learning_rate": 1.911329613764946e-05, - "loss": 0.9655, + "learning_rate": 1.8122226495872693e-05, + "loss": 1.1713, "step": 5682 }, { - "epoch": 0.16104168437756808, + "epoch": 0.2223569919399014, "grad_norm": 0.0, - "learning_rate": 1.9112918266109817e-05, - "loss": 1.1333, + "learning_rate": 1.812148719448921e-05, + "loss": 1.1423, "step": 5683 }, { - "epoch": 0.16107002181983054, + "epoch": 0.22239611863213085, "grad_norm": 0.0, - "learning_rate": 1.9112540317808683e-05, - "loss": 0.9182, + "learning_rate": 1.8120747762684826e-05, + "loss": 1.1928, "step": 5684 }, { - "epoch": 0.161098359262093, + "epoch": 0.2224352453243603, "grad_norm": 0.0, - "learning_rate": 1.9112162292749236e-05, - "loss": 1.1277, + "learning_rate": 1.81200082004714e-05, + "loss": 1.1556, "step": 5685 }, { - "epoch": 0.16112669670435548, + "epoch": 0.2224743720165897, "grad_norm": 0.0, - "learning_rate": 1.9111784190934665e-05, - "loss": 0.9809, + "learning_rate": 1.8119268507860823e-05, + "loss": 1.0092, "step": 5686 }, { - "epoch": 0.16115503414661791, + "epoch": 0.22251349870881915, "grad_norm": 0.0, - "learning_rate": 1.9111406012368153e-05, - "loss": 1.0106, + "learning_rate": 1.8118528684864965e-05, + "loss": 1.2409, "step": 5687 }, { - "epoch": 0.16118337158888038, + "epoch": 0.2225526254010486, "grad_norm": 0.0, - "learning_rate": 1.9111027757052882e-05, - "loss": 0.9635, + "learning_rate": 1.811778873149571e-05, + "loss": 1.0878, "step": 5688 }, { - "epoch": 0.16121170903114285, + "epoch": 0.22259175209327803, "grad_norm": 0.0, - "learning_rate": 1.911064942499204e-05, - "loss": 1.0365, + "learning_rate": 1.8117048647764937e-05, + "loss": 1.2236, "step": 5689 }, { - "epoch": 0.1612400464734053, + "epoch": 0.22263087878550747, "grad_norm": 0.0, - "learning_rate": 1.911027101618882e-05, - "loss": 1.0824, + "learning_rate": 1.8116308433684538e-05, + "loss": 1.1021, "step": 5690 }, { - "epoch": 0.16126838391566778, + "epoch": 0.2226700054777369, "grad_norm": 0.0, - "learning_rate": 1.91098925306464e-05, - "loss": 0.8353, + "learning_rate": 1.8115568089266396e-05, + "loss": 1.2059, "step": 5691 }, { - "epoch": 0.16129672135793025, + "epoch": 0.22270913216996635, "grad_norm": 0.0, - "learning_rate": 1.910951396836797e-05, - "loss": 1.0613, + "learning_rate": 1.81148276145224e-05, + "loss": 1.0938, "step": 5692 }, { - "epoch": 0.16132505880019268, + "epoch": 0.2227482588621958, "grad_norm": 0.0, - "learning_rate": 1.9109135329356722e-05, - "loss": 1.0703, + "learning_rate": 1.811408700946444e-05, + "loss": 1.2177, "step": 5693 }, { - "epoch": 0.16135339624245515, + "epoch": 0.22278738555442523, "grad_norm": 0.0, - "learning_rate": 1.9108756613615846e-05, - "loss": 0.9441, + "learning_rate": 1.811334627410441e-05, + "loss": 1.1503, "step": 5694 }, { - "epoch": 0.16138173368471762, + "epoch": 0.22282651224665467, "grad_norm": 0.0, - "learning_rate": 1.9108377821148534e-05, - "loss": 1.0667, + "learning_rate": 1.8112605408454205e-05, + "loss": 1.2169, "step": 5695 }, { - "epoch": 0.16141007112698008, + "epoch": 0.22286563893888411, "grad_norm": 0.0, - "learning_rate": 1.910799895195797e-05, - "loss": 1.0403, + "learning_rate": 1.8111864412525723e-05, + "loss": 1.1652, "step": 5696 }, { - "epoch": 0.16143840856924255, + "epoch": 0.22290476563111356, "grad_norm": 0.0, - "learning_rate": 1.9107620006047346e-05, - "loss": 1.0214, + "learning_rate": 1.811112328633086e-05, + "loss": 1.0692, "step": 5697 }, { - "epoch": 0.161466746011505, + "epoch": 0.222943892323343, "grad_norm": 0.0, - "learning_rate": 1.910724098341986e-05, - "loss": 1.0137, + "learning_rate": 1.8110382029881526e-05, + "loss": 1.2817, "step": 5698 }, { - "epoch": 0.16149508345376745, + "epoch": 0.22298301901557244, "grad_norm": 0.0, - "learning_rate": 1.9106861884078704e-05, - "loss": 1.1287, + "learning_rate": 1.810964064318962e-05, + "loss": 1.1747, "step": 5699 }, { - "epoch": 0.16152342089602992, + "epoch": 0.22302214570780185, "grad_norm": 0.0, - "learning_rate": 1.9106482708027063e-05, - "loss": 1.1938, + "learning_rate": 1.8108899126267045e-05, + "loss": 1.1005, "step": 5700 }, { - "epoch": 0.16155175833829238, + "epoch": 0.2230612724000313, "grad_norm": 0.0, - "learning_rate": 1.9106103455268142e-05, - "loss": 0.9625, + "learning_rate": 1.810815747912571e-05, + "loss": 1.0374, "step": 5701 }, { - "epoch": 0.16158009578055485, + "epoch": 0.22310039909226073, "grad_norm": 0.0, - "learning_rate": 1.9105724125805126e-05, - "loss": 0.9896, + "learning_rate": 1.8107415701777527e-05, + "loss": 1.2223, "step": 5702 }, { - "epoch": 0.16160843322281732, + "epoch": 0.22313952578449017, "grad_norm": 0.0, - "learning_rate": 1.9105344719641213e-05, - "loss": 1.0095, + "learning_rate": 1.810667379423441e-05, + "loss": 1.063, "step": 5703 }, { - "epoch": 0.16163677066507978, + "epoch": 0.2231786524767196, "grad_norm": 0.0, - "learning_rate": 1.9104965236779605e-05, - "loss": 0.9303, + "learning_rate": 1.8105931756508263e-05, + "loss": 0.8236, "step": 5704 }, { - "epoch": 0.16166510810734222, + "epoch": 0.22321777916894905, "grad_norm": 0.0, - "learning_rate": 1.910458567722349e-05, - "loss": 0.9945, + "learning_rate": 1.8105189588611015e-05, + "loss": 1.1941, "step": 5705 }, { - "epoch": 0.1616934455496047, + "epoch": 0.2232569058611785, "grad_norm": 0.0, - "learning_rate": 1.910420604097607e-05, - "loss": 0.9974, + "learning_rate": 1.8104447290554575e-05, + "loss": 1.1931, "step": 5706 }, { - "epoch": 0.16172178299186715, + "epoch": 0.22329603255340794, "grad_norm": 0.0, - "learning_rate": 1.9103826328040546e-05, - "loss": 1.0854, + "learning_rate": 1.810370486235087e-05, + "loss": 1.0878, "step": 5707 }, { - "epoch": 0.16175012043412962, + "epoch": 0.22333515924563738, "grad_norm": 0.0, - "learning_rate": 1.9103446538420108e-05, - "loss": 1.0719, + "learning_rate": 1.810296230401182e-05, + "loss": 1.1958, "step": 5708 }, { - "epoch": 0.16177845787639208, + "epoch": 0.22337428593786682, "grad_norm": 0.0, - "learning_rate": 1.9103066672117957e-05, - "loss": 1.0802, + "learning_rate": 1.8102219615549346e-05, + "loss": 0.9774, "step": 5709 }, { - "epoch": 0.16180679531865455, + "epoch": 0.22341341263009626, "grad_norm": 0.0, - "learning_rate": 1.91026867291373e-05, - "loss": 1.0615, + "learning_rate": 1.8101476796975377e-05, + "loss": 1.1609, "step": 5710 }, { - "epoch": 0.161835132760917, + "epoch": 0.2234525393223257, "grad_norm": 0.0, - "learning_rate": 1.9102306709481327e-05, - "loss": 1.0723, + "learning_rate": 1.8100733848301845e-05, + "loss": 1.2446, "step": 5711 }, { - "epoch": 0.16186347020317945, + "epoch": 0.22349166601455514, "grad_norm": 0.0, - "learning_rate": 1.910192661315325e-05, - "loss": 1.076, + "learning_rate": 1.8099990769540677e-05, + "loss": 1.1777, "step": 5712 }, { - "epoch": 0.16189180764544192, + "epoch": 0.22353079270678458, "grad_norm": 0.0, - "learning_rate": 1.9101546440156262e-05, - "loss": 0.9098, + "learning_rate": 1.8099247560703806e-05, + "loss": 1.1247, "step": 5713 }, { - "epoch": 0.1619201450877044, + "epoch": 0.223569919399014, "grad_norm": 0.0, - "learning_rate": 1.9101166190493573e-05, - "loss": 1.0233, + "learning_rate": 1.809850422180317e-05, + "loss": 1.018, "step": 5714 }, { - "epoch": 0.16194848252996685, + "epoch": 0.22360904609124344, "grad_norm": 0.0, - "learning_rate": 1.9100785864168377e-05, - "loss": 1.0782, + "learning_rate": 1.8097760752850705e-05, + "loss": 1.1667, "step": 5715 }, { - "epoch": 0.16197681997222932, + "epoch": 0.22364817278347288, "grad_norm": 0.0, - "learning_rate": 1.910040546118388e-05, - "loss": 1.1053, + "learning_rate": 1.809701715385835e-05, + "loss": 1.2219, "step": 5716 }, { - "epoch": 0.16200515741449176, + "epoch": 0.22368729947570232, "grad_norm": 0.0, - "learning_rate": 1.9100024981543296e-05, - "loss": 1.0745, + "learning_rate": 1.8096273424838046e-05, + "loss": 1.1536, "step": 5717 }, { - "epoch": 0.16203349485675422, + "epoch": 0.22372642616793176, "grad_norm": 0.0, - "learning_rate": 1.9099644425249818e-05, - "loss": 1.0763, + "learning_rate": 1.809552956580173e-05, + "loss": 1.1501, "step": 5718 }, { - "epoch": 0.1620618322990167, + "epoch": 0.2237655528601612, "grad_norm": 0.0, - "learning_rate": 1.9099263792306654e-05, - "loss": 1.0172, + "learning_rate": 1.8094785576761357e-05, + "loss": 1.1594, "step": 5719 }, { - "epoch": 0.16209016974127916, + "epoch": 0.22380467955239064, "grad_norm": 0.0, - "learning_rate": 1.9098883082717014e-05, - "loss": 0.9768, + "learning_rate": 1.809404145772887e-05, + "loss": 1.1303, "step": 5720 }, { - "epoch": 0.16211850718354162, + "epoch": 0.22384380624462008, "grad_norm": 0.0, - "learning_rate": 1.9098502296484102e-05, - "loss": 1.1107, + "learning_rate": 1.8093297208716223e-05, + "loss": 1.1464, "step": 5721 }, { - "epoch": 0.1621468446258041, + "epoch": 0.22388293293684952, "grad_norm": 0.0, - "learning_rate": 1.909812143361113e-05, - "loss": 1.0774, + "learning_rate": 1.809255282973536e-05, + "loss": 1.1694, "step": 5722 }, { - "epoch": 0.16217518206806653, + "epoch": 0.22392205962907896, "grad_norm": 0.0, - "learning_rate": 1.90977404941013e-05, - "loss": 0.9197, + "learning_rate": 1.8091808320798242e-05, + "loss": 0.9955, "step": 5723 }, { - "epoch": 0.162203519510329, + "epoch": 0.2239611863213084, "grad_norm": 0.0, - "learning_rate": 1.9097359477957825e-05, - "loss": 0.9169, + "learning_rate": 1.8091063681916823e-05, + "loss": 1.0962, "step": 5724 }, { - "epoch": 0.16223185695259146, + "epoch": 0.22400031301353784, "grad_norm": 0.0, - "learning_rate": 1.909697838518391e-05, - "loss": 0.9547, + "learning_rate": 1.8090318913103057e-05, + "loss": 1.0452, "step": 5725 }, { - "epoch": 0.16226019439485392, + "epoch": 0.22403943970576728, "grad_norm": 0.0, - "learning_rate": 1.909659721578277e-05, - "loss": 0.9746, + "learning_rate": 1.808957401436891e-05, + "loss": 1.1335, "step": 5726 }, { - "epoch": 0.1622885318371164, + "epoch": 0.22407856639799673, "grad_norm": 0.0, - "learning_rate": 1.9096215969757616e-05, - "loss": 1.0766, + "learning_rate": 1.8088828985726337e-05, + "loss": 1.0955, "step": 5727 }, { - "epoch": 0.16231686927937886, + "epoch": 0.22411769309022614, "grad_norm": 0.0, - "learning_rate": 1.9095834647111654e-05, - "loss": 1.0219, + "learning_rate": 1.808808382718731e-05, + "loss": 1.1101, "step": 5728 }, { - "epoch": 0.1623452067216413, + "epoch": 0.22415681978245558, "grad_norm": 0.0, - "learning_rate": 1.90954532478481e-05, - "loss": 0.9398, + "learning_rate": 1.808733853876379e-05, + "loss": 1.1257, "step": 5729 }, { - "epoch": 0.16237354416390376, + "epoch": 0.22419594647468502, "grad_norm": 0.0, - "learning_rate": 1.9095071771970165e-05, - "loss": 1.0554, + "learning_rate": 1.8086593120467748e-05, + "loss": 1.1852, "step": 5730 }, { - "epoch": 0.16240188160616623, + "epoch": 0.22423507316691446, "grad_norm": 0.0, - "learning_rate": 1.909469021948106e-05, - "loss": 0.9999, + "learning_rate": 1.8085847572311154e-05, + "loss": 1.201, "step": 5731 }, { - "epoch": 0.1624302190484287, + "epoch": 0.2242741998591439, "grad_norm": 0.0, - "learning_rate": 1.9094308590384007e-05, - "loss": 1.0199, + "learning_rate": 1.8085101894305975e-05, + "loss": 1.2269, "step": 5732 }, { - "epoch": 0.16245855649069116, + "epoch": 0.22431332655137334, "grad_norm": 0.0, - "learning_rate": 1.9093926884682215e-05, - "loss": 1.0564, + "learning_rate": 1.8084356086464197e-05, + "loss": 1.2072, "step": 5733 }, { - "epoch": 0.16248689393295362, + "epoch": 0.22435245324360278, "grad_norm": 0.0, - "learning_rate": 1.90935451023789e-05, - "loss": 1.1443, + "learning_rate": 1.808361014879779e-05, + "loss": 1.3187, "step": 5734 }, { - "epoch": 0.16251523137521606, + "epoch": 0.22439157993583222, "grad_norm": 0.0, - "learning_rate": 1.9093163243477274e-05, - "loss": 0.9503, + "learning_rate": 1.808286408131873e-05, + "loss": 1.1882, "step": 5735 }, { - "epoch": 0.16254356881747853, + "epoch": 0.22443070662806167, "grad_norm": 0.0, - "learning_rate": 1.9092781307980562e-05, - "loss": 1.1012, + "learning_rate": 1.8082117884039004e-05, + "loss": 1.0153, "step": 5736 }, { - "epoch": 0.162571906259741, + "epoch": 0.2244698333202911, "grad_norm": 0.0, - "learning_rate": 1.9092399295891974e-05, - "loss": 0.9695, + "learning_rate": 1.808137155697059e-05, + "loss": 1.2931, "step": 5737 }, { - "epoch": 0.16260024370200346, + "epoch": 0.22450896001252055, "grad_norm": 0.0, - "learning_rate": 1.909201720721473e-05, - "loss": 1.0096, + "learning_rate": 1.8080625100125474e-05, + "loss": 1.1066, "step": 5738 }, { - "epoch": 0.16262858114426593, + "epoch": 0.22454808670475, "grad_norm": 0.0, - "learning_rate": 1.9091635041952052e-05, - "loss": 1.0304, + "learning_rate": 1.8079878513515648e-05, + "loss": 1.1675, "step": 5739 }, { - "epoch": 0.1626569185865284, + "epoch": 0.22458721339697943, "grad_norm": 0.0, - "learning_rate": 1.9091252800107153e-05, - "loss": 1.1286, + "learning_rate": 1.8079131797153097e-05, + "loss": 1.2004, "step": 5740 }, { - "epoch": 0.16268525602879083, + "epoch": 0.22462634008920887, "grad_norm": 0.0, - "learning_rate": 1.9090870481683258e-05, - "loss": 1.0615, + "learning_rate": 1.8078384951049815e-05, + "loss": 1.2153, "step": 5741 }, { - "epoch": 0.1627135934710533, + "epoch": 0.2246654667814383, "grad_norm": 0.0, - "learning_rate": 1.9090488086683587e-05, - "loss": 1.0232, + "learning_rate": 1.807763797521779e-05, + "loss": 1.212, "step": 5742 }, { - "epoch": 0.16274193091331576, + "epoch": 0.22470459347366772, "grad_norm": 0.0, - "learning_rate": 1.9090105615111354e-05, - "loss": 0.9504, + "learning_rate": 1.807689086966902e-05, + "loss": 1.1682, "step": 5743 }, { - "epoch": 0.16277026835557823, + "epoch": 0.22474372016589717, "grad_norm": 0.0, - "learning_rate": 1.9089723066969787e-05, - "loss": 1.0145, + "learning_rate": 1.807614363441551e-05, + "loss": 1.1186, "step": 5744 }, { - "epoch": 0.1627986057978407, + "epoch": 0.2247828468581266, "grad_norm": 0.0, - "learning_rate": 1.908934044226211e-05, - "loss": 1.0727, + "learning_rate": 1.8075396269469247e-05, + "loss": 1.2174, "step": 5745 }, { - "epoch": 0.16282694324010316, + "epoch": 0.22482197355035605, "grad_norm": 0.0, - "learning_rate": 1.908895774099154e-05, - "loss": 1.0461, + "learning_rate": 1.807464877484224e-05, + "loss": 1.1048, "step": 5746 }, { - "epoch": 0.1628552806823656, + "epoch": 0.2248611002425855, "grad_norm": 0.0, - "learning_rate": 1.9088574963161304e-05, - "loss": 1.03, + "learning_rate": 1.8073901150546492e-05, + "loss": 1.0815, "step": 5747 }, { - "epoch": 0.16288361812462807, + "epoch": 0.22490022693481493, "grad_norm": 0.0, - "learning_rate": 1.9088192108774625e-05, - "loss": 1.129, + "learning_rate": 1.8073153396594012e-05, + "loss": 1.1494, "step": 5748 }, { - "epoch": 0.16291195556689053, + "epoch": 0.22493935362704437, "grad_norm": 0.0, - "learning_rate": 1.908780917783473e-05, - "loss": 1.0276, + "learning_rate": 1.80724055129968e-05, + "loss": 1.1958, "step": 5749 }, { - "epoch": 0.162940293009153, + "epoch": 0.2249784803192738, "grad_norm": 0.0, - "learning_rate": 1.908742617034485e-05, - "loss": 1.0428, + "learning_rate": 1.8071657499766875e-05, + "loss": 1.1531, "step": 5750 }, { - "epoch": 0.16296863045141546, + "epoch": 0.22501760701150325, "grad_norm": 0.0, - "learning_rate": 1.9087043086308198e-05, - "loss": 0.9616, + "learning_rate": 1.807090935691624e-05, + "loss": 1.3494, "step": 5751 }, { - "epoch": 0.16299696789367793, + "epoch": 0.2250567337037327, "grad_norm": 0.0, - "learning_rate": 1.908665992572801e-05, - "loss": 1.0302, + "learning_rate": 1.8070161084456915e-05, + "loss": 1.1358, "step": 5752 }, { - "epoch": 0.16302530533594037, + "epoch": 0.22509586039596213, "grad_norm": 0.0, - "learning_rate": 1.908627668860751e-05, - "loss": 1.0502, + "learning_rate": 1.806941268240092e-05, + "loss": 1.1257, "step": 5753 }, { - "epoch": 0.16305364277820283, + "epoch": 0.22513498708819157, "grad_norm": 0.0, - "learning_rate": 1.9085893374949926e-05, - "loss": 1.0624, + "learning_rate": 1.8068664150760267e-05, + "loss": 1.1625, "step": 5754 }, { - "epoch": 0.1630819802204653, + "epoch": 0.22517411378042101, "grad_norm": 0.0, - "learning_rate": 1.9085509984758492e-05, - "loss": 1.0424, + "learning_rate": 1.8067915489546976e-05, + "loss": 1.1837, "step": 5755 }, { - "epoch": 0.16311031766272777, + "epoch": 0.22521324047265046, "grad_norm": 0.0, - "learning_rate": 1.9085126518036432e-05, - "loss": 1.1076, + "learning_rate": 1.8067166698773073e-05, + "loss": 1.0018, "step": 5756 }, { - "epoch": 0.16313865510499023, + "epoch": 0.22525236716487987, "grad_norm": 0.0, - "learning_rate": 1.9084742974786978e-05, - "loss": 1.0501, + "learning_rate": 1.8066417778450584e-05, + "loss": 1.2357, "step": 5757 }, { - "epoch": 0.1631669925472527, + "epoch": 0.2252914938571093, "grad_norm": 0.0, - "learning_rate": 1.908435935501336e-05, - "loss": 0.9244, + "learning_rate": 1.806566872859153e-05, + "loss": 1.1533, "step": 5758 }, { - "epoch": 0.16319532998951514, + "epoch": 0.22533062054933875, "grad_norm": 0.0, - "learning_rate": 1.9083975658718808e-05, - "loss": 0.9862, + "learning_rate": 1.8064919549207946e-05, + "loss": 1.0984, "step": 5759 }, { - "epoch": 0.1632236674317776, + "epoch": 0.2253697472415682, "grad_norm": 0.0, - "learning_rate": 1.9083591885906555e-05, - "loss": 1.1595, + "learning_rate": 1.8064170240311857e-05, + "loss": 1.117, "step": 5760 }, { - "epoch": 0.16325200487404007, + "epoch": 0.22540887393379763, "grad_norm": 0.0, - "learning_rate": 1.908320803657984e-05, - "loss": 1.0224, + "learning_rate": 1.80634208019153e-05, + "loss": 1.1388, "step": 5761 }, { - "epoch": 0.16328034231630253, + "epoch": 0.22544800062602707, "grad_norm": 0.0, - "learning_rate": 1.9082824110741886e-05, - "loss": 1.045, + "learning_rate": 1.806267123403031e-05, + "loss": 1.0758, "step": 5762 }, { - "epoch": 0.163308679758565, + "epoch": 0.22548712731825651, "grad_norm": 0.0, - "learning_rate": 1.9082440108395933e-05, - "loss": 1.0613, + "learning_rate": 1.806192153666892e-05, + "loss": 1.0352, "step": 5763 }, { - "epoch": 0.16333701720082747, + "epoch": 0.22552625401048595, "grad_norm": 0.0, - "learning_rate": 1.9082056029545215e-05, - "loss": 1.1109, + "learning_rate": 1.8061171709843174e-05, + "loss": 1.0444, "step": 5764 }, { - "epoch": 0.1633653546430899, + "epoch": 0.2255653807027154, "grad_norm": 0.0, - "learning_rate": 1.9081671874192965e-05, - "loss": 1.0507, + "learning_rate": 1.8060421753565113e-05, + "loss": 1.1678, "step": 5765 }, { - "epoch": 0.16339369208535237, + "epoch": 0.22560450739494484, "grad_norm": 0.0, - "learning_rate": 1.9081287642342422e-05, - "loss": 1.0123, + "learning_rate": 1.805967166784678e-05, + "loss": 1.0348, "step": 5766 }, { - "epoch": 0.16342202952761484, + "epoch": 0.22564363408717428, "grad_norm": 0.0, - "learning_rate": 1.908090333399682e-05, - "loss": 1.042, + "learning_rate": 1.8058921452700216e-05, + "loss": 1.2822, "step": 5767 }, { - "epoch": 0.1634503669698773, + "epoch": 0.22568276077940372, "grad_norm": 0.0, - "learning_rate": 1.9080518949159395e-05, - "loss": 0.9236, + "learning_rate": 1.8058171108137474e-05, + "loss": 1.2462, "step": 5768 }, { - "epoch": 0.16347870441213977, + "epoch": 0.22572188747163316, "grad_norm": 0.0, - "learning_rate": 1.9080134487833393e-05, - "loss": 0.9734, + "learning_rate": 1.80574206341706e-05, + "loss": 1.1511, "step": 5769 }, { - "epoch": 0.16350704185440224, + "epoch": 0.2257610141638626, "grad_norm": 0.0, - "learning_rate": 1.907974995002204e-05, - "loss": 0.9211, + "learning_rate": 1.8056670030811647e-05, + "loss": 1.3509, "step": 5770 }, { - "epoch": 0.16353537929666467, + "epoch": 0.225800140856092, "grad_norm": 0.0, - "learning_rate": 1.9079365335728586e-05, - "loss": 1.0338, + "learning_rate": 1.805591929807267e-05, + "loss": 1.0965, "step": 5771 }, { - "epoch": 0.16356371673892714, + "epoch": 0.22583926754832145, "grad_norm": 0.0, - "learning_rate": 1.9078980644956263e-05, - "loss": 1.0625, + "learning_rate": 1.8055168435965722e-05, + "loss": 1.2179, "step": 5772 }, { - "epoch": 0.1635920541811896, + "epoch": 0.2258783942405509, "grad_norm": 0.0, - "learning_rate": 1.907859587770832e-05, - "loss": 1.0701, + "learning_rate": 1.8054417444502864e-05, + "loss": 1.1631, "step": 5773 }, { - "epoch": 0.16362039162345207, + "epoch": 0.22591752093278034, "grad_norm": 0.0, - "learning_rate": 1.9078211033987986e-05, - "loss": 1.0724, + "learning_rate": 1.8053666323696155e-05, + "loss": 1.2284, "step": 5774 }, { - "epoch": 0.16364872906571454, + "epoch": 0.22595664762500978, "grad_norm": 0.0, - "learning_rate": 1.907782611379851e-05, - "loss": 1.0789, + "learning_rate": 1.8052915073557655e-05, + "loss": 1.1169, "step": 5775 }, { - "epoch": 0.163677066507977, + "epoch": 0.22599577431723922, "grad_norm": 0.0, - "learning_rate": 1.907744111714314e-05, - "loss": 1.0683, + "learning_rate": 1.8052163694099433e-05, + "loss": 1.0177, "step": 5776 }, { - "epoch": 0.16370540395023944, + "epoch": 0.22603490100946866, "grad_norm": 0.0, - "learning_rate": 1.9077056044025107e-05, - "loss": 1.0527, + "learning_rate": 1.805141218533355e-05, + "loss": 1.1926, "step": 5777 }, { - "epoch": 0.1637337413925019, + "epoch": 0.2260740277016981, "grad_norm": 0.0, - "learning_rate": 1.907667089444766e-05, - "loss": 1.1691, + "learning_rate": 1.8050660547272074e-05, + "loss": 1.0111, "step": 5778 }, { - "epoch": 0.16376207883476437, + "epoch": 0.22611315439392754, "grad_norm": 0.0, - "learning_rate": 1.9076285668414046e-05, - "loss": 1.0699, + "learning_rate": 1.804990877992708e-05, + "loss": 1.0635, "step": 5779 }, { - "epoch": 0.16379041627702684, + "epoch": 0.22615228108615698, "grad_norm": 0.0, - "learning_rate": 1.9075900365927508e-05, - "loss": 1.0275, + "learning_rate": 1.804915688331064e-05, + "loss": 1.0997, "step": 5780 }, { - "epoch": 0.1638187537192893, + "epoch": 0.22619140777838642, "grad_norm": 0.0, - "learning_rate": 1.907551498699129e-05, - "loss": 0.9212, + "learning_rate": 1.8048404857434823e-05, + "loss": 1.0582, "step": 5781 }, { - "epoch": 0.16384709116155174, + "epoch": 0.22623053447061586, "grad_norm": 0.0, - "learning_rate": 1.9075129531608637e-05, - "loss": 0.9709, + "learning_rate": 1.8047652702311712e-05, + "loss": 1.037, "step": 5782 }, { - "epoch": 0.1638754286038142, + "epoch": 0.2262696611628453, "grad_norm": 0.0, - "learning_rate": 1.90747439997828e-05, - "loss": 0.9633, + "learning_rate": 1.804690041795338e-05, + "loss": 1.0822, "step": 5783 }, { - "epoch": 0.16390376604607668, + "epoch": 0.22630878785507474, "grad_norm": 0.0, - "learning_rate": 1.9074358391517026e-05, - "loss": 1.0322, + "learning_rate": 1.8046148004371914e-05, + "loss": 1.101, "step": 5784 }, { - "epoch": 0.16393210348833914, + "epoch": 0.22634791454730416, "grad_norm": 0.0, - "learning_rate": 1.9073972706814555e-05, - "loss": 0.8944, + "learning_rate": 1.804539546157939e-05, + "loss": 1.2075, "step": 5785 }, { - "epoch": 0.1639604409306016, + "epoch": 0.2263870412395336, "grad_norm": 0.0, - "learning_rate": 1.907358694567865e-05, - "loss": 0.8769, + "learning_rate": 1.80446427895879e-05, + "loss": 1.2213, "step": 5786 }, { - "epoch": 0.16398877837286407, + "epoch": 0.22642616793176304, "grad_norm": 0.0, - "learning_rate": 1.907320110811255e-05, - "loss": 1.041, + "learning_rate": 1.8043889988409524e-05, + "loss": 1.0886, "step": 5787 }, { - "epoch": 0.1640171158151265, + "epoch": 0.22646529462399248, "grad_norm": 0.0, - "learning_rate": 1.9072815194119507e-05, - "loss": 1.1707, + "learning_rate": 1.8043137058056354e-05, + "loss": 1.1674, "step": 5788 }, { - "epoch": 0.16404545325738898, + "epoch": 0.22650442131622192, "grad_norm": 0.0, - "learning_rate": 1.9072429203702774e-05, - "loss": 1.0159, + "learning_rate": 1.8042383998540486e-05, + "loss": 1.0717, "step": 5789 }, { - "epoch": 0.16407379069965145, + "epoch": 0.22654354800845136, "grad_norm": 0.0, - "learning_rate": 1.90720431368656e-05, - "loss": 0.9714, + "learning_rate": 1.8041630809874004e-05, + "loss": 0.9409, "step": 5790 }, { - "epoch": 0.1641021281419139, + "epoch": 0.2265826747006808, "grad_norm": 0.0, - "learning_rate": 1.907165699361124e-05, - "loss": 1.0685, + "learning_rate": 1.804087749206901e-05, + "loss": 1.197, "step": 5791 }, { - "epoch": 0.16413046558417638, + "epoch": 0.22662180139291024, "grad_norm": 0.0, - "learning_rate": 1.907127077394294e-05, - "loss": 1.0793, + "learning_rate": 1.80401240451376e-05, + "loss": 1.0916, "step": 5792 }, { - "epoch": 0.16415880302643884, + "epoch": 0.22666092808513968, "grad_norm": 0.0, - "learning_rate": 1.907088447786396e-05, - "loss": 0.9068, + "learning_rate": 1.803937046909187e-05, + "loss": 1.1875, "step": 5793 }, { - "epoch": 0.16418714046870128, + "epoch": 0.22670005477736913, "grad_norm": 0.0, - "learning_rate": 1.907049810537755e-05, - "loss": 0.9588, + "learning_rate": 1.8038616763943925e-05, + "loss": 1.0496, "step": 5794 }, { - "epoch": 0.16421547791096375, + "epoch": 0.22673918146959857, "grad_norm": 0.0, - "learning_rate": 1.907011165648697e-05, - "loss": 1.0206, + "learning_rate": 1.8037862929705872e-05, + "loss": 1.1561, "step": 5795 }, { - "epoch": 0.1642438153532262, + "epoch": 0.226778308161828, "grad_norm": 0.0, - "learning_rate": 1.906972513119547e-05, - "loss": 1.046, + "learning_rate": 1.8037108966389806e-05, + "loss": 1.2091, "step": 5796 }, { - "epoch": 0.16427215279548868, + "epoch": 0.22681743485405745, "grad_norm": 0.0, - "learning_rate": 1.9069338529506307e-05, - "loss": 0.9155, + "learning_rate": 1.8036354874007846e-05, + "loss": 1.2079, "step": 5797 }, { - "epoch": 0.16430049023775115, + "epoch": 0.2268565615462869, "grad_norm": 0.0, - "learning_rate": 1.9068951851422735e-05, - "loss": 0.9237, + "learning_rate": 1.8035600652572093e-05, + "loss": 1.3018, "step": 5798 }, { - "epoch": 0.1643288276800136, + "epoch": 0.2268956882385163, "grad_norm": 0.0, - "learning_rate": 1.9068565096948017e-05, - "loss": 1.0386, + "learning_rate": 1.8034846302094668e-05, + "loss": 1.1075, "step": 5799 }, { - "epoch": 0.16435716512227605, + "epoch": 0.22693481493074574, "grad_norm": 0.0, - "learning_rate": 1.9068178266085407e-05, - "loss": 0.9597, + "learning_rate": 1.803409182258767e-05, + "loss": 0.9697, "step": 5800 }, { - "epoch": 0.16438550256453852, + "epoch": 0.22697394162297518, "grad_norm": 0.0, - "learning_rate": 1.9067791358838165e-05, - "loss": 0.9852, + "learning_rate": 1.8033337214063234e-05, + "loss": 1.1506, "step": 5801 }, { - "epoch": 0.16441384000680098, + "epoch": 0.22701306831520462, "grad_norm": 0.0, - "learning_rate": 1.906740437520955e-05, - "loss": 0.9818, + "learning_rate": 1.8032582476533463e-05, + "loss": 1.0989, "step": 5802 }, { - "epoch": 0.16444217744906345, + "epoch": 0.22705219500743407, "grad_norm": 0.0, - "learning_rate": 1.9067017315202815e-05, - "loss": 1.15, + "learning_rate": 1.8031827610010485e-05, + "loss": 1.179, "step": 5803 }, { - "epoch": 0.16447051489132591, + "epoch": 0.2270913216996635, "grad_norm": 0.0, - "learning_rate": 1.9066630178821232e-05, - "loss": 0.9743, + "learning_rate": 1.8031072614506426e-05, + "loss": 1.235, "step": 5804 }, { - "epoch": 0.16449885233358838, + "epoch": 0.22713044839189295, "grad_norm": 0.0, - "learning_rate": 1.9066242966068055e-05, - "loss": 0.9338, + "learning_rate": 1.8030317490033394e-05, + "loss": 1.2507, "step": 5805 }, { - "epoch": 0.16452718977585082, + "epoch": 0.2271695750841224, "grad_norm": 0.0, - "learning_rate": 1.9065855676946546e-05, - "loss": 1.0026, + "learning_rate": 1.802956223660353e-05, + "loss": 1.1132, "step": 5806 }, { - "epoch": 0.16455552721811328, + "epoch": 0.22720870177635183, "grad_norm": 0.0, - "learning_rate": 1.9065468311459966e-05, - "loss": 1.0236, + "learning_rate": 1.802880685422896e-05, + "loss": 1.1024, "step": 5807 }, { - "epoch": 0.16458386466037575, + "epoch": 0.22724782846858127, "grad_norm": 0.0, - "learning_rate": 1.9065080869611586e-05, - "loss": 0.9831, + "learning_rate": 1.8028051342921807e-05, + "loss": 1.0528, "step": 5808 }, { - "epoch": 0.16461220210263822, + "epoch": 0.2272869551608107, "grad_norm": 0.0, - "learning_rate": 1.9064693351404657e-05, - "loss": 1.0572, + "learning_rate": 1.8027295702694216e-05, + "loss": 1.3079, "step": 5809 }, { - "epoch": 0.16464053954490068, + "epoch": 0.22732608185304015, "grad_norm": 0.0, - "learning_rate": 1.9064305756842455e-05, - "loss": 0.9714, + "learning_rate": 1.8026539933558312e-05, + "loss": 1.1246, "step": 5810 }, { - "epoch": 0.16466887698716315, + "epoch": 0.2273652085452696, "grad_norm": 0.0, - "learning_rate": 1.9063918085928238e-05, - "loss": 0.9727, + "learning_rate": 1.8025784035526235e-05, + "loss": 1.1449, "step": 5811 }, { - "epoch": 0.1646972144294256, + "epoch": 0.22740433523749903, "grad_norm": 0.0, - "learning_rate": 1.9063530338665273e-05, - "loss": 1.0609, + "learning_rate": 1.802502800861012e-05, + "loss": 1.0276, "step": 5812 }, { - "epoch": 0.16472555187168805, + "epoch": 0.22744346192972847, "grad_norm": 0.0, - "learning_rate": 1.9063142515056824e-05, - "loss": 0.9949, + "learning_rate": 1.8024271852822116e-05, + "loss": 1.131, "step": 5813 }, { - "epoch": 0.16475388931395052, + "epoch": 0.2274825886219579, "grad_norm": 0.0, - "learning_rate": 1.9062754615106162e-05, - "loss": 1.093, + "learning_rate": 1.802351556817436e-05, + "loss": 1.2775, "step": 5814 }, { - "epoch": 0.16478222675621298, + "epoch": 0.22752171531418733, "grad_norm": 0.0, - "learning_rate": 1.906236663881655e-05, - "loss": 1.0214, + "learning_rate": 1.8022759154678995e-05, + "loss": 1.161, "step": 5815 }, { - "epoch": 0.16481056419847545, + "epoch": 0.22756084200641677, "grad_norm": 0.0, - "learning_rate": 1.9061978586191263e-05, - "loss": 0.9495, + "learning_rate": 1.802200261234817e-05, + "loss": 1.1647, "step": 5816 }, { - "epoch": 0.16483890164073792, + "epoch": 0.2275999686986462, "grad_norm": 0.0, - "learning_rate": 1.9061590457233562e-05, - "loss": 0.9486, + "learning_rate": 1.8021245941194042e-05, + "loss": 1.0561, "step": 5817 }, { - "epoch": 0.16486723908300036, + "epoch": 0.22763909539087565, "grad_norm": 0.0, - "learning_rate": 1.9061202251946723e-05, - "loss": 0.9595, + "learning_rate": 1.802048914122875e-05, + "loss": 1.1273, "step": 5818 }, { - "epoch": 0.16489557652526282, + "epoch": 0.2276782220831051, "grad_norm": 0.0, - "learning_rate": 1.906081397033401e-05, - "loss": 1.0942, + "learning_rate": 1.8019732212464453e-05, + "loss": 1.2306, "step": 5819 }, { - "epoch": 0.1649239139675253, + "epoch": 0.22771734877533453, "grad_norm": 0.0, - "learning_rate": 1.9060425612398695e-05, - "loss": 0.9945, + "learning_rate": 1.8018975154913307e-05, + "loss": 1.0568, "step": 5820 }, { - "epoch": 0.16495225140978775, + "epoch": 0.22775647546756397, "grad_norm": 0.0, - "learning_rate": 1.9060037178144054e-05, - "loss": 0.9986, + "learning_rate": 1.8018217968587467e-05, + "loss": 1.2175, "step": 5821 }, { - "epoch": 0.16498058885205022, + "epoch": 0.22779560215979341, "grad_norm": 0.0, - "learning_rate": 1.9059648667573355e-05, - "loss": 0.9861, + "learning_rate": 1.8017460653499097e-05, + "loss": 1.155, "step": 5822 }, { - "epoch": 0.16500892629431269, + "epoch": 0.22783472885202286, "grad_norm": 0.0, - "learning_rate": 1.905926008068987e-05, - "loss": 1.0504, + "learning_rate": 1.8016703209660354e-05, + "loss": 0.9985, "step": 5823 }, { - "epoch": 0.16503726373657512, + "epoch": 0.2278738555442523, "grad_norm": 0.0, - "learning_rate": 1.905887141749687e-05, - "loss": 1.0435, + "learning_rate": 1.80159456370834e-05, + "loss": 1.2841, "step": 5824 }, { - "epoch": 0.1650656011788376, + "epoch": 0.22791298223648174, "grad_norm": 0.0, - "learning_rate": 1.9058482677997637e-05, - "loss": 1.0716, + "learning_rate": 1.8015187935780405e-05, + "loss": 1.016, "step": 5825 }, { - "epoch": 0.16509393862110006, + "epoch": 0.22795210892871118, "grad_norm": 0.0, - "learning_rate": 1.905809386219544e-05, - "loss": 1.0217, + "learning_rate": 1.8014430105763536e-05, + "loss": 1.1252, "step": 5826 }, { - "epoch": 0.16512227606336252, + "epoch": 0.22799123562094062, "grad_norm": 0.0, - "learning_rate": 1.9057704970093556e-05, - "loss": 1.0393, + "learning_rate": 1.8013672147044964e-05, + "loss": 1.0651, "step": 5827 }, { - "epoch": 0.165150613505625, + "epoch": 0.22803036231317003, "grad_norm": 0.0, - "learning_rate": 1.9057316001695257e-05, - "loss": 0.9022, + "learning_rate": 1.8012914059636855e-05, + "loss": 1.0981, "step": 5828 }, { - "epoch": 0.16517895094788745, + "epoch": 0.22806948900539947, "grad_norm": 0.0, - "learning_rate": 1.905692695700382e-05, - "loss": 1.0486, + "learning_rate": 1.801215584355139e-05, + "loss": 1.1204, "step": 5829 }, { - "epoch": 0.1652072883901499, + "epoch": 0.2281086156976289, "grad_norm": 0.0, - "learning_rate": 1.9056537836022526e-05, - "loss": 1.0616, + "learning_rate": 1.8011397498800742e-05, + "loss": 1.0717, "step": 5830 }, { - "epoch": 0.16523562583241236, + "epoch": 0.22814774238985835, "grad_norm": 0.0, - "learning_rate": 1.9056148638754654e-05, - "loss": 1.0565, + "learning_rate": 1.801063902539709e-05, + "loss": 1.2082, "step": 5831 }, { - "epoch": 0.16526396327467482, + "epoch": 0.2281868690820878, "grad_norm": 0.0, - "learning_rate": 1.9055759365203476e-05, - "loss": 1.0018, + "learning_rate": 1.800988042335261e-05, + "loss": 1.1992, "step": 5832 }, { - "epoch": 0.1652923007169373, + "epoch": 0.22822599577431724, "grad_norm": 0.0, - "learning_rate": 1.905537001537227e-05, - "loss": 0.9854, + "learning_rate": 1.800912169267949e-05, + "loss": 1.2138, "step": 5833 }, { - "epoch": 0.16532063815919976, + "epoch": 0.22826512246654668, "grad_norm": 0.0, - "learning_rate": 1.9054980589264326e-05, - "loss": 0.988, + "learning_rate": 1.800836283338991e-05, + "loss": 1.2406, "step": 5834 }, { - "epoch": 0.16534897560146222, + "epoch": 0.22830424915877612, "grad_norm": 0.0, - "learning_rate": 1.9054591086882918e-05, - "loss": 1.0427, + "learning_rate": 1.800760384549606e-05, + "loss": 1.1223, "step": 5835 }, { - "epoch": 0.16537731304372466, + "epoch": 0.22834337585100556, "grad_norm": 0.0, - "learning_rate": 1.9054201508231323e-05, - "loss": 0.9106, + "learning_rate": 1.8006844729010123e-05, + "loss": 1.0827, "step": 5836 }, { - "epoch": 0.16540565048598713, + "epoch": 0.228382502543235, "grad_norm": 0.0, - "learning_rate": 1.905381185331283e-05, - "loss": 1.1528, + "learning_rate": 1.8006085483944295e-05, + "loss": 1.0451, "step": 5837 }, { - "epoch": 0.1654339879282496, + "epoch": 0.22842162923546444, "grad_norm": 0.0, - "learning_rate": 1.9053422122130712e-05, - "loss": 0.9674, + "learning_rate": 1.8005326110310767e-05, + "loss": 1.0295, "step": 5838 }, { - "epoch": 0.16546232537051206, + "epoch": 0.22846075592769388, "grad_norm": 0.0, - "learning_rate": 1.9053032314688264e-05, - "loss": 1.1941, + "learning_rate": 1.800456660812173e-05, + "loss": 1.1675, "step": 5839 }, { - "epoch": 0.16549066281277452, + "epoch": 0.22849988261992332, "grad_norm": 0.0, - "learning_rate": 1.905264243098876e-05, - "loss": 0.9551, + "learning_rate": 1.8003806977389385e-05, + "loss": 1.1785, "step": 5840 }, { - "epoch": 0.165519000255037, + "epoch": 0.22853900931215276, "grad_norm": 0.0, - "learning_rate": 1.9052252471035492e-05, - "loss": 1.0362, + "learning_rate": 1.800304721812593e-05, + "loss": 1.1239, "step": 5841 }, { - "epoch": 0.16554733769729943, + "epoch": 0.22857813600438218, "grad_norm": 0.0, - "learning_rate": 1.9051862434831735e-05, - "loss": 0.9627, + "learning_rate": 1.8002287330343564e-05, + "loss": 1.1717, "step": 5842 }, { - "epoch": 0.1655756751395619, + "epoch": 0.22861726269661162, "grad_norm": 0.0, - "learning_rate": 1.9051472322380776e-05, - "loss": 0.9761, + "learning_rate": 1.800152731405449e-05, + "loss": 1.1837, "step": 5843 }, { - "epoch": 0.16560401258182436, + "epoch": 0.22865638938884106, "grad_norm": 0.0, - "learning_rate": 1.905108213368591e-05, - "loss": 0.9775, + "learning_rate": 1.8000767169270917e-05, + "loss": 1.1868, "step": 5844 }, { - "epoch": 0.16563235002408683, + "epoch": 0.2286955160810705, "grad_norm": 0.0, - "learning_rate": 1.905069186875042e-05, - "loss": 1.0384, + "learning_rate": 1.800000689600505e-05, + "loss": 1.1252, "step": 5845 }, { - "epoch": 0.1656606874663493, + "epoch": 0.22873464277329994, "grad_norm": 0.0, - "learning_rate": 1.9050301527577587e-05, - "loss": 1.1178, + "learning_rate": 1.7999246494269093e-05, + "loss": 1.1596, "step": 5846 }, { - "epoch": 0.16568902490861176, + "epoch": 0.22877376946552938, "grad_norm": 0.0, - "learning_rate": 1.9049911110170704e-05, - "loss": 1.0635, + "learning_rate": 1.7998485964075258e-05, + "loss": 1.2136, "step": 5847 }, { - "epoch": 0.1657173623508742, + "epoch": 0.22881289615775882, "grad_norm": 0.0, - "learning_rate": 1.904952061653306e-05, - "loss": 1.0682, + "learning_rate": 1.7997725305435768e-05, + "loss": 1.1724, "step": 5848 }, { - "epoch": 0.16574569979313666, + "epoch": 0.22885202284998826, "grad_norm": 0.0, - "learning_rate": 1.9049130046667943e-05, - "loss": 0.9882, + "learning_rate": 1.7996964518362827e-05, + "loss": 1.109, "step": 5849 }, { - "epoch": 0.16577403723539913, + "epoch": 0.2288911495422177, "grad_norm": 0.0, - "learning_rate": 1.9048739400578645e-05, - "loss": 1.0767, + "learning_rate": 1.7996203602868657e-05, + "loss": 1.1523, "step": 5850 }, { - "epoch": 0.1658023746776616, + "epoch": 0.22893027623444714, "grad_norm": 0.0, - "learning_rate": 1.9048348678268453e-05, - "loss": 1.0708, + "learning_rate": 1.7995442558965477e-05, + "loss": 1.2567, "step": 5851 }, { - "epoch": 0.16583071211992406, + "epoch": 0.22896940292667659, "grad_norm": 0.0, - "learning_rate": 1.904795787974066e-05, - "loss": 0.9433, + "learning_rate": 1.799468138666551e-05, + "loss": 1.2088, "step": 5852 }, { - "epoch": 0.16585904956218653, + "epoch": 0.22900852961890603, "grad_norm": 0.0, - "learning_rate": 1.904756700499856e-05, - "loss": 0.9648, + "learning_rate": 1.7993920085980975e-05, + "loss": 1.1826, "step": 5853 }, { - "epoch": 0.16588738700444897, + "epoch": 0.22904765631113547, "grad_norm": 0.0, - "learning_rate": 1.9047176054045443e-05, - "loss": 1.0615, + "learning_rate": 1.7993158656924104e-05, + "loss": 1.1754, "step": 5854 }, { - "epoch": 0.16591572444671143, + "epoch": 0.2290867830033649, "grad_norm": 0.0, - "learning_rate": 1.90467850268846e-05, - "loss": 1.0994, + "learning_rate": 1.7992397099507116e-05, + "loss": 1.2177, "step": 5855 }, { - "epoch": 0.1659440618889739, + "epoch": 0.22912590969559432, "grad_norm": 0.0, - "learning_rate": 1.904639392351933e-05, - "loss": 1.0241, + "learning_rate": 1.7991635413742254e-05, + "loss": 1.1132, "step": 5856 }, { - "epoch": 0.16597239933123636, + "epoch": 0.22916503638782376, "grad_norm": 0.0, - "learning_rate": 1.904600274395292e-05, - "loss": 1.0147, + "learning_rate": 1.7990873599641735e-05, + "loss": 1.1607, "step": 5857 }, { - "epoch": 0.16600073677349883, + "epoch": 0.2292041630800532, "grad_norm": 0.0, - "learning_rate": 1.9045611488188672e-05, - "loss": 0.9757, + "learning_rate": 1.79901116572178e-05, + "loss": 1.0776, "step": 5858 }, { - "epoch": 0.1660290742157613, + "epoch": 0.22924328977228264, "grad_norm": 0.0, - "learning_rate": 1.904522015622988e-05, - "loss": 1.0439, + "learning_rate": 1.7989349586482683e-05, + "loss": 1.1069, "step": 5859 }, { - "epoch": 0.16605741165802373, + "epoch": 0.22928241646451208, "grad_norm": 0.0, - "learning_rate": 1.9044828748079842e-05, - "loss": 1.0198, + "learning_rate": 1.7988587387448625e-05, + "loss": 1.2086, "step": 5860 }, { - "epoch": 0.1660857491002862, + "epoch": 0.22932154315674153, "grad_norm": 0.0, - "learning_rate": 1.904443726374185e-05, - "loss": 0.91, + "learning_rate": 1.7987825060127863e-05, + "loss": 1.2743, "step": 5861 }, { - "epoch": 0.16611408654254867, + "epoch": 0.22936066984897097, "grad_norm": 0.0, - "learning_rate": 1.9044045703219207e-05, - "loss": 1.0222, + "learning_rate": 1.7987062604532638e-05, + "loss": 1.123, "step": 5862 }, { - "epoch": 0.16614242398481113, + "epoch": 0.2293997965412004, "grad_norm": 0.0, - "learning_rate": 1.9043654066515206e-05, - "loss": 1.1337, + "learning_rate": 1.7986300020675198e-05, + "loss": 1.3572, "step": 5863 }, { - "epoch": 0.1661707614270736, + "epoch": 0.22943892323342985, "grad_norm": 0.0, - "learning_rate": 1.904326235363315e-05, - "loss": 0.9615, + "learning_rate": 1.7985537308567788e-05, + "loss": 1.1668, "step": 5864 }, { - "epoch": 0.16619909886933606, + "epoch": 0.2294780499256593, "grad_norm": 0.0, - "learning_rate": 1.9042870564576338e-05, - "loss": 0.8545, + "learning_rate": 1.7984774468222652e-05, + "loss": 1.0809, "step": 5865 }, { - "epoch": 0.1662274363115985, + "epoch": 0.22951717661788873, "grad_norm": 0.0, - "learning_rate": 1.904247869934807e-05, - "loss": 1.0506, + "learning_rate": 1.7984011499652046e-05, + "loss": 1.1234, "step": 5866 }, { - "epoch": 0.16625577375386097, + "epoch": 0.22955630331011817, "grad_norm": 0.0, - "learning_rate": 1.904208675795164e-05, - "loss": 0.9833, + "learning_rate": 1.798324840286822e-05, + "loss": 1.1454, "step": 5867 }, { - "epoch": 0.16628411119612344, + "epoch": 0.2295954300023476, "grad_norm": 0.0, - "learning_rate": 1.9041694740390362e-05, - "loss": 1.0392, + "learning_rate": 1.7982485177883426e-05, + "loss": 1.1871, "step": 5868 }, { - "epoch": 0.1663124486383859, + "epoch": 0.22963455669457705, "grad_norm": 0.0, - "learning_rate": 1.9041302646667526e-05, - "loss": 1.0969, + "learning_rate": 1.7981721824709924e-05, + "loss": 1.132, "step": 5869 }, { - "epoch": 0.16634078608064837, + "epoch": 0.2296736833868065, "grad_norm": 0.0, - "learning_rate": 1.9040910476786443e-05, - "loss": 1.0488, + "learning_rate": 1.7980958343359972e-05, + "loss": 1.1992, "step": 5870 }, { - "epoch": 0.16636912352291083, + "epoch": 0.2297128100790359, "grad_norm": 0.0, - "learning_rate": 1.9040518230750414e-05, - "loss": 0.9957, + "learning_rate": 1.798019473384583e-05, + "loss": 1.1787, "step": 5871 }, { - "epoch": 0.16639746096517327, + "epoch": 0.22975193677126535, "grad_norm": 0.0, - "learning_rate": 1.9040125908562745e-05, - "loss": 0.9948, + "learning_rate": 1.797943099617976e-05, + "loss": 1.1396, "step": 5872 }, { - "epoch": 0.16642579840743574, + "epoch": 0.2297910634634948, "grad_norm": 0.0, - "learning_rate": 1.9039733510226735e-05, - "loss": 0.9941, + "learning_rate": 1.7978667130374025e-05, + "loss": 1.1996, "step": 5873 }, { - "epoch": 0.1664541358496982, + "epoch": 0.22983019015572423, "grad_norm": 0.0, - "learning_rate": 1.9039341035745696e-05, - "loss": 1.0053, + "learning_rate": 1.7977903136440895e-05, + "loss": 1.063, "step": 5874 }, { - "epoch": 0.16648247329196067, + "epoch": 0.22986931684795367, "grad_norm": 0.0, - "learning_rate": 1.903894848512293e-05, - "loss": 1.0703, + "learning_rate": 1.7977139014392636e-05, + "loss": 1.075, "step": 5875 }, { - "epoch": 0.16651081073422314, + "epoch": 0.2299084435401831, "grad_norm": 0.0, - "learning_rate": 1.9038555858361743e-05, - "loss": 1.0202, + "learning_rate": 1.7976374764241523e-05, + "loss": 1.0786, "step": 5876 }, { - "epoch": 0.1665391481764856, + "epoch": 0.22994757023241255, "grad_norm": 0.0, - "learning_rate": 1.9038163155465446e-05, - "loss": 0.9717, + "learning_rate": 1.7975610385999828e-05, + "loss": 1.0959, "step": 5877 }, { - "epoch": 0.16656748561874804, + "epoch": 0.229986696924642, "grad_norm": 0.0, - "learning_rate": 1.9037770376437343e-05, - "loss": 0.9303, + "learning_rate": 1.797484587967982e-05, + "loss": 1.0771, "step": 5878 }, { - "epoch": 0.1665958230610105, + "epoch": 0.23002582361687143, "grad_norm": 0.0, - "learning_rate": 1.903737752128074e-05, - "loss": 1.014, + "learning_rate": 1.797408124529378e-05, + "loss": 1.1188, "step": 5879 }, { - "epoch": 0.16662416050327297, + "epoch": 0.23006495030910087, "grad_norm": 0.0, - "learning_rate": 1.9036984589998953e-05, - "loss": 0.9191, + "learning_rate": 1.797331648285399e-05, + "loss": 1.2406, "step": 5880 }, { - "epoch": 0.16665249794553544, + "epoch": 0.23010407700133031, "grad_norm": 0.0, - "learning_rate": 1.9036591582595294e-05, - "loss": 1.1168, + "learning_rate": 1.7972551592372726e-05, + "loss": 1.1105, "step": 5881 }, { - "epoch": 0.1666808353877979, + "epoch": 0.23014320369355976, "grad_norm": 0.0, - "learning_rate": 1.9036198499073063e-05, - "loss": 1.0826, + "learning_rate": 1.7971786573862275e-05, + "loss": 1.1953, "step": 5882 }, { - "epoch": 0.16670917283006037, + "epoch": 0.2301823303857892, "grad_norm": 0.0, - "learning_rate": 1.9035805339435577e-05, - "loss": 1.0355, + "learning_rate": 1.7971021427334924e-05, + "loss": 1.3677, "step": 5883 }, { - "epoch": 0.1667375102723228, + "epoch": 0.23022145707801864, "grad_norm": 0.0, - "learning_rate": 1.9035412103686147e-05, - "loss": 1.0608, + "learning_rate": 1.797025615280295e-05, + "loss": 1.1988, "step": 5884 }, { - "epoch": 0.16676584771458527, + "epoch": 0.23026058377024805, "grad_norm": 0.0, - "learning_rate": 1.903501879182809e-05, - "loss": 1.0827, + "learning_rate": 1.7969490750278655e-05, + "loss": 1.1119, "step": 5885 }, { - "epoch": 0.16679418515684774, + "epoch": 0.2302997104624775, "grad_norm": 0.0, - "learning_rate": 1.903462540386471e-05, - "loss": 1.0425, + "learning_rate": 1.7968725219774324e-05, + "loss": 1.1086, "step": 5886 }, { - "epoch": 0.1668225225991102, + "epoch": 0.23033883715470693, "grad_norm": 0.0, - "learning_rate": 1.9034231939799328e-05, - "loss": 1.0857, + "learning_rate": 1.796795956130225e-05, + "loss": 1.0255, "step": 5887 }, { - "epoch": 0.16685086004137267, + "epoch": 0.23037796384693637, "grad_norm": 0.0, - "learning_rate": 1.9033838399635255e-05, - "loss": 0.9145, + "learning_rate": 1.796719377487473e-05, + "loss": 1.1326, "step": 5888 }, { - "epoch": 0.16687919748363514, + "epoch": 0.23041709053916581, "grad_norm": 0.0, - "learning_rate": 1.9033444783375806e-05, - "loss": 1.072, + "learning_rate": 1.796642786050406e-05, + "loss": 1.0825, "step": 5889 }, { - "epoch": 0.16690753492589758, + "epoch": 0.23045621723139526, "grad_norm": 0.0, - "learning_rate": 1.9033051091024293e-05, - "loss": 0.9998, + "learning_rate": 1.7965661818202544e-05, + "loss": 1.159, "step": 5890 }, { - "epoch": 0.16693587236816004, + "epoch": 0.2304953439236247, "grad_norm": 0.0, - "learning_rate": 1.9032657322584045e-05, - "loss": 0.9802, + "learning_rate": 1.7964895647982483e-05, + "loss": 1.1502, "step": 5891 }, { - "epoch": 0.1669642098104225, + "epoch": 0.23053447061585414, "grad_norm": 0.0, - "learning_rate": 1.9032263478058365e-05, - "loss": 0.9733, + "learning_rate": 1.7964129349856173e-05, + "loss": 1.1009, "step": 5892 }, { - "epoch": 0.16699254725268498, + "epoch": 0.23057359730808358, "grad_norm": 0.0, - "learning_rate": 1.9031869557450575e-05, - "loss": 0.9995, + "learning_rate": 1.7963362923835926e-05, + "loss": 1.2004, "step": 5893 }, { - "epoch": 0.16702088469494744, + "epoch": 0.23061272400031302, "grad_norm": 0.0, - "learning_rate": 1.9031475560763994e-05, - "loss": 0.9671, + "learning_rate": 1.7962596369934052e-05, + "loss": 1.1934, "step": 5894 }, { - "epoch": 0.1670492221372099, + "epoch": 0.23065185069254246, "grad_norm": 0.0, - "learning_rate": 1.9031081488001942e-05, - "loss": 1.0191, + "learning_rate": 1.7961829688162855e-05, + "loss": 1.2112, "step": 5895 }, { - "epoch": 0.16707755957947235, + "epoch": 0.2306909773847719, "grad_norm": 0.0, - "learning_rate": 1.903068733916774e-05, - "loss": 0.9758, + "learning_rate": 1.796106287853465e-05, + "loss": 1.183, "step": 5896 }, { - "epoch": 0.1671058970217348, + "epoch": 0.23073010407700134, "grad_norm": 0.0, - "learning_rate": 1.9030293114264698e-05, - "loss": 0.9978, + "learning_rate": 1.7960295941061754e-05, + "loss": 1.0785, "step": 5897 }, { - "epoch": 0.16713423446399728, + "epoch": 0.23076923076923078, "grad_norm": 0.0, - "learning_rate": 1.9029898813296147e-05, - "loss": 1.0212, + "learning_rate": 1.7959528875756478e-05, + "loss": 1.2248, "step": 5898 }, { - "epoch": 0.16716257190625974, + "epoch": 0.2308083574614602, "grad_norm": 0.0, - "learning_rate": 1.9029504436265406e-05, - "loss": 1.0004, + "learning_rate": 1.795876168263114e-05, + "loss": 1.1093, "step": 5899 }, { - "epoch": 0.1671909093485222, + "epoch": 0.23084748415368964, "grad_norm": 0.0, - "learning_rate": 1.90291099831758e-05, - "loss": 1.0071, + "learning_rate": 1.7957994361698068e-05, + "loss": 1.1638, "step": 5900 }, { - "epoch": 0.16721924679078468, + "epoch": 0.23088661084591908, "grad_norm": 0.0, - "learning_rate": 1.902871545403064e-05, - "loss": 1.0898, + "learning_rate": 1.7957226912969576e-05, + "loss": 1.1544, "step": 5901 }, { - "epoch": 0.16724758423304711, + "epoch": 0.23092573753814852, "grad_norm": 0.0, - "learning_rate": 1.902832084883326e-05, - "loss": 1.0101, + "learning_rate": 1.795645933645799e-05, + "loss": 1.1474, "step": 5902 }, { - "epoch": 0.16727592167530958, + "epoch": 0.23096486423037796, "grad_norm": 0.0, - "learning_rate": 1.9027926167586982e-05, - "loss": 1.0773, + "learning_rate": 1.7955691632175637e-05, + "loss": 1.2065, "step": 5903 }, { - "epoch": 0.16730425911757205, + "epoch": 0.2310039909226074, "grad_norm": 0.0, - "learning_rate": 1.9027531410295128e-05, - "loss": 1.0385, + "learning_rate": 1.7954923800134843e-05, + "loss": 1.1207, "step": 5904 }, { - "epoch": 0.1673325965598345, + "epoch": 0.23104311761483684, "grad_norm": 0.0, - "learning_rate": 1.9027136576961027e-05, - "loss": 1.0431, + "learning_rate": 1.7954155840347945e-05, + "loss": 1.2152, "step": 5905 }, { - "epoch": 0.16736093400209698, + "epoch": 0.23108224430706628, "grad_norm": 0.0, - "learning_rate": 1.9026741667588002e-05, - "loss": 1.0411, + "learning_rate": 1.795338775282727e-05, + "loss": 1.1256, "step": 5906 }, { - "epoch": 0.16738927144435944, + "epoch": 0.23112137099929572, "grad_norm": 0.0, - "learning_rate": 1.9026346682179374e-05, - "loss": 1.1446, + "learning_rate": 1.795261953758515e-05, + "loss": 1.1666, "step": 5907 }, { - "epoch": 0.16741760888662188, + "epoch": 0.23116049769152516, "grad_norm": 0.0, - "learning_rate": 1.9025951620738483e-05, - "loss": 0.9794, + "learning_rate": 1.795185119463393e-05, + "loss": 1.1219, "step": 5908 }, { - "epoch": 0.16744594632888435, + "epoch": 0.2311996243837546, "grad_norm": 0.0, - "learning_rate": 1.9025556483268647e-05, - "loss": 1.0736, + "learning_rate": 1.7951082723985944e-05, + "loss": 1.1215, "step": 5909 }, { - "epoch": 0.16747428377114681, + "epoch": 0.23123875107598404, "grad_norm": 0.0, - "learning_rate": 1.90251612697732e-05, - "loss": 0.9707, + "learning_rate": 1.795031412565353e-05, + "loss": 1.1736, "step": 5910 }, { - "epoch": 0.16750262121340928, + "epoch": 0.23127787776821349, "grad_norm": 0.0, - "learning_rate": 1.9024765980255467e-05, - "loss": 0.9968, + "learning_rate": 1.7949545399649038e-05, + "loss": 1.1926, "step": 5911 }, { - "epoch": 0.16753095865567175, + "epoch": 0.23131700446044293, "grad_norm": 0.0, - "learning_rate": 1.9024370614718774e-05, - "loss": 1.0614, + "learning_rate": 1.7948776545984804e-05, + "loss": 1.2293, "step": 5912 }, { - "epoch": 0.1675592960979342, + "epoch": 0.23135613115267234, "grad_norm": 0.0, - "learning_rate": 1.902397517316646e-05, - "loss": 1.0603, + "learning_rate": 1.794800756467318e-05, + "loss": 1.3571, "step": 5913 }, { - "epoch": 0.16758763354019665, + "epoch": 0.23139525784490178, "grad_norm": 0.0, - "learning_rate": 1.9023579655601852e-05, - "loss": 0.8893, + "learning_rate": 1.7947238455726515e-05, + "loss": 1.0303, "step": 5914 }, { - "epoch": 0.16761597098245912, + "epoch": 0.23143438453713122, "grad_norm": 0.0, - "learning_rate": 1.9023184062028284e-05, - "loss": 1.0201, + "learning_rate": 1.7946469219157158e-05, + "loss": 1.1916, "step": 5915 }, { - "epoch": 0.16764430842472158, + "epoch": 0.23147351122936066, "grad_norm": 0.0, - "learning_rate": 1.9022788392449085e-05, - "loss": 1.0064, + "learning_rate": 1.794569985497746e-05, + "loss": 1.1777, "step": 5916 }, { - "epoch": 0.16767264586698405, + "epoch": 0.2315126379215901, "grad_norm": 0.0, - "learning_rate": 1.9022392646867587e-05, - "loss": 0.9963, + "learning_rate": 1.7944930363199783e-05, + "loss": 1.1997, "step": 5917 }, { - "epoch": 0.16770098330924652, + "epoch": 0.23155176461381954, "grad_norm": 0.0, - "learning_rate": 1.9021996825287126e-05, - "loss": 1.0194, + "learning_rate": 1.7944160743836478e-05, + "loss": 1.2664, "step": 5918 }, { - "epoch": 0.16772932075150898, + "epoch": 0.23159089130604898, "grad_norm": 0.0, - "learning_rate": 1.9021600927711037e-05, - "loss": 1.0379, + "learning_rate": 1.7943390996899907e-05, + "loss": 1.1116, "step": 5919 }, { - "epoch": 0.16775765819377142, + "epoch": 0.23163001799827843, "grad_norm": 0.0, - "learning_rate": 1.902120495414265e-05, - "loss": 0.9777, + "learning_rate": 1.794262112240243e-05, + "loss": 1.2375, "step": 5920 }, { - "epoch": 0.16778599563603389, + "epoch": 0.23166914469050787, "grad_norm": 0.0, - "learning_rate": 1.9020808904585307e-05, - "loss": 1.1043, + "learning_rate": 1.794185112035641e-05, + "loss": 1.0826, "step": 5921 }, { - "epoch": 0.16781433307829635, + "epoch": 0.2317082713827373, "grad_norm": 0.0, - "learning_rate": 1.902041277904234e-05, - "loss": 1.0379, + "learning_rate": 1.7941080990774213e-05, + "loss": 1.2651, "step": 5922 }, { - "epoch": 0.16784267052055882, + "epoch": 0.23174739807496675, "grad_norm": 0.0, - "learning_rate": 1.9020016577517087e-05, - "loss": 1.0491, + "learning_rate": 1.7940310733668208e-05, + "loss": 1.2502, "step": 5923 }, { - "epoch": 0.16787100796282128, + "epoch": 0.2317865247671962, "grad_norm": 0.0, - "learning_rate": 1.901962030001288e-05, - "loss": 1.0624, + "learning_rate": 1.793954034905076e-05, + "loss": 1.1091, "step": 5924 }, { - "epoch": 0.16789934540508375, + "epoch": 0.23182565145942563, "grad_norm": 0.0, - "learning_rate": 1.9019223946533066e-05, - "loss": 1.1165, + "learning_rate": 1.793876983693424e-05, + "loss": 1.0025, "step": 5925 }, { - "epoch": 0.1679276828473462, + "epoch": 0.23186477815165507, "grad_norm": 0.0, - "learning_rate": 1.9018827517080982e-05, - "loss": 0.9444, + "learning_rate": 1.793799919733103e-05, + "loss": 1.1673, "step": 5926 }, { - "epoch": 0.16795602028960865, + "epoch": 0.2319039048438845, "grad_norm": 0.0, - "learning_rate": 1.901843101165996e-05, - "loss": 1.0858, + "learning_rate": 1.7937228430253497e-05, + "loss": 1.2155, "step": 5927 }, { - "epoch": 0.16798435773187112, + "epoch": 0.23194303153611392, "grad_norm": 0.0, - "learning_rate": 1.9018034430273346e-05, - "loss": 1.0598, + "learning_rate": 1.7936457535714023e-05, + "loss": 1.1395, "step": 5928 }, { - "epoch": 0.16801269517413359, + "epoch": 0.23198215822834337, "grad_norm": 0.0, - "learning_rate": 1.9017637772924483e-05, - "loss": 1.0851, + "learning_rate": 1.7935686513724985e-05, + "loss": 1.0768, "step": 5929 }, { - "epoch": 0.16804103261639605, + "epoch": 0.2320212849205728, "grad_norm": 0.0, - "learning_rate": 1.9017241039616704e-05, - "loss": 1.0625, + "learning_rate": 1.793491536429876e-05, + "loss": 1.2339, "step": 5930 }, { - "epoch": 0.16806937005865852, + "epoch": 0.23206041161280225, "grad_norm": 0.0, - "learning_rate": 1.901684423035336e-05, - "loss": 1.0963, + "learning_rate": 1.7934144087447744e-05, + "loss": 1.0071, "step": 5931 }, { - "epoch": 0.16809770750092096, + "epoch": 0.2320995383050317, "grad_norm": 0.0, - "learning_rate": 1.901644734513778e-05, - "loss": 1.1015, + "learning_rate": 1.7933372683184317e-05, + "loss": 1.1801, "step": 5932 }, { - "epoch": 0.16812604494318342, + "epoch": 0.23213866499726113, "grad_norm": 0.0, - "learning_rate": 1.9016050383973322e-05, - "loss": 1.0666, + "learning_rate": 1.7932601151520863e-05, + "loss": 1.0634, "step": 5933 }, { - "epoch": 0.1681543823854459, + "epoch": 0.23217779168949057, "grad_norm": 0.0, - "learning_rate": 1.9015653346863322e-05, - "loss": 1.1084, + "learning_rate": 1.7931829492469773e-05, + "loss": 1.2128, "step": 5934 }, { - "epoch": 0.16818271982770835, + "epoch": 0.23221691838172, "grad_norm": 0.0, - "learning_rate": 1.901525623381113e-05, - "loss": 1.0202, + "learning_rate": 1.7931057706043443e-05, + "loss": 1.0373, "step": 5935 }, { - "epoch": 0.16821105726997082, + "epoch": 0.23225604507394945, "grad_norm": 0.0, - "learning_rate": 1.9014859044820082e-05, - "loss": 1.0495, + "learning_rate": 1.7930285792254262e-05, + "loss": 1.3115, "step": 5936 }, { - "epoch": 0.1682393947122333, + "epoch": 0.2322951717661789, "grad_norm": 0.0, - "learning_rate": 1.901446177989353e-05, - "loss": 1.0182, + "learning_rate": 1.792951375111463e-05, + "loss": 1.166, "step": 5937 }, { - "epoch": 0.16826773215449572, + "epoch": 0.23233429845840833, "grad_norm": 0.0, - "learning_rate": 1.9014064439034818e-05, - "loss": 1.0817, + "learning_rate": 1.7928741582636944e-05, + "loss": 1.0734, "step": 5938 }, { - "epoch": 0.1682960695967582, + "epoch": 0.23237342515063777, "grad_norm": 0.0, - "learning_rate": 1.9013667022247297e-05, - "loss": 1.0165, + "learning_rate": 1.7927969286833603e-05, + "loss": 1.2532, "step": 5939 }, { - "epoch": 0.16832440703902066, + "epoch": 0.23241255184286722, "grad_norm": 0.0, - "learning_rate": 1.901326952953431e-05, - "loss": 0.9936, + "learning_rate": 1.792719686371701e-05, + "loss": 1.1085, "step": 5940 }, { - "epoch": 0.16835274448128312, + "epoch": 0.23245167853509666, "grad_norm": 0.0, - "learning_rate": 1.9012871960899206e-05, - "loss": 1.0155, + "learning_rate": 1.7926424313299568e-05, + "loss": 1.1055, "step": 5941 }, { - "epoch": 0.1683810819235456, + "epoch": 0.23249080522732607, "grad_norm": 0.0, - "learning_rate": 1.9012474316345335e-05, - "loss": 0.9671, + "learning_rate": 1.7925651635593682e-05, + "loss": 1.1452, "step": 5942 }, { - "epoch": 0.16840941936580806, + "epoch": 0.2325299319195555, "grad_norm": 0.0, - "learning_rate": 1.9012076595876045e-05, - "loss": 0.9495, + "learning_rate": 1.7924878830611765e-05, + "loss": 1.1633, "step": 5943 }, { - "epoch": 0.1684377568080705, + "epoch": 0.23256905861178495, "grad_norm": 0.0, - "learning_rate": 1.901167879949469e-05, - "loss": 1.0383, + "learning_rate": 1.7924105898366224e-05, + "loss": 1.0948, "step": 5944 }, { - "epoch": 0.16846609425033296, + "epoch": 0.2326081853040144, "grad_norm": 0.0, - "learning_rate": 1.9011280927204616e-05, - "loss": 1.0613, + "learning_rate": 1.7923332838869473e-05, + "loss": 1.053, "step": 5945 }, { - "epoch": 0.16849443169259543, + "epoch": 0.23264731199624383, "grad_norm": 0.0, - "learning_rate": 1.901088297900918e-05, - "loss": 1.1078, + "learning_rate": 1.792255965213392e-05, + "loss": 1.1546, "step": 5946 }, { - "epoch": 0.1685227691348579, + "epoch": 0.23268643868847327, "grad_norm": 0.0, - "learning_rate": 1.9010484954911724e-05, - "loss": 0.9988, + "learning_rate": 1.7921786338171992e-05, + "loss": 1.1951, "step": 5947 }, { - "epoch": 0.16855110657712036, + "epoch": 0.23272556538070271, "grad_norm": 0.0, - "learning_rate": 1.9010086854915613e-05, - "loss": 1.0281, + "learning_rate": 1.79210128969961e-05, + "loss": 1.1473, "step": 5948 }, { - "epoch": 0.16857944401938282, + "epoch": 0.23276469207293216, "grad_norm": 0.0, - "learning_rate": 1.900968867902419e-05, - "loss": 0.9888, + "learning_rate": 1.7920239328618665e-05, + "loss": 1.176, "step": 5949 }, { - "epoch": 0.16860778146164526, + "epoch": 0.2328038187651616, "grad_norm": 0.0, - "learning_rate": 1.9009290427240817e-05, - "loss": 0.9199, + "learning_rate": 1.7919465633052114e-05, + "loss": 1.0706, "step": 5950 }, { - "epoch": 0.16863611890390773, + "epoch": 0.23284294545739104, "grad_norm": 0.0, - "learning_rate": 1.9008892099568847e-05, - "loss": 0.9199, + "learning_rate": 1.7918691810308865e-05, + "loss": 1.2096, "step": 5951 }, { - "epoch": 0.1686644563461702, + "epoch": 0.23288207214962048, "grad_norm": 0.0, - "learning_rate": 1.900849369601163e-05, - "loss": 0.8648, + "learning_rate": 1.791791786040135e-05, + "loss": 1.0901, "step": 5952 }, { - "epoch": 0.16869279378843266, + "epoch": 0.23292119884184992, "grad_norm": 0.0, - "learning_rate": 1.9008095216572526e-05, - "loss": 0.8949, + "learning_rate": 1.7917143783341995e-05, + "loss": 1.2055, "step": 5953 }, { - "epoch": 0.16872113123069513, + "epoch": 0.23296032553407936, "grad_norm": 0.0, - "learning_rate": 1.900769666125489e-05, - "loss": 1.0537, + "learning_rate": 1.7916369579143235e-05, + "loss": 1.1537, "step": 5954 }, { - "epoch": 0.1687494686729576, + "epoch": 0.2329994522263088, "grad_norm": 0.0, - "learning_rate": 1.9007298030062085e-05, - "loss": 1.0094, + "learning_rate": 1.7915595247817495e-05, + "loss": 1.0143, "step": 5955 }, { - "epoch": 0.16877780611522003, + "epoch": 0.23303857891853821, "grad_norm": 0.0, - "learning_rate": 1.9006899322997457e-05, - "loss": 1.016, + "learning_rate": 1.7914820789377215e-05, + "loss": 1.0411, "step": 5956 }, { - "epoch": 0.1688061435574825, + "epoch": 0.23307770561076765, "grad_norm": 0.0, - "learning_rate": 1.9006500540064377e-05, - "loss": 1.0535, + "learning_rate": 1.7914046203834834e-05, + "loss": 0.9853, "step": 5957 }, { - "epoch": 0.16883448099974496, + "epoch": 0.2331168323029971, "grad_norm": 0.0, - "learning_rate": 1.9006101681266194e-05, - "loss": 0.9818, + "learning_rate": 1.7913271491202782e-05, + "loss": 1.1432, "step": 5958 }, { - "epoch": 0.16886281844200743, + "epoch": 0.23315595899522654, "grad_norm": 0.0, - "learning_rate": 1.9005702746606274e-05, - "loss": 1.0285, + "learning_rate": 1.7912496651493512e-05, + "loss": 1.0461, "step": 5959 }, { - "epoch": 0.1688911558842699, + "epoch": 0.23319508568745598, "grad_norm": 0.0, - "learning_rate": 1.9005303736087976e-05, - "loss": 1.0633, + "learning_rate": 1.791172168471946e-05, + "loss": 1.04, "step": 5960 }, { - "epoch": 0.16891949332653236, + "epoch": 0.23323421237968542, "grad_norm": 0.0, - "learning_rate": 1.9004904649714663e-05, - "loss": 1.0764, + "learning_rate": 1.7910946590893068e-05, + "loss": 1.151, "step": 5961 }, { - "epoch": 0.1689478307687948, + "epoch": 0.23327333907191486, "grad_norm": 0.0, - "learning_rate": 1.9004505487489692e-05, - "loss": 1.0731, + "learning_rate": 1.7910171370026788e-05, + "loss": 1.1008, "step": 5962 }, { - "epoch": 0.16897616821105726, + "epoch": 0.2333124657641443, "grad_norm": 0.0, - "learning_rate": 1.9004106249416427e-05, - "loss": 1.0661, + "learning_rate": 1.7909396022133067e-05, + "loss": 1.056, "step": 5963 }, { - "epoch": 0.16900450565331973, + "epoch": 0.23335159245637374, "grad_norm": 0.0, - "learning_rate": 1.9003706935498233e-05, - "loss": 0.9479, + "learning_rate": 1.790862054722436e-05, + "loss": 1.2986, "step": 5964 }, { - "epoch": 0.1690328430955822, + "epoch": 0.23339071914860318, "grad_norm": 0.0, - "learning_rate": 1.900330754573847e-05, - "loss": 1.0359, + "learning_rate": 1.7907844945313115e-05, + "loss": 1.1552, "step": 5965 }, { - "epoch": 0.16906118053784466, + "epoch": 0.23342984584083262, "grad_norm": 0.0, - "learning_rate": 1.9002908080140504e-05, - "loss": 1.0353, + "learning_rate": 1.790706921641179e-05, + "loss": 1.1455, "step": 5966 }, { - "epoch": 0.16908951798010713, + "epoch": 0.23346897253306206, "grad_norm": 0.0, - "learning_rate": 1.90025085387077e-05, - "loss": 0.9718, + "learning_rate": 1.790629336053284e-05, + "loss": 1.0528, "step": 5967 }, { - "epoch": 0.16911785542236957, + "epoch": 0.2335080992252915, "grad_norm": 0.0, - "learning_rate": 1.9002108921443424e-05, - "loss": 1.0427, + "learning_rate": 1.7905517377688725e-05, + "loss": 1.1492, "step": 5968 }, { - "epoch": 0.16914619286463203, + "epoch": 0.23354722591752095, "grad_norm": 0.0, - "learning_rate": 1.9001709228351042e-05, - "loss": 1.0933, + "learning_rate": 1.7904741267891914e-05, + "loss": 1.1315, "step": 5969 }, { - "epoch": 0.1691745303068945, + "epoch": 0.23358635260975036, "grad_norm": 0.0, - "learning_rate": 1.900130945943392e-05, - "loss": 1.0303, + "learning_rate": 1.790396503115486e-05, + "loss": 1.2202, "step": 5970 }, { - "epoch": 0.16920286774915697, + "epoch": 0.2336254793019798, "grad_norm": 0.0, - "learning_rate": 1.900090961469543e-05, - "loss": 0.9095, + "learning_rate": 1.7903188667490025e-05, + "loss": 1.1785, "step": 5971 }, { - "epoch": 0.16923120519141943, + "epoch": 0.23366460599420924, "grad_norm": 0.0, - "learning_rate": 1.900050969413893e-05, - "loss": 1.0622, + "learning_rate": 1.7902412176909888e-05, + "loss": 1.1378, "step": 5972 }, { - "epoch": 0.1692595426336819, + "epoch": 0.23370373268643868, "grad_norm": 0.0, - "learning_rate": 1.9000109697767798e-05, - "loss": 0.975, + "learning_rate": 1.7901635559426915e-05, + "loss": 1.1898, "step": 5973 }, { - "epoch": 0.16928788007594434, + "epoch": 0.23374285937866812, "grad_norm": 0.0, - "learning_rate": 1.8999709625585397e-05, - "loss": 0.9593, + "learning_rate": 1.7900858815053576e-05, + "loss": 1.1916, "step": 5974 }, { - "epoch": 0.1693162175182068, + "epoch": 0.23378198607089756, "grad_norm": 0.0, - "learning_rate": 1.8999309477595103e-05, - "loss": 0.9509, + "learning_rate": 1.7900081943802345e-05, + "loss": 1.0873, "step": 5975 }, { - "epoch": 0.16934455496046927, + "epoch": 0.233821112763127, "grad_norm": 0.0, - "learning_rate": 1.8998909253800283e-05, - "loss": 1.0133, + "learning_rate": 1.7899304945685693e-05, + "loss": 1.127, "step": 5976 }, { - "epoch": 0.16937289240273173, + "epoch": 0.23386023945535644, "grad_norm": 0.0, - "learning_rate": 1.899850895420431e-05, - "loss": 0.999, + "learning_rate": 1.7898527820716107e-05, + "loss": 1.2555, "step": 5977 }, { - "epoch": 0.1694012298449942, + "epoch": 0.23389936614758589, "grad_norm": 0.0, - "learning_rate": 1.899810857881055e-05, - "loss": 1.0561, + "learning_rate": 1.789775056890606e-05, + "loss": 1.1161, "step": 5978 }, { - "epoch": 0.16942956728725667, + "epoch": 0.23393849283981533, "grad_norm": 0.0, - "learning_rate": 1.8997708127622384e-05, - "loss": 1.0912, + "learning_rate": 1.7896973190268036e-05, + "loss": 1.1238, "step": 5979 }, { - "epoch": 0.1694579047295191, + "epoch": 0.23397761953204477, "grad_norm": 0.0, - "learning_rate": 1.899730760064318e-05, - "loss": 1.0467, + "learning_rate": 1.7896195684814516e-05, + "loss": 1.1575, "step": 5980 }, { - "epoch": 0.16948624217178157, + "epoch": 0.2340167462242742, "grad_norm": 0.0, - "learning_rate": 1.8996906997876312e-05, - "loss": 0.8781, + "learning_rate": 1.789541805255799e-05, + "loss": 1.1273, "step": 5981 }, { - "epoch": 0.16951457961404404, + "epoch": 0.23405587291650365, "grad_norm": 0.0, - "learning_rate": 1.8996506319325156e-05, - "loss": 0.9037, + "learning_rate": 1.7894640293510942e-05, + "loss": 1.1647, "step": 5982 }, { - "epoch": 0.1695429170563065, + "epoch": 0.2340949996087331, "grad_norm": 0.0, - "learning_rate": 1.899610556499309e-05, - "loss": 1.0034, + "learning_rate": 1.7893862407685866e-05, + "loss": 1.1112, "step": 5983 }, { - "epoch": 0.16957125449856897, + "epoch": 0.2341341263009625, "grad_norm": 0.0, - "learning_rate": 1.8995704734883484e-05, - "loss": 1.0683, + "learning_rate": 1.7893084395095248e-05, + "loss": 1.1943, "step": 5984 }, { - "epoch": 0.1695995919408314, + "epoch": 0.23417325299319194, "grad_norm": 0.0, - "learning_rate": 1.8995303828999713e-05, - "loss": 1.0225, + "learning_rate": 1.7892306255751584e-05, + "loss": 1.144, "step": 5985 }, { - "epoch": 0.16962792938309387, + "epoch": 0.23421237968542138, "grad_norm": 0.0, - "learning_rate": 1.8994902847345162e-05, - "loss": 0.9875, + "learning_rate": 1.7891527989667377e-05, + "loss": 1.0953, "step": 5986 }, { - "epoch": 0.16965626682535634, + "epoch": 0.23425150637765083, "grad_norm": 0.0, - "learning_rate": 1.89945017899232e-05, - "loss": 0.9965, + "learning_rate": 1.7890749596855114e-05, + "loss": 1.21, "step": 5987 }, { - "epoch": 0.1696846042676188, + "epoch": 0.23429063306988027, "grad_norm": 0.0, - "learning_rate": 1.8994100656737212e-05, - "loss": 1.1023, + "learning_rate": 1.7889971077327302e-05, + "loss": 1.0252, "step": 5988 }, { - "epoch": 0.16971294170988127, + "epoch": 0.2343297597621097, "grad_norm": 0.0, - "learning_rate": 1.8993699447790576e-05, - "loss": 0.9201, + "learning_rate": 1.7889192431096442e-05, + "loss": 1.106, "step": 5989 }, { - "epoch": 0.16974127915214374, + "epoch": 0.23436888645433915, "grad_norm": 0.0, - "learning_rate": 1.8993298163086668e-05, - "loss": 1.0447, + "learning_rate": 1.7888413658175038e-05, + "loss": 1.1203, "step": 5990 }, { - "epoch": 0.16976961659440618, + "epoch": 0.2344080131465686, "grad_norm": 0.0, - "learning_rate": 1.899289680262887e-05, - "loss": 1.0748, + "learning_rate": 1.788763475857559e-05, + "loss": 1.2433, "step": 5991 }, { - "epoch": 0.16979795403666864, + "epoch": 0.23444713983879803, "grad_norm": 0.0, - "learning_rate": 1.899249536642056e-05, - "loss": 0.9645, + "learning_rate": 1.788685573231062e-05, + "loss": 1.0885, "step": 5992 }, { - "epoch": 0.1698262914789311, + "epoch": 0.23448626653102747, "grad_norm": 0.0, - "learning_rate": 1.8992093854465124e-05, - "loss": 1.0259, + "learning_rate": 1.7886076579392622e-05, + "loss": 0.9399, "step": 5993 }, { - "epoch": 0.16985462892119357, + "epoch": 0.2345253932232569, "grad_norm": 0.0, - "learning_rate": 1.8991692266765947e-05, - "loss": 1.1338, + "learning_rate": 1.788529729983412e-05, + "loss": 1.1912, "step": 5994 }, { - "epoch": 0.16988296636345604, + "epoch": 0.23456451991548635, "grad_norm": 0.0, - "learning_rate": 1.89912906033264e-05, - "loss": 1.052, + "learning_rate": 1.7884517893647624e-05, + "loss": 1.1682, "step": 5995 }, { - "epoch": 0.1699113038057185, + "epoch": 0.2346036466077158, "grad_norm": 0.0, - "learning_rate": 1.8990888864149876e-05, - "loss": 1.0522, + "learning_rate": 1.7883738360845648e-05, + "loss": 1.176, "step": 5996 }, { - "epoch": 0.16993964124798094, + "epoch": 0.23464277329994523, "grad_norm": 0.0, - "learning_rate": 1.8990487049239758e-05, - "loss": 1.0704, + "learning_rate": 1.7882958701440716e-05, + "loss": 1.2006, "step": 5997 }, { - "epoch": 0.1699679786902434, + "epoch": 0.23468189999217468, "grad_norm": 0.0, - "learning_rate": 1.899008515859943e-05, - "loss": 0.9956, + "learning_rate": 1.788217891544534e-05, + "loss": 1.132, "step": 5998 }, { - "epoch": 0.16999631613250588, + "epoch": 0.2347210266844041, "grad_norm": 0.0, - "learning_rate": 1.8989683192232276e-05, - "loss": 1.0888, + "learning_rate": 1.7881399002872057e-05, + "loss": 1.0921, "step": 5999 }, { - "epoch": 0.17002465357476834, + "epoch": 0.23476015337663353, "grad_norm": 0.0, - "learning_rate": 1.8989281150141678e-05, - "loss": 0.9794, + "learning_rate": 1.7880618963733374e-05, + "loss": 1.114, "step": 6000 }, { - "epoch": 0.1700529910170308, + "epoch": 0.23479928006886297, "grad_norm": 0.0, - "learning_rate": 1.898887903233103e-05, - "loss": 1.0159, + "learning_rate": 1.7879838798041827e-05, + "loss": 1.1429, "step": 6001 }, { - "epoch": 0.17008132845929327, + "epoch": 0.2348384067610924, "grad_norm": 0.0, - "learning_rate": 1.8988476838803714e-05, - "loss": 1.0347, + "learning_rate": 1.7879058505809944e-05, + "loss": 1.1126, "step": 6002 }, { - "epoch": 0.1701096659015557, + "epoch": 0.23487753345332185, "grad_norm": 0.0, - "learning_rate": 1.8988074569563122e-05, - "loss": 1.1392, + "learning_rate": 1.7878278087050255e-05, + "loss": 1.11, "step": 6003 }, { - "epoch": 0.17013800334381818, + "epoch": 0.2349166601455513, "grad_norm": 0.0, - "learning_rate": 1.898767222461264e-05, - "loss": 0.9631, + "learning_rate": 1.7877497541775288e-05, + "loss": 1.1114, "step": 6004 }, { - "epoch": 0.17016634078608064, + "epoch": 0.23495578683778073, "grad_norm": 0.0, - "learning_rate": 1.8987269803955656e-05, - "loss": 1.0094, + "learning_rate": 1.7876716869997584e-05, + "loss": 1.1793, "step": 6005 }, { - "epoch": 0.1701946782283431, + "epoch": 0.23499491353001017, "grad_norm": 0.0, - "learning_rate": 1.8986867307595562e-05, - "loss": 0.9868, + "learning_rate": 1.7875936071729682e-05, + "loss": 1.0809, "step": 6006 }, { - "epoch": 0.17022301567060558, + "epoch": 0.23503404022223962, "grad_norm": 0.0, - "learning_rate": 1.898646473553575e-05, - "loss": 1.0305, + "learning_rate": 1.7875155146984108e-05, + "loss": 1.1771, "step": 6007 }, { - "epoch": 0.17025135311286804, + "epoch": 0.23507316691446906, "grad_norm": 0.0, - "learning_rate": 1.8986062087779604e-05, - "loss": 0.9196, + "learning_rate": 1.7874374095773414e-05, + "loss": 1.4, "step": 6008 }, { - "epoch": 0.17027969055513048, + "epoch": 0.2351122936066985, "grad_norm": 0.0, - "learning_rate": 1.8985659364330522e-05, - "loss": 1.0282, + "learning_rate": 1.787359291811014e-05, + "loss": 1.2081, "step": 6009 }, { - "epoch": 0.17030802799739295, + "epoch": 0.23515142029892794, "grad_norm": 0.0, - "learning_rate": 1.8985256565191898e-05, - "loss": 0.9752, + "learning_rate": 1.7872811614006827e-05, + "loss": 1.0987, "step": 6010 }, { - "epoch": 0.1703363654396554, + "epoch": 0.23519054699115738, "grad_norm": 0.0, - "learning_rate": 1.8984853690367116e-05, - "loss": 1.0945, + "learning_rate": 1.787203018347603e-05, + "loss": 1.1567, "step": 6011 }, { - "epoch": 0.17036470288191788, + "epoch": 0.23522967368338682, "grad_norm": 0.0, - "learning_rate": 1.8984450739859577e-05, - "loss": 0.9914, + "learning_rate": 1.7871248626530285e-05, + "loss": 1.2257, "step": 6012 }, { - "epoch": 0.17039304032418034, + "epoch": 0.23526880037561623, "grad_norm": 0.0, - "learning_rate": 1.8984047713672673e-05, - "loss": 1.0068, + "learning_rate": 1.7870466943182156e-05, + "loss": 1.1288, "step": 6013 }, { - "epoch": 0.1704213777664428, + "epoch": 0.23530792706784567, "grad_norm": 0.0, - "learning_rate": 1.8983644611809796e-05, - "loss": 1.1082, + "learning_rate": 1.786968513344419e-05, + "loss": 1.3461, "step": 6014 }, { - "epoch": 0.17044971520870525, + "epoch": 0.23534705376007511, "grad_norm": 0.0, - "learning_rate": 1.898324143427435e-05, - "loss": 1.073, + "learning_rate": 1.7868903197328938e-05, + "loss": 1.1196, "step": 6015 }, { - "epoch": 0.17047805265096772, + "epoch": 0.23538618045230456, "grad_norm": 0.0, - "learning_rate": 1.8982838181069723e-05, - "loss": 1.047, + "learning_rate": 1.7868121134848967e-05, + "loss": 1.1703, "step": 6016 }, { - "epoch": 0.17050639009323018, + "epoch": 0.235425307144534, "grad_norm": 0.0, - "learning_rate": 1.8982434852199316e-05, - "loss": 0.9219, + "learning_rate": 1.7867338946016826e-05, + "loss": 1.1423, "step": 6017 }, { - "epoch": 0.17053472753549265, + "epoch": 0.23546443383676344, "grad_norm": 0.0, - "learning_rate": 1.8982031447666522e-05, - "loss": 1.047, + "learning_rate": 1.7866556630845076e-05, + "loss": 1.1966, "step": 6018 }, { - "epoch": 0.1705630649777551, + "epoch": 0.23550356052899288, "grad_norm": 0.0, - "learning_rate": 1.898162796747474e-05, - "loss": 1.0054, + "learning_rate": 1.786577418934629e-05, + "loss": 1.1584, "step": 6019 }, { - "epoch": 0.17059140242001758, + "epoch": 0.23554268722122232, "grad_norm": 0.0, - "learning_rate": 1.8981224411627376e-05, - "loss": 0.9711, + "learning_rate": 1.7864991621533025e-05, + "loss": 1.1673, "step": 6020 }, { - "epoch": 0.17061973986228002, + "epoch": 0.23558181391345176, "grad_norm": 0.0, - "learning_rate": 1.8980820780127818e-05, - "loss": 0.9227, + "learning_rate": 1.786420892741785e-05, + "loss": 1.092, "step": 6021 }, { - "epoch": 0.17064807730454248, + "epoch": 0.2356209406056812, "grad_norm": 0.0, - "learning_rate": 1.8980417072979476e-05, - "loss": 0.9468, + "learning_rate": 1.7863426107013333e-05, + "loss": 1.2126, "step": 6022 }, { - "epoch": 0.17067641474680495, + "epoch": 0.23566006729791064, "grad_norm": 0.0, - "learning_rate": 1.8980013290185743e-05, - "loss": 1.1199, + "learning_rate": 1.7862643160332046e-05, + "loss": 1.1779, "step": 6023 }, { - "epoch": 0.17070475218906742, + "epoch": 0.23569919399014008, "grad_norm": 0.0, - "learning_rate": 1.8979609431750025e-05, - "loss": 0.8233, + "learning_rate": 1.7861860087386563e-05, + "loss": 1.1286, "step": 6024 }, { - "epoch": 0.17073308963132988, + "epoch": 0.23573832068236952, "grad_norm": 0.0, - "learning_rate": 1.8979205497675722e-05, - "loss": 0.8857, + "learning_rate": 1.786107688818946e-05, + "loss": 1.0307, "step": 6025 }, { - "epoch": 0.17076142707359235, + "epoch": 0.23577744737459896, "grad_norm": 0.0, - "learning_rate": 1.897880148796624e-05, - "loss": 1.0196, + "learning_rate": 1.786029356275331e-05, + "loss": 1.2787, "step": 6026 }, { - "epoch": 0.1707897645158548, + "epoch": 0.23581657406682838, "grad_norm": 0.0, - "learning_rate": 1.8978397402624975e-05, - "loss": 1.1489, + "learning_rate": 1.7859510111090697e-05, + "loss": 1.2169, "step": 6027 }, { - "epoch": 0.17081810195811725, + "epoch": 0.23585570075905782, "grad_norm": 0.0, - "learning_rate": 1.8977993241655333e-05, - "loss": 1.1172, + "learning_rate": 1.78587265332142e-05, + "loss": 1.0411, "step": 6028 }, { - "epoch": 0.17084643940037972, + "epoch": 0.23589482745128726, "grad_norm": 0.0, - "learning_rate": 1.8977589005060723e-05, - "loss": 1.02, + "learning_rate": 1.7857942829136404e-05, + "loss": 1.2031, "step": 6029 }, { - "epoch": 0.17087477684264218, + "epoch": 0.2359339541435167, "grad_norm": 0.0, - "learning_rate": 1.897718469284455e-05, - "loss": 1.0946, + "learning_rate": 1.785715899886989e-05, + "loss": 1.2626, "step": 6030 }, { - "epoch": 0.17090311428490465, + "epoch": 0.23597308083574614, "grad_norm": 0.0, - "learning_rate": 1.897678030501021e-05, - "loss": 0.9935, + "learning_rate": 1.7856375042427246e-05, + "loss": 1.1674, "step": 6031 }, { - "epoch": 0.17093145172716712, + "epoch": 0.23601220752797558, "grad_norm": 0.0, - "learning_rate": 1.897637584156112e-05, - "loss": 0.9959, + "learning_rate": 1.7855590959821068e-05, + "loss": 1.188, "step": 6032 }, { - "epoch": 0.17095978916942955, + "epoch": 0.23605133422020502, "grad_norm": 0.0, - "learning_rate": 1.8975971302500683e-05, - "loss": 0.9927, + "learning_rate": 1.785480675106394e-05, + "loss": 1.0977, "step": 6033 }, { - "epoch": 0.17098812661169202, + "epoch": 0.23609046091243446, "grad_norm": 0.0, - "learning_rate": 1.8975566687832304e-05, - "loss": 1.0092, + "learning_rate": 1.785402241616846e-05, + "loss": 1.0096, "step": 6034 }, { - "epoch": 0.1710164640539545, + "epoch": 0.2361295876046639, "grad_norm": 0.0, - "learning_rate": 1.8975161997559393e-05, - "loss": 1.0666, + "learning_rate": 1.7853237955147218e-05, + "loss": 1.064, "step": 6035 }, { - "epoch": 0.17104480149621695, + "epoch": 0.23616871429689335, "grad_norm": 0.0, - "learning_rate": 1.8974757231685367e-05, - "loss": 1.0859, + "learning_rate": 1.7852453368012817e-05, + "loss": 1.0058, "step": 6036 }, { - "epoch": 0.17107313893847942, + "epoch": 0.23620784098912279, "grad_norm": 0.0, - "learning_rate": 1.8974352390213623e-05, - "loss": 0.9899, + "learning_rate": 1.7851668654777857e-05, + "loss": 1.0129, "step": 6037 }, { - "epoch": 0.17110147638074188, + "epoch": 0.23624696768135223, "grad_norm": 0.0, - "learning_rate": 1.8973947473147576e-05, - "loss": 1.014, + "learning_rate": 1.7850883815454935e-05, + "loss": 1.2175, "step": 6038 }, { - "epoch": 0.17112981382300432, + "epoch": 0.23628609437358167, "grad_norm": 0.0, - "learning_rate": 1.8973542480490636e-05, - "loss": 0.9724, + "learning_rate": 1.785009885005666e-05, + "loss": 1.1676, "step": 6039 }, { - "epoch": 0.1711581512652668, + "epoch": 0.2363252210658111, "grad_norm": 0.0, - "learning_rate": 1.897313741224622e-05, - "loss": 1.027, + "learning_rate": 1.784931375859563e-05, + "loss": 1.1835, "step": 6040 }, { - "epoch": 0.17118648870752926, + "epoch": 0.23636434775804052, "grad_norm": 0.0, - "learning_rate": 1.8972732268417734e-05, - "loss": 1.0072, + "learning_rate": 1.7848528541084457e-05, + "loss": 1.1585, "step": 6041 }, { - "epoch": 0.17121482614979172, + "epoch": 0.23640347445026996, "grad_norm": 0.0, - "learning_rate": 1.897232704900859e-05, - "loss": 0.9926, + "learning_rate": 1.784774319753575e-05, + "loss": 1.204, "step": 6042 }, { - "epoch": 0.1712431635920542, + "epoch": 0.2364426011424994, "grad_norm": 0.0, - "learning_rate": 1.8971921754022202e-05, - "loss": 1.1282, + "learning_rate": 1.7846957727962124e-05, + "loss": 1.296, "step": 6043 }, { - "epoch": 0.17127150103431665, + "epoch": 0.23648172783472884, "grad_norm": 0.0, - "learning_rate": 1.8971516383461986e-05, - "loss": 0.9774, + "learning_rate": 1.7846172132376188e-05, + "loss": 1.2318, "step": 6044 }, { - "epoch": 0.1712998384765791, + "epoch": 0.23652085452695829, "grad_norm": 0.0, - "learning_rate": 1.897111093733136e-05, - "loss": 0.9272, + "learning_rate": 1.7845386410790558e-05, + "loss": 1.1602, "step": 6045 }, { - "epoch": 0.17132817591884156, + "epoch": 0.23655998121918773, "grad_norm": 0.0, - "learning_rate": 1.8970705415633732e-05, - "loss": 0.9738, + "learning_rate": 1.7844600563217857e-05, + "loss": 1.0885, "step": 6046 }, { - "epoch": 0.17135651336110402, + "epoch": 0.23659910791141717, "grad_norm": 0.0, - "learning_rate": 1.8970299818372525e-05, - "loss": 1.0475, + "learning_rate": 1.7843814589670696e-05, + "loss": 1.1464, "step": 6047 }, { - "epoch": 0.1713848508033665, + "epoch": 0.2366382346036466, "grad_norm": 0.0, - "learning_rate": 1.896989414555115e-05, - "loss": 1.0127, + "learning_rate": 1.7843028490161705e-05, + "loss": 0.9994, "step": 6048 }, { - "epoch": 0.17141318824562896, + "epoch": 0.23667736129587605, "grad_norm": 0.0, - "learning_rate": 1.8969488397173023e-05, - "loss": 1.1199, + "learning_rate": 1.7842242264703503e-05, + "loss": 1.1871, "step": 6049 }, { - "epoch": 0.17144152568789142, + "epoch": 0.2367164879881055, "grad_norm": 0.0, - "learning_rate": 1.8969082573241567e-05, - "loss": 0.9813, + "learning_rate": 1.7841455913308717e-05, + "loss": 1.1636, "step": 6050 }, { - "epoch": 0.17146986313015386, + "epoch": 0.23675561468033493, "grad_norm": 0.0, - "learning_rate": 1.8968676673760192e-05, - "loss": 1.0002, + "learning_rate": 1.7840669435989974e-05, + "loss": 1.2443, "step": 6051 }, { - "epoch": 0.17149820057241633, + "epoch": 0.23679474137256437, "grad_norm": 0.0, - "learning_rate": 1.896827069873233e-05, - "loss": 0.9487, + "learning_rate": 1.7839882832759906e-05, + "loss": 1.2524, "step": 6052 }, { - "epoch": 0.1715265380146788, + "epoch": 0.2368338680647938, "grad_norm": 0.0, - "learning_rate": 1.8967864648161392e-05, - "loss": 0.9831, + "learning_rate": 1.7839096103631143e-05, + "loss": 0.9946, "step": 6053 }, { - "epoch": 0.17155487545694126, + "epoch": 0.23687299475702325, "grad_norm": 0.0, - "learning_rate": 1.89674585220508e-05, - "loss": 0.8971, + "learning_rate": 1.7838309248616323e-05, + "loss": 1.1983, "step": 6054 }, { - "epoch": 0.17158321289920372, + "epoch": 0.2369121214492527, "grad_norm": 0.0, - "learning_rate": 1.8967052320403974e-05, - "loss": 1.0004, + "learning_rate": 1.7837522267728077e-05, + "loss": 1.1564, "step": 6055 }, { - "epoch": 0.1716115503414662, + "epoch": 0.2369512481414821, "grad_norm": 0.0, - "learning_rate": 1.8966646043224333e-05, - "loss": 1.0225, + "learning_rate": 1.783673516097904e-05, + "loss": 1.1624, "step": 6056 }, { - "epoch": 0.17163988778372863, + "epoch": 0.23699037483371155, "grad_norm": 0.0, - "learning_rate": 1.896623969051531e-05, - "loss": 1.0205, + "learning_rate": 1.7835947928381858e-05, + "loss": 1.0367, "step": 6057 }, { - "epoch": 0.1716682252259911, + "epoch": 0.237029501525941, "grad_norm": 0.0, - "learning_rate": 1.8965833262280314e-05, - "loss": 1.0837, + "learning_rate": 1.7835160569949174e-05, + "loss": 1.1587, "step": 6058 }, { - "epoch": 0.17169656266825356, + "epoch": 0.23706862821817043, "grad_norm": 0.0, - "learning_rate": 1.896542675852278e-05, - "loss": 0.9576, + "learning_rate": 1.7834373085693628e-05, + "loss": 1.1068, "step": 6059 }, { - "epoch": 0.17172490011051603, + "epoch": 0.23710775491039987, "grad_norm": 0.0, - "learning_rate": 1.8965020179246124e-05, - "loss": 1.0775, + "learning_rate": 1.7833585475627865e-05, + "loss": 1.086, "step": 6060 }, { - "epoch": 0.1717532375527785, + "epoch": 0.2371468816026293, "grad_norm": 0.0, - "learning_rate": 1.8964613524453775e-05, - "loss": 1.1407, + "learning_rate": 1.7832797739764537e-05, + "loss": 1.212, "step": 6061 }, { - "epoch": 0.17178157499504096, + "epoch": 0.23718600829485875, "grad_norm": 0.0, - "learning_rate": 1.8964206794149157e-05, - "loss": 1.0098, + "learning_rate": 1.7832009878116294e-05, + "loss": 1.1954, "step": 6062 }, { - "epoch": 0.1718099124373034, + "epoch": 0.2372251349870882, "grad_norm": 0.0, - "learning_rate": 1.89637999883357e-05, - "loss": 1.0668, + "learning_rate": 1.7831221890695785e-05, + "loss": 1.2024, "step": 6063 }, { - "epoch": 0.17183824987956586, + "epoch": 0.23726426167931763, "grad_norm": 0.0, - "learning_rate": 1.8963393107016822e-05, - "loss": 1.0169, + "learning_rate": 1.7830433777515664e-05, + "loss": 1.2551, "step": 6064 }, { - "epoch": 0.17186658732182833, + "epoch": 0.23730338837154707, "grad_norm": 0.0, - "learning_rate": 1.8962986150195956e-05, - "loss": 1.0911, + "learning_rate": 1.7829645538588592e-05, + "loss": 1.1203, "step": 6065 }, { - "epoch": 0.1718949247640908, + "epoch": 0.23734251506377652, "grad_norm": 0.0, - "learning_rate": 1.8962579117876533e-05, - "loss": 1.118, + "learning_rate": 1.782885717392722e-05, + "loss": 1.1289, "step": 6066 }, { - "epoch": 0.17192326220635326, + "epoch": 0.23738164175600596, "grad_norm": 0.0, - "learning_rate": 1.8962172010061975e-05, - "loss": 1.1144, + "learning_rate": 1.7828068683544217e-05, + "loss": 1.1949, "step": 6067 }, { - "epoch": 0.17195159964861573, + "epoch": 0.2374207684482354, "grad_norm": 0.0, - "learning_rate": 1.8961764826755714e-05, - "loss": 1.1092, + "learning_rate": 1.7827280067452232e-05, + "loss": 1.0324, "step": 6068 }, { - "epoch": 0.17197993709087817, + "epoch": 0.23745989514046484, "grad_norm": 0.0, - "learning_rate": 1.8961357567961182e-05, - "loss": 1.0811, + "learning_rate": 1.7826491325663943e-05, + "loss": 1.0885, "step": 6069 }, { - "epoch": 0.17200827453314063, + "epoch": 0.23749902183269425, "grad_norm": 0.0, - "learning_rate": 1.8960950233681804e-05, - "loss": 1.1359, + "learning_rate": 1.782570245819201e-05, + "loss": 1.2628, "step": 6070 }, { - "epoch": 0.1720366119754031, + "epoch": 0.2375381485249237, "grad_norm": 0.0, - "learning_rate": 1.8960542823921017e-05, - "loss": 0.9455, + "learning_rate": 1.78249134650491e-05, + "loss": 1.1417, "step": 6071 }, { - "epoch": 0.17206494941766556, + "epoch": 0.23757727521715313, "grad_norm": 0.0, - "learning_rate": 1.8960135338682252e-05, - "loss": 1.0821, + "learning_rate": 1.7824124346247885e-05, + "loss": 1.1089, "step": 6072 }, { - "epoch": 0.17209328685992803, + "epoch": 0.23761640190938257, "grad_norm": 0.0, - "learning_rate": 1.8959727777968938e-05, - "loss": 0.9236, + "learning_rate": 1.782333510180104e-05, + "loss": 1.0929, "step": 6073 }, { - "epoch": 0.1721216243021905, + "epoch": 0.23765552860161201, "grad_norm": 0.0, - "learning_rate": 1.8959320141784508e-05, - "loss": 1.0208, + "learning_rate": 1.7822545731721237e-05, + "loss": 1.1933, "step": 6074 }, { - "epoch": 0.17214996174445293, + "epoch": 0.23769465529384146, "grad_norm": 0.0, - "learning_rate": 1.8958912430132403e-05, - "loss": 1.0169, + "learning_rate": 1.782175623602115e-05, + "loss": 1.0397, "step": 6075 }, { - "epoch": 0.1721782991867154, + "epoch": 0.2377337819860709, "grad_norm": 0.0, - "learning_rate": 1.8958504643016045e-05, - "loss": 0.9544, + "learning_rate": 1.7820966614713455e-05, + "loss": 1.1632, "step": 6076 }, { - "epoch": 0.17220663662897787, + "epoch": 0.23777290867830034, "grad_norm": 0.0, - "learning_rate": 1.8958096780438878e-05, - "loss": 0.9562, + "learning_rate": 1.7820176867810842e-05, + "loss": 1.1379, "step": 6077 }, { - "epoch": 0.17223497407124033, + "epoch": 0.23781203537052978, "grad_norm": 0.0, - "learning_rate": 1.895768884240434e-05, - "loss": 1.0867, + "learning_rate": 1.7819386995325986e-05, + "loss": 1.2098, "step": 6078 }, { - "epoch": 0.1722633115135028, + "epoch": 0.23785116206275922, "grad_norm": 0.0, - "learning_rate": 1.8957280828915855e-05, - "loss": 0.9927, + "learning_rate": 1.7818596997271572e-05, + "loss": 1.151, "step": 6079 }, { - "epoch": 0.17229164895576526, + "epoch": 0.23789028875498866, "grad_norm": 0.0, - "learning_rate": 1.895687273997687e-05, - "loss": 0.9833, + "learning_rate": 1.781780687366029e-05, + "loss": 1.1711, "step": 6080 }, { - "epoch": 0.1723199863980277, + "epoch": 0.2379294154472181, "grad_norm": 0.0, - "learning_rate": 1.8956464575590823e-05, - "loss": 1.0132, + "learning_rate": 1.781701662450482e-05, + "loss": 1.2812, "step": 6081 }, { - "epoch": 0.17234832384029017, + "epoch": 0.23796854213944754, "grad_norm": 0.0, - "learning_rate": 1.8956056335761146e-05, - "loss": 0.9624, + "learning_rate": 1.7816226249817865e-05, + "loss": 1.0815, "step": 6082 }, { - "epoch": 0.17237666128255263, + "epoch": 0.23800766883167698, "grad_norm": 0.0, - "learning_rate": 1.8955648020491282e-05, - "loss": 1.0638, + "learning_rate": 1.7815435749612108e-05, + "loss": 1.0802, "step": 6083 }, { - "epoch": 0.1724049987248151, + "epoch": 0.2380467955239064, "grad_norm": 0.0, - "learning_rate": 1.8955239629784667e-05, - "loss": 1.1519, + "learning_rate": 1.7814645123900246e-05, + "loss": 1.1044, "step": 6084 }, { - "epoch": 0.17243333616707757, + "epoch": 0.23808592221613584, "grad_norm": 0.0, - "learning_rate": 1.8954831163644748e-05, - "loss": 1.0014, + "learning_rate": 1.7813854372694978e-05, + "loss": 1.1278, "step": 6085 }, { - "epoch": 0.17246167360934003, + "epoch": 0.23812504890836528, "grad_norm": 0.0, - "learning_rate": 1.8954422622074955e-05, - "loss": 1.0658, + "learning_rate": 1.7813063496008995e-05, + "loss": 1.1852, "step": 6086 }, { - "epoch": 0.17249001105160247, + "epoch": 0.23816417560059472, "grad_norm": 0.0, - "learning_rate": 1.895401400507874e-05, - "loss": 1.0819, + "learning_rate": 1.7812272493855007e-05, + "loss": 1.2083, "step": 6087 }, { - "epoch": 0.17251834849386494, + "epoch": 0.23820330229282416, "grad_norm": 0.0, - "learning_rate": 1.8953605312659537e-05, - "loss": 0.9287, + "learning_rate": 1.7811481366245708e-05, + "loss": 1.2222, "step": 6088 }, { - "epoch": 0.1725466859361274, + "epoch": 0.2382424289850536, "grad_norm": 0.0, - "learning_rate": 1.895319654482079e-05, - "loss": 1.0555, + "learning_rate": 1.7810690113193812e-05, + "loss": 1.3394, "step": 6089 }, { - "epoch": 0.17257502337838987, + "epoch": 0.23828155567728304, "grad_norm": 0.0, - "learning_rate": 1.895278770156595e-05, - "loss": 0.9366, + "learning_rate": 1.7809898734712016e-05, + "loss": 1.1754, "step": 6090 }, { - "epoch": 0.17260336082065234, + "epoch": 0.23832068236951248, "grad_norm": 0.0, - "learning_rate": 1.895237878289845e-05, - "loss": 1.042, + "learning_rate": 1.7809107230813034e-05, + "loss": 1.201, "step": 6091 }, { - "epoch": 0.1726316982629148, + "epoch": 0.23835980906174192, "grad_norm": 0.0, - "learning_rate": 1.895196978882174e-05, - "loss": 0.9603, + "learning_rate": 1.7808315601509576e-05, + "loss": 1.0887, "step": 6092 }, { - "epoch": 0.17266003570517724, + "epoch": 0.23839893575397136, "grad_norm": 0.0, - "learning_rate": 1.8951560719339265e-05, - "loss": 0.9583, + "learning_rate": 1.7807523846814353e-05, + "loss": 1.1876, "step": 6093 }, { - "epoch": 0.1726883731474397, + "epoch": 0.2384380624462008, "grad_norm": 0.0, - "learning_rate": 1.8951151574454467e-05, - "loss": 1.1373, + "learning_rate": 1.780673196674008e-05, + "loss": 1.1339, "step": 6094 }, { - "epoch": 0.17271671058970217, + "epoch": 0.23847718913843025, "grad_norm": 0.0, - "learning_rate": 1.89507423541708e-05, - "loss": 1.0441, + "learning_rate": 1.7805939961299472e-05, + "loss": 1.2116, "step": 6095 }, { - "epoch": 0.17274504803196464, + "epoch": 0.2385163158306597, "grad_norm": 0.0, - "learning_rate": 1.8950333058491702e-05, - "loss": 0.9899, + "learning_rate": 1.7805147830505253e-05, + "loss": 1.1647, "step": 6096 }, { - "epoch": 0.1727733854742271, + "epoch": 0.23855544252288913, "grad_norm": 0.0, - "learning_rate": 1.8949923687420626e-05, - "loss": 0.9349, + "learning_rate": 1.780435557437014e-05, + "loss": 1.1955, "step": 6097 }, { - "epoch": 0.17280172291648957, + "epoch": 0.23859456921511854, "grad_norm": 0.0, - "learning_rate": 1.8949514240961023e-05, - "loss": 0.9711, + "learning_rate": 1.7803563192906853e-05, + "loss": 1.1658, "step": 6098 }, { - "epoch": 0.172830060358752, + "epoch": 0.23863369590734798, "grad_norm": 0.0, - "learning_rate": 1.8949104719116334e-05, - "loss": 0.9083, + "learning_rate": 1.7802770686128122e-05, + "loss": 1.1032, "step": 6099 }, { - "epoch": 0.17285839780101447, + "epoch": 0.23867282259957742, "grad_norm": 0.0, - "learning_rate": 1.8948695121890016e-05, - "loss": 1.0797, + "learning_rate": 1.7801978054046675e-05, + "loss": 1.0849, "step": 6100 }, { - "epoch": 0.17288673524327694, + "epoch": 0.23871194929180686, "grad_norm": 0.0, - "learning_rate": 1.894828544928551e-05, - "loss": 1.0997, + "learning_rate": 1.780118529667523e-05, + "loss": 1.1124, "step": 6101 }, { - "epoch": 0.1729150726855394, + "epoch": 0.2387510759840363, "grad_norm": 0.0, - "learning_rate": 1.894787570130628e-05, - "loss": 1.0773, + "learning_rate": 1.7800392414026524e-05, + "loss": 1.1177, "step": 6102 }, { - "epoch": 0.17294341012780187, + "epoch": 0.23879020267626574, "grad_norm": 0.0, - "learning_rate": 1.8947465877955767e-05, - "loss": 1.1133, + "learning_rate": 1.7799599406113296e-05, + "loss": 1.1484, "step": 6103 }, { - "epoch": 0.17297174757006434, + "epoch": 0.23882932936849519, "grad_norm": 0.0, - "learning_rate": 1.8947055979237427e-05, - "loss": 1.0107, + "learning_rate": 1.7798806272948272e-05, + "loss": 1.1459, "step": 6104 }, { - "epoch": 0.17300008501232678, + "epoch": 0.23886845606072463, "grad_norm": 0.0, - "learning_rate": 1.8946646005154712e-05, - "loss": 1.0187, + "learning_rate": 1.7798013014544193e-05, + "loss": 1.2695, "step": 6105 }, { - "epoch": 0.17302842245458924, + "epoch": 0.23890758275295407, "grad_norm": 0.0, - "learning_rate": 1.8946235955711073e-05, - "loss": 1.0373, + "learning_rate": 1.7797219630913797e-05, + "loss": 1.1273, "step": 6106 }, { - "epoch": 0.1730567598968517, + "epoch": 0.2389467094451835, "grad_norm": 0.0, - "learning_rate": 1.8945825830909972e-05, - "loss": 0.9565, + "learning_rate": 1.779642612206982e-05, + "loss": 1.2594, "step": 6107 }, { - "epoch": 0.17308509733911417, + "epoch": 0.23898583613741295, "grad_norm": 0.0, - "learning_rate": 1.8945415630754852e-05, - "loss": 1.0325, + "learning_rate": 1.7795632488025016e-05, + "loss": 1.1129, "step": 6108 }, { - "epoch": 0.17311343478137664, + "epoch": 0.2390249628296424, "grad_norm": 0.0, - "learning_rate": 1.894500535524918e-05, - "loss": 1.045, + "learning_rate": 1.7794838728792117e-05, + "loss": 1.1194, "step": 6109 }, { - "epoch": 0.1731417722236391, + "epoch": 0.23906408952187183, "grad_norm": 0.0, - "learning_rate": 1.8944595004396404e-05, - "loss": 0.9997, + "learning_rate": 1.779404484438388e-05, + "loss": 1.1643, "step": 6110 }, { - "epoch": 0.17317010966590154, + "epoch": 0.23910321621410127, "grad_norm": 0.0, - "learning_rate": 1.894418457819998e-05, - "loss": 0.9861, + "learning_rate": 1.779325083481304e-05, + "loss": 1.2799, "step": 6111 }, { - "epoch": 0.173198447108164, + "epoch": 0.2391423429063307, "grad_norm": 0.0, - "learning_rate": 1.8943774076663372e-05, - "loss": 1.0098, + "learning_rate": 1.7792456700092368e-05, + "loss": 1.064, "step": 6112 }, { - "epoch": 0.17322678455042648, + "epoch": 0.23918146959856013, "grad_norm": 0.0, - "learning_rate": 1.894336349979003e-05, - "loss": 0.9706, + "learning_rate": 1.7791662440234597e-05, + "loss": 1.2317, "step": 6113 }, { - "epoch": 0.17325512199268894, + "epoch": 0.23922059629078957, "grad_norm": 0.0, - "learning_rate": 1.8942952847583417e-05, - "loss": 0.9655, + "learning_rate": 1.7790868055252496e-05, + "loss": 1.0014, "step": 6114 }, { - "epoch": 0.1732834594349514, + "epoch": 0.239259722983019, "grad_norm": 0.0, - "learning_rate": 1.8942542120046993e-05, - "loss": 1.0446, + "learning_rate": 1.7790073545158818e-05, + "loss": 1.1094, "step": 6115 }, { - "epoch": 0.17331179687721388, + "epoch": 0.23929884967524845, "grad_norm": 0.0, - "learning_rate": 1.8942131317184214e-05, - "loss": 0.9198, + "learning_rate": 1.7789278909966315e-05, + "loss": 1.12, "step": 6116 }, { - "epoch": 0.1733401343194763, + "epoch": 0.2393379763674779, "grad_norm": 0.0, - "learning_rate": 1.8941720438998545e-05, - "loss": 0.9947, + "learning_rate": 1.7788484149687758e-05, + "loss": 1.0126, "step": 6117 }, { - "epoch": 0.17336847176173878, + "epoch": 0.23937710305970733, "grad_norm": 0.0, - "learning_rate": 1.8941309485493443e-05, - "loss": 1.1242, + "learning_rate": 1.7787689264335902e-05, + "loss": 1.1691, "step": 6118 }, { - "epoch": 0.17339680920400125, + "epoch": 0.23941622975193677, "grad_norm": 0.0, - "learning_rate": 1.894089845667237e-05, - "loss": 0.9945, + "learning_rate": 1.7786894253923518e-05, + "loss": 1.1041, "step": 6119 }, { - "epoch": 0.1734251466462637, + "epoch": 0.2394553564441662, "grad_norm": 0.0, - "learning_rate": 1.894048735253879e-05, - "loss": 1.0777, + "learning_rate": 1.7786099118463368e-05, + "loss": 1.0537, "step": 6120 }, { - "epoch": 0.17345348408852618, + "epoch": 0.23949448313639565, "grad_norm": 0.0, - "learning_rate": 1.8940076173096166e-05, - "loss": 0.9564, + "learning_rate": 1.7785303857968223e-05, + "loss": 1.1786, "step": 6121 }, { - "epoch": 0.17348182153078864, + "epoch": 0.2395336098286251, "grad_norm": 0.0, - "learning_rate": 1.893966491834796e-05, - "loss": 1.0689, + "learning_rate": 1.7784508472450852e-05, + "loss": 1.1169, "step": 6122 }, { - "epoch": 0.17351015897305108, + "epoch": 0.23957273652085453, "grad_norm": 0.0, - "learning_rate": 1.8939253588297638e-05, - "loss": 1.0034, + "learning_rate": 1.7783712961924032e-05, + "loss": 1.1119, "step": 6123 }, { - "epoch": 0.17353849641531355, + "epoch": 0.23961186321308398, "grad_norm": 0.0, - "learning_rate": 1.893884218294866e-05, - "loss": 1.0168, + "learning_rate": 1.7782917326400533e-05, + "loss": 1.0833, "step": 6124 }, { - "epoch": 0.17356683385757601, + "epoch": 0.23965098990531342, "grad_norm": 0.0, - "learning_rate": 1.8938430702304497e-05, - "loss": 0.96, + "learning_rate": 1.7782121565893135e-05, + "loss": 1.1072, "step": 6125 }, { - "epoch": 0.17359517129983848, + "epoch": 0.23969011659754286, "grad_norm": 0.0, - "learning_rate": 1.8938019146368614e-05, - "loss": 1.0489, + "learning_rate": 1.7781325680414614e-05, + "loss": 1.158, "step": 6126 }, { - "epoch": 0.17362350874210095, + "epoch": 0.23972924328977227, "grad_norm": 0.0, - "learning_rate": 1.8937607515144475e-05, - "loss": 1.0078, + "learning_rate": 1.7780529669977758e-05, + "loss": 1.2437, "step": 6127 }, { - "epoch": 0.1736518461843634, + "epoch": 0.2397683699820017, "grad_norm": 0.0, - "learning_rate": 1.893719580863555e-05, - "loss": 0.9184, + "learning_rate": 1.7779733534595343e-05, + "loss": 1.1371, "step": 6128 }, { - "epoch": 0.17368018362662585, + "epoch": 0.23980749667423115, "grad_norm": 0.0, - "learning_rate": 1.8936784026845304e-05, - "loss": 1.0539, + "learning_rate": 1.7778937274280156e-05, + "loss": 1.2374, "step": 6129 }, { - "epoch": 0.17370852106888832, + "epoch": 0.2398466233664606, "grad_norm": 0.0, - "learning_rate": 1.8936372169777208e-05, - "loss": 0.9832, + "learning_rate": 1.7778140889044983e-05, + "loss": 1.2601, "step": 6130 }, { - "epoch": 0.17373685851115078, + "epoch": 0.23988575005869003, "grad_norm": 0.0, - "learning_rate": 1.8935960237434733e-05, - "loss": 1.1805, + "learning_rate": 1.7777344378902615e-05, + "loss": 1.0179, "step": 6131 }, { - "epoch": 0.17376519595341325, + "epoch": 0.23992487675091947, "grad_norm": 0.0, - "learning_rate": 1.8935548229821346e-05, - "loss": 1.1436, + "learning_rate": 1.777654774386584e-05, + "loss": 1.1119, "step": 6132 }, { - "epoch": 0.17379353339567571, + "epoch": 0.23996400344314892, "grad_norm": 0.0, - "learning_rate": 1.8935136146940517e-05, - "loss": 1.0745, + "learning_rate": 1.7775750983947455e-05, + "loss": 1.1478, "step": 6133 }, { - "epoch": 0.17382187083793818, + "epoch": 0.24000313013537836, "grad_norm": 0.0, - "learning_rate": 1.893472398879572e-05, - "loss": 1.0245, + "learning_rate": 1.7774954099160252e-05, + "loss": 1.1864, "step": 6134 }, { - "epoch": 0.17385020828020062, + "epoch": 0.2400422568276078, "grad_norm": 0.0, - "learning_rate": 1.8934311755390423e-05, - "loss": 1.0196, + "learning_rate": 1.777415708951703e-05, + "loss": 1.2549, "step": 6135 }, { - "epoch": 0.17387854572246308, + "epoch": 0.24008138351983724, "grad_norm": 0.0, - "learning_rate": 1.8933899446728103e-05, - "loss": 0.883, + "learning_rate": 1.7773359955030583e-05, + "loss": 1.1221, "step": 6136 }, { - "epoch": 0.17390688316472555, + "epoch": 0.24012051021206668, "grad_norm": 0.0, - "learning_rate": 1.8933487062812225e-05, - "loss": 0.9399, + "learning_rate": 1.777256269571372e-05, + "loss": 1.0428, "step": 6137 }, { - "epoch": 0.17393522060698802, + "epoch": 0.24015963690429612, "grad_norm": 0.0, - "learning_rate": 1.8933074603646275e-05, - "loss": 0.9329, + "learning_rate": 1.7771765311579236e-05, + "loss": 1.1385, "step": 6138 }, { - "epoch": 0.17396355804925048, + "epoch": 0.24019876359652556, "grad_norm": 0.0, - "learning_rate": 1.8932662069233717e-05, - "loss": 0.8999, + "learning_rate": 1.7770967802639943e-05, + "loss": 1.1021, "step": 6139 }, { - "epoch": 0.17399189549151295, + "epoch": 0.240237890288755, "grad_norm": 0.0, - "learning_rate": 1.893224945957803e-05, - "loss": 0.9375, + "learning_rate": 1.7770170168908642e-05, + "loss": 1.0675, "step": 6140 }, { - "epoch": 0.1740202329337754, + "epoch": 0.24027701698098441, "grad_norm": 0.0, - "learning_rate": 1.893183677468269e-05, - "loss": 1.0278, + "learning_rate": 1.7769372410398145e-05, + "loss": 1.1407, "step": 6141 }, { - "epoch": 0.17404857037603785, + "epoch": 0.24031614367321386, "grad_norm": 0.0, - "learning_rate": 1.8931424014551168e-05, - "loss": 0.9402, + "learning_rate": 1.776857452712126e-05, + "loss": 1.2491, "step": 6142 }, { - "epoch": 0.17407690781830032, + "epoch": 0.2403552703654433, "grad_norm": 0.0, - "learning_rate": 1.8931011179186946e-05, - "loss": 1.0562, + "learning_rate": 1.776777651909081e-05, + "loss": 1.2274, "step": 6143 }, { - "epoch": 0.17410524526056279, + "epoch": 0.24039439705767274, "grad_norm": 0.0, - "learning_rate": 1.8930598268593503e-05, - "loss": 1.184, + "learning_rate": 1.77669783863196e-05, + "loss": 1.0641, "step": 6144 }, { - "epoch": 0.17413358270282525, + "epoch": 0.24043352374990218, "grad_norm": 0.0, - "learning_rate": 1.8930185282774315e-05, - "loss": 1.0266, + "learning_rate": 1.7766180128820445e-05, + "loss": 1.1183, "step": 6145 }, { - "epoch": 0.17416192014508772, + "epoch": 0.24047265044213162, "grad_norm": 0.0, - "learning_rate": 1.8929772221732856e-05, - "loss": 0.8712, + "learning_rate": 1.7765381746606172e-05, + "loss": 1.268, "step": 6146 }, { - "epoch": 0.17419025758735016, + "epoch": 0.24051177713436106, "grad_norm": 0.0, - "learning_rate": 1.892935908547261e-05, - "loss": 1.0506, + "learning_rate": 1.7764583239689602e-05, + "loss": 1.1475, "step": 6147 }, { - "epoch": 0.17421859502961262, + "epoch": 0.2405509038265905, "grad_norm": 0.0, - "learning_rate": 1.892894587399706e-05, - "loss": 1.1258, + "learning_rate": 1.776378460808355e-05, + "loss": 1.2389, "step": 6148 }, { - "epoch": 0.1742469324718751, + "epoch": 0.24059003051881994, "grad_norm": 0.0, - "learning_rate": 1.8928532587309682e-05, - "loss": 0.9807, + "learning_rate": 1.7762985851800846e-05, + "loss": 1.2159, "step": 6149 }, { - "epoch": 0.17427526991413755, + "epoch": 0.24062915721104938, "grad_norm": 0.0, - "learning_rate": 1.8928119225413958e-05, - "loss": 0.8546, + "learning_rate": 1.776218697085432e-05, + "loss": 1.1503, "step": 6150 }, { - "epoch": 0.17430360735640002, + "epoch": 0.24066828390327882, "grad_norm": 0.0, - "learning_rate": 1.8927705788313373e-05, - "loss": 1.0531, + "learning_rate": 1.7761387965256792e-05, + "loss": 1.2256, "step": 6151 }, { - "epoch": 0.17433194479866249, + "epoch": 0.24070741059550826, "grad_norm": 0.0, - "learning_rate": 1.8927292276011404e-05, - "loss": 0.9689, + "learning_rate": 1.7760588835021106e-05, + "loss": 1.0375, "step": 6152 }, { - "epoch": 0.17436028224092492, + "epoch": 0.2407465372877377, "grad_norm": 0.0, - "learning_rate": 1.8926878688511537e-05, - "loss": 1.0637, + "learning_rate": 1.775978958016008e-05, + "loss": 1.1727, "step": 6153 }, { - "epoch": 0.1743886196831874, + "epoch": 0.24078566397996715, "grad_norm": 0.0, - "learning_rate": 1.892646502581726e-05, - "loss": 1.0099, + "learning_rate": 1.7758990200686564e-05, + "loss": 1.0487, "step": 6154 }, { - "epoch": 0.17441695712544986, + "epoch": 0.24082479067219656, "grad_norm": 0.0, - "learning_rate": 1.892605128793205e-05, - "loss": 0.9724, + "learning_rate": 1.7758190696613385e-05, + "loss": 1.2913, "step": 6155 }, { - "epoch": 0.17444529456771232, + "epoch": 0.240863917364426, "grad_norm": 0.0, - "learning_rate": 1.8925637474859394e-05, - "loss": 1.1151, + "learning_rate": 1.7757391067953387e-05, + "loss": 1.2838, "step": 6156 }, { - "epoch": 0.1744736320099748, + "epoch": 0.24090304405665544, "grad_norm": 0.0, - "learning_rate": 1.892522358660278e-05, - "loss": 1.012, + "learning_rate": 1.7756591314719405e-05, + "loss": 1.2231, "step": 6157 }, { - "epoch": 0.17450196945223725, + "epoch": 0.24094217074888488, "grad_norm": 0.0, - "learning_rate": 1.8924809623165694e-05, - "loss": 0.9341, + "learning_rate": 1.775579143692429e-05, + "loss": 1.0939, "step": 6158 }, { - "epoch": 0.1745303068944997, + "epoch": 0.24098129744111432, "grad_norm": 0.0, - "learning_rate": 1.8924395584551624e-05, - "loss": 1.0812, + "learning_rate": 1.7754991434580883e-05, + "loss": 1.1841, "step": 6159 }, { - "epoch": 0.17455864433676216, + "epoch": 0.24102042413334376, "grad_norm": 0.0, - "learning_rate": 1.892398147076405e-05, - "loss": 1.0784, + "learning_rate": 1.7754191307702027e-05, + "loss": 1.2281, "step": 6160 }, { - "epoch": 0.17458698177902462, + "epoch": 0.2410595508255732, "grad_norm": 0.0, - "learning_rate": 1.8923567281806475e-05, - "loss": 1.0951, + "learning_rate": 1.775339105630058e-05, + "loss": 1.2637, "step": 6161 }, { - "epoch": 0.1746153192212871, + "epoch": 0.24109867751780265, "grad_norm": 0.0, - "learning_rate": 1.8923153017682372e-05, - "loss": 0.9915, + "learning_rate": 1.7752590680389382e-05, + "loss": 1.1372, "step": 6162 }, { - "epoch": 0.17464365666354956, + "epoch": 0.2411378042100321, "grad_norm": 0.0, - "learning_rate": 1.892273867839524e-05, - "loss": 1.0015, + "learning_rate": 1.77517901799813e-05, + "loss": 1.1288, "step": 6163 }, { - "epoch": 0.17467199410581202, + "epoch": 0.24117693090226153, "grad_norm": 0.0, - "learning_rate": 1.8922324263948567e-05, - "loss": 1.1499, + "learning_rate": 1.7750989555089174e-05, + "loss": 1.17, "step": 6164 }, { - "epoch": 0.17470033154807446, + "epoch": 0.24121605759449097, "grad_norm": 0.0, - "learning_rate": 1.8921909774345842e-05, - "loss": 1.0958, + "learning_rate": 1.7750188805725873e-05, + "loss": 1.2081, "step": 6165 }, { - "epoch": 0.17472866899033693, + "epoch": 0.2412551842867204, "grad_norm": 0.0, - "learning_rate": 1.8921495209590562e-05, - "loss": 0.8745, + "learning_rate": 1.774938793190425e-05, + "loss": 1.0684, "step": 6166 }, { - "epoch": 0.1747570064325994, + "epoch": 0.24129431097894985, "grad_norm": 0.0, - "learning_rate": 1.892108056968621e-05, - "loss": 0.9745, + "learning_rate": 1.774858693363717e-05, + "loss": 1.2346, "step": 6167 }, { - "epoch": 0.17478534387486186, + "epoch": 0.2413334376711793, "grad_norm": 0.0, - "learning_rate": 1.8920665854636285e-05, - "loss": 1.0028, + "learning_rate": 1.7747785810937487e-05, + "loss": 1.2193, "step": 6168 }, { - "epoch": 0.17481368131712433, + "epoch": 0.2413725643634087, "grad_norm": 0.0, - "learning_rate": 1.8920251064444284e-05, - "loss": 1.0307, + "learning_rate": 1.7746984563818077e-05, + "loss": 1.0605, "step": 6169 }, { - "epoch": 0.1748420187593868, + "epoch": 0.24141169105563814, "grad_norm": 0.0, - "learning_rate": 1.891983619911369e-05, - "loss": 1.0023, + "learning_rate": 1.7746183192291803e-05, + "loss": 1.1323, "step": 6170 }, { - "epoch": 0.17487035620164923, + "epoch": 0.24145081774786759, "grad_norm": 0.0, - "learning_rate": 1.8919421258648007e-05, - "loss": 0.9633, + "learning_rate": 1.774538169637153e-05, + "loss": 1.0971, "step": 6171 }, { - "epoch": 0.1748986936439117, + "epoch": 0.24148994444009703, "grad_norm": 0.0, - "learning_rate": 1.8919006243050723e-05, - "loss": 1.0524, + "learning_rate": 1.7744580076070133e-05, + "loss": 1.1704, "step": 6172 }, { - "epoch": 0.17492703108617416, + "epoch": 0.24152907113232647, "grad_norm": 0.0, - "learning_rate": 1.891859115232534e-05, - "loss": 0.9257, + "learning_rate": 1.7743778331400486e-05, + "loss": 1.2039, "step": 6173 }, { - "epoch": 0.17495536852843663, + "epoch": 0.2415681978245559, "grad_norm": 0.0, - "learning_rate": 1.891817598647535e-05, - "loss": 1.0338, + "learning_rate": 1.7742976462375466e-05, + "loss": 1.2568, "step": 6174 }, { - "epoch": 0.1749837059706991, + "epoch": 0.24160732451678535, "grad_norm": 0.0, - "learning_rate": 1.8917760745504252e-05, - "loss": 1.0754, + "learning_rate": 1.774217446900794e-05, + "loss": 1.1948, "step": 6175 }, { - "epoch": 0.17501204341296156, + "epoch": 0.2416464512090148, "grad_norm": 0.0, - "learning_rate": 1.8917345429415546e-05, - "loss": 1.0795, + "learning_rate": 1.7741372351310797e-05, + "loss": 1.222, "step": 6176 }, { - "epoch": 0.175040380855224, + "epoch": 0.24168557790124423, "grad_norm": 0.0, - "learning_rate": 1.8916930038212726e-05, - "loss": 0.9788, + "learning_rate": 1.7740570109296915e-05, + "loss": 1.1913, "step": 6177 }, { - "epoch": 0.17506871829748646, + "epoch": 0.24172470459347367, "grad_norm": 0.0, - "learning_rate": 1.8916514571899295e-05, - "loss": 1.1468, + "learning_rate": 1.7739767742979174e-05, + "loss": 1.1563, "step": 6178 }, { - "epoch": 0.17509705573974893, + "epoch": 0.2417638312857031, "grad_norm": 0.0, - "learning_rate": 1.8916099030478747e-05, - "loss": 0.9597, + "learning_rate": 1.7738965252370463e-05, + "loss": 1.1103, "step": 6179 }, { - "epoch": 0.1751253931820114, + "epoch": 0.24180295797793255, "grad_norm": 0.0, - "learning_rate": 1.8915683413954592e-05, - "loss": 1.1072, + "learning_rate": 1.773816263748367e-05, + "loss": 1.2137, "step": 6180 }, { - "epoch": 0.17515373062427386, + "epoch": 0.241842084670162, "grad_norm": 0.0, - "learning_rate": 1.891526772233032e-05, - "loss": 1.0195, + "learning_rate": 1.7737359898331677e-05, + "loss": 1.1163, "step": 6181 }, { - "epoch": 0.1751820680665363, + "epoch": 0.24188121136239144, "grad_norm": 0.0, - "learning_rate": 1.891485195560944e-05, - "loss": 0.9778, + "learning_rate": 1.773655703492738e-05, + "loss": 1.1783, "step": 6182 }, { - "epoch": 0.17521040550879877, + "epoch": 0.24192033805462088, "grad_norm": 0.0, - "learning_rate": 1.891443611379545e-05, - "loss": 1.1291, + "learning_rate": 1.7735754047283674e-05, + "loss": 1.1135, "step": 6183 }, { - "epoch": 0.17523874295106123, + "epoch": 0.2419594647468503, "grad_norm": 0.0, - "learning_rate": 1.8914020196891853e-05, - "loss": 1.0578, + "learning_rate": 1.773495093541345e-05, + "loss": 1.0815, "step": 6184 }, { - "epoch": 0.1752670803933237, + "epoch": 0.24199859143907973, "grad_norm": 0.0, - "learning_rate": 1.8913604204902155e-05, - "loss": 0.9157, + "learning_rate": 1.7734147699329607e-05, + "loss": 1.1071, "step": 6185 }, { - "epoch": 0.17529541783558616, + "epoch": 0.24203771813130917, "grad_norm": 0.0, - "learning_rate": 1.891318813782986e-05, - "loss": 1.0634, + "learning_rate": 1.773334433904504e-05, + "loss": 1.0775, "step": 6186 }, { - "epoch": 0.17532375527784863, + "epoch": 0.2420768448235386, "grad_norm": 0.0, - "learning_rate": 1.8912771995678468e-05, - "loss": 1.0522, + "learning_rate": 1.7732540854572658e-05, + "loss": 1.095, "step": 6187 }, { - "epoch": 0.17535209272011107, + "epoch": 0.24211597151576805, "grad_norm": 0.0, - "learning_rate": 1.8912355778451494e-05, - "loss": 0.9561, + "learning_rate": 1.7731737245925357e-05, + "loss": 1.1344, "step": 6188 }, { - "epoch": 0.17538043016237354, + "epoch": 0.2421550982079975, "grad_norm": 0.0, - "learning_rate": 1.8911939486152433e-05, - "loss": 1.0065, + "learning_rate": 1.7730933513116046e-05, + "loss": 1.2171, "step": 6189 }, { - "epoch": 0.175408767604636, + "epoch": 0.24219422490022693, "grad_norm": 0.0, - "learning_rate": 1.8911523118784797e-05, - "loss": 1.0782, + "learning_rate": 1.773012965615763e-05, + "loss": 1.2204, "step": 6190 }, { - "epoch": 0.17543710504689847, + "epoch": 0.24223335159245638, "grad_norm": 0.0, - "learning_rate": 1.8911106676352094e-05, - "loss": 1.0451, + "learning_rate": 1.7729325675063017e-05, + "loss": 1.1989, "step": 6191 }, { - "epoch": 0.17546544248916093, + "epoch": 0.24227247828468582, "grad_norm": 0.0, - "learning_rate": 1.891069015885783e-05, - "loss": 0.8689, + "learning_rate": 1.772852156984512e-05, + "loss": 1.1325, "step": 6192 }, { - "epoch": 0.1754937799314234, + "epoch": 0.24231160497691526, "grad_norm": 0.0, - "learning_rate": 1.8910273566305514e-05, - "loss": 0.9781, + "learning_rate": 1.772771734051685e-05, + "loss": 1.1562, "step": 6193 }, { - "epoch": 0.17552211737368584, + "epoch": 0.2423507316691447, "grad_norm": 0.0, - "learning_rate": 1.890985689869865e-05, - "loss": 1.0224, + "learning_rate": 1.7726912987091123e-05, + "loss": 1.1819, "step": 6194 }, { - "epoch": 0.1755504548159483, + "epoch": 0.24238985836137414, "grad_norm": 0.0, - "learning_rate": 1.890944015604076e-05, - "loss": 1.0384, + "learning_rate": 1.7726108509580857e-05, + "loss": 1.1393, "step": 6195 }, { - "epoch": 0.17557879225821077, + "epoch": 0.24242898505360358, "grad_norm": 0.0, - "learning_rate": 1.8909023338335345e-05, - "loss": 0.9748, + "learning_rate": 1.7725303907998966e-05, + "loss": 1.123, "step": 6196 }, { - "epoch": 0.17560712970047324, + "epoch": 0.24246811174583302, "grad_norm": 0.0, - "learning_rate": 1.8908606445585914e-05, - "loss": 0.9293, + "learning_rate": 1.772449918235838e-05, + "loss": 1.0527, "step": 6197 }, { - "epoch": 0.1756354671427357, + "epoch": 0.24250723843806243, "grad_norm": 0.0, - "learning_rate": 1.8908189477795988e-05, - "loss": 0.9414, + "learning_rate": 1.7723694332672012e-05, + "loss": 1.1112, "step": 6198 }, { - "epoch": 0.17566380458499817, + "epoch": 0.24254636513029187, "grad_norm": 0.0, - "learning_rate": 1.8907772434969073e-05, - "loss": 0.9461, + "learning_rate": 1.7722889358952793e-05, + "loss": 1.1405, "step": 6199 }, { - "epoch": 0.1756921420272606, + "epoch": 0.24258549182252132, "grad_norm": 0.0, - "learning_rate": 1.8907355317108683e-05, - "loss": 1.0424, + "learning_rate": 1.772208426121365e-05, + "loss": 1.2443, "step": 6200 }, { - "epoch": 0.17572047946952307, + "epoch": 0.24262461851475076, "grad_norm": 0.0, - "learning_rate": 1.8906938124218328e-05, - "loss": 0.9978, + "learning_rate": 1.772127903946751e-05, + "loss": 1.1408, "step": 6201 }, { - "epoch": 0.17574881691178554, + "epoch": 0.2426637452069802, "grad_norm": 0.0, - "learning_rate": 1.8906520856301528e-05, - "loss": 1.1376, + "learning_rate": 1.77204736937273e-05, + "loss": 1.1734, "step": 6202 }, { - "epoch": 0.175777154354048, + "epoch": 0.24270287189920964, "grad_norm": 0.0, - "learning_rate": 1.8906103513361797e-05, - "loss": 1.0225, + "learning_rate": 1.771966822400596e-05, + "loss": 1.0796, "step": 6203 }, { - "epoch": 0.17580549179631047, + "epoch": 0.24274199859143908, "grad_norm": 0.0, - "learning_rate": 1.8905686095402648e-05, - "loss": 1.0396, + "learning_rate": 1.7718862630316417e-05, + "loss": 1.1185, "step": 6204 }, { - "epoch": 0.17583382923857294, + "epoch": 0.24278112528366852, "grad_norm": 0.0, - "learning_rate": 1.89052686024276e-05, - "loss": 1.0667, + "learning_rate": 1.771805691267162e-05, + "loss": 1.0215, "step": 6205 }, { - "epoch": 0.17586216668083537, + "epoch": 0.24282025197589796, "grad_norm": 0.0, - "learning_rate": 1.890485103444016e-05, - "loss": 1.0156, + "learning_rate": 1.7717251071084492e-05, + "loss": 1.1198, "step": 6206 }, { - "epoch": 0.17589050412309784, + "epoch": 0.2428593786681274, "grad_norm": 0.0, - "learning_rate": 1.890443339144386e-05, - "loss": 0.9691, + "learning_rate": 1.771644510556799e-05, + "loss": 1.3296, "step": 6207 }, { - "epoch": 0.1759188415653603, + "epoch": 0.24289850536035684, "grad_norm": 0.0, - "learning_rate": 1.890401567344221e-05, - "loss": 0.9131, + "learning_rate": 1.7715639016135043e-05, + "loss": 1.0197, "step": 6208 }, { - "epoch": 0.17594717900762277, + "epoch": 0.24293763205258628, "grad_norm": 0.0, - "learning_rate": 1.890359788043873e-05, - "loss": 1.0581, + "learning_rate": 1.7714832802798606e-05, + "loss": 1.1075, "step": 6209 }, { - "epoch": 0.17597551644988524, + "epoch": 0.24297675874481572, "grad_norm": 0.0, - "learning_rate": 1.8903180012436935e-05, - "loss": 0.9968, + "learning_rate": 1.771402646557162e-05, + "loss": 1.2003, "step": 6210 }, { - "epoch": 0.1760038538921477, + "epoch": 0.24301588543704516, "grad_norm": 0.0, - "learning_rate": 1.890276206944035e-05, - "loss": 0.9142, + "learning_rate": 1.7713220004467035e-05, + "loss": 1.0578, "step": 6211 }, { - "epoch": 0.17603219133441014, + "epoch": 0.24305501212927458, "grad_norm": 0.0, - "learning_rate": 1.89023440514525e-05, - "loss": 1.0385, + "learning_rate": 1.77124134194978e-05, + "loss": 1.1403, "step": 6212 }, { - "epoch": 0.1760605287766726, + "epoch": 0.24309413882150402, "grad_norm": 0.0, - "learning_rate": 1.8901925958476894e-05, - "loss": 1.1002, + "learning_rate": 1.771160671067687e-05, + "loss": 1.201, "step": 6213 }, { - "epoch": 0.17608886621893508, + "epoch": 0.24313326551373346, "grad_norm": 0.0, - "learning_rate": 1.8901507790517064e-05, - "loss": 0.9914, + "learning_rate": 1.7710799878017203e-05, + "loss": 1.1836, "step": 6214 }, { - "epoch": 0.17611720366119754, + "epoch": 0.2431723922059629, "grad_norm": 0.0, - "learning_rate": 1.890108954757652e-05, - "loss": 1.0356, + "learning_rate": 1.7709992921531748e-05, + "loss": 1.1448, "step": 6215 }, { - "epoch": 0.17614554110346, + "epoch": 0.24321151889819234, "grad_norm": 0.0, - "learning_rate": 1.8900671229658802e-05, - "loss": 0.9799, + "learning_rate": 1.770918584123347e-05, + "loss": 1.1556, "step": 6216 }, { - "epoch": 0.17617387854572247, + "epoch": 0.24325064559042178, "grad_norm": 0.0, - "learning_rate": 1.8900252836767424e-05, - "loss": 0.9586, + "learning_rate": 1.7708378637135325e-05, + "loss": 1.1437, "step": 6217 }, { - "epoch": 0.1762022159879849, + "epoch": 0.24328977228265122, "grad_norm": 0.0, - "learning_rate": 1.889983436890591e-05, - "loss": 1.0081, + "learning_rate": 1.7707571309250283e-05, + "loss": 1.1507, "step": 6218 }, { - "epoch": 0.17623055343024738, + "epoch": 0.24332889897488066, "grad_norm": 0.0, - "learning_rate": 1.8899415826077784e-05, - "loss": 0.9443, + "learning_rate": 1.77067638575913e-05, + "loss": 1.2057, "step": 6219 }, { - "epoch": 0.17625889087250984, + "epoch": 0.2433680256671101, "grad_norm": 0.0, - "learning_rate": 1.8898997208286576e-05, - "loss": 1.0546, + "learning_rate": 1.770595628217135e-05, + "loss": 1.0814, "step": 6220 }, { - "epoch": 0.1762872283147723, + "epoch": 0.24340715235933955, "grad_norm": 0.0, - "learning_rate": 1.889857851553581e-05, - "loss": 0.9347, + "learning_rate": 1.7705148583003395e-05, + "loss": 1.1652, "step": 6221 }, { - "epoch": 0.17631556575703478, + "epoch": 0.243446279051569, "grad_norm": 0.0, - "learning_rate": 1.8898159747829014e-05, - "loss": 0.9735, + "learning_rate": 1.770434076010041e-05, + "loss": 1.3118, "step": 6222 }, { - "epoch": 0.17634390319929724, + "epoch": 0.24348540574379843, "grad_norm": 0.0, - "learning_rate": 1.889774090516971e-05, - "loss": 1.0284, + "learning_rate": 1.7703532813475367e-05, + "loss": 1.138, "step": 6223 }, { - "epoch": 0.17637224064155968, + "epoch": 0.24352453243602787, "grad_norm": 0.0, - "learning_rate": 1.8897321987561436e-05, - "loss": 1.0054, + "learning_rate": 1.7702724743141234e-05, + "loss": 1.1891, "step": 6224 }, { - "epoch": 0.17640057808382215, + "epoch": 0.2435636591282573, "grad_norm": 0.0, - "learning_rate": 1.889690299500771e-05, - "loss": 0.9988, + "learning_rate": 1.7701916549111003e-05, + "loss": 1.1534, "step": 6225 }, { - "epoch": 0.1764289155260846, + "epoch": 0.24360278582048672, "grad_norm": 0.0, - "learning_rate": 1.889648392751207e-05, - "loss": 0.9978, + "learning_rate": 1.770110823139764e-05, + "loss": 1.3439, "step": 6226 }, { - "epoch": 0.17645725296834708, + "epoch": 0.24364191251271616, "grad_norm": 0.0, - "learning_rate": 1.889606478507804e-05, - "loss": 1.1082, + "learning_rate": 1.7700299790014126e-05, + "loss": 1.1132, "step": 6227 }, { - "epoch": 0.17648559041060954, + "epoch": 0.2436810392049456, "grad_norm": 0.0, - "learning_rate": 1.8895645567709154e-05, - "loss": 1.1267, + "learning_rate": 1.769949122497345e-05, + "loss": 1.0887, "step": 6228 }, { - "epoch": 0.176513927852872, + "epoch": 0.24372016589717505, "grad_norm": 0.0, - "learning_rate": 1.889522627540894e-05, - "loss": 1.0355, + "learning_rate": 1.769868253628859e-05, + "loss": 1.2522, "step": 6229 }, { - "epoch": 0.17654226529513445, + "epoch": 0.24375929258940449, "grad_norm": 0.0, - "learning_rate": 1.8894806908180934e-05, - "loss": 0.8982, + "learning_rate": 1.7697873723972536e-05, + "loss": 1.3215, "step": 6230 }, { - "epoch": 0.17657060273739691, + "epoch": 0.24379841928163393, "grad_norm": 0.0, - "learning_rate": 1.8894387466028665e-05, - "loss": 1.0621, + "learning_rate": 1.7697064788038277e-05, + "loss": 1.087, "step": 6231 }, { - "epoch": 0.17659894017965938, + "epoch": 0.24383754597386337, "grad_norm": 0.0, - "learning_rate": 1.889396794895567e-05, - "loss": 0.9981, + "learning_rate": 1.76962557284988e-05, + "loss": 1.2132, "step": 6232 }, { - "epoch": 0.17662727762192185, + "epoch": 0.2438766726660928, "grad_norm": 0.0, - "learning_rate": 1.8893548356965477e-05, - "loss": 1.0874, + "learning_rate": 1.7695446545367106e-05, + "loss": 1.1538, "step": 6233 }, { - "epoch": 0.1766556150641843, + "epoch": 0.24391579935832225, "grad_norm": 0.0, - "learning_rate": 1.8893128690061625e-05, - "loss": 0.97, + "learning_rate": 1.7694637238656178e-05, + "loss": 1.1533, "step": 6234 }, { - "epoch": 0.17668395250644678, + "epoch": 0.2439549260505517, "grad_norm": 0.0, - "learning_rate": 1.889270894824765e-05, - "loss": 0.9919, + "learning_rate": 1.769382780837902e-05, + "loss": 1.0618, "step": 6235 }, { - "epoch": 0.17671228994870922, + "epoch": 0.24399405274278113, "grad_norm": 0.0, - "learning_rate": 1.8892289131527078e-05, - "loss": 0.951, + "learning_rate": 1.7693018254548628e-05, + "loss": 1.0541, "step": 6236 }, { - "epoch": 0.17674062739097168, + "epoch": 0.24403317943501057, "grad_norm": 0.0, - "learning_rate": 1.889186923990346e-05, - "loss": 1.122, + "learning_rate": 1.7692208577178003e-05, + "loss": 1.1373, "step": 6237 }, { - "epoch": 0.17676896483323415, + "epoch": 0.24407230612724, "grad_norm": 0.0, - "learning_rate": 1.889144927338032e-05, - "loss": 0.9855, + "learning_rate": 1.769139877628015e-05, + "loss": 1.0011, "step": 6238 }, { - "epoch": 0.17679730227549662, + "epoch": 0.24411143281946945, "grad_norm": 0.0, - "learning_rate": 1.8891029231961208e-05, - "loss": 1.0639, + "learning_rate": 1.7690588851868066e-05, + "loss": 0.9766, "step": 6239 }, { - "epoch": 0.17682563971775908, + "epoch": 0.2441505595116989, "grad_norm": 0.0, - "learning_rate": 1.8890609115649653e-05, - "loss": 0.9774, + "learning_rate": 1.7689778803954764e-05, + "loss": 1.1626, "step": 6240 }, { - "epoch": 0.17685397716002155, + "epoch": 0.2441896862039283, "grad_norm": 0.0, - "learning_rate": 1.8890188924449192e-05, - "loss": 0.9486, + "learning_rate": 1.7688968632553246e-05, + "loss": 1.1253, "step": 6241 }, { - "epoch": 0.17688231460228399, + "epoch": 0.24422881289615775, "grad_norm": 0.0, - "learning_rate": 1.888976865836337e-05, - "loss": 1.0388, + "learning_rate": 1.7688158337676528e-05, + "loss": 1.074, "step": 6242 }, { - "epoch": 0.17691065204454645, + "epoch": 0.2442679395883872, "grad_norm": 0.0, - "learning_rate": 1.8889348317395727e-05, - "loss": 1.0485, + "learning_rate": 1.7687347919337626e-05, + "loss": 1.1115, "step": 6243 }, { - "epoch": 0.17693898948680892, + "epoch": 0.24430706628061663, "grad_norm": 0.0, - "learning_rate": 1.88889279015498e-05, - "loss": 0.9932, + "learning_rate": 1.7686537377549546e-05, + "loss": 1.1448, "step": 6244 }, { - "epoch": 0.17696732692907138, + "epoch": 0.24434619297284607, "grad_norm": 0.0, - "learning_rate": 1.8888507410829136e-05, - "loss": 1.0255, + "learning_rate": 1.7685726712325307e-05, + "loss": 1.1189, "step": 6245 }, { - "epoch": 0.17699566437133385, + "epoch": 0.2443853196650755, "grad_norm": 0.0, - "learning_rate": 1.888808684523727e-05, - "loss": 1.0502, + "learning_rate": 1.7684915923677928e-05, + "loss": 1.1931, "step": 6246 }, { - "epoch": 0.17702400181359632, + "epoch": 0.24442444635730495, "grad_norm": 0.0, - "learning_rate": 1.888766620477775e-05, - "loss": 1.0241, + "learning_rate": 1.768410501162043e-05, + "loss": 1.1985, "step": 6247 }, { - "epoch": 0.17705233925585875, + "epoch": 0.2444635730495344, "grad_norm": 0.0, - "learning_rate": 1.8887245489454117e-05, - "loss": 0.9481, + "learning_rate": 1.7683293976165835e-05, + "loss": 1.1696, "step": 6248 }, { - "epoch": 0.17708067669812122, + "epoch": 0.24450269974176383, "grad_norm": 0.0, - "learning_rate": 1.8886824699269916e-05, - "loss": 0.9858, + "learning_rate": 1.7682482817327163e-05, + "loss": 1.1661, "step": 6249 }, { - "epoch": 0.17710901414038369, + "epoch": 0.24454182643399328, "grad_norm": 0.0, - "learning_rate": 1.888640383422869e-05, - "loss": 0.9891, + "learning_rate": 1.7681671535117443e-05, + "loss": 1.2922, "step": 6250 }, { - "epoch": 0.17713735158264615, + "epoch": 0.24458095312622272, "grad_norm": 0.0, - "learning_rate": 1.888598289433398e-05, - "loss": 1.0694, + "learning_rate": 1.7680860129549708e-05, + "loss": 1.1821, "step": 6251 }, { - "epoch": 0.17716568902490862, + "epoch": 0.24462007981845216, "grad_norm": 0.0, - "learning_rate": 1.888556187958934e-05, - "loss": 1.0904, + "learning_rate": 1.768004860063698e-05, + "loss": 1.1272, "step": 6252 }, { - "epoch": 0.17719402646717108, + "epoch": 0.2446592065106816, "grad_norm": 0.0, - "learning_rate": 1.888514078999831e-05, - "loss": 1.0565, + "learning_rate": 1.76792369483923e-05, + "loss": 1.1146, "step": 6253 }, { - "epoch": 0.17722236390943352, + "epoch": 0.24469833320291104, "grad_norm": 0.0, - "learning_rate": 1.8884719625564444e-05, - "loss": 1.0278, + "learning_rate": 1.7678425172828696e-05, + "loss": 1.1445, "step": 6254 }, { - "epoch": 0.177250701351696, + "epoch": 0.24473745989514045, "grad_norm": 0.0, - "learning_rate": 1.8884298386291286e-05, - "loss": 0.9829, + "learning_rate": 1.7677613273959204e-05, + "loss": 1.1738, "step": 6255 }, { - "epoch": 0.17727903879395845, + "epoch": 0.2447765865873699, "grad_norm": 0.0, - "learning_rate": 1.888387707218238e-05, - "loss": 1.0981, + "learning_rate": 1.7676801251796863e-05, + "loss": 1.0743, "step": 6256 }, { - "epoch": 0.17730737623622092, + "epoch": 0.24481571327959933, "grad_norm": 0.0, - "learning_rate": 1.888345568324128e-05, - "loss": 1.0036, + "learning_rate": 1.7675989106354712e-05, + "loss": 1.1606, "step": 6257 }, { - "epoch": 0.1773357136784834, + "epoch": 0.24485483997182877, "grad_norm": 0.0, - "learning_rate": 1.8883034219471534e-05, - "loss": 1.0801, + "learning_rate": 1.76751768376458e-05, + "loss": 1.1025, "step": 6258 }, { - "epoch": 0.17736405112074585, + "epoch": 0.24489396666405822, "grad_norm": 0.0, - "learning_rate": 1.888261268087669e-05, - "loss": 1.0833, + "learning_rate": 1.767436444568316e-05, + "loss": 0.9461, "step": 6259 }, { - "epoch": 0.1773923885630083, + "epoch": 0.24493309335628766, "grad_norm": 0.0, - "learning_rate": 1.8882191067460305e-05, - "loss": 1.0505, + "learning_rate": 1.7673551930479847e-05, + "loss": 1.1462, "step": 6260 }, { - "epoch": 0.17742072600527076, + "epoch": 0.2449722200485171, "grad_norm": 0.0, - "learning_rate": 1.888176937922592e-05, - "loss": 1.0105, + "learning_rate": 1.7672739292048904e-05, + "loss": 1.1277, "step": 6261 }, { - "epoch": 0.17744906344753322, + "epoch": 0.24501134674074654, "grad_norm": 0.0, - "learning_rate": 1.8881347616177103e-05, - "loss": 1.0505, + "learning_rate": 1.7671926530403382e-05, + "loss": 1.1764, "step": 6262 }, { - "epoch": 0.1774774008897957, + "epoch": 0.24505047343297598, "grad_norm": 0.0, - "learning_rate": 1.888092577831739e-05, - "loss": 1.0151, + "learning_rate": 1.767111364555633e-05, + "loss": 1.0243, "step": 6263 }, { - "epoch": 0.17750573833205815, + "epoch": 0.24508960012520542, "grad_norm": 0.0, - "learning_rate": 1.888050386565034e-05, - "loss": 1.1333, + "learning_rate": 1.767030063752081e-05, + "loss": 1.0712, "step": 6264 }, { - "epoch": 0.17753407577432062, + "epoch": 0.24512872681743486, "grad_norm": 0.0, - "learning_rate": 1.888008187817951e-05, - "loss": 1.1588, + "learning_rate": 1.7669487506309874e-05, + "loss": 1.0237, "step": 6265 }, { - "epoch": 0.17756241321658306, + "epoch": 0.2451678535096643, "grad_norm": 0.0, - "learning_rate": 1.8879659815908457e-05, - "loss": 1.0065, + "learning_rate": 1.766867425193658e-05, + "loss": 1.1282, "step": 6266 }, { - "epoch": 0.17759075065884553, + "epoch": 0.24520698020189374, "grad_norm": 0.0, - "learning_rate": 1.887923767884073e-05, - "loss": 1.0194, + "learning_rate": 1.766786087441398e-05, + "loss": 1.1658, "step": 6267 }, { - "epoch": 0.177619088101108, + "epoch": 0.24524610689412318, "grad_norm": 0.0, - "learning_rate": 1.8878815466979886e-05, - "loss": 0.9196, + "learning_rate": 1.766704737375515e-05, + "loss": 1.1695, "step": 6268 }, { - "epoch": 0.17764742554337046, + "epoch": 0.2452852335863526, "grad_norm": 0.0, - "learning_rate": 1.8878393180329482e-05, - "loss": 0.9761, + "learning_rate": 1.7666233749973143e-05, + "loss": 1.0594, "step": 6269 }, { - "epoch": 0.17767576298563292, + "epoch": 0.24532436027858204, "grad_norm": 0.0, - "learning_rate": 1.8877970818893075e-05, - "loss": 1.0011, + "learning_rate": 1.7665420003081028e-05, + "loss": 1.1169, "step": 6270 }, { - "epoch": 0.1777041004278954, + "epoch": 0.24536348697081148, "grad_norm": 0.0, - "learning_rate": 1.8877548382674223e-05, - "loss": 1.0983, + "learning_rate": 1.7664606133091875e-05, + "loss": 1.1439, "step": 6271 }, { - "epoch": 0.17773243787015783, + "epoch": 0.24540261366304092, "grad_norm": 0.0, - "learning_rate": 1.8877125871676484e-05, - "loss": 1.0604, + "learning_rate": 1.7663792140018747e-05, + "loss": 1.2332, "step": 6272 }, { - "epoch": 0.1777607753124203, + "epoch": 0.24544174035527036, "grad_norm": 0.0, - "learning_rate": 1.8876703285903418e-05, - "loss": 1.0799, + "learning_rate": 1.7662978023874725e-05, + "loss": 1.1619, "step": 6273 }, { - "epoch": 0.17778911275468276, + "epoch": 0.2454808670474998, "grad_norm": 0.0, - "learning_rate": 1.8876280625358583e-05, - "loss": 1.0019, + "learning_rate": 1.7662163784672878e-05, + "loss": 1.1693, "step": 6274 }, { - "epoch": 0.17781745019694523, + "epoch": 0.24551999373972924, "grad_norm": 0.0, - "learning_rate": 1.8875857890045544e-05, - "loss": 0.9564, + "learning_rate": 1.7661349422426282e-05, + "loss": 1.0717, "step": 6275 }, { - "epoch": 0.1778457876392077, + "epoch": 0.24555912043195868, "grad_norm": 0.0, - "learning_rate": 1.8875435079967853e-05, - "loss": 1.0544, + "learning_rate": 1.7660534937148014e-05, + "loss": 1.0518, "step": 6276 }, { - "epoch": 0.17787412508147016, + "epoch": 0.24559824712418812, "grad_norm": 0.0, - "learning_rate": 1.887501219512908e-05, - "loss": 1.05, + "learning_rate": 1.7659720328851154e-05, + "loss": 1.1641, "step": 6277 }, { - "epoch": 0.1779024625237326, + "epoch": 0.24563737381641756, "grad_norm": 0.0, - "learning_rate": 1.8874589235532782e-05, - "loss": 0.9702, + "learning_rate": 1.7658905597548783e-05, + "loss": 1.2328, "step": 6278 }, { - "epoch": 0.17793079996599506, + "epoch": 0.245676500508647, "grad_norm": 0.0, - "learning_rate": 1.8874166201182526e-05, - "loss": 0.919, + "learning_rate": 1.7658090743253985e-05, + "loss": 1.2518, "step": 6279 }, { - "epoch": 0.17795913740825753, + "epoch": 0.24571562720087645, "grad_norm": 0.0, - "learning_rate": 1.8873743092081866e-05, - "loss": 1.0872, + "learning_rate": 1.7657275765979846e-05, + "loss": 1.1356, "step": 6280 }, { - "epoch": 0.17798747485052, + "epoch": 0.2457547538931059, "grad_norm": 0.0, - "learning_rate": 1.8873319908234377e-05, - "loss": 1.0344, + "learning_rate": 1.7656460665739453e-05, + "loss": 1.2781, "step": 6281 }, { - "epoch": 0.17801581229278246, + "epoch": 0.24579388058533533, "grad_norm": 0.0, - "learning_rate": 1.887289664964362e-05, - "loss": 1.0403, + "learning_rate": 1.76556454425459e-05, + "loss": 1.0737, "step": 6282 }, { - "epoch": 0.17804414973504493, + "epoch": 0.24583300727756474, "grad_norm": 0.0, - "learning_rate": 1.887247331631316e-05, - "loss": 0.9098, + "learning_rate": 1.7654830096412266e-05, + "loss": 1.1458, "step": 6283 }, { - "epoch": 0.17807248717730736, + "epoch": 0.24587213396979418, "grad_norm": 0.0, - "learning_rate": 1.8872049908246564e-05, - "loss": 1.1163, + "learning_rate": 1.765401462735166e-05, + "loss": 1.1665, "step": 6284 }, { - "epoch": 0.17810082461956983, + "epoch": 0.24591126066202362, "grad_norm": 0.0, - "learning_rate": 1.8871626425447392e-05, - "loss": 0.9787, + "learning_rate": 1.7653199035377167e-05, + "loss": 1.2396, "step": 6285 }, { - "epoch": 0.1781291620618323, + "epoch": 0.24595038735425306, "grad_norm": 0.0, - "learning_rate": 1.887120286791922e-05, - "loss": 1.066, + "learning_rate": 1.765238332050189e-05, + "loss": 1.1437, "step": 6286 }, { - "epoch": 0.17815749950409476, + "epoch": 0.2459895140464825, "grad_norm": 0.0, - "learning_rate": 1.887077923566561e-05, - "loss": 1.0161, + "learning_rate": 1.7651567482738925e-05, + "loss": 1.1379, "step": 6287 }, { - "epoch": 0.17818583694635723, + "epoch": 0.24602864073871195, "grad_norm": 0.0, - "learning_rate": 1.8870355528690134e-05, - "loss": 0.9692, + "learning_rate": 1.765075152210137e-05, + "loss": 1.1055, "step": 6288 }, { - "epoch": 0.1782141743886197, + "epoch": 0.2460677674309414, "grad_norm": 0.0, - "learning_rate": 1.886993174699636e-05, - "loss": 0.9974, + "learning_rate": 1.7649935438602338e-05, + "loss": 1.1137, "step": 6289 }, { - "epoch": 0.17824251183088213, + "epoch": 0.24610689412317083, "grad_norm": 0.0, - "learning_rate": 1.8869507890587854e-05, - "loss": 0.9633, + "learning_rate": 1.7649119232254925e-05, + "loss": 1.1068, "step": 6290 }, { - "epoch": 0.1782708492731446, + "epoch": 0.24614602081540027, "grad_norm": 0.0, - "learning_rate": 1.8869083959468194e-05, - "loss": 1.0166, + "learning_rate": 1.7648302903072244e-05, + "loss": 1.1664, "step": 6291 }, { - "epoch": 0.17829918671540707, + "epoch": 0.2461851475076297, "grad_norm": 0.0, - "learning_rate": 1.8868659953640943e-05, - "loss": 1.1055, + "learning_rate": 1.76474864510674e-05, + "loss": 1.1003, "step": 6292 }, { - "epoch": 0.17832752415766953, + "epoch": 0.24622427419985915, "grad_norm": 0.0, - "learning_rate": 1.8868235873109676e-05, - "loss": 1.0077, + "learning_rate": 1.764666987625351e-05, + "loss": 1.0598, "step": 6293 }, { - "epoch": 0.178355861599932, + "epoch": 0.2462634008920886, "grad_norm": 0.0, - "learning_rate": 1.8867811717877966e-05, - "loss": 1.0275, + "learning_rate": 1.764585317864368e-05, + "loss": 1.1723, "step": 6294 }, { - "epoch": 0.17838419904219446, + "epoch": 0.24630252758431803, "grad_norm": 0.0, - "learning_rate": 1.8867387487949385e-05, - "loss": 1.0397, + "learning_rate": 1.7645036358251033e-05, + "loss": 1.226, "step": 6295 }, { - "epoch": 0.1784125364844569, + "epoch": 0.24634165427654747, "grad_norm": 0.0, - "learning_rate": 1.8866963183327508e-05, - "loss": 0.9458, + "learning_rate": 1.764421941508868e-05, + "loss": 1.1836, "step": 6296 }, { - "epoch": 0.17844087392671937, + "epoch": 0.2463807809687769, "grad_norm": 0.0, - "learning_rate": 1.8866538804015905e-05, - "loss": 1.102, + "learning_rate": 1.764340234916974e-05, + "loss": 1.1185, "step": 6297 }, { - "epoch": 0.17846921136898183, + "epoch": 0.24641990766100633, "grad_norm": 0.0, - "learning_rate": 1.886611435001815e-05, - "loss": 1.0274, + "learning_rate": 1.7642585160507338e-05, + "loss": 1.0761, "step": 6298 }, { - "epoch": 0.1784975488112443, + "epoch": 0.24645903435323577, "grad_norm": 0.0, - "learning_rate": 1.8865689821337828e-05, - "loss": 1.0121, + "learning_rate": 1.764176784911459e-05, + "loss": 1.2067, "step": 6299 }, { - "epoch": 0.17852588625350677, + "epoch": 0.2464981610454652, "grad_norm": 0.0, - "learning_rate": 1.8865265217978503e-05, - "loss": 1.1281, + "learning_rate": 1.7640950415004635e-05, + "loss": 1.2329, "step": 6300 }, { - "epoch": 0.17855422369576923, + "epoch": 0.24653728773769465, "grad_norm": 0.0, - "learning_rate": 1.886484053994376e-05, - "loss": 1.0546, + "learning_rate": 1.7640132858190585e-05, + "loss": 1.1879, "step": 6301 }, { - "epoch": 0.17858256113803167, + "epoch": 0.2465764144299241, "grad_norm": 0.0, - "learning_rate": 1.8864415787237174e-05, - "loss": 0.9473, + "learning_rate": 1.7639315178685575e-05, + "loss": 1.0917, "step": 6302 }, { - "epoch": 0.17861089858029414, + "epoch": 0.24661554112215353, "grad_norm": 0.0, - "learning_rate": 1.886399095986232e-05, - "loss": 0.9611, + "learning_rate": 1.7638497376502736e-05, + "loss": 1.0922, "step": 6303 }, { - "epoch": 0.1786392360225566, + "epoch": 0.24665466781438297, "grad_norm": 0.0, - "learning_rate": 1.886356605782278e-05, - "loss": 0.9133, + "learning_rate": 1.7637679451655204e-05, + "loss": 1.1704, "step": 6304 }, { - "epoch": 0.17866757346481907, + "epoch": 0.2466937945066124, "grad_norm": 0.0, - "learning_rate": 1.8863141081122132e-05, - "loss": 0.9152, + "learning_rate": 1.7636861404156106e-05, + "loss": 0.9128, "step": 6305 }, { - "epoch": 0.17869591090708153, + "epoch": 0.24673292119884185, "grad_norm": 0.0, - "learning_rate": 1.8862716029763954e-05, - "loss": 0.926, + "learning_rate": 1.7636043234018587e-05, + "loss": 1.0104, "step": 6306 }, { - "epoch": 0.178724248349344, + "epoch": 0.2467720478910713, "grad_norm": 0.0, - "learning_rate": 1.886229090375183e-05, - "loss": 0.9852, + "learning_rate": 1.763522494125578e-05, + "loss": 1.1469, "step": 6307 }, { - "epoch": 0.17875258579160644, + "epoch": 0.24681117458330074, "grad_norm": 0.0, - "learning_rate": 1.8861865703089338e-05, - "loss": 1.1252, + "learning_rate": 1.763440652588083e-05, + "loss": 1.139, "step": 6308 }, { - "epoch": 0.1787809232338689, + "epoch": 0.24685030127553018, "grad_norm": 0.0, - "learning_rate": 1.886144042778006e-05, - "loss": 1.013, + "learning_rate": 1.7633587987906874e-05, + "loss": 1.1875, "step": 6309 }, { - "epoch": 0.17880926067613137, + "epoch": 0.24688942796775962, "grad_norm": 0.0, - "learning_rate": 1.8861015077827578e-05, - "loss": 1.1576, + "learning_rate": 1.7632769327347063e-05, + "loss": 1.1397, "step": 6310 }, { - "epoch": 0.17883759811839384, + "epoch": 0.24692855465998906, "grad_norm": 0.0, - "learning_rate": 1.8860589653235475e-05, - "loss": 1.0551, + "learning_rate": 1.763195054421454e-05, + "loss": 1.2203, "step": 6311 }, { - "epoch": 0.1788659355606563, + "epoch": 0.24696768135221847, "grad_norm": 0.0, - "learning_rate": 1.8860164154007335e-05, - "loss": 0.9479, + "learning_rate": 1.7631131638522458e-05, + "loss": 1.0554, "step": 6312 }, { - "epoch": 0.17889427300291877, + "epoch": 0.2470068080444479, "grad_norm": 0.0, - "learning_rate": 1.8859738580146746e-05, - "loss": 0.9948, + "learning_rate": 1.7630312610283958e-05, + "loss": 1.1062, "step": 6313 }, { - "epoch": 0.1789226104451812, + "epoch": 0.24704593473667735, "grad_norm": 0.0, - "learning_rate": 1.8859312931657285e-05, - "loss": 0.9604, + "learning_rate": 1.7629493459512205e-05, + "loss": 1.2245, "step": 6314 }, { - "epoch": 0.17895094788744367, + "epoch": 0.2470850614289068, "grad_norm": 0.0, - "learning_rate": 1.8858887208542542e-05, - "loss": 1.0424, + "learning_rate": 1.7628674186220344e-05, + "loss": 1.022, "step": 6315 }, { - "epoch": 0.17897928532970614, + "epoch": 0.24712418812113623, "grad_norm": 0.0, - "learning_rate": 1.8858461410806103e-05, - "loss": 1.0156, + "learning_rate": 1.7627854790421536e-05, + "loss": 1.1621, "step": 6316 }, { - "epoch": 0.1790076227719686, + "epoch": 0.24716331481336568, "grad_norm": 0.0, - "learning_rate": 1.8858035538451554e-05, - "loss": 1.0354, + "learning_rate": 1.7627035272128936e-05, + "loss": 1.0201, "step": 6317 }, { - "epoch": 0.17903596021423107, + "epoch": 0.24720244150559512, "grad_norm": 0.0, - "learning_rate": 1.885760959148248e-05, - "loss": 1.0416, + "learning_rate": 1.762621563135571e-05, + "loss": 1.3035, "step": 6318 }, { - "epoch": 0.17906429765649354, + "epoch": 0.24724156819782456, "grad_norm": 0.0, - "learning_rate": 1.8857183569902476e-05, - "loss": 0.9845, + "learning_rate": 1.7625395868115017e-05, + "loss": 1.1981, "step": 6319 }, { - "epoch": 0.17909263509875598, + "epoch": 0.247280694890054, "grad_norm": 0.0, - "learning_rate": 1.885675747371512e-05, - "loss": 0.9104, + "learning_rate": 1.762457598242002e-05, + "loss": 1.2208, "step": 6320 }, { - "epoch": 0.17912097254101844, + "epoch": 0.24731982158228344, "grad_norm": 0.0, - "learning_rate": 1.8856331302924013e-05, - "loss": 1.1904, + "learning_rate": 1.7623755974283885e-05, + "loss": 1.1589, "step": 6321 }, { - "epoch": 0.1791493099832809, + "epoch": 0.24735894827451288, "grad_norm": 0.0, - "learning_rate": 1.885590505753273e-05, - "loss": 0.8692, + "learning_rate": 1.7622935843719784e-05, + "loss": 1.1604, "step": 6322 }, { - "epoch": 0.17917764742554337, + "epoch": 0.24739807496674232, "grad_norm": 0.0, - "learning_rate": 1.8855478737544878e-05, - "loss": 1.0101, + "learning_rate": 1.7622115590740886e-05, + "loss": 1.1751, "step": 6323 }, { - "epoch": 0.17920598486780584, + "epoch": 0.24743720165897176, "grad_norm": 0.0, - "learning_rate": 1.885505234296404e-05, - "loss": 1.0184, + "learning_rate": 1.7621295215360363e-05, + "loss": 1.2125, "step": 6324 }, { - "epoch": 0.1792343223100683, + "epoch": 0.2474763283512012, "grad_norm": 0.0, - "learning_rate": 1.8854625873793807e-05, - "loss": 1.058, + "learning_rate": 1.7620474717591385e-05, + "loss": 1.2377, "step": 6325 }, { - "epoch": 0.17926265975233074, + "epoch": 0.24751545504343062, "grad_norm": 0.0, - "learning_rate": 1.8854199330037772e-05, - "loss": 1.1317, + "learning_rate": 1.761965409744714e-05, + "loss": 1.1553, "step": 6326 }, { - "epoch": 0.1792909971945932, + "epoch": 0.24755458173566006, "grad_norm": 0.0, - "learning_rate": 1.8853772711699524e-05, - "loss": 0.9507, + "learning_rate": 1.761883335494079e-05, + "loss": 1.2333, "step": 6327 }, { - "epoch": 0.17931933463685568, + "epoch": 0.2475937084278895, "grad_norm": 0.0, - "learning_rate": 1.8853346018782665e-05, - "loss": 1.0636, + "learning_rate": 1.7618012490085527e-05, + "loss": 1.1307, "step": 6328 }, { - "epoch": 0.17934767207911814, + "epoch": 0.24763283512011894, "grad_norm": 0.0, - "learning_rate": 1.8852919251290785e-05, - "loss": 0.9716, + "learning_rate": 1.761719150289453e-05, + "loss": 1.036, "step": 6329 }, { - "epoch": 0.1793760095213806, + "epoch": 0.24767196181234838, "grad_norm": 0.0, - "learning_rate": 1.8852492409227476e-05, - "loss": 1.0288, + "learning_rate": 1.761637039338098e-05, + "loss": 1.1365, "step": 6330 }, { - "epoch": 0.17940434696364307, + "epoch": 0.24771108850457782, "grad_norm": 0.0, - "learning_rate": 1.885206549259634e-05, - "loss": 1.024, + "learning_rate": 1.761554916155807e-05, + "loss": 1.2065, "step": 6331 }, { - "epoch": 0.1794326844059055, + "epoch": 0.24775021519680726, "grad_norm": 0.0, - "learning_rate": 1.8851638501400965e-05, - "loss": 1.1446, + "learning_rate": 1.7614727807438975e-05, + "loss": 1.0801, "step": 6332 }, { - "epoch": 0.17946102184816798, + "epoch": 0.2477893418890367, "grad_norm": 0.0, - "learning_rate": 1.8851211435644952e-05, - "loss": 1.0935, + "learning_rate": 1.76139063310369e-05, + "loss": 1.1436, "step": 6333 }, { - "epoch": 0.17948935929043044, + "epoch": 0.24782846858126614, "grad_norm": 0.0, - "learning_rate": 1.8850784295331903e-05, - "loss": 1.0403, + "learning_rate": 1.7613084732365027e-05, + "loss": 1.0488, "step": 6334 }, { - "epoch": 0.1795176967326929, + "epoch": 0.24786759527349558, "grad_norm": 0.0, - "learning_rate": 1.885035708046541e-05, - "loss": 1.022, + "learning_rate": 1.7612263011436554e-05, + "loss": 1.0684, "step": 6335 }, { - "epoch": 0.17954603417495538, + "epoch": 0.24790672196572502, "grad_norm": 0.0, - "learning_rate": 1.884992979104907e-05, - "loss": 1.0461, + "learning_rate": 1.7611441168264675e-05, + "loss": 1.1748, "step": 6336 }, { - "epoch": 0.17957437161721784, + "epoch": 0.24794584865795447, "grad_norm": 0.0, - "learning_rate": 1.8849502427086486e-05, - "loss": 0.9749, + "learning_rate": 1.761061920286259e-05, + "loss": 1.1545, "step": 6337 }, { - "epoch": 0.17960270905948028, + "epoch": 0.2479849753501839, "grad_norm": 0.0, - "learning_rate": 1.8849074988581258e-05, - "loss": 1.0619, + "learning_rate": 1.7609797115243495e-05, + "loss": 1.2521, "step": 6338 }, { - "epoch": 0.17963104650174275, + "epoch": 0.24802410204241335, "grad_norm": 0.0, - "learning_rate": 1.884864747553698e-05, - "loss": 0.9444, + "learning_rate": 1.7608974905420594e-05, + "loss": 1.2075, "step": 6339 }, { - "epoch": 0.1796593839440052, + "epoch": 0.24806322873464276, "grad_norm": 0.0, - "learning_rate": 1.8848219887957265e-05, - "loss": 0.9796, + "learning_rate": 1.7608152573407093e-05, + "loss": 1.1207, "step": 6340 }, { - "epoch": 0.17968772138626768, + "epoch": 0.2481023554268722, "grad_norm": 0.0, - "learning_rate": 1.8847792225845707e-05, - "loss": 0.8701, + "learning_rate": 1.7607330119216196e-05, + "loss": 1.1496, "step": 6341 }, { - "epoch": 0.17971605882853015, + "epoch": 0.24814148211910164, "grad_norm": 0.0, - "learning_rate": 1.8847364489205908e-05, - "loss": 1.0905, + "learning_rate": 1.7606507542861106e-05, + "loss": 1.2099, "step": 6342 }, { - "epoch": 0.1797443962707926, + "epoch": 0.24818060881133108, "grad_norm": 0.0, - "learning_rate": 1.884693667804147e-05, - "loss": 1.121, + "learning_rate": 1.760568484435504e-05, + "loss": 1.1321, "step": 6343 }, { - "epoch": 0.17977273371305505, + "epoch": 0.24821973550356052, "grad_norm": 0.0, - "learning_rate": 1.8846508792356007e-05, - "loss": 0.8089, + "learning_rate": 1.7604862023711204e-05, + "loss": 1.1121, "step": 6344 }, { - "epoch": 0.17980107115531752, + "epoch": 0.24825886219578996, "grad_norm": 0.0, - "learning_rate": 1.8846080832153107e-05, - "loss": 0.9826, + "learning_rate": 1.7604039080942814e-05, + "loss": 1.1444, "step": 6345 }, { - "epoch": 0.17982940859757998, + "epoch": 0.2482979888880194, "grad_norm": 0.0, - "learning_rate": 1.884565279743639e-05, - "loss": 1.0633, + "learning_rate": 1.7603216016063084e-05, + "loss": 1.1604, "step": 6346 }, { - "epoch": 0.17985774603984245, + "epoch": 0.24833711558024885, "grad_norm": 0.0, - "learning_rate": 1.8845224688209448e-05, - "loss": 0.9525, + "learning_rate": 1.760239282908523e-05, + "loss": 1.1429, "step": 6347 }, { - "epoch": 0.1798860834821049, + "epoch": 0.2483762422724783, "grad_norm": 0.0, - "learning_rate": 1.8844796504475898e-05, - "loss": 0.9869, + "learning_rate": 1.7601569520022477e-05, + "loss": 1.1286, "step": 6348 }, { - "epoch": 0.17991442092436738, + "epoch": 0.24841536896470773, "grad_norm": 0.0, - "learning_rate": 1.8844368246239343e-05, - "loss": 1.0397, + "learning_rate": 1.7600746088888042e-05, + "loss": 1.0516, "step": 6349 }, { - "epoch": 0.17994275836662982, + "epoch": 0.24845449565693717, "grad_norm": 0.0, - "learning_rate": 1.884393991350339e-05, - "loss": 0.9247, + "learning_rate": 1.759992253569515e-05, + "loss": 1.1088, "step": 6350 }, { - "epoch": 0.17997109580889228, + "epoch": 0.2484936223491666, "grad_norm": 0.0, - "learning_rate": 1.8843511506271647e-05, - "loss": 0.9897, + "learning_rate": 1.7599098860457024e-05, + "loss": 1.1547, "step": 6351 }, { - "epoch": 0.17999943325115475, + "epoch": 0.24853274904139605, "grad_norm": 0.0, - "learning_rate": 1.884308302454772e-05, - "loss": 0.9555, + "learning_rate": 1.7598275063186895e-05, + "loss": 1.0536, "step": 6352 }, { - "epoch": 0.18002777069341722, + "epoch": 0.2485718757336255, "grad_norm": 0.0, - "learning_rate": 1.8842654468335226e-05, - "loss": 1.0539, + "learning_rate": 1.7597451143897987e-05, + "loss": 1.2966, "step": 6353 }, { - "epoch": 0.18005610813567968, + "epoch": 0.2486110024258549, "grad_norm": 0.0, - "learning_rate": 1.8842225837637765e-05, - "loss": 1.0904, + "learning_rate": 1.7596627102603534e-05, + "loss": 1.2429, "step": 6354 }, { - "epoch": 0.18008444557794215, + "epoch": 0.24865012911808435, "grad_norm": 0.0, - "learning_rate": 1.8841797132458953e-05, - "loss": 1.1022, + "learning_rate": 1.7595802939316767e-05, + "loss": 1.0941, "step": 6355 }, { - "epoch": 0.1801127830202046, + "epoch": 0.2486892558103138, "grad_norm": 0.0, - "learning_rate": 1.88413683528024e-05, - "loss": 1.025, + "learning_rate": 1.7594978654050927e-05, + "loss": 1.1721, "step": 6356 }, { - "epoch": 0.18014112046246705, + "epoch": 0.24872838250254323, "grad_norm": 0.0, - "learning_rate": 1.8840939498671716e-05, - "loss": 0.9012, + "learning_rate": 1.759415424681924e-05, + "loss": 1.1299, "step": 6357 }, { - "epoch": 0.18016945790472952, + "epoch": 0.24876750919477267, "grad_norm": 0.0, - "learning_rate": 1.8840510570070522e-05, - "loss": 0.9845, + "learning_rate": 1.759332971763496e-05, + "loss": 1.1166, "step": 6358 }, { - "epoch": 0.18019779534699198, + "epoch": 0.2488066358870021, "grad_norm": 0.0, - "learning_rate": 1.884008156700242e-05, - "loss": 1.0287, + "learning_rate": 1.7592505066511316e-05, + "loss": 1.1246, "step": 6359 }, { - "epoch": 0.18022613278925445, + "epoch": 0.24884576257923155, "grad_norm": 0.0, - "learning_rate": 1.883965248947103e-05, - "loss": 0.906, + "learning_rate": 1.7591680293461553e-05, + "loss": 1.2477, "step": 6360 }, { - "epoch": 0.18025447023151692, + "epoch": 0.248884889271461, "grad_norm": 0.0, - "learning_rate": 1.8839223337479966e-05, - "loss": 1.0007, + "learning_rate": 1.759085539849892e-05, + "loss": 1.1842, "step": 6361 }, { - "epoch": 0.18028280767377936, + "epoch": 0.24892401596369043, "grad_norm": 0.0, - "learning_rate": 1.883879411103284e-05, - "loss": 1.0038, + "learning_rate": 1.759003038163666e-05, + "loss": 1.1163, "step": 6362 }, { - "epoch": 0.18031114511604182, + "epoch": 0.24896314265591987, "grad_norm": 0.0, - "learning_rate": 1.883836481013327e-05, - "loss": 1.15, + "learning_rate": 1.7589205242888027e-05, + "loss": 1.2144, "step": 6363 }, { - "epoch": 0.1803394825583043, + "epoch": 0.2490022693481493, "grad_norm": 0.0, - "learning_rate": 1.8837935434784865e-05, - "loss": 1.059, + "learning_rate": 1.7588379982266262e-05, + "loss": 1.2949, "step": 6364 }, { - "epoch": 0.18036782000056675, + "epoch": 0.24904139604037875, "grad_norm": 0.0, - "learning_rate": 1.8837505984991254e-05, - "loss": 1.0091, + "learning_rate": 1.7587554599784625e-05, + "loss": 1.1249, "step": 6365 }, { - "epoch": 0.18039615744282922, + "epoch": 0.2490805227326082, "grad_norm": 0.0, - "learning_rate": 1.883707646075605e-05, - "loss": 1.0313, + "learning_rate": 1.7586729095456366e-05, + "loss": 1.1048, "step": 6366 }, { - "epoch": 0.18042449488509169, + "epoch": 0.24911964942483764, "grad_norm": 0.0, - "learning_rate": 1.8836646862082864e-05, - "loss": 1.0083, + "learning_rate": 1.758590346929475e-05, + "loss": 1.1537, "step": 6367 }, { - "epoch": 0.18045283232735412, + "epoch": 0.24915877611706708, "grad_norm": 0.0, - "learning_rate": 1.8836217188975325e-05, - "loss": 1.0228, + "learning_rate": 1.7585077721313026e-05, + "loss": 1.212, "step": 6368 }, { - "epoch": 0.1804811697696166, + "epoch": 0.2491979028092965, "grad_norm": 0.0, - "learning_rate": 1.8835787441437043e-05, - "loss": 1.0751, + "learning_rate": 1.758425185152446e-05, + "loss": 1.3311, "step": 6369 }, { - "epoch": 0.18050950721187906, + "epoch": 0.24923702950152593, "grad_norm": 0.0, - "learning_rate": 1.8835357619471642e-05, - "loss": 1.1034, + "learning_rate": 1.7583425859942312e-05, + "loss": 1.1987, "step": 6370 }, { - "epoch": 0.18053784465414152, + "epoch": 0.24927615619375537, "grad_norm": 0.0, - "learning_rate": 1.883492772308275e-05, - "loss": 1.0001, + "learning_rate": 1.7582599746579848e-05, + "loss": 1.146, "step": 6371 }, { - "epoch": 0.180566182096404, + "epoch": 0.2493152828859848, "grad_norm": 0.0, - "learning_rate": 1.8834497752273975e-05, - "loss": 0.9543, + "learning_rate": 1.7581773511450336e-05, + "loss": 1.0363, "step": 6372 }, { - "epoch": 0.18059451953866645, + "epoch": 0.24935440957821425, "grad_norm": 0.0, - "learning_rate": 1.8834067707048948e-05, - "loss": 0.9859, + "learning_rate": 1.7580947154567038e-05, + "loss": 1.1771, "step": 6373 }, { - "epoch": 0.1806228569809289, + "epoch": 0.2493935362704437, "grad_norm": 0.0, - "learning_rate": 1.8833637587411284e-05, - "loss": 1.036, + "learning_rate": 1.758012067594323e-05, + "loss": 1.2597, "step": 6374 }, { - "epoch": 0.18065119442319136, + "epoch": 0.24943266296267314, "grad_norm": 0.0, - "learning_rate": 1.883320739336461e-05, - "loss": 1.0012, + "learning_rate": 1.757929407559218e-05, + "loss": 0.9346, "step": 6375 }, { - "epoch": 0.18067953186545382, + "epoch": 0.24947178965490258, "grad_norm": 0.0, - "learning_rate": 1.8832777124912556e-05, - "loss": 1.0469, + "learning_rate": 1.757846735352717e-05, + "loss": 1.1714, "step": 6376 }, { - "epoch": 0.1807078693077163, + "epoch": 0.24951091634713202, "grad_norm": 0.0, - "learning_rate": 1.8832346782058736e-05, - "loss": 1.1249, + "learning_rate": 1.7577640509761465e-05, + "loss": 1.1741, "step": 6377 }, { - "epoch": 0.18073620674997876, + "epoch": 0.24955004303936146, "grad_norm": 0.0, - "learning_rate": 1.883191636480678e-05, - "loss": 0.9862, + "learning_rate": 1.757681354430835e-05, + "loss": 1.1466, "step": 6378 }, { - "epoch": 0.18076454419224122, + "epoch": 0.2495891697315909, "grad_norm": 0.0, - "learning_rate": 1.8831485873160312e-05, - "loss": 1.0045, + "learning_rate": 1.7575986457181104e-05, + "loss": 1.2261, "step": 6379 }, { - "epoch": 0.18079288163450366, + "epoch": 0.24962829642382034, "grad_norm": 0.0, - "learning_rate": 1.883105530712296e-05, - "loss": 0.9682, + "learning_rate": 1.757515924839301e-05, + "loss": 1.1629, "step": 6380 }, { - "epoch": 0.18082121907676613, + "epoch": 0.24966742311604978, "grad_norm": 0.0, - "learning_rate": 1.883062466669835e-05, - "loss": 1.0807, + "learning_rate": 1.757433191795735e-05, + "loss": 1.237, "step": 6381 }, { - "epoch": 0.1808495565190286, + "epoch": 0.24970654980827922, "grad_norm": 0.0, - "learning_rate": 1.883019395189011e-05, - "loss": 1.0309, + "learning_rate": 1.757350446588741e-05, + "loss": 1.1341, "step": 6382 }, { - "epoch": 0.18087789396129106, + "epoch": 0.24974567650050863, "grad_norm": 0.0, - "learning_rate": 1.8829763162701866e-05, - "loss": 1.0187, + "learning_rate": 1.757267689219648e-05, + "loss": 1.0234, "step": 6383 }, { - "epoch": 0.18090623140355352, + "epoch": 0.24978480319273808, "grad_norm": 0.0, - "learning_rate": 1.8829332299137245e-05, - "loss": 1.0111, + "learning_rate": 1.7571849196897844e-05, + "loss": 1.2313, "step": 6384 }, { - "epoch": 0.18093456884581596, + "epoch": 0.24982392988496752, "grad_norm": 0.0, - "learning_rate": 1.8828901361199885e-05, - "loss": 0.9877, + "learning_rate": 1.7571021380004804e-05, + "loss": 1.1319, "step": 6385 }, { - "epoch": 0.18096290628807843, + "epoch": 0.24986305657719696, "grad_norm": 0.0, - "learning_rate": 1.882847034889341e-05, - "loss": 1.0034, + "learning_rate": 1.7570193441530646e-05, + "loss": 1.0327, "step": 6386 }, { - "epoch": 0.1809912437303409, + "epoch": 0.2499021832694264, "grad_norm": 0.0, - "learning_rate": 1.8828039262221448e-05, - "loss": 1.021, + "learning_rate": 1.7569365381488666e-05, + "loss": 1.2347, "step": 6387 }, { - "epoch": 0.18101958117260336, + "epoch": 0.24994130996165584, "grad_norm": 0.0, - "learning_rate": 1.8827608101187634e-05, - "loss": 0.9648, + "learning_rate": 1.7568537199892163e-05, + "loss": 1.2358, "step": 6388 }, { - "epoch": 0.18104791861486583, + "epoch": 0.24998043665388528, "grad_norm": 0.0, - "learning_rate": 1.8827176865795597e-05, - "loss": 1.0284, + "learning_rate": 1.7567708896754435e-05, + "loss": 1.1289, "step": 6389 }, { - "epoch": 0.1810762560571283, + "epoch": 0.2500195633461147, "grad_norm": 0.0, - "learning_rate": 1.8826745556048975e-05, - "loss": 1.1326, + "learning_rate": 1.7566880472088787e-05, + "loss": 1.0883, "step": 6390 }, { - "epoch": 0.18110459349939073, + "epoch": 0.25005869003834413, "grad_norm": 0.0, - "learning_rate": 1.8826314171951393e-05, - "loss": 1.0657, + "learning_rate": 1.7566051925908517e-05, + "loss": 1.1919, "step": 6391 }, { - "epoch": 0.1811329309416532, + "epoch": 0.2500978167305736, "grad_norm": 0.0, - "learning_rate": 1.8825882713506493e-05, - "loss": 1.0317, + "learning_rate": 1.756522325822694e-05, + "loss": 1.2599, "step": 6392 }, { - "epoch": 0.18116126838391566, + "epoch": 0.250136943422803, "grad_norm": 0.0, - "learning_rate": 1.8825451180717905e-05, - "loss": 0.9847, + "learning_rate": 1.7564394469057357e-05, + "loss": 1.1551, "step": 6393 }, { - "epoch": 0.18118960582617813, + "epoch": 0.25017607011503246, "grad_norm": 0.0, - "learning_rate": 1.8825019573589264e-05, - "loss": 0.9645, + "learning_rate": 1.756356555841307e-05, + "loss": 1.2468, "step": 6394 }, { - "epoch": 0.1812179432684406, + "epoch": 0.2502151968072619, "grad_norm": 0.0, - "learning_rate": 1.8824587892124208e-05, - "loss": 1.0543, + "learning_rate": 1.7562736526307404e-05, + "loss": 1.1333, "step": 6395 }, { - "epoch": 0.18124628071070306, + "epoch": 0.25025432349949134, "grad_norm": 0.0, - "learning_rate": 1.882415613632637e-05, - "loss": 0.892, + "learning_rate": 1.7561907372753665e-05, + "loss": 1.2029, "step": 6396 }, { - "epoch": 0.1812746181529655, + "epoch": 0.2502934501917208, "grad_norm": 0.0, - "learning_rate": 1.8823724306199385e-05, - "loss": 1.0325, + "learning_rate": 1.7561078097765166e-05, + "loss": 1.2497, "step": 6397 }, { - "epoch": 0.18130295559522797, + "epoch": 0.2503325768839502, "grad_norm": 0.0, - "learning_rate": 1.8823292401746895e-05, - "loss": 0.9429, + "learning_rate": 1.756024870135523e-05, + "loss": 1.0197, "step": 6398 }, { - "epoch": 0.18133129303749043, + "epoch": 0.25037170357617966, "grad_norm": 0.0, - "learning_rate": 1.882286042297254e-05, - "loss": 1.092, + "learning_rate": 1.7559419183537175e-05, + "loss": 1.1938, "step": 6399 }, { - "epoch": 0.1813596304797529, + "epoch": 0.2504108302684091, "grad_norm": 0.0, - "learning_rate": 1.882242836987995e-05, - "loss": 1.0936, + "learning_rate": 1.755858954432432e-05, + "loss": 1.1078, "step": 6400 }, { - "epoch": 0.18138796792201536, + "epoch": 0.25044995696063854, "grad_norm": 0.0, - "learning_rate": 1.8821996242472772e-05, - "loss": 1.0823, + "learning_rate": 1.7557759783729985e-05, + "loss": 1.0722, "step": 6401 }, { - "epoch": 0.18141630536427783, + "epoch": 0.250489083652868, "grad_norm": 0.0, - "learning_rate": 1.8821564040754646e-05, - "loss": 1.0248, + "learning_rate": 1.7556929901767502e-05, + "loss": 1.1418, "step": 6402 }, { - "epoch": 0.18144464280654027, + "epoch": 0.2505282103450974, "grad_norm": 0.0, - "learning_rate": 1.882113176472921e-05, - "loss": 1.2014, + "learning_rate": 1.7556099898450192e-05, + "loss": 1.0853, "step": 6403 }, { - "epoch": 0.18147298024880273, + "epoch": 0.25056733703732686, "grad_norm": 0.0, - "learning_rate": 1.88206994144001e-05, - "loss": 1.0306, + "learning_rate": 1.7555269773791387e-05, + "loss": 1.1152, "step": 6404 }, { - "epoch": 0.1815013176910652, + "epoch": 0.2506064637295563, "grad_norm": 0.0, - "learning_rate": 1.882026698977097e-05, - "loss": 1.0706, + "learning_rate": 1.7554439527804413e-05, + "loss": 1.0639, "step": 6405 }, { - "epoch": 0.18152965513332767, + "epoch": 0.25064559042178575, "grad_norm": 0.0, - "learning_rate": 1.881983449084545e-05, - "loss": 1.0229, + "learning_rate": 1.7553609160502612e-05, + "loss": 1.2256, "step": 6406 }, { - "epoch": 0.18155799257559013, + "epoch": 0.2506847171140152, "grad_norm": 0.0, - "learning_rate": 1.8819401917627195e-05, - "loss": 0.9113, + "learning_rate": 1.755277867189931e-05, + "loss": 1.0486, "step": 6407 }, { - "epoch": 0.1815863300178526, + "epoch": 0.25072384380624463, "grad_norm": 0.0, - "learning_rate": 1.881896927011984e-05, - "loss": 0.9897, + "learning_rate": 1.7551948062007845e-05, + "loss": 1.2196, "step": 6408 }, { - "epoch": 0.18161466746011504, + "epoch": 0.25076297049847407, "grad_norm": 0.0, - "learning_rate": 1.881853654832703e-05, - "loss": 0.925, + "learning_rate": 1.755111733084156e-05, + "loss": 1.0458, "step": 6409 }, { - "epoch": 0.1816430049023775, + "epoch": 0.2508020971907035, "grad_norm": 0.0, - "learning_rate": 1.8818103752252414e-05, - "loss": 1.0437, + "learning_rate": 1.7550286478413792e-05, + "loss": 1.1657, "step": 6410 }, { - "epoch": 0.18167134234463997, + "epoch": 0.25084122388293295, "grad_norm": 0.0, - "learning_rate": 1.8817670881899635e-05, - "loss": 0.9592, + "learning_rate": 1.7549455504737886e-05, + "loss": 1.0806, "step": 6411 }, { - "epoch": 0.18169967978690243, + "epoch": 0.2508803505751624, "grad_norm": 0.0, - "learning_rate": 1.881723793727234e-05, - "loss": 1.1201, + "learning_rate": 1.7548624409827184e-05, + "loss": 1.1355, "step": 6412 }, { - "epoch": 0.1817280172291649, + "epoch": 0.25091947726739183, "grad_norm": 0.0, - "learning_rate": 1.8816804918374175e-05, - "loss": 1.0943, + "learning_rate": 1.754779319369503e-05, + "loss": 1.1073, "step": 6413 }, { - "epoch": 0.18175635467142737, + "epoch": 0.2509586039596213, "grad_norm": 0.0, - "learning_rate": 1.881637182520879e-05, - "loss": 0.9897, + "learning_rate": 1.754696185635478e-05, + "loss": 1.2321, "step": 6414 }, { - "epoch": 0.1817846921136898, + "epoch": 0.2509977306518507, "grad_norm": 0.0, - "learning_rate": 1.8815938657779828e-05, - "loss": 1.0078, + "learning_rate": 1.7546130397819778e-05, + "loss": 1.255, "step": 6415 }, { - "epoch": 0.18181302955595227, + "epoch": 0.25103685734408016, "grad_norm": 0.0, - "learning_rate": 1.8815505416090946e-05, - "loss": 0.9207, + "learning_rate": 1.7545298818103375e-05, + "loss": 1.226, "step": 6416 }, { - "epoch": 0.18184136699821474, + "epoch": 0.2510759840363096, "grad_norm": 0.0, - "learning_rate": 1.8815072100145785e-05, - "loss": 0.9783, + "learning_rate": 1.754446711721893e-05, + "loss": 1.1137, "step": 6417 }, { - "epoch": 0.1818697044404772, + "epoch": 0.251115110728539, "grad_norm": 0.0, - "learning_rate": 1.8814638709947995e-05, - "loss": 1.0822, + "learning_rate": 1.7543635295179796e-05, + "loss": 1.1506, "step": 6418 }, { - "epoch": 0.18189804188273967, + "epoch": 0.2511542374207684, "grad_norm": 0.0, - "learning_rate": 1.8814205245501235e-05, - "loss": 1.0256, + "learning_rate": 1.754280335199933e-05, + "loss": 1.2497, "step": 6419 }, { - "epoch": 0.18192637932500214, + "epoch": 0.25119336411299786, "grad_norm": 0.0, - "learning_rate": 1.881377170680915e-05, - "loss": 1.0003, + "learning_rate": 1.7541971287690895e-05, + "loss": 1.1844, "step": 6420 }, { - "epoch": 0.18195471676726457, + "epoch": 0.2512324908052273, "grad_norm": 0.0, - "learning_rate": 1.8813338093875393e-05, - "loss": 0.9441, + "learning_rate": 1.7541139102267855e-05, + "loss": 1.2166, "step": 6421 }, { - "epoch": 0.18198305420952704, + "epoch": 0.25127161749745675, "grad_norm": 0.0, - "learning_rate": 1.8812904406703617e-05, - "loss": 1.0059, + "learning_rate": 1.7540306795743566e-05, + "loss": 1.1566, "step": 6422 }, { - "epoch": 0.1820113916517895, + "epoch": 0.2513107441896862, "grad_norm": 0.0, - "learning_rate": 1.8812470645297473e-05, - "loss": 0.9257, + "learning_rate": 1.75394743681314e-05, + "loss": 1.0967, "step": 6423 }, { - "epoch": 0.18203972909405197, + "epoch": 0.2513498708819156, "grad_norm": 0.0, - "learning_rate": 1.8812036809660618e-05, - "loss": 1.0234, + "learning_rate": 1.7538641819444722e-05, + "loss": 0.9797, "step": 6424 }, { - "epoch": 0.18206806653631444, + "epoch": 0.25138899757414507, "grad_norm": 0.0, - "learning_rate": 1.8811602899796703e-05, - "loss": 0.9157, + "learning_rate": 1.7537809149696907e-05, + "loss": 1.0384, "step": 6425 }, { - "epoch": 0.1820964039785769, + "epoch": 0.2514281242663745, "grad_norm": 0.0, - "learning_rate": 1.8811168915709385e-05, - "loss": 0.9155, + "learning_rate": 1.753697635890132e-05, + "loss": 1.1712, "step": 6426 }, { - "epoch": 0.18212474142083934, + "epoch": 0.25146725095860395, "grad_norm": 0.0, - "learning_rate": 1.881073485740232e-05, - "loss": 1.057, + "learning_rate": 1.7536143447071336e-05, + "loss": 1.0718, "step": 6427 }, { - "epoch": 0.1821530788631018, + "epoch": 0.2515063776508334, "grad_norm": 0.0, - "learning_rate": 1.8810300724879163e-05, - "loss": 0.937, + "learning_rate": 1.7535310414220333e-05, + "loss": 1.1357, "step": 6428 }, { - "epoch": 0.18218141630536427, + "epoch": 0.25154550434306283, "grad_norm": 0.0, - "learning_rate": 1.880986651814357e-05, - "loss": 1.092, + "learning_rate": 1.7534477260361685e-05, + "loss": 1.0759, "step": 6429 }, { - "epoch": 0.18220975374762674, + "epoch": 0.25158463103529227, "grad_norm": 0.0, - "learning_rate": 1.8809432237199204e-05, - "loss": 1.0199, + "learning_rate": 1.7533643985508775e-05, + "loss": 1.126, "step": 6430 }, { - "epoch": 0.1822380911898892, + "epoch": 0.2516237577275217, "grad_norm": 0.0, - "learning_rate": 1.8808997882049717e-05, - "loss": 0.9655, + "learning_rate": 1.7532810589674983e-05, + "loss": 1.1879, "step": 6431 }, { - "epoch": 0.18226642863215167, + "epoch": 0.25166288441975115, "grad_norm": 0.0, - "learning_rate": 1.8808563452698768e-05, - "loss": 1.0123, + "learning_rate": 1.753197707287369e-05, + "loss": 1.1602, "step": 6432 }, { - "epoch": 0.1822947660744141, + "epoch": 0.2517020111119806, "grad_norm": 0.0, - "learning_rate": 1.880812894915002e-05, - "loss": 1.0651, + "learning_rate": 1.7531143435118284e-05, + "loss": 1.173, "step": 6433 }, { - "epoch": 0.18232310351667658, + "epoch": 0.25174113780421004, "grad_norm": 0.0, - "learning_rate": 1.8807694371407132e-05, - "loss": 0.9951, + "learning_rate": 1.7530309676422157e-05, + "loss": 1.2012, "step": 6434 }, { - "epoch": 0.18235144095893904, + "epoch": 0.2517802644964395, "grad_norm": 0.0, - "learning_rate": 1.880725971947376e-05, - "loss": 0.9612, + "learning_rate": 1.7529475796798686e-05, + "loss": 1.2217, "step": 6435 }, { - "epoch": 0.1823797784012015, + "epoch": 0.2518193911886689, "grad_norm": 0.0, - "learning_rate": 1.880682499335357e-05, - "loss": 0.9991, + "learning_rate": 1.7528641796261273e-05, + "loss": 1.1727, "step": 6436 }, { - "epoch": 0.18240811584346397, + "epoch": 0.25185851788089836, "grad_norm": 0.0, - "learning_rate": 1.8806390193050223e-05, - "loss": 0.8877, + "learning_rate": 1.7527807674823303e-05, + "loss": 0.9862, "step": 6437 }, { - "epoch": 0.18243645328572644, + "epoch": 0.2518976445731278, "grad_norm": 0.0, - "learning_rate": 1.880595531856738e-05, - "loss": 0.9537, + "learning_rate": 1.7526973432498177e-05, + "loss": 1.1044, "step": 6438 }, { - "epoch": 0.18246479072798888, + "epoch": 0.25193677126535724, "grad_norm": 0.0, - "learning_rate": 1.8805520369908707e-05, - "loss": 1.1339, + "learning_rate": 1.7526139069299287e-05, + "loss": 1.1704, "step": 6439 }, { - "epoch": 0.18249312817025135, + "epoch": 0.2519758979575867, "grad_norm": 0.0, - "learning_rate": 1.880508534707787e-05, - "loss": 1.084, + "learning_rate": 1.7525304585240034e-05, + "loss": 1.1804, "step": 6440 }, { - "epoch": 0.1825214656125138, + "epoch": 0.2520150246498161, "grad_norm": 0.0, - "learning_rate": 1.8804650250078525e-05, - "loss": 1.0498, + "learning_rate": 1.7524469980333822e-05, + "loss": 1.0767, "step": 6441 }, { - "epoch": 0.18254980305477628, + "epoch": 0.25205415134204556, "grad_norm": 0.0, - "learning_rate": 1.880421507891434e-05, - "loss": 1.0681, + "learning_rate": 1.752363525459405e-05, + "loss": 1.2739, "step": 6442 }, { - "epoch": 0.18257814049703874, + "epoch": 0.252093278034275, "grad_norm": 0.0, - "learning_rate": 1.8803779833588983e-05, - "loss": 0.9621, + "learning_rate": 1.7522800408034125e-05, + "loss": 1.0349, "step": 6443 }, { - "epoch": 0.1826064779393012, + "epoch": 0.25213240472650444, "grad_norm": 0.0, - "learning_rate": 1.8803344514106123e-05, - "loss": 1.1356, + "learning_rate": 1.7521965440667448e-05, + "loss": 1.2247, "step": 6444 }, { - "epoch": 0.18263481538156365, + "epoch": 0.2521715314187339, "grad_norm": 0.0, - "learning_rate": 1.880290912046942e-05, - "loss": 0.9819, + "learning_rate": 1.7521130352507434e-05, + "loss": 1.155, "step": 6445 }, { - "epoch": 0.1826631528238261, + "epoch": 0.2522106581109633, "grad_norm": 0.0, - "learning_rate": 1.8802473652682543e-05, - "loss": 1.041, + "learning_rate": 1.7520295143567492e-05, + "loss": 1.1685, "step": 6446 }, { - "epoch": 0.18269149026608858, + "epoch": 0.2522497848031927, "grad_norm": 0.0, - "learning_rate": 1.8802038110749166e-05, - "loss": 0.948, + "learning_rate": 1.751945981386103e-05, + "loss": 1.2903, "step": 6447 }, { - "epoch": 0.18271982770835105, + "epoch": 0.25228891149542215, "grad_norm": 0.0, - "learning_rate": 1.880160249467295e-05, - "loss": 1.0231, + "learning_rate": 1.751862436340147e-05, + "loss": 1.0691, "step": 6448 }, { - "epoch": 0.1827481651506135, + "epoch": 0.2523280381876516, "grad_norm": 0.0, - "learning_rate": 1.880116680445757e-05, - "loss": 1.0029, + "learning_rate": 1.7517788792202225e-05, + "loss": 1.1782, "step": 6449 }, { - "epoch": 0.18277650259287598, + "epoch": 0.25236716487988103, "grad_norm": 0.0, - "learning_rate": 1.880073104010669e-05, - "loss": 0.9724, + "learning_rate": 1.7516953100276707e-05, + "loss": 1.1552, "step": 6450 }, { - "epoch": 0.18280484003513842, + "epoch": 0.2524062915721105, "grad_norm": 0.0, - "learning_rate": 1.880029520162399e-05, - "loss": 1.1026, + "learning_rate": 1.7516117287638345e-05, + "loss": 1.0809, "step": 6451 }, { - "epoch": 0.18283317747740088, + "epoch": 0.2524454182643399, "grad_norm": 0.0, - "learning_rate": 1.879985928901313e-05, - "loss": 1.0503, + "learning_rate": 1.7515281354300556e-05, + "loss": 1.1414, "step": 6452 }, { - "epoch": 0.18286151491966335, + "epoch": 0.25248454495656936, "grad_norm": 0.0, - "learning_rate": 1.8799423302277793e-05, - "loss": 1.138, + "learning_rate": 1.7514445300276767e-05, + "loss": 1.0562, "step": 6453 }, { - "epoch": 0.18288985236192581, + "epoch": 0.2525236716487988, "grad_norm": 0.0, - "learning_rate": 1.879898724142164e-05, - "loss": 1.0311, + "learning_rate": 1.75136091255804e-05, + "loss": 0.9898, "step": 6454 }, { - "epoch": 0.18291818980418828, + "epoch": 0.25256279834102824, "grad_norm": 0.0, - "learning_rate": 1.8798551106448354e-05, - "loss": 0.9713, + "learning_rate": 1.751277283022489e-05, + "loss": 1.2126, "step": 6455 }, { - "epoch": 0.18294652724645075, + "epoch": 0.2526019250332577, "grad_norm": 0.0, - "learning_rate": 1.8798114897361602e-05, - "loss": 0.9519, + "learning_rate": 1.7511936414223658e-05, + "loss": 1.0771, "step": 6456 }, { - "epoch": 0.18297486468871318, + "epoch": 0.2526410517254871, "grad_norm": 0.0, - "learning_rate": 1.8797678614165065e-05, - "loss": 1.1326, + "learning_rate": 1.751109987759014e-05, + "loss": 1.0921, "step": 6457 }, { - "epoch": 0.18300320213097565, + "epoch": 0.25268017841771656, "grad_norm": 0.0, - "learning_rate": 1.8797242256862414e-05, - "loss": 1.0005, + "learning_rate": 1.7510263220337772e-05, + "loss": 1.017, "step": 6458 }, { - "epoch": 0.18303153957323812, + "epoch": 0.252719305109946, "grad_norm": 0.0, - "learning_rate": 1.8796805825457324e-05, - "loss": 1.0248, + "learning_rate": 1.750942644247999e-05, + "loss": 1.2403, "step": 6459 }, { - "epoch": 0.18305987701550058, + "epoch": 0.25275843180217544, "grad_norm": 0.0, - "learning_rate": 1.879636931995347e-05, - "loss": 1.0412, + "learning_rate": 1.7508589544030224e-05, + "loss": 1.1357, "step": 6460 }, { - "epoch": 0.18308821445776305, + "epoch": 0.2527975584944049, "grad_norm": 0.0, - "learning_rate": 1.879593274035453e-05, - "loss": 1.0616, + "learning_rate": 1.7507752525001924e-05, + "loss": 1.095, "step": 6461 }, { - "epoch": 0.18311655190002551, + "epoch": 0.2528366851866343, "grad_norm": 0.0, - "learning_rate": 1.879549608666418e-05, - "loss": 1.0871, + "learning_rate": 1.7506915385408523e-05, + "loss": 1.2614, "step": 6462 }, { - "epoch": 0.18314488934228795, + "epoch": 0.25287581187886377, "grad_norm": 0.0, - "learning_rate": 1.8795059358886108e-05, - "loss": 1.1143, + "learning_rate": 1.750607812526347e-05, + "loss": 1.1393, "step": 6463 }, { - "epoch": 0.18317322678455042, + "epoch": 0.2529149385710932, "grad_norm": 0.0, - "learning_rate": 1.8794622557023982e-05, - "loss": 1.1336, + "learning_rate": 1.75052407445802e-05, + "loss": 1.1957, "step": 6464 }, { - "epoch": 0.18320156422681289, + "epoch": 0.25295406526332265, "grad_norm": 0.0, - "learning_rate": 1.879418568108148e-05, - "loss": 0.9266, + "learning_rate": 1.750440324337218e-05, + "loss": 1.0427, "step": 6465 }, { - "epoch": 0.18322990166907535, + "epoch": 0.2529931919555521, "grad_norm": 0.0, - "learning_rate": 1.8793748731062293e-05, - "loss": 1.0002, + "learning_rate": 1.750356562165284e-05, + "loss": 1.0795, "step": 6466 }, { - "epoch": 0.18325823911133782, + "epoch": 0.25303231864778153, "grad_norm": 0.0, - "learning_rate": 1.8793311706970088e-05, - "loss": 0.9634, + "learning_rate": 1.750272787943564e-05, + "loss": 1.194, "step": 6467 }, { - "epoch": 0.18328657655360028, + "epoch": 0.25307144534001097, "grad_norm": 0.0, - "learning_rate": 1.879287460880856e-05, - "loss": 1.1235, + "learning_rate": 1.7501890016734032e-05, + "loss": 1.1387, "step": 6468 }, { - "epoch": 0.18331491399586272, + "epoch": 0.2531105720322404, "grad_norm": 0.0, - "learning_rate": 1.8792437436581382e-05, - "loss": 1.1277, + "learning_rate": 1.750105203356147e-05, + "loss": 1.1731, "step": 6469 }, { - "epoch": 0.1833432514381252, + "epoch": 0.25314969872446985, "grad_norm": 0.0, - "learning_rate": 1.8792000190292236e-05, - "loss": 0.9324, + "learning_rate": 1.7500213929931412e-05, + "loss": 1.1523, "step": 6470 }, { - "epoch": 0.18337158888038765, + "epoch": 0.2531888254166993, "grad_norm": 0.0, - "learning_rate": 1.879156286994481e-05, - "loss": 0.9906, + "learning_rate": 1.7499375705857318e-05, + "loss": 1.0591, "step": 6471 }, { - "epoch": 0.18339992632265012, + "epoch": 0.25322795210892873, "grad_norm": 0.0, - "learning_rate": 1.879112547554278e-05, - "loss": 0.9793, + "learning_rate": 1.7498537361352644e-05, + "loss": 1.093, "step": 6472 }, { - "epoch": 0.18342826376491259, + "epoch": 0.2532670788011582, "grad_norm": 0.0, - "learning_rate": 1.8790688007089842e-05, - "loss": 0.9456, + "learning_rate": 1.749769889643086e-05, + "loss": 1.0653, "step": 6473 }, { - "epoch": 0.18345660120717505, + "epoch": 0.2533062054933876, "grad_norm": 0.0, - "learning_rate": 1.8790250464589676e-05, - "loss": 0.9486, + "learning_rate": 1.7496860311105426e-05, + "loss": 1.1787, "step": 6474 }, { - "epoch": 0.1834849386494375, + "epoch": 0.253345332185617, "grad_norm": 0.0, - "learning_rate": 1.8789812848045962e-05, - "loss": 1.0097, + "learning_rate": 1.7496021605389806e-05, + "loss": 1.2814, "step": 6475 }, { - "epoch": 0.18351327609169996, + "epoch": 0.25338445887784644, "grad_norm": 0.0, - "learning_rate": 1.878937515746239e-05, - "loss": 1.0434, + "learning_rate": 1.749518277929748e-05, + "loss": 1.0345, "step": 6476 }, { - "epoch": 0.18354161353396242, + "epoch": 0.2534235855700759, "grad_norm": 0.0, - "learning_rate": 1.8788937392842646e-05, - "loss": 0.9219, + "learning_rate": 1.7494343832841903e-05, + "loss": 1.1934, "step": 6477 }, { - "epoch": 0.1835699509762249, + "epoch": 0.2534627122623053, "grad_norm": 0.0, - "learning_rate": 1.8788499554190424e-05, - "loss": 0.9709, + "learning_rate": 1.749350476603656e-05, + "loss": 1.1375, "step": 6478 }, { - "epoch": 0.18359828841848735, + "epoch": 0.25350183895453476, "grad_norm": 0.0, - "learning_rate": 1.8788061641509402e-05, - "loss": 0.9938, + "learning_rate": 1.7492665578894917e-05, + "loss": 1.209, "step": 6479 }, { - "epoch": 0.18362662586074982, + "epoch": 0.2535409656467642, "grad_norm": 0.0, - "learning_rate": 1.8787623654803274e-05, - "loss": 0.9987, + "learning_rate": 1.7491826271430453e-05, + "loss": 1.0728, "step": 6480 }, { - "epoch": 0.18365496330301226, + "epoch": 0.25358009233899365, "grad_norm": 0.0, - "learning_rate": 1.878718559407573e-05, - "loss": 1.0798, + "learning_rate": 1.7490986843656648e-05, + "loss": 1.1603, "step": 6481 }, { - "epoch": 0.18368330074527472, + "epoch": 0.2536192190312231, "grad_norm": 0.0, - "learning_rate": 1.878674745933046e-05, - "loss": 1.0555, + "learning_rate": 1.7490147295586983e-05, + "loss": 1.1468, "step": 6482 }, { - "epoch": 0.1837116381875372, + "epoch": 0.2536583457234525, "grad_norm": 0.0, - "learning_rate": 1.8786309250571155e-05, - "loss": 1.0506, + "learning_rate": 1.7489307627234936e-05, + "loss": 0.9748, "step": 6483 }, { - "epoch": 0.18373997562979966, + "epoch": 0.25369747241568197, "grad_norm": 0.0, - "learning_rate": 1.87858709678015e-05, - "loss": 1.0197, + "learning_rate": 1.7488467838613995e-05, + "loss": 1.0875, "step": 6484 }, { - "epoch": 0.18376831307206212, + "epoch": 0.2537365991079114, "grad_norm": 0.0, - "learning_rate": 1.878543261102519e-05, - "loss": 0.8808, + "learning_rate": 1.748762792973764e-05, + "loss": 1.1025, "step": 6485 }, { - "epoch": 0.1837966505143246, + "epoch": 0.25377572580014085, "grad_norm": 0.0, - "learning_rate": 1.8784994180245922e-05, - "loss": 1.0194, + "learning_rate": 1.7486787900619366e-05, + "loss": 1.1421, "step": 6486 }, { - "epoch": 0.18382498795658703, + "epoch": 0.2538148524923703, "grad_norm": 0.0, - "learning_rate": 1.8784555675467383e-05, - "loss": 0.8523, + "learning_rate": 1.7485947751272657e-05, + "loss": 1.1947, "step": 6487 }, { - "epoch": 0.1838533253988495, + "epoch": 0.25385397918459973, "grad_norm": 0.0, - "learning_rate": 1.878411709669327e-05, - "loss": 0.9534, + "learning_rate": 1.7485107481711014e-05, + "loss": 1.0044, "step": 6488 }, { - "epoch": 0.18388166284111196, + "epoch": 0.2538931058768292, "grad_norm": 0.0, - "learning_rate": 1.8783678443927282e-05, - "loss": 0.9696, + "learning_rate": 1.748426709194792e-05, + "loss": 1.1182, "step": 6489 }, { - "epoch": 0.18391000028337443, + "epoch": 0.2539322325690586, "grad_norm": 0.0, - "learning_rate": 1.8783239717173106e-05, - "loss": 1.0014, + "learning_rate": 1.7483426581996874e-05, + "loss": 0.9367, "step": 6490 }, { - "epoch": 0.1839383377256369, + "epoch": 0.25397135926128805, "grad_norm": 0.0, - "learning_rate": 1.878280091643444e-05, - "loss": 1.018, + "learning_rate": 1.7482585951871374e-05, + "loss": 1.1769, "step": 6491 }, { - "epoch": 0.18396667516789936, + "epoch": 0.2540104859535175, "grad_norm": 0.0, - "learning_rate": 1.8782362041714978e-05, - "loss": 0.9931, + "learning_rate": 1.748174520158492e-05, + "loss": 1.0826, "step": 6492 }, { - "epoch": 0.1839950126101618, + "epoch": 0.25404961264574694, "grad_norm": 0.0, - "learning_rate": 1.878192309301842e-05, - "loss": 1.1193, + "learning_rate": 1.7480904331151015e-05, + "loss": 1.2732, "step": 6493 }, { - "epoch": 0.18402335005242426, + "epoch": 0.2540887393379764, "grad_norm": 0.0, - "learning_rate": 1.8781484070348464e-05, - "loss": 1.1311, + "learning_rate": 1.748006334058316e-05, + "loss": 1.2517, "step": 6494 }, { - "epoch": 0.18405168749468673, + "epoch": 0.2541278660302058, "grad_norm": 0.0, - "learning_rate": 1.8781044973708807e-05, - "loss": 0.9564, + "learning_rate": 1.7479222229894863e-05, + "loss": 1.2352, "step": 6495 }, { - "epoch": 0.1840800249369492, + "epoch": 0.25416699272243526, "grad_norm": 0.0, - "learning_rate": 1.8780605803103147e-05, - "loss": 0.9359, + "learning_rate": 1.7478380999099624e-05, + "loss": 1.2178, "step": 6496 }, { - "epoch": 0.18410836237921166, + "epoch": 0.2542061194146647, "grad_norm": 0.0, - "learning_rate": 1.8780166558535183e-05, - "loss": 1.0623, + "learning_rate": 1.747753964821096e-05, + "loss": 1.088, "step": 6497 }, { - "epoch": 0.18413669982147413, + "epoch": 0.25424524610689414, "grad_norm": 0.0, - "learning_rate": 1.8779727240008618e-05, - "loss": 1.0281, + "learning_rate": 1.747669817724238e-05, + "loss": 1.1415, "step": 6498 }, { - "epoch": 0.18416503726373656, + "epoch": 0.2542843727991236, "grad_norm": 0.0, - "learning_rate": 1.8779287847527146e-05, - "loss": 1.1072, + "learning_rate": 1.7475856586207396e-05, + "loss": 1.1464, "step": 6499 }, { - "epoch": 0.18419337470599903, + "epoch": 0.254323499491353, "grad_norm": 0.0, - "learning_rate": 1.8778848381094477e-05, - "loss": 1.0685, + "learning_rate": 1.747501487511952e-05, + "loss": 1.0598, "step": 6500 }, { - "epoch": 0.1842217121482615, + "epoch": 0.25436262618358246, "grad_norm": 0.0, - "learning_rate": 1.8778408840714307e-05, - "loss": 1.0783, + "learning_rate": 1.7474173043992278e-05, + "loss": 1.155, "step": 6501 }, { - "epoch": 0.18425004959052396, + "epoch": 0.2544017528758119, "grad_norm": 0.0, - "learning_rate": 1.877796922639034e-05, - "loss": 0.9768, + "learning_rate": 1.7473331092839177e-05, + "loss": 1.2058, "step": 6502 }, { - "epoch": 0.18427838703278643, + "epoch": 0.25444087956804134, "grad_norm": 0.0, - "learning_rate": 1.8777529538126273e-05, - "loss": 0.9568, + "learning_rate": 1.7472489021673743e-05, + "loss": 1.0725, "step": 6503 }, { - "epoch": 0.1843067244750489, + "epoch": 0.25448000626027073, "grad_norm": 0.0, - "learning_rate": 1.8777089775925822e-05, - "loss": 0.8413, + "learning_rate": 1.7471646830509503e-05, + "loss": 1.1925, "step": 6504 }, { - "epoch": 0.18433506191731133, + "epoch": 0.25451913295250017, "grad_norm": 0.0, - "learning_rate": 1.877664993979268e-05, - "loss": 1.0113, + "learning_rate": 1.7470804519359973e-05, + "loss": 1.1204, "step": 6505 }, { - "epoch": 0.1843633993595738, + "epoch": 0.2545582596447296, "grad_norm": 0.0, - "learning_rate": 1.877621002973056e-05, - "loss": 1.0978, + "learning_rate": 1.7469962088238688e-05, + "loss": 1.2126, "step": 6506 }, { - "epoch": 0.18439173680183626, + "epoch": 0.25459738633695905, "grad_norm": 0.0, - "learning_rate": 1.8775770045743163e-05, - "loss": 1.0498, + "learning_rate": 1.7469119537159168e-05, + "loss": 1.1418, "step": 6507 }, { - "epoch": 0.18442007424409873, + "epoch": 0.2546365130291885, "grad_norm": 0.0, - "learning_rate": 1.8775329987834193e-05, - "loss": 0.9781, + "learning_rate": 1.746827686613495e-05, + "loss": 1.1087, "step": 6508 }, { - "epoch": 0.1844484116863612, + "epoch": 0.25467563972141793, "grad_norm": 0.0, - "learning_rate": 1.8774889856007365e-05, - "loss": 0.9842, + "learning_rate": 1.7467434075179562e-05, + "loss": 1.188, "step": 6509 }, { - "epoch": 0.18447674912862366, + "epoch": 0.2547147664136474, "grad_norm": 0.0, - "learning_rate": 1.8774449650266375e-05, - "loss": 1.0833, + "learning_rate": 1.7466591164306545e-05, + "loss": 1.1553, "step": 6510 }, { - "epoch": 0.1845050865708861, + "epoch": 0.2547538931058768, "grad_norm": 0.0, - "learning_rate": 1.8774009370614944e-05, - "loss": 1.0254, + "learning_rate": 1.7465748133529424e-05, + "loss": 1.128, "step": 6511 }, { - "epoch": 0.18453342401314857, + "epoch": 0.25479301979810626, "grad_norm": 0.0, - "learning_rate": 1.8773569017056768e-05, - "loss": 0.9247, + "learning_rate": 1.7464904982861745e-05, + "loss": 1.195, "step": 6512 }, { - "epoch": 0.18456176145541103, + "epoch": 0.2548321464903357, "grad_norm": 0.0, - "learning_rate": 1.8773128589595567e-05, - "loss": 0.882, + "learning_rate": 1.7464061712317047e-05, + "loss": 1.2294, "step": 6513 }, { - "epoch": 0.1845900988976735, + "epoch": 0.25487127318256514, "grad_norm": 0.0, - "learning_rate": 1.8772688088235044e-05, - "loss": 1.0731, + "learning_rate": 1.746321832190887e-05, + "loss": 1.0941, "step": 6514 }, { - "epoch": 0.18461843633993597, + "epoch": 0.2549103998747946, "grad_norm": 0.0, - "learning_rate": 1.8772247512978913e-05, - "loss": 1.0562, + "learning_rate": 1.746237481165076e-05, + "loss": 1.1315, "step": 6515 }, { - "epoch": 0.18464677378219843, + "epoch": 0.254949526567024, "grad_norm": 0.0, - "learning_rate": 1.877180686383088e-05, - "loss": 0.934, + "learning_rate": 1.746153118155626e-05, + "loss": 1.1624, "step": 6516 }, { - "epoch": 0.18467511122446087, + "epoch": 0.25498865325925346, "grad_norm": 0.0, - "learning_rate": 1.8771366140794662e-05, - "loss": 1.0272, + "learning_rate": 1.746068743163892e-05, + "loss": 1.0897, "step": 6517 }, { - "epoch": 0.18470344866672334, + "epoch": 0.2550277799514829, "grad_norm": 0.0, - "learning_rate": 1.8770925343873972e-05, - "loss": 0.9433, + "learning_rate": 1.745984356191229e-05, + "loss": 1.1155, "step": 6518 }, { - "epoch": 0.1847317861089858, + "epoch": 0.25506690664371234, "grad_norm": 0.0, - "learning_rate": 1.8770484473072518e-05, - "loss": 0.9808, + "learning_rate": 1.7458999572389918e-05, + "loss": 1.1443, "step": 6519 }, { - "epoch": 0.18476012355124827, + "epoch": 0.2551060333359418, "grad_norm": 0.0, - "learning_rate": 1.877004352839402e-05, - "loss": 1.0337, + "learning_rate": 1.745815546308536e-05, + "loss": 1.126, "step": 6520 }, { - "epoch": 0.18478846099351073, + "epoch": 0.2551451600281712, "grad_norm": 0.0, - "learning_rate": 1.8769602509842185e-05, - "loss": 0.9094, + "learning_rate": 1.7457311234012167e-05, + "loss": 1.2877, "step": 6521 }, { - "epoch": 0.1848167984357732, + "epoch": 0.25518428672040067, "grad_norm": 0.0, - "learning_rate": 1.8769161417420733e-05, - "loss": 1.0276, + "learning_rate": 1.7456466885183906e-05, + "loss": 1.1671, "step": 6522 }, { - "epoch": 0.18484513587803564, + "epoch": 0.2552234134126301, "grad_norm": 0.0, - "learning_rate": 1.876872025113338e-05, - "loss": 1.108, + "learning_rate": 1.7455622416614127e-05, + "loss": 1.1263, "step": 6523 }, { - "epoch": 0.1848734733202981, + "epoch": 0.25526254010485955, "grad_norm": 0.0, - "learning_rate": 1.8768279010983836e-05, - "loss": 1.0467, + "learning_rate": 1.7454777828316397e-05, + "loss": 0.9959, "step": 6524 }, { - "epoch": 0.18490181076256057, + "epoch": 0.255301666797089, "grad_norm": 0.0, - "learning_rate": 1.8767837696975824e-05, - "loss": 1.0867, + "learning_rate": 1.7453933120304275e-05, + "loss": 1.1888, "step": 6525 }, { - "epoch": 0.18493014820482304, + "epoch": 0.25534079348931843, "grad_norm": 0.0, - "learning_rate": 1.8767396309113058e-05, - "loss": 1.0191, + "learning_rate": 1.7453088292591327e-05, + "loss": 1.0827, "step": 6526 }, { - "epoch": 0.1849584856470855, + "epoch": 0.25537992018154787, "grad_norm": 0.0, - "learning_rate": 1.8766954847399258e-05, - "loss": 1.0681, + "learning_rate": 1.745224334519112e-05, + "loss": 1.0984, "step": 6527 }, { - "epoch": 0.18498682308934797, + "epoch": 0.2554190468737773, "grad_norm": 0.0, - "learning_rate": 1.8766513311838147e-05, - "loss": 0.9969, + "learning_rate": 1.7451398278117225e-05, + "loss": 1.1892, "step": 6528 }, { - "epoch": 0.1850151605316104, + "epoch": 0.25545817356600675, "grad_norm": 0.0, - "learning_rate": 1.876607170243343e-05, - "loss": 0.9466, + "learning_rate": 1.745055309138321e-05, + "loss": 1.1556, "step": 6529 }, { - "epoch": 0.18504349797387287, + "epoch": 0.2554973002582362, "grad_norm": 0.0, - "learning_rate": 1.8765630019188844e-05, - "loss": 1.095, + "learning_rate": 1.7449707785002647e-05, + "loss": 1.0412, "step": 6530 }, { - "epoch": 0.18507183541613534, + "epoch": 0.25553642695046563, "grad_norm": 0.0, - "learning_rate": 1.8765188262108096e-05, - "loss": 1.0077, + "learning_rate": 1.7448862358989115e-05, + "loss": 1.137, "step": 6531 }, { - "epoch": 0.1851001728583978, + "epoch": 0.255575553642695, "grad_norm": 0.0, - "learning_rate": 1.8764746431194915e-05, - "loss": 0.9461, + "learning_rate": 1.744801681335618e-05, + "loss": 1.2047, "step": 6532 }, { - "epoch": 0.18512851030066027, + "epoch": 0.25561468033492446, "grad_norm": 0.0, - "learning_rate": 1.876430452645302e-05, - "loss": 1.1135, + "learning_rate": 1.7447171148117435e-05, + "loss": 1.1118, "step": 6533 }, { - "epoch": 0.18515684774292274, + "epoch": 0.2556538070271539, "grad_norm": 0.0, - "learning_rate": 1.8763862547886133e-05, - "loss": 1.0455, + "learning_rate": 1.7446325363286452e-05, + "loss": 1.187, "step": 6534 }, { - "epoch": 0.18518518518518517, + "epoch": 0.25569293371938334, "grad_norm": 0.0, - "learning_rate": 1.8763420495497976e-05, - "loss": 1.057, + "learning_rate": 1.7445479458876816e-05, + "loss": 1.2589, "step": 6535 }, { - "epoch": 0.18521352262744764, + "epoch": 0.2557320604116128, "grad_norm": 0.0, - "learning_rate": 1.8762978369292273e-05, - "loss": 0.9839, + "learning_rate": 1.7444633434902107e-05, + "loss": 1.1359, "step": 6536 }, { - "epoch": 0.1852418600697101, + "epoch": 0.2557711871038422, "grad_norm": 0.0, - "learning_rate": 1.876253616927275e-05, - "loss": 1.1316, + "learning_rate": 1.744378729137591e-05, + "loss": 1.0613, "step": 6537 }, { - "epoch": 0.18527019751197257, + "epoch": 0.25581031379607166, "grad_norm": 0.0, - "learning_rate": 1.8762093895443132e-05, - "loss": 0.9675, + "learning_rate": 1.7442941028311823e-05, + "loss": 1.1824, "step": 6538 }, { - "epoch": 0.18529853495423504, + "epoch": 0.2558494404883011, "grad_norm": 0.0, - "learning_rate": 1.8761651547807144e-05, - "loss": 0.9559, + "learning_rate": 1.7442094645723425e-05, + "loss": 1.0662, "step": 6539 }, { - "epoch": 0.1853268723964975, + "epoch": 0.25588856718053055, "grad_norm": 0.0, - "learning_rate": 1.876120912636851e-05, - "loss": 1.0645, + "learning_rate": 1.7441248143624316e-05, + "loss": 1.0521, "step": 6540 }, { - "epoch": 0.18535520983875994, + "epoch": 0.25592769387276, "grad_norm": 0.0, - "learning_rate": 1.876076663113096e-05, - "loss": 0.9755, + "learning_rate": 1.744040152202809e-05, + "loss": 1.1202, "step": 6541 }, { - "epoch": 0.1853835472810224, + "epoch": 0.25596682056498943, "grad_norm": 0.0, - "learning_rate": 1.8760324062098218e-05, - "loss": 1.0673, + "learning_rate": 1.743955478094833e-05, + "loss": 1.113, "step": 6542 }, { - "epoch": 0.18541188472328488, + "epoch": 0.25600594725721887, "grad_norm": 0.0, - "learning_rate": 1.8759881419274012e-05, - "loss": 0.9857, + "learning_rate": 1.7438707920398647e-05, + "loss": 1.0189, "step": 6543 }, { - "epoch": 0.18544022216554734, + "epoch": 0.2560450739494483, "grad_norm": 0.0, - "learning_rate": 1.8759438702662076e-05, - "loss": 0.9567, + "learning_rate": 1.7437860940392636e-05, + "loss": 1.1888, "step": 6544 }, { - "epoch": 0.1854685596078098, + "epoch": 0.25608420064167775, "grad_norm": 0.0, - "learning_rate": 1.875899591226613e-05, - "loss": 1.1052, + "learning_rate": 1.7437013840943897e-05, + "loss": 1.1143, "step": 6545 }, { - "epoch": 0.18549689705007227, + "epoch": 0.2561233273339072, "grad_norm": 0.0, - "learning_rate": 1.875855304808991e-05, - "loss": 0.9429, + "learning_rate": 1.7436166622066036e-05, + "loss": 1.089, "step": 6546 }, { - "epoch": 0.1855252344923347, + "epoch": 0.25616245402613663, "grad_norm": 0.0, - "learning_rate": 1.875811011013715e-05, - "loss": 1.0382, + "learning_rate": 1.7435319283772657e-05, + "loss": 1.1428, "step": 6547 }, { - "epoch": 0.18555357193459718, + "epoch": 0.2562015807183661, "grad_norm": 0.0, - "learning_rate": 1.8757667098411572e-05, - "loss": 1.0335, + "learning_rate": 1.7434471826077367e-05, + "loss": 1.1072, "step": 6548 }, { - "epoch": 0.18558190937685964, + "epoch": 0.2562407074105955, "grad_norm": 0.0, - "learning_rate": 1.8757224012916913e-05, - "loss": 0.9705, + "learning_rate": 1.7433624248993776e-05, + "loss": 1.0966, "step": 6549 }, { - "epoch": 0.1856102468191221, + "epoch": 0.25627983410282495, "grad_norm": 0.0, - "learning_rate": 1.8756780853656902e-05, - "loss": 1.0347, + "learning_rate": 1.7432776552535493e-05, + "loss": 1.1167, "step": 6550 }, { - "epoch": 0.18563858426138458, + "epoch": 0.2563189607950544, "grad_norm": 0.0, - "learning_rate": 1.8756337620635277e-05, - "loss": 1.0571, + "learning_rate": 1.7431928736716133e-05, + "loss": 1.1073, "step": 6551 }, { - "epoch": 0.18566692170364704, + "epoch": 0.25635808748728384, "grad_norm": 0.0, - "learning_rate": 1.8755894313855768e-05, - "loss": 1.0468, + "learning_rate": 1.7431080801549313e-05, + "loss": 1.1427, "step": 6552 }, { - "epoch": 0.18569525914590948, + "epoch": 0.2563972141795133, "grad_norm": 0.0, - "learning_rate": 1.875545093332211e-05, - "loss": 0.8786, + "learning_rate": 1.743023274704864e-05, + "loss": 1.0983, "step": 6553 }, { - "epoch": 0.18572359658817195, + "epoch": 0.2564363408717427, "grad_norm": 0.0, - "learning_rate": 1.8755007479038038e-05, - "loss": 0.9778, + "learning_rate": 1.7429384573227747e-05, + "loss": 1.1915, "step": 6554 }, { - "epoch": 0.1857519340304344, + "epoch": 0.25647546756397216, "grad_norm": 0.0, - "learning_rate": 1.8754563951007286e-05, - "loss": 1.1248, + "learning_rate": 1.7428536280100243e-05, + "loss": 1.0701, "step": 6555 }, { - "epoch": 0.18578027147269688, + "epoch": 0.2565145942562016, "grad_norm": 0.0, - "learning_rate": 1.8754120349233594e-05, - "loss": 0.9733, + "learning_rate": 1.7427687867679755e-05, + "loss": 1.0816, "step": 6556 }, { - "epoch": 0.18580860891495934, + "epoch": 0.25655372094843104, "grad_norm": 0.0, - "learning_rate": 1.875367667372069e-05, - "loss": 0.9804, + "learning_rate": 1.7426839335979912e-05, + "loss": 1.0557, "step": 6557 }, { - "epoch": 0.1858369463572218, + "epoch": 0.2565928476406605, "grad_norm": 0.0, - "learning_rate": 1.875323292447232e-05, - "loss": 0.9543, + "learning_rate": 1.742599068501433e-05, + "loss": 1.1256, "step": 6558 }, { - "epoch": 0.18586528379948425, + "epoch": 0.2566319743328899, "grad_norm": 0.0, - "learning_rate": 1.8752789101492217e-05, - "loss": 1.0172, + "learning_rate": 1.7425141914796646e-05, + "loss": 1.121, "step": 6559 }, { - "epoch": 0.18589362124174671, + "epoch": 0.25667110102511936, "grad_norm": 0.0, - "learning_rate": 1.8752345204784123e-05, - "loss": 1.0403, + "learning_rate": 1.7424293025340488e-05, + "loss": 1.0976, "step": 6560 }, { - "epoch": 0.18592195868400918, + "epoch": 0.25671022771734875, "grad_norm": 0.0, - "learning_rate": 1.8751901234351775e-05, - "loss": 1.0701, + "learning_rate": 1.742344401665948e-05, + "loss": 1.2392, "step": 6561 }, { - "epoch": 0.18595029612627165, + "epoch": 0.2567493544095782, "grad_norm": 0.0, - "learning_rate": 1.875145719019891e-05, - "loss": 0.9622, + "learning_rate": 1.742259488876727e-05, + "loss": 1.1385, "step": 6562 }, { - "epoch": 0.1859786335685341, + "epoch": 0.25678848110180763, "grad_norm": 0.0, - "learning_rate": 1.8751013072329274e-05, - "loss": 0.942, + "learning_rate": 1.7421745641677486e-05, + "loss": 1.1395, "step": 6563 }, { - "epoch": 0.18600697101079658, + "epoch": 0.25682760779403707, "grad_norm": 0.0, - "learning_rate": 1.8750568880746606e-05, - "loss": 0.9396, + "learning_rate": 1.7420896275403768e-05, + "loss": 1.0833, "step": 6564 }, { - "epoch": 0.18603530845305902, + "epoch": 0.2568667344862665, "grad_norm": 0.0, - "learning_rate": 1.8750124615454645e-05, - "loss": 1.0222, + "learning_rate": 1.7420046789959754e-05, + "loss": 1.1816, "step": 6565 }, { - "epoch": 0.18606364589532148, + "epoch": 0.25690586117849595, "grad_norm": 0.0, - "learning_rate": 1.8749680276457137e-05, - "loss": 0.9679, + "learning_rate": 1.7419197185359086e-05, + "loss": 0.9876, "step": 6566 }, { - "epoch": 0.18609198333758395, + "epoch": 0.2569449878707254, "grad_norm": 0.0, - "learning_rate": 1.8749235863757822e-05, - "loss": 1.061, + "learning_rate": 1.7418347461615407e-05, + "loss": 1.0669, "step": 6567 }, { - "epoch": 0.18612032077984642, + "epoch": 0.25698411456295484, "grad_norm": 0.0, - "learning_rate": 1.8748791377360443e-05, - "loss": 1.0083, + "learning_rate": 1.7417497618742364e-05, + "loss": 1.114, "step": 6568 }, { - "epoch": 0.18614865822210888, + "epoch": 0.2570232412551843, "grad_norm": 0.0, - "learning_rate": 1.874834681726875e-05, - "loss": 1.0446, + "learning_rate": 1.7416647656753603e-05, + "loss": 1.1224, "step": 6569 }, { - "epoch": 0.18617699566437135, + "epoch": 0.2570623679474137, "grad_norm": 0.0, - "learning_rate": 1.874790218348648e-05, - "loss": 0.9997, + "learning_rate": 1.7415797575662777e-05, + "loss": 1.1028, "step": 6570 }, { - "epoch": 0.18620533310663379, + "epoch": 0.25710149463964316, "grad_norm": 0.0, - "learning_rate": 1.874745747601738e-05, - "loss": 1.0153, + "learning_rate": 1.741494737548353e-05, + "loss": 1.0261, "step": 6571 }, { - "epoch": 0.18623367054889625, + "epoch": 0.2571406213318726, "grad_norm": 0.0, - "learning_rate": 1.8747012694865197e-05, - "loss": 1.0869, + "learning_rate": 1.7414097056229525e-05, + "loss": 1.0966, "step": 6572 }, { - "epoch": 0.18626200799115872, + "epoch": 0.25717974802410204, "grad_norm": 0.0, - "learning_rate": 1.874656784003368e-05, - "loss": 0.9807, + "learning_rate": 1.7413246617914408e-05, + "loss": 1.1824, "step": 6573 }, { - "epoch": 0.18629034543342118, + "epoch": 0.2572188747163315, "grad_norm": 0.0, - "learning_rate": 1.8746122911526575e-05, - "loss": 1.0632, + "learning_rate": 1.7412396060551843e-05, + "loss": 1.1059, "step": 6574 }, { - "epoch": 0.18631868287568365, + "epoch": 0.2572580014085609, "grad_norm": 0.0, - "learning_rate": 1.8745677909347626e-05, - "loss": 0.9748, + "learning_rate": 1.741154538415548e-05, + "loss": 1.296, "step": 6575 }, { - "epoch": 0.18634702031794612, + "epoch": 0.25729712810079036, "grad_norm": 0.0, - "learning_rate": 1.8745232833500585e-05, - "loss": 1.0529, + "learning_rate": 1.741069458873899e-05, + "loss": 1.1234, "step": 6576 }, { - "epoch": 0.18637535776020855, + "epoch": 0.2573362547930198, "grad_norm": 0.0, - "learning_rate": 1.8744787683989203e-05, - "loss": 1.147, + "learning_rate": 1.7409843674316026e-05, + "loss": 1.0905, "step": 6577 }, { - "epoch": 0.18640369520247102, + "epoch": 0.25737538148524924, "grad_norm": 0.0, - "learning_rate": 1.8744342460817227e-05, - "loss": 0.9824, + "learning_rate": 1.7408992640900263e-05, + "loss": 1.1157, "step": 6578 }, { - "epoch": 0.1864320326447335, + "epoch": 0.2574145081774787, "grad_norm": 0.0, - "learning_rate": 1.87438971639884e-05, - "loss": 1.0659, + "learning_rate": 1.740814148850536e-05, + "loss": 1.1, "step": 6579 }, { - "epoch": 0.18646037008699595, + "epoch": 0.2574536348697081, "grad_norm": 0.0, - "learning_rate": 1.874345179350649e-05, - "loss": 1.0281, + "learning_rate": 1.7407290217144988e-05, + "loss": 1.1888, "step": 6580 }, { - "epoch": 0.18648870752925842, + "epoch": 0.25749276156193757, "grad_norm": 0.0, - "learning_rate": 1.8743006349375236e-05, - "loss": 0.9777, + "learning_rate": 1.7406438826832818e-05, + "loss": 1.2493, "step": 6581 }, { - "epoch": 0.18651704497152086, + "epoch": 0.257531888254167, "grad_norm": 0.0, - "learning_rate": 1.874256083159839e-05, - "loss": 1.1229, + "learning_rate": 1.740558731758252e-05, + "loss": 1.1855, "step": 6582 }, { - "epoch": 0.18654538241378332, + "epoch": 0.25757101494639645, "grad_norm": 0.0, - "learning_rate": 1.874211524017971e-05, - "loss": 0.9545, + "learning_rate": 1.7404735689407768e-05, + "loss": 0.9633, "step": 6583 }, { - "epoch": 0.1865737198560458, + "epoch": 0.2576101416386259, "grad_norm": 0.0, - "learning_rate": 1.8741669575122946e-05, - "loss": 1.0627, + "learning_rate": 1.740388394232224e-05, + "loss": 1.0888, "step": 6584 }, { - "epoch": 0.18660205729830825, + "epoch": 0.25764926833085533, "grad_norm": 0.0, - "learning_rate": 1.874122383643185e-05, - "loss": 0.9307, + "learning_rate": 1.7403032076339617e-05, + "loss": 1.1726, "step": 6585 }, { - "epoch": 0.18663039474057072, + "epoch": 0.25768839502308477, "grad_norm": 0.0, - "learning_rate": 1.8740778024110188e-05, - "loss": 0.9878, + "learning_rate": 1.7402180091473574e-05, + "loss": 1.1573, "step": 6586 }, { - "epoch": 0.1866587321828332, + "epoch": 0.2577275217153142, "grad_norm": 0.0, - "learning_rate": 1.87403321381617e-05, - "loss": 1.0996, + "learning_rate": 1.740132798773779e-05, + "loss": 1.3249, "step": 6587 }, { - "epoch": 0.18668706962509563, + "epoch": 0.25776664840754365, "grad_norm": 0.0, - "learning_rate": 1.873988617859015e-05, - "loss": 1.0532, + "learning_rate": 1.7400475765145958e-05, + "loss": 1.0158, "step": 6588 }, { - "epoch": 0.1867154070673581, + "epoch": 0.25780577509977304, "grad_norm": 0.0, - "learning_rate": 1.8739440145399295e-05, - "loss": 0.9225, + "learning_rate": 1.7399623423711757e-05, + "loss": 1.1605, "step": 6589 }, { - "epoch": 0.18674374450962056, + "epoch": 0.2578449017920025, "grad_norm": 0.0, - "learning_rate": 1.873899403859289e-05, - "loss": 1.0802, + "learning_rate": 1.7398770963448872e-05, + "loss": 1.0251, "step": 6590 }, { - "epoch": 0.18677208195188302, + "epoch": 0.2578840284842319, "grad_norm": 0.0, - "learning_rate": 1.8738547858174692e-05, - "loss": 1.0759, + "learning_rate": 1.7397918384371003e-05, + "loss": 1.1491, "step": 6591 }, { - "epoch": 0.1868004193941455, + "epoch": 0.25792315517646136, "grad_norm": 0.0, - "learning_rate": 1.873810160414846e-05, - "loss": 0.9566, + "learning_rate": 1.739706568649183e-05, + "loss": 1.072, "step": 6592 }, { - "epoch": 0.18682875683640796, + "epoch": 0.2579622818686908, "grad_norm": 0.0, - "learning_rate": 1.8737655276517953e-05, - "loss": 0.9831, + "learning_rate": 1.7396212869825053e-05, + "loss": 1.1005, "step": 6593 }, { - "epoch": 0.1868570942786704, + "epoch": 0.25800140856092024, "grad_norm": 0.0, - "learning_rate": 1.8737208875286933e-05, - "loss": 0.9327, + "learning_rate": 1.7395359934384366e-05, + "loss": 1.1695, "step": 6594 }, { - "epoch": 0.18688543172093286, + "epoch": 0.2580405352531497, "grad_norm": 0.0, - "learning_rate": 1.8736762400459155e-05, - "loss": 1.0853, + "learning_rate": 1.7394506880183463e-05, + "loss": 1.1796, "step": 6595 }, { - "epoch": 0.18691376916319533, + "epoch": 0.2580796619453791, "grad_norm": 0.0, - "learning_rate": 1.8736315852038387e-05, - "loss": 1.0217, + "learning_rate": 1.7393653707236045e-05, + "loss": 1.1927, "step": 6596 }, { - "epoch": 0.1869421066054578, + "epoch": 0.25811878863760856, "grad_norm": 0.0, - "learning_rate": 1.873586923002838e-05, - "loss": 1.0776, + "learning_rate": 1.7392800415555818e-05, + "loss": 1.0897, "step": 6597 }, { - "epoch": 0.18697044404772026, + "epoch": 0.258157915329838, "grad_norm": 0.0, - "learning_rate": 1.873542253443291e-05, - "loss": 1.14, + "learning_rate": 1.7391947005156476e-05, + "loss": 1.1991, "step": 6598 }, { - "epoch": 0.18699878148998272, + "epoch": 0.25819704202206745, "grad_norm": 0.0, - "learning_rate": 1.8734975765255732e-05, - "loss": 1.0735, + "learning_rate": 1.7391093476051728e-05, + "loss": 1.1423, "step": 6599 }, { - "epoch": 0.18702711893224516, + "epoch": 0.2582361687142969, "grad_norm": 0.0, - "learning_rate": 1.8734528922500606e-05, - "loss": 0.9486, + "learning_rate": 1.7390239828255282e-05, + "loss": 1.1162, "step": 6600 }, { - "epoch": 0.18705545637450763, + "epoch": 0.25827529540652633, "grad_norm": 0.0, - "learning_rate": 1.87340820061713e-05, - "loss": 0.8844, + "learning_rate": 1.7389386061780843e-05, + "loss": 1.2562, "step": 6601 }, { - "epoch": 0.1870837938167701, + "epoch": 0.25831442209875577, "grad_norm": 0.0, - "learning_rate": 1.8733635016271576e-05, - "loss": 1.0602, + "learning_rate": 1.7388532176642127e-05, + "loss": 1.0165, "step": 6602 }, { - "epoch": 0.18711213125903256, + "epoch": 0.2583535487909852, "grad_norm": 0.0, - "learning_rate": 1.8733187952805204e-05, - "loss": 1.0258, + "learning_rate": 1.7387678172852837e-05, + "loss": 1.1005, "step": 6603 }, { - "epoch": 0.18714046870129503, + "epoch": 0.25839267548321465, "grad_norm": 0.0, - "learning_rate": 1.8732740815775945e-05, - "loss": 1.0905, + "learning_rate": 1.7386824050426697e-05, + "loss": 1.0551, "step": 6604 }, { - "epoch": 0.1871688061435575, + "epoch": 0.2584318021754441, "grad_norm": 0.0, - "learning_rate": 1.873229360518757e-05, - "loss": 0.9792, + "learning_rate": 1.7385969809377418e-05, + "loss": 1.183, "step": 6605 }, { - "epoch": 0.18719714358581993, + "epoch": 0.25847092886767353, "grad_norm": 0.0, - "learning_rate": 1.8731846321043842e-05, - "loss": 0.9874, + "learning_rate": 1.7385115449718718e-05, + "loss": 1.3622, "step": 6606 }, { - "epoch": 0.1872254810280824, + "epoch": 0.258510055559903, "grad_norm": 0.0, - "learning_rate": 1.8731398963348527e-05, - "loss": 0.9756, + "learning_rate": 1.738426097146432e-05, + "loss": 1.0568, "step": 6607 }, { - "epoch": 0.18725381847034486, + "epoch": 0.2585491822521324, "grad_norm": 0.0, - "learning_rate": 1.8730951532105403e-05, - "loss": 1.0274, + "learning_rate": 1.738340637462794e-05, + "loss": 1.1768, "step": 6608 }, { - "epoch": 0.18728215591260733, + "epoch": 0.25858830894436186, "grad_norm": 0.0, - "learning_rate": 1.8730504027318223e-05, - "loss": 1.0139, + "learning_rate": 1.738255165922331e-05, + "loss": 1.0323, "step": 6609 }, { - "epoch": 0.1873104933548698, + "epoch": 0.2586274356365913, "grad_norm": 0.0, - "learning_rate": 1.873005644899077e-05, - "loss": 0.9696, + "learning_rate": 1.738169682526415e-05, + "loss": 0.9881, "step": 6610 }, { - "epoch": 0.18733883079713226, + "epoch": 0.25866656232882074, "grad_norm": 0.0, - "learning_rate": 1.8729608797126813e-05, - "loss": 1.0735, + "learning_rate": 1.7380841872764185e-05, + "loss": 1.0492, "step": 6611 }, { - "epoch": 0.1873671682393947, + "epoch": 0.2587056890210502, "grad_norm": 0.0, - "learning_rate": 1.8729161071730115e-05, - "loss": 1.0267, + "learning_rate": 1.7379986801737154e-05, + "loss": 1.1525, "step": 6612 }, { - "epoch": 0.18739550568165717, + "epoch": 0.2587448157132796, "grad_norm": 0.0, - "learning_rate": 1.8728713272804455e-05, - "loss": 0.9765, + "learning_rate": 1.737913161219678e-05, + "loss": 1.2635, "step": 6613 }, { - "epoch": 0.18742384312391963, + "epoch": 0.25878394240550906, "grad_norm": 0.0, - "learning_rate": 1.87282654003536e-05, - "loss": 1.0523, + "learning_rate": 1.73782763041568e-05, + "loss": 1.0284, "step": 6614 }, { - "epoch": 0.1874521805661821, + "epoch": 0.2588230690977385, "grad_norm": 0.0, - "learning_rate": 1.8727817454381325e-05, - "loss": 1.066, + "learning_rate": 1.7377420877630947e-05, + "loss": 0.979, "step": 6615 }, { - "epoch": 0.18748051800844456, + "epoch": 0.25886219578996794, "grad_norm": 0.0, - "learning_rate": 1.87273694348914e-05, - "loss": 1.0497, + "learning_rate": 1.737656533263296e-05, + "loss": 1.2404, "step": 6616 }, { - "epoch": 0.18750885545070703, + "epoch": 0.2589013224821974, "grad_norm": 0.0, - "learning_rate": 1.8726921341887606e-05, - "loss": 1.0209, + "learning_rate": 1.7375709669176572e-05, + "loss": 1.1646, "step": 6617 }, { - "epoch": 0.18753719289296947, + "epoch": 0.25894044917442677, "grad_norm": 0.0, - "learning_rate": 1.872647317537371e-05, - "loss": 0.9639, + "learning_rate": 1.7374853887275533e-05, + "loss": 1.0869, "step": 6618 }, { - "epoch": 0.18756553033523193, + "epoch": 0.2589795758666562, "grad_norm": 0.0, - "learning_rate": 1.872602493535349e-05, - "loss": 1.064, + "learning_rate": 1.737399798694358e-05, + "loss": 1.2184, "step": 6619 }, { - "epoch": 0.1875938677774944, + "epoch": 0.25901870255888565, "grad_norm": 0.0, - "learning_rate": 1.8725576621830722e-05, - "loss": 1.0962, + "learning_rate": 1.737314196819446e-05, + "loss": 1.2767, "step": 6620 }, { - "epoch": 0.18762220521975687, + "epoch": 0.2590578292511151, "grad_norm": 0.0, - "learning_rate": 1.8725128234809183e-05, - "loss": 1.0491, + "learning_rate": 1.7372285831041923e-05, + "loss": 1.086, "step": 6621 }, { - "epoch": 0.18765054266201933, + "epoch": 0.25909695594334453, "grad_norm": 0.0, - "learning_rate": 1.8724679774292648e-05, - "loss": 0.9122, + "learning_rate": 1.737142957549971e-05, + "loss": 1.2015, "step": 6622 }, { - "epoch": 0.1876788801042818, + "epoch": 0.25913608263557397, "grad_norm": 0.0, - "learning_rate": 1.8724231240284895e-05, - "loss": 0.9707, + "learning_rate": 1.737057320158158e-05, + "loss": 1.3355, "step": 6623 }, { - "epoch": 0.18770721754654424, + "epoch": 0.2591752093278034, "grad_norm": 0.0, - "learning_rate": 1.87237826327897e-05, - "loss": 0.9658, + "learning_rate": 1.7369716709301275e-05, + "loss": 1.1996, "step": 6624 }, { - "epoch": 0.1877355549888067, + "epoch": 0.25921433602003285, "grad_norm": 0.0, - "learning_rate": 1.872333395181085e-05, - "loss": 1.0884, + "learning_rate": 1.736886009867255e-05, + "loss": 1.0744, "step": 6625 }, { - "epoch": 0.18776389243106917, + "epoch": 0.2592534627122623, "grad_norm": 0.0, - "learning_rate": 1.8722885197352113e-05, - "loss": 0.9359, + "learning_rate": 1.7368003369709175e-05, + "loss": 1.0771, "step": 6626 }, { - "epoch": 0.18779222987333163, + "epoch": 0.25929258940449174, "grad_norm": 0.0, - "learning_rate": 1.872243636941728e-05, - "loss": 0.9868, + "learning_rate": 1.7367146522424895e-05, + "loss": 1.2311, "step": 6627 }, { - "epoch": 0.1878205673155941, + "epoch": 0.2593317160967212, "grad_norm": 0.0, - "learning_rate": 1.8721987468010124e-05, - "loss": 1.0947, + "learning_rate": 1.7366289556833473e-05, + "loss": 1.2138, "step": 6628 }, { - "epoch": 0.18784890475785657, + "epoch": 0.2593708427889506, "grad_norm": 0.0, - "learning_rate": 1.8721538493134428e-05, - "loss": 1.0593, + "learning_rate": 1.736543247294867e-05, + "loss": 1.1292, "step": 6629 }, { - "epoch": 0.187877242200119, + "epoch": 0.25940996948118006, "grad_norm": 0.0, - "learning_rate": 1.8721089444793978e-05, - "loss": 0.9029, + "learning_rate": 1.736457527078425e-05, + "loss": 1.1911, "step": 6630 }, { - "epoch": 0.18790557964238147, + "epoch": 0.2594490961734095, "grad_norm": 0.0, - "learning_rate": 1.872064032299255e-05, - "loss": 1.0421, + "learning_rate": 1.7363717950353983e-05, + "loss": 1.1313, "step": 6631 }, { - "epoch": 0.18793391708464394, + "epoch": 0.25948822286563894, "grad_norm": 0.0, - "learning_rate": 1.872019112773393e-05, - "loss": 1.032, + "learning_rate": 1.7362860511671634e-05, + "loss": 1.1499, "step": 6632 }, { - "epoch": 0.1879622545269064, + "epoch": 0.2595273495578684, "grad_norm": 0.0, - "learning_rate": 1.8719741859021904e-05, - "loss": 1.0171, + "learning_rate": 1.7362002954750967e-05, + "loss": 1.0535, "step": 6633 }, { - "epoch": 0.18799059196916887, + "epoch": 0.2595664762500978, "grad_norm": 0.0, - "learning_rate": 1.8719292516860253e-05, - "loss": 1.041, + "learning_rate": 1.736114527960576e-05, + "loss": 1.1465, "step": 6634 }, { - "epoch": 0.18801892941143133, + "epoch": 0.25960560294232726, "grad_norm": 0.0, - "learning_rate": 1.8718843101252765e-05, - "loss": 0.9915, + "learning_rate": 1.7360287486249782e-05, + "loss": 1.1168, "step": 6635 }, { - "epoch": 0.18804726685369377, + "epoch": 0.2596447296345567, "grad_norm": 0.0, - "learning_rate": 1.871839361220322e-05, - "loss": 0.9926, + "learning_rate": 1.7359429574696813e-05, + "loss": 1.0954, "step": 6636 }, { - "epoch": 0.18807560429595624, + "epoch": 0.25968385632678614, "grad_norm": 0.0, - "learning_rate": 1.8717944049715412e-05, - "loss": 1.1075, + "learning_rate": 1.7358571544960623e-05, + "loss": 1.0877, "step": 6637 }, { - "epoch": 0.1881039417382187, + "epoch": 0.2597229830190156, "grad_norm": 0.0, - "learning_rate": 1.8717494413793123e-05, - "loss": 1.0602, + "learning_rate": 1.7357713397054995e-05, + "loss": 1.1899, "step": 6638 }, { - "epoch": 0.18813227918048117, + "epoch": 0.259762109711245, "grad_norm": 0.0, - "learning_rate": 1.871704470444014e-05, - "loss": 1.0309, + "learning_rate": 1.7356855130993713e-05, + "loss": 1.1289, "step": 6639 }, { - "epoch": 0.18816061662274364, + "epoch": 0.25980123640347447, "grad_norm": 0.0, - "learning_rate": 1.8716594921660253e-05, - "loss": 1.0683, + "learning_rate": 1.7355996746790556e-05, + "loss": 1.2284, "step": 6640 }, { - "epoch": 0.1881889540650061, + "epoch": 0.2598403630957039, "grad_norm": 0.0, - "learning_rate": 1.871614506545725e-05, - "loss": 1.03, + "learning_rate": 1.7355138244459306e-05, + "loss": 0.9862, "step": 6641 }, { - "epoch": 0.18821729150726854, + "epoch": 0.25987948978793335, "grad_norm": 0.0, - "learning_rate": 1.871569513583492e-05, - "loss": 0.9185, + "learning_rate": 1.7354279624013753e-05, + "loss": 1.1306, "step": 6642 }, { - "epoch": 0.188245628949531, + "epoch": 0.2599186164801628, "grad_norm": 0.0, - "learning_rate": 1.8715245132797056e-05, - "loss": 1.0316, + "learning_rate": 1.7353420885467688e-05, + "loss": 1.2341, "step": 6643 }, { - "epoch": 0.18827396639179347, + "epoch": 0.25995774317239223, "grad_norm": 0.0, - "learning_rate": 1.8714795056347443e-05, - "loss": 1.0402, + "learning_rate": 1.7352562028834895e-05, + "loss": 1.2251, "step": 6644 }, { - "epoch": 0.18830230383405594, + "epoch": 0.25999686986462167, "grad_norm": 0.0, - "learning_rate": 1.8714344906489878e-05, - "loss": 1.0761, + "learning_rate": 1.735170305412917e-05, + "loss": 1.1242, "step": 6645 }, { - "epoch": 0.1883306412763184, + "epoch": 0.26003599655685106, "grad_norm": 0.0, - "learning_rate": 1.8713894683228147e-05, - "loss": 1.0263, + "learning_rate": 1.7350843961364307e-05, + "loss": 1.129, "step": 6646 }, { - "epoch": 0.18835897871858087, + "epoch": 0.2600751232490805, "grad_norm": 0.0, - "learning_rate": 1.8713444386566047e-05, - "loss": 0.9859, + "learning_rate": 1.73499847505541e-05, + "loss": 1.1502, "step": 6647 }, { - "epoch": 0.1883873161608433, + "epoch": 0.26011424994130994, "grad_norm": 0.0, - "learning_rate": 1.871299401650737e-05, - "loss": 1.0433, + "learning_rate": 1.7349125421712346e-05, + "loss": 1.1144, "step": 6648 }, { - "epoch": 0.18841565360310578, + "epoch": 0.2601533766335394, "grad_norm": 0.0, - "learning_rate": 1.8712543573055907e-05, - "loss": 0.9197, + "learning_rate": 1.7348265974852847e-05, + "loss": 1.084, "step": 6649 }, { - "epoch": 0.18844399104536824, + "epoch": 0.2601925033257688, "grad_norm": 0.0, - "learning_rate": 1.8712093056215453e-05, - "loss": 0.956, + "learning_rate": 1.7347406409989407e-05, + "loss": 1.1658, "step": 6650 }, { - "epoch": 0.1884723284876307, + "epoch": 0.26023163001799826, "grad_norm": 0.0, - "learning_rate": 1.8711642465989806e-05, - "loss": 0.9458, + "learning_rate": 1.7346546727135823e-05, + "loss": 1.0538, "step": 6651 }, { - "epoch": 0.18850066592989317, + "epoch": 0.2602707567102277, "grad_norm": 0.0, - "learning_rate": 1.871119180238276e-05, - "loss": 0.9609, + "learning_rate": 1.7345686926305908e-05, + "loss": 1.1579, "step": 6652 }, { - "epoch": 0.18852900337215564, + "epoch": 0.26030988340245714, "grad_norm": 0.0, - "learning_rate": 1.8710741065398112e-05, - "loss": 1.0028, + "learning_rate": 1.7344827007513464e-05, + "loss": 1.0239, "step": 6653 }, { - "epoch": 0.18855734081441808, + "epoch": 0.2603490100946866, "grad_norm": 0.0, - "learning_rate": 1.8710290255039654e-05, - "loss": 1.0254, + "learning_rate": 1.7343966970772303e-05, + "loss": 1.162, "step": 6654 }, { - "epoch": 0.18858567825668054, + "epoch": 0.260388136786916, "grad_norm": 0.0, - "learning_rate": 1.870983937131119e-05, - "loss": 1.1281, + "learning_rate": 1.7343106816096234e-05, + "loss": 0.9659, "step": 6655 }, { - "epoch": 0.188614015698943, + "epoch": 0.26042726347914547, "grad_norm": 0.0, - "learning_rate": 1.870938841421651e-05, - "loss": 1.0001, + "learning_rate": 1.7342246543499074e-05, + "loss": 1.2029, "step": 6656 }, { - "epoch": 0.18864235314120548, + "epoch": 0.2604663901713749, "grad_norm": 0.0, - "learning_rate": 1.870893738375942e-05, - "loss": 1.0669, + "learning_rate": 1.734138615299463e-05, + "loss": 0.9999, "step": 6657 }, { - "epoch": 0.18867069058346794, + "epoch": 0.26050551686360435, "grad_norm": 0.0, - "learning_rate": 1.870848627994372e-05, - "loss": 0.9194, + "learning_rate": 1.7340525644596728e-05, + "loss": 1.1097, "step": 6658 }, { - "epoch": 0.1886990280257304, + "epoch": 0.2605446435558338, "grad_norm": 0.0, - "learning_rate": 1.8708035102773198e-05, - "loss": 1.0083, + "learning_rate": 1.7339665018319178e-05, + "loss": 1.2263, "step": 6659 }, { - "epoch": 0.18872736546799285, + "epoch": 0.26058377024806323, "grad_norm": 0.0, - "learning_rate": 1.870758385225167e-05, - "loss": 1.0823, + "learning_rate": 1.7338804274175805e-05, + "loss": 1.1777, "step": 6660 }, { - "epoch": 0.1887557029102553, + "epoch": 0.26062289694029267, "grad_norm": 0.0, - "learning_rate": 1.8707132528382927e-05, - "loss": 0.9995, + "learning_rate": 1.7337943412180435e-05, + "loss": 1.1581, "step": 6661 }, { - "epoch": 0.18878404035251778, + "epoch": 0.2606620236325221, "grad_norm": 0.0, - "learning_rate": 1.8706681131170773e-05, - "loss": 0.8715, + "learning_rate": 1.7337082432346888e-05, + "loss": 1.0647, "step": 6662 }, { - "epoch": 0.18881237779478025, + "epoch": 0.26070115032475155, "grad_norm": 0.0, - "learning_rate": 1.870622966061901e-05, - "loss": 0.9914, + "learning_rate": 1.733622133468899e-05, + "loss": 1.1975, "step": 6663 }, { - "epoch": 0.1888407152370427, + "epoch": 0.260740277016981, "grad_norm": 0.0, - "learning_rate": 1.8705778116731443e-05, - "loss": 0.9873, + "learning_rate": 1.733536011922057e-05, + "loss": 1.0816, "step": 6664 }, { - "epoch": 0.18886905267930518, + "epoch": 0.26077940370921043, "grad_norm": 0.0, - "learning_rate": 1.8705326499511873e-05, - "loss": 1.0146, + "learning_rate": 1.733449878595546e-05, + "loss": 1.0363, "step": 6665 }, { - "epoch": 0.18889739012156762, + "epoch": 0.2608185304014399, "grad_norm": 0.0, - "learning_rate": 1.8704874808964105e-05, - "loss": 0.9007, + "learning_rate": 1.7333637334907487e-05, + "loss": 1.1218, "step": 6666 }, { - "epoch": 0.18892572756383008, + "epoch": 0.2608576570936693, "grad_norm": 0.0, - "learning_rate": 1.8704423045091948e-05, - "loss": 0.9739, + "learning_rate": 1.7332775766090492e-05, + "loss": 1.1593, "step": 6667 }, { - "epoch": 0.18895406500609255, + "epoch": 0.26089678378589876, "grad_norm": 0.0, - "learning_rate": 1.87039712078992e-05, - "loss": 0.9584, + "learning_rate": 1.7331914079518305e-05, + "loss": 1.2074, "step": 6668 }, { - "epoch": 0.188982402448355, + "epoch": 0.2609359104781282, "grad_norm": 0.0, - "learning_rate": 1.870351929738967e-05, - "loss": 0.9591, + "learning_rate": 1.733105227520476e-05, + "loss": 1.1467, "step": 6669 }, { - "epoch": 0.18901073989061748, + "epoch": 0.26097503717035764, "grad_norm": 0.0, - "learning_rate": 1.8703067313567166e-05, - "loss": 0.9989, + "learning_rate": 1.733019035316371e-05, + "loss": 1.187, "step": 6670 }, { - "epoch": 0.18903907733287995, + "epoch": 0.2610141638625871, "grad_norm": 0.0, - "learning_rate": 1.8702615256435495e-05, - "loss": 0.9837, + "learning_rate": 1.7329328313408984e-05, + "loss": 1.1909, "step": 6671 }, { - "epoch": 0.18906741477514238, + "epoch": 0.2610532905548165, "grad_norm": 0.0, - "learning_rate": 1.8702163125998462e-05, - "loss": 0.9364, + "learning_rate": 1.7328466155954428e-05, + "loss": 1.1895, "step": 6672 }, { - "epoch": 0.18909575221740485, + "epoch": 0.26109241724704596, "grad_norm": 0.0, - "learning_rate": 1.870171092225988e-05, - "loss": 0.9362, + "learning_rate": 1.7327603880813893e-05, + "loss": 1.1223, "step": 6673 }, { - "epoch": 0.18912408965966732, + "epoch": 0.2611315439392754, "grad_norm": 0.0, - "learning_rate": 1.8701258645223555e-05, - "loss": 1.0687, + "learning_rate": 1.732674148800122e-05, + "loss": 1.1246, "step": 6674 }, { - "epoch": 0.18915242710192978, + "epoch": 0.2611706706315048, "grad_norm": 0.0, - "learning_rate": 1.8700806294893296e-05, - "loss": 1.0142, + "learning_rate": 1.7325878977530258e-05, + "loss": 1.1206, "step": 6675 }, { - "epoch": 0.18918076454419225, + "epoch": 0.2612097973237342, "grad_norm": 0.0, - "learning_rate": 1.8700353871272915e-05, - "loss": 0.9825, + "learning_rate": 1.7325016349414858e-05, + "loss": 1.1287, "step": 6676 }, { - "epoch": 0.18920910198645471, + "epoch": 0.26124892401596367, "grad_norm": 0.0, - "learning_rate": 1.8699901374366224e-05, - "loss": 1.0449, + "learning_rate": 1.732415360366888e-05, + "loss": 1.1027, "step": 6677 }, { - "epoch": 0.18923743942871715, + "epoch": 0.2612880507081931, "grad_norm": 0.0, - "learning_rate": 1.8699448804177028e-05, - "loss": 1.0262, + "learning_rate": 1.732329074030617e-05, + "loss": 1.204, "step": 6678 }, { - "epoch": 0.18926577687097962, + "epoch": 0.26132717740042255, "grad_norm": 0.0, - "learning_rate": 1.8698996160709147e-05, - "loss": 1.0977, + "learning_rate": 1.7322427759340584e-05, + "loss": 1.1281, "step": 6679 }, { - "epoch": 0.18929411431324208, + "epoch": 0.261366304092652, "grad_norm": 0.0, - "learning_rate": 1.8698543443966395e-05, - "loss": 0.9293, + "learning_rate": 1.732156466078599e-05, + "loss": 1.1635, "step": 6680 }, { - "epoch": 0.18932245175550455, + "epoch": 0.26140543078488143, "grad_norm": 0.0, - "learning_rate": 1.869809065395258e-05, - "loss": 1.0205, + "learning_rate": 1.7320701444656235e-05, + "loss": 1.2204, "step": 6681 }, { - "epoch": 0.18935078919776702, + "epoch": 0.2614445574771109, "grad_norm": 0.0, - "learning_rate": 1.8697637790671514e-05, - "loss": 1.0232, + "learning_rate": 1.7319838110965192e-05, + "loss": 1.1428, "step": 6682 }, { - "epoch": 0.18937912664002948, + "epoch": 0.2614836841693403, "grad_norm": 0.0, - "learning_rate": 1.8697184854127015e-05, - "loss": 0.9609, + "learning_rate": 1.7318974659726722e-05, + "loss": 1.0007, "step": 6683 }, { - "epoch": 0.18940746408229192, + "epoch": 0.26152281086156975, "grad_norm": 0.0, - "learning_rate": 1.86967318443229e-05, - "loss": 1.0215, + "learning_rate": 1.731811109095469e-05, + "loss": 1.0828, "step": 6684 }, { - "epoch": 0.1894358015245544, + "epoch": 0.2615619375537992, "grad_norm": 0.0, - "learning_rate": 1.8696278761262986e-05, - "loss": 1.0681, + "learning_rate": 1.7317247404662963e-05, + "loss": 1.2355, "step": 6685 }, { - "epoch": 0.18946413896681685, + "epoch": 0.26160106424602864, "grad_norm": 0.0, - "learning_rate": 1.8695825604951083e-05, - "loss": 1.1314, + "learning_rate": 1.731638360086541e-05, + "loss": 1.0446, "step": 6686 }, { - "epoch": 0.18949247640907932, + "epoch": 0.2616401909382581, "grad_norm": 0.0, - "learning_rate": 1.8695372375391013e-05, - "loss": 0.9563, + "learning_rate": 1.7315519679575905e-05, + "loss": 1.2053, "step": 6687 }, { - "epoch": 0.18952081385134179, + "epoch": 0.2616793176304875, "grad_norm": 0.0, - "learning_rate": 1.8694919072586595e-05, - "loss": 1.076, + "learning_rate": 1.731465564080832e-05, + "loss": 1.1868, "step": 6688 }, { - "epoch": 0.18954915129360425, + "epoch": 0.26171844432271696, "grad_norm": 0.0, - "learning_rate": 1.8694465696541643e-05, - "loss": 0.9604, + "learning_rate": 1.7313791484576533e-05, + "loss": 1.3285, "step": 6689 }, { - "epoch": 0.1895774887358667, + "epoch": 0.2617575710149464, "grad_norm": 0.0, - "learning_rate": 1.8694012247259977e-05, - "loss": 0.9033, + "learning_rate": 1.731292721089442e-05, + "loss": 1.0479, "step": 6690 }, { - "epoch": 0.18960582617812916, + "epoch": 0.26179669770717584, "grad_norm": 0.0, - "learning_rate": 1.8693558724745417e-05, - "loss": 1.0011, + "learning_rate": 1.731206281977586e-05, + "loss": 1.1077, "step": 6691 }, { - "epoch": 0.18963416362039162, + "epoch": 0.2618358243994053, "grad_norm": 0.0, - "learning_rate": 1.8693105129001786e-05, - "loss": 1.0041, + "learning_rate": 1.7311198311234734e-05, + "loss": 1.0486, "step": 6692 }, { - "epoch": 0.1896625010626541, + "epoch": 0.2618749510916347, "grad_norm": 0.0, - "learning_rate": 1.86926514600329e-05, - "loss": 1.0047, + "learning_rate": 1.7310333685284924e-05, + "loss": 1.1622, "step": 6693 }, { - "epoch": 0.18969083850491655, + "epoch": 0.26191407778386416, "grad_norm": 0.0, - "learning_rate": 1.8692197717842583e-05, - "loss": 1.0811, + "learning_rate": 1.7309468941940312e-05, + "loss": 1.233, "step": 6694 }, { - "epoch": 0.18971917594717902, + "epoch": 0.2619532044760936, "grad_norm": 0.0, - "learning_rate": 1.869174390243466e-05, - "loss": 0.9352, + "learning_rate": 1.7308604081214793e-05, + "loss": 1.2091, "step": 6695 }, { - "epoch": 0.18974751338944146, + "epoch": 0.26199233116832304, "grad_norm": 0.0, - "learning_rate": 1.8691290013812943e-05, - "loss": 1.0889, + "learning_rate": 1.7307739103122247e-05, + "loss": 1.2341, "step": 6696 }, { - "epoch": 0.18977585083170392, + "epoch": 0.2620314578605525, "grad_norm": 0.0, - "learning_rate": 1.869083605198127e-05, - "loss": 1.0464, + "learning_rate": 1.730687400767657e-05, + "loss": 1.2513, "step": 6697 }, { - "epoch": 0.1898041882739664, + "epoch": 0.2620705845527819, "grad_norm": 0.0, - "learning_rate": 1.8690382016943455e-05, - "loss": 0.9091, + "learning_rate": 1.730600879489165e-05, + "loss": 0.9594, "step": 6698 }, { - "epoch": 0.18983252571622886, + "epoch": 0.26210971124501137, "grad_norm": 0.0, - "learning_rate": 1.8689927908703325e-05, - "loss": 1.0464, + "learning_rate": 1.7305143464781387e-05, + "loss": 1.1366, "step": 6699 }, { - "epoch": 0.18986086315849132, + "epoch": 0.2621488379372408, "grad_norm": 0.0, - "learning_rate": 1.8689473727264705e-05, - "loss": 1.0135, + "learning_rate": 1.7304278017359672e-05, + "loss": 1.0854, "step": 6700 }, { - "epoch": 0.1898892006007538, + "epoch": 0.26218796462947025, "grad_norm": 0.0, - "learning_rate": 1.8689019472631424e-05, - "loss": 1.1419, + "learning_rate": 1.7303412452640404e-05, + "loss": 1.0668, "step": 6701 }, { - "epoch": 0.18991753804301623, + "epoch": 0.2622270913216997, "grad_norm": 0.0, - "learning_rate": 1.8688565144807303e-05, - "loss": 0.9715, + "learning_rate": 1.730254677063749e-05, + "loss": 1.1745, "step": 6702 }, { - "epoch": 0.1899458754852787, + "epoch": 0.2622662180139291, "grad_norm": 0.0, - "learning_rate": 1.8688110743796174e-05, - "loss": 1.1027, + "learning_rate": 1.7301680971364817e-05, + "loss": 1.1131, "step": 6703 }, { - "epoch": 0.18997421292754116, + "epoch": 0.2623053447061585, "grad_norm": 0.0, - "learning_rate": 1.8687656269601856e-05, - "loss": 0.9724, + "learning_rate": 1.73008150548363e-05, + "loss": 1.168, "step": 6704 }, { - "epoch": 0.19000255036980362, + "epoch": 0.26234447139838796, "grad_norm": 0.0, - "learning_rate": 1.8687201722228188e-05, - "loss": 1.1088, + "learning_rate": 1.7299949021065842e-05, + "loss": 1.0228, "step": 6705 }, { - "epoch": 0.1900308878120661, + "epoch": 0.2623835980906174, "grad_norm": 0.0, - "learning_rate": 1.8686747101678994e-05, - "loss": 0.9497, + "learning_rate": 1.729908287006735e-05, + "loss": 1.1752, "step": 6706 }, { - "epoch": 0.19005922525432856, + "epoch": 0.26242272478284684, "grad_norm": 0.0, - "learning_rate": 1.86862924079581e-05, - "loss": 1.1106, + "learning_rate": 1.7298216601854734e-05, + "loss": 1.1863, "step": 6707 }, { - "epoch": 0.190087562696591, + "epoch": 0.2624618514750763, "grad_norm": 0.0, - "learning_rate": 1.8685837641069342e-05, - "loss": 1.002, + "learning_rate": 1.7297350216441903e-05, + "loss": 1.1524, "step": 6708 }, { - "epoch": 0.19011590013885346, + "epoch": 0.2625009781673057, "grad_norm": 0.0, - "learning_rate": 1.868538280101655e-05, - "loss": 1.0854, + "learning_rate": 1.7296483713842772e-05, + "loss": 1.1494, "step": 6709 }, { - "epoch": 0.19014423758111593, + "epoch": 0.26254010485953516, "grad_norm": 0.0, - "learning_rate": 1.868492788780355e-05, - "loss": 1.0592, + "learning_rate": 1.7295617094071256e-05, + "loss": 1.0369, "step": 6710 }, { - "epoch": 0.1901725750233784, + "epoch": 0.2625792315517646, "grad_norm": 0.0, - "learning_rate": 1.8684472901434178e-05, - "loss": 0.9075, + "learning_rate": 1.7294750357141273e-05, + "loss": 1.1031, "step": 6711 }, { - "epoch": 0.19020091246564086, + "epoch": 0.26261835824399404, "grad_norm": 0.0, - "learning_rate": 1.8684017841912266e-05, - "loss": 1.0649, + "learning_rate": 1.729388350306674e-05, + "loss": 1.2446, "step": 6712 }, { - "epoch": 0.19022924990790333, + "epoch": 0.2626574849362235, "grad_norm": 0.0, - "learning_rate": 1.868356270924165e-05, - "loss": 1.0265, + "learning_rate": 1.7293016531861575e-05, + "loss": 1.2765, "step": 6713 }, { - "epoch": 0.19025758735016576, + "epoch": 0.2626966116284529, "grad_norm": 0.0, - "learning_rate": 1.8683107503426158e-05, - "loss": 1.1414, + "learning_rate": 1.7292149443539706e-05, + "loss": 1.2275, "step": 6714 }, { - "epoch": 0.19028592479242823, + "epoch": 0.26273573832068237, "grad_norm": 0.0, - "learning_rate": 1.8682652224469625e-05, - "loss": 0.9534, + "learning_rate": 1.7291282238115052e-05, + "loss": 1.1304, "step": 6715 }, { - "epoch": 0.1903142622346907, + "epoch": 0.2627748650129118, "grad_norm": 0.0, - "learning_rate": 1.868219687237589e-05, - "loss": 1.1245, + "learning_rate": 1.7290414915601543e-05, + "loss": 1.2815, "step": 6716 }, { - "epoch": 0.19034259967695316, + "epoch": 0.26281399170514125, "grad_norm": 0.0, - "learning_rate": 1.8681741447148788e-05, - "loss": 0.9369, + "learning_rate": 1.7289547476013105e-05, + "loss": 1.2474, "step": 6717 }, { - "epoch": 0.19037093711921563, + "epoch": 0.2628531183973707, "grad_norm": 0.0, - "learning_rate": 1.8681285948792154e-05, - "loss": 0.9283, + "learning_rate": 1.728867991936367e-05, + "loss": 1.2285, "step": 6718 }, { - "epoch": 0.1903992745614781, + "epoch": 0.26289224508960013, "grad_norm": 0.0, - "learning_rate": 1.868083037730982e-05, - "loss": 1.0712, + "learning_rate": 1.7287812245667168e-05, + "loss": 1.0813, "step": 6719 }, { - "epoch": 0.19042761200374053, + "epoch": 0.26293137178182957, "grad_norm": 0.0, - "learning_rate": 1.8680374732705634e-05, - "loss": 1.1053, + "learning_rate": 1.7286944454937536e-05, + "loss": 1.0217, "step": 6720 }, { - "epoch": 0.190455949446003, + "epoch": 0.262970498474059, "grad_norm": 0.0, - "learning_rate": 1.867991901498343e-05, - "loss": 1.0451, + "learning_rate": 1.7286076547188703e-05, + "loss": 1.1456, "step": 6721 }, { - "epoch": 0.19048428688826546, + "epoch": 0.26300962516628845, "grad_norm": 0.0, - "learning_rate": 1.8679463224147037e-05, - "loss": 1.0748, + "learning_rate": 1.7285208522434615e-05, + "loss": 1.1633, "step": 6722 }, { - "epoch": 0.19051262433052793, + "epoch": 0.2630487518585179, "grad_norm": 0.0, - "learning_rate": 1.8679007360200304e-05, - "loss": 1.0131, + "learning_rate": 1.7284340380689203e-05, + "loss": 1.0519, "step": 6723 }, { - "epoch": 0.1905409617727904, + "epoch": 0.26308787855074733, "grad_norm": 0.0, - "learning_rate": 1.8678551423147073e-05, - "loss": 0.9774, + "learning_rate": 1.7283472121966414e-05, + "loss": 1.1991, "step": 6724 }, { - "epoch": 0.19056929921505286, + "epoch": 0.2631270052429768, "grad_norm": 0.0, - "learning_rate": 1.867809541299118e-05, - "loss": 0.9775, + "learning_rate": 1.728260374628019e-05, + "loss": 1.2553, "step": 6725 }, { - "epoch": 0.1905976366573153, + "epoch": 0.2631661319352062, "grad_norm": 0.0, - "learning_rate": 1.8677639329736464e-05, - "loss": 0.929, + "learning_rate": 1.728173525364447e-05, + "loss": 1.1265, "step": 6726 }, { - "epoch": 0.19062597409957777, + "epoch": 0.26320525862743566, "grad_norm": 0.0, - "learning_rate": 1.867718317338677e-05, - "loss": 0.9494, + "learning_rate": 1.7280866644073214e-05, + "loss": 1.0154, "step": 6727 }, { - "epoch": 0.19065431154184023, + "epoch": 0.2632443853196651, "grad_norm": 0.0, - "learning_rate": 1.8676726943945945e-05, - "loss": 1.0006, + "learning_rate": 1.727999791758036e-05, + "loss": 1.141, "step": 6728 }, { - "epoch": 0.1906826489841027, + "epoch": 0.26328351201189454, "grad_norm": 0.0, - "learning_rate": 1.8676270641417824e-05, - "loss": 1.0298, + "learning_rate": 1.727912907417986e-05, + "loss": 1.2047, "step": 6729 }, { - "epoch": 0.19071098642636516, + "epoch": 0.263322638704124, "grad_norm": 0.0, - "learning_rate": 1.867581426580625e-05, - "loss": 1.0563, + "learning_rate": 1.727826011388567e-05, + "loss": 1.1212, "step": 6730 }, { - "epoch": 0.19073932386862763, + "epoch": 0.2633617653963534, "grad_norm": 0.0, - "learning_rate": 1.8675357817115076e-05, - "loss": 1.0182, + "learning_rate": 1.7277391036711747e-05, + "loss": 1.1066, "step": 6731 }, { - "epoch": 0.19076766131089007, + "epoch": 0.2634008920885828, "grad_norm": 0.0, - "learning_rate": 1.8674901295348136e-05, - "loss": 1.0214, + "learning_rate": 1.727652184267204e-05, + "loss": 1.1135, "step": 6732 }, { - "epoch": 0.19079599875315253, + "epoch": 0.26344001878081225, "grad_norm": 0.0, - "learning_rate": 1.8674444700509287e-05, - "loss": 0.9954, + "learning_rate": 1.7275652531780508e-05, + "loss": 1.0594, "step": 6733 }, { - "epoch": 0.190824336195415, + "epoch": 0.2634791454730417, "grad_norm": 0.0, - "learning_rate": 1.8673988032602368e-05, - "loss": 1.0346, + "learning_rate": 1.7274783104051112e-05, + "loss": 1.0533, "step": 6734 }, { - "epoch": 0.19085267363767747, + "epoch": 0.26351827216527113, "grad_norm": 0.0, - "learning_rate": 1.8673531291631223e-05, - "loss": 1.0785, + "learning_rate": 1.7273913559497818e-05, + "loss": 1.1683, "step": 6735 }, { - "epoch": 0.19088101107993993, + "epoch": 0.26355739885750057, "grad_norm": 0.0, - "learning_rate": 1.8673074477599705e-05, - "loss": 1.0786, + "learning_rate": 1.7273043898134587e-05, + "loss": 1.246, "step": 6736 }, { - "epoch": 0.1909093485222024, + "epoch": 0.26359652554973, "grad_norm": 0.0, - "learning_rate": 1.867261759051166e-05, - "loss": 1.0279, + "learning_rate": 1.7272174119975386e-05, + "loss": 1.0787, "step": 6737 }, { - "epoch": 0.19093768596446484, + "epoch": 0.26363565224195945, "grad_norm": 0.0, - "learning_rate": 1.867216063037094e-05, - "loss": 0.9453, + "learning_rate": 1.7271304225034177e-05, + "loss": 1.1102, "step": 6738 }, { - "epoch": 0.1909660234067273, + "epoch": 0.2636747789341889, "grad_norm": 0.0, - "learning_rate": 1.8671703597181387e-05, - "loss": 0.9546, + "learning_rate": 1.7270434213324936e-05, + "loss": 1.1442, "step": 6739 }, { - "epoch": 0.19099436084898977, + "epoch": 0.26371390562641833, "grad_norm": 0.0, - "learning_rate": 1.8671246490946853e-05, - "loss": 1.0259, + "learning_rate": 1.7269564084861632e-05, + "loss": 1.0429, "step": 6740 }, { - "epoch": 0.19102269829125224, + "epoch": 0.2637530323186478, "grad_norm": 0.0, - "learning_rate": 1.8670789311671195e-05, - "loss": 1.0204, + "learning_rate": 1.726869383965824e-05, + "loss": 1.2886, "step": 6741 }, { - "epoch": 0.1910510357335147, + "epoch": 0.2637921590108772, "grad_norm": 0.0, - "learning_rate": 1.867033205935825e-05, - "loss": 1.0466, + "learning_rate": 1.7267823477728727e-05, + "loss": 1.2391, "step": 6742 }, { - "epoch": 0.19107937317577717, + "epoch": 0.26383128570310665, "grad_norm": 0.0, - "learning_rate": 1.8669874734011885e-05, - "loss": 0.9119, + "learning_rate": 1.7266952999087082e-05, + "loss": 1.1257, "step": 6743 }, { - "epoch": 0.1911077106180396, + "epoch": 0.2638704123953361, "grad_norm": 0.0, - "learning_rate": 1.8669417335635946e-05, - "loss": 1.1426, + "learning_rate": 1.7266082403747278e-05, + "loss": 1.1819, "step": 6744 }, { - "epoch": 0.19113604806030207, + "epoch": 0.26390953908756554, "grad_norm": 0.0, - "learning_rate": 1.8668959864234284e-05, - "loss": 0.9354, + "learning_rate": 1.7265211691723292e-05, + "loss": 1.0703, "step": 6745 }, { - "epoch": 0.19116438550256454, + "epoch": 0.263948665779795, "grad_norm": 0.0, - "learning_rate": 1.866850231981075e-05, - "loss": 1.1165, + "learning_rate": 1.7264340863029113e-05, + "loss": 1.0772, "step": 6746 }, { - "epoch": 0.191192722944827, + "epoch": 0.2639877924720244, "grad_norm": 0.0, - "learning_rate": 1.866804470236921e-05, - "loss": 1.0293, + "learning_rate": 1.726346991767872e-05, + "loss": 1.1359, "step": 6747 }, { - "epoch": 0.19122106038708947, + "epoch": 0.26402691916425386, "grad_norm": 0.0, - "learning_rate": 1.8667587011913507e-05, - "loss": 0.9415, + "learning_rate": 1.7262598855686105e-05, + "loss": 1.1827, "step": 6748 }, { - "epoch": 0.19124939782935194, + "epoch": 0.2640660458564833, "grad_norm": 0.0, - "learning_rate": 1.8667129248447498e-05, - "loss": 0.9945, + "learning_rate": 1.7261727677065248e-05, + "loss": 1.1741, "step": 6749 }, { - "epoch": 0.19127773527161437, + "epoch": 0.26410517254871274, "grad_norm": 0.0, - "learning_rate": 1.8666671411975043e-05, - "loss": 0.9858, + "learning_rate": 1.726085638183015e-05, + "loss": 1.079, "step": 6750 }, { - "epoch": 0.19130607271387684, + "epoch": 0.2641442992409422, "grad_norm": 0.0, - "learning_rate": 1.8666213502499995e-05, - "loss": 1.0963, + "learning_rate": 1.7259984969994793e-05, + "loss": 1.2155, "step": 6751 }, { - "epoch": 0.1913344101561393, + "epoch": 0.2641834259331716, "grad_norm": 0.0, - "learning_rate": 1.8665755520026215e-05, - "loss": 1.0158, + "learning_rate": 1.7259113441573174e-05, + "loss": 1.1924, "step": 6752 }, { - "epoch": 0.19136274759840177, + "epoch": 0.26422255262540106, "grad_norm": 0.0, - "learning_rate": 1.866529746455756e-05, - "loss": 1.0722, + "learning_rate": 1.725824179657929e-05, + "loss": 1.0557, "step": 6753 }, { - "epoch": 0.19139108504066424, + "epoch": 0.2642616793176305, "grad_norm": 0.0, - "learning_rate": 1.866483933609788e-05, - "loss": 1.0343, + "learning_rate": 1.725737003502714e-05, + "loss": 1.1863, "step": 6754 }, { - "epoch": 0.1914194224829267, + "epoch": 0.26430080600985995, "grad_norm": 0.0, - "learning_rate": 1.8664381134651047e-05, - "loss": 0.9732, + "learning_rate": 1.7256498156930717e-05, + "loss": 1.1888, "step": 6755 }, { - "epoch": 0.19144775992518914, + "epoch": 0.2643399327020894, "grad_norm": 0.0, - "learning_rate": 1.8663922860220908e-05, - "loss": 1.0977, + "learning_rate": 1.7255626162304027e-05, + "loss": 1.1833, "step": 6756 }, { - "epoch": 0.1914760973674516, + "epoch": 0.2643790593943188, "grad_norm": 0.0, - "learning_rate": 1.8663464512811334e-05, - "loss": 0.9622, + "learning_rate": 1.7254754051161075e-05, + "loss": 1.0015, "step": 6757 }, { - "epoch": 0.19150443480971407, + "epoch": 0.26441818608654827, "grad_norm": 0.0, - "learning_rate": 1.8663006092426182e-05, - "loss": 0.9933, + "learning_rate": 1.7253881823515866e-05, + "loss": 1.0428, "step": 6758 }, { - "epoch": 0.19153277225197654, + "epoch": 0.2644573127787777, "grad_norm": 0.0, - "learning_rate": 1.866254759906931e-05, - "loss": 0.9289, + "learning_rate": 1.72530094793824e-05, + "loss": 1.0559, "step": 6759 }, { - "epoch": 0.191561109694239, + "epoch": 0.2644964394710071, "grad_norm": 0.0, - "learning_rate": 1.8662089032744585e-05, - "loss": 0.8337, + "learning_rate": 1.7252137018774694e-05, + "loss": 1.1107, "step": 6760 }, { - "epoch": 0.19158944713650147, + "epoch": 0.26453556616323654, "grad_norm": 0.0, - "learning_rate": 1.8661630393455864e-05, - "loss": 1.0072, + "learning_rate": 1.7251264441706754e-05, + "loss": 1.0775, "step": 6761 }, { - "epoch": 0.1916177845787639, + "epoch": 0.264574692855466, "grad_norm": 0.0, - "learning_rate": 1.8661171681207014e-05, - "loss": 0.9708, + "learning_rate": 1.725039174819259e-05, + "loss": 1.1353, "step": 6762 }, { - "epoch": 0.19164612202102638, + "epoch": 0.2646138195476954, "grad_norm": 0.0, - "learning_rate": 1.8660712896001903e-05, - "loss": 0.978, + "learning_rate": 1.7249518938246223e-05, + "loss": 1.1451, "step": 6763 }, { - "epoch": 0.19167445946328884, + "epoch": 0.26465294623992486, "grad_norm": 0.0, - "learning_rate": 1.866025403784439e-05, - "loss": 1.0621, + "learning_rate": 1.7248646011881665e-05, + "loss": 1.1069, "step": 6764 }, { - "epoch": 0.1917027969055513, + "epoch": 0.2646920729321543, "grad_norm": 0.0, - "learning_rate": 1.865979510673834e-05, - "loss": 0.9957, + "learning_rate": 1.7247772969112934e-05, + "loss": 1.1614, "step": 6765 }, { - "epoch": 0.19173113434781378, + "epoch": 0.26473119962438374, "grad_norm": 0.0, - "learning_rate": 1.8659336102687618e-05, - "loss": 0.9223, + "learning_rate": 1.7246899809954053e-05, + "loss": 1.1941, "step": 6766 }, { - "epoch": 0.19175947179007624, + "epoch": 0.2647703263166132, "grad_norm": 0.0, - "learning_rate": 1.8658877025696095e-05, - "loss": 1.0888, + "learning_rate": 1.724602653441904e-05, + "loss": 1.2394, "step": 6767 }, { - "epoch": 0.19178780923233868, + "epoch": 0.2648094530088426, "grad_norm": 0.0, - "learning_rate": 1.8658417875767636e-05, - "loss": 1.0059, + "learning_rate": 1.7245153142521923e-05, + "loss": 1.2094, "step": 6768 }, { - "epoch": 0.19181614667460115, + "epoch": 0.26484857970107206, "grad_norm": 0.0, - "learning_rate": 1.8657958652906106e-05, - "loss": 0.9867, + "learning_rate": 1.7244279634276725e-05, + "loss": 1.1125, "step": 6769 }, { - "epoch": 0.1918444841168636, + "epoch": 0.2648877063933015, "grad_norm": 0.0, - "learning_rate": 1.865749935711538e-05, - "loss": 0.9309, + "learning_rate": 1.724340600969747e-05, + "loss": 1.2842, "step": 6770 }, { - "epoch": 0.19187282155912608, + "epoch": 0.26492683308553094, "grad_norm": 0.0, - "learning_rate": 1.8657039988399315e-05, - "loss": 1.0479, + "learning_rate": 1.72425322687982e-05, + "loss": 1.1042, "step": 6771 }, { - "epoch": 0.19190115900138854, + "epoch": 0.2649659597777604, "grad_norm": 0.0, - "learning_rate": 1.8656580546761792e-05, - "loss": 1.1563, + "learning_rate": 1.7241658411592926e-05, + "loss": 1.0487, "step": 6772 }, { - "epoch": 0.191929496443651, + "epoch": 0.2650050864699898, "grad_norm": 0.0, - "learning_rate": 1.8656121032206673e-05, - "loss": 1.041, + "learning_rate": 1.72407844380957e-05, + "loss": 1.2023, "step": 6773 }, { - "epoch": 0.19195783388591345, + "epoch": 0.26504421316221927, "grad_norm": 0.0, - "learning_rate": 1.8655661444737835e-05, - "loss": 1.0643, + "learning_rate": 1.7239910348320546e-05, + "loss": 1.2093, "step": 6774 }, { - "epoch": 0.19198617132817591, + "epoch": 0.2650833398544487, "grad_norm": 0.0, - "learning_rate": 1.8655201784359146e-05, - "loss": 1.0596, + "learning_rate": 1.7239036142281502e-05, + "loss": 1.2138, "step": 6775 }, { - "epoch": 0.19201450877043838, + "epoch": 0.26512246654667815, "grad_norm": 0.0, - "learning_rate": 1.865474205107448e-05, - "loss": 1.0199, + "learning_rate": 1.7238161819992613e-05, + "loss": 1.1141, "step": 6776 }, { - "epoch": 0.19204284621270085, + "epoch": 0.2651615932389076, "grad_norm": 0.0, - "learning_rate": 1.8654282244887704e-05, - "loss": 0.9596, + "learning_rate": 1.7237287381467915e-05, + "loss": 1.156, "step": 6777 }, { - "epoch": 0.1920711836549633, + "epoch": 0.26520071993113703, "grad_norm": 0.0, - "learning_rate": 1.86538223658027e-05, - "loss": 1.0128, + "learning_rate": 1.7236412826721445e-05, + "loss": 1.1246, "step": 6778 }, { - "epoch": 0.19209952109722575, + "epoch": 0.26523984662336647, "grad_norm": 0.0, - "learning_rate": 1.8653362413823333e-05, - "loss": 1.0611, + "learning_rate": 1.7235538155767257e-05, + "loss": 1.1487, "step": 6779 }, { - "epoch": 0.19212785853948822, + "epoch": 0.2652789733155959, "grad_norm": 0.0, - "learning_rate": 1.8652902388953478e-05, - "loss": 0.9743, + "learning_rate": 1.7234663368619392e-05, + "loss": 1.1458, "step": 6780 }, { - "epoch": 0.19215619598175068, + "epoch": 0.26531810000782535, "grad_norm": 0.0, - "learning_rate": 1.865244229119702e-05, - "loss": 1.0432, + "learning_rate": 1.72337884652919e-05, + "loss": 1.2936, "step": 6781 }, { - "epoch": 0.19218453342401315, + "epoch": 0.2653572267000548, "grad_norm": 0.0, - "learning_rate": 1.8651982120557824e-05, - "loss": 1.0497, + "learning_rate": 1.7232913445798825e-05, + "loss": 1.0685, "step": 6782 }, { - "epoch": 0.19221287086627561, + "epoch": 0.26539635339228423, "grad_norm": 0.0, - "learning_rate": 1.865152187703977e-05, - "loss": 0.9582, + "learning_rate": 1.723203831015423e-05, + "loss": 1.1583, "step": 6783 }, { - "epoch": 0.19224120830853808, + "epoch": 0.2654354800845137, "grad_norm": 0.0, - "learning_rate": 1.865106156064674e-05, - "loss": 1.1229, + "learning_rate": 1.7231163058372158e-05, + "loss": 1.1637, "step": 6784 }, { - "epoch": 0.19226954575080052, + "epoch": 0.2654746067767431, "grad_norm": 0.0, - "learning_rate": 1.8650601171382595e-05, - "loss": 1.0217, + "learning_rate": 1.723028769046667e-05, + "loss": 1.2148, "step": 6785 }, { - "epoch": 0.19229788319306299, + "epoch": 0.26551373346897256, "grad_norm": 0.0, - "learning_rate": 1.8650140709251233e-05, - "loss": 0.886, + "learning_rate": 1.722941220645182e-05, + "loss": 1.14, "step": 6786 }, { - "epoch": 0.19232622063532545, + "epoch": 0.265552860161202, "grad_norm": 0.0, - "learning_rate": 1.8649680174256518e-05, - "loss": 1.0188, + "learning_rate": 1.7228536606341672e-05, + "loss": 1.0232, "step": 6787 }, { - "epoch": 0.19235455807758792, + "epoch": 0.2655919868534314, "grad_norm": 0.0, - "learning_rate": 1.8649219566402336e-05, - "loss": 1.0608, + "learning_rate": 1.722766089015028e-05, + "loss": 1.1797, "step": 6788 }, { - "epoch": 0.19238289551985038, + "epoch": 0.2656311135456608, "grad_norm": 0.0, - "learning_rate": 1.864875888569257e-05, - "loss": 0.9291, + "learning_rate": 1.722678505789171e-05, + "loss": 1.175, "step": 6789 }, { - "epoch": 0.19241123296211285, + "epoch": 0.26567024023789026, "grad_norm": 0.0, - "learning_rate": 1.8648298132131092e-05, - "loss": 1.0337, + "learning_rate": 1.7225909109580038e-05, + "loss": 1.1359, "step": 6790 }, { - "epoch": 0.1924395704043753, + "epoch": 0.2657093669301197, "grad_norm": 0.0, - "learning_rate": 1.864783730572179e-05, - "loss": 1.0862, + "learning_rate": 1.7225033045229312e-05, + "loss": 1.0245, "step": 6791 }, { - "epoch": 0.19246790784663775, + "epoch": 0.26574849362234915, "grad_norm": 0.0, - "learning_rate": 1.864737640646854e-05, - "loss": 1.0909, + "learning_rate": 1.722415686485361e-05, + "loss": 1.0877, "step": 6792 }, { - "epoch": 0.19249624528890022, + "epoch": 0.2657876203145786, "grad_norm": 0.0, - "learning_rate": 1.8646915434375233e-05, - "loss": 1.0115, + "learning_rate": 1.7223280568467e-05, + "loss": 1.2192, "step": 6793 }, { - "epoch": 0.19252458273116269, + "epoch": 0.26582674700680803, "grad_norm": 0.0, - "learning_rate": 1.864645438944574e-05, - "loss": 0.9192, + "learning_rate": 1.7222404156083555e-05, + "loss": 1.108, "step": 6794 }, { - "epoch": 0.19255292017342515, + "epoch": 0.26586587369903747, "grad_norm": 0.0, - "learning_rate": 1.8645993271683953e-05, - "loss": 1.0443, + "learning_rate": 1.722152762771735e-05, + "loss": 1.1658, "step": 6795 }, { - "epoch": 0.19258125761568762, + "epoch": 0.2659050003912669, "grad_norm": 0.0, - "learning_rate": 1.8645532081093756e-05, - "loss": 1.133, + "learning_rate": 1.7220650983382462e-05, + "loss": 1.0706, "step": 6796 }, { - "epoch": 0.19260959505795006, + "epoch": 0.26594412708349635, "grad_norm": 0.0, - "learning_rate": 1.864507081767903e-05, - "loss": 1.0343, + "learning_rate": 1.7219774223092964e-05, + "loss": 1.1353, "step": 6797 }, { - "epoch": 0.19263793250021252, + "epoch": 0.2659832537757258, "grad_norm": 0.0, - "learning_rate": 1.864460948144366e-05, - "loss": 0.9762, + "learning_rate": 1.721889734686294e-05, + "loss": 1.1559, "step": 6798 }, { - "epoch": 0.192666269942475, + "epoch": 0.26602238046795523, "grad_norm": 0.0, - "learning_rate": 1.864414807239154e-05, - "loss": 0.95, + "learning_rate": 1.7218020354706473e-05, + "loss": 1.176, "step": 6799 }, { - "epoch": 0.19269460738473745, + "epoch": 0.2660615071601847, "grad_norm": 0.0, - "learning_rate": 1.8643686590526547e-05, - "loss": 1.0011, + "learning_rate": 1.7217143246637643e-05, + "loss": 1.1991, "step": 6800 }, { - "epoch": 0.19272294482699992, + "epoch": 0.2661006338524141, "grad_norm": 0.0, - "learning_rate": 1.8643225035852573e-05, - "loss": 1.0001, + "learning_rate": 1.7216266022670532e-05, + "loss": 1.2748, "step": 6801 }, { - "epoch": 0.1927512822692624, + "epoch": 0.26613976054464356, "grad_norm": 0.0, - "learning_rate": 1.8642763408373502e-05, - "loss": 1.0051, + "learning_rate": 1.7215388682819237e-05, + "loss": 1.2018, "step": 6802 }, { - "epoch": 0.19277961971152482, + "epoch": 0.266178887236873, "grad_norm": 0.0, - "learning_rate": 1.864230170809323e-05, - "loss": 0.9931, + "learning_rate": 1.721451122709784e-05, + "loss": 1.2091, "step": 6803 }, { - "epoch": 0.1928079571537873, + "epoch": 0.26621801392910244, "grad_norm": 0.0, - "learning_rate": 1.864183993501564e-05, - "loss": 1.0965, + "learning_rate": 1.721363365552043e-05, + "loss": 1.0593, "step": 6804 }, { - "epoch": 0.19283629459604976, + "epoch": 0.2662571406213319, "grad_norm": 0.0, - "learning_rate": 1.864137808914462e-05, - "loss": 0.9758, + "learning_rate": 1.7212755968101104e-05, + "loss": 1.1989, "step": 6805 }, { - "epoch": 0.19286463203831222, + "epoch": 0.2662962673135613, "grad_norm": 0.0, - "learning_rate": 1.864091617048407e-05, - "loss": 1.0869, + "learning_rate": 1.7211878164853954e-05, + "loss": 1.0609, "step": 6806 }, { - "epoch": 0.1928929694805747, + "epoch": 0.26633539400579076, "grad_norm": 0.0, - "learning_rate": 1.864045417903787e-05, - "loss": 0.886, + "learning_rate": 1.721100024579308e-05, + "loss": 1.1497, "step": 6807 }, { - "epoch": 0.19292130692283715, + "epoch": 0.2663745206980202, "grad_norm": 0.0, - "learning_rate": 1.8639992114809918e-05, - "loss": 1.0107, + "learning_rate": 1.7210122210932576e-05, + "loss": 1.0911, "step": 6808 }, { - "epoch": 0.1929496443650996, + "epoch": 0.26641364739024964, "grad_norm": 0.0, - "learning_rate": 1.86395299778041e-05, - "loss": 1.0707, + "learning_rate": 1.7209244060286545e-05, + "loss": 1.1968, "step": 6809 }, { - "epoch": 0.19297798180736206, + "epoch": 0.2664527740824791, "grad_norm": 0.0, - "learning_rate": 1.8639067768024315e-05, - "loss": 1.0172, + "learning_rate": 1.7208365793869087e-05, + "loss": 1.1903, "step": 6810 }, { - "epoch": 0.19300631924962453, + "epoch": 0.2664919007747085, "grad_norm": 0.0, - "learning_rate": 1.8638605485474455e-05, - "loss": 0.8744, + "learning_rate": 1.720748741169431e-05, + "loss": 1.1755, "step": 6811 }, { - "epoch": 0.193034656691887, + "epoch": 0.26653102746693796, "grad_norm": 0.0, - "learning_rate": 1.8638143130158415e-05, - "loss": 0.9527, + "learning_rate": 1.7206608913776315e-05, + "loss": 1.237, "step": 6812 }, { - "epoch": 0.19306299413414946, + "epoch": 0.2665701541591674, "grad_norm": 0.0, - "learning_rate": 1.8637680702080082e-05, - "loss": 1.0393, + "learning_rate": 1.720573030012921e-05, + "loss": 1.1134, "step": 6813 }, { - "epoch": 0.19309133157641192, + "epoch": 0.26660928085139685, "grad_norm": 0.0, - "learning_rate": 1.863721820124336e-05, - "loss": 0.983, + "learning_rate": 1.7204851570767108e-05, + "loss": 1.1304, "step": 6814 }, { - "epoch": 0.19311966901867436, + "epoch": 0.2666484075436263, "grad_norm": 0.0, - "learning_rate": 1.8636755627652143e-05, - "loss": 1.0562, + "learning_rate": 1.7203972725704114e-05, + "loss": 0.9573, "step": 6815 }, { - "epoch": 0.19314800646093683, + "epoch": 0.2666875342358557, "grad_norm": 0.0, - "learning_rate": 1.8636292981310327e-05, - "loss": 0.9621, + "learning_rate": 1.720309376495435e-05, + "loss": 1.1462, "step": 6816 }, { - "epoch": 0.1931763439031993, + "epoch": 0.2667266609280851, "grad_norm": 0.0, - "learning_rate": 1.8635830262221804e-05, - "loss": 0.9478, + "learning_rate": 1.7202214688531925e-05, + "loss": 1.2164, "step": 6817 }, { - "epoch": 0.19320468134546176, + "epoch": 0.26676578762031455, "grad_norm": 0.0, - "learning_rate": 1.8635367470390478e-05, - "loss": 0.9526, + "learning_rate": 1.7201335496450954e-05, + "loss": 1.2998, "step": 6818 }, { - "epoch": 0.19323301878772423, + "epoch": 0.266804914312544, "grad_norm": 0.0, - "learning_rate": 1.863490460582025e-05, - "loss": 1.1111, + "learning_rate": 1.720045618872556e-05, + "loss": 1.1024, "step": 6819 }, { - "epoch": 0.1932613562299867, + "epoch": 0.26684404100477344, "grad_norm": 0.0, - "learning_rate": 1.8634441668515005e-05, - "loss": 1.0538, + "learning_rate": 1.7199576765369865e-05, + "loss": 1.2053, "step": 6820 }, { - "epoch": 0.19328969367224913, + "epoch": 0.2668831676970029, "grad_norm": 0.0, - "learning_rate": 1.8633978658478658e-05, - "loss": 1.0901, + "learning_rate": 1.7198697226397985e-05, + "loss": 1.0699, "step": 6821 }, { - "epoch": 0.1933180311145116, + "epoch": 0.2669222943892323, "grad_norm": 0.0, - "learning_rate": 1.86335155757151e-05, - "loss": 0.9707, + "learning_rate": 1.7197817571824048e-05, + "loss": 1.104, "step": 6822 }, { - "epoch": 0.19334636855677406, + "epoch": 0.26696142108146176, "grad_norm": 0.0, - "learning_rate": 1.8633052420228236e-05, - "loss": 1.1342, + "learning_rate": 1.7196937801662182e-05, + "loss": 1.1416, "step": 6823 }, { - "epoch": 0.19337470599903653, + "epoch": 0.2670005477736912, "grad_norm": 0.0, - "learning_rate": 1.8632589192021964e-05, - "loss": 1.052, + "learning_rate": 1.7196057915926513e-05, + "loss": 1.0955, "step": 6824 }, { - "epoch": 0.193403043441299, + "epoch": 0.26703967446592064, "grad_norm": 0.0, - "learning_rate": 1.8632125891100184e-05, - "loss": 0.966, + "learning_rate": 1.7195177914631172e-05, + "loss": 1.1069, "step": 6825 }, { - "epoch": 0.19343138088356146, + "epoch": 0.2670788011581501, "grad_norm": 0.0, - "learning_rate": 1.863166251746681e-05, - "loss": 1.0015, + "learning_rate": 1.7194297797790288e-05, + "loss": 1.1134, "step": 6826 }, { - "epoch": 0.1934597183258239, + "epoch": 0.2671179278503795, "grad_norm": 0.0, - "learning_rate": 1.8631199071125735e-05, - "loss": 0.9412, + "learning_rate": 1.7193417565418e-05, + "loss": 1.1429, "step": 6827 }, { - "epoch": 0.19348805576808636, + "epoch": 0.26715705454260896, "grad_norm": 0.0, - "learning_rate": 1.8630735552080862e-05, - "loss": 0.9366, + "learning_rate": 1.7192537217528435e-05, + "loss": 1.2532, "step": 6828 }, { - "epoch": 0.19351639321034883, + "epoch": 0.2671961812348384, "grad_norm": 0.0, - "learning_rate": 1.86302719603361e-05, - "loss": 1.0072, + "learning_rate": 1.7191656754135733e-05, + "loss": 1.1178, "step": 6829 }, { - "epoch": 0.1935447306526113, + "epoch": 0.26723530792706784, "grad_norm": 0.0, - "learning_rate": 1.8629808295895352e-05, - "loss": 1.025, + "learning_rate": 1.7190776175254043e-05, + "loss": 1.2356, "step": 6830 }, { - "epoch": 0.19357306809487376, + "epoch": 0.2672744346192973, "grad_norm": 0.0, - "learning_rate": 1.8629344558762524e-05, - "loss": 1.1133, + "learning_rate": 1.7189895480897493e-05, + "loss": 1.1091, "step": 6831 }, { - "epoch": 0.19360140553713623, + "epoch": 0.2673135613115267, "grad_norm": 0.0, - "learning_rate": 1.8628880748941523e-05, - "loss": 1.0267, + "learning_rate": 1.7189014671080232e-05, + "loss": 1.0674, "step": 6832 }, { - "epoch": 0.19362974297939867, + "epoch": 0.26735268800375617, "grad_norm": 0.0, - "learning_rate": 1.8628416866436256e-05, - "loss": 1.0723, + "learning_rate": 1.7188133745816406e-05, + "loss": 1.1056, "step": 6833 }, { - "epoch": 0.19365808042166113, + "epoch": 0.2673918146959856, "grad_norm": 0.0, - "learning_rate": 1.8627952911250632e-05, - "loss": 1.0352, + "learning_rate": 1.7187252705120155e-05, + "loss": 1.1871, "step": 6834 }, { - "epoch": 0.1936864178639236, + "epoch": 0.26743094138821505, "grad_norm": 0.0, - "learning_rate": 1.862748888338855e-05, - "loss": 0.968, + "learning_rate": 1.7186371549005634e-05, + "loss": 0.9976, "step": 6835 }, { - "epoch": 0.19371475530618606, + "epoch": 0.2674700680804445, "grad_norm": 0.0, - "learning_rate": 1.862702478285393e-05, - "loss": 1.0943, + "learning_rate": 1.718549027748699e-05, + "loss": 1.0659, "step": 6836 }, { - "epoch": 0.19374309274844853, + "epoch": 0.26750919477267393, "grad_norm": 0.0, - "learning_rate": 1.8626560609650676e-05, - "loss": 1.0231, + "learning_rate": 1.718460889057838e-05, + "loss": 1.2539, "step": 6837 }, { - "epoch": 0.193771430190711, + "epoch": 0.26754832146490337, "grad_norm": 0.0, - "learning_rate": 1.8626096363782697e-05, - "loss": 0.9533, + "learning_rate": 1.718372738829395e-05, + "loss": 0.9921, "step": 6838 }, { - "epoch": 0.19379976763297344, + "epoch": 0.2675874481571328, "grad_norm": 0.0, - "learning_rate": 1.862563204525391e-05, - "loss": 1.0689, + "learning_rate": 1.718284577064786e-05, + "loss": 1.2213, "step": 6839 }, { - "epoch": 0.1938281050752359, + "epoch": 0.26762657484936225, "grad_norm": 0.0, - "learning_rate": 1.8625167654068216e-05, - "loss": 1.0281, + "learning_rate": 1.7181964037654268e-05, + "loss": 1.2169, "step": 6840 }, { - "epoch": 0.19385644251749837, + "epoch": 0.2676657015415917, "grad_norm": 0.0, - "learning_rate": 1.8624703190229535e-05, - "loss": 0.9468, + "learning_rate": 1.7181082189327335e-05, + "loss": 0.9831, "step": 6841 }, { - "epoch": 0.19388477995976083, + "epoch": 0.26770482823382113, "grad_norm": 0.0, - "learning_rate": 1.8624238653741775e-05, - "loss": 1.0336, + "learning_rate": 1.7180200225681217e-05, + "loss": 1.0408, "step": 6842 }, { - "epoch": 0.1939131174020233, + "epoch": 0.2677439549260506, "grad_norm": 0.0, - "learning_rate": 1.862377404460885e-05, - "loss": 1.0384, + "learning_rate": 1.7179318146730083e-05, + "loss": 1.1537, "step": 6843 }, { - "epoch": 0.19394145484428577, + "epoch": 0.26778308161828, "grad_norm": 0.0, - "learning_rate": 1.8623309362834674e-05, - "loss": 1.0101, + "learning_rate": 1.7178435952488092e-05, + "loss": 1.2324, "step": 6844 }, { - "epoch": 0.1939697922865482, + "epoch": 0.2678222083105094, "grad_norm": 0.0, - "learning_rate": 1.862284460842316e-05, - "loss": 1.078, + "learning_rate": 1.717755364296942e-05, + "loss": 1.069, "step": 6845 }, { - "epoch": 0.19399812972881067, + "epoch": 0.26786133500273884, "grad_norm": 0.0, - "learning_rate": 1.8622379781378226e-05, - "loss": 0.9557, + "learning_rate": 1.7176671218188228e-05, + "loss": 1.0621, "step": 6846 }, { - "epoch": 0.19402646717107314, + "epoch": 0.2679004616949683, "grad_norm": 0.0, - "learning_rate": 1.8621914881703785e-05, - "loss": 1.0434, + "learning_rate": 1.717578867815869e-05, + "loss": 1.0906, "step": 6847 }, { - "epoch": 0.1940548046133356, + "epoch": 0.2679395883871977, "grad_norm": 0.0, - "learning_rate": 1.862144990940375e-05, - "loss": 1.0623, + "learning_rate": 1.7174906022894976e-05, + "loss": 1.1658, "step": 6848 }, { - "epoch": 0.19408314205559807, + "epoch": 0.26797871507942717, "grad_norm": 0.0, - "learning_rate": 1.8620984864482046e-05, - "loss": 1.0211, + "learning_rate": 1.7174023252411266e-05, + "loss": 1.2456, "step": 6849 }, { - "epoch": 0.19411147949786053, + "epoch": 0.2680178417716566, "grad_norm": 0.0, - "learning_rate": 1.8620519746942582e-05, - "loss": 1.0251, + "learning_rate": 1.7173140366721725e-05, + "loss": 1.2363, "step": 6850 }, { - "epoch": 0.19413981694012297, + "epoch": 0.26805696846388605, "grad_norm": 0.0, - "learning_rate": 1.862005455678928e-05, - "loss": 1.0604, + "learning_rate": 1.7172257365840544e-05, + "loss": 1.1496, "step": 6851 }, { - "epoch": 0.19416815438238544, + "epoch": 0.2680960951561155, "grad_norm": 0.0, - "learning_rate": 1.8619589294026058e-05, - "loss": 0.9934, + "learning_rate": 1.7171374249781897e-05, + "loss": 1.1098, "step": 6852 }, { - "epoch": 0.1941964918246479, + "epoch": 0.26813522184834493, "grad_norm": 0.0, - "learning_rate": 1.8619123958656832e-05, - "loss": 1.0005, + "learning_rate": 1.7170491018559962e-05, + "loss": 1.1371, "step": 6853 }, { - "epoch": 0.19422482926691037, + "epoch": 0.26817434854057437, "grad_norm": 0.0, - "learning_rate": 1.8618658550685528e-05, - "loss": 1.0195, + "learning_rate": 1.716960767218893e-05, + "loss": 1.2033, "step": 6854 }, { - "epoch": 0.19425316670917284, + "epoch": 0.2682134752328038, "grad_norm": 0.0, - "learning_rate": 1.861819307011606e-05, - "loss": 1.017, + "learning_rate": 1.7168724210682982e-05, + "loss": 1.1513, "step": 6855 }, { - "epoch": 0.1942815041514353, + "epoch": 0.26825260192503325, "grad_norm": 0.0, - "learning_rate": 1.8617727516952353e-05, - "loss": 1.1924, + "learning_rate": 1.7167840634056302e-05, + "loss": 1.1226, "step": 6856 }, { - "epoch": 0.19430984159369774, + "epoch": 0.2682917286172627, "grad_norm": 0.0, - "learning_rate": 1.8617261891198325e-05, - "loss": 1.171, + "learning_rate": 1.7166956942323086e-05, + "loss": 1.1313, "step": 6857 }, { - "epoch": 0.1943381790359602, + "epoch": 0.26833085530949213, "grad_norm": 0.0, - "learning_rate": 1.86167961928579e-05, - "loss": 1.0729, + "learning_rate": 1.7166073135497527e-05, + "loss": 1.019, "step": 6858 }, { - "epoch": 0.19436651647822267, + "epoch": 0.2683699820017216, "grad_norm": 0.0, - "learning_rate": 1.8616330421935004e-05, - "loss": 1.0464, + "learning_rate": 1.7165189213593808e-05, + "loss": 1.1001, "step": 6859 }, { - "epoch": 0.19439485392048514, + "epoch": 0.268409108693951, "grad_norm": 0.0, - "learning_rate": 1.8615864578433552e-05, - "loss": 1.04, + "learning_rate": 1.7164305176626127e-05, + "loss": 1.1187, "step": 6860 }, { - "epoch": 0.1944231913627476, + "epoch": 0.26844823538618046, "grad_norm": 0.0, - "learning_rate": 1.8615398662357477e-05, - "loss": 0.9707, + "learning_rate": 1.7163421024608685e-05, + "loss": 1.1466, "step": 6861 }, { - "epoch": 0.19445152880501007, + "epoch": 0.2684873620784099, "grad_norm": 0.0, - "learning_rate": 1.8614932673710702e-05, - "loss": 1.1287, + "learning_rate": 1.716253675755568e-05, + "loss": 1.2588, "step": 6862 }, { - "epoch": 0.1944798662472725, + "epoch": 0.26852648877063934, "grad_norm": 0.0, - "learning_rate": 1.8614466612497147e-05, - "loss": 1.076, + "learning_rate": 1.7161652375481307e-05, + "loss": 1.2079, "step": 6863 }, { - "epoch": 0.19450820368953498, + "epoch": 0.2685656154628688, "grad_norm": 0.0, - "learning_rate": 1.8614000478720743e-05, - "loss": 1.0607, + "learning_rate": 1.716076787839977e-05, + "loss": 1.1068, "step": 6864 }, { - "epoch": 0.19453654113179744, + "epoch": 0.2686047421550982, "grad_norm": 0.0, - "learning_rate": 1.861353427238541e-05, - "loss": 0.9718, + "learning_rate": 1.7159883266325273e-05, + "loss": 1.0395, "step": 6865 }, { - "epoch": 0.1945648785740599, + "epoch": 0.26864386884732766, "grad_norm": 0.0, - "learning_rate": 1.8613067993495084e-05, - "loss": 1.0369, + "learning_rate": 1.7158998539272027e-05, + "loss": 1.157, "step": 6866 }, { - "epoch": 0.19459321601632237, + "epoch": 0.2686829955395571, "grad_norm": 0.0, - "learning_rate": 1.8612601642053686e-05, - "loss": 0.9893, + "learning_rate": 1.7158113697254232e-05, + "loss": 1.2073, "step": 6867 }, { - "epoch": 0.19462155345858484, + "epoch": 0.26872212223178654, "grad_norm": 0.0, - "learning_rate": 1.8612135218065142e-05, - "loss": 0.9362, + "learning_rate": 1.71572287402861e-05, + "loss": 1.0681, "step": 6868 }, { - "epoch": 0.19464989090084728, + "epoch": 0.268761248924016, "grad_norm": 0.0, - "learning_rate": 1.861166872153339e-05, - "loss": 1.1258, + "learning_rate": 1.7156343668381845e-05, + "loss": 1.0219, "step": 6869 }, { - "epoch": 0.19467822834310974, + "epoch": 0.2688003756162454, "grad_norm": 0.0, - "learning_rate": 1.8611202152462354e-05, - "loss": 0.9669, + "learning_rate": 1.7155458481555676e-05, + "loss": 0.9837, "step": 6870 }, { - "epoch": 0.1947065657853722, + "epoch": 0.26883950230847486, "grad_norm": 0.0, - "learning_rate": 1.8610735510855966e-05, - "loss": 1.0541, + "learning_rate": 1.7154573179821815e-05, + "loss": 1.2389, "step": 6871 }, { - "epoch": 0.19473490322763468, + "epoch": 0.2688786290007043, "grad_norm": 0.0, - "learning_rate": 1.8610268796718153e-05, - "loss": 0.9662, + "learning_rate": 1.715368776319447e-05, + "loss": 1.2232, "step": 6872 }, { - "epoch": 0.19476324066989714, + "epoch": 0.26891775569293375, "grad_norm": 0.0, - "learning_rate": 1.8609802010052846e-05, - "loss": 1.0254, + "learning_rate": 1.7152802231687863e-05, + "loss": 1.1619, "step": 6873 }, { - "epoch": 0.1947915781121596, + "epoch": 0.26895688238516313, "grad_norm": 0.0, - "learning_rate": 1.8609335150863982e-05, - "loss": 1.0386, + "learning_rate": 1.7151916585316217e-05, + "loss": 1.0852, "step": 6874 }, { - "epoch": 0.19481991555442205, + "epoch": 0.2689960090773926, "grad_norm": 0.0, - "learning_rate": 1.8608868219155494e-05, - "loss": 1.0811, + "learning_rate": 1.715103082409375e-05, + "loss": 1.1927, "step": 6875 }, { - "epoch": 0.1948482529966845, + "epoch": 0.269035135769622, "grad_norm": 0.0, - "learning_rate": 1.860840121493131e-05, - "loss": 1.0643, + "learning_rate": 1.715014494803469e-05, + "loss": 1.1873, "step": 6876 }, { - "epoch": 0.19487659043894698, + "epoch": 0.26907426246185145, "grad_norm": 0.0, - "learning_rate": 1.860793413819536e-05, - "loss": 1.0762, + "learning_rate": 1.714925895715326e-05, + "loss": 1.1447, "step": 6877 }, { - "epoch": 0.19490492788120944, + "epoch": 0.2691133891540809, "grad_norm": 0.0, - "learning_rate": 1.8607466988951594e-05, - "loss": 1.0881, + "learning_rate": 1.7148372851463695e-05, + "loss": 1.1131, "step": 6878 }, { - "epoch": 0.1949332653234719, + "epoch": 0.26915251584631034, "grad_norm": 0.0, - "learning_rate": 1.860699976720393e-05, - "loss": 1.0111, + "learning_rate": 1.7147486630980216e-05, + "loss": 1.0453, "step": 6879 }, { - "epoch": 0.19496160276573438, + "epoch": 0.2691916425385398, "grad_norm": 0.0, - "learning_rate": 1.860653247295632e-05, - "loss": 1.0248, + "learning_rate": 1.714660029571706e-05, + "loss": 1.2123, "step": 6880 }, { - "epoch": 0.19498994020799681, + "epoch": 0.2692307692307692, "grad_norm": 0.0, - "learning_rate": 1.8606065106212682e-05, - "loss": 1.1519, + "learning_rate": 1.7145713845688455e-05, + "loss": 1.2596, "step": 6881 }, { - "epoch": 0.19501827765025928, + "epoch": 0.26926989592299866, "grad_norm": 0.0, - "learning_rate": 1.8605597666976964e-05, - "loss": 0.9545, + "learning_rate": 1.714482728090864e-05, + "loss": 1.0637, "step": 6882 }, { - "epoch": 0.19504661509252175, + "epoch": 0.2693090226152281, "grad_norm": 0.0, - "learning_rate": 1.86051301552531e-05, - "loss": 1.0517, + "learning_rate": 1.7143940601391854e-05, + "loss": 1.1932, "step": 6883 }, { - "epoch": 0.1950749525347842, + "epoch": 0.26934814930745754, "grad_norm": 0.0, - "learning_rate": 1.8604662571045033e-05, - "loss": 1.0422, + "learning_rate": 1.7143053807152332e-05, + "loss": 1.1756, "step": 6884 }, { - "epoch": 0.19510328997704668, + "epoch": 0.269387275999687, "grad_norm": 0.0, - "learning_rate": 1.8604194914356695e-05, - "loss": 0.9208, + "learning_rate": 1.714216689820432e-05, + "loss": 1.1169, "step": 6885 }, { - "epoch": 0.19513162741930914, + "epoch": 0.2694264026919164, "grad_norm": 0.0, - "learning_rate": 1.8603727185192028e-05, - "loss": 1.0304, + "learning_rate": 1.7141279874562054e-05, + "loss": 1.1486, "step": 6886 }, { - "epoch": 0.19515996486157158, + "epoch": 0.26946552938414586, "grad_norm": 0.0, - "learning_rate": 1.8603259383554973e-05, - "loss": 1.0767, + "learning_rate": 1.7140392736239785e-05, + "loss": 1.0921, "step": 6887 }, { - "epoch": 0.19518830230383405, + "epoch": 0.2695046560763753, "grad_norm": 0.0, - "learning_rate": 1.860279150944947e-05, - "loss": 0.9729, + "learning_rate": 1.713950548325175e-05, + "loss": 1.184, "step": 6888 }, { - "epoch": 0.19521663974609652, + "epoch": 0.26954378276860474, "grad_norm": 0.0, - "learning_rate": 1.8602323562879464e-05, - "loss": 0.982, + "learning_rate": 1.7138618115612206e-05, + "loss": 1.188, "step": 6889 }, { - "epoch": 0.19524497718835898, + "epoch": 0.2695829094608342, "grad_norm": 0.0, - "learning_rate": 1.8601855543848884e-05, - "loss": 1.0792, + "learning_rate": 1.7137730633335404e-05, + "loss": 1.178, "step": 6890 }, { - "epoch": 0.19527331463062145, + "epoch": 0.2696220361530636, "grad_norm": 0.0, - "learning_rate": 1.8601387452361685e-05, - "loss": 0.9579, + "learning_rate": 1.7136843036435586e-05, + "loss": 1.0706, "step": 6891 }, { - "epoch": 0.1953016520728839, + "epoch": 0.26966116284529307, "grad_norm": 0.0, - "learning_rate": 1.8600919288421805e-05, - "loss": 0.8808, + "learning_rate": 1.713595532492702e-05, + "loss": 1.1385, "step": 6892 }, { - "epoch": 0.19532998951514635, + "epoch": 0.2697002895375225, "grad_norm": 0.0, - "learning_rate": 1.8600451052033185e-05, - "loss": 0.9251, + "learning_rate": 1.7135067498823945e-05, + "loss": 1.1937, "step": 6893 }, { - "epoch": 0.19535832695740882, + "epoch": 0.26973941622975195, "grad_norm": 0.0, - "learning_rate": 1.8599982743199775e-05, - "loss": 1.0822, + "learning_rate": 1.713417955814063e-05, + "loss": 1.1218, "step": 6894 }, { - "epoch": 0.19538666439967128, + "epoch": 0.2697785429219814, "grad_norm": 0.0, - "learning_rate": 1.859951436192552e-05, - "loss": 1.0249, + "learning_rate": 1.713329150289133e-05, + "loss": 0.9317, "step": 6895 }, { - "epoch": 0.19541500184193375, + "epoch": 0.26981766961421083, "grad_norm": 0.0, - "learning_rate": 1.8599045908214356e-05, - "loss": 1.0796, + "learning_rate": 1.713240333309031e-05, + "loss": 1.1447, "step": 6896 }, { - "epoch": 0.19544333928419622, + "epoch": 0.26985679630644027, "grad_norm": 0.0, - "learning_rate": 1.859857738207024e-05, - "loss": 0.9997, + "learning_rate": 1.7131515048751826e-05, + "loss": 1.1415, "step": 6897 }, { - "epoch": 0.19547167672645868, + "epoch": 0.2698959229986697, "grad_norm": 0.0, - "learning_rate": 1.859810878349711e-05, - "loss": 0.8778, + "learning_rate": 1.7130626649890148e-05, + "loss": 1.0088, "step": 6898 }, { - "epoch": 0.19550001416872112, + "epoch": 0.26993504969089915, "grad_norm": 0.0, - "learning_rate": 1.8597640112498917e-05, - "loss": 0.9818, + "learning_rate": 1.7129738136519543e-05, + "loss": 1.1617, "step": 6899 }, { - "epoch": 0.1955283516109836, + "epoch": 0.2699741763831286, "grad_norm": 0.0, - "learning_rate": 1.859717136907961e-05, - "loss": 1.0465, + "learning_rate": 1.7128849508654278e-05, + "loss": 1.1238, "step": 6900 }, { - "epoch": 0.19555668905324605, + "epoch": 0.27001330307535804, "grad_norm": 0.0, - "learning_rate": 1.8596702553243137e-05, - "loss": 0.9363, + "learning_rate": 1.712796076630862e-05, + "loss": 1.1407, "step": 6901 }, { - "epoch": 0.19558502649550852, + "epoch": 0.2700524297675874, "grad_norm": 0.0, - "learning_rate": 1.8596233664993444e-05, - "loss": 1.019, + "learning_rate": 1.7127071909496844e-05, + "loss": 1.0309, "step": 6902 }, { - "epoch": 0.19561336393777098, + "epoch": 0.27009155645981686, "grad_norm": 0.0, - "learning_rate": 1.8595764704334486e-05, - "loss": 1.0063, + "learning_rate": 1.7126182938233228e-05, + "loss": 1.1661, "step": 6903 }, { - "epoch": 0.19564170138003345, + "epoch": 0.2701306831520463, "grad_norm": 0.0, - "learning_rate": 1.8595295671270203e-05, - "loss": 1.1426, + "learning_rate": 1.7125293852532035e-05, + "loss": 1.0834, "step": 6904 }, { - "epoch": 0.1956700388222959, + "epoch": 0.27016980984427574, "grad_norm": 0.0, - "learning_rate": 1.859482656580456e-05, - "loss": 0.9895, + "learning_rate": 1.712440465240756e-05, + "loss": 1.0251, "step": 6905 }, { - "epoch": 0.19569837626455835, + "epoch": 0.2702089365365052, "grad_norm": 0.0, - "learning_rate": 1.8594357387941498e-05, - "loss": 1.0516, + "learning_rate": 1.712351533787407e-05, + "loss": 1.1721, "step": 6906 }, { - "epoch": 0.19572671370682082, + "epoch": 0.2702480632287346, "grad_norm": 0.0, - "learning_rate": 1.859388813768497e-05, - "loss": 1.1113, + "learning_rate": 1.7122625908945848e-05, + "loss": 1.1094, "step": 6907 }, { - "epoch": 0.1957550511490833, + "epoch": 0.27028718992096407, "grad_norm": 0.0, - "learning_rate": 1.8593418815038937e-05, - "loss": 0.9952, + "learning_rate": 1.7121736365637182e-05, + "loss": 1.2315, "step": 6908 }, { - "epoch": 0.19578338859134575, + "epoch": 0.2703263166131935, "grad_norm": 0.0, - "learning_rate": 1.859294942000734e-05, - "loss": 0.95, + "learning_rate": 1.7120846707962355e-05, + "loss": 1.0275, "step": 6909 }, { - "epoch": 0.19581172603360822, + "epoch": 0.27036544330542295, "grad_norm": 0.0, - "learning_rate": 1.8592479952594145e-05, - "loss": 1.0372, + "learning_rate": 1.711995693593565e-05, + "loss": 1.0752, "step": 6910 }, { - "epoch": 0.19584006347587066, + "epoch": 0.2704045699976524, "grad_norm": 0.0, - "learning_rate": 1.8592010412803297e-05, - "loss": 0.9911, + "learning_rate": 1.711906704957136e-05, + "loss": 1.1559, "step": 6911 }, { - "epoch": 0.19586840091813312, + "epoch": 0.27044369668988183, "grad_norm": 0.0, - "learning_rate": 1.859154080063876e-05, - "loss": 0.975, + "learning_rate": 1.7118177048883774e-05, + "loss": 1.2339, "step": 6912 }, { - "epoch": 0.1958967383603956, + "epoch": 0.27048282338211127, "grad_norm": 0.0, - "learning_rate": 1.8591071116104476e-05, - "loss": 0.9304, + "learning_rate": 1.7117286933887182e-05, + "loss": 1.3002, "step": 6913 }, { - "epoch": 0.19592507580265806, + "epoch": 0.2705219500743407, "grad_norm": 0.0, - "learning_rate": 1.8590601359204417e-05, - "loss": 1.1116, + "learning_rate": 1.7116396704595883e-05, + "loss": 1.0801, "step": 6914 }, { - "epoch": 0.19595341324492052, + "epoch": 0.27056107676657015, "grad_norm": 0.0, - "learning_rate": 1.8590131529942526e-05, - "loss": 1.0267, + "learning_rate": 1.711550636102417e-05, + "loss": 1.0989, "step": 6915 }, { - "epoch": 0.195981750687183, + "epoch": 0.2706002034587996, "grad_norm": 0.0, - "learning_rate": 1.858966162832277e-05, - "loss": 1.0247, + "learning_rate": 1.711461590318634e-05, + "loss": 0.9985, "step": 6916 }, { - "epoch": 0.19601008812944543, + "epoch": 0.27063933015102903, "grad_norm": 0.0, - "learning_rate": 1.8589191654349107e-05, - "loss": 1.0288, + "learning_rate": 1.7113725331096692e-05, + "loss": 1.2605, "step": 6917 }, { - "epoch": 0.1960384255717079, + "epoch": 0.2706784568432585, "grad_norm": 0.0, - "learning_rate": 1.858872160802549e-05, - "loss": 1.0579, + "learning_rate": 1.7112834644769533e-05, + "loss": 1.1765, "step": 6918 }, { - "epoch": 0.19606676301397036, + "epoch": 0.2707175835354879, "grad_norm": 0.0, - "learning_rate": 1.8588251489355883e-05, - "loss": 0.9895, + "learning_rate": 1.711194384421916e-05, + "loss": 1.2146, "step": 6919 }, { - "epoch": 0.19609510045623282, + "epoch": 0.27075671022771736, "grad_norm": 0.0, - "learning_rate": 1.858778129834425e-05, - "loss": 1.0884, + "learning_rate": 1.7111052929459883e-05, + "loss": 1.1969, "step": 6920 }, { - "epoch": 0.1961234378984953, + "epoch": 0.2707958369199468, "grad_norm": 0.0, - "learning_rate": 1.8587311034994537e-05, - "loss": 1.0264, + "learning_rate": 1.7110161900506003e-05, + "loss": 1.0953, "step": 6921 }, { - "epoch": 0.19615177534075776, + "epoch": 0.27083496361217624, "grad_norm": 0.0, - "learning_rate": 1.858684069931072e-05, - "loss": 1.0261, + "learning_rate": 1.7109270757371833e-05, + "loss": 1.1827, "step": 6922 }, { - "epoch": 0.1961801127830202, + "epoch": 0.2708740903044057, "grad_norm": 0.0, - "learning_rate": 1.858637029129675e-05, - "loss": 0.9832, + "learning_rate": 1.7108379500071687e-05, + "loss": 1.1493, "step": 6923 }, { - "epoch": 0.19620845022528266, + "epoch": 0.2709132169966351, "grad_norm": 0.0, - "learning_rate": 1.85858998109566e-05, - "loss": 1.1477, + "learning_rate": 1.7107488128619868e-05, + "loss": 1.0132, "step": 6924 }, { - "epoch": 0.19623678766754513, + "epoch": 0.27095234368886456, "grad_norm": 0.0, - "learning_rate": 1.8585429258294226e-05, - "loss": 1.0325, + "learning_rate": 1.7106596643030702e-05, + "loss": 1.2065, "step": 6925 }, { - "epoch": 0.1962651251098076, + "epoch": 0.270991470381094, "grad_norm": 0.0, - "learning_rate": 1.858495863331359e-05, - "loss": 1.0367, + "learning_rate": 1.7105705043318493e-05, + "loss": 1.1652, "step": 6926 }, { - "epoch": 0.19629346255207006, + "epoch": 0.27103059707332344, "grad_norm": 0.0, - "learning_rate": 1.8584487936018663e-05, - "loss": 1.0294, + "learning_rate": 1.710481332949757e-05, + "loss": 1.039, "step": 6927 }, { - "epoch": 0.19632179999433252, + "epoch": 0.2710697237655529, "grad_norm": 0.0, - "learning_rate": 1.8584017166413406e-05, - "loss": 0.9694, + "learning_rate": 1.7103921501582243e-05, + "loss": 1.2018, "step": 6928 }, { - "epoch": 0.19635013743659496, + "epoch": 0.2711088504577823, "grad_norm": 0.0, - "learning_rate": 1.8583546324501783e-05, - "loss": 0.9823, + "learning_rate": 1.7103029559586843e-05, + "loss": 1.1505, "step": 6929 }, { - "epoch": 0.19637847487885743, + "epoch": 0.27114797715001177, "grad_norm": 0.0, - "learning_rate": 1.8583075410287764e-05, - "loss": 1.0675, + "learning_rate": 1.7102137503525686e-05, + "loss": 1.0786, "step": 6930 }, { - "epoch": 0.1964068123211199, + "epoch": 0.27118710384224115, "grad_norm": 0.0, - "learning_rate": 1.858260442377531e-05, - "loss": 0.8871, + "learning_rate": 1.7101245333413098e-05, + "loss": 1.2097, "step": 6931 }, { - "epoch": 0.19643514976338236, + "epoch": 0.2712262305344706, "grad_norm": 0.0, - "learning_rate": 1.8582133364968394e-05, - "loss": 1.0392, + "learning_rate": 1.710035304926341e-05, + "loss": 1.1417, "step": 6932 }, { - "epoch": 0.19646348720564483, + "epoch": 0.27126535722670003, "grad_norm": 0.0, - "learning_rate": 1.8581662233870985e-05, - "loss": 1.0851, + "learning_rate": 1.7099460651090952e-05, + "loss": 1.1229, "step": 6933 }, { - "epoch": 0.1964918246479073, + "epoch": 0.2713044839189295, "grad_norm": 0.0, - "learning_rate": 1.8581191030487046e-05, - "loss": 0.8895, + "learning_rate": 1.709856813891005e-05, + "loss": 1.1624, "step": 6934 }, { - "epoch": 0.19652016209016973, + "epoch": 0.2713436106111589, "grad_norm": 0.0, - "learning_rate": 1.8580719754820548e-05, - "loss": 1.0626, + "learning_rate": 1.7097675512735042e-05, + "loss": 1.1736, "step": 6935 }, { - "epoch": 0.1965484995324322, + "epoch": 0.27138273730338835, "grad_norm": 0.0, - "learning_rate": 1.858024840687546e-05, - "loss": 0.9939, + "learning_rate": 1.7096782772580255e-05, + "loss": 0.9431, "step": 6936 }, { - "epoch": 0.19657683697469466, + "epoch": 0.2714218639956178, "grad_norm": 0.0, - "learning_rate": 1.8579776986655753e-05, - "loss": 0.996, + "learning_rate": 1.709588991846003e-05, + "loss": 1.0508, "step": 6937 }, { - "epoch": 0.19660517441695713, + "epoch": 0.27146099068784724, "grad_norm": 0.0, - "learning_rate": 1.8579305494165402e-05, - "loss": 0.9833, + "learning_rate": 1.7094996950388704e-05, + "loss": 1.2195, "step": 6938 }, { - "epoch": 0.1966335118592196, + "epoch": 0.2715001173800767, "grad_norm": 0.0, - "learning_rate": 1.857883392940837e-05, - "loss": 0.9805, + "learning_rate": 1.7094103868380618e-05, + "loss": 1.2393, "step": 6939 }, { - "epoch": 0.19666184930148206, + "epoch": 0.2715392440723061, "grad_norm": 0.0, - "learning_rate": 1.857836229238864e-05, - "loss": 1.0186, + "learning_rate": 1.7093210672450114e-05, + "loss": 1.1552, "step": 6940 }, { - "epoch": 0.1966901867437445, + "epoch": 0.27157837076453556, "grad_norm": 0.0, - "learning_rate": 1.8577890583110173e-05, - "loss": 1.015, + "learning_rate": 1.7092317362611537e-05, + "loss": 1.2435, "step": 6941 }, { - "epoch": 0.19671852418600697, + "epoch": 0.271617497456765, "grad_norm": 0.0, - "learning_rate": 1.8577418801576953e-05, - "loss": 1.1379, + "learning_rate": 1.7091423938879227e-05, + "loss": 1.1109, "step": 6942 }, { - "epoch": 0.19674686162826943, + "epoch": 0.27165662414899444, "grad_norm": 0.0, - "learning_rate": 1.857694694779295e-05, - "loss": 1.0238, + "learning_rate": 1.7090530401267534e-05, + "loss": 1.1354, "step": 6943 }, { - "epoch": 0.1967751990705319, + "epoch": 0.2716957508412239, "grad_norm": 0.0, - "learning_rate": 1.8576475021762132e-05, - "loss": 0.9608, + "learning_rate": 1.7089636749790812e-05, + "loss": 1.1753, "step": 6944 }, { - "epoch": 0.19680353651279436, + "epoch": 0.2717348775334533, "grad_norm": 0.0, - "learning_rate": 1.8576003023488486e-05, - "loss": 1.0286, + "learning_rate": 1.7088742984463405e-05, + "loss": 1.0514, "step": 6945 }, { - "epoch": 0.19683187395505683, + "epoch": 0.27177400422568276, "grad_norm": 0.0, - "learning_rate": 1.8575530952975977e-05, - "loss": 1.1588, + "learning_rate": 1.708784910529967e-05, + "loss": 1.1071, "step": 6946 }, { - "epoch": 0.19686021139731927, + "epoch": 0.2718131309179122, "grad_norm": 0.0, - "learning_rate": 1.857505881022859e-05, - "loss": 1.0565, + "learning_rate": 1.7086955112313958e-05, + "loss": 1.067, "step": 6947 }, { - "epoch": 0.19688854883958173, + "epoch": 0.27185225761014165, "grad_norm": 0.0, - "learning_rate": 1.8574586595250298e-05, - "loss": 1.0106, + "learning_rate": 1.7086061005520628e-05, + "loss": 1.1378, "step": 6948 }, { - "epoch": 0.1969168862818442, + "epoch": 0.2718913843023711, "grad_norm": 0.0, - "learning_rate": 1.8574114308045077e-05, - "loss": 1.0414, + "learning_rate": 1.708516678493404e-05, + "loss": 1.1965, "step": 6949 }, { - "epoch": 0.19694522372410667, + "epoch": 0.2719305109946005, "grad_norm": 0.0, - "learning_rate": 1.857364194861691e-05, - "loss": 1.0355, + "learning_rate": 1.7084272450568543e-05, + "loss": 1.077, "step": 6950 }, { - "epoch": 0.19697356116636913, + "epoch": 0.27196963768682997, "grad_norm": 0.0, - "learning_rate": 1.857316951696977e-05, - "loss": 1.0084, + "learning_rate": 1.7083378002438516e-05, + "loss": 1.2202, "step": 6951 }, { - "epoch": 0.1970018986086316, + "epoch": 0.2720087643790594, "grad_norm": 0.0, - "learning_rate": 1.8572697013107643e-05, - "loss": 0.9507, + "learning_rate": 1.7082483440558314e-05, + "loss": 1.1353, "step": 6952 }, { - "epoch": 0.19703023605089404, + "epoch": 0.27204789107128885, "grad_norm": 0.0, - "learning_rate": 1.8572224437034503e-05, - "loss": 1.0133, + "learning_rate": 1.7081588764942298e-05, + "loss": 1.1027, "step": 6953 }, { - "epoch": 0.1970585734931565, + "epoch": 0.2720870177635183, "grad_norm": 0.0, - "learning_rate": 1.8571751788754336e-05, - "loss": 0.9959, + "learning_rate": 1.7080693975604842e-05, + "loss": 1.1512, "step": 6954 }, { - "epoch": 0.19708691093541897, + "epoch": 0.27212614445574773, "grad_norm": 0.0, - "learning_rate": 1.857127906827112e-05, - "loss": 1.0666, + "learning_rate": 1.7079799072560318e-05, + "loss": 1.0848, "step": 6955 }, { - "epoch": 0.19711524837768143, + "epoch": 0.2721652711479772, "grad_norm": 0.0, - "learning_rate": 1.8570806275588832e-05, - "loss": 0.9408, + "learning_rate": 1.7078904055823087e-05, + "loss": 1.2422, "step": 6956 }, { - "epoch": 0.1971435858199439, + "epoch": 0.2722043978402066, "grad_norm": 0.0, - "learning_rate": 1.8570333410711464e-05, - "loss": 1.063, + "learning_rate": 1.7078008925407527e-05, + "loss": 1.1702, "step": 6957 }, { - "epoch": 0.19717192326220637, + "epoch": 0.27224352453243605, "grad_norm": 0.0, - "learning_rate": 1.8569860473642996e-05, - "loss": 1.1077, + "learning_rate": 1.7077113681328016e-05, + "loss": 1.2685, "step": 6958 }, { - "epoch": 0.1972002607044688, + "epoch": 0.27228265122466544, "grad_norm": 0.0, - "learning_rate": 1.8569387464387412e-05, - "loss": 1.0207, + "learning_rate": 1.7076218323598926e-05, + "loss": 1.1698, "step": 6959 }, { - "epoch": 0.19722859814673127, + "epoch": 0.2723217779168949, "grad_norm": 0.0, - "learning_rate": 1.8568914382948694e-05, - "loss": 0.9679, + "learning_rate": 1.7075322852234637e-05, + "loss": 1.2643, "step": 6960 }, { - "epoch": 0.19725693558899374, + "epoch": 0.2723609046091243, "grad_norm": 0.0, - "learning_rate": 1.856844122933083e-05, - "loss": 1.027, + "learning_rate": 1.7074427267249528e-05, + "loss": 1.1145, "step": 6961 }, { - "epoch": 0.1972852730312562, + "epoch": 0.27240003130135376, "grad_norm": 0.0, - "learning_rate": 1.85679680035378e-05, - "loss": 0.9443, + "learning_rate": 1.707353156865798e-05, + "loss": 1.1112, "step": 6962 }, { - "epoch": 0.19731361047351867, + "epoch": 0.2724391579935832, "grad_norm": 0.0, - "learning_rate": 1.8567494705573595e-05, - "loss": 1.0526, + "learning_rate": 1.7072635756474384e-05, + "loss": 1.1048, "step": 6963 }, { - "epoch": 0.19734194791578114, + "epoch": 0.27247828468581264, "grad_norm": 0.0, - "learning_rate": 1.8567021335442202e-05, - "loss": 1.0328, + "learning_rate": 1.7071739830713117e-05, + "loss": 1.1045, "step": 6964 }, { - "epoch": 0.19737028535804357, + "epoch": 0.2725174113780421, "grad_norm": 0.0, - "learning_rate": 1.8566547893147607e-05, - "loss": 0.9847, + "learning_rate": 1.7070843791388568e-05, + "loss": 1.1449, "step": 6965 }, { - "epoch": 0.19739862280030604, + "epoch": 0.2725565380702715, "grad_norm": 0.0, - "learning_rate": 1.8566074378693795e-05, - "loss": 0.9215, + "learning_rate": 1.7069947638515132e-05, + "loss": 1.0226, "step": 6966 }, { - "epoch": 0.1974269602425685, + "epoch": 0.27259566476250097, "grad_norm": 0.0, - "learning_rate": 1.856560079208476e-05, - "loss": 1.0582, + "learning_rate": 1.7069051372107193e-05, + "loss": 1.0793, "step": 6967 }, { - "epoch": 0.19745529768483097, + "epoch": 0.2726347914547304, "grad_norm": 0.0, - "learning_rate": 1.8565127133324487e-05, - "loss": 1.0391, + "learning_rate": 1.706815499217915e-05, + "loss": 1.1205, "step": 6968 }, { - "epoch": 0.19748363512709344, + "epoch": 0.27267391814695985, "grad_norm": 0.0, - "learning_rate": 1.856465340241697e-05, - "loss": 1.0236, + "learning_rate": 1.7067258498745393e-05, + "loss": 1.2505, "step": 6969 }, { - "epoch": 0.1975119725693559, + "epoch": 0.2727130448391893, "grad_norm": 0.0, - "learning_rate": 1.8564179599366195e-05, - "loss": 0.9909, + "learning_rate": 1.706636189182032e-05, + "loss": 1.1732, "step": 6970 }, { - "epoch": 0.19754031001161834, + "epoch": 0.27275217153141873, "grad_norm": 0.0, - "learning_rate": 1.8563705724176158e-05, - "loss": 0.877, + "learning_rate": 1.706546517141833e-05, + "loss": 1.0059, "step": 6971 }, { - "epoch": 0.1975686474538808, + "epoch": 0.27279129822364817, "grad_norm": 0.0, - "learning_rate": 1.8563231776850843e-05, - "loss": 0.9911, + "learning_rate": 1.706456833755382e-05, + "loss": 1.0602, "step": 6972 }, { - "epoch": 0.19759698489614327, + "epoch": 0.2728304249158776, "grad_norm": 0.0, - "learning_rate": 1.856275775739425e-05, - "loss": 1.093, + "learning_rate": 1.70636713902412e-05, + "loss": 1.127, "step": 6973 }, { - "epoch": 0.19762532233840574, + "epoch": 0.27286955160810705, "grad_norm": 0.0, - "learning_rate": 1.856228366581037e-05, - "loss": 0.8883, + "learning_rate": 1.7062774329494865e-05, + "loss": 1.1792, "step": 6974 }, { - "epoch": 0.1976536597806682, + "epoch": 0.2729086783003365, "grad_norm": 0.0, - "learning_rate": 1.856180950210319e-05, - "loss": 0.9356, + "learning_rate": 1.7061877155329224e-05, + "loss": 1.1074, "step": 6975 }, { - "epoch": 0.19768199722293067, + "epoch": 0.27294780499256593, "grad_norm": 0.0, - "learning_rate": 1.8561335266276713e-05, - "loss": 1.0343, + "learning_rate": 1.7060979867758685e-05, + "loss": 1.1655, "step": 6976 }, { - "epoch": 0.1977103346651931, + "epoch": 0.2729869316847954, "grad_norm": 0.0, - "learning_rate": 1.856086095833493e-05, - "loss": 0.981, + "learning_rate": 1.7060082466797662e-05, + "loss": 1.0202, "step": 6977 }, { - "epoch": 0.19773867210745558, + "epoch": 0.2730260583770248, "grad_norm": 0.0, - "learning_rate": 1.8560386578281835e-05, - "loss": 0.8764, + "learning_rate": 1.705918495246056e-05, + "loss": 1.1165, "step": 6978 }, { - "epoch": 0.19776700954971804, + "epoch": 0.27306518506925426, "grad_norm": 0.0, - "learning_rate": 1.8559912126121428e-05, - "loss": 0.8153, + "learning_rate": 1.705828732476179e-05, + "loss": 0.9697, "step": 6979 }, { - "epoch": 0.1977953469919805, + "epoch": 0.2731043117614837, "grad_norm": 0.0, - "learning_rate": 1.85594376018577e-05, - "loss": 0.9361, + "learning_rate": 1.705738958371577e-05, + "loss": 1.133, "step": 6980 }, { - "epoch": 0.19782368443424297, + "epoch": 0.27314343845371314, "grad_norm": 0.0, - "learning_rate": 1.855896300549465e-05, - "loss": 1.0044, + "learning_rate": 1.7056491729336917e-05, + "loss": 1.0978, "step": 6981 }, { - "epoch": 0.1978520218765054, + "epoch": 0.2731825651459426, "grad_norm": 0.0, - "learning_rate": 1.855848833703628e-05, - "loss": 0.9753, + "learning_rate": 1.7055593761639653e-05, + "loss": 1.1201, "step": 6982 }, { - "epoch": 0.19788035931876788, + "epoch": 0.273221691838172, "grad_norm": 0.0, - "learning_rate": 1.8558013596486578e-05, - "loss": 1.1175, + "learning_rate": 1.705469568063839e-05, + "loss": 1.1237, "step": 6983 }, { - "epoch": 0.19790869676103034, + "epoch": 0.27326081853040146, "grad_norm": 0.0, - "learning_rate": 1.8557538783849555e-05, - "loss": 1.1322, + "learning_rate": 1.705379748634756e-05, + "loss": 1.0771, "step": 6984 }, { - "epoch": 0.1979370342032928, + "epoch": 0.2732999452226309, "grad_norm": 0.0, - "learning_rate": 1.8557063899129205e-05, - "loss": 0.9397, + "learning_rate": 1.7052899178781575e-05, + "loss": 1.1635, "step": 6985 }, { - "epoch": 0.19796537164555528, + "epoch": 0.27333907191486034, "grad_norm": 0.0, - "learning_rate": 1.8556588942329522e-05, - "loss": 1.0768, + "learning_rate": 1.705200075795487e-05, + "loss": 1.1343, "step": 6986 }, { - "epoch": 0.19799370908781774, + "epoch": 0.2733781986070898, "grad_norm": 0.0, - "learning_rate": 1.855611391345452e-05, - "loss": 1.0821, + "learning_rate": 1.705110222388187e-05, + "loss": 1.0273, "step": 6987 }, { - "epoch": 0.19802204653008018, + "epoch": 0.27341732529931917, "grad_norm": 0.0, - "learning_rate": 1.855563881250819e-05, - "loss": 1.056, + "learning_rate": 1.7050203576577e-05, + "loss": 1.0114, "step": 6988 }, { - "epoch": 0.19805038397234265, + "epoch": 0.2734564519915486, "grad_norm": 0.0, - "learning_rate": 1.8555163639494537e-05, - "loss": 1.0912, + "learning_rate": 1.70493048160547e-05, + "loss": 1.1075, "step": 6989 }, { - "epoch": 0.1980787214146051, + "epoch": 0.27349557868377805, "grad_norm": 0.0, - "learning_rate": 1.8554688394417566e-05, - "loss": 1.092, + "learning_rate": 1.7048405942329393e-05, + "loss": 1.1074, "step": 6990 }, { - "epoch": 0.19810705885686758, + "epoch": 0.2735347053760075, "grad_norm": 0.0, - "learning_rate": 1.8554213077281275e-05, - "loss": 0.8805, + "learning_rate": 1.704750695541552e-05, + "loss": 1.1593, "step": 6991 }, { - "epoch": 0.19813539629913005, + "epoch": 0.27357383206823693, "grad_norm": 0.0, - "learning_rate": 1.8553737688089674e-05, - "loss": 1.0039, + "learning_rate": 1.704660785532752e-05, + "loss": 1.0842, "step": 6992 }, { - "epoch": 0.1981637337413925, + "epoch": 0.2736129587604664, "grad_norm": 0.0, - "learning_rate": 1.8553262226846763e-05, - "loss": 1.1237, + "learning_rate": 1.7045708642079824e-05, + "loss": 0.9985, "step": 6993 }, { - "epoch": 0.19819207118365495, + "epoch": 0.2736520854526958, "grad_norm": 0.0, - "learning_rate": 1.855278669355655e-05, - "loss": 1.0616, + "learning_rate": 1.704480931568688e-05, + "loss": 1.2453, "step": 6994 }, { - "epoch": 0.19822040862591742, + "epoch": 0.27369121214492526, "grad_norm": 0.0, - "learning_rate": 1.855231108822303e-05, - "loss": 0.971, + "learning_rate": 1.704390987616312e-05, + "loss": 1.051, "step": 6995 }, { - "epoch": 0.19824874606817988, + "epoch": 0.2737303388371547, "grad_norm": 0.0, - "learning_rate": 1.8551835410850227e-05, - "loss": 1.0582, + "learning_rate": 1.7043010323522998e-05, + "loss": 1.3528, "step": 6996 }, { - "epoch": 0.19827708351044235, + "epoch": 0.27376946552938414, "grad_norm": 0.0, - "learning_rate": 1.8551359661442134e-05, - "loss": 0.9644, + "learning_rate": 1.7042110657780953e-05, + "loss": 1.1677, "step": 6997 }, { - "epoch": 0.19830542095270481, + "epoch": 0.2738085922216136, "grad_norm": 0.0, - "learning_rate": 1.8550883840002766e-05, - "loss": 0.9508, + "learning_rate": 1.704121087895144e-05, + "loss": 1.1386, "step": 6998 }, { - "epoch": 0.19833375839496728, + "epoch": 0.273847718913843, "grad_norm": 0.0, - "learning_rate": 1.8550407946536127e-05, - "loss": 1.038, + "learning_rate": 1.7040310987048897e-05, + "loss": 1.1561, "step": 6999 }, { - "epoch": 0.19836209583722972, + "epoch": 0.27388684560607246, "grad_norm": 0.0, - "learning_rate": 1.8549931981046226e-05, - "loss": 1.0116, + "learning_rate": 1.7039410982087786e-05, + "loss": 1.1029, "step": 7000 }, { - "epoch": 0.19839043327949218, + "epoch": 0.2739259722983019, "grad_norm": 0.0, - "learning_rate": 1.8549455943537077e-05, - "loss": 0.9402, + "learning_rate": 1.7038510864082555e-05, + "loss": 1.2509, "step": 7001 }, { - "epoch": 0.19841877072175465, + "epoch": 0.27396509899053134, "grad_norm": 0.0, - "learning_rate": 1.854897983401268e-05, - "loss": 1.0445, + "learning_rate": 1.703761063304766e-05, + "loss": 1.1747, "step": 7002 }, { - "epoch": 0.19844710816401712, + "epoch": 0.2740042256827608, "grad_norm": 0.0, - "learning_rate": 1.8548503652477054e-05, - "loss": 1.0131, + "learning_rate": 1.7036710288997555e-05, + "loss": 1.2322, "step": 7003 }, { - "epoch": 0.19847544560627958, + "epoch": 0.2740433523749902, "grad_norm": 0.0, - "learning_rate": 1.854802739893421e-05, - "loss": 0.9049, + "learning_rate": 1.70358098319467e-05, + "loss": 1.1766, "step": 7004 }, { - "epoch": 0.19850378304854205, + "epoch": 0.27408247906721966, "grad_norm": 0.0, - "learning_rate": 1.8547551073388152e-05, - "loss": 0.955, + "learning_rate": 1.7034909261909556e-05, + "loss": 1.1671, "step": 7005 }, { - "epoch": 0.1985321204908045, + "epoch": 0.2741216057594491, "grad_norm": 0.0, - "learning_rate": 1.85470746758429e-05, - "loss": 1.031, + "learning_rate": 1.7034008578900584e-05, + "loss": 1.1711, "step": 7006 }, { - "epoch": 0.19856045793306695, + "epoch": 0.27416073245167855, "grad_norm": 0.0, - "learning_rate": 1.854659820630246e-05, - "loss": 0.9322, + "learning_rate": 1.703310778293425e-05, + "loss": 1.0688, "step": 7007 }, { - "epoch": 0.19858879537532942, + "epoch": 0.274199859143908, "grad_norm": 0.0, - "learning_rate": 1.8546121664770857e-05, - "loss": 0.9903, + "learning_rate": 1.7032206874025017e-05, + "loss": 1.0698, "step": 7008 }, { - "epoch": 0.19861713281759188, + "epoch": 0.2742389858361374, "grad_norm": 0.0, - "learning_rate": 1.8545645051252094e-05, - "loss": 1.0319, + "learning_rate": 1.703130585218735e-05, + "loss": 1.0759, "step": 7009 }, { - "epoch": 0.19864547025985435, + "epoch": 0.27427811252836687, "grad_norm": 0.0, - "learning_rate": 1.8545168365750188e-05, - "loss": 1.0678, + "learning_rate": 1.703040471743573e-05, + "loss": 1.0925, "step": 7010 }, { - "epoch": 0.19867380770211682, + "epoch": 0.2743172392205963, "grad_norm": 0.0, - "learning_rate": 1.8544691608269156e-05, - "loss": 1.0032, + "learning_rate": 1.7029503469784613e-05, + "loss": 1.176, "step": 7011 }, { - "epoch": 0.19870214514437926, + "epoch": 0.27435636591282575, "grad_norm": 0.0, - "learning_rate": 1.8544214778813018e-05, - "loss": 0.9551, + "learning_rate": 1.7028602109248484e-05, + "loss": 1.0603, "step": 7012 }, { - "epoch": 0.19873048258664172, + "epoch": 0.2743954926050552, "grad_norm": 0.0, - "learning_rate": 1.8543737877385778e-05, - "loss": 1.0255, + "learning_rate": 1.702770063584181e-05, + "loss": 1.0674, "step": 7013 }, { - "epoch": 0.1987588200289042, + "epoch": 0.27443461929728463, "grad_norm": 0.0, - "learning_rate": 1.8543260903991467e-05, - "loss": 0.9578, + "learning_rate": 1.7026799049579063e-05, + "loss": 1.1623, "step": 7014 }, { - "epoch": 0.19878715747116665, + "epoch": 0.2744737459895141, "grad_norm": 0.0, - "learning_rate": 1.85427838586341e-05, - "loss": 1.0775, + "learning_rate": 1.702589735047474e-05, + "loss": 1.0362, "step": 7015 }, { - "epoch": 0.19881549491342912, + "epoch": 0.27451287268174346, "grad_norm": 0.0, - "learning_rate": 1.8542306741317686e-05, - "loss": 1.0111, + "learning_rate": 1.70249955385433e-05, + "loss": 1.1394, "step": 7016 }, { - "epoch": 0.19884383235569159, + "epoch": 0.2745519993739729, "grad_norm": 0.0, - "learning_rate": 1.854182955204625e-05, - "loss": 1.1158, + "learning_rate": 1.702409361379924e-05, + "loss": 1.0408, "step": 7017 }, { - "epoch": 0.19887216979795402, + "epoch": 0.27459112606620234, "grad_norm": 0.0, - "learning_rate": 1.8541352290823816e-05, - "loss": 1.1788, + "learning_rate": 1.7023191576257038e-05, + "loss": 0.9655, "step": 7018 }, { - "epoch": 0.1989005072402165, + "epoch": 0.2746302527584318, "grad_norm": 0.0, - "learning_rate": 1.85408749576544e-05, - "loss": 0.9448, + "learning_rate": 1.7022289425931176e-05, + "loss": 1.13, "step": 7019 }, { - "epoch": 0.19892884468247896, + "epoch": 0.2746693794506612, "grad_norm": 0.0, - "learning_rate": 1.854039755254202e-05, - "loss": 0.9776, + "learning_rate": 1.702138716283615e-05, + "loss": 1.0302, "step": 7020 }, { - "epoch": 0.19895718212474142, + "epoch": 0.27470850614289066, "grad_norm": 0.0, - "learning_rate": 1.85399200754907e-05, - "loss": 0.9149, + "learning_rate": 1.702048478698644e-05, + "loss": 1.1376, "step": 7021 }, { - "epoch": 0.1989855195670039, + "epoch": 0.2747476328351201, "grad_norm": 0.0, - "learning_rate": 1.8539442526504457e-05, - "loss": 0.9828, + "learning_rate": 1.7019582298396544e-05, + "loss": 1.0327, "step": 7022 }, { - "epoch": 0.19901385700926635, + "epoch": 0.27478675952734954, "grad_norm": 0.0, - "learning_rate": 1.8538964905587327e-05, - "loss": 1.1036, + "learning_rate": 1.7018679697080952e-05, + "loss": 1.0997, "step": 7023 }, { - "epoch": 0.1990421944515288, + "epoch": 0.274825886219579, "grad_norm": 0.0, - "learning_rate": 1.8538487212743322e-05, - "loss": 1.0492, + "learning_rate": 1.701777698305416e-05, + "loss": 1.1218, "step": 7024 }, { - "epoch": 0.19907053189379126, + "epoch": 0.2748650129118084, "grad_norm": 0.0, - "learning_rate": 1.8538009447976467e-05, - "loss": 1.0533, + "learning_rate": 1.701687415633066e-05, + "loss": 1.1749, "step": 7025 }, { - "epoch": 0.19909886933605372, + "epoch": 0.27490413960403787, "grad_norm": 0.0, - "learning_rate": 1.853753161129079e-05, - "loss": 1.1261, + "learning_rate": 1.7015971216924957e-05, + "loss": 1.0305, "step": 7026 }, { - "epoch": 0.1991272067783162, + "epoch": 0.2749432662962673, "grad_norm": 0.0, - "learning_rate": 1.8537053702690314e-05, - "loss": 0.9309, + "learning_rate": 1.701506816485155e-05, + "loss": 1.125, "step": 7027 }, { - "epoch": 0.19915554422057866, + "epoch": 0.27498239298849675, "grad_norm": 0.0, - "learning_rate": 1.853657572217906e-05, - "loss": 0.9391, + "learning_rate": 1.7014165000124932e-05, + "loss": 1.2236, "step": 7028 }, { - "epoch": 0.19918388166284112, + "epoch": 0.2750215196807262, "grad_norm": 0.0, - "learning_rate": 1.8536097669761066e-05, - "loss": 0.8529, + "learning_rate": 1.701326172275962e-05, + "loss": 1.2162, "step": 7029 }, { - "epoch": 0.19921221910510356, + "epoch": 0.27506064637295563, "grad_norm": 0.0, - "learning_rate": 1.8535619545440345e-05, - "loss": 1.1038, + "learning_rate": 1.7012358332770105e-05, + "loss": 1.1615, "step": 7030 }, { - "epoch": 0.19924055654736603, + "epoch": 0.27509977306518507, "grad_norm": 0.0, - "learning_rate": 1.8535141349220937e-05, - "loss": 1.0213, + "learning_rate": 1.701145483017091e-05, + "loss": 1.1685, "step": 7031 }, { - "epoch": 0.1992688939896285, + "epoch": 0.2751388997574145, "grad_norm": 0.0, - "learning_rate": 1.853466308110686e-05, - "loss": 0.9362, + "learning_rate": 1.701055121497653e-05, + "loss": 1.2342, "step": 7032 }, { - "epoch": 0.19929723143189096, + "epoch": 0.27517802644964395, "grad_norm": 0.0, - "learning_rate": 1.853418474110215e-05, - "loss": 1.033, + "learning_rate": 1.7009647487201492e-05, + "loss": 1.1956, "step": 7033 }, { - "epoch": 0.19932556887415342, + "epoch": 0.2752171531418734, "grad_norm": 0.0, - "learning_rate": 1.853370632921083e-05, - "loss": 0.9853, + "learning_rate": 1.7008743646860288e-05, + "loss": 1.1354, "step": 7034 }, { - "epoch": 0.1993539063164159, + "epoch": 0.27525627983410283, "grad_norm": 0.0, - "learning_rate": 1.8533227845436932e-05, - "loss": 0.8502, + "learning_rate": 1.7007839693967446e-05, + "loss": 1.1672, "step": 7035 }, { - "epoch": 0.19938224375867833, + "epoch": 0.2752954065263323, "grad_norm": 0.0, - "learning_rate": 1.853274928978449e-05, - "loss": 0.9072, + "learning_rate": 1.7006935628537485e-05, + "loss": 1.1076, "step": 7036 }, { - "epoch": 0.1994105812009408, + "epoch": 0.2753345332185617, "grad_norm": 0.0, - "learning_rate": 1.8532270662257528e-05, - "loss": 1.0056, + "learning_rate": 1.7006031450584913e-05, + "loss": 1.1104, "step": 7037 }, { - "epoch": 0.19943891864320326, + "epoch": 0.27537365991079116, "grad_norm": 0.0, - "learning_rate": 1.8531791962860084e-05, - "loss": 0.9077, + "learning_rate": 1.700512716012426e-05, + "loss": 1.1495, "step": 7038 }, { - "epoch": 0.19946725608546573, + "epoch": 0.2754127866030206, "grad_norm": 0.0, - "learning_rate": 1.853131319159619e-05, - "loss": 1.0266, + "learning_rate": 1.700422275717004e-05, + "loss": 1.0091, "step": 7039 }, { - "epoch": 0.1994955935277282, + "epoch": 0.27545191329525004, "grad_norm": 0.0, - "learning_rate": 1.853083434846987e-05, - "loss": 1.0316, + "learning_rate": 1.7003318241736775e-05, + "loss": 1.13, "step": 7040 }, { - "epoch": 0.19952393096999066, + "epoch": 0.2754910399874795, "grad_norm": 0.0, - "learning_rate": 1.8530355433485172e-05, - "loss": 1.0244, + "learning_rate": 1.7002413613838997e-05, + "loss": 1.1443, "step": 7041 }, { - "epoch": 0.1995522684122531, + "epoch": 0.2755301666797089, "grad_norm": 0.0, - "learning_rate": 1.8529876446646122e-05, - "loss": 1.006, + "learning_rate": 1.7001508873491236e-05, + "loss": 1.1266, "step": 7042 }, { - "epoch": 0.19958060585451556, + "epoch": 0.27556929337193836, "grad_norm": 0.0, - "learning_rate": 1.852939738795675e-05, - "loss": 0.975, + "learning_rate": 1.700060402070801e-05, + "loss": 1.2426, "step": 7043 }, { - "epoch": 0.19960894329677803, + "epoch": 0.2756084200641678, "grad_norm": 0.0, - "learning_rate": 1.85289182574211e-05, - "loss": 0.9638, + "learning_rate": 1.6999699055503856e-05, + "loss": 1.1136, "step": 7044 }, { - "epoch": 0.1996372807390405, + "epoch": 0.2756475467563972, "grad_norm": 0.0, - "learning_rate": 1.8528439055043207e-05, - "loss": 1.097, + "learning_rate": 1.6998793977893312e-05, + "loss": 1.0602, "step": 7045 }, { - "epoch": 0.19966561818130296, + "epoch": 0.27568667344862663, "grad_norm": 0.0, - "learning_rate": 1.85279597808271e-05, - "loss": 1.0722, + "learning_rate": 1.69978887878909e-05, + "loss": 1.1055, "step": 7046 }, { - "epoch": 0.19969395562356543, + "epoch": 0.27572580014085607, "grad_norm": 0.0, - "learning_rate": 1.8527480434776825e-05, - "loss": 1.0922, + "learning_rate": 1.6996983485511164e-05, + "loss": 1.166, "step": 7047 }, { - "epoch": 0.19972229306582787, + "epoch": 0.2757649268330855, "grad_norm": 0.0, - "learning_rate": 1.8527001016896413e-05, - "loss": 1.0672, + "learning_rate": 1.6996078070768642e-05, + "loss": 1.2081, "step": 7048 }, { - "epoch": 0.19975063050809033, + "epoch": 0.27580405352531495, "grad_norm": 0.0, - "learning_rate": 1.8526521527189905e-05, - "loss": 0.9174, + "learning_rate": 1.6995172543677875e-05, + "loss": 1.0446, "step": 7049 }, { - "epoch": 0.1997789679503528, + "epoch": 0.2758431802175444, "grad_norm": 0.0, - "learning_rate": 1.8526041965661342e-05, - "loss": 1.0066, + "learning_rate": 1.69942669042534e-05, + "loss": 1.0399, "step": 7050 }, { - "epoch": 0.19980730539261526, + "epoch": 0.27588230690977383, "grad_norm": 0.0, - "learning_rate": 1.852556233231476e-05, - "loss": 1.0031, + "learning_rate": 1.6993361152509762e-05, + "loss": 1.0946, "step": 7051 }, { - "epoch": 0.19983564283487773, + "epoch": 0.2759214336020033, "grad_norm": 0.0, - "learning_rate": 1.85250826271542e-05, - "loss": 0.9253, + "learning_rate": 1.699245528846151e-05, + "loss": 1.1807, "step": 7052 }, { - "epoch": 0.1998639802771402, + "epoch": 0.2759605602942327, "grad_norm": 0.0, - "learning_rate": 1.85246028501837e-05, - "loss": 0.9588, + "learning_rate": 1.6991549312123187e-05, + "loss": 1.1884, "step": 7053 }, { - "epoch": 0.19989231771940263, + "epoch": 0.27599968698646216, "grad_norm": 0.0, - "learning_rate": 1.8524123001407312e-05, - "loss": 1.0711, + "learning_rate": 1.6990643223509342e-05, + "loss": 1.2018, "step": 7054 }, { - "epoch": 0.1999206551616651, + "epoch": 0.2760388136786916, "grad_norm": 0.0, - "learning_rate": 1.8523643080829065e-05, - "loss": 1.0231, + "learning_rate": 1.698973702263453e-05, + "loss": 1.0782, "step": 7055 }, { - "epoch": 0.19994899260392757, + "epoch": 0.27607794037092104, "grad_norm": 0.0, - "learning_rate": 1.8523163088453013e-05, - "loss": 1.0964, + "learning_rate": 1.6988830709513294e-05, + "loss": 0.9938, "step": 7056 }, { - "epoch": 0.19997733004619003, + "epoch": 0.2761170670631505, "grad_norm": 0.0, - "learning_rate": 1.852268302428319e-05, - "loss": 1.0675, + "learning_rate": 1.6987924284160197e-05, + "loss": 1.0971, "step": 7057 }, { - "epoch": 0.2000056674884525, + "epoch": 0.2761561937553799, "grad_norm": 0.0, - "learning_rate": 1.852220288832364e-05, - "loss": 0.9937, + "learning_rate": 1.6987017746589797e-05, + "loss": 1.0402, "step": 7058 }, { - "epoch": 0.20003400493071496, + "epoch": 0.27619532044760936, "grad_norm": 0.0, - "learning_rate": 1.8521722680578413e-05, - "loss": 1.0751, + "learning_rate": 1.698611109681664e-05, + "loss": 1.1567, "step": 7059 }, { - "epoch": 0.2000623423729774, + "epoch": 0.2762344471398388, "grad_norm": 0.0, - "learning_rate": 1.8521242401051554e-05, - "loss": 1.0281, + "learning_rate": 1.6985204334855298e-05, + "loss": 1.1089, "step": 7060 }, { - "epoch": 0.20009067981523987, + "epoch": 0.27627357383206824, "grad_norm": 0.0, - "learning_rate": 1.8520762049747102e-05, - "loss": 0.9973, + "learning_rate": 1.6984297460720323e-05, + "loss": 1.1685, "step": 7061 }, { - "epoch": 0.20011901725750234, + "epoch": 0.2763127005242977, "grad_norm": 0.0, - "learning_rate": 1.852028162666911e-05, - "loss": 0.9827, + "learning_rate": 1.6983390474426284e-05, + "loss": 1.1534, "step": 7062 }, { - "epoch": 0.2001473546997648, + "epoch": 0.2763518272165271, "grad_norm": 0.0, - "learning_rate": 1.851980113182162e-05, - "loss": 0.9271, + "learning_rate": 1.6982483375987746e-05, + "loss": 1.0626, "step": 7063 }, { - "epoch": 0.20017569214202727, + "epoch": 0.27639095390875656, "grad_norm": 0.0, - "learning_rate": 1.8519320565208682e-05, - "loss": 1.0162, + "learning_rate": 1.6981576165419275e-05, + "loss": 0.9927, "step": 7064 }, { - "epoch": 0.20020402958428973, + "epoch": 0.276430080600986, "grad_norm": 0.0, - "learning_rate": 1.8518839926834343e-05, - "loss": 1.0068, + "learning_rate": 1.6980668842735438e-05, + "loss": 0.9493, "step": 7065 }, { - "epoch": 0.20023236702655217, + "epoch": 0.27646920729321545, "grad_norm": 0.0, - "learning_rate": 1.8518359216702653e-05, - "loss": 1.0618, + "learning_rate": 1.6979761407950806e-05, + "loss": 1.2271, "step": 7066 }, { - "epoch": 0.20026070446881464, + "epoch": 0.2765083339854449, "grad_norm": 0.0, - "learning_rate": 1.851787843481766e-05, - "loss": 0.953, + "learning_rate": 1.6978853861079954e-05, + "loss": 1.1474, "step": 7067 }, { - "epoch": 0.2002890419110771, + "epoch": 0.27654746067767433, "grad_norm": 0.0, - "learning_rate": 1.8517397581183412e-05, - "loss": 0.9147, + "learning_rate": 1.697794620213745e-05, + "loss": 1.1251, "step": 7068 }, { - "epoch": 0.20031737935333957, + "epoch": 0.27658658736990377, "grad_norm": 0.0, - "learning_rate": 1.8516916655803963e-05, - "loss": 0.9704, + "learning_rate": 1.697703843113788e-05, + "loss": 1.2307, "step": 7069 }, { - "epoch": 0.20034571679560204, + "epoch": 0.2766257140621332, "grad_norm": 0.0, - "learning_rate": 1.851643565868336e-05, - "loss": 1.0437, + "learning_rate": 1.697613054809581e-05, + "loss": 1.1639, "step": 7070 }, { - "epoch": 0.2003740542378645, + "epoch": 0.27666484075436265, "grad_norm": 0.0, - "learning_rate": 1.851595458982566e-05, - "loss": 0.959, + "learning_rate": 1.697522255302583e-05, + "loss": 1.1524, "step": 7071 }, { - "epoch": 0.20040239168012694, + "epoch": 0.2767039674465921, "grad_norm": 0.0, - "learning_rate": 1.851547344923491e-05, - "loss": 0.9939, + "learning_rate": 1.6974314445942514e-05, + "loss": 1.1664, "step": 7072 }, { - "epoch": 0.2004307291223894, + "epoch": 0.2767430941388215, "grad_norm": 0.0, - "learning_rate": 1.8514992236915166e-05, - "loss": 1.0465, + "learning_rate": 1.6973406226860444e-05, + "loss": 1.077, "step": 7073 }, { - "epoch": 0.20045906656465187, + "epoch": 0.2767822208310509, "grad_norm": 0.0, - "learning_rate": 1.851451095287048e-05, - "loss": 0.9392, + "learning_rate": 1.697249789579421e-05, + "loss": 1.0295, "step": 7074 }, { - "epoch": 0.20048740400691434, + "epoch": 0.27682134752328036, "grad_norm": 0.0, - "learning_rate": 1.8514029597104907e-05, - "loss": 1.1049, + "learning_rate": 1.6971589452758397e-05, + "loss": 1.0681, "step": 7075 }, { - "epoch": 0.2005157414491768, + "epoch": 0.2768604742155098, "grad_norm": 0.0, - "learning_rate": 1.85135481696225e-05, - "loss": 0.9467, + "learning_rate": 1.6970680897767597e-05, + "loss": 1.09, "step": 7076 }, { - "epoch": 0.20054407889143927, + "epoch": 0.27689960090773924, "grad_norm": 0.0, - "learning_rate": 1.851306667042732e-05, - "loss": 0.9263, + "learning_rate": 1.696977223083639e-05, + "loss": 1.0657, "step": 7077 }, { - "epoch": 0.2005724163337017, + "epoch": 0.2769387275999687, "grad_norm": 0.0, - "learning_rate": 1.8512585099523412e-05, - "loss": 0.9655, + "learning_rate": 1.696886345197938e-05, + "loss": 1.0466, "step": 7078 }, { - "epoch": 0.20060075377596417, + "epoch": 0.2769778542921981, "grad_norm": 0.0, - "learning_rate": 1.851210345691484e-05, - "loss": 1.0525, + "learning_rate": 1.6967954561211154e-05, + "loss": 1.163, "step": 7079 }, { - "epoch": 0.20062909121822664, + "epoch": 0.27701698098442756, "grad_norm": 0.0, - "learning_rate": 1.8511621742605662e-05, - "loss": 1.0176, + "learning_rate": 1.696704555854631e-05, + "loss": 1.1243, "step": 7080 }, { - "epoch": 0.2006574286604891, + "epoch": 0.277056107676657, "grad_norm": 0.0, - "learning_rate": 1.851113995659993e-05, - "loss": 0.9273, + "learning_rate": 1.696613644399944e-05, + "loss": 1.0965, "step": 7081 }, { - "epoch": 0.20068576610275157, + "epoch": 0.27709523436888644, "grad_norm": 0.0, - "learning_rate": 1.851065809890171e-05, - "loss": 0.998, + "learning_rate": 1.696522721758515e-05, + "loss": 1.1109, "step": 7082 }, { - "epoch": 0.20071410354501404, + "epoch": 0.2771343610611159, "grad_norm": 0.0, - "learning_rate": 1.8510176169515056e-05, - "loss": 0.9803, + "learning_rate": 1.696431787931804e-05, + "loss": 1.0919, "step": 7083 }, { - "epoch": 0.20074244098727648, + "epoch": 0.2771734877533453, "grad_norm": 0.0, - "learning_rate": 1.8509694168444025e-05, - "loss": 1.0673, + "learning_rate": 1.6963408429212712e-05, + "loss": 1.2177, "step": 7084 }, { - "epoch": 0.20077077842953894, + "epoch": 0.27721261444557477, "grad_norm": 0.0, - "learning_rate": 1.850921209569268e-05, - "loss": 1.0309, + "learning_rate": 1.696249886728377e-05, + "loss": 1.0958, "step": 7085 }, { - "epoch": 0.2007991158718014, + "epoch": 0.2772517411378042, "grad_norm": 0.0, - "learning_rate": 1.8508729951265082e-05, - "loss": 0.8971, + "learning_rate": 1.696158919354582e-05, + "loss": 1.1142, "step": 7086 }, { - "epoch": 0.20082745331406388, + "epoch": 0.27729086783003365, "grad_norm": 0.0, - "learning_rate": 1.8508247735165294e-05, - "loss": 1.0184, + "learning_rate": 1.6960679408013475e-05, + "loss": 1.0965, "step": 7087 }, { - "epoch": 0.20085579075632634, + "epoch": 0.2773299945222631, "grad_norm": 0.0, - "learning_rate": 1.8507765447397375e-05, - "loss": 0.9122, + "learning_rate": 1.6959769510701333e-05, + "loss": 1.1415, "step": 7088 }, { - "epoch": 0.2008841281985888, + "epoch": 0.27736912121449253, "grad_norm": 0.0, - "learning_rate": 1.850728308796539e-05, - "loss": 0.9452, + "learning_rate": 1.695885950162402e-05, + "loss": 1.1531, "step": 7089 }, { - "epoch": 0.20091246564085125, + "epoch": 0.27740824790672197, "grad_norm": 0.0, - "learning_rate": 1.8506800656873397e-05, - "loss": 0.9868, + "learning_rate": 1.695794938079614e-05, + "loss": 1.1714, "step": 7090 }, { - "epoch": 0.2009408030831137, + "epoch": 0.2774473745989514, "grad_norm": 0.0, - "learning_rate": 1.8506318154125463e-05, - "loss": 0.9431, + "learning_rate": 1.6957039148232315e-05, + "loss": 1.15, "step": 7091 }, { - "epoch": 0.20096914052537618, + "epoch": 0.27748650129118085, "grad_norm": 0.0, - "learning_rate": 1.8505835579725653e-05, - "loss": 0.9469, + "learning_rate": 1.6956128803947155e-05, + "loss": 1.0163, "step": 7092 }, { - "epoch": 0.20099747796763864, + "epoch": 0.2775256279834103, "grad_norm": 0.0, - "learning_rate": 1.8505352933678037e-05, - "loss": 1.0043, + "learning_rate": 1.6955218347955286e-05, + "loss": 1.1738, "step": 7093 }, { - "epoch": 0.2010258154099011, + "epoch": 0.27756475467563974, "grad_norm": 0.0, - "learning_rate": 1.8504870215986667e-05, - "loss": 0.8636, + "learning_rate": 1.6954307780271325e-05, + "loss": 1.104, "step": 7094 }, { - "epoch": 0.20105415285216358, + "epoch": 0.2776038813678692, "grad_norm": 0.0, - "learning_rate": 1.850438742665562e-05, - "loss": 0.9615, + "learning_rate": 1.6953397100909896e-05, + "loss": 1.2696, "step": 7095 }, { - "epoch": 0.20108249029442601, + "epoch": 0.2776430080600986, "grad_norm": 0.0, - "learning_rate": 1.850390456568896e-05, - "loss": 0.9097, + "learning_rate": 1.6952486309885617e-05, + "loss": 1.0852, "step": 7096 }, { - "epoch": 0.20111082773668848, + "epoch": 0.27768213475232806, "grad_norm": 0.0, - "learning_rate": 1.8503421633090755e-05, - "loss": 1.0417, + "learning_rate": 1.6951575407213126e-05, + "loss": 1.1553, "step": 7097 }, { - "epoch": 0.20113916517895095, + "epoch": 0.2777212614445575, "grad_norm": 0.0, - "learning_rate": 1.850293862886507e-05, - "loss": 1.0078, + "learning_rate": 1.6950664392907042e-05, + "loss": 1.0663, "step": 7098 }, { - "epoch": 0.2011675026212134, + "epoch": 0.27776038813678694, "grad_norm": 0.0, - "learning_rate": 1.8502455553015978e-05, - "loss": 1.116, + "learning_rate": 1.6949753266982e-05, + "loss": 1.1503, "step": 7099 }, { - "epoch": 0.20119584006347588, + "epoch": 0.2777995148290164, "grad_norm": 0.0, - "learning_rate": 1.8501972405547547e-05, - "loss": 1.0158, + "learning_rate": 1.6948842029452627e-05, + "loss": 1.0057, "step": 7100 }, { - "epoch": 0.20122417750573834, + "epoch": 0.2778386415212458, "grad_norm": 0.0, - "learning_rate": 1.850148918646384e-05, - "loss": 1.0357, + "learning_rate": 1.6947930680333556e-05, + "loss": 1.1348, "step": 7101 }, { - "epoch": 0.20125251494800078, + "epoch": 0.2778777682134752, "grad_norm": 0.0, - "learning_rate": 1.8501005895768934e-05, - "loss": 0.9885, + "learning_rate": 1.6947019219639432e-05, + "loss": 1.1273, "step": 7102 }, { - "epoch": 0.20128085239026325, + "epoch": 0.27791689490570465, "grad_norm": 0.0, - "learning_rate": 1.8500522533466897e-05, - "loss": 1.0759, + "learning_rate": 1.694610764738488e-05, + "loss": 1.176, "step": 7103 }, { - "epoch": 0.20130918983252571, + "epoch": 0.2779560215979341, "grad_norm": 0.0, - "learning_rate": 1.8500039099561807e-05, - "loss": 0.975, + "learning_rate": 1.6945195963584543e-05, + "loss": 1.1523, "step": 7104 }, { - "epoch": 0.20133752727478818, + "epoch": 0.27799514829016353, "grad_norm": 0.0, - "learning_rate": 1.849955559405773e-05, - "loss": 0.9822, + "learning_rate": 1.6944284168253062e-05, + "loss": 1.0909, "step": 7105 }, { - "epoch": 0.20136586471705065, + "epoch": 0.27803427498239297, "grad_norm": 0.0, - "learning_rate": 1.849907201695874e-05, - "loss": 0.9823, + "learning_rate": 1.694337226140508e-05, + "loss": 1.1245, "step": 7106 }, { - "epoch": 0.2013942021593131, + "epoch": 0.2780734016746224, "grad_norm": 0.0, - "learning_rate": 1.8498588368268905e-05, - "loss": 0.9697, + "learning_rate": 1.6942460243055237e-05, + "loss": 1.0918, "step": 7107 }, { - "epoch": 0.20142253960157555, + "epoch": 0.27811252836685185, "grad_norm": 0.0, - "learning_rate": 1.849810464799231e-05, - "loss": 0.8958, + "learning_rate": 1.694154811321818e-05, + "loss": 1.1862, "step": 7108 }, { - "epoch": 0.20145087704383802, + "epoch": 0.2781516550590813, "grad_norm": 0.0, - "learning_rate": 1.849762085613302e-05, - "loss": 0.979, + "learning_rate": 1.6940635871908564e-05, + "loss": 1.0662, "step": 7109 }, { - "epoch": 0.20147921448610048, + "epoch": 0.27819078175131073, "grad_norm": 0.0, - "learning_rate": 1.8497136992695117e-05, - "loss": 1.1234, + "learning_rate": 1.693972351914103e-05, + "loss": 1.0626, "step": 7110 }, { - "epoch": 0.20150755192836295, + "epoch": 0.2782299084435402, "grad_norm": 0.0, - "learning_rate": 1.8496653057682673e-05, - "loss": 0.824, + "learning_rate": 1.6938811054930237e-05, + "loss": 1.1416, "step": 7111 }, { - "epoch": 0.20153588937062542, + "epoch": 0.2782690351357696, "grad_norm": 0.0, - "learning_rate": 1.8496169051099766e-05, - "loss": 0.9648, + "learning_rate": 1.6937898479290826e-05, + "loss": 1.091, "step": 7112 }, { - "epoch": 0.20156422681288788, + "epoch": 0.27830816182799906, "grad_norm": 0.0, - "learning_rate": 1.8495684972950473e-05, - "loss": 0.8095, + "learning_rate": 1.6936985792237464e-05, + "loss": 1.1155, "step": 7113 }, { - "epoch": 0.20159256425515032, + "epoch": 0.2783472885202285, "grad_norm": 0.0, - "learning_rate": 1.849520082323887e-05, - "loss": 1.0166, + "learning_rate": 1.6936072993784802e-05, + "loss": 1.1124, "step": 7114 }, { - "epoch": 0.20162090169741279, + "epoch": 0.27838641521245794, "grad_norm": 0.0, - "learning_rate": 1.8494716601969034e-05, - "loss": 0.9413, + "learning_rate": 1.6935160083947498e-05, + "loss": 1.2565, "step": 7115 }, { - "epoch": 0.20164923913967525, + "epoch": 0.2784255419046874, "grad_norm": 0.0, - "learning_rate": 1.8494232309145044e-05, - "loss": 1.0276, + "learning_rate": 1.6934247062740215e-05, + "loss": 1.1502, "step": 7116 }, { - "epoch": 0.20167757658193772, + "epoch": 0.2784646685969168, "grad_norm": 0.0, - "learning_rate": 1.8493747944770985e-05, - "loss": 1.1169, + "learning_rate": 1.6933333930177613e-05, + "loss": 1.2311, "step": 7117 }, { - "epoch": 0.20170591402420018, + "epoch": 0.27850379528914626, "grad_norm": 0.0, - "learning_rate": 1.849326350885093e-05, - "loss": 0.9512, + "learning_rate": 1.6932420686274353e-05, + "loss": 1.1657, "step": 7118 }, { - "epoch": 0.20173425146646265, + "epoch": 0.2785429219813757, "grad_norm": 0.0, - "learning_rate": 1.8492779001388964e-05, - "loss": 0.9433, + "learning_rate": 1.693150733104511e-05, + "loss": 1.1926, "step": 7119 }, { - "epoch": 0.2017625889087251, + "epoch": 0.27858204867360514, "grad_norm": 0.0, - "learning_rate": 1.8492294422389167e-05, - "loss": 1.1046, + "learning_rate": 1.6930593864504538e-05, + "loss": 0.9451, "step": 7120 }, { - "epoch": 0.20179092635098755, + "epoch": 0.2786211753658346, "grad_norm": 0.0, - "learning_rate": 1.849180977185562e-05, - "loss": 1.0003, + "learning_rate": 1.6929680286667313e-05, + "loss": 1.2469, "step": 7121 }, { - "epoch": 0.20181926379325002, + "epoch": 0.278660302058064, "grad_norm": 0.0, - "learning_rate": 1.8491325049792407e-05, - "loss": 0.9601, + "learning_rate": 1.692876659754811e-05, + "loss": 1.1003, "step": 7122 }, { - "epoch": 0.20184760123551249, + "epoch": 0.27869942875029347, "grad_norm": 0.0, - "learning_rate": 1.849084025620361e-05, - "loss": 0.9421, + "learning_rate": 1.69278527971616e-05, + "loss": 1.0416, "step": 7123 }, { - "epoch": 0.20187593867777495, + "epoch": 0.2787385554425229, "grad_norm": 0.0, - "learning_rate": 1.849035539109331e-05, - "loss": 1.0256, + "learning_rate": 1.692693888552245e-05, + "loss": 1.1392, "step": 7124 }, { - "epoch": 0.20190427612003742, + "epoch": 0.27877768213475235, "grad_norm": 0.0, - "learning_rate": 1.8489870454465596e-05, - "loss": 1.0707, + "learning_rate": 1.692602486264534e-05, + "loss": 1.1405, "step": 7125 }, { - "epoch": 0.20193261356229986, + "epoch": 0.2788168088269818, "grad_norm": 0.0, - "learning_rate": 1.8489385446324552e-05, - "loss": 1.0101, + "learning_rate": 1.6925110728544953e-05, + "loss": 0.9554, "step": 7126 }, { - "epoch": 0.20196095100456232, + "epoch": 0.27885593551921123, "grad_norm": 0.0, - "learning_rate": 1.848890036667426e-05, - "loss": 1.0515, + "learning_rate": 1.6924196483235968e-05, + "loss": 1.0675, "step": 7127 }, { - "epoch": 0.2019892884468248, + "epoch": 0.27889506221144067, "grad_norm": 0.0, - "learning_rate": 1.8488415215518807e-05, - "loss": 1.088, + "learning_rate": 1.6923282126733058e-05, + "loss": 1.0405, "step": 7128 }, { - "epoch": 0.20201762588908725, + "epoch": 0.2789341889036701, "grad_norm": 0.0, - "learning_rate": 1.8487929992862282e-05, - "loss": 0.9541, + "learning_rate": 1.6922367659050914e-05, + "loss": 1.1296, "step": 7129 }, { - "epoch": 0.20204596333134972, + "epoch": 0.2789733155958995, "grad_norm": 0.0, - "learning_rate": 1.848744469870877e-05, - "loss": 0.9803, + "learning_rate": 1.6921453080204224e-05, + "loss": 1.1603, "step": 7130 }, { - "epoch": 0.2020743007736122, + "epoch": 0.27901244228812894, "grad_norm": 0.0, - "learning_rate": 1.8486959333062356e-05, - "loss": 1.045, + "learning_rate": 1.6920538390207664e-05, + "loss": 1.1612, "step": 7131 }, { - "epoch": 0.20210263821587462, + "epoch": 0.2790515689803584, "grad_norm": 0.0, - "learning_rate": 1.848647389592714e-05, - "loss": 0.9567, + "learning_rate": 1.6919623589075934e-05, + "loss": 1.2462, "step": 7132 }, { - "epoch": 0.2021309756581371, + "epoch": 0.2790906956725878, "grad_norm": 0.0, - "learning_rate": 1.8485988387307197e-05, - "loss": 0.9494, + "learning_rate": 1.691870867682372e-05, + "loss": 1.0887, "step": 7133 }, { - "epoch": 0.20215931310039956, + "epoch": 0.27912982236481726, "grad_norm": 0.0, - "learning_rate": 1.8485502807206624e-05, - "loss": 1.0073, + "learning_rate": 1.6917793653465712e-05, + "loss": 1.0852, "step": 7134 }, { - "epoch": 0.20218765054266202, + "epoch": 0.2791689490570467, "grad_norm": 0.0, - "learning_rate": 1.848501715562951e-05, - "loss": 1.0015, + "learning_rate": 1.691687851901661e-05, + "loss": 0.9153, "step": 7135 }, { - "epoch": 0.2022159879849245, + "epoch": 0.27920807574927614, "grad_norm": 0.0, - "learning_rate": 1.8484531432579947e-05, - "loss": 0.9569, + "learning_rate": 1.6915963273491103e-05, + "loss": 1.1937, "step": 7136 }, { - "epoch": 0.20224432542718696, + "epoch": 0.2792472024415056, "grad_norm": 0.0, - "learning_rate": 1.8484045638062022e-05, - "loss": 1.0783, + "learning_rate": 1.691504791690389e-05, + "loss": 1.2247, "step": 7137 }, { - "epoch": 0.2022726628694494, + "epoch": 0.279286329133735, "grad_norm": 0.0, - "learning_rate": 1.8483559772079833e-05, - "loss": 1.0591, + "learning_rate": 1.6914132449269676e-05, + "loss": 1.1797, "step": 7138 }, { - "epoch": 0.20230100031171186, + "epoch": 0.27932545582596446, "grad_norm": 0.0, - "learning_rate": 1.848307383463747e-05, - "loss": 1.029, + "learning_rate": 1.6913216870603157e-05, + "loss": 1.2275, "step": 7139 }, { - "epoch": 0.20232933775397433, + "epoch": 0.2793645825181939, "grad_norm": 0.0, - "learning_rate": 1.8482587825739023e-05, - "loss": 1.0962, + "learning_rate": 1.691230118091904e-05, + "loss": 1.2075, "step": 7140 }, { - "epoch": 0.2023576751962368, + "epoch": 0.27940370921042335, "grad_norm": 0.0, - "learning_rate": 1.848210174538859e-05, - "loss": 0.8982, + "learning_rate": 1.6911385380232027e-05, + "loss": 1.097, "step": 7141 }, { - "epoch": 0.20238601263849926, + "epoch": 0.2794428359026528, "grad_norm": 0.0, - "learning_rate": 1.8481615593590265e-05, - "loss": 0.9791, + "learning_rate": 1.6910469468556826e-05, + "loss": 1.167, "step": 7142 }, { - "epoch": 0.20241435008076172, + "epoch": 0.2794819625948822, "grad_norm": 0.0, - "learning_rate": 1.8481129370348142e-05, - "loss": 1.0317, + "learning_rate": 1.690955344590814e-05, + "loss": 1.1405, "step": 7143 }, { - "epoch": 0.20244268752302416, + "epoch": 0.27952108928711167, "grad_norm": 0.0, - "learning_rate": 1.848064307566632e-05, - "loss": 0.897, + "learning_rate": 1.690863731230069e-05, + "loss": 1.1369, "step": 7144 }, { - "epoch": 0.20247102496528663, + "epoch": 0.2795602159793411, "grad_norm": 0.0, - "learning_rate": 1.8480156709548888e-05, - "loss": 0.9442, + "learning_rate": 1.690772106774918e-05, + "loss": 1.054, "step": 7145 }, { - "epoch": 0.2024993624075491, + "epoch": 0.27959934267157055, "grad_norm": 0.0, - "learning_rate": 1.8479670271999945e-05, - "loss": 0.9917, + "learning_rate": 1.690680471226832e-05, + "loss": 1.1249, "step": 7146 }, { - "epoch": 0.20252769984981156, + "epoch": 0.2796384693638, "grad_norm": 0.0, - "learning_rate": 1.8479183763023597e-05, - "loss": 0.9615, + "learning_rate": 1.690588824587284e-05, + "loss": 1.116, "step": 7147 }, { - "epoch": 0.20255603729207403, + "epoch": 0.27967759605602943, "grad_norm": 0.0, - "learning_rate": 1.847869718262393e-05, - "loss": 1.0397, + "learning_rate": 1.690497166857744e-05, + "loss": 1.1163, "step": 7148 }, { - "epoch": 0.2025843747343365, + "epoch": 0.2797167227482589, "grad_norm": 0.0, - "learning_rate": 1.847821053080505e-05, - "loss": 0.9554, + "learning_rate": 1.6904054980396852e-05, + "loss": 1.1267, "step": 7149 }, { - "epoch": 0.20261271217659893, + "epoch": 0.2797558494404883, "grad_norm": 0.0, - "learning_rate": 1.8477723807571055e-05, - "loss": 0.8673, + "learning_rate": 1.690313818134579e-05, + "loss": 1.1439, "step": 7150 }, { - "epoch": 0.2026410496188614, + "epoch": 0.27979497613271775, "grad_norm": 0.0, - "learning_rate": 1.8477237012926042e-05, - "loss": 1.1939, + "learning_rate": 1.6902221271438982e-05, + "loss": 1.0449, "step": 7151 }, { - "epoch": 0.20266938706112386, + "epoch": 0.2798341028249472, "grad_norm": 0.0, - "learning_rate": 1.847675014687412e-05, - "loss": 1.0607, + "learning_rate": 1.690130425069115e-05, + "loss": 1.166, "step": 7152 }, { - "epoch": 0.20269772450338633, + "epoch": 0.27987322951717664, "grad_norm": 0.0, - "learning_rate": 1.8476263209419376e-05, - "loss": 0.9694, + "learning_rate": 1.6900387119117013e-05, + "loss": 1.1402, "step": 7153 }, { - "epoch": 0.2027260619456488, + "epoch": 0.2799123562094061, "grad_norm": 0.0, - "learning_rate": 1.8475776200565924e-05, - "loss": 0.9679, + "learning_rate": 1.6899469876731313e-05, + "loss": 1.1492, "step": 7154 }, { - "epoch": 0.20275439938791126, + "epoch": 0.2799514829016355, "grad_norm": 0.0, - "learning_rate": 1.847528912031786e-05, - "loss": 1.0791, + "learning_rate": 1.6898552523548767e-05, + "loss": 1.203, "step": 7155 }, { - "epoch": 0.2027827368301737, + "epoch": 0.27999060959386496, "grad_norm": 0.0, - "learning_rate": 1.847480196867929e-05, - "loss": 0.9817, + "learning_rate": 1.6897635059584114e-05, + "loss": 1.043, "step": 7156 }, { - "epoch": 0.20281107427243616, + "epoch": 0.2800297362860944, "grad_norm": 0.0, - "learning_rate": 1.8474314745654313e-05, - "loss": 0.9444, + "learning_rate": 1.6896717484852084e-05, + "loss": 1.3303, "step": 7157 }, { - "epoch": 0.20283941171469863, + "epoch": 0.2800688629783238, "grad_norm": 0.0, - "learning_rate": 1.8473827451247038e-05, - "loss": 0.9917, + "learning_rate": 1.6895799799367417e-05, + "loss": 1.025, "step": 7158 }, { - "epoch": 0.2028677491569611, + "epoch": 0.2801079896705532, "grad_norm": 0.0, - "learning_rate": 1.847334008546157e-05, - "loss": 0.8993, + "learning_rate": 1.689488200314484e-05, + "loss": 1.1763, "step": 7159 }, { - "epoch": 0.20289608659922356, + "epoch": 0.28014711636278267, "grad_norm": 0.0, - "learning_rate": 1.847285264830201e-05, - "loss": 1.0797, + "learning_rate": 1.6893964096199103e-05, + "loss": 1.1831, "step": 7160 }, { - "epoch": 0.20292442404148603, + "epoch": 0.2801862430550121, "grad_norm": 0.0, - "learning_rate": 1.8472365139772465e-05, - "loss": 0.9964, + "learning_rate": 1.689304607854494e-05, + "loss": 1.1263, "step": 7161 }, { - "epoch": 0.20295276148374847, + "epoch": 0.28022536974724155, "grad_norm": 0.0, - "learning_rate": 1.847187755987704e-05, - "loss": 1.0619, + "learning_rate": 1.6892127950197092e-05, + "loss": 1.1396, "step": 7162 }, { - "epoch": 0.20298109892601093, + "epoch": 0.280264496439471, "grad_norm": 0.0, - "learning_rate": 1.847138990861985e-05, - "loss": 1.0882, + "learning_rate": 1.689120971117031e-05, + "loss": 1.1705, "step": 7163 }, { - "epoch": 0.2030094363682734, + "epoch": 0.28030362313170043, "grad_norm": 0.0, - "learning_rate": 1.8470902186004995e-05, - "loss": 0.8842, + "learning_rate": 1.6890291361479332e-05, + "loss": 1.0513, "step": 7164 }, { - "epoch": 0.20303777381053587, + "epoch": 0.28034274982392987, "grad_norm": 0.0, - "learning_rate": 1.8470414392036582e-05, - "loss": 0.9557, + "learning_rate": 1.688937290113891e-05, + "loss": 1.0646, "step": 7165 }, { - "epoch": 0.20306611125279833, + "epoch": 0.2803818765161593, "grad_norm": 0.0, - "learning_rate": 1.8469926526718726e-05, - "loss": 1.0698, + "learning_rate": 1.688845433016379e-05, + "loss": 1.1937, "step": 7166 }, { - "epoch": 0.2030944486950608, + "epoch": 0.28042100320838875, "grad_norm": 0.0, - "learning_rate": 1.846943859005553e-05, - "loss": 1.1021, + "learning_rate": 1.688753564856873e-05, + "loss": 1.121, "step": 7167 }, { - "epoch": 0.20312278613732324, + "epoch": 0.2804601299006182, "grad_norm": 0.0, - "learning_rate": 1.8468950582051116e-05, - "loss": 0.9461, + "learning_rate": 1.6886616856368472e-05, + "loss": 1.1534, "step": 7168 }, { - "epoch": 0.2031511235795857, + "epoch": 0.28049925659284763, "grad_norm": 0.0, - "learning_rate": 1.846846250270958e-05, - "loss": 1.0026, + "learning_rate": 1.688569795357778e-05, + "loss": 1.2186, "step": 7169 }, { - "epoch": 0.20317946102184817, + "epoch": 0.2805383832850771, "grad_norm": 0.0, - "learning_rate": 1.846797435203504e-05, - "loss": 0.9417, + "learning_rate": 1.6884778940211408e-05, + "loss": 1.1593, "step": 7170 }, { - "epoch": 0.20320779846411063, + "epoch": 0.2805775099773065, "grad_norm": 0.0, - "learning_rate": 1.846748613003161e-05, - "loss": 0.9696, + "learning_rate": 1.688385981628411e-05, + "loss": 1.2131, "step": 7171 }, { - "epoch": 0.2032361359063731, + "epoch": 0.28061663666953596, "grad_norm": 0.0, - "learning_rate": 1.8466997836703397e-05, - "loss": 0.976, + "learning_rate": 1.6882940581810655e-05, + "loss": 1.1591, "step": 7172 }, { - "epoch": 0.20326447334863557, + "epoch": 0.2806557633617654, "grad_norm": 0.0, - "learning_rate": 1.846650947205452e-05, - "loss": 0.9908, + "learning_rate": 1.6882021236805793e-05, + "loss": 1.1956, "step": 7173 }, { - "epoch": 0.203292810790898, + "epoch": 0.28069489005399484, "grad_norm": 0.0, - "learning_rate": 1.8466021036089085e-05, - "loss": 0.9703, + "learning_rate": 1.6881101781284294e-05, + "loss": 1.2729, "step": 7174 }, { - "epoch": 0.20332114823316047, + "epoch": 0.2807340167462243, "grad_norm": 0.0, - "learning_rate": 1.8465532528811216e-05, - "loss": 0.9445, + "learning_rate": 1.6880182215260924e-05, + "loss": 1.1787, "step": 7175 }, { - "epoch": 0.20334948567542294, + "epoch": 0.2807731434384537, "grad_norm": 0.0, - "learning_rate": 1.846504395022502e-05, - "loss": 1.0736, + "learning_rate": 1.6879262538750453e-05, + "loss": 1.1138, "step": 7176 }, { - "epoch": 0.2033778231176854, + "epoch": 0.28081227013068316, "grad_norm": 0.0, - "learning_rate": 1.846455530033462e-05, - "loss": 0.9591, + "learning_rate": 1.6878342751767642e-05, + "loss": 1.005, "step": 7177 }, { - "epoch": 0.20340616055994787, + "epoch": 0.2808513968229126, "grad_norm": 0.0, - "learning_rate": 1.8464066579144123e-05, - "loss": 1.0245, + "learning_rate": 1.6877422854327265e-05, + "loss": 1.0713, "step": 7178 }, { - "epoch": 0.2034344980022103, + "epoch": 0.28089052351514204, "grad_norm": 0.0, - "learning_rate": 1.8463577786657653e-05, - "loss": 0.9157, + "learning_rate": 1.6876502846444096e-05, + "loss": 0.9827, "step": 7179 }, { - "epoch": 0.20346283544447277, + "epoch": 0.2809296502073715, "grad_norm": 0.0, - "learning_rate": 1.846308892287932e-05, - "loss": 0.9226, + "learning_rate": 1.687558272813291e-05, + "loss": 1.1227, "step": 7180 }, { - "epoch": 0.20349117288673524, + "epoch": 0.2809687768996009, "grad_norm": 0.0, - "learning_rate": 1.846259998781325e-05, - "loss": 0.9868, + "learning_rate": 1.687466249940848e-05, + "loss": 1.1964, "step": 7181 }, { - "epoch": 0.2035195103289977, + "epoch": 0.28100790359183037, "grad_norm": 0.0, - "learning_rate": 1.8462110981463555e-05, - "loss": 0.9848, + "learning_rate": 1.687374216028558e-05, + "loss": 1.2675, "step": 7182 }, { - "epoch": 0.20354784777126017, + "epoch": 0.2810470302840598, "grad_norm": 0.0, - "learning_rate": 1.846162190383436e-05, - "loss": 1.0436, + "learning_rate": 1.6872821710778997e-05, + "loss": 1.0932, "step": 7183 }, { - "epoch": 0.20357618521352264, + "epoch": 0.28108615697628925, "grad_norm": 0.0, - "learning_rate": 1.846113275492978e-05, - "loss": 0.9812, + "learning_rate": 1.687190115090351e-05, + "loss": 1.1411, "step": 7184 }, { - "epoch": 0.20360452265578508, + "epoch": 0.2811252836685187, "grad_norm": 0.0, - "learning_rate": 1.8460643534753938e-05, - "loss": 0.9841, + "learning_rate": 1.6870980480673905e-05, + "loss": 1.0549, "step": 7185 }, { - "epoch": 0.20363286009804754, + "epoch": 0.28116441036074813, "grad_norm": 0.0, - "learning_rate": 1.8460154243310953e-05, - "loss": 0.9897, + "learning_rate": 1.6870059700104956e-05, + "loss": 1.1755, "step": 7186 }, { - "epoch": 0.20366119754031, + "epoch": 0.2812035370529775, "grad_norm": 0.0, - "learning_rate": 1.8459664880604946e-05, - "loss": 1.0611, + "learning_rate": 1.686913880921146e-05, + "loss": 1.2122, "step": 7187 }, { - "epoch": 0.20368953498257247, + "epoch": 0.28124266374520696, "grad_norm": 0.0, - "learning_rate": 1.8459175446640042e-05, - "loss": 1.0201, + "learning_rate": 1.6868217808008203e-05, + "loss": 1.0643, "step": 7188 }, { - "epoch": 0.20371787242483494, + "epoch": 0.2812817904374364, "grad_norm": 0.0, - "learning_rate": 1.845868594142036e-05, - "loss": 0.867, + "learning_rate": 1.6867296696509978e-05, + "loss": 1.254, "step": 7189 }, { - "epoch": 0.2037462098670974, + "epoch": 0.28132091712966584, "grad_norm": 0.0, - "learning_rate": 1.845819636495003e-05, - "loss": 0.9593, + "learning_rate": 1.686637547473157e-05, + "loss": 1.0258, "step": 7190 }, { - "epoch": 0.20377454730935984, + "epoch": 0.2813600438218953, "grad_norm": 0.0, - "learning_rate": 1.8457706717233165e-05, - "loss": 1.0895, + "learning_rate": 1.6865454142687773e-05, + "loss": 1.1826, "step": 7191 }, { - "epoch": 0.2038028847516223, + "epoch": 0.2813991705141247, "grad_norm": 0.0, - "learning_rate": 1.8457216998273896e-05, - "loss": 0.9368, + "learning_rate": 1.686453270039339e-05, + "loss": 1.0758, "step": 7192 }, { - "epoch": 0.20383122219388478, + "epoch": 0.28143829720635416, "grad_norm": 0.0, - "learning_rate": 1.845672720807635e-05, - "loss": 0.9547, + "learning_rate": 1.686361114786321e-05, + "loss": 1.11, "step": 7193 }, { - "epoch": 0.20385955963614724, + "epoch": 0.2814774238985836, "grad_norm": 0.0, - "learning_rate": 1.845623734664465e-05, - "loss": 1.0063, + "learning_rate": 1.686268948511204e-05, + "loss": 1.0463, "step": 7194 }, { - "epoch": 0.2038878970784097, + "epoch": 0.28151655059081304, "grad_norm": 0.0, - "learning_rate": 1.8455747413982927e-05, - "loss": 0.9928, + "learning_rate": 1.686176771215467e-05, + "loss": 1.1329, "step": 7195 }, { - "epoch": 0.20391623452067217, + "epoch": 0.2815556772830425, "grad_norm": 0.0, - "learning_rate": 1.8455257410095296e-05, - "loss": 0.9662, + "learning_rate": 1.6860845829005914e-05, + "loss": 1.1375, "step": 7196 }, { - "epoch": 0.2039445719629346, + "epoch": 0.2815948039752719, "grad_norm": 0.0, - "learning_rate": 1.8454767334985896e-05, - "loss": 0.9202, + "learning_rate": 1.685992383568057e-05, + "loss": 1.2158, "step": 7197 }, { - "epoch": 0.20397290940519708, + "epoch": 0.28163393066750136, "grad_norm": 0.0, - "learning_rate": 1.845427718865885e-05, - "loss": 1.093, + "learning_rate": 1.6859001732193442e-05, + "loss": 1.099, "step": 7198 }, { - "epoch": 0.20400124684745954, + "epoch": 0.2816730573597308, "grad_norm": 0.0, - "learning_rate": 1.845378697111829e-05, - "loss": 1.0011, + "learning_rate": 1.6858079518559343e-05, + "loss": 0.8829, "step": 7199 }, { - "epoch": 0.204029584289722, + "epoch": 0.28171218405196025, "grad_norm": 0.0, - "learning_rate": 1.8453296682368344e-05, - "loss": 1.1125, + "learning_rate": 1.6857157194793083e-05, + "loss": 1.1949, "step": 7200 }, { - "epoch": 0.20405792173198448, + "epoch": 0.2817513107441897, "grad_norm": 0.0, - "learning_rate": 1.845280632241314e-05, - "loss": 0.9913, + "learning_rate": 1.685623476090947e-05, + "loss": 1.0036, "step": 7201 }, { - "epoch": 0.20408625917424694, + "epoch": 0.28179043743641913, "grad_norm": 0.0, - "learning_rate": 1.8452315891256806e-05, - "loss": 1.1284, + "learning_rate": 1.6855312216923316e-05, + "loss": 1.1252, "step": 7202 }, { - "epoch": 0.20411459661650938, + "epoch": 0.28182956412864857, "grad_norm": 0.0, - "learning_rate": 1.845182538890348e-05, - "loss": 1.0348, + "learning_rate": 1.685438956284944e-05, + "loss": 1.1337, "step": 7203 }, { - "epoch": 0.20414293405877185, + "epoch": 0.281868690820878, "grad_norm": 0.0, - "learning_rate": 1.845133481535729e-05, - "loss": 0.9814, + "learning_rate": 1.6853466798702654e-05, + "loss": 1.1987, "step": 7204 }, { - "epoch": 0.2041712715010343, + "epoch": 0.28190781751310745, "grad_norm": 0.0, - "learning_rate": 1.8450844170622366e-05, - "loss": 1.0581, + "learning_rate": 1.6852543924497782e-05, + "loss": 1.1703, "step": 7205 }, { - "epoch": 0.20419960894329678, + "epoch": 0.2819469442053369, "grad_norm": 0.0, - "learning_rate": 1.8450353454702845e-05, - "loss": 1.1135, + "learning_rate": 1.685162094024964e-05, + "loss": 1.0881, "step": 7206 }, { - "epoch": 0.20422794638555924, + "epoch": 0.28198607089756633, "grad_norm": 0.0, - "learning_rate": 1.8449862667602858e-05, - "loss": 1.1407, + "learning_rate": 1.685069784597305e-05, + "loss": 1.1308, "step": 7207 }, { - "epoch": 0.2042562838278217, + "epoch": 0.2820251975897958, "grad_norm": 0.0, - "learning_rate": 1.844937180932654e-05, - "loss": 1.1403, + "learning_rate": 1.6849774641682838e-05, + "loss": 1.1629, "step": 7208 }, { - "epoch": 0.20428462127008415, + "epoch": 0.2820643242820252, "grad_norm": 0.0, - "learning_rate": 1.8448880879878026e-05, - "loss": 0.9614, + "learning_rate": 1.6848851327393833e-05, + "loss": 0.9789, "step": 7209 }, { - "epoch": 0.20431295871234662, + "epoch": 0.28210345097425465, "grad_norm": 0.0, - "learning_rate": 1.844838987926145e-05, - "loss": 1.0351, + "learning_rate": 1.6847927903120852e-05, + "loss": 1.1575, "step": 7210 }, { - "epoch": 0.20434129615460908, + "epoch": 0.2821425776664841, "grad_norm": 0.0, - "learning_rate": 1.844789880748095e-05, - "loss": 1.0644, + "learning_rate": 1.684700436887873e-05, + "loss": 1.2281, "step": 7211 }, { - "epoch": 0.20436963359687155, + "epoch": 0.28218170435871354, "grad_norm": 0.0, - "learning_rate": 1.8447407664540662e-05, - "loss": 1.0245, + "learning_rate": 1.68460807246823e-05, + "loss": 1.121, "step": 7212 }, { - "epoch": 0.204397971039134, + "epoch": 0.282220831050943, "grad_norm": 0.0, - "learning_rate": 1.8446916450444723e-05, - "loss": 0.9756, + "learning_rate": 1.6845156970546393e-05, + "loss": 1.2744, "step": 7213 }, { - "epoch": 0.20442630848139648, + "epoch": 0.2822599577431724, "grad_norm": 0.0, - "learning_rate": 1.844642516519727e-05, - "loss": 1.0331, + "learning_rate": 1.684423310648584e-05, + "loss": 0.9843, "step": 7214 }, { - "epoch": 0.20445464592365892, + "epoch": 0.2822990844354018, "grad_norm": 0.0, - "learning_rate": 1.844593380880244e-05, - "loss": 0.9456, + "learning_rate": 1.684330913251548e-05, + "loss": 1.0938, "step": 7215 }, { - "epoch": 0.20448298336592138, + "epoch": 0.28233821112763124, "grad_norm": 0.0, - "learning_rate": 1.8445442381264372e-05, - "loss": 0.9127, + "learning_rate": 1.684238504865015e-05, + "loss": 1.0566, "step": 7216 }, { - "epoch": 0.20451132080818385, + "epoch": 0.2823773378198607, "grad_norm": 0.0, - "learning_rate": 1.8444950882587213e-05, - "loss": 0.8988, + "learning_rate": 1.684146085490469e-05, + "loss": 1.1219, "step": 7217 }, { - "epoch": 0.20453965825044632, + "epoch": 0.2824164645120901, "grad_norm": 0.0, - "learning_rate": 1.844445931277509e-05, - "loss": 0.9511, + "learning_rate": 1.6840536551293946e-05, + "loss": 1.0483, "step": 7218 }, { - "epoch": 0.20456799569270878, + "epoch": 0.28245559120431957, "grad_norm": 0.0, - "learning_rate": 1.844396767183215e-05, - "loss": 1.0195, + "learning_rate": 1.6839612137832752e-05, + "loss": 1.0287, "step": 7219 }, { - "epoch": 0.20459633313497125, + "epoch": 0.282494717896549, "grad_norm": 0.0, - "learning_rate": 1.8443475959762538e-05, - "loss": 0.9566, + "learning_rate": 1.683868761453596e-05, + "loss": 0.9698, "step": 7220 }, { - "epoch": 0.20462467057723369, + "epoch": 0.28253384458877845, "grad_norm": 0.0, - "learning_rate": 1.844298417657039e-05, - "loss": 1.0201, + "learning_rate": 1.683776298141841e-05, + "loss": 1.1606, "step": 7221 }, { - "epoch": 0.20465300801949615, + "epoch": 0.2825729712810079, "grad_norm": 0.0, - "learning_rate": 1.8442492322259854e-05, - "loss": 1.0927, + "learning_rate": 1.6836838238494956e-05, + "loss": 1.1627, "step": 7222 }, { - "epoch": 0.20468134546175862, + "epoch": 0.28261209797323733, "grad_norm": 0.0, - "learning_rate": 1.8442000396835072e-05, - "loss": 0.9476, + "learning_rate": 1.6835913385780453e-05, + "loss": 1.1609, "step": 7223 }, { - "epoch": 0.20470968290402108, + "epoch": 0.28265122466546677, "grad_norm": 0.0, - "learning_rate": 1.844150840030018e-05, - "loss": 0.9874, + "learning_rate": 1.683498842328974e-05, + "loss": 1.2047, "step": 7224 }, { - "epoch": 0.20473802034628355, + "epoch": 0.2826903513576962, "grad_norm": 0.0, - "learning_rate": 1.844101633265933e-05, - "loss": 0.9334, + "learning_rate": 1.683406335103768e-05, + "loss": 1.21, "step": 7225 }, { - "epoch": 0.20476635778854602, + "epoch": 0.28272947804992565, "grad_norm": 0.0, - "learning_rate": 1.8440524193916667e-05, - "loss": 1.0776, + "learning_rate": 1.6833138169039127e-05, + "loss": 0.9925, "step": 7226 }, { - "epoch": 0.20479469523080845, + "epoch": 0.2827686047421551, "grad_norm": 0.0, - "learning_rate": 1.8440031984076333e-05, - "loss": 0.993, + "learning_rate": 1.6832212877308934e-05, + "loss": 1.0343, "step": 7227 }, { - "epoch": 0.20482303267307092, + "epoch": 0.28280773143438453, "grad_norm": 0.0, - "learning_rate": 1.8439539703142475e-05, - "loss": 1.0166, + "learning_rate": 1.683128747586197e-05, + "loss": 1.1501, "step": 7228 }, { - "epoch": 0.2048513701153334, + "epoch": 0.282846858126614, "grad_norm": 0.0, - "learning_rate": 1.8439047351119242e-05, - "loss": 0.851, + "learning_rate": 1.6830361964713082e-05, + "loss": 1.056, "step": 7229 }, { - "epoch": 0.20487970755759585, + "epoch": 0.2828859848188434, "grad_norm": 0.0, - "learning_rate": 1.843855492801078e-05, - "loss": 0.9052, + "learning_rate": 1.6829436343877142e-05, + "loss": 1.147, "step": 7230 }, { - "epoch": 0.20490804499985832, + "epoch": 0.28292511151107286, "grad_norm": 0.0, - "learning_rate": 1.8438062433821234e-05, - "loss": 1.0486, + "learning_rate": 1.682851061336902e-05, + "loss": 1.224, "step": 7231 }, { - "epoch": 0.20493638244212078, + "epoch": 0.2829642382033023, "grad_norm": 0.0, - "learning_rate": 1.8437569868554753e-05, - "loss": 0.9744, + "learning_rate": 1.6827584773203564e-05, + "loss": 1.2388, "step": 7232 }, { - "epoch": 0.20496471988438322, + "epoch": 0.28300336489553174, "grad_norm": 0.0, - "learning_rate": 1.843707723221549e-05, - "loss": 0.8793, + "learning_rate": 1.6826658823395657e-05, + "loss": 1.1721, "step": 7233 }, { - "epoch": 0.2049930573266457, + "epoch": 0.2830424915877612, "grad_norm": 0.0, - "learning_rate": 1.8436584524807593e-05, - "loss": 0.9881, + "learning_rate": 1.6825732763960162e-05, + "loss": 1.0671, "step": 7234 }, { - "epoch": 0.20502139476890816, + "epoch": 0.2830816182799906, "grad_norm": 0.0, - "learning_rate": 1.843609174633521e-05, - "loss": 1.1596, + "learning_rate": 1.682480659491195e-05, + "loss": 1.2374, "step": 7235 }, { - "epoch": 0.20504973221117062, + "epoch": 0.28312074497222006, "grad_norm": 0.0, - "learning_rate": 1.8435598896802497e-05, - "loss": 0.9481, + "learning_rate": 1.68238803162659e-05, + "loss": 1.1328, "step": 7236 }, { - "epoch": 0.2050780696534331, + "epoch": 0.2831598716644495, "grad_norm": 0.0, - "learning_rate": 1.84351059762136e-05, - "loss": 1.054, + "learning_rate": 1.682295392803688e-05, + "loss": 0.9437, "step": 7237 }, { - "epoch": 0.20510640709569555, + "epoch": 0.28319899835667894, "grad_norm": 0.0, - "learning_rate": 1.843461298457267e-05, - "loss": 0.865, + "learning_rate": 1.682202743023977e-05, + "loss": 1.2204, "step": 7238 }, { - "epoch": 0.205134744537958, + "epoch": 0.2832381250489084, "grad_norm": 0.0, - "learning_rate": 1.8434119921883865e-05, - "loss": 1.1013, + "learning_rate": 1.682110082288945e-05, + "loss": 1.2586, "step": 7239 }, { - "epoch": 0.20516308198022046, + "epoch": 0.2832772517411378, "grad_norm": 0.0, - "learning_rate": 1.8433626788151337e-05, - "loss": 0.9751, + "learning_rate": 1.6820174106000794e-05, + "loss": 1.0565, "step": 7240 }, { - "epoch": 0.20519141942248292, + "epoch": 0.28331637843336727, "grad_norm": 0.0, - "learning_rate": 1.8433133583379236e-05, - "loss": 1.0762, + "learning_rate": 1.681924727958869e-05, + "loss": 1.1092, "step": 7241 }, { - "epoch": 0.2052197568647454, + "epoch": 0.2833555051255967, "grad_norm": 0.0, - "learning_rate": 1.843264030757172e-05, - "loss": 1.1281, + "learning_rate": 1.681832034366802e-05, + "loss": 1.1735, "step": 7242 }, { - "epoch": 0.20524809430700786, + "epoch": 0.28339463181782615, "grad_norm": 0.0, - "learning_rate": 1.843214696073295e-05, - "loss": 1.065, + "learning_rate": 1.681739329825367e-05, + "loss": 1.1162, "step": 7243 }, { - "epoch": 0.20527643174927032, + "epoch": 0.28343375851005553, "grad_norm": 0.0, - "learning_rate": 1.8431653542867068e-05, - "loss": 0.9805, + "learning_rate": 1.6816466143360527e-05, + "loss": 1.0736, "step": 7244 }, { - "epoch": 0.20530476919153276, + "epoch": 0.283472885202285, "grad_norm": 0.0, - "learning_rate": 1.843116005397824e-05, - "loss": 1.0274, + "learning_rate": 1.6815538879003477e-05, + "loss": 0.9967, "step": 7245 }, { - "epoch": 0.20533310663379523, + "epoch": 0.2835120118945144, "grad_norm": 0.0, - "learning_rate": 1.8430666494070615e-05, - "loss": 1.0752, + "learning_rate": 1.6814611505197413e-05, + "loss": 1.0916, "step": 7246 }, { - "epoch": 0.2053614440760577, + "epoch": 0.28355113858674386, "grad_norm": 0.0, - "learning_rate": 1.843017286314836e-05, - "loss": 1.0083, + "learning_rate": 1.6813684021957226e-05, + "loss": 1.1506, "step": 7247 }, { - "epoch": 0.20538978151832016, + "epoch": 0.2835902652789733, "grad_norm": 0.0, - "learning_rate": 1.842967916121563e-05, - "loss": 0.8928, + "learning_rate": 1.6812756429297815e-05, + "loss": 1.0266, "step": 7248 }, { - "epoch": 0.20541811896058262, + "epoch": 0.28362939197120274, "grad_norm": 0.0, - "learning_rate": 1.8429185388276578e-05, - "loss": 0.9922, + "learning_rate": 1.6811828727234073e-05, + "loss": 1.086, "step": 7249 }, { - "epoch": 0.2054464564028451, + "epoch": 0.2836685186634322, "grad_norm": 0.0, - "learning_rate": 1.842869154433537e-05, - "loss": 1.0239, + "learning_rate": 1.6810900915780894e-05, + "loss": 1.1202, "step": 7250 }, { - "epoch": 0.20547479384510753, + "epoch": 0.2837076453556616, "grad_norm": 0.0, - "learning_rate": 1.8428197629396162e-05, - "loss": 1.0751, + "learning_rate": 1.6809972994953184e-05, + "loss": 1.0865, "step": 7251 }, { - "epoch": 0.20550313128737, + "epoch": 0.28374677204789106, "grad_norm": 0.0, - "learning_rate": 1.8427703643463118e-05, - "loss": 1.0023, + "learning_rate": 1.680904496476584e-05, + "loss": 1.1071, "step": 7252 }, { - "epoch": 0.20553146872963246, + "epoch": 0.2837858987401205, "grad_norm": 0.0, - "learning_rate": 1.8427209586540392e-05, - "loss": 0.8556, + "learning_rate": 1.6808116825233765e-05, + "loss": 1.0682, "step": 7253 }, { - "epoch": 0.20555980617189493, + "epoch": 0.28382502543234994, "grad_norm": 0.0, - "learning_rate": 1.8426715458632154e-05, - "loss": 1.0618, + "learning_rate": 1.6807188576371864e-05, + "loss": 0.9411, "step": 7254 }, { - "epoch": 0.2055881436141574, + "epoch": 0.2838641521245794, "grad_norm": 0.0, - "learning_rate": 1.842622125974256e-05, - "loss": 1.0212, + "learning_rate": 1.6806260218195046e-05, + "loss": 1.0457, "step": 7255 }, { - "epoch": 0.20561648105641986, + "epoch": 0.2839032788168088, "grad_norm": 0.0, - "learning_rate": 1.8425726989875777e-05, - "loss": 0.9821, + "learning_rate": 1.6805331750718218e-05, + "loss": 1.0612, "step": 7256 }, { - "epoch": 0.2056448184986823, + "epoch": 0.28394240550903826, "grad_norm": 0.0, - "learning_rate": 1.842523264903597e-05, - "loss": 1.0434, + "learning_rate": 1.680440317395629e-05, + "loss": 1.0667, "step": 7257 }, { - "epoch": 0.20567315594094476, + "epoch": 0.2839815322012677, "grad_norm": 0.0, - "learning_rate": 1.842473823722729e-05, - "loss": 1.0841, + "learning_rate": 1.6803474487924173e-05, + "loss": 1.1059, "step": 7258 }, { - "epoch": 0.20570149338320723, + "epoch": 0.28402065889349715, "grad_norm": 0.0, - "learning_rate": 1.842424375445392e-05, - "loss": 1.0513, + "learning_rate": 1.680254569263678e-05, + "loss": 1.0945, "step": 7259 }, { - "epoch": 0.2057298308254697, + "epoch": 0.2840597855857266, "grad_norm": 0.0, - "learning_rate": 1.8423749200720017e-05, - "loss": 1.0061, + "learning_rate": 1.6801616788109028e-05, + "loss": 1.0754, "step": 7260 }, { - "epoch": 0.20575816826773216, + "epoch": 0.28409891227795603, "grad_norm": 0.0, - "learning_rate": 1.8423254576029744e-05, - "loss": 1.0773, + "learning_rate": 1.6800687774355834e-05, + "loss": 1.0824, "step": 7261 }, { - "epoch": 0.20578650570999463, + "epoch": 0.28413803897018547, "grad_norm": 0.0, - "learning_rate": 1.842275988038727e-05, - "loss": 1.041, + "learning_rate": 1.6799758651392114e-05, + "loss": 1.1848, "step": 7262 }, { - "epoch": 0.20581484315225707, + "epoch": 0.2841771656624149, "grad_norm": 0.0, - "learning_rate": 1.842226511379676e-05, - "loss": 1.0204, + "learning_rate": 1.6798829419232793e-05, + "loss": 1.0528, "step": 7263 }, { - "epoch": 0.20584318059451953, + "epoch": 0.28421629235464435, "grad_norm": 0.0, - "learning_rate": 1.8421770276262386e-05, - "loss": 0.904, + "learning_rate": 1.6797900077892788e-05, + "loss": 1.1208, "step": 7264 }, { - "epoch": 0.205871518036782, + "epoch": 0.2842554190468738, "grad_norm": 0.0, - "learning_rate": 1.842127536778831e-05, - "loss": 1.0078, + "learning_rate": 1.6796970627387028e-05, + "loss": 1.1214, "step": 7265 }, { - "epoch": 0.20589985547904446, + "epoch": 0.28429454573910323, "grad_norm": 0.0, - "learning_rate": 1.8420780388378708e-05, - "loss": 0.9718, + "learning_rate": 1.6796041067730437e-05, + "loss": 1.1902, "step": 7266 }, { - "epoch": 0.20592819292130693, + "epoch": 0.2843336724313327, "grad_norm": 0.0, - "learning_rate": 1.8420285338037747e-05, - "loss": 0.9913, + "learning_rate": 1.6795111398937944e-05, + "loss": 1.0521, "step": 7267 }, { - "epoch": 0.2059565303635694, + "epoch": 0.2843727991235621, "grad_norm": 0.0, - "learning_rate": 1.8419790216769594e-05, - "loss": 0.9554, + "learning_rate": 1.6794181621024473e-05, + "loss": 1.1893, "step": 7268 }, { - "epoch": 0.20598486780583183, + "epoch": 0.28441192581579156, "grad_norm": 0.0, - "learning_rate": 1.8419295024578417e-05, - "loss": 1.1017, + "learning_rate": 1.679325173400496e-05, + "loss": 1.1754, "step": 7269 }, { - "epoch": 0.2060132052480943, + "epoch": 0.284451052508021, "grad_norm": 0.0, - "learning_rate": 1.8418799761468397e-05, - "loss": 0.9688, + "learning_rate": 1.6792321737894337e-05, + "loss": 1.145, "step": 7270 }, { - "epoch": 0.20604154269035677, + "epoch": 0.28449017920025044, "grad_norm": 0.0, - "learning_rate": 1.84183044274437e-05, - "loss": 0.9302, + "learning_rate": 1.6791391632707535e-05, + "loss": 1.0786, "step": 7271 }, { - "epoch": 0.20606988013261923, + "epoch": 0.2845293058924798, "grad_norm": 0.0, - "learning_rate": 1.8417809022508498e-05, - "loss": 0.9849, + "learning_rate": 1.67904614184595e-05, + "loss": 1.2043, "step": 7272 }, { - "epoch": 0.2060982175748817, + "epoch": 0.28456843258470926, "grad_norm": 0.0, - "learning_rate": 1.8417313546666963e-05, - "loss": 1.1033, + "learning_rate": 1.678953109516516e-05, + "loss": 1.0768, "step": 7273 }, { - "epoch": 0.20612655501714416, + "epoch": 0.2846075592769387, "grad_norm": 0.0, - "learning_rate": 1.841681799992327e-05, - "loss": 0.9613, + "learning_rate": 1.6788600662839457e-05, + "loss": 1.2474, "step": 7274 }, { - "epoch": 0.2061548924594066, + "epoch": 0.28464668596916815, "grad_norm": 0.0, - "learning_rate": 1.8416322382281597e-05, - "loss": 1.1103, + "learning_rate": 1.6787670121497335e-05, + "loss": 1.2545, "step": 7275 }, { - "epoch": 0.20618322990166907, + "epoch": 0.2846858126613976, "grad_norm": 0.0, - "learning_rate": 1.841582669374611e-05, - "loss": 0.9522, + "learning_rate": 1.678673947115374e-05, + "loss": 1.0763, "step": 7276 }, { - "epoch": 0.20621156734393153, + "epoch": 0.284724939353627, "grad_norm": 0.0, - "learning_rate": 1.8415330934320995e-05, - "loss": 1.0595, + "learning_rate": 1.678580871182361e-05, + "loss": 1.2645, "step": 7277 }, { - "epoch": 0.206239904786194, + "epoch": 0.28476406604585647, "grad_norm": 0.0, - "learning_rate": 1.8414835104010422e-05, - "loss": 1.0109, + "learning_rate": 1.678487784352189e-05, + "loss": 1.1193, "step": 7278 }, { - "epoch": 0.20626824222845647, + "epoch": 0.2848031927380859, "grad_norm": 0.0, - "learning_rate": 1.8414339202818564e-05, - "loss": 1.0129, + "learning_rate": 1.6783946866263542e-05, + "loss": 1.1387, "step": 7279 }, { - "epoch": 0.20629657967071893, + "epoch": 0.28484231943031535, "grad_norm": 0.0, - "learning_rate": 1.8413843230749607e-05, - "loss": 1.0395, + "learning_rate": 1.6783015780063503e-05, + "loss": 1.2352, "step": 7280 }, { - "epoch": 0.20632491711298137, + "epoch": 0.2848814461225448, "grad_norm": 0.0, - "learning_rate": 1.8413347187807724e-05, - "loss": 1.011, + "learning_rate": 1.6782084584936734e-05, + "loss": 1.1381, "step": 7281 }, { - "epoch": 0.20635325455524384, + "epoch": 0.28492057281477423, "grad_norm": 0.0, - "learning_rate": 1.841285107399709e-05, - "loss": 0.9326, + "learning_rate": 1.678115328089818e-05, + "loss": 0.9892, "step": 7282 }, { - "epoch": 0.2063815919975063, + "epoch": 0.28495969950700367, "grad_norm": 0.0, - "learning_rate": 1.8412354889321888e-05, - "loss": 1.0433, + "learning_rate": 1.6780221867962806e-05, + "loss": 1.1926, "step": 7283 }, { - "epoch": 0.20640992943976877, + "epoch": 0.2849988261992331, "grad_norm": 0.0, - "learning_rate": 1.8411858633786298e-05, - "loss": 0.9976, + "learning_rate": 1.6779290346145563e-05, + "loss": 1.1827, "step": 7284 }, { - "epoch": 0.20643826688203123, + "epoch": 0.28503795289146255, "grad_norm": 0.0, - "learning_rate": 1.84113623073945e-05, - "loss": 0.9738, + "learning_rate": 1.6778358715461416e-05, + "loss": 1.0476, "step": 7285 }, { - "epoch": 0.2064666043242937, + "epoch": 0.285077079583692, "grad_norm": 0.0, - "learning_rate": 1.841086591015067e-05, - "loss": 1.0478, + "learning_rate": 1.6777426975925318e-05, + "loss": 1.1855, "step": 7286 }, { - "epoch": 0.20649494176655614, + "epoch": 0.28511620627592144, "grad_norm": 0.0, - "learning_rate": 1.8410369442058998e-05, - "loss": 1.0434, + "learning_rate": 1.6776495127552236e-05, + "loss": 1.1671, "step": 7287 }, { - "epoch": 0.2065232792088186, + "epoch": 0.2851553329681509, "grad_norm": 0.0, - "learning_rate": 1.8409872903123657e-05, - "loss": 0.9654, + "learning_rate": 1.6775563170357134e-05, + "loss": 1.0518, "step": 7288 }, { - "epoch": 0.20655161665108107, + "epoch": 0.2851944596603803, "grad_norm": 0.0, - "learning_rate": 1.8409376293348836e-05, - "loss": 1.019, + "learning_rate": 1.6774631104354976e-05, + "loss": 1.0684, "step": 7289 }, { - "epoch": 0.20657995409334354, + "epoch": 0.28523358635260976, "grad_norm": 0.0, - "learning_rate": 1.8408879612738716e-05, - "loss": 0.9447, + "learning_rate": 1.6773698929560732e-05, + "loss": 1.2657, "step": 7290 }, { - "epoch": 0.206608291535606, + "epoch": 0.2852727130448392, "grad_norm": 0.0, - "learning_rate": 1.840838286129748e-05, - "loss": 0.9483, + "learning_rate": 1.6772766645989372e-05, + "loss": 1.0321, "step": 7291 }, { - "epoch": 0.20663662897786847, + "epoch": 0.28531183973706864, "grad_norm": 0.0, - "learning_rate": 1.8407886039029312e-05, - "loss": 1.0778, + "learning_rate": 1.677183425365587e-05, + "loss": 1.2057, "step": 7292 }, { - "epoch": 0.2066649664201309, + "epoch": 0.2853509664292981, "grad_norm": 0.0, - "learning_rate": 1.84073891459384e-05, - "loss": 0.9529, + "learning_rate": 1.6770901752575186e-05, + "loss": 1.1001, "step": 7293 }, { - "epoch": 0.20669330386239337, + "epoch": 0.2853900931215275, "grad_norm": 0.0, - "learning_rate": 1.8406892182028925e-05, - "loss": 1.0267, + "learning_rate": 1.6769969142762313e-05, + "loss": 1.1752, "step": 7294 }, { - "epoch": 0.20672164130465584, + "epoch": 0.28542921981375696, "grad_norm": 0.0, - "learning_rate": 1.8406395147305074e-05, - "loss": 0.9696, + "learning_rate": 1.6769036424232213e-05, + "loss": 1.0252, "step": 7295 }, { - "epoch": 0.2067499787469183, + "epoch": 0.2854683465059864, "grad_norm": 0.0, - "learning_rate": 1.8405898041771037e-05, - "loss": 0.9981, + "learning_rate": 1.6768103596999874e-05, + "loss": 1.2076, "step": 7296 }, { - "epoch": 0.20677831618918077, + "epoch": 0.28550747319821584, "grad_norm": 0.0, - "learning_rate": 1.8405400865431e-05, - "loss": 1.0622, + "learning_rate": 1.6767170661080273e-05, + "loss": 1.2391, "step": 7297 }, { - "epoch": 0.20680665363144324, + "epoch": 0.2855465998904453, "grad_norm": 0.0, - "learning_rate": 1.840490361828915e-05, - "loss": 0.9299, + "learning_rate": 1.676623761648839e-05, + "loss": 1.2499, "step": 7298 }, { - "epoch": 0.20683499107370568, + "epoch": 0.2855857265826747, "grad_norm": 0.0, - "learning_rate": 1.8404406300349673e-05, - "loss": 1.0637, + "learning_rate": 1.6765304463239206e-05, + "loss": 1.0705, "step": 7299 }, { - "epoch": 0.20686332851596814, + "epoch": 0.28562485327490417, "grad_norm": 0.0, - "learning_rate": 1.8403908911616764e-05, - "loss": 1.0272, + "learning_rate": 1.676437120134771e-05, + "loss": 1.1336, "step": 7300 }, { - "epoch": 0.2068916659582306, + "epoch": 0.28566397996713355, "grad_norm": 0.0, - "learning_rate": 1.8403411452094607e-05, - "loss": 0.9885, + "learning_rate": 1.6763437830828896e-05, + "loss": 1.2656, "step": 7301 }, { - "epoch": 0.20692000340049307, + "epoch": 0.285703106659363, "grad_norm": 0.0, - "learning_rate": 1.8402913921787395e-05, - "loss": 1.054, + "learning_rate": 1.6762504351697738e-05, + "loss": 1.0446, "step": 7302 }, { - "epoch": 0.20694834084275554, + "epoch": 0.28574223335159243, "grad_norm": 0.0, - "learning_rate": 1.840241632069932e-05, - "loss": 1.0055, + "learning_rate": 1.6761570763969237e-05, + "loss": 1.0836, "step": 7303 }, { - "epoch": 0.206976678285018, + "epoch": 0.2857813600438219, "grad_norm": 0.0, - "learning_rate": 1.8401918648834573e-05, - "loss": 1.0811, + "learning_rate": 1.676063706765838e-05, + "loss": 1.1874, "step": 7304 }, { - "epoch": 0.20700501572728044, + "epoch": 0.2858204867360513, "grad_norm": 0.0, - "learning_rate": 1.8401420906197346e-05, - "loss": 1.0592, + "learning_rate": 1.6759703262780167e-05, + "loss": 1.0088, "step": 7305 }, { - "epoch": 0.2070333531695429, + "epoch": 0.28585961342828076, "grad_norm": 0.0, - "learning_rate": 1.8400923092791827e-05, - "loss": 0.9088, + "learning_rate": 1.6758769349349586e-05, + "loss": 1.1674, "step": 7306 }, { - "epoch": 0.20706169061180538, + "epoch": 0.2858987401205102, "grad_norm": 0.0, - "learning_rate": 1.8400425208622215e-05, - "loss": 0.9206, + "learning_rate": 1.675783532738164e-05, + "loss": 1.1989, "step": 7307 }, { - "epoch": 0.20709002805406784, + "epoch": 0.28593786681273964, "grad_norm": 0.0, - "learning_rate": 1.83999272536927e-05, - "loss": 0.982, + "learning_rate": 1.675690119689133e-05, + "loss": 1.2415, "step": 7308 }, { - "epoch": 0.2071183654963303, + "epoch": 0.2859769935049691, "grad_norm": 0.0, - "learning_rate": 1.8399429228007484e-05, - "loss": 0.9995, + "learning_rate": 1.675596695789365e-05, + "loss": 1.2014, "step": 7309 }, { - "epoch": 0.20714670293859277, + "epoch": 0.2860161201971985, "grad_norm": 0.0, - "learning_rate": 1.8398931131570755e-05, - "loss": 0.9039, + "learning_rate": 1.6755032610403606e-05, + "loss": 1.045, "step": 7310 }, { - "epoch": 0.2071750403808552, + "epoch": 0.28605524688942796, "grad_norm": 0.0, - "learning_rate": 1.8398432964386707e-05, - "loss": 0.9406, + "learning_rate": 1.6754098154436204e-05, + "loss": 1.1223, "step": 7311 }, { - "epoch": 0.20720337782311768, + "epoch": 0.2860943735816574, "grad_norm": 0.0, - "learning_rate": 1.8397934726459544e-05, - "loss": 1.0751, + "learning_rate": 1.675316359000645e-05, + "loss": 1.1474, "step": 7312 }, { - "epoch": 0.20723171526538015, + "epoch": 0.28613350027388684, "grad_norm": 0.0, - "learning_rate": 1.8397436417793454e-05, - "loss": 1.0565, + "learning_rate": 1.6752228917129347e-05, + "loss": 1.1707, "step": 7313 }, { - "epoch": 0.2072600527076426, + "epoch": 0.2861726269661163, "grad_norm": 0.0, - "learning_rate": 1.8396938038392636e-05, - "loss": 0.9594, + "learning_rate": 1.675129413581991e-05, + "loss": 1.2997, "step": 7314 }, { - "epoch": 0.20728839014990508, + "epoch": 0.2862117536583457, "grad_norm": 0.0, - "learning_rate": 1.8396439588261298e-05, - "loss": 0.9184, + "learning_rate": 1.6750359246093154e-05, + "loss": 1.0973, "step": 7315 }, { - "epoch": 0.20731672759216754, + "epoch": 0.28625088035057517, "grad_norm": 0.0, - "learning_rate": 1.839594106740363e-05, - "loss": 0.9793, + "learning_rate": 1.6749424247964082e-05, + "loss": 1.0455, "step": 7316 }, { - "epoch": 0.20734506503442998, + "epoch": 0.2862900070428046, "grad_norm": 0.0, - "learning_rate": 1.839544247582383e-05, - "loss": 0.8726, + "learning_rate": 1.6748489141447716e-05, + "loss": 1.1749, "step": 7317 }, { - "epoch": 0.20737340247669245, + "epoch": 0.28632913373503405, "grad_norm": 0.0, - "learning_rate": 1.8394943813526103e-05, - "loss": 0.9823, + "learning_rate": 1.6747553926559072e-05, + "loss": 0.9977, "step": 7318 }, { - "epoch": 0.2074017399189549, + "epoch": 0.2863682604272635, "grad_norm": 0.0, - "learning_rate": 1.8394445080514643e-05, - "loss": 0.9183, + "learning_rate": 1.6746618603313165e-05, + "loss": 1.1118, "step": 7319 }, { - "epoch": 0.20743007736121738, + "epoch": 0.28640738711949293, "grad_norm": 0.0, - "learning_rate": 1.839394627679366e-05, - "loss": 1.0741, + "learning_rate": 1.6745683171725015e-05, + "loss": 1.0812, "step": 7320 }, { - "epoch": 0.20745841480347985, + "epoch": 0.28644651381172237, "grad_norm": 0.0, - "learning_rate": 1.8393447402367346e-05, - "loss": 1.0964, + "learning_rate": 1.674474763180965e-05, + "loss": 1.1607, "step": 7321 }, { - "epoch": 0.2074867522457423, + "epoch": 0.2864856405039518, "grad_norm": 0.0, - "learning_rate": 1.839294845723991e-05, - "loss": 0.9607, + "learning_rate": 1.674381198358209e-05, + "loss": 1.2078, "step": 7322 }, { - "epoch": 0.20751508968800475, + "epoch": 0.28652476719618125, "grad_norm": 0.0, - "learning_rate": 1.839244944141555e-05, - "loss": 0.9374, + "learning_rate": 1.6742876227057356e-05, + "loss": 1.1146, "step": 7323 }, { - "epoch": 0.20754342713026722, + "epoch": 0.2865638938884107, "grad_norm": 0.0, - "learning_rate": 1.8391950354898476e-05, - "loss": 1.0153, + "learning_rate": 1.6741940362250485e-05, + "loss": 1.2402, "step": 7324 }, { - "epoch": 0.20757176457252968, + "epoch": 0.28660302058064013, "grad_norm": 0.0, - "learning_rate": 1.8391451197692883e-05, - "loss": 0.9751, + "learning_rate": 1.6741004389176496e-05, + "loss": 1.0298, "step": 7325 }, { - "epoch": 0.20760010201479215, + "epoch": 0.2866421472728696, "grad_norm": 0.0, - "learning_rate": 1.8390951969802983e-05, - "loss": 0.9972, + "learning_rate": 1.6740068307850423e-05, + "loss": 1.0411, "step": 7326 }, { - "epoch": 0.20762843945705461, + "epoch": 0.286681273965099, "grad_norm": 0.0, - "learning_rate": 1.8390452671232982e-05, - "loss": 0.9455, + "learning_rate": 1.67391321182873e-05, + "loss": 1.1891, "step": 7327 }, { - "epoch": 0.20765677689931708, + "epoch": 0.28672040065732846, "grad_norm": 0.0, - "learning_rate": 1.8389953301987076e-05, - "loss": 1.0361, + "learning_rate": 1.673819582050216e-05, + "loss": 1.0591, "step": 7328 }, { - "epoch": 0.20768511434157952, + "epoch": 0.28675952734955784, "grad_norm": 0.0, - "learning_rate": 1.838945386206948e-05, - "loss": 0.9363, + "learning_rate": 1.6737259414510038e-05, + "loss": 1.0687, "step": 7329 }, { - "epoch": 0.20771345178384198, + "epoch": 0.2867986540417873, "grad_norm": 0.0, - "learning_rate": 1.8388954351484403e-05, - "loss": 0.9043, + "learning_rate": 1.673632290032597e-05, + "loss": 1.1809, "step": 7330 }, { - "epoch": 0.20774178922610445, + "epoch": 0.2868377807340167, "grad_norm": 0.0, - "learning_rate": 1.8388454770236044e-05, - "loss": 1.0662, + "learning_rate": 1.6735386277965e-05, + "loss": 1.1024, "step": 7331 }, { - "epoch": 0.20777012666836692, + "epoch": 0.28687690742624616, "grad_norm": 0.0, - "learning_rate": 1.8387955118328617e-05, - "loss": 1.0064, + "learning_rate": 1.6734449547442165e-05, + "loss": 1.1144, "step": 7332 }, { - "epoch": 0.20779846411062938, + "epoch": 0.2869160341184756, "grad_norm": 0.0, - "learning_rate": 1.838745539576633e-05, - "loss": 1.1676, + "learning_rate": 1.673351270877251e-05, + "loss": 1.1152, "step": 7333 }, { - "epoch": 0.20782680155289185, + "epoch": 0.28695516081070505, "grad_norm": 0.0, - "learning_rate": 1.8386955602553393e-05, - "loss": 1.0422, + "learning_rate": 1.6732575761971078e-05, + "loss": 1.0408, "step": 7334 }, { - "epoch": 0.2078551389951543, + "epoch": 0.2869942875029345, "grad_norm": 0.0, - "learning_rate": 1.838645573869401e-05, - "loss": 0.9055, + "learning_rate": 1.6731638707052917e-05, + "loss": 1.0468, "step": 7335 }, { - "epoch": 0.20788347643741675, + "epoch": 0.2870334141951639, "grad_norm": 0.0, - "learning_rate": 1.8385955804192396e-05, - "loss": 0.8505, + "learning_rate": 1.6730701544033072e-05, + "loss": 1.0584, "step": 7336 }, { - "epoch": 0.20791181387967922, + "epoch": 0.28707254088739337, "grad_norm": 0.0, - "learning_rate": 1.8385455799052768e-05, - "loss": 1.0002, + "learning_rate": 1.6729764272926594e-05, + "loss": 1.0123, "step": 7337 }, { - "epoch": 0.20794015132194169, + "epoch": 0.2871116675796228, "grad_norm": 0.0, - "learning_rate": 1.8384955723279327e-05, - "loss": 0.8794, + "learning_rate": 1.6728826893748535e-05, + "loss": 1.0574, "step": 7338 }, { - "epoch": 0.20796848876420415, + "epoch": 0.28715079427185225, "grad_norm": 0.0, - "learning_rate": 1.8384455576876292e-05, - "loss": 1.0255, + "learning_rate": 1.672788940651395e-05, + "loss": 1.045, "step": 7339 }, { - "epoch": 0.20799682620646662, + "epoch": 0.2871899209640817, "grad_norm": 0.0, - "learning_rate": 1.8383955359847872e-05, - "loss": 1.0451, + "learning_rate": 1.6726951811237887e-05, + "loss": 1.1066, "step": 7340 }, { - "epoch": 0.20802516364872906, + "epoch": 0.28722904765631113, "grad_norm": 0.0, - "learning_rate": 1.8383455072198284e-05, - "loss": 0.9559, + "learning_rate": 1.672601410793541e-05, + "loss": 1.2224, "step": 7341 }, { - "epoch": 0.20805350109099152, + "epoch": 0.2872681743485406, "grad_norm": 0.0, - "learning_rate": 1.8382954713931743e-05, - "loss": 0.9605, + "learning_rate": 1.6725076296621578e-05, + "loss": 1.099, "step": 7342 }, { - "epoch": 0.208081838533254, + "epoch": 0.28730730104077, "grad_norm": 0.0, - "learning_rate": 1.8382454285052458e-05, - "loss": 1.0145, + "learning_rate": 1.672413837731144e-05, + "loss": 1.3236, "step": 7343 }, { - "epoch": 0.20811017597551645, + "epoch": 0.28734642773299945, "grad_norm": 0.0, - "learning_rate": 1.8381953785564653e-05, - "loss": 0.9992, + "learning_rate": 1.672320035002007e-05, + "loss": 1.0377, "step": 7344 }, { - "epoch": 0.20813851341777892, + "epoch": 0.2873855544252289, "grad_norm": 0.0, - "learning_rate": 1.8381453215472532e-05, - "loss": 0.9932, + "learning_rate": 1.6722262214762527e-05, + "loss": 1.1024, "step": 7345 }, { - "epoch": 0.20816685086004139, + "epoch": 0.28742468111745834, "grad_norm": 0.0, - "learning_rate": 1.8380952574780323e-05, - "loss": 0.8914, + "learning_rate": 1.6721323971553877e-05, + "loss": 1.1749, "step": 7346 }, { - "epoch": 0.20819518830230382, + "epoch": 0.2874638078096878, "grad_norm": 0.0, - "learning_rate": 1.8380451863492238e-05, - "loss": 1.0026, + "learning_rate": 1.6720385620409186e-05, + "loss": 1.1203, "step": 7347 }, { - "epoch": 0.2082235257445663, + "epoch": 0.2875029345019172, "grad_norm": 0.0, - "learning_rate": 1.837995108161249e-05, - "loss": 0.9301, + "learning_rate": 1.6719447161343523e-05, + "loss": 1.0868, "step": 7348 }, { - "epoch": 0.20825186318682876, + "epoch": 0.28754206119414666, "grad_norm": 0.0, - "learning_rate": 1.8379450229145308e-05, - "loss": 1.0231, + "learning_rate": 1.6718508594371955e-05, + "loss": 1.2323, "step": 7349 }, { - "epoch": 0.20828020062909122, + "epoch": 0.2875811878863761, "grad_norm": 0.0, - "learning_rate": 1.8378949306094904e-05, - "loss": 0.962, + "learning_rate": 1.6717569919509565e-05, + "loss": 1.1662, "step": 7350 }, { - "epoch": 0.2083085380713537, + "epoch": 0.28762031457860554, "grad_norm": 0.0, - "learning_rate": 1.8378448312465495e-05, - "loss": 0.8921, + "learning_rate": 1.6716631136771413e-05, + "loss": 0.985, "step": 7351 }, { - "epoch": 0.20833687551361615, + "epoch": 0.287659441270835, "grad_norm": 0.0, - "learning_rate": 1.837794724826131e-05, - "loss": 0.9581, + "learning_rate": 1.6715692246172584e-05, + "loss": 1.2539, "step": 7352 }, { - "epoch": 0.2083652129558786, + "epoch": 0.2876985679630644, "grad_norm": 0.0, - "learning_rate": 1.8377446113486562e-05, - "loss": 0.9173, + "learning_rate": 1.671475324772815e-05, + "loss": 1.1063, "step": 7353 }, { - "epoch": 0.20839355039814106, + "epoch": 0.28773769465529386, "grad_norm": 0.0, - "learning_rate": 1.8376944908145474e-05, - "loss": 1.0847, + "learning_rate": 1.67138141414532e-05, + "loss": 1.1598, "step": 7354 }, { - "epoch": 0.20842188784040352, + "epoch": 0.2877768213475233, "grad_norm": 0.0, - "learning_rate": 1.8376443632242265e-05, - "loss": 1.0862, + "learning_rate": 1.67128749273628e-05, + "loss": 1.0471, "step": 7355 }, { - "epoch": 0.208450225282666, + "epoch": 0.28781594803975274, "grad_norm": 0.0, - "learning_rate": 1.8375942285781164e-05, - "loss": 1.0071, + "learning_rate": 1.6711935605472043e-05, + "loss": 1.0882, "step": 7356 }, { - "epoch": 0.20847856272492846, + "epoch": 0.2878550747319822, "grad_norm": 0.0, - "learning_rate": 1.8375440868766393e-05, - "loss": 1.0116, + "learning_rate": 1.6710996175796012e-05, + "loss": 1.175, "step": 7357 }, { - "epoch": 0.20850690016719092, + "epoch": 0.28789420142421157, "grad_norm": 0.0, - "learning_rate": 1.837493938120217e-05, - "loss": 1.0535, + "learning_rate": 1.671005663834979e-05, + "loss": 1.0562, "step": 7358 }, { - "epoch": 0.20853523760945336, + "epoch": 0.287933328116441, "grad_norm": 0.0, - "learning_rate": 1.8374437823092726e-05, - "loss": 1.0949, + "learning_rate": 1.6709116993148468e-05, + "loss": 1.1796, "step": 7359 }, { - "epoch": 0.20856357505171583, + "epoch": 0.28797245480867045, "grad_norm": 0.0, - "learning_rate": 1.8373936194442277e-05, - "loss": 1.009, + "learning_rate": 1.6708177240207133e-05, + "loss": 1.1991, "step": 7360 }, { - "epoch": 0.2085919124939783, + "epoch": 0.2880115815008999, "grad_norm": 0.0, - "learning_rate": 1.837343449525506e-05, - "loss": 1.0983, + "learning_rate": 1.670723737954088e-05, + "loss": 1.0911, "step": 7361 }, { - "epoch": 0.20862024993624076, + "epoch": 0.28805070819312933, "grad_norm": 0.0, - "learning_rate": 1.837293272553529e-05, - "loss": 1.0894, + "learning_rate": 1.6706297411164797e-05, + "loss": 1.1724, "step": 7362 }, { - "epoch": 0.20864858737850323, + "epoch": 0.2880898348853588, "grad_norm": 0.0, - "learning_rate": 1.83724308852872e-05, - "loss": 0.9448, + "learning_rate": 1.670535733509398e-05, + "loss": 1.0841, "step": 7363 }, { - "epoch": 0.2086769248207657, + "epoch": 0.2881289615775882, "grad_norm": 0.0, - "learning_rate": 1.8371928974515017e-05, - "loss": 0.9926, + "learning_rate": 1.670441715134353e-05, + "loss": 1.1075, "step": 7364 }, { - "epoch": 0.20870526226302813, + "epoch": 0.28816808826981766, "grad_norm": 0.0, - "learning_rate": 1.8371426993222966e-05, - "loss": 0.9988, + "learning_rate": 1.6703476859928537e-05, + "loss": 1.2229, "step": 7365 }, { - "epoch": 0.2087335997052906, + "epoch": 0.2882072149620471, "grad_norm": 0.0, - "learning_rate": 1.8370924941415277e-05, - "loss": 1.0345, + "learning_rate": 1.6702536460864108e-05, + "loss": 1.1857, "step": 7366 }, { - "epoch": 0.20876193714755306, + "epoch": 0.28824634165427654, "grad_norm": 0.0, - "learning_rate": 1.8370422819096177e-05, - "loss": 1.0043, + "learning_rate": 1.670159595416534e-05, + "loss": 1.176, "step": 7367 }, { - "epoch": 0.20879027458981553, + "epoch": 0.288285468346506, "grad_norm": 0.0, - "learning_rate": 1.8369920626269903e-05, - "loss": 1.1373, + "learning_rate": 1.6700655339847346e-05, + "loss": 1.1767, "step": 7368 }, { - "epoch": 0.208818612032078, + "epoch": 0.2883245950387354, "grad_norm": 0.0, - "learning_rate": 1.8369418362940675e-05, - "loss": 0.9104, + "learning_rate": 1.6699714617925216e-05, + "loss": 1.1897, "step": 7369 }, { - "epoch": 0.20884694947434046, + "epoch": 0.28836372173096486, "grad_norm": 0.0, - "learning_rate": 1.836891602911273e-05, - "loss": 1.0523, + "learning_rate": 1.6698773788414066e-05, + "loss": 1.1464, "step": 7370 }, { - "epoch": 0.2088752869166029, + "epoch": 0.2884028484231943, "grad_norm": 0.0, - "learning_rate": 1.8368413624790297e-05, - "loss": 1.1359, + "learning_rate": 1.6697832851329002e-05, + "loss": 1.0078, "step": 7371 }, { - "epoch": 0.20890362435886536, + "epoch": 0.28844197511542374, "grad_norm": 0.0, - "learning_rate": 1.8367911149977606e-05, - "loss": 0.9838, + "learning_rate": 1.6696891806685137e-05, + "loss": 1.171, "step": 7372 }, { - "epoch": 0.20893196180112783, + "epoch": 0.2884811018076532, "grad_norm": 0.0, - "learning_rate": 1.8367408604678893e-05, - "loss": 1.0173, + "learning_rate": 1.6695950654497582e-05, + "loss": 1.0688, "step": 7373 }, { - "epoch": 0.2089602992433903, + "epoch": 0.2885202284998826, "grad_norm": 0.0, - "learning_rate": 1.836690598889839e-05, - "loss": 1.0186, + "learning_rate": 1.669500939478145e-05, + "loss": 1.0715, "step": 7374 }, { - "epoch": 0.20898863668565276, + "epoch": 0.28855935519211207, "grad_norm": 0.0, - "learning_rate": 1.8366403302640338e-05, - "loss": 1.0521, + "learning_rate": 1.6694068027551856e-05, + "loss": 0.9894, "step": 7375 }, { - "epoch": 0.20901697412791523, + "epoch": 0.2885984818843415, "grad_norm": 0.0, - "learning_rate": 1.8365900545908957e-05, - "loss": 1.0036, + "learning_rate": 1.6693126552823916e-05, + "loss": 1.1008, "step": 7376 }, { - "epoch": 0.20904531157017767, + "epoch": 0.28863760857657095, "grad_norm": 0.0, - "learning_rate": 1.836539771870849e-05, - "loss": 1.0884, + "learning_rate": 1.6692184970612752e-05, + "loss": 1.1292, "step": 7377 }, { - "epoch": 0.20907364901244013, + "epoch": 0.2886767352688004, "grad_norm": 0.0, - "learning_rate": 1.8364894821043172e-05, - "loss": 1.0146, + "learning_rate": 1.669124328093348e-05, + "loss": 1.164, "step": 7378 }, { - "epoch": 0.2091019864547026, + "epoch": 0.28871586196102983, "grad_norm": 0.0, - "learning_rate": 1.836439185291724e-05, - "loss": 0.9042, + "learning_rate": 1.6690301483801233e-05, + "loss": 1.1036, "step": 7379 }, { - "epoch": 0.20913032389696506, + "epoch": 0.28875498865325927, "grad_norm": 0.0, - "learning_rate": 1.8363888814334927e-05, - "loss": 0.9166, + "learning_rate": 1.6689359579231122e-05, + "loss": 1.1932, "step": 7380 }, { - "epoch": 0.20915866133922753, + "epoch": 0.2887941153454887, "grad_norm": 0.0, - "learning_rate": 1.8363385705300473e-05, - "loss": 1.0346, + "learning_rate": 1.668841756723828e-05, + "loss": 1.1378, "step": 7381 }, { - "epoch": 0.20918699878148997, + "epoch": 0.28883324203771815, "grad_norm": 0.0, - "learning_rate": 1.836288252581812e-05, - "loss": 1.0419, + "learning_rate": 1.6687475447837833e-05, + "loss": 1.1065, "step": 7382 }, { - "epoch": 0.20921533622375243, + "epoch": 0.2888723687299476, "grad_norm": 0.0, - "learning_rate": 1.8362379275892093e-05, - "loss": 1.0627, + "learning_rate": 1.6686533221044907e-05, + "loss": 1.1084, "step": 7383 }, { - "epoch": 0.2092436736660149, + "epoch": 0.28891149542217703, "grad_norm": 0.0, - "learning_rate": 1.836187595552665e-05, - "loss": 1.0731, + "learning_rate": 1.668559088687464e-05, + "loss": 1.1544, "step": 7384 }, { - "epoch": 0.20927201110827737, + "epoch": 0.2889506221144065, "grad_norm": 0.0, - "learning_rate": 1.8361372564726014e-05, - "loss": 0.9587, + "learning_rate": 1.668464844534216e-05, + "loss": 1.1298, "step": 7385 }, { - "epoch": 0.20930034855053983, + "epoch": 0.28898974880663586, "grad_norm": 0.0, - "learning_rate": 1.8360869103494434e-05, - "loss": 1.0596, + "learning_rate": 1.6683705896462603e-05, + "loss": 1.1203, "step": 7386 }, { - "epoch": 0.2093286859928023, + "epoch": 0.2890288754988653, "grad_norm": 0.0, - "learning_rate": 1.836036557183615e-05, - "loss": 1.0653, + "learning_rate": 1.66827632402511e-05, + "loss": 1.174, "step": 7387 }, { - "epoch": 0.20935702343506474, + "epoch": 0.28906800219109474, "grad_norm": 0.0, - "learning_rate": 1.83598619697554e-05, - "loss": 1.0294, + "learning_rate": 1.66818204767228e-05, + "loss": 1.2218, "step": 7388 }, { - "epoch": 0.2093853608773272, + "epoch": 0.2891071288833242, "grad_norm": 0.0, - "learning_rate": 1.835935829725643e-05, - "loss": 0.9721, + "learning_rate": 1.668087760589283e-05, + "loss": 1.1069, "step": 7389 }, { - "epoch": 0.20941369831958967, + "epoch": 0.2891462555755536, "grad_norm": 0.0, - "learning_rate": 1.835885455434348e-05, - "loss": 0.896, + "learning_rate": 1.6679934627776343e-05, + "loss": 1.1551, "step": 7390 }, { - "epoch": 0.20944203576185214, + "epoch": 0.28918538226778306, "grad_norm": 0.0, - "learning_rate": 1.8358350741020794e-05, - "loss": 1.0532, + "learning_rate": 1.667899154238847e-05, + "loss": 1.2618, "step": 7391 }, { - "epoch": 0.2094703732041146, + "epoch": 0.2892245089600125, "grad_norm": 0.0, - "learning_rate": 1.8357846857292615e-05, - "loss": 0.9877, + "learning_rate": 1.6678048349744366e-05, + "loss": 1.2392, "step": 7392 }, { - "epoch": 0.20949871064637707, + "epoch": 0.28926363565224195, "grad_norm": 0.0, - "learning_rate": 1.835734290316319e-05, - "loss": 0.9631, + "learning_rate": 1.6677105049859175e-05, + "loss": 1.1849, "step": 7393 }, { - "epoch": 0.2095270480886395, + "epoch": 0.2893027623444714, "grad_norm": 0.0, - "learning_rate": 1.8356838878636764e-05, - "loss": 1.0529, + "learning_rate": 1.667616164274804e-05, + "loss": 1.1174, "step": 7394 }, { - "epoch": 0.20955538553090197, + "epoch": 0.28934188903670083, "grad_norm": 0.0, - "learning_rate": 1.835633478371758e-05, - "loss": 0.9881, + "learning_rate": 1.6675218128426117e-05, + "loss": 1.0955, "step": 7395 }, { - "epoch": 0.20958372297316444, + "epoch": 0.28938101572893027, "grad_norm": 0.0, - "learning_rate": 1.835583061840988e-05, - "loss": 1.0241, + "learning_rate": 1.6674274506908554e-05, + "loss": 1.1533, "step": 7396 }, { - "epoch": 0.2096120604154269, + "epoch": 0.2894201424211597, "grad_norm": 0.0, - "learning_rate": 1.835532638271792e-05, - "loss": 0.935, + "learning_rate": 1.6673330778210508e-05, + "loss": 1.1114, "step": 7397 }, { - "epoch": 0.20964039785768937, + "epoch": 0.28945926911338915, "grad_norm": 0.0, - "learning_rate": 1.8354822076645944e-05, - "loss": 0.9025, + "learning_rate": 1.6672386942347127e-05, + "loss": 1.1499, "step": 7398 }, { - "epoch": 0.20966873529995184, + "epoch": 0.2894983958056186, "grad_norm": 0.0, - "learning_rate": 1.8354317700198198e-05, - "loss": 1.0193, + "learning_rate": 1.6671442999333577e-05, + "loss": 1.1406, "step": 7399 }, { - "epoch": 0.20969707274221427, + "epoch": 0.28953752249784803, "grad_norm": 0.0, - "learning_rate": 1.835381325337893e-05, - "loss": 1.1194, + "learning_rate": 1.6670498949185013e-05, + "loss": 1.1489, "step": 7400 }, { - "epoch": 0.20972541018447674, + "epoch": 0.2895766491900775, "grad_norm": 0.0, - "learning_rate": 1.8353308736192396e-05, - "loss": 1.029, + "learning_rate": 1.6669554791916593e-05, + "loss": 1.0648, "step": 7401 }, { - "epoch": 0.2097537476267392, + "epoch": 0.2896157758823069, "grad_norm": 0.0, - "learning_rate": 1.8352804148642836e-05, - "loss": 1.0361, + "learning_rate": 1.6668610527543476e-05, + "loss": 1.1721, "step": 7402 }, { - "epoch": 0.20978208506900167, + "epoch": 0.28965490257453635, "grad_norm": 0.0, - "learning_rate": 1.8352299490734506e-05, - "loss": 0.9778, + "learning_rate": 1.6667666156080837e-05, + "loss": 1.081, "step": 7403 }, { - "epoch": 0.20981042251126414, + "epoch": 0.2896940292667658, "grad_norm": 0.0, - "learning_rate": 1.8351794762471656e-05, - "loss": 0.944, + "learning_rate": 1.666672167754383e-05, + "loss": 1.1434, "step": 7404 }, { - "epoch": 0.2098387599535266, + "epoch": 0.28973315595899524, "grad_norm": 0.0, - "learning_rate": 1.8351289963858535e-05, - "loss": 1.0697, + "learning_rate": 1.666577709194763e-05, + "loss": 1.1689, "step": 7405 }, { - "epoch": 0.20986709739578904, + "epoch": 0.2897722826512247, "grad_norm": 0.0, - "learning_rate": 1.8350785094899403e-05, - "loss": 0.9048, + "learning_rate": 1.6664832399307402e-05, + "loss": 1.236, "step": 7406 }, { - "epoch": 0.2098954348380515, + "epoch": 0.2898114093434541, "grad_norm": 0.0, - "learning_rate": 1.8350280155598504e-05, - "loss": 1.0027, + "learning_rate": 1.6663887599638316e-05, + "loss": 1.1542, "step": 7407 }, { - "epoch": 0.20992377228031397, + "epoch": 0.28985053603568356, "grad_norm": 0.0, - "learning_rate": 1.8349775145960094e-05, - "loss": 1.0859, + "learning_rate": 1.6662942692955548e-05, + "loss": 1.1355, "step": 7408 }, { - "epoch": 0.20995210972257644, + "epoch": 0.289889662727913, "grad_norm": 0.0, - "learning_rate": 1.834927006598843e-05, - "loss": 0.9441, + "learning_rate": 1.6661997679274263e-05, + "loss": 1.0264, "step": 7409 }, { - "epoch": 0.2099804471648389, + "epoch": 0.28992878942014244, "grad_norm": 0.0, - "learning_rate": 1.834876491568776e-05, - "loss": 0.8908, + "learning_rate": 1.666105255860965e-05, + "loss": 1.2787, "step": 7410 }, { - "epoch": 0.21000878460710137, + "epoch": 0.2899679161123719, "grad_norm": 0.0, - "learning_rate": 1.8348259695062344e-05, - "loss": 0.9854, + "learning_rate": 1.6660107330976876e-05, + "loss": 1.2917, "step": 7411 }, { - "epoch": 0.2100371220493638, + "epoch": 0.2900070428046013, "grad_norm": 0.0, - "learning_rate": 1.834775440411644e-05, - "loss": 0.9824, + "learning_rate": 1.6659161996391125e-05, + "loss": 1.2099, "step": 7412 }, { - "epoch": 0.21006545949162628, + "epoch": 0.29004616949683076, "grad_norm": 0.0, - "learning_rate": 1.8347249042854294e-05, - "loss": 1.0587, + "learning_rate": 1.665821655486758e-05, + "loss": 1.0507, "step": 7413 }, { - "epoch": 0.21009379693388874, + "epoch": 0.2900852961890602, "grad_norm": 0.0, - "learning_rate": 1.8346743611280174e-05, - "loss": 0.9819, + "learning_rate": 1.6657271006421412e-05, + "loss": 1.1039, "step": 7414 }, { - "epoch": 0.2101221343761512, + "epoch": 0.2901244228812896, "grad_norm": 0.0, - "learning_rate": 1.8346238109398335e-05, - "loss": 1.0575, + "learning_rate": 1.665632535106782e-05, + "loss": 1.1371, "step": 7415 }, { - "epoch": 0.21015047181841368, + "epoch": 0.29016354957351903, "grad_norm": 0.0, - "learning_rate": 1.834573253721303e-05, - "loss": 0.9693, + "learning_rate": 1.6655379588821983e-05, + "loss": 1.1794, "step": 7416 }, { - "epoch": 0.21017880926067614, + "epoch": 0.29020267626574847, "grad_norm": 0.0, - "learning_rate": 1.834522689472852e-05, - "loss": 0.973, + "learning_rate": 1.665443371969909e-05, + "loss": 1.1246, "step": 7417 }, { - "epoch": 0.21020714670293858, + "epoch": 0.2902418029579779, "grad_norm": 0.0, - "learning_rate": 1.8344721181949065e-05, - "loss": 1.0003, + "learning_rate": 1.6653487743714328e-05, + "loss": 1.1547, "step": 7418 }, { - "epoch": 0.21023548414520105, + "epoch": 0.29028092965020735, "grad_norm": 0.0, - "learning_rate": 1.8344215398878925e-05, - "loss": 1.0147, + "learning_rate": 1.6652541660882894e-05, + "loss": 1.0905, "step": 7419 }, { - "epoch": 0.2102638215874635, + "epoch": 0.2903200563424368, "grad_norm": 0.0, - "learning_rate": 1.8343709545522364e-05, - "loss": 1.0548, + "learning_rate": 1.665159547121997e-05, + "loss": 1.2206, "step": 7420 }, { - "epoch": 0.21029215902972598, + "epoch": 0.29035918303466624, "grad_norm": 0.0, - "learning_rate": 1.8343203621883634e-05, - "loss": 1.0036, + "learning_rate": 1.6650649174740766e-05, + "loss": 1.112, "step": 7421 }, { - "epoch": 0.21032049647198844, + "epoch": 0.2903983097268957, "grad_norm": 0.0, - "learning_rate": 1.8342697627967006e-05, - "loss": 1.01, + "learning_rate": 1.6649702771460464e-05, + "loss": 1.0714, "step": 7422 }, { - "epoch": 0.2103488339142509, + "epoch": 0.2904374364191251, "grad_norm": 0.0, - "learning_rate": 1.8342191563776738e-05, - "loss": 0.9313, + "learning_rate": 1.664875626139427e-05, + "loss": 1.1918, "step": 7423 }, { - "epoch": 0.21037717135651335, + "epoch": 0.29047656311135456, "grad_norm": 0.0, - "learning_rate": 1.8341685429317087e-05, - "loss": 0.9803, + "learning_rate": 1.664780964455738e-05, + "loss": 1.1544, "step": 7424 }, { - "epoch": 0.21040550879877581, + "epoch": 0.290515689803584, "grad_norm": 0.0, - "learning_rate": 1.8341179224592327e-05, - "loss": 0.9754, + "learning_rate": 1.6646862920965e-05, + "loss": 1.0199, "step": 7425 }, { - "epoch": 0.21043384624103828, + "epoch": 0.29055481649581344, "grad_norm": 0.0, - "learning_rate": 1.8340672949606715e-05, - "loss": 1.0738, + "learning_rate": 1.664591609063233e-05, + "loss": 1.1509, "step": 7426 }, { - "epoch": 0.21046218368330075, + "epoch": 0.2905939431880429, "grad_norm": 0.0, - "learning_rate": 1.8340166604364518e-05, - "loss": 0.9358, + "learning_rate": 1.6644969153574574e-05, + "loss": 1.1436, "step": 7427 }, { - "epoch": 0.2104905211255632, + "epoch": 0.2906330698802723, "grad_norm": 0.0, - "learning_rate": 1.833966018887e-05, - "loss": 0.9293, + "learning_rate": 1.6644022109806938e-05, + "loss": 1.157, "step": 7428 }, { - "epoch": 0.21051885856782568, + "epoch": 0.29067219657250176, "grad_norm": 0.0, - "learning_rate": 1.833915370312743e-05, - "loss": 0.836, + "learning_rate": 1.664307495934464e-05, + "loss": 1.0693, "step": 7429 }, { - "epoch": 0.21054719601008812, + "epoch": 0.2907113232647312, "grad_norm": 0.0, - "learning_rate": 1.8338647147141067e-05, - "loss": 0.9973, + "learning_rate": 1.664212770220287e-05, + "loss": 1.1588, "step": 7430 }, { - "epoch": 0.21057553345235058, + "epoch": 0.29075044995696064, "grad_norm": 0.0, - "learning_rate": 1.8338140520915185e-05, - "loss": 1.0239, + "learning_rate": 1.6641180338396867e-05, + "loss": 1.0651, "step": 7431 }, { - "epoch": 0.21060387089461305, + "epoch": 0.2907895766491901, "grad_norm": 0.0, - "learning_rate": 1.833763382445405e-05, - "loss": 0.9044, + "learning_rate": 1.664023286794182e-05, + "loss": 1.0561, "step": 7432 }, { - "epoch": 0.21063220833687551, + "epoch": 0.2908287033414195, "grad_norm": 0.0, - "learning_rate": 1.833712705776193e-05, - "loss": 1.0538, + "learning_rate": 1.6639285290852954e-05, + "loss": 1.2411, "step": 7433 }, { - "epoch": 0.21066054577913798, + "epoch": 0.29086783003364897, "grad_norm": 0.0, - "learning_rate": 1.833662022084309e-05, - "loss": 1.0321, + "learning_rate": 1.663833760714549e-05, + "loss": 1.1922, "step": 7434 }, { - "epoch": 0.21068888322140045, + "epoch": 0.2909069567258784, "grad_norm": 0.0, - "learning_rate": 1.8336113313701807e-05, - "loss": 1.0609, + "learning_rate": 1.6637389816834638e-05, + "loss": 1.1401, "step": 7435 }, { - "epoch": 0.21071722066366289, + "epoch": 0.29094608341810785, "grad_norm": 0.0, - "learning_rate": 1.833560633634234e-05, - "loss": 1.0759, + "learning_rate": 1.6636441919935627e-05, + "loss": 1.113, "step": 7436 }, { - "epoch": 0.21074555810592535, + "epoch": 0.2909852101103373, "grad_norm": 0.0, - "learning_rate": 1.833509928876897e-05, - "loss": 1.0511, + "learning_rate": 1.6635493916463673e-05, + "loss": 1.2631, "step": 7437 }, { - "epoch": 0.21077389554818782, + "epoch": 0.29102433680256673, "grad_norm": 0.0, - "learning_rate": 1.833459217098596e-05, - "loss": 1.0214, + "learning_rate": 1.6634545806434e-05, + "loss": 1.0952, "step": 7438 }, { - "epoch": 0.21080223299045028, + "epoch": 0.29106346349479617, "grad_norm": 0.0, - "learning_rate": 1.833408498299759e-05, - "loss": 0.9818, + "learning_rate": 1.6633597589861836e-05, + "loss": 1.0762, "step": 7439 }, { - "epoch": 0.21083057043271275, + "epoch": 0.2911025901870256, "grad_norm": 0.0, - "learning_rate": 1.8333577724808123e-05, - "loss": 0.968, + "learning_rate": 1.6632649266762406e-05, + "loss": 1.0574, "step": 7440 }, { - "epoch": 0.21085890787497522, + "epoch": 0.29114171687925505, "grad_norm": 0.0, - "learning_rate": 1.8333070396421838e-05, - "loss": 0.9173, + "learning_rate": 1.663170083715094e-05, + "loss": 1.1699, "step": 7441 }, { - "epoch": 0.21088724531723765, + "epoch": 0.2911808435714845, "grad_norm": 0.0, - "learning_rate": 1.8332562997843007e-05, - "loss": 0.9077, + "learning_rate": 1.663075230104267e-05, + "loss": 1.1013, "step": 7442 }, { - "epoch": 0.21091558275950012, + "epoch": 0.2912199702637139, "grad_norm": 0.0, - "learning_rate": 1.8332055529075902e-05, - "loss": 0.9843, + "learning_rate": 1.6629803658452825e-05, + "loss": 1.1548, "step": 7443 }, { - "epoch": 0.21094392020176259, + "epoch": 0.2912590969559433, "grad_norm": 0.0, - "learning_rate": 1.83315479901248e-05, - "loss": 0.9495, + "learning_rate": 1.6628854909396643e-05, + "loss": 1.2241, "step": 7444 }, { - "epoch": 0.21097225764402505, + "epoch": 0.29129822364817276, "grad_norm": 0.0, - "learning_rate": 1.8331040380993977e-05, - "loss": 1.1165, + "learning_rate": 1.6627906053889354e-05, + "loss": 1.0305, "step": 7445 }, { - "epoch": 0.21100059508628752, + "epoch": 0.2913373503404022, "grad_norm": 0.0, - "learning_rate": 1.8330532701687705e-05, - "loss": 0.9809, + "learning_rate": 1.6626957091946203e-05, + "loss": 1.0999, "step": 7446 }, { - "epoch": 0.21102893252854998, + "epoch": 0.29137647703263164, "grad_norm": 0.0, - "learning_rate": 1.8330024952210263e-05, - "loss": 0.9505, + "learning_rate": 1.6626008023582425e-05, + "loss": 1.1095, "step": 7447 }, { - "epoch": 0.21105726997081242, + "epoch": 0.2914156037248611, "grad_norm": 0.0, - "learning_rate": 1.8329517132565926e-05, - "loss": 1.0171, + "learning_rate": 1.662505884881326e-05, + "loss": 1.0983, "step": 7448 }, { - "epoch": 0.2110856074130749, + "epoch": 0.2914547304170905, "grad_norm": 0.0, - "learning_rate": 1.8329009242758977e-05, - "loss": 1.0414, + "learning_rate": 1.662410956765395e-05, + "loss": 1.0801, "step": 7449 }, { - "epoch": 0.21111394485533735, + "epoch": 0.29149385710931996, "grad_norm": 0.0, - "learning_rate": 1.8328501282793688e-05, - "loss": 1.024, + "learning_rate": 1.6623160180119745e-05, + "loss": 1.1115, "step": 7450 }, { - "epoch": 0.21114228229759982, + "epoch": 0.2915329838015494, "grad_norm": 0.0, - "learning_rate": 1.832799325267434e-05, - "loss": 1.0068, + "learning_rate": 1.6622210686225882e-05, + "loss": 1.1501, "step": 7451 }, { - "epoch": 0.2111706197398623, + "epoch": 0.29157211049377885, "grad_norm": 0.0, - "learning_rate": 1.832748515240521e-05, - "loss": 1.0307, + "learning_rate": 1.6621261085987613e-05, + "loss": 1.1929, "step": 7452 }, { - "epoch": 0.21119895718212475, + "epoch": 0.2916112371860083, "grad_norm": 0.0, - "learning_rate": 1.832697698199058e-05, - "loss": 0.9348, + "learning_rate": 1.662031137942019e-05, + "loss": 1.0357, "step": 7453 }, { - "epoch": 0.2112272946243872, + "epoch": 0.29165036387823773, "grad_norm": 0.0, - "learning_rate": 1.8326468741434736e-05, - "loss": 1.0751, + "learning_rate": 1.6619361566538863e-05, + "loss": 1.1893, "step": 7454 }, { - "epoch": 0.21125563206664966, + "epoch": 0.29168949057046717, "grad_norm": 0.0, - "learning_rate": 1.8325960430741954e-05, - "loss": 1.0059, + "learning_rate": 1.6618411647358886e-05, + "loss": 1.0197, "step": 7455 }, { - "epoch": 0.21128396950891212, + "epoch": 0.2917286172626966, "grad_norm": 0.0, - "learning_rate": 1.8325452049916514e-05, - "loss": 0.964, + "learning_rate": 1.6617461621895508e-05, + "loss": 1.149, "step": 7456 }, { - "epoch": 0.2113123069511746, + "epoch": 0.29176774395492605, "grad_norm": 0.0, - "learning_rate": 1.83249435989627e-05, - "loss": 0.9636, + "learning_rate": 1.661651149016399e-05, + "loss": 1.0921, "step": 7457 }, { - "epoch": 0.21134064439343705, + "epoch": 0.2918068706471555, "grad_norm": 0.0, - "learning_rate": 1.832443507788479e-05, - "loss": 1.0526, + "learning_rate": 1.6615561252179585e-05, + "loss": 1.1414, "step": 7458 }, { - "epoch": 0.21136898183569952, + "epoch": 0.29184599733938493, "grad_norm": 0.0, - "learning_rate": 1.8323926486687076e-05, - "loss": 0.9297, + "learning_rate": 1.6614610907957556e-05, + "loss": 1.0708, "step": 7459 }, { - "epoch": 0.21139731927796196, + "epoch": 0.2918851240316144, "grad_norm": 0.0, - "learning_rate": 1.832341782537384e-05, - "loss": 1.0153, + "learning_rate": 1.6613660457513168e-05, + "loss": 1.174, "step": 7460 }, { - "epoch": 0.21142565672022443, + "epoch": 0.2919242507238438, "grad_norm": 0.0, - "learning_rate": 1.832290909394936e-05, - "loss": 0.9594, + "learning_rate": 1.661270990086168e-05, + "loss": 1.1675, "step": 7461 }, { - "epoch": 0.2114539941624869, + "epoch": 0.29196337741607326, "grad_norm": 0.0, - "learning_rate": 1.8322400292417928e-05, - "loss": 1.1256, + "learning_rate": 1.6611759238018356e-05, + "loss": 1.1289, "step": 7462 }, { - "epoch": 0.21148233160474936, + "epoch": 0.2920025041083027, "grad_norm": 0.0, - "learning_rate": 1.8321891420783827e-05, - "loss": 0.9701, + "learning_rate": 1.6610808468998462e-05, + "loss": 1.1144, "step": 7463 }, { - "epoch": 0.21151066904701182, + "epoch": 0.29204163080053214, "grad_norm": 0.0, - "learning_rate": 1.832138247905135e-05, - "loss": 1.0672, + "learning_rate": 1.660985759381727e-05, + "loss": 1.0113, "step": 7464 }, { - "epoch": 0.2115390064892743, + "epoch": 0.2920807574927616, "grad_norm": 0.0, - "learning_rate": 1.8320873467224772e-05, - "loss": 0.9213, + "learning_rate": 1.660890661249005e-05, + "loss": 1.1123, "step": 7465 }, { - "epoch": 0.21156734393153673, + "epoch": 0.292119884184991, "grad_norm": 0.0, - "learning_rate": 1.832036438530839e-05, - "loss": 1.0953, + "learning_rate": 1.6607955525032066e-05, + "loss": 1.2365, "step": 7466 }, { - "epoch": 0.2115956813737992, + "epoch": 0.29215901087722046, "grad_norm": 0.0, - "learning_rate": 1.8319855233306488e-05, - "loss": 0.9952, + "learning_rate": 1.6607004331458598e-05, + "loss": 1.0501, "step": 7467 }, { - "epoch": 0.21162401881606166, + "epoch": 0.2921981375694499, "grad_norm": 0.0, - "learning_rate": 1.8319346011223354e-05, - "loss": 1.0881, + "learning_rate": 1.660605303178492e-05, + "loss": 1.1244, "step": 7468 }, { - "epoch": 0.21165235625832413, + "epoch": 0.29223726426167934, "grad_norm": 0.0, - "learning_rate": 1.831883671906328e-05, - "loss": 0.9493, + "learning_rate": 1.660510162602631e-05, + "loss": 1.1846, "step": 7469 }, { - "epoch": 0.2116806937005866, + "epoch": 0.2922763909539088, "grad_norm": 0.0, - "learning_rate": 1.831832735683056e-05, - "loss": 1.0302, + "learning_rate": 1.660415011419804e-05, + "loss": 1.0024, "step": 7470 }, { - "epoch": 0.21170903114284906, + "epoch": 0.2923155176461382, "grad_norm": 0.0, - "learning_rate": 1.831781792452947e-05, - "loss": 1.0947, + "learning_rate": 1.6603198496315403e-05, + "loss": 1.0851, "step": 7471 }, { - "epoch": 0.2117373685851115, + "epoch": 0.2923546443383676, "grad_norm": 0.0, - "learning_rate": 1.831730842216432e-05, - "loss": 0.888, + "learning_rate": 1.6602246772393665e-05, + "loss": 0.9959, "step": 7472 }, { - "epoch": 0.21176570602737396, + "epoch": 0.29239377103059705, "grad_norm": 0.0, - "learning_rate": 1.831679884973939e-05, - "loss": 0.8613, + "learning_rate": 1.6601294942448122e-05, + "loss": 0.9717, "step": 7473 }, { - "epoch": 0.21179404346963643, + "epoch": 0.2924328977228265, "grad_norm": 0.0, - "learning_rate": 1.8316289207258973e-05, - "loss": 0.927, + "learning_rate": 1.660034300649405e-05, + "loss": 1.1078, "step": 7474 }, { - "epoch": 0.2118223809118989, + "epoch": 0.29247202441505593, "grad_norm": 0.0, - "learning_rate": 1.8315779494727368e-05, - "loss": 0.9945, + "learning_rate": 1.659939096454674e-05, + "loss": 1.1921, "step": 7475 }, { - "epoch": 0.21185071835416136, + "epoch": 0.29251115110728537, "grad_norm": 0.0, - "learning_rate": 1.831526971214886e-05, - "loss": 0.9157, + "learning_rate": 1.6598438816621484e-05, + "loss": 1.0745, "step": 7476 }, { - "epoch": 0.21187905579642383, + "epoch": 0.2925502777995148, "grad_norm": 0.0, - "learning_rate": 1.8314759859527748e-05, - "loss": 1.0927, + "learning_rate": 1.6597486562733565e-05, + "loss": 1.2017, "step": 7477 }, { - "epoch": 0.21190739323868626, + "epoch": 0.29258940449174425, "grad_norm": 0.0, - "learning_rate": 1.831424993686833e-05, - "loss": 0.9809, + "learning_rate": 1.659653420289828e-05, + "loss": 1.1979, "step": 7478 }, { - "epoch": 0.21193573068094873, + "epoch": 0.2926285311839737, "grad_norm": 0.0, - "learning_rate": 1.8313739944174894e-05, - "loss": 0.9088, + "learning_rate": 1.6595581737130923e-05, + "loss": 1.1163, "step": 7479 }, { - "epoch": 0.2119640681232112, + "epoch": 0.29266765787620314, "grad_norm": 0.0, - "learning_rate": 1.831322988145174e-05, - "loss": 1.0219, + "learning_rate": 1.659462916544679e-05, + "loss": 1.1435, "step": 7480 }, { - "epoch": 0.21199240556547366, + "epoch": 0.2927067845684326, "grad_norm": 0.0, - "learning_rate": 1.8312719748703163e-05, - "loss": 0.8799, + "learning_rate": 1.659367648786117e-05, + "loss": 1.0449, "step": 7481 }, { - "epoch": 0.21202074300773613, + "epoch": 0.292745911260662, "grad_norm": 0.0, - "learning_rate": 1.8312209545933458e-05, - "loss": 0.9363, + "learning_rate": 1.6592723704389374e-05, + "loss": 1.1166, "step": 7482 }, { - "epoch": 0.2120490804499986, + "epoch": 0.29278503795289146, "grad_norm": 0.0, - "learning_rate": 1.8311699273146933e-05, - "loss": 1.0133, + "learning_rate": 1.659177081504669e-05, + "loss": 1.0286, "step": 7483 }, { - "epoch": 0.21207741789226103, + "epoch": 0.2928241646451209, "grad_norm": 0.0, - "learning_rate": 1.8311188930347873e-05, - "loss": 0.9933, + "learning_rate": 1.659081781984843e-05, + "loss": 1.2441, "step": 7484 }, { - "epoch": 0.2121057553345235, + "epoch": 0.29286329133735034, "grad_norm": 0.0, - "learning_rate": 1.831067851754058e-05, - "loss": 0.9152, + "learning_rate": 1.6589864718809896e-05, + "loss": 1.1594, "step": 7485 }, { - "epoch": 0.21213409277678597, + "epoch": 0.2929024180295798, "grad_norm": 0.0, - "learning_rate": 1.831016803472936e-05, - "loss": 1.1488, + "learning_rate": 1.658891151194639e-05, + "loss": 0.9778, "step": 7486 }, { - "epoch": 0.21216243021904843, + "epoch": 0.2929415447218092, "grad_norm": 0.0, - "learning_rate": 1.830965748191851e-05, - "loss": 0.9602, + "learning_rate": 1.6587958199273225e-05, + "loss": 1.2089, "step": 7487 }, { - "epoch": 0.2121907676613109, + "epoch": 0.29298067141403866, "grad_norm": 0.0, - "learning_rate": 1.8309146859112328e-05, - "loss": 1.0173, + "learning_rate": 1.6587004780805704e-05, + "loss": 1.1231, "step": 7488 }, { - "epoch": 0.21221910510357336, + "epoch": 0.2930197981062681, "grad_norm": 0.0, - "learning_rate": 1.8308636166315114e-05, - "loss": 1.0041, + "learning_rate": 1.658605125655914e-05, + "loss": 1.1834, "step": 7489 }, { - "epoch": 0.2122474425458358, + "epoch": 0.29305892479849754, "grad_norm": 0.0, - "learning_rate": 1.8308125403531175e-05, - "loss": 1.0789, + "learning_rate": 1.6585097626548848e-05, + "loss": 1.1208, "step": 7490 }, { - "epoch": 0.21227577998809827, + "epoch": 0.293098051490727, "grad_norm": 0.0, - "learning_rate": 1.8307614570764806e-05, - "loss": 0.9185, + "learning_rate": 1.6584143890790138e-05, + "loss": 1.0186, "step": 7491 }, { - "epoch": 0.21230411743036073, + "epoch": 0.2931371781829564, "grad_norm": 0.0, - "learning_rate": 1.8307103668020318e-05, - "loss": 0.9931, + "learning_rate": 1.6583190049298327e-05, + "loss": 1.139, "step": 7492 }, { - "epoch": 0.2123324548726232, + "epoch": 0.29317630487518587, "grad_norm": 0.0, - "learning_rate": 1.830659269530201e-05, - "loss": 0.9483, + "learning_rate": 1.6582236102088734e-05, + "loss": 1.1665, "step": 7493 }, { - "epoch": 0.21236079231488567, + "epoch": 0.2932154315674153, "grad_norm": 0.0, - "learning_rate": 1.8306081652614192e-05, - "loss": 0.9184, + "learning_rate": 1.6581282049176674e-05, + "loss": 1.188, "step": 7494 }, { - "epoch": 0.21238912975714813, + "epoch": 0.29325455825964475, "grad_norm": 0.0, - "learning_rate": 1.830557053996116e-05, - "loss": 0.9761, + "learning_rate": 1.6580327890577476e-05, + "loss": 1.1483, "step": 7495 }, { - "epoch": 0.21241746719941057, + "epoch": 0.2932936849518742, "grad_norm": 0.0, - "learning_rate": 1.8305059357347222e-05, - "loss": 0.9017, + "learning_rate": 1.6579373626306453e-05, + "loss": 1.1649, "step": 7496 }, { - "epoch": 0.21244580464167304, + "epoch": 0.29333281164410363, "grad_norm": 0.0, - "learning_rate": 1.8304548104776687e-05, - "loss": 1.0192, + "learning_rate": 1.6578419256378935e-05, + "loss": 1.0864, "step": 7497 }, { - "epoch": 0.2124741420839355, + "epoch": 0.29337193833633307, "grad_norm": 0.0, - "learning_rate": 1.8304036782253858e-05, - "loss": 1.0472, + "learning_rate": 1.657746478081025e-05, + "loss": 1.1581, "step": 7498 }, { - "epoch": 0.21250247952619797, + "epoch": 0.2934110650285625, "grad_norm": 0.0, - "learning_rate": 1.8303525389783045e-05, - "loss": 1.0535, + "learning_rate": 1.657651019961572e-05, + "loss": 1.122, "step": 7499 }, { - "epoch": 0.21253081696846043, + "epoch": 0.2934501917207919, "grad_norm": 0.0, - "learning_rate": 1.830301392736855e-05, - "loss": 0.9445, + "learning_rate": 1.6575555512810678e-05, + "loss": 1.198, "step": 7500 }, { - "epoch": 0.2125591544107229, + "epoch": 0.29348931841302134, "grad_norm": 0.0, - "learning_rate": 1.830250239501469e-05, - "loss": 1.0567, + "learning_rate": 1.6574600720410455e-05, + "loss": 1.0694, "step": 7501 }, { - "epoch": 0.21258749185298534, + "epoch": 0.2935284451052508, "grad_norm": 0.0, - "learning_rate": 1.830199079272577e-05, - "loss": 1.038, + "learning_rate": 1.657364582243038e-05, + "loss": 1.1804, "step": 7502 }, { - "epoch": 0.2126158292952478, + "epoch": 0.2935675717974802, "grad_norm": 0.0, - "learning_rate": 1.8301479120506097e-05, - "loss": 0.9789, + "learning_rate": 1.6572690818885796e-05, + "loss": 1.0295, "step": 7503 }, { - "epoch": 0.21264416673751027, + "epoch": 0.29360669848970966, "grad_norm": 0.0, - "learning_rate": 1.830096737835998e-05, - "loss": 1.0729, + "learning_rate": 1.657173570979203e-05, + "loss": 1.1211, "step": 7504 }, { - "epoch": 0.21267250417977274, + "epoch": 0.2936458251819391, "grad_norm": 0.0, - "learning_rate": 1.8300455566291736e-05, - "loss": 1.0117, + "learning_rate": 1.657078049516442e-05, + "loss": 1.1822, "step": 7505 }, { - "epoch": 0.2127008416220352, + "epoch": 0.29368495187416854, "grad_norm": 0.0, - "learning_rate": 1.8299943684305672e-05, - "loss": 1.0462, + "learning_rate": 1.6569825175018315e-05, + "loss": 1.1733, "step": 7506 }, { - "epoch": 0.21272917906429767, + "epoch": 0.293724078566398, "grad_norm": 0.0, - "learning_rate": 1.8299431732406097e-05, - "loss": 1.026, + "learning_rate": 1.6568869749369044e-05, + "loss": 1.0023, "step": 7507 }, { - "epoch": 0.2127575165065601, + "epoch": 0.2937632052586274, "grad_norm": 0.0, - "learning_rate": 1.8298919710597333e-05, - "loss": 1.0623, + "learning_rate": 1.6567914218231963e-05, + "loss": 1.0936, "step": 7508 }, { - "epoch": 0.21278585394882257, + "epoch": 0.29380233195085687, "grad_norm": 0.0, - "learning_rate": 1.829840761888368e-05, - "loss": 0.9028, + "learning_rate": 1.6566958581622404e-05, + "loss": 1.0666, "step": 7509 }, { - "epoch": 0.21281419139108504, + "epoch": 0.2938414586430863, "grad_norm": 0.0, - "learning_rate": 1.8297895457269462e-05, - "loss": 1.1308, + "learning_rate": 1.6566002839555722e-05, + "loss": 1.1797, "step": 7510 }, { - "epoch": 0.2128425288333475, + "epoch": 0.29388058533531575, "grad_norm": 0.0, - "learning_rate": 1.8297383225758986e-05, - "loss": 0.9728, + "learning_rate": 1.656504699204726e-05, + "loss": 1.1249, "step": 7511 }, { - "epoch": 0.21287086627560997, + "epoch": 0.2939197120275452, "grad_norm": 0.0, - "learning_rate": 1.8296870924356575e-05, - "loss": 0.937, + "learning_rate": 1.656409103911237e-05, + "loss": 1.1919, "step": 7512 }, { - "epoch": 0.21289920371787244, + "epoch": 0.29395883871977463, "grad_norm": 0.0, - "learning_rate": 1.8296358553066532e-05, - "loss": 1.0539, + "learning_rate": 1.6563134980766406e-05, + "loss": 0.9508, "step": 7513 }, { - "epoch": 0.21292754116013488, + "epoch": 0.29399796541200407, "grad_norm": 0.0, - "learning_rate": 1.8295846111893186e-05, - "loss": 1.0457, + "learning_rate": 1.6562178817024713e-05, + "loss": 1.1726, "step": 7514 }, { - "epoch": 0.21295587860239734, + "epoch": 0.2940370921042335, "grad_norm": 0.0, - "learning_rate": 1.8295333600840847e-05, - "loss": 1.0303, + "learning_rate": 1.6561222547902656e-05, + "loss": 0.903, "step": 7515 }, { - "epoch": 0.2129842160446598, + "epoch": 0.29407621879646295, "grad_norm": 0.0, - "learning_rate": 1.829482101991383e-05, - "loss": 0.8921, + "learning_rate": 1.656026617341558e-05, + "loss": 0.9619, "step": 7516 }, { - "epoch": 0.21301255348692227, + "epoch": 0.2941153454886924, "grad_norm": 0.0, - "learning_rate": 1.8294308369116457e-05, - "loss": 1.0169, + "learning_rate": 1.655930969357886e-05, + "loss": 1.211, "step": 7517 }, { - "epoch": 0.21304089092918474, + "epoch": 0.29415447218092183, "grad_norm": 0.0, - "learning_rate": 1.8293795648453043e-05, - "loss": 0.9726, + "learning_rate": 1.655835310840784e-05, + "loss": 1.2164, "step": 7518 }, { - "epoch": 0.2130692283714472, + "epoch": 0.2941935988731513, "grad_norm": 0.0, - "learning_rate": 1.8293282857927913e-05, - "loss": 0.9293, + "learning_rate": 1.6557396417917885e-05, + "loss": 1.1348, "step": 7519 }, { - "epoch": 0.21309756581370964, + "epoch": 0.2942327255653807, "grad_norm": 0.0, - "learning_rate": 1.8292769997545376e-05, - "loss": 0.9856, + "learning_rate": 1.6556439622124364e-05, + "loss": 1.1669, "step": 7520 }, { - "epoch": 0.2131259032559721, + "epoch": 0.29427185225761016, "grad_norm": 0.0, - "learning_rate": 1.829225706730976e-05, - "loss": 0.9716, + "learning_rate": 1.6555482721042636e-05, + "loss": 1.0031, "step": 7521 }, { - "epoch": 0.21315424069823458, + "epoch": 0.2943109789498396, "grad_norm": 0.0, - "learning_rate": 1.8291744067225387e-05, - "loss": 1.0104, + "learning_rate": 1.655452571468807e-05, + "loss": 1.253, "step": 7522 }, { - "epoch": 0.21318257814049704, + "epoch": 0.29435010564206904, "grad_norm": 0.0, - "learning_rate": 1.8291230997296572e-05, - "loss": 0.9224, + "learning_rate": 1.6553568603076036e-05, + "loss": 1.1939, "step": 7523 }, { - "epoch": 0.2132109155827595, + "epoch": 0.2943892323342985, "grad_norm": 0.0, - "learning_rate": 1.829071785752764e-05, - "loss": 1.0703, + "learning_rate": 1.6552611386221902e-05, + "loss": 1.1196, "step": 7524 }, { - "epoch": 0.21323925302502197, + "epoch": 0.2944283590265279, "grad_norm": 0.0, - "learning_rate": 1.829020464792291e-05, - "loss": 0.905, + "learning_rate": 1.655165406414104e-05, + "loss": 1.2126, "step": 7525 }, { - "epoch": 0.2132675904672844, + "epoch": 0.29446748571875736, "grad_norm": 0.0, - "learning_rate": 1.828969136848671e-05, - "loss": 1.0144, + "learning_rate": 1.6550696636848823e-05, + "loss": 1.0558, "step": 7526 }, { - "epoch": 0.21329592790954688, + "epoch": 0.2945066124109868, "grad_norm": 0.0, - "learning_rate": 1.8289178019223363e-05, - "loss": 0.9595, + "learning_rate": 1.6549739104360627e-05, + "loss": 1.1344, "step": 7527 }, { - "epoch": 0.21332426535180934, + "epoch": 0.2945457391032162, "grad_norm": 0.0, - "learning_rate": 1.8288664600137187e-05, - "loss": 1.0121, + "learning_rate": 1.6548781466691828e-05, + "loss": 1.1677, "step": 7528 }, { - "epoch": 0.2133526027940718, + "epoch": 0.2945848657954456, "grad_norm": 0.0, - "learning_rate": 1.8288151111232514e-05, - "loss": 0.852, + "learning_rate": 1.6547823723857806e-05, + "loss": 1.0674, "step": 7529 }, { - "epoch": 0.21338094023633428, + "epoch": 0.29462399248767507, "grad_norm": 0.0, - "learning_rate": 1.8287637552513668e-05, - "loss": 0.9276, + "learning_rate": 1.6546865875873938e-05, + "loss": 1.131, "step": 7530 }, { - "epoch": 0.21340927767859674, + "epoch": 0.2946631191799045, "grad_norm": 0.0, - "learning_rate": 1.828712392398497e-05, - "loss": 1.0327, + "learning_rate": 1.6545907922755605e-05, + "loss": 1.2896, "step": 7531 }, { - "epoch": 0.21343761512085918, + "epoch": 0.29470224587213395, "grad_norm": 0.0, - "learning_rate": 1.8286610225650752e-05, - "loss": 1.0549, + "learning_rate": 1.65449498645182e-05, + "loss": 1.1141, "step": 7532 }, { - "epoch": 0.21346595256312165, + "epoch": 0.2947413725643634, "grad_norm": 0.0, - "learning_rate": 1.8286096457515336e-05, - "loss": 0.9188, + "learning_rate": 1.6543991701177094e-05, + "loss": 1.1124, "step": 7533 }, { - "epoch": 0.2134942900053841, + "epoch": 0.29478049925659283, "grad_norm": 0.0, - "learning_rate": 1.8285582619583056e-05, - "loss": 0.9878, + "learning_rate": 1.6543033432747687e-05, + "loss": 1.1177, "step": 7534 }, { - "epoch": 0.21352262744764658, + "epoch": 0.2948196259488223, "grad_norm": 0.0, - "learning_rate": 1.8285068711858237e-05, - "loss": 0.9358, + "learning_rate": 1.654207505924536e-05, + "loss": 1.1408, "step": 7535 }, { - "epoch": 0.21355096488990905, + "epoch": 0.2948587526410517, "grad_norm": 0.0, - "learning_rate": 1.8284554734345204e-05, - "loss": 1.0164, + "learning_rate": 1.6541116580685504e-05, + "loss": 1.1713, "step": 7536 }, { - "epoch": 0.2135793023321715, + "epoch": 0.29489787933328115, "grad_norm": 0.0, - "learning_rate": 1.828404068704829e-05, - "loss": 1.2352, + "learning_rate": 1.6540157997083516e-05, + "loss": 1.2232, "step": 7537 }, { - "epoch": 0.21360763977443395, + "epoch": 0.2949370060255106, "grad_norm": 0.0, - "learning_rate": 1.8283526569971828e-05, - "loss": 1.0633, + "learning_rate": 1.6539199308454783e-05, + "loss": 1.1476, "step": 7538 }, { - "epoch": 0.21363597721669642, + "epoch": 0.29497613271774004, "grad_norm": 0.0, - "learning_rate": 1.8283012383120148e-05, - "loss": 1.0102, + "learning_rate": 1.6538240514814702e-05, + "loss": 1.2307, "step": 7539 }, { - "epoch": 0.21366431465895888, + "epoch": 0.2950152594099695, "grad_norm": 0.0, - "learning_rate": 1.8282498126497575e-05, - "loss": 1.0306, + "learning_rate": 1.6537281616178674e-05, + "loss": 1.1466, "step": 7540 }, { - "epoch": 0.21369265210122135, + "epoch": 0.2950543861021989, "grad_norm": 0.0, - "learning_rate": 1.8281983800108446e-05, - "loss": 1.0609, + "learning_rate": 1.653632261256209e-05, + "loss": 1.288, "step": 7541 }, { - "epoch": 0.2137209895434838, + "epoch": 0.29509351279442836, "grad_norm": 0.0, - "learning_rate": 1.828146940395709e-05, - "loss": 0.9702, + "learning_rate": 1.6535363503980365e-05, + "loss": 0.9048, "step": 7542 }, { - "epoch": 0.21374932698574628, + "epoch": 0.2951326394866578, "grad_norm": 0.0, - "learning_rate": 1.8280954938047844e-05, - "loss": 1.0678, + "learning_rate": 1.6534404290448885e-05, + "loss": 1.0999, "step": 7543 }, { - "epoch": 0.21377766442800872, + "epoch": 0.29517176617888724, "grad_norm": 0.0, - "learning_rate": 1.828044040238504e-05, - "loss": 1.0107, + "learning_rate": 1.653344497198306e-05, + "loss": 1.1461, "step": 7544 }, { - "epoch": 0.21380600187027118, + "epoch": 0.2952108928711167, "grad_norm": 0.0, - "learning_rate": 1.827992579697301e-05, - "loss": 1.0427, + "learning_rate": 1.6532485548598293e-05, + "loss": 1.111, "step": 7545 }, { - "epoch": 0.21383433931253365, + "epoch": 0.2952500195633461, "grad_norm": 0.0, - "learning_rate": 1.827941112181609e-05, - "loss": 1.1002, + "learning_rate": 1.6531526020309997e-05, + "loss": 1.0596, "step": 7546 }, { - "epoch": 0.21386267675479612, + "epoch": 0.29528914625557556, "grad_norm": 0.0, - "learning_rate": 1.8278896376918617e-05, - "loss": 1.052, + "learning_rate": 1.6530566387133577e-05, + "loss": 1.2079, "step": 7547 }, { - "epoch": 0.21389101419705858, + "epoch": 0.295328272947805, "grad_norm": 0.0, - "learning_rate": 1.827838156228493e-05, - "loss": 0.9009, + "learning_rate": 1.6529606649084446e-05, + "loss": 1.059, "step": 7548 }, { - "epoch": 0.21391935163932105, + "epoch": 0.29536739964003444, "grad_norm": 0.0, - "learning_rate": 1.8277866677919354e-05, - "loss": 0.933, + "learning_rate": 1.6528646806178016e-05, + "loss": 0.9839, "step": 7549 }, { - "epoch": 0.2139476890815835, + "epoch": 0.2954065263322639, "grad_norm": 0.0, - "learning_rate": 1.8277351723826237e-05, - "loss": 0.9983, + "learning_rate": 1.6527686858429694e-05, + "loss": 1.0453, "step": 7550 }, { - "epoch": 0.21397602652384595, + "epoch": 0.2954456530244933, "grad_norm": 0.0, - "learning_rate": 1.8276836700009908e-05, - "loss": 0.8573, + "learning_rate": 1.65267268058549e-05, + "loss": 1.214, "step": 7551 }, { - "epoch": 0.21400436396610842, + "epoch": 0.29548477971672277, "grad_norm": 0.0, - "learning_rate": 1.8276321606474713e-05, - "loss": 1.0485, + "learning_rate": 1.652576664846906e-05, + "loss": 1.1442, "step": 7552 }, { - "epoch": 0.21403270140837088, + "epoch": 0.2955239064089522, "grad_norm": 0.0, - "learning_rate": 1.8275806443224987e-05, - "loss": 0.9672, + "learning_rate": 1.6524806386287578e-05, + "loss": 1.1362, "step": 7553 }, { - "epoch": 0.21406103885063335, + "epoch": 0.29556303310118165, "grad_norm": 0.0, - "learning_rate": 1.827529121026507e-05, - "loss": 1.0991, + "learning_rate": 1.652384601932588e-05, + "loss": 1.0031, "step": 7554 }, { - "epoch": 0.21408937629289582, + "epoch": 0.2956021597934111, "grad_norm": 0.0, - "learning_rate": 1.8274775907599304e-05, - "loss": 0.9799, + "learning_rate": 1.6522885547599394e-05, + "loss": 1.0789, "step": 7555 }, { - "epoch": 0.21411771373515825, + "epoch": 0.29564128648564053, "grad_norm": 0.0, - "learning_rate": 1.8274260535232028e-05, - "loss": 0.892, + "learning_rate": 1.652192497112354e-05, + "loss": 1.1255, "step": 7556 }, { - "epoch": 0.21414605117742072, + "epoch": 0.2956804131778699, "grad_norm": 0.0, - "learning_rate": 1.827374509316758e-05, - "loss": 1.0403, + "learning_rate": 1.652096428991374e-05, + "loss": 1.2069, "step": 7557 }, { - "epoch": 0.2141743886196832, + "epoch": 0.29571953987009936, "grad_norm": 0.0, - "learning_rate": 1.827322958141031e-05, - "loss": 0.9053, + "learning_rate": 1.652000350398543e-05, + "loss": 0.9448, "step": 7558 }, { - "epoch": 0.21420272606194565, + "epoch": 0.2957586665623288, "grad_norm": 0.0, - "learning_rate": 1.8272713999964547e-05, - "loss": 0.9575, + "learning_rate": 1.6519042613354027e-05, + "loss": 1.2164, "step": 7559 }, { - "epoch": 0.21423106350420812, + "epoch": 0.29579779325455824, "grad_norm": 0.0, - "learning_rate": 1.8272198348834648e-05, - "loss": 0.9936, + "learning_rate": 1.6518081618034973e-05, + "loss": 1.1307, "step": 7560 }, { - "epoch": 0.21425940094647059, + "epoch": 0.2958369199467877, "grad_norm": 0.0, - "learning_rate": 1.827168262802495e-05, - "loss": 1.0251, + "learning_rate": 1.6517120518043693e-05, + "loss": 1.1627, "step": 7561 }, { - "epoch": 0.21428773838873302, + "epoch": 0.2958760466390171, "grad_norm": 0.0, - "learning_rate": 1.8271166837539794e-05, - "loss": 1.0261, + "learning_rate": 1.651615931339563e-05, + "loss": 1.0665, "step": 7562 }, { - "epoch": 0.2143160758309955, + "epoch": 0.29591517333124656, "grad_norm": 0.0, - "learning_rate": 1.8270650977383533e-05, - "loss": 0.967, + "learning_rate": 1.6515198004106204e-05, + "loss": 1.315, "step": 7563 }, { - "epoch": 0.21434441327325796, + "epoch": 0.295954300023476, "grad_norm": 0.0, - "learning_rate": 1.8270135047560506e-05, - "loss": 0.988, + "learning_rate": 1.6514236590190867e-05, + "loss": 1.1621, "step": 7564 }, { - "epoch": 0.21437275071552042, + "epoch": 0.29599342671570544, "grad_norm": 0.0, - "learning_rate": 1.8269619048075056e-05, - "loss": 0.942, + "learning_rate": 1.6513275071665057e-05, + "loss": 1.2134, "step": 7565 }, { - "epoch": 0.2144010881577829, + "epoch": 0.2960325534079349, "grad_norm": 0.0, - "learning_rate": 1.8269102978931542e-05, - "loss": 0.9903, + "learning_rate": 1.6512313448544207e-05, + "loss": 1.1129, "step": 7566 }, { - "epoch": 0.21442942560004535, + "epoch": 0.2960716801001643, "grad_norm": 0.0, - "learning_rate": 1.8268586840134296e-05, - "loss": 1.1516, + "learning_rate": 1.6511351720843763e-05, + "loss": 1.0866, "step": 7567 }, { - "epoch": 0.2144577630423078, + "epoch": 0.29611080679239377, "grad_norm": 0.0, - "learning_rate": 1.826807063168768e-05, - "loss": 0.941, + "learning_rate": 1.651038988857917e-05, + "loss": 1.1797, "step": 7568 }, { - "epoch": 0.21448610048457026, + "epoch": 0.2961499334846232, "grad_norm": 0.0, - "learning_rate": 1.8267554353596027e-05, - "loss": 1.05, + "learning_rate": 1.6509427951765876e-05, + "loss": 1.2188, "step": 7569 }, { - "epoch": 0.21451443792683272, + "epoch": 0.29618906017685265, "grad_norm": 0.0, - "learning_rate": 1.8267038005863698e-05, - "loss": 1.0173, + "learning_rate": 1.650846591041932e-05, + "loss": 1.1021, "step": 7570 }, { - "epoch": 0.2145427753690952, + "epoch": 0.2962281868690821, "grad_norm": 0.0, - "learning_rate": 1.8266521588495035e-05, - "loss": 0.9594, + "learning_rate": 1.6507503764554963e-05, + "loss": 1.1697, "step": 7571 }, { - "epoch": 0.21457111281135766, + "epoch": 0.29626731356131153, "grad_norm": 0.0, - "learning_rate": 1.8266005101494393e-05, - "loss": 0.9703, + "learning_rate": 1.6506541514188244e-05, + "loss": 0.9818, "step": 7572 }, { - "epoch": 0.21459945025362012, + "epoch": 0.29630644025354097, "grad_norm": 0.0, - "learning_rate": 1.826548854486612e-05, - "loss": 0.9967, + "learning_rate": 1.6505579159334628e-05, + "loss": 1.1705, "step": 7573 }, { - "epoch": 0.21462778769588256, + "epoch": 0.2963455669457704, "grad_norm": 0.0, - "learning_rate": 1.826497191861457e-05, - "loss": 0.9911, + "learning_rate": 1.6504616700009557e-05, + "loss": 1.062, "step": 7574 }, { - "epoch": 0.21465612513814503, + "epoch": 0.29638469363799985, "grad_norm": 0.0, - "learning_rate": 1.826445522274409e-05, - "loss": 0.9897, + "learning_rate": 1.65036541362285e-05, + "loss": 1.2003, "step": 7575 }, { - "epoch": 0.2146844625804075, + "epoch": 0.2964238203302293, "grad_norm": 0.0, - "learning_rate": 1.8263938457259038e-05, - "loss": 1.0613, + "learning_rate": 1.65026914680069e-05, + "loss": 1.1359, "step": 7576 }, { - "epoch": 0.21471280002266996, + "epoch": 0.29646294702245873, "grad_norm": 0.0, - "learning_rate": 1.8263421622163758e-05, - "loss": 1.0057, + "learning_rate": 1.6501728695360224e-05, + "loss": 1.1331, "step": 7577 }, { - "epoch": 0.21474113746493242, + "epoch": 0.2965020737146882, "grad_norm": 0.0, - "learning_rate": 1.826290471746261e-05, - "loss": 1.0489, + "learning_rate": 1.6500765818303935e-05, + "loss": 1.0292, "step": 7578 }, { - "epoch": 0.21476947490719486, + "epoch": 0.2965412004069176, "grad_norm": 0.0, - "learning_rate": 1.826238774315995e-05, - "loss": 0.9601, + "learning_rate": 1.649980283685349e-05, + "loss": 1.1134, "step": 7579 }, { - "epoch": 0.21479781234945733, + "epoch": 0.29658032709914706, "grad_norm": 0.0, - "learning_rate": 1.8261870699260128e-05, - "loss": 0.958, + "learning_rate": 1.6498839751024357e-05, + "loss": 1.0035, "step": 7580 }, { - "epoch": 0.2148261497917198, + "epoch": 0.2966194537913765, "grad_norm": 0.0, - "learning_rate": 1.8261353585767504e-05, - "loss": 1.0914, + "learning_rate": 1.6497876560832e-05, + "loss": 1.2873, "step": 7581 }, { - "epoch": 0.21485448723398226, + "epoch": 0.29665858048360594, "grad_norm": 0.0, - "learning_rate": 1.8260836402686427e-05, - "loss": 0.9875, + "learning_rate": 1.6496913266291895e-05, + "loss": 1.1442, "step": 7582 }, { - "epoch": 0.21488282467624473, + "epoch": 0.2966977071758354, "grad_norm": 0.0, - "learning_rate": 1.8260319150021262e-05, - "loss": 0.9876, + "learning_rate": 1.6495949867419495e-05, + "loss": 1.0667, "step": 7583 }, { - "epoch": 0.2149111621185072, + "epoch": 0.2967368338680648, "grad_norm": 0.0, - "learning_rate": 1.8259801827776358e-05, - "loss": 1.0512, + "learning_rate": 1.6494986364230285e-05, + "loss": 1.1324, "step": 7584 }, { - "epoch": 0.21493949956076963, + "epoch": 0.2967759605602942, "grad_norm": 0.0, - "learning_rate": 1.8259284435956077e-05, - "loss": 0.9783, + "learning_rate": 1.649402275673973e-05, + "loss": 1.2008, "step": 7585 }, { - "epoch": 0.2149678370030321, + "epoch": 0.29681508725252365, "grad_norm": 0.0, - "learning_rate": 1.8258766974564778e-05, - "loss": 1.0018, + "learning_rate": 1.6493059044963312e-05, + "loss": 1.1118, "step": 7586 }, { - "epoch": 0.21499617444529456, + "epoch": 0.2968542139447531, "grad_norm": 0.0, - "learning_rate": 1.8258249443606813e-05, - "loss": 1.0005, + "learning_rate": 1.6492095228916496e-05, + "loss": 1.2047, "step": 7587 }, { - "epoch": 0.21502451188755703, + "epoch": 0.29689334063698253, "grad_norm": 0.0, - "learning_rate": 1.825773184308655e-05, - "loss": 0.9908, + "learning_rate": 1.649113130861477e-05, + "loss": 1.0745, "step": 7588 }, { - "epoch": 0.2150528493298195, + "epoch": 0.29693246732921197, "grad_norm": 0.0, - "learning_rate": 1.8257214173008347e-05, - "loss": 1.0349, + "learning_rate": 1.6490167284073607e-05, + "loss": 1.075, "step": 7589 }, { - "epoch": 0.21508118677208196, + "epoch": 0.2969715940214414, "grad_norm": 0.0, - "learning_rate": 1.8256696433376557e-05, - "loss": 0.9329, + "learning_rate": 1.6489203155308488e-05, + "loss": 1.125, "step": 7590 }, { - "epoch": 0.2151095242143444, + "epoch": 0.29701072071367085, "grad_norm": 0.0, - "learning_rate": 1.825617862419555e-05, - "loss": 1.0508, + "learning_rate": 1.64882389223349e-05, + "loss": 1.1134, "step": 7591 }, { - "epoch": 0.21513786165660687, + "epoch": 0.2970498474059003, "grad_norm": 0.0, - "learning_rate": 1.8255660745469685e-05, - "loss": 0.9225, + "learning_rate": 1.6487274585168327e-05, + "loss": 1.1016, "step": 7592 }, { - "epoch": 0.21516619909886933, + "epoch": 0.29708897409812973, "grad_norm": 0.0, - "learning_rate": 1.8255142797203326e-05, - "loss": 0.9323, + "learning_rate": 1.6486310143824252e-05, + "loss": 1.0763, "step": 7593 }, { - "epoch": 0.2151945365411318, + "epoch": 0.2971281007903592, "grad_norm": 0.0, - "learning_rate": 1.8254624779400828e-05, - "loss": 0.8434, + "learning_rate": 1.648534559831816e-05, + "loss": 1.0054, "step": 7594 }, { - "epoch": 0.21522287398339426, + "epoch": 0.2971672274825886, "grad_norm": 0.0, - "learning_rate": 1.8254106692066567e-05, - "loss": 0.9993, + "learning_rate": 1.6484380948665547e-05, + "loss": 1.0709, "step": 7595 }, { - "epoch": 0.21525121142565673, + "epoch": 0.29720635417481805, "grad_norm": 0.0, - "learning_rate": 1.8253588535204894e-05, - "loss": 1.07, + "learning_rate": 1.6483416194881904e-05, + "loss": 1.1119, "step": 7596 }, { - "epoch": 0.21527954886791917, + "epoch": 0.2972454808670475, "grad_norm": 0.0, - "learning_rate": 1.8253070308820184e-05, - "loss": 1.0101, + "learning_rate": 1.648245133698272e-05, + "loss": 1.2091, "step": 7597 }, { - "epoch": 0.21530788631018163, + "epoch": 0.29728460755927694, "grad_norm": 0.0, - "learning_rate": 1.82525520129168e-05, - "loss": 1.0695, + "learning_rate": 1.6481486374983488e-05, + "loss": 1.1998, "step": 7598 }, { - "epoch": 0.2153362237524441, + "epoch": 0.2973237342515064, "grad_norm": 0.0, - "learning_rate": 1.82520336474991e-05, - "loss": 1.0101, + "learning_rate": 1.6480521308899705e-05, + "loss": 1.1686, "step": 7599 }, { - "epoch": 0.21536456119470657, + "epoch": 0.2973628609437358, "grad_norm": 0.0, - "learning_rate": 1.8251515212571457e-05, - "loss": 0.9377, + "learning_rate": 1.6479556138746877e-05, + "loss": 1.2815, "step": 7600 }, { - "epoch": 0.21539289863696903, + "epoch": 0.29740198763596526, "grad_norm": 0.0, - "learning_rate": 1.825099670813824e-05, - "loss": 0.8748, + "learning_rate": 1.647859086454049e-05, + "loss": 0.9929, "step": 7601 }, { - "epoch": 0.2154212360792315, + "epoch": 0.2974411143281947, "grad_norm": 0.0, - "learning_rate": 1.8250478134203816e-05, - "loss": 0.9289, + "learning_rate": 1.6477625486296057e-05, + "loss": 1.2183, "step": 7602 }, { - "epoch": 0.21544957352149394, + "epoch": 0.29748024102042414, "grad_norm": 0.0, - "learning_rate": 1.8249959490772547e-05, - "loss": 1.0431, + "learning_rate": 1.6476660004029073e-05, + "loss": 1.1799, "step": 7603 }, { - "epoch": 0.2154779109637564, + "epoch": 0.2975193677126536, "grad_norm": 0.0, - "learning_rate": 1.8249440777848805e-05, - "loss": 1.1104, + "learning_rate": 1.6475694417755046e-05, + "loss": 1.0211, "step": 7604 }, { - "epoch": 0.21550624840601887, + "epoch": 0.297558494404883, "grad_norm": 0.0, - "learning_rate": 1.824892199543696e-05, - "loss": 1.0036, + "learning_rate": 1.647472872748948e-05, + "loss": 1.1697, "step": 7605 }, { - "epoch": 0.21553458584828133, + "epoch": 0.29759762109711246, "grad_norm": 0.0, - "learning_rate": 1.8248403143541386e-05, - "loss": 0.992, + "learning_rate": 1.6473762933247885e-05, + "loss": 1.1366, "step": 7606 }, { - "epoch": 0.2155629232905438, + "epoch": 0.2976367477893419, "grad_norm": 0.0, - "learning_rate": 1.8247884222166447e-05, - "loss": 1.0168, + "learning_rate": 1.6472797035045766e-05, + "loss": 1.0972, "step": 7607 }, { - "epoch": 0.21559126073280627, + "epoch": 0.29767587448157135, "grad_norm": 0.0, - "learning_rate": 1.8247365231316517e-05, - "loss": 1.0245, + "learning_rate": 1.6471831032898643e-05, + "loss": 1.103, "step": 7608 }, { - "epoch": 0.2156195981750687, + "epoch": 0.2977150011738008, "grad_norm": 0.0, - "learning_rate": 1.8246846170995964e-05, - "loss": 1.0228, + "learning_rate": 1.647086492682202e-05, + "loss": 1.1198, "step": 7609 }, { - "epoch": 0.21564793561733117, + "epoch": 0.2977541278660302, "grad_norm": 0.0, - "learning_rate": 1.8246327041209165e-05, - "loss": 0.93, + "learning_rate": 1.6469898716831414e-05, + "loss": 1.0739, "step": 7610 }, { - "epoch": 0.21567627305959364, + "epoch": 0.29779325455825967, "grad_norm": 0.0, - "learning_rate": 1.8245807841960494e-05, - "loss": 1.0421, + "learning_rate": 1.6468932402942343e-05, + "loss": 1.0953, "step": 7611 }, { - "epoch": 0.2157046105018561, + "epoch": 0.2978323812504891, "grad_norm": 0.0, - "learning_rate": 1.824528857325432e-05, - "loss": 1.0973, + "learning_rate": 1.6467965985170326e-05, + "loss": 1.134, "step": 7612 }, { - "epoch": 0.21573294794411857, + "epoch": 0.29787150794271855, "grad_norm": 0.0, - "learning_rate": 1.8244769235095018e-05, - "loss": 0.9381, + "learning_rate": 1.6466999463530877e-05, + "loss": 1.2233, "step": 7613 }, { - "epoch": 0.21576128538638104, + "epoch": 0.29791063463494794, "grad_norm": 0.0, - "learning_rate": 1.8244249827486962e-05, - "loss": 1.0167, + "learning_rate": 1.646603283803952e-05, + "loss": 0.9492, "step": 7614 }, { - "epoch": 0.21578962282864347, + "epoch": 0.2979497613271774, "grad_norm": 0.0, - "learning_rate": 1.8243730350434527e-05, - "loss": 1.0949, + "learning_rate": 1.646506610871178e-05, + "loss": 1.0644, "step": 7615 }, { - "epoch": 0.21581796027090594, + "epoch": 0.2979888880194068, "grad_norm": 0.0, - "learning_rate": 1.8243210803942097e-05, - "loss": 1.0285, + "learning_rate": 1.646409927556318e-05, + "loss": 1.0883, "step": 7616 }, { - "epoch": 0.2158462977131684, + "epoch": 0.29802801471163626, "grad_norm": 0.0, - "learning_rate": 1.8242691188014032e-05, - "loss": 1.0537, + "learning_rate": 1.6463132338609243e-05, + "loss": 1.1677, "step": 7617 }, { - "epoch": 0.21587463515543087, + "epoch": 0.2980671414038657, "grad_norm": 0.0, - "learning_rate": 1.8242171502654725e-05, - "loss": 1.0085, + "learning_rate": 1.6462165297865503e-05, + "loss": 1.1115, "step": 7618 }, { - "epoch": 0.21590297259769334, + "epoch": 0.29810626809609514, "grad_norm": 0.0, - "learning_rate": 1.824165174786854e-05, - "loss": 1.0078, + "learning_rate": 1.646119815334748e-05, + "loss": 1.0625, "step": 7619 }, { - "epoch": 0.2159313100399558, + "epoch": 0.2981453947883246, "grad_norm": 0.0, - "learning_rate": 1.824113192365987e-05, - "loss": 1.0887, + "learning_rate": 1.6460230905070714e-05, + "loss": 0.9795, "step": 7620 }, { - "epoch": 0.21595964748221824, + "epoch": 0.298184521480554, "grad_norm": 0.0, - "learning_rate": 1.8240612030033084e-05, - "loss": 1.002, + "learning_rate": 1.6459263553050738e-05, + "loss": 1.1523, "step": 7621 }, { - "epoch": 0.2159879849244807, + "epoch": 0.29822364817278346, "grad_norm": 0.0, - "learning_rate": 1.8240092066992557e-05, - "loss": 1.0049, + "learning_rate": 1.645829609730308e-05, + "loss": 1.0385, "step": 7622 }, { - "epoch": 0.21601632236674317, + "epoch": 0.2982627748650129, "grad_norm": 0.0, - "learning_rate": 1.8239572034542682e-05, - "loss": 0.9979, + "learning_rate": 1.6457328537843278e-05, + "loss": 1.1274, "step": 7623 }, { - "epoch": 0.21604465980900564, + "epoch": 0.29830190155724234, "grad_norm": 0.0, - "learning_rate": 1.8239051932687828e-05, - "loss": 1.0731, + "learning_rate": 1.6456360874686873e-05, + "loss": 1.1537, "step": 7624 }, { - "epoch": 0.2160729972512681, + "epoch": 0.2983410282494718, "grad_norm": 0.0, - "learning_rate": 1.823853176143238e-05, - "loss": 0.9778, + "learning_rate": 1.6455393107849404e-05, + "loss": 1.1749, "step": 7625 }, { - "epoch": 0.21610133469353057, + "epoch": 0.2983801549417012, "grad_norm": 0.0, - "learning_rate": 1.8238011520780722e-05, - "loss": 0.9714, + "learning_rate": 1.645442523734641e-05, + "loss": 1.0217, "step": 7626 }, { - "epoch": 0.216129672135793, + "epoch": 0.29841928163393067, "grad_norm": 0.0, - "learning_rate": 1.823749121073723e-05, - "loss": 0.9631, + "learning_rate": 1.645345726319343e-05, + "loss": 1.2006, "step": 7627 }, { - "epoch": 0.21615800957805548, + "epoch": 0.2984584083261601, "grad_norm": 0.0, - "learning_rate": 1.8236970831306293e-05, - "loss": 0.9383, + "learning_rate": 1.645248918540602e-05, + "loss": 1.2175, "step": 7628 }, { - "epoch": 0.21618634702031794, + "epoch": 0.29849753501838955, "grad_norm": 0.0, - "learning_rate": 1.8236450382492293e-05, - "loss": 0.9769, + "learning_rate": 1.6451521003999714e-05, + "loss": 1.0976, "step": 7629 }, { - "epoch": 0.2162146844625804, + "epoch": 0.298536661710619, "grad_norm": 0.0, - "learning_rate": 1.823592986429961e-05, - "loss": 0.8668, + "learning_rate": 1.645055271899007e-05, + "loss": 1.1414, "step": 7630 }, { - "epoch": 0.21624302190484287, + "epoch": 0.29857578840284843, "grad_norm": 0.0, - "learning_rate": 1.8235409276732633e-05, - "loss": 1.0272, + "learning_rate": 1.6449584330392627e-05, + "loss": 1.1054, "step": 7631 }, { - "epoch": 0.21627135934710534, + "epoch": 0.29861491509507787, "grad_norm": 0.0, - "learning_rate": 1.8234888619795747e-05, - "loss": 0.9883, + "learning_rate": 1.6448615838222942e-05, + "loss": 1.0358, "step": 7632 }, { - "epoch": 0.21629969678936778, + "epoch": 0.2986540417873073, "grad_norm": 0.0, - "learning_rate": 1.8234367893493334e-05, - "loss": 1.0593, + "learning_rate": 1.644764724249657e-05, + "loss": 1.0242, "step": 7633 }, { - "epoch": 0.21632803423163025, + "epoch": 0.29869316847953675, "grad_norm": 0.0, - "learning_rate": 1.823384709782978e-05, - "loss": 1.0166, + "learning_rate": 1.6446678543229066e-05, + "loss": 0.9194, "step": 7634 }, { - "epoch": 0.2163563716738927, + "epoch": 0.2987322951717662, "grad_norm": 0.0, - "learning_rate": 1.823332623280948e-05, - "loss": 0.9371, + "learning_rate": 1.6445709740435974e-05, + "loss": 1.0069, "step": 7635 }, { - "epoch": 0.21638470911615518, + "epoch": 0.29877142186399563, "grad_norm": 0.0, - "learning_rate": 1.8232805298436815e-05, - "loss": 1.0087, + "learning_rate": 1.6444740834132867e-05, + "loss": 1.1148, "step": 7636 }, { - "epoch": 0.21641304655841764, + "epoch": 0.2988105485562251, "grad_norm": 0.0, - "learning_rate": 1.823228429471617e-05, - "loss": 0.9344, + "learning_rate": 1.6443771824335294e-05, + "loss": 1.1803, "step": 7637 }, { - "epoch": 0.2164413840006801, + "epoch": 0.2988496752484545, "grad_norm": 0.0, - "learning_rate": 1.823176322165194e-05, - "loss": 1.0602, + "learning_rate": 1.6442802711058826e-05, + "loss": 1.1365, "step": 7638 }, { - "epoch": 0.21646972144294255, + "epoch": 0.29888880194068396, "grad_norm": 0.0, - "learning_rate": 1.8231242079248512e-05, - "loss": 0.9469, + "learning_rate": 1.644183349431901e-05, + "loss": 1.0208, "step": 7639 }, { - "epoch": 0.216498058885205, + "epoch": 0.2989279286329134, "grad_norm": 0.0, - "learning_rate": 1.8230720867510273e-05, - "loss": 0.9845, + "learning_rate": 1.644086417413143e-05, + "loss": 0.9985, "step": 7640 }, { - "epoch": 0.21652639632746748, + "epoch": 0.29896705532514284, "grad_norm": 0.0, - "learning_rate": 1.823019958644162e-05, - "loss": 1.0736, + "learning_rate": 1.6439894750511634e-05, + "loss": 1.0857, "step": 7641 }, { - "epoch": 0.21655473376972995, + "epoch": 0.2990061820173722, "grad_norm": 0.0, - "learning_rate": 1.8229678236046936e-05, - "loss": 1.1113, + "learning_rate": 1.6438925223475204e-05, + "loss": 1.1107, "step": 7642 }, { - "epoch": 0.2165830712119924, + "epoch": 0.29904530870960166, "grad_norm": 0.0, - "learning_rate": 1.8229156816330616e-05, - "loss": 1.0503, + "learning_rate": 1.64379555930377e-05, + "loss": 1.0278, "step": 7643 }, { - "epoch": 0.21661140865425488, + "epoch": 0.2990844354018311, "grad_norm": 0.0, - "learning_rate": 1.8228635327297054e-05, - "loss": 0.983, + "learning_rate": 1.6436985859214698e-05, + "loss": 1.0351, "step": 7644 }, { - "epoch": 0.21663974609651732, + "epoch": 0.29912356209406055, "grad_norm": 0.0, - "learning_rate": 1.822811376895064e-05, - "loss": 1.0477, + "learning_rate": 1.643601602202177e-05, + "loss": 1.1137, "step": 7645 }, { - "epoch": 0.21666808353877978, + "epoch": 0.29916268878629, "grad_norm": 0.0, - "learning_rate": 1.8227592141295768e-05, - "loss": 1.0392, + "learning_rate": 1.6435046081474487e-05, + "loss": 1.1425, "step": 7646 }, { - "epoch": 0.21669642098104225, + "epoch": 0.29920181547851943, "grad_norm": 0.0, - "learning_rate": 1.8227070444336833e-05, - "loss": 1.0167, + "learning_rate": 1.643407603758843e-05, + "loss": 1.1404, "step": 7647 }, { - "epoch": 0.21672475842330471, + "epoch": 0.29924094217074887, "grad_norm": 0.0, - "learning_rate": 1.822654867807823e-05, - "loss": 1.1311, + "learning_rate": 1.6433105890379168e-05, + "loss": 1.0308, "step": 7648 }, { - "epoch": 0.21675309586556718, + "epoch": 0.2992800688629783, "grad_norm": 0.0, - "learning_rate": 1.8226026842524353e-05, - "loss": 1.0398, + "learning_rate": 1.6432135639862296e-05, + "loss": 1.1999, "step": 7649 }, { - "epoch": 0.21678143330782965, + "epoch": 0.29931919555520775, "grad_norm": 0.0, - "learning_rate": 1.8225504937679592e-05, - "loss": 0.9705, + "learning_rate": 1.6431165286053377e-05, + "loss": 1.1871, "step": 7650 }, { - "epoch": 0.21680977075009208, + "epoch": 0.2993583222474372, "grad_norm": 0.0, - "learning_rate": 1.8224982963548353e-05, - "loss": 0.9147, + "learning_rate": 1.6430194828968005e-05, + "loss": 1.0984, "step": 7651 }, { - "epoch": 0.21683810819235455, + "epoch": 0.29939744893966663, "grad_norm": 0.0, - "learning_rate": 1.8224460920135027e-05, - "loss": 0.9839, + "learning_rate": 1.6429224268621765e-05, + "loss": 1.1257, "step": 7652 }, { - "epoch": 0.21686644563461702, + "epoch": 0.2994365756318961, "grad_norm": 0.0, - "learning_rate": 1.8223938807444014e-05, - "loss": 1.0349, + "learning_rate": 1.6428253605030238e-05, + "loss": 1.2504, "step": 7653 }, { - "epoch": 0.21689478307687948, + "epoch": 0.2994757023241255, "grad_norm": 0.0, - "learning_rate": 1.8223416625479706e-05, - "loss": 0.96, + "learning_rate": 1.642728283820901e-05, + "loss": 1.0585, "step": 7654 }, { - "epoch": 0.21692312051914195, + "epoch": 0.29951482901635496, "grad_norm": 0.0, - "learning_rate": 1.822289437424651e-05, - "loss": 1.0301, + "learning_rate": 1.6426311968173677e-05, + "loss": 0.9985, "step": 7655 }, { - "epoch": 0.21695145796140441, + "epoch": 0.2995539557085844, "grad_norm": 0.0, - "learning_rate": 1.822237205374882e-05, - "loss": 0.9446, + "learning_rate": 1.6425340994939824e-05, + "loss": 1.1414, "step": 7656 }, { - "epoch": 0.21697979540366685, + "epoch": 0.29959308240081384, "grad_norm": 0.0, - "learning_rate": 1.822184966399104e-05, - "loss": 1.0027, + "learning_rate": 1.642436991852305e-05, + "loss": 1.0953, "step": 7657 }, { - "epoch": 0.21700813284592932, + "epoch": 0.2996322090930433, "grad_norm": 0.0, - "learning_rate": 1.8221327204977564e-05, - "loss": 1.1851, + "learning_rate": 1.642339873893894e-05, + "loss": 1.1583, "step": 7658 }, { - "epoch": 0.21703647028819179, + "epoch": 0.2996713357852727, "grad_norm": 0.0, - "learning_rate": 1.8220804676712797e-05, - "loss": 1.0309, + "learning_rate": 1.6422427456203097e-05, + "loss": 1.1514, "step": 7659 }, { - "epoch": 0.21706480773045425, + "epoch": 0.29971046247750216, "grad_norm": 0.0, - "learning_rate": 1.8220282079201138e-05, - "loss": 0.9838, + "learning_rate": 1.642145607033112e-05, + "loss": 1.166, "step": 7660 }, { - "epoch": 0.21709314517271672, + "epoch": 0.2997495891697316, "grad_norm": 0.0, - "learning_rate": 1.8219759412446992e-05, - "loss": 1.0081, + "learning_rate": 1.64204845813386e-05, + "loss": 1.1391, "step": 7661 }, { - "epoch": 0.21712148261497918, + "epoch": 0.29978871586196104, "grad_norm": 0.0, - "learning_rate": 1.821923667645476e-05, - "loss": 0.947, + "learning_rate": 1.6419512989241147e-05, + "loss": 1.0638, "step": 7662 }, { - "epoch": 0.21714982005724162, + "epoch": 0.2998278425541905, "grad_norm": 0.0, - "learning_rate": 1.8218713871228844e-05, - "loss": 1.0385, + "learning_rate": 1.6418541294054356e-05, + "loss": 1.0897, "step": 7663 }, { - "epoch": 0.2171781574995041, + "epoch": 0.2998669692464199, "grad_norm": 0.0, - "learning_rate": 1.821819099677365e-05, - "loss": 0.8885, + "learning_rate": 1.6417569495793838e-05, + "loss": 1.158, "step": 7664 }, { - "epoch": 0.21720649494176655, + "epoch": 0.29990609593864936, "grad_norm": 0.0, - "learning_rate": 1.8217668053093583e-05, - "loss": 0.9212, + "learning_rate": 1.6416597594475193e-05, + "loss": 1.1767, "step": 7665 }, { - "epoch": 0.21723483238402902, + "epoch": 0.2999452226308788, "grad_norm": 0.0, - "learning_rate": 1.8217145040193043e-05, - "loss": 1.0135, + "learning_rate": 1.641562559011403e-05, + "loss": 0.9315, "step": 7666 }, { - "epoch": 0.21726316982629149, + "epoch": 0.29998434932310825, "grad_norm": 0.0, - "learning_rate": 1.821662195807644e-05, - "loss": 0.9392, + "learning_rate": 1.6414653482725962e-05, + "loss": 1.2816, "step": 7667 }, { - "epoch": 0.21729150726855395, + "epoch": 0.3000234760153377, "grad_norm": 0.0, - "learning_rate": 1.821609880674818e-05, - "loss": 1.0592, + "learning_rate": 1.64136812723266e-05, + "loss": 1.0499, "step": 7668 }, { - "epoch": 0.2173198447108164, + "epoch": 0.3000626027075671, "grad_norm": 0.0, - "learning_rate": 1.8215575586212672e-05, - "loss": 1.0957, + "learning_rate": 1.6412708958931547e-05, + "loss": 1.1439, "step": 7669 }, { - "epoch": 0.21734818215307886, + "epoch": 0.30010172939979657, "grad_norm": 0.0, - "learning_rate": 1.8215052296474315e-05, - "loss": 1.0336, + "learning_rate": 1.641173654255643e-05, + "loss": 1.1754, "step": 7670 }, { - "epoch": 0.21737651959534132, + "epoch": 0.30014085609202595, "grad_norm": 0.0, - "learning_rate": 1.8214528937537523e-05, - "loss": 1.0242, + "learning_rate": 1.6410764023216855e-05, + "loss": 1.2428, "step": 7671 }, { - "epoch": 0.2174048570376038, + "epoch": 0.3001799827842554, "grad_norm": 0.0, - "learning_rate": 1.8214005509406708e-05, - "loss": 1.0616, + "learning_rate": 1.6409791400928445e-05, + "loss": 1.0124, "step": 7672 }, { - "epoch": 0.21743319447986625, + "epoch": 0.30021910947648484, "grad_norm": 0.0, - "learning_rate": 1.8213482012086268e-05, - "loss": 0.9572, + "learning_rate": 1.6408818675706812e-05, + "loss": 1.1736, "step": 7673 }, { - "epoch": 0.21746153192212872, + "epoch": 0.3002582361687143, "grad_norm": 0.0, - "learning_rate": 1.8212958445580623e-05, - "loss": 1.1776, + "learning_rate": 1.6407845847567586e-05, + "loss": 0.9424, "step": 7674 }, { - "epoch": 0.21748986936439116, + "epoch": 0.3002973628609437, "grad_norm": 0.0, - "learning_rate": 1.8212434809894176e-05, - "loss": 0.9799, + "learning_rate": 1.6406872916526384e-05, + "loss": 1.223, "step": 7675 }, { - "epoch": 0.21751820680665362, + "epoch": 0.30033648955317316, "grad_norm": 0.0, - "learning_rate": 1.8211911105031344e-05, - "loss": 1.0463, + "learning_rate": 1.6405899882598833e-05, + "loss": 1.1502, "step": 7676 }, { - "epoch": 0.2175465442489161, + "epoch": 0.3003756162454026, "grad_norm": 0.0, - "learning_rate": 1.8211387330996536e-05, - "loss": 1.1146, + "learning_rate": 1.6404926745800554e-05, + "loss": 1.178, "step": 7677 }, { - "epoch": 0.21757488169117856, + "epoch": 0.30041474293763204, "grad_norm": 0.0, - "learning_rate": 1.821086348779416e-05, - "loss": 1.0505, + "learning_rate": 1.640395350614718e-05, + "loss": 1.1374, "step": 7678 }, { - "epoch": 0.21760321913344102, + "epoch": 0.3004538696298615, "grad_norm": 0.0, - "learning_rate": 1.8210339575428632e-05, - "loss": 0.9878, + "learning_rate": 1.6402980163654335e-05, + "loss": 1.2178, "step": 7679 }, { - "epoch": 0.2176315565757035, + "epoch": 0.3004929963220909, "grad_norm": 0.0, - "learning_rate": 1.8209815593904365e-05, - "loss": 1.0798, + "learning_rate": 1.6402006718337654e-05, + "loss": 1.1256, "step": 7680 }, { - "epoch": 0.21765989401796593, + "epoch": 0.30053212301432036, "grad_norm": 0.0, - "learning_rate": 1.8209291543225774e-05, - "loss": 1.0375, + "learning_rate": 1.6401033170212763e-05, + "loss": 1.1321, "step": 7681 }, { - "epoch": 0.2176882314602284, + "epoch": 0.3005712497065498, "grad_norm": 0.0, - "learning_rate": 1.8208767423397273e-05, - "loss": 0.9504, + "learning_rate": 1.6400059519295304e-05, + "loss": 1.185, "step": 7682 }, { - "epoch": 0.21771656890249086, + "epoch": 0.30061037639877924, "grad_norm": 0.0, - "learning_rate": 1.8208243234423274e-05, - "loss": 0.9681, + "learning_rate": 1.639908576560091e-05, + "loss": 1.1976, "step": 7683 }, { - "epoch": 0.21774490634475333, + "epoch": 0.3006495030910087, "grad_norm": 0.0, - "learning_rate": 1.8207718976308194e-05, - "loss": 1.1966, + "learning_rate": 1.6398111909145214e-05, + "loss": 1.0917, "step": 7684 }, { - "epoch": 0.2177732437870158, + "epoch": 0.3006886297832381, "grad_norm": 0.0, - "learning_rate": 1.820719464905645e-05, - "loss": 1.0814, + "learning_rate": 1.639713794994386e-05, + "loss": 1.0363, "step": 7685 }, { - "epoch": 0.21780158122927826, + "epoch": 0.30072775647546757, "grad_norm": 0.0, - "learning_rate": 1.8206670252672457e-05, - "loss": 1.0188, + "learning_rate": 1.6396163888012485e-05, + "loss": 1.0622, "step": 7686 }, { - "epoch": 0.2178299186715407, + "epoch": 0.300766883167697, "grad_norm": 0.0, - "learning_rate": 1.8206145787160635e-05, - "loss": 1.0657, + "learning_rate": 1.6395189723366735e-05, + "loss": 1.145, "step": 7687 }, { - "epoch": 0.21785825611380316, + "epoch": 0.30080600985992645, "grad_norm": 0.0, - "learning_rate": 1.82056212525254e-05, - "loss": 1.0674, + "learning_rate": 1.6394215456022248e-05, + "loss": 1.2159, "step": 7688 }, { - "epoch": 0.21788659355606563, + "epoch": 0.3008451365521559, "grad_norm": 0.0, - "learning_rate": 1.8205096648771166e-05, - "loss": 0.9394, + "learning_rate": 1.6393241085994674e-05, + "loss": 1.3502, "step": 7689 }, { - "epoch": 0.2179149309983281, + "epoch": 0.30088426324438533, "grad_norm": 0.0, - "learning_rate": 1.8204571975902362e-05, - "loss": 0.9611, + "learning_rate": 1.6392266613299663e-05, + "loss": 1.1878, "step": 7690 }, { - "epoch": 0.21794326844059056, + "epoch": 0.30092338993661477, "grad_norm": 0.0, - "learning_rate": 1.8204047233923394e-05, - "loss": 1.057, + "learning_rate": 1.6391292037952858e-05, + "loss": 1.0825, "step": 7691 }, { - "epoch": 0.21797160588285303, + "epoch": 0.3009625166288442, "grad_norm": 0.0, - "learning_rate": 1.8203522422838694e-05, - "loss": 1.01, + "learning_rate": 1.639031735996991e-05, + "loss": 1.0955, "step": 7692 }, { - "epoch": 0.21799994332511546, + "epoch": 0.30100164332107365, "grad_norm": 0.0, - "learning_rate": 1.820299754265268e-05, - "loss": 1.0099, + "learning_rate": 1.6389342579366478e-05, + "loss": 0.9028, "step": 7693 }, { - "epoch": 0.21802828076737793, + "epoch": 0.3010407700133031, "grad_norm": 0.0, - "learning_rate": 1.8202472593369765e-05, - "loss": 1.0674, + "learning_rate": 1.6388367696158206e-05, + "loss": 1.1454, "step": 7694 }, { - "epoch": 0.2180566182096404, + "epoch": 0.30107989670553253, "grad_norm": 0.0, - "learning_rate": 1.8201947574994385e-05, - "loss": 1.0161, + "learning_rate": 1.6387392710360752e-05, + "loss": 1.17, "step": 7695 }, { - "epoch": 0.21808495565190286, + "epoch": 0.301119023397762, "grad_norm": 0.0, - "learning_rate": 1.8201422487530953e-05, - "loss": 1.0441, + "learning_rate": 1.638641762198978e-05, + "loss": 1.0394, "step": 7696 }, { - "epoch": 0.21811329309416533, + "epoch": 0.3011581500899914, "grad_norm": 0.0, - "learning_rate": 1.820089733098389e-05, - "loss": 0.9651, + "learning_rate": 1.638544243106094e-05, + "loss": 1.2354, "step": 7697 }, { - "epoch": 0.2181416305364278, + "epoch": 0.30119727678222086, "grad_norm": 0.0, - "learning_rate": 1.820037210535763e-05, - "loss": 0.952, + "learning_rate": 1.63844671375899e-05, + "loss": 1.0229, "step": 7698 }, { - "epoch": 0.21816996797869023, + "epoch": 0.30123640347445024, "grad_norm": 0.0, - "learning_rate": 1.8199846810656586e-05, - "loss": 1.1507, + "learning_rate": 1.6383491741592316e-05, + "loss": 1.1124, "step": 7699 }, { - "epoch": 0.2181983054209527, + "epoch": 0.3012755301666797, "grad_norm": 0.0, - "learning_rate": 1.819932144688519e-05, - "loss": 0.9574, + "learning_rate": 1.638251624308385e-05, + "loss": 1.0005, "step": 7700 }, { - "epoch": 0.21822664286321516, + "epoch": 0.3013146568589091, "grad_norm": 0.0, - "learning_rate": 1.819879601404786e-05, - "loss": 1.0464, + "learning_rate": 1.6381540642080175e-05, + "loss": 1.246, "step": 7701 }, { - "epoch": 0.21825498030547763, + "epoch": 0.30135378355113857, "grad_norm": 0.0, - "learning_rate": 1.819827051214903e-05, - "loss": 0.9483, + "learning_rate": 1.6380564938596953e-05, + "loss": 1.0851, "step": 7702 }, { - "epoch": 0.2182833177477401, + "epoch": 0.301392910243368, "grad_norm": 0.0, - "learning_rate": 1.819774494119312e-05, - "loss": 1.0738, + "learning_rate": 1.6379589132649854e-05, + "loss": 1.035, "step": 7703 }, { - "epoch": 0.21831165519000256, + "epoch": 0.30143203693559745, "grad_norm": 0.0, - "learning_rate": 1.8197219301184565e-05, - "loss": 1.0919, + "learning_rate": 1.6378613224254546e-05, + "loss": 1.0606, "step": 7704 }, { - "epoch": 0.218339992632265, + "epoch": 0.3014711636278269, "grad_norm": 0.0, - "learning_rate": 1.8196693592127786e-05, - "loss": 1.0471, + "learning_rate": 1.6377637213426704e-05, + "loss": 1.1234, "step": 7705 }, { - "epoch": 0.21836833007452747, + "epoch": 0.30151029032005633, "grad_norm": 0.0, - "learning_rate": 1.819616781402721e-05, - "loss": 0.9196, + "learning_rate": 1.6376661100181994e-05, + "loss": 1.1172, "step": 7706 }, { - "epoch": 0.21839666751678993, + "epoch": 0.30154941701228577, "grad_norm": 0.0, - "learning_rate": 1.8195641966887274e-05, - "loss": 0.9644, + "learning_rate": 1.6375684884536106e-05, + "loss": 1.0298, "step": 7707 }, { - "epoch": 0.2184250049590524, + "epoch": 0.3015885437045152, "grad_norm": 0.0, - "learning_rate": 1.81951160507124e-05, - "loss": 1.0578, + "learning_rate": 1.63747085665047e-05, + "loss": 1.2592, "step": 7708 }, { - "epoch": 0.21845334240131487, + "epoch": 0.30162767039674465, "grad_norm": 0.0, - "learning_rate": 1.819459006550702e-05, - "loss": 1.0253, + "learning_rate": 1.6373732146103466e-05, + "loss": 1.2058, "step": 7709 }, { - "epoch": 0.21848167984357733, + "epoch": 0.3016667970889741, "grad_norm": 0.0, - "learning_rate": 1.8194064011275568e-05, - "loss": 1.0306, + "learning_rate": 1.637275562334808e-05, + "loss": 1.1291, "step": 7710 }, { - "epoch": 0.21851001728583977, + "epoch": 0.30170592378120353, "grad_norm": 0.0, - "learning_rate": 1.8193537888022466e-05, - "loss": 0.9034, + "learning_rate": 1.6371778998254225e-05, + "loss": 1.1664, "step": 7711 }, { - "epoch": 0.21853835472810224, + "epoch": 0.301745050473433, "grad_norm": 0.0, - "learning_rate": 1.8193011695752155e-05, - "loss": 0.9885, + "learning_rate": 1.6370802270837587e-05, + "loss": 1.1581, "step": 7712 }, { - "epoch": 0.2185666921703647, + "epoch": 0.3017841771656624, "grad_norm": 0.0, - "learning_rate": 1.819248543446907e-05, - "loss": 0.9557, + "learning_rate": 1.6369825441113843e-05, + "loss": 1.0682, "step": 7713 }, { - "epoch": 0.21859502961262717, + "epoch": 0.30182330385789186, "grad_norm": 0.0, - "learning_rate": 1.8191959104177628e-05, - "loss": 1.0227, + "learning_rate": 1.6368848509098687e-05, + "loss": 1.2238, "step": 7714 }, { - "epoch": 0.21862336705488963, + "epoch": 0.3018624305501213, "grad_norm": 0.0, - "learning_rate": 1.8191432704882276e-05, - "loss": 0.9571, + "learning_rate": 1.6367871474807802e-05, + "loss": 1.1865, "step": 7715 }, { - "epoch": 0.2186517044971521, + "epoch": 0.30190155724235074, "grad_norm": 0.0, - "learning_rate": 1.8190906236587448e-05, - "loss": 0.9821, + "learning_rate": 1.636689433825688e-05, + "loss": 1.207, "step": 7716 }, { - "epoch": 0.21868004193941454, + "epoch": 0.3019406839345802, "grad_norm": 0.0, - "learning_rate": 1.8190379699297567e-05, - "loss": 1.0188, + "learning_rate": 1.6365917099461616e-05, + "loss": 1.196, "step": 7717 }, { - "epoch": 0.218708379381677, + "epoch": 0.3019798106268096, "grad_norm": 0.0, - "learning_rate": 1.8189853093017084e-05, - "loss": 0.9608, + "learning_rate": 1.6364939758437695e-05, + "loss": 1.0596, "step": 7718 }, { - "epoch": 0.21873671682393947, + "epoch": 0.30201893731903906, "grad_norm": 0.0, - "learning_rate": 1.818932641775043e-05, - "loss": 1.0369, + "learning_rate": 1.636396231520082e-05, + "loss": 1.1503, "step": 7719 }, { - "epoch": 0.21876505426620194, + "epoch": 0.3020580640112685, "grad_norm": 0.0, - "learning_rate": 1.818879967350203e-05, - "loss": 1.0298, + "learning_rate": 1.636298476976669e-05, + "loss": 1.1156, "step": 7720 }, { - "epoch": 0.2187933917084644, + "epoch": 0.30209719070349794, "grad_norm": 0.0, - "learning_rate": 1.818827286027633e-05, - "loss": 1.0605, + "learning_rate": 1.6362007122150993e-05, + "loss": 1.089, "step": 7721 }, { - "epoch": 0.21882172915072687, + "epoch": 0.3021363173957274, "grad_norm": 0.0, - "learning_rate": 1.8187745978077772e-05, - "loss": 1.0383, + "learning_rate": 1.6361029372369433e-05, + "loss": 1.1143, "step": 7722 }, { - "epoch": 0.2188500665929893, + "epoch": 0.3021754440879568, "grad_norm": 0.0, - "learning_rate": 1.818721902691079e-05, - "loss": 0.9617, + "learning_rate": 1.6360051520437716e-05, + "loss": 1.1466, "step": 7723 }, { - "epoch": 0.21887840403525177, + "epoch": 0.30221457078018626, "grad_norm": 0.0, - "learning_rate": 1.818669200677982e-05, - "loss": 0.9957, + "learning_rate": 1.6359073566371538e-05, + "loss": 1.0162, "step": 7724 }, { - "epoch": 0.21890674147751424, + "epoch": 0.3022536974724157, "grad_norm": 0.0, - "learning_rate": 1.81861649176893e-05, - "loss": 0.9152, + "learning_rate": 1.6358095510186607e-05, + "loss": 1.1141, "step": 7725 }, { - "epoch": 0.2189350789197767, + "epoch": 0.30229282416464515, "grad_norm": 0.0, - "learning_rate": 1.8185637759643676e-05, - "loss": 1.0335, + "learning_rate": 1.635711735189863e-05, + "loss": 1.0879, "step": 7726 }, { - "epoch": 0.21896341636203917, + "epoch": 0.3023319508568746, "grad_norm": 0.0, - "learning_rate": 1.8185110532647382e-05, - "loss": 1.0638, + "learning_rate": 1.6356139091523317e-05, + "loss": 1.109, "step": 7727 }, { - "epoch": 0.21899175380430164, + "epoch": 0.302371077549104, "grad_norm": 0.0, - "learning_rate": 1.8184583236704867e-05, - "loss": 1.0203, + "learning_rate": 1.635516072907637e-05, + "loss": 1.1891, "step": 7728 }, { - "epoch": 0.21902009124656407, + "epoch": 0.3024102042413334, "grad_norm": 0.0, - "learning_rate": 1.8184055871820568e-05, - "loss": 1.1385, + "learning_rate": 1.6354182264573507e-05, + "loss": 1.1543, "step": 7729 }, { - "epoch": 0.21904842868882654, + "epoch": 0.30244933093356285, "grad_norm": 0.0, - "learning_rate": 1.8183528437998924e-05, - "loss": 0.98, + "learning_rate": 1.635320369803044e-05, + "loss": 1.0591, "step": 7730 }, { - "epoch": 0.219076766131089, + "epoch": 0.3024884576257923, "grad_norm": 0.0, - "learning_rate": 1.8183000935244383e-05, - "loss": 0.9294, + "learning_rate": 1.635222502946288e-05, + "loss": 1.0383, "step": 7731 }, { - "epoch": 0.21910510357335147, + "epoch": 0.30252758431802174, "grad_norm": 0.0, - "learning_rate": 1.8182473363561385e-05, - "loss": 0.8702, + "learning_rate": 1.6351246258886546e-05, + "loss": 1.1031, "step": 7732 }, { - "epoch": 0.21913344101561394, + "epoch": 0.3025667110102512, "grad_norm": 0.0, - "learning_rate": 1.8181945722954377e-05, - "loss": 1.0758, + "learning_rate": 1.6350267386317156e-05, + "loss": 1.1803, "step": 7733 }, { - "epoch": 0.2191617784578764, + "epoch": 0.3026058377024806, "grad_norm": 0.0, - "learning_rate": 1.81814180134278e-05, - "loss": 1.1253, + "learning_rate": 1.634928841177043e-05, + "loss": 0.9663, "step": 7734 }, { - "epoch": 0.21919011590013884, + "epoch": 0.30264496439471006, "grad_norm": 0.0, - "learning_rate": 1.8180890234986103e-05, - "loss": 1.0325, + "learning_rate": 1.634830933526209e-05, + "loss": 1.0403, "step": 7735 }, { - "epoch": 0.2192184533424013, + "epoch": 0.3026840910869395, "grad_norm": 0.0, - "learning_rate": 1.818036238763373e-05, - "loss": 1.0951, + "learning_rate": 1.6347330156807856e-05, + "loss": 1.1329, "step": 7736 }, { - "epoch": 0.21924679078466378, + "epoch": 0.30272321777916894, "grad_norm": 0.0, - "learning_rate": 1.8179834471375127e-05, - "loss": 0.9896, + "learning_rate": 1.6346350876423452e-05, + "loss": 1.1078, "step": 7737 }, { - "epoch": 0.21927512822692624, + "epoch": 0.3027623444713984, "grad_norm": 0.0, - "learning_rate": 1.817930648621474e-05, - "loss": 0.9817, + "learning_rate": 1.6345371494124607e-05, + "loss": 1.0546, "step": 7738 }, { - "epoch": 0.2193034656691887, + "epoch": 0.3028014711636278, "grad_norm": 0.0, - "learning_rate": 1.8178778432157015e-05, - "loss": 0.9321, + "learning_rate": 1.6344392009927047e-05, + "loss": 1.1951, "step": 7739 }, { - "epoch": 0.21933180311145117, + "epoch": 0.30284059785585726, "grad_norm": 0.0, - "learning_rate": 1.8178250309206404e-05, - "loss": 1.0194, + "learning_rate": 1.6343412423846498e-05, + "loss": 1.0789, "step": 7740 }, { - "epoch": 0.2193601405537136, + "epoch": 0.3028797245480867, "grad_norm": 0.0, - "learning_rate": 1.8177722117367356e-05, - "loss": 1.0154, + "learning_rate": 1.63424327358987e-05, + "loss": 1.1191, "step": 7741 }, { - "epoch": 0.21938847799597608, + "epoch": 0.30291885124031614, "grad_norm": 0.0, - "learning_rate": 1.8177193856644315e-05, - "loss": 0.9099, + "learning_rate": 1.6341452946099374e-05, + "loss": 1.0306, "step": 7742 }, { - "epoch": 0.21941681543823854, + "epoch": 0.3029579779325456, "grad_norm": 0.0, - "learning_rate": 1.8176665527041734e-05, - "loss": 0.8859, + "learning_rate": 1.6340473054464263e-05, + "loss": 1.1007, "step": 7743 }, { - "epoch": 0.219445152880501, + "epoch": 0.302997104624775, "grad_norm": 0.0, - "learning_rate": 1.817613712856406e-05, - "loss": 1.0218, + "learning_rate": 1.63394930610091e-05, + "loss": 1.1192, "step": 7744 }, { - "epoch": 0.21947349032276348, + "epoch": 0.30303623131700447, "grad_norm": 0.0, - "learning_rate": 1.8175608661215753e-05, - "loss": 1.017, + "learning_rate": 1.633851296574962e-05, + "loss": 1.1639, "step": 7745 }, { - "epoch": 0.21950182776502594, + "epoch": 0.3030753580092339, "grad_norm": 0.0, - "learning_rate": 1.8175080125001257e-05, - "loss": 0.8762, + "learning_rate": 1.6337532768701568e-05, + "loss": 1.182, "step": 7746 }, { - "epoch": 0.21953016520728838, + "epoch": 0.30311448470146335, "grad_norm": 0.0, - "learning_rate": 1.8174551519925025e-05, - "loss": 1.1072, + "learning_rate": 1.6336552469880676e-05, + "loss": 1.1679, "step": 7747 }, { - "epoch": 0.21955850264955085, + "epoch": 0.3031536113936928, "grad_norm": 0.0, - "learning_rate": 1.8174022845991506e-05, - "loss": 0.9751, + "learning_rate": 1.6335572069302694e-05, + "loss": 1.0481, "step": 7748 }, { - "epoch": 0.2195868400918133, + "epoch": 0.30319273808592223, "grad_norm": 0.0, - "learning_rate": 1.817349410320516e-05, - "loss": 1.041, + "learning_rate": 1.6334591566983363e-05, + "loss": 1.2145, "step": 7749 }, { - "epoch": 0.21961517753407578, + "epoch": 0.30323186477815167, "grad_norm": 0.0, - "learning_rate": 1.817296529157044e-05, - "loss": 0.9819, + "learning_rate": 1.633361096293843e-05, + "loss": 1.1693, "step": 7750 }, { - "epoch": 0.21964351497633824, + "epoch": 0.3032709914703811, "grad_norm": 0.0, - "learning_rate": 1.8172436411091795e-05, - "loss": 0.9867, + "learning_rate": 1.6332630257183644e-05, + "loss": 1.0967, "step": 7751 }, { - "epoch": 0.2196718524186007, + "epoch": 0.30331011816261055, "grad_norm": 0.0, - "learning_rate": 1.8171907461773686e-05, - "loss": 1.0002, + "learning_rate": 1.633164944973475e-05, + "loss": 1.0929, "step": 7752 }, { - "epoch": 0.21970018986086315, + "epoch": 0.30334924485484, "grad_norm": 0.0, - "learning_rate": 1.8171378443620563e-05, - "loss": 1.0738, + "learning_rate": 1.6330668540607498e-05, + "loss": 1.2263, "step": 7753 }, { - "epoch": 0.21972852730312561, + "epoch": 0.30338837154706944, "grad_norm": 0.0, - "learning_rate": 1.817084935663689e-05, - "loss": 0.962, + "learning_rate": 1.6329687529817643e-05, + "loss": 1.1297, "step": 7754 }, { - "epoch": 0.21975686474538808, + "epoch": 0.3034274982392989, "grad_norm": 0.0, - "learning_rate": 1.8170320200827113e-05, - "loss": 0.987, + "learning_rate": 1.6328706417380934e-05, + "loss": 1.2185, "step": 7755 }, { - "epoch": 0.21978520218765055, + "epoch": 0.30346662493152826, "grad_norm": 0.0, - "learning_rate": 1.8169790976195696e-05, - "loss": 1.0411, + "learning_rate": 1.6327725203313133e-05, + "loss": 1.0241, "step": 7756 }, { - "epoch": 0.219813539629913, + "epoch": 0.3035057516237577, "grad_norm": 0.0, - "learning_rate": 1.8169261682747098e-05, - "loss": 1.0803, + "learning_rate": 1.6326743887629995e-05, + "loss": 1.1592, "step": 7757 }, { - "epoch": 0.21984187707217548, + "epoch": 0.30354487831598714, "grad_norm": 0.0, - "learning_rate": 1.8168732320485776e-05, - "loss": 1.0168, + "learning_rate": 1.632576247034728e-05, + "loss": 1.1588, "step": 7758 }, { - "epoch": 0.21987021451443792, + "epoch": 0.3035840050082166, "grad_norm": 0.0, - "learning_rate": 1.8168202889416184e-05, - "loss": 0.9103, + "learning_rate": 1.6324780951480745e-05, + "loss": 1.1671, "step": 7759 }, { - "epoch": 0.21989855195670038, + "epoch": 0.303623131700446, "grad_norm": 0.0, - "learning_rate": 1.816767338954279e-05, - "loss": 1.0057, + "learning_rate": 1.632379933104615e-05, + "loss": 1.1289, "step": 7760 }, { - "epoch": 0.21992688939896285, + "epoch": 0.30366225839267547, "grad_norm": 0.0, - "learning_rate": 1.8167143820870046e-05, - "loss": 1.0404, + "learning_rate": 1.6322817609059267e-05, + "loss": 1.0016, "step": 7761 }, { - "epoch": 0.21995522684122532, + "epoch": 0.3037013850849049, "grad_norm": 0.0, - "learning_rate": 1.816661418340242e-05, - "loss": 0.8738, + "learning_rate": 1.632183578553585e-05, + "loss": 0.9656, "step": 7762 }, { - "epoch": 0.21998356428348778, + "epoch": 0.30374051177713435, "grad_norm": 0.0, - "learning_rate": 1.816608447714437e-05, - "loss": 1.0148, + "learning_rate": 1.632085386049168e-05, + "loss": 1.2013, "step": 7763 }, { - "epoch": 0.22001190172575025, + "epoch": 0.3037796384693638, "grad_norm": 0.0, - "learning_rate": 1.816555470210036e-05, - "loss": 0.9575, + "learning_rate": 1.631987183394251e-05, + "loss": 1.1171, "step": 7764 }, { - "epoch": 0.22004023916801269, + "epoch": 0.30381876516159323, "grad_norm": 0.0, - "learning_rate": 1.8165024858274845e-05, - "loss": 1.0737, + "learning_rate": 1.6318889705904123e-05, + "loss": 1.182, "step": 7765 }, { - "epoch": 0.22006857661027515, + "epoch": 0.30385789185382267, "grad_norm": 0.0, - "learning_rate": 1.81644949456723e-05, - "loss": 0.9342, + "learning_rate": 1.631790747639228e-05, + "loss": 1.125, "step": 7766 }, { - "epoch": 0.22009691405253762, + "epoch": 0.3038970185460521, "grad_norm": 0.0, - "learning_rate": 1.8163964964297177e-05, - "loss": 0.9956, + "learning_rate": 1.6316925145422765e-05, + "loss": 0.9782, "step": 7767 }, { - "epoch": 0.22012525149480008, + "epoch": 0.30393614523828155, "grad_norm": 0.0, - "learning_rate": 1.8163434914153948e-05, - "loss": 0.9687, + "learning_rate": 1.6315942713011344e-05, + "loss": 1.1523, "step": 7768 }, { - "epoch": 0.22015358893706255, + "epoch": 0.303975271930511, "grad_norm": 0.0, - "learning_rate": 1.8162904795247077e-05, - "loss": 1.0825, + "learning_rate": 1.63149601791738e-05, + "loss": 1.1191, "step": 7769 }, { - "epoch": 0.22018192637932502, + "epoch": 0.30401439862274043, "grad_norm": 0.0, - "learning_rate": 1.8162374607581022e-05, - "loss": 0.9798, + "learning_rate": 1.6313977543925907e-05, + "loss": 1.2458, "step": 7770 }, { - "epoch": 0.22021026382158745, + "epoch": 0.3040535253149699, "grad_norm": 0.0, - "learning_rate": 1.816184435116026e-05, - "loss": 1.08, + "learning_rate": 1.6312994807283448e-05, + "loss": 1.1015, "step": 7771 }, { - "epoch": 0.22023860126384992, + "epoch": 0.3040926520071993, "grad_norm": 0.0, - "learning_rate": 1.816131402598925e-05, - "loss": 1.0475, + "learning_rate": 1.6312011969262203e-05, + "loss": 1.0864, "step": 7772 }, { - "epoch": 0.2202669387061124, + "epoch": 0.30413177869942876, "grad_norm": 0.0, - "learning_rate": 1.8160783632072463e-05, - "loss": 1.0182, + "learning_rate": 1.6311029029877952e-05, + "loss": 1.1108, "step": 7773 }, { - "epoch": 0.22029527614837485, + "epoch": 0.3041709053916582, "grad_norm": 0.0, - "learning_rate": 1.8160253169414363e-05, - "loss": 1.032, + "learning_rate": 1.6310045989146486e-05, + "loss": 1.0948, "step": 7774 }, { - "epoch": 0.22032361359063732, + "epoch": 0.30421003208388764, "grad_norm": 0.0, - "learning_rate": 1.815972263801942e-05, - "loss": 0.9996, + "learning_rate": 1.6309062847083585e-05, + "loss": 1.1783, "step": 7775 }, { - "epoch": 0.22035195103289976, + "epoch": 0.3042491587761171, "grad_norm": 0.0, - "learning_rate": 1.8159192037892106e-05, - "loss": 0.9036, + "learning_rate": 1.6308079603705044e-05, + "loss": 1.2394, "step": 7776 }, { - "epoch": 0.22038028847516222, + "epoch": 0.3042882854683465, "grad_norm": 0.0, - "learning_rate": 1.8158661369036883e-05, - "loss": 1.0232, + "learning_rate": 1.6307096259026647e-05, + "loss": 1.0536, "step": 7777 }, { - "epoch": 0.2204086259174247, + "epoch": 0.30432741216057596, "grad_norm": 0.0, - "learning_rate": 1.815813063145823e-05, - "loss": 1.1722, + "learning_rate": 1.6306112813064188e-05, + "loss": 0.9669, "step": 7778 }, { - "epoch": 0.22043696335968715, + "epoch": 0.3043665388528054, "grad_norm": 0.0, - "learning_rate": 1.815759982516061e-05, - "loss": 0.9811, + "learning_rate": 1.6305129265833457e-05, + "loss": 1.1063, "step": 7779 }, { - "epoch": 0.22046530080194962, + "epoch": 0.30440566554503484, "grad_norm": 0.0, - "learning_rate": 1.81570689501485e-05, - "loss": 0.9652, + "learning_rate": 1.6304145617350255e-05, + "loss": 1.0542, "step": 7780 }, { - "epoch": 0.2204936382442121, + "epoch": 0.3044447922372643, "grad_norm": 0.0, - "learning_rate": 1.815653800642637e-05, - "loss": 0.8948, + "learning_rate": 1.6303161867630373e-05, + "loss": 1.2471, "step": 7781 }, { - "epoch": 0.22052197568647453, + "epoch": 0.3044839189294937, "grad_norm": 0.0, - "learning_rate": 1.815600699399869e-05, - "loss": 0.8941, + "learning_rate": 1.6302178016689606e-05, + "loss": 1.0585, "step": 7782 }, { - "epoch": 0.220550313128737, + "epoch": 0.30452304562172317, "grad_norm": 0.0, - "learning_rate": 1.8155475912869932e-05, - "loss": 1.0147, + "learning_rate": 1.630119406454376e-05, + "loss": 1.1231, "step": 7783 }, { - "epoch": 0.22057865057099946, + "epoch": 0.3045621723139526, "grad_norm": 0.0, - "learning_rate": 1.8154944763044574e-05, - "loss": 1.0023, + "learning_rate": 1.6300210011208635e-05, + "loss": 1.0083, "step": 7784 }, { - "epoch": 0.22060698801326192, + "epoch": 0.304601299006182, "grad_norm": 0.0, - "learning_rate": 1.815441354452709e-05, - "loss": 1.0205, + "learning_rate": 1.629922585670003e-05, + "loss": 1.0658, "step": 7785 }, { - "epoch": 0.2206353254555244, + "epoch": 0.30464042569841143, "grad_norm": 0.0, - "learning_rate": 1.815388225732195e-05, - "loss": 0.9402, + "learning_rate": 1.629824160103375e-05, + "loss": 1.1115, "step": 7786 }, { - "epoch": 0.22066366289778686, + "epoch": 0.3046795523906409, "grad_norm": 0.0, - "learning_rate": 1.8153350901433633e-05, - "loss": 0.9216, + "learning_rate": 1.6297257244225602e-05, + "loss": 1.0703, "step": 7787 }, { - "epoch": 0.2206920003400493, + "epoch": 0.3047186790828703, "grad_norm": 0.0, - "learning_rate": 1.8152819476866616e-05, - "loss": 1.0817, + "learning_rate": 1.6296272786291397e-05, + "loss": 1.1628, "step": 7788 }, { - "epoch": 0.22072033778231176, + "epoch": 0.30475780577509975, "grad_norm": 0.0, - "learning_rate": 1.815228798362537e-05, - "loss": 0.9279, + "learning_rate": 1.6295288227246936e-05, + "loss": 1.1053, "step": 7789 }, { - "epoch": 0.22074867522457423, + "epoch": 0.3047969324673292, "grad_norm": 0.0, - "learning_rate": 1.8151756421714375e-05, - "loss": 1.0171, + "learning_rate": 1.629430356710804e-05, + "loss": 1.1922, "step": 7790 }, { - "epoch": 0.2207770126668367, + "epoch": 0.30483605915955864, "grad_norm": 0.0, - "learning_rate": 1.8151224791138106e-05, - "loss": 1.1104, + "learning_rate": 1.629331880589051e-05, + "loss": 1.0554, "step": 7791 }, { - "epoch": 0.22080535010909916, + "epoch": 0.3048751858517881, "grad_norm": 0.0, - "learning_rate": 1.815069309190105e-05, - "loss": 0.9346, + "learning_rate": 1.629233394361017e-05, + "loss": 1.1678, "step": 7792 }, { - "epoch": 0.22083368755136162, + "epoch": 0.3049143125440175, "grad_norm": 0.0, - "learning_rate": 1.8150161324007674e-05, - "loss": 0.9297, + "learning_rate": 1.629134898028283e-05, + "loss": 1.2717, "step": 7793 }, { - "epoch": 0.22086202499362406, + "epoch": 0.30495343923624696, "grad_norm": 0.0, - "learning_rate": 1.8149629487462466e-05, - "loss": 1.0035, + "learning_rate": 1.6290363915924306e-05, + "loss": 1.0705, "step": 7794 }, { - "epoch": 0.22089036243588653, + "epoch": 0.3049925659284764, "grad_norm": 0.0, - "learning_rate": 1.81490975822699e-05, - "loss": 0.8739, + "learning_rate": 1.6289378750550423e-05, + "loss": 1.0562, "step": 7795 }, { - "epoch": 0.220918699878149, + "epoch": 0.30503169262070584, "grad_norm": 0.0, - "learning_rate": 1.814856560843446e-05, - "loss": 1.0869, + "learning_rate": 1.6288393484177e-05, + "loss": 1.176, "step": 7796 }, { - "epoch": 0.22094703732041146, + "epoch": 0.3050708193129353, "grad_norm": 0.0, - "learning_rate": 1.814803356596063e-05, - "loss": 0.9479, + "learning_rate": 1.6287408116819855e-05, + "loss": 1.088, "step": 7797 }, { - "epoch": 0.22097537476267393, + "epoch": 0.3051099460051647, "grad_norm": 0.0, - "learning_rate": 1.814750145485288e-05, - "loss": 0.9804, + "learning_rate": 1.6286422648494815e-05, + "loss": 1.1522, "step": 7798 }, { - "epoch": 0.2210037122049364, + "epoch": 0.30514907269739416, "grad_norm": 0.0, - "learning_rate": 1.8146969275115704e-05, - "loss": 1.0034, + "learning_rate": 1.6285437079217702e-05, + "loss": 1.1135, "step": 7799 }, { - "epoch": 0.22103204964719883, + "epoch": 0.3051881993896236, "grad_norm": 0.0, - "learning_rate": 1.8146437026753584e-05, - "loss": 0.9802, + "learning_rate": 1.6284451409004352e-05, + "loss": 1.1328, "step": 7800 }, { - "epoch": 0.2210603870894613, + "epoch": 0.30522732608185305, "grad_norm": 0.0, - "learning_rate": 1.8145904709770993e-05, - "loss": 1.0907, + "learning_rate": 1.628346563787058e-05, + "loss": 1.1382, "step": 7801 }, { - "epoch": 0.22108872453172376, + "epoch": 0.3052664527740825, "grad_norm": 0.0, - "learning_rate": 1.8145372324172425e-05, - "loss": 1.0526, + "learning_rate": 1.628247976583223e-05, + "loss": 1.1083, "step": 7802 }, { - "epoch": 0.22111706197398623, + "epoch": 0.3053055794663119, "grad_norm": 0.0, - "learning_rate": 1.814483986996236e-05, - "loss": 1.011, + "learning_rate": 1.6281493792905124e-05, + "loss": 1.0962, "step": 7803 }, { - "epoch": 0.2211453994162487, + "epoch": 0.30534470615854137, "grad_norm": 0.0, - "learning_rate": 1.8144307347145287e-05, - "loss": 0.9338, + "learning_rate": 1.6280507719105097e-05, + "loss": 1.2245, "step": 7804 }, { - "epoch": 0.22117373685851116, + "epoch": 0.3053838328507708, "grad_norm": 0.0, - "learning_rate": 1.8143774755725685e-05, - "loss": 1.0018, + "learning_rate": 1.627952154444799e-05, + "loss": 1.1017, "step": 7805 }, { - "epoch": 0.2212020743007736, + "epoch": 0.30542295954300025, "grad_norm": 0.0, - "learning_rate": 1.814324209570805e-05, - "loss": 0.96, + "learning_rate": 1.6278535268949633e-05, + "loss": 1.0956, "step": 7806 }, { - "epoch": 0.22123041174303607, + "epoch": 0.3054620862352297, "grad_norm": 0.0, - "learning_rate": 1.8142709367096855e-05, - "loss": 0.8734, + "learning_rate": 1.6277548892625867e-05, + "loss": 1.2379, "step": 7807 }, { - "epoch": 0.22125874918529853, + "epoch": 0.30550121292745913, "grad_norm": 0.0, - "learning_rate": 1.8142176569896603e-05, - "loss": 1.0502, + "learning_rate": 1.6276562415492533e-05, + "loss": 1.0479, "step": 7808 }, { - "epoch": 0.221287086627561, + "epoch": 0.3055403396196886, "grad_norm": 0.0, - "learning_rate": 1.814164370411177e-05, - "loss": 0.9925, + "learning_rate": 1.6275575837565472e-05, + "loss": 1.1883, "step": 7809 }, { - "epoch": 0.22131542406982346, + "epoch": 0.305579466311918, "grad_norm": 0.0, - "learning_rate": 1.8141110769746848e-05, - "loss": 1.0377, + "learning_rate": 1.6274589158860523e-05, + "loss": 0.9793, "step": 7810 }, { - "epoch": 0.22134376151208593, + "epoch": 0.30561859300414745, "grad_norm": 0.0, - "learning_rate": 1.8140577766806328e-05, - "loss": 0.9901, + "learning_rate": 1.627360237939354e-05, + "loss": 1.1248, "step": 7811 }, { - "epoch": 0.22137209895434837, + "epoch": 0.3056577196963769, "grad_norm": 0.0, - "learning_rate": 1.81400446952947e-05, - "loss": 0.9155, + "learning_rate": 1.627261549918036e-05, + "loss": 0.9392, "step": 7812 }, { - "epoch": 0.22140043639661083, + "epoch": 0.3056968463886063, "grad_norm": 0.0, - "learning_rate": 1.813951155521645e-05, - "loss": 1.0878, + "learning_rate": 1.6271628518236836e-05, + "loss": 0.991, "step": 7813 }, { - "epoch": 0.2214287738388733, + "epoch": 0.3057359730808357, "grad_norm": 0.0, - "learning_rate": 1.8138978346576073e-05, - "loss": 0.9297, + "learning_rate": 1.627064143657882e-05, + "loss": 1.097, "step": 7814 }, { - "epoch": 0.22145711128113577, + "epoch": 0.30577509977306516, "grad_norm": 0.0, - "learning_rate": 1.8138445069378062e-05, - "loss": 1.0733, + "learning_rate": 1.6269654254222155e-05, + "loss": 1.1293, "step": 7815 }, { - "epoch": 0.22148544872339823, + "epoch": 0.3058142264652946, "grad_norm": 0.0, - "learning_rate": 1.8137911723626903e-05, - "loss": 1.0031, + "learning_rate": 1.62686669711827e-05, + "loss": 1.0819, "step": 7816 }, { - "epoch": 0.2215137861656607, + "epoch": 0.30585335315752404, "grad_norm": 0.0, - "learning_rate": 1.813737830932709e-05, - "loss": 0.9691, + "learning_rate": 1.6267679587476312e-05, + "loss": 1.132, "step": 7817 }, { - "epoch": 0.22154212360792314, + "epoch": 0.3058924798497535, "grad_norm": 0.0, - "learning_rate": 1.813684482648312e-05, - "loss": 0.8888, + "learning_rate": 1.626669210311884e-05, + "loss": 1.028, "step": 7818 }, { - "epoch": 0.2215704610501856, + "epoch": 0.3059316065419829, "grad_norm": 0.0, - "learning_rate": 1.8136311275099484e-05, - "loss": 1.0089, + "learning_rate": 1.626570451812615e-05, + "loss": 1.0923, "step": 7819 }, { - "epoch": 0.22159879849244807, + "epoch": 0.30597073323421237, "grad_norm": 0.0, - "learning_rate": 1.8135777655180676e-05, - "loss": 1.0382, + "learning_rate": 1.6264716832514095e-05, + "loss": 1.1835, "step": 7820 }, { - "epoch": 0.22162713593471053, + "epoch": 0.3060098599264418, "grad_norm": 0.0, - "learning_rate": 1.8135243966731194e-05, - "loss": 0.9741, + "learning_rate": 1.6263729046298534e-05, + "loss": 1.2462, "step": 7821 }, { - "epoch": 0.221655473376973, + "epoch": 0.30604898661867125, "grad_norm": 0.0, - "learning_rate": 1.8134710209755527e-05, - "loss": 1.0322, + "learning_rate": 1.6262741159495336e-05, + "loss": 1.143, "step": 7822 }, { - "epoch": 0.22168381081923547, + "epoch": 0.3060881133109007, "grad_norm": 0.0, - "learning_rate": 1.813417638425818e-05, - "loss": 0.9821, + "learning_rate": 1.6261753172120363e-05, + "loss": 1.1288, "step": 7823 }, { - "epoch": 0.2217121482614979, + "epoch": 0.30612724000313013, "grad_norm": 0.0, - "learning_rate": 1.8133642490243642e-05, - "loss": 0.9704, + "learning_rate": 1.626076508418948e-05, + "loss": 1.1775, "step": 7824 }, { - "epoch": 0.22174048570376037, + "epoch": 0.30616636669535957, "grad_norm": 0.0, - "learning_rate": 1.8133108527716413e-05, - "loss": 0.9926, + "learning_rate": 1.6259776895718555e-05, + "loss": 1.2592, "step": 7825 }, { - "epoch": 0.22176882314602284, + "epoch": 0.306205493387589, "grad_norm": 0.0, - "learning_rate": 1.8132574496680996e-05, - "loss": 0.9763, + "learning_rate": 1.6258788606723457e-05, + "loss": 1.0433, "step": 7826 }, { - "epoch": 0.2217971605882853, + "epoch": 0.30624462007981845, "grad_norm": 0.0, - "learning_rate": 1.8132040397141878e-05, - "loss": 0.9066, + "learning_rate": 1.625780021722006e-05, + "loss": 1.1497, "step": 7827 }, { - "epoch": 0.22182549803054777, + "epoch": 0.3062837467720479, "grad_norm": 0.0, - "learning_rate": 1.8131506229103565e-05, - "loss": 0.9997, + "learning_rate": 1.625681172722423e-05, + "loss": 1.2572, "step": 7828 }, { - "epoch": 0.22185383547281023, + "epoch": 0.30632287346427733, "grad_norm": 0.0, - "learning_rate": 1.8130971992570555e-05, - "loss": 1.0402, + "learning_rate": 1.625582313675184e-05, + "loss": 1.1707, "step": 7829 }, { - "epoch": 0.22188217291507267, + "epoch": 0.3063620001565068, "grad_norm": 0.0, - "learning_rate": 1.813043768754735e-05, - "loss": 0.9902, + "learning_rate": 1.6254834445818775e-05, + "loss": 1.2506, "step": 7830 }, { - "epoch": 0.22191051035733514, + "epoch": 0.3064011268487362, "grad_norm": 0.0, - "learning_rate": 1.8129903314038447e-05, - "loss": 1.0331, + "learning_rate": 1.6253845654440904e-05, + "loss": 1.1566, "step": 7831 }, { - "epoch": 0.2219388477995976, + "epoch": 0.30644025354096566, "grad_norm": 0.0, - "learning_rate": 1.8129368872048353e-05, - "loss": 0.9829, + "learning_rate": 1.625285676263411e-05, + "loss": 1.1237, "step": 7832 }, { - "epoch": 0.22196718524186007, + "epoch": 0.3064793802331951, "grad_norm": 0.0, - "learning_rate": 1.812883436158156e-05, - "loss": 0.9711, + "learning_rate": 1.625186777041427e-05, + "loss": 1.1915, "step": 7833 }, { - "epoch": 0.22199552268412254, + "epoch": 0.30651850692542454, "grad_norm": 0.0, - "learning_rate": 1.8128299782642585e-05, - "loss": 0.985, + "learning_rate": 1.625087867779727e-05, + "loss": 1.0842, "step": 7834 }, { - "epoch": 0.222023860126385, + "epoch": 0.306557633617654, "grad_norm": 0.0, - "learning_rate": 1.812776513523592e-05, - "loss": 0.9409, + "learning_rate": 1.6249889484798987e-05, + "loss": 1.1751, "step": 7835 }, { - "epoch": 0.22205219756864744, + "epoch": 0.3065967603098834, "grad_norm": 0.0, - "learning_rate": 1.812723041936607e-05, - "loss": 0.9867, + "learning_rate": 1.6248900191435314e-05, + "loss": 1.1187, "step": 7836 }, { - "epoch": 0.2220805350109099, + "epoch": 0.30663588700211286, "grad_norm": 0.0, - "learning_rate": 1.8126695635037538e-05, - "loss": 1.002, + "learning_rate": 1.624791079772213e-05, + "loss": 1.1718, "step": 7837 }, { - "epoch": 0.22210887245317237, + "epoch": 0.3066750136943423, "grad_norm": 0.0, - "learning_rate": 1.8126160782254832e-05, - "loss": 1.0188, + "learning_rate": 1.6246921303675334e-05, + "loss": 1.0671, "step": 7838 }, { - "epoch": 0.22213720989543484, + "epoch": 0.30671414038657174, "grad_norm": 0.0, - "learning_rate": 1.8125625861022455e-05, - "loss": 0.966, + "learning_rate": 1.6245931709310806e-05, + "loss": 1.2157, "step": 7839 }, { - "epoch": 0.2221655473376973, + "epoch": 0.3067532670788012, "grad_norm": 0.0, - "learning_rate": 1.8125090871344917e-05, - "loss": 0.9766, + "learning_rate": 1.6244942014644443e-05, + "loss": 1.2037, "step": 7840 }, { - "epoch": 0.22219388477995977, + "epoch": 0.3067923937710306, "grad_norm": 0.0, - "learning_rate": 1.812455581322672e-05, - "loss": 0.9676, + "learning_rate": 1.6243952219692135e-05, + "loss": 1.0305, "step": 7841 }, { - "epoch": 0.2222222222222222, + "epoch": 0.30683152046326, "grad_norm": 0.0, - "learning_rate": 1.812402068667237e-05, - "loss": 0.9795, + "learning_rate": 1.6242962324469777e-05, + "loss": 1.0545, "step": 7842 }, { - "epoch": 0.22225055966448468, + "epoch": 0.30687064715548945, "grad_norm": 0.0, - "learning_rate": 1.8123485491686382e-05, - "loss": 1.0911, + "learning_rate": 1.624197232899327e-05, + "loss": 1.2817, "step": 7843 }, { - "epoch": 0.22227889710674714, + "epoch": 0.3069097738477189, "grad_norm": 0.0, - "learning_rate": 1.8122950228273257e-05, - "loss": 0.9581, + "learning_rate": 1.6240982233278505e-05, + "loss": 1.0506, "step": 7844 }, { - "epoch": 0.2223072345490096, + "epoch": 0.30694890053994833, "grad_norm": 0.0, - "learning_rate": 1.8122414896437502e-05, - "loss": 0.8579, + "learning_rate": 1.623999203734139e-05, + "loss": 1.1725, "step": 7845 }, { - "epoch": 0.22233557199127207, + "epoch": 0.3069880272321778, "grad_norm": 0.0, - "learning_rate": 1.8121879496183636e-05, - "loss": 1.0794, + "learning_rate": 1.623900174119782e-05, + "loss": 1.1441, "step": 7846 }, { - "epoch": 0.22236390943353454, + "epoch": 0.3070271539244072, "grad_norm": 0.0, - "learning_rate": 1.8121344027516163e-05, - "loss": 1.0783, + "learning_rate": 1.62380113448637e-05, + "loss": 1.1382, "step": 7847 }, { - "epoch": 0.22239224687579698, + "epoch": 0.30706628061663666, "grad_norm": 0.0, - "learning_rate": 1.8120808490439588e-05, - "loss": 0.8834, + "learning_rate": 1.6237020848354937e-05, + "loss": 1.131, "step": 7848 }, { - "epoch": 0.22242058431805944, + "epoch": 0.3071054073088661, "grad_norm": 0.0, - "learning_rate": 1.812027288495843e-05, - "loss": 0.9376, + "learning_rate": 1.6236030251687435e-05, + "loss": 1.0975, "step": 7849 }, { - "epoch": 0.2224489217603219, + "epoch": 0.30714453400109554, "grad_norm": 0.0, - "learning_rate": 1.81197372110772e-05, - "loss": 1.0469, + "learning_rate": 1.6235039554877097e-05, + "loss": 1.1565, "step": 7850 }, { - "epoch": 0.22247725920258438, + "epoch": 0.307183660693325, "grad_norm": 0.0, - "learning_rate": 1.8119201468800407e-05, - "loss": 1.0635, + "learning_rate": 1.623404875793984e-05, + "loss": 1.1304, "step": 7851 }, { - "epoch": 0.22250559664484684, + "epoch": 0.3072227873855544, "grad_norm": 0.0, - "learning_rate": 1.8118665658132566e-05, - "loss": 0.9716, + "learning_rate": 1.6233057860891566e-05, + "loss": 1.1595, "step": 7852 }, { - "epoch": 0.2225339340871093, + "epoch": 0.30726191407778386, "grad_norm": 0.0, - "learning_rate": 1.8118129779078185e-05, - "loss": 1.0783, + "learning_rate": 1.62320668637482e-05, + "loss": 1.1707, "step": 7853 }, { - "epoch": 0.22256227152937175, + "epoch": 0.3073010407700133, "grad_norm": 0.0, - "learning_rate": 1.8117593831641788e-05, - "loss": 1.0859, + "learning_rate": 1.6231075766525647e-05, + "loss": 1.1141, "step": 7854 }, { - "epoch": 0.2225906089716342, + "epoch": 0.30734016746224274, "grad_norm": 0.0, - "learning_rate": 1.8117057815827883e-05, - "loss": 0.9622, + "learning_rate": 1.6230084569239824e-05, + "loss": 1.0975, "step": 7855 }, { - "epoch": 0.22261894641389668, + "epoch": 0.3073792941544722, "grad_norm": 0.0, - "learning_rate": 1.8116521731640984e-05, - "loss": 1.1288, + "learning_rate": 1.6229093271906654e-05, + "loss": 1.1218, "step": 7856 }, { - "epoch": 0.22264728385615914, + "epoch": 0.3074184208467016, "grad_norm": 0.0, - "learning_rate": 1.8115985579085607e-05, - "loss": 1.0037, + "learning_rate": 1.6228101874542047e-05, + "loss": 1.1877, "step": 7857 }, { - "epoch": 0.2226756212984216, + "epoch": 0.30745754753893106, "grad_norm": 0.0, - "learning_rate": 1.811544935816627e-05, - "loss": 1.0247, + "learning_rate": 1.6227110377161928e-05, + "loss": 1.1807, "step": 7858 }, { - "epoch": 0.22270395874068408, + "epoch": 0.3074966742311605, "grad_norm": 0.0, - "learning_rate": 1.8114913068887493e-05, - "loss": 0.9782, + "learning_rate": 1.6226118779782224e-05, + "loss": 1.1301, "step": 7859 }, { - "epoch": 0.22273229618294652, + "epoch": 0.30753580092338995, "grad_norm": 0.0, - "learning_rate": 1.8114376711253788e-05, - "loss": 1.0657, + "learning_rate": 1.622512708241885e-05, + "loss": 1.0153, "step": 7860 }, { - "epoch": 0.22276063362520898, + "epoch": 0.3075749276156194, "grad_norm": 0.0, - "learning_rate": 1.8113840285269674e-05, - "loss": 1.0312, + "learning_rate": 1.6224135285087734e-05, + "loss": 1.0949, "step": 7861 }, { - "epoch": 0.22278897106747145, + "epoch": 0.3076140543078488, "grad_norm": 0.0, - "learning_rate": 1.811330379093967e-05, - "loss": 0.9066, + "learning_rate": 1.6223143387804804e-05, + "loss": 1.1287, "step": 7862 }, { - "epoch": 0.2228173085097339, + "epoch": 0.30765318100007827, "grad_norm": 0.0, - "learning_rate": 1.8112767228268295e-05, - "loss": 1.0319, + "learning_rate": 1.6222151390585996e-05, + "loss": 1.2133, "step": 7863 }, { - "epoch": 0.22284564595199638, + "epoch": 0.3076923076923077, "grad_norm": 0.0, - "learning_rate": 1.8112230597260073e-05, - "loss": 1.0026, + "learning_rate": 1.6221159293447227e-05, + "loss": 1.0915, "step": 7864 }, { - "epoch": 0.22287398339425885, + "epoch": 0.30773143438453715, "grad_norm": 0.0, - "learning_rate": 1.8111693897919518e-05, - "loss": 0.9291, + "learning_rate": 1.622016709640444e-05, + "loss": 1.149, "step": 7865 }, { - "epoch": 0.22290232083652128, + "epoch": 0.3077705610767666, "grad_norm": 0.0, - "learning_rate": 1.8111157130251153e-05, - "loss": 0.9934, + "learning_rate": 1.621917479947356e-05, + "loss": 1.1415, "step": 7866 }, { - "epoch": 0.22293065827878375, + "epoch": 0.30780968776899603, "grad_norm": 0.0, - "learning_rate": 1.8110620294259497e-05, - "loss": 1.0191, + "learning_rate": 1.621818240267053e-05, + "loss": 1.2538, "step": 7867 }, { - "epoch": 0.22295899572104622, + "epoch": 0.3078488144612255, "grad_norm": 0.0, - "learning_rate": 1.8110083389949074e-05, - "loss": 1.1242, + "learning_rate": 1.621718990601128e-05, + "loss": 1.0739, "step": 7868 }, { - "epoch": 0.22298733316330868, + "epoch": 0.3078879411534549, "grad_norm": 0.0, - "learning_rate": 1.810954641732441e-05, - "loss": 1.1241, + "learning_rate": 1.621619730951175e-05, + "loss": 1.0395, "step": 7869 }, { - "epoch": 0.22301567060557115, + "epoch": 0.3079270678456843, "grad_norm": 0.0, - "learning_rate": 1.8109009376390024e-05, - "loss": 1.0078, + "learning_rate": 1.621520461318788e-05, + "loss": 1.2267, "step": 7870 }, { - "epoch": 0.22304400804783361, + "epoch": 0.30796619453791374, "grad_norm": 0.0, - "learning_rate": 1.8108472267150442e-05, - "loss": 1.0193, + "learning_rate": 1.6214211817055612e-05, + "loss": 1.0431, "step": 7871 }, { - "epoch": 0.22307234549009605, + "epoch": 0.3080053212301432, "grad_norm": 0.0, - "learning_rate": 1.8107935089610186e-05, - "loss": 0.9232, + "learning_rate": 1.621321892113089e-05, + "loss": 1.1198, "step": 7872 }, { - "epoch": 0.22310068293235852, + "epoch": 0.3080444479223726, "grad_norm": 0.0, - "learning_rate": 1.8107397843773785e-05, - "loss": 0.9071, + "learning_rate": 1.621222592542966e-05, + "loss": 1.0812, "step": 7873 }, { - "epoch": 0.22312902037462098, + "epoch": 0.30808357461460206, "grad_norm": 0.0, - "learning_rate": 1.8106860529645756e-05, - "loss": 1.0731, + "learning_rate": 1.6211232829967865e-05, + "loss": 1.1912, "step": 7874 }, { - "epoch": 0.22315735781688345, + "epoch": 0.3081227013068315, "grad_norm": 0.0, - "learning_rate": 1.8106323147230636e-05, - "loss": 0.9579, + "learning_rate": 1.6210239634761452e-05, + "loss": 1.1179, "step": 7875 }, { - "epoch": 0.22318569525914592, + "epoch": 0.30816182799906094, "grad_norm": 0.0, - "learning_rate": 1.8105785696532944e-05, - "loss": 0.9701, + "learning_rate": 1.6209246339826372e-05, + "loss": 1.1593, "step": 7876 }, { - "epoch": 0.22321403270140838, + "epoch": 0.3082009546912904, "grad_norm": 0.0, - "learning_rate": 1.8105248177557207e-05, - "loss": 0.9977, + "learning_rate": 1.6208252945178578e-05, + "loss": 1.1981, "step": 7877 }, { - "epoch": 0.22324237014367082, + "epoch": 0.3082400813835198, "grad_norm": 0.0, - "learning_rate": 1.8104710590307954e-05, - "loss": 0.817, + "learning_rate": 1.6207259450834022e-05, + "loss": 1.235, "step": 7878 }, { - "epoch": 0.2232707075859333, + "epoch": 0.30827920807574927, "grad_norm": 0.0, - "learning_rate": 1.8104172934789716e-05, - "loss": 1.079, + "learning_rate": 1.6206265856808655e-05, + "loss": 1.2128, "step": 7879 }, { - "epoch": 0.22329904502819575, + "epoch": 0.3083183347679787, "grad_norm": 0.0, - "learning_rate": 1.810363521100702e-05, - "loss": 1.0858, + "learning_rate": 1.6205272163118436e-05, + "loss": 1.1635, "step": 7880 }, { - "epoch": 0.22332738247045822, + "epoch": 0.30835746146020815, "grad_norm": 0.0, - "learning_rate": 1.8103097418964398e-05, - "loss": 0.9889, + "learning_rate": 1.6204278369779324e-05, + "loss": 1.1439, "step": 7881 }, { - "epoch": 0.22335571991272068, + "epoch": 0.3083965881524376, "grad_norm": 0.0, - "learning_rate": 1.8102559558666374e-05, - "loss": 0.8813, + "learning_rate": 1.620328447680727e-05, + "loss": 1.1791, "step": 7882 }, { - "epoch": 0.22338405735498315, + "epoch": 0.30843571484466703, "grad_norm": 0.0, - "learning_rate": 1.810202163011748e-05, - "loss": 1.101, + "learning_rate": 1.6202290484218244e-05, + "loss": 1.0632, "step": 7883 }, { - "epoch": 0.2234123947972456, + "epoch": 0.30847484153689647, "grad_norm": 0.0, - "learning_rate": 1.8101483633322255e-05, - "loss": 0.9596, + "learning_rate": 1.6201296392028206e-05, + "loss": 1.1864, "step": 7884 }, { - "epoch": 0.22344073223950806, + "epoch": 0.3085139682291259, "grad_norm": 0.0, - "learning_rate": 1.810094556828522e-05, - "loss": 1.0122, + "learning_rate": 1.6200302200253117e-05, + "loss": 1.1223, "step": 7885 }, { - "epoch": 0.22346906968177052, + "epoch": 0.30855309492135535, "grad_norm": 0.0, - "learning_rate": 1.8100407435010914e-05, - "loss": 1.0648, + "learning_rate": 1.6199307908908943e-05, + "loss": 1.1348, "step": 7886 }, { - "epoch": 0.223497407124033, + "epoch": 0.3085922216135848, "grad_norm": 0.0, - "learning_rate": 1.8099869233503868e-05, - "loss": 1.0441, + "learning_rate": 1.6198313518011655e-05, + "loss": 1.1889, "step": 7887 }, { - "epoch": 0.22352574456629545, + "epoch": 0.30863134830581423, "grad_norm": 0.0, - "learning_rate": 1.809933096376862e-05, - "loss": 0.8931, + "learning_rate": 1.6197319027577218e-05, + "loss": 1.1006, "step": 7888 }, { - "epoch": 0.22355408200855792, + "epoch": 0.3086704749980437, "grad_norm": 0.0, - "learning_rate": 1.809879262580969e-05, - "loss": 0.9915, + "learning_rate": 1.6196324437621603e-05, + "loss": 1.102, "step": 7889 }, { - "epoch": 0.22358241945082036, + "epoch": 0.3087096016902731, "grad_norm": 0.0, - "learning_rate": 1.809825421963163e-05, - "loss": 1.0616, + "learning_rate": 1.6195329748160783e-05, + "loss": 1.0438, "step": 7890 }, { - "epoch": 0.22361075689308282, + "epoch": 0.30874872838250256, "grad_norm": 0.0, - "learning_rate": 1.8097715745238966e-05, - "loss": 1.0583, + "learning_rate": 1.6194334959210726e-05, + "loss": 1.0844, "step": 7891 }, { - "epoch": 0.2236390943353453, + "epoch": 0.308787855074732, "grad_norm": 0.0, - "learning_rate": 1.8097177202636235e-05, - "loss": 0.9473, + "learning_rate": 1.6193340070787417e-05, + "loss": 1.049, "step": 7892 }, { - "epoch": 0.22366743177760776, + "epoch": 0.30882698176696144, "grad_norm": 0.0, - "learning_rate": 1.8096638591827974e-05, - "loss": 1.0085, + "learning_rate": 1.6192345082906823e-05, + "loss": 1.0962, "step": 7893 }, { - "epoch": 0.22369576921987022, + "epoch": 0.3088661084591909, "grad_norm": 0.0, - "learning_rate": 1.8096099912818718e-05, - "loss": 1.0391, + "learning_rate": 1.6191349995584928e-05, + "loss": 1.0955, "step": 7894 }, { - "epoch": 0.2237241066621327, + "epoch": 0.3089052351514203, "grad_norm": 0.0, - "learning_rate": 1.8095561165613007e-05, - "loss": 1.0395, + "learning_rate": 1.619035480883771e-05, + "loss": 1.2358, "step": 7895 }, { - "epoch": 0.22375244410439513, + "epoch": 0.30894436184364976, "grad_norm": 0.0, - "learning_rate": 1.8095022350215376e-05, - "loss": 1.0175, + "learning_rate": 1.618935952268115e-05, + "loss": 1.1801, "step": 7896 }, { - "epoch": 0.2237807815466576, + "epoch": 0.3089834885358792, "grad_norm": 0.0, - "learning_rate": 1.8094483466630367e-05, - "loss": 1.1368, + "learning_rate": 1.6188364137131233e-05, + "loss": 1.1002, "step": 7897 }, { - "epoch": 0.22380911898892006, + "epoch": 0.3090226152281086, "grad_norm": 0.0, - "learning_rate": 1.8093944514862523e-05, - "loss": 1.0223, + "learning_rate": 1.6187368652203944e-05, + "loss": 1.1535, "step": 7898 }, { - "epoch": 0.22383745643118252, + "epoch": 0.30906174192033803, "grad_norm": 0.0, - "learning_rate": 1.8093405494916373e-05, - "loss": 0.8958, + "learning_rate": 1.6186373067915265e-05, + "loss": 1.0721, "step": 7899 }, { - "epoch": 0.223865793873445, + "epoch": 0.30910086861256747, "grad_norm": 0.0, - "learning_rate": 1.8092866406796465e-05, - "loss": 0.9988, + "learning_rate": 1.6185377384281185e-05, + "loss": 1.0481, "step": 7900 }, { - "epoch": 0.22389413131570746, + "epoch": 0.3091399953047969, "grad_norm": 0.0, - "learning_rate": 1.8092327250507335e-05, - "loss": 0.9302, + "learning_rate": 1.61843816013177e-05, + "loss": 1.3419, "step": 7901 }, { - "epoch": 0.2239224687579699, + "epoch": 0.30917912199702635, "grad_norm": 0.0, - "learning_rate": 1.8091788026053533e-05, - "loss": 0.9471, + "learning_rate": 1.618338571904079e-05, + "loss": 1.0812, "step": 7902 }, { - "epoch": 0.22395080620023236, + "epoch": 0.3092182486892558, "grad_norm": 0.0, - "learning_rate": 1.8091248733439593e-05, - "loss": 1.11, + "learning_rate": 1.618238973746646e-05, + "loss": 1.0181, "step": 7903 }, { - "epoch": 0.22397914364249483, + "epoch": 0.30925737538148523, "grad_norm": 0.0, - "learning_rate": 1.809070937267006e-05, - "loss": 1.0482, + "learning_rate": 1.6181393656610693e-05, + "loss": 1.1688, "step": 7904 }, { - "epoch": 0.2240074810847573, + "epoch": 0.3092965020737147, "grad_norm": 0.0, - "learning_rate": 1.8090169943749477e-05, - "loss": 1.0126, + "learning_rate": 1.618039747648949e-05, + "loss": 1.1564, "step": 7905 }, { - "epoch": 0.22403581852701976, + "epoch": 0.3093356287659441, "grad_norm": 0.0, - "learning_rate": 1.808963044668239e-05, - "loss": 0.9963, + "learning_rate": 1.617940119711885e-05, + "loss": 1.0064, "step": 7906 }, { - "epoch": 0.22406415596928222, + "epoch": 0.30937475545817356, "grad_norm": 0.0, - "learning_rate": 1.808909088147334e-05, - "loss": 1.0554, + "learning_rate": 1.617840481851477e-05, + "loss": 1.0458, "step": 7907 }, { - "epoch": 0.22409249341154466, + "epoch": 0.309413882150403, "grad_norm": 0.0, - "learning_rate": 1.8088551248126875e-05, - "loss": 1.1307, + "learning_rate": 1.617740834069325e-05, + "loss": 1.1231, "step": 7908 }, { - "epoch": 0.22412083085380713, + "epoch": 0.30945300884263244, "grad_norm": 0.0, - "learning_rate": 1.8088011546647536e-05, - "loss": 0.9269, + "learning_rate": 1.6176411763670292e-05, + "loss": 1.0005, "step": 7909 }, { - "epoch": 0.2241491682960696, + "epoch": 0.3094921355348619, "grad_norm": 0.0, - "learning_rate": 1.8087471777039877e-05, - "loss": 0.9984, + "learning_rate": 1.6175415087461904e-05, + "loss": 1.091, "step": 7910 }, { - "epoch": 0.22417750573833206, + "epoch": 0.3095312622270913, "grad_norm": 0.0, - "learning_rate": 1.8086931939308438e-05, - "loss": 1.0963, + "learning_rate": 1.617441831208408e-05, + "loss": 1.011, "step": 7911 }, { - "epoch": 0.22420584318059453, + "epoch": 0.30957038891932076, "grad_norm": 0.0, - "learning_rate": 1.8086392033457766e-05, - "loss": 0.9825, + "learning_rate": 1.617342143755284e-05, + "loss": 1.1384, "step": 7912 }, { - "epoch": 0.224234180622857, + "epoch": 0.3096095156115502, "grad_norm": 0.0, - "learning_rate": 1.8085852059492414e-05, - "loss": 0.9056, + "learning_rate": 1.6172424463884187e-05, + "loss": 1.2124, "step": 7913 }, { - "epoch": 0.22426251806511943, + "epoch": 0.30964864230377964, "grad_norm": 0.0, - "learning_rate": 1.8085312017416926e-05, - "loss": 1.0823, + "learning_rate": 1.6171427391094136e-05, + "loss": 0.9992, "step": 7914 }, { - "epoch": 0.2242908555073819, + "epoch": 0.3096877689960091, "grad_norm": 0.0, - "learning_rate": 1.8084771907235855e-05, - "loss": 1.0631, + "learning_rate": 1.617043021919869e-05, + "loss": 1.1131, "step": 7915 }, { - "epoch": 0.22431919294964436, + "epoch": 0.3097268956882385, "grad_norm": 0.0, - "learning_rate": 1.8084231728953746e-05, - "loss": 1.0382, + "learning_rate": 1.6169432948213864e-05, + "loss": 1.1586, "step": 7916 }, { - "epoch": 0.22434753039190683, + "epoch": 0.30976602238046796, "grad_norm": 0.0, - "learning_rate": 1.808369148257515e-05, - "loss": 0.8884, + "learning_rate": 1.616843557815568e-05, + "loss": 1.087, "step": 7917 }, { - "epoch": 0.2243758678341693, + "epoch": 0.3098051490726974, "grad_norm": 0.0, - "learning_rate": 1.808315116810462e-05, - "loss": 0.9461, + "learning_rate": 1.6167438109040148e-05, + "loss": 1.2209, "step": 7918 }, { - "epoch": 0.22440420527643176, + "epoch": 0.30984427576492685, "grad_norm": 0.0, - "learning_rate": 1.808261078554671e-05, - "loss": 0.938, + "learning_rate": 1.616644054088329e-05, + "loss": 1.2152, "step": 7919 }, { - "epoch": 0.2244325427186942, + "epoch": 0.3098834024571563, "grad_norm": 0.0, - "learning_rate": 1.808207033490596e-05, - "loss": 0.9548, + "learning_rate": 1.616544287370112e-05, + "loss": 1.0604, "step": 7920 }, { - "epoch": 0.22446088016095667, + "epoch": 0.30992252914938573, "grad_norm": 0.0, - "learning_rate": 1.8081529816186937e-05, - "loss": 0.8256, + "learning_rate": 1.616444510750967e-05, + "loss": 1.2168, "step": 7921 }, { - "epoch": 0.22448921760321913, + "epoch": 0.30996165584161517, "grad_norm": 0.0, - "learning_rate": 1.8080989229394183e-05, - "loss": 0.9397, + "learning_rate": 1.616344724232495e-05, + "loss": 1.1343, "step": 7922 }, { - "epoch": 0.2245175550454816, + "epoch": 0.3100007825338446, "grad_norm": 0.0, - "learning_rate": 1.808044857453226e-05, - "loss": 1.0164, + "learning_rate": 1.616244927816299e-05, + "loss": 0.9808, "step": 7923 }, { - "epoch": 0.22454589248774406, + "epoch": 0.31003990922607405, "grad_norm": 0.0, - "learning_rate": 1.8079907851605714e-05, - "loss": 1.0613, + "learning_rate": 1.616145121503982e-05, + "loss": 1.0319, "step": 7924 }, { - "epoch": 0.22457422993000653, + "epoch": 0.3100790359183035, "grad_norm": 0.0, - "learning_rate": 1.8079367060619107e-05, - "loss": 0.8944, + "learning_rate": 1.6160453052971466e-05, + "loss": 1.0648, "step": 7925 }, { - "epoch": 0.22460256737226897, + "epoch": 0.31011816261053293, "grad_norm": 0.0, - "learning_rate": 1.807882620157699e-05, - "loss": 1.0133, + "learning_rate": 1.6159454791973953e-05, + "loss": 1.0951, "step": 7926 }, { - "epoch": 0.22463090481453143, + "epoch": 0.3101572893027623, "grad_norm": 0.0, - "learning_rate": 1.807828527448392e-05, - "loss": 0.9575, + "learning_rate": 1.615845643206331e-05, + "loss": 1.0272, "step": 7927 }, { - "epoch": 0.2246592422567939, + "epoch": 0.31019641599499176, "grad_norm": 0.0, - "learning_rate": 1.807774427934445e-05, - "loss": 0.9924, + "learning_rate": 1.615745797325558e-05, + "loss": 1.2076, "step": 7928 }, { - "epoch": 0.22468757969905637, + "epoch": 0.3102355426872212, "grad_norm": 0.0, - "learning_rate": 1.8077203216163145e-05, - "loss": 0.9742, + "learning_rate": 1.6156459415566786e-05, + "loss": 0.9814, "step": 7929 }, { - "epoch": 0.22471591714131883, + "epoch": 0.31027466937945064, "grad_norm": 0.0, - "learning_rate": 1.807666208494456e-05, - "loss": 1.0828, + "learning_rate": 1.615546075901297e-05, + "loss": 1.1421, "step": 7930 }, { - "epoch": 0.2247442545835813, + "epoch": 0.3103137960716801, "grad_norm": 0.0, - "learning_rate": 1.8076120885693245e-05, - "loss": 1.0018, + "learning_rate": 1.6154462003610168e-05, + "loss": 1.0519, "step": 7931 }, { - "epoch": 0.22477259202584374, + "epoch": 0.3103529227639095, "grad_norm": 0.0, - "learning_rate": 1.8075579618413767e-05, - "loss": 1.07, + "learning_rate": 1.615346314937442e-05, + "loss": 1.1417, "step": 7932 }, { - "epoch": 0.2248009294681062, + "epoch": 0.31039204945613896, "grad_norm": 0.0, - "learning_rate": 1.8075038283110682e-05, - "loss": 0.8322, + "learning_rate": 1.6152464196321762e-05, + "loss": 1.0498, "step": 7933 }, { - "epoch": 0.22482926691036867, + "epoch": 0.3104311761483684, "grad_norm": 0.0, - "learning_rate": 1.8074496879788555e-05, - "loss": 0.9298, + "learning_rate": 1.6151465144468242e-05, + "loss": 1.1204, "step": 7934 }, { - "epoch": 0.22485760435263114, + "epoch": 0.31047030284059784, "grad_norm": 0.0, - "learning_rate": 1.807395540845194e-05, - "loss": 1.0814, + "learning_rate": 1.6150465993829894e-05, + "loss": 1.0482, "step": 7935 }, { - "epoch": 0.2248859417948936, + "epoch": 0.3105094295328273, "grad_norm": 0.0, - "learning_rate": 1.8073413869105397e-05, - "loss": 0.9903, + "learning_rate": 1.6149466744422772e-05, + "loss": 1.159, "step": 7936 }, { - "epoch": 0.22491427923715607, + "epoch": 0.3105485562250567, "grad_norm": 0.0, - "learning_rate": 1.8072872261753494e-05, - "loss": 0.9873, + "learning_rate": 1.614846739626292e-05, + "loss": 1.136, "step": 7937 }, { - "epoch": 0.2249426166794185, + "epoch": 0.31058768291728617, "grad_norm": 0.0, - "learning_rate": 1.8072330586400793e-05, - "loss": 1.0485, + "learning_rate": 1.6147467949366386e-05, + "loss": 0.9992, "step": 7938 }, { - "epoch": 0.22497095412168097, + "epoch": 0.3106268096095156, "grad_norm": 0.0, - "learning_rate": 1.807178884305185e-05, - "loss": 1.0106, + "learning_rate": 1.6146468403749223e-05, + "loss": 1.2538, "step": 7939 }, { - "epoch": 0.22499929156394344, + "epoch": 0.31066593630174505, "grad_norm": 0.0, - "learning_rate": 1.8071247031711232e-05, - "loss": 0.9247, + "learning_rate": 1.6145468759427476e-05, + "loss": 1.0443, "step": 7940 }, { - "epoch": 0.2250276290062059, + "epoch": 0.3107050629939745, "grad_norm": 0.0, - "learning_rate": 1.8070705152383504e-05, - "loss": 1.0614, + "learning_rate": 1.6144469016417202e-05, + "loss": 1.0572, "step": 7941 }, { - "epoch": 0.22505596644846837, + "epoch": 0.31074418968620393, "grad_norm": 0.0, - "learning_rate": 1.807016320507323e-05, - "loss": 1.0125, + "learning_rate": 1.6143469174734455e-05, + "loss": 1.0659, "step": 7942 }, { - "epoch": 0.22508430389073084, + "epoch": 0.31078331637843337, "grad_norm": 0.0, - "learning_rate": 1.8069621189784974e-05, - "loss": 1.0357, + "learning_rate": 1.614246923439529e-05, + "loss": 1.156, "step": 7943 }, { - "epoch": 0.22511264133299327, + "epoch": 0.3108224430706628, "grad_norm": 0.0, - "learning_rate": 1.8069079106523303e-05, - "loss": 0.9887, + "learning_rate": 1.6141469195415766e-05, + "loss": 1.1415, "step": 7944 }, { - "epoch": 0.22514097877525574, + "epoch": 0.31086156976289225, "grad_norm": 0.0, - "learning_rate": 1.806853695529278e-05, - "loss": 0.9376, + "learning_rate": 1.6140469057811944e-05, + "loss": 1.0798, "step": 7945 }, { - "epoch": 0.2251693162175182, + "epoch": 0.3109006964551217, "grad_norm": 0.0, - "learning_rate": 1.8067994736097978e-05, - "loss": 0.9955, + "learning_rate": 1.6139468821599888e-05, + "loss": 1.1789, "step": 7946 }, { - "epoch": 0.22519765365978067, + "epoch": 0.31093982314735114, "grad_norm": 0.0, - "learning_rate": 1.8067452448943455e-05, - "loss": 1.1027, + "learning_rate": 1.6138468486795646e-05, + "loss": 1.1345, "step": 7947 }, { - "epoch": 0.22522599110204314, + "epoch": 0.3109789498395806, "grad_norm": 0.0, - "learning_rate": 1.806691009383379e-05, - "loss": 1.0663, + "learning_rate": 1.61374680534153e-05, + "loss": 1.1368, "step": 7948 }, { - "epoch": 0.2252543285443056, + "epoch": 0.31101807653181, "grad_norm": 0.0, - "learning_rate": 1.8066367670773543e-05, - "loss": 0.9747, + "learning_rate": 1.6136467521474902e-05, + "loss": 1.027, "step": 7949 }, { - "epoch": 0.22528266598656804, + "epoch": 0.31105720322403946, "grad_norm": 0.0, - "learning_rate": 1.8065825179767287e-05, - "loss": 0.8943, + "learning_rate": 1.613546689099053e-05, + "loss": 1.1648, "step": 7950 }, { - "epoch": 0.2253110034288305, + "epoch": 0.3110963299162689, "grad_norm": 0.0, - "learning_rate": 1.8065282620819587e-05, - "loss": 0.9309, + "learning_rate": 1.6134466161978242e-05, + "loss": 1.1518, "step": 7951 }, { - "epoch": 0.22533934087109297, + "epoch": 0.31113545660849834, "grad_norm": 0.0, - "learning_rate": 1.806473999393502e-05, - "loss": 0.8914, + "learning_rate": 1.613346533445412e-05, + "loss": 1.1826, "step": 7952 }, { - "epoch": 0.22536767831335544, + "epoch": 0.3111745833007278, "grad_norm": 0.0, - "learning_rate": 1.8064197299118153e-05, - "loss": 1.0605, + "learning_rate": 1.613246440843423e-05, + "loss": 1.1068, "step": 7953 }, { - "epoch": 0.2253960157556179, + "epoch": 0.3112137099929572, "grad_norm": 0.0, - "learning_rate": 1.806365453637356e-05, - "loss": 0.9352, + "learning_rate": 1.6131463383934643e-05, + "loss": 1.1096, "step": 7954 }, { - "epoch": 0.22542435319788037, + "epoch": 0.3112528366851866, "grad_norm": 0.0, - "learning_rate": 1.80631117057058e-05, - "loss": 1.0533, + "learning_rate": 1.613046226097144e-05, + "loss": 1.0496, "step": 7955 }, { - "epoch": 0.2254526906401428, + "epoch": 0.31129196337741605, "grad_norm": 0.0, - "learning_rate": 1.8062568807119465e-05, - "loss": 0.926, + "learning_rate": 1.6129461039560693e-05, + "loss": 1.1901, "step": 7956 }, { - "epoch": 0.22548102808240528, + "epoch": 0.3113310900696455, "grad_norm": 0.0, - "learning_rate": 1.8062025840619118e-05, - "loss": 0.9659, + "learning_rate": 1.6128459719718482e-05, + "loss": 1.2886, "step": 7957 }, { - "epoch": 0.22550936552466774, + "epoch": 0.31137021676187493, "grad_norm": 0.0, - "learning_rate": 1.806148280620933e-05, - "loss": 0.9477, + "learning_rate": 1.612745830146089e-05, + "loss": 1.339, "step": 7958 }, { - "epoch": 0.2255377029669302, + "epoch": 0.31140934345410437, "grad_norm": 0.0, - "learning_rate": 1.8060939703894684e-05, - "loss": 1.0101, + "learning_rate": 1.6126456784803993e-05, + "loss": 1.1132, "step": 7959 }, { - "epoch": 0.22556604040919268, + "epoch": 0.3114484701463338, "grad_norm": 0.0, - "learning_rate": 1.8060396533679746e-05, - "loss": 0.9361, + "learning_rate": 1.612545516976388e-05, + "loss": 1.1945, "step": 7960 }, { - "epoch": 0.22559437785145514, + "epoch": 0.31148759683856325, "grad_norm": 0.0, - "learning_rate": 1.8059853295569095e-05, - "loss": 0.8883, + "learning_rate": 1.6124453456356628e-05, + "loss": 0.9921, "step": 7961 }, { - "epoch": 0.22562271529371758, + "epoch": 0.3115267235307927, "grad_norm": 0.0, - "learning_rate": 1.8059309989567308e-05, - "loss": 1.0159, + "learning_rate": 1.612345164459833e-05, + "loss": 1.0621, "step": 7962 }, { - "epoch": 0.22565105273598005, + "epoch": 0.31156585022302213, "grad_norm": 0.0, - "learning_rate": 1.8058766615678963e-05, - "loss": 0.9454, + "learning_rate": 1.612244973450507e-05, + "loss": 1.0437, "step": 7963 }, { - "epoch": 0.2256793901782425, + "epoch": 0.3116049769152516, "grad_norm": 0.0, - "learning_rate": 1.805822317390863e-05, - "loss": 1.1332, + "learning_rate": 1.6121447726092942e-05, + "loss": 1.0811, "step": 7964 }, { - "epoch": 0.22570772762050498, + "epoch": 0.311644103607481, "grad_norm": 0.0, - "learning_rate": 1.805767966426089e-05, - "loss": 1.0527, + "learning_rate": 1.6120445619378035e-05, + "loss": 1.148, "step": 7965 }, { - "epoch": 0.22573606506276744, + "epoch": 0.31168323029971046, "grad_norm": 0.0, - "learning_rate": 1.8057136086740326e-05, - "loss": 1.0263, + "learning_rate": 1.611944341437644e-05, + "loss": 1.0571, "step": 7966 }, { - "epoch": 0.2257644025050299, + "epoch": 0.3117223569919399, "grad_norm": 0.0, - "learning_rate": 1.805659244135151e-05, - "loss": 1.1428, + "learning_rate": 1.611844111110425e-05, + "loss": 1.1111, "step": 7967 }, { - "epoch": 0.22579273994729235, + "epoch": 0.31176148368416934, "grad_norm": 0.0, - "learning_rate": 1.8056048728099024e-05, - "loss": 1.0378, + "learning_rate": 1.6117438709577565e-05, + "loss": 1.1299, "step": 7968 }, { - "epoch": 0.22582107738955481, + "epoch": 0.3118006103763988, "grad_norm": 0.0, - "learning_rate": 1.805550494698745e-05, - "loss": 0.9818, + "learning_rate": 1.6116436209812476e-05, + "loss": 1.1126, "step": 7969 }, { - "epoch": 0.22584941483181728, + "epoch": 0.3118397370686282, "grad_norm": 0.0, - "learning_rate": 1.8054961098021366e-05, - "loss": 1.0139, + "learning_rate": 1.611543361182509e-05, + "loss": 1.0751, "step": 7970 }, { - "epoch": 0.22587775227407975, + "epoch": 0.31187886376085766, "grad_norm": 0.0, - "learning_rate": 1.805441718120535e-05, - "loss": 0.9074, + "learning_rate": 1.61144309156315e-05, + "loss": 1.1707, "step": 7971 }, { - "epoch": 0.2259060897163422, + "epoch": 0.3119179904530871, "grad_norm": 0.0, - "learning_rate": 1.8053873196543993e-05, - "loss": 0.9778, + "learning_rate": 1.6113428121247813e-05, + "loss": 0.9675, "step": 7972 }, { - "epoch": 0.22593442715860468, + "epoch": 0.31195711714531654, "grad_norm": 0.0, - "learning_rate": 1.8053329144041867e-05, - "loss": 1.0122, + "learning_rate": 1.611242522869013e-05, + "loss": 1.15, "step": 7973 }, { - "epoch": 0.22596276460086712, + "epoch": 0.311996243837546, "grad_norm": 0.0, - "learning_rate": 1.805278502370356e-05, - "loss": 0.9608, + "learning_rate": 1.611142223797456e-05, + "loss": 1.1736, "step": 7974 }, { - "epoch": 0.22599110204312958, + "epoch": 0.3120353705297754, "grad_norm": 0.0, - "learning_rate": 1.805224083553365e-05, - "loss": 0.9952, + "learning_rate": 1.6110419149117205e-05, + "loss": 1.1799, "step": 7975 }, { - "epoch": 0.22601943948539205, + "epoch": 0.31207449722200487, "grad_norm": 0.0, - "learning_rate": 1.805169657953673e-05, - "loss": 1.0691, + "learning_rate": 1.6109415962134174e-05, + "loss": 1.1798, "step": 7976 }, { - "epoch": 0.22604777692765451, + "epoch": 0.3121136239142343, "grad_norm": 0.0, - "learning_rate": 1.8051152255717383e-05, - "loss": 1.0128, + "learning_rate": 1.610841267704158e-05, + "loss": 1.0947, "step": 7977 }, { - "epoch": 0.22607611436991698, + "epoch": 0.31215275060646375, "grad_norm": 0.0, - "learning_rate": 1.805060786408019e-05, - "loss": 1.0187, + "learning_rate": 1.610740929385553e-05, + "loss": 1.1824, "step": 7978 }, { - "epoch": 0.22610445181217942, + "epoch": 0.3121918772986932, "grad_norm": 0.0, - "learning_rate": 1.8050063404629733e-05, - "loss": 0.9214, + "learning_rate": 1.6106405812592143e-05, + "loss": 1.0967, "step": 7979 }, { - "epoch": 0.22613278925444188, + "epoch": 0.31223100399092263, "grad_norm": 0.0, - "learning_rate": 1.8049518877370604e-05, - "loss": 1.0087, + "learning_rate": 1.6105402233267526e-05, + "loss": 1.2648, "step": 7980 }, { - "epoch": 0.22616112669670435, + "epoch": 0.31227013068315207, "grad_norm": 0.0, - "learning_rate": 1.804897428230739e-05, - "loss": 0.9348, + "learning_rate": 1.6104398555897805e-05, + "loss": 1.0115, "step": 7981 }, { - "epoch": 0.22618946413896682, + "epoch": 0.3123092573753815, "grad_norm": 0.0, - "learning_rate": 1.8048429619444675e-05, - "loss": 1.0916, + "learning_rate": 1.6103394780499088e-05, + "loss": 1.2064, "step": 7982 }, { - "epoch": 0.22621780158122928, + "epoch": 0.31234838406761095, "grad_norm": 0.0, - "learning_rate": 1.804788488878705e-05, - "loss": 1.0173, + "learning_rate": 1.61023909070875e-05, + "loss": 1.1009, "step": 7983 }, { - "epoch": 0.22624613902349175, + "epoch": 0.31238751075984034, "grad_norm": 0.0, - "learning_rate": 1.80473400903391e-05, - "loss": 1.0257, + "learning_rate": 1.6101386935679163e-05, + "loss": 1.0587, "step": 7984 }, { - "epoch": 0.2262744764657542, + "epoch": 0.3124266374520698, "grad_norm": 0.0, - "learning_rate": 1.804679522410542e-05, - "loss": 1.0052, + "learning_rate": 1.6100382866290197e-05, + "loss": 1.1752, "step": 7985 }, { - "epoch": 0.22630281390801665, + "epoch": 0.3124657641442992, "grad_norm": 0.0, - "learning_rate": 1.8046250290090594e-05, - "loss": 0.9699, + "learning_rate": 1.6099378698936724e-05, + "loss": 1.1511, "step": 7986 }, { - "epoch": 0.22633115135027912, + "epoch": 0.31250489083652866, "grad_norm": 0.0, - "learning_rate": 1.8045705288299213e-05, - "loss": 0.9276, + "learning_rate": 1.6098374433634876e-05, + "loss": 1.1778, "step": 7987 }, { - "epoch": 0.22635948879254159, + "epoch": 0.3125440175287581, "grad_norm": 0.0, - "learning_rate": 1.8045160218735866e-05, - "loss": 1.1014, + "learning_rate": 1.609737007040077e-05, + "loss": 1.0403, "step": 7988 }, { - "epoch": 0.22638782623480405, + "epoch": 0.31258314422098754, "grad_norm": 0.0, - "learning_rate": 1.8044615081405153e-05, - "loss": 1.0504, + "learning_rate": 1.6096365609250546e-05, + "loss": 1.1, "step": 7989 }, { - "epoch": 0.22641616367706652, + "epoch": 0.312622270913217, "grad_norm": 0.0, - "learning_rate": 1.8044069876311655e-05, - "loss": 0.9431, + "learning_rate": 1.609536105020033e-05, + "loss": 1.0587, "step": 7990 }, { - "epoch": 0.22644450111932896, + "epoch": 0.3126613976054464, "grad_norm": 0.0, - "learning_rate": 1.8043524603459973e-05, - "loss": 0.9987, + "learning_rate": 1.609435639326625e-05, + "loss": 1.0911, "step": 7991 }, { - "epoch": 0.22647283856159142, + "epoch": 0.31270052429767586, "grad_norm": 0.0, - "learning_rate": 1.8042979262854695e-05, - "loss": 1.0596, + "learning_rate": 1.6093351638464447e-05, + "loss": 1.137, "step": 7992 }, { - "epoch": 0.2265011760038539, + "epoch": 0.3127396509899053, "grad_norm": 0.0, - "learning_rate": 1.8042433854500416e-05, - "loss": 0.9828, + "learning_rate": 1.609234678581105e-05, + "loss": 1.0193, "step": 7993 }, { - "epoch": 0.22652951344611635, + "epoch": 0.31277877768213475, "grad_norm": 0.0, - "learning_rate": 1.8041888378401728e-05, - "loss": 0.9453, + "learning_rate": 1.6091341835322193e-05, + "loss": 1.1414, "step": 7994 }, { - "epoch": 0.22655785088837882, + "epoch": 0.3128179043743642, "grad_norm": 0.0, - "learning_rate": 1.8041342834563227e-05, - "loss": 0.8588, + "learning_rate": 1.6090336787014028e-05, + "loss": 1.1279, "step": 7995 }, { - "epoch": 0.22658618833064129, + "epoch": 0.3128570310665936, "grad_norm": 0.0, - "learning_rate": 1.8040797222989514e-05, - "loss": 0.9315, + "learning_rate": 1.608933164090268e-05, + "loss": 1.0743, "step": 7996 }, { - "epoch": 0.22661452577290372, + "epoch": 0.31289615775882307, "grad_norm": 0.0, - "learning_rate": 1.804025154368518e-05, - "loss": 0.9223, + "learning_rate": 1.6088326397004296e-05, + "loss": 1.0597, "step": 7997 }, { - "epoch": 0.2266428632151662, + "epoch": 0.3129352844510525, "grad_norm": 0.0, - "learning_rate": 1.8039705796654815e-05, - "loss": 0.9143, + "learning_rate": 1.608732105533502e-05, + "loss": 1.0895, "step": 7998 }, { - "epoch": 0.22667120065742866, + "epoch": 0.31297441114328195, "grad_norm": 0.0, - "learning_rate": 1.8039159981903028e-05, - "loss": 1.0148, + "learning_rate": 1.6086315615911e-05, + "loss": 0.9594, "step": 7999 }, { - "epoch": 0.22669953809969112, + "epoch": 0.3130135378355114, "grad_norm": 0.0, - "learning_rate": 1.803861409943441e-05, - "loss": 1.0098, + "learning_rate": 1.608531007874837e-05, + "loss": 1.1869, "step": 8000 }, { - "epoch": 0.2267278755419536, + "epoch": 0.31305266452774083, "grad_norm": 0.0, - "learning_rate": 1.803806814925356e-05, - "loss": 1.1061, + "learning_rate": 1.608430444386329e-05, + "loss": 1.1313, "step": 8001 }, { - "epoch": 0.22675621298421605, + "epoch": 0.3130917912199703, "grad_norm": 0.0, - "learning_rate": 1.803752213136508e-05, - "loss": 1.0615, + "learning_rate": 1.6083298711271903e-05, + "loss": 1.0521, "step": 8002 }, { - "epoch": 0.2267845504264785, + "epoch": 0.3131309179121997, "grad_norm": 0.0, - "learning_rate": 1.8036976045773564e-05, - "loss": 1.026, + "learning_rate": 1.6082292880990364e-05, + "loss": 1.1334, "step": 8003 }, { - "epoch": 0.22681288786874096, + "epoch": 0.31317004460442915, "grad_norm": 0.0, - "learning_rate": 1.8036429892483615e-05, - "loss": 0.9504, + "learning_rate": 1.6081286953034824e-05, + "loss": 1.1432, "step": 8004 }, { - "epoch": 0.22684122531100342, + "epoch": 0.3132091712966586, "grad_norm": 0.0, - "learning_rate": 1.803588367149983e-05, - "loss": 0.9883, + "learning_rate": 1.6080280927421434e-05, + "loss": 1.0248, "step": 8005 }, { - "epoch": 0.2268695627532659, + "epoch": 0.31324829798888804, "grad_norm": 0.0, - "learning_rate": 1.8035337382826818e-05, - "loss": 1.0699, + "learning_rate": 1.607927480416635e-05, + "loss": 1.1494, "step": 8006 }, { - "epoch": 0.22689790019552836, + "epoch": 0.3132874246811175, "grad_norm": 0.0, - "learning_rate": 1.803479102646917e-05, - "loss": 0.8907, + "learning_rate": 1.607826858328573e-05, + "loss": 1.0671, "step": 8007 }, { - "epoch": 0.22692623763779082, + "epoch": 0.3133265513733469, "grad_norm": 0.0, - "learning_rate": 1.8034244602431497e-05, - "loss": 0.9443, + "learning_rate": 1.6077262264795735e-05, + "loss": 1.1307, "step": 8008 }, { - "epoch": 0.22695457508005326, + "epoch": 0.31336567806557636, "grad_norm": 0.0, - "learning_rate": 1.8033698110718395e-05, - "loss": 0.8723, + "learning_rate": 1.607625584871252e-05, + "loss": 1.0498, "step": 8009 }, { - "epoch": 0.22698291252231573, + "epoch": 0.3134048047578058, "grad_norm": 0.0, - "learning_rate": 1.8033151551334475e-05, - "loss": 0.9596, + "learning_rate": 1.6075249335052253e-05, + "loss": 1.061, "step": 8010 }, { - "epoch": 0.2270112499645782, + "epoch": 0.31344393145003524, "grad_norm": 0.0, - "learning_rate": 1.8032604924284332e-05, - "loss": 1.0029, + "learning_rate": 1.6074242723831095e-05, + "loss": 1.1108, "step": 8011 }, { - "epoch": 0.22703958740684066, + "epoch": 0.3134830581422646, "grad_norm": 0.0, - "learning_rate": 1.803205822957258e-05, - "loss": 0.9818, + "learning_rate": 1.607323601506521e-05, + "loss": 1.1255, "step": 8012 }, { - "epoch": 0.22706792484910313, + "epoch": 0.31352218483449407, "grad_norm": 0.0, - "learning_rate": 1.8031511467203816e-05, - "loss": 0.8709, + "learning_rate": 1.6072229208770766e-05, + "loss": 1.1102, "step": 8013 }, { - "epoch": 0.2270962622913656, + "epoch": 0.3135613115267235, "grad_norm": 0.0, - "learning_rate": 1.8030964637182648e-05, - "loss": 0.9405, + "learning_rate": 1.6071222304963926e-05, + "loss": 1.1714, "step": 8014 }, { - "epoch": 0.22712459973362803, + "epoch": 0.31360043821895295, "grad_norm": 0.0, - "learning_rate": 1.8030417739513684e-05, - "loss": 0.9451, + "learning_rate": 1.6070215303660866e-05, + "loss": 1.1643, "step": 8015 }, { - "epoch": 0.2271529371758905, + "epoch": 0.3136395649111824, "grad_norm": 0.0, - "learning_rate": 1.8029870774201527e-05, - "loss": 0.8985, + "learning_rate": 1.6069208204877755e-05, + "loss": 1.0816, "step": 8016 }, { - "epoch": 0.22718127461815296, + "epoch": 0.31367869160341183, "grad_norm": 0.0, - "learning_rate": 1.8029323741250787e-05, - "loss": 0.936, + "learning_rate": 1.606820100863076e-05, + "loss": 1.1384, "step": 8017 }, { - "epoch": 0.22720961206041543, + "epoch": 0.31371781829564127, "grad_norm": 0.0, - "learning_rate": 1.8028776640666075e-05, - "loss": 0.9133, + "learning_rate": 1.6067193714936067e-05, + "loss": 1.1453, "step": 8018 }, { - "epoch": 0.2272379495026779, + "epoch": 0.3137569449878707, "grad_norm": 0.0, - "learning_rate": 1.8028229472451994e-05, - "loss": 0.9061, + "learning_rate": 1.6066186323809844e-05, + "loss": 1.1862, "step": 8019 }, { - "epoch": 0.22726628694494036, + "epoch": 0.31379607168010015, "grad_norm": 0.0, - "learning_rate": 1.8027682236613152e-05, - "loss": 0.9722, + "learning_rate": 1.606517883526827e-05, + "loss": 1.1638, "step": 8020 }, { - "epoch": 0.2272946243872028, + "epoch": 0.3138351983723296, "grad_norm": 0.0, - "learning_rate": 1.8027134933154164e-05, - "loss": 1.0323, + "learning_rate": 1.606417124932752e-05, + "loss": 1.0695, "step": 8021 }, { - "epoch": 0.22732296182946526, + "epoch": 0.31387432506455903, "grad_norm": 0.0, - "learning_rate": 1.802658756207964e-05, - "loss": 1.0241, + "learning_rate": 1.6063163566003788e-05, + "loss": 1.0731, "step": 8022 }, { - "epoch": 0.22735129927172773, + "epoch": 0.3139134517567885, "grad_norm": 0.0, - "learning_rate": 1.8026040123394187e-05, - "loss": 1.0637, + "learning_rate": 1.6062155785313238e-05, + "loss": 1.2149, "step": 8023 }, { - "epoch": 0.2273796367139902, + "epoch": 0.3139525784490179, "grad_norm": 0.0, - "learning_rate": 1.8025492617102415e-05, - "loss": 1.0036, + "learning_rate": 1.6061147907272067e-05, + "loss": 1.1299, "step": 8024 }, { - "epoch": 0.22740797415625266, + "epoch": 0.31399170514124736, "grad_norm": 0.0, - "learning_rate": 1.802494504320894e-05, - "loss": 1.0197, + "learning_rate": 1.6060139931896452e-05, + "loss": 1.1639, "step": 8025 }, { - "epoch": 0.22743631159851513, + "epoch": 0.3140308318334768, "grad_norm": 0.0, - "learning_rate": 1.8024397401718374e-05, - "loss": 0.8985, + "learning_rate": 1.6059131859202586e-05, + "loss": 1.15, "step": 8026 }, { - "epoch": 0.22746464904077757, + "epoch": 0.31406995852570624, "grad_norm": 0.0, - "learning_rate": 1.8023849692635327e-05, - "loss": 0.9416, + "learning_rate": 1.6058123689206654e-05, + "loss": 1.0125, "step": 8027 }, { - "epoch": 0.22749298648304003, + "epoch": 0.3141090852179357, "grad_norm": 0.0, - "learning_rate": 1.8023301915964414e-05, - "loss": 1.0496, + "learning_rate": 1.6057115421924848e-05, + "loss": 0.9721, "step": 8028 }, { - "epoch": 0.2275213239253025, + "epoch": 0.3141482119101651, "grad_norm": 0.0, - "learning_rate": 1.8022754071710254e-05, - "loss": 0.8897, + "learning_rate": 1.6056107057373356e-05, + "loss": 1.1692, "step": 8029 }, { - "epoch": 0.22754966136756496, + "epoch": 0.31418733860239456, "grad_norm": 0.0, - "learning_rate": 1.8022206159877453e-05, - "loss": 0.9195, + "learning_rate": 1.6055098595568373e-05, + "loss": 1.2362, "step": 8030 }, { - "epoch": 0.22757799880982743, + "epoch": 0.314226465294624, "grad_norm": 0.0, - "learning_rate": 1.802165818047063e-05, - "loss": 1.0603, + "learning_rate": 1.60540900365261e-05, + "loss": 1.0784, "step": 8031 }, { - "epoch": 0.2276063362520899, + "epoch": 0.31426559198685344, "grad_norm": 0.0, - "learning_rate": 1.8021110133494405e-05, - "loss": 0.9223, + "learning_rate": 1.605308138026272e-05, + "loss": 0.9564, "step": 8032 }, { - "epoch": 0.22763467369435234, + "epoch": 0.3143047186790829, "grad_norm": 0.0, - "learning_rate": 1.802056201895339e-05, - "loss": 0.9546, + "learning_rate": 1.6052072626794442e-05, + "loss": 1.1341, "step": 8033 }, { - "epoch": 0.2276630111366148, + "epoch": 0.3143438453713123, "grad_norm": 0.0, - "learning_rate": 1.80200138368522e-05, - "loss": 1.0032, + "learning_rate": 1.605106377613746e-05, + "loss": 1.1978, "step": 8034 }, { - "epoch": 0.22769134857887727, + "epoch": 0.31438297206354177, "grad_norm": 0.0, - "learning_rate": 1.801946558719546e-05, - "loss": 1.0701, + "learning_rate": 1.6050054828307978e-05, + "loss": 1.1225, "step": 8035 }, { - "epoch": 0.22771968602113973, + "epoch": 0.3144220987557712, "grad_norm": 0.0, - "learning_rate": 1.8018917269987775e-05, - "loss": 1.0472, + "learning_rate": 1.6049045783322193e-05, + "loss": 1.0826, "step": 8036 }, { - "epoch": 0.2277480234634022, + "epoch": 0.31446122544800065, "grad_norm": 0.0, - "learning_rate": 1.801836888523378e-05, - "loss": 1.1017, + "learning_rate": 1.6048036641196312e-05, + "loss": 1.1522, "step": 8037 }, { - "epoch": 0.22777636090566467, + "epoch": 0.3145003521402301, "grad_norm": 0.0, - "learning_rate": 1.8017820432938086e-05, - "loss": 1.0055, + "learning_rate": 1.6047027401946547e-05, + "loss": 1.0699, "step": 8038 }, { - "epoch": 0.2278046983479271, + "epoch": 0.31453947883245953, "grad_norm": 0.0, - "learning_rate": 1.801727191310531e-05, - "loss": 0.9936, + "learning_rate": 1.6046018065589096e-05, + "loss": 1.1746, "step": 8039 }, { - "epoch": 0.22783303579018957, + "epoch": 0.31457860552468897, "grad_norm": 0.0, - "learning_rate": 1.8016723325740075e-05, - "loss": 1.0999, + "learning_rate": 1.6045008632140172e-05, + "loss": 1.1637, "step": 8040 }, { - "epoch": 0.22786137323245204, + "epoch": 0.31461773221691836, "grad_norm": 0.0, - "learning_rate": 1.8016174670847005e-05, - "loss": 1.0431, + "learning_rate": 1.6043999101615983e-05, + "loss": 1.0926, "step": 8041 }, { - "epoch": 0.2278897106747145, + "epoch": 0.3146568589091478, "grad_norm": 0.0, - "learning_rate": 1.801562594843072e-05, - "loss": 1.0462, + "learning_rate": 1.604298947403274e-05, + "loss": 1.1028, "step": 8042 }, { - "epoch": 0.22791804811697697, + "epoch": 0.31469598560137724, "grad_norm": 0.0, - "learning_rate": 1.8015077158495836e-05, - "loss": 0.9949, + "learning_rate": 1.6041979749406663e-05, + "loss": 1.1407, "step": 8043 }, { - "epoch": 0.22794638555923943, + "epoch": 0.3147351122936067, "grad_norm": 0.0, - "learning_rate": 1.8014528301046987e-05, - "loss": 0.9598, + "learning_rate": 1.6040969927753957e-05, + "loss": 1.1865, "step": 8044 }, { - "epoch": 0.22797472300150187, + "epoch": 0.3147742389858361, "grad_norm": 0.0, - "learning_rate": 1.8013979376088785e-05, - "loss": 0.9912, + "learning_rate": 1.603996000909085e-05, + "loss": 0.9804, "step": 8045 }, { - "epoch": 0.22800306044376434, + "epoch": 0.31481336567806556, "grad_norm": 0.0, - "learning_rate": 1.801343038362586e-05, - "loss": 1.0056, + "learning_rate": 1.6038949993433546e-05, + "loss": 1.0937, "step": 8046 }, { - "epoch": 0.2280313978860268, + "epoch": 0.314852492370295, "grad_norm": 0.0, - "learning_rate": 1.801288132366284e-05, - "loss": 0.9419, + "learning_rate": 1.6037939880798277e-05, + "loss": 1.1274, "step": 8047 }, { - "epoch": 0.22805973532828927, + "epoch": 0.31489161906252444, "grad_norm": 0.0, - "learning_rate": 1.8012332196204338e-05, - "loss": 0.9319, + "learning_rate": 1.603692967120126e-05, + "loss": 1.0001, "step": 8048 }, { - "epoch": 0.22808807277055174, + "epoch": 0.3149307457547539, "grad_norm": 0.0, - "learning_rate": 1.801178300125499e-05, - "loss": 1.0188, + "learning_rate": 1.6035919364658714e-05, + "loss": 1.1009, "step": 8049 }, { - "epoch": 0.2281164102128142, + "epoch": 0.3149698724469833, "grad_norm": 0.0, - "learning_rate": 1.8011233738819418e-05, - "loss": 0.9314, + "learning_rate": 1.603490896118687e-05, + "loss": 0.9767, "step": 8050 }, { - "epoch": 0.22814474765507664, + "epoch": 0.31500899913921276, "grad_norm": 0.0, - "learning_rate": 1.801068440890225e-05, - "loss": 0.9188, + "learning_rate": 1.603389846080195e-05, + "loss": 1.2018, "step": 8051 }, { - "epoch": 0.2281730850973391, + "epoch": 0.3150481258314422, "grad_norm": 0.0, - "learning_rate": 1.8010135011508113e-05, - "loss": 0.9456, + "learning_rate": 1.603288786352018e-05, + "loss": 1.256, "step": 8052 }, { - "epoch": 0.22820142253960157, + "epoch": 0.31508725252367165, "grad_norm": 0.0, - "learning_rate": 1.8009585546641634e-05, - "loss": 1.0719, + "learning_rate": 1.603187716935779e-05, + "loss": 1.1891, "step": 8053 }, { - "epoch": 0.22822975998186404, + "epoch": 0.3151263792159011, "grad_norm": 0.0, - "learning_rate": 1.800903601430744e-05, - "loss": 1.0253, + "learning_rate": 1.6030866378331013e-05, + "loss": 1.0197, "step": 8054 }, { - "epoch": 0.2282580974241265, + "epoch": 0.3151655059081305, "grad_norm": 0.0, - "learning_rate": 1.8008486414510163e-05, - "loss": 1.1252, + "learning_rate": 1.602985549045608e-05, + "loss": 1.0894, "step": 8055 }, { - "epoch": 0.22828643486638897, + "epoch": 0.31520463260035997, "grad_norm": 0.0, - "learning_rate": 1.800793674725443e-05, - "loss": 1.097, + "learning_rate": 1.602884450574922e-05, + "loss": 1.0237, "step": 8056 }, { - "epoch": 0.2283147723086514, + "epoch": 0.3152437592925894, "grad_norm": 0.0, - "learning_rate": 1.8007387012544874e-05, - "loss": 1.012, + "learning_rate": 1.6027833424226673e-05, + "loss": 1.2164, "step": 8057 }, { - "epoch": 0.22834310975091388, + "epoch": 0.31528288598481885, "grad_norm": 0.0, - "learning_rate": 1.8006837210386124e-05, - "loss": 0.9979, + "learning_rate": 1.6026822245904673e-05, + "loss": 1.1104, "step": 8058 }, { - "epoch": 0.22837144719317634, + "epoch": 0.3153220126770483, "grad_norm": 0.0, - "learning_rate": 1.8006287340782807e-05, - "loss": 0.993, + "learning_rate": 1.6025810970799462e-05, + "loss": 1.2646, "step": 8059 }, { - "epoch": 0.2283997846354388, + "epoch": 0.31536113936927773, "grad_norm": 0.0, - "learning_rate": 1.800573740373956e-05, - "loss": 0.964, + "learning_rate": 1.602479959892728e-05, + "loss": 0.9714, "step": 8060 }, { - "epoch": 0.22842812207770127, + "epoch": 0.3154002660615072, "grad_norm": 0.0, - "learning_rate": 1.8005187399261017e-05, - "loss": 0.8796, + "learning_rate": 1.602378813030436e-05, + "loss": 1.112, "step": 8061 }, { - "epoch": 0.22845645951996374, + "epoch": 0.3154393927537366, "grad_norm": 0.0, - "learning_rate": 1.8004637327351805e-05, - "loss": 0.9855, + "learning_rate": 1.6022776564946957e-05, + "loss": 1.1945, "step": 8062 }, { - "epoch": 0.22848479696222618, + "epoch": 0.31547851944596605, "grad_norm": 0.0, - "learning_rate": 1.800408718801656e-05, - "loss": 0.9957, + "learning_rate": 1.6021764902871305e-05, + "loss": 1.1713, "step": 8063 }, { - "epoch": 0.22851313440448864, + "epoch": 0.3155176461381955, "grad_norm": 0.0, - "learning_rate": 1.800353698125992e-05, - "loss": 0.9853, + "learning_rate": 1.6020753144093656e-05, + "loss": 1.0723, "step": 8064 }, { - "epoch": 0.2285414718467511, + "epoch": 0.31555677283042494, "grad_norm": 0.0, - "learning_rate": 1.8002986707086515e-05, - "loss": 0.9959, + "learning_rate": 1.6019741288630255e-05, + "loss": 1.0873, "step": 8065 }, { - "epoch": 0.22856980928901358, + "epoch": 0.3155958995226544, "grad_norm": 0.0, - "learning_rate": 1.8002436365500975e-05, - "loss": 0.8421, + "learning_rate": 1.6018729336497356e-05, + "loss": 1.0954, "step": 8066 }, { - "epoch": 0.22859814673127604, + "epoch": 0.3156350262148838, "grad_norm": 0.0, - "learning_rate": 1.800188595650795e-05, - "loss": 1.0481, + "learning_rate": 1.6017717287711197e-05, + "loss": 1.2073, "step": 8067 }, { - "epoch": 0.2286264841735385, + "epoch": 0.31567415290711326, "grad_norm": 0.0, - "learning_rate": 1.8001335480112067e-05, - "loss": 0.9623, + "learning_rate": 1.601670514228805e-05, + "loss": 1.117, "step": 8068 }, { - "epoch": 0.22865482161580095, + "epoch": 0.31571327959934264, "grad_norm": 0.0, - "learning_rate": 1.800078493631796e-05, - "loss": 0.9648, + "learning_rate": 1.6015692900244148e-05, + "loss": 1.1862, "step": 8069 }, { - "epoch": 0.2286831590580634, + "epoch": 0.3157524062915721, "grad_norm": 0.0, - "learning_rate": 1.8000234325130274e-05, - "loss": 0.9783, + "learning_rate": 1.6014680561595763e-05, + "loss": 1.1639, "step": 8070 }, { - "epoch": 0.22871149650032588, + "epoch": 0.3157915329838015, "grad_norm": 0.0, - "learning_rate": 1.7999683646553642e-05, - "loss": 0.9508, + "learning_rate": 1.6013668126359143e-05, + "loss": 1.1009, "step": 8071 }, { - "epoch": 0.22873983394258834, + "epoch": 0.31583065967603097, "grad_norm": 0.0, - "learning_rate": 1.7999132900592703e-05, - "loss": 1.0508, + "learning_rate": 1.6012655594550546e-05, + "loss": 1.3154, "step": 8072 }, { - "epoch": 0.2287681713848508, + "epoch": 0.3158697863682604, "grad_norm": 0.0, - "learning_rate": 1.7998582087252096e-05, - "loss": 0.8571, + "learning_rate": 1.6011642966186237e-05, + "loss": 1.131, "step": 8073 }, { - "epoch": 0.22879650882711328, + "epoch": 0.31590891306048985, "grad_norm": 0.0, - "learning_rate": 1.7998031206536466e-05, - "loss": 0.9174, + "learning_rate": 1.6010630241282476e-05, + "loss": 1.1277, "step": 8074 }, { - "epoch": 0.22882484626937571, + "epoch": 0.3159480397527193, "grad_norm": 0.0, - "learning_rate": 1.7997480258450447e-05, - "loss": 0.9998, + "learning_rate": 1.6009617419855523e-05, + "loss": 1.1855, "step": 8075 }, { - "epoch": 0.22885318371163818, + "epoch": 0.31598716644494873, "grad_norm": 0.0, - "learning_rate": 1.7996929242998682e-05, - "loss": 1.0323, + "learning_rate": 1.6008604501921647e-05, + "loss": 1.1269, "step": 8076 }, { - "epoch": 0.22888152115390065, + "epoch": 0.31602629313717817, "grad_norm": 0.0, - "learning_rate": 1.799637816018581e-05, - "loss": 0.9909, + "learning_rate": 1.600759148749711e-05, + "loss": 1.1771, "step": 8077 }, { - "epoch": 0.2289098585961631, + "epoch": 0.3160654198294076, "grad_norm": 0.0, - "learning_rate": 1.799582701001648e-05, - "loss": 0.9957, + "learning_rate": 1.600657837659818e-05, + "loss": 1.0954, "step": 8078 }, { - "epoch": 0.22893819603842558, + "epoch": 0.31610454652163705, "grad_norm": 0.0, - "learning_rate": 1.7995275792495327e-05, - "loss": 1.0028, + "learning_rate": 1.6005565169241132e-05, + "loss": 1.0405, "step": 8079 }, { - "epoch": 0.22896653348068804, + "epoch": 0.3161436732138665, "grad_norm": 0.0, - "learning_rate": 1.7994724507626996e-05, - "loss": 0.9582, + "learning_rate": 1.6004551865442228e-05, + "loss": 0.9945, "step": 8080 }, { - "epoch": 0.22899487092295048, + "epoch": 0.31618279990609593, "grad_norm": 0.0, - "learning_rate": 1.7994173155416133e-05, - "loss": 1.041, + "learning_rate": 1.6003538465217746e-05, + "loss": 1.0789, "step": 8081 }, { - "epoch": 0.22902320836521295, + "epoch": 0.3162219265983254, "grad_norm": 0.0, - "learning_rate": 1.7993621735867385e-05, - "loss": 0.9959, + "learning_rate": 1.600252496858396e-05, + "loss": 1.0922, "step": 8082 }, { - "epoch": 0.22905154580747542, + "epoch": 0.3162610532905548, "grad_norm": 0.0, - "learning_rate": 1.7993070248985386e-05, - "loss": 0.9733, + "learning_rate": 1.6001511375557146e-05, + "loss": 1.0677, "step": 8083 }, { - "epoch": 0.22907988324973788, + "epoch": 0.31630017998278426, "grad_norm": 0.0, - "learning_rate": 1.7992518694774794e-05, - "loss": 0.8974, + "learning_rate": 1.600049768615358e-05, + "loss": 1.1403, "step": 8084 }, { - "epoch": 0.22910822069200035, + "epoch": 0.3163393066750137, "grad_norm": 0.0, - "learning_rate": 1.7991967073240245e-05, - "loss": 0.985, + "learning_rate": 1.5999483900389536e-05, + "loss": 1.1324, "step": 8085 }, { - "epoch": 0.2291365581342628, + "epoch": 0.31637843336724314, "grad_norm": 0.0, - "learning_rate": 1.799141538438639e-05, - "loss": 1.0749, + "learning_rate": 1.5998470018281303e-05, + "loss": 1.1517, "step": 8086 }, { - "epoch": 0.22916489557652525, + "epoch": 0.3164175600594726, "grad_norm": 0.0, - "learning_rate": 1.799086362821788e-05, - "loss": 1.0669, + "learning_rate": 1.5997456039845155e-05, + "loss": 1.0782, "step": 8087 }, { - "epoch": 0.22919323301878772, + "epoch": 0.316456686751702, "grad_norm": 0.0, - "learning_rate": 1.7990311804739352e-05, - "loss": 0.8955, + "learning_rate": 1.599644196509738e-05, + "loss": 1.2303, "step": 8088 }, { - "epoch": 0.22922157046105018, + "epoch": 0.31649581344393146, "grad_norm": 0.0, - "learning_rate": 1.7989759913955465e-05, - "loss": 1.0596, + "learning_rate": 1.5995427794054262e-05, + "loss": 1.2717, "step": 8089 }, { - "epoch": 0.22924990790331265, + "epoch": 0.3165349401361609, "grad_norm": 0.0, - "learning_rate": 1.798920795587086e-05, - "loss": 1.1535, + "learning_rate": 1.5994413526732083e-05, + "loss": 1.1275, "step": 8090 }, { - "epoch": 0.22927824534557512, + "epoch": 0.31657406682839034, "grad_norm": 0.0, - "learning_rate": 1.7988655930490192e-05, - "loss": 1.033, + "learning_rate": 1.5993399163147137e-05, + "loss": 0.9987, "step": 8091 }, { - "epoch": 0.22930658278783758, + "epoch": 0.3166131935206198, "grad_norm": 0.0, - "learning_rate": 1.798810383781811e-05, - "loss": 0.8563, + "learning_rate": 1.5992384703315707e-05, + "loss": 1.076, "step": 8092 }, { - "epoch": 0.22933492023010002, + "epoch": 0.3166523202128492, "grad_norm": 0.0, - "learning_rate": 1.798755167785926e-05, - "loss": 1.0191, + "learning_rate": 1.599137014725409e-05, + "loss": 1.0682, "step": 8093 }, { - "epoch": 0.22936325767236249, + "epoch": 0.31669144690507867, "grad_norm": 0.0, - "learning_rate": 1.7986999450618295e-05, - "loss": 0.9257, + "learning_rate": 1.599035549497858e-05, + "loss": 1.1341, "step": 8094 }, { - "epoch": 0.22939159511462495, + "epoch": 0.3167305735973081, "grad_norm": 0.0, - "learning_rate": 1.7986447156099874e-05, - "loss": 1.0088, + "learning_rate": 1.5989340746505463e-05, + "loss": 1.1902, "step": 8095 }, { - "epoch": 0.22941993255688742, + "epoch": 0.31676970028953755, "grad_norm": 0.0, - "learning_rate": 1.798589479430864e-05, - "loss": 0.9367, + "learning_rate": 1.5988325901851038e-05, + "loss": 1.0133, "step": 8096 }, { - "epoch": 0.22944826999914988, + "epoch": 0.316808826981767, "grad_norm": 0.0, - "learning_rate": 1.7985342365249247e-05, - "loss": 0.9814, + "learning_rate": 1.5987310961031604e-05, + "loss": 1.0516, "step": 8097 }, { - "epoch": 0.22947660744141235, + "epoch": 0.3168479536739964, "grad_norm": 0.0, - "learning_rate": 1.7984789868926348e-05, - "loss": 0.9033, + "learning_rate": 1.598629592406346e-05, + "loss": 1.1294, "step": 8098 }, { - "epoch": 0.2295049448836748, + "epoch": 0.3168870803662258, "grad_norm": 0.0, - "learning_rate": 1.7984237305344604e-05, - "loss": 0.9261, + "learning_rate": 1.5985280790962903e-05, + "loss": 1.0692, "step": 8099 }, { - "epoch": 0.22953328232593725, + "epoch": 0.31692620705845526, "grad_norm": 0.0, - "learning_rate": 1.7983684674508658e-05, - "loss": 0.9762, + "learning_rate": 1.5984265561746236e-05, + "loss": 1.1619, "step": 8100 }, { - "epoch": 0.22956161976819972, + "epoch": 0.3169653337506847, "grad_norm": 0.0, - "learning_rate": 1.7983131976423175e-05, - "loss": 0.9792, + "learning_rate": 1.5983250236429765e-05, + "loss": 1.0321, "step": 8101 }, { - "epoch": 0.2295899572104622, + "epoch": 0.31700446044291414, "grad_norm": 0.0, - "learning_rate": 1.7982579211092807e-05, - "loss": 0.9683, + "learning_rate": 1.598223481502979e-05, + "loss": 1.0459, "step": 8102 }, { - "epoch": 0.22961829465272465, + "epoch": 0.3170435871351436, "grad_norm": 0.0, - "learning_rate": 1.798202637852221e-05, - "loss": 0.9854, + "learning_rate": 1.598121929756262e-05, + "loss": 1.0252, "step": 8103 }, { - "epoch": 0.22964663209498712, + "epoch": 0.317082713827373, "grad_norm": 0.0, - "learning_rate": 1.7981473478716042e-05, - "loss": 0.9901, + "learning_rate": 1.5980203684044565e-05, + "loss": 1.1245, "step": 8104 }, { - "epoch": 0.22967496953724956, + "epoch": 0.31712184051960246, "grad_norm": 0.0, - "learning_rate": 1.798092051167896e-05, - "loss": 1.0747, + "learning_rate": 1.597918797449193e-05, + "loss": 1.0678, "step": 8105 }, { - "epoch": 0.22970330697951202, + "epoch": 0.3171609672118319, "grad_norm": 0.0, - "learning_rate": 1.7980367477415614e-05, - "loss": 0.9549, + "learning_rate": 1.5978172168921032e-05, + "loss": 1.0967, "step": 8106 }, { - "epoch": 0.2297316444217745, + "epoch": 0.31720009390406134, "grad_norm": 0.0, - "learning_rate": 1.7979814375930676e-05, - "loss": 0.9905, + "learning_rate": 1.5977156267348175e-05, + "loss": 1.1106, "step": 8107 }, { - "epoch": 0.22975998186403696, + "epoch": 0.3172392205962908, "grad_norm": 0.0, - "learning_rate": 1.79792612072288e-05, - "loss": 0.985, + "learning_rate": 1.597614026978968e-05, + "loss": 1.1326, "step": 8108 }, { - "epoch": 0.22978831930629942, + "epoch": 0.3172783472885202, "grad_norm": 0.0, - "learning_rate": 1.7978707971314636e-05, - "loss": 0.9193, + "learning_rate": 1.5975124176261866e-05, + "loss": 1.09, "step": 8109 }, { - "epoch": 0.2298166567485619, + "epoch": 0.31731747398074966, "grad_norm": 0.0, - "learning_rate": 1.7978154668192858e-05, - "loss": 1.0749, + "learning_rate": 1.5974107986781036e-05, + "loss": 1.1032, "step": 8110 }, { - "epoch": 0.22984499419082433, + "epoch": 0.3173566006729791, "grad_norm": 0.0, - "learning_rate": 1.7977601297868116e-05, - "loss": 1.0402, + "learning_rate": 1.5973091701363524e-05, + "loss": 1.1805, "step": 8111 }, { - "epoch": 0.2298733316330868, + "epoch": 0.31739572736520855, "grad_norm": 0.0, - "learning_rate": 1.797704786034508e-05, - "loss": 0.9608, + "learning_rate": 1.5972075320025643e-05, + "loss": 1.1094, "step": 8112 }, { - "epoch": 0.22990166907534926, + "epoch": 0.317434854057438, "grad_norm": 0.0, - "learning_rate": 1.7976494355628406e-05, - "loss": 0.933, + "learning_rate": 1.5971058842783717e-05, + "loss": 1.1411, "step": 8113 }, { - "epoch": 0.22993000651761172, + "epoch": 0.31747398074966743, "grad_norm": 0.0, - "learning_rate": 1.797594078372276e-05, - "loss": 1.007, + "learning_rate": 1.5970042269654063e-05, + "loss": 1.1909, "step": 8114 }, { - "epoch": 0.2299583439598742, + "epoch": 0.31751310744189687, "grad_norm": 0.0, - "learning_rate": 1.79753871446328e-05, - "loss": 0.964, + "learning_rate": 1.5969025600653015e-05, + "loss": 1.1388, "step": 8115 }, { - "epoch": 0.22998668140213666, + "epoch": 0.3175522341341263, "grad_norm": 0.0, - "learning_rate": 1.7974833438363192e-05, - "loss": 0.9689, + "learning_rate": 1.5968008835796895e-05, + "loss": 1.067, "step": 8116 }, { - "epoch": 0.2300150188443991, + "epoch": 0.31759136082635575, "grad_norm": 0.0, - "learning_rate": 1.7974279664918605e-05, - "loss": 1.0108, + "learning_rate": 1.5966991975102032e-05, + "loss": 1.13, "step": 8117 }, { - "epoch": 0.23004335628666156, + "epoch": 0.3176304875185852, "grad_norm": 0.0, - "learning_rate": 1.7973725824303694e-05, - "loss": 1.1088, + "learning_rate": 1.5965975018584754e-05, + "loss": 1.0569, "step": 8118 }, { - "epoch": 0.23007169372892403, + "epoch": 0.31766961421081463, "grad_norm": 0.0, - "learning_rate": 1.7973171916523134e-05, - "loss": 1.0426, + "learning_rate": 1.5964957966261392e-05, + "loss": 1.0287, "step": 8119 }, { - "epoch": 0.2301000311711865, + "epoch": 0.3177087409030441, "grad_norm": 0.0, - "learning_rate": 1.797261794158158e-05, - "loss": 1.0277, + "learning_rate": 1.5963940818148284e-05, + "loss": 1.1504, "step": 8120 }, { - "epoch": 0.23012836861344896, + "epoch": 0.3177478675952735, "grad_norm": 0.0, - "learning_rate": 1.797206389948371e-05, - "loss": 0.9723, + "learning_rate": 1.5962923574261754e-05, + "loss": 1.1172, "step": 8121 }, { - "epoch": 0.23015670605571142, + "epoch": 0.31778699428750296, "grad_norm": 0.0, - "learning_rate": 1.7971509790234184e-05, - "loss": 1.0322, + "learning_rate": 1.596190623461815e-05, + "loss": 1.1328, "step": 8122 }, { - "epoch": 0.23018504349797386, + "epoch": 0.3178261209797324, "grad_norm": 0.0, - "learning_rate": 1.7970955613837673e-05, - "loss": 0.9982, + "learning_rate": 1.5960888799233796e-05, + "loss": 1.0591, "step": 8123 }, { - "epoch": 0.23021338094023633, + "epoch": 0.31786524767196184, "grad_norm": 0.0, - "learning_rate": 1.797040137029884e-05, - "loss": 0.9277, + "learning_rate": 1.595987126812504e-05, + "loss": 1.1805, "step": 8124 }, { - "epoch": 0.2302417183824988, + "epoch": 0.3179043743641913, "grad_norm": 0.0, - "learning_rate": 1.7969847059622355e-05, - "loss": 0.9534, + "learning_rate": 1.595885364130822e-05, + "loss": 1.0481, "step": 8125 }, { - "epoch": 0.23027005582476126, + "epoch": 0.31794350105642066, "grad_norm": 0.0, - "learning_rate": 1.796929268181289e-05, - "loss": 1.1049, + "learning_rate": 1.5957835918799674e-05, + "loss": 1.1458, "step": 8126 }, { - "epoch": 0.23029839326702373, + "epoch": 0.3179826277486501, "grad_norm": 0.0, - "learning_rate": 1.796873823687512e-05, - "loss": 0.9916, + "learning_rate": 1.5956818100615753e-05, + "loss": 1.1762, "step": 8127 }, { - "epoch": 0.2303267307092862, + "epoch": 0.31802175444087954, "grad_norm": 0.0, - "learning_rate": 1.7968183724813698e-05, - "loss": 0.9205, + "learning_rate": 1.5955800186772795e-05, + "loss": 1.1749, "step": 8128 }, { - "epoch": 0.23035506815154863, + "epoch": 0.318060881133109, "grad_norm": 0.0, - "learning_rate": 1.7967629145633312e-05, - "loss": 1.0779, + "learning_rate": 1.5954782177287153e-05, + "loss": 1.199, "step": 8129 }, { - "epoch": 0.2303834055938111, + "epoch": 0.3181000078253384, "grad_norm": 0.0, - "learning_rate": 1.796707449933863e-05, - "loss": 0.9785, + "learning_rate": 1.5953764072175167e-05, + "loss": 1.129, "step": 8130 }, { - "epoch": 0.23041174303607356, + "epoch": 0.31813913451756787, "grad_norm": 0.0, - "learning_rate": 1.7966519785934313e-05, - "loss": 1.0872, + "learning_rate": 1.595274587145319e-05, + "loss": 1.0177, "step": 8131 }, { - "epoch": 0.23044008047833603, + "epoch": 0.3181782612097973, "grad_norm": 0.0, - "learning_rate": 1.7965965005425044e-05, - "loss": 0.8696, + "learning_rate": 1.5951727575137576e-05, + "loss": 1.2217, "step": 8132 }, { - "epoch": 0.2304684179205985, + "epoch": 0.31821738790202675, "grad_norm": 0.0, - "learning_rate": 1.7965410157815496e-05, - "loss": 0.8994, + "learning_rate": 1.5950709183244676e-05, + "loss": 1.207, "step": 8133 }, { - "epoch": 0.23049675536286096, + "epoch": 0.3182565145942562, "grad_norm": 0.0, - "learning_rate": 1.796485524311034e-05, - "loss": 1.0388, + "learning_rate": 1.5949690695790837e-05, + "loss": 1.0948, "step": 8134 }, { - "epoch": 0.2305250928051234, + "epoch": 0.31829564128648563, "grad_norm": 0.0, - "learning_rate": 1.796430026131425e-05, - "loss": 0.8846, + "learning_rate": 1.5948672112792427e-05, + "loss": 1.1907, "step": 8135 }, { - "epoch": 0.23055343024738587, + "epoch": 0.31833476797871507, "grad_norm": 0.0, - "learning_rate": 1.79637452124319e-05, - "loss": 0.9569, + "learning_rate": 1.5947653434265794e-05, + "loss": 1.1042, "step": 8136 }, { - "epoch": 0.23058176768964833, + "epoch": 0.3183738946709445, "grad_norm": 0.0, - "learning_rate": 1.796319009646797e-05, - "loss": 1.007, + "learning_rate": 1.5946634660227302e-05, + "loss": 1.0826, "step": 8137 }, { - "epoch": 0.2306101051319108, + "epoch": 0.31841302136317395, "grad_norm": 0.0, - "learning_rate": 1.796263491342713e-05, - "loss": 1.0465, + "learning_rate": 1.594561579069331e-05, + "loss": 1.1602, "step": 8138 }, { - "epoch": 0.23063844257417326, + "epoch": 0.3184521480554034, "grad_norm": 0.0, - "learning_rate": 1.796207966331406e-05, - "loss": 0.9811, + "learning_rate": 1.5944596825680174e-05, + "loss": 0.9823, "step": 8139 }, { - "epoch": 0.23066678001643573, + "epoch": 0.31849127474763284, "grad_norm": 0.0, - "learning_rate": 1.7961524346133437e-05, - "loss": 0.9606, + "learning_rate": 1.5943577765204264e-05, + "loss": 0.9599, "step": 8140 }, { - "epoch": 0.23069511745869817, + "epoch": 0.3185304014398623, "grad_norm": 0.0, - "learning_rate": 1.7960968961889936e-05, - "loss": 1.027, + "learning_rate": 1.5942558609281943e-05, + "loss": 1.0436, "step": 8141 }, { - "epoch": 0.23072345490096063, + "epoch": 0.3185695281320917, "grad_norm": 0.0, - "learning_rate": 1.7960413510588242e-05, - "loss": 0.9086, + "learning_rate": 1.5941539357929577e-05, + "loss": 1.0985, "step": 8142 }, { - "epoch": 0.2307517923432231, + "epoch": 0.31860865482432116, "grad_norm": 0.0, - "learning_rate": 1.7959857992233022e-05, - "loss": 0.9443, + "learning_rate": 1.5940520011163534e-05, + "loss": 1.0115, "step": 8143 }, { - "epoch": 0.23078012978548557, + "epoch": 0.3186477815165506, "grad_norm": 0.0, - "learning_rate": 1.7959302406828967e-05, - "loss": 1.0837, + "learning_rate": 1.5939500569000185e-05, + "loss": 1.0273, "step": 8144 }, { - "epoch": 0.23080846722774803, + "epoch": 0.31868690820878004, "grad_norm": 0.0, - "learning_rate": 1.7958746754380754e-05, - "loss": 1.0769, + "learning_rate": 1.5938481031455898e-05, + "loss": 1.2299, "step": 8145 }, { - "epoch": 0.2308368046700105, + "epoch": 0.3187260349010095, "grad_norm": 0.0, - "learning_rate": 1.7958191034893057e-05, - "loss": 0.9557, + "learning_rate": 1.5937461398547048e-05, + "loss": 1.0342, "step": 8146 }, { - "epoch": 0.23086514211227294, + "epoch": 0.3187651615932389, "grad_norm": 0.0, - "learning_rate": 1.7957635248370563e-05, - "loss": 1.0588, + "learning_rate": 1.5936441670290006e-05, + "loss": 1.1154, "step": 8147 }, { - "epoch": 0.2308934795545354, + "epoch": 0.31880428828546836, "grad_norm": 0.0, - "learning_rate": 1.7957079394817954e-05, - "loss": 0.9566, + "learning_rate": 1.593542184670115e-05, + "loss": 1.1028, "step": 8148 }, { - "epoch": 0.23092181699679787, + "epoch": 0.3188434149776978, "grad_norm": 0.0, - "learning_rate": 1.795652347423991e-05, - "loss": 0.9743, + "learning_rate": 1.5934401927796858e-05, + "loss": 1.1942, "step": 8149 }, { - "epoch": 0.23095015443906033, + "epoch": 0.31888254166992724, "grad_norm": 0.0, - "learning_rate": 1.795596748664111e-05, - "loss": 0.9367, + "learning_rate": 1.5933381913593508e-05, + "loss": 1.0457, "step": 8150 }, { - "epoch": 0.2309784918813228, + "epoch": 0.3189216683621567, "grad_norm": 0.0, - "learning_rate": 1.7955411432026245e-05, - "loss": 0.9196, + "learning_rate": 1.593236180410748e-05, + "loss": 0.9611, "step": 8151 }, { - "epoch": 0.23100682932358527, + "epoch": 0.3189607950543861, "grad_norm": 0.0, - "learning_rate": 1.7954855310399997e-05, - "loss": 0.9965, + "learning_rate": 1.5931341599355153e-05, + "loss": 1.18, "step": 8152 }, { - "epoch": 0.2310351667658477, + "epoch": 0.31899992174661557, "grad_norm": 0.0, - "learning_rate": 1.795429912176705e-05, - "loss": 1.0109, + "learning_rate": 1.5930321299352912e-05, + "loss": 1.0681, "step": 8153 }, { - "epoch": 0.23106350420811017, + "epoch": 0.319039048438845, "grad_norm": 0.0, - "learning_rate": 1.7953742866132082e-05, - "loss": 0.9931, + "learning_rate": 1.592930090411714e-05, + "loss": 1.0465, "step": 8154 }, { - "epoch": 0.23109184165037264, + "epoch": 0.3190781751310744, "grad_norm": 0.0, - "learning_rate": 1.7953186543499786e-05, - "loss": 1.0494, + "learning_rate": 1.592828041366423e-05, + "loss": 1.168, "step": 8155 }, { - "epoch": 0.2311201790926351, + "epoch": 0.31911730182330383, "grad_norm": 0.0, - "learning_rate": 1.795263015387485e-05, - "loss": 1.0785, + "learning_rate": 1.5927259828010563e-05, + "loss": 1.0833, "step": 8156 }, { - "epoch": 0.23114851653489757, + "epoch": 0.3191564285155333, "grad_norm": 0.0, - "learning_rate": 1.7952073697261954e-05, - "loss": 1.0786, + "learning_rate": 1.5926239147172527e-05, + "loss": 1.0087, "step": 8157 }, { - "epoch": 0.23117685397716004, + "epoch": 0.3191955552077627, "grad_norm": 0.0, - "learning_rate": 1.795151717366579e-05, - "loss": 0.9721, + "learning_rate": 1.592521837116652e-05, + "loss": 0.972, "step": 8158 }, { - "epoch": 0.23120519141942247, + "epoch": 0.31923468189999216, "grad_norm": 0.0, - "learning_rate": 1.7950960583091045e-05, - "loss": 1.051, + "learning_rate": 1.5924197500008933e-05, + "loss": 1.0313, "step": 8159 }, { - "epoch": 0.23123352886168494, + "epoch": 0.3192738085922216, "grad_norm": 0.0, - "learning_rate": 1.79504039255424e-05, - "loss": 0.9236, + "learning_rate": 1.5923176533716156e-05, + "loss": 1.1208, "step": 8160 }, { - "epoch": 0.2312618663039474, + "epoch": 0.31931293528445104, "grad_norm": 0.0, - "learning_rate": 1.7949847201024558e-05, - "loss": 1.0464, + "learning_rate": 1.5922155472304584e-05, + "loss": 1.097, "step": 8161 }, { - "epoch": 0.23129020374620987, + "epoch": 0.3193520619766805, "grad_norm": 0.0, - "learning_rate": 1.7949290409542196e-05, - "loss": 0.946, + "learning_rate": 1.592113431579062e-05, + "loss": 1.041, "step": 8162 }, { - "epoch": 0.23131854118847234, + "epoch": 0.3193911886689099, "grad_norm": 0.0, - "learning_rate": 1.7948733551100012e-05, - "loss": 0.9413, + "learning_rate": 1.5920113064190653e-05, + "loss": 1.1291, "step": 8163 }, { - "epoch": 0.2313468786307348, + "epoch": 0.31943031536113936, "grad_norm": 0.0, - "learning_rate": 1.7948176625702692e-05, - "loss": 1.0337, + "learning_rate": 1.591909171752109e-05, + "loss": 1.0004, "step": 8164 }, { - "epoch": 0.23137521607299724, + "epoch": 0.3194694420533688, "grad_norm": 0.0, - "learning_rate": 1.794761963335493e-05, - "loss": 0.8673, + "learning_rate": 1.5918070275798333e-05, + "loss": 1.1552, "step": 8165 }, { - "epoch": 0.2314035535152597, + "epoch": 0.31950856874559824, "grad_norm": 0.0, - "learning_rate": 1.7947062574061417e-05, - "loss": 1.0046, + "learning_rate": 1.5917048739038782e-05, + "loss": 1.2408, "step": 8166 }, { - "epoch": 0.23143189095752217, + "epoch": 0.3195476954378277, "grad_norm": 0.0, - "learning_rate": 1.7946505447826843e-05, - "loss": 1.0815, + "learning_rate": 1.5916027107258846e-05, + "loss": 1.1304, "step": 8167 }, { - "epoch": 0.23146022839978464, + "epoch": 0.3195868221300571, "grad_norm": 0.0, - "learning_rate": 1.7945948254655904e-05, - "loss": 0.9492, + "learning_rate": 1.591500538047492e-05, + "loss": 1.0932, "step": 8168 }, { - "epoch": 0.2314885658420471, + "epoch": 0.31962594882228657, "grad_norm": 0.0, - "learning_rate": 1.794539099455329e-05, - "loss": 1.0931, + "learning_rate": 1.5913983558703427e-05, + "loss": 1.042, "step": 8169 }, { - "epoch": 0.23151690328430957, + "epoch": 0.319665075514516, "grad_norm": 0.0, - "learning_rate": 1.79448336675237e-05, - "loss": 1.0366, + "learning_rate": 1.5912961641960763e-05, + "loss": 1.1245, "step": 8170 }, { - "epoch": 0.231545240726572, + "epoch": 0.31970420220674545, "grad_norm": 0.0, - "learning_rate": 1.7944276273571823e-05, - "loss": 1.0958, + "learning_rate": 1.5911939630263348e-05, + "loss": 1.115, "step": 8171 }, { - "epoch": 0.23157357816883448, + "epoch": 0.3197433288989749, "grad_norm": 0.0, - "learning_rate": 1.794371881270236e-05, - "loss": 1.0722, + "learning_rate": 1.5910917523627593e-05, + "loss": 1.1585, "step": 8172 }, { - "epoch": 0.23160191561109694, + "epoch": 0.31978245559120433, "grad_norm": 0.0, - "learning_rate": 1.794316128492e-05, - "loss": 0.9442, + "learning_rate": 1.5909895322069907e-05, + "loss": 1.1878, "step": 8173 }, { - "epoch": 0.2316302530533594, + "epoch": 0.31982158228343377, "grad_norm": 0.0, - "learning_rate": 1.7942603690229447e-05, - "loss": 0.9256, + "learning_rate": 1.5908873025606703e-05, + "loss": 1.1254, "step": 8174 }, { - "epoch": 0.23165859049562187, + "epoch": 0.3198607089756632, "grad_norm": 0.0, - "learning_rate": 1.794204602863539e-05, - "loss": 0.9798, + "learning_rate": 1.590785063425441e-05, + "loss": 1.0468, "step": 8175 }, { - "epoch": 0.2316869279378843, + "epoch": 0.31989983566789265, "grad_norm": 0.0, - "learning_rate": 1.7941488300142535e-05, - "loss": 0.9356, + "learning_rate": 1.590682814802943e-05, + "loss": 1.1114, "step": 8176 }, { - "epoch": 0.23171526538014678, + "epoch": 0.3199389623601221, "grad_norm": 0.0, - "learning_rate": 1.7940930504755568e-05, - "loss": 1.0337, + "learning_rate": 1.59058055669482e-05, + "loss": 1.1471, "step": 8177 }, { - "epoch": 0.23174360282240924, + "epoch": 0.31997808905235153, "grad_norm": 0.0, - "learning_rate": 1.79403726424792e-05, - "loss": 1.0356, + "learning_rate": 1.5904782891027132e-05, + "loss": 1.1418, "step": 8178 }, { - "epoch": 0.2317719402646717, + "epoch": 0.320017215744581, "grad_norm": 0.0, - "learning_rate": 1.7939814713318123e-05, - "loss": 1.0204, + "learning_rate": 1.5903760120282645e-05, + "loss": 1.0343, "step": 8179 }, { - "epoch": 0.23180027770693418, + "epoch": 0.3200563424368104, "grad_norm": 0.0, - "learning_rate": 1.793925671727704e-05, - "loss": 1.0341, + "learning_rate": 1.590273725473117e-05, + "loss": 1.0043, "step": 8180 }, { - "epoch": 0.23182861514919664, + "epoch": 0.32009546912903986, "grad_norm": 0.0, - "learning_rate": 1.7938698654360646e-05, - "loss": 0.906, + "learning_rate": 1.5901714294389132e-05, + "loss": 0.9769, "step": 8181 }, { - "epoch": 0.23185695259145908, + "epoch": 0.3201345958212693, "grad_norm": 0.0, - "learning_rate": 1.793814052457365e-05, - "loss": 0.9808, + "learning_rate": 1.5900691239272957e-05, + "loss": 1.2689, "step": 8182 }, { - "epoch": 0.23188529003372155, + "epoch": 0.3201737225134987, "grad_norm": 0.0, - "learning_rate": 1.7937582327920745e-05, - "loss": 0.9417, + "learning_rate": 1.5899668089399078e-05, + "loss": 1.2096, "step": 8183 }, { - "epoch": 0.231913627475984, + "epoch": 0.3202128492057281, "grad_norm": 0.0, - "learning_rate": 1.7937024064406637e-05, - "loss": 1.002, + "learning_rate": 1.589864484478392e-05, + "loss": 1.1851, "step": 8184 }, { - "epoch": 0.23194196491824648, + "epoch": 0.32025197589795756, "grad_norm": 0.0, - "learning_rate": 1.793646573403603e-05, - "loss": 0.9472, + "learning_rate": 1.5897621505443915e-05, + "loss": 1.1976, "step": 8185 }, { - "epoch": 0.23197030236050895, + "epoch": 0.320291102590187, "grad_norm": 0.0, - "learning_rate": 1.793590733681362e-05, - "loss": 0.9945, + "learning_rate": 1.58965980713955e-05, + "loss": 1.0892, "step": 8186 }, { - "epoch": 0.2319986398027714, + "epoch": 0.32033022928241645, "grad_norm": 0.0, - "learning_rate": 1.793534887274412e-05, - "loss": 1.1028, + "learning_rate": 1.5895574542655113e-05, + "loss": 1.1936, "step": 8187 }, { - "epoch": 0.23202697724503385, + "epoch": 0.3203693559746459, "grad_norm": 0.0, - "learning_rate": 1.793479034183223e-05, - "loss": 0.9301, + "learning_rate": 1.589455091923918e-05, + "loss": 1.0251, "step": 8188 }, { - "epoch": 0.23205531468729632, + "epoch": 0.3204084826668753, "grad_norm": 0.0, - "learning_rate": 1.7934231744082652e-05, - "loss": 1.1094, + "learning_rate": 1.5893527201164152e-05, + "loss": 1.1156, "step": 8189 }, { - "epoch": 0.23208365212955878, + "epoch": 0.32044760935910477, "grad_norm": 0.0, - "learning_rate": 1.7933673079500097e-05, - "loss": 1.0428, + "learning_rate": 1.5892503388446456e-05, + "loss": 1.0981, "step": 8190 }, { - "epoch": 0.23211198957182125, + "epoch": 0.3204867360513342, "grad_norm": 0.0, - "learning_rate": 1.793311434808926e-05, - "loss": 1.2004, + "learning_rate": 1.5891479481102545e-05, + "loss": 1.2075, "step": 8191 }, { - "epoch": 0.2321403270140837, + "epoch": 0.32052586274356365, "grad_norm": 0.0, - "learning_rate": 1.7932555549854862e-05, - "loss": 0.95, + "learning_rate": 1.5890455479148852e-05, + "loss": 1.1106, "step": 8192 }, { - "epoch": 0.23216866445634618, + "epoch": 0.3205649894357931, "grad_norm": 0.0, - "learning_rate": 1.7931996684801604e-05, - "loss": 0.9839, + "learning_rate": 1.5889431382601827e-05, + "loss": 1.1178, "step": 8193 }, { - "epoch": 0.23219700189860862, + "epoch": 0.32060411612802253, "grad_norm": 0.0, - "learning_rate": 1.7931437752934187e-05, - "loss": 0.97, + "learning_rate": 1.588840719147791e-05, + "loss": 1.1556, "step": 8194 }, { - "epoch": 0.23222533934087108, + "epoch": 0.320643242820252, "grad_norm": 0.0, - "learning_rate": 1.7930878754257328e-05, - "loss": 0.886, + "learning_rate": 1.5887382905793555e-05, + "loss": 0.9509, "step": 8195 }, { - "epoch": 0.23225367678313355, + "epoch": 0.3206823695124814, "grad_norm": 0.0, - "learning_rate": 1.793031968877573e-05, - "loss": 0.9714, + "learning_rate": 1.588635852556521e-05, + "loss": 1.2617, "step": 8196 }, { - "epoch": 0.23228201422539602, + "epoch": 0.32072149620471085, "grad_norm": 0.0, - "learning_rate": 1.7929760556494107e-05, - "loss": 0.8426, + "learning_rate": 1.588533405080932e-05, + "loss": 1.0393, "step": 8197 }, { - "epoch": 0.23231035166765848, + "epoch": 0.3207606228969403, "grad_norm": 0.0, - "learning_rate": 1.7929201357417164e-05, - "loss": 1.038, + "learning_rate": 1.588430948154234e-05, + "loss": 1.1387, "step": 8198 }, { - "epoch": 0.23233868910992095, + "epoch": 0.32079974958916974, "grad_norm": 0.0, - "learning_rate": 1.7928642091549616e-05, - "loss": 1.0071, + "learning_rate": 1.5883284817780726e-05, + "loss": 1.1438, "step": 8199 }, { - "epoch": 0.2323670265521834, + "epoch": 0.3208388762813992, "grad_norm": 0.0, - "learning_rate": 1.792808275889617e-05, - "loss": 0.9412, + "learning_rate": 1.5882260059540927e-05, + "loss": 1.1049, "step": 8200 }, { - "epoch": 0.23239536399444585, + "epoch": 0.3208780029736286, "grad_norm": 0.0, - "learning_rate": 1.792752335946154e-05, - "loss": 1.0456, + "learning_rate": 1.58812352068394e-05, + "loss": 1.0122, "step": 8201 }, { - "epoch": 0.23242370143670832, + "epoch": 0.32091712966585806, "grad_norm": 0.0, - "learning_rate": 1.7926963893250434e-05, - "loss": 1.0511, + "learning_rate": 1.588021025969261e-05, + "loss": 1.0461, "step": 8202 }, { - "epoch": 0.23245203887897078, + "epoch": 0.3209562563580875, "grad_norm": 0.0, - "learning_rate": 1.7926404360267567e-05, - "loss": 1.0986, + "learning_rate": 1.5879185218117012e-05, + "loss": 1.0455, "step": 8203 }, { - "epoch": 0.23248037632123325, + "epoch": 0.32099538305031694, "grad_norm": 0.0, - "learning_rate": 1.7925844760517657e-05, - "loss": 0.8525, + "learning_rate": 1.5878160082129064e-05, + "loss": 0.9386, "step": 8204 }, { - "epoch": 0.23250871376349572, + "epoch": 0.3210345097425464, "grad_norm": 0.0, - "learning_rate": 1.7925285094005412e-05, - "loss": 0.9513, + "learning_rate": 1.587713485174523e-05, + "loss": 1.1075, "step": 8205 }, { - "epoch": 0.23253705120575816, + "epoch": 0.3210736364347758, "grad_norm": 0.0, - "learning_rate": 1.7924725360735547e-05, - "loss": 0.9374, + "learning_rate": 1.5876109526981975e-05, + "loss": 1.1345, "step": 8206 }, { - "epoch": 0.23256538864802062, + "epoch": 0.32111276312700526, "grad_norm": 0.0, - "learning_rate": 1.7924165560712776e-05, - "loss": 1.0282, + "learning_rate": 1.5875084107855767e-05, + "loss": 1.0644, "step": 8207 }, { - "epoch": 0.2325937260902831, + "epoch": 0.3211518898192347, "grad_norm": 0.0, - "learning_rate": 1.7923605693941818e-05, - "loss": 1.0059, + "learning_rate": 1.587405859438307e-05, + "loss": 1.1321, "step": 8208 }, { - "epoch": 0.23262206353254555, + "epoch": 0.32119101651146414, "grad_norm": 0.0, - "learning_rate": 1.7923045760427387e-05, - "loss": 1.0399, + "learning_rate": 1.587303298658035e-05, + "loss": 1.3369, "step": 8209 }, { - "epoch": 0.23265040097480802, + "epoch": 0.3212301432036936, "grad_norm": 0.0, - "learning_rate": 1.7922485760174197e-05, - "loss": 0.9394, + "learning_rate": 1.5872007284464078e-05, + "loss": 1.123, "step": 8210 }, { - "epoch": 0.23267873841707049, + "epoch": 0.321269269895923, "grad_norm": 0.0, - "learning_rate": 1.792192569318697e-05, - "loss": 0.8667, + "learning_rate": 1.587098148805073e-05, + "loss": 1.1627, "step": 8211 }, { - "epoch": 0.23270707585933292, + "epoch": 0.3213083965881524, "grad_norm": 0.0, - "learning_rate": 1.792136555947042e-05, - "loss": 0.9454, + "learning_rate": 1.5869955597356778e-05, + "loss": 1.1738, "step": 8212 }, { - "epoch": 0.2327354133015954, + "epoch": 0.32134752328038185, "grad_norm": 0.0, - "learning_rate": 1.7920805359029267e-05, - "loss": 0.9552, + "learning_rate": 1.586892961239869e-05, + "loss": 1.0861, "step": 8213 }, { - "epoch": 0.23276375074385786, + "epoch": 0.3213866499726113, "grad_norm": 0.0, - "learning_rate": 1.792024509186823e-05, - "loss": 0.9088, + "learning_rate": 1.586790353319295e-05, + "loss": 1.1589, "step": 8214 }, { - "epoch": 0.23279208818612032, + "epoch": 0.32142577666484073, "grad_norm": 0.0, - "learning_rate": 1.7919684757992027e-05, - "loss": 1.0208, + "learning_rate": 1.586687735975603e-05, + "loss": 1.0245, "step": 8215 }, { - "epoch": 0.2328204256283828, + "epoch": 0.3214649033570702, "grad_norm": 0.0, - "learning_rate": 1.7919124357405374e-05, - "loss": 0.9677, + "learning_rate": 1.5865851092104414e-05, + "loss": 1.0749, "step": 8216 }, { - "epoch": 0.23284876307064525, + "epoch": 0.3215040300492996, "grad_norm": 0.0, - "learning_rate": 1.7918563890113003e-05, - "loss": 0.9756, + "learning_rate": 1.5864824730254578e-05, + "loss": 1.0154, "step": 8217 }, { - "epoch": 0.2328771005129077, + "epoch": 0.32154315674152906, "grad_norm": 0.0, - "learning_rate": 1.7918003356119622e-05, - "loss": 1.0596, + "learning_rate": 1.5863798274223007e-05, + "loss": 1.079, "step": 8218 }, { - "epoch": 0.23290543795517016, + "epoch": 0.3215822834337585, "grad_norm": 0.0, - "learning_rate": 1.791744275542996e-05, - "loss": 0.925, + "learning_rate": 1.586277172402618e-05, + "loss": 1.0251, "step": 8219 }, { - "epoch": 0.23293377539743262, + "epoch": 0.32162141012598794, "grad_norm": 0.0, - "learning_rate": 1.791688208804874e-05, - "loss": 1.0092, + "learning_rate": 1.586174507968059e-05, + "loss": 1.1012, "step": 8220 }, { - "epoch": 0.2329621128396951, + "epoch": 0.3216605368182174, "grad_norm": 0.0, - "learning_rate": 1.791632135398068e-05, - "loss": 1.0729, + "learning_rate": 1.5860718341202714e-05, + "loss": 1.0266, "step": 8221 }, { - "epoch": 0.23299045028195756, + "epoch": 0.3216996635104468, "grad_norm": 0.0, - "learning_rate": 1.791576055323051e-05, - "loss": 0.8632, + "learning_rate": 1.585969150860905e-05, + "loss": 1.018, "step": 8222 }, { - "epoch": 0.23301878772422002, + "epoch": 0.32173879020267626, "grad_norm": 0.0, - "learning_rate": 1.7915199685802944e-05, - "loss": 0.9654, + "learning_rate": 1.5858664581916083e-05, + "loss": 1.1169, "step": 8223 }, { - "epoch": 0.23304712516648246, + "epoch": 0.3217779168949057, "grad_norm": 0.0, - "learning_rate": 1.7914638751702713e-05, - "loss": 1.0712, + "learning_rate": 1.58576375611403e-05, + "loss": 1.0357, "step": 8224 }, { - "epoch": 0.23307546260874493, + "epoch": 0.32181704358713514, "grad_norm": 0.0, - "learning_rate": 1.791407775093454e-05, - "loss": 1.0128, + "learning_rate": 1.5856610446298198e-05, + "loss": 0.9878, "step": 8225 }, { - "epoch": 0.2331038000510074, + "epoch": 0.3218561702793646, "grad_norm": 0.0, - "learning_rate": 1.7913516683503155e-05, - "loss": 0.9541, + "learning_rate": 1.5855583237406277e-05, + "loss": 1.0431, "step": 8226 }, { - "epoch": 0.23313213749326986, + "epoch": 0.321895296971594, "grad_norm": 0.0, - "learning_rate": 1.7912955549413274e-05, - "loss": 1.1517, + "learning_rate": 1.585455593448102e-05, + "loss": 1.1059, "step": 8227 }, { - "epoch": 0.23316047493553232, + "epoch": 0.32193442366382347, "grad_norm": 0.0, - "learning_rate": 1.791239434866964e-05, - "loss": 0.9799, + "learning_rate": 1.5853528537538933e-05, + "loss": 1.0688, "step": 8228 }, { - "epoch": 0.2331888123777948, + "epoch": 0.3219735503560529, "grad_norm": 0.0, - "learning_rate": 1.7911833081276962e-05, - "loss": 1.0166, + "learning_rate": 1.5852501046596516e-05, + "loss": 1.1892, "step": 8229 }, { - "epoch": 0.23321714982005723, + "epoch": 0.32201267704828235, "grad_norm": 0.0, - "learning_rate": 1.7911271747239977e-05, - "loss": 0.9858, + "learning_rate": 1.5851473461670265e-05, + "loss": 1.1468, "step": 8230 }, { - "epoch": 0.2332454872623197, + "epoch": 0.3220518037405118, "grad_norm": 0.0, - "learning_rate": 1.7910710346563417e-05, - "loss": 1.0447, + "learning_rate": 1.585044578277668e-05, + "loss": 1.1038, "step": 8231 }, { - "epoch": 0.23327382470458216, + "epoch": 0.32209093043274123, "grad_norm": 0.0, - "learning_rate": 1.7910148879251998e-05, - "loss": 1.2229, + "learning_rate": 1.5849418009932265e-05, + "loss": 1.1011, "step": 8232 }, { - "epoch": 0.23330216214684463, + "epoch": 0.32213005712497067, "grad_norm": 0.0, - "learning_rate": 1.7909587345310464e-05, - "loss": 0.943, + "learning_rate": 1.584839014315353e-05, + "loss": 1.1212, "step": 8233 }, { - "epoch": 0.2333304995891071, + "epoch": 0.3221691838172001, "grad_norm": 0.0, - "learning_rate": 1.7909025744743537e-05, - "loss": 1.0169, + "learning_rate": 1.5847362182456975e-05, + "loss": 1.1039, "step": 8234 }, { - "epoch": 0.23335883703136956, + "epoch": 0.32220831050942955, "grad_norm": 0.0, - "learning_rate": 1.790846407755595e-05, - "loss": 1.017, + "learning_rate": 1.5846334127859113e-05, + "loss": 1.1812, "step": 8235 }, { - "epoch": 0.233387174473632, + "epoch": 0.322247437201659, "grad_norm": 0.0, - "learning_rate": 1.7907902343752432e-05, - "loss": 1.039, + "learning_rate": 1.584530597937645e-05, + "loss": 1.0276, "step": 8236 }, { - "epoch": 0.23341551191589446, + "epoch": 0.32228656389388843, "grad_norm": 0.0, - "learning_rate": 1.7907340543337714e-05, - "loss": 1.063, + "learning_rate": 1.5844277737025496e-05, + "loss": 1.16, "step": 8237 }, { - "epoch": 0.23344384935815693, + "epoch": 0.3223256905861179, "grad_norm": 0.0, - "learning_rate": 1.7906778676316536e-05, - "loss": 0.9122, + "learning_rate": 1.5843249400822765e-05, + "loss": 1.1411, "step": 8238 }, { - "epoch": 0.2334721868004194, + "epoch": 0.3223648172783473, "grad_norm": 0.0, - "learning_rate": 1.790621674269362e-05, - "loss": 0.9619, + "learning_rate": 1.5842220970784773e-05, + "loss": 1.1177, "step": 8239 }, { - "epoch": 0.23350052424268186, + "epoch": 0.3224039439705767, "grad_norm": 0.0, - "learning_rate": 1.7905654742473707e-05, - "loss": 1.0486, + "learning_rate": 1.5841192446928032e-05, + "loss": 0.9876, "step": 8240 }, { - "epoch": 0.23352886168494433, + "epoch": 0.32244307066280614, "grad_norm": 0.0, - "learning_rate": 1.7905092675661526e-05, - "loss": 0.9819, + "learning_rate": 1.584016382926906e-05, + "loss": 1.2944, "step": 8241 }, { - "epoch": 0.23355719912720677, + "epoch": 0.3224821973550356, "grad_norm": 0.0, - "learning_rate": 1.790453054226182e-05, - "loss": 1.043, + "learning_rate": 1.5839135117824375e-05, + "loss": 1.1553, "step": 8242 }, { - "epoch": 0.23358553656946923, + "epoch": 0.322521324047265, "grad_norm": 0.0, - "learning_rate": 1.790396834227931e-05, - "loss": 1.0862, + "learning_rate": 1.5838106312610496e-05, + "loss": 1.1535, "step": 8243 }, { - "epoch": 0.2336138740117317, + "epoch": 0.32256045073949446, "grad_norm": 0.0, - "learning_rate": 1.7903406075718744e-05, - "loss": 0.9327, + "learning_rate": 1.5837077413643947e-05, + "loss": 1.015, "step": 8244 }, { - "epoch": 0.23364221145399416, + "epoch": 0.3225995774317239, "grad_norm": 0.0, - "learning_rate": 1.7902843742584855e-05, - "loss": 0.8341, + "learning_rate": 1.5836048420941246e-05, + "loss": 1.0398, "step": 8245 }, { - "epoch": 0.23367054889625663, + "epoch": 0.32263870412395335, "grad_norm": 0.0, - "learning_rate": 1.7902281342882374e-05, - "loss": 1.1031, + "learning_rate": 1.583501933451892e-05, + "loss": 1.0978, "step": 8246 }, { - "epoch": 0.2336988863385191, + "epoch": 0.3226778308161828, "grad_norm": 0.0, - "learning_rate": 1.7901718876616048e-05, - "loss": 1.0213, + "learning_rate": 1.58339901543935e-05, + "loss": 1.142, "step": 8247 }, { - "epoch": 0.23372722378078153, + "epoch": 0.3227169575084122, "grad_norm": 0.0, - "learning_rate": 1.7901156343790606e-05, - "loss": 0.8977, + "learning_rate": 1.5832960880581506e-05, + "loss": 1.164, "step": 8248 }, { - "epoch": 0.233755561223044, + "epoch": 0.32275608420064167, "grad_norm": 0.0, - "learning_rate": 1.790059374441079e-05, - "loss": 0.8993, + "learning_rate": 1.583193151309947e-05, + "loss": 1.1168, "step": 8249 }, { - "epoch": 0.23378389866530647, + "epoch": 0.3227952108928711, "grad_norm": 0.0, - "learning_rate": 1.7900031078481343e-05, - "loss": 0.9408, + "learning_rate": 1.583090205196392e-05, + "loss": 1.1092, "step": 8250 }, { - "epoch": 0.23381223610756893, + "epoch": 0.32283433758510055, "grad_norm": 0.0, - "learning_rate": 1.7899468346006995e-05, - "loss": 1.0297, + "learning_rate": 1.5829872497191388e-05, + "loss": 0.987, "step": 8251 }, { - "epoch": 0.2338405735498314, + "epoch": 0.32287346427733, "grad_norm": 0.0, - "learning_rate": 1.7898905546992494e-05, - "loss": 0.8784, + "learning_rate": 1.5828842848798413e-05, + "loss": 1.164, "step": 8252 }, { - "epoch": 0.23386891099209386, + "epoch": 0.32291259096955943, "grad_norm": 0.0, - "learning_rate": 1.789834268144258e-05, - "loss": 0.9341, + "learning_rate": 1.5827813106801524e-05, + "loss": 1.2147, "step": 8253 }, { - "epoch": 0.2338972484343563, + "epoch": 0.3229517176617889, "grad_norm": 0.0, - "learning_rate": 1.789777974936199e-05, - "loss": 1.0062, + "learning_rate": 1.582678327121726e-05, + "loss": 1.0036, "step": 8254 }, { - "epoch": 0.23392558587661877, + "epoch": 0.3229908443540183, "grad_norm": 0.0, - "learning_rate": 1.789721675075547e-05, - "loss": 0.9545, + "learning_rate": 1.5825753342062155e-05, + "loss": 1.0466, "step": 8255 }, { - "epoch": 0.23395392331888124, + "epoch": 0.32302997104624775, "grad_norm": 0.0, - "learning_rate": 1.7896653685627762e-05, - "loss": 0.9163, + "learning_rate": 1.5824723319352754e-05, + "loss": 1.1137, "step": 8256 }, { - "epoch": 0.2339822607611437, + "epoch": 0.3230690977384772, "grad_norm": 0.0, - "learning_rate": 1.7896090553983606e-05, - "loss": 1.0483, + "learning_rate": 1.5823693203105595e-05, + "loss": 1.1357, "step": 8257 }, { - "epoch": 0.23401059820340617, + "epoch": 0.32310822443070664, "grad_norm": 0.0, - "learning_rate": 1.789552735582775e-05, - "loss": 1.0419, + "learning_rate": 1.582266299333722e-05, + "loss": 1.16, "step": 8258 }, { - "epoch": 0.23403893564566863, + "epoch": 0.3231473511229361, "grad_norm": 0.0, - "learning_rate": 1.7894964091164932e-05, - "loss": 0.9552, + "learning_rate": 1.5821632690064175e-05, + "loss": 1.1871, "step": 8259 }, { - "epoch": 0.23406727308793107, + "epoch": 0.3231864778151655, "grad_norm": 0.0, - "learning_rate": 1.7894400759999898e-05, - "loss": 0.9401, + "learning_rate": 1.5820602293303004e-05, + "loss": 0.9099, "step": 8260 }, { - "epoch": 0.23409561053019354, + "epoch": 0.32322560450739496, "grad_norm": 0.0, - "learning_rate": 1.7893837362337397e-05, - "loss": 0.9994, + "learning_rate": 1.581957180307025e-05, + "loss": 1.0651, "step": 8261 }, { - "epoch": 0.234123947972456, + "epoch": 0.3232647311996244, "grad_norm": 0.0, - "learning_rate": 1.7893273898182177e-05, - "loss": 0.945, + "learning_rate": 1.5818541219382472e-05, + "loss": 1.0807, "step": 8262 }, { - "epoch": 0.23415228541471847, + "epoch": 0.32330385789185384, "grad_norm": 0.0, - "learning_rate": 1.7892710367538973e-05, - "loss": 1.0057, + "learning_rate": 1.5817510542256208e-05, + "loss": 1.1684, "step": 8263 }, { - "epoch": 0.23418062285698094, + "epoch": 0.3233429845840833, "grad_norm": 0.0, - "learning_rate": 1.7892146770412543e-05, - "loss": 0.8438, + "learning_rate": 1.5816479771708014e-05, + "loss": 1.1591, "step": 8264 }, { - "epoch": 0.2342089602992434, + "epoch": 0.3233821112763127, "grad_norm": 0.0, - "learning_rate": 1.7891583106807626e-05, - "loss": 0.9108, + "learning_rate": 1.5815448907754448e-05, + "loss": 1.0497, "step": 8265 }, { - "epoch": 0.23423729774150584, + "epoch": 0.32342123796854216, "grad_norm": 0.0, - "learning_rate": 1.7891019376728976e-05, - "loss": 0.9562, + "learning_rate": 1.5814417950412053e-05, + "loss": 1.1922, "step": 8266 }, { - "epoch": 0.2342656351837683, + "epoch": 0.3234603646607716, "grad_norm": 0.0, - "learning_rate": 1.789045558018134e-05, - "loss": 1.1026, + "learning_rate": 1.5813386899697395e-05, + "loss": 1.11, "step": 8267 }, { - "epoch": 0.23429397262603077, + "epoch": 0.323499491353001, "grad_norm": 0.0, - "learning_rate": 1.7889891717169466e-05, - "loss": 0.9608, + "learning_rate": 1.5812355755627028e-05, + "loss": 1.0959, "step": 8268 }, { - "epoch": 0.23432231006829324, + "epoch": 0.32353861804523043, "grad_norm": 0.0, - "learning_rate": 1.7889327787698105e-05, - "loss": 1.0464, + "learning_rate": 1.5811324518217513e-05, + "loss": 1.0095, "step": 8269 }, { - "epoch": 0.2343506475105557, + "epoch": 0.32357774473745987, "grad_norm": 0.0, - "learning_rate": 1.7888763791772006e-05, - "loss": 1.1124, + "learning_rate": 1.5810293187485407e-05, + "loss": 1.1328, "step": 8270 }, { - "epoch": 0.23437898495281817, + "epoch": 0.3236168714296893, "grad_norm": 0.0, - "learning_rate": 1.7888199729395924e-05, - "loss": 1.0165, + "learning_rate": 1.580926176344727e-05, + "loss": 1.0482, "step": 8271 }, { - "epoch": 0.2344073223950806, + "epoch": 0.32365599812191875, "grad_norm": 0.0, - "learning_rate": 1.78876356005746e-05, - "loss": 0.9479, + "learning_rate": 1.580823024611967e-05, + "loss": 1.0085, "step": 8272 }, { - "epoch": 0.23443565983734307, + "epoch": 0.3236951248141482, "grad_norm": 0.0, - "learning_rate": 1.7887071405312798e-05, - "loss": 0.9062, + "learning_rate": 1.5807198635519172e-05, + "loss": 0.9737, "step": 8273 }, { - "epoch": 0.23446399727960554, + "epoch": 0.32373425150637763, "grad_norm": 0.0, - "learning_rate": 1.788650714361526e-05, - "loss": 0.8811, + "learning_rate": 1.5806166931662338e-05, + "loss": 1.1042, "step": 8274 }, { - "epoch": 0.234492334721868, + "epoch": 0.3237733781986071, "grad_norm": 0.0, - "learning_rate": 1.7885942815486746e-05, - "loss": 1.0134, + "learning_rate": 1.580513513456574e-05, + "loss": 1.1328, "step": 8275 }, { - "epoch": 0.23452067216413047, + "epoch": 0.3238125048908365, "grad_norm": 0.0, - "learning_rate": 1.7885378420932006e-05, - "loss": 0.883, + "learning_rate": 1.580410324424595e-05, + "loss": 1.0304, "step": 8276 }, { - "epoch": 0.23454900960639294, + "epoch": 0.32385163158306596, "grad_norm": 0.0, - "learning_rate": 1.7884813959955796e-05, - "loss": 1.02, + "learning_rate": 1.5803071260719528e-05, + "loss": 1.1844, "step": 8277 }, { - "epoch": 0.23457734704865538, + "epoch": 0.3238907582752954, "grad_norm": 0.0, - "learning_rate": 1.788424943256287e-05, - "loss": 0.869, + "learning_rate": 1.580203918400306e-05, + "loss": 1.0766, "step": 8278 }, { - "epoch": 0.23460568449091784, + "epoch": 0.32392988496752484, "grad_norm": 0.0, - "learning_rate": 1.7883684838757983e-05, - "loss": 0.9163, + "learning_rate": 1.5801007014113106e-05, + "loss": 0.9823, "step": 8279 }, { - "epoch": 0.2346340219331803, + "epoch": 0.3239690116597543, "grad_norm": 0.0, - "learning_rate": 1.7883120178545895e-05, - "loss": 0.8725, + "learning_rate": 1.5799974751066252e-05, + "loss": 1.1364, "step": 8280 }, { - "epoch": 0.23466235937544278, + "epoch": 0.3240081383519837, "grad_norm": 0.0, - "learning_rate": 1.7882555451931353e-05, - "loss": 1.0162, + "learning_rate": 1.5798942394879073e-05, + "loss": 1.2115, "step": 8281 }, { - "epoch": 0.23469069681770524, + "epoch": 0.32404726504421316, "grad_norm": 0.0, - "learning_rate": 1.7881990658919122e-05, - "loss": 1.0114, + "learning_rate": 1.5797909945568146e-05, + "loss": 1.0315, "step": 8282 }, { - "epoch": 0.2347190342599677, + "epoch": 0.3240863917364426, "grad_norm": 0.0, - "learning_rate": 1.7881425799513955e-05, - "loss": 0.9451, + "learning_rate": 1.579687740315005e-05, + "loss": 1.1663, "step": 8283 }, { - "epoch": 0.23474737170223015, + "epoch": 0.32412551842867204, "grad_norm": 0.0, - "learning_rate": 1.7880860873720615e-05, - "loss": 0.9896, + "learning_rate": 1.579584476764136e-05, + "loss": 1.1031, "step": 8284 }, { - "epoch": 0.2347757091444926, + "epoch": 0.3241646451209015, "grad_norm": 0.0, - "learning_rate": 1.7880295881543856e-05, - "loss": 1.1351, + "learning_rate": 1.5794812039058674e-05, + "loss": 1.0292, "step": 8285 }, { - "epoch": 0.23480404658675508, + "epoch": 0.3242037718131309, "grad_norm": 0.0, - "learning_rate": 1.787973082298844e-05, - "loss": 0.9756, + "learning_rate": 1.5793779217418562e-05, + "loss": 1.1381, "step": 8286 }, { - "epoch": 0.23483238402901754, + "epoch": 0.32424289850536037, "grad_norm": 0.0, - "learning_rate": 1.787916569805912e-05, - "loss": 0.92, + "learning_rate": 1.579274630273762e-05, + "loss": 1.2523, "step": 8287 }, { - "epoch": 0.23486072147128, + "epoch": 0.3242820251975898, "grad_norm": 0.0, - "learning_rate": 1.7878600506760665e-05, - "loss": 0.9412, + "learning_rate": 1.579171329503243e-05, + "loss": 1.1212, "step": 8288 }, { - "epoch": 0.23488905891354248, + "epoch": 0.32432115188981925, "grad_norm": 0.0, - "learning_rate": 1.787803524909783e-05, - "loss": 0.9571, + "learning_rate": 1.579068019431958e-05, + "loss": 1.1093, "step": 8289 }, { - "epoch": 0.2349173963558049, + "epoch": 0.3243602785820487, "grad_norm": 0.0, - "learning_rate": 1.7877469925075383e-05, - "loss": 1.0575, + "learning_rate": 1.5789647000615665e-05, + "loss": 1.0723, "step": 8290 }, { - "epoch": 0.23494573379806738, + "epoch": 0.32439940527427813, "grad_norm": 0.0, - "learning_rate": 1.7876904534698082e-05, - "loss": 1.0167, + "learning_rate": 1.5788613713937273e-05, + "loss": 1.1087, "step": 8291 }, { - "epoch": 0.23497407124032985, + "epoch": 0.32443853196650757, "grad_norm": 0.0, - "learning_rate": 1.7876339077970684e-05, - "loss": 0.9666, + "learning_rate": 1.5787580334300997e-05, + "loss": 1.1776, "step": 8292 }, { - "epoch": 0.2350024086825923, + "epoch": 0.324477658658737, "grad_norm": 0.0, - "learning_rate": 1.787577355489796e-05, - "loss": 0.9379, + "learning_rate": 1.5786546861723434e-05, + "loss": 1.1179, "step": 8293 }, { - "epoch": 0.23503074612485478, + "epoch": 0.32451678535096645, "grad_norm": 0.0, - "learning_rate": 1.787520796548467e-05, - "loss": 1.0452, + "learning_rate": 1.578551329622118e-05, + "loss": 1.2154, "step": 8294 }, { - "epoch": 0.23505908356711724, + "epoch": 0.3245559120431959, "grad_norm": 0.0, - "learning_rate": 1.7874642309735576e-05, - "loss": 1.0709, + "learning_rate": 1.5784479637810832e-05, + "loss": 1.1607, "step": 8295 }, { - "epoch": 0.23508742100937968, + "epoch": 0.32459503873542533, "grad_norm": 0.0, - "learning_rate": 1.787407658765545e-05, - "loss": 0.9528, + "learning_rate": 1.5783445886508987e-05, + "loss": 1.1527, "step": 8296 }, { - "epoch": 0.23511575845164215, + "epoch": 0.3246341654276547, "grad_norm": 0.0, - "learning_rate": 1.7873510799249052e-05, - "loss": 0.9015, + "learning_rate": 1.578241204233225e-05, + "loss": 1.1062, "step": 8297 }, { - "epoch": 0.23514409589390461, + "epoch": 0.32467329211988416, "grad_norm": 0.0, - "learning_rate": 1.787294494452115e-05, - "loss": 0.9666, + "learning_rate": 1.5781378105297225e-05, + "loss": 1.2365, "step": 8298 }, { - "epoch": 0.23517243333616708, + "epoch": 0.3247124188121136, "grad_norm": 0.0, - "learning_rate": 1.787237902347651e-05, - "loss": 1.119, + "learning_rate": 1.5780344075420507e-05, + "loss": 1.0874, "step": 8299 }, { - "epoch": 0.23520077077842955, + "epoch": 0.32475154550434304, "grad_norm": 0.0, - "learning_rate": 1.7871813036119893e-05, - "loss": 1.0185, + "learning_rate": 1.5779309952718706e-05, + "loss": 1.0651, "step": 8300 }, { - "epoch": 0.235229108220692, + "epoch": 0.3247906721965725, "grad_norm": 0.0, - "learning_rate": 1.7871246982456075e-05, - "loss": 1.0765, + "learning_rate": 1.577827573720843e-05, + "loss": 1.152, "step": 8301 }, { - "epoch": 0.23525744566295445, + "epoch": 0.3248297988888019, "grad_norm": 0.0, - "learning_rate": 1.787068086248982e-05, - "loss": 0.9918, + "learning_rate": 1.577724142890629e-05, + "loss": 1.1099, "step": 8302 }, { - "epoch": 0.23528578310521692, + "epoch": 0.32486892558103136, "grad_norm": 0.0, - "learning_rate": 1.78701146762259e-05, - "loss": 0.8581, + "learning_rate": 1.577620702782889e-05, + "loss": 1.1133, "step": 8303 }, { - "epoch": 0.23531412054747938, + "epoch": 0.3249080522732608, "grad_norm": 0.0, - "learning_rate": 1.7869548423669075e-05, - "loss": 0.9724, + "learning_rate": 1.577517253399284e-05, + "loss": 1.0048, "step": 8304 }, { - "epoch": 0.23534245798974185, + "epoch": 0.32494717896549025, "grad_norm": 0.0, - "learning_rate": 1.7868982104824127e-05, - "loss": 0.981, + "learning_rate": 1.5774137947414757e-05, + "loss": 1.1121, "step": 8305 }, { - "epoch": 0.23537079543200431, + "epoch": 0.3249863056577197, "grad_norm": 0.0, - "learning_rate": 1.786841571969582e-05, - "loss": 1.0252, + "learning_rate": 1.5773103268111255e-05, + "loss": 1.1048, "step": 8306 }, { - "epoch": 0.23539913287426678, + "epoch": 0.32502543234994913, "grad_norm": 0.0, - "learning_rate": 1.7867849268288924e-05, - "loss": 1.0493, + "learning_rate": 1.577206849609895e-05, + "loss": 1.0099, "step": 8307 }, { - "epoch": 0.23542747031652922, + "epoch": 0.32506455904217857, "grad_norm": 0.0, - "learning_rate": 1.7867282750608212e-05, - "loss": 0.9641, + "learning_rate": 1.5771033631394455e-05, + "loss": 1.1194, "step": 8308 }, { - "epoch": 0.23545580775879169, + "epoch": 0.325103685734408, "grad_norm": 0.0, - "learning_rate": 1.786671616665846e-05, - "loss": 1.0369, + "learning_rate": 1.5769998674014393e-05, + "loss": 1.2639, "step": 8309 }, { - "epoch": 0.23548414520105415, + "epoch": 0.32514281242663745, "grad_norm": 0.0, - "learning_rate": 1.786614951644443e-05, - "loss": 1.0096, + "learning_rate": 1.5768963623975386e-05, + "loss": 1.1326, "step": 8310 }, { - "epoch": 0.23551248264331662, + "epoch": 0.3251819391188669, "grad_norm": 0.0, - "learning_rate": 1.7865582799970904e-05, - "loss": 0.9304, + "learning_rate": 1.5767928481294046e-05, + "loss": 1.1096, "step": 8311 }, { - "epoch": 0.23554082008557908, + "epoch": 0.32522106581109633, "grad_norm": 0.0, - "learning_rate": 1.7865016017242656e-05, - "loss": 1.044, + "learning_rate": 1.5766893245987005e-05, + "loss": 1.1833, "step": 8312 }, { - "epoch": 0.23556915752784155, + "epoch": 0.3252601925033258, "grad_norm": 0.0, - "learning_rate": 1.7864449168264456e-05, - "loss": 1.1052, + "learning_rate": 1.5765857918070883e-05, + "loss": 1.2768, "step": 8313 }, { - "epoch": 0.235597494970104, + "epoch": 0.3252993191955552, "grad_norm": 0.0, - "learning_rate": 1.786388225304108e-05, - "loss": 1.0865, + "learning_rate": 1.576482249756231e-05, + "loss": 1.0384, "step": 8314 }, { - "epoch": 0.23562583241236645, + "epoch": 0.32533844588778466, "grad_norm": 0.0, - "learning_rate": 1.7863315271577303e-05, - "loss": 1.0193, + "learning_rate": 1.576378698447791e-05, + "loss": 1.1293, "step": 8315 }, { - "epoch": 0.23565416985462892, + "epoch": 0.3253775725800141, "grad_norm": 0.0, - "learning_rate": 1.78627482238779e-05, - "loss": 0.9313, + "learning_rate": 1.5762751378834314e-05, + "loss": 1.0543, "step": 8316 }, { - "epoch": 0.23568250729689139, + "epoch": 0.32541669927224354, "grad_norm": 0.0, - "learning_rate": 1.7862181109947653e-05, - "loss": 0.9684, + "learning_rate": 1.5761715680648154e-05, + "loss": 1.1655, "step": 8317 }, { - "epoch": 0.23571084473915385, + "epoch": 0.325455825964473, "grad_norm": 0.0, - "learning_rate": 1.7861613929791333e-05, - "loss": 0.9531, + "learning_rate": 1.5760679889936056e-05, + "loss": 1.0388, "step": 8318 }, { - "epoch": 0.23573918218141632, + "epoch": 0.3254949526567024, "grad_norm": 0.0, - "learning_rate": 1.7861046683413717e-05, - "loss": 1.1447, + "learning_rate": 1.575964400671466e-05, + "loss": 1.0237, "step": 8319 }, { - "epoch": 0.23576751962367876, + "epoch": 0.32553407934893186, "grad_norm": 0.0, - "learning_rate": 1.7860479370819588e-05, - "loss": 0.9361, + "learning_rate": 1.57586080310006e-05, + "loss": 1.162, "step": 8320 }, { - "epoch": 0.23579585706594122, + "epoch": 0.3255732060411613, "grad_norm": 0.0, - "learning_rate": 1.7859911992013724e-05, - "loss": 1.111, + "learning_rate": 1.5757571962810507e-05, + "loss": 1.1241, "step": 8321 }, { - "epoch": 0.2358241945082037, + "epoch": 0.32561233273339074, "grad_norm": 0.0, - "learning_rate": 1.7859344547000898e-05, - "loss": 0.9464, + "learning_rate": 1.5756535802161028e-05, + "loss": 1.0555, "step": 8322 }, { - "epoch": 0.23585253195046615, + "epoch": 0.3256514594256202, "grad_norm": 0.0, - "learning_rate": 1.7858777035785898e-05, - "loss": 0.9535, + "learning_rate": 1.5755499549068792e-05, + "loss": 1.205, "step": 8323 }, { - "epoch": 0.23588086939272862, + "epoch": 0.3256905861178496, "grad_norm": 0.0, - "learning_rate": 1.78582094583735e-05, - "loss": 1.0449, + "learning_rate": 1.575446320355045e-05, + "loss": 0.9938, "step": 8324 }, { - "epoch": 0.2359092068349911, + "epoch": 0.325729712810079, "grad_norm": 0.0, - "learning_rate": 1.785764181476849e-05, - "loss": 0.9711, + "learning_rate": 1.5753426765622637e-05, + "loss": 1.1445, "step": 8325 }, { - "epoch": 0.23593754427725352, + "epoch": 0.32576883950230845, "grad_norm": 0.0, - "learning_rate": 1.7857074104975637e-05, - "loss": 0.9051, + "learning_rate": 1.5752390235301996e-05, + "loss": 1.171, "step": 8326 }, { - "epoch": 0.235965881719516, + "epoch": 0.3258079661945379, "grad_norm": 0.0, - "learning_rate": 1.7856506328999734e-05, - "loss": 1.009, + "learning_rate": 1.5751353612605183e-05, + "loss": 1.1017, "step": 8327 }, { - "epoch": 0.23599421916177846, + "epoch": 0.32584709288676733, "grad_norm": 0.0, - "learning_rate": 1.7855938486845563e-05, - "loss": 1.0649, + "learning_rate": 1.575031689754883e-05, + "loss": 1.0432, "step": 8328 }, { - "epoch": 0.23602255660404092, + "epoch": 0.32588621957899677, "grad_norm": 0.0, - "learning_rate": 1.7855370578517902e-05, - "loss": 1.0751, + "learning_rate": 1.5749280090149602e-05, + "loss": 1.1303, "step": 8329 }, { - "epoch": 0.2360508940463034, + "epoch": 0.3259253462712262, "grad_norm": 0.0, - "learning_rate": 1.785480260402154e-05, - "loss": 1.0606, + "learning_rate": 1.5748243190424134e-05, + "loss": 1.1049, "step": 8330 }, { - "epoch": 0.23607923148856585, + "epoch": 0.32596447296345565, "grad_norm": 0.0, - "learning_rate": 1.785423456336126e-05, - "loss": 0.9769, + "learning_rate": 1.5747206198389086e-05, + "loss": 1.064, "step": 8331 }, { - "epoch": 0.2361075689308283, + "epoch": 0.3260035996556851, "grad_norm": 0.0, - "learning_rate": 1.7853666456541843e-05, - "loss": 0.8819, + "learning_rate": 1.5746169114061108e-05, + "loss": 0.9478, "step": 8332 }, { - "epoch": 0.23613590637309076, + "epoch": 0.32604272634791454, "grad_norm": 0.0, - "learning_rate": 1.785309828356808e-05, - "loss": 1.0499, + "learning_rate": 1.5745131937456853e-05, + "loss": 1.1714, "step": 8333 }, { - "epoch": 0.23616424381535323, + "epoch": 0.326081853040144, "grad_norm": 0.0, - "learning_rate": 1.785253004444475e-05, - "loss": 0.8816, + "learning_rate": 1.574409466859298e-05, + "loss": 1.1464, "step": 8334 }, { - "epoch": 0.2361925812576157, + "epoch": 0.3261209797323734, "grad_norm": 0.0, - "learning_rate": 1.7851961739176645e-05, - "loss": 0.9909, + "learning_rate": 1.574305730748614e-05, + "loss": 1.0966, "step": 8335 }, { - "epoch": 0.23622091869987816, + "epoch": 0.32616010642460286, "grad_norm": 0.0, - "learning_rate": 1.785139336776855e-05, - "loss": 0.8975, + "learning_rate": 1.5742019854153003e-05, + "loss": 1.0485, "step": 8336 }, { - "epoch": 0.23624925614214062, + "epoch": 0.3261992331168323, "grad_norm": 0.0, - "learning_rate": 1.7850824930225255e-05, - "loss": 1.014, + "learning_rate": 1.5740982308610218e-05, + "loss": 1.0773, "step": 8337 }, { - "epoch": 0.23627759358440306, + "epoch": 0.32623835980906174, "grad_norm": 0.0, - "learning_rate": 1.7850256426551546e-05, - "loss": 1.1055, + "learning_rate": 1.5739944670874453e-05, + "loss": 1.1885, "step": 8338 }, { - "epoch": 0.23630593102666553, + "epoch": 0.3262774865012912, "grad_norm": 0.0, - "learning_rate": 1.784968785675221e-05, - "loss": 0.982, + "learning_rate": 1.5738906940962368e-05, + "loss": 1.0697, "step": 8339 }, { - "epoch": 0.236334268468928, + "epoch": 0.3263166131935206, "grad_norm": 0.0, - "learning_rate": 1.7849119220832037e-05, - "loss": 0.9158, + "learning_rate": 1.5737869118890628e-05, + "loss": 1.0814, "step": 8340 }, { - "epoch": 0.23636260591119046, + "epoch": 0.32635573988575006, "grad_norm": 0.0, - "learning_rate": 1.7848550518795826e-05, - "loss": 1.0353, + "learning_rate": 1.57368312046759e-05, + "loss": 1.0265, "step": 8341 }, { - "epoch": 0.23639094335345293, + "epoch": 0.3263948665779795, "grad_norm": 0.0, - "learning_rate": 1.784798175064835e-05, - "loss": 1.1361, + "learning_rate": 1.573579319833485e-05, + "loss": 1.1449, "step": 8342 }, { - "epoch": 0.2364192807957154, + "epoch": 0.32643399327020894, "grad_norm": 0.0, - "learning_rate": 1.7847412916394416e-05, - "loss": 0.8591, + "learning_rate": 1.573475509988415e-05, + "loss": 0.9836, "step": 8343 }, { - "epoch": 0.23644761823797783, + "epoch": 0.3264731199624384, "grad_norm": 0.0, - "learning_rate": 1.7846844016038803e-05, - "loss": 0.9946, + "learning_rate": 1.573371690934047e-05, + "loss": 1.0798, "step": 8344 }, { - "epoch": 0.2364759556802403, + "epoch": 0.3265122466546678, "grad_norm": 0.0, - "learning_rate": 1.7846275049586316e-05, - "loss": 0.9064, + "learning_rate": 1.573267862672048e-05, + "loss": 0.9966, "step": 8345 }, { - "epoch": 0.23650429312250276, + "epoch": 0.32655137334689727, "grad_norm": 0.0, - "learning_rate": 1.7845706017041734e-05, - "loss": 0.9806, + "learning_rate": 1.5731640252040857e-05, + "loss": 1.1639, "step": 8346 }, { - "epoch": 0.23653263056476523, + "epoch": 0.3265905000391267, "grad_norm": 0.0, - "learning_rate": 1.784513691840986e-05, - "loss": 0.963, + "learning_rate": 1.573060178531827e-05, + "loss": 1.0747, "step": 8347 }, { - "epoch": 0.2365609680070277, + "epoch": 0.32662962673135615, "grad_norm": 0.0, - "learning_rate": 1.7844567753695485e-05, - "loss": 0.9277, + "learning_rate": 1.5729563226569402e-05, + "loss": 1.1157, "step": 8348 }, { - "epoch": 0.23658930544929016, + "epoch": 0.3266687534235856, "grad_norm": 0.0, - "learning_rate": 1.78439985229034e-05, - "loss": 0.9946, + "learning_rate": 1.572852457581093e-05, + "loss": 1.024, "step": 8349 }, { - "epoch": 0.2366176428915526, + "epoch": 0.32670788011581503, "grad_norm": 0.0, - "learning_rate": 1.7843429226038408e-05, - "loss": 0.9135, + "learning_rate": 1.5727485833059526e-05, + "loss": 1.1975, "step": 8350 }, { - "epoch": 0.23664598033381506, + "epoch": 0.32674700680804447, "grad_norm": 0.0, - "learning_rate": 1.7842859863105295e-05, - "loss": 1.0477, + "learning_rate": 1.572644699833188e-05, + "loss": 1.1256, "step": 8351 }, { - "epoch": 0.23667431777607753, + "epoch": 0.3267861335002739, "grad_norm": 0.0, - "learning_rate": 1.784229043410886e-05, - "loss": 0.8697, + "learning_rate": 1.572540807164467e-05, + "loss": 1.0616, "step": 8352 }, { - "epoch": 0.23670265521834, + "epoch": 0.32682526019250335, "grad_norm": 0.0, - "learning_rate": 1.7841720939053902e-05, - "loss": 1.0806, + "learning_rate": 1.5724369053014583e-05, + "loss": 1.205, "step": 8353 }, { - "epoch": 0.23673099266060246, + "epoch": 0.32686438688473274, "grad_norm": 0.0, - "learning_rate": 1.784115137794522e-05, - "loss": 1.0602, + "learning_rate": 1.5723329942458302e-05, + "loss": 1.1667, "step": 8354 }, { - "epoch": 0.23675933010286493, + "epoch": 0.3269035135769622, "grad_norm": 0.0, - "learning_rate": 1.7840581750787603e-05, - "loss": 0.9949, + "learning_rate": 1.572229073999251e-05, + "loss": 1.2708, "step": 8355 }, { - "epoch": 0.23678766754512737, + "epoch": 0.3269426402691916, "grad_norm": 0.0, - "learning_rate": 1.7840012057585858e-05, - "loss": 1.0033, + "learning_rate": 1.57212514456339e-05, + "loss": 1.2173, "step": 8356 }, { - "epoch": 0.23681600498738983, + "epoch": 0.32698176696142106, "grad_norm": 0.0, - "learning_rate": 1.783944229834478e-05, - "loss": 1.0223, + "learning_rate": 1.5720212059399163e-05, + "loss": 1.1771, "step": 8357 }, { - "epoch": 0.2368443424296523, + "epoch": 0.3270208936536505, "grad_norm": 0.0, - "learning_rate": 1.7838872473069164e-05, - "loss": 1.0331, + "learning_rate": 1.5719172581304987e-05, + "loss": 0.9752, "step": 8358 }, { - "epoch": 0.23687267987191477, + "epoch": 0.32706002034587994, "grad_norm": 0.0, - "learning_rate": 1.7838302581763818e-05, - "loss": 0.9786, + "learning_rate": 1.5718133011368065e-05, + "loss": 1.1197, "step": 8359 }, { - "epoch": 0.23690101731417723, + "epoch": 0.3270991470381094, "grad_norm": 0.0, - "learning_rate": 1.783773262443354e-05, - "loss": 0.904, + "learning_rate": 1.5717093349605093e-05, + "loss": 0.9719, "step": 8360 }, { - "epoch": 0.2369293547564397, + "epoch": 0.3271382737303388, "grad_norm": 0.0, - "learning_rate": 1.7837162601083123e-05, - "loss": 1.0348, + "learning_rate": 1.5716053596032767e-05, + "loss": 1.2187, "step": 8361 }, { - "epoch": 0.23695769219870214, + "epoch": 0.32717740042256827, "grad_norm": 0.0, - "learning_rate": 1.7836592511717384e-05, - "loss": 0.9586, + "learning_rate": 1.571501375066778e-05, + "loss": 1.1702, "step": 8362 }, { - "epoch": 0.2369860296409646, + "epoch": 0.3272165271147977, "grad_norm": 0.0, - "learning_rate": 1.7836022356341113e-05, - "loss": 0.986, + "learning_rate": 1.5713973813526836e-05, + "loss": 1.0453, "step": 8363 }, { - "epoch": 0.23701436708322707, + "epoch": 0.32725565380702715, "grad_norm": 0.0, - "learning_rate": 1.7835452134959112e-05, - "loss": 0.8822, + "learning_rate": 1.5712933784626633e-05, + "loss": 1.1655, "step": 8364 }, { - "epoch": 0.23704270452548953, + "epoch": 0.3272947804992566, "grad_norm": 0.0, - "learning_rate": 1.7834881847576192e-05, - "loss": 1.1043, + "learning_rate": 1.571189366398387e-05, + "loss": 1.1596, "step": 8365 }, { - "epoch": 0.237071041967752, + "epoch": 0.32733390719148603, "grad_norm": 0.0, - "learning_rate": 1.783431149419715e-05, - "loss": 1.0421, + "learning_rate": 1.5710853451615254e-05, + "loss": 1.1611, "step": 8366 }, { - "epoch": 0.23709937941001447, + "epoch": 0.32737303388371547, "grad_norm": 0.0, - "learning_rate": 1.7833741074826796e-05, - "loss": 1.0664, + "learning_rate": 1.570981314753749e-05, + "loss": 1.1483, "step": 8367 }, { - "epoch": 0.2371277168522769, + "epoch": 0.3274121605759449, "grad_norm": 0.0, - "learning_rate": 1.7833170589469932e-05, - "loss": 1.126, + "learning_rate": 1.5708772751767275e-05, + "loss": 1.1471, "step": 8368 }, { - "epoch": 0.23715605429453937, + "epoch": 0.32745128726817435, "grad_norm": 0.0, - "learning_rate": 1.783260003813136e-05, - "loss": 0.9725, + "learning_rate": 1.5707732264321327e-05, + "loss": 1.1286, "step": 8369 }, { - "epoch": 0.23718439173680184, + "epoch": 0.3274904139604038, "grad_norm": 0.0, - "learning_rate": 1.783202942081589e-05, - "loss": 1.1045, + "learning_rate": 1.570669168521635e-05, + "loss": 1.3379, "step": 8370 }, { - "epoch": 0.2372127291790643, + "epoch": 0.32752954065263323, "grad_norm": 0.0, - "learning_rate": 1.783145873752833e-05, - "loss": 0.9345, + "learning_rate": 1.5705651014469054e-05, + "loss": 1.1024, "step": 8371 }, { - "epoch": 0.23724106662132677, + "epoch": 0.3275686673448627, "grad_norm": 0.0, - "learning_rate": 1.7830887988273486e-05, - "loss": 1.0374, + "learning_rate": 1.5704610252096158e-05, + "loss": 1.1288, "step": 8372 }, { - "epoch": 0.23726940406358923, + "epoch": 0.3276077940370921, "grad_norm": 0.0, - "learning_rate": 1.783031717305616e-05, - "loss": 1.069, + "learning_rate": 1.5703569398114364e-05, + "loss": 1.2219, "step": 8373 }, { - "epoch": 0.23729774150585167, + "epoch": 0.32764692072932156, "grad_norm": 0.0, - "learning_rate": 1.782974629188117e-05, - "loss": 0.9906, + "learning_rate": 1.5702528452540394e-05, + "loss": 1.1082, "step": 8374 }, { - "epoch": 0.23732607894811414, + "epoch": 0.327686047421551, "grad_norm": 0.0, - "learning_rate": 1.7829175344753316e-05, - "loss": 1.0271, + "learning_rate": 1.570148741539096e-05, + "loss": 1.089, "step": 8375 }, { - "epoch": 0.2373544163903766, + "epoch": 0.32772517411378044, "grad_norm": 0.0, - "learning_rate": 1.7828604331677412e-05, - "loss": 0.9522, + "learning_rate": 1.5700446286682786e-05, + "loss": 1.0193, "step": 8376 }, { - "epoch": 0.23738275383263907, + "epoch": 0.3277643008060099, "grad_norm": 0.0, - "learning_rate": 1.782803325265827e-05, - "loss": 0.9594, + "learning_rate": 1.569940506643259e-05, + "loss": 1.0176, "step": 8377 }, { - "epoch": 0.23741109127490154, + "epoch": 0.3278034274982393, "grad_norm": 0.0, - "learning_rate": 1.782746210770069e-05, - "loss": 1.06, + "learning_rate": 1.5698363754657087e-05, + "loss": 1.0817, "step": 8378 }, { - "epoch": 0.23743942871716398, + "epoch": 0.32784255419046876, "grad_norm": 0.0, - "learning_rate": 1.7826890896809492e-05, - "loss": 1.0408, + "learning_rate": 1.5697322351373e-05, + "loss": 1.2126, "step": 8379 }, { - "epoch": 0.23746776615942644, + "epoch": 0.3278816808826982, "grad_norm": 0.0, - "learning_rate": 1.7826319619989487e-05, - "loss": 0.983, + "learning_rate": 1.569628085659706e-05, + "loss": 1.1345, "step": 8380 }, { - "epoch": 0.2374961036016889, + "epoch": 0.32792080757492764, "grad_norm": 0.0, - "learning_rate": 1.7825748277245484e-05, - "loss": 0.9722, + "learning_rate": 1.5695239270345986e-05, + "loss": 0.9465, "step": 8381 }, { - "epoch": 0.23752444104395137, + "epoch": 0.327959934267157, "grad_norm": 0.0, - "learning_rate": 1.78251768685823e-05, - "loss": 1.0854, + "learning_rate": 1.5694197592636506e-05, + "loss": 1.1228, "step": 8382 }, { - "epoch": 0.23755277848621384, + "epoch": 0.32799906095938647, "grad_norm": 0.0, - "learning_rate": 1.7824605394004747e-05, - "loss": 0.9867, + "learning_rate": 1.5693155823485348e-05, + "loss": 1.0888, "step": 8383 }, { - "epoch": 0.2375811159284763, + "epoch": 0.3280381876516159, "grad_norm": 0.0, - "learning_rate": 1.782403385351763e-05, - "loss": 0.975, + "learning_rate": 1.569211396290924e-05, + "loss": 1.0363, "step": 8384 }, { - "epoch": 0.23760945337073874, + "epoch": 0.32807731434384535, "grad_norm": 0.0, - "learning_rate": 1.7823462247125775e-05, - "loss": 1.0342, + "learning_rate": 1.5691072010924915e-05, + "loss": 1.1675, "step": 8385 }, { - "epoch": 0.2376377908130012, + "epoch": 0.3281164410360748, "grad_norm": 0.0, - "learning_rate": 1.7822890574833995e-05, - "loss": 0.9708, + "learning_rate": 1.5690029967549107e-05, + "loss": 1.2169, "step": 8386 }, { - "epoch": 0.23766612825526368, + "epoch": 0.32815556772830423, "grad_norm": 0.0, - "learning_rate": 1.78223188366471e-05, - "loss": 1.0991, + "learning_rate": 1.5688987832798545e-05, + "loss": 1.0966, "step": 8387 }, { - "epoch": 0.23769446569752614, + "epoch": 0.3281946944205337, "grad_norm": 0.0, - "learning_rate": 1.782174703256991e-05, - "loss": 0.9411, + "learning_rate": 1.5687945606689967e-05, + "loss": 1.1288, "step": 8388 }, { - "epoch": 0.2377228031397886, + "epoch": 0.3282338211127631, "grad_norm": 0.0, - "learning_rate": 1.7821175162607235e-05, - "loss": 0.9646, + "learning_rate": 1.568690328924011e-05, + "loss": 1.0775, "step": 8389 }, { - "epoch": 0.23775114058205107, + "epoch": 0.32827294780499255, "grad_norm": 0.0, - "learning_rate": 1.78206032267639e-05, - "loss": 0.9053, + "learning_rate": 1.5685860880465713e-05, + "loss": 1.1536, "step": 8390 }, { - "epoch": 0.2377794780243135, + "epoch": 0.328312074497222, "grad_norm": 0.0, - "learning_rate": 1.782003122504472e-05, - "loss": 1.0023, + "learning_rate": 1.5684818380383515e-05, + "loss": 1.1138, "step": 8391 }, { - "epoch": 0.23780781546657598, + "epoch": 0.32835120118945144, "grad_norm": 0.0, - "learning_rate": 1.7819459157454516e-05, - "loss": 1.056, + "learning_rate": 1.5683775789010257e-05, + "loss": 1.2057, "step": 8392 }, { - "epoch": 0.23783615290883844, + "epoch": 0.3283903278816809, "grad_norm": 0.0, - "learning_rate": 1.7818887023998104e-05, - "loss": 0.8832, + "learning_rate": 1.568273310636268e-05, + "loss": 1.0452, "step": 8393 }, { - "epoch": 0.2378644903511009, + "epoch": 0.3284294545739103, "grad_norm": 0.0, - "learning_rate": 1.78183148246803e-05, - "loss": 0.9017, + "learning_rate": 1.5681690332457537e-05, + "loss": 1.1496, "step": 8394 }, { - "epoch": 0.23789282779336338, + "epoch": 0.32846858126613976, "grad_norm": 0.0, - "learning_rate": 1.7817742559505928e-05, - "loss": 1.0028, + "learning_rate": 1.568064746731156e-05, + "loss": 1.1823, "step": 8395 }, { - "epoch": 0.23792116523562584, + "epoch": 0.3285077079583692, "grad_norm": 0.0, - "learning_rate": 1.7817170228479806e-05, - "loss": 0.857, + "learning_rate": 1.5679604510941504e-05, + "loss": 1.1431, "step": 8396 }, { - "epoch": 0.23794950267788828, + "epoch": 0.32854683465059864, "grad_norm": 0.0, - "learning_rate": 1.781659783160676e-05, - "loss": 0.9653, + "learning_rate": 1.5678561463364118e-05, + "loss": 1.0723, "step": 8397 }, { - "epoch": 0.23797784012015075, + "epoch": 0.3285859613428281, "grad_norm": 0.0, - "learning_rate": 1.7816025368891602e-05, - "loss": 0.9881, + "learning_rate": 1.567751832459615e-05, + "loss": 1.0932, "step": 8398 }, { - "epoch": 0.2380061775624132, + "epoch": 0.3286250880350575, "grad_norm": 0.0, - "learning_rate": 1.7815452840339166e-05, - "loss": 0.9345, + "learning_rate": 1.5676475094654353e-05, + "loss": 1.1101, "step": 8399 }, { - "epoch": 0.23803451500467568, + "epoch": 0.32866421472728696, "grad_norm": 0.0, - "learning_rate": 1.7814880245954268e-05, - "loss": 1.0118, + "learning_rate": 1.567543177355548e-05, + "loss": 1.0723, "step": 8400 }, { - "epoch": 0.23806285244693814, + "epoch": 0.3287033414195164, "grad_norm": 0.0, - "learning_rate": 1.7814307585741727e-05, - "loss": 1.0621, + "learning_rate": 1.567438836131628e-05, + "loss": 1.1993, "step": 8401 }, { - "epoch": 0.2380911898892006, + "epoch": 0.32874246811174584, "grad_norm": 0.0, - "learning_rate": 1.7813734859706374e-05, - "loss": 0.9944, + "learning_rate": 1.5673344857953518e-05, + "loss": 1.1046, "step": 8402 }, { - "epoch": 0.23811952733146305, + "epoch": 0.3287815948039753, "grad_norm": 0.0, - "learning_rate": 1.781316206785303e-05, - "loss": 0.9233, + "learning_rate": 1.5672301263483945e-05, + "loss": 1.1436, "step": 8403 }, { - "epoch": 0.23814786477372551, + "epoch": 0.3288207214962047, "grad_norm": 0.0, - "learning_rate": 1.7812589210186523e-05, - "loss": 1.0082, + "learning_rate": 1.5671257577924318e-05, + "loss": 0.9937, "step": 8404 }, { - "epoch": 0.23817620221598798, + "epoch": 0.32885984818843417, "grad_norm": 0.0, - "learning_rate": 1.7812016286711673e-05, - "loss": 0.9737, + "learning_rate": 1.5670213801291406e-05, + "loss": 1.1261, "step": 8405 }, { - "epoch": 0.23820453965825045, + "epoch": 0.3288989748806636, "grad_norm": 0.0, - "learning_rate": 1.781144329743331e-05, - "loss": 1.0428, + "learning_rate": 1.5669169933601965e-05, + "loss": 1.0365, "step": 8406 }, { - "epoch": 0.2382328771005129, + "epoch": 0.32893810157289305, "grad_norm": 0.0, - "learning_rate": 1.781087024235626e-05, - "loss": 0.9621, + "learning_rate": 1.5668125974872755e-05, + "loss": 1.1305, "step": 8407 }, { - "epoch": 0.23826121454277538, + "epoch": 0.3289772282651225, "grad_norm": 0.0, - "learning_rate": 1.7810297121485348e-05, - "loss": 0.9503, + "learning_rate": 1.5667081925120548e-05, + "loss": 1.0665, "step": 8408 }, { - "epoch": 0.23828955198503782, + "epoch": 0.32901635495735193, "grad_norm": 0.0, - "learning_rate": 1.7809723934825405e-05, - "loss": 1.063, + "learning_rate": 1.5666037784362104e-05, + "loss": 1.0019, "step": 8409 }, { - "epoch": 0.23831788942730028, + "epoch": 0.32905548164958137, "grad_norm": 0.0, - "learning_rate": 1.7809150682381257e-05, - "loss": 0.9659, + "learning_rate": 1.5664993552614192e-05, + "loss": 1.1, "step": 8410 }, { - "epoch": 0.23834622686956275, + "epoch": 0.32909460834181076, "grad_norm": 0.0, - "learning_rate": 1.780857736415773e-05, - "loss": 0.8936, + "learning_rate": 1.5663949229893587e-05, + "loss": 1.0256, "step": 8411 }, { - "epoch": 0.23837456431182522, + "epoch": 0.3291337350340402, "grad_norm": 0.0, - "learning_rate": 1.780800398015966e-05, - "loss": 1.0154, + "learning_rate": 1.566290481621705e-05, + "loss": 1.0456, "step": 8412 }, { - "epoch": 0.23840290175408768, + "epoch": 0.32917286172626964, "grad_norm": 0.0, - "learning_rate": 1.7807430530391873e-05, - "loss": 1.0468, + "learning_rate": 1.566186031160136e-05, + "loss": 1.1884, "step": 8413 }, { - "epoch": 0.23843123919635015, + "epoch": 0.3292119884184991, "grad_norm": 0.0, - "learning_rate": 1.7806857014859197e-05, - "loss": 1.0001, + "learning_rate": 1.5660815716063292e-05, + "loss": 1.0487, "step": 8414 }, { - "epoch": 0.23845957663861259, + "epoch": 0.3292511151107285, "grad_norm": 0.0, - "learning_rate": 1.7806283433566465e-05, - "loss": 1.0287, + "learning_rate": 1.565977102961961e-05, + "loss": 1.0276, "step": 8415 }, { - "epoch": 0.23848791408087505, + "epoch": 0.32929024180295796, "grad_norm": 0.0, - "learning_rate": 1.7805709786518514e-05, - "loss": 0.9757, + "learning_rate": 1.56587262522871e-05, + "loss": 1.0081, "step": 8416 }, { - "epoch": 0.23851625152313752, + "epoch": 0.3293293684951874, "grad_norm": 0.0, - "learning_rate": 1.7805136073720163e-05, - "loss": 0.9996, + "learning_rate": 1.565768138408254e-05, + "loss": 1.0235, "step": 8417 }, { - "epoch": 0.23854458896539998, + "epoch": 0.32936849518741684, "grad_norm": 0.0, - "learning_rate": 1.780456229517626e-05, - "loss": 1.0164, + "learning_rate": 1.5656636425022702e-05, + "loss": 1.2762, "step": 8418 }, { - "epoch": 0.23857292640766245, + "epoch": 0.3294076218796463, "grad_norm": 0.0, - "learning_rate": 1.7803988450891628e-05, - "loss": 0.9864, + "learning_rate": 1.5655591375124375e-05, + "loss": 1.0891, "step": 8419 }, { - "epoch": 0.23860126384992492, + "epoch": 0.3294467485718757, "grad_norm": 0.0, - "learning_rate": 1.7803414540871097e-05, - "loss": 1.0715, + "learning_rate": 1.5654546234404333e-05, + "loss": 1.1033, "step": 8420 }, { - "epoch": 0.23862960129218735, + "epoch": 0.32948587526410517, "grad_norm": 0.0, - "learning_rate": 1.7802840565119516e-05, - "loss": 0.9182, + "learning_rate": 1.5653501002879368e-05, + "loss": 1.0046, "step": 8421 }, { - "epoch": 0.23865793873444982, + "epoch": 0.3295250019563346, "grad_norm": 0.0, - "learning_rate": 1.780226652364171e-05, - "loss": 1.0188, + "learning_rate": 1.565245568056626e-05, + "loss": 1.1413, "step": 8422 }, { - "epoch": 0.2386862761767123, + "epoch": 0.32956412864856405, "grad_norm": 0.0, - "learning_rate": 1.7801692416442513e-05, - "loss": 0.9341, + "learning_rate": 1.5651410267481795e-05, + "loss": 1.0881, "step": 8423 }, { - "epoch": 0.23871461361897475, + "epoch": 0.3296032553407935, "grad_norm": 0.0, - "learning_rate": 1.7801118243526764e-05, - "loss": 1.0284, + "learning_rate": 1.5650364763642764e-05, + "loss": 1.1137, "step": 8424 }, { - "epoch": 0.23874295106123722, + "epoch": 0.32964238203302293, "grad_norm": 0.0, - "learning_rate": 1.78005440048993e-05, - "loss": 1.0472, + "learning_rate": 1.5649319169065955e-05, + "loss": 1.0626, "step": 8425 }, { - "epoch": 0.23877128850349968, + "epoch": 0.32968150872525237, "grad_norm": 0.0, - "learning_rate": 1.779996970056496e-05, - "loss": 0.9517, + "learning_rate": 1.564827348376816e-05, + "loss": 1.0923, "step": 8426 }, { - "epoch": 0.23879962594576212, + "epoch": 0.3297206354174818, "grad_norm": 0.0, - "learning_rate": 1.7799395330528574e-05, - "loss": 1.0388, + "learning_rate": 1.5647227707766167e-05, + "loss": 1.0671, "step": 8427 }, { - "epoch": 0.2388279633880246, + "epoch": 0.32975976210971125, "grad_norm": 0.0, - "learning_rate": 1.7798820894794988e-05, - "loss": 1.1252, + "learning_rate": 1.564618184107678e-05, + "loss": 1.1255, "step": 8428 }, { - "epoch": 0.23885630083028705, + "epoch": 0.3297988888019407, "grad_norm": 0.0, - "learning_rate": 1.7798246393369037e-05, - "loss": 0.9787, + "learning_rate": 1.564513588371678e-05, + "loss": 0.9719, "step": 8429 }, { - "epoch": 0.23888463827254952, + "epoch": 0.32983801549417013, "grad_norm": 0.0, - "learning_rate": 1.779767182625556e-05, - "loss": 1.0391, + "learning_rate": 1.564408983570298e-05, + "loss": 1.0042, "step": 8430 }, { - "epoch": 0.238912975714812, + "epoch": 0.3298771421863996, "grad_norm": 0.0, - "learning_rate": 1.77970971934594e-05, - "loss": 1.0352, + "learning_rate": 1.5643043697052164e-05, + "loss": 1.0724, "step": 8431 }, { - "epoch": 0.23894131315707445, + "epoch": 0.329916268878629, "grad_norm": 0.0, - "learning_rate": 1.779652249498539e-05, - "loss": 1.0209, + "learning_rate": 1.5641997467781137e-05, + "loss": 1.0312, "step": 8432 }, { - "epoch": 0.2389696505993369, + "epoch": 0.32995539557085846, "grad_norm": 0.0, - "learning_rate": 1.779594773083838e-05, - "loss": 1.0121, + "learning_rate": 1.56409511479067e-05, + "loss": 1.2842, "step": 8433 }, { - "epoch": 0.23899798804159936, + "epoch": 0.3299945222630879, "grad_norm": 0.0, - "learning_rate": 1.7795372901023206e-05, - "loss": 1.0439, + "learning_rate": 1.5639904737445658e-05, + "loss": 1.1379, "step": 8434 }, { - "epoch": 0.23902632548386182, + "epoch": 0.33003364895531734, "grad_norm": 0.0, - "learning_rate": 1.779479800554471e-05, - "loss": 1.08, + "learning_rate": 1.5638858236414812e-05, + "loss": 1.1946, "step": 8435 }, { - "epoch": 0.2390546629261243, + "epoch": 0.3300727756475468, "grad_norm": 0.0, - "learning_rate": 1.7794223044407738e-05, - "loss": 1.0444, + "learning_rate": 1.563781164483097e-05, + "loss": 1.1682, "step": 8436 }, { - "epoch": 0.23908300036838676, + "epoch": 0.3301119023397762, "grad_norm": 0.0, - "learning_rate": 1.779364801761713e-05, - "loss": 0.9682, + "learning_rate": 1.5636764962710936e-05, + "loss": 1.2208, "step": 8437 }, { - "epoch": 0.23911133781064922, + "epoch": 0.33015102903200566, "grad_norm": 0.0, - "learning_rate": 1.779307292517773e-05, - "loss": 1.0084, + "learning_rate": 1.5635718190071526e-05, + "loss": 1.1448, "step": 8438 }, { - "epoch": 0.23913967525291166, + "epoch": 0.33019015572423505, "grad_norm": 0.0, - "learning_rate": 1.7792497767094384e-05, - "loss": 0.9576, + "learning_rate": 1.563467132692954e-05, + "loss": 1.1057, "step": 8439 }, { - "epoch": 0.23916801269517413, + "epoch": 0.3302292824164645, "grad_norm": 0.0, - "learning_rate": 1.7791922543371936e-05, - "loss": 1.0398, + "learning_rate": 1.563362437330179e-05, + "loss": 1.0419, "step": 8440 }, { - "epoch": 0.2391963501374366, + "epoch": 0.33026840910869393, "grad_norm": 0.0, - "learning_rate": 1.779134725401523e-05, - "loss": 1.0549, + "learning_rate": 1.5632577329205095e-05, + "loss": 1.1091, "step": 8441 }, { - "epoch": 0.23922468757969906, + "epoch": 0.33030753580092337, "grad_norm": 0.0, - "learning_rate": 1.7790771899029115e-05, - "loss": 0.9805, + "learning_rate": 1.5631530194656265e-05, + "loss": 1.0105, "step": 8442 }, { - "epoch": 0.23925302502196152, + "epoch": 0.3303466624931528, "grad_norm": 0.0, - "learning_rate": 1.7790196478418432e-05, - "loss": 0.9811, + "learning_rate": 1.5630482969672116e-05, + "loss": 1.1986, "step": 8443 }, { - "epoch": 0.239281362464224, + "epoch": 0.33038578918538225, "grad_norm": 0.0, - "learning_rate": 1.7789620992188033e-05, - "loss": 1.0995, + "learning_rate": 1.5629435654269464e-05, + "loss": 1.0249, "step": 8444 }, { - "epoch": 0.23930969990648643, + "epoch": 0.3304249158776117, "grad_norm": 0.0, - "learning_rate": 1.778904544034276e-05, - "loss": 1.0571, + "learning_rate": 1.5628388248465136e-05, + "loss": 1.0425, "step": 8445 }, { - "epoch": 0.2393380373487489, + "epoch": 0.33046404256984113, "grad_norm": 0.0, - "learning_rate": 1.778846982288747e-05, - "loss": 1.0824, + "learning_rate": 1.562734075227594e-05, + "loss": 1.1215, "step": 8446 }, { - "epoch": 0.23936637479101136, + "epoch": 0.3305031692620706, "grad_norm": 0.0, - "learning_rate": 1.7787894139827006e-05, - "loss": 1.0711, + "learning_rate": 1.5626293165718704e-05, + "loss": 1.0972, "step": 8447 }, { - "epoch": 0.23939471223327383, + "epoch": 0.3305422959543, "grad_norm": 0.0, - "learning_rate": 1.7787318391166216e-05, - "loss": 1.04, + "learning_rate": 1.5625245488810253e-05, + "loss": 1.1004, "step": 8448 }, { - "epoch": 0.2394230496755363, + "epoch": 0.33058142264652945, "grad_norm": 0.0, - "learning_rate": 1.7786742576909955e-05, - "loss": 1.0898, + "learning_rate": 1.5624197721567405e-05, + "loss": 1.0065, "step": 8449 }, { - "epoch": 0.23945138711779876, + "epoch": 0.3306205493387589, "grad_norm": 0.0, - "learning_rate": 1.7786166697063067e-05, - "loss": 0.9697, + "learning_rate": 1.5623149864006993e-05, + "loss": 1.0425, "step": 8450 }, { - "epoch": 0.2394797245600612, + "epoch": 0.33065967603098834, "grad_norm": 0.0, - "learning_rate": 1.7785590751630404e-05, - "loss": 0.9435, + "learning_rate": 1.5622101916145835e-05, + "loss": 1.1342, "step": 8451 }, { - "epoch": 0.23950806200232366, + "epoch": 0.3306988027232178, "grad_norm": 0.0, - "learning_rate": 1.778501474061682e-05, - "loss": 1.072, + "learning_rate": 1.5621053878000767e-05, + "loss": 1.1607, "step": 8452 }, { - "epoch": 0.23953639944458613, + "epoch": 0.3307379294154472, "grad_norm": 0.0, - "learning_rate": 1.7784438664027165e-05, - "loss": 0.9433, + "learning_rate": 1.5620005749588617e-05, + "loss": 1.1257, "step": 8453 }, { - "epoch": 0.2395647368868486, + "epoch": 0.33077705610767666, "grad_norm": 0.0, - "learning_rate": 1.7783862521866296e-05, - "loss": 1.0605, + "learning_rate": 1.561895753092622e-05, + "loss": 1.0885, "step": 8454 }, { - "epoch": 0.23959307432911106, + "epoch": 0.3308161827999061, "grad_norm": 0.0, - "learning_rate": 1.778328631413906e-05, - "loss": 0.8719, + "learning_rate": 1.56179092220304e-05, + "loss": 1.1931, "step": 8455 }, { - "epoch": 0.23962141177137353, + "epoch": 0.33085530949213554, "grad_norm": 0.0, - "learning_rate": 1.7782710040850314e-05, - "loss": 1.0058, + "learning_rate": 1.5616860822918004e-05, + "loss": 1.0755, "step": 8456 }, { - "epoch": 0.23964974921363597, + "epoch": 0.330894436184365, "grad_norm": 0.0, - "learning_rate": 1.778213370200491e-05, - "loss": 1.013, + "learning_rate": 1.561581233360586e-05, + "loss": 1.0314, "step": 8457 }, { - "epoch": 0.23967808665589843, + "epoch": 0.3309335628765944, "grad_norm": 0.0, - "learning_rate": 1.7781557297607704e-05, - "loss": 1.0011, + "learning_rate": 1.5614763754110804e-05, + "loss": 1.0708, "step": 8458 }, { - "epoch": 0.2397064240981609, + "epoch": 0.33097268956882386, "grad_norm": 0.0, - "learning_rate": 1.7780980827663553e-05, - "loss": 0.889, + "learning_rate": 1.561371508444968e-05, + "loss": 1.0924, "step": 8459 }, { - "epoch": 0.23973476154042336, + "epoch": 0.3310118162610533, "grad_norm": 0.0, - "learning_rate": 1.7780404292177308e-05, - "loss": 1.0471, + "learning_rate": 1.5612666324639327e-05, + "loss": 1.0412, "step": 8460 }, { - "epoch": 0.23976309898268583, + "epoch": 0.33105094295328275, "grad_norm": 0.0, - "learning_rate": 1.7779827691153832e-05, - "loss": 1.0064, + "learning_rate": 1.5611617474696584e-05, + "loss": 1.0317, "step": 8461 }, { - "epoch": 0.2397914364249483, + "epoch": 0.3310900696455122, "grad_norm": 0.0, - "learning_rate": 1.7779251024597976e-05, - "loss": 1.002, + "learning_rate": 1.5610568534638294e-05, + "loss": 1.0906, "step": 8462 }, { - "epoch": 0.23981977386721073, + "epoch": 0.3311291963377416, "grad_norm": 0.0, - "learning_rate": 1.77786742925146e-05, - "loss": 0.987, + "learning_rate": 1.5609519504481306e-05, + "loss": 1.1743, "step": 8463 }, { - "epoch": 0.2398481113094732, + "epoch": 0.33116832302997107, "grad_norm": 0.0, - "learning_rate": 1.7778097494908564e-05, - "loss": 0.9337, + "learning_rate": 1.5608470384242466e-05, + "loss": 1.2103, "step": 8464 }, { - "epoch": 0.23987644875173567, + "epoch": 0.3312074497222005, "grad_norm": 0.0, - "learning_rate": 1.7777520631784723e-05, - "loss": 0.998, + "learning_rate": 1.560742117393862e-05, + "loss": 1.1716, "step": 8465 }, { - "epoch": 0.23990478619399813, + "epoch": 0.33124657641442995, "grad_norm": 0.0, - "learning_rate": 1.777694370314794e-05, - "loss": 0.9602, + "learning_rate": 1.560637187358661e-05, + "loss": 0.948, "step": 8466 }, { - "epoch": 0.2399331236362606, + "epoch": 0.3312857031066594, "grad_norm": 0.0, - "learning_rate": 1.777636670900307e-05, - "loss": 0.9739, + "learning_rate": 1.56053224832033e-05, + "loss": 1.2032, "step": 8467 }, { - "epoch": 0.23996146107852306, + "epoch": 0.3313248297988888, "grad_norm": 0.0, - "learning_rate": 1.7775789649354973e-05, - "loss": 0.9774, + "learning_rate": 1.560427300280553e-05, + "loss": 1.0045, "step": 8468 }, { - "epoch": 0.2399897985207855, + "epoch": 0.3313639564911182, "grad_norm": 0.0, - "learning_rate": 1.7775212524208513e-05, - "loss": 1.0417, + "learning_rate": 1.560322343241016e-05, + "loss": 1.1723, "step": 8469 }, { - "epoch": 0.24001813596304797, + "epoch": 0.33140308318334766, "grad_norm": 0.0, - "learning_rate": 1.7774635333568554e-05, - "loss": 0.957, + "learning_rate": 1.5602173772034045e-05, + "loss": 1.0218, "step": 8470 }, { - "epoch": 0.24004647340531043, + "epoch": 0.3314422098755771, "grad_norm": 0.0, - "learning_rate": 1.777405807743995e-05, - "loss": 1.0032, + "learning_rate": 1.5601124021694036e-05, + "loss": 1.1866, "step": 8471 }, { - "epoch": 0.2400748108475729, + "epoch": 0.33148133656780654, "grad_norm": 0.0, - "learning_rate": 1.7773480755827574e-05, - "loss": 0.8873, + "learning_rate": 1.5600074181406995e-05, + "loss": 1.0576, "step": 8472 }, { - "epoch": 0.24010314828983537, + "epoch": 0.331520463260036, "grad_norm": 0.0, - "learning_rate": 1.777290336873628e-05, - "loss": 1.0007, + "learning_rate": 1.5599024251189782e-05, + "loss": 1.1419, "step": 8473 }, { - "epoch": 0.24013148573209783, + "epoch": 0.3315595899522654, "grad_norm": 0.0, - "learning_rate": 1.7772325916170935e-05, - "loss": 1.0312, + "learning_rate": 1.5597974231059252e-05, + "loss": 1.1982, "step": 8474 }, { - "epoch": 0.24015982317436027, + "epoch": 0.33159871664449486, "grad_norm": 0.0, - "learning_rate": 1.77717483981364e-05, - "loss": 0.989, + "learning_rate": 1.5596924121032272e-05, + "loss": 1.1342, "step": 8475 }, { - "epoch": 0.24018816061662274, + "epoch": 0.3316378433367243, "grad_norm": 0.0, - "learning_rate": 1.7771170814637547e-05, - "loss": 1.0535, + "learning_rate": 1.55958739211257e-05, + "loss": 1.0682, "step": 8476 }, { - "epoch": 0.2402164980588852, + "epoch": 0.33167697002895374, "grad_norm": 0.0, - "learning_rate": 1.7770593165679234e-05, - "loss": 1.1198, + "learning_rate": 1.5594823631356412e-05, + "loss": 1.1488, "step": 8477 }, { - "epoch": 0.24024483550114767, + "epoch": 0.3317160967211832, "grad_norm": 0.0, - "learning_rate": 1.777001545126633e-05, - "loss": 1.1423, + "learning_rate": 1.5593773251741264e-05, + "loss": 1.056, "step": 8478 }, { - "epoch": 0.24027317294341013, + "epoch": 0.3317552234134126, "grad_norm": 0.0, - "learning_rate": 1.77694376714037e-05, - "loss": 0.9464, + "learning_rate": 1.5592722782297127e-05, + "loss": 1.1074, "step": 8479 }, { - "epoch": 0.2403015103856726, + "epoch": 0.33179435010564207, "grad_norm": 0.0, - "learning_rate": 1.776885982609621e-05, - "loss": 1.0001, + "learning_rate": 1.5591672223040867e-05, + "loss": 1.2, "step": 8480 }, { - "epoch": 0.24032984782793504, + "epoch": 0.3318334767978715, "grad_norm": 0.0, - "learning_rate": 1.776828191534873e-05, - "loss": 1.0189, + "learning_rate": 1.5590621573989363e-05, + "loss": 1.1847, "step": 8481 }, { - "epoch": 0.2403581852701975, + "epoch": 0.33187260349010095, "grad_norm": 0.0, - "learning_rate": 1.7767703939166124e-05, - "loss": 0.9586, + "learning_rate": 1.558957083515948e-05, + "loss": 1.0615, "step": 8482 }, { - "epoch": 0.24038652271245997, + "epoch": 0.3319117301823304, "grad_norm": 0.0, - "learning_rate": 1.7767125897553268e-05, - "loss": 1.0146, + "learning_rate": 1.5588520006568093e-05, + "loss": 1.209, "step": 8483 }, { - "epoch": 0.24041486015472244, + "epoch": 0.33195085687455983, "grad_norm": 0.0, - "learning_rate": 1.776654779051502e-05, - "loss": 1.0353, + "learning_rate": 1.5587469088232076e-05, + "loss": 1.1124, "step": 8484 }, { - "epoch": 0.2404431975969849, + "epoch": 0.33198998356678927, "grad_norm": 0.0, - "learning_rate": 1.7765969618056266e-05, - "loss": 0.8729, + "learning_rate": 1.558641808016831e-05, + "loss": 1.1116, "step": 8485 }, { - "epoch": 0.24047153503924737, + "epoch": 0.3320291102590187, "grad_norm": 0.0, - "learning_rate": 1.7765391380181858e-05, - "loss": 0.9601, + "learning_rate": 1.558536698239367e-05, + "loss": 1.1176, "step": 8486 }, { - "epoch": 0.2404998724815098, + "epoch": 0.33206823695124815, "grad_norm": 0.0, - "learning_rate": 1.7764813076896675e-05, - "loss": 0.9453, + "learning_rate": 1.5584315794925032e-05, + "loss": 1.1049, "step": 8487 }, { - "epoch": 0.24052820992377227, + "epoch": 0.3321073636434776, "grad_norm": 0.0, - "learning_rate": 1.7764234708205594e-05, - "loss": 1.0399, + "learning_rate": 1.5583264517779282e-05, + "loss": 1.0469, "step": 8488 }, { - "epoch": 0.24055654736603474, + "epoch": 0.33214649033570703, "grad_norm": 0.0, - "learning_rate": 1.7763656274113476e-05, - "loss": 0.9921, + "learning_rate": 1.5582213150973296e-05, + "loss": 1.151, "step": 8489 }, { - "epoch": 0.2405848848082972, + "epoch": 0.3321856170279365, "grad_norm": 0.0, - "learning_rate": 1.77630777746252e-05, - "loss": 1.0141, + "learning_rate": 1.5581161694523966e-05, + "loss": 1.1277, "step": 8490 }, { - "epoch": 0.24061322225055967, + "epoch": 0.3322247437201659, "grad_norm": 0.0, - "learning_rate": 1.7762499209745634e-05, - "loss": 1.0374, + "learning_rate": 1.5580110148448173e-05, + "loss": 1.1285, "step": 8491 }, { - "epoch": 0.24064155969282214, + "epoch": 0.33226387041239536, "grad_norm": 0.0, - "learning_rate": 1.7761920579479656e-05, - "loss": 0.9672, + "learning_rate": 1.5579058512762802e-05, + "loss": 1.1062, "step": 8492 }, { - "epoch": 0.24066989713508458, + "epoch": 0.3323029971046248, "grad_norm": 0.0, - "learning_rate": 1.776134188383214e-05, - "loss": 0.9636, + "learning_rate": 1.5578006787484744e-05, + "loss": 1.104, "step": 8493 }, { - "epoch": 0.24069823457734704, + "epoch": 0.33234212379685424, "grad_norm": 0.0, - "learning_rate": 1.776076312280796e-05, - "loss": 0.9019, + "learning_rate": 1.5576954972630885e-05, + "loss": 1.0664, "step": 8494 }, { - "epoch": 0.2407265720196095, + "epoch": 0.3323812504890837, "grad_norm": 0.0, - "learning_rate": 1.776018429641199e-05, - "loss": 1.0197, + "learning_rate": 1.5575903068218115e-05, + "loss": 0.9892, "step": 8495 }, { - "epoch": 0.24075490946187197, + "epoch": 0.33242037718131306, "grad_norm": 0.0, - "learning_rate": 1.77596054046491e-05, - "loss": 1.0933, + "learning_rate": 1.5574851074263334e-05, + "loss": 1.098, "step": 8496 }, { - "epoch": 0.24078324690413444, + "epoch": 0.3324595038735425, "grad_norm": 0.0, - "learning_rate": 1.775902644752418e-05, - "loss": 0.997, + "learning_rate": 1.5573798990783425e-05, + "loss": 1.062, "step": 8497 }, { - "epoch": 0.2408115843463969, + "epoch": 0.33249863056577195, "grad_norm": 0.0, - "learning_rate": 1.7758447425042096e-05, - "loss": 0.9894, + "learning_rate": 1.5572746817795294e-05, + "loss": 1.094, "step": 8498 }, { - "epoch": 0.24083992178865934, + "epoch": 0.3325377572580014, "grad_norm": 0.0, - "learning_rate": 1.775786833720773e-05, - "loss": 0.8733, + "learning_rate": 1.557169455531583e-05, + "loss": 1.1191, "step": 8499 }, { - "epoch": 0.2408682592309218, + "epoch": 0.33257688395023083, "grad_norm": 0.0, - "learning_rate": 1.7757289184025958e-05, - "loss": 1.0574, + "learning_rate": 1.5570642203361932e-05, + "loss": 1.1978, "step": 8500 }, { - "epoch": 0.24089659667318428, + "epoch": 0.33261601064246027, "grad_norm": 0.0, - "learning_rate": 1.775670996550166e-05, - "loss": 1.0641, + "learning_rate": 1.55695897619505e-05, + "loss": 1.0121, "step": 8501 }, { - "epoch": 0.24092493411544674, + "epoch": 0.3326551373346897, "grad_norm": 0.0, - "learning_rate": 1.7756130681639708e-05, - "loss": 0.9955, + "learning_rate": 1.5568537231098438e-05, + "loss": 1.0768, "step": 8502 }, { - "epoch": 0.2409532715577092, + "epoch": 0.33269426402691915, "grad_norm": 0.0, - "learning_rate": 1.7755551332444988e-05, - "loss": 0.996, + "learning_rate": 1.5567484610822644e-05, + "loss": 1.083, "step": 8503 }, { - "epoch": 0.24098160899997167, + "epoch": 0.3327333907191486, "grad_norm": 0.0, - "learning_rate": 1.7754971917922384e-05, - "loss": 1.0101, + "learning_rate": 1.5566431901140025e-05, + "loss": 1.1033, "step": 8504 }, { - "epoch": 0.2410099464422341, + "epoch": 0.33277251741137803, "grad_norm": 0.0, - "learning_rate": 1.775439243807677e-05, - "loss": 1.0351, + "learning_rate": 1.5565379102067485e-05, + "loss": 1.0348, "step": 8505 }, { - "epoch": 0.24103828388449658, + "epoch": 0.3328116441036075, "grad_norm": 0.0, - "learning_rate": 1.7753812892913024e-05, - "loss": 0.9104, + "learning_rate": 1.556432621362193e-05, + "loss": 1.0613, "step": 8506 }, { - "epoch": 0.24106662132675905, + "epoch": 0.3328507707958369, "grad_norm": 0.0, - "learning_rate": 1.7753233282436036e-05, - "loss": 0.963, + "learning_rate": 1.5563273235820268e-05, + "loss": 1.0551, "step": 8507 }, { - "epoch": 0.2410949587690215, + "epoch": 0.33288989748806636, "grad_norm": 0.0, - "learning_rate": 1.7752653606650687e-05, - "loss": 0.9737, + "learning_rate": 1.5562220168679408e-05, + "loss": 1.1187, "step": 8508 }, { - "epoch": 0.24112329621128398, + "epoch": 0.3329290241802958, "grad_norm": 0.0, - "learning_rate": 1.775207386556186e-05, - "loss": 0.9826, + "learning_rate": 1.556116701221626e-05, + "loss": 1.1489, "step": 8509 }, { - "epoch": 0.24115163365354644, + "epoch": 0.33296815087252524, "grad_norm": 0.0, - "learning_rate": 1.775149405917443e-05, - "loss": 0.9476, + "learning_rate": 1.5560113766447743e-05, + "loss": 0.9839, "step": 8510 }, { - "epoch": 0.24117997109580888, + "epoch": 0.3330072775647547, "grad_norm": 0.0, - "learning_rate": 1.775091418749329e-05, - "loss": 0.962, + "learning_rate": 1.555906043139076e-05, + "loss": 1.1001, "step": 8511 }, { - "epoch": 0.24120830853807135, + "epoch": 0.3330464042569841, "grad_norm": 0.0, - "learning_rate": 1.775033425052332e-05, - "loss": 1.0023, + "learning_rate": 1.555800700706224e-05, + "loss": 1.2067, "step": 8512 }, { - "epoch": 0.2412366459803338, + "epoch": 0.33308553094921356, "grad_norm": 0.0, - "learning_rate": 1.7749754248269407e-05, - "loss": 1.0866, + "learning_rate": 1.555695349347909e-05, + "loss": 1.0676, "step": 8513 }, { - "epoch": 0.24126498342259628, + "epoch": 0.333124657641443, "grad_norm": 0.0, - "learning_rate": 1.7749174180736443e-05, - "loss": 1.0349, + "learning_rate": 1.555589989065823e-05, + "loss": 1.0533, "step": 8514 }, { - "epoch": 0.24129332086485875, + "epoch": 0.33316378433367244, "grad_norm": 0.0, - "learning_rate": 1.7748594047929297e-05, - "loss": 1.0058, + "learning_rate": 1.5554846198616576e-05, + "loss": 1.0185, "step": 8515 }, { - "epoch": 0.2413216583071212, + "epoch": 0.3332029110259019, "grad_norm": 0.0, - "learning_rate": 1.774801384985287e-05, - "loss": 1.012, + "learning_rate": 1.5553792417371058e-05, + "loss": 1.0616, "step": 8516 }, { - "epoch": 0.24134999574938365, + "epoch": 0.3332420377181313, "grad_norm": 0.0, - "learning_rate": 1.774743358651205e-05, - "loss": 0.9968, + "learning_rate": 1.555273854693859e-05, + "loss": 1.1442, "step": 8517 }, { - "epoch": 0.24137833319164612, + "epoch": 0.33328116441036076, "grad_norm": 0.0, - "learning_rate": 1.7746853257911713e-05, - "loss": 0.9144, + "learning_rate": 1.5551684587336097e-05, + "loss": 1.093, "step": 8518 }, { - "epoch": 0.24140667063390858, + "epoch": 0.3333202911025902, "grad_norm": 0.0, - "learning_rate": 1.7746272864056754e-05, - "loss": 1.0408, + "learning_rate": 1.5550630538580508e-05, + "loss": 0.968, "step": 8519 }, { - "epoch": 0.24143500807617105, + "epoch": 0.33335941779481965, "grad_norm": 0.0, - "learning_rate": 1.7745692404952066e-05, - "loss": 0.869, + "learning_rate": 1.5549576400688748e-05, + "loss": 1.0397, "step": 8520 }, { - "epoch": 0.24146334551843351, + "epoch": 0.3333985444870491, "grad_norm": 0.0, - "learning_rate": 1.7745111880602534e-05, - "loss": 1.1005, + "learning_rate": 1.554852217367775e-05, + "loss": 1.1454, "step": 8521 }, { - "epoch": 0.24149168296069598, + "epoch": 0.3334376711792785, "grad_norm": 0.0, - "learning_rate": 1.7744531291013047e-05, - "loss": 0.9463, + "learning_rate": 1.554746785756443e-05, + "loss": 1.0364, "step": 8522 }, { - "epoch": 0.24152002040295842, + "epoch": 0.33347679787150797, "grad_norm": 0.0, - "learning_rate": 1.77439506361885e-05, - "loss": 0.9556, + "learning_rate": 1.5546413452365734e-05, + "loss": 1.0366, "step": 8523 }, { - "epoch": 0.24154835784522088, + "epoch": 0.3335159245637374, "grad_norm": 0.0, - "learning_rate": 1.774336991613378e-05, - "loss": 1.0484, + "learning_rate": 1.5545358958098584e-05, + "loss": 1.1171, "step": 8524 }, { - "epoch": 0.24157669528748335, + "epoch": 0.3335550512559668, "grad_norm": 0.0, - "learning_rate": 1.774278913085378e-05, - "loss": 1.0663, + "learning_rate": 1.554430437477992e-05, + "loss": 1.0765, "step": 8525 }, { - "epoch": 0.24160503272974582, + "epoch": 0.33359417794819624, "grad_norm": 0.0, - "learning_rate": 1.7742208280353387e-05, - "loss": 0.9848, + "learning_rate": 1.5543249702426674e-05, + "loss": 1.0775, "step": 8526 }, { - "epoch": 0.24163337017200828, + "epoch": 0.3336333046404257, "grad_norm": 0.0, - "learning_rate": 1.7741627364637506e-05, - "loss": 0.9935, + "learning_rate": 1.5542194941055785e-05, + "loss": 1.1147, "step": 8527 }, { - "epoch": 0.24166170761427075, + "epoch": 0.3336724313326551, "grad_norm": 0.0, - "learning_rate": 1.774104638371102e-05, - "loss": 1.0084, + "learning_rate": 1.554114009068419e-05, + "loss": 1.0499, "step": 8528 }, { - "epoch": 0.2416900450565332, + "epoch": 0.33371155802488456, "grad_norm": 0.0, - "learning_rate": 1.7740465337578823e-05, - "loss": 0.9685, + "learning_rate": 1.5540085151328826e-05, + "loss": 1.1245, "step": 8529 }, { - "epoch": 0.24171838249879565, + "epoch": 0.333750684717114, "grad_norm": 0.0, - "learning_rate": 1.7739884226245813e-05, - "loss": 0.9457, + "learning_rate": 1.5539030123006636e-05, + "loss": 1.1656, "step": 8530 }, { - "epoch": 0.24174671994105812, + "epoch": 0.33378981140934344, "grad_norm": 0.0, - "learning_rate": 1.7739303049716886e-05, - "loss": 1.007, + "learning_rate": 1.5537975005734566e-05, + "loss": 0.9885, "step": 8531 }, { - "epoch": 0.24177505738332059, + "epoch": 0.3338289381015729, "grad_norm": 0.0, - "learning_rate": 1.7738721807996933e-05, - "loss": 0.9879, + "learning_rate": 1.553691979952956e-05, + "loss": 1.0934, "step": 8532 }, { - "epoch": 0.24180339482558305, + "epoch": 0.3338680647938023, "grad_norm": 0.0, - "learning_rate": 1.7738140501090856e-05, - "loss": 0.9581, + "learning_rate": 1.5535864504408553e-05, + "loss": 1.0004, "step": 8533 }, { - "epoch": 0.24183173226784552, + "epoch": 0.33390719148603176, "grad_norm": 0.0, - "learning_rate": 1.7737559129003547e-05, - "loss": 0.924, + "learning_rate": 1.5534809120388502e-05, + "loss": 0.971, "step": 8534 }, { - "epoch": 0.24186006971010796, + "epoch": 0.3339463181782612, "grad_norm": 0.0, - "learning_rate": 1.7736977691739906e-05, - "loss": 0.8825, + "learning_rate": 1.5533753647486352e-05, + "loss": 1.1833, "step": 8535 }, { - "epoch": 0.24188840715237042, + "epoch": 0.33398544487049064, "grad_norm": 0.0, - "learning_rate": 1.7736396189304824e-05, - "loss": 1.0541, + "learning_rate": 1.5532698085719052e-05, + "loss": 0.9976, "step": 8536 }, { - "epoch": 0.2419167445946329, + "epoch": 0.3340245715627201, "grad_norm": 0.0, - "learning_rate": 1.773581462170321e-05, - "loss": 0.9719, + "learning_rate": 1.5531642435103556e-05, + "loss": 1.1047, "step": 8537 }, { - "epoch": 0.24194508203689535, + "epoch": 0.3340636982549495, "grad_norm": 0.0, - "learning_rate": 1.773523298893995e-05, - "loss": 1.0317, + "learning_rate": 1.5530586695656814e-05, + "loss": 0.9996, "step": 8538 }, { - "epoch": 0.24197341947915782, + "epoch": 0.33410282494717897, "grad_norm": 0.0, - "learning_rate": 1.7734651291019955e-05, - "loss": 0.8802, + "learning_rate": 1.5529530867395778e-05, + "loss": 1.0464, "step": 8539 }, { - "epoch": 0.24200175692142029, + "epoch": 0.3341419516394084, "grad_norm": 0.0, - "learning_rate": 1.773406952794812e-05, - "loss": 1.0111, + "learning_rate": 1.5528474950337405e-05, + "loss": 1.15, "step": 8540 }, { - "epoch": 0.24203009436368272, + "epoch": 0.33418107833163785, "grad_norm": 0.0, - "learning_rate": 1.7733487699729344e-05, - "loss": 0.9823, + "learning_rate": 1.5527418944498656e-05, + "loss": 1.151, "step": 8541 }, { - "epoch": 0.2420584318059452, + "epoch": 0.3342202050238673, "grad_norm": 0.0, - "learning_rate": 1.7732905806368526e-05, - "loss": 0.978, + "learning_rate": 1.5526362849896478e-05, + "loss": 1.1218, "step": 8542 }, { - "epoch": 0.24208676924820766, + "epoch": 0.33425933171609673, "grad_norm": 0.0, - "learning_rate": 1.7732323847870577e-05, - "loss": 0.9628, + "learning_rate": 1.5525306666547843e-05, + "loss": 1.1735, "step": 8543 }, { - "epoch": 0.24211510669047012, + "epoch": 0.33429845840832617, "grad_norm": 0.0, - "learning_rate": 1.7731741824240385e-05, - "loss": 0.9894, + "learning_rate": 1.5524250394469708e-05, + "loss": 1.0681, "step": 8544 }, { - "epoch": 0.2421434441327326, + "epoch": 0.3343375851005556, "grad_norm": 0.0, - "learning_rate": 1.773115973548287e-05, - "loss": 0.9144, + "learning_rate": 1.5523194033679027e-05, + "loss": 1.1636, "step": 8545 }, { - "epoch": 0.24217178157499505, + "epoch": 0.33437671179278505, "grad_norm": 0.0, - "learning_rate": 1.773057758160292e-05, - "loss": 0.7567, + "learning_rate": 1.5522137584192775e-05, + "loss": 1.0809, "step": 8546 }, { - "epoch": 0.2422001190172575, + "epoch": 0.3344158384850145, "grad_norm": 0.0, - "learning_rate": 1.7729995362605444e-05, - "loss": 0.9574, + "learning_rate": 1.552108104602791e-05, + "loss": 1.0638, "step": 8547 }, { - "epoch": 0.24222845645951996, + "epoch": 0.33445496517724393, "grad_norm": 0.0, - "learning_rate": 1.772941307849535e-05, - "loss": 1.0564, + "learning_rate": 1.5520024419201406e-05, + "loss": 1.1503, "step": 8548 }, { - "epoch": 0.24225679390178242, + "epoch": 0.3344940918694734, "grad_norm": 0.0, - "learning_rate": 1.772883072927754e-05, - "loss": 1.0253, + "learning_rate": 1.5518967703730224e-05, + "loss": 0.9993, "step": 8549 }, { - "epoch": 0.2422851313440449, + "epoch": 0.3345332185617028, "grad_norm": 0.0, - "learning_rate": 1.7728248314956915e-05, - "loss": 1.1062, + "learning_rate": 1.551791089963134e-05, + "loss": 1.0652, "step": 8550 }, { - "epoch": 0.24231346878630736, + "epoch": 0.33457234525393226, "grad_norm": 0.0, - "learning_rate": 1.7727665835538386e-05, - "loss": 0.9017, + "learning_rate": 1.5516854006921714e-05, + "loss": 1.1044, "step": 8551 }, { - "epoch": 0.24234180622856982, + "epoch": 0.3346114719461617, "grad_norm": 0.0, - "learning_rate": 1.7727083291026855e-05, - "loss": 1.0041, + "learning_rate": 1.5515797025618332e-05, + "loss": 1.1785, "step": 8552 }, { - "epoch": 0.24237014367083226, + "epoch": 0.3346505986383911, "grad_norm": 0.0, - "learning_rate": 1.7726500681427236e-05, - "loss": 0.9172, + "learning_rate": 1.551473995573816e-05, + "loss": 1.0978, "step": 8553 }, { - "epoch": 0.24239848111309473, + "epoch": 0.3346897253306205, "grad_norm": 0.0, - "learning_rate": 1.772591800674443e-05, - "loss": 0.9829, + "learning_rate": 1.5513682797298172e-05, + "loss": 1.1885, "step": 8554 }, { - "epoch": 0.2424268185553572, + "epoch": 0.33472885202284997, "grad_norm": 0.0, - "learning_rate": 1.7725335266983352e-05, - "loss": 0.9382, + "learning_rate": 1.5512625550315354e-05, + "loss": 1.0986, "step": 8555 }, { - "epoch": 0.24245515599761966, + "epoch": 0.3347679787150794, "grad_norm": 0.0, - "learning_rate": 1.7724752462148903e-05, - "loss": 0.9698, + "learning_rate": 1.551156821480667e-05, + "loss": 1.0875, "step": 8556 }, { - "epoch": 0.24248349343988213, + "epoch": 0.33480710540730885, "grad_norm": 0.0, - "learning_rate": 1.7724169592245996e-05, - "loss": 1.0176, + "learning_rate": 1.551051079078911e-05, + "loss": 1.1472, "step": 8557 }, { - "epoch": 0.2425118308821446, + "epoch": 0.3348462320995383, "grad_norm": 0.0, - "learning_rate": 1.772358665727954e-05, - "loss": 0.966, + "learning_rate": 1.550945327827965e-05, + "loss": 1.127, "step": 8558 }, { - "epoch": 0.24254016832440703, + "epoch": 0.33488535879176773, "grad_norm": 0.0, - "learning_rate": 1.7723003657254447e-05, - "loss": 0.9902, + "learning_rate": 1.5508395677295278e-05, + "loss": 1.1069, "step": 8559 }, { - "epoch": 0.2425685057666695, + "epoch": 0.33492448548399717, "grad_norm": 0.0, - "learning_rate": 1.7722420592175624e-05, - "loss": 0.8776, + "learning_rate": 1.5507337987852972e-05, + "loss": 1.0716, "step": 8560 }, { - "epoch": 0.24259684320893196, + "epoch": 0.3349636121762266, "grad_norm": 0.0, - "learning_rate": 1.7721837462047987e-05, - "loss": 1.071, + "learning_rate": 1.5506280209969716e-05, + "loss": 1.0451, "step": 8561 }, { - "epoch": 0.24262518065119443, + "epoch": 0.33500273886845605, "grad_norm": 0.0, - "learning_rate": 1.7721254266876443e-05, - "loss": 1.1115, + "learning_rate": 1.5505222343662506e-05, + "loss": 1.0559, "step": 8562 }, { - "epoch": 0.2426535180934569, + "epoch": 0.3350418655606855, "grad_norm": 0.0, - "learning_rate": 1.772067100666591e-05, - "loss": 0.9984, + "learning_rate": 1.550416438894832e-05, + "loss": 1.0748, "step": 8563 }, { - "epoch": 0.24268185553571936, + "epoch": 0.33508099225291493, "grad_norm": 0.0, - "learning_rate": 1.7720087681421297e-05, - "loss": 0.9964, + "learning_rate": 1.550310634584415e-05, + "loss": 1.0234, "step": 8564 }, { - "epoch": 0.2427101929779818, + "epoch": 0.3351201189451444, "grad_norm": 0.0, - "learning_rate": 1.7719504291147517e-05, - "loss": 1.1205, + "learning_rate": 1.5502048214366986e-05, + "loss": 1.0584, "step": 8565 }, { - "epoch": 0.24273853042024426, + "epoch": 0.3351592456373738, "grad_norm": 0.0, - "learning_rate": 1.771892083584949e-05, - "loss": 1.0724, + "learning_rate": 1.5500989994533828e-05, + "loss": 0.944, "step": 8566 }, { - "epoch": 0.24276686786250673, + "epoch": 0.33519837232960326, "grad_norm": 0.0, - "learning_rate": 1.771833731553212e-05, - "loss": 0.9792, + "learning_rate": 1.5499931686361658e-05, + "loss": 1.1675, "step": 8567 }, { - "epoch": 0.2427952053047692, + "epoch": 0.3352374990218327, "grad_norm": 0.0, - "learning_rate": 1.7717753730200334e-05, - "loss": 0.8866, + "learning_rate": 1.549887328986748e-05, + "loss": 1.0949, "step": 8568 }, { - "epoch": 0.24282354274703166, + "epoch": 0.33527662571406214, "grad_norm": 0.0, - "learning_rate": 1.771717007985904e-05, - "loss": 1.0465, + "learning_rate": 1.5497814805068286e-05, + "loss": 1.0143, "step": 8569 }, { - "epoch": 0.24285188018929413, + "epoch": 0.3353157524062916, "grad_norm": 0.0, - "learning_rate": 1.771658636451316e-05, - "loss": 0.8956, + "learning_rate": 1.5496756231981077e-05, + "loss": 1.0553, "step": 8570 }, { - "epoch": 0.24288021763155657, + "epoch": 0.335354879098521, "grad_norm": 0.0, - "learning_rate": 1.7716002584167605e-05, - "loss": 0.9786, + "learning_rate": 1.549569757062285e-05, + "loss": 1.1057, "step": 8571 }, { - "epoch": 0.24290855507381903, + "epoch": 0.33539400579075046, "grad_norm": 0.0, - "learning_rate": 1.7715418738827296e-05, - "loss": 0.9292, + "learning_rate": 1.5494638821010607e-05, + "loss": 1.022, "step": 8572 }, { - "epoch": 0.2429368925160815, + "epoch": 0.3354331324829799, "grad_norm": 0.0, - "learning_rate": 1.7714834828497144e-05, - "loss": 1.0084, + "learning_rate": 1.549357998316135e-05, + "loss": 1.1337, "step": 8573 }, { - "epoch": 0.24296522995834396, + "epoch": 0.33547225917520934, "grad_norm": 0.0, - "learning_rate": 1.771425085318208e-05, - "loss": 0.9237, + "learning_rate": 1.549252105709208e-05, + "loss": 1.1539, "step": 8574 }, { - "epoch": 0.24299356740060643, + "epoch": 0.3355113858674388, "grad_norm": 0.0, - "learning_rate": 1.7713666812887016e-05, - "loss": 0.9189, + "learning_rate": 1.5491462042819808e-05, + "loss": 1.2344, "step": 8575 }, { - "epoch": 0.24302190484286887, + "epoch": 0.3355505125596682, "grad_norm": 0.0, - "learning_rate": 1.771308270761687e-05, - "loss": 0.9814, + "learning_rate": 1.549040294036153e-05, + "loss": 1.1789, "step": 8576 }, { - "epoch": 0.24305024228513133, + "epoch": 0.33558963925189766, "grad_norm": 0.0, - "learning_rate": 1.7712498537376565e-05, - "loss": 1.0885, + "learning_rate": 1.5489343749734268e-05, + "loss": 1.0545, "step": 8577 }, { - "epoch": 0.2430785797273938, + "epoch": 0.3356287659441271, "grad_norm": 0.0, - "learning_rate": 1.7711914302171022e-05, - "loss": 0.9665, + "learning_rate": 1.548828447095502e-05, + "loss": 1.1187, "step": 8578 }, { - "epoch": 0.24310691716965627, + "epoch": 0.33566789263635655, "grad_norm": 0.0, - "learning_rate": 1.7711330002005157e-05, - "loss": 0.9607, + "learning_rate": 1.54872251040408e-05, + "loss": 1.1722, "step": 8579 }, { - "epoch": 0.24313525461191873, + "epoch": 0.335707019328586, "grad_norm": 0.0, - "learning_rate": 1.77107456368839e-05, - "loss": 1.0037, + "learning_rate": 1.5486165649008623e-05, + "loss": 1.1342, "step": 8580 }, { - "epoch": 0.2431635920541812, + "epoch": 0.3357461460208154, "grad_norm": 0.0, - "learning_rate": 1.7710161206812166e-05, - "loss": 0.9296, + "learning_rate": 1.54851061058755e-05, + "loss": 1.1177, "step": 8581 }, { - "epoch": 0.24319192949644364, + "epoch": 0.3357852727130448, "grad_norm": 0.0, - "learning_rate": 1.7709576711794886e-05, - "loss": 0.9578, + "learning_rate": 1.5484046474658448e-05, + "loss": 1.1263, "step": 8582 }, { - "epoch": 0.2432202669387061, + "epoch": 0.33582439940527425, "grad_norm": 0.0, - "learning_rate": 1.7708992151836972e-05, - "loss": 1.0883, + "learning_rate": 1.5482986755374478e-05, + "loss": 1.227, "step": 8583 }, { - "epoch": 0.24324860438096857, + "epoch": 0.3358635260975037, "grad_norm": 0.0, - "learning_rate": 1.770840752694336e-05, - "loss": 0.985, + "learning_rate": 1.5481926948040613e-05, + "loss": 1.0955, "step": 8584 }, { - "epoch": 0.24327694182323104, + "epoch": 0.33590265278973314, "grad_norm": 0.0, - "learning_rate": 1.7707822837118966e-05, - "loss": 1.0059, + "learning_rate": 1.5480867052673868e-05, + "loss": 1.0164, "step": 8585 }, { - "epoch": 0.2433052792654935, + "epoch": 0.3359417794819626, "grad_norm": 0.0, - "learning_rate": 1.770723808236872e-05, - "loss": 0.8465, + "learning_rate": 1.547980706929127e-05, + "loss": 1.093, "step": 8586 }, { - "epoch": 0.24333361670775597, + "epoch": 0.335980906174192, "grad_norm": 0.0, - "learning_rate": 1.770665326269754e-05, - "loss": 1.02, + "learning_rate": 1.547874699790983e-05, + "loss": 1.1096, "step": 8587 }, { - "epoch": 0.2433619541500184, + "epoch": 0.33602003286642146, "grad_norm": 0.0, - "learning_rate": 1.7706068378110367e-05, - "loss": 1.0472, + "learning_rate": 1.547768683854659e-05, + "loss": 1.1152, "step": 8588 }, { - "epoch": 0.24339029159228087, + "epoch": 0.3360591595586509, "grad_norm": 0.0, - "learning_rate": 1.7705483428612114e-05, - "loss": 1.0405, + "learning_rate": 1.5476626591218553e-05, + "loss": 1.2123, "step": 8589 }, { - "epoch": 0.24341862903454334, + "epoch": 0.33609828625088034, "grad_norm": 0.0, - "learning_rate": 1.770489841420771e-05, - "loss": 1.0407, + "learning_rate": 1.5475566255942764e-05, + "loss": 1.0779, "step": 8590 }, { - "epoch": 0.2434469664768058, + "epoch": 0.3361374129431098, "grad_norm": 0.0, - "learning_rate": 1.7704313334902087e-05, - "loss": 0.9988, + "learning_rate": 1.5474505832736233e-05, + "loss": 1.0834, "step": 8591 }, { - "epoch": 0.24347530391906827, + "epoch": 0.3361765396353392, "grad_norm": 0.0, - "learning_rate": 1.7703728190700172e-05, - "loss": 1.0168, + "learning_rate": 1.5473445321616004e-05, + "loss": 1.0179, "step": 8592 }, { - "epoch": 0.24350364136133074, + "epoch": 0.33621566632756866, "grad_norm": 0.0, - "learning_rate": 1.7703142981606894e-05, - "loss": 0.9742, + "learning_rate": 1.5472384722599102e-05, + "loss": 1.1425, "step": 8593 }, { - "epoch": 0.24353197880359317, + "epoch": 0.3362547930197981, "grad_norm": 0.0, - "learning_rate": 1.7702557707627185e-05, - "loss": 1.0493, + "learning_rate": 1.5471324035702555e-05, + "loss": 1.0605, "step": 8594 }, { - "epoch": 0.24356031624585564, + "epoch": 0.33629391971202754, "grad_norm": 0.0, - "learning_rate": 1.7701972368765973e-05, - "loss": 1.0051, + "learning_rate": 1.5470263260943402e-05, + "loss": 1.1158, "step": 8595 }, { - "epoch": 0.2435886536881181, + "epoch": 0.336333046404257, "grad_norm": 0.0, - "learning_rate": 1.7701386965028182e-05, - "loss": 0.9921, + "learning_rate": 1.5469202398338676e-05, + "loss": 1.0988, "step": 8596 }, { - "epoch": 0.24361699113038057, + "epoch": 0.3363721730964864, "grad_norm": 0.0, - "learning_rate": 1.770080149641875e-05, - "loss": 1.0927, + "learning_rate": 1.5468141447905412e-05, + "loss": 1.125, "step": 8597 }, { - "epoch": 0.24364532857264304, + "epoch": 0.33641129978871587, "grad_norm": 0.0, - "learning_rate": 1.770021596294261e-05, - "loss": 1.0193, + "learning_rate": 1.546708040966065e-05, + "loss": 1.0938, "step": 8598 }, { - "epoch": 0.2436736660149055, + "epoch": 0.3364504264809453, "grad_norm": 0.0, - "learning_rate": 1.769963036460469e-05, - "loss": 0.8829, + "learning_rate": 1.5466019283621426e-05, + "loss": 1.1923, "step": 8599 }, { - "epoch": 0.24370200345716794, + "epoch": 0.33648955317317475, "grad_norm": 0.0, - "learning_rate": 1.7699044701409923e-05, - "loss": 1.0595, + "learning_rate": 1.546495806980478e-05, + "loss": 1.1893, "step": 8600 }, { - "epoch": 0.2437303408994304, + "epoch": 0.3365286798654042, "grad_norm": 0.0, - "learning_rate": 1.7698458973363248e-05, - "loss": 0.8894, + "learning_rate": 1.546389676822776e-05, + "loss": 1.1641, "step": 8601 }, { - "epoch": 0.24375867834169287, + "epoch": 0.33656780655763363, "grad_norm": 0.0, - "learning_rate": 1.769787318046959e-05, - "loss": 0.9392, + "learning_rate": 1.5462835378907405e-05, + "loss": 1.1555, "step": 8602 }, { - "epoch": 0.24378701578395534, + "epoch": 0.33660693324986307, "grad_norm": 0.0, - "learning_rate": 1.769728732273389e-05, - "loss": 1.1183, + "learning_rate": 1.5461773901860754e-05, + "loss": 1.1185, "step": 8603 }, { - "epoch": 0.2438153532262178, + "epoch": 0.3366460599420925, "grad_norm": 0.0, - "learning_rate": 1.7696701400161077e-05, - "loss": 1.0106, + "learning_rate": 1.546071233710486e-05, + "loss": 1.1942, "step": 8604 }, { - "epoch": 0.24384369066848027, + "epoch": 0.33668518663432195, "grad_norm": 0.0, - "learning_rate": 1.7696115412756095e-05, - "loss": 1.0379, + "learning_rate": 1.545965068465677e-05, + "loss": 1.1306, "step": 8605 }, { - "epoch": 0.2438720281107427, + "epoch": 0.3367243133265514, "grad_norm": 0.0, - "learning_rate": 1.769552936052387e-05, - "loss": 0.963, + "learning_rate": 1.545858894453353e-05, + "loss": 1.0649, "step": 8606 }, { - "epoch": 0.24390036555300518, + "epoch": 0.33676344001878084, "grad_norm": 0.0, - "learning_rate": 1.7694943243469348e-05, - "loss": 0.9231, + "learning_rate": 1.545752711675219e-05, + "loss": 1.0665, "step": 8607 }, { - "epoch": 0.24392870299526764, + "epoch": 0.3368025667110103, "grad_norm": 0.0, - "learning_rate": 1.769435706159746e-05, - "loss": 1.0054, + "learning_rate": 1.5456465201329805e-05, + "loss": 1.1611, "step": 8608 }, { - "epoch": 0.2439570404375301, + "epoch": 0.3368416934032397, "grad_norm": 0.0, - "learning_rate": 1.7693770814913144e-05, - "loss": 0.9394, + "learning_rate": 1.5455403198283426e-05, + "loss": 1.1844, "step": 8609 }, { - "epoch": 0.24398537787979258, + "epoch": 0.3368808200954691, "grad_norm": 0.0, - "learning_rate": 1.7693184503421342e-05, - "loss": 0.9067, + "learning_rate": 1.5454341107630106e-05, + "loss": 1.0643, "step": 8610 }, { - "epoch": 0.24401371532205504, + "epoch": 0.33691994678769854, "grad_norm": 0.0, - "learning_rate": 1.7692598127126986e-05, - "loss": 1.0477, + "learning_rate": 1.5453278929386904e-05, + "loss": 1.0868, "step": 8611 }, { - "epoch": 0.24404205276431748, + "epoch": 0.336959073479928, "grad_norm": 0.0, - "learning_rate": 1.7692011686035023e-05, - "loss": 1.0353, + "learning_rate": 1.5452216663570877e-05, + "loss": 1.0671, "step": 8612 }, { - "epoch": 0.24407039020657995, + "epoch": 0.3369982001721574, "grad_norm": 0.0, - "learning_rate": 1.7691425180150386e-05, - "loss": 0.9556, + "learning_rate": 1.545115431019908e-05, + "loss": 1.0547, "step": 8613 }, { - "epoch": 0.2440987276488424, + "epoch": 0.33703732686438687, "grad_norm": 0.0, - "learning_rate": 1.769083860947802e-05, - "loss": 0.9325, + "learning_rate": 1.5450091869288577e-05, + "loss": 1.0729, "step": 8614 }, { - "epoch": 0.24412706509110488, + "epoch": 0.3370764535566163, "grad_norm": 0.0, - "learning_rate": 1.7690251974022866e-05, - "loss": 1.0458, + "learning_rate": 1.544902934085643e-05, + "loss": 1.0145, "step": 8615 }, { - "epoch": 0.24415540253336734, + "epoch": 0.33711558024884575, "grad_norm": 0.0, - "learning_rate": 1.7689665273789863e-05, - "loss": 1.0219, + "learning_rate": 1.5447966724919692e-05, + "loss": 1.3272, "step": 8616 }, { - "epoch": 0.2441837399756298, + "epoch": 0.3371547069410752, "grad_norm": 0.0, - "learning_rate": 1.7689078508783953e-05, - "loss": 0.8989, + "learning_rate": 1.544690402149544e-05, + "loss": 1.0305, "step": 8617 }, { - "epoch": 0.24421207741789225, + "epoch": 0.33719383363330463, "grad_norm": 0.0, - "learning_rate": 1.768849167901008e-05, - "loss": 0.9576, + "learning_rate": 1.5445841230600738e-05, + "loss": 1.148, "step": 8618 }, { - "epoch": 0.24424041486015471, + "epoch": 0.33723296032553407, "grad_norm": 0.0, - "learning_rate": 1.768790478447319e-05, - "loss": 0.9409, + "learning_rate": 1.544477835225265e-05, + "loss": 1.2292, "step": 8619 }, { - "epoch": 0.24426875230241718, + "epoch": 0.3372720870177635, "grad_norm": 0.0, - "learning_rate": 1.7687317825178222e-05, - "loss": 0.9604, + "learning_rate": 1.5443715386468235e-05, + "loss": 1.0347, "step": 8620 }, { - "epoch": 0.24429708974467965, + "epoch": 0.33731121370999295, "grad_norm": 0.0, - "learning_rate": 1.7686730801130118e-05, - "loss": 0.939, + "learning_rate": 1.544265233326458e-05, + "loss": 1.0835, "step": 8621 }, { - "epoch": 0.2443254271869421, + "epoch": 0.3373503404022224, "grad_norm": 0.0, - "learning_rate": 1.768614371233383e-05, - "loss": 1.0027, + "learning_rate": 1.544158919265875e-05, + "loss": 0.9922, "step": 8622 }, { - "epoch": 0.24435376462920458, + "epoch": 0.33738946709445183, "grad_norm": 0.0, - "learning_rate": 1.768555655879429e-05, - "loss": 1.127, + "learning_rate": 1.5440525964667813e-05, + "loss": 1.1064, "step": 8623 }, { - "epoch": 0.24438210207146702, + "epoch": 0.3374285937866813, "grad_norm": 0.0, - "learning_rate": 1.7684969340516463e-05, - "loss": 1.0585, + "learning_rate": 1.5439462649308847e-05, + "loss": 1.1742, "step": 8624 }, { - "epoch": 0.24441043951372948, + "epoch": 0.3374677204789107, "grad_norm": 0.0, - "learning_rate": 1.7684382057505284e-05, - "loss": 0.9617, + "learning_rate": 1.5438399246598926e-05, + "loss": 1.0181, "step": 8625 }, { - "epoch": 0.24443877695599195, + "epoch": 0.33750684717114016, "grad_norm": 0.0, - "learning_rate": 1.7683794709765697e-05, - "loss": 1.0735, + "learning_rate": 1.543733575655513e-05, + "loss": 1.0975, "step": 8626 }, { - "epoch": 0.24446711439825441, + "epoch": 0.3375459738633696, "grad_norm": 0.0, - "learning_rate": 1.768320729730266e-05, - "loss": 0.995, + "learning_rate": 1.5436272179194533e-05, + "loss": 1.053, "step": 8627 }, { - "epoch": 0.24449545184051688, + "epoch": 0.33758510055559904, "grad_norm": 0.0, - "learning_rate": 1.768261982012111e-05, - "loss": 1.051, + "learning_rate": 1.5435208514534215e-05, + "loss": 1.1241, "step": 8628 }, { - "epoch": 0.24452378928277935, + "epoch": 0.3376242272478285, "grad_norm": 0.0, - "learning_rate": 1.7682032278226002e-05, - "loss": 0.8854, + "learning_rate": 1.5434144762591263e-05, + "loss": 1.1078, "step": 8629 }, { - "epoch": 0.24455212672504179, + "epoch": 0.3376633539400579, "grad_norm": 0.0, - "learning_rate": 1.7681444671622284e-05, - "loss": 0.9606, + "learning_rate": 1.5433080923382754e-05, + "loss": 1.1551, "step": 8630 }, { - "epoch": 0.24458046416730425, + "epoch": 0.33770248063228736, "grad_norm": 0.0, - "learning_rate": 1.7680857000314904e-05, - "loss": 1.0704, + "learning_rate": 1.5432016996925772e-05, + "loss": 1.062, "step": 8631 }, { - "epoch": 0.24460880160956672, + "epoch": 0.3377416073245168, "grad_norm": 0.0, - "learning_rate": 1.7680269264308814e-05, - "loss": 0.9332, + "learning_rate": 1.5430952983237404e-05, + "loss": 1.1437, "step": 8632 }, { - "epoch": 0.24463713905182918, + "epoch": 0.33778073401674624, "grad_norm": 0.0, - "learning_rate": 1.7679681463608963e-05, - "loss": 0.9759, + "learning_rate": 1.542988888233474e-05, + "loss": 1.074, "step": 8633 }, { - "epoch": 0.24466547649409165, + "epoch": 0.3378198607089757, "grad_norm": 0.0, - "learning_rate": 1.7679093598220305e-05, - "loss": 0.8499, + "learning_rate": 1.5428824694234857e-05, + "loss": 1.1867, "step": 8634 }, { - "epoch": 0.24469381393635412, + "epoch": 0.3378589874012051, "grad_norm": 0.0, - "learning_rate": 1.767850566814779e-05, - "loss": 0.9568, + "learning_rate": 1.542776041895486e-05, + "loss": 1.1895, "step": 8635 }, { - "epoch": 0.24472215137861655, + "epoch": 0.33789811409343457, "grad_norm": 0.0, - "learning_rate": 1.767791767339637e-05, - "loss": 1.0112, + "learning_rate": 1.5426696056511827e-05, + "loss": 1.0718, "step": 8636 }, { - "epoch": 0.24475048882087902, + "epoch": 0.337937240785664, "grad_norm": 0.0, - "learning_rate": 1.7677329613970995e-05, - "loss": 1.0533, + "learning_rate": 1.542563160692286e-05, + "loss": 0.9999, "step": 8637 }, { - "epoch": 0.24477882626314149, + "epoch": 0.3379763674778934, "grad_norm": 0.0, - "learning_rate": 1.7676741489876625e-05, - "loss": 1.0185, + "learning_rate": 1.5424567070205043e-05, + "loss": 1.1321, "step": 8638 }, { - "epoch": 0.24480716370540395, + "epoch": 0.33801549417012283, "grad_norm": 0.0, - "learning_rate": 1.7676153301118207e-05, - "loss": 0.8327, + "learning_rate": 1.542350244637548e-05, + "loss": 1.3097, "step": 8639 }, { - "epoch": 0.24483550114766642, + "epoch": 0.3380546208623523, "grad_norm": 0.0, - "learning_rate": 1.7675565047700706e-05, - "loss": 0.9483, + "learning_rate": 1.5422437735451262e-05, + "loss": 1.0145, "step": 8640 }, { - "epoch": 0.24486383858992888, + "epoch": 0.3380937475545817, "grad_norm": 0.0, - "learning_rate": 1.7674976729629065e-05, - "loss": 0.9728, + "learning_rate": 1.5421372937449487e-05, + "loss": 1.1736, "step": 8641 }, { - "epoch": 0.24489217603219132, + "epoch": 0.33813287424681115, "grad_norm": 0.0, - "learning_rate": 1.7674388346908248e-05, - "loss": 0.984, + "learning_rate": 1.542030805238726e-05, + "loss": 1.0751, "step": 8642 }, { - "epoch": 0.2449205134744538, + "epoch": 0.3381720009390406, "grad_norm": 0.0, - "learning_rate": 1.7673799899543207e-05, - "loss": 0.8709, + "learning_rate": 1.5419243080281675e-05, + "loss": 1.135, "step": 8643 }, { - "epoch": 0.24494885091671625, + "epoch": 0.33821112763127004, "grad_norm": 0.0, - "learning_rate": 1.76732113875389e-05, - "loss": 1.055, + "learning_rate": 1.5418178021149837e-05, + "loss": 1.1316, "step": 8644 }, { - "epoch": 0.24497718835897872, + "epoch": 0.3382502543234995, "grad_norm": 0.0, - "learning_rate": 1.7672622810900285e-05, - "loss": 0.9888, + "learning_rate": 1.5417112875008854e-05, + "loss": 1.1794, "step": 8645 }, { - "epoch": 0.2450055258012412, + "epoch": 0.3382893810157289, "grad_norm": 0.0, - "learning_rate": 1.7672034169632316e-05, - "loss": 0.927, + "learning_rate": 1.5416047641875823e-05, + "loss": 1.0552, "step": 8646 }, { - "epoch": 0.24503386324350365, + "epoch": 0.33832850770795836, "grad_norm": 0.0, - "learning_rate": 1.767144546373996e-05, - "loss": 1.0701, + "learning_rate": 1.541498232176785e-05, + "loss": 1.1213, "step": 8647 }, { - "epoch": 0.2450622006857661, + "epoch": 0.3383676344001878, "grad_norm": 0.0, - "learning_rate": 1.7670856693228163e-05, - "loss": 0.9325, + "learning_rate": 1.541391691470205e-05, + "loss": 1.2429, "step": 8648 }, { - "epoch": 0.24509053812802856, + "epoch": 0.33840676109241724, "grad_norm": 0.0, - "learning_rate": 1.7670267858101895e-05, - "loss": 0.9529, + "learning_rate": 1.5412851420695524e-05, + "loss": 1.1356, "step": 8649 }, { - "epoch": 0.24511887557029102, + "epoch": 0.3384458877846467, "grad_norm": 0.0, - "learning_rate": 1.7669678958366112e-05, - "loss": 1.0443, + "learning_rate": 1.541178583976539e-05, + "loss": 1.0198, "step": 8650 }, { - "epoch": 0.2451472130125535, + "epoch": 0.3384850144768761, "grad_norm": 0.0, - "learning_rate": 1.7669089994025778e-05, - "loss": 0.8906, + "learning_rate": 1.5410720171928758e-05, + "loss": 1.1216, "step": 8651 }, { - "epoch": 0.24517555045481595, + "epoch": 0.33852414116910556, "grad_norm": 0.0, - "learning_rate": 1.7668500965085845e-05, - "loss": 0.8872, + "learning_rate": 1.540965441720274e-05, + "loss": 1.3082, "step": 8652 }, { - "epoch": 0.24520388789707842, + "epoch": 0.338563267861335, "grad_norm": 0.0, - "learning_rate": 1.7667911871551286e-05, - "loss": 1.0922, + "learning_rate": 1.5408588575604452e-05, + "loss": 1.2566, "step": 8653 }, { - "epoch": 0.24523222533934086, + "epoch": 0.33860239455356445, "grad_norm": 0.0, - "learning_rate": 1.7667322713427055e-05, - "loss": 0.8818, + "learning_rate": 1.5407522647151005e-05, + "loss": 1.0782, "step": 8654 }, { - "epoch": 0.24526056278160333, + "epoch": 0.3386415212457939, "grad_norm": 0.0, - "learning_rate": 1.766673349071812e-05, - "loss": 0.9146, + "learning_rate": 1.5406456631859523e-05, + "loss": 1.2216, "step": 8655 }, { - "epoch": 0.2452889002238658, + "epoch": 0.3386806479380233, "grad_norm": 0.0, - "learning_rate": 1.766614420342944e-05, - "loss": 0.9404, + "learning_rate": 1.5405390529747123e-05, + "loss": 1.1201, "step": 8656 }, { - "epoch": 0.24531723766612826, + "epoch": 0.33871977463025277, "grad_norm": 0.0, - "learning_rate": 1.7665554851565983e-05, - "loss": 1.0916, + "learning_rate": 1.540432434083092e-05, + "loss": 1.2527, "step": 8657 }, { - "epoch": 0.24534557510839072, + "epoch": 0.3387589013224822, "grad_norm": 0.0, - "learning_rate": 1.766496543513271e-05, - "loss": 0.9699, + "learning_rate": 1.5403258065128042e-05, + "loss": 0.9666, "step": 8658 }, { - "epoch": 0.2453739125506532, + "epoch": 0.33879802801471165, "grad_norm": 0.0, - "learning_rate": 1.7664375954134586e-05, - "loss": 0.943, + "learning_rate": 1.5402191702655614e-05, + "loss": 1.0194, "step": 8659 }, { - "epoch": 0.24540224999291563, + "epoch": 0.3388371547069411, "grad_norm": 0.0, - "learning_rate": 1.7663786408576574e-05, - "loss": 0.8811, + "learning_rate": 1.5401125253430753e-05, + "loss": 1.1066, "step": 8660 }, { - "epoch": 0.2454305874351781, + "epoch": 0.33887628139917053, "grad_norm": 0.0, - "learning_rate": 1.7663196798463647e-05, - "loss": 1.0013, + "learning_rate": 1.5400058717470585e-05, + "loss": 1.2485, "step": 8661 }, { - "epoch": 0.24545892487744056, + "epoch": 0.3389154080914, "grad_norm": 0.0, - "learning_rate": 1.766260712380077e-05, - "loss": 0.9621, + "learning_rate": 1.5398992094792247e-05, + "loss": 1.1955, "step": 8662 }, { - "epoch": 0.24548726231970303, + "epoch": 0.3389545347836294, "grad_norm": 0.0, - "learning_rate": 1.7662017384592905e-05, - "loss": 0.9886, + "learning_rate": 1.5397925385412858e-05, + "loss": 1.0389, "step": 8663 }, { - "epoch": 0.2455155997619655, + "epoch": 0.33899366147585885, "grad_norm": 0.0, - "learning_rate": 1.766142758084502e-05, - "loss": 1.0958, + "learning_rate": 1.5396858589349553e-05, + "loss": 1.0963, "step": 8664 }, { - "epoch": 0.24554393720422796, + "epoch": 0.3390327881680883, "grad_norm": 0.0, - "learning_rate": 1.766083771256209e-05, - "loss": 1.0445, + "learning_rate": 1.539579170661946e-05, + "loss": 1.1642, "step": 8665 }, { - "epoch": 0.2455722746464904, + "epoch": 0.33907191486031774, "grad_norm": 0.0, - "learning_rate": 1.7660247779749074e-05, - "loss": 0.9992, + "learning_rate": 1.5394724737239712e-05, + "loss": 1.1651, "step": 8666 }, { - "epoch": 0.24560061208875286, + "epoch": 0.3391110415525471, "grad_norm": 0.0, - "learning_rate": 1.7659657782410952e-05, - "loss": 0.9196, + "learning_rate": 1.5393657681227448e-05, + "loss": 1.0625, "step": 8667 }, { - "epoch": 0.24562894953101533, + "epoch": 0.33915016824477656, "grad_norm": 0.0, - "learning_rate": 1.7659067720552685e-05, - "loss": 0.9402, + "learning_rate": 1.53925905385998e-05, + "loss": 1.0311, "step": 8668 }, { - "epoch": 0.2456572869732778, + "epoch": 0.339189294937006, "grad_norm": 0.0, - "learning_rate": 1.7658477594179248e-05, - "loss": 1.0028, + "learning_rate": 1.53915233093739e-05, + "loss": 1.0749, "step": 8669 }, { - "epoch": 0.24568562441554026, + "epoch": 0.33922842162923544, "grad_norm": 0.0, - "learning_rate": 1.7657887403295605e-05, - "loss": 1.1236, + "learning_rate": 1.53904559935669e-05, + "loss": 1.1212, "step": 8670 }, { - "epoch": 0.24571396185780273, + "epoch": 0.3392675483214649, "grad_norm": 0.0, - "learning_rate": 1.7657297147906738e-05, - "loss": 0.9617, + "learning_rate": 1.5389388591195928e-05, + "loss": 1.053, "step": 8671 }, { - "epoch": 0.24574229930006516, + "epoch": 0.3393066750136943, "grad_norm": 0.0, - "learning_rate": 1.7656706828017616e-05, - "loss": 1.0473, + "learning_rate": 1.5388321102278124e-05, + "loss": 1.1605, "step": 8672 }, { - "epoch": 0.24577063674232763, + "epoch": 0.33934580170592377, "grad_norm": 0.0, - "learning_rate": 1.7656116443633204e-05, - "loss": 1.0643, + "learning_rate": 1.538725352683064e-05, + "loss": 1.0556, "step": 8673 }, { - "epoch": 0.2457989741845901, + "epoch": 0.3393849283981532, "grad_norm": 0.0, - "learning_rate": 1.7655525994758484e-05, - "loss": 0.846, + "learning_rate": 1.5386185864870615e-05, + "loss": 1.1765, "step": 8674 }, { - "epoch": 0.24582731162685256, + "epoch": 0.33942405509038265, "grad_norm": 0.0, - "learning_rate": 1.7654935481398424e-05, - "loss": 1.0439, + "learning_rate": 1.5385118116415194e-05, + "loss": 1.1217, "step": 8675 }, { - "epoch": 0.24585564906911503, + "epoch": 0.3394631817826121, "grad_norm": 0.0, - "learning_rate": 1.7654344903557995e-05, - "loss": 0.9588, + "learning_rate": 1.538405028148152e-05, + "loss": 1.209, "step": 8676 }, { - "epoch": 0.2458839865113775, + "epoch": 0.33950230847484153, "grad_norm": 0.0, - "learning_rate": 1.7653754261242184e-05, - "loss": 0.9457, + "learning_rate": 1.5382982360086753e-05, + "loss": 1.2601, "step": 8677 }, { - "epoch": 0.24591232395363993, + "epoch": 0.33954143516707097, "grad_norm": 0.0, - "learning_rate": 1.7653163554455957e-05, - "loss": 1.0145, + "learning_rate": 1.5381914352248027e-05, + "loss": 1.199, "step": 8678 }, { - "epoch": 0.2459406613959024, + "epoch": 0.3395805618593004, "grad_norm": 0.0, - "learning_rate": 1.7652572783204286e-05, - "loss": 0.8852, + "learning_rate": 1.53808462579825e-05, + "loss": 1.2377, "step": 8679 }, { - "epoch": 0.24596899883816487, + "epoch": 0.33961968855152985, "grad_norm": 0.0, - "learning_rate": 1.7651981947492157e-05, - "loss": 0.8188, + "learning_rate": 1.5379778077307333e-05, + "loss": 1.0605, "step": 8680 }, { - "epoch": 0.24599733628042733, + "epoch": 0.3396588152437593, "grad_norm": 0.0, - "learning_rate": 1.7651391047324544e-05, - "loss": 1.0105, + "learning_rate": 1.5378709810239666e-05, + "loss": 1.0494, "step": 8681 }, { - "epoch": 0.2460256737226898, + "epoch": 0.33969794193598873, "grad_norm": 0.0, - "learning_rate": 1.7650800082706422e-05, - "loss": 0.8801, + "learning_rate": 1.5377641456796658e-05, + "loss": 1.181, "step": 8682 }, { - "epoch": 0.24605401116495226, + "epoch": 0.3397370686282182, "grad_norm": 0.0, - "learning_rate": 1.765020905364277e-05, - "loss": 0.9545, + "learning_rate": 1.537657301699547e-05, + "loss": 1.0969, "step": 8683 }, { - "epoch": 0.2460823486072147, + "epoch": 0.3397761953204476, "grad_norm": 0.0, - "learning_rate": 1.7649617960138566e-05, - "loss": 1.0611, + "learning_rate": 1.5375504490853255e-05, + "loss": 1.1502, "step": 8684 }, { - "epoch": 0.24611068604947717, + "epoch": 0.33981532201267706, "grad_norm": 0.0, - "learning_rate": 1.7649026802198786e-05, - "loss": 0.9896, + "learning_rate": 1.537443587838717e-05, + "loss": 1.1922, "step": 8685 }, { - "epoch": 0.24613902349173963, + "epoch": 0.3398544487049065, "grad_norm": 0.0, - "learning_rate": 1.764843557982842e-05, - "loss": 0.9948, + "learning_rate": 1.5373367179614387e-05, + "loss": 1.1213, "step": 8686 }, { - "epoch": 0.2461673609340021, + "epoch": 0.33989357539713594, "grad_norm": 0.0, - "learning_rate": 1.7647844293032435e-05, - "loss": 0.9906, + "learning_rate": 1.5372298394552053e-05, + "loss": 1.1523, "step": 8687 }, { - "epoch": 0.24619569837626457, + "epoch": 0.3399327020893654, "grad_norm": 0.0, - "learning_rate": 1.7647252941815817e-05, - "loss": 1.0172, + "learning_rate": 1.5371229523217343e-05, + "loss": 1.1516, "step": 8688 }, { - "epoch": 0.24622403581852703, + "epoch": 0.3399718287815948, "grad_norm": 0.0, - "learning_rate": 1.764666152618355e-05, - "loss": 0.9846, + "learning_rate": 1.5370160565627417e-05, + "loss": 1.1469, "step": 8689 }, { - "epoch": 0.24625237326078947, + "epoch": 0.34001095547382426, "grad_norm": 0.0, - "learning_rate": 1.7646070046140614e-05, - "loss": 0.9105, + "learning_rate": 1.5369091521799438e-05, + "loss": 1.0207, "step": 8690 }, { - "epoch": 0.24628071070305194, + "epoch": 0.3400500821660537, "grad_norm": 0.0, - "learning_rate": 1.764547850169199e-05, - "loss": 0.8639, + "learning_rate": 1.5368022391750583e-05, + "loss": 1.1346, "step": 8691 }, { - "epoch": 0.2463090481453144, + "epoch": 0.34008920885828314, "grad_norm": 0.0, - "learning_rate": 1.7644886892842664e-05, - "loss": 1.0053, + "learning_rate": 1.5366953175498012e-05, + "loss": 1.0328, "step": 8692 }, { - "epoch": 0.24633738558757687, + "epoch": 0.3401283355505126, "grad_norm": 0.0, - "learning_rate": 1.7644295219597614e-05, - "loss": 0.9615, + "learning_rate": 1.5365883873058897e-05, + "loss": 1.2458, "step": 8693 }, { - "epoch": 0.24636572302983933, + "epoch": 0.340167462242742, "grad_norm": 0.0, - "learning_rate": 1.764370348196183e-05, - "loss": 0.9815, + "learning_rate": 1.536481448445041e-05, + "loss": 1.0721, "step": 8694 }, { - "epoch": 0.2463940604721018, + "epoch": 0.3402065889349714, "grad_norm": 0.0, - "learning_rate": 1.7643111679940286e-05, - "loss": 1.0655, + "learning_rate": 1.536374500968973e-05, + "loss": 1.0857, "step": 8695 }, { - "epoch": 0.24642239791436424, + "epoch": 0.34024571562720085, "grad_norm": 0.0, - "learning_rate": 1.7642519813537983e-05, - "loss": 1.0287, + "learning_rate": 1.536267544879402e-05, + "loss": 1.1306, "step": 8696 }, { - "epoch": 0.2464507353566267, + "epoch": 0.3402848423194303, "grad_norm": 0.0, - "learning_rate": 1.764192788275989e-05, - "loss": 1.0761, + "learning_rate": 1.5361605801780465e-05, + "loss": 1.143, "step": 8697 }, { - "epoch": 0.24647907279888917, + "epoch": 0.34032396901165973, "grad_norm": 0.0, - "learning_rate": 1.7641335887611005e-05, - "loss": 1.0022, + "learning_rate": 1.536053606866624e-05, + "loss": 1.0528, "step": 8698 }, { - "epoch": 0.24650741024115164, + "epoch": 0.3403630957038892, "grad_norm": 0.0, - "learning_rate": 1.7640743828096306e-05, - "loss": 0.9025, + "learning_rate": 1.535946624946852e-05, + "loss": 1.1377, "step": 8699 }, { - "epoch": 0.2465357476834141, + "epoch": 0.3404022223961186, "grad_norm": 0.0, - "learning_rate": 1.764015170422079e-05, - "loss": 0.8312, + "learning_rate": 1.5358396344204494e-05, + "loss": 1.1437, "step": 8700 }, { - "epoch": 0.24656408512567657, + "epoch": 0.34044134908834806, "grad_norm": 0.0, - "learning_rate": 1.7639559515989436e-05, - "loss": 1.0258, + "learning_rate": 1.535732635289133e-05, + "loss": 1.1049, "step": 8701 }, { - "epoch": 0.246592422567939, + "epoch": 0.3404804757805775, "grad_norm": 0.0, - "learning_rate": 1.7638967263407238e-05, - "loss": 1.024, + "learning_rate": 1.535625627554622e-05, + "loss": 1.1694, "step": 8702 }, { - "epoch": 0.24662076001020147, + "epoch": 0.34051960247280694, "grad_norm": 0.0, - "learning_rate": 1.763837494647918e-05, - "loss": 0.8562, + "learning_rate": 1.535518611218635e-05, + "loss": 1.1136, "step": 8703 }, { - "epoch": 0.24664909745246394, + "epoch": 0.3405587291650364, "grad_norm": 0.0, - "learning_rate": 1.7637782565210252e-05, - "loss": 0.9193, + "learning_rate": 1.53541158628289e-05, + "loss": 1.0199, "step": 8704 }, { - "epoch": 0.2466774348947264, + "epoch": 0.3405978558572658, "grad_norm": 0.0, - "learning_rate": 1.7637190119605447e-05, - "loss": 1.0818, + "learning_rate": 1.5353045527491052e-05, + "loss": 1.261, "step": 8705 }, { - "epoch": 0.24670577233698887, + "epoch": 0.34063698254949526, "grad_norm": 0.0, - "learning_rate": 1.7636597609669753e-05, - "loss": 0.9466, + "learning_rate": 1.5351975106190006e-05, + "loss": 1.1104, "step": 8706 }, { - "epoch": 0.24673410977925134, + "epoch": 0.3406761092417247, "grad_norm": 0.0, - "learning_rate": 1.763600503540816e-05, - "loss": 1.0781, + "learning_rate": 1.5350904598942947e-05, + "loss": 1.1063, "step": 8707 }, { - "epoch": 0.24676244722151378, + "epoch": 0.34071523593395414, "grad_norm": 0.0, - "learning_rate": 1.7635412396825663e-05, - "loss": 1.1277, + "learning_rate": 1.534983400576706e-05, + "loss": 1.1192, "step": 8708 }, { - "epoch": 0.24679078466377624, + "epoch": 0.3407543626261836, "grad_norm": 0.0, - "learning_rate": 1.7634819693927254e-05, - "loss": 1.0943, + "learning_rate": 1.534876332667955e-05, + "loss": 1.1195, "step": 8709 }, { - "epoch": 0.2468191221060387, + "epoch": 0.340793489318413, "grad_norm": 0.0, - "learning_rate": 1.763422692671792e-05, - "loss": 1.0471, + "learning_rate": 1.53476925616976e-05, + "loss": 1.0052, "step": 8710 }, { - "epoch": 0.24684745954830117, + "epoch": 0.34083261601064246, "grad_norm": 0.0, - "learning_rate": 1.7633634095202657e-05, - "loss": 0.9356, + "learning_rate": 1.534662171083841e-05, + "loss": 1.0535, "step": 8711 }, { - "epoch": 0.24687579699056364, + "epoch": 0.3408717427028719, "grad_norm": 0.0, - "learning_rate": 1.7633041199386464e-05, - "loss": 0.8932, + "learning_rate": 1.534555077411917e-05, + "loss": 1.0587, "step": 8712 }, { - "epoch": 0.2469041344328261, + "epoch": 0.34091086939510135, "grad_norm": 0.0, - "learning_rate": 1.7632448239274328e-05, - "loss": 1.0021, + "learning_rate": 1.5344479751557085e-05, + "loss": 1.1689, "step": 8713 }, { - "epoch": 0.24693247187508854, + "epoch": 0.3409499960873308, "grad_norm": 0.0, - "learning_rate": 1.7631855214871245e-05, - "loss": 0.9988, + "learning_rate": 1.5343408643169354e-05, + "loss": 1.0348, "step": 8714 }, { - "epoch": 0.246960809317351, + "epoch": 0.3409891227795602, "grad_norm": 0.0, - "learning_rate": 1.7631262126182215e-05, - "loss": 0.9514, + "learning_rate": 1.5342337448973176e-05, + "loss": 1.1177, "step": 8715 }, { - "epoch": 0.24698914675961348, + "epoch": 0.34102824947178967, "grad_norm": 0.0, - "learning_rate": 1.7630668973212226e-05, - "loss": 0.8931, + "learning_rate": 1.534126616898575e-05, + "loss": 1.1489, "step": 8716 }, { - "epoch": 0.24701748420187594, + "epoch": 0.3410673761640191, "grad_norm": 0.0, - "learning_rate": 1.7630075755966284e-05, - "loss": 0.9787, + "learning_rate": 1.5340194803224284e-05, + "loss": 1.0009, "step": 8717 }, { - "epoch": 0.2470458216441384, + "epoch": 0.34110650285624855, "grad_norm": 0.0, - "learning_rate": 1.7629482474449373e-05, - "loss": 1.0264, + "learning_rate": 1.5339123351705986e-05, + "loss": 1.0039, "step": 8718 }, { - "epoch": 0.24707415908640087, + "epoch": 0.341145629548478, "grad_norm": 0.0, - "learning_rate": 1.7628889128666503e-05, - "loss": 1.0189, + "learning_rate": 1.5338051814448048e-05, + "loss": 1.0619, "step": 8719 }, { - "epoch": 0.2471024965286633, + "epoch": 0.34118475624070743, "grad_norm": 0.0, - "learning_rate": 1.7628295718622666e-05, - "loss": 1.0425, + "learning_rate": 1.5336980191467696e-05, + "loss": 1.1315, "step": 8720 }, { - "epoch": 0.24713083397092578, + "epoch": 0.3412238829329369, "grad_norm": 0.0, - "learning_rate": 1.7627702244322865e-05, - "loss": 0.9296, + "learning_rate": 1.533590848278212e-05, + "loss": 1.1919, "step": 8721 }, { - "epoch": 0.24715917141318824, + "epoch": 0.3412630096251663, "grad_norm": 0.0, - "learning_rate": 1.7627108705772088e-05, - "loss": 1.0456, + "learning_rate": 1.5334836688408548e-05, + "loss": 0.9978, "step": 8722 }, { - "epoch": 0.2471875088554507, + "epoch": 0.34130213631739575, "grad_norm": 0.0, - "learning_rate": 1.7626515102975346e-05, - "loss": 0.9984, + "learning_rate": 1.533376480836418e-05, + "loss": 1.1498, "step": 8723 }, { - "epoch": 0.24721584629771318, + "epoch": 0.34134126300962514, "grad_norm": 0.0, - "learning_rate": 1.762592143593764e-05, - "loss": 1.0941, + "learning_rate": 1.5332692842666236e-05, + "loss": 1.1345, "step": 8724 }, { - "epoch": 0.24724418373997564, + "epoch": 0.3413803897018546, "grad_norm": 0.0, - "learning_rate": 1.762532770466396e-05, - "loss": 1.0595, + "learning_rate": 1.5331620791331924e-05, + "loss": 1.0057, "step": 8725 }, { - "epoch": 0.24727252118223808, + "epoch": 0.341419516394084, "grad_norm": 0.0, - "learning_rate": 1.7624733909159312e-05, - "loss": 1.036, + "learning_rate": 1.5330548654378464e-05, + "loss": 1.2075, "step": 8726 }, { - "epoch": 0.24730085862450055, + "epoch": 0.34145864308631346, "grad_norm": 0.0, - "learning_rate": 1.7624140049428705e-05, - "loss": 0.9857, + "learning_rate": 1.5329476431823072e-05, + "loss": 1.0696, "step": 8727 }, { - "epoch": 0.247329196066763, + "epoch": 0.3414977697785429, "grad_norm": 0.0, - "learning_rate": 1.762354612547713e-05, - "loss": 1.0226, + "learning_rate": 1.5328404123682966e-05, + "loss": 1.0584, "step": 8728 }, { - "epoch": 0.24735753350902548, + "epoch": 0.34153689647077234, "grad_norm": 0.0, - "learning_rate": 1.7622952137309596e-05, - "loss": 0.9582, + "learning_rate": 1.532733172997537e-05, + "loss": 1.0242, "step": 8729 }, { - "epoch": 0.24738587095128795, + "epoch": 0.3415760231630018, "grad_norm": 0.0, - "learning_rate": 1.7622358084931107e-05, - "loss": 0.9609, + "learning_rate": 1.5326259250717497e-05, + "loss": 1.2058, "step": 8730 }, { - "epoch": 0.2474142083935504, + "epoch": 0.3416151498552312, "grad_norm": 0.0, - "learning_rate": 1.7621763968346663e-05, - "loss": 0.9624, + "learning_rate": 1.5325186685926578e-05, + "loss": 1.0872, "step": 8731 }, { - "epoch": 0.24744254583581285, + "epoch": 0.34165427654746067, "grad_norm": 0.0, - "learning_rate": 1.7621169787561275e-05, - "loss": 1.0211, + "learning_rate": 1.5324114035619832e-05, + "loss": 1.0728, "step": 8732 }, { - "epoch": 0.24747088327807532, + "epoch": 0.3416934032396901, "grad_norm": 0.0, - "learning_rate": 1.7620575542579938e-05, - "loss": 0.8258, + "learning_rate": 1.532304129981449e-05, + "loss": 1.0006, "step": 8733 }, { - "epoch": 0.24749922072033778, + "epoch": 0.34173252993191955, "grad_norm": 0.0, - "learning_rate": 1.761998123340767e-05, - "loss": 1.0601, + "learning_rate": 1.532196847852777e-05, + "loss": 1.0727, "step": 8734 }, { - "epoch": 0.24752755816260025, + "epoch": 0.341771656624149, "grad_norm": 0.0, - "learning_rate": 1.7619386860049466e-05, - "loss": 1.0703, + "learning_rate": 1.5320895571776903e-05, + "loss": 1.1371, "step": 8735 }, { - "epoch": 0.2475558956048627, + "epoch": 0.34181078331637843, "grad_norm": 0.0, - "learning_rate": 1.7618792422510337e-05, - "loss": 0.9662, + "learning_rate": 1.5319822579579125e-05, + "loss": 1.0787, "step": 8736 }, { - "epoch": 0.24758423304712518, + "epoch": 0.34184991000860787, "grad_norm": 0.0, - "learning_rate": 1.7618197920795292e-05, - "loss": 0.9862, + "learning_rate": 1.531874950195166e-05, + "loss": 1.1111, "step": 8737 }, { - "epoch": 0.24761257048938762, + "epoch": 0.3418890367008373, "grad_norm": 0.0, - "learning_rate": 1.7617603354909336e-05, - "loss": 0.9096, + "learning_rate": 1.5317676338911748e-05, + "loss": 1.2657, "step": 8738 }, { - "epoch": 0.24764090793165008, + "epoch": 0.34192816339306675, "grad_norm": 0.0, - "learning_rate": 1.761700872485748e-05, - "loss": 0.98, + "learning_rate": 1.5316603090476614e-05, + "loss": 1.0328, "step": 8739 }, { - "epoch": 0.24766924537391255, + "epoch": 0.3419672900852962, "grad_norm": 0.0, - "learning_rate": 1.761641403064473e-05, - "loss": 0.9981, + "learning_rate": 1.5315529756663495e-05, + "loss": 1.0851, "step": 8740 }, { - "epoch": 0.24769758281617502, + "epoch": 0.34200641677752563, "grad_norm": 0.0, - "learning_rate": 1.7615819272276095e-05, - "loss": 0.9092, + "learning_rate": 1.531445633748963e-05, + "loss": 1.1053, "step": 8741 }, { - "epoch": 0.24772592025843748, + "epoch": 0.3420455434697551, "grad_norm": 0.0, - "learning_rate": 1.7615224449756588e-05, - "loss": 1.0997, + "learning_rate": 1.5313382832972257e-05, + "loss": 1.2402, "step": 8742 }, { - "epoch": 0.24775425770069995, + "epoch": 0.3420846701619845, "grad_norm": 0.0, - "learning_rate": 1.761462956309122e-05, - "loss": 0.9086, + "learning_rate": 1.531230924312861e-05, + "loss": 1.0619, "step": 8743 }, { - "epoch": 0.2477825951429624, + "epoch": 0.34212379685421396, "grad_norm": 0.0, - "learning_rate": 1.761403461228499e-05, - "loss": 0.8421, + "learning_rate": 1.5311235567975936e-05, + "loss": 1.0052, "step": 8744 }, { - "epoch": 0.24781093258522485, + "epoch": 0.3421629235464434, "grad_norm": 0.0, - "learning_rate": 1.7613439597342928e-05, - "loss": 0.8884, + "learning_rate": 1.5310161807531476e-05, + "loss": 1.1005, "step": 8745 }, { - "epoch": 0.24783927002748732, + "epoch": 0.34220205023867284, "grad_norm": 0.0, - "learning_rate": 1.7612844518270033e-05, - "loss": 0.9792, + "learning_rate": 1.5309087961812468e-05, + "loss": 1.0942, "step": 8746 }, { - "epoch": 0.24786760746974978, + "epoch": 0.3422411769309023, "grad_norm": 0.0, - "learning_rate": 1.7612249375071323e-05, - "loss": 1.0127, + "learning_rate": 1.530801403083616e-05, + "loss": 1.1062, "step": 8747 }, { - "epoch": 0.24789594491201225, + "epoch": 0.3422803036231317, "grad_norm": 0.0, - "learning_rate": 1.761165416775181e-05, - "loss": 0.9402, + "learning_rate": 1.53069400146198e-05, + "loss": 1.1732, "step": 8748 }, { - "epoch": 0.24792428235427472, + "epoch": 0.34231943031536116, "grad_norm": 0.0, - "learning_rate": 1.761105889631651e-05, - "loss": 0.8768, + "learning_rate": 1.5305865913180633e-05, + "loss": 0.944, "step": 8749 }, { - "epoch": 0.24795261979653715, + "epoch": 0.3423585570075906, "grad_norm": 0.0, - "learning_rate": 1.761046356077043e-05, - "loss": 0.9391, + "learning_rate": 1.5304791726535905e-05, + "loss": 1.0647, "step": 8750 }, { - "epoch": 0.24798095723879962, + "epoch": 0.34239768369982004, "grad_norm": 0.0, - "learning_rate": 1.760986816111859e-05, - "loss": 0.9428, + "learning_rate": 1.5303717454702872e-05, + "loss": 1.1603, "step": 8751 }, { - "epoch": 0.2480092946810621, + "epoch": 0.34243681039204943, "grad_norm": 0.0, - "learning_rate": 1.7609272697366008e-05, - "loss": 0.9738, + "learning_rate": 1.530264309769878e-05, + "loss": 1.0897, "step": 8752 }, { - "epoch": 0.24803763212332455, + "epoch": 0.34247593708427887, "grad_norm": 0.0, - "learning_rate": 1.7608677169517693e-05, - "loss": 1.0581, + "learning_rate": 1.530156865554089e-05, + "loss": 0.9761, "step": 8753 }, { - "epoch": 0.24806596956558702, + "epoch": 0.3425150637765083, "grad_norm": 0.0, - "learning_rate": 1.7608081577578665e-05, - "loss": 1.0092, + "learning_rate": 1.5300494128246447e-05, + "loss": 1.2428, "step": 8754 }, { - "epoch": 0.24809430700784948, + "epoch": 0.34255419046873775, "grad_norm": 0.0, - "learning_rate": 1.7607485921553943e-05, - "loss": 0.9015, + "learning_rate": 1.529941951583271e-05, + "loss": 1.1503, "step": 8755 }, { - "epoch": 0.24812264445011192, + "epoch": 0.3425933171609672, "grad_norm": 0.0, - "learning_rate": 1.760689020144854e-05, - "loss": 0.9581, + "learning_rate": 1.5298344818316937e-05, + "loss": 1.2796, "step": 8756 }, { - "epoch": 0.2481509818923744, + "epoch": 0.34263244385319663, "grad_norm": 0.0, - "learning_rate": 1.760629441726748e-05, - "loss": 0.9992, + "learning_rate": 1.5297270035716386e-05, + "loss": 1.028, "step": 8757 }, { - "epoch": 0.24817931933463686, + "epoch": 0.3426715705454261, "grad_norm": 0.0, - "learning_rate": 1.7605698569015773e-05, - "loss": 0.8892, + "learning_rate": 1.5296195168048315e-05, + "loss": 1.0863, "step": 8758 }, { - "epoch": 0.24820765677689932, + "epoch": 0.3427106972376555, "grad_norm": 0.0, - "learning_rate": 1.7605102656698444e-05, - "loss": 1.0689, + "learning_rate": 1.5295120215329985e-05, + "loss": 1.0028, "step": 8759 }, { - "epoch": 0.2482359942191618, + "epoch": 0.34274982392988496, "grad_norm": 0.0, - "learning_rate": 1.7604506680320512e-05, - "loss": 1.056, + "learning_rate": 1.529404517757866e-05, + "loss": 1.0538, "step": 8760 }, { - "epoch": 0.24826433166142425, + "epoch": 0.3427889506221144, "grad_norm": 0.0, - "learning_rate": 1.7603910639886998e-05, - "loss": 0.9668, + "learning_rate": 1.5292970054811606e-05, + "loss": 1.2142, "step": 8761 }, { - "epoch": 0.2482926691036867, + "epoch": 0.34282807731434384, "grad_norm": 0.0, - "learning_rate": 1.7603314535402915e-05, - "loss": 0.9494, + "learning_rate": 1.529189484704608e-05, + "loss": 1.1408, "step": 8762 }, { - "epoch": 0.24832100654594916, + "epoch": 0.3428672040065733, "grad_norm": 0.0, - "learning_rate": 1.7602718366873296e-05, - "loss": 1.0024, + "learning_rate": 1.5290819554299358e-05, + "loss": 1.018, "step": 8763 }, { - "epoch": 0.24834934398821162, + "epoch": 0.3429063306988027, "grad_norm": 0.0, - "learning_rate": 1.7602122134303154e-05, - "loss": 0.9982, + "learning_rate": 1.5289744176588704e-05, + "loss": 0.9889, "step": 8764 }, { - "epoch": 0.2483776814304741, + "epoch": 0.34294545739103216, "grad_norm": 0.0, - "learning_rate": 1.7601525837697515e-05, - "loss": 1.0692, + "learning_rate": 1.5288668713931387e-05, + "loss": 1.0557, "step": 8765 }, { - "epoch": 0.24840601887273656, + "epoch": 0.3429845840832616, "grad_norm": 0.0, - "learning_rate": 1.7600929477061403e-05, - "loss": 1.0563, + "learning_rate": 1.5287593166344677e-05, + "loss": 1.1954, "step": 8766 }, { - "epoch": 0.24843435631499902, + "epoch": 0.34302371077549104, "grad_norm": 0.0, - "learning_rate": 1.760033305239984e-05, - "loss": 1.128, + "learning_rate": 1.5286517533845845e-05, + "loss": 1.0335, "step": 8767 }, { - "epoch": 0.24846269375726146, + "epoch": 0.3430628374677205, "grad_norm": 0.0, - "learning_rate": 1.7599736563717847e-05, - "loss": 0.8914, + "learning_rate": 1.5285441816452168e-05, + "loss": 1.1544, "step": 8768 }, { - "epoch": 0.24849103119952393, + "epoch": 0.3431019641599499, "grad_norm": 0.0, - "learning_rate": 1.759914001102045e-05, - "loss": 0.9995, + "learning_rate": 1.5284366014180916e-05, + "loss": 1.0999, "step": 8769 }, { - "epoch": 0.2485193686417864, + "epoch": 0.34314109085217936, "grad_norm": 0.0, - "learning_rate": 1.7598543394312674e-05, - "loss": 0.9242, + "learning_rate": 1.528329012704937e-05, + "loss": 0.9557, "step": 8770 }, { - "epoch": 0.24854770608404886, + "epoch": 0.3431802175444088, "grad_norm": 0.0, - "learning_rate": 1.759794671359955e-05, - "loss": 0.9958, + "learning_rate": 1.5282214155074805e-05, + "loss": 1.1402, "step": 8771 }, { - "epoch": 0.24857604352631132, + "epoch": 0.34321934423663825, "grad_norm": 0.0, - "learning_rate": 1.7597349968886096e-05, - "loss": 0.8764, + "learning_rate": 1.5281138098274496e-05, + "loss": 1.0716, "step": 8772 }, { - "epoch": 0.24860438096857376, + "epoch": 0.3432584709288677, "grad_norm": 0.0, - "learning_rate": 1.759675316017734e-05, - "loss": 0.9525, + "learning_rate": 1.528006195666573e-05, + "loss": 1.1842, "step": 8773 }, { - "epoch": 0.24863271841083623, + "epoch": 0.34329759762109713, "grad_norm": 0.0, - "learning_rate": 1.7596156287478316e-05, - "loss": 0.9803, + "learning_rate": 1.5278985730265782e-05, + "loss": 1.1579, "step": 8774 }, { - "epoch": 0.2486610558530987, + "epoch": 0.34333672431332657, "grad_norm": 0.0, - "learning_rate": 1.7595559350794046e-05, - "loss": 0.9064, + "learning_rate": 1.5277909419091942e-05, + "loss": 1.0832, "step": 8775 }, { - "epoch": 0.24868939329536116, + "epoch": 0.343375851005556, "grad_norm": 0.0, - "learning_rate": 1.7594962350129555e-05, - "loss": 0.8684, + "learning_rate": 1.527683302316149e-05, + "loss": 0.9991, "step": 8776 }, { - "epoch": 0.24871773073762363, + "epoch": 0.34341497769778545, "grad_norm": 0.0, - "learning_rate": 1.759436528548988e-05, - "loss": 1.1658, + "learning_rate": 1.527575654249171e-05, + "loss": 1.022, "step": 8777 }, { - "epoch": 0.2487460681798861, + "epoch": 0.3434541043900149, "grad_norm": 0.0, - "learning_rate": 1.7593768156880043e-05, - "loss": 0.9651, + "learning_rate": 1.527467997709989e-05, + "loss": 1.1412, "step": 8778 }, { - "epoch": 0.24877440562214853, + "epoch": 0.34349323108224433, "grad_norm": 0.0, - "learning_rate": 1.759317096430508e-05, - "loss": 0.9328, + "learning_rate": 1.527360332700332e-05, + "loss": 1.0337, "step": 8779 }, { - "epoch": 0.248802743064411, + "epoch": 0.3435323577744738, "grad_norm": 0.0, - "learning_rate": 1.759257370777002e-05, - "loss": 1.0492, + "learning_rate": 1.527252659221929e-05, + "loss": 1.0904, "step": 8780 }, { - "epoch": 0.24883108050667346, + "epoch": 0.34357148446670316, "grad_norm": 0.0, - "learning_rate": 1.7591976387279887e-05, - "loss": 0.9515, + "learning_rate": 1.527144977276509e-05, + "loss": 1.2372, "step": 8781 }, { - "epoch": 0.24885941794893593, + "epoch": 0.3436106111589326, "grad_norm": 0.0, - "learning_rate": 1.759137900283972e-05, - "loss": 0.9738, + "learning_rate": 1.5270372868658012e-05, + "loss": 1.2811, "step": 8782 }, { - "epoch": 0.2488877553911984, + "epoch": 0.34364973785116204, "grad_norm": 0.0, - "learning_rate": 1.759078155445455e-05, - "loss": 0.9691, + "learning_rate": 1.526929587991535e-05, + "loss": 1.0625, "step": 8783 }, { - "epoch": 0.24891609283346086, + "epoch": 0.3436888645433915, "grad_norm": 0.0, - "learning_rate": 1.7590184042129406e-05, - "loss": 1.0466, + "learning_rate": 1.5268218806554398e-05, + "loss": 1.1256, "step": 8784 }, { - "epoch": 0.2489444302757233, + "epoch": 0.3437279912356209, "grad_norm": 0.0, - "learning_rate": 1.7589586465869324e-05, - "loss": 1.037, + "learning_rate": 1.526714164859245e-05, + "loss": 1.1305, "step": 8785 }, { - "epoch": 0.24897276771798577, + "epoch": 0.34376711792785036, "grad_norm": 0.0, - "learning_rate": 1.7588988825679336e-05, - "loss": 0.8962, + "learning_rate": 1.5266064406046813e-05, + "loss": 1.0562, "step": 8786 }, { - "epoch": 0.24900110516024823, + "epoch": 0.3438062446200798, "grad_norm": 0.0, - "learning_rate": 1.758839112156448e-05, - "loss": 0.9132, + "learning_rate": 1.5264987078934778e-05, + "loss": 1.138, "step": 8787 }, { - "epoch": 0.2490294426025107, + "epoch": 0.34384537131230924, "grad_norm": 0.0, - "learning_rate": 1.7587793353529786e-05, - "loss": 1.0358, + "learning_rate": 1.526390966727365e-05, + "loss": 1.0267, "step": 8788 }, { - "epoch": 0.24905778004477316, + "epoch": 0.3438844980045387, "grad_norm": 0.0, - "learning_rate": 1.758719552158029e-05, - "loss": 0.9382, + "learning_rate": 1.5262832171080726e-05, + "loss": 1.0262, "step": 8789 }, { - "epoch": 0.24908611748703563, + "epoch": 0.3439236246967681, "grad_norm": 0.0, - "learning_rate": 1.7586597625721026e-05, - "loss": 0.9558, + "learning_rate": 1.5261754590373312e-05, + "loss": 1.1362, "step": 8790 }, { - "epoch": 0.24911445492929807, + "epoch": 0.34396275138899757, "grad_norm": 0.0, - "learning_rate": 1.758599966595704e-05, - "loss": 0.8992, + "learning_rate": 1.5260676925168713e-05, + "loss": 1.2211, "step": 8791 }, { - "epoch": 0.24914279237156053, + "epoch": 0.344001878081227, "grad_norm": 0.0, - "learning_rate": 1.7585401642293356e-05, - "loss": 0.9956, + "learning_rate": 1.5259599175484233e-05, + "loss": 1.0749, "step": 8792 }, { - "epoch": 0.249171129813823, + "epoch": 0.34404100477345645, "grad_norm": 0.0, - "learning_rate": 1.758480355473502e-05, - "loss": 1.0311, + "learning_rate": 1.5258521341337185e-05, + "loss": 1.0777, "step": 8793 }, { - "epoch": 0.24919946725608547, + "epoch": 0.3440801314656859, "grad_norm": 0.0, - "learning_rate": 1.7584205403287064e-05, - "loss": 1.0103, + "learning_rate": 1.525744342274487e-05, + "loss": 1.1267, "step": 8794 }, { - "epoch": 0.24922780469834793, + "epoch": 0.34411925815791533, "grad_norm": 0.0, - "learning_rate": 1.7583607187954532e-05, - "loss": 0.9628, + "learning_rate": 1.5256365419724603e-05, + "loss": 1.06, "step": 8795 }, { - "epoch": 0.2492561421406104, + "epoch": 0.34415838485014477, "grad_norm": 0.0, - "learning_rate": 1.758300890874246e-05, - "loss": 0.9415, + "learning_rate": 1.5255287332293697e-05, + "loss": 1.1667, "step": 8796 }, { - "epoch": 0.24928447958287284, + "epoch": 0.3441975115423742, "grad_norm": 0.0, - "learning_rate": 1.758241056565589e-05, - "loss": 0.8233, + "learning_rate": 1.5254209160469457e-05, + "loss": 1.0749, "step": 8797 }, { - "epoch": 0.2493128170251353, + "epoch": 0.34423663823460365, "grad_norm": 0.0, - "learning_rate": 1.758181215869986e-05, - "loss": 0.9835, + "learning_rate": 1.5253130904269205e-05, + "loss": 1.0844, "step": 8798 }, { - "epoch": 0.24934115446739777, + "epoch": 0.3442757649268331, "grad_norm": 0.0, - "learning_rate": 1.758121368787941e-05, - "loss": 1.1035, + "learning_rate": 1.5252052563710252e-05, + "loss": 1.0364, "step": 8799 }, { - "epoch": 0.24936949190966023, + "epoch": 0.34431489161906254, "grad_norm": 0.0, - "learning_rate": 1.758061515319958e-05, - "loss": 0.9125, + "learning_rate": 1.5250974138809915e-05, + "loss": 1.1268, "step": 8800 }, { - "epoch": 0.2493978293519227, + "epoch": 0.344354018311292, "grad_norm": 0.0, - "learning_rate": 1.7580016554665412e-05, - "loss": 1.0477, + "learning_rate": 1.5249895629585511e-05, + "loss": 1.2153, "step": 8801 }, { - "epoch": 0.24942616679418517, + "epoch": 0.3443931450035214, "grad_norm": 0.0, - "learning_rate": 1.7579417892281955e-05, - "loss": 0.9566, + "learning_rate": 1.5248817036054366e-05, + "loss": 1.1224, "step": 8802 }, { - "epoch": 0.2494545042364476, + "epoch": 0.34443227169575086, "grad_norm": 0.0, - "learning_rate": 1.7578819166054244e-05, - "loss": 0.9155, + "learning_rate": 1.5247738358233795e-05, + "loss": 1.0793, "step": 8803 }, { - "epoch": 0.24948284167871007, + "epoch": 0.3444713983879803, "grad_norm": 0.0, - "learning_rate": 1.7578220375987326e-05, - "loss": 1.0193, + "learning_rate": 1.5246659596141123e-05, + "loss": 1.1866, "step": 8804 }, { - "epoch": 0.24951117912097254, + "epoch": 0.34451052508020974, "grad_norm": 0.0, - "learning_rate": 1.757762152208624e-05, - "loss": 1.0933, + "learning_rate": 1.5245580749793672e-05, + "loss": 1.1823, "step": 8805 }, { - "epoch": 0.249539516563235, + "epoch": 0.3445496517724392, "grad_norm": 0.0, - "learning_rate": 1.757702260435604e-05, - "loss": 0.9804, + "learning_rate": 1.5244501819208766e-05, + "loss": 1.0154, "step": 8806 }, { - "epoch": 0.24956785400549747, + "epoch": 0.3445887784646686, "grad_norm": 0.0, - "learning_rate": 1.757642362280176e-05, - "loss": 0.9328, + "learning_rate": 1.5243422804403731e-05, + "loss": 1.0181, "step": 8807 }, { - "epoch": 0.24959619144775994, + "epoch": 0.34462790515689806, "grad_norm": 0.0, - "learning_rate": 1.7575824577428453e-05, - "loss": 0.9536, + "learning_rate": 1.5242343705395897e-05, + "loss": 1.0289, "step": 8808 }, { - "epoch": 0.24962452889002237, + "epoch": 0.34466703184912745, "grad_norm": 0.0, - "learning_rate": 1.757522546824116e-05, - "loss": 0.984, + "learning_rate": 1.5241264522202594e-05, + "loss": 1.1824, "step": 8809 }, { - "epoch": 0.24965286633228484, + "epoch": 0.3447061585413569, "grad_norm": 0.0, - "learning_rate": 1.7574626295244935e-05, - "loss": 1.0624, + "learning_rate": 1.5240185254841149e-05, + "loss": 1.2224, "step": 8810 }, { - "epoch": 0.2496812037745473, + "epoch": 0.34474528523358633, "grad_norm": 0.0, - "learning_rate": 1.7574027058444815e-05, - "loss": 0.9753, + "learning_rate": 1.5239105903328896e-05, + "loss": 1.2039, "step": 8811 }, { - "epoch": 0.24970954121680977, + "epoch": 0.34478441192581577, "grad_norm": 0.0, - "learning_rate": 1.757342775784585e-05, - "loss": 0.9726, + "learning_rate": 1.5238026467683167e-05, + "loss": 1.111, "step": 8812 }, { - "epoch": 0.24973787865907224, + "epoch": 0.3448235386180452, "grad_norm": 0.0, - "learning_rate": 1.7572828393453098e-05, - "loss": 0.9379, + "learning_rate": 1.5236946947921296e-05, + "loss": 1.1166, "step": 8813 }, { - "epoch": 0.2497662161013347, + "epoch": 0.34486266531027465, "grad_norm": 0.0, - "learning_rate": 1.7572228965271595e-05, - "loss": 0.954, + "learning_rate": 1.5235867344060622e-05, + "loss": 1.2039, "step": 8814 }, { - "epoch": 0.24979455354359714, + "epoch": 0.3449017920025041, "grad_norm": 0.0, - "learning_rate": 1.75716294733064e-05, - "loss": 0.9212, + "learning_rate": 1.5234787656118477e-05, + "loss": 1.0923, "step": 8815 }, { - "epoch": 0.2498228909858596, + "epoch": 0.34494091869473353, "grad_norm": 0.0, - "learning_rate": 1.7571029917562553e-05, - "loss": 0.9776, + "learning_rate": 1.5233707884112201e-05, + "loss": 1.0011, "step": 8816 }, { - "epoch": 0.24985122842812207, + "epoch": 0.344980045386963, "grad_norm": 0.0, - "learning_rate": 1.7570430298045113e-05, - "loss": 0.9688, + "learning_rate": 1.5232628028059137e-05, + "loss": 1.0426, "step": 8817 }, { - "epoch": 0.24987956587038454, + "epoch": 0.3450191720791924, "grad_norm": 0.0, - "learning_rate": 1.7569830614759126e-05, - "loss": 1.076, + "learning_rate": 1.5231548087976622e-05, + "loss": 1.0853, "step": 8818 }, { - "epoch": 0.249907903312647, + "epoch": 0.34505829877142186, "grad_norm": 0.0, - "learning_rate": 1.7569230867709648e-05, - "loss": 0.9628, + "learning_rate": 1.5230468063882e-05, + "loss": 1.2627, "step": 8819 }, { - "epoch": 0.24993624075490947, + "epoch": 0.3450974254636513, "grad_norm": 0.0, - "learning_rate": 1.7568631056901723e-05, - "loss": 1.0044, + "learning_rate": 1.5229387955792616e-05, + "loss": 0.945, "step": 8820 }, { - "epoch": 0.2499645781971719, + "epoch": 0.34513655215588074, "grad_norm": 0.0, - "learning_rate": 1.756803118234041e-05, - "loss": 0.9566, + "learning_rate": 1.5228307763725814e-05, + "loss": 1.1454, "step": 8821 }, { - "epoch": 0.24999291563943438, + "epoch": 0.3451756788481102, "grad_norm": 0.0, - "learning_rate": 1.756743124403076e-05, - "loss": 1.084, + "learning_rate": 1.5227227487698941e-05, + "loss": 1.0679, "step": 8822 }, { - "epoch": 0.25002125308169687, + "epoch": 0.3452148055403396, "grad_norm": 0.0, - "learning_rate": 1.756683124197783e-05, - "loss": 1.0502, + "learning_rate": 1.5226147127729341e-05, + "loss": 0.9807, "step": 8823 }, { - "epoch": 0.2500495905239593, + "epoch": 0.34525393223256906, "grad_norm": 0.0, - "learning_rate": 1.7566231176186664e-05, - "loss": 1.0301, + "learning_rate": 1.522506668383437e-05, + "loss": 1.1486, "step": 8824 }, { - "epoch": 0.25007792796622175, + "epoch": 0.3452930589247985, "grad_norm": 0.0, - "learning_rate": 1.7565631046662328e-05, - "loss": 0.9993, + "learning_rate": 1.5223986156031379e-05, + "loss": 1.009, "step": 8825 }, { - "epoch": 0.25010626540848424, + "epoch": 0.34533218561702794, "grad_norm": 0.0, - "learning_rate": 1.756503085340987e-05, - "loss": 0.9434, + "learning_rate": 1.5222905544337709e-05, + "loss": 1.004, "step": 8826 }, { - "epoch": 0.2501346028507467, + "epoch": 0.3453713123092574, "grad_norm": 0.0, - "learning_rate": 1.7564430596434346e-05, - "loss": 0.9082, + "learning_rate": 1.5221824848770728e-05, + "loss": 1.0406, "step": 8827 }, { - "epoch": 0.2501629402930092, + "epoch": 0.3454104390014868, "grad_norm": 0.0, - "learning_rate": 1.756383027574082e-05, - "loss": 1.0791, + "learning_rate": 1.5220744069347777e-05, + "loss": 0.9498, "step": 8828 }, { - "epoch": 0.2501912777352716, + "epoch": 0.34544956569371627, "grad_norm": 0.0, - "learning_rate": 1.756322989133434e-05, - "loss": 0.9465, + "learning_rate": 1.5219663206086221e-05, + "loss": 1.1246, "step": 8829 }, { - "epoch": 0.25021961517753405, + "epoch": 0.3454886923859457, "grad_norm": 0.0, - "learning_rate": 1.7562629443219964e-05, - "loss": 1.0094, + "learning_rate": 1.5218582259003413e-05, + "loss": 1.0243, "step": 8830 }, { - "epoch": 0.25024795261979654, + "epoch": 0.34552781907817515, "grad_norm": 0.0, - "learning_rate": 1.7562028931402754e-05, - "loss": 1.0856, + "learning_rate": 1.5217501228116715e-05, + "loss": 1.0661, "step": 8831 }, { - "epoch": 0.250276290062059, + "epoch": 0.3455669457704046, "grad_norm": 0.0, - "learning_rate": 1.7561428355887765e-05, - "loss": 1.0853, + "learning_rate": 1.5216420113443484e-05, + "loss": 1.103, "step": 8832 }, { - "epoch": 0.2503046275043215, + "epoch": 0.34560607246263403, "grad_norm": 0.0, - "learning_rate": 1.756082771668006e-05, - "loss": 0.9202, + "learning_rate": 1.5215338915001084e-05, + "loss": 1.0517, "step": 8833 }, { - "epoch": 0.2503329649465839, + "epoch": 0.34564519915486347, "grad_norm": 0.0, - "learning_rate": 1.756022701378469e-05, - "loss": 0.8565, + "learning_rate": 1.5214257632806875e-05, + "loss": 1.1376, "step": 8834 }, { - "epoch": 0.2503613023888464, + "epoch": 0.3456843258470929, "grad_norm": 0.0, - "learning_rate": 1.7559626247206723e-05, - "loss": 0.9851, + "learning_rate": 1.5213176266878223e-05, + "loss": 1.108, "step": 8835 }, { - "epoch": 0.25038963983110885, + "epoch": 0.34572345253932235, "grad_norm": 0.0, - "learning_rate": 1.755902541695122e-05, - "loss": 0.9071, + "learning_rate": 1.5212094817232492e-05, + "loss": 1.1151, "step": 8836 }, { - "epoch": 0.2504179772733713, + "epoch": 0.3457625792315518, "grad_norm": 0.0, - "learning_rate": 1.7558424523023238e-05, - "loss": 1.0063, + "learning_rate": 1.521101328388705e-05, + "loss": 1.1578, "step": 8837 }, { - "epoch": 0.2504463147156338, + "epoch": 0.3458017059237812, "grad_norm": 0.0, - "learning_rate": 1.7557823565427838e-05, - "loss": 0.9538, + "learning_rate": 1.5209931666859263e-05, + "loss": 1.1025, "step": 8838 }, { - "epoch": 0.2504746521578962, + "epoch": 0.3458408326160106, "grad_norm": 0.0, - "learning_rate": 1.755722254417008e-05, - "loss": 0.9835, + "learning_rate": 1.5208849966166503e-05, + "loss": 1.1846, "step": 8839 }, { - "epoch": 0.2505029896001587, + "epoch": 0.34587995930824006, "grad_norm": 0.0, - "learning_rate": 1.7556621459255038e-05, - "loss": 1.0049, + "learning_rate": 1.5207768181826138e-05, + "loss": 1.0379, "step": 8840 }, { - "epoch": 0.25053132704242115, + "epoch": 0.3459190860004695, "grad_norm": 0.0, - "learning_rate": 1.7556020310687762e-05, - "loss": 0.9952, + "learning_rate": 1.5206686313855542e-05, + "loss": 1.0441, "step": 8841 }, { - "epoch": 0.2505596644846836, + "epoch": 0.34595821269269894, "grad_norm": 0.0, - "learning_rate": 1.7555419098473323e-05, - "loss": 1.039, + "learning_rate": 1.5205604362272093e-05, + "loss": 1.2103, "step": 8842 }, { - "epoch": 0.2505880019269461, + "epoch": 0.3459973393849284, "grad_norm": 0.0, - "learning_rate": 1.7554817822616782e-05, - "loss": 0.8627, + "learning_rate": 1.5204522327093157e-05, + "loss": 0.9778, "step": 8843 }, { - "epoch": 0.2506163393692085, + "epoch": 0.3460364660771578, "grad_norm": 0.0, - "learning_rate": 1.7554216483123205e-05, - "loss": 1.0007, + "learning_rate": 1.5203440208336114e-05, + "loss": 1.0892, "step": 8844 }, { - "epoch": 0.250644676811471, + "epoch": 0.34607559276938726, "grad_norm": 0.0, - "learning_rate": 1.755361507999766e-05, - "loss": 1.0656, + "learning_rate": 1.5202358006018342e-05, + "loss": 1.1503, "step": 8845 }, { - "epoch": 0.25067301425373345, + "epoch": 0.3461147194616167, "grad_norm": 0.0, - "learning_rate": 1.7553013613245208e-05, - "loss": 1.0149, + "learning_rate": 1.5201275720157218e-05, + "loss": 0.9611, "step": 8846 }, { - "epoch": 0.25070135169599594, + "epoch": 0.34615384615384615, "grad_norm": 0.0, - "learning_rate": 1.7552412082870916e-05, - "loss": 0.9451, + "learning_rate": 1.5200193350770124e-05, + "loss": 1.0958, "step": 8847 }, { - "epoch": 0.2507296891382584, + "epoch": 0.3461929728460756, "grad_norm": 0.0, - "learning_rate": 1.7551810488879856e-05, - "loss": 0.9518, + "learning_rate": 1.5199110897874442e-05, + "loss": 1.1259, "step": 8848 }, { - "epoch": 0.2507580265805208, + "epoch": 0.346232099538305, "grad_norm": 0.0, - "learning_rate": 1.7551208831277092e-05, - "loss": 1.033, + "learning_rate": 1.5198028361487554e-05, + "loss": 1.0861, "step": 8849 }, { - "epoch": 0.2507863640227833, + "epoch": 0.34627122623053447, "grad_norm": 0.0, - "learning_rate": 1.7550607110067686e-05, - "loss": 1.0366, + "learning_rate": 1.519694574162684e-05, + "loss": 1.1841, "step": 8850 }, { - "epoch": 0.25081470146504575, + "epoch": 0.3463103529227639, "grad_norm": 0.0, - "learning_rate": 1.7550005325256718e-05, - "loss": 0.8785, + "learning_rate": 1.5195863038309698e-05, + "loss": 1.1237, "step": 8851 }, { - "epoch": 0.25084303890730825, + "epoch": 0.34634947961499335, "grad_norm": 0.0, - "learning_rate": 1.7549403476849253e-05, - "loss": 1.007, + "learning_rate": 1.5194780251553497e-05, + "loss": 1.0766, "step": 8852 }, { - "epoch": 0.2508713763495707, + "epoch": 0.3463886063072228, "grad_norm": 0.0, - "learning_rate": 1.754880156485035e-05, - "loss": 1.0218, + "learning_rate": 1.5193697381375641e-05, + "loss": 1.1677, "step": 8853 }, { - "epoch": 0.2508997137918331, + "epoch": 0.34642773299945223, "grad_norm": 0.0, - "learning_rate": 1.7548199589265097e-05, - "loss": 1.0333, + "learning_rate": 1.5192614427793513e-05, + "loss": 1.0162, "step": 8854 }, { - "epoch": 0.2509280512340956, + "epoch": 0.3464668596916817, "grad_norm": 0.0, - "learning_rate": 1.7547597550098558e-05, - "loss": 0.9761, + "learning_rate": 1.5191531390824498e-05, + "loss": 1.2028, "step": 8855 }, { - "epoch": 0.25095638867635806, + "epoch": 0.3465059863839111, "grad_norm": 0.0, - "learning_rate": 1.7546995447355795e-05, - "loss": 0.9664, + "learning_rate": 1.5190448270486e-05, + "loss": 1.2156, "step": 8856 }, { - "epoch": 0.25098472611862055, + "epoch": 0.34654511307614055, "grad_norm": 0.0, - "learning_rate": 1.754639328104189e-05, - "loss": 1.0026, + "learning_rate": 1.5189365066795405e-05, + "loss": 1.0305, "step": 8857 }, { - "epoch": 0.251013063560883, + "epoch": 0.34658423976837, "grad_norm": 0.0, - "learning_rate": 1.754579105116191e-05, - "loss": 1.0254, + "learning_rate": 1.5188281779770111e-05, + "loss": 0.8735, "step": 8858 }, { - "epoch": 0.2510414010031455, + "epoch": 0.34662336646059944, "grad_norm": 0.0, - "learning_rate": 1.7545188757720933e-05, - "loss": 0.9786, + "learning_rate": 1.518719840942751e-05, + "loss": 1.1418, "step": 8859 }, { - "epoch": 0.2510697384454079, + "epoch": 0.3466624931528289, "grad_norm": 0.0, - "learning_rate": 1.754458640072403e-05, - "loss": 0.9319, + "learning_rate": 1.5186114955785005e-05, + "loss": 1.1048, "step": 8860 }, { - "epoch": 0.25109807588767036, + "epoch": 0.3467016198450583, "grad_norm": 0.0, - "learning_rate": 1.7543983980176268e-05, - "loss": 0.9115, + "learning_rate": 1.5185031418859992e-05, + "loss": 1.0775, "step": 8861 }, { - "epoch": 0.25112641332993285, + "epoch": 0.34674074653728776, "grad_norm": 0.0, - "learning_rate": 1.754338149608273e-05, - "loss": 0.9726, + "learning_rate": 1.5183947798669874e-05, + "loss": 1.1228, "step": 8862 }, { - "epoch": 0.2511547507721953, + "epoch": 0.3467798732295172, "grad_norm": 0.0, - "learning_rate": 1.754277894844849e-05, - "loss": 0.9039, + "learning_rate": 1.5182864095232044e-05, + "loss": 1.0997, "step": 8863 }, { - "epoch": 0.2511830882144578, + "epoch": 0.34681899992174664, "grad_norm": 0.0, - "learning_rate": 1.7542176337278623e-05, - "loss": 0.9021, + "learning_rate": 1.5181780308563916e-05, + "loss": 1.1309, "step": 8864 }, { - "epoch": 0.2512114256567202, + "epoch": 0.3468581266139761, "grad_norm": 0.0, - "learning_rate": 1.7541573662578205e-05, - "loss": 1.1199, + "learning_rate": 1.5180696438682887e-05, + "loss": 0.9367, "step": 8865 }, { - "epoch": 0.25123976309898266, + "epoch": 0.34689725330620547, "grad_norm": 0.0, - "learning_rate": 1.754097092435231e-05, - "loss": 0.8827, + "learning_rate": 1.5179612485606366e-05, + "loss": 1.1282, "step": 8866 }, { - "epoch": 0.25126810054124515, + "epoch": 0.3469363799984349, "grad_norm": 0.0, - "learning_rate": 1.754036812260601e-05, - "loss": 0.9763, + "learning_rate": 1.517852844935176e-05, + "loss": 1.0394, "step": 8867 }, { - "epoch": 0.2512964379835076, + "epoch": 0.34697550669066435, "grad_norm": 0.0, - "learning_rate": 1.75397652573444e-05, - "loss": 1.0322, + "learning_rate": 1.5177444329936472e-05, + "loss": 0.999, "step": 8868 }, { - "epoch": 0.2513247754257701, + "epoch": 0.3470146333828938, "grad_norm": 0.0, - "learning_rate": 1.7539162328572543e-05, - "loss": 1.0536, + "learning_rate": 1.517636012737792e-05, + "loss": 1.075, "step": 8869 }, { - "epoch": 0.2513531128680325, + "epoch": 0.34705376007512323, "grad_norm": 0.0, - "learning_rate": 1.7538559336295522e-05, - "loss": 0.982, + "learning_rate": 1.517527584169351e-05, + "loss": 1.0068, "step": 8870 }, { - "epoch": 0.251381450310295, + "epoch": 0.34709288676735267, "grad_norm": 0.0, - "learning_rate": 1.7537956280518417e-05, - "loss": 1.0329, + "learning_rate": 1.5174191472900654e-05, + "loss": 1.0734, "step": 8871 }, { - "epoch": 0.25140978775255746, + "epoch": 0.3471320134595821, "grad_norm": 0.0, - "learning_rate": 1.7537353161246305e-05, - "loss": 0.8557, + "learning_rate": 1.5173107021016766e-05, + "loss": 1.2061, "step": 8872 }, { - "epoch": 0.2514381251948199, + "epoch": 0.34717114015181155, "grad_norm": 0.0, - "learning_rate": 1.753674997848427e-05, - "loss": 1.027, + "learning_rate": 1.5172022486059264e-05, + "loss": 1.0836, "step": 8873 }, { - "epoch": 0.2514664626370824, + "epoch": 0.347210266844041, "grad_norm": 0.0, - "learning_rate": 1.753614673223739e-05, - "loss": 0.9955, + "learning_rate": 1.5170937868045558e-05, + "loss": 1.1163, "step": 8874 }, { - "epoch": 0.2514948000793448, + "epoch": 0.34724939353627043, "grad_norm": 0.0, - "learning_rate": 1.753554342251075e-05, - "loss": 1.0342, + "learning_rate": 1.516985316699307e-05, + "loss": 1.1658, "step": 8875 }, { - "epoch": 0.2515231375216073, + "epoch": 0.3472885202284999, "grad_norm": 0.0, - "learning_rate": 1.7534940049309427e-05, - "loss": 0.9392, + "learning_rate": 1.5168768382919222e-05, + "loss": 1.0613, "step": 8876 }, { - "epoch": 0.25155147496386976, + "epoch": 0.3473276469207293, "grad_norm": 0.0, - "learning_rate": 1.7534336612638508e-05, - "loss": 0.9722, + "learning_rate": 1.5167683515841427e-05, + "loss": 1.0327, "step": 8877 }, { - "epoch": 0.2515798124061322, + "epoch": 0.34736677361295876, "grad_norm": 0.0, - "learning_rate": 1.7533733112503073e-05, - "loss": 1.0148, + "learning_rate": 1.5166598565777113e-05, + "loss": 1.061, "step": 8878 }, { - "epoch": 0.2516081498483947, + "epoch": 0.3474059003051882, "grad_norm": 0.0, - "learning_rate": 1.7533129548908205e-05, - "loss": 0.9904, + "learning_rate": 1.5165513532743696e-05, + "loss": 1.1433, "step": 8879 }, { - "epoch": 0.25163648729065713, + "epoch": 0.34744502699741764, "grad_norm": 0.0, - "learning_rate": 1.7532525921858988e-05, - "loss": 0.9356, + "learning_rate": 1.516442841675861e-05, + "loss": 1.1007, "step": 8880 }, { - "epoch": 0.2516648247329196, + "epoch": 0.3474841536896471, "grad_norm": 0.0, - "learning_rate": 1.7531922231360515e-05, - "loss": 0.8821, + "learning_rate": 1.516334321783927e-05, + "loss": 0.9626, "step": 8881 }, { - "epoch": 0.25169316217518206, + "epoch": 0.3475232803818765, "grad_norm": 0.0, - "learning_rate": 1.7531318477417858e-05, - "loss": 1.0133, + "learning_rate": 1.5162257936003112e-05, + "loss": 1.1428, "step": 8882 }, { - "epoch": 0.25172149961744456, + "epoch": 0.34756240707410596, "grad_norm": 0.0, - "learning_rate": 1.7530714660036112e-05, - "loss": 0.9321, + "learning_rate": 1.516117257126756e-05, + "loss": 1.1009, "step": 8883 }, { - "epoch": 0.251749837059707, + "epoch": 0.3476015337663354, "grad_norm": 0.0, - "learning_rate": 1.7530110779220358e-05, - "loss": 0.9274, + "learning_rate": 1.5160087123650041e-05, + "loss": 1.1638, "step": 8884 }, { - "epoch": 0.25177817450196943, + "epoch": 0.34764066045856484, "grad_norm": 0.0, - "learning_rate": 1.7529506834975686e-05, - "loss": 1.0283, + "learning_rate": 1.515900159316799e-05, + "loss": 1.1598, "step": 8885 }, { - "epoch": 0.2518065119442319, + "epoch": 0.3476797871507943, "grad_norm": 0.0, - "learning_rate": 1.752890282730718e-05, - "loss": 1.0468, + "learning_rate": 1.5157915979838835e-05, + "loss": 1.2122, "step": 8886 }, { - "epoch": 0.25183484938649436, + "epoch": 0.3477189138430237, "grad_norm": 0.0, - "learning_rate": 1.752829875621993e-05, - "loss": 1.0485, + "learning_rate": 1.5156830283680017e-05, + "loss": 1.1116, "step": 8887 }, { - "epoch": 0.25186318682875686, + "epoch": 0.34775804053525317, "grad_norm": 0.0, - "learning_rate": 1.7527694621719024e-05, - "loss": 0.9546, + "learning_rate": 1.5155744504708964e-05, + "loss": 1.1711, "step": 8888 }, { - "epoch": 0.2518915242710193, + "epoch": 0.3477971672274826, "grad_norm": 0.0, - "learning_rate": 1.7527090423809553e-05, - "loss": 1.0159, + "learning_rate": 1.5154658642943116e-05, + "loss": 1.0488, "step": 8889 }, { - "epoch": 0.25191986171328173, + "epoch": 0.34783629391971205, "grad_norm": 0.0, - "learning_rate": 1.7526486162496604e-05, - "loss": 1.016, + "learning_rate": 1.515357269839991e-05, + "loss": 1.1047, "step": 8890 }, { - "epoch": 0.25194819915554423, + "epoch": 0.3478754206119415, "grad_norm": 0.0, - "learning_rate": 1.7525881837785264e-05, - "loss": 0.9186, + "learning_rate": 1.5152486671096785e-05, + "loss": 1.0899, "step": 8891 }, { - "epoch": 0.25197653659780667, + "epoch": 0.34791454730417093, "grad_norm": 0.0, - "learning_rate": 1.752527744968063e-05, - "loss": 0.9149, + "learning_rate": 1.5151400561051177e-05, + "loss": 1.2236, "step": 8892 }, { - "epoch": 0.25200487404006916, + "epoch": 0.34795367399640037, "grad_norm": 0.0, - "learning_rate": 1.7524672998187788e-05, - "loss": 1.0427, + "learning_rate": 1.5150314368280535e-05, + "loss": 1.0468, "step": 8893 }, { - "epoch": 0.2520332114823316, + "epoch": 0.3479928006886298, "grad_norm": 0.0, - "learning_rate": 1.7524068483311832e-05, - "loss": 0.9424, + "learning_rate": 1.5149228092802296e-05, + "loss": 1.0577, "step": 8894 }, { - "epoch": 0.2520615489245941, + "epoch": 0.3480319273808592, "grad_norm": 0.0, - "learning_rate": 1.7523463905057853e-05, - "loss": 1.1085, + "learning_rate": 1.5148141734633903e-05, + "loss": 1.1078, "step": 8895 }, { - "epoch": 0.25208988636685653, + "epoch": 0.34807105407308864, "grad_norm": 0.0, - "learning_rate": 1.7522859263430942e-05, - "loss": 1.032, + "learning_rate": 1.514705529379281e-05, + "loss": 1.1443, "step": 8896 }, { - "epoch": 0.25211822380911897, + "epoch": 0.3481101807653181, "grad_norm": 0.0, - "learning_rate": 1.7522254558436195e-05, - "loss": 0.9312, + "learning_rate": 1.5145968770296456e-05, + "loss": 1.1016, "step": 8897 }, { - "epoch": 0.25214656125138146, + "epoch": 0.3481493074575475, "grad_norm": 0.0, - "learning_rate": 1.7521649790078705e-05, - "loss": 0.9683, + "learning_rate": 1.514488216416229e-05, + "loss": 1.1123, "step": 8898 }, { - "epoch": 0.2521748986936439, + "epoch": 0.34818843414977696, "grad_norm": 0.0, - "learning_rate": 1.7521044958363567e-05, - "loss": 0.9224, + "learning_rate": 1.5143795475407767e-05, + "loss": 1.124, "step": 8899 }, { - "epoch": 0.2522032361359064, + "epoch": 0.3482275608420064, "grad_norm": 0.0, - "learning_rate": 1.7520440063295874e-05, - "loss": 0.938, + "learning_rate": 1.5142708704050333e-05, + "loss": 0.9843, "step": 8900 }, { - "epoch": 0.25223157357816883, + "epoch": 0.34826668753423584, "grad_norm": 0.0, - "learning_rate": 1.751983510488072e-05, - "loss": 0.8417, + "learning_rate": 1.5141621850107441e-05, + "loss": 1.249, "step": 8901 }, { - "epoch": 0.25225991102043127, + "epoch": 0.3483058142264653, "grad_norm": 0.0, - "learning_rate": 1.75192300831232e-05, - "loss": 0.9411, + "learning_rate": 1.5140534913596545e-05, + "loss": 1.2092, "step": 8902 }, { - "epoch": 0.25228824846269376, + "epoch": 0.3483449409186947, "grad_norm": 0.0, - "learning_rate": 1.751862499802842e-05, - "loss": 0.9693, + "learning_rate": 1.5139447894535102e-05, + "loss": 1.0971, "step": 8903 }, { - "epoch": 0.2523165859049562, + "epoch": 0.34838406761092416, "grad_norm": 0.0, - "learning_rate": 1.7518019849601466e-05, - "loss": 0.9898, + "learning_rate": 1.5138360792940562e-05, + "loss": 1.0854, "step": 8904 }, { - "epoch": 0.2523449233472187, + "epoch": 0.3484231943031536, "grad_norm": 0.0, - "learning_rate": 1.7517414637847435e-05, - "loss": 1.0319, + "learning_rate": 1.5137273608830387e-05, + "loss": 1.1248, "step": 8905 }, { - "epoch": 0.25237326078948114, + "epoch": 0.34846232099538305, "grad_norm": 0.0, - "learning_rate": 1.7516809362771434e-05, - "loss": 0.8142, + "learning_rate": 1.5136186342222038e-05, + "loss": 1.0826, "step": 8906 }, { - "epoch": 0.25240159823174363, + "epoch": 0.3485014476876125, "grad_norm": 0.0, - "learning_rate": 1.7516204024378555e-05, - "loss": 0.9356, + "learning_rate": 1.5135098993132969e-05, + "loss": 0.9735, "step": 8907 }, { - "epoch": 0.25242993567400607, + "epoch": 0.3485405743798419, "grad_norm": 0.0, - "learning_rate": 1.75155986226739e-05, - "loss": 0.9771, + "learning_rate": 1.5134011561580648e-05, + "loss": 1.2521, "step": 8908 }, { - "epoch": 0.2524582731162685, + "epoch": 0.34857970107207137, "grad_norm": 0.0, - "learning_rate": 1.7514993157662564e-05, - "loss": 1.0602, + "learning_rate": 1.5132924047582533e-05, + "loss": 1.1149, "step": 8909 }, { - "epoch": 0.252486610558531, + "epoch": 0.3486188277643008, "grad_norm": 0.0, - "learning_rate": 1.7514387629349655e-05, - "loss": 0.9417, + "learning_rate": 1.5131836451156089e-05, + "loss": 1.1162, "step": 8910 }, { - "epoch": 0.25251494800079344, + "epoch": 0.34865795445653025, "grad_norm": 0.0, - "learning_rate": 1.751378203774026e-05, - "loss": 0.9658, + "learning_rate": 1.5130748772318784e-05, + "loss": 1.1298, "step": 8911 }, { - "epoch": 0.25254328544305593, + "epoch": 0.3486970811487597, "grad_norm": 0.0, - "learning_rate": 1.7513176382839496e-05, - "loss": 0.8992, + "learning_rate": 1.5129661011088082e-05, + "loss": 1.1145, "step": 8912 }, { - "epoch": 0.25257162288531837, + "epoch": 0.34873620784098913, "grad_norm": 0.0, - "learning_rate": 1.7512570664652456e-05, - "loss": 1.0089, + "learning_rate": 1.512857316748145e-05, + "loss": 1.0497, "step": 8913 }, { - "epoch": 0.2525999603275808, + "epoch": 0.3487753345332186, "grad_norm": 0.0, - "learning_rate": 1.751196488318424e-05, - "loss": 1.1005, + "learning_rate": 1.5127485241516362e-05, + "loss": 1.1701, "step": 8914 }, { - "epoch": 0.2526282977698433, + "epoch": 0.348814461225448, "grad_norm": 0.0, - "learning_rate": 1.751135903843996e-05, - "loss": 1.0483, + "learning_rate": 1.5126397233210286e-05, + "loss": 1.0134, "step": 8915 }, { - "epoch": 0.25265663521210574, + "epoch": 0.34885358791767745, "grad_norm": 0.0, - "learning_rate": 1.751075313042471e-05, - "loss": 0.9622, + "learning_rate": 1.5125309142580694e-05, + "loss": 1.1217, "step": 8916 }, { - "epoch": 0.25268497265436823, + "epoch": 0.3488927146099069, "grad_norm": 0.0, - "learning_rate": 1.7510147159143598e-05, - "loss": 1.0704, + "learning_rate": 1.5124220969645059e-05, + "loss": 1.1891, "step": 8917 }, { - "epoch": 0.25271331009663067, + "epoch": 0.34893184130213634, "grad_norm": 0.0, - "learning_rate": 1.750954112460173e-05, - "loss": 0.9399, + "learning_rate": 1.5123132714420856e-05, + "loss": 1.14, "step": 8918 }, { - "epoch": 0.25274164753889317, + "epoch": 0.3489709679943658, "grad_norm": 0.0, - "learning_rate": 1.7508935026804202e-05, - "loss": 1.0589, + "learning_rate": 1.5122044376925559e-05, + "loss": 1.0649, "step": 8919 }, { - "epoch": 0.2527699849811556, + "epoch": 0.3490100946865952, "grad_norm": 0.0, - "learning_rate": 1.750832886575613e-05, - "loss": 0.9965, + "learning_rate": 1.5120955957176651e-05, + "loss": 1.0465, "step": 8920 }, { - "epoch": 0.25279832242341804, + "epoch": 0.34904922137882466, "grad_norm": 0.0, - "learning_rate": 1.750772264146262e-05, - "loss": 1.0163, + "learning_rate": 1.5119867455191607e-05, + "loss": 1.1651, "step": 8921 }, { - "epoch": 0.25282665986568054, + "epoch": 0.3490883480710541, "grad_norm": 0.0, - "learning_rate": 1.7507116353928767e-05, - "loss": 1.031, + "learning_rate": 1.5118778870987906e-05, + "loss": 1.1172, "step": 8922 }, { - "epoch": 0.252854997307943, + "epoch": 0.3491274747632835, "grad_norm": 0.0, - "learning_rate": 1.7506510003159687e-05, - "loss": 0.9787, + "learning_rate": 1.5117690204583033e-05, + "loss": 1.1258, "step": 8923 }, { - "epoch": 0.25288333475020547, + "epoch": 0.3491666014555129, "grad_norm": 0.0, - "learning_rate": 1.7505903589160488e-05, - "loss": 1.0089, + "learning_rate": 1.5116601455994464e-05, + "loss": 1.0944, "step": 8924 }, { - "epoch": 0.2529116721924679, + "epoch": 0.34920572814774237, "grad_norm": 0.0, - "learning_rate": 1.7505297111936273e-05, - "loss": 0.9874, + "learning_rate": 1.511551262523969e-05, + "loss": 1.1361, "step": 8925 }, { - "epoch": 0.25294000963473035, + "epoch": 0.3492448548399718, "grad_norm": 0.0, - "learning_rate": 1.750469057149216e-05, - "loss": 0.8704, + "learning_rate": 1.5114423712336193e-05, + "loss": 1.1822, "step": 8926 }, { - "epoch": 0.25296834707699284, + "epoch": 0.34928398153220125, "grad_norm": 0.0, - "learning_rate": 1.7504083967833246e-05, - "loss": 0.9461, + "learning_rate": 1.5113334717301458e-05, + "loss": 1.1159, "step": 8927 }, { - "epoch": 0.2529966845192553, + "epoch": 0.3493231082244307, "grad_norm": 0.0, - "learning_rate": 1.7503477300964643e-05, - "loss": 0.9249, + "learning_rate": 1.5112245640152975e-05, + "loss": 1.1276, "step": 8928 }, { - "epoch": 0.25302502196151777, + "epoch": 0.34936223491666013, "grad_norm": 0.0, - "learning_rate": 1.750287057089147e-05, - "loss": 0.9179, + "learning_rate": 1.5111156480908236e-05, + "loss": 0.8925, "step": 8929 }, { - "epoch": 0.2530533594037802, + "epoch": 0.34940136160888957, "grad_norm": 0.0, - "learning_rate": 1.7502263777618833e-05, - "loss": 0.9538, + "learning_rate": 1.5110067239584725e-05, + "loss": 0.9465, "step": 8930 }, { - "epoch": 0.2530816968460427, + "epoch": 0.349440488301119, "grad_norm": 0.0, - "learning_rate": 1.750165692115184e-05, - "loss": 1.0073, + "learning_rate": 1.5108977916199941e-05, + "loss": 1.2056, "step": 8931 }, { - "epoch": 0.25311003428830514, + "epoch": 0.34947961499334845, "grad_norm": 0.0, - "learning_rate": 1.7501050001495603e-05, - "loss": 1.0394, + "learning_rate": 1.5107888510771374e-05, + "loss": 1.1422, "step": 8932 }, { - "epoch": 0.2531383717305676, + "epoch": 0.3495187416855779, "grad_norm": 0.0, - "learning_rate": 1.7500443018655237e-05, - "loss": 0.9763, + "learning_rate": 1.5106799023316514e-05, + "loss": 1.1007, "step": 8933 }, { - "epoch": 0.2531667091728301, + "epoch": 0.34955786837780733, "grad_norm": 0.0, - "learning_rate": 1.749983597263586e-05, - "loss": 0.9826, + "learning_rate": 1.5105709453852863e-05, + "loss": 1.0708, "step": 8934 }, { - "epoch": 0.2531950466150925, + "epoch": 0.3495969950700368, "grad_norm": 0.0, - "learning_rate": 1.749922886344257e-05, - "loss": 1.0378, + "learning_rate": 1.5104619802397916e-05, + "loss": 1.1021, "step": 8935 }, { - "epoch": 0.253223384057355, + "epoch": 0.3496361217622662, "grad_norm": 0.0, - "learning_rate": 1.7498621691080497e-05, - "loss": 1.011, + "learning_rate": 1.5103530068969172e-05, + "loss": 1.16, "step": 8936 }, { - "epoch": 0.25325172149961744, + "epoch": 0.34967524845449566, "grad_norm": 0.0, - "learning_rate": 1.749801445555475e-05, - "loss": 0.9242, + "learning_rate": 1.5102440253584128e-05, + "loss": 1.0784, "step": 8937 }, { - "epoch": 0.2532800589418799, + "epoch": 0.3497143751467251, "grad_norm": 0.0, - "learning_rate": 1.749740715687044e-05, - "loss": 1.0359, + "learning_rate": 1.510135035626029e-05, + "loss": 1.1576, "step": 8938 }, { - "epoch": 0.2533083963841424, + "epoch": 0.34975350183895454, "grad_norm": 0.0, - "learning_rate": 1.7496799795032685e-05, - "loss": 0.9464, + "learning_rate": 1.5100260377015155e-05, + "loss": 1.0701, "step": 8939 }, { - "epoch": 0.2533367338264048, + "epoch": 0.349792628531184, "grad_norm": 0.0, - "learning_rate": 1.7496192370046602e-05, - "loss": 1.0431, + "learning_rate": 1.509917031586623e-05, + "loss": 1.2122, "step": 8940 }, { - "epoch": 0.2533650712686673, + "epoch": 0.3498317552234134, "grad_norm": 0.0, - "learning_rate": 1.7495584881917307e-05, - "loss": 0.8592, + "learning_rate": 1.5098080172831022e-05, + "loss": 0.9686, "step": 8941 }, { - "epoch": 0.25339340871092975, + "epoch": 0.34987088191564286, "grad_norm": 0.0, - "learning_rate": 1.7494977330649917e-05, - "loss": 0.9219, + "learning_rate": 1.5096989947927032e-05, + "loss": 1.186, "step": 8942 }, { - "epoch": 0.25342174615319224, + "epoch": 0.3499100086078723, "grad_norm": 0.0, - "learning_rate": 1.749436971624955e-05, - "loss": 1.0256, + "learning_rate": 1.509589964117177e-05, + "loss": 1.0649, "step": 8943 }, { - "epoch": 0.2534500835954547, + "epoch": 0.34994913530010174, "grad_norm": 0.0, - "learning_rate": 1.7493762038721326e-05, - "loss": 1.0442, + "learning_rate": 1.5094809252582744e-05, + "loss": 1.1041, "step": 8944 }, { - "epoch": 0.2534784210377171, + "epoch": 0.3499882619923312, "grad_norm": 0.0, - "learning_rate": 1.7493154298070357e-05, - "loss": 1.0061, + "learning_rate": 1.509371878217747e-05, + "loss": 1.2971, "step": 8945 }, { - "epoch": 0.2535067584799796, + "epoch": 0.3500273886845606, "grad_norm": 0.0, - "learning_rate": 1.749254649430177e-05, - "loss": 0.8333, + "learning_rate": 1.5092628229973452e-05, + "loss": 1.0524, "step": 8946 }, { - "epoch": 0.25353509592224205, + "epoch": 0.35006651537679007, "grad_norm": 0.0, - "learning_rate": 1.749193862742068e-05, - "loss": 1.0479, + "learning_rate": 1.5091537595988208e-05, + "loss": 1.0392, "step": 8947 }, { - "epoch": 0.25356343336450454, + "epoch": 0.3501056420690195, "grad_norm": 0.0, - "learning_rate": 1.7491330697432213e-05, - "loss": 0.9428, + "learning_rate": 1.5090446880239249e-05, + "loss": 1.2343, "step": 8948 }, { - "epoch": 0.253591770806767, + "epoch": 0.35014476876124895, "grad_norm": 0.0, - "learning_rate": 1.749072270434148e-05, - "loss": 0.9365, + "learning_rate": 1.5089356082744093e-05, + "loss": 0.9917, "step": 8949 }, { - "epoch": 0.2536201082490294, + "epoch": 0.3501838954534784, "grad_norm": 0.0, - "learning_rate": 1.7490114648153615e-05, - "loss": 1.0727, + "learning_rate": 1.5088265203520254e-05, + "loss": 1.1761, "step": 8950 }, { - "epoch": 0.2536484456912919, + "epoch": 0.3502230221457078, "grad_norm": 0.0, - "learning_rate": 1.7489506528873724e-05, - "loss": 0.9582, + "learning_rate": 1.5087174242585251e-05, + "loss": 1.1575, "step": 8951 }, { - "epoch": 0.25367678313355435, + "epoch": 0.3502621488379372, "grad_norm": 0.0, - "learning_rate": 1.7488898346506948e-05, - "loss": 1.0581, + "learning_rate": 1.5086083199956608e-05, + "loss": 1.0918, "step": 8952 }, { - "epoch": 0.25370512057581684, + "epoch": 0.35030127553016666, "grad_norm": 0.0, - "learning_rate": 1.7488290101058392e-05, - "loss": 0.8865, + "learning_rate": 1.5084992075651838e-05, + "loss": 1.1994, "step": 8953 }, { - "epoch": 0.2537334580180793, + "epoch": 0.3503404022223961, "grad_norm": 0.0, - "learning_rate": 1.748768179253319e-05, - "loss": 1.0156, + "learning_rate": 1.5083900869688473e-05, + "loss": 1.1102, "step": 8954 }, { - "epoch": 0.2537617954603418, + "epoch": 0.35037952891462554, "grad_norm": 0.0, - "learning_rate": 1.7487073420936466e-05, - "loss": 0.9869, + "learning_rate": 1.5082809582084023e-05, + "loss": 1.1156, "step": 8955 }, { - "epoch": 0.2537901329026042, + "epoch": 0.350418655606855, "grad_norm": 0.0, - "learning_rate": 1.748646498627334e-05, - "loss": 1.0666, + "learning_rate": 1.5081718212856025e-05, + "loss": 1.1978, "step": 8956 }, { - "epoch": 0.25381847034486665, + "epoch": 0.3504577822990844, "grad_norm": 0.0, - "learning_rate": 1.7485856488548944e-05, - "loss": 1.0115, + "learning_rate": 1.5080626762021997e-05, + "loss": 1.1661, "step": 8957 }, { - "epoch": 0.25384680778712915, + "epoch": 0.35049690899131386, "grad_norm": 0.0, - "learning_rate": 1.7485247927768393e-05, - "loss": 0.9426, + "learning_rate": 1.507953522959947e-05, + "loss": 1.1302, "step": 8958 }, { - "epoch": 0.2538751452293916, + "epoch": 0.3505360356835433, "grad_norm": 0.0, - "learning_rate": 1.7484639303936823e-05, - "loss": 1.0776, + "learning_rate": 1.5078443615605976e-05, + "loss": 1.1003, "step": 8959 }, { - "epoch": 0.2539034826716541, + "epoch": 0.35057516237577274, "grad_norm": 0.0, - "learning_rate": 1.7484030617059354e-05, - "loss": 1.0192, + "learning_rate": 1.507735192005904e-05, + "loss": 1.0915, "step": 8960 }, { - "epoch": 0.2539318201139165, + "epoch": 0.3506142890680022, "grad_norm": 0.0, - "learning_rate": 1.748342186714112e-05, - "loss": 0.9797, + "learning_rate": 1.5076260142976194e-05, + "loss": 1.2153, "step": 8961 }, { - "epoch": 0.25396015755617896, + "epoch": 0.3506534157602316, "grad_norm": 0.0, - "learning_rate": 1.7482813054187242e-05, - "loss": 0.9195, + "learning_rate": 1.5075168284374968e-05, + "loss": 1.0552, "step": 8962 }, { - "epoch": 0.25398849499844145, + "epoch": 0.35069254245246106, "grad_norm": 0.0, - "learning_rate": 1.748220417820285e-05, - "loss": 0.8775, + "learning_rate": 1.50740763442729e-05, + "loss": 1.2354, "step": 8963 }, { - "epoch": 0.2540168324407039, + "epoch": 0.3507316691446905, "grad_norm": 0.0, - "learning_rate": 1.7481595239193073e-05, - "loss": 0.953, + "learning_rate": 1.5072984322687526e-05, + "loss": 1.1366, "step": 8964 }, { - "epoch": 0.2540451698829664, + "epoch": 0.35077079583691995, "grad_norm": 0.0, - "learning_rate": 1.7480986237163044e-05, - "loss": 0.8688, + "learning_rate": 1.5071892219636382e-05, + "loss": 1.1124, "step": 8965 }, { - "epoch": 0.2540735073252288, + "epoch": 0.3508099225291494, "grad_norm": 0.0, - "learning_rate": 1.748037717211789e-05, - "loss": 0.9865, + "learning_rate": 1.5070800035136998e-05, + "loss": 1.1413, "step": 8966 }, { - "epoch": 0.2541018447674913, + "epoch": 0.35084904922137883, "grad_norm": 0.0, - "learning_rate": 1.7479768044062743e-05, - "loss": 0.9199, + "learning_rate": 1.5069707769206926e-05, + "loss": 1.0641, "step": 8967 }, { - "epoch": 0.25413018220975375, + "epoch": 0.35088817591360827, "grad_norm": 0.0, - "learning_rate": 1.7479158853002726e-05, - "loss": 0.9859, + "learning_rate": 1.5068615421863697e-05, + "loss": 0.9123, "step": 8968 }, { - "epoch": 0.2541585196520162, + "epoch": 0.3509273026058377, "grad_norm": 0.0, - "learning_rate": 1.7478549598942983e-05, - "loss": 0.843, + "learning_rate": 1.5067522993124856e-05, + "loss": 0.9941, "step": 8969 }, { - "epoch": 0.2541868570942787, + "epoch": 0.35096642929806715, "grad_norm": 0.0, - "learning_rate": 1.7477940281888635e-05, - "loss": 0.898, + "learning_rate": 1.5066430483007949e-05, + "loss": 1.1542, "step": 8970 }, { - "epoch": 0.2542151945365411, + "epoch": 0.3510055559902966, "grad_norm": 0.0, - "learning_rate": 1.747733090184482e-05, - "loss": 1.0404, + "learning_rate": 1.5065337891530511e-05, + "loss": 1.1447, "step": 8971 }, { - "epoch": 0.2542435319788036, + "epoch": 0.35104468268252603, "grad_norm": 0.0, - "learning_rate": 1.7476721458816672e-05, - "loss": 1.0886, + "learning_rate": 1.5064245218710099e-05, + "loss": 1.1171, "step": 8972 }, { - "epoch": 0.25427186942106605, + "epoch": 0.3510838093747555, "grad_norm": 0.0, - "learning_rate": 1.747611195280932e-05, - "loss": 1.0102, + "learning_rate": 1.506315246456425e-05, + "loss": 1.058, "step": 8973 }, { - "epoch": 0.2543002068633285, + "epoch": 0.3511229360669849, "grad_norm": 0.0, - "learning_rate": 1.7475502383827906e-05, - "loss": 0.9126, + "learning_rate": 1.5062059629110518e-05, + "loss": 1.128, "step": 8974 }, { - "epoch": 0.254328544305591, + "epoch": 0.35116206275921436, "grad_norm": 0.0, - "learning_rate": 1.7474892751877553e-05, - "loss": 1.0552, + "learning_rate": 1.5060966712366454e-05, + "loss": 1.1346, "step": 8975 }, { - "epoch": 0.2543568817478534, + "epoch": 0.3512011894514438, "grad_norm": 0.0, - "learning_rate": 1.747428305696341e-05, - "loss": 0.8888, + "learning_rate": 1.5059873714349606e-05, + "loss": 1.1942, "step": 8976 }, { - "epoch": 0.2543852191901159, + "epoch": 0.35124031614367324, "grad_norm": 0.0, - "learning_rate": 1.7473673299090598e-05, - "loss": 0.9364, + "learning_rate": 1.5058780635077525e-05, + "loss": 1.139, "step": 8977 }, { - "epoch": 0.25441355663237836, + "epoch": 0.3512794428359027, "grad_norm": 0.0, - "learning_rate": 1.7473063478264264e-05, - "loss": 1.0481, + "learning_rate": 1.5057687474567769e-05, + "loss": 1.2609, "step": 8978 }, { - "epoch": 0.2544418940746408, + "epoch": 0.3513185695281321, "grad_norm": 0.0, - "learning_rate": 1.747245359448954e-05, - "loss": 1.108, + "learning_rate": 1.5056594232837884e-05, + "loss": 1.1246, "step": 8979 }, { - "epoch": 0.2544702315169033, + "epoch": 0.3513576962203615, "grad_norm": 0.0, - "learning_rate": 1.7471843647771565e-05, - "loss": 0.9456, + "learning_rate": 1.5055500909905437e-05, + "loss": 1.2423, "step": 8980 }, { - "epoch": 0.2544985689591657, + "epoch": 0.35139682291259094, "grad_norm": 0.0, - "learning_rate": 1.747123363811548e-05, - "loss": 0.988, + "learning_rate": 1.505440750578798e-05, + "loss": 1.0763, "step": 8981 }, { - "epoch": 0.2545269064014282, + "epoch": 0.3514359496048204, "grad_norm": 0.0, - "learning_rate": 1.7470623565526414e-05, - "loss": 0.9526, + "learning_rate": 1.505331402050307e-05, + "loss": 1.1002, "step": 8982 }, { - "epoch": 0.25455524384369066, + "epoch": 0.3514750762970498, "grad_norm": 0.0, - "learning_rate": 1.7470013430009512e-05, - "loss": 0.9218, + "learning_rate": 1.5052220454068267e-05, + "loss": 1.2033, "step": 8983 }, { - "epoch": 0.25458358128595315, + "epoch": 0.35151420298927927, "grad_norm": 0.0, - "learning_rate": 1.7469403231569918e-05, - "loss": 0.9936, + "learning_rate": 1.5051126806501137e-05, + "loss": 1.1677, "step": 8984 }, { - "epoch": 0.2546119187282156, + "epoch": 0.3515533296815087, "grad_norm": 0.0, - "learning_rate": 1.7468792970212764e-05, - "loss": 0.9933, + "learning_rate": 1.505003307781924e-05, + "loss": 1.1642, "step": 8985 }, { - "epoch": 0.25464025617047803, + "epoch": 0.35159245637373815, "grad_norm": 0.0, - "learning_rate": 1.7468182645943193e-05, - "loss": 0.9103, + "learning_rate": 1.504893926804014e-05, + "loss": 1.0706, "step": 8986 }, { - "epoch": 0.2546685936127405, + "epoch": 0.3516315830659676, "grad_norm": 0.0, - "learning_rate": 1.7467572258766345e-05, - "loss": 1.0228, + "learning_rate": 1.5047845377181403e-05, + "loss": 1.1858, "step": 8987 }, { - "epoch": 0.25469693105500296, + "epoch": 0.35167070975819703, "grad_norm": 0.0, - "learning_rate": 1.7466961808687367e-05, - "loss": 0.9895, + "learning_rate": 1.5046751405260592e-05, + "loss": 1.0515, "step": 8988 }, { - "epoch": 0.25472526849726546, + "epoch": 0.35170983645042647, "grad_norm": 0.0, - "learning_rate": 1.746635129571139e-05, - "loss": 1.0419, + "learning_rate": 1.504565735229528e-05, + "loss": 1.1455, "step": 8989 }, { - "epoch": 0.2547536059395279, + "epoch": 0.3517489631426559, "grad_norm": 0.0, - "learning_rate": 1.746574071984357e-05, - "loss": 0.985, + "learning_rate": 1.5044563218303032e-05, + "loss": 0.9571, "step": 8990 }, { - "epoch": 0.25478194338179033, + "epoch": 0.35178808983488535, "grad_norm": 0.0, - "learning_rate": 1.746513008108904e-05, - "loss": 0.9623, + "learning_rate": 1.5043469003301417e-05, + "loss": 1.1066, "step": 8991 }, { - "epoch": 0.2548102808240528, + "epoch": 0.3518272165271148, "grad_norm": 0.0, - "learning_rate": 1.746451937945295e-05, - "loss": 1.0104, + "learning_rate": 1.5042374707308014e-05, + "loss": 1.0579, "step": 8992 }, { - "epoch": 0.25483861826631526, + "epoch": 0.35186634321934424, "grad_norm": 0.0, - "learning_rate": 1.746390861494044e-05, - "loss": 1.0494, + "learning_rate": 1.5041280330340391e-05, + "loss": 1.0148, "step": 8993 }, { - "epoch": 0.25486695570857776, + "epoch": 0.3519054699115737, "grad_norm": 0.0, - "learning_rate": 1.7463297787556656e-05, - "loss": 0.9316, + "learning_rate": 1.5040185872416122e-05, + "loss": 1.0348, "step": 8994 }, { - "epoch": 0.2548952931508402, + "epoch": 0.3519445966038031, "grad_norm": 0.0, - "learning_rate": 1.746268689730674e-05, - "loss": 1.0141, + "learning_rate": 1.5039091333552783e-05, + "loss": 1.1579, "step": 8995 }, { - "epoch": 0.2549236305931027, + "epoch": 0.35198372329603256, "grad_norm": 0.0, - "learning_rate": 1.7462075944195848e-05, - "loss": 0.9817, + "learning_rate": 1.5037996713767956e-05, + "loss": 0.9099, "step": 8996 }, { - "epoch": 0.25495196803536513, + "epoch": 0.352022849988262, "grad_norm": 0.0, - "learning_rate": 1.7461464928229116e-05, - "loss": 0.9633, + "learning_rate": 1.5036902013079209e-05, + "loss": 1.2559, "step": 8997 }, { - "epoch": 0.25498030547762757, + "epoch": 0.35206197668049144, "grad_norm": 0.0, - "learning_rate": 1.7460853849411692e-05, - "loss": 0.9532, + "learning_rate": 1.503580723150413e-05, + "loss": 1.076, "step": 8998 }, { - "epoch": 0.25500864291989006, + "epoch": 0.3521011033727209, "grad_norm": 0.0, - "learning_rate": 1.746024270774873e-05, - "loss": 1.0372, + "learning_rate": 1.5034712369060297e-05, + "loss": 1.018, "step": 8999 }, { - "epoch": 0.2550369803621525, + "epoch": 0.3521402300649503, "grad_norm": 0.0, - "learning_rate": 1.745963150324537e-05, - "loss": 1.0228, + "learning_rate": 1.5033617425765288e-05, + "loss": 1.0664, "step": 9000 }, { - "epoch": 0.255065317804415, + "epoch": 0.35217935675717976, "grad_norm": 0.0, - "learning_rate": 1.745902023590676e-05, - "loss": 0.9158, + "learning_rate": 1.5032522401636698e-05, + "loss": 0.9784, "step": 9001 }, { - "epoch": 0.25509365524667743, + "epoch": 0.3522184834494092, "grad_norm": 0.0, - "learning_rate": 1.7458408905738064e-05, - "loss": 0.9378, + "learning_rate": 1.5031427296692096e-05, + "loss": 1.1314, "step": 9002 }, { - "epoch": 0.25512199268893987, + "epoch": 0.35225761014163864, "grad_norm": 0.0, - "learning_rate": 1.745779751274441e-05, - "loss": 1.0021, + "learning_rate": 1.5030332110949081e-05, + "loss": 1.1762, "step": 9003 }, { - "epoch": 0.25515033013120236, + "epoch": 0.3522967368338681, "grad_norm": 0.0, - "learning_rate": 1.7457186056930963e-05, - "loss": 0.9874, + "learning_rate": 1.5029236844425236e-05, + "loss": 1.2505, "step": 9004 }, { - "epoch": 0.2551786675734648, + "epoch": 0.3523358635260975, "grad_norm": 0.0, - "learning_rate": 1.745657453830287e-05, - "loss": 1.0591, + "learning_rate": 1.5028141497138151e-05, + "loss": 1.2623, "step": 9005 }, { - "epoch": 0.2552070050157273, + "epoch": 0.35237499021832697, "grad_norm": 0.0, - "learning_rate": 1.7455962956865273e-05, - "loss": 0.9651, + "learning_rate": 1.5027046069105411e-05, + "loss": 1.049, "step": 9006 }, { - "epoch": 0.25523534245798973, + "epoch": 0.3524141169105564, "grad_norm": 0.0, - "learning_rate": 1.745535131262334e-05, - "loss": 0.971, + "learning_rate": 1.502595056034461e-05, + "loss": 1.0488, "step": 9007 }, { - "epoch": 0.2552636799002522, + "epoch": 0.3524532436027858, "grad_norm": 0.0, - "learning_rate": 1.745473960558221e-05, - "loss": 0.9677, + "learning_rate": 1.5024854970873342e-05, + "loss": 1.0701, "step": 9008 }, { - "epoch": 0.25529201734251467, + "epoch": 0.35249237029501523, "grad_norm": 0.0, - "learning_rate": 1.745412783574704e-05, - "loss": 0.9854, + "learning_rate": 1.5023759300709201e-05, + "loss": 1.001, "step": 9009 }, { - "epoch": 0.2553203547847771, + "epoch": 0.3525314969872447, "grad_norm": 0.0, - "learning_rate": 1.7453516003122982e-05, - "loss": 0.9942, + "learning_rate": 1.5022663549869781e-05, + "loss": 1.2239, "step": 9010 }, { - "epoch": 0.2553486922270396, + "epoch": 0.3525706236794741, "grad_norm": 0.0, - "learning_rate": 1.7452904107715196e-05, - "loss": 0.8927, + "learning_rate": 1.5021567718372674e-05, + "loss": 1.0557, "step": 9011 }, { - "epoch": 0.25537702966930204, + "epoch": 0.35260975037170356, "grad_norm": 0.0, - "learning_rate": 1.7452292149528827e-05, - "loss": 0.9128, + "learning_rate": 1.5020471806235485e-05, + "loss": 1.1531, "step": 9012 }, { - "epoch": 0.25540536711156453, + "epoch": 0.352648877063933, "grad_norm": 0.0, - "learning_rate": 1.7451680128569033e-05, - "loss": 0.9964, + "learning_rate": 1.501937581347581e-05, + "loss": 1.2322, "step": 9013 }, { - "epoch": 0.25543370455382697, + "epoch": 0.35268800375616244, "grad_norm": 0.0, - "learning_rate": 1.7451068044840974e-05, - "loss": 0.9831, + "learning_rate": 1.5018279740111247e-05, + "loss": 0.9839, "step": 9014 }, { - "epoch": 0.2554620419960894, + "epoch": 0.3527271304483919, "grad_norm": 0.0, - "learning_rate": 1.74504558983498e-05, - "loss": 0.9507, + "learning_rate": 1.5017183586159401e-05, + "loss": 1.018, "step": 9015 }, { - "epoch": 0.2554903794383519, + "epoch": 0.3527662571406213, "grad_norm": 0.0, - "learning_rate": 1.744984368910067e-05, - "loss": 0.9868, + "learning_rate": 1.5016087351637874e-05, + "loss": 1.1202, "step": 9016 }, { - "epoch": 0.25551871688061434, + "epoch": 0.35280538383285076, "grad_norm": 0.0, - "learning_rate": 1.744923141709874e-05, - "loss": 0.9454, + "learning_rate": 1.501499103656427e-05, + "loss": 1.097, "step": 9017 }, { - "epoch": 0.25554705432287683, + "epoch": 0.3528445105250802, "grad_norm": 0.0, - "learning_rate": 1.7448619082349166e-05, - "loss": 0.9263, + "learning_rate": 1.5013894640956193e-05, + "loss": 1.183, "step": 9018 }, { - "epoch": 0.25557539176513927, + "epoch": 0.35288363721730964, "grad_norm": 0.0, - "learning_rate": 1.7448006684857108e-05, - "loss": 0.9549, + "learning_rate": 1.501279816483125e-05, + "loss": 1.0076, "step": 9019 }, { - "epoch": 0.25560372920740176, + "epoch": 0.3529227639095391, "grad_norm": 0.0, - "learning_rate": 1.7447394224627725e-05, - "loss": 0.9905, + "learning_rate": 1.5011701608207053e-05, + "loss": 1.0513, "step": 9020 }, { - "epoch": 0.2556320666496642, + "epoch": 0.3529618906017685, "grad_norm": 0.0, - "learning_rate": 1.7446781701666174e-05, - "loss": 0.9181, + "learning_rate": 1.5010604971101206e-05, + "loss": 1.1089, "step": 9021 }, { - "epoch": 0.25566040409192664, + "epoch": 0.35300101729399797, "grad_norm": 0.0, - "learning_rate": 1.7446169115977616e-05, - "loss": 0.9806, + "learning_rate": 1.5009508253531321e-05, + "loss": 1.2885, "step": 9022 }, { - "epoch": 0.25568874153418913, + "epoch": 0.3530401439862274, "grad_norm": 0.0, - "learning_rate": 1.7445556467567212e-05, - "loss": 1.1248, + "learning_rate": 1.500841145551501e-05, + "loss": 1.1013, "step": 9023 }, { - "epoch": 0.2557170789764516, + "epoch": 0.35307927067845685, "grad_norm": 0.0, - "learning_rate": 1.744494375644012e-05, - "loss": 1.0304, + "learning_rate": 1.5007314577069889e-05, + "loss": 1.093, "step": 9024 }, { - "epoch": 0.25574541641871407, + "epoch": 0.3531183973706863, "grad_norm": 0.0, - "learning_rate": 1.74443309826015e-05, - "loss": 1.0296, + "learning_rate": 1.500621761821357e-05, + "loss": 1.159, "step": 9025 }, { - "epoch": 0.2557737538609765, + "epoch": 0.35315752406291573, "grad_norm": 0.0, - "learning_rate": 1.7443718146056517e-05, - "loss": 1.0566, + "learning_rate": 1.500512057896367e-05, + "loss": 1.1206, "step": 9026 }, { - "epoch": 0.25580209130323894, + "epoch": 0.35319665075514517, "grad_norm": 0.0, - "learning_rate": 1.7443105246810333e-05, - "loss": 0.9468, + "learning_rate": 1.5004023459337804e-05, + "loss": 1.1347, "step": 9027 }, { - "epoch": 0.25583042874550144, + "epoch": 0.3532357774473746, "grad_norm": 0.0, - "learning_rate": 1.744249228486811e-05, - "loss": 0.886, + "learning_rate": 1.5002926259353592e-05, + "loss": 1.056, "step": 9028 }, { - "epoch": 0.2558587661877639, + "epoch": 0.35327490413960405, "grad_norm": 0.0, - "learning_rate": 1.744187926023501e-05, - "loss": 0.8913, + "learning_rate": 1.5001828979028652e-05, + "loss": 0.9846, "step": 9029 }, { - "epoch": 0.25588710363002637, + "epoch": 0.3533140308318335, "grad_norm": 0.0, - "learning_rate": 1.7441266172916195e-05, - "loss": 0.9438, + "learning_rate": 1.5000731618380608e-05, + "loss": 1.1287, "step": 9030 }, { - "epoch": 0.2559154410722888, + "epoch": 0.35335315752406293, "grad_norm": 0.0, - "learning_rate": 1.7440653022916834e-05, - "loss": 0.9578, + "learning_rate": 1.4999634177427081e-05, + "loss": 0.9115, "step": 9031 }, { - "epoch": 0.2559437785145513, + "epoch": 0.3533922842162924, "grad_norm": 0.0, - "learning_rate": 1.7440039810242087e-05, - "loss": 1.0214, + "learning_rate": 1.4998536656185693e-05, + "loss": 1.1217, "step": 9032 }, { - "epoch": 0.25597211595681374, + "epoch": 0.3534314109085218, "grad_norm": 0.0, - "learning_rate": 1.7439426534897127e-05, - "loss": 0.89, + "learning_rate": 1.499743905467407e-05, + "loss": 1.1219, "step": 9033 }, { - "epoch": 0.2560004533990762, + "epoch": 0.35347053760075126, "grad_norm": 0.0, - "learning_rate": 1.7438813196887112e-05, - "loss": 1.1207, + "learning_rate": 1.499634137290984e-05, + "loss": 1.1382, "step": 9034 }, { - "epoch": 0.25602879084133867, + "epoch": 0.3535096642929807, "grad_norm": 0.0, - "learning_rate": 1.743819979621721e-05, - "loss": 0.9716, + "learning_rate": 1.4995243610910625e-05, + "loss": 1.1816, "step": 9035 }, { - "epoch": 0.2560571282836011, + "epoch": 0.35354879098521014, "grad_norm": 0.0, - "learning_rate": 1.743758633289259e-05, - "loss": 0.9999, + "learning_rate": 1.4994145768694057e-05, + "loss": 1.175, "step": 9036 }, { - "epoch": 0.2560854657258636, + "epoch": 0.3535879176774395, "grad_norm": 0.0, - "learning_rate": 1.7436972806918418e-05, - "loss": 0.9565, + "learning_rate": 1.4993047846277769e-05, + "loss": 1.0234, "step": 9037 }, { - "epoch": 0.25611380316812604, + "epoch": 0.35362704436966896, "grad_norm": 0.0, - "learning_rate": 1.7436359218299865e-05, - "loss": 0.9341, + "learning_rate": 1.4991949843679388e-05, + "loss": 1.1005, "step": 9038 }, { - "epoch": 0.2561421406103885, + "epoch": 0.3536661710618984, "grad_norm": 0.0, - "learning_rate": 1.7435745567042096e-05, - "loss": 0.9368, + "learning_rate": 1.4990851760916544e-05, + "loss": 1.2023, "step": 9039 }, { - "epoch": 0.256170478052651, + "epoch": 0.35370529775412785, "grad_norm": 0.0, - "learning_rate": 1.7435131853150277e-05, - "loss": 0.9933, + "learning_rate": 1.498975359800688e-05, + "loss": 1.0856, "step": 9040 }, { - "epoch": 0.2561988154949134, + "epoch": 0.3537444244463573, "grad_norm": 0.0, - "learning_rate": 1.7434518076629586e-05, - "loss": 0.9966, + "learning_rate": 1.4988655354968025e-05, + "loss": 1.0915, "step": 9041 }, { - "epoch": 0.2562271529371759, + "epoch": 0.3537835511385867, "grad_norm": 0.0, - "learning_rate": 1.7433904237485186e-05, - "loss": 0.9773, + "learning_rate": 1.4987557031817613e-05, + "loss": 1.1433, "step": 9042 }, { - "epoch": 0.25625549037943834, + "epoch": 0.35382267783081617, "grad_norm": 0.0, - "learning_rate": 1.743329033572225e-05, - "loss": 0.9656, + "learning_rate": 1.4986458628573285e-05, + "loss": 1.2031, "step": 9043 }, { - "epoch": 0.25628382782170084, + "epoch": 0.3538618045230456, "grad_norm": 0.0, - "learning_rate": 1.743267637134595e-05, - "loss": 0.9296, + "learning_rate": 1.4985360145252684e-05, + "loss": 1.1115, "step": 9044 }, { - "epoch": 0.2563121652639633, + "epoch": 0.35390093121527505, "grad_norm": 0.0, - "learning_rate": 1.7432062344361456e-05, - "loss": 1.0559, + "learning_rate": 1.4984261581873442e-05, + "loss": 1.0562, "step": 9045 }, { - "epoch": 0.2563405027062257, + "epoch": 0.3539400579075045, "grad_norm": 0.0, - "learning_rate": 1.7431448254773943e-05, - "loss": 0.9685, + "learning_rate": 1.4983162938453203e-05, + "loss": 1.0098, "step": 9046 }, { - "epoch": 0.2563688401484882, + "epoch": 0.35397918459973393, "grad_norm": 0.0, - "learning_rate": 1.743083410258858e-05, - "loss": 0.8209, + "learning_rate": 1.4982064215009617e-05, + "loss": 1.0624, "step": 9047 }, { - "epoch": 0.25639717759075065, + "epoch": 0.3540183112919634, "grad_norm": 0.0, - "learning_rate": 1.7430219887810543e-05, - "loss": 1.018, + "learning_rate": 1.498096541156032e-05, + "loss": 1.0833, "step": 9048 }, { - "epoch": 0.25642551503301314, + "epoch": 0.3540574379841928, "grad_norm": 0.0, - "learning_rate": 1.7429605610445007e-05, - "loss": 0.9143, + "learning_rate": 1.497986652812296e-05, + "loss": 1.0228, "step": 9049 }, { - "epoch": 0.2564538524752756, + "epoch": 0.35409656467642225, "grad_norm": 0.0, - "learning_rate": 1.742899127049714e-05, - "loss": 1.0023, + "learning_rate": 1.4978767564715185e-05, + "loss": 0.9262, "step": 9050 }, { - "epoch": 0.256482189917538, + "epoch": 0.3541356913686517, "grad_norm": 0.0, - "learning_rate": 1.7428376867972122e-05, - "loss": 0.8602, + "learning_rate": 1.4977668521354639e-05, + "loss": 1.0147, "step": 9051 }, { - "epoch": 0.2565105273598005, + "epoch": 0.35417481806088114, "grad_norm": 0.0, - "learning_rate": 1.7427762402875127e-05, - "loss": 1.0536, + "learning_rate": 1.4976569398058975e-05, + "loss": 1.1767, "step": 9052 }, { - "epoch": 0.25653886480206295, + "epoch": 0.3542139447531106, "grad_norm": 0.0, - "learning_rate": 1.742714787521133e-05, - "loss": 0.9415, + "learning_rate": 1.4975470194845842e-05, + "loss": 1.1422, "step": 9053 }, { - "epoch": 0.25656720224432544, + "epoch": 0.35425307144534, "grad_norm": 0.0, - "learning_rate": 1.7426533284985912e-05, - "loss": 1.0622, + "learning_rate": 1.4974370911732895e-05, + "loss": 1.2357, "step": 9054 }, { - "epoch": 0.2565955396865879, + "epoch": 0.35429219813756946, "grad_norm": 0.0, - "learning_rate": 1.7425918632204044e-05, - "loss": 0.9007, + "learning_rate": 1.497327154873778e-05, + "loss": 1.1083, "step": 9055 }, { - "epoch": 0.2566238771288504, + "epoch": 0.3543313248297989, "grad_norm": 0.0, - "learning_rate": 1.7425303916870907e-05, - "loss": 0.9306, + "learning_rate": 1.4972172105878158e-05, + "loss": 1.0016, "step": 9056 }, { - "epoch": 0.2566522145711128, + "epoch": 0.35437045152202834, "grad_norm": 0.0, - "learning_rate": 1.742468913899168e-05, - "loss": 1.0808, + "learning_rate": 1.4971072583171684e-05, + "loss": 1.1409, "step": 9057 }, { - "epoch": 0.25668055201337525, + "epoch": 0.3544095782142578, "grad_norm": 0.0, - "learning_rate": 1.742407429857153e-05, - "loss": 1.0083, + "learning_rate": 1.4969972980636009e-05, + "loss": 1.0561, "step": 9058 }, { - "epoch": 0.25670888945563775, + "epoch": 0.3544487049064872, "grad_norm": 0.0, - "learning_rate": 1.7423459395615654e-05, - "loss": 0.9928, + "learning_rate": 1.4968873298288801e-05, + "loss": 1.0748, "step": 9059 }, { - "epoch": 0.2567372268979002, + "epoch": 0.35448783159871666, "grad_norm": 0.0, - "learning_rate": 1.742284443012922e-05, - "loss": 0.8564, + "learning_rate": 1.4967773536147712e-05, + "loss": 1.12, "step": 9060 }, { - "epoch": 0.2567655643401627, + "epoch": 0.3545269582909461, "grad_norm": 0.0, - "learning_rate": 1.7422229402117413e-05, - "loss": 1.0451, + "learning_rate": 1.4966673694230406e-05, + "loss": 1.1088, "step": 9061 }, { - "epoch": 0.2567939017824251, + "epoch": 0.35456608498317554, "grad_norm": 0.0, - "learning_rate": 1.7421614311585407e-05, - "loss": 0.9004, + "learning_rate": 1.496557377255454e-05, + "loss": 1.1671, "step": 9062 }, { - "epoch": 0.25682223922468755, + "epoch": 0.354605211675405, "grad_norm": 0.0, - "learning_rate": 1.7420999158538393e-05, - "loss": 1.0042, + "learning_rate": 1.4964473771137784e-05, + "loss": 1.1849, "step": 9063 }, { - "epoch": 0.25685057666695005, + "epoch": 0.3546443383676344, "grad_norm": 0.0, - "learning_rate": 1.7420383942981543e-05, - "loss": 0.8951, + "learning_rate": 1.49633736899978e-05, + "loss": 1.0842, "step": 9064 }, { - "epoch": 0.2568789141092125, + "epoch": 0.3546834650598638, "grad_norm": 0.0, - "learning_rate": 1.741976866492005e-05, - "loss": 0.959, + "learning_rate": 1.4962273529152257e-05, + "loss": 0.9742, "step": 9065 }, { - "epoch": 0.256907251551475, + "epoch": 0.35472259175209325, "grad_norm": 0.0, - "learning_rate": 1.7419153324359082e-05, - "loss": 1.0067, + "learning_rate": 1.4961173288618814e-05, + "loss": 1.1746, "step": 9066 }, { - "epoch": 0.2569355889937374, + "epoch": 0.3547617184443227, "grad_norm": 0.0, - "learning_rate": 1.7418537921303836e-05, - "loss": 1.0269, + "learning_rate": 1.4960072968415146e-05, + "loss": 1.1733, "step": 9067 }, { - "epoch": 0.2569639264359999, + "epoch": 0.35480084513655213, "grad_norm": 0.0, - "learning_rate": 1.741792245575949e-05, - "loss": 1.0088, + "learning_rate": 1.4958972568558925e-05, + "loss": 1.1403, "step": 9068 }, { - "epoch": 0.25699226387826235, + "epoch": 0.3548399718287816, "grad_norm": 0.0, - "learning_rate": 1.7417306927731226e-05, - "loss": 1.087, + "learning_rate": 1.4957872089067815e-05, + "loss": 1.126, "step": 9069 }, { - "epoch": 0.2570206013205248, + "epoch": 0.354879098521011, "grad_norm": 0.0, - "learning_rate": 1.7416691337224234e-05, - "loss": 1.0814, + "learning_rate": 1.4956771529959495e-05, + "loss": 1.2935, "step": 9070 }, { - "epoch": 0.2570489387627873, + "epoch": 0.35491822521324046, "grad_norm": 0.0, - "learning_rate": 1.7416075684243693e-05, - "loss": 0.9808, + "learning_rate": 1.4955670891251633e-05, + "loss": 1.1447, "step": 9071 }, { - "epoch": 0.2570772762050497, + "epoch": 0.3549573519054699, "grad_norm": 0.0, - "learning_rate": 1.7415459968794795e-05, - "loss": 0.9832, + "learning_rate": 1.4954570172961906e-05, + "loss": 1.213, "step": 9072 }, { - "epoch": 0.2571056136473122, + "epoch": 0.35499647859769934, "grad_norm": 0.0, - "learning_rate": 1.7414844190882725e-05, - "loss": 0.8858, + "learning_rate": 1.495346937510799e-05, + "loss": 1.0183, "step": 9073 }, { - "epoch": 0.25713395108957465, + "epoch": 0.3550356052899288, "grad_norm": 0.0, - "learning_rate": 1.741422835051267e-05, - "loss": 1.0367, + "learning_rate": 1.4952368497707566e-05, + "loss": 1.0537, "step": 9074 }, { - "epoch": 0.2571622885318371, + "epoch": 0.3550747319821582, "grad_norm": 0.0, - "learning_rate": 1.7413612447689813e-05, - "loss": 1.0698, + "learning_rate": 1.4951267540778305e-05, + "loss": 1.077, "step": 9075 }, { - "epoch": 0.2571906259740996, + "epoch": 0.35511385867438766, "grad_norm": 0.0, - "learning_rate": 1.7412996482419348e-05, - "loss": 0.9547, + "learning_rate": 1.4950166504337896e-05, + "loss": 1.0878, "step": 9076 }, { - "epoch": 0.257218963416362, + "epoch": 0.3551529853666171, "grad_norm": 0.0, - "learning_rate": 1.7412380454706458e-05, - "loss": 1.0057, + "learning_rate": 1.4949065388404014e-05, + "loss": 1.0773, "step": 9077 }, { - "epoch": 0.2572473008586245, + "epoch": 0.35519211205884654, "grad_norm": 0.0, - "learning_rate": 1.7411764364556336e-05, - "loss": 1.0716, + "learning_rate": 1.4947964192994343e-05, + "loss": 1.0812, "step": 9078 }, { - "epoch": 0.25727563830088696, + "epoch": 0.355231238751076, "grad_norm": 0.0, - "learning_rate": 1.741114821197417e-05, - "loss": 0.9937, + "learning_rate": 1.4946862918126567e-05, + "loss": 1.0517, "step": 9079 }, { - "epoch": 0.25730397574314945, + "epoch": 0.3552703654433054, "grad_norm": 0.0, - "learning_rate": 1.7410531996965152e-05, - "loss": 1.0839, + "learning_rate": 1.4945761563818372e-05, + "loss": 1.1497, "step": 9080 }, { - "epoch": 0.2573323131854119, + "epoch": 0.35530949213553487, "grad_norm": 0.0, - "learning_rate": 1.740991571953447e-05, - "loss": 1.0112, + "learning_rate": 1.4944660130087445e-05, + "loss": 1.1167, "step": 9081 }, { - "epoch": 0.2573606506276743, + "epoch": 0.3553486188277643, "grad_norm": 0.0, - "learning_rate": 1.7409299379687316e-05, - "loss": 1.0668, + "learning_rate": 1.494355861695147e-05, + "loss": 1.1698, "step": 9082 }, { - "epoch": 0.2573889880699368, + "epoch": 0.35538774551999375, "grad_norm": 0.0, - "learning_rate": 1.7408682977428884e-05, - "loss": 1.0506, + "learning_rate": 1.494245702442814e-05, + "loss": 0.9946, "step": 9083 }, { - "epoch": 0.25741732551219926, + "epoch": 0.3554268722122232, "grad_norm": 0.0, - "learning_rate": 1.7408066512764365e-05, - "loss": 1.01, + "learning_rate": 1.4941355352535142e-05, + "loss": 1.0573, "step": 9084 }, { - "epoch": 0.25744566295446175, + "epoch": 0.35546599890445263, "grad_norm": 0.0, - "learning_rate": 1.740744998569895e-05, - "loss": 1.0195, + "learning_rate": 1.4940253601290171e-05, + "loss": 1.1801, "step": 9085 }, { - "epoch": 0.2574740003967242, + "epoch": 0.35550512559668207, "grad_norm": 0.0, - "learning_rate": 1.740683339623783e-05, - "loss": 0.9845, + "learning_rate": 1.4939151770710915e-05, + "loss": 1.1784, "step": 9086 }, { - "epoch": 0.25750233783898663, + "epoch": 0.3555442522889115, "grad_norm": 0.0, - "learning_rate": 1.7406216744386205e-05, - "loss": 0.9548, + "learning_rate": 1.4938049860815072e-05, + "loss": 1.0785, "step": 9087 }, { - "epoch": 0.2575306752812491, + "epoch": 0.35558337898114095, "grad_norm": 0.0, - "learning_rate": 1.7405600030149262e-05, - "loss": 1.068, + "learning_rate": 1.4936947871620338e-05, + "loss": 1.1053, "step": 9088 }, { - "epoch": 0.25755901272351156, + "epoch": 0.3556225056733704, "grad_norm": 0.0, - "learning_rate": 1.7404983253532205e-05, - "loss": 0.9435, + "learning_rate": 1.4935845803144404e-05, + "loss": 1.0602, "step": 9089 }, { - "epoch": 0.25758735016577405, + "epoch": 0.35566163236559983, "grad_norm": 0.0, - "learning_rate": 1.740436641454022e-05, - "loss": 1.0555, + "learning_rate": 1.4934743655404972e-05, + "loss": 1.1323, "step": 9090 }, { - "epoch": 0.2576156876080365, + "epoch": 0.3557007590578293, "grad_norm": 0.0, - "learning_rate": 1.740374951317851e-05, - "loss": 1.0515, + "learning_rate": 1.493364142841974e-05, + "loss": 1.1973, "step": 9091 }, { - "epoch": 0.257644025050299, + "epoch": 0.3557398857500587, "grad_norm": 0.0, - "learning_rate": 1.740313254945227e-05, - "loss": 0.9739, + "learning_rate": 1.493253912220641e-05, + "loss": 1.1225, "step": 9092 }, { - "epoch": 0.2576723624925614, + "epoch": 0.35577901244228816, "grad_norm": 0.0, - "learning_rate": 1.7402515523366692e-05, - "loss": 0.9779, + "learning_rate": 1.4931436736782682e-05, + "loss": 1.1642, "step": 9093 }, { - "epoch": 0.25770069993482386, + "epoch": 0.35581813913451754, "grad_norm": 0.0, - "learning_rate": 1.7401898434926978e-05, - "loss": 1.0018, + "learning_rate": 1.4930334272166263e-05, + "loss": 1.1396, "step": 9094 }, { - "epoch": 0.25772903737708636, + "epoch": 0.355857265826747, "grad_norm": 0.0, - "learning_rate": 1.7401281284138324e-05, - "loss": 1.0423, + "learning_rate": 1.4929231728374847e-05, + "loss": 0.9605, "step": 9095 }, { - "epoch": 0.2577573748193488, + "epoch": 0.3558963925189764, "grad_norm": 0.0, - "learning_rate": 1.740066407100593e-05, - "loss": 0.9472, + "learning_rate": 1.492812910542615e-05, + "loss": 1.032, "step": 9096 }, { - "epoch": 0.2577857122616113, + "epoch": 0.35593551921120586, "grad_norm": 0.0, - "learning_rate": 1.7400046795534996e-05, - "loss": 0.9726, + "learning_rate": 1.4927026403337876e-05, + "loss": 1.0682, "step": 9097 }, { - "epoch": 0.2578140497038737, + "epoch": 0.3559746459034353, "grad_norm": 0.0, - "learning_rate": 1.739942945773072e-05, - "loss": 0.9385, + "learning_rate": 1.492592362212773e-05, + "loss": 1.1571, "step": 9098 }, { - "epoch": 0.25784238714613616, + "epoch": 0.35601377259566475, "grad_norm": 0.0, - "learning_rate": 1.73988120575983e-05, - "loss": 0.9352, + "learning_rate": 1.4924820761813426e-05, + "loss": 1.0376, "step": 9099 }, { - "epoch": 0.25787072458839866, + "epoch": 0.3560528992878942, "grad_norm": 0.0, - "learning_rate": 1.739819459514294e-05, - "loss": 1.017, + "learning_rate": 1.4923717822412666e-05, + "loss": 1.1838, "step": 9100 }, { - "epoch": 0.2578990620306611, + "epoch": 0.3560920259801236, "grad_norm": 0.0, - "learning_rate": 1.739757707036984e-05, - "loss": 0.9027, + "learning_rate": 1.4922614803943172e-05, + "loss": 1.0459, "step": 9101 }, { - "epoch": 0.2579273994729236, + "epoch": 0.35613115267235307, "grad_norm": 0.0, - "learning_rate": 1.7396959483284197e-05, - "loss": 1.025, + "learning_rate": 1.4921511706422652e-05, + "loss": 1.0875, "step": 9102 }, { - "epoch": 0.25795573691518603, + "epoch": 0.3561702793645825, "grad_norm": 0.0, - "learning_rate": 1.7396341833891225e-05, - "loss": 0.9938, + "learning_rate": 1.492040852986882e-05, + "loss": 1.0823, "step": 9103 }, { - "epoch": 0.2579840743574485, + "epoch": 0.35620940605681195, "grad_norm": 0.0, - "learning_rate": 1.7395724122196113e-05, - "loss": 0.9541, + "learning_rate": 1.4919305274299392e-05, + "loss": 1.0005, "step": 9104 }, { - "epoch": 0.25801241179971096, + "epoch": 0.3562485327490414, "grad_norm": 0.0, - "learning_rate": 1.7395106348204073e-05, - "loss": 1.0759, + "learning_rate": 1.4918201939732087e-05, + "loss": 1.1624, "step": 9105 }, { - "epoch": 0.2580407492419734, + "epoch": 0.35628765944127083, "grad_norm": 0.0, - "learning_rate": 1.73944885119203e-05, - "loss": 0.9877, + "learning_rate": 1.4917098526184623e-05, + "loss": 0.9651, "step": 9106 }, { - "epoch": 0.2580690866842359, + "epoch": 0.3563267861335003, "grad_norm": 0.0, - "learning_rate": 1.7393870613350012e-05, - "loss": 0.9213, + "learning_rate": 1.4915995033674715e-05, + "loss": 1.0938, "step": 9107 }, { - "epoch": 0.25809742412649833, + "epoch": 0.3563659128257297, "grad_norm": 0.0, - "learning_rate": 1.7393252652498404e-05, - "loss": 1.0615, + "learning_rate": 1.491489146222009e-05, + "loss": 1.1408, "step": 9108 }, { - "epoch": 0.2581257615687608, + "epoch": 0.35640503951795915, "grad_norm": 0.0, - "learning_rate": 1.7392634629370684e-05, - "loss": 0.8821, + "learning_rate": 1.4913787811838463e-05, + "loss": 0.9813, "step": 9109 }, { - "epoch": 0.25815409901102326, + "epoch": 0.3564441662101886, "grad_norm": 0.0, - "learning_rate": 1.7392016543972056e-05, - "loss": 0.9365, + "learning_rate": 1.4912684082547564e-05, + "loss": 1.103, "step": 9110 }, { - "epoch": 0.2581824364532857, + "epoch": 0.35648329290241804, "grad_norm": 0.0, - "learning_rate": 1.7391398396307728e-05, - "loss": 0.9921, + "learning_rate": 1.4911580274365112e-05, + "loss": 1.0804, "step": 9111 }, { - "epoch": 0.2582107738955482, + "epoch": 0.3565224195946475, "grad_norm": 0.0, - "learning_rate": 1.7390780186382907e-05, - "loss": 0.9072, + "learning_rate": 1.4910476387308839e-05, + "loss": 0.9644, "step": 9112 }, { - "epoch": 0.25823911133781063, + "epoch": 0.3565615462868769, "grad_norm": 0.0, - "learning_rate": 1.73901619142028e-05, - "loss": 0.9333, + "learning_rate": 1.4909372421396464e-05, + "loss": 1.1732, "step": 9113 }, { - "epoch": 0.25826744878007313, + "epoch": 0.35660067297910636, "grad_norm": 0.0, - "learning_rate": 1.7389543579772613e-05, - "loss": 0.9679, + "learning_rate": 1.4908268376645723e-05, + "loss": 1.1055, "step": 9114 }, { - "epoch": 0.25829578622233557, + "epoch": 0.3566397996713358, "grad_norm": 0.0, - "learning_rate": 1.738892518309756e-05, - "loss": 0.9666, + "learning_rate": 1.4907164253074342e-05, + "loss": 1.0409, "step": 9115 }, { - "epoch": 0.25832412366459806, + "epoch": 0.35667892636356524, "grad_norm": 0.0, - "learning_rate": 1.7388306724182847e-05, - "loss": 0.914, + "learning_rate": 1.4906060050700052e-05, + "loss": 1.0679, "step": 9116 }, { - "epoch": 0.2583524611068605, + "epoch": 0.3567180530557947, "grad_norm": 0.0, - "learning_rate": 1.738768820303368e-05, - "loss": 0.8883, + "learning_rate": 1.4904955769540585e-05, + "loss": 1.128, "step": 9117 }, { - "epoch": 0.25838079854912294, + "epoch": 0.3567571797480241, "grad_norm": 0.0, - "learning_rate": 1.738706961965527e-05, - "loss": 1.0633, + "learning_rate": 1.4903851409613674e-05, + "loss": 1.1481, "step": 9118 }, { - "epoch": 0.25840913599138543, + "epoch": 0.35679630644025356, "grad_norm": 0.0, - "learning_rate": 1.7386450974052836e-05, - "loss": 0.8596, + "learning_rate": 1.4902746970937053e-05, + "loss": 1.0767, "step": 9119 }, { - "epoch": 0.25843747343364787, + "epoch": 0.356835433132483, "grad_norm": 0.0, - "learning_rate": 1.7385832266231576e-05, - "loss": 0.9704, + "learning_rate": 1.4901642453528462e-05, + "loss": 1.131, "step": 9120 }, { - "epoch": 0.25846581087591036, + "epoch": 0.35687455982471245, "grad_norm": 0.0, - "learning_rate": 1.738521349619671e-05, - "loss": 0.8651, + "learning_rate": 1.4900537857405635e-05, + "loss": 1.1819, "step": 9121 }, { - "epoch": 0.2584941483181728, + "epoch": 0.35691368651694183, "grad_norm": 0.0, - "learning_rate": 1.738459466395345e-05, - "loss": 1.0451, + "learning_rate": 1.4899433182586307e-05, + "loss": 1.1086, "step": 9122 }, { - "epoch": 0.25852248576043524, + "epoch": 0.35695281320917127, "grad_norm": 0.0, - "learning_rate": 1.7383975769507006e-05, - "loss": 1.058, + "learning_rate": 1.4898328429088227e-05, + "loss": 1.1031, "step": 9123 }, { - "epoch": 0.25855082320269773, + "epoch": 0.3569919399014007, "grad_norm": 0.0, - "learning_rate": 1.7383356812862595e-05, - "loss": 0.8834, + "learning_rate": 1.4897223596929127e-05, + "loss": 1.1606, "step": 9124 }, { - "epoch": 0.25857916064496017, + "epoch": 0.35703106659363015, "grad_norm": 0.0, - "learning_rate": 1.7382737794025422e-05, - "loss": 0.867, + "learning_rate": 1.4896118686126752e-05, + "loss": 1.0609, "step": 9125 }, { - "epoch": 0.25860749808722266, + "epoch": 0.3570701932858596, "grad_norm": 0.0, - "learning_rate": 1.738211871300071e-05, - "loss": 1.04, + "learning_rate": 1.4895013696698847e-05, + "loss": 1.0718, "step": 9126 }, { - "epoch": 0.2586358355294851, + "epoch": 0.35710931997808903, "grad_norm": 0.0, - "learning_rate": 1.738149956979367e-05, - "loss": 0.9688, + "learning_rate": 1.489390862866316e-05, + "loss": 1.0566, "step": 9127 }, { - "epoch": 0.2586641729717476, + "epoch": 0.3571484466703185, "grad_norm": 0.0, - "learning_rate": 1.738088036440952e-05, - "loss": 0.8956, + "learning_rate": 1.4892803482037425e-05, + "loss": 1.0907, "step": 9128 }, { - "epoch": 0.25869251041401004, + "epoch": 0.3571875733625479, "grad_norm": 0.0, - "learning_rate": 1.738026109685347e-05, - "loss": 0.8867, + "learning_rate": 1.4891698256839402e-05, + "loss": 1.1147, "step": 9129 }, { - "epoch": 0.2587208478562725, + "epoch": 0.35722670005477736, "grad_norm": 0.0, - "learning_rate": 1.7379641767130745e-05, - "loss": 1.0948, + "learning_rate": 1.4890592953086835e-05, + "loss": 1.11, "step": 9130 }, { - "epoch": 0.25874918529853497, + "epoch": 0.3572658267470068, "grad_norm": 0.0, - "learning_rate": 1.7379022375246554e-05, - "loss": 0.9422, + "learning_rate": 1.4889487570797471e-05, + "loss": 1.2126, "step": 9131 }, { - "epoch": 0.2587775227407974, + "epoch": 0.35730495343923624, "grad_norm": 0.0, - "learning_rate": 1.737840292120612e-05, - "loss": 0.7953, + "learning_rate": 1.4888382109989065e-05, + "loss": 1.3104, "step": 9132 }, { - "epoch": 0.2588058601830599, + "epoch": 0.3573440801314657, "grad_norm": 0.0, - "learning_rate": 1.7377783405014653e-05, - "loss": 1.0057, + "learning_rate": 1.4887276570679368e-05, + "loss": 0.9772, "step": 9133 }, { - "epoch": 0.25883419762532234, + "epoch": 0.3573832068236951, "grad_norm": 0.0, - "learning_rate": 1.7377163826677383e-05, - "loss": 0.9861, + "learning_rate": 1.4886170952886132e-05, + "loss": 0.9851, "step": 9134 }, { - "epoch": 0.2588625350675848, + "epoch": 0.35742233351592456, "grad_norm": 0.0, - "learning_rate": 1.7376544186199518e-05, - "loss": 0.9852, + "learning_rate": 1.4885065256627115e-05, + "loss": 0.9661, "step": 9135 }, { - "epoch": 0.25889087250984727, + "epoch": 0.357461460208154, "grad_norm": 0.0, - "learning_rate": 1.7375924483586285e-05, - "loss": 1.0344, + "learning_rate": 1.488395948192007e-05, + "loss": 1.167, "step": 9136 }, { - "epoch": 0.2589192099521097, + "epoch": 0.35750058690038344, "grad_norm": 0.0, - "learning_rate": 1.73753047188429e-05, - "loss": 0.918, + "learning_rate": 1.4882853628782756e-05, + "loss": 1.0837, "step": 9137 }, { - "epoch": 0.2589475473943722, + "epoch": 0.3575397135926129, "grad_norm": 0.0, - "learning_rate": 1.7374684891974585e-05, - "loss": 1.0081, + "learning_rate": 1.4881747697232931e-05, + "loss": 1.0201, "step": 9138 }, { - "epoch": 0.25897588483663464, + "epoch": 0.3575788402848423, "grad_norm": 0.0, - "learning_rate": 1.737406500298656e-05, - "loss": 0.9615, + "learning_rate": 1.4880641687288356e-05, + "loss": 1.1738, "step": 9139 }, { - "epoch": 0.25900422227889713, + "epoch": 0.35761796697707177, "grad_norm": 0.0, - "learning_rate": 1.737344505188405e-05, - "loss": 0.9426, + "learning_rate": 1.487953559896679e-05, + "loss": 1.1015, "step": 9140 }, { - "epoch": 0.25903255972115957, + "epoch": 0.3576570936693012, "grad_norm": 0.0, - "learning_rate": 1.737282503867227e-05, - "loss": 0.8767, + "learning_rate": 1.4878429432285996e-05, + "loss": 1.0915, "step": 9141 }, { - "epoch": 0.259060897163422, + "epoch": 0.35769622036153065, "grad_norm": 0.0, - "learning_rate": 1.737220496335645e-05, - "loss": 0.894, + "learning_rate": 1.4877323187263738e-05, + "loss": 1.0241, "step": 9142 }, { - "epoch": 0.2590892346056845, + "epoch": 0.3577353470537601, "grad_norm": 0.0, - "learning_rate": 1.7371584825941808e-05, - "loss": 0.996, + "learning_rate": 1.4876216863917785e-05, + "loss": 1.2347, "step": 9143 }, { - "epoch": 0.25911757204794694, + "epoch": 0.35777447374598953, "grad_norm": 0.0, - "learning_rate": 1.737096462643357e-05, - "loss": 1.0552, + "learning_rate": 1.4875110462265898e-05, + "loss": 0.9811, "step": 9144 }, { - "epoch": 0.25914590949020944, + "epoch": 0.35781360043821897, "grad_norm": 0.0, - "learning_rate": 1.737034436483696e-05, - "loss": 1.1226, + "learning_rate": 1.4874003982325844e-05, + "loss": 1.1505, "step": 9145 }, { - "epoch": 0.2591742469324719, + "epoch": 0.3578527271304484, "grad_norm": 0.0, - "learning_rate": 1.7369724041157202e-05, - "loss": 0.873, + "learning_rate": 1.4872897424115395e-05, + "loss": 1.1487, "step": 9146 }, { - "epoch": 0.2592025843747343, + "epoch": 0.35789185382267785, "grad_norm": 0.0, - "learning_rate": 1.7369103655399523e-05, - "loss": 0.9501, + "learning_rate": 1.4871790787652319e-05, + "loss": 0.917, "step": 9147 }, { - "epoch": 0.2592309218169968, + "epoch": 0.3579309805149073, "grad_norm": 0.0, - "learning_rate": 1.7368483207569146e-05, - "loss": 1.0538, + "learning_rate": 1.4870684072954388e-05, + "loss": 1.0352, "step": 9148 }, { - "epoch": 0.25925925925925924, + "epoch": 0.35797010720713673, "grad_norm": 0.0, - "learning_rate": 1.73678626976713e-05, - "loss": 0.9276, + "learning_rate": 1.4869577280039373e-05, + "loss": 1.1358, "step": 9149 }, { - "epoch": 0.25928759670152174, + "epoch": 0.3580092338993662, "grad_norm": 0.0, - "learning_rate": 1.736724212571121e-05, - "loss": 0.9482, + "learning_rate": 1.4868470408925052e-05, + "loss": 1.0979, "step": 9150 }, { - "epoch": 0.2593159341437842, + "epoch": 0.35804836059159556, "grad_norm": 0.0, - "learning_rate": 1.7366621491694103e-05, - "loss": 0.9543, + "learning_rate": 1.4867363459629191e-05, + "loss": 1.0747, "step": 9151 }, { - "epoch": 0.25934427158604667, + "epoch": 0.358087487283825, "grad_norm": 0.0, - "learning_rate": 1.736600079562521e-05, - "loss": 0.863, + "learning_rate": 1.4866256432169577e-05, + "loss": 1.0268, "step": 9152 }, { - "epoch": 0.2593726090283091, + "epoch": 0.35812661397605444, "grad_norm": 0.0, - "learning_rate": 1.7365380037509756e-05, - "loss": 0.9523, + "learning_rate": 1.486514932656398e-05, + "loss": 1.0005, "step": 9153 }, { - "epoch": 0.25940094647057155, + "epoch": 0.3581657406682839, "grad_norm": 0.0, - "learning_rate": 1.736475921735297e-05, - "loss": 0.88, + "learning_rate": 1.4864042142830184e-05, + "loss": 1.0773, "step": 9154 }, { - "epoch": 0.25942928391283404, + "epoch": 0.3582048673605133, "grad_norm": 0.0, - "learning_rate": 1.736413833516008e-05, - "loss": 0.9609, + "learning_rate": 1.4862934880985964e-05, + "loss": 1.1484, "step": 9155 }, { - "epoch": 0.2594576213550965, + "epoch": 0.35824399405274276, "grad_norm": 0.0, - "learning_rate": 1.736351739093632e-05, - "loss": 0.9688, + "learning_rate": 1.4861827541049103e-05, + "loss": 1.1707, "step": 9156 }, { - "epoch": 0.259485958797359, + "epoch": 0.3582831207449722, "grad_norm": 0.0, - "learning_rate": 1.736289638468692e-05, - "loss": 0.9351, + "learning_rate": 1.4860720123037385e-05, + "loss": 1.1158, "step": 9157 }, { - "epoch": 0.2595142962396214, + "epoch": 0.35832224743720165, "grad_norm": 0.0, - "learning_rate": 1.7362275316417112e-05, - "loss": 0.9114, + "learning_rate": 1.4859612626968592e-05, + "loss": 1.0834, "step": 9158 }, { - "epoch": 0.25954263368188385, + "epoch": 0.3583613741294311, "grad_norm": 0.0, - "learning_rate": 1.736165418613212e-05, - "loss": 0.9713, + "learning_rate": 1.485850505286051e-05, + "loss": 1.0704, "step": 9159 }, { - "epoch": 0.25957097112414634, + "epoch": 0.35840050082166053, "grad_norm": 0.0, - "learning_rate": 1.7361032993837184e-05, - "loss": 1.0672, + "learning_rate": 1.4857397400730924e-05, + "loss": 1.0168, "step": 9160 }, { - "epoch": 0.2595993085664088, + "epoch": 0.35843962751388997, "grad_norm": 0.0, - "learning_rate": 1.7360411739537535e-05, - "loss": 0.9768, + "learning_rate": 1.4856289670597623e-05, + "loss": 1.1714, "step": 9161 }, { - "epoch": 0.2596276460086713, + "epoch": 0.3584787542061194, "grad_norm": 0.0, - "learning_rate": 1.73597904232384e-05, - "loss": 1.0385, + "learning_rate": 1.4855181862478394e-05, + "loss": 1.0978, "step": 9162 }, { - "epoch": 0.2596559834509337, + "epoch": 0.35851788089834885, "grad_norm": 0.0, - "learning_rate": 1.735916904494502e-05, - "loss": 0.8051, + "learning_rate": 1.485407397639103e-05, + "loss": 1.1381, "step": 9163 }, { - "epoch": 0.2596843208931962, + "epoch": 0.3585570075905783, "grad_norm": 0.0, - "learning_rate": 1.7358547604662626e-05, - "loss": 0.9457, + "learning_rate": 1.485296601235332e-05, + "loss": 1.1342, "step": 9164 }, { - "epoch": 0.25971265833545865, + "epoch": 0.35859613428280773, "grad_norm": 0.0, - "learning_rate": 1.7357926102396454e-05, - "loss": 0.9761, + "learning_rate": 1.4851857970383057e-05, + "loss": 0.9924, "step": 9165 }, { - "epoch": 0.2597409957777211, + "epoch": 0.3586352609750372, "grad_norm": 0.0, - "learning_rate": 1.735730453815174e-05, - "loss": 0.9784, + "learning_rate": 1.4850749850498036e-05, + "loss": 1.1281, "step": 9166 }, { - "epoch": 0.2597693332199836, + "epoch": 0.3586743876672666, "grad_norm": 0.0, - "learning_rate": 1.7356682911933713e-05, - "loss": 1.0373, + "learning_rate": 1.4849641652716047e-05, + "loss": 1.0837, "step": 9167 }, { - "epoch": 0.259797670662246, + "epoch": 0.35871351435949606, "grad_norm": 0.0, - "learning_rate": 1.7356061223747617e-05, - "loss": 0.9261, + "learning_rate": 1.4848533377054892e-05, + "loss": 1.0869, "step": 9168 }, { - "epoch": 0.2598260081045085, + "epoch": 0.3587526410517255, "grad_norm": 0.0, - "learning_rate": 1.7355439473598682e-05, - "loss": 1.0622, + "learning_rate": 1.4847425023532369e-05, + "loss": 1.1458, "step": 9169 }, { - "epoch": 0.25985434554677095, + "epoch": 0.35879176774395494, "grad_norm": 0.0, - "learning_rate": 1.7354817661492154e-05, - "loss": 1.0176, + "learning_rate": 1.484631659216627e-05, + "loss": 1.1738, "step": 9170 }, { - "epoch": 0.2598826829890334, + "epoch": 0.3588308944361844, "grad_norm": 0.0, - "learning_rate": 1.7354195787433263e-05, - "loss": 1.0879, + "learning_rate": 1.4845208082974402e-05, + "loss": 1.1767, "step": 9171 }, { - "epoch": 0.2599110204312959, + "epoch": 0.3588700211284138, "grad_norm": 0.0, - "learning_rate": 1.735357385142725e-05, - "loss": 0.8988, + "learning_rate": 1.4844099495974565e-05, + "loss": 1.0643, "step": 9172 }, { - "epoch": 0.2599393578735583, + "epoch": 0.35890914782064326, "grad_norm": 0.0, - "learning_rate": 1.7352951853479357e-05, - "loss": 1.015, + "learning_rate": 1.4842990831184559e-05, + "loss": 1.0597, "step": 9173 }, { - "epoch": 0.2599676953158208, + "epoch": 0.3589482745128727, "grad_norm": 0.0, - "learning_rate": 1.7352329793594817e-05, - "loss": 0.9812, + "learning_rate": 1.4841882088622186e-05, + "loss": 1.0092, "step": 9174 }, { - "epoch": 0.25999603275808325, + "epoch": 0.35898740120510214, "grad_norm": 0.0, - "learning_rate": 1.7351707671778874e-05, - "loss": 1.0191, + "learning_rate": 1.4840773268305258e-05, + "loss": 1.1207, "step": 9175 }, { - "epoch": 0.2600243702003457, + "epoch": 0.3590265278973316, "grad_norm": 0.0, - "learning_rate": 1.7351085488036762e-05, - "loss": 0.9102, + "learning_rate": 1.4839664370251572e-05, + "loss": 1.0182, "step": 9176 }, { - "epoch": 0.2600527076426082, + "epoch": 0.359065654589561, "grad_norm": 0.0, - "learning_rate": 1.7350463242373733e-05, - "loss": 1.0139, + "learning_rate": 1.4838555394478947e-05, + "loss": 1.0842, "step": 9177 }, { - "epoch": 0.2600810450848706, + "epoch": 0.35910478128179046, "grad_norm": 0.0, - "learning_rate": 1.7349840934795024e-05, - "loss": 0.9798, + "learning_rate": 1.4837446341005179e-05, + "loss": 1.0501, "step": 9178 }, { - "epoch": 0.2601093825271331, + "epoch": 0.35914390797401985, "grad_norm": 0.0, - "learning_rate": 1.734921856530587e-05, - "loss": 0.9753, + "learning_rate": 1.4836337209848088e-05, + "loss": 1.0797, "step": 9179 }, { - "epoch": 0.26013771996939555, + "epoch": 0.3591830346662493, "grad_norm": 0.0, - "learning_rate": 1.7348596133911522e-05, - "loss": 0.976, + "learning_rate": 1.483522800102548e-05, + "loss": 1.0309, "step": 9180 }, { - "epoch": 0.26016605741165805, + "epoch": 0.35922216135847873, "grad_norm": 0.0, - "learning_rate": 1.7347973640617222e-05, - "loss": 1.0867, + "learning_rate": 1.4834118714555172e-05, + "loss": 1.1395, "step": 9181 }, { - "epoch": 0.2601943948539205, + "epoch": 0.35926128805070817, "grad_norm": 0.0, - "learning_rate": 1.7347351085428208e-05, - "loss": 1.0363, + "learning_rate": 1.4833009350454972e-05, + "loss": 1.0707, "step": 9182 }, { - "epoch": 0.2602227322961829, + "epoch": 0.3593004147429376, "grad_norm": 0.0, - "learning_rate": 1.734672846834973e-05, - "loss": 0.9311, + "learning_rate": 1.4831899908742699e-05, + "loss": 1.0233, "step": 9183 }, { - "epoch": 0.2602510697384454, + "epoch": 0.35933954143516705, "grad_norm": 0.0, - "learning_rate": 1.734610578938703e-05, - "loss": 1.0313, + "learning_rate": 1.4830790389436169e-05, + "loss": 1.2071, "step": 9184 }, { - "epoch": 0.26027940718070786, + "epoch": 0.3593786681273965, "grad_norm": 0.0, - "learning_rate": 1.7345483048545347e-05, - "loss": 1.0155, + "learning_rate": 1.4829680792553198e-05, + "loss": 1.1226, "step": 9185 }, { - "epoch": 0.26030774462297035, + "epoch": 0.35941779481962594, "grad_norm": 0.0, - "learning_rate": 1.734486024582994e-05, - "loss": 0.9562, + "learning_rate": 1.4828571118111605e-05, + "loss": 1.2007, "step": 9186 }, { - "epoch": 0.2603360820652328, + "epoch": 0.3594569215118554, "grad_norm": 0.0, - "learning_rate": 1.7344237381246043e-05, - "loss": 1.0263, + "learning_rate": 1.482746136612921e-05, + "loss": 1.045, "step": 9187 }, { - "epoch": 0.2603644195074952, + "epoch": 0.3594960482040848, "grad_norm": 0.0, - "learning_rate": 1.734361445479891e-05, - "loss": 1.0244, + "learning_rate": 1.4826351536623838e-05, + "loss": 1.1286, "step": 9188 }, { - "epoch": 0.2603927569497577, + "epoch": 0.35953517489631426, "grad_norm": 0.0, - "learning_rate": 1.7342991466493785e-05, - "loss": 1.0589, + "learning_rate": 1.4825241629613304e-05, + "loss": 1.2228, "step": 9189 }, { - "epoch": 0.26042109439202016, + "epoch": 0.3595743015885437, "grad_norm": 0.0, - "learning_rate": 1.7342368416335915e-05, - "loss": 0.9329, + "learning_rate": 1.4824131645115438e-05, + "loss": 1.109, "step": 9190 }, { - "epoch": 0.26044943183428265, + "epoch": 0.35961342828077314, "grad_norm": 0.0, - "learning_rate": 1.734174530433055e-05, - "loss": 0.9724, + "learning_rate": 1.482302158314806e-05, + "loss": 1.0378, "step": 9191 }, { - "epoch": 0.2604777692765451, + "epoch": 0.3596525549730026, "grad_norm": 0.0, - "learning_rate": 1.7341122130482938e-05, - "loss": 0.9149, + "learning_rate": 1.4821911443729002e-05, + "loss": 1.1537, "step": 9192 }, { - "epoch": 0.2605061067188076, + "epoch": 0.359691681665232, "grad_norm": 0.0, - "learning_rate": 1.7340498894798327e-05, - "loss": 0.9275, + "learning_rate": 1.482080122687609e-05, + "loss": 1.2168, "step": 9193 }, { - "epoch": 0.26053444416107, + "epoch": 0.35973080835746146, "grad_norm": 0.0, - "learning_rate": 1.733987559728197e-05, - "loss": 0.9222, + "learning_rate": 1.4819690932607145e-05, + "loss": 1.2112, "step": 9194 }, { - "epoch": 0.26056278160333246, + "epoch": 0.3597699350496909, "grad_norm": 0.0, - "learning_rate": 1.7339252237939118e-05, - "loss": 0.9882, + "learning_rate": 1.4818580560940008e-05, + "loss": 0.9993, "step": 9195 }, { - "epoch": 0.26059111904559495, + "epoch": 0.35980906174192034, "grad_norm": 0.0, - "learning_rate": 1.7338628816775013e-05, - "loss": 1.0189, + "learning_rate": 1.4817470111892503e-05, + "loss": 1.0721, "step": 9196 }, { - "epoch": 0.2606194564878574, + "epoch": 0.3598481884341498, "grad_norm": 0.0, - "learning_rate": 1.7338005333794915e-05, - "loss": 0.9829, + "learning_rate": 1.4816359585482465e-05, + "loss": 1.113, "step": 9197 }, { - "epoch": 0.2606477939301199, + "epoch": 0.3598873151263792, "grad_norm": 0.0, - "learning_rate": 1.7337381789004074e-05, - "loss": 1.02, + "learning_rate": 1.4815248981727728e-05, + "loss": 1.106, "step": 9198 }, { - "epoch": 0.2606761313723823, + "epoch": 0.35992644181860867, "grad_norm": 0.0, - "learning_rate": 1.733675818240774e-05, - "loss": 1.0941, + "learning_rate": 1.4814138300646127e-05, + "loss": 1.2255, "step": 9199 }, { - "epoch": 0.26070446881464476, + "epoch": 0.3599655685108381, "grad_norm": 0.0, - "learning_rate": 1.7336134514011168e-05, - "loss": 0.8962, + "learning_rate": 1.4813027542255494e-05, + "loss": 1.1519, "step": 9200 }, { - "epoch": 0.26073280625690726, + "epoch": 0.36000469520306755, "grad_norm": 0.0, - "learning_rate": 1.733551078381961e-05, - "loss": 0.9013, + "learning_rate": 1.4811916706573673e-05, + "loss": 1.0052, "step": 9201 }, { - "epoch": 0.2607611436991697, + "epoch": 0.360043821895297, "grad_norm": 0.0, - "learning_rate": 1.7334886991838323e-05, - "loss": 0.9768, + "learning_rate": 1.4810805793618498e-05, + "loss": 1.0732, "step": 9202 }, { - "epoch": 0.2607894811414322, + "epoch": 0.36008294858752643, "grad_norm": 0.0, - "learning_rate": 1.7334263138072557e-05, - "loss": 0.9364, + "learning_rate": 1.480969480340781e-05, + "loss": 1.1018, "step": 9203 }, { - "epoch": 0.2608178185836946, + "epoch": 0.36012207527975587, "grad_norm": 0.0, - "learning_rate": 1.7333639222527572e-05, - "loss": 1.0043, + "learning_rate": 1.4808583735959453e-05, + "loss": 1.0652, "step": 9204 }, { - "epoch": 0.2608461560259571, + "epoch": 0.3601612019719853, "grad_norm": 0.0, - "learning_rate": 1.7333015245208614e-05, - "loss": 1.0666, + "learning_rate": 1.4807472591291263e-05, + "loss": 1.1201, "step": 9205 }, { - "epoch": 0.26087449346821956, + "epoch": 0.36020032866421475, "grad_norm": 0.0, - "learning_rate": 1.7332391206120954e-05, - "loss": 0.9644, + "learning_rate": 1.480636136942109e-05, + "loss": 1.1796, "step": 9206 }, { - "epoch": 0.260902830910482, + "epoch": 0.3602394553564442, "grad_norm": 0.0, - "learning_rate": 1.7331767105269833e-05, - "loss": 0.9835, + "learning_rate": 1.4805250070366773e-05, + "loss": 1.0705, "step": 9207 }, { - "epoch": 0.2609311683527445, + "epoch": 0.3602785820486736, "grad_norm": 0.0, - "learning_rate": 1.733114294266052e-05, - "loss": 0.9171, + "learning_rate": 1.4804138694146163e-05, + "loss": 1.1555, "step": 9208 }, { - "epoch": 0.26095950579500693, + "epoch": 0.360317708740903, "grad_norm": 0.0, - "learning_rate": 1.7330518718298263e-05, - "loss": 1.0244, + "learning_rate": 1.4803027240777104e-05, + "loss": 1.1594, "step": 9209 }, { - "epoch": 0.2609878432372694, + "epoch": 0.36035683543313246, "grad_norm": 0.0, - "learning_rate": 1.7329894432188328e-05, - "loss": 1.0729, + "learning_rate": 1.4801915710277451e-05, + "loss": 1.2664, "step": 9210 }, { - "epoch": 0.26101618067953186, + "epoch": 0.3603959621253619, "grad_norm": 0.0, - "learning_rate": 1.7329270084335972e-05, - "loss": 0.7637, + "learning_rate": 1.4800804102665045e-05, + "loss": 1.1003, "step": 9211 }, { - "epoch": 0.2610445181217943, + "epoch": 0.36043508881759134, "grad_norm": 0.0, - "learning_rate": 1.7328645674746448e-05, - "loss": 1.0047, + "learning_rate": 1.479969241795774e-05, + "loss": 1.0554, "step": 9212 }, { - "epoch": 0.2610728555640568, + "epoch": 0.3604742155098208, "grad_norm": 0.0, - "learning_rate": 1.7328021203425023e-05, - "loss": 0.9533, + "learning_rate": 1.4798580656173391e-05, + "loss": 0.99, "step": 9213 }, { - "epoch": 0.26110119300631923, + "epoch": 0.3605133422020502, "grad_norm": 0.0, - "learning_rate": 1.7327396670376954e-05, - "loss": 0.899, + "learning_rate": 1.4797468817329847e-05, + "loss": 1.241, "step": 9214 }, { - "epoch": 0.2611295304485817, + "epoch": 0.36055246889427967, "grad_norm": 0.0, - "learning_rate": 1.73267720756075e-05, - "loss": 0.8906, + "learning_rate": 1.479635690144497e-05, + "loss": 1.0709, "step": 9215 }, { - "epoch": 0.26115786789084416, + "epoch": 0.3605915955865091, "grad_norm": 0.0, - "learning_rate": 1.7326147419121926e-05, - "loss": 1.07, + "learning_rate": 1.479524490853661e-05, + "loss": 1.0836, "step": 9216 }, { - "epoch": 0.26118620533310666, + "epoch": 0.36063072227873855, "grad_norm": 0.0, - "learning_rate": 1.732552270092549e-05, - "loss": 1.0435, + "learning_rate": 1.4794132838622624e-05, + "loss": 1.144, "step": 9217 }, { - "epoch": 0.2612145427753691, + "epoch": 0.360669848970968, "grad_norm": 0.0, - "learning_rate": 1.7324897921023456e-05, - "loss": 0.8745, + "learning_rate": 1.4793020691720871e-05, + "loss": 1.0933, "step": 9218 }, { - "epoch": 0.26124288021763153, + "epoch": 0.36070897566319743, "grad_norm": 0.0, - "learning_rate": 1.732427307942109e-05, - "loss": 0.9885, + "learning_rate": 1.4791908467849214e-05, + "loss": 0.9507, "step": 9219 }, { - "epoch": 0.26127121765989403, + "epoch": 0.36074810235542687, "grad_norm": 0.0, - "learning_rate": 1.732364817612365e-05, - "loss": 1.0806, + "learning_rate": 1.479079616702551e-05, + "loss": 1.0997, "step": 9220 }, { - "epoch": 0.26129955510215647, + "epoch": 0.3607872290476563, "grad_norm": 0.0, - "learning_rate": 1.73230232111364e-05, - "loss": 0.9633, + "learning_rate": 1.4789683789267623e-05, + "loss": 1.0883, "step": 9221 }, { - "epoch": 0.26132789254441896, + "epoch": 0.36082635573988575, "grad_norm": 0.0, - "learning_rate": 1.732239818446461e-05, - "loss": 1.0033, + "learning_rate": 1.4788571334593418e-05, + "loss": 1.0014, "step": 9222 }, { - "epoch": 0.2613562299866814, + "epoch": 0.3608654824321152, "grad_norm": 0.0, - "learning_rate": 1.732177309611354e-05, - "loss": 0.9667, + "learning_rate": 1.4787458803020755e-05, + "loss": 1.0325, "step": 9223 }, { - "epoch": 0.26138456742894384, + "epoch": 0.36090460912434463, "grad_norm": 0.0, - "learning_rate": 1.7321147946088454e-05, - "loss": 0.9759, + "learning_rate": 1.4786346194567505e-05, + "loss": 1.1263, "step": 9224 }, { - "epoch": 0.26141290487120633, + "epoch": 0.3609437358165741, "grad_norm": 0.0, - "learning_rate": 1.7320522734394623e-05, - "loss": 0.8677, + "learning_rate": 1.4785233509251531e-05, + "loss": 1.0128, "step": 9225 }, { - "epoch": 0.26144124231346877, + "epoch": 0.3609828625088035, "grad_norm": 0.0, - "learning_rate": 1.7319897461037308e-05, - "loss": 0.9094, + "learning_rate": 1.4784120747090704e-05, + "loss": 1.0888, "step": 9226 }, { - "epoch": 0.26146957975573126, + "epoch": 0.36102198920103296, "grad_norm": 0.0, - "learning_rate": 1.731927212602178e-05, - "loss": 0.9808, + "learning_rate": 1.478300790810289e-05, + "loss": 1.0437, "step": 9227 }, { - "epoch": 0.2614979171979937, + "epoch": 0.3610611158932624, "grad_norm": 0.0, - "learning_rate": 1.73186467293533e-05, - "loss": 0.9964, + "learning_rate": 1.4781894992305967e-05, + "loss": 1.1424, "step": 9228 }, { - "epoch": 0.2615262546402562, + "epoch": 0.36110024258549184, "grad_norm": 0.0, - "learning_rate": 1.731802127103715e-05, - "loss": 0.9719, + "learning_rate": 1.4780781999717799e-05, + "loss": 1.0142, "step": 9229 }, { - "epoch": 0.26155459208251863, + "epoch": 0.3611393692777213, "grad_norm": 0.0, - "learning_rate": 1.7317395751078583e-05, - "loss": 0.9334, + "learning_rate": 1.4779668930356265e-05, + "loss": 0.9615, "step": 9230 }, { - "epoch": 0.26158292952478107, + "epoch": 0.3611784959699507, "grad_norm": 0.0, - "learning_rate": 1.7316770169482878e-05, - "loss": 0.9676, + "learning_rate": 1.4778555784239237e-05, + "loss": 1.0795, "step": 9231 }, { - "epoch": 0.26161126696704357, + "epoch": 0.36121762266218016, "grad_norm": 0.0, - "learning_rate": 1.7316144526255297e-05, - "loss": 1.035, + "learning_rate": 1.477744256138459e-05, + "loss": 1.0848, "step": 9232 }, { - "epoch": 0.261639604409306, + "epoch": 0.3612567493544096, "grad_norm": 0.0, - "learning_rate": 1.7315518821401117e-05, - "loss": 1.0069, + "learning_rate": 1.4776329261810204e-05, + "loss": 1.1233, "step": 9233 }, { - "epoch": 0.2616679418515685, + "epoch": 0.36129587604663904, "grad_norm": 0.0, - "learning_rate": 1.7314893054925604e-05, - "loss": 0.9232, + "learning_rate": 1.477521588553395e-05, + "loss": 1.0927, "step": 9234 }, { - "epoch": 0.26169627929383094, + "epoch": 0.3613350027388685, "grad_norm": 0.0, - "learning_rate": 1.731426722683403e-05, - "loss": 0.8618, + "learning_rate": 1.4774102432573718e-05, + "loss": 1.0074, "step": 9235 }, { - "epoch": 0.2617246167360934, + "epoch": 0.36137412943109787, "grad_norm": 0.0, - "learning_rate": 1.7313641337131668e-05, - "loss": 0.9944, + "learning_rate": 1.477298890294738e-05, + "loss": 1.0873, "step": 9236 }, { - "epoch": 0.26175295417835587, + "epoch": 0.3614132561233273, "grad_norm": 0.0, - "learning_rate": 1.731301538582379e-05, - "loss": 1.0239, + "learning_rate": 1.4771875296672824e-05, + "loss": 1.1014, "step": 9237 }, { - "epoch": 0.2617812916206183, + "epoch": 0.36145238281555675, "grad_norm": 0.0, - "learning_rate": 1.7312389372915664e-05, - "loss": 1.0183, + "learning_rate": 1.4770761613767925e-05, + "loss": 1.1446, "step": 9238 }, { - "epoch": 0.2618096290628808, + "epoch": 0.3614915095077862, "grad_norm": 0.0, - "learning_rate": 1.731176329841257e-05, - "loss": 0.9444, + "learning_rate": 1.4769647854250578e-05, + "loss": 1.0652, "step": 9239 }, { - "epoch": 0.26183796650514324, + "epoch": 0.36153063620001563, "grad_norm": 0.0, - "learning_rate": 1.731113716231978e-05, - "loss": 0.9002, + "learning_rate": 1.476853401813866e-05, + "loss": 1.107, "step": 9240 }, { - "epoch": 0.26186630394740573, + "epoch": 0.3615697628922451, "grad_norm": 0.0, - "learning_rate": 1.7310510964642564e-05, - "loss": 0.961, + "learning_rate": 1.4767420105450064e-05, + "loss": 1.0864, "step": 9241 }, { - "epoch": 0.26189464138966817, + "epoch": 0.3616088895844745, "grad_norm": 0.0, - "learning_rate": 1.73098847053862e-05, - "loss": 0.8992, + "learning_rate": 1.4766306116202674e-05, + "loss": 1.069, "step": 9242 }, { - "epoch": 0.2619229788319306, + "epoch": 0.36164801627670395, "grad_norm": 0.0, - "learning_rate": 1.7309258384555962e-05, - "loss": 1.019, + "learning_rate": 1.4765192050414378e-05, + "loss": 1.0208, "step": 9243 }, { - "epoch": 0.2619513162741931, + "epoch": 0.3616871429689334, "grad_norm": 0.0, - "learning_rate": 1.730863200215713e-05, - "loss": 0.9866, + "learning_rate": 1.4764077908103071e-05, + "loss": 1.0058, "step": 9244 }, { - "epoch": 0.26197965371645554, + "epoch": 0.36172626966116284, "grad_norm": 0.0, - "learning_rate": 1.7308005558194974e-05, - "loss": 0.9746, + "learning_rate": 1.476296368928664e-05, + "loss": 1.1526, "step": 9245 }, { - "epoch": 0.26200799115871803, + "epoch": 0.3617653963533923, "grad_norm": 0.0, - "learning_rate": 1.7307379052674772e-05, - "loss": 1.0692, + "learning_rate": 1.4761849393982983e-05, + "loss": 0.994, "step": 9246 }, { - "epoch": 0.2620363286009805, + "epoch": 0.3618045230456217, "grad_norm": 0.0, - "learning_rate": 1.7306752485601807e-05, - "loss": 0.9337, + "learning_rate": 1.4760735022209992e-05, + "loss": 1.1253, "step": 9247 }, { - "epoch": 0.2620646660432429, + "epoch": 0.36184364973785116, "grad_norm": 0.0, - "learning_rate": 1.7306125856981348e-05, - "loss": 0.9077, + "learning_rate": 1.4759620573985561e-05, + "loss": 1.1083, "step": 9248 }, { - "epoch": 0.2620930034855054, + "epoch": 0.3618827764300806, "grad_norm": 0.0, - "learning_rate": 1.730549916681868e-05, - "loss": 1.0121, + "learning_rate": 1.4758506049327586e-05, + "loss": 1.0487, "step": 9249 }, { - "epoch": 0.26212134092776784, + "epoch": 0.36192190312231004, "grad_norm": 0.0, - "learning_rate": 1.7304872415119078e-05, - "loss": 0.9665, + "learning_rate": 1.4757391448253968e-05, + "loss": 1.1252, "step": 9250 }, { - "epoch": 0.26214967837003034, + "epoch": 0.3619610298145395, "grad_norm": 0.0, - "learning_rate": 1.7304245601887825e-05, - "loss": 0.9115, + "learning_rate": 1.4756276770782607e-05, + "loss": 1.2333, "step": 9251 }, { - "epoch": 0.2621780158122928, + "epoch": 0.3620001565067689, "grad_norm": 0.0, - "learning_rate": 1.73036187271302e-05, - "loss": 1.0773, + "learning_rate": 1.4755162016931397e-05, + "loss": 1.1399, "step": 9252 }, { - "epoch": 0.26220635325455527, + "epoch": 0.36203928319899836, "grad_norm": 0.0, - "learning_rate": 1.7302991790851477e-05, - "loss": 0.9559, + "learning_rate": 1.4754047186718245e-05, + "loss": 1.1595, "step": 9253 }, { - "epoch": 0.2622346906968177, + "epoch": 0.3620784098912278, "grad_norm": 0.0, - "learning_rate": 1.730236479305695e-05, - "loss": 0.9652, + "learning_rate": 1.4752932280161055e-05, + "loss": 1.0916, "step": 9254 }, { - "epoch": 0.26226302813908015, + "epoch": 0.36211753658345724, "grad_norm": 0.0, - "learning_rate": 1.7301737733751888e-05, - "loss": 1.0104, + "learning_rate": 1.4751817297277725e-05, + "loss": 1.0759, "step": 9255 }, { - "epoch": 0.26229136558134264, + "epoch": 0.3621566632756867, "grad_norm": 0.0, - "learning_rate": 1.730111061294158e-05, - "loss": 0.8769, + "learning_rate": 1.4750702238086164e-05, + "loss": 1.1783, "step": 9256 }, { - "epoch": 0.2623197030236051, + "epoch": 0.3621957899679161, "grad_norm": 0.0, - "learning_rate": 1.73004834306313e-05, - "loss": 0.9313, + "learning_rate": 1.4749587102604279e-05, + "loss": 1.2032, "step": 9257 }, { - "epoch": 0.26234804046586757, + "epoch": 0.36223491666014557, "grad_norm": 0.0, - "learning_rate": 1.7299856186826344e-05, - "loss": 1.0465, + "learning_rate": 1.4748471890849979e-05, + "loss": 0.9703, "step": 9258 }, { - "epoch": 0.26237637790813, + "epoch": 0.362274043352375, "grad_norm": 0.0, - "learning_rate": 1.7299228881531984e-05, - "loss": 0.973, + "learning_rate": 1.4747356602841167e-05, + "loss": 1.1539, "step": 9259 }, { - "epoch": 0.26240471535039245, + "epoch": 0.36231317004460445, "grad_norm": 0.0, - "learning_rate": 1.729860151475351e-05, - "loss": 0.9476, + "learning_rate": 1.4746241238595757e-05, + "loss": 1.0464, "step": 9260 }, { - "epoch": 0.26243305279265494, + "epoch": 0.3623522967368339, "grad_norm": 0.0, - "learning_rate": 1.7297974086496204e-05, - "loss": 0.9858, + "learning_rate": 1.4745125798131664e-05, + "loss": 1.0142, "step": 9261 }, { - "epoch": 0.2624613902349174, + "epoch": 0.36239142342906333, "grad_norm": 0.0, - "learning_rate": 1.7297346596765357e-05, - "loss": 1.0053, + "learning_rate": 1.4744010281466792e-05, + "loss": 1.1644, "step": 9262 }, { - "epoch": 0.2624897276771799, + "epoch": 0.36243055012129277, "grad_norm": 0.0, - "learning_rate": 1.7296719045566244e-05, - "loss": 1.0378, + "learning_rate": 1.4742894688619061e-05, + "loss": 1.0634, "step": 9263 }, { - "epoch": 0.2625180651194423, + "epoch": 0.3624696768135222, "grad_norm": 0.0, - "learning_rate": 1.7296091432904164e-05, - "loss": 1.0253, + "learning_rate": 1.4741779019606386e-05, + "loss": 1.0953, "step": 9264 }, { - "epoch": 0.2625464025617048, + "epoch": 0.3625088035057516, "grad_norm": 0.0, - "learning_rate": 1.7295463758784392e-05, - "loss": 0.9129, + "learning_rate": 1.4740663274446677e-05, + "loss": 1.1641, "step": 9265 }, { - "epoch": 0.26257474000396724, + "epoch": 0.36254793019798104, "grad_norm": 0.0, - "learning_rate": 1.729483602321222e-05, - "loss": 1.0601, + "learning_rate": 1.473954745315786e-05, + "loss": 1.1139, "step": 9266 }, { - "epoch": 0.2626030774462297, + "epoch": 0.3625870568902105, "grad_norm": 0.0, - "learning_rate": 1.7294208226192935e-05, - "loss": 0.8058, + "learning_rate": 1.473843155575785e-05, + "loss": 1.0862, "step": 9267 }, { - "epoch": 0.2626314148884922, + "epoch": 0.3626261835824399, "grad_norm": 0.0, - "learning_rate": 1.7293580367731824e-05, - "loss": 0.988, + "learning_rate": 1.4737315582264566e-05, + "loss": 1.0614, "step": 9268 }, { - "epoch": 0.2626597523307546, + "epoch": 0.36266531027466936, "grad_norm": 0.0, - "learning_rate": 1.729295244783418e-05, - "loss": 0.9088, + "learning_rate": 1.4736199532695929e-05, + "loss": 1.0097, "step": 9269 }, { - "epoch": 0.2626880897730171, + "epoch": 0.3627044369668988, "grad_norm": 0.0, - "learning_rate": 1.729232446650529e-05, - "loss": 0.9603, + "learning_rate": 1.4735083407069866e-05, + "loss": 1.0575, "step": 9270 }, { - "epoch": 0.26271642721527955, + "epoch": 0.36274356365912824, "grad_norm": 0.0, - "learning_rate": 1.729169642375044e-05, - "loss": 0.9191, + "learning_rate": 1.4733967205404293e-05, + "loss": 1.1132, "step": 9271 }, { - "epoch": 0.262744764657542, + "epoch": 0.3627826903513577, "grad_norm": 0.0, - "learning_rate": 1.7291068319574923e-05, - "loss": 1.0137, + "learning_rate": 1.4732850927717139e-05, + "loss": 1.1244, "step": 9272 }, { - "epoch": 0.2627731020998045, + "epoch": 0.3628218170435871, "grad_norm": 0.0, - "learning_rate": 1.7290440153984033e-05, - "loss": 0.9611, + "learning_rate": 1.4731734574026334e-05, + "loss": 1.0644, "step": 9273 }, { - "epoch": 0.2628014395420669, + "epoch": 0.36286094373581657, "grad_norm": 0.0, - "learning_rate": 1.7289811926983054e-05, - "loss": 1.0367, + "learning_rate": 1.4730618144349795e-05, + "loss": 1.2094, "step": 9274 }, { - "epoch": 0.2628297769843294, + "epoch": 0.362900070428046, "grad_norm": 0.0, - "learning_rate": 1.7289183638577286e-05, - "loss": 0.9269, + "learning_rate": 1.4729501638705461e-05, + "loss": 1.1097, "step": 9275 }, { - "epoch": 0.26285811442659185, + "epoch": 0.36293919712027545, "grad_norm": 0.0, - "learning_rate": 1.728855528877202e-05, - "loss": 0.9821, + "learning_rate": 1.4728385057111254e-05, + "loss": 1.0199, "step": 9276 }, { - "epoch": 0.26288645186885434, + "epoch": 0.3629783238125049, "grad_norm": 0.0, - "learning_rate": 1.7287926877572543e-05, - "loss": 1.0603, + "learning_rate": 1.4727268399585109e-05, + "loss": 1.0565, "step": 9277 }, { - "epoch": 0.2629147893111168, + "epoch": 0.36301745050473433, "grad_norm": 0.0, - "learning_rate": 1.728729840498415e-05, - "loss": 0.9521, + "learning_rate": 1.4726151666144954e-05, + "loss": 1.1317, "step": 9278 }, { - "epoch": 0.2629431267533792, + "epoch": 0.36305657719696377, "grad_norm": 0.0, - "learning_rate": 1.728666987101214e-05, - "loss": 0.9852, + "learning_rate": 1.472503485680873e-05, + "loss": 1.1281, "step": 9279 }, { - "epoch": 0.2629714641956417, + "epoch": 0.3630957038891932, "grad_norm": 0.0, - "learning_rate": 1.7286041275661796e-05, - "loss": 0.9367, + "learning_rate": 1.4723917971594368e-05, + "loss": 1.0969, "step": 9280 }, { - "epoch": 0.26299980163790415, + "epoch": 0.36313483058142265, "grad_norm": 0.0, - "learning_rate": 1.728541261893843e-05, - "loss": 0.9519, + "learning_rate": 1.4722801010519799e-05, + "loss": 1.0468, "step": 9281 }, { - "epoch": 0.26302813908016665, + "epoch": 0.3631739572736521, "grad_norm": 0.0, - "learning_rate": 1.7284783900847327e-05, - "loss": 1.0379, + "learning_rate": 1.4721683973602965e-05, + "loss": 1.1187, "step": 9282 }, { - "epoch": 0.2630564765224291, + "epoch": 0.36321308396588153, "grad_norm": 0.0, - "learning_rate": 1.728415512139378e-05, - "loss": 1.0128, + "learning_rate": 1.4720566860861802e-05, + "loss": 1.1048, "step": 9283 }, { - "epoch": 0.2630848139646915, + "epoch": 0.363252210658111, "grad_norm": 0.0, - "learning_rate": 1.7283526280583092e-05, - "loss": 0.9529, + "learning_rate": 1.4719449672314252e-05, + "loss": 1.047, "step": 9284 }, { - "epoch": 0.263113151406954, + "epoch": 0.3632913373503404, "grad_norm": 0.0, - "learning_rate": 1.7282897378420557e-05, - "loss": 0.974, + "learning_rate": 1.4718332407978252e-05, + "loss": 1.1381, "step": 9285 }, { - "epoch": 0.26314148884921645, + "epoch": 0.36333046404256986, "grad_norm": 0.0, - "learning_rate": 1.728226841491147e-05, - "loss": 1.0457, + "learning_rate": 1.471721506787175e-05, + "loss": 1.2054, "step": 9286 }, { - "epoch": 0.26316982629147895, + "epoch": 0.3633695907347993, "grad_norm": 0.0, - "learning_rate": 1.7281639390061136e-05, - "loss": 0.9419, + "learning_rate": 1.4716097652012683e-05, + "loss": 1.1649, "step": 9287 }, { - "epoch": 0.2631981637337414, + "epoch": 0.36340871742702874, "grad_norm": 0.0, - "learning_rate": 1.728101030387485e-05, - "loss": 0.918, + "learning_rate": 1.4714980160418995e-05, + "loss": 1.0209, "step": 9288 }, { - "epoch": 0.2632265011760039, + "epoch": 0.3634478441192582, "grad_norm": 0.0, - "learning_rate": 1.7280381156357907e-05, - "loss": 0.9347, + "learning_rate": 1.4713862593108637e-05, + "loss": 1.1644, "step": 9289 }, { - "epoch": 0.2632548386182663, + "epoch": 0.3634869708114876, "grad_norm": 0.0, - "learning_rate": 1.727975194751561e-05, - "loss": 0.9269, + "learning_rate": 1.4712744950099551e-05, + "loss": 1.0587, "step": 9290 }, { - "epoch": 0.26328317606052876, + "epoch": 0.36352609750371706, "grad_norm": 0.0, - "learning_rate": 1.7279122677353263e-05, - "loss": 0.9424, + "learning_rate": 1.4711627231409686e-05, + "loss": 1.0692, "step": 9291 }, { - "epoch": 0.26331151350279125, + "epoch": 0.3635652241959465, "grad_norm": 0.0, - "learning_rate": 1.7278493345876158e-05, - "loss": 1.0365, + "learning_rate": 1.4710509437056992e-05, + "loss": 1.1236, "step": 9292 }, { - "epoch": 0.2633398509450537, + "epoch": 0.3636043508881759, "grad_norm": 0.0, - "learning_rate": 1.7277863953089605e-05, - "loss": 0.9938, + "learning_rate": 1.470939156705942e-05, + "loss": 1.0864, "step": 9293 }, { - "epoch": 0.2633681883873162, + "epoch": 0.3636434775804053, "grad_norm": 0.0, - "learning_rate": 1.7277234498998897e-05, - "loss": 1.0443, + "learning_rate": 1.470827362143492e-05, + "loss": 1.0872, "step": 9294 }, { - "epoch": 0.2633965258295786, + "epoch": 0.36368260427263477, "grad_norm": 0.0, - "learning_rate": 1.7276604983609344e-05, - "loss": 0.9567, + "learning_rate": 1.4707155600201447e-05, + "loss": 1.0266, "step": 9295 }, { - "epoch": 0.26342486327184106, + "epoch": 0.3637217309648642, "grad_norm": 0.0, - "learning_rate": 1.7275975406926243e-05, - "loss": 0.9076, + "learning_rate": 1.4706037503376948e-05, + "loss": 1.1531, "step": 9296 }, { - "epoch": 0.26345320071410355, + "epoch": 0.36376085765709365, "grad_norm": 0.0, - "learning_rate": 1.72753457689549e-05, - "loss": 1.0137, + "learning_rate": 1.470491933097939e-05, + "loss": 1.1071, "step": 9297 }, { - "epoch": 0.263481538156366, + "epoch": 0.3637999843493231, "grad_norm": 0.0, - "learning_rate": 1.727471606970062e-05, - "loss": 1.003, + "learning_rate": 1.470380108302672e-05, + "loss": 0.9628, "step": 9298 }, { - "epoch": 0.2635098755986285, + "epoch": 0.36383911104155253, "grad_norm": 0.0, - "learning_rate": 1.7274086309168702e-05, - "loss": 0.995, + "learning_rate": 1.47026827595369e-05, + "loss": 1.1635, "step": 9299 }, { - "epoch": 0.2635382130408909, + "epoch": 0.363878237733782, "grad_norm": 0.0, - "learning_rate": 1.7273456487364458e-05, - "loss": 1.1048, + "learning_rate": 1.470156436052789e-05, + "loss": 1.1541, "step": 9300 }, { - "epoch": 0.2635665504831534, + "epoch": 0.3639173644260114, "grad_norm": 0.0, - "learning_rate": 1.7272826604293182e-05, - "loss": 0.7808, + "learning_rate": 1.4700445886017643e-05, + "loss": 1.1124, "step": 9301 }, { - "epoch": 0.26359488792541586, + "epoch": 0.36395649111824085, "grad_norm": 0.0, - "learning_rate": 1.727219665996019e-05, - "loss": 1.0707, + "learning_rate": 1.4699327336024127e-05, + "loss": 1.1787, "step": 9302 }, { - "epoch": 0.2636232253676783, + "epoch": 0.3639956178104703, "grad_norm": 0.0, - "learning_rate": 1.727156665437079e-05, - "loss": 1.0773, + "learning_rate": 1.4698208710565302e-05, + "loss": 1.0702, "step": 9303 }, { - "epoch": 0.2636515628099408, + "epoch": 0.36403474450269974, "grad_norm": 0.0, - "learning_rate": 1.7270936587530278e-05, - "loss": 1.0435, + "learning_rate": 1.4697090009659131e-05, + "loss": 1.0826, "step": 9304 }, { - "epoch": 0.2636799002522032, + "epoch": 0.3640738711949292, "grad_norm": 0.0, - "learning_rate": 1.7270306459443972e-05, - "loss": 0.923, + "learning_rate": 1.4695971233323584e-05, + "loss": 1.0728, "step": 9305 }, { - "epoch": 0.2637082376944657, + "epoch": 0.3641129978871586, "grad_norm": 0.0, - "learning_rate": 1.726967627011717e-05, - "loss": 1.0712, + "learning_rate": 1.4694852381576622e-05, + "loss": 1.0849, "step": 9306 }, { - "epoch": 0.26373657513672816, + "epoch": 0.36415212457938806, "grad_norm": 0.0, - "learning_rate": 1.7269046019555188e-05, - "loss": 1.0206, + "learning_rate": 1.4693733454436214e-05, + "loss": 1.2011, "step": 9307 }, { - "epoch": 0.2637649125789906, + "epoch": 0.3641912512716175, "grad_norm": 0.0, - "learning_rate": 1.726841570776333e-05, - "loss": 0.9612, + "learning_rate": 1.4692614451920328e-05, + "loss": 1.0495, "step": 9308 }, { - "epoch": 0.2637932500212531, + "epoch": 0.36423037796384694, "grad_norm": 0.0, - "learning_rate": 1.726778533474691e-05, - "loss": 1.0316, + "learning_rate": 1.4691495374046933e-05, + "loss": 1.0459, "step": 9309 }, { - "epoch": 0.26382158746351553, + "epoch": 0.3642695046560764, "grad_norm": 0.0, - "learning_rate": 1.7267154900511233e-05, - "loss": 0.9657, + "learning_rate": 1.4690376220834002e-05, + "loss": 1.0758, "step": 9310 }, { - "epoch": 0.263849924905778, + "epoch": 0.3643086313483058, "grad_norm": 0.0, - "learning_rate": 1.726652440506161e-05, - "loss": 0.9633, + "learning_rate": 1.4689256992299506e-05, + "loss": 1.0818, "step": 9311 }, { - "epoch": 0.26387826234804046, + "epoch": 0.36434775804053526, "grad_norm": 0.0, - "learning_rate": 1.726589384840336e-05, - "loss": 0.9995, + "learning_rate": 1.4688137688461419e-05, + "loss": 1.0443, "step": 9312 }, { - "epoch": 0.26390659979030295, + "epoch": 0.3643868847327647, "grad_norm": 0.0, - "learning_rate": 1.7265263230541783e-05, - "loss": 1.0277, + "learning_rate": 1.4687018309337716e-05, + "loss": 1.191, "step": 9313 }, { - "epoch": 0.2639349372325654, + "epoch": 0.36442601142499415, "grad_norm": 0.0, - "learning_rate": 1.7264632551482198e-05, - "loss": 0.9752, + "learning_rate": 1.468589885494637e-05, + "loss": 1.2015, "step": 9314 }, { - "epoch": 0.26396327467482783, + "epoch": 0.3644651381172236, "grad_norm": 0.0, - "learning_rate": 1.7264001811229917e-05, - "loss": 1.0762, + "learning_rate": 1.4684779325305361e-05, + "loss": 1.0515, "step": 9315 }, { - "epoch": 0.2639916121170903, + "epoch": 0.364504264809453, "grad_norm": 0.0, - "learning_rate": 1.726337100979025e-05, - "loss": 1.0334, + "learning_rate": 1.4683659720432663e-05, + "loss": 1.116, "step": 9316 }, { - "epoch": 0.26401994955935276, + "epoch": 0.36454339150168247, "grad_norm": 0.0, - "learning_rate": 1.7262740147168508e-05, - "loss": 0.9027, + "learning_rate": 1.4682540040346265e-05, + "loss": 1.1703, "step": 9317 }, { - "epoch": 0.26404828700161526, + "epoch": 0.3645825181939119, "grad_norm": 0.0, - "learning_rate": 1.726210922337001e-05, - "loss": 0.8699, + "learning_rate": 1.468142028506414e-05, + "loss": 1.1018, "step": 9318 }, { - "epoch": 0.2640766244438777, + "epoch": 0.36462164488614135, "grad_norm": 0.0, - "learning_rate": 1.726147823840007e-05, - "loss": 0.9842, + "learning_rate": 1.4680300454604267e-05, + "loss": 1.0671, "step": 9319 }, { - "epoch": 0.26410496188614013, + "epoch": 0.3646607715783708, "grad_norm": 0.0, - "learning_rate": 1.7260847192264005e-05, - "loss": 0.9851, + "learning_rate": 1.4679180548984639e-05, + "loss": 1.1716, "step": 9320 }, { - "epoch": 0.2641332993284026, + "epoch": 0.3646998982706002, "grad_norm": 0.0, - "learning_rate": 1.7260216084967127e-05, - "loss": 0.9522, + "learning_rate": 1.4678060568223232e-05, + "loss": 1.0866, "step": 9321 }, { - "epoch": 0.26416163677066506, + "epoch": 0.3647390249628296, "grad_norm": 0.0, - "learning_rate": 1.725958491651475e-05, - "loss": 0.8544, + "learning_rate": 1.4676940512338035e-05, + "loss": 1.1307, "step": 9322 }, { - "epoch": 0.26418997421292756, + "epoch": 0.36477815165505906, "grad_norm": 0.0, - "learning_rate": 1.7258953686912197e-05, - "loss": 0.979, + "learning_rate": 1.4675820381347032e-05, + "loss": 1.0707, "step": 9323 }, { - "epoch": 0.26421831165519, + "epoch": 0.3648172783472885, "grad_norm": 0.0, - "learning_rate": 1.725832239616478e-05, - "loss": 1.0422, + "learning_rate": 1.4674700175268215e-05, + "loss": 0.9666, "step": 9324 }, { - "epoch": 0.2642466490974525, + "epoch": 0.36485640503951794, "grad_norm": 0.0, - "learning_rate": 1.725769104427782e-05, - "loss": 0.9971, + "learning_rate": 1.4673579894119572e-05, + "loss": 1.1324, "step": 9325 }, { - "epoch": 0.26427498653971493, + "epoch": 0.3648955317317474, "grad_norm": 0.0, - "learning_rate": 1.725705963125663e-05, - "loss": 0.9796, + "learning_rate": 1.467245953791909e-05, + "loss": 1.0504, "step": 9326 }, { - "epoch": 0.26430332398197737, + "epoch": 0.3649346584239768, "grad_norm": 0.0, - "learning_rate": 1.7256428157106532e-05, - "loss": 0.9632, + "learning_rate": 1.4671339106684762e-05, + "loss": 1.0466, "step": 9327 }, { - "epoch": 0.26433166142423986, + "epoch": 0.36497378511620626, "grad_norm": 0.0, - "learning_rate": 1.7255796621832844e-05, - "loss": 0.9911, + "learning_rate": 1.4670218600434583e-05, + "loss": 1.0045, "step": 9328 }, { - "epoch": 0.2643599988665023, + "epoch": 0.3650129118084357, "grad_norm": 0.0, - "learning_rate": 1.7255165025440893e-05, - "loss": 0.9072, + "learning_rate": 1.4669098019186546e-05, + "loss": 1.1158, "step": 9329 }, { - "epoch": 0.2643883363087648, + "epoch": 0.36505203850066514, "grad_norm": 0.0, - "learning_rate": 1.7254533367935987e-05, - "loss": 0.9865, + "learning_rate": 1.4667977362958644e-05, + "loss": 1.2082, "step": 9330 }, { - "epoch": 0.26441667375102723, + "epoch": 0.3650911651928946, "grad_norm": 0.0, - "learning_rate": 1.7253901649323454e-05, - "loss": 0.9721, + "learning_rate": 1.4666856631768875e-05, + "loss": 1.2363, "step": 9331 }, { - "epoch": 0.26444501119328967, + "epoch": 0.365130291885124, "grad_norm": 0.0, - "learning_rate": 1.7253269869608616e-05, - "loss": 1.0095, + "learning_rate": 1.4665735825635236e-05, + "loss": 1.2063, "step": 9332 }, { - "epoch": 0.26447334863555216, + "epoch": 0.36516941857735347, "grad_norm": 0.0, - "learning_rate": 1.7252638028796788e-05, - "loss": 0.9896, + "learning_rate": 1.4664614944575728e-05, + "loss": 1.0746, "step": 9333 }, { - "epoch": 0.2645016860778146, + "epoch": 0.3652085452695829, "grad_norm": 0.0, - "learning_rate": 1.72520061268933e-05, - "loss": 0.8992, + "learning_rate": 1.4663493988608348e-05, + "loss": 1.038, "step": 9334 }, { - "epoch": 0.2645300235200771, + "epoch": 0.36524767196181235, "grad_norm": 0.0, - "learning_rate": 1.725137416390347e-05, - "loss": 0.9673, + "learning_rate": 1.46623729577511e-05, + "loss": 1.1588, "step": 9335 }, { - "epoch": 0.26455836096233953, + "epoch": 0.3652867986540418, "grad_norm": 0.0, - "learning_rate": 1.7250742139832623e-05, - "loss": 1.0047, + "learning_rate": 1.466125185202198e-05, + "loss": 1.117, "step": 9336 }, { - "epoch": 0.264586698404602, + "epoch": 0.36532592534627123, "grad_norm": 0.0, - "learning_rate": 1.7250110054686084e-05, - "loss": 0.9633, + "learning_rate": 1.4660130671439002e-05, + "loss": 1.1988, "step": 9337 }, { - "epoch": 0.26461503584686447, + "epoch": 0.36536505203850067, "grad_norm": 0.0, - "learning_rate": 1.7249477908469174e-05, - "loss": 1.048, + "learning_rate": 1.465900941602016e-05, + "loss": 1.0974, "step": 9338 }, { - "epoch": 0.2646433732891269, + "epoch": 0.3654041787307301, "grad_norm": 0.0, - "learning_rate": 1.724884570118722e-05, - "loss": 0.9617, + "learning_rate": 1.4657888085783468e-05, + "loss": 1.1047, "step": 9339 }, { - "epoch": 0.2646717107313894, + "epoch": 0.36544330542295955, "grad_norm": 0.0, - "learning_rate": 1.7248213432845546e-05, - "loss": 1.0076, + "learning_rate": 1.4656766680746927e-05, + "loss": 1.0989, "step": 9340 }, { - "epoch": 0.26470004817365184, + "epoch": 0.365482432115189, "grad_norm": 0.0, - "learning_rate": 1.724758110344948e-05, - "loss": 0.8993, + "learning_rate": 1.465564520092855e-05, + "loss": 1.0132, "step": 9341 }, { - "epoch": 0.26472838561591433, + "epoch": 0.36552155880741843, "grad_norm": 0.0, - "learning_rate": 1.7246948713004346e-05, - "loss": 0.9516, + "learning_rate": 1.4654523646346345e-05, + "loss": 0.9304, "step": 9342 }, { - "epoch": 0.26475672305817677, + "epoch": 0.3655606854996479, "grad_norm": 0.0, - "learning_rate": 1.7246316261515475e-05, - "loss": 0.926, + "learning_rate": 1.4653402017018323e-05, + "loss": 1.1104, "step": 9343 }, { - "epoch": 0.2647850605004392, + "epoch": 0.3655998121918773, "grad_norm": 0.0, - "learning_rate": 1.724568374898819e-05, - "loss": 0.8513, + "learning_rate": 1.4652280312962496e-05, + "loss": 0.9711, "step": 9344 }, { - "epoch": 0.2648133979427017, + "epoch": 0.36563893888410676, "grad_norm": 0.0, - "learning_rate": 1.7245051175427816e-05, - "loss": 1.0131, + "learning_rate": 1.4651158534196876e-05, + "loss": 0.9505, "step": 9345 }, { - "epoch": 0.26484173538496414, + "epoch": 0.3656780655763362, "grad_norm": 0.0, - "learning_rate": 1.7244418540839688e-05, - "loss": 0.9584, + "learning_rate": 1.4650036680739479e-05, + "loss": 0.9962, "step": 9346 }, { - "epoch": 0.26487007282722663, + "epoch": 0.36571719226856564, "grad_norm": 0.0, - "learning_rate": 1.7243785845229134e-05, - "loss": 0.8795, + "learning_rate": 1.464891475260832e-05, + "loss": 1.1196, "step": 9347 }, { - "epoch": 0.26489841026948907, + "epoch": 0.3657563189607951, "grad_norm": 0.0, - "learning_rate": 1.7243153088601482e-05, - "loss": 1.0121, + "learning_rate": 1.4647792749821414e-05, + "loss": 1.073, "step": 9348 }, { - "epoch": 0.26492674771175156, + "epoch": 0.3657954456530245, "grad_norm": 0.0, - "learning_rate": 1.724252027096206e-05, - "loss": 1.0938, + "learning_rate": 1.4646670672396781e-05, + "loss": 1.0273, "step": 9349 }, { - "epoch": 0.264955085154014, + "epoch": 0.3658345723452539, "grad_norm": 0.0, - "learning_rate": 1.7241887392316204e-05, - "loss": 0.9519, + "learning_rate": 1.464554852035244e-05, + "loss": 1.0591, "step": 9350 }, { - "epoch": 0.26498342259627644, + "epoch": 0.36587369903748335, "grad_norm": 0.0, - "learning_rate": 1.7241254452669235e-05, - "loss": 0.9717, + "learning_rate": 1.464442629370641e-05, + "loss": 1.1251, "step": 9351 }, { - "epoch": 0.26501176003853893, + "epoch": 0.3659128257297128, "grad_norm": 0.0, - "learning_rate": 1.7240621452026494e-05, - "loss": 1.0188, + "learning_rate": 1.4643303992476715e-05, + "loss": 1.2606, "step": 9352 }, { - "epoch": 0.2650400974808014, + "epoch": 0.36595195242194223, "grad_norm": 0.0, - "learning_rate": 1.723998839039331e-05, - "loss": 1.0629, + "learning_rate": 1.4642181616681375e-05, + "loss": 1.0992, "step": 9353 }, { - "epoch": 0.26506843492306387, + "epoch": 0.36599107911417167, "grad_norm": 0.0, - "learning_rate": 1.723935526777502e-05, - "loss": 0.9527, + "learning_rate": 1.4641059166338413e-05, + "loss": 1.0725, "step": 9354 }, { - "epoch": 0.2650967723653263, + "epoch": 0.3660302058064011, "grad_norm": 0.0, - "learning_rate": 1.7238722084176943e-05, - "loss": 0.9781, + "learning_rate": 1.4639936641465861e-05, + "loss": 1.0161, "step": 9355 }, { - "epoch": 0.26512510980758874, + "epoch": 0.36606933249863055, "grad_norm": 0.0, - "learning_rate": 1.723808883960443e-05, - "loss": 1.1164, + "learning_rate": 1.463881404208174e-05, + "loss": 1.1367, "step": 9356 }, { - "epoch": 0.26515344724985124, + "epoch": 0.36610845919086, "grad_norm": 0.0, - "learning_rate": 1.72374555340628e-05, - "loss": 0.9905, + "learning_rate": 1.4637691368204076e-05, + "loss": 1.1152, "step": 9357 }, { - "epoch": 0.2651817846921137, + "epoch": 0.36614758588308943, "grad_norm": 0.0, - "learning_rate": 1.72368221675574e-05, - "loss": 0.8927, + "learning_rate": 1.4636568619850902e-05, + "loss": 1.0429, "step": 9358 }, { - "epoch": 0.26521012213437617, + "epoch": 0.3661867125753189, "grad_norm": 0.0, - "learning_rate": 1.7236188740093557e-05, - "loss": 0.9353, + "learning_rate": 1.4635445797040245e-05, + "loss": 1.0571, "step": 9359 }, { - "epoch": 0.2652384595766386, + "epoch": 0.3662258392675483, "grad_norm": 0.0, - "learning_rate": 1.7235555251676608e-05, - "loss": 0.9792, + "learning_rate": 1.4634322899790137e-05, + "loss": 1.0917, "step": 9360 }, { - "epoch": 0.2652667970189011, + "epoch": 0.36626496595977776, "grad_norm": 0.0, - "learning_rate": 1.723492170231189e-05, - "loss": 0.868, + "learning_rate": 1.4633199928118608e-05, + "loss": 1.0359, "step": 9361 }, { - "epoch": 0.26529513446116354, + "epoch": 0.3663040926520072, "grad_norm": 0.0, - "learning_rate": 1.7234288092004745e-05, - "loss": 1.0407, + "learning_rate": 1.4632076882043698e-05, + "loss": 1.0904, "step": 9362 }, { - "epoch": 0.265323471903426, + "epoch": 0.36634321934423664, "grad_norm": 0.0, - "learning_rate": 1.72336544207605e-05, - "loss": 1.0099, + "learning_rate": 1.4630953761583433e-05, + "loss": 1.0489, "step": 9363 }, { - "epoch": 0.26535180934568847, + "epoch": 0.3663823460364661, "grad_norm": 0.0, - "learning_rate": 1.72330206885845e-05, - "loss": 0.9926, + "learning_rate": 1.4629830566755858e-05, + "loss": 1.1281, "step": 9364 }, { - "epoch": 0.2653801467879509, + "epoch": 0.3664214727286955, "grad_norm": 0.0, - "learning_rate": 1.723238689548208e-05, - "loss": 0.966, + "learning_rate": 1.4628707297579002e-05, + "loss": 1.0359, "step": 9365 }, { - "epoch": 0.2654084842302134, + "epoch": 0.36646059942092496, "grad_norm": 0.0, - "learning_rate": 1.7231753041458578e-05, - "loss": 0.9475, + "learning_rate": 1.4627583954070909e-05, + "loss": 1.2154, "step": 9366 }, { - "epoch": 0.26543682167247584, + "epoch": 0.3664997261131544, "grad_norm": 0.0, - "learning_rate": 1.7231119126519336e-05, - "loss": 0.9387, + "learning_rate": 1.4626460536249619e-05, + "loss": 1.1062, "step": 9367 }, { - "epoch": 0.2654651591147383, + "epoch": 0.36653885280538384, "grad_norm": 0.0, - "learning_rate": 1.7230485150669695e-05, - "loss": 1.0368, + "learning_rate": 1.4625337044133163e-05, + "loss": 1.1393, "step": 9368 }, { - "epoch": 0.2654934965570008, + "epoch": 0.3665779794976133, "grad_norm": 0.0, - "learning_rate": 1.722985111391499e-05, - "loss": 0.9939, + "learning_rate": 1.4624213477739595e-05, + "loss": 1.131, "step": 9369 }, { - "epoch": 0.2655218339992632, + "epoch": 0.3666171061898427, "grad_norm": 0.0, - "learning_rate": 1.7229217016260563e-05, - "loss": 0.9312, + "learning_rate": 1.4623089837086953e-05, + "loss": 1.0703, "step": 9370 }, { - "epoch": 0.2655501714415257, + "epoch": 0.36665623288207216, "grad_norm": 0.0, - "learning_rate": 1.7228582857711756e-05, - "loss": 1.0098, + "learning_rate": 1.4621966122193282e-05, + "loss": 1.1847, "step": 9371 }, { - "epoch": 0.26557850888378814, + "epoch": 0.3666953595743016, "grad_norm": 0.0, - "learning_rate": 1.7227948638273918e-05, - "loss": 0.951, + "learning_rate": 1.4620842333076622e-05, + "loss": 1.1226, "step": 9372 }, { - "epoch": 0.2656068463260506, + "epoch": 0.36673448626653105, "grad_norm": 0.0, - "learning_rate": 1.7227314357952378e-05, - "loss": 0.9563, + "learning_rate": 1.4619718469755029e-05, + "loss": 1.0816, "step": 9373 }, { - "epoch": 0.2656351837683131, + "epoch": 0.3667736129587605, "grad_norm": 0.0, - "learning_rate": 1.722668001675249e-05, - "loss": 0.9566, + "learning_rate": 1.4618594532246544e-05, + "loss": 1.0817, "step": 9374 }, { - "epoch": 0.2656635212105755, + "epoch": 0.3668127396509899, "grad_norm": 0.0, - "learning_rate": 1.722604561467959e-05, - "loss": 0.9408, + "learning_rate": 1.4617470520569219e-05, + "loss": 1.1421, "step": 9375 }, { - "epoch": 0.265691858652838, + "epoch": 0.36685186634321937, "grad_norm": 0.0, - "learning_rate": 1.7225411151739023e-05, - "loss": 1.0768, + "learning_rate": 1.4616346434741104e-05, + "loss": 0.9765, "step": 9376 }, { - "epoch": 0.26572019609510045, + "epoch": 0.3668909930354488, "grad_norm": 0.0, - "learning_rate": 1.7224776627936135e-05, - "loss": 0.9432, + "learning_rate": 1.4615222274780248e-05, + "loss": 1.0686, "step": 9377 }, { - "epoch": 0.26574853353736294, + "epoch": 0.3669301197276782, "grad_norm": 0.0, - "learning_rate": 1.7224142043276273e-05, - "loss": 0.9767, + "learning_rate": 1.4614098040704708e-05, + "loss": 1.0996, "step": 9378 }, { - "epoch": 0.2657768709796254, + "epoch": 0.36696924641990764, "grad_norm": 0.0, - "learning_rate": 1.722350739776478e-05, - "loss": 0.9376, + "learning_rate": 1.4612973732532531e-05, + "loss": 1.1562, "step": 9379 }, { - "epoch": 0.2658052084218878, + "epoch": 0.3670083731121371, "grad_norm": 0.0, - "learning_rate": 1.7222872691407e-05, - "loss": 0.9713, + "learning_rate": 1.4611849350281782e-05, + "loss": 1.1147, "step": 9380 }, { - "epoch": 0.2658335458641503, + "epoch": 0.3670474998043665, "grad_norm": 0.0, - "learning_rate": 1.722223792420828e-05, - "loss": 0.8972, + "learning_rate": 1.4610724893970507e-05, + "loss": 1.161, "step": 9381 }, { - "epoch": 0.26586188330641275, + "epoch": 0.36708662649659596, "grad_norm": 0.0, - "learning_rate": 1.7221603096173974e-05, - "loss": 0.8692, + "learning_rate": 1.460960036361677e-05, + "loss": 1.1871, "step": 9382 }, { - "epoch": 0.26589022074867524, + "epoch": 0.3671257531888254, "grad_norm": 0.0, - "learning_rate": 1.7220968207309416e-05, - "loss": 0.9036, + "learning_rate": 1.4608475759238625e-05, + "loss": 1.1493, "step": 9383 }, { - "epoch": 0.2659185581909377, + "epoch": 0.36716487988105484, "grad_norm": 0.0, - "learning_rate": 1.7220333257619967e-05, - "loss": 0.8754, + "learning_rate": 1.4607351080854138e-05, + "loss": 1.0945, "step": 9384 }, { - "epoch": 0.2659468956332001, + "epoch": 0.3672040065732843, "grad_norm": 0.0, - "learning_rate": 1.7219698247110964e-05, - "loss": 1.0059, + "learning_rate": 1.4606226328481361e-05, + "loss": 1.2526, "step": 9385 }, { - "epoch": 0.2659752330754626, + "epoch": 0.3672431332655137, "grad_norm": 0.0, - "learning_rate": 1.7219063175787768e-05, - "loss": 1.0238, + "learning_rate": 1.4605101502138363e-05, + "loss": 1.211, "step": 9386 }, { - "epoch": 0.26600357051772505, + "epoch": 0.36728225995774316, "grad_norm": 0.0, - "learning_rate": 1.721842804365572e-05, - "loss": 0.8687, + "learning_rate": 1.4603976601843208e-05, + "loss": 1.0414, "step": 9387 }, { - "epoch": 0.26603190795998755, + "epoch": 0.3673213866499726, "grad_norm": 0.0, - "learning_rate": 1.721779285072017e-05, - "loss": 0.9719, + "learning_rate": 1.4602851627613954e-05, + "loss": 1.0137, "step": 9388 }, { - "epoch": 0.26606024540225, + "epoch": 0.36736051334220204, "grad_norm": 0.0, - "learning_rate": 1.7217157596986474e-05, - "loss": 1.0893, + "learning_rate": 1.4601726579468674e-05, + "loss": 1.0771, "step": 9389 }, { - "epoch": 0.2660885828445125, + "epoch": 0.3673996400344315, "grad_norm": 0.0, - "learning_rate": 1.7216522282459976e-05, - "loss": 0.8737, + "learning_rate": 1.460060145742543e-05, + "loss": 1.0842, "step": 9390 }, { - "epoch": 0.2661169202867749, + "epoch": 0.3674387667266609, "grad_norm": 0.0, - "learning_rate": 1.7215886907146033e-05, - "loss": 1.0321, + "learning_rate": 1.4599476261502292e-05, + "loss": 0.9964, "step": 9391 }, { - "epoch": 0.26614525772903735, + "epoch": 0.36747789341889037, "grad_norm": 0.0, - "learning_rate": 1.7215251471049994e-05, - "loss": 0.9886, + "learning_rate": 1.4598350991717329e-05, + "loss": 1.0392, "step": 9392 }, { - "epoch": 0.26617359517129985, + "epoch": 0.3675170201111198, "grad_norm": 0.0, - "learning_rate": 1.7214615974177215e-05, - "loss": 0.9101, + "learning_rate": 1.459722564808861e-05, + "loss": 1.0535, "step": 9393 }, { - "epoch": 0.2662019326135623, + "epoch": 0.36755614680334925, "grad_norm": 0.0, - "learning_rate": 1.7213980416533043e-05, - "loss": 1.068, + "learning_rate": 1.4596100230634208e-05, + "loss": 1.1033, "step": 9394 }, { - "epoch": 0.2662302700558248, + "epoch": 0.3675952734955787, "grad_norm": 0.0, - "learning_rate": 1.7213344798122836e-05, - "loss": 1.0865, + "learning_rate": 1.4594974739372196e-05, + "loss": 1.0386, "step": 9395 }, { - "epoch": 0.2662586074980872, + "epoch": 0.36763440018780813, "grad_norm": 0.0, - "learning_rate": 1.721270911895195e-05, - "loss": 0.9173, + "learning_rate": 1.4593849174320648e-05, + "loss": 0.8586, "step": 9396 }, { - "epoch": 0.26628694494034966, + "epoch": 0.36767352688003757, "grad_norm": 0.0, - "learning_rate": 1.7212073379025733e-05, - "loss": 1.0283, + "learning_rate": 1.4592723535497637e-05, + "loss": 1.0694, "step": 9397 }, { - "epoch": 0.26631528238261215, + "epoch": 0.367712653572267, "grad_norm": 0.0, - "learning_rate": 1.7211437578349547e-05, - "loss": 1.0171, + "learning_rate": 1.4591597822921242e-05, + "loss": 1.121, "step": 9398 }, { - "epoch": 0.2663436198248746, + "epoch": 0.36775178026449645, "grad_norm": 0.0, - "learning_rate": 1.721080171692874e-05, - "loss": 0.9847, + "learning_rate": 1.459047203660954e-05, + "loss": 1.0996, "step": 9399 }, { - "epoch": 0.2663719572671371, + "epoch": 0.3677909069567259, "grad_norm": 0.0, - "learning_rate": 1.7210165794768678e-05, - "loss": 0.8907, + "learning_rate": 1.4589346176580608e-05, + "loss": 1.0986, "step": 9400 }, { - "epoch": 0.2664002947093995, + "epoch": 0.36783003364895533, "grad_norm": 0.0, - "learning_rate": 1.720952981187471e-05, - "loss": 1.0629, + "learning_rate": 1.4588220242852527e-05, + "loss": 1.1371, "step": 9401 }, { - "epoch": 0.266428632151662, + "epoch": 0.3678691603411848, "grad_norm": 0.0, - "learning_rate": 1.7208893768252193e-05, - "loss": 0.9447, + "learning_rate": 1.458709423544338e-05, + "loss": 1.0378, "step": 9402 }, { - "epoch": 0.26645696959392445, + "epoch": 0.3679082870334142, "grad_norm": 0.0, - "learning_rate": 1.720825766390649e-05, - "loss": 0.9008, + "learning_rate": 1.4585968154371246e-05, + "loss": 1.1302, "step": 9403 }, { - "epoch": 0.2664853070361869, + "epoch": 0.36794741372564366, "grad_norm": 0.0, - "learning_rate": 1.7207621498842953e-05, - "loss": 0.9913, + "learning_rate": 1.4584841999654212e-05, + "loss": 1.1503, "step": 9404 }, { - "epoch": 0.2665136444784494, + "epoch": 0.3679865404178731, "grad_norm": 0.0, - "learning_rate": 1.7206985273066944e-05, - "loss": 0.951, + "learning_rate": 1.4583715771310358e-05, + "loss": 1.0845, "step": 9405 }, { - "epoch": 0.2665419819207118, + "epoch": 0.36802566711010254, "grad_norm": 0.0, - "learning_rate": 1.7206348986583824e-05, - "loss": 1.0008, + "learning_rate": 1.4582589469357773e-05, + "loss": 1.1126, "step": 9406 }, { - "epoch": 0.2665703193629743, + "epoch": 0.3680647938023319, "grad_norm": 0.0, - "learning_rate": 1.720571263939895e-05, - "loss": 1.0336, + "learning_rate": 1.4581463093814545e-05, + "loss": 1.1639, "step": 9407 }, { - "epoch": 0.26659865680523676, + "epoch": 0.36810392049456137, "grad_norm": 0.0, - "learning_rate": 1.7205076231517682e-05, - "loss": 0.9999, + "learning_rate": 1.4580336644698758e-05, + "loss": 0.9242, "step": 9408 }, { - "epoch": 0.2666269942474992, + "epoch": 0.3681430471867908, "grad_norm": 0.0, - "learning_rate": 1.7204439762945382e-05, - "loss": 0.8923, + "learning_rate": 1.4579210122028506e-05, + "loss": 1.0787, "step": 9409 }, { - "epoch": 0.2666553316897617, + "epoch": 0.36818217387902025, "grad_norm": 0.0, - "learning_rate": 1.720380323368741e-05, - "loss": 0.8022, + "learning_rate": 1.4578083525821876e-05, + "loss": 1.132, "step": 9410 }, { - "epoch": 0.2666836691320241, + "epoch": 0.3682213005712497, "grad_norm": 0.0, - "learning_rate": 1.7203166643749125e-05, - "loss": 1.0122, + "learning_rate": 1.4576956856096965e-05, + "loss": 1.059, "step": 9411 }, { - "epoch": 0.2667120065742866, + "epoch": 0.36826042726347913, "grad_norm": 0.0, - "learning_rate": 1.7202529993135898e-05, - "loss": 1.092, + "learning_rate": 1.4575830112871855e-05, + "loss": 1.1376, "step": 9412 }, { - "epoch": 0.26674034401654906, + "epoch": 0.36829955395570857, "grad_norm": 0.0, - "learning_rate": 1.7201893281853083e-05, - "loss": 0.9166, + "learning_rate": 1.4574703296164654e-05, + "loss": 1.1312, "step": 9413 }, { - "epoch": 0.26676868145881155, + "epoch": 0.368338680647938, "grad_norm": 0.0, - "learning_rate": 1.720125650990605e-05, - "loss": 0.9524, + "learning_rate": 1.4573576405993449e-05, + "loss": 1.0836, "step": 9414 }, { - "epoch": 0.266797018901074, + "epoch": 0.36837780734016745, "grad_norm": 0.0, - "learning_rate": 1.7200619677300153e-05, - "loss": 1.0052, + "learning_rate": 1.4572449442376337e-05, + "loss": 1.076, "step": 9415 }, { - "epoch": 0.26682535634333643, + "epoch": 0.3684169340323969, "grad_norm": 0.0, - "learning_rate": 1.7199982784040766e-05, - "loss": 1.0398, + "learning_rate": 1.4571322405331416e-05, + "loss": 1.0403, "step": 9416 }, { - "epoch": 0.2668536937855989, + "epoch": 0.36845606072462633, "grad_norm": 0.0, - "learning_rate": 1.719934583013325e-05, - "loss": 1.0555, + "learning_rate": 1.4570195294876785e-05, + "loss": 0.9989, "step": 9417 }, { - "epoch": 0.26688203122786136, + "epoch": 0.3684951874168558, "grad_norm": 0.0, - "learning_rate": 1.7198708815582973e-05, - "loss": 0.9152, + "learning_rate": 1.4569068111030546e-05, + "loss": 1.1409, "step": 9418 }, { - "epoch": 0.26691036867012385, + "epoch": 0.3685343141090852, "grad_norm": 0.0, - "learning_rate": 1.71980717403953e-05, - "loss": 0.9156, + "learning_rate": 1.4567940853810797e-05, + "loss": 1.0616, "step": 9419 }, { - "epoch": 0.2669387061123863, + "epoch": 0.36857344080131466, "grad_norm": 0.0, - "learning_rate": 1.7197434604575592e-05, - "loss": 0.9926, + "learning_rate": 1.4566813523235643e-05, + "loss": 1.1342, "step": 9420 }, { - "epoch": 0.26696704355464873, + "epoch": 0.3686125674935441, "grad_norm": 0.0, - "learning_rate": 1.719679740812922e-05, - "loss": 0.9199, + "learning_rate": 1.4565686119323187e-05, + "loss": 0.9397, "step": 9421 }, { - "epoch": 0.2669953809969112, + "epoch": 0.36865169418577354, "grad_norm": 0.0, - "learning_rate": 1.719616015106155e-05, - "loss": 0.9444, + "learning_rate": 1.4564558642091533e-05, + "loss": 1.0905, "step": 9422 }, { - "epoch": 0.26702371843917366, + "epoch": 0.368690820878003, "grad_norm": 0.0, - "learning_rate": 1.7195522833377955e-05, - "loss": 0.9874, + "learning_rate": 1.4563431091558785e-05, + "loss": 1.1898, "step": 9423 }, { - "epoch": 0.26705205588143616, + "epoch": 0.3687299475702324, "grad_norm": 0.0, - "learning_rate": 1.7194885455083795e-05, - "loss": 1.1551, + "learning_rate": 1.4562303467743053e-05, + "loss": 1.1258, "step": 9424 }, { - "epoch": 0.2670803933236986, + "epoch": 0.36876907426246186, "grad_norm": 0.0, - "learning_rate": 1.719424801618444e-05, - "loss": 0.8634, + "learning_rate": 1.4561175770662446e-05, + "loss": 1.2086, "step": 9425 }, { - "epoch": 0.2671087307659611, + "epoch": 0.3688082009546913, "grad_norm": 0.0, - "learning_rate": 1.719361051668527e-05, - "loss": 0.9666, + "learning_rate": 1.4560048000335067e-05, + "loss": 1.0717, "step": 9426 }, { - "epoch": 0.2671370682082235, + "epoch": 0.36884732764692074, "grad_norm": 0.0, - "learning_rate": 1.7192972956591644e-05, - "loss": 0.9467, + "learning_rate": 1.4558920156779034e-05, + "loss": 1.1142, "step": 9427 }, { - "epoch": 0.26716540565048597, + "epoch": 0.3688864543391502, "grad_norm": 0.0, - "learning_rate": 1.7192335335908937e-05, - "loss": 1.0577, + "learning_rate": 1.4557792240012457e-05, + "loss": 1.1863, "step": 9428 }, { - "epoch": 0.26719374309274846, + "epoch": 0.3689255810313796, "grad_norm": 0.0, - "learning_rate": 1.7191697654642517e-05, - "loss": 1.0219, + "learning_rate": 1.4556664250053447e-05, + "loss": 1.1643, "step": 9429 }, { - "epoch": 0.2672220805350109, + "epoch": 0.36896470772360906, "grad_norm": 0.0, - "learning_rate": 1.7191059912797758e-05, - "loss": 0.9777, + "learning_rate": 1.4555536186920115e-05, + "loss": 1.1042, "step": 9430 }, { - "epoch": 0.2672504179772734, + "epoch": 0.3690038344158385, "grad_norm": 0.0, - "learning_rate": 1.719042211038003e-05, - "loss": 1.0252, + "learning_rate": 1.4554408050630589e-05, + "loss": 1.1219, "step": 9431 }, { - "epoch": 0.26727875541953583, + "epoch": 0.36904296110806795, "grad_norm": 0.0, - "learning_rate": 1.7189784247394707e-05, - "loss": 1.0773, + "learning_rate": 1.455327984120297e-05, + "loss": 1.083, "step": 9432 }, { - "epoch": 0.26730709286179827, + "epoch": 0.3690820878002974, "grad_norm": 0.0, - "learning_rate": 1.718914632384716e-05, - "loss": 0.9799, + "learning_rate": 1.4552151558655383e-05, + "loss": 1.1724, "step": 9433 }, { - "epoch": 0.26733543030406076, + "epoch": 0.36912121449252683, "grad_norm": 0.0, - "learning_rate": 1.7188508339742765e-05, - "loss": 0.9401, + "learning_rate": 1.4551023203005947e-05, + "loss": 1.1729, "step": 9434 }, { - "epoch": 0.2673637677463232, + "epoch": 0.3691603411847562, "grad_norm": 0.0, - "learning_rate": 1.71878702950869e-05, - "loss": 0.8486, + "learning_rate": 1.4549894774272781e-05, + "loss": 1.1362, "step": 9435 }, { - "epoch": 0.2673921051885857, + "epoch": 0.36919946787698565, "grad_norm": 0.0, - "learning_rate": 1.7187232189884927e-05, - "loss": 1.096, + "learning_rate": 1.4548766272474008e-05, + "loss": 1.0878, "step": 9436 }, { - "epoch": 0.26742044263084813, + "epoch": 0.3692385945692151, "grad_norm": 0.0, - "learning_rate": 1.718659402414223e-05, - "loss": 0.9595, + "learning_rate": 1.4547637697627747e-05, + "loss": 1.1105, "step": 9437 }, { - "epoch": 0.2674487800731106, + "epoch": 0.36927772126144454, "grad_norm": 0.0, - "learning_rate": 1.7185955797864184e-05, - "loss": 0.9888, + "learning_rate": 1.4546509049752122e-05, + "loss": 1.1652, "step": 9438 }, { - "epoch": 0.26747711751537306, + "epoch": 0.369316847953674, "grad_norm": 0.0, - "learning_rate": 1.7185317511056166e-05, - "loss": 0.9029, + "learning_rate": 1.4545380328865261e-05, + "loss": 1.2395, "step": 9439 }, { - "epoch": 0.2675054549576355, + "epoch": 0.3693559746459034, "grad_norm": 0.0, - "learning_rate": 1.7184679163723545e-05, - "loss": 0.9526, + "learning_rate": 1.4544251534985288e-05, + "loss": 1.1951, "step": 9440 }, { - "epoch": 0.267533792399898, + "epoch": 0.36939510133813286, "grad_norm": 0.0, - "learning_rate": 1.718404075587171e-05, - "loss": 1.0122, + "learning_rate": 1.4543122668130327e-05, + "loss": 1.1271, "step": 9441 }, { - "epoch": 0.26756212984216043, + "epoch": 0.3694342280303623, "grad_norm": 0.0, - "learning_rate": 1.7183402287506026e-05, - "loss": 1.0854, + "learning_rate": 1.4541993728318511e-05, + "loss": 1.0656, "step": 9442 }, { - "epoch": 0.26759046728442293, + "epoch": 0.36947335472259174, "grad_norm": 0.0, - "learning_rate": 1.718276375863188e-05, - "loss": 1.0485, + "learning_rate": 1.4540864715567967e-05, + "loss": 1.1511, "step": 9443 }, { - "epoch": 0.26761880472668537, + "epoch": 0.3695124814148212, "grad_norm": 0.0, - "learning_rate": 1.7182125169254646e-05, - "loss": 0.9666, + "learning_rate": 1.4539735629896824e-05, + "loss": 1.2549, "step": 9444 }, { - "epoch": 0.2676471421689478, + "epoch": 0.3695516081070506, "grad_norm": 0.0, - "learning_rate": 1.7181486519379705e-05, - "loss": 1.0159, + "learning_rate": 1.4538606471323218e-05, + "loss": 1.0404, "step": 9445 }, { - "epoch": 0.2676754796112103, + "epoch": 0.36959073479928006, "grad_norm": 0.0, - "learning_rate": 1.718084780901244e-05, - "loss": 0.9731, + "learning_rate": 1.4537477239865275e-05, + "loss": 1.0913, "step": 9446 }, { - "epoch": 0.26770381705347274, + "epoch": 0.3696298614915095, "grad_norm": 0.0, - "learning_rate": 1.7180209038158224e-05, - "loss": 0.9944, + "learning_rate": 1.4536347935541138e-05, + "loss": 1.1384, "step": 9447 }, { - "epoch": 0.26773215449573523, + "epoch": 0.36966898818373894, "grad_norm": 0.0, - "learning_rate": 1.7179570206822442e-05, - "loss": 0.9666, + "learning_rate": 1.4535218558368934e-05, + "loss": 0.9851, "step": 9448 }, { - "epoch": 0.26776049193799767, + "epoch": 0.3697081148759684, "grad_norm": 0.0, - "learning_rate": 1.7178931315010473e-05, - "loss": 0.9388, + "learning_rate": 1.4534089108366805e-05, + "loss": 1.0546, "step": 9449 }, { - "epoch": 0.26778882938026016, + "epoch": 0.3697472415681978, "grad_norm": 0.0, - "learning_rate": 1.71782923627277e-05, - "loss": 0.8481, + "learning_rate": 1.4532959585552885e-05, + "loss": 1.194, "step": 9450 }, { - "epoch": 0.2678171668225226, + "epoch": 0.36978636826042727, "grad_norm": 0.0, - "learning_rate": 1.7177653349979504e-05, - "loss": 0.8249, + "learning_rate": 1.4531829989945315e-05, + "loss": 1.0771, "step": 9451 }, { - "epoch": 0.26784550426478504, + "epoch": 0.3698254949526567, "grad_norm": 0.0, - "learning_rate": 1.717701427677127e-05, - "loss": 0.9862, + "learning_rate": 1.453070032156223e-05, + "loss": 1.0628, "step": 9452 }, { - "epoch": 0.26787384170704753, + "epoch": 0.36986462164488615, "grad_norm": 0.0, - "learning_rate": 1.717637514310838e-05, - "loss": 0.957, + "learning_rate": 1.4529570580421782e-05, + "loss": 1.1534, "step": 9453 }, { - "epoch": 0.26790217914930997, + "epoch": 0.3699037483371156, "grad_norm": 0.0, - "learning_rate": 1.7175735948996213e-05, - "loss": 1.0002, + "learning_rate": 1.4528440766542104e-05, + "loss": 1.1212, "step": 9454 }, { - "epoch": 0.26793051659157247, + "epoch": 0.36994287502934503, "grad_norm": 0.0, - "learning_rate": 1.7175096694440165e-05, - "loss": 0.9866, + "learning_rate": 1.4527310879941339e-05, + "loss": 1.1991, "step": 9455 }, { - "epoch": 0.2679588540338349, + "epoch": 0.36998200172157447, "grad_norm": 0.0, - "learning_rate": 1.717445737944561e-05, - "loss": 1.0296, + "learning_rate": 1.4526180920637637e-05, + "loss": 1.1371, "step": 9456 }, { - "epoch": 0.26798719147609734, + "epoch": 0.3700211284138039, "grad_norm": 0.0, - "learning_rate": 1.7173818004017935e-05, - "loss": 0.9084, + "learning_rate": 1.4525050888649139e-05, + "loss": 1.0954, "step": 9457 }, { - "epoch": 0.26801552891835984, + "epoch": 0.37006025510603335, "grad_norm": 0.0, - "learning_rate": 1.7173178568162525e-05, - "loss": 1.0117, + "learning_rate": 1.4523920783993997e-05, + "loss": 1.0031, "step": 9458 }, { - "epoch": 0.2680438663606223, + "epoch": 0.3700993817982628, "grad_norm": 0.0, - "learning_rate": 1.717253907188477e-05, - "loss": 0.9811, + "learning_rate": 1.4522790606690354e-05, + "loss": 1.0662, "step": 9459 }, { - "epoch": 0.26807220380288477, + "epoch": 0.37013850849049224, "grad_norm": 0.0, - "learning_rate": 1.7171899515190058e-05, - "loss": 1.0333, + "learning_rate": 1.4521660356756361e-05, + "loss": 1.1301, "step": 9460 }, { - "epoch": 0.2681005412451472, + "epoch": 0.3701776351827217, "grad_norm": 0.0, - "learning_rate": 1.717125989808377e-05, - "loss": 0.9259, + "learning_rate": 1.4520530034210168e-05, + "loss": 1.0678, "step": 9461 }, { - "epoch": 0.2681288786874097, + "epoch": 0.3702167618749511, "grad_norm": 0.0, - "learning_rate": 1.7170620220571295e-05, - "loss": 0.9165, + "learning_rate": 1.4519399639069929e-05, + "loss": 1.0608, "step": 9462 }, { - "epoch": 0.26815721612967214, + "epoch": 0.37025588856718056, "grad_norm": 0.0, - "learning_rate": 1.7169980482658027e-05, - "loss": 0.9403, + "learning_rate": 1.4518269171353796e-05, + "loss": 1.0614, "step": 9463 }, { - "epoch": 0.2681855535719346, + "epoch": 0.37029501525940994, "grad_norm": 0.0, - "learning_rate": 1.716934068434935e-05, - "loss": 1.0125, + "learning_rate": 1.451713863107992e-05, + "loss": 1.0521, "step": 9464 }, { - "epoch": 0.26821389101419707, + "epoch": 0.3703341419516394, "grad_norm": 0.0, - "learning_rate": 1.7168700825650655e-05, - "loss": 1.0229, + "learning_rate": 1.4516008018266457e-05, + "loss": 1.1377, "step": 9465 }, { - "epoch": 0.2682422284564595, + "epoch": 0.3703732686438688, "grad_norm": 0.0, - "learning_rate": 1.716806090656733e-05, - "loss": 0.9619, + "learning_rate": 1.4514877332931564e-05, + "loss": 1.1707, "step": 9466 }, { - "epoch": 0.268270565898722, + "epoch": 0.37041239533609827, "grad_norm": 0.0, - "learning_rate": 1.7167420927104766e-05, - "loss": 0.897, + "learning_rate": 1.4513746575093401e-05, + "loss": 1.2516, "step": 9467 }, { - "epoch": 0.26829890334098444, + "epoch": 0.3704515220283277, "grad_norm": 0.0, - "learning_rate": 1.7166780887268352e-05, - "loss": 0.9409, + "learning_rate": 1.4512615744770122e-05, + "loss": 1.1129, "step": 9468 }, { - "epoch": 0.2683272407832469, + "epoch": 0.37049064872055715, "grad_norm": 0.0, - "learning_rate": 1.7166140787063486e-05, - "loss": 0.9828, + "learning_rate": 1.451148484197989e-05, + "loss": 1.071, "step": 9469 }, { - "epoch": 0.2683555782255094, + "epoch": 0.3705297754127866, "grad_norm": 0.0, - "learning_rate": 1.716550062649555e-05, - "loss": 0.9928, + "learning_rate": 1.4510353866740863e-05, + "loss": 1.1836, "step": 9470 }, { - "epoch": 0.2683839156677718, + "epoch": 0.37056890210501603, "grad_norm": 0.0, - "learning_rate": 1.7164860405569946e-05, - "loss": 1.1304, + "learning_rate": 1.4509222819071207e-05, + "loss": 1.0577, "step": 9471 }, { - "epoch": 0.2684122531100343, + "epoch": 0.37060802879724547, "grad_norm": 0.0, - "learning_rate": 1.7164220124292058e-05, - "loss": 0.9569, + "learning_rate": 1.4508091698989079e-05, + "loss": 1.0773, "step": 9472 }, { - "epoch": 0.26844059055229674, + "epoch": 0.3706471554894749, "grad_norm": 0.0, - "learning_rate": 1.7163579782667285e-05, - "loss": 0.988, + "learning_rate": 1.4506960506512652e-05, + "loss": 1.0572, "step": 9473 }, { - "epoch": 0.26846892799455924, + "epoch": 0.37068628218170435, "grad_norm": 0.0, - "learning_rate": 1.716293938070102e-05, - "loss": 0.968, + "learning_rate": 1.4505829241660086e-05, + "loss": 1.1292, "step": 9474 }, { - "epoch": 0.2684972654368217, + "epoch": 0.3707254088739338, "grad_norm": 0.0, - "learning_rate": 1.7162298918398656e-05, - "loss": 0.8682, + "learning_rate": 1.4504697904449545e-05, + "loss": 1.1735, "step": 9475 }, { - "epoch": 0.2685256028790841, + "epoch": 0.37076453556616323, "grad_norm": 0.0, - "learning_rate": 1.7161658395765588e-05, - "loss": 1.0972, + "learning_rate": 1.4503566494899204e-05, + "loss": 1.0589, "step": 9476 }, { - "epoch": 0.2685539403213466, + "epoch": 0.3708036622583927, "grad_norm": 0.0, - "learning_rate": 1.7161017812807213e-05, - "loss": 1.002, + "learning_rate": 1.4502435013027225e-05, + "loss": 1.1337, "step": 9477 }, { - "epoch": 0.26858227776360905, + "epoch": 0.3708427889506221, "grad_norm": 0.0, - "learning_rate": 1.7160377169528928e-05, - "loss": 0.9649, + "learning_rate": 1.4501303458851786e-05, + "loss": 1.1386, "step": 9478 }, { - "epoch": 0.26861061520587154, + "epoch": 0.37088191564285156, "grad_norm": 0.0, - "learning_rate": 1.7159736465936124e-05, - "loss": 0.8822, + "learning_rate": 1.4500171832391052e-05, + "loss": 1.1215, "step": 9479 }, { - "epoch": 0.268638952648134, + "epoch": 0.370921042335081, "grad_norm": 0.0, - "learning_rate": 1.71590957020342e-05, - "loss": 0.9462, + "learning_rate": 1.4499040133663194e-05, + "loss": 1.1665, "step": 9480 }, { - "epoch": 0.2686672900903964, + "epoch": 0.37096016902731044, "grad_norm": 0.0, - "learning_rate": 1.7158454877828557e-05, - "loss": 0.9873, + "learning_rate": 1.4497908362686393e-05, + "loss": 1.0652, "step": 9481 }, { - "epoch": 0.2686956275326589, + "epoch": 0.3709992957195399, "grad_norm": 0.0, - "learning_rate": 1.715781399332459e-05, - "loss": 0.9334, + "learning_rate": 1.449677651947882e-05, + "loss": 1.1036, "step": 9482 }, { - "epoch": 0.26872396497492135, + "epoch": 0.3710384224117693, "grad_norm": 0.0, - "learning_rate": 1.71571730485277e-05, - "loss": 1.0746, + "learning_rate": 1.4495644604058647e-05, + "loss": 0.9378, "step": 9483 }, { - "epoch": 0.26875230241718384, + "epoch": 0.37107754910399876, "grad_norm": 0.0, - "learning_rate": 1.7156532043443278e-05, - "loss": 1.0146, + "learning_rate": 1.4494512616444061e-05, + "loss": 1.0621, "step": 9484 }, { - "epoch": 0.2687806398594463, + "epoch": 0.3711166757962282, "grad_norm": 0.0, - "learning_rate": 1.7155890978076732e-05, - "loss": 0.9005, + "learning_rate": 1.4493380556653232e-05, + "loss": 1.1242, "step": 9485 }, { - "epoch": 0.2688089773017088, + "epoch": 0.37115580248845764, "grad_norm": 0.0, - "learning_rate": 1.715524985243346e-05, - "loss": 0.9272, + "learning_rate": 1.4492248424704339e-05, + "loss": 1.2365, "step": 9486 }, { - "epoch": 0.2688373147439712, + "epoch": 0.3711949291806871, "grad_norm": 0.0, - "learning_rate": 1.715460866651886e-05, - "loss": 0.9943, + "learning_rate": 1.4491116220615569e-05, + "loss": 1.0633, "step": 9487 }, { - "epoch": 0.26886565218623365, + "epoch": 0.3712340558729165, "grad_norm": 0.0, - "learning_rate": 1.7153967420338337e-05, - "loss": 1.038, + "learning_rate": 1.4489983944405096e-05, + "loss": 1.0038, "step": 9488 }, { - "epoch": 0.26889398962849614, + "epoch": 0.37127318256514596, "grad_norm": 0.0, - "learning_rate": 1.7153326113897286e-05, - "loss": 1.0255, + "learning_rate": 1.4488851596091109e-05, + "loss": 1.1395, "step": 9489 }, { - "epoch": 0.2689223270707586, + "epoch": 0.3713123092573754, "grad_norm": 0.0, - "learning_rate": 1.7152684747201114e-05, - "loss": 0.9666, + "learning_rate": 1.448771917569179e-05, + "loss": 1.1509, "step": 9490 }, { - "epoch": 0.2689506645130211, + "epoch": 0.37135143594960485, "grad_norm": 0.0, - "learning_rate": 1.7152043320255224e-05, - "loss": 1.0257, + "learning_rate": 1.4486586683225325e-05, + "loss": 1.2066, "step": 9491 }, { - "epoch": 0.2689790019552835, + "epoch": 0.37139056264183423, "grad_norm": 0.0, - "learning_rate": 1.7151401833065014e-05, - "loss": 0.9765, + "learning_rate": 1.4485454118709904e-05, + "loss": 1.0814, "step": 9492 }, { - "epoch": 0.26900733939754595, + "epoch": 0.3714296893340637, "grad_norm": 0.0, - "learning_rate": 1.7150760285635897e-05, - "loss": 0.9992, + "learning_rate": 1.4484321482163704e-05, + "loss": 1.0841, "step": 9493 }, { - "epoch": 0.26903567683980845, + "epoch": 0.3714688160262931, "grad_norm": 0.0, - "learning_rate": 1.7150118677973265e-05, - "loss": 1.0162, + "learning_rate": 1.4483188773604924e-05, + "loss": 1.1588, "step": 9494 }, { - "epoch": 0.2690640142820709, + "epoch": 0.37150794271852255, "grad_norm": 0.0, - "learning_rate": 1.714947701008253e-05, - "loss": 1.0544, + "learning_rate": 1.4482055993051748e-05, + "loss": 1.0389, "step": 9495 }, { - "epoch": 0.2690923517243334, + "epoch": 0.371547069410752, "grad_norm": 0.0, - "learning_rate": 1.7148835281969093e-05, - "loss": 0.9689, + "learning_rate": 1.448092314052237e-05, + "loss": 1.0079, "step": 9496 }, { - "epoch": 0.2691206891665958, + "epoch": 0.37158619610298144, "grad_norm": 0.0, - "learning_rate": 1.7148193493638364e-05, - "loss": 0.9401, + "learning_rate": 1.447979021603498e-05, + "loss": 1.053, "step": 9497 }, { - "epoch": 0.2691490266088583, + "epoch": 0.3716253227952109, "grad_norm": 0.0, - "learning_rate": 1.7147551645095747e-05, - "loss": 0.9802, + "learning_rate": 1.4478657219607772e-05, + "loss": 1.0859, "step": 9498 }, { - "epoch": 0.26917736405112075, + "epoch": 0.3716644494874403, "grad_norm": 0.0, - "learning_rate": 1.714690973634665e-05, - "loss": 0.949, + "learning_rate": 1.4477524151258941e-05, + "loss": 1.0576, "step": 9499 }, { - "epoch": 0.2692057014933832, + "epoch": 0.37170357617966976, "grad_norm": 0.0, - "learning_rate": 1.7146267767396477e-05, - "loss": 1.1245, + "learning_rate": 1.4476391011006688e-05, + "loss": 0.9842, "step": 9500 }, { - "epoch": 0.2692340389356457, + "epoch": 0.3717427028718992, "grad_norm": 0.0, - "learning_rate": 1.7145625738250636e-05, - "loss": 1.1373, + "learning_rate": 1.44752577988692e-05, + "loss": 1.097, "step": 9501 }, { - "epoch": 0.2692623763779081, + "epoch": 0.37178182956412864, "grad_norm": 0.0, - "learning_rate": 1.7144983648914536e-05, - "loss": 0.9884, + "learning_rate": 1.447412451486468e-05, + "loss": 0.9954, "step": 9502 }, { - "epoch": 0.2692907138201706, + "epoch": 0.3718209562563581, "grad_norm": 0.0, - "learning_rate": 1.7144341499393587e-05, - "loss": 0.946, + "learning_rate": 1.4472991159011329e-05, + "loss": 1.1754, "step": 9503 }, { - "epoch": 0.26931905126243305, + "epoch": 0.3718600829485875, "grad_norm": 0.0, - "learning_rate": 1.7143699289693193e-05, - "loss": 0.9584, + "learning_rate": 1.447185773132734e-05, + "loss": 1.028, "step": 9504 }, { - "epoch": 0.2693473887046955, + "epoch": 0.37189920964081696, "grad_norm": 0.0, - "learning_rate": 1.7143057019818773e-05, - "loss": 1.0079, + "learning_rate": 1.4470724231830926e-05, + "loss": 1.0907, "step": 9505 }, { - "epoch": 0.269375726146958, + "epoch": 0.3719383363330464, "grad_norm": 0.0, - "learning_rate": 1.7142414689775723e-05, - "loss": 0.9438, + "learning_rate": 1.4469590660540277e-05, + "loss": 1.1167, "step": 9506 }, { - "epoch": 0.2694040635892204, + "epoch": 0.37197746302527585, "grad_norm": 0.0, - "learning_rate": 1.7141772299569467e-05, - "loss": 0.9286, + "learning_rate": 1.4468457017473606e-05, + "loss": 1.0585, "step": 9507 }, { - "epoch": 0.2694324010314829, + "epoch": 0.3720165897175053, "grad_norm": 0.0, - "learning_rate": 1.7141129849205414e-05, - "loss": 1.0405, + "learning_rate": 1.4467323302649115e-05, + "loss": 1.148, "step": 9508 }, { - "epoch": 0.26946073847374535, + "epoch": 0.3720557164097347, "grad_norm": 0.0, - "learning_rate": 1.7140487338688967e-05, - "loss": 0.9719, + "learning_rate": 1.446618951608501e-05, + "loss": 1.0995, "step": 9509 }, { - "epoch": 0.26948907591600785, + "epoch": 0.37209484310196417, "grad_norm": 0.0, - "learning_rate": 1.7139844768025544e-05, - "loss": 1.05, + "learning_rate": 1.4465055657799493e-05, + "loss": 1.1115, "step": 9510 }, { - "epoch": 0.2695174133582703, + "epoch": 0.3721339697941936, "grad_norm": 0.0, - "learning_rate": 1.713920213722056e-05, - "loss": 1.0273, + "learning_rate": 1.4463921727810783e-05, + "loss": 1.1001, "step": 9511 }, { - "epoch": 0.2695457508005327, + "epoch": 0.37217309648642305, "grad_norm": 0.0, - "learning_rate": 1.7138559446279424e-05, - "loss": 1.0483, + "learning_rate": 1.4462787726137083e-05, + "loss": 1.1639, "step": 9512 }, { - "epoch": 0.2695740882427952, + "epoch": 0.3722122231786525, "grad_norm": 0.0, - "learning_rate": 1.713791669520755e-05, - "loss": 1.0023, + "learning_rate": 1.44616536527966e-05, + "loss": 1.1857, "step": 9513 }, { - "epoch": 0.26960242568505766, + "epoch": 0.37225134987088193, "grad_norm": 0.0, - "learning_rate": 1.7137273884010356e-05, - "loss": 1.0277, + "learning_rate": 1.4460519507807553e-05, + "loss": 1.2017, "step": 9514 }, { - "epoch": 0.26963076312732015, + "epoch": 0.37229047656311137, "grad_norm": 0.0, - "learning_rate": 1.713663101269325e-05, - "loss": 0.9348, + "learning_rate": 1.445938529118815e-05, + "loss": 1.1929, "step": 9515 }, { - "epoch": 0.2696591005695826, + "epoch": 0.3723296032553408, "grad_norm": 0.0, - "learning_rate": 1.7135988081261655e-05, - "loss": 1.0546, + "learning_rate": 1.4458251002956612e-05, + "loss": 1.0864, "step": 9516 }, { - "epoch": 0.269687438011845, + "epoch": 0.37236872994757025, "grad_norm": 0.0, - "learning_rate": 1.713534508972098e-05, - "loss": 0.9258, + "learning_rate": 1.4457116643131144e-05, + "loss": 1.1258, "step": 9517 }, { - "epoch": 0.2697157754541075, + "epoch": 0.3724078566397997, "grad_norm": 0.0, - "learning_rate": 1.7134702038076644e-05, - "loss": 1.0219, + "learning_rate": 1.445598221172997e-05, + "loss": 1.1907, "step": 9518 }, { - "epoch": 0.26974411289636996, + "epoch": 0.37244698333202914, "grad_norm": 0.0, - "learning_rate": 1.7134058926334063e-05, - "loss": 0.8467, + "learning_rate": 1.4454847708771305e-05, + "loss": 0.9881, "step": 9519 }, { - "epoch": 0.26977245033863245, + "epoch": 0.3724861100242586, "grad_norm": 0.0, - "learning_rate": 1.7133415754498655e-05, - "loss": 1.035, + "learning_rate": 1.4453713134273368e-05, + "loss": 1.1394, "step": 9520 }, { - "epoch": 0.2698007877808949, + "epoch": 0.37252523671648796, "grad_norm": 0.0, - "learning_rate": 1.7132772522575835e-05, - "loss": 0.8332, + "learning_rate": 1.4452578488254381e-05, + "loss": 1.0961, "step": 9521 }, { - "epoch": 0.2698291252231574, + "epoch": 0.3725643634087174, "grad_norm": 0.0, - "learning_rate": 1.7132129230571022e-05, - "loss": 1.0117, + "learning_rate": 1.4451443770732561e-05, + "loss": 1.2518, "step": 9522 }, { - "epoch": 0.2698574626654198, + "epoch": 0.37260349010094684, "grad_norm": 0.0, - "learning_rate": 1.7131485878489643e-05, - "loss": 1.0053, + "learning_rate": 1.4450308981726134e-05, + "loss": 1.2009, "step": 9523 }, { - "epoch": 0.26988580010768226, + "epoch": 0.3726426167931763, "grad_norm": 0.0, - "learning_rate": 1.7130842466337106e-05, - "loss": 0.8723, + "learning_rate": 1.4449174121253315e-05, + "loss": 0.9607, "step": 9524 }, { - "epoch": 0.26991413754994475, + "epoch": 0.3726817434854057, "grad_norm": 0.0, - "learning_rate": 1.7130198994118835e-05, - "loss": 0.9354, + "learning_rate": 1.4448039189332341e-05, + "loss": 1.2163, "step": 9525 }, { - "epoch": 0.2699424749922072, + "epoch": 0.37272087017763517, "grad_norm": 0.0, - "learning_rate": 1.7129555461840252e-05, - "loss": 0.9529, + "learning_rate": 1.4446904185981432e-05, + "loss": 1.0573, "step": 9526 }, { - "epoch": 0.2699708124344697, + "epoch": 0.3727599968698646, "grad_norm": 0.0, - "learning_rate": 1.7128911869506772e-05, - "loss": 0.9455, + "learning_rate": 1.444576911121881e-05, + "loss": 1.1014, "step": 9527 }, { - "epoch": 0.2699991498767321, + "epoch": 0.37279912356209405, "grad_norm": 0.0, - "learning_rate": 1.7128268217123824e-05, - "loss": 0.921, + "learning_rate": 1.4444633965062708e-05, + "loss": 1.1663, "step": 9528 }, { - "epoch": 0.27002748731899456, + "epoch": 0.3728382502543235, "grad_norm": 0.0, - "learning_rate": 1.7127624504696824e-05, - "loss": 0.939, + "learning_rate": 1.4443498747531358e-05, + "loss": 1.0684, "step": 9529 }, { - "epoch": 0.27005582476125706, + "epoch": 0.37287737694655293, "grad_norm": 0.0, - "learning_rate": 1.7126980732231196e-05, - "loss": 0.9915, + "learning_rate": 1.444236345864298e-05, + "loss": 1.0335, "step": 9530 }, { - "epoch": 0.2700841622035195, + "epoch": 0.37291650363878237, "grad_norm": 0.0, - "learning_rate": 1.7126336899732363e-05, - "loss": 0.8897, + "learning_rate": 1.4441228098415816e-05, + "loss": 1.0347, "step": 9531 }, { - "epoch": 0.270112499645782, + "epoch": 0.3729556303310118, "grad_norm": 0.0, - "learning_rate": 1.7125693007205745e-05, - "loss": 0.9409, + "learning_rate": 1.4440092666868091e-05, + "loss": 1.08, "step": 9532 }, { - "epoch": 0.2701408370880444, + "epoch": 0.37299475702324125, "grad_norm": 0.0, - "learning_rate": 1.7125049054656773e-05, - "loss": 0.9469, + "learning_rate": 1.4438957164018041e-05, + "loss": 1.1662, "step": 9533 }, { - "epoch": 0.2701691745303069, + "epoch": 0.3730338837154707, "grad_norm": 0.0, - "learning_rate": 1.7124405042090865e-05, - "loss": 1.0512, + "learning_rate": 1.4437821589883905e-05, + "loss": 1.1234, "step": 9534 }, { - "epoch": 0.27019751197256936, + "epoch": 0.37307301040770013, "grad_norm": 0.0, - "learning_rate": 1.712376096951345e-05, - "loss": 1.0356, + "learning_rate": 1.4436685944483912e-05, + "loss": 1.0784, "step": 9535 }, { - "epoch": 0.2702258494148318, + "epoch": 0.3731121370999296, "grad_norm": 0.0, - "learning_rate": 1.712311683692995e-05, - "loss": 0.9327, + "learning_rate": 1.4435550227836304e-05, + "loss": 1.0064, "step": 9536 }, { - "epoch": 0.2702541868570943, + "epoch": 0.373151263792159, "grad_norm": 0.0, - "learning_rate": 1.712247264434579e-05, - "loss": 0.9492, + "learning_rate": 1.4434414439959315e-05, + "loss": 1.0443, "step": 9537 }, { - "epoch": 0.27028252429935673, + "epoch": 0.37319039048438846, "grad_norm": 0.0, - "learning_rate": 1.7121828391766398e-05, - "loss": 1.0402, + "learning_rate": 1.4433278580871188e-05, + "loss": 1.1093, "step": 9538 }, { - "epoch": 0.2703108617416192, + "epoch": 0.3732295171766179, "grad_norm": 0.0, - "learning_rate": 1.7121184079197202e-05, - "loss": 0.9474, + "learning_rate": 1.4432142650590162e-05, + "loss": 1.0176, "step": 9539 }, { - "epoch": 0.27033919918388166, + "epoch": 0.37326864386884734, "grad_norm": 0.0, - "learning_rate": 1.712053970664363e-05, - "loss": 1.0188, + "learning_rate": 1.443100664913448e-05, + "loss": 1.0339, "step": 9540 }, { - "epoch": 0.2703675366261441, + "epoch": 0.3733077705610768, "grad_norm": 0.0, - "learning_rate": 1.7119895274111105e-05, - "loss": 0.9147, + "learning_rate": 1.4429870576522384e-05, + "loss": 1.0093, "step": 9541 }, { - "epoch": 0.2703958740684066, + "epoch": 0.3733468972533062, "grad_norm": 0.0, - "learning_rate": 1.711925078160506e-05, - "loss": 1.0182, + "learning_rate": 1.4428734432772115e-05, + "loss": 1.1757, "step": 9542 }, { - "epoch": 0.27042421151066903, + "epoch": 0.37338602394553566, "grad_norm": 0.0, - "learning_rate": 1.7118606229130922e-05, - "loss": 1.0466, + "learning_rate": 1.4427598217901921e-05, + "loss": 1.0887, "step": 9543 }, { - "epoch": 0.2704525489529315, + "epoch": 0.3734251506377651, "grad_norm": 0.0, - "learning_rate": 1.711796161669412e-05, - "loss": 0.9941, + "learning_rate": 1.442646193193005e-05, + "loss": 1.0834, "step": 9544 }, { - "epoch": 0.27048088639519396, + "epoch": 0.37346427732999454, "grad_norm": 0.0, - "learning_rate": 1.7117316944300082e-05, - "loss": 1.0206, + "learning_rate": 1.4425325574874745e-05, + "loss": 1.2091, "step": 9545 }, { - "epoch": 0.27050922383745646, + "epoch": 0.373503404022224, "grad_norm": 0.0, - "learning_rate": 1.7116672211954242e-05, - "loss": 1.0142, + "learning_rate": 1.4424189146754257e-05, + "loss": 1.1165, "step": 9546 }, { - "epoch": 0.2705375612797189, + "epoch": 0.3735425307144534, "grad_norm": 0.0, - "learning_rate": 1.711602741966203e-05, - "loss": 1.0853, + "learning_rate": 1.4423052647586835e-05, + "loss": 1.0092, "step": 9547 }, { - "epoch": 0.27056589872198133, + "epoch": 0.37358165740668287, "grad_norm": 0.0, - "learning_rate": 1.7115382567428875e-05, - "loss": 0.979, + "learning_rate": 1.442191607739073e-05, + "loss": 1.0902, "step": 9548 }, { - "epoch": 0.27059423616424383, + "epoch": 0.37362078409891225, "grad_norm": 0.0, - "learning_rate": 1.711473765526021e-05, - "loss": 0.922, + "learning_rate": 1.4420779436184194e-05, + "loss": 1.0682, "step": 9549 }, { - "epoch": 0.27062257360650627, + "epoch": 0.3736599107911417, "grad_norm": 0.0, - "learning_rate": 1.7114092683161468e-05, - "loss": 0.9898, + "learning_rate": 1.441964272398548e-05, + "loss": 1.0597, "step": 9550 }, { - "epoch": 0.27065091104876876, + "epoch": 0.37369903748337113, "grad_norm": 0.0, - "learning_rate": 1.7113447651138086e-05, - "loss": 0.9991, + "learning_rate": 1.4418505940812842e-05, + "loss": 0.9791, "step": 9551 }, { - "epoch": 0.2706792484910312, + "epoch": 0.3737381641756006, "grad_norm": 0.0, - "learning_rate": 1.711280255919549e-05, - "loss": 1.0937, + "learning_rate": 1.4417369086684536e-05, + "loss": 1.0695, "step": 9552 }, { - "epoch": 0.27070758593329364, + "epoch": 0.37377729086783, "grad_norm": 0.0, - "learning_rate": 1.7112157407339118e-05, - "loss": 0.8118, + "learning_rate": 1.4416232161618815e-05, + "loss": 1.1989, "step": 9553 }, { - "epoch": 0.27073592337555613, + "epoch": 0.37381641756005946, "grad_norm": 0.0, - "learning_rate": 1.7111512195574402e-05, - "loss": 0.985, + "learning_rate": 1.4415095165633942e-05, + "loss": 0.9195, "step": 9554 }, { - "epoch": 0.27076426081781857, + "epoch": 0.3738555442522889, "grad_norm": 0.0, - "learning_rate": 1.7110866923906774e-05, - "loss": 0.9939, + "learning_rate": 1.4413958098748171e-05, + "loss": 1.0014, "step": 9555 }, { - "epoch": 0.27079259826008106, + "epoch": 0.37389467094451834, "grad_norm": 0.0, - "learning_rate": 1.711022159234168e-05, - "loss": 0.9581, + "learning_rate": 1.4412820960979765e-05, + "loss": 1.1595, "step": 9556 }, { - "epoch": 0.2708209357023435, + "epoch": 0.3739337976367478, "grad_norm": 0.0, - "learning_rate": 1.7109576200884543e-05, - "loss": 1.0699, + "learning_rate": 1.4411683752346983e-05, + "loss": 1.0753, "step": 9557 }, { - "epoch": 0.270849273144606, + "epoch": 0.3739729243289772, "grad_norm": 0.0, - "learning_rate": 1.7108930749540807e-05, - "loss": 0.9753, + "learning_rate": 1.441054647286809e-05, + "loss": 1.2131, "step": 9558 }, { - "epoch": 0.27087761058686843, + "epoch": 0.37401205102120666, "grad_norm": 0.0, - "learning_rate": 1.710828523831591e-05, - "loss": 0.9102, + "learning_rate": 1.4409409122561347e-05, + "loss": 1.0018, "step": 9559 }, { - "epoch": 0.27090594802913087, + "epoch": 0.3740511777134361, "grad_norm": 0.0, - "learning_rate": 1.7107639667215288e-05, - "loss": 1.0478, + "learning_rate": 1.4408271701445019e-05, + "loss": 1.0177, "step": 9560 }, { - "epoch": 0.27093428547139337, + "epoch": 0.37409030440566554, "grad_norm": 0.0, - "learning_rate": 1.7106994036244375e-05, - "loss": 0.9549, + "learning_rate": 1.440713420953737e-05, + "loss": 1.082, "step": 9561 }, { - "epoch": 0.2709626229136558, + "epoch": 0.374129431097895, "grad_norm": 0.0, - "learning_rate": 1.710634834540861e-05, - "loss": 1.0015, + "learning_rate": 1.4405996646856668e-05, + "loss": 1.0385, "step": 9562 }, { - "epoch": 0.2709909603559183, + "epoch": 0.3741685577901244, "grad_norm": 0.0, - "learning_rate": 1.7105702594713437e-05, - "loss": 1.0983, + "learning_rate": 1.4404859013421182e-05, + "loss": 1.1356, "step": 9563 }, { - "epoch": 0.27101929779818074, + "epoch": 0.37420768448235386, "grad_norm": 0.0, - "learning_rate": 1.7105056784164295e-05, - "loss": 1.0307, + "learning_rate": 1.4403721309249178e-05, + "loss": 1.1844, "step": 9564 }, { - "epoch": 0.2710476352404432, + "epoch": 0.3742468111745833, "grad_norm": 0.0, - "learning_rate": 1.7104410913766617e-05, - "loss": 0.8746, + "learning_rate": 1.4402583534358929e-05, + "loss": 1.0144, "step": 9565 }, { - "epoch": 0.27107597268270567, + "epoch": 0.37428593786681275, "grad_norm": 0.0, - "learning_rate": 1.710376498352585e-05, - "loss": 0.9748, + "learning_rate": 1.4401445688768704e-05, + "loss": 1.1114, "step": 9566 }, { - "epoch": 0.2711043101249681, + "epoch": 0.3743250645590422, "grad_norm": 0.0, - "learning_rate": 1.7103118993447432e-05, - "loss": 0.931, + "learning_rate": 1.440030777249678e-05, + "loss": 1.1821, "step": 9567 }, { - "epoch": 0.2711326475672306, + "epoch": 0.3743641912512716, "grad_norm": 0.0, - "learning_rate": 1.7102472943536805e-05, - "loss": 0.9438, + "learning_rate": 1.4399169785561426e-05, + "loss": 1.1467, "step": 9568 }, { - "epoch": 0.27116098500949304, + "epoch": 0.37440331794350107, "grad_norm": 0.0, - "learning_rate": 1.710182683379941e-05, - "loss": 0.9256, + "learning_rate": 1.4398031727980915e-05, + "loss": 1.0685, "step": 9569 }, { - "epoch": 0.2711893224517555, + "epoch": 0.3744424446357305, "grad_norm": 0.0, - "learning_rate": 1.710118066424069e-05, - "loss": 1.0251, + "learning_rate": 1.4396893599773529e-05, + "loss": 1.0348, "step": 9570 }, { - "epoch": 0.27121765989401797, + "epoch": 0.37448157132795995, "grad_norm": 0.0, - "learning_rate": 1.710053443486609e-05, - "loss": 1.0428, + "learning_rate": 1.439575540095754e-05, + "loss": 1.0909, "step": 9571 }, { - "epoch": 0.2712459973362804, + "epoch": 0.3745206980201894, "grad_norm": 0.0, - "learning_rate": 1.709988814568105e-05, - "loss": 1.0149, + "learning_rate": 1.4394617131551228e-05, + "loss": 1.0156, "step": 9572 }, { - "epoch": 0.2712743347785429, + "epoch": 0.37455982471241883, "grad_norm": 0.0, - "learning_rate": 1.709924179669102e-05, - "loss": 1.0177, + "learning_rate": 1.439347879157287e-05, + "loss": 1.1033, "step": 9573 }, { - "epoch": 0.27130267222080534, + "epoch": 0.3745989514046483, "grad_norm": 0.0, - "learning_rate": 1.7098595387901434e-05, - "loss": 0.9836, + "learning_rate": 1.439234038104075e-05, + "loss": 1.1412, "step": 9574 }, { - "epoch": 0.27133100966306783, + "epoch": 0.3746380780968777, "grad_norm": 0.0, - "learning_rate": 1.709794891931775e-05, - "loss": 0.8844, + "learning_rate": 1.4391201899973143e-05, + "loss": 1.0055, "step": 9575 }, { - "epoch": 0.2713593471053303, + "epoch": 0.37467720478910715, "grad_norm": 0.0, - "learning_rate": 1.70973023909454e-05, - "loss": 1.0055, + "learning_rate": 1.4390063348388341e-05, + "loss": 1.0281, "step": 9576 }, { - "epoch": 0.2713876845475927, + "epoch": 0.3747163314813366, "grad_norm": 0.0, - "learning_rate": 1.709665580278984e-05, - "loss": 1.0219, + "learning_rate": 1.438892472630462e-05, + "loss": 1.0785, "step": 9577 }, { - "epoch": 0.2714160219898552, + "epoch": 0.374755458173566, "grad_norm": 0.0, - "learning_rate": 1.7096009154856513e-05, - "loss": 0.9568, + "learning_rate": 1.438778603374027e-05, + "loss": 0.9688, "step": 9578 }, { - "epoch": 0.27144435943211764, + "epoch": 0.3747945848657954, "grad_norm": 0.0, - "learning_rate": 1.7095362447150866e-05, - "loss": 0.8824, + "learning_rate": 1.4386647270713572e-05, + "loss": 1.0567, "step": 9579 }, { - "epoch": 0.27147269687438014, + "epoch": 0.37483371155802486, "grad_norm": 0.0, - "learning_rate": 1.709471567967834e-05, - "loss": 1.0629, + "learning_rate": 1.4385508437242817e-05, + "loss": 0.9404, "step": 9580 }, { - "epoch": 0.2715010343166426, + "epoch": 0.3748728382502543, "grad_norm": 0.0, - "learning_rate": 1.7094068852444395e-05, - "loss": 0.9251, + "learning_rate": 1.4384369533346292e-05, + "loss": 1.1489, "step": 9581 }, { - "epoch": 0.271529371758905, + "epoch": 0.37491196494248374, "grad_norm": 0.0, - "learning_rate": 1.7093421965454474e-05, - "loss": 0.9435, + "learning_rate": 1.4383230559042282e-05, + "loss": 1.0566, "step": 9582 }, { - "epoch": 0.2715577092011675, + "epoch": 0.3749510916347132, "grad_norm": 0.0, - "learning_rate": 1.7092775018714026e-05, - "loss": 0.9229, + "learning_rate": 1.4382091514349088e-05, + "loss": 1.3304, "step": 9583 }, { - "epoch": 0.27158604664342995, + "epoch": 0.3749902183269426, "grad_norm": 0.0, - "learning_rate": 1.7092128012228498e-05, - "loss": 0.956, + "learning_rate": 1.4380952399284991e-05, + "loss": 1.049, "step": 9584 }, { - "epoch": 0.27161438408569244, + "epoch": 0.37502934501917207, "grad_norm": 0.0, - "learning_rate": 1.7091480946003342e-05, - "loss": 1.0987, + "learning_rate": 1.437981321386829e-05, + "loss": 1.0704, "step": 9585 }, { - "epoch": 0.2716427215279549, + "epoch": 0.3750684717114015, "grad_norm": 0.0, - "learning_rate": 1.7090833820044014e-05, - "loss": 0.9137, + "learning_rate": 1.4378673958117276e-05, + "loss": 1.1349, "step": 9586 }, { - "epoch": 0.27167105897021737, + "epoch": 0.37510759840363095, "grad_norm": 0.0, - "learning_rate": 1.7090186634355954e-05, - "loss": 0.9516, + "learning_rate": 1.4377534632050245e-05, + "loss": 1.1863, "step": 9587 }, { - "epoch": 0.2716993964124798, + "epoch": 0.3751467250958604, "grad_norm": 0.0, - "learning_rate": 1.708953938894462e-05, - "loss": 1.0004, + "learning_rate": 1.4376395235685494e-05, + "loss": 1.1191, "step": 9588 }, { - "epoch": 0.27172773385474225, + "epoch": 0.37518585178808983, "grad_norm": 0.0, - "learning_rate": 1.708889208381546e-05, - "loss": 0.9522, + "learning_rate": 1.4375255769041321e-05, + "loss": 1.1815, "step": 9589 }, { - "epoch": 0.27175607129700474, + "epoch": 0.37522497848031927, "grad_norm": 0.0, - "learning_rate": 1.7088244718973936e-05, - "loss": 1.0272, + "learning_rate": 1.4374116232136022e-05, + "loss": 1.165, "step": 9590 }, { - "epoch": 0.2717844087392672, + "epoch": 0.3752641051725487, "grad_norm": 0.0, - "learning_rate": 1.7087597294425492e-05, - "loss": 0.922, + "learning_rate": 1.4372976624987894e-05, + "loss": 1.0471, "step": 9591 }, { - "epoch": 0.2718127461815297, + "epoch": 0.37530323186477815, "grad_norm": 0.0, - "learning_rate": 1.7086949810175584e-05, - "loss": 0.9617, + "learning_rate": 1.4371836947615245e-05, + "loss": 1.0408, "step": 9592 }, { - "epoch": 0.2718410836237921, + "epoch": 0.3753423585570076, "grad_norm": 0.0, - "learning_rate": 1.7086302266229663e-05, - "loss": 0.9144, + "learning_rate": 1.437069720003637e-05, + "loss": 1.163, "step": 9593 }, { - "epoch": 0.27186942106605455, + "epoch": 0.37538148524923703, "grad_norm": 0.0, - "learning_rate": 1.7085654662593192e-05, - "loss": 0.8985, + "learning_rate": 1.4369557382269577e-05, + "loss": 0.9934, "step": 9594 }, { - "epoch": 0.27189775850831704, + "epoch": 0.3754206119414665, "grad_norm": 0.0, - "learning_rate": 1.7085006999271615e-05, - "loss": 0.9667, + "learning_rate": 1.4368417494333167e-05, + "loss": 0.9703, "step": 9595 }, { - "epoch": 0.2719260959505795, + "epoch": 0.3754597386336959, "grad_norm": 0.0, - "learning_rate": 1.70843592762704e-05, - "loss": 0.9569, + "learning_rate": 1.4367277536245445e-05, + "loss": 1.1379, "step": 9596 }, { - "epoch": 0.271954433392842, + "epoch": 0.37549886532592536, "grad_norm": 0.0, - "learning_rate": 1.708371149359499e-05, - "loss": 0.987, + "learning_rate": 1.4366137508024718e-05, + "loss": 1.0687, "step": 9597 }, { - "epoch": 0.2719827708351044, + "epoch": 0.3755379920181548, "grad_norm": 0.0, - "learning_rate": 1.708306365125085e-05, - "loss": 0.9592, + "learning_rate": 1.4364997409689297e-05, + "loss": 1.0155, "step": 9598 }, { - "epoch": 0.2720111082773669, + "epoch": 0.37557711871038424, "grad_norm": 0.0, - "learning_rate": 1.7082415749243436e-05, - "loss": 0.8364, + "learning_rate": 1.4363857241257484e-05, + "loss": 1.1601, "step": 9599 }, { - "epoch": 0.27203944571962935, + "epoch": 0.3756162454026137, "grad_norm": 0.0, - "learning_rate": 1.70817677875782e-05, - "loss": 0.9761, + "learning_rate": 1.4362717002747592e-05, + "loss": 1.1672, "step": 9600 }, { - "epoch": 0.2720677831618918, + "epoch": 0.3756553720948431, "grad_norm": 0.0, - "learning_rate": 1.7081119766260607e-05, - "loss": 0.8985, + "learning_rate": 1.4361576694177935e-05, + "loss": 1.1351, "step": 9601 }, { - "epoch": 0.2720961206041543, + "epoch": 0.37569449878707256, "grad_norm": 0.0, - "learning_rate": 1.7080471685296113e-05, - "loss": 1.0474, + "learning_rate": 1.436043631556682e-05, + "loss": 1.0595, "step": 9602 }, { - "epoch": 0.2721244580464167, + "epoch": 0.375733625479302, "grad_norm": 0.0, - "learning_rate": 1.7079823544690176e-05, - "loss": 0.9583, + "learning_rate": 1.4359295866932561e-05, + "loss": 1.2867, "step": 9603 }, { - "epoch": 0.2721527954886792, + "epoch": 0.37577275217153144, "grad_norm": 0.0, - "learning_rate": 1.7079175344448258e-05, - "loss": 0.8331, + "learning_rate": 1.435815534829347e-05, + "loss": 1.0654, "step": 9604 }, { - "epoch": 0.27218113293094165, + "epoch": 0.3758118788637609, "grad_norm": 0.0, - "learning_rate": 1.7078527084575816e-05, - "loss": 0.9136, + "learning_rate": 1.4357014759667868e-05, + "loss": 1.1234, "step": 9605 }, { - "epoch": 0.2722094703732041, + "epoch": 0.37585100555599027, "grad_norm": 0.0, - "learning_rate": 1.707787876507831e-05, - "loss": 0.9815, + "learning_rate": 1.435587410107407e-05, + "loss": 1.0688, "step": 9606 }, { - "epoch": 0.2722378078154666, + "epoch": 0.3758901322482197, "grad_norm": 0.0, - "learning_rate": 1.7077230385961206e-05, - "loss": 1.0618, + "learning_rate": 1.4354733372530391e-05, + "loss": 1.0068, "step": 9607 }, { - "epoch": 0.272266145257729, + "epoch": 0.37592925894044915, "grad_norm": 0.0, - "learning_rate": 1.707658194722996e-05, - "loss": 0.886, + "learning_rate": 1.4353592574055152e-05, + "loss": 1.1057, "step": 9608 }, { - "epoch": 0.2722944826999915, + "epoch": 0.3759683856326786, "grad_norm": 0.0, - "learning_rate": 1.7075933448890037e-05, - "loss": 0.9668, + "learning_rate": 1.4352451705666668e-05, + "loss": 1.147, "step": 9609 }, { - "epoch": 0.27232282014225395, + "epoch": 0.37600751232490803, "grad_norm": 0.0, - "learning_rate": 1.7075284890946898e-05, - "loss": 1.0355, + "learning_rate": 1.4351310767383268e-05, + "loss": 1.124, "step": 9610 }, { - "epoch": 0.27235115758451645, + "epoch": 0.3760466390171375, "grad_norm": 0.0, - "learning_rate": 1.7074636273406012e-05, - "loss": 1.0378, + "learning_rate": 1.4350169759223266e-05, + "loss": 1.1058, "step": 9611 }, { - "epoch": 0.2723794950267789, + "epoch": 0.3760857657093669, "grad_norm": 0.0, - "learning_rate": 1.7073987596272828e-05, - "loss": 1.0596, + "learning_rate": 1.434902868120499e-05, + "loss": 1.1073, "step": 9612 }, { - "epoch": 0.2724078324690413, + "epoch": 0.37612489240159636, "grad_norm": 0.0, - "learning_rate": 1.7073338859552828e-05, - "loss": 1.0153, + "learning_rate": 1.4347887533346765e-05, + "loss": 1.1214, "step": 9613 }, { - "epoch": 0.2724361699113038, + "epoch": 0.3761640190938258, "grad_norm": 0.0, - "learning_rate": 1.7072690063251466e-05, - "loss": 0.912, + "learning_rate": 1.4346746315666913e-05, + "loss": 1.0589, "step": 9614 }, { - "epoch": 0.27246450735356625, + "epoch": 0.37620314578605524, "grad_norm": 0.0, - "learning_rate": 1.7072041207374212e-05, - "loss": 1.0279, + "learning_rate": 1.4345605028183762e-05, + "loss": 1.0721, "step": 9615 }, { - "epoch": 0.27249284479582875, + "epoch": 0.3762422724782847, "grad_norm": 0.0, - "learning_rate": 1.7071392291926523e-05, - "loss": 1.0441, + "learning_rate": 1.4344463670915638e-05, + "loss": 1.0524, "step": 9616 }, { - "epoch": 0.2725211822380912, + "epoch": 0.3762813991705141, "grad_norm": 0.0, - "learning_rate": 1.7070743316913874e-05, - "loss": 0.9631, + "learning_rate": 1.4343322243880873e-05, + "loss": 1.1992, "step": 9617 }, { - "epoch": 0.2725495196803536, + "epoch": 0.37632052586274356, "grad_norm": 0.0, - "learning_rate": 1.7070094282341727e-05, - "loss": 0.9351, + "learning_rate": 1.4342180747097796e-05, + "loss": 1.0685, "step": 9618 }, { - "epoch": 0.2725778571226161, + "epoch": 0.376359652554973, "grad_norm": 0.0, - "learning_rate": 1.706944518821555e-05, - "loss": 1.0563, + "learning_rate": 1.4341039180584736e-05, + "loss": 1.0445, "step": 9619 }, { - "epoch": 0.27260619456487856, + "epoch": 0.37639877924720244, "grad_norm": 0.0, - "learning_rate": 1.706879603454081e-05, - "loss": 1.0453, + "learning_rate": 1.4339897544360026e-05, + "loss": 1.0479, "step": 9620 }, { - "epoch": 0.27263453200714105, + "epoch": 0.3764379059394319, "grad_norm": 0.0, - "learning_rate": 1.7068146821322983e-05, - "loss": 0.9458, + "learning_rate": 1.4338755838442003e-05, + "loss": 1.2613, "step": 9621 }, { - "epoch": 0.2726628694494035, + "epoch": 0.3764770326316613, "grad_norm": 0.0, - "learning_rate": 1.7067497548567523e-05, - "loss": 0.9198, + "learning_rate": 1.4337614062848992e-05, + "loss": 0.9825, "step": 9622 }, { - "epoch": 0.272691206891666, + "epoch": 0.37651615932389076, "grad_norm": 0.0, - "learning_rate": 1.706684821627991e-05, - "loss": 0.8624, + "learning_rate": 1.4336472217599338e-05, + "loss": 1.1085, "step": 9623 }, { - "epoch": 0.2727195443339284, + "epoch": 0.3765552860161202, "grad_norm": 0.0, - "learning_rate": 1.706619882446561e-05, - "loss": 1.0677, + "learning_rate": 1.4335330302711373e-05, + "loss": 1.1999, "step": 9624 }, { - "epoch": 0.27274788177619086, + "epoch": 0.37659441270834965, "grad_norm": 0.0, - "learning_rate": 1.7065549373130094e-05, - "loss": 1.0764, + "learning_rate": 1.4334188318203437e-05, + "loss": 1.0257, "step": 9625 }, { - "epoch": 0.27277621921845335, + "epoch": 0.3766335394005791, "grad_norm": 0.0, - "learning_rate": 1.7064899862278833e-05, - "loss": 0.9956, + "learning_rate": 1.4333046264093866e-05, + "loss": 1.0997, "step": 9626 }, { - "epoch": 0.2728045566607158, + "epoch": 0.37667266609280853, "grad_norm": 0.0, - "learning_rate": 1.7064250291917293e-05, - "loss": 0.9635, + "learning_rate": 1.4331904140401003e-05, + "loss": 0.9966, "step": 9627 }, { - "epoch": 0.2728328941029783, + "epoch": 0.37671179278503797, "grad_norm": 0.0, - "learning_rate": 1.7063600662050954e-05, - "loss": 0.8943, + "learning_rate": 1.4330761947143189e-05, + "loss": 1.0893, "step": 9628 }, { - "epoch": 0.2728612315452407, + "epoch": 0.3767509194772674, "grad_norm": 0.0, - "learning_rate": 1.706295097268528e-05, - "loss": 0.9091, + "learning_rate": 1.4329619684338763e-05, + "loss": 1.0882, "step": 9629 }, { - "epoch": 0.27288956898750316, + "epoch": 0.37679004616949685, "grad_norm": 0.0, - "learning_rate": 1.706230122382575e-05, - "loss": 0.9797, + "learning_rate": 1.432847735200607e-05, + "loss": 1.0922, "step": 9630 }, { - "epoch": 0.27291790642976566, + "epoch": 0.3768291728617263, "grad_norm": 0.0, - "learning_rate": 1.7061651415477832e-05, - "loss": 1.0614, + "learning_rate": 1.4327334950163454e-05, + "loss": 1.1407, "step": 9631 }, { - "epoch": 0.2729462438720281, + "epoch": 0.37686829955395573, "grad_norm": 0.0, - "learning_rate": 1.7061001547647e-05, - "loss": 0.9133, + "learning_rate": 1.4326192478829263e-05, + "loss": 1.0641, "step": 9632 }, { - "epoch": 0.2729745813142906, + "epoch": 0.3769074262461852, "grad_norm": 0.0, - "learning_rate": 1.7060351620338734e-05, - "loss": 0.9574, + "learning_rate": 1.432504993802184e-05, + "loss": 0.9848, "step": 9633 }, { - "epoch": 0.273002918756553, + "epoch": 0.3769465529384146, "grad_norm": 0.0, - "learning_rate": 1.7059701633558505e-05, - "loss": 0.9402, + "learning_rate": 1.4323907327759537e-05, + "loss": 1.0773, "step": 9634 }, { - "epoch": 0.2730312561988155, + "epoch": 0.376985679630644, "grad_norm": 0.0, - "learning_rate": 1.7059051587311785e-05, - "loss": 1.018, + "learning_rate": 1.43227646480607e-05, + "loss": 1.1721, "step": 9635 }, { - "epoch": 0.27305959364107796, + "epoch": 0.37702480632287344, "grad_norm": 0.0, - "learning_rate": 1.7058401481604054e-05, - "loss": 1.0326, + "learning_rate": 1.4321621898943678e-05, + "loss": 1.1313, "step": 9636 }, { - "epoch": 0.2730879310833404, + "epoch": 0.3770639330151029, "grad_norm": 0.0, - "learning_rate": 1.7057751316440786e-05, - "loss": 0.968, + "learning_rate": 1.4320479080426824e-05, + "loss": 1.0496, "step": 9637 }, { - "epoch": 0.2731162685256029, + "epoch": 0.3771030597073323, "grad_norm": 0.0, - "learning_rate": 1.7057101091827454e-05, - "loss": 1.024, + "learning_rate": 1.431933619252849e-05, + "loss": 1.0008, "step": 9638 }, { - "epoch": 0.27314460596786533, + "epoch": 0.37714218639956176, "grad_norm": 0.0, - "learning_rate": 1.7056450807769543e-05, - "loss": 1.0468, + "learning_rate": 1.431819323526703e-05, + "loss": 1.1944, "step": 9639 }, { - "epoch": 0.2731729434101278, + "epoch": 0.3771813130917912, "grad_norm": 0.0, - "learning_rate": 1.705580046427252e-05, - "loss": 1.0199, + "learning_rate": 1.4317050208660797e-05, + "loss": 1.1935, "step": 9640 }, { - "epoch": 0.27320128085239026, + "epoch": 0.37722043978402064, "grad_norm": 0.0, - "learning_rate": 1.7055150061341878e-05, - "loss": 0.9661, + "learning_rate": 1.4315907112728148e-05, + "loss": 1.0938, "step": 9641 }, { - "epoch": 0.2732296182946527, + "epoch": 0.3772595664762501, "grad_norm": 0.0, - "learning_rate": 1.7054499598983084e-05, - "loss": 0.8853, + "learning_rate": 1.4314763947487436e-05, + "loss": 1.0566, "step": 9642 }, { - "epoch": 0.2732579557369152, + "epoch": 0.3772986931684795, "grad_norm": 0.0, - "learning_rate": 1.7053849077201622e-05, - "loss": 1.0159, + "learning_rate": 1.4313620712957023e-05, + "loss": 1.0213, "step": 9643 }, { - "epoch": 0.27328629317917763, + "epoch": 0.37733781986070897, "grad_norm": 0.0, - "learning_rate": 1.7053198496002967e-05, - "loss": 0.9005, + "learning_rate": 1.4312477409155268e-05, + "loss": 1.1018, "step": 9644 }, { - "epoch": 0.2733146306214401, + "epoch": 0.3773769465529384, "grad_norm": 0.0, - "learning_rate": 1.7052547855392605e-05, - "loss": 1.0117, + "learning_rate": 1.4311334036100528e-05, + "loss": 1.2009, "step": 9645 }, { - "epoch": 0.27334296806370256, + "epoch": 0.37741607324516785, "grad_norm": 0.0, - "learning_rate": 1.705189715537601e-05, - "loss": 0.9959, + "learning_rate": 1.4310190593811167e-05, + "loss": 1.1216, "step": 9646 }, { - "epoch": 0.27337130550596506, + "epoch": 0.3774551999373973, "grad_norm": 0.0, - "learning_rate": 1.7051246395958666e-05, - "loss": 0.9317, + "learning_rate": 1.4309047082305548e-05, + "loss": 1.0915, "step": 9647 }, { - "epoch": 0.2733996429482275, + "epoch": 0.37749432662962673, "grad_norm": 0.0, - "learning_rate": 1.705059557714606e-05, - "loss": 1.0094, + "learning_rate": 1.430790350160203e-05, + "loss": 1.1526, "step": 9648 }, { - "epoch": 0.27342798039048993, + "epoch": 0.37753345332185617, "grad_norm": 0.0, - "learning_rate": 1.7049944698943668e-05, - "loss": 0.9767, + "learning_rate": 1.4306759851718979e-05, + "loss": 1.0178, "step": 9649 }, { - "epoch": 0.2734563178327524, + "epoch": 0.3775725800140856, "grad_norm": 0.0, - "learning_rate": 1.7049293761356968e-05, - "loss": 0.994, + "learning_rate": 1.4305616132674763e-05, + "loss": 1.0317, "step": 9650 }, { - "epoch": 0.27348465527501487, + "epoch": 0.37761170670631505, "grad_norm": 0.0, - "learning_rate": 1.7048642764391456e-05, - "loss": 0.9624, + "learning_rate": 1.4304472344487746e-05, + "loss": 1.1381, "step": 9651 }, { - "epoch": 0.27351299271727736, + "epoch": 0.3776508333985445, "grad_norm": 0.0, - "learning_rate": 1.7047991708052607e-05, - "loss": 0.8527, + "learning_rate": 1.4303328487176298e-05, + "loss": 1.1236, "step": 9652 }, { - "epoch": 0.2735413301595398, + "epoch": 0.37768996009077394, "grad_norm": 0.0, - "learning_rate": 1.7047340592345903e-05, - "loss": 1.0499, + "learning_rate": 1.4302184560758786e-05, + "loss": 1.0517, "step": 9653 }, { - "epoch": 0.27356966760180224, + "epoch": 0.3777290867830034, "grad_norm": 0.0, - "learning_rate": 1.7046689417276836e-05, - "loss": 1.0645, + "learning_rate": 1.4301040565253582e-05, + "loss": 1.1561, "step": 9654 }, { - "epoch": 0.27359800504406473, + "epoch": 0.3777682134752328, "grad_norm": 0.0, - "learning_rate": 1.7046038182850886e-05, - "loss": 0.9857, + "learning_rate": 1.4299896500679055e-05, + "loss": 1.0934, "step": 9655 }, { - "epoch": 0.27362634248632717, + "epoch": 0.37780734016746226, "grad_norm": 0.0, - "learning_rate": 1.704538688907354e-05, - "loss": 1.0329, + "learning_rate": 1.4298752367053577e-05, + "loss": 1.1212, "step": 9656 }, { - "epoch": 0.27365467992858966, + "epoch": 0.3778464668596917, "grad_norm": 0.0, - "learning_rate": 1.7044735535950284e-05, - "loss": 0.9702, + "learning_rate": 1.4297608164395524e-05, + "loss": 1.0045, "step": 9657 }, { - "epoch": 0.2736830173708521, + "epoch": 0.37788559355192114, "grad_norm": 0.0, - "learning_rate": 1.7044084123486604e-05, - "loss": 1.0047, + "learning_rate": 1.429646389272327e-05, + "loss": 1.0732, "step": 9658 }, { - "epoch": 0.2737113548131146, + "epoch": 0.3779247202441506, "grad_norm": 0.0, - "learning_rate": 1.7043432651687987e-05, - "loss": 0.961, + "learning_rate": 1.4295319552055191e-05, + "loss": 1.0474, "step": 9659 }, { - "epoch": 0.27373969225537703, + "epoch": 0.37796384693638, "grad_norm": 0.0, - "learning_rate": 1.7042781120559924e-05, - "loss": 0.9685, + "learning_rate": 1.4294175142409662e-05, + "loss": 0.9644, "step": 9660 }, { - "epoch": 0.27376802969763947, + "epoch": 0.37800297362860946, "grad_norm": 0.0, - "learning_rate": 1.70421295301079e-05, - "loss": 0.9828, + "learning_rate": 1.4293030663805056e-05, + "loss": 1.0961, "step": 9661 }, { - "epoch": 0.27379636713990196, + "epoch": 0.3780421003208389, "grad_norm": 0.0, - "learning_rate": 1.7041477880337405e-05, - "loss": 0.9021, + "learning_rate": 1.429188611625976e-05, + "loss": 1.1285, "step": 9662 }, { - "epoch": 0.2738247045821644, + "epoch": 0.3780812270130683, "grad_norm": 0.0, - "learning_rate": 1.7040826171253923e-05, - "loss": 0.8623, + "learning_rate": 1.4290741499792154e-05, + "loss": 1.036, "step": 9663 }, { - "epoch": 0.2738530420244269, + "epoch": 0.37812035370529773, "grad_norm": 0.0, - "learning_rate": 1.704017440286295e-05, - "loss": 0.9201, + "learning_rate": 1.4289596814420612e-05, + "loss": 1.0571, "step": 9664 }, { - "epoch": 0.27388137946668933, + "epoch": 0.37815948039752717, "grad_norm": 0.0, - "learning_rate": 1.7039522575169973e-05, - "loss": 0.8734, + "learning_rate": 1.4288452060163525e-05, + "loss": 1.0715, "step": 9665 }, { - "epoch": 0.2739097169089518, + "epoch": 0.3781986070897566, "grad_norm": 0.0, - "learning_rate": 1.7038870688180485e-05, - "loss": 0.9926, + "learning_rate": 1.4287307237039267e-05, + "loss": 1.0049, "step": 9666 }, { - "epoch": 0.27393805435121427, + "epoch": 0.37823773378198605, "grad_norm": 0.0, - "learning_rate": 1.703821874189997e-05, - "loss": 0.968, + "learning_rate": 1.4286162345066228e-05, + "loss": 0.9075, "step": 9667 }, { - "epoch": 0.2739663917934767, + "epoch": 0.3782768604742155, "grad_norm": 0.0, - "learning_rate": 1.703756673633393e-05, - "loss": 1.0175, + "learning_rate": 1.4285017384262794e-05, + "loss": 1.1401, "step": 9668 }, { - "epoch": 0.2739947292357392, + "epoch": 0.37831598716644493, "grad_norm": 0.0, - "learning_rate": 1.7036914671487854e-05, - "loss": 0.9053, + "learning_rate": 1.4283872354647348e-05, + "loss": 1.1079, "step": 9669 }, { - "epoch": 0.27402306667800164, + "epoch": 0.3783551138586744, "grad_norm": 0.0, - "learning_rate": 1.703626254736723e-05, - "loss": 0.89, + "learning_rate": 1.4282727256238282e-05, + "loss": 1.077, "step": 9670 }, { - "epoch": 0.27405140412026413, + "epoch": 0.3783942405509038, "grad_norm": 0.0, - "learning_rate": 1.703561036397755e-05, - "loss": 0.9532, + "learning_rate": 1.4281582089053983e-05, + "loss": 1.0042, "step": 9671 }, { - "epoch": 0.27407974156252657, + "epoch": 0.37843336724313326, "grad_norm": 0.0, - "learning_rate": 1.7034958121324314e-05, - "loss": 0.89, + "learning_rate": 1.428043685311284e-05, + "loss": 1.1382, "step": 9672 }, { - "epoch": 0.274108079004789, + "epoch": 0.3784724939353627, "grad_norm": 0.0, - "learning_rate": 1.7034305819413016e-05, - "loss": 1.058, + "learning_rate": 1.4279291548433243e-05, + "loss": 1.0806, "step": 9673 }, { - "epoch": 0.2741364164470515, + "epoch": 0.37851162062759214, "grad_norm": 0.0, - "learning_rate": 1.7033653458249145e-05, - "loss": 1.0093, + "learning_rate": 1.4278146175033589e-05, + "loss": 1.0585, "step": 9674 }, { - "epoch": 0.27416475388931394, + "epoch": 0.3785507473198216, "grad_norm": 0.0, - "learning_rate": 1.70330010378382e-05, - "loss": 0.9196, + "learning_rate": 1.4277000732932267e-05, + "loss": 1.2059, "step": 9675 }, { - "epoch": 0.27419309133157643, + "epoch": 0.378589874012051, "grad_norm": 0.0, - "learning_rate": 1.7032348558185674e-05, - "loss": 1.0695, + "learning_rate": 1.4275855222147672e-05, + "loss": 1.1071, "step": 9676 }, { - "epoch": 0.27422142877383887, + "epoch": 0.37862900070428046, "grad_norm": 0.0, - "learning_rate": 1.7031696019297065e-05, - "loss": 0.9321, + "learning_rate": 1.42747096426982e-05, + "loss": 1.162, "step": 9677 }, { - "epoch": 0.2742497662161013, + "epoch": 0.3786681273965099, "grad_norm": 0.0, - "learning_rate": 1.7031043421177874e-05, - "loss": 0.8084, + "learning_rate": 1.427356399460225e-05, + "loss": 1.14, "step": 9678 }, { - "epoch": 0.2742781036583638, + "epoch": 0.37870725408873934, "grad_norm": 0.0, - "learning_rate": 1.7030390763833588e-05, - "loss": 0.9036, + "learning_rate": 1.4272418277878217e-05, + "loss": 1.0744, "step": 9679 }, { - "epoch": 0.27430644110062624, + "epoch": 0.3787463807809688, "grad_norm": 0.0, - "learning_rate": 1.702973804726971e-05, - "loss": 0.9774, + "learning_rate": 1.4271272492544497e-05, + "loss": 1.0111, "step": 9680 }, { - "epoch": 0.27433477854288874, + "epoch": 0.3787855074731982, "grad_norm": 0.0, - "learning_rate": 1.702908527149174e-05, - "loss": 0.9617, + "learning_rate": 1.4270126638619495e-05, + "loss": 1.0883, "step": 9681 }, { - "epoch": 0.2743631159851512, + "epoch": 0.37882463416542766, "grad_norm": 0.0, - "learning_rate": 1.7028432436505177e-05, - "loss": 1.0587, + "learning_rate": 1.426898071612161e-05, + "loss": 1.0668, "step": 9682 }, { - "epoch": 0.27439145342741367, + "epoch": 0.3788637608576571, "grad_norm": 0.0, - "learning_rate": 1.7027779542315513e-05, - "loss": 0.8618, + "learning_rate": 1.4267834725069245e-05, + "loss": 1.0536, "step": 9683 }, { - "epoch": 0.2744197908696761, + "epoch": 0.37890288754988655, "grad_norm": 0.0, - "learning_rate": 1.7027126588928255e-05, - "loss": 1.0291, + "learning_rate": 1.4266688665480799e-05, + "loss": 1.0527, "step": 9684 }, { - "epoch": 0.27444812831193854, + "epoch": 0.378942014242116, "grad_norm": 0.0, - "learning_rate": 1.70264735763489e-05, - "loss": 0.9894, + "learning_rate": 1.4265542537374684e-05, + "loss": 1.0229, "step": 9685 }, { - "epoch": 0.27447646575420104, + "epoch": 0.37898114093434543, "grad_norm": 0.0, - "learning_rate": 1.702582050458295e-05, - "loss": 0.9485, + "learning_rate": 1.4264396340769297e-05, + "loss": 1.0893, "step": 9686 }, { - "epoch": 0.2745048031964635, + "epoch": 0.37902026762657487, "grad_norm": 0.0, - "learning_rate": 1.7025167373635903e-05, - "loss": 1.0901, + "learning_rate": 1.426325007568305e-05, + "loss": 1.1414, "step": 9687 }, { - "epoch": 0.27453314063872597, + "epoch": 0.3790593943188043, "grad_norm": 0.0, - "learning_rate": 1.702451418351326e-05, - "loss": 0.949, + "learning_rate": 1.4262103742134347e-05, + "loss": 1.0923, "step": 9688 }, { - "epoch": 0.2745614780809884, + "epoch": 0.37909852101103375, "grad_norm": 0.0, - "learning_rate": 1.702386093422053e-05, - "loss": 0.9721, + "learning_rate": 1.4260957340141601e-05, + "loss": 1.0801, "step": 9689 }, { - "epoch": 0.27458981552325085, + "epoch": 0.3791376477032632, "grad_norm": 0.0, - "learning_rate": 1.702320762576321e-05, - "loss": 0.9574, + "learning_rate": 1.4259810869723221e-05, + "loss": 1.0457, "step": 9690 }, { - "epoch": 0.27461815296551334, + "epoch": 0.3791767743954926, "grad_norm": 0.0, - "learning_rate": 1.7022554258146802e-05, - "loss": 1.051, + "learning_rate": 1.4258664330897611e-05, + "loss": 1.0404, "step": 9691 }, { - "epoch": 0.2746464904077758, + "epoch": 0.379215901087722, "grad_norm": 0.0, - "learning_rate": 1.702190083137681e-05, - "loss": 0.9924, + "learning_rate": 1.4257517723683192e-05, + "loss": 1.0352, "step": 9692 }, { - "epoch": 0.2746748278500383, + "epoch": 0.37925502777995146, "grad_norm": 0.0, - "learning_rate": 1.7021247345458746e-05, - "loss": 1.0335, + "learning_rate": 1.4256371048098371e-05, + "loss": 1.0572, "step": 9693 }, { - "epoch": 0.2747031652923007, + "epoch": 0.3792941544721809, "grad_norm": 0.0, - "learning_rate": 1.7020593800398107e-05, - "loss": 0.9764, + "learning_rate": 1.4255224304161569e-05, + "loss": 1.1703, "step": 9694 }, { - "epoch": 0.2747315027345632, + "epoch": 0.37933328116441034, "grad_norm": 0.0, - "learning_rate": 1.70199401962004e-05, - "loss": 0.9548, + "learning_rate": 1.4254077491891194e-05, + "loss": 1.0974, "step": 9695 }, { - "epoch": 0.27475984017682564, + "epoch": 0.3793724078566398, "grad_norm": 0.0, - "learning_rate": 1.7019286532871124e-05, - "loss": 0.9708, + "learning_rate": 1.4252930611305664e-05, + "loss": 1.109, "step": 9696 }, { - "epoch": 0.2747881776190881, + "epoch": 0.3794115345488692, "grad_norm": 0.0, - "learning_rate": 1.7018632810415795e-05, - "loss": 0.8768, + "learning_rate": 1.42517836624234e-05, + "loss": 1.0339, "step": 9697 }, { - "epoch": 0.2748165150613506, + "epoch": 0.37945066124109866, "grad_norm": 0.0, - "learning_rate": 1.7017979028839918e-05, - "loss": 0.9685, + "learning_rate": 1.4250636645262813e-05, + "loss": 1.1543, "step": 9698 }, { - "epoch": 0.274844852503613, + "epoch": 0.3794897879333281, "grad_norm": 0.0, - "learning_rate": 1.701732518814899e-05, - "loss": 1.0337, + "learning_rate": 1.4249489559842333e-05, + "loss": 1.1636, "step": 9699 }, { - "epoch": 0.2748731899458755, + "epoch": 0.37952891462555755, "grad_norm": 0.0, - "learning_rate": 1.701667128834853e-05, - "loss": 0.9363, + "learning_rate": 1.4248342406180373e-05, + "loss": 1.1055, "step": 9700 }, { - "epoch": 0.27490152738813795, + "epoch": 0.379568041317787, "grad_norm": 0.0, - "learning_rate": 1.7016017329444047e-05, - "loss": 0.8394, + "learning_rate": 1.424719518429536e-05, + "loss": 1.0814, "step": 9701 }, { - "epoch": 0.2749298648304004, + "epoch": 0.3796071680100164, "grad_norm": 0.0, - "learning_rate": 1.701536331144104e-05, - "loss": 1.0296, + "learning_rate": 1.4246047894205714e-05, + "loss": 1.0922, "step": 9702 }, { - "epoch": 0.2749582022726629, + "epoch": 0.37964629470224587, "grad_norm": 0.0, - "learning_rate": 1.7014709234345024e-05, - "loss": 0.9156, + "learning_rate": 1.4244900535929858e-05, + "loss": 1.1068, "step": 9703 }, { - "epoch": 0.2749865397149253, + "epoch": 0.3796854213944753, "grad_norm": 0.0, - "learning_rate": 1.7014055098161507e-05, - "loss": 1.0363, + "learning_rate": 1.4243753109486217e-05, + "loss": 1.09, "step": 9704 }, { - "epoch": 0.2750148771571878, + "epoch": 0.37972454808670475, "grad_norm": 0.0, - "learning_rate": 1.7013400902896e-05, - "loss": 1.0359, + "learning_rate": 1.4242605614893224e-05, + "loss": 1.1942, "step": 9705 }, { - "epoch": 0.27504321459945025, + "epoch": 0.3797636747789342, "grad_norm": 0.0, - "learning_rate": 1.7012746648554008e-05, - "loss": 1.1619, + "learning_rate": 1.4241458052169295e-05, + "loss": 0.994, "step": 9706 }, { - "epoch": 0.27507155204171274, + "epoch": 0.37980280147116363, "grad_norm": 0.0, - "learning_rate": 1.701209233514105e-05, - "loss": 0.9706, + "learning_rate": 1.424031042133287e-05, + "loss": 0.9564, "step": 9707 }, { - "epoch": 0.2750998894839752, + "epoch": 0.37984192816339307, "grad_norm": 0.0, - "learning_rate": 1.7011437962662637e-05, - "loss": 1.0015, + "learning_rate": 1.4239162722402371e-05, + "loss": 1.0644, "step": 9708 }, { - "epoch": 0.2751282269262376, + "epoch": 0.3798810548556225, "grad_norm": 0.0, - "learning_rate": 1.7010783531124278e-05, - "loss": 1.0175, + "learning_rate": 1.4238014955396228e-05, + "loss": 1.093, "step": 9709 }, { - "epoch": 0.2751565643685001, + "epoch": 0.37992018154785195, "grad_norm": 0.0, - "learning_rate": 1.7010129040531483e-05, - "loss": 1.0142, + "learning_rate": 1.4236867120332877e-05, + "loss": 1.1369, "step": 9710 }, { - "epoch": 0.27518490181076255, + "epoch": 0.3799593082400814, "grad_norm": 0.0, - "learning_rate": 1.700947449088977e-05, - "loss": 0.9422, + "learning_rate": 1.4235719217230751e-05, + "loss": 1.1569, "step": 9711 }, { - "epoch": 0.27521323925302504, + "epoch": 0.37999843493231084, "grad_norm": 0.0, - "learning_rate": 1.700881988220465e-05, - "loss": 0.9286, + "learning_rate": 1.4234571246108279e-05, + "loss": 1.0498, "step": 9712 }, { - "epoch": 0.2752415766952875, + "epoch": 0.3800375616245403, "grad_norm": 0.0, - "learning_rate": 1.7008165214481636e-05, - "loss": 0.9989, + "learning_rate": 1.4233423206983901e-05, + "loss": 0.9964, "step": 9713 }, { - "epoch": 0.2752699141375499, + "epoch": 0.3800766883167697, "grad_norm": 0.0, - "learning_rate": 1.7007510487726247e-05, - "loss": 0.9358, + "learning_rate": 1.423227509987605e-05, + "loss": 1.1979, "step": 9714 }, { - "epoch": 0.2752982515798124, + "epoch": 0.38011581500899916, "grad_norm": 0.0, - "learning_rate": 1.7006855701943994e-05, - "loss": 0.9474, + "learning_rate": 1.4231126924803167e-05, + "loss": 1.2448, "step": 9715 }, { - "epoch": 0.27532658902207485, + "epoch": 0.3801549417012286, "grad_norm": 0.0, - "learning_rate": 1.7006200857140395e-05, - "loss": 1.0801, + "learning_rate": 1.4229978681783684e-05, + "loss": 0.9779, "step": 9716 }, { - "epoch": 0.27535492646433735, + "epoch": 0.38019406839345804, "grad_norm": 0.0, - "learning_rate": 1.700554595332096e-05, - "loss": 0.8201, + "learning_rate": 1.4228830370836046e-05, + "loss": 0.9355, "step": 9717 }, { - "epoch": 0.2753832639065998, + "epoch": 0.3802331950856875, "grad_norm": 0.0, - "learning_rate": 1.700489099049121e-05, - "loss": 0.955, + "learning_rate": 1.422768199197869e-05, + "loss": 1.1477, "step": 9718 }, { - "epoch": 0.2754116013488623, + "epoch": 0.3802723217779169, "grad_norm": 0.0, - "learning_rate": 1.7004235968656665e-05, - "loss": 1.06, + "learning_rate": 1.4226533545230058e-05, + "loss": 1.0366, "step": 9719 }, { - "epoch": 0.2754399387911247, + "epoch": 0.3803114484701463, "grad_norm": 0.0, - "learning_rate": 1.7003580887822838e-05, - "loss": 0.8762, + "learning_rate": 1.4225385030608594e-05, + "loss": 1.1385, "step": 9720 }, { - "epoch": 0.27546827623338715, + "epoch": 0.38035057516237575, "grad_norm": 0.0, - "learning_rate": 1.700292574799525e-05, - "loss": 0.9495, + "learning_rate": 1.4224236448132742e-05, + "loss": 1.1233, "step": 9721 }, { - "epoch": 0.27549661367564965, + "epoch": 0.3803897018546052, "grad_norm": 0.0, - "learning_rate": 1.7002270549179418e-05, - "loss": 0.9269, + "learning_rate": 1.4223087797820945e-05, + "loss": 1.084, "step": 9722 }, { - "epoch": 0.2755249511179121, + "epoch": 0.38042882854683463, "grad_norm": 0.0, - "learning_rate": 1.700161529138086e-05, - "loss": 0.9121, + "learning_rate": 1.4221939079691652e-05, + "loss": 1.0852, "step": 9723 }, { - "epoch": 0.2755532885601746, + "epoch": 0.38046795523906407, "grad_norm": 0.0, - "learning_rate": 1.7000959974605094e-05, - "loss": 0.9446, + "learning_rate": 1.4220790293763307e-05, + "loss": 1.1036, "step": 9724 }, { - "epoch": 0.275581626002437, + "epoch": 0.3805070819312935, "grad_norm": 0.0, - "learning_rate": 1.7000304598857645e-05, - "loss": 1.0347, + "learning_rate": 1.4219641440054357e-05, + "loss": 1.0778, "step": 9725 }, { - "epoch": 0.27560996344469946, + "epoch": 0.38054620862352295, "grad_norm": 0.0, - "learning_rate": 1.699964916414403e-05, - "loss": 0.9194, + "learning_rate": 1.4218492518583253e-05, + "loss": 1.0031, "step": 9726 }, { - "epoch": 0.27563830088696195, + "epoch": 0.3805853353157524, "grad_norm": 0.0, - "learning_rate": 1.699899367046978e-05, - "loss": 0.9609, + "learning_rate": 1.4217343529368446e-05, + "loss": 1.074, "step": 9727 }, { - "epoch": 0.2756666383292244, + "epoch": 0.38062446200798183, "grad_norm": 0.0, - "learning_rate": 1.6998338117840396e-05, - "loss": 0.8925, + "learning_rate": 1.421619447242839e-05, + "loss": 0.9501, "step": 9728 }, { - "epoch": 0.2756949757714869, + "epoch": 0.3806635887002113, "grad_norm": 0.0, - "learning_rate": 1.699768250626141e-05, - "loss": 0.9395, + "learning_rate": 1.4215045347781527e-05, + "loss": 1.1797, "step": 9729 }, { - "epoch": 0.2757233132137493, + "epoch": 0.3807027153924407, "grad_norm": 0.0, - "learning_rate": 1.6997026835738354e-05, - "loss": 0.9599, + "learning_rate": 1.4213896155446323e-05, + "loss": 0.9762, "step": 9730 }, { - "epoch": 0.2757516506560118, + "epoch": 0.38074184208467016, "grad_norm": 0.0, - "learning_rate": 1.6996371106276735e-05, - "loss": 0.944, + "learning_rate": 1.4212746895441224e-05, + "loss": 1.0888, "step": 9731 }, { - "epoch": 0.27577998809827425, + "epoch": 0.3807809687768996, "grad_norm": 0.0, - "learning_rate": 1.699571531788209e-05, - "loss": 0.9902, + "learning_rate": 1.4211597567784692e-05, + "loss": 1.0582, "step": 9732 }, { - "epoch": 0.2758083255405367, + "epoch": 0.38082009546912904, "grad_norm": 0.0, - "learning_rate": 1.6995059470559935e-05, - "loss": 1.0069, + "learning_rate": 1.4210448172495176e-05, + "loss": 1.049, "step": 9733 }, { - "epoch": 0.2758366629827992, + "epoch": 0.3808592221613585, "grad_norm": 0.0, - "learning_rate": 1.6994403564315795e-05, - "loss": 0.9263, + "learning_rate": 1.420929870959114e-05, + "loss": 1.1271, "step": 9734 }, { - "epoch": 0.2758650004250616, + "epoch": 0.3808983488535879, "grad_norm": 0.0, - "learning_rate": 1.6993747599155198e-05, - "loss": 0.9659, + "learning_rate": 1.4208149179091044e-05, + "loss": 1.1225, "step": 9735 }, { - "epoch": 0.2758933378673241, + "epoch": 0.38093747554581736, "grad_norm": 0.0, - "learning_rate": 1.699309157508367e-05, - "loss": 1.055, + "learning_rate": 1.4206999581013343e-05, + "loss": 1.1016, "step": 9736 }, { - "epoch": 0.27592167530958656, + "epoch": 0.3809766022380468, "grad_norm": 0.0, - "learning_rate": 1.6992435492106728e-05, - "loss": 0.9173, + "learning_rate": 1.4205849915376501e-05, + "loss": 1.0797, "step": 9737 }, { - "epoch": 0.275950012751849, + "epoch": 0.38101572893027624, "grad_norm": 0.0, - "learning_rate": 1.6991779350229914e-05, - "loss": 1.0334, + "learning_rate": 1.4204700182198979e-05, + "loss": 1.1503, "step": 9738 }, { - "epoch": 0.2759783501941115, + "epoch": 0.3810548556225057, "grad_norm": 0.0, - "learning_rate": 1.699112314945874e-05, - "loss": 0.9285, + "learning_rate": 1.420355038149924e-05, + "loss": 1.1645, "step": 9739 }, { - "epoch": 0.2760066876363739, + "epoch": 0.3810939823147351, "grad_norm": 0.0, - "learning_rate": 1.6990466889798743e-05, - "loss": 0.9325, + "learning_rate": 1.420240051329575e-05, + "loss": 1.0389, "step": 9740 }, { - "epoch": 0.2760350250786364, + "epoch": 0.38113310900696457, "grad_norm": 0.0, - "learning_rate": 1.6989810571255444e-05, - "loss": 1.0461, + "learning_rate": 1.4201250577606975e-05, + "loss": 1.1527, "step": 9741 }, { - "epoch": 0.27606336252089886, + "epoch": 0.381172235699194, "grad_norm": 0.0, - "learning_rate": 1.698915419383438e-05, - "loss": 0.9237, + "learning_rate": 1.4200100574451378e-05, + "loss": 1.0098, "step": 9742 }, { - "epoch": 0.27609169996316135, + "epoch": 0.38121136239142345, "grad_norm": 0.0, - "learning_rate": 1.698849775754107e-05, - "loss": 0.9551, + "learning_rate": 1.419895050384743e-05, + "loss": 1.1302, "step": 9743 }, { - "epoch": 0.2761200374054238, + "epoch": 0.3812504890836529, "grad_norm": 0.0, - "learning_rate": 1.698784126238105e-05, - "loss": 0.9505, + "learning_rate": 1.41978003658136e-05, + "loss": 1.1375, "step": 9744 }, { - "epoch": 0.27614837484768623, + "epoch": 0.38128961577588233, "grad_norm": 0.0, - "learning_rate": 1.698718470835985e-05, - "loss": 0.9907, + "learning_rate": 1.4196650160368356e-05, + "loss": 1.1773, "step": 9745 }, { - "epoch": 0.2761767122899487, + "epoch": 0.38132874246811177, "grad_norm": 0.0, - "learning_rate": 1.6986528095482996e-05, - "loss": 0.9627, + "learning_rate": 1.4195499887530167e-05, + "loss": 0.905, "step": 9746 }, { - "epoch": 0.27620504973221116, + "epoch": 0.3813678691603412, "grad_norm": 0.0, - "learning_rate": 1.698587142375602e-05, - "loss": 0.963, + "learning_rate": 1.419434954731751e-05, + "loss": 1.0062, "step": 9747 }, { - "epoch": 0.27623338717447365, + "epoch": 0.3814069958525706, "grad_norm": 0.0, - "learning_rate": 1.698521469318446e-05, - "loss": 0.8977, + "learning_rate": 1.419319913974885e-05, + "loss": 1.1024, "step": 9748 }, { - "epoch": 0.2762617246167361, + "epoch": 0.38144612254480004, "grad_norm": 0.0, - "learning_rate": 1.698455790377384e-05, - "loss": 0.9192, + "learning_rate": 1.4192048664842671e-05, + "loss": 1.1124, "step": 9749 }, { - "epoch": 0.27629006205899853, + "epoch": 0.3814852492370295, "grad_norm": 0.0, - "learning_rate": 1.6983901055529696e-05, - "loss": 0.871, + "learning_rate": 1.4190898122617443e-05, + "loss": 1.0635, "step": 9750 }, { - "epoch": 0.276318399501261, + "epoch": 0.3815243759292589, "grad_norm": 0.0, - "learning_rate": 1.6983244148457558e-05, - "loss": 0.9956, + "learning_rate": 1.4189747513091639e-05, + "loss": 0.9336, "step": 9751 }, { - "epoch": 0.27634673694352346, + "epoch": 0.38156350262148836, "grad_norm": 0.0, - "learning_rate": 1.6982587182562963e-05, - "loss": 0.9055, + "learning_rate": 1.4188596836283744e-05, + "loss": 1.1846, "step": 9752 }, { - "epoch": 0.27637507438578596, + "epoch": 0.3816026293137178, "grad_norm": 0.0, - "learning_rate": 1.6981930157851443e-05, - "loss": 0.9394, + "learning_rate": 1.4187446092212232e-05, + "loss": 1.1071, "step": 9753 }, { - "epoch": 0.2764034118280484, + "epoch": 0.38164175600594724, "grad_norm": 0.0, - "learning_rate": 1.698127307432853e-05, - "loss": 0.9992, + "learning_rate": 1.418629528089558e-05, + "loss": 1.0838, "step": 9754 }, { - "epoch": 0.2764317492703109, + "epoch": 0.3816808826981767, "grad_norm": 0.0, - "learning_rate": 1.6980615931999767e-05, - "loss": 0.9826, + "learning_rate": 1.4185144402352274e-05, + "loss": 1.1472, "step": 9755 }, { - "epoch": 0.2764600867125733, + "epoch": 0.3817200093904061, "grad_norm": 0.0, - "learning_rate": 1.6979958730870678e-05, - "loss": 0.9753, + "learning_rate": 1.418399345660079e-05, + "loss": 1.1426, "step": 9756 }, { - "epoch": 0.27648842415483577, + "epoch": 0.38175913608263556, "grad_norm": 0.0, - "learning_rate": 1.697930147094681e-05, - "loss": 0.9586, + "learning_rate": 1.4182842443659617e-05, + "loss": 1.1157, "step": 9757 }, { - "epoch": 0.27651676159709826, + "epoch": 0.381798262774865, "grad_norm": 0.0, - "learning_rate": 1.697864415223369e-05, - "loss": 1.1201, + "learning_rate": 1.4181691363547234e-05, + "loss": 1.1904, "step": 9758 }, { - "epoch": 0.2765450990393607, + "epoch": 0.38183738946709445, "grad_norm": 0.0, - "learning_rate": 1.697798677473686e-05, - "loss": 0.9716, + "learning_rate": 1.4180540216282128e-05, + "loss": 1.1494, "step": 9759 }, { - "epoch": 0.2765734364816232, + "epoch": 0.3818765161593239, "grad_norm": 0.0, - "learning_rate": 1.6977329338461857e-05, - "loss": 0.9619, + "learning_rate": 1.4179389001882787e-05, + "loss": 1.0515, "step": 9760 }, { - "epoch": 0.27660177392388563, + "epoch": 0.3819156428515533, "grad_norm": 0.0, - "learning_rate": 1.697667184341422e-05, - "loss": 0.888, + "learning_rate": 1.4178237720367693e-05, + "loss": 1.0622, "step": 9761 }, { - "epoch": 0.27663011136614807, + "epoch": 0.38195476954378277, "grad_norm": 0.0, - "learning_rate": 1.6976014289599477e-05, - "loss": 0.8928, + "learning_rate": 1.4177086371755337e-05, + "loss": 0.9653, "step": 9762 }, { - "epoch": 0.27665844880841056, + "epoch": 0.3819938962360122, "grad_norm": 0.0, - "learning_rate": 1.6975356677023182e-05, - "loss": 0.9872, + "learning_rate": 1.417593495606421e-05, + "loss": 0.9879, "step": 9763 }, { - "epoch": 0.276686786250673, + "epoch": 0.38203302292824165, "grad_norm": 0.0, - "learning_rate": 1.6974699005690868e-05, - "loss": 0.9114, + "learning_rate": 1.4174783473312799e-05, + "loss": 0.9874, "step": 9764 }, { - "epoch": 0.2767151236929355, + "epoch": 0.3820721496204711, "grad_norm": 0.0, - "learning_rate": 1.6974041275608074e-05, - "loss": 1.0936, + "learning_rate": 1.4173631923519598e-05, + "loss": 1.0972, "step": 9765 }, { - "epoch": 0.27674346113519793, + "epoch": 0.38211127631270053, "grad_norm": 0.0, - "learning_rate": 1.697338348678034e-05, - "loss": 0.9764, + "learning_rate": 1.4172480306703096e-05, + "loss": 1.0717, "step": 9766 }, { - "epoch": 0.2767717985774604, + "epoch": 0.38215040300493, "grad_norm": 0.0, - "learning_rate": 1.6972725639213206e-05, - "loss": 0.8581, + "learning_rate": 1.4171328622881788e-05, + "loss": 1.2536, "step": 9767 }, { - "epoch": 0.27680013601972286, + "epoch": 0.3821895296971594, "grad_norm": 0.0, - "learning_rate": 1.6972067732912215e-05, - "loss": 0.8969, + "learning_rate": 1.4170176872074173e-05, + "loss": 1.2181, "step": 9768 }, { - "epoch": 0.2768284734619853, + "epoch": 0.38222865638938885, "grad_norm": 0.0, - "learning_rate": 1.697140976788291e-05, - "loss": 0.96, + "learning_rate": 1.416902505429874e-05, + "loss": 1.1731, "step": 9769 }, { - "epoch": 0.2768568109042478, + "epoch": 0.3822677830816183, "grad_norm": 0.0, - "learning_rate": 1.6970751744130827e-05, - "loss": 0.9644, + "learning_rate": 1.4167873169573988e-05, + "loss": 1.0839, "step": 9770 }, { - "epoch": 0.27688514834651023, + "epoch": 0.38230690977384774, "grad_norm": 0.0, - "learning_rate": 1.697009366166152e-05, - "loss": 1.0961, + "learning_rate": 1.4166721217918418e-05, + "loss": 1.1349, "step": 9771 }, { - "epoch": 0.27691348578877273, + "epoch": 0.3823460364660772, "grad_norm": 0.0, - "learning_rate": 1.6969435520480522e-05, - "loss": 1.0385, + "learning_rate": 1.4165569199350526e-05, + "loss": 1.1699, "step": 9772 }, { - "epoch": 0.27694182323103517, + "epoch": 0.3823851631583066, "grad_norm": 0.0, - "learning_rate": 1.6968777320593385e-05, - "loss": 1.0973, + "learning_rate": 1.4164417113888814e-05, + "loss": 1.0652, "step": 9773 }, { - "epoch": 0.2769701606732976, + "epoch": 0.38242428985053606, "grad_norm": 0.0, - "learning_rate": 1.6968119062005644e-05, - "loss": 1.0457, + "learning_rate": 1.4163264961551777e-05, + "loss": 1.1766, "step": 9774 }, { - "epoch": 0.2769984981155601, + "epoch": 0.3824634165427655, "grad_norm": 0.0, - "learning_rate": 1.6967460744722847e-05, - "loss": 0.9647, + "learning_rate": 1.4162112742357926e-05, + "loss": 1.0258, "step": 9775 }, { - "epoch": 0.27702683555782254, + "epoch": 0.38250254323499494, "grad_norm": 0.0, - "learning_rate": 1.6966802368750546e-05, - "loss": 0.959, + "learning_rate": 1.4160960456325757e-05, + "loss": 1.0881, "step": 9776 }, { - "epoch": 0.27705517300008503, + "epoch": 0.3825416699272243, "grad_norm": 0.0, - "learning_rate": 1.6966143934094278e-05, - "loss": 1.0005, + "learning_rate": 1.4159808103473778e-05, + "loss": 1.2261, "step": 9777 }, { - "epoch": 0.27708351044234747, + "epoch": 0.38258079661945377, "grad_norm": 0.0, - "learning_rate": 1.6965485440759596e-05, - "loss": 0.9167, + "learning_rate": 1.4158655683820492e-05, + "loss": 1.2252, "step": 9778 }, { - "epoch": 0.2771118478846099, + "epoch": 0.3826199233116832, "grad_norm": 0.0, - "learning_rate": 1.696482688875204e-05, - "loss": 1.0605, + "learning_rate": 1.4157503197384407e-05, + "loss": 1.1897, "step": 9779 }, { - "epoch": 0.2771401853268724, + "epoch": 0.38265905000391265, "grad_norm": 0.0, - "learning_rate": 1.696416827807716e-05, - "loss": 1.0755, + "learning_rate": 1.4156350644184032e-05, + "loss": 1.0524, "step": 9780 }, { - "epoch": 0.27716852276913484, + "epoch": 0.3826981766961421, "grad_norm": 0.0, - "learning_rate": 1.6963509608740508e-05, - "loss": 0.8573, + "learning_rate": 1.4155198024237876e-05, + "loss": 1.1423, "step": 9781 }, { - "epoch": 0.27719686021139733, + "epoch": 0.38273730338837153, "grad_norm": 0.0, - "learning_rate": 1.696285088074763e-05, - "loss": 0.8842, + "learning_rate": 1.4154045337564442e-05, + "loss": 1.0352, "step": 9782 }, { - "epoch": 0.27722519765365977, + "epoch": 0.38277643008060097, "grad_norm": 0.0, - "learning_rate": 1.696219209410407e-05, - "loss": 1.0037, + "learning_rate": 1.4152892584182247e-05, + "loss": 1.0192, "step": 9783 }, { - "epoch": 0.27725353509592227, + "epoch": 0.3828155567728304, "grad_norm": 0.0, - "learning_rate": 1.6961533248815383e-05, - "loss": 0.9097, + "learning_rate": 1.4151739764109803e-05, + "loss": 1.0597, "step": 9784 }, { - "epoch": 0.2772818725381847, + "epoch": 0.38285468346505985, "grad_norm": 0.0, - "learning_rate": 1.6960874344887114e-05, - "loss": 0.9832, + "learning_rate": 1.4150586877365615e-05, + "loss": 1.0846, "step": 9785 }, { - "epoch": 0.27731020998044714, + "epoch": 0.3828938101572893, "grad_norm": 0.0, - "learning_rate": 1.696021538232482e-05, - "loss": 0.8658, + "learning_rate": 1.4149433923968207e-05, + "loss": 1.0295, "step": 9786 }, { - "epoch": 0.27733854742270964, + "epoch": 0.38293293684951873, "grad_norm": 0.0, - "learning_rate": 1.695955636113404e-05, - "loss": 1.0052, + "learning_rate": 1.4148280903936089e-05, + "loss": 1.1862, "step": 9787 }, { - "epoch": 0.2773668848649721, + "epoch": 0.3829720635417482, "grad_norm": 0.0, - "learning_rate": 1.6958897281320336e-05, - "loss": 0.9907, + "learning_rate": 1.414712781728778e-05, + "loss": 1.1247, "step": 9788 }, { - "epoch": 0.27739522230723457, + "epoch": 0.3830111902339776, "grad_norm": 0.0, - "learning_rate": 1.6958238142889258e-05, - "loss": 1.0167, + "learning_rate": 1.4145974664041793e-05, + "loss": 1.1854, "step": 9789 }, { - "epoch": 0.277423559749497, + "epoch": 0.38305031692620706, "grad_norm": 0.0, - "learning_rate": 1.6957578945846356e-05, - "loss": 0.9065, + "learning_rate": 1.4144821444216646e-05, + "loss": 1.174, "step": 9790 }, { - "epoch": 0.27745189719175944, + "epoch": 0.3830894436184365, "grad_norm": 0.0, - "learning_rate": 1.695691969019718e-05, - "loss": 1.0922, + "learning_rate": 1.4143668157830863e-05, + "loss": 1.0612, "step": 9791 }, { - "epoch": 0.27748023463402194, + "epoch": 0.38312857031066594, "grad_norm": 0.0, - "learning_rate": 1.6956260375947286e-05, - "loss": 0.906, + "learning_rate": 1.4142514804902962e-05, + "loss": 1.1357, "step": 9792 }, { - "epoch": 0.2775085720762844, + "epoch": 0.3831676970028954, "grad_norm": 0.0, - "learning_rate": 1.6955601003102235e-05, - "loss": 0.8964, + "learning_rate": 1.4141361385451462e-05, + "loss": 0.9746, "step": 9793 }, { - "epoch": 0.27753690951854687, + "epoch": 0.3832068236951248, "grad_norm": 0.0, - "learning_rate": 1.6954941571667566e-05, - "loss": 1.0426, + "learning_rate": 1.4140207899494888e-05, + "loss": 1.1147, "step": 9794 }, { - "epoch": 0.2775652469608093, + "epoch": 0.38324595038735426, "grad_norm": 0.0, - "learning_rate": 1.6954282081648842e-05, - "loss": 1.009, + "learning_rate": 1.4139054347051764e-05, + "loss": 1.1187, "step": 9795 }, { - "epoch": 0.2775935844030718, + "epoch": 0.3832850770795837, "grad_norm": 0.0, - "learning_rate": 1.695362253305162e-05, - "loss": 1.0223, + "learning_rate": 1.4137900728140615e-05, + "loss": 1.0925, "step": 9796 }, { - "epoch": 0.27762192184533424, + "epoch": 0.38332420377181314, "grad_norm": 0.0, - "learning_rate": 1.6952962925881453e-05, - "loss": 0.9653, + "learning_rate": 1.4136747042779963e-05, + "loss": 1.0997, "step": 9797 }, { - "epoch": 0.2776502592875967, + "epoch": 0.3833633304640426, "grad_norm": 0.0, - "learning_rate": 1.6952303260143898e-05, - "loss": 0.9435, + "learning_rate": 1.4135593290988338e-05, + "loss": 1.0057, "step": 9798 }, { - "epoch": 0.2776785967298592, + "epoch": 0.383402457156272, "grad_norm": 0.0, - "learning_rate": 1.6951643535844508e-05, - "loss": 1.0379, + "learning_rate": 1.4134439472784267e-05, + "loss": 1.1, "step": 9799 }, { - "epoch": 0.2777069341721216, + "epoch": 0.38344158384850147, "grad_norm": 0.0, - "learning_rate": 1.6950983752988845e-05, - "loss": 0.9594, + "learning_rate": 1.4133285588186278e-05, + "loss": 1.1381, "step": 9800 }, { - "epoch": 0.2777352716143841, + "epoch": 0.3834807105407309, "grad_norm": 0.0, - "learning_rate": 1.6950323911582464e-05, - "loss": 0.8844, + "learning_rate": 1.4132131637212903e-05, + "loss": 1.1118, "step": 9801 }, { - "epoch": 0.27776360905664654, + "epoch": 0.38351983723296035, "grad_norm": 0.0, - "learning_rate": 1.6949664011630927e-05, - "loss": 0.96, + "learning_rate": 1.4130977619882673e-05, + "loss": 1.1575, "step": 9802 }, { - "epoch": 0.277791946498909, + "epoch": 0.3835589639251898, "grad_norm": 0.0, - "learning_rate": 1.6949004053139785e-05, - "loss": 1.0264, + "learning_rate": 1.4129823536214115e-05, + "loss": 0.902, "step": 9803 }, { - "epoch": 0.2778202839411715, + "epoch": 0.38359809061741923, "grad_norm": 0.0, - "learning_rate": 1.6948344036114604e-05, - "loss": 1.0357, + "learning_rate": 1.4128669386225768e-05, + "loss": 1.0353, "step": 9804 }, { - "epoch": 0.2778486213834339, + "epoch": 0.3836372173096486, "grad_norm": 0.0, - "learning_rate": 1.694768396056094e-05, - "loss": 1.0781, + "learning_rate": 1.4127515169936164e-05, + "loss": 1.0122, "step": 9805 }, { - "epoch": 0.2778769588256964, + "epoch": 0.38367634400187806, "grad_norm": 0.0, - "learning_rate": 1.6947023826484353e-05, - "loss": 1.0212, + "learning_rate": 1.412636088736384e-05, + "loss": 1.0347, "step": 9806 }, { - "epoch": 0.27790529626795885, + "epoch": 0.3837154706941075, "grad_norm": 0.0, - "learning_rate": 1.6946363633890408e-05, - "loss": 0.9677, + "learning_rate": 1.4125206538527327e-05, + "loss": 1.0602, "step": 9807 }, { - "epoch": 0.27793363371022134, + "epoch": 0.38375459738633694, "grad_norm": 0.0, - "learning_rate": 1.694570338278466e-05, - "loss": 0.9453, + "learning_rate": 1.4124052123445168e-05, + "loss": 1.0314, "step": 9808 }, { - "epoch": 0.2779619711524838, + "epoch": 0.3837937240785664, "grad_norm": 0.0, - "learning_rate": 1.694504307317267e-05, - "loss": 1.0437, + "learning_rate": 1.4122897642135903e-05, + "loss": 1.0673, "step": 9809 }, { - "epoch": 0.2779903085947462, + "epoch": 0.3838328507707958, "grad_norm": 0.0, - "learning_rate": 1.694438270506001e-05, - "loss": 0.9172, + "learning_rate": 1.4121743094618063e-05, + "loss": 1.0334, "step": 9810 }, { - "epoch": 0.2780186460370087, + "epoch": 0.38387197746302526, "grad_norm": 0.0, - "learning_rate": 1.6943722278452234e-05, - "loss": 0.897, + "learning_rate": 1.4120588480910198e-05, + "loss": 1.0989, "step": 9811 }, { - "epoch": 0.27804698347927115, + "epoch": 0.3839111041552547, "grad_norm": 0.0, - "learning_rate": 1.69430617933549e-05, - "loss": 1.0101, + "learning_rate": 1.4119433801030841e-05, + "loss": 1.0872, "step": 9812 }, { - "epoch": 0.27807532092153364, + "epoch": 0.38395023084748414, "grad_norm": 0.0, - "learning_rate": 1.6942401249773585e-05, - "loss": 0.7717, + "learning_rate": 1.4118279054998543e-05, + "loss": 1.0579, "step": 9813 }, { - "epoch": 0.2781036583637961, + "epoch": 0.3839893575397136, "grad_norm": 0.0, - "learning_rate": 1.6941740647713847e-05, - "loss": 0.85, + "learning_rate": 1.4117124242831842e-05, + "loss": 1.0336, "step": 9814 }, { - "epoch": 0.2781319958060585, + "epoch": 0.384028484231943, "grad_norm": 0.0, - "learning_rate": 1.6941079987181245e-05, - "loss": 1.0312, + "learning_rate": 1.4115969364549288e-05, + "loss": 1.2322, "step": 9815 }, { - "epoch": 0.278160333248321, + "epoch": 0.38406761092417246, "grad_norm": 0.0, - "learning_rate": 1.694041926818135e-05, - "loss": 0.9571, + "learning_rate": 1.4114814420169415e-05, + "loss": 1.0392, "step": 9816 }, { - "epoch": 0.27818867069058345, + "epoch": 0.3841067376164019, "grad_norm": 0.0, - "learning_rate": 1.6939758490719727e-05, - "loss": 1.0854, + "learning_rate": 1.4113659409710787e-05, + "loss": 1.0674, "step": 9817 }, { - "epoch": 0.27821700813284594, + "epoch": 0.38414586430863135, "grad_norm": 0.0, - "learning_rate": 1.6939097654801947e-05, - "loss": 0.9801, + "learning_rate": 1.4112504333191938e-05, + "loss": 1.2293, "step": 9818 }, { - "epoch": 0.2782453455751084, + "epoch": 0.3841849910008608, "grad_norm": 0.0, - "learning_rate": 1.6938436760433565e-05, - "loss": 1.0011, + "learning_rate": 1.4111349190631426e-05, + "loss": 1.1852, "step": 9819 }, { - "epoch": 0.2782736830173709, + "epoch": 0.38422411769309023, "grad_norm": 0.0, - "learning_rate": 1.6937775807620152e-05, - "loss": 0.876, + "learning_rate": 1.41101939820478e-05, + "loss": 1.0432, "step": 9820 }, { - "epoch": 0.2783020204596333, + "epoch": 0.38426324438531967, "grad_norm": 0.0, - "learning_rate": 1.693711479636728e-05, - "loss": 0.9877, + "learning_rate": 1.4109038707459606e-05, + "loss": 1.1603, "step": 9821 }, { - "epoch": 0.27833035790189575, + "epoch": 0.3843023710775491, "grad_norm": 0.0, - "learning_rate": 1.6936453726680514e-05, - "loss": 0.9882, + "learning_rate": 1.41078833668854e-05, + "loss": 1.0638, "step": 9822 }, { - "epoch": 0.27835869534415825, + "epoch": 0.38434149776977855, "grad_norm": 0.0, - "learning_rate": 1.693579259856542e-05, - "loss": 0.8587, + "learning_rate": 1.4106727960343733e-05, + "loss": 1.0948, "step": 9823 }, { - "epoch": 0.2783870327864207, + "epoch": 0.384380624462008, "grad_norm": 0.0, - "learning_rate": 1.693513141202757e-05, - "loss": 0.932, + "learning_rate": 1.4105572487853164e-05, + "loss": 0.9632, "step": 9824 }, { - "epoch": 0.2784153702286832, + "epoch": 0.38441975115423743, "grad_norm": 0.0, - "learning_rate": 1.6934470167072536e-05, - "loss": 1.0207, + "learning_rate": 1.410441694943224e-05, + "loss": 1.1431, "step": 9825 }, { - "epoch": 0.2784437076709456, + "epoch": 0.3844588778464669, "grad_norm": 0.0, - "learning_rate": 1.6933808863705885e-05, - "loss": 1.0701, + "learning_rate": 1.4103261345099528e-05, + "loss": 0.9859, "step": 9826 }, { - "epoch": 0.27847204511320806, + "epoch": 0.3844980045386963, "grad_norm": 0.0, - "learning_rate": 1.6933147501933182e-05, - "loss": 0.9382, + "learning_rate": 1.4102105674873579e-05, + "loss": 1.0536, "step": 9827 }, { - "epoch": 0.27850038255547055, + "epoch": 0.38453713123092575, "grad_norm": 0.0, - "learning_rate": 1.693248608176001e-05, - "loss": 0.9619, + "learning_rate": 1.4100949938772953e-05, + "loss": 1.0915, "step": 9828 }, { - "epoch": 0.278528719997733, + "epoch": 0.3845762579231552, "grad_norm": 0.0, - "learning_rate": 1.6931824603191926e-05, - "loss": 0.8069, + "learning_rate": 1.409979413681621e-05, + "loss": 1.1338, "step": 9829 }, { - "epoch": 0.2785570574399955, + "epoch": 0.38461538461538464, "grad_norm": 0.0, - "learning_rate": 1.6931163066234514e-05, - "loss": 0.9262, + "learning_rate": 1.409863826902191e-05, + "loss": 0.9989, "step": 9830 }, { - "epoch": 0.2785853948822579, + "epoch": 0.3846545113076141, "grad_norm": 0.0, - "learning_rate": 1.693050147089334e-05, - "loss": 1.0262, + "learning_rate": 1.4097482335408617e-05, + "loss": 1.0908, "step": 9831 }, { - "epoch": 0.2786137323245204, + "epoch": 0.3846936379998435, "grad_norm": 0.0, - "learning_rate": 1.6929839817173977e-05, - "loss": 1.1096, + "learning_rate": 1.4096326335994887e-05, + "loss": 1.0459, "step": 9832 }, { - "epoch": 0.27864206976678285, + "epoch": 0.38473276469207296, "grad_norm": 0.0, - "learning_rate": 1.6929178105082003e-05, - "loss": 1.0125, + "learning_rate": 1.4095170270799296e-05, + "loss": 0.9934, "step": 9833 }, { - "epoch": 0.2786704072090453, + "epoch": 0.38477189138430234, "grad_norm": 0.0, - "learning_rate": 1.6928516334622988e-05, - "loss": 0.959, + "learning_rate": 1.4094014139840397e-05, + "loss": 1.1689, "step": 9834 }, { - "epoch": 0.2786987446513078, + "epoch": 0.3848110180765318, "grad_norm": 0.0, - "learning_rate": 1.6927854505802504e-05, - "loss": 1.0106, + "learning_rate": 1.4092857943136764e-05, + "loss": 1.1446, "step": 9835 }, { - "epoch": 0.2787270820935702, + "epoch": 0.3848501447687612, "grad_norm": 0.0, - "learning_rate": 1.6927192618626133e-05, - "loss": 0.9457, + "learning_rate": 1.409170168070696e-05, + "loss": 1.1158, "step": 9836 }, { - "epoch": 0.2787554195358327, + "epoch": 0.38488927146099067, "grad_norm": 0.0, - "learning_rate": 1.6926530673099444e-05, - "loss": 1.0112, + "learning_rate": 1.4090545352569553e-05, + "loss": 1.1852, "step": 9837 }, { - "epoch": 0.27878375697809515, + "epoch": 0.3849283981532201, "grad_norm": 0.0, - "learning_rate": 1.6925868669228015e-05, - "loss": 1.0356, + "learning_rate": 1.4089388958743115e-05, + "loss": 1.2206, "step": 9838 }, { - "epoch": 0.2788120944203576, + "epoch": 0.38496752484544955, "grad_norm": 0.0, - "learning_rate": 1.6925206607017425e-05, - "loss": 1.0557, + "learning_rate": 1.4088232499246214e-05, + "loss": 1.0732, "step": 9839 }, { - "epoch": 0.2788404318626201, + "epoch": 0.385006651537679, "grad_norm": 0.0, - "learning_rate": 1.6924544486473245e-05, - "loss": 0.974, + "learning_rate": 1.4087075974097421e-05, + "loss": 1.1094, "step": 9840 }, { - "epoch": 0.2788687693048825, + "epoch": 0.38504577822990843, "grad_norm": 0.0, - "learning_rate": 1.6923882307601055e-05, - "loss": 0.846, + "learning_rate": 1.4085919383315311e-05, + "loss": 1.1735, "step": 9841 }, { - "epoch": 0.278897106747145, + "epoch": 0.38508490492213787, "grad_norm": 0.0, - "learning_rate": 1.6923220070406438e-05, - "loss": 0.9401, + "learning_rate": 1.4084762726918455e-05, + "loss": 1.1308, "step": 9842 }, { - "epoch": 0.27892544418940746, + "epoch": 0.3851240316143673, "grad_norm": 0.0, - "learning_rate": 1.6922557774894965e-05, - "loss": 1.0092, + "learning_rate": 1.4083606004925427e-05, + "loss": 1.0973, "step": 9843 }, { - "epoch": 0.27895378163166995, + "epoch": 0.38516315830659675, "grad_norm": 0.0, - "learning_rate": 1.6921895421072215e-05, - "loss": 1.0462, + "learning_rate": 1.4082449217354807e-05, + "loss": 1.1551, "step": 9844 }, { - "epoch": 0.2789821190739324, + "epoch": 0.3852022849988262, "grad_norm": 0.0, - "learning_rate": 1.692123300894377e-05, - "loss": 0.9875, + "learning_rate": 1.4081292364225163e-05, + "loss": 1.0642, "step": 9845 }, { - "epoch": 0.2790104565161948, + "epoch": 0.38524141169105564, "grad_norm": 0.0, - "learning_rate": 1.692057053851521e-05, - "loss": 0.9428, + "learning_rate": 1.4080135445555083e-05, + "loss": 1.0237, "step": 9846 }, { - "epoch": 0.2790387939584573, + "epoch": 0.3852805383832851, "grad_norm": 0.0, - "learning_rate": 1.6919908009792117e-05, - "loss": 0.8795, + "learning_rate": 1.4078978461363136e-05, + "loss": 1.1159, "step": 9847 }, { - "epoch": 0.27906713140071976, + "epoch": 0.3853196650755145, "grad_norm": 0.0, - "learning_rate": 1.6919245422780065e-05, - "loss": 1.1565, + "learning_rate": 1.407782141166791e-05, + "loss": 1.0701, "step": 9848 }, { - "epoch": 0.27909546884298225, + "epoch": 0.38535879176774396, "grad_norm": 0.0, - "learning_rate": 1.6918582777484642e-05, - "loss": 0.9553, + "learning_rate": 1.4076664296487978e-05, + "loss": 1.0688, "step": 9849 }, { - "epoch": 0.2791238062852447, + "epoch": 0.3853979184599734, "grad_norm": 0.0, - "learning_rate": 1.6917920073911425e-05, - "loss": 0.962, + "learning_rate": 1.4075507115841929e-05, + "loss": 1.1298, "step": 9850 }, { - "epoch": 0.27915214372750713, + "epoch": 0.38543704515220284, "grad_norm": 0.0, - "learning_rate": 1.6917257312066e-05, - "loss": 0.9442, + "learning_rate": 1.407434986974834e-05, + "loss": 1.0732, "step": 9851 }, { - "epoch": 0.2791804811697696, + "epoch": 0.3854761718444323, "grad_norm": 0.0, - "learning_rate": 1.6916594491953948e-05, - "loss": 0.9973, + "learning_rate": 1.40731925582258e-05, + "loss": 1.0946, "step": 9852 }, { - "epoch": 0.27920881861203206, + "epoch": 0.3855152985366617, "grad_norm": 0.0, - "learning_rate": 1.691593161358085e-05, - "loss": 0.9971, + "learning_rate": 1.4072035181292888e-05, + "loss": 1.0227, "step": 9853 }, { - "epoch": 0.27923715605429456, + "epoch": 0.38555442522889116, "grad_norm": 0.0, - "learning_rate": 1.6915268676952295e-05, - "loss": 0.9926, + "learning_rate": 1.4070877738968196e-05, + "loss": 1.1058, "step": 9854 }, { - "epoch": 0.279265493496557, + "epoch": 0.3855935519211206, "grad_norm": 0.0, - "learning_rate": 1.6914605682073863e-05, - "loss": 1.1188, + "learning_rate": 1.406972023127031e-05, + "loss": 1.1611, "step": 9855 }, { - "epoch": 0.2792938309388195, + "epoch": 0.38563267861335004, "grad_norm": 0.0, - "learning_rate": 1.691394262895114e-05, - "loss": 0.9284, + "learning_rate": 1.4068562658217815e-05, + "loss": 1.1102, "step": 9856 }, { - "epoch": 0.2793221683810819, + "epoch": 0.3856718053055795, "grad_norm": 0.0, - "learning_rate": 1.691327951758971e-05, - "loss": 0.9156, + "learning_rate": 1.4067405019829301e-05, + "loss": 1.1952, "step": 9857 }, { - "epoch": 0.27935050582334436, + "epoch": 0.3857109319978089, "grad_norm": 0.0, - "learning_rate": 1.691261634799516e-05, - "loss": 1.121, + "learning_rate": 1.406624731612336e-05, + "loss": 1.0364, "step": 9858 }, { - "epoch": 0.27937884326560686, + "epoch": 0.38575005869003837, "grad_norm": 0.0, - "learning_rate": 1.6911953120173075e-05, - "loss": 0.9724, + "learning_rate": 1.4065089547118584e-05, + "loss": 1.0743, "step": 9859 }, { - "epoch": 0.2794071807078693, + "epoch": 0.3857891853822678, "grad_norm": 0.0, - "learning_rate": 1.6911289834129042e-05, - "loss": 0.8594, + "learning_rate": 1.4063931712833563e-05, + "loss": 1.1853, "step": 9860 }, { - "epoch": 0.2794355181501318, + "epoch": 0.38582831207449725, "grad_norm": 0.0, - "learning_rate": 1.691062648986865e-05, - "loss": 0.899, + "learning_rate": 1.406277381328689e-05, + "loss": 1.0557, "step": 9861 }, { - "epoch": 0.27946385559239423, + "epoch": 0.38586743876672663, "grad_norm": 0.0, - "learning_rate": 1.6909963087397484e-05, - "loss": 1.0147, + "learning_rate": 1.4061615848497161e-05, + "loss": 1.0825, "step": 9862 }, { - "epoch": 0.27949219303465667, + "epoch": 0.3859065654589561, "grad_norm": 0.0, - "learning_rate": 1.690929962672113e-05, - "loss": 0.856, + "learning_rate": 1.4060457818482971e-05, + "loss": 1.0301, "step": 9863 }, { - "epoch": 0.27952053047691916, + "epoch": 0.3859456921511855, "grad_norm": 0.0, - "learning_rate": 1.690863610784518e-05, - "loss": 0.9286, + "learning_rate": 1.4059299723262916e-05, + "loss": 1.0767, "step": 9864 }, { - "epoch": 0.2795488679191816, + "epoch": 0.38598481884341496, "grad_norm": 0.0, - "learning_rate": 1.6907972530775227e-05, - "loss": 1.0663, + "learning_rate": 1.4058141562855595e-05, + "loss": 1.0627, "step": 9865 }, { - "epoch": 0.2795772053614441, + "epoch": 0.3860239455356444, "grad_norm": 0.0, - "learning_rate": 1.6907308895516854e-05, - "loss": 0.9938, + "learning_rate": 1.4056983337279603e-05, + "loss": 1.0867, "step": 9866 }, { - "epoch": 0.27960554280370653, + "epoch": 0.38606307222787384, "grad_norm": 0.0, - "learning_rate": 1.6906645202075652e-05, - "loss": 0.9388, + "learning_rate": 1.4055825046553544e-05, + "loss": 0.9363, "step": 9867 }, { - "epoch": 0.279633880245969, + "epoch": 0.3861021989201033, "grad_norm": 0.0, - "learning_rate": 1.6905981450457216e-05, - "loss": 0.9493, + "learning_rate": 1.4054666690696017e-05, + "loss": 1.0254, "step": 9868 }, { - "epoch": 0.27966221768823146, + "epoch": 0.3861413256123327, "grad_norm": 0.0, - "learning_rate": 1.690531764066713e-05, - "loss": 1.016, + "learning_rate": 1.4053508269725626e-05, + "loss": 0.9914, "step": 9869 }, { - "epoch": 0.2796905551304939, + "epoch": 0.38618045230456216, "grad_norm": 0.0, - "learning_rate": 1.690465377271099e-05, - "loss": 1.0692, + "learning_rate": 1.4052349783660968e-05, + "loss": 1.0667, "step": 9870 }, { - "epoch": 0.2797188925727564, + "epoch": 0.3862195789967916, "grad_norm": 0.0, - "learning_rate": 1.690398984659439e-05, - "loss": 1.0518, + "learning_rate": 1.4051191232520653e-05, + "loss": 1.0653, "step": 9871 }, { - "epoch": 0.27974723001501883, + "epoch": 0.38625870568902104, "grad_norm": 0.0, - "learning_rate": 1.6903325862322918e-05, - "loss": 0.8875, + "learning_rate": 1.4050032616323282e-05, + "loss": 0.9386, "step": 9872 }, { - "epoch": 0.2797755674572813, + "epoch": 0.3862978323812505, "grad_norm": 0.0, - "learning_rate": 1.6902661819902167e-05, - "loss": 0.9854, + "learning_rate": 1.4048873935087462e-05, + "loss": 1.0591, "step": 9873 }, { - "epoch": 0.27980390489954376, + "epoch": 0.3863369590734799, "grad_norm": 0.0, - "learning_rate": 1.6901997719337733e-05, - "loss": 0.9853, + "learning_rate": 1.4047715188831798e-05, + "loss": 1.1451, "step": 9874 }, { - "epoch": 0.2798322423418062, + "epoch": 0.38637608576570937, "grad_norm": 0.0, - "learning_rate": 1.690133356063521e-05, - "loss": 0.9053, + "learning_rate": 1.4046556377574904e-05, + "loss": 1.0732, "step": 9875 }, { - "epoch": 0.2798605797840687, + "epoch": 0.3864152124579388, "grad_norm": 0.0, - "learning_rate": 1.6900669343800195e-05, - "loss": 0.8823, + "learning_rate": 1.4045397501335382e-05, + "loss": 1.0621, "step": 9876 }, { - "epoch": 0.27988891722633114, + "epoch": 0.38645433915016825, "grad_norm": 0.0, - "learning_rate": 1.6900005068838274e-05, - "loss": 0.9503, + "learning_rate": 1.4044238560131849e-05, + "loss": 1.1235, "step": 9877 }, { - "epoch": 0.27991725466859363, + "epoch": 0.3864934658423977, "grad_norm": 0.0, - "learning_rate": 1.689934073575505e-05, - "loss": 0.978, + "learning_rate": 1.4043079553982912e-05, + "loss": 1.0983, "step": 9878 }, { - "epoch": 0.27994559211085607, + "epoch": 0.38653259253462713, "grad_norm": 0.0, - "learning_rate": 1.689867634455612e-05, - "loss": 1.011, + "learning_rate": 1.4041920482907181e-05, + "loss": 1.1352, "step": 9879 }, { - "epoch": 0.27997392955311856, + "epoch": 0.38657171922685657, "grad_norm": 0.0, - "learning_rate": 1.6898011895247072e-05, - "loss": 0.904, + "learning_rate": 1.4040761346923275e-05, + "loss": 1.0692, "step": 9880 }, { - "epoch": 0.280002266995381, + "epoch": 0.386610845919086, "grad_norm": 0.0, - "learning_rate": 1.689734738783351e-05, - "loss": 0.896, + "learning_rate": 1.4039602146049802e-05, + "loss": 1.0892, "step": 9881 }, { - "epoch": 0.28003060443764344, + "epoch": 0.38664997261131545, "grad_norm": 0.0, - "learning_rate": 1.6896682822321033e-05, - "loss": 0.7306, + "learning_rate": 1.4038442880305381e-05, + "loss": 1.0172, "step": 9882 }, { - "epoch": 0.28005894187990593, + "epoch": 0.3866890993035449, "grad_norm": 0.0, - "learning_rate": 1.6896018198715235e-05, - "loss": 0.9203, + "learning_rate": 1.4037283549708628e-05, + "loss": 1.2244, "step": 9883 }, { - "epoch": 0.28008727932216837, + "epoch": 0.38672822599577433, "grad_norm": 0.0, - "learning_rate": 1.6895353517021714e-05, - "loss": 1.0004, + "learning_rate": 1.4036124154278164e-05, + "loss": 1.1574, "step": 9884 }, { - "epoch": 0.28011561676443086, + "epoch": 0.3867673526880038, "grad_norm": 0.0, - "learning_rate": 1.6894688777246065e-05, - "loss": 0.9331, + "learning_rate": 1.4034964694032598e-05, + "loss": 1.0116, "step": 9885 }, { - "epoch": 0.2801439542066933, + "epoch": 0.3868064793802332, "grad_norm": 0.0, - "learning_rate": 1.6894023979393898e-05, - "loss": 0.9473, + "learning_rate": 1.4033805168990557e-05, + "loss": 1.0795, "step": 9886 }, { - "epoch": 0.28017229164895574, + "epoch": 0.38684560607246266, "grad_norm": 0.0, - "learning_rate": 1.6893359123470805e-05, - "loss": 0.9765, + "learning_rate": 1.4032645579170661e-05, + "loss": 1.1445, "step": 9887 }, { - "epoch": 0.28020062909121823, + "epoch": 0.3868847327646921, "grad_norm": 0.0, - "learning_rate": 1.689269420948239e-05, - "loss": 0.9808, + "learning_rate": 1.4031485924591528e-05, + "loss": 1.1259, "step": 9888 }, { - "epoch": 0.2802289665334807, + "epoch": 0.38692385945692154, "grad_norm": 0.0, - "learning_rate": 1.6892029237434248e-05, - "loss": 1.0101, + "learning_rate": 1.4030326205271785e-05, + "loss": 1.1481, "step": 9889 }, { - "epoch": 0.28025730397574317, + "epoch": 0.386962986149151, "grad_norm": 0.0, - "learning_rate": 1.6891364207331992e-05, - "loss": 0.9955, + "learning_rate": 1.4029166421230052e-05, + "loss": 1.0938, "step": 9890 }, { - "epoch": 0.2802856414180056, + "epoch": 0.38700211284138036, "grad_norm": 0.0, - "learning_rate": 1.6890699119181206e-05, - "loss": 1.0042, + "learning_rate": 1.4028006572484956e-05, + "loss": 1.1047, "step": 9891 }, { - "epoch": 0.2803139788602681, + "epoch": 0.3870412395336098, "grad_norm": 0.0, - "learning_rate": 1.689003397298751e-05, - "loss": 0.9776, + "learning_rate": 1.402684665905512e-05, + "loss": 1.0684, "step": 9892 }, { - "epoch": 0.28034231630253054, + "epoch": 0.38708036622583925, "grad_norm": 0.0, - "learning_rate": 1.6889368768756495e-05, - "loss": 0.8471, + "learning_rate": 1.4025686680959174e-05, + "loss": 1.0532, "step": 9893 }, { - "epoch": 0.280370653744793, + "epoch": 0.3871194929180687, "grad_norm": 0.0, - "learning_rate": 1.6888703506493774e-05, - "loss": 0.943, + "learning_rate": 1.4024526638215743e-05, + "loss": 1.0909, "step": 9894 }, { - "epoch": 0.28039899118705547, + "epoch": 0.3871586196102981, "grad_norm": 0.0, - "learning_rate": 1.688803818620494e-05, - "loss": 0.934, + "learning_rate": 1.4023366530843458e-05, + "loss": 0.9879, "step": 9895 }, { - "epoch": 0.2804273286293179, + "epoch": 0.38719774630252757, "grad_norm": 0.0, - "learning_rate": 1.6887372807895604e-05, - "loss": 0.9945, + "learning_rate": 1.4022206358860949e-05, + "loss": 1.0162, "step": 9896 }, { - "epoch": 0.2804556660715804, + "epoch": 0.387236872994757, "grad_norm": 0.0, - "learning_rate": 1.6886707371571373e-05, - "loss": 1.0718, + "learning_rate": 1.4021046122286845e-05, + "loss": 1.0461, "step": 9897 }, { - "epoch": 0.28048400351384284, + "epoch": 0.38727599968698645, "grad_norm": 0.0, - "learning_rate": 1.6886041877237843e-05, - "loss": 1.0033, + "learning_rate": 1.4019885821139782e-05, + "loss": 1.0515, "step": 9898 }, { - "epoch": 0.2805123409561053, + "epoch": 0.3873151263792159, "grad_norm": 0.0, - "learning_rate": 1.688537632490063e-05, - "loss": 1.0158, + "learning_rate": 1.4018725455438386e-05, + "loss": 1.1033, "step": 9899 }, { - "epoch": 0.28054067839836777, + "epoch": 0.38735425307144533, "grad_norm": 0.0, - "learning_rate": 1.688471071456533e-05, - "loss": 1.0999, + "learning_rate": 1.4017565025201296e-05, + "loss": 1.1697, "step": 9900 }, { - "epoch": 0.2805690158406302, + "epoch": 0.38739337976367477, "grad_norm": 0.0, - "learning_rate": 1.688404504623756e-05, - "loss": 1.0366, + "learning_rate": 1.4016404530447146e-05, + "loss": 1.1021, "step": 9901 }, { - "epoch": 0.2805973532828927, + "epoch": 0.3874325064559042, "grad_norm": 0.0, - "learning_rate": 1.6883379319922922e-05, - "loss": 0.9474, + "learning_rate": 1.4015243971194572e-05, + "loss": 0.9697, "step": 9902 }, { - "epoch": 0.28062569072515514, + "epoch": 0.38747163314813365, "grad_norm": 0.0, - "learning_rate": 1.688271353562702e-05, - "loss": 0.9177, + "learning_rate": 1.401408334746221e-05, + "loss": 1.174, "step": 9903 }, { - "epoch": 0.28065402816741764, + "epoch": 0.3875107598403631, "grad_norm": 0.0, - "learning_rate": 1.688204769335547e-05, - "loss": 0.9958, + "learning_rate": 1.4012922659268702e-05, + "loss": 0.9457, "step": 9904 }, { - "epoch": 0.2806823656096801, + "epoch": 0.38754988653259254, "grad_norm": 0.0, - "learning_rate": 1.688138179311387e-05, - "loss": 0.9791, + "learning_rate": 1.4011761906632684e-05, + "loss": 1.0605, "step": 9905 }, { - "epoch": 0.2807107030519425, + "epoch": 0.387589013224822, "grad_norm": 0.0, - "learning_rate": 1.6880715834907844e-05, - "loss": 1.004, + "learning_rate": 1.4010601089572794e-05, + "loss": 1.1197, "step": 9906 }, { - "epoch": 0.280739040494205, + "epoch": 0.3876281399170514, "grad_norm": 0.0, - "learning_rate": 1.688004981874299e-05, - "loss": 0.9359, + "learning_rate": 1.4009440208107678e-05, + "loss": 1.0875, "step": 9907 }, { - "epoch": 0.28076737793646744, + "epoch": 0.38766726660928086, "grad_norm": 0.0, - "learning_rate": 1.6879383744624922e-05, - "loss": 0.9391, + "learning_rate": 1.400827926225598e-05, + "loss": 1.1991, "step": 9908 }, { - "epoch": 0.28079571537872994, + "epoch": 0.3877063933015103, "grad_norm": 0.0, - "learning_rate": 1.687871761255925e-05, - "loss": 1.0476, + "learning_rate": 1.4007118252036335e-05, + "loss": 0.9725, "step": 9909 }, { - "epoch": 0.2808240528209924, + "epoch": 0.38774551999373974, "grad_norm": 0.0, - "learning_rate": 1.6878051422551584e-05, - "loss": 0.9758, + "learning_rate": 1.4005957177467394e-05, + "loss": 1.1865, "step": 9910 }, { - "epoch": 0.2808523902632548, + "epoch": 0.3877846466859692, "grad_norm": 0.0, - "learning_rate": 1.687738517460754e-05, - "loss": 0.9838, + "learning_rate": 1.4004796038567801e-05, + "loss": 0.9819, "step": 9911 }, { - "epoch": 0.2808807277055173, + "epoch": 0.3878237733781986, "grad_norm": 0.0, - "learning_rate": 1.687671886873272e-05, - "loss": 0.966, + "learning_rate": 1.4003634835356199e-05, + "loss": 1.1299, "step": 9912 }, { - "epoch": 0.28090906514777975, + "epoch": 0.38786290007042806, "grad_norm": 0.0, - "learning_rate": 1.6876052504932753e-05, - "loss": 0.9849, + "learning_rate": 1.400247356785124e-05, + "loss": 1.1744, "step": 9913 }, { - "epoch": 0.28093740259004224, + "epoch": 0.3879020267626575, "grad_norm": 0.0, - "learning_rate": 1.6875386083213238e-05, - "loss": 0.9041, + "learning_rate": 1.400131223607157e-05, + "loss": 1.1331, "step": 9914 }, { - "epoch": 0.2809657400323047, + "epoch": 0.38794115345488694, "grad_norm": 0.0, - "learning_rate": 1.687471960357979e-05, - "loss": 0.8276, + "learning_rate": 1.4000150840035842e-05, + "loss": 1.0374, "step": 9915 }, { - "epoch": 0.28099407747456717, + "epoch": 0.3879802801471164, "grad_norm": 0.0, - "learning_rate": 1.687405306603803e-05, - "loss": 1.0439, + "learning_rate": 1.39989893797627e-05, + "loss": 1.0321, "step": 9916 }, { - "epoch": 0.2810224149168296, + "epoch": 0.3880194068393458, "grad_norm": 0.0, - "learning_rate": 1.6873386470593564e-05, - "loss": 0.8375, + "learning_rate": 1.3997827855270804e-05, + "loss": 1.1164, "step": 9917 }, { - "epoch": 0.28105075235909205, + "epoch": 0.38805853353157527, "grad_norm": 0.0, - "learning_rate": 1.6872719817252015e-05, - "loss": 0.9689, + "learning_rate": 1.39966662665788e-05, + "loss": 1.0169, "step": 9918 }, { - "epoch": 0.28107908980135454, + "epoch": 0.38809766022380465, "grad_norm": 0.0, - "learning_rate": 1.6872053106018996e-05, - "loss": 1.0584, + "learning_rate": 1.3995504613705344e-05, + "loss": 1.022, "step": 9919 }, { - "epoch": 0.281107427243617, + "epoch": 0.3881367869160341, "grad_norm": 0.0, - "learning_rate": 1.687138633690012e-05, - "loss": 1.1068, + "learning_rate": 1.399434289666909e-05, + "loss": 1.0401, "step": 9920 }, { - "epoch": 0.2811357646858795, + "epoch": 0.38817591360826353, "grad_norm": 0.0, - "learning_rate": 1.6870719509901003e-05, - "loss": 1.0257, + "learning_rate": 1.3993181115488693e-05, + "loss": 1.1185, "step": 9921 }, { - "epoch": 0.2811641021281419, + "epoch": 0.388215040300493, "grad_norm": 0.0, - "learning_rate": 1.6870052625027263e-05, - "loss": 1.0669, + "learning_rate": 1.3992019270182815e-05, + "loss": 0.9549, "step": 9922 }, { - "epoch": 0.28119243957040435, + "epoch": 0.3882541669927224, "grad_norm": 0.0, - "learning_rate": 1.6869385682284524e-05, - "loss": 0.9558, + "learning_rate": 1.3990857360770108e-05, + "loss": 1.1437, "step": 9923 }, { - "epoch": 0.28122077701266684, + "epoch": 0.38829329368495186, "grad_norm": 0.0, - "learning_rate": 1.6868718681678397e-05, - "loss": 0.9475, + "learning_rate": 1.3989695387269232e-05, + "loss": 0.9819, "step": 9924 }, { - "epoch": 0.2812491144549293, + "epoch": 0.3883324203771813, "grad_norm": 0.0, - "learning_rate": 1.6868051623214497e-05, - "loss": 1.056, + "learning_rate": 1.398853334969885e-05, + "loss": 1.098, "step": 9925 }, { - "epoch": 0.2812774518971918, + "epoch": 0.38837154706941074, "grad_norm": 0.0, - "learning_rate": 1.6867384506898458e-05, - "loss": 0.91, + "learning_rate": 1.3987371248077617e-05, + "loss": 1.0492, "step": 9926 }, { - "epoch": 0.2813057893394542, + "epoch": 0.3884106737616402, "grad_norm": 0.0, - "learning_rate": 1.686671733273588e-05, - "loss": 0.8912, + "learning_rate": 1.3986209082424198e-05, + "loss": 0.9811, "step": 9927 }, { - "epoch": 0.2813341267817167, + "epoch": 0.3884498004538696, "grad_norm": 0.0, - "learning_rate": 1.6866050100732395e-05, - "loss": 1.0344, + "learning_rate": 1.398504685275726e-05, + "loss": 1.0758, "step": 9928 }, { - "epoch": 0.28136246422397915, + "epoch": 0.38848892714609906, "grad_norm": 0.0, - "learning_rate": 1.686538281089362e-05, - "loss": 1.0446, + "learning_rate": 1.398388455909546e-05, + "loss": 1.0156, "step": 9929 }, { - "epoch": 0.2813908016662416, + "epoch": 0.3885280538383285, "grad_norm": 0.0, - "learning_rate": 1.6864715463225177e-05, - "loss": 1.0468, + "learning_rate": 1.3982722201457467e-05, + "loss": 1.0658, "step": 9930 }, { - "epoch": 0.2814191391085041, + "epoch": 0.38856718053055794, "grad_norm": 0.0, - "learning_rate": 1.6864048057732686e-05, - "loss": 0.9461, + "learning_rate": 1.3981559779861947e-05, + "loss": 1.1166, "step": 9931 }, { - "epoch": 0.2814474765507665, + "epoch": 0.3886063072227874, "grad_norm": 0.0, - "learning_rate": 1.6863380594421766e-05, - "loss": 0.9482, + "learning_rate": 1.3980397294327563e-05, + "loss": 1.165, "step": 9932 }, { - "epoch": 0.281475813993029, + "epoch": 0.3886454339150168, "grad_norm": 0.0, - "learning_rate": 1.686271307329805e-05, - "loss": 1.0374, + "learning_rate": 1.397923474487299e-05, + "loss": 1.0795, "step": 9933 }, { - "epoch": 0.28150415143529145, + "epoch": 0.38868456060724627, "grad_norm": 0.0, - "learning_rate": 1.686204549436715e-05, - "loss": 0.9187, + "learning_rate": 1.3978072131516889e-05, + "loss": 1.0795, "step": 9934 }, { - "epoch": 0.2815324888775539, + "epoch": 0.3887236872994757, "grad_norm": 0.0, - "learning_rate": 1.686137785763469e-05, - "loss": 0.9188, + "learning_rate": 1.3976909454277937e-05, + "loss": 1.1447, "step": 9935 }, { - "epoch": 0.2815608263198164, + "epoch": 0.38876281399170515, "grad_norm": 0.0, - "learning_rate": 1.68607101631063e-05, - "loss": 0.9907, + "learning_rate": 1.39757467131748e-05, + "loss": 1.1699, "step": 9936 }, { - "epoch": 0.2815891637620788, + "epoch": 0.3888019406839346, "grad_norm": 0.0, - "learning_rate": 1.6860042410787597e-05, - "loss": 1.0319, + "learning_rate": 1.3974583908226157e-05, + "loss": 1.1028, "step": 9937 }, { - "epoch": 0.2816175012043413, + "epoch": 0.38884106737616403, "grad_norm": 0.0, - "learning_rate": 1.685937460068421e-05, - "loss": 1.0049, + "learning_rate": 1.3973421039450675e-05, + "loss": 1.1918, "step": 9938 }, { - "epoch": 0.28164583864660375, + "epoch": 0.38888019406839347, "grad_norm": 0.0, - "learning_rate": 1.6858706732801767e-05, - "loss": 1.0235, + "learning_rate": 1.397225810686703e-05, + "loss": 1.1368, "step": 9939 }, { - "epoch": 0.28167417608886625, + "epoch": 0.3889193207606229, "grad_norm": 0.0, - "learning_rate": 1.685803880714589e-05, - "loss": 1.0339, + "learning_rate": 1.3971095110493895e-05, + "loss": 1.1327, "step": 9940 }, { - "epoch": 0.2817025135311287, + "epoch": 0.38895844745285235, "grad_norm": 0.0, - "learning_rate": 1.6857370823722204e-05, - "loss": 1.0149, + "learning_rate": 1.396993205034995e-05, + "loss": 1.0912, "step": 9941 }, { - "epoch": 0.2817308509733911, + "epoch": 0.3889975741450818, "grad_norm": 0.0, - "learning_rate": 1.6856702782536335e-05, - "loss": 1.0165, + "learning_rate": 1.3968768926453873e-05, + "loss": 1.1458, "step": 9942 }, { - "epoch": 0.2817591884156536, + "epoch": 0.38903670083731123, "grad_norm": 0.0, - "learning_rate": 1.6856034683593917e-05, - "loss": 0.8699, + "learning_rate": 1.3967605738824338e-05, + "loss": 1.0974, "step": 9943 }, { - "epoch": 0.28178752585791605, + "epoch": 0.3890758275295407, "grad_norm": 0.0, - "learning_rate": 1.685536652690057e-05, - "loss": 1.1215, + "learning_rate": 1.3966442487480028e-05, + "loss": 1.0629, "step": 9944 }, { - "epoch": 0.28181586330017855, + "epoch": 0.3891149542217701, "grad_norm": 0.0, - "learning_rate": 1.6854698312461924e-05, - "loss": 1.051, + "learning_rate": 1.396527917243962e-05, + "loss": 1.0745, "step": 9945 }, { - "epoch": 0.281844200742441, + "epoch": 0.38915408091399956, "grad_norm": 0.0, - "learning_rate": 1.685403004028361e-05, - "loss": 1.0014, + "learning_rate": 1.3964115793721798e-05, + "loss": 0.966, "step": 9946 }, { - "epoch": 0.2818725381847034, + "epoch": 0.389193207606229, "grad_norm": 0.0, - "learning_rate": 1.6853361710371256e-05, - "loss": 1.0085, + "learning_rate": 1.3962952351345247e-05, + "loss": 1.1254, "step": 9947 }, { - "epoch": 0.2819008756269659, + "epoch": 0.3892323342984584, "grad_norm": 0.0, - "learning_rate": 1.6852693322730493e-05, - "loss": 0.9342, + "learning_rate": 1.396178884532864e-05, + "loss": 1.0151, "step": 9948 }, { - "epoch": 0.28192921306922836, + "epoch": 0.3892714609906878, "grad_norm": 0.0, - "learning_rate": 1.6852024877366945e-05, - "loss": 0.9982, + "learning_rate": 1.3960625275690676e-05, + "loss": 1.0522, "step": 9949 }, { - "epoch": 0.28195755051149085, + "epoch": 0.38931058768291726, "grad_norm": 0.0, - "learning_rate": 1.685135637428625e-05, - "loss": 0.9873, + "learning_rate": 1.3959461642450027e-05, + "loss": 1.1037, "step": 9950 }, { - "epoch": 0.2819858879537533, + "epoch": 0.3893497143751467, "grad_norm": 0.0, - "learning_rate": 1.6850687813494036e-05, - "loss": 0.9203, + "learning_rate": 1.395829794562539e-05, + "loss": 1.0795, "step": 9951 }, { - "epoch": 0.2820142253960158, + "epoch": 0.38938884106737615, "grad_norm": 0.0, - "learning_rate": 1.685001919499593e-05, - "loss": 1.0881, + "learning_rate": 1.3957134185235445e-05, + "loss": 1.0731, "step": 9952 }, { - "epoch": 0.2820425628382782, + "epoch": 0.3894279677596056, "grad_norm": 0.0, - "learning_rate": 1.6849350518797575e-05, - "loss": 0.9367, + "learning_rate": 1.3955970361298884e-05, + "loss": 1.1302, "step": 9953 }, { - "epoch": 0.28207090028054066, + "epoch": 0.389467094451835, "grad_norm": 0.0, - "learning_rate": 1.6848681784904597e-05, - "loss": 0.9549, + "learning_rate": 1.3954806473834397e-05, + "loss": 1.0073, "step": 9954 }, { - "epoch": 0.28209923772280315, + "epoch": 0.38950622114406447, "grad_norm": 0.0, - "learning_rate": 1.6848012993322627e-05, - "loss": 1.1149, + "learning_rate": 1.3953642522860675e-05, + "loss": 1.1552, "step": 9955 }, { - "epoch": 0.2821275751650656, + "epoch": 0.3895453478362939, "grad_norm": 0.0, - "learning_rate": 1.68473441440573e-05, - "loss": 1.0173, + "learning_rate": 1.3952478508396406e-05, + "loss": 1.0135, "step": 9956 }, { - "epoch": 0.2821559126073281, + "epoch": 0.38958447452852335, "grad_norm": 0.0, - "learning_rate": 1.684667523711425e-05, - "loss": 0.9378, + "learning_rate": 1.3951314430460285e-05, + "loss": 0.9518, "step": 9957 }, { - "epoch": 0.2821842500495905, + "epoch": 0.3896236012207528, "grad_norm": 0.0, - "learning_rate": 1.6846006272499113e-05, - "loss": 0.9987, + "learning_rate": 1.3950150289071007e-05, + "loss": 0.9673, "step": 9958 }, { - "epoch": 0.28221258749185296, + "epoch": 0.38966272791298223, "grad_norm": 0.0, - "learning_rate": 1.6845337250217525e-05, - "loss": 0.9252, + "learning_rate": 1.3948986084247264e-05, + "loss": 1.0453, "step": 9959 }, { - "epoch": 0.28224092493411546, + "epoch": 0.3897018546052117, "grad_norm": 0.0, - "learning_rate": 1.6844668170275117e-05, - "loss": 0.9638, + "learning_rate": 1.3947821816007755e-05, + "loss": 1.1985, "step": 9960 }, { - "epoch": 0.2822692623763779, + "epoch": 0.3897409812974411, "grad_norm": 0.0, - "learning_rate": 1.6843999032677525e-05, - "loss": 0.9483, + "learning_rate": 1.3946657484371172e-05, + "loss": 1.0123, "step": 9961 }, { - "epoch": 0.2822975998186404, + "epoch": 0.38978010798967055, "grad_norm": 0.0, - "learning_rate": 1.6843329837430393e-05, - "loss": 0.8958, + "learning_rate": 1.3945493089356218e-05, + "loss": 1.0267, "step": 9962 }, { - "epoch": 0.2823259372609028, + "epoch": 0.3898192346819, "grad_norm": 0.0, - "learning_rate": 1.6842660584539352e-05, - "loss": 0.9672, + "learning_rate": 1.3944328630981583e-05, + "loss": 1.0173, "step": 9963 }, { - "epoch": 0.2823542747031653, + "epoch": 0.38985836137412944, "grad_norm": 0.0, - "learning_rate": 1.6841991274010037e-05, - "loss": 0.9132, + "learning_rate": 1.3943164109265978e-05, + "loss": 1.0727, "step": 9964 }, { - "epoch": 0.28238261214542776, + "epoch": 0.3898974880663589, "grad_norm": 0.0, - "learning_rate": 1.6841321905848088e-05, - "loss": 0.9473, + "learning_rate": 1.3941999524228098e-05, + "loss": 1.1606, "step": 9965 }, { - "epoch": 0.2824109495876902, + "epoch": 0.3899366147585883, "grad_norm": 0.0, - "learning_rate": 1.684065248005915e-05, - "loss": 0.9161, + "learning_rate": 1.3940834875886644e-05, + "loss": 1.1461, "step": 9966 }, { - "epoch": 0.2824392870299527, + "epoch": 0.38997574145081776, "grad_norm": 0.0, - "learning_rate": 1.6839982996648848e-05, - "loss": 1.0583, + "learning_rate": 1.3939670164260321e-05, + "loss": 1.1306, "step": 9967 }, { - "epoch": 0.28246762447221513, + "epoch": 0.3900148681430472, "grad_norm": 0.0, - "learning_rate": 1.6839313455622836e-05, - "loss": 1.012, + "learning_rate": 1.393850538936783e-05, + "loss": 1.2645, "step": 9968 }, { - "epoch": 0.2824959619144776, + "epoch": 0.39005399483527664, "grad_norm": 0.0, - "learning_rate": 1.6838643856986746e-05, - "loss": 0.9055, + "learning_rate": 1.393734055122788e-05, + "loss": 1.1033, "step": 9969 }, { - "epoch": 0.28252429935674006, + "epoch": 0.3900931215275061, "grad_norm": 0.0, - "learning_rate": 1.6837974200746218e-05, - "loss": 0.9911, + "learning_rate": 1.3936175649859173e-05, + "loss": 1.087, "step": 9970 }, { - "epoch": 0.2825526367990025, + "epoch": 0.3901322482197355, "grad_norm": 0.0, - "learning_rate": 1.683730448690689e-05, - "loss": 0.9039, + "learning_rate": 1.3935010685280417e-05, + "loss": 1.1862, "step": 9971 }, { - "epoch": 0.282580974241265, + "epoch": 0.39017137491196496, "grad_norm": 0.0, - "learning_rate": 1.6836634715474413e-05, - "loss": 0.9979, + "learning_rate": 1.3933845657510321e-05, + "loss": 0.996, "step": 9972 }, { - "epoch": 0.28260931168352743, + "epoch": 0.3902105016041944, "grad_norm": 0.0, - "learning_rate": 1.683596488645442e-05, - "loss": 0.9812, + "learning_rate": 1.3932680566567596e-05, + "loss": 1.1722, "step": 9973 }, { - "epoch": 0.2826376491257899, + "epoch": 0.39024962829642384, "grad_norm": 0.0, - "learning_rate": 1.6835294999852556e-05, - "loss": 0.9644, + "learning_rate": 1.3931515412470943e-05, + "loss": 1.1082, "step": 9974 }, { - "epoch": 0.28266598656805236, + "epoch": 0.3902887549886533, "grad_norm": 0.0, - "learning_rate": 1.6834625055674467e-05, - "loss": 0.9332, + "learning_rate": 1.3930350195239085e-05, + "loss": 1.1805, "step": 9975 }, { - "epoch": 0.2826943240103148, + "epoch": 0.39032788168088267, "grad_norm": 0.0, - "learning_rate": 1.6833955053925792e-05, - "loss": 1.0124, + "learning_rate": 1.3929184914890725e-05, + "loss": 1.0506, "step": 9976 }, { - "epoch": 0.2827226614525773, + "epoch": 0.3903670083731121, "grad_norm": 0.0, - "learning_rate": 1.6833284994612175e-05, - "loss": 0.9141, + "learning_rate": 1.3928019571444577e-05, + "loss": 1.111, "step": 9977 }, { - "epoch": 0.28275099889483973, + "epoch": 0.39040613506534155, "grad_norm": 0.0, - "learning_rate": 1.683261487773926e-05, - "loss": 1.0311, + "learning_rate": 1.392685416491936e-05, + "loss": 1.0931, "step": 9978 }, { - "epoch": 0.2827793363371022, + "epoch": 0.390445261757571, "grad_norm": 0.0, - "learning_rate": 1.6831944703312694e-05, - "loss": 0.9372, + "learning_rate": 1.3925688695333783e-05, + "loss": 0.999, "step": 9979 }, { - "epoch": 0.28280767377936467, + "epoch": 0.39048438844980043, "grad_norm": 0.0, - "learning_rate": 1.6831274471338122e-05, - "loss": 0.9591, + "learning_rate": 1.3924523162706567e-05, + "loss": 1.1045, "step": 9980 }, { - "epoch": 0.28283601122162716, + "epoch": 0.3905235151420299, "grad_norm": 0.0, - "learning_rate": 1.6830604181821188e-05, - "loss": 0.9752, + "learning_rate": 1.3923357567056424e-05, + "loss": 1.1301, "step": 9981 }, { - "epoch": 0.2828643486638896, + "epoch": 0.3905626418342593, "grad_norm": 0.0, - "learning_rate": 1.6829933834767537e-05, - "loss": 0.8926, + "learning_rate": 1.3922191908402079e-05, + "loss": 1.009, "step": 9982 }, { - "epoch": 0.28289268610615204, + "epoch": 0.39060176852648876, "grad_norm": 0.0, - "learning_rate": 1.682926343018282e-05, - "loss": 0.9109, + "learning_rate": 1.392102618676224e-05, + "loss": 1.0667, "step": 9983 }, { - "epoch": 0.28292102354841453, + "epoch": 0.3906408952187182, "grad_norm": 0.0, - "learning_rate": 1.682859296807268e-05, - "loss": 1.064, + "learning_rate": 1.391986040215564e-05, + "loss": 1.0864, "step": 9984 }, { - "epoch": 0.28294936099067697, + "epoch": 0.39068002191094764, "grad_norm": 0.0, - "learning_rate": 1.682792244844276e-05, - "loss": 0.9354, + "learning_rate": 1.3918694554600992e-05, + "loss": 1.0454, "step": 9985 }, { - "epoch": 0.28297769843293946, + "epoch": 0.3907191486031771, "grad_norm": 0.0, - "learning_rate": 1.6827251871298723e-05, - "loss": 1.0271, + "learning_rate": 1.391752864411702e-05, + "loss": 0.9895, "step": 9986 }, { - "epoch": 0.2830060358752019, + "epoch": 0.3907582752954065, "grad_norm": 0.0, - "learning_rate": 1.68265812366462e-05, - "loss": 0.9611, + "learning_rate": 1.3916362670722447e-05, + "loss": 1.0634, "step": 9987 }, { - "epoch": 0.28303437331746434, + "epoch": 0.39079740198763596, "grad_norm": 0.0, - "learning_rate": 1.6825910544490852e-05, - "loss": 0.9208, + "learning_rate": 1.3915196634435995e-05, + "loss": 0.9807, "step": 9988 }, { - "epoch": 0.28306271075972683, + "epoch": 0.3908365286798654, "grad_norm": 0.0, - "learning_rate": 1.6825239794838326e-05, - "loss": 0.8386, + "learning_rate": 1.3914030535276394e-05, + "loss": 1.2319, "step": 9989 }, { - "epoch": 0.28309104820198927, + "epoch": 0.39087565537209484, "grad_norm": 0.0, - "learning_rate": 1.6824568987694268e-05, - "loss": 1.0533, + "learning_rate": 1.3912864373262365e-05, + "loss": 1.1043, "step": 9990 }, { - "epoch": 0.28311938564425176, + "epoch": 0.3909147820643243, "grad_norm": 0.0, - "learning_rate": 1.6823898123064334e-05, - "loss": 0.9793, + "learning_rate": 1.3911698148412638e-05, + "loss": 1.1335, "step": 9991 }, { - "epoch": 0.2831477230865142, + "epoch": 0.3909539087565537, "grad_norm": 0.0, - "learning_rate": 1.682322720095417e-05, - "loss": 1.0149, + "learning_rate": 1.391053186074594e-05, + "loss": 1.0112, "step": 9992 }, { - "epoch": 0.2831760605287767, + "epoch": 0.39099303544878317, "grad_norm": 0.0, - "learning_rate": 1.6822556221369432e-05, - "loss": 0.8599, + "learning_rate": 1.3909365510281001e-05, + "loss": 1.1188, "step": 9993 }, { - "epoch": 0.28320439797103913, + "epoch": 0.3910321621410126, "grad_norm": 0.0, - "learning_rate": 1.6821885184315767e-05, - "loss": 0.9391, + "learning_rate": 1.3908199097036549e-05, + "loss": 1.0787, "step": 9994 }, { - "epoch": 0.2832327354133016, + "epoch": 0.39107128883324205, "grad_norm": 0.0, - "learning_rate": 1.682121408979883e-05, - "loss": 0.9196, + "learning_rate": 1.390703262103132e-05, + "loss": 1.0353, "step": 9995 }, { - "epoch": 0.28326107285556407, + "epoch": 0.3911104155254715, "grad_norm": 0.0, - "learning_rate": 1.6820542937824272e-05, - "loss": 1.0133, + "learning_rate": 1.390586608228404e-05, + "loss": 1.1016, "step": 9996 }, { - "epoch": 0.2832894102978265, + "epoch": 0.39114954221770093, "grad_norm": 0.0, - "learning_rate": 1.6819871728397755e-05, - "loss": 0.9131, + "learning_rate": 1.3904699480813446e-05, + "loss": 1.1083, "step": 9997 }, { - "epoch": 0.283317747740089, + "epoch": 0.39118866890993037, "grad_norm": 0.0, - "learning_rate": 1.6819200461524922e-05, - "loss": 0.9574, + "learning_rate": 1.3903532816638272e-05, + "loss": 1.0103, "step": 9998 }, { - "epoch": 0.28334608518235144, + "epoch": 0.3912277956021598, "grad_norm": 0.0, - "learning_rate": 1.6818529137211427e-05, - "loss": 0.9421, + "learning_rate": 1.390236608977725e-05, + "loss": 1.0305, "step": 9999 }, { - "epoch": 0.2833744226246139, + "epoch": 0.39126692229438925, "grad_norm": 0.0, - "learning_rate": 1.6817857755462932e-05, - "loss": 1.02, + "learning_rate": 1.3901199300249121e-05, + "loss": 1.1525, "step": 10000 }, { - "epoch": 0.28340276006687637, + "epoch": 0.3913060489866187, "grad_norm": 0.0, - "learning_rate": 1.681718631628509e-05, - "loss": 0.9177, + "learning_rate": 1.390003244807262e-05, + "loss": 1.1277, "step": 10001 }, { - "epoch": 0.2834310975091388, + "epoch": 0.39134517567884813, "grad_norm": 0.0, - "learning_rate": 1.6816514819683557e-05, - "loss": 1.038, + "learning_rate": 1.3898865533266483e-05, + "loss": 1.11, "step": 10002 }, { - "epoch": 0.2834594349514013, + "epoch": 0.3913843023710776, "grad_norm": 0.0, - "learning_rate": 1.681584326566399e-05, - "loss": 0.9987, + "learning_rate": 1.3897698555849453e-05, + "loss": 1.0789, "step": 10003 }, { - "epoch": 0.28348777239366374, + "epoch": 0.391423429063307, "grad_norm": 0.0, - "learning_rate": 1.681517165423204e-05, - "loss": 0.9448, + "learning_rate": 1.3896531515840268e-05, + "loss": 1.2353, "step": 10004 }, { - "epoch": 0.28351610983592623, + "epoch": 0.3914625557555364, "grad_norm": 0.0, - "learning_rate": 1.681449998539337e-05, - "loss": 1.0686, + "learning_rate": 1.3895364413257669e-05, + "loss": 1.1479, "step": 10005 }, { - "epoch": 0.28354444727818867, + "epoch": 0.39150168244776584, "grad_norm": 0.0, - "learning_rate": 1.6813828259153638e-05, - "loss": 1.0425, + "learning_rate": 1.3894197248120396e-05, + "loss": 0.9423, "step": 10006 }, { - "epoch": 0.2835727847204511, + "epoch": 0.3915408091399953, "grad_norm": 0.0, - "learning_rate": 1.6813156475518496e-05, - "loss": 0.9367, + "learning_rate": 1.38930300204472e-05, + "loss": 0.9914, "step": 10007 }, { - "epoch": 0.2836011221627136, + "epoch": 0.3915799358322247, "grad_norm": 0.0, - "learning_rate": 1.6812484634493612e-05, - "loss": 0.9123, + "learning_rate": 1.3891862730256815e-05, + "loss": 1.2346, "step": 10008 }, { - "epoch": 0.28362945960497604, + "epoch": 0.39161906252445416, "grad_norm": 0.0, - "learning_rate": 1.6811812736084635e-05, - "loss": 0.9482, + "learning_rate": 1.3890695377567996e-05, + "loss": 1.0407, "step": 10009 }, { - "epoch": 0.28365779704723854, + "epoch": 0.3916581892166836, "grad_norm": 0.0, - "learning_rate": 1.6811140780297236e-05, - "loss": 0.9202, + "learning_rate": 1.388952796239948e-05, + "loss": 0.9792, "step": 10010 }, { - "epoch": 0.283686134489501, + "epoch": 0.39169731590891305, "grad_norm": 0.0, - "learning_rate": 1.6810468767137066e-05, - "loss": 0.894, + "learning_rate": 1.3888360484770023e-05, + "loss": 1.1668, "step": 10011 }, { - "epoch": 0.2837144719317634, + "epoch": 0.3917364426011425, "grad_norm": 0.0, - "learning_rate": 1.6809796696609784e-05, - "loss": 0.9041, + "learning_rate": 1.3887192944698366e-05, + "loss": 1.256, "step": 10012 }, { - "epoch": 0.2837428093740259, + "epoch": 0.39177556929337193, "grad_norm": 0.0, - "learning_rate": 1.6809124568721062e-05, - "loss": 0.8869, + "learning_rate": 1.3886025342203263e-05, + "loss": 1.0548, "step": 10013 }, { - "epoch": 0.28377114681628834, + "epoch": 0.39181469598560137, "grad_norm": 0.0, - "learning_rate": 1.680845238347655e-05, - "loss": 0.9244, + "learning_rate": 1.3884857677303463e-05, + "loss": 1.1521, "step": 10014 }, { - "epoch": 0.28379948425855084, + "epoch": 0.3918538226778308, "grad_norm": 0.0, - "learning_rate": 1.6807780140881922e-05, - "loss": 0.9829, + "learning_rate": 1.3883689950017716e-05, + "loss": 1.0015, "step": 10015 }, { - "epoch": 0.2838278217008133, + "epoch": 0.39189294937006025, "grad_norm": 0.0, - "learning_rate": 1.6807107840942827e-05, - "loss": 0.9624, + "learning_rate": 1.3882522160364773e-05, + "loss": 1.0703, "step": 10016 }, { - "epoch": 0.28385615914307577, + "epoch": 0.3919320760622897, "grad_norm": 0.0, - "learning_rate": 1.6806435483664942e-05, - "loss": 0.8937, + "learning_rate": 1.3881354308363391e-05, + "loss": 1.0543, "step": 10017 }, { - "epoch": 0.2838844965853382, + "epoch": 0.39197120275451913, "grad_norm": 0.0, - "learning_rate": 1.6805763069053917e-05, - "loss": 1.0329, + "learning_rate": 1.3880186394032322e-05, + "loss": 1.1402, "step": 10018 }, { - "epoch": 0.28391283402760065, + "epoch": 0.3920103294467486, "grad_norm": 0.0, - "learning_rate": 1.6805090597115424e-05, - "loss": 0.9169, + "learning_rate": 1.3879018417390323e-05, + "loss": 1.0092, "step": 10019 }, { - "epoch": 0.28394117146986314, + "epoch": 0.392049456138978, "grad_norm": 0.0, - "learning_rate": 1.680441806785513e-05, - "loss": 0.9233, + "learning_rate": 1.3877850378456145e-05, + "loss": 1.1747, "step": 10020 }, { - "epoch": 0.2839695089121256, + "epoch": 0.39208858283120746, "grad_norm": 0.0, - "learning_rate": 1.680374548127869e-05, - "loss": 0.8896, + "learning_rate": 1.3876682277248552e-05, + "loss": 1.0733, "step": 10021 }, { - "epoch": 0.2839978463543881, + "epoch": 0.3921277095234369, "grad_norm": 0.0, - "learning_rate": 1.680307283739178e-05, - "loss": 1.0608, + "learning_rate": 1.3875514113786301e-05, + "loss": 0.9604, "step": 10022 }, { - "epoch": 0.2840261837966505, + "epoch": 0.39216683621566634, "grad_norm": 0.0, - "learning_rate": 1.6802400136200056e-05, - "loss": 0.9021, + "learning_rate": 1.3874345888088145e-05, + "loss": 0.9394, "step": 10023 }, { - "epoch": 0.28405452123891295, + "epoch": 0.3922059629078958, "grad_norm": 0.0, - "learning_rate": 1.6801727377709195e-05, - "loss": 0.9568, + "learning_rate": 1.3873177600172854e-05, + "loss": 1.083, "step": 10024 }, { - "epoch": 0.28408285868117544, + "epoch": 0.3922450896001252, "grad_norm": 0.0, - "learning_rate": 1.6801054561924857e-05, - "loss": 1.0157, + "learning_rate": 1.3872009250059181e-05, + "loss": 1.1477, "step": 10025 }, { - "epoch": 0.2841111961234379, + "epoch": 0.39228421629235466, "grad_norm": 0.0, - "learning_rate": 1.680038168885271e-05, - "loss": 0.881, + "learning_rate": 1.387084083776589e-05, + "loss": 1.1238, "step": 10026 }, { - "epoch": 0.2841395335657004, + "epoch": 0.3923233429845841, "grad_norm": 0.0, - "learning_rate": 1.6799708758498424e-05, - "loss": 1.0162, + "learning_rate": 1.386967236331175e-05, + "loss": 1.2132, "step": 10027 }, { - "epoch": 0.2841678710079628, + "epoch": 0.39236246967681354, "grad_norm": 0.0, - "learning_rate": 1.6799035770867665e-05, - "loss": 1.0022, + "learning_rate": 1.3868503826715518e-05, + "loss": 1.2034, "step": 10028 }, { - "epoch": 0.2841962084502253, + "epoch": 0.392401596369043, "grad_norm": 0.0, - "learning_rate": 1.6798362725966102e-05, - "loss": 1.0135, + "learning_rate": 1.3867335227995961e-05, + "loss": 1.0827, "step": 10029 }, { - "epoch": 0.28422454589248775, + "epoch": 0.3924407230612724, "grad_norm": 0.0, - "learning_rate": 1.6797689623799406e-05, - "loss": 0.9313, + "learning_rate": 1.3866166567171848e-05, + "loss": 0.9912, "step": 10030 }, { - "epoch": 0.2842528833347502, + "epoch": 0.39247984975350186, "grad_norm": 0.0, - "learning_rate": 1.679701646437325e-05, - "loss": 1.0436, + "learning_rate": 1.3864997844261945e-05, + "loss": 1.0978, "step": 10031 }, { - "epoch": 0.2842812207770127, + "epoch": 0.3925189764457313, "grad_norm": 0.0, - "learning_rate": 1.6796343247693293e-05, - "loss": 1.0183, + "learning_rate": 1.3863829059285019e-05, + "loss": 1.1614, "step": 10032 }, { - "epoch": 0.2843095582192751, + "epoch": 0.3925581031379607, "grad_norm": 0.0, - "learning_rate": 1.6795669973765218e-05, - "loss": 1.1303, + "learning_rate": 1.386266021225984e-05, + "loss": 1.2528, "step": 10033 }, { - "epoch": 0.2843378956615376, + "epoch": 0.39259722983019013, "grad_norm": 0.0, - "learning_rate": 1.679499664259469e-05, - "loss": 0.7888, + "learning_rate": 1.3861491303205179e-05, + "loss": 1.17, "step": 10034 }, { - "epoch": 0.28436623310380005, + "epoch": 0.39263635652241957, "grad_norm": 0.0, - "learning_rate": 1.679432325418738e-05, - "loss": 1.0547, + "learning_rate": 1.3860322332139805e-05, + "loss": 1.1973, "step": 10035 }, { - "epoch": 0.2843945705460625, + "epoch": 0.392675483214649, "grad_norm": 0.0, - "learning_rate": 1.6793649808548966e-05, - "loss": 1.0093, + "learning_rate": 1.3859153299082493e-05, + "loss": 1.0062, "step": 10036 }, { - "epoch": 0.284422907988325, + "epoch": 0.39271460990687845, "grad_norm": 0.0, - "learning_rate": 1.6792976305685115e-05, - "loss": 0.8876, + "learning_rate": 1.3857984204052014e-05, + "loss": 1.132, "step": 10037 }, { - "epoch": 0.2844512454305874, + "epoch": 0.3927537365991079, "grad_norm": 0.0, - "learning_rate": 1.6792302745601505e-05, - "loss": 0.9117, + "learning_rate": 1.3856815047067143e-05, + "loss": 1.1251, "step": 10038 }, { - "epoch": 0.2844795828728499, + "epoch": 0.39279286329133734, "grad_norm": 0.0, - "learning_rate": 1.67916291283038e-05, - "loss": 0.9253, + "learning_rate": 1.3855645828146653e-05, + "loss": 1.2589, "step": 10039 }, { - "epoch": 0.28450792031511235, + "epoch": 0.3928319899835668, "grad_norm": 0.0, - "learning_rate": 1.6790955453797687e-05, - "loss": 1.0265, + "learning_rate": 1.3854476547309326e-05, + "loss": 1.1037, "step": 10040 }, { - "epoch": 0.28453625775737484, + "epoch": 0.3928711166757962, "grad_norm": 0.0, - "learning_rate": 1.6790281722088834e-05, - "loss": 1.0347, + "learning_rate": 1.3853307204573931e-05, + "loss": 1.0893, "step": 10041 }, { - "epoch": 0.2845645951996373, + "epoch": 0.39291024336802566, "grad_norm": 0.0, - "learning_rate": 1.6789607933182912e-05, - "loss": 0.9044, + "learning_rate": 1.3852137799959256e-05, + "loss": 1.2145, "step": 10042 }, { - "epoch": 0.2845929326418997, + "epoch": 0.3929493700602551, "grad_norm": 0.0, - "learning_rate": 1.6788934087085606e-05, - "loss": 0.8865, + "learning_rate": 1.3850968333484072e-05, + "loss": 1.0886, "step": 10043 }, { - "epoch": 0.2846212700841622, + "epoch": 0.39298849675248454, "grad_norm": 0.0, - "learning_rate": 1.6788260183802586e-05, - "loss": 0.8337, + "learning_rate": 1.3849798805167163e-05, + "loss": 1.072, "step": 10044 }, { - "epoch": 0.28464960752642465, + "epoch": 0.393027623444714, "grad_norm": 0.0, - "learning_rate": 1.678758622333953e-05, - "loss": 0.9079, + "learning_rate": 1.3848629215027309e-05, + "loss": 1.1625, "step": 10045 }, { - "epoch": 0.28467794496868715, + "epoch": 0.3930667501369434, "grad_norm": 0.0, - "learning_rate": 1.6786912205702114e-05, - "loss": 1.0497, + "learning_rate": 1.3847459563083292e-05, + "loss": 1.0813, "step": 10046 }, { - "epoch": 0.2847062824109496, + "epoch": 0.39310587682917286, "grad_norm": 0.0, - "learning_rate": 1.6786238130896016e-05, - "loss": 1.1371, + "learning_rate": 1.3846289849353897e-05, + "loss": 1.1489, "step": 10047 }, { - "epoch": 0.284734619853212, + "epoch": 0.3931450035214023, "grad_norm": 0.0, - "learning_rate": 1.678556399892691e-05, - "loss": 1.0304, + "learning_rate": 1.3845120073857906e-05, + "loss": 1.0226, "step": 10048 }, { - "epoch": 0.2847629572954745, + "epoch": 0.39318413021363174, "grad_norm": 0.0, - "learning_rate": 1.678488980980048e-05, - "loss": 0.9253, + "learning_rate": 1.3843950236614103e-05, + "loss": 1.0412, "step": 10049 }, { - "epoch": 0.28479129473773696, + "epoch": 0.3932232569058612, "grad_norm": 0.0, - "learning_rate": 1.678421556352241e-05, - "loss": 0.948, + "learning_rate": 1.3842780337641278e-05, + "loss": 1.085, "step": 10050 }, { - "epoch": 0.28481963217999945, + "epoch": 0.3932623835980906, "grad_norm": 0.0, - "learning_rate": 1.678354126009837e-05, - "loss": 0.9133, + "learning_rate": 1.3841610376958217e-05, + "loss": 1.0626, "step": 10051 }, { - "epoch": 0.2848479696222619, + "epoch": 0.39330151029032007, "grad_norm": 0.0, - "learning_rate": 1.6782866899534043e-05, - "loss": 0.9955, + "learning_rate": 1.3840440354583704e-05, + "loss": 1.2355, "step": 10052 }, { - "epoch": 0.2848763070645244, + "epoch": 0.3933406369825495, "grad_norm": 0.0, - "learning_rate": 1.6782192481835107e-05, - "loss": 0.9262, + "learning_rate": 1.3839270270536534e-05, + "loss": 1.1019, "step": 10053 }, { - "epoch": 0.2849046445067868, + "epoch": 0.39337976367477895, "grad_norm": 0.0, - "learning_rate": 1.6781518007007247e-05, - "loss": 0.961, + "learning_rate": 1.3838100124835494e-05, + "loss": 1.0954, "step": 10054 }, { - "epoch": 0.28493298194904926, + "epoch": 0.3934188903670084, "grad_norm": 0.0, - "learning_rate": 1.6780843475056143e-05, - "loss": 0.9571, + "learning_rate": 1.3836929917499374e-05, + "loss": 1.1725, "step": 10055 }, { - "epoch": 0.28496131939131175, + "epoch": 0.39345801705923783, "grad_norm": 0.0, - "learning_rate": 1.678016888598748e-05, - "loss": 0.9668, + "learning_rate": 1.383575964854697e-05, + "loss": 1.061, "step": 10056 }, { - "epoch": 0.2849896568335742, + "epoch": 0.39349714375146727, "grad_norm": 0.0, - "learning_rate": 1.6779494239806928e-05, - "loss": 0.8657, + "learning_rate": 1.383458931799707e-05, + "loss": 1.1194, "step": 10057 }, { - "epoch": 0.2850179942758367, + "epoch": 0.3935362704436967, "grad_norm": 0.0, - "learning_rate": 1.6778819536520184e-05, - "loss": 0.9105, + "learning_rate": 1.383341892586847e-05, + "loss": 1.1586, "step": 10058 }, { - "epoch": 0.2850463317180991, + "epoch": 0.39357539713592615, "grad_norm": 0.0, - "learning_rate": 1.6778144776132927e-05, - "loss": 0.9255, + "learning_rate": 1.3832248472179967e-05, + "loss": 1.1043, "step": 10059 }, { - "epoch": 0.28507466916036156, + "epoch": 0.3936145238281556, "grad_norm": 0.0, - "learning_rate": 1.6777469958650838e-05, - "loss": 0.9422, + "learning_rate": 1.3831077956950355e-05, + "loss": 1.1495, "step": 10060 }, { - "epoch": 0.28510300660262405, + "epoch": 0.393653650520385, "grad_norm": 0.0, - "learning_rate": 1.67767950840796e-05, - "loss": 0.9306, + "learning_rate": 1.3829907380198433e-05, + "loss": 1.0704, "step": 10061 }, { - "epoch": 0.2851313440448865, + "epoch": 0.3936927772126144, "grad_norm": 0.0, - "learning_rate": 1.6776120152424905e-05, - "loss": 0.8471, + "learning_rate": 1.3828736741942998e-05, + "loss": 1.0341, "step": 10062 }, { - "epoch": 0.285159681487149, + "epoch": 0.39373190390484386, "grad_norm": 0.0, - "learning_rate": 1.677544516369243e-05, - "loss": 0.9808, + "learning_rate": 1.3827566042202849e-05, + "loss": 1.1366, "step": 10063 }, { - "epoch": 0.2851880189294114, + "epoch": 0.3937710305970733, "grad_norm": 0.0, - "learning_rate": 1.6774770117887866e-05, - "loss": 1.0012, + "learning_rate": 1.3826395280996783e-05, + "loss": 1.1804, "step": 10064 }, { - "epoch": 0.2852163563716739, + "epoch": 0.39381015728930274, "grad_norm": 0.0, - "learning_rate": 1.6774095015016897e-05, - "loss": 0.9202, + "learning_rate": 1.3825224458343604e-05, + "loss": 1.0135, "step": 10065 }, { - "epoch": 0.28524469381393636, + "epoch": 0.3938492839815322, "grad_norm": 0.0, - "learning_rate": 1.6773419855085208e-05, - "loss": 0.8465, + "learning_rate": 1.3824053574262113e-05, + "loss": 1.084, "step": 10066 }, { - "epoch": 0.2852730312561988, + "epoch": 0.3938884106737616, "grad_norm": 0.0, - "learning_rate": 1.6772744638098495e-05, - "loss": 0.9759, + "learning_rate": 1.3822882628771115e-05, + "loss": 1.0457, "step": 10067 }, { - "epoch": 0.2853013686984613, + "epoch": 0.39392753736599107, "grad_norm": 0.0, - "learning_rate": 1.677206936406243e-05, - "loss": 0.9953, + "learning_rate": 1.3821711621889412e-05, + "loss": 0.956, "step": 10068 }, { - "epoch": 0.2853297061407237, + "epoch": 0.3939666640582205, "grad_norm": 0.0, - "learning_rate": 1.6771394032982718e-05, - "loss": 1.0675, + "learning_rate": 1.3820540553635808e-05, + "loss": 1.1422, "step": 10069 }, { - "epoch": 0.2853580435829862, + "epoch": 0.39400579075044995, "grad_norm": 0.0, - "learning_rate": 1.6770718644865035e-05, - "loss": 0.8991, + "learning_rate": 1.381936942402911e-05, + "loss": 0.9774, "step": 10070 }, { - "epoch": 0.28538638102524866, + "epoch": 0.3940449174426794, "grad_norm": 0.0, - "learning_rate": 1.677004319971508e-05, - "loss": 1.0221, + "learning_rate": 1.3818198233088128e-05, + "loss": 1.1802, "step": 10071 }, { - "epoch": 0.2854147184675111, + "epoch": 0.39408404413490883, "grad_norm": 0.0, - "learning_rate": 1.6769367697538532e-05, - "loss": 0.9689, + "learning_rate": 1.3817026980831662e-05, + "loss": 0.9934, "step": 10072 }, { - "epoch": 0.2854430559097736, + "epoch": 0.39412317082713827, "grad_norm": 0.0, - "learning_rate": 1.6768692138341086e-05, - "loss": 1.0275, + "learning_rate": 1.381585566727853e-05, + "loss": 1.0769, "step": 10073 }, { - "epoch": 0.28547139335203603, + "epoch": 0.3941622975193677, "grad_norm": 0.0, - "learning_rate": 1.6768016522128435e-05, - "loss": 0.9031, + "learning_rate": 1.3814684292447537e-05, + "loss": 1.1198, "step": 10074 }, { - "epoch": 0.2854997307942985, + "epoch": 0.39420142421159715, "grad_norm": 0.0, - "learning_rate": 1.6767340848906266e-05, - "loss": 1.0298, + "learning_rate": 1.3813512856357491e-05, + "loss": 1.043, "step": 10075 }, { - "epoch": 0.28552806823656096, + "epoch": 0.3942405509038266, "grad_norm": 0.0, - "learning_rate": 1.676666511868027e-05, - "loss": 0.9675, + "learning_rate": 1.3812341359027212e-05, + "loss": 1.1647, "step": 10076 }, { - "epoch": 0.28555640567882346, + "epoch": 0.39427967759605603, "grad_norm": 0.0, - "learning_rate": 1.6765989331456144e-05, - "loss": 0.9528, + "learning_rate": 1.3811169800475503e-05, + "loss": 1.1481, "step": 10077 }, { - "epoch": 0.2855847431210859, + "epoch": 0.3943188042882855, "grad_norm": 0.0, - "learning_rate": 1.6765313487239578e-05, - "loss": 1.0272, + "learning_rate": 1.3809998180721187e-05, + "loss": 1.127, "step": 10078 }, { - "epoch": 0.28561308056334833, + "epoch": 0.3943579309805149, "grad_norm": 0.0, - "learning_rate": 1.676463758603626e-05, - "loss": 0.9747, + "learning_rate": 1.380882649978307e-05, + "loss": 1.1171, "step": 10079 }, { - "epoch": 0.2856414180056108, + "epoch": 0.39439705767274436, "grad_norm": 0.0, - "learning_rate": 1.6763961627851894e-05, - "loss": 1.0077, + "learning_rate": 1.3807654757679976e-05, + "loss": 1.0204, "step": 10080 }, { - "epoch": 0.28566975544787326, + "epoch": 0.3944361843649738, "grad_norm": 0.0, - "learning_rate": 1.6763285612692163e-05, - "loss": 0.9808, + "learning_rate": 1.3806482954430716e-05, + "loss": 1.0965, "step": 10081 }, { - "epoch": 0.28569809289013576, + "epoch": 0.39447531105720324, "grad_norm": 0.0, - "learning_rate": 1.676260954056277e-05, - "loss": 0.996, + "learning_rate": 1.380531109005411e-05, + "loss": 0.9947, "step": 10082 }, { - "epoch": 0.2857264303323982, + "epoch": 0.3945144377494327, "grad_norm": 0.0, - "learning_rate": 1.67619334114694e-05, - "loss": 1.0061, + "learning_rate": 1.3804139164568976e-05, + "loss": 0.9131, "step": 10083 }, { - "epoch": 0.28575476777466063, + "epoch": 0.3945535644416621, "grad_norm": 0.0, - "learning_rate": 1.676125722541776e-05, - "loss": 1.0592, + "learning_rate": 1.3802967177994133e-05, + "loss": 1.0026, "step": 10084 }, { - "epoch": 0.28578310521692313, + "epoch": 0.39459269113389156, "grad_norm": 0.0, - "learning_rate": 1.6760580982413538e-05, - "loss": 1.0485, + "learning_rate": 1.3801795130348405e-05, + "loss": 1.037, "step": 10085 }, { - "epoch": 0.28581144265918557, + "epoch": 0.394631817826121, "grad_norm": 0.0, - "learning_rate": 1.6759904682462428e-05, - "loss": 0.9181, + "learning_rate": 1.380062302165061e-05, + "loss": 1.0694, "step": 10086 }, { - "epoch": 0.28583978010144806, + "epoch": 0.39467094451835044, "grad_norm": 0.0, - "learning_rate": 1.675922832557013e-05, - "loss": 0.8288, + "learning_rate": 1.379945085191957e-05, + "loss": 1.0201, "step": 10087 }, { - "epoch": 0.2858681175437105, + "epoch": 0.3947100712105799, "grad_norm": 0.0, - "learning_rate": 1.6758551911742346e-05, - "loss": 0.9752, + "learning_rate": 1.3798278621174113e-05, + "loss": 1.1213, "step": 10088 }, { - "epoch": 0.285896454985973, + "epoch": 0.3947491979028093, "grad_norm": 0.0, - "learning_rate": 1.675787544098477e-05, - "loss": 1.0493, + "learning_rate": 1.3797106329433062e-05, + "loss": 1.1009, "step": 10089 }, { - "epoch": 0.28592479242823543, + "epoch": 0.3947883245950387, "grad_norm": 0.0, - "learning_rate": 1.6757198913303098e-05, - "loss": 1.0561, + "learning_rate": 1.3795933976715236e-05, + "loss": 0.9745, "step": 10090 }, { - "epoch": 0.28595312987049787, + "epoch": 0.39482745128726815, "grad_norm": 0.0, - "learning_rate": 1.6756522328703026e-05, - "loss": 0.8649, + "learning_rate": 1.3794761563039472e-05, + "loss": 1.1074, "step": 10091 }, { - "epoch": 0.28598146731276036, + "epoch": 0.3948665779794976, "grad_norm": 0.0, - "learning_rate": 1.6755845687190264e-05, - "loss": 0.9174, + "learning_rate": 1.3793589088424591e-05, + "loss": 1.1702, "step": 10092 }, { - "epoch": 0.2860098047550228, + "epoch": 0.39490570467172703, "grad_norm": 0.0, - "learning_rate": 1.67551689887705e-05, - "loss": 0.9084, + "learning_rate": 1.3792416552889419e-05, + "loss": 1.1144, "step": 10093 }, { - "epoch": 0.2860381421972853, + "epoch": 0.3949448313639565, "grad_norm": 0.0, - "learning_rate": 1.6754492233449445e-05, - "loss": 0.9552, + "learning_rate": 1.3791243956452794e-05, + "loss": 1.2206, "step": 10094 }, { - "epoch": 0.28606647963954773, + "epoch": 0.3949839580561859, "grad_norm": 0.0, - "learning_rate": 1.675381542123279e-05, - "loss": 0.9445, + "learning_rate": 1.3790071299133539e-05, + "loss": 1.1482, "step": 10095 }, { - "epoch": 0.28609481708181017, + "epoch": 0.39502308474841535, "grad_norm": 0.0, - "learning_rate": 1.675313855212624e-05, - "loss": 1.0234, + "learning_rate": 1.378889858095049e-05, + "loss": 1.0367, "step": 10096 }, { - "epoch": 0.28612315452407266, + "epoch": 0.3950622114406448, "grad_norm": 0.0, - "learning_rate": 1.6752461626135495e-05, - "loss": 0.9012, + "learning_rate": 1.3787725801922477e-05, + "loss": 1.0075, "step": 10097 }, { - "epoch": 0.2861514919663351, + "epoch": 0.39510133813287424, "grad_norm": 0.0, - "learning_rate": 1.675178464326626e-05, - "loss": 0.8626, + "learning_rate": 1.3786552962068334e-05, + "loss": 1.0723, "step": 10098 }, { - "epoch": 0.2861798294085976, + "epoch": 0.3951404648251037, "grad_norm": 0.0, - "learning_rate": 1.6751107603524238e-05, - "loss": 0.9519, + "learning_rate": 1.3785380061406897e-05, + "loss": 0.9789, "step": 10099 }, { - "epoch": 0.28620816685086004, + "epoch": 0.3951795915173331, "grad_norm": 0.0, - "learning_rate": 1.6750430506915124e-05, - "loss": 1.099, + "learning_rate": 1.3784207099956994e-05, + "loss": 1.1899, "step": 10100 }, { - "epoch": 0.28623650429312253, + "epoch": 0.39521871820956256, "grad_norm": 0.0, - "learning_rate": 1.6749753353444634e-05, - "loss": 1.0166, + "learning_rate": 1.3783034077737472e-05, + "loss": 0.8785, "step": 10101 }, { - "epoch": 0.28626484173538497, + "epoch": 0.395257844901792, "grad_norm": 0.0, - "learning_rate": 1.6749076143118457e-05, - "loss": 0.9303, + "learning_rate": 1.3781860994767162e-05, + "loss": 1.0358, "step": 10102 }, { - "epoch": 0.2862931791776474, + "epoch": 0.39529697159402144, "grad_norm": 0.0, - "learning_rate": 1.6748398875942312e-05, - "loss": 1.0431, + "learning_rate": 1.3780687851064901e-05, + "loss": 1.1044, "step": 10103 }, { - "epoch": 0.2863215166199099, + "epoch": 0.3953360982862509, "grad_norm": 0.0, - "learning_rate": 1.6747721551921894e-05, - "loss": 0.9084, + "learning_rate": 1.3779514646649534e-05, + "loss": 1.1901, "step": 10104 }, { - "epoch": 0.28634985406217234, + "epoch": 0.3953752249784803, "grad_norm": 0.0, - "learning_rate": 1.6747044171062916e-05, - "loss": 0.9102, + "learning_rate": 1.3778341381539896e-05, + "loss": 1.0886, "step": 10105 }, { - "epoch": 0.28637819150443483, + "epoch": 0.39541435167070976, "grad_norm": 0.0, - "learning_rate": 1.6746366733371076e-05, - "loss": 0.9873, + "learning_rate": 1.377716805575483e-05, + "loss": 1.0418, "step": 10106 }, { - "epoch": 0.28640652894669727, + "epoch": 0.3954534783629392, "grad_norm": 0.0, - "learning_rate": 1.6745689238852084e-05, - "loss": 1.0029, + "learning_rate": 1.377599466931318e-05, + "loss": 1.158, "step": 10107 }, { - "epoch": 0.2864348663889597, + "epoch": 0.39549260505516864, "grad_norm": 0.0, - "learning_rate": 1.6745011687511646e-05, - "loss": 0.9293, + "learning_rate": 1.3774821222233784e-05, + "loss": 1.0364, "step": 10108 }, { - "epoch": 0.2864632038312222, + "epoch": 0.3955317317473981, "grad_norm": 0.0, - "learning_rate": 1.6744334079355472e-05, - "loss": 0.9238, + "learning_rate": 1.3773647714535491e-05, + "loss": 1.071, "step": 10109 }, { - "epoch": 0.28649154127348464, + "epoch": 0.3955708584396275, "grad_norm": 0.0, - "learning_rate": 1.6743656414389263e-05, - "loss": 1.0954, + "learning_rate": 1.3772474146237145e-05, + "loss": 1.1675, "step": 10110 }, { - "epoch": 0.28651987871574713, + "epoch": 0.39560998513185697, "grad_norm": 0.0, - "learning_rate": 1.6742978692618735e-05, - "loss": 0.9667, + "learning_rate": 1.377130051735759e-05, + "loss": 1.0236, "step": 10111 }, { - "epoch": 0.28654821615800957, + "epoch": 0.3956491118240864, "grad_norm": 0.0, - "learning_rate": 1.6742300914049595e-05, - "loss": 0.8704, + "learning_rate": 1.3770126827915678e-05, + "loss": 1.0792, "step": 10112 }, { - "epoch": 0.28657655360027207, + "epoch": 0.39568823851631585, "grad_norm": 0.0, - "learning_rate": 1.674162307868755e-05, - "loss": 1.0564, + "learning_rate": 1.3768953077930248e-05, + "loss": 1.1252, "step": 10113 }, { - "epoch": 0.2866048910425345, + "epoch": 0.3957273652085453, "grad_norm": 0.0, - "learning_rate": 1.674094518653831e-05, - "loss": 1.0927, + "learning_rate": 1.3767779267420158e-05, + "loss": 0.8905, "step": 10114 }, { - "epoch": 0.28663322848479694, + "epoch": 0.39576649190077473, "grad_norm": 0.0, - "learning_rate": 1.674026723760758e-05, - "loss": 0.9353, + "learning_rate": 1.3766605396404252e-05, + "loss": 1.0756, "step": 10115 }, { - "epoch": 0.28666156592705944, + "epoch": 0.39580561859300417, "grad_norm": 0.0, - "learning_rate": 1.6739589231901085e-05, - "loss": 0.9431, + "learning_rate": 1.3765431464901384e-05, + "loss": 1.1013, "step": 10116 }, { - "epoch": 0.2866899033693219, + "epoch": 0.3958447452852336, "grad_norm": 0.0, - "learning_rate": 1.6738911169424523e-05, - "loss": 1.0343, + "learning_rate": 1.3764257472930404e-05, + "loss": 1.1346, "step": 10117 }, { - "epoch": 0.28671824081158437, + "epoch": 0.395883871977463, "grad_norm": 0.0, - "learning_rate": 1.673823305018361e-05, - "loss": 0.9169, + "learning_rate": 1.3763083420510168e-05, + "loss": 1.1384, "step": 10118 }, { - "epoch": 0.2867465782538468, + "epoch": 0.39592299866969244, "grad_norm": 0.0, - "learning_rate": 1.6737554874184058e-05, - "loss": 0.8206, + "learning_rate": 1.3761909307659525e-05, + "loss": 1.1967, "step": 10119 }, { - "epoch": 0.28677491569610924, + "epoch": 0.3959621253619219, "grad_norm": 0.0, - "learning_rate": 1.673687664143158e-05, - "loss": 1.0616, + "learning_rate": 1.3760735134397335e-05, + "loss": 1.1324, "step": 10120 }, { - "epoch": 0.28680325313837174, + "epoch": 0.3960012520541513, "grad_norm": 0.0, - "learning_rate": 1.6736198351931888e-05, - "loss": 0.934, + "learning_rate": 1.3759560900742451e-05, + "loss": 1.0079, "step": 10121 }, { - "epoch": 0.2868315905806342, + "epoch": 0.39604037874638076, "grad_norm": 0.0, - "learning_rate": 1.6735520005690697e-05, - "loss": 0.8522, + "learning_rate": 1.3758386606713727e-05, + "loss": 1.1726, "step": 10122 }, { - "epoch": 0.28685992802289667, + "epoch": 0.3960795054386102, "grad_norm": 0.0, - "learning_rate": 1.6734841602713717e-05, - "loss": 0.9223, + "learning_rate": 1.3757212252330028e-05, + "loss": 0.9677, "step": 10123 }, { - "epoch": 0.2868882654651591, + "epoch": 0.39611863213083964, "grad_norm": 0.0, - "learning_rate": 1.6734163143006665e-05, - "loss": 0.9681, + "learning_rate": 1.3756037837610205e-05, + "loss": 1.1006, "step": 10124 }, { - "epoch": 0.2869166029074216, + "epoch": 0.3961577588230691, "grad_norm": 0.0, - "learning_rate": 1.673348462657526e-05, - "loss": 0.9674, + "learning_rate": 1.3754863362573124e-05, + "loss": 1.0788, "step": 10125 }, { - "epoch": 0.28694494034968404, + "epoch": 0.3961968855152985, "grad_norm": 0.0, - "learning_rate": 1.673280605342521e-05, - "loss": 0.8861, + "learning_rate": 1.3753688827237639e-05, + "loss": 1.0299, "step": 10126 }, { - "epoch": 0.2869732777919465, + "epoch": 0.39623601220752797, "grad_norm": 0.0, - "learning_rate": 1.6732127423562236e-05, - "loss": 0.9336, + "learning_rate": 1.3752514231622617e-05, + "loss": 1.2603, "step": 10127 }, { - "epoch": 0.287001615234209, + "epoch": 0.3962751388997574, "grad_norm": 0.0, - "learning_rate": 1.6731448736992053e-05, - "loss": 0.9669, + "learning_rate": 1.3751339575746915e-05, + "loss": 1.078, "step": 10128 }, { - "epoch": 0.2870299526764714, + "epoch": 0.39631426559198685, "grad_norm": 0.0, - "learning_rate": 1.6730769993720376e-05, - "loss": 0.9608, + "learning_rate": 1.3750164859629407e-05, + "loss": 1.0853, "step": 10129 }, { - "epoch": 0.2870582901187339, + "epoch": 0.3963533922842163, "grad_norm": 0.0, - "learning_rate": 1.6730091193752925e-05, - "loss": 1.0041, + "learning_rate": 1.3748990083288944e-05, + "loss": 1.1279, "step": 10130 }, { - "epoch": 0.28708662756099634, + "epoch": 0.39639251897644573, "grad_norm": 0.0, - "learning_rate": 1.672941233709542e-05, - "loss": 0.8652, + "learning_rate": 1.3747815246744403e-05, + "loss": 1.061, "step": 10131 }, { - "epoch": 0.2871149650032588, + "epoch": 0.39643164566867517, "grad_norm": 0.0, - "learning_rate": 1.672873342375357e-05, - "loss": 1.1399, + "learning_rate": 1.3746640350014643e-05, + "loss": 1.1165, "step": 10132 }, { - "epoch": 0.2871433024455213, + "epoch": 0.3964707723609046, "grad_norm": 0.0, - "learning_rate": 1.67280544537331e-05, - "loss": 1.0486, + "learning_rate": 1.3745465393118533e-05, + "loss": 1.0984, "step": 10133 }, { - "epoch": 0.2871716398877837, + "epoch": 0.39650989905313405, "grad_norm": 0.0, - "learning_rate": 1.6727375427039734e-05, - "loss": 0.9902, + "learning_rate": 1.3744290376074945e-05, + "loss": 1.1115, "step": 10134 }, { - "epoch": 0.2871999773300462, + "epoch": 0.3965490257453635, "grad_norm": 0.0, - "learning_rate": 1.6726696343679186e-05, - "loss": 0.9271, + "learning_rate": 1.3743115298902743e-05, + "loss": 1.1668, "step": 10135 }, { - "epoch": 0.28722831477230865, + "epoch": 0.39658815243759293, "grad_norm": 0.0, - "learning_rate": 1.6726017203657175e-05, - "loss": 0.8426, + "learning_rate": 1.3741940161620799e-05, + "loss": 1.1587, "step": 10136 }, { - "epoch": 0.28725665221457114, + "epoch": 0.3966272791298224, "grad_norm": 0.0, - "learning_rate": 1.6725338006979424e-05, - "loss": 0.9932, + "learning_rate": 1.3740764964247986e-05, + "loss": 1.2047, "step": 10137 }, { - "epoch": 0.2872849896568336, + "epoch": 0.3966664058220518, "grad_norm": 0.0, - "learning_rate": 1.6724658753651652e-05, - "loss": 0.9565, + "learning_rate": 1.3739589706803176e-05, + "loss": 1.1647, "step": 10138 }, { - "epoch": 0.287313327099096, + "epoch": 0.39670553251428126, "grad_norm": 0.0, - "learning_rate": 1.672397944367958e-05, - "loss": 0.9427, + "learning_rate": 1.3738414389305242e-05, + "loss": 1.0588, "step": 10139 }, { - "epoch": 0.2873416645413585, + "epoch": 0.3967446592065107, "grad_norm": 0.0, - "learning_rate": 1.672330007706894e-05, - "loss": 1.035, + "learning_rate": 1.3737239011773054e-05, + "loss": 1.0564, "step": 10140 }, { - "epoch": 0.28737000198362095, + "epoch": 0.39678378589874014, "grad_norm": 0.0, - "learning_rate": 1.672262065382544e-05, - "loss": 0.8621, + "learning_rate": 1.3736063574225496e-05, + "loss": 1.1736, "step": 10141 }, { - "epoch": 0.28739833942588344, + "epoch": 0.3968229125909696, "grad_norm": 0.0, - "learning_rate": 1.6721941173954813e-05, - "loss": 1.0355, + "learning_rate": 1.3734888076681432e-05, + "loss": 1.1008, "step": 10142 }, { - "epoch": 0.2874266768681459, + "epoch": 0.396862039283199, "grad_norm": 0.0, - "learning_rate": 1.672126163746278e-05, - "loss": 0.9076, + "learning_rate": 1.373371251915975e-05, + "loss": 1.1889, "step": 10143 }, { - "epoch": 0.2874550143104083, + "epoch": 0.39690116597542846, "grad_norm": 0.0, - "learning_rate": 1.672058204435506e-05, - "loss": 0.9441, + "learning_rate": 1.3732536901679321e-05, + "loss": 1.0092, "step": 10144 }, { - "epoch": 0.2874833517526708, + "epoch": 0.3969402926676579, "grad_norm": 0.0, - "learning_rate": 1.6719902394637388e-05, - "loss": 0.9975, + "learning_rate": 1.3731361224259027e-05, + "loss": 1.1719, "step": 10145 }, { - "epoch": 0.28751168919493325, + "epoch": 0.39697941935988734, "grad_norm": 0.0, - "learning_rate": 1.6719222688315478e-05, - "loss": 1.0649, + "learning_rate": 1.373018548691775e-05, + "loss": 1.1032, "step": 10146 }, { - "epoch": 0.28754002663719574, + "epoch": 0.3970185460521167, "grad_norm": 0.0, - "learning_rate": 1.6718542925395063e-05, - "loss": 0.9306, + "learning_rate": 1.3729009689674366e-05, + "loss": 1.1677, "step": 10147 }, { - "epoch": 0.2875683640794582, + "epoch": 0.39705767274434617, "grad_norm": 0.0, - "learning_rate": 1.6717863105881863e-05, - "loss": 0.9765, + "learning_rate": 1.3727833832547758e-05, + "loss": 1.106, "step": 10148 }, { - "epoch": 0.2875967015217207, + "epoch": 0.3970967994365756, "grad_norm": 0.0, - "learning_rate": 1.671718322978161e-05, - "loss": 1.0301, + "learning_rate": 1.3726657915556814e-05, + "loss": 1.0051, "step": 10149 }, { - "epoch": 0.2876250389639831, + "epoch": 0.39713592612880505, "grad_norm": 0.0, - "learning_rate": 1.6716503297100025e-05, - "loss": 0.9686, + "learning_rate": 1.3725481938720409e-05, + "loss": 0.9881, "step": 10150 }, { - "epoch": 0.28765337640624555, + "epoch": 0.3971750528210345, "grad_norm": 0.0, - "learning_rate": 1.6715823307842845e-05, - "loss": 0.912, + "learning_rate": 1.3724305902057436e-05, + "loss": 1.2267, "step": 10151 }, { - "epoch": 0.28768171384850805, + "epoch": 0.39721417951326393, "grad_norm": 0.0, - "learning_rate": 1.6715143262015784e-05, - "loss": 0.9424, + "learning_rate": 1.3723129805586775e-05, + "loss": 1.0269, "step": 10152 }, { - "epoch": 0.2877100512907705, + "epoch": 0.3972533062054934, "grad_norm": 0.0, - "learning_rate": 1.671446315962458e-05, - "loss": 1.1, + "learning_rate": 1.3721953649327316e-05, + "loss": 1.0748, "step": 10153 }, { - "epoch": 0.287738388733033, + "epoch": 0.3972924328977228, "grad_norm": 0.0, - "learning_rate": 1.6713783000674963e-05, - "loss": 0.8907, + "learning_rate": 1.3720777433297942e-05, + "loss": 1.1429, "step": 10154 }, { - "epoch": 0.2877667261752954, + "epoch": 0.39733155958995225, "grad_norm": 0.0, - "learning_rate": 1.6713102785172654e-05, - "loss": 1.0457, + "learning_rate": 1.3719601157517548e-05, + "loss": 1.0695, "step": 10155 }, { - "epoch": 0.28779506361755786, + "epoch": 0.3973706862821817, "grad_norm": 0.0, - "learning_rate": 1.671242251312339e-05, - "loss": 0.9775, + "learning_rate": 1.3718424822005019e-05, + "loss": 1.0724, "step": 10156 }, { - "epoch": 0.28782340105982035, + "epoch": 0.39740981297441114, "grad_norm": 0.0, - "learning_rate": 1.67117421845329e-05, - "loss": 0.8802, + "learning_rate": 1.3717248426779249e-05, + "loss": 1.0717, "step": 10157 }, { - "epoch": 0.2878517385020828, + "epoch": 0.3974489396666406, "grad_norm": 0.0, - "learning_rate": 1.671106179940691e-05, - "loss": 1.0118, + "learning_rate": 1.3716071971859123e-05, + "loss": 1.2051, "step": 10158 }, { - "epoch": 0.2878800759443453, + "epoch": 0.39748806635887, "grad_norm": 0.0, - "learning_rate": 1.6710381357751155e-05, - "loss": 0.9139, + "learning_rate": 1.371489545726354e-05, + "loss": 1.1089, "step": 10159 }, { - "epoch": 0.2879084133866077, + "epoch": 0.39752719305109946, "grad_norm": 0.0, - "learning_rate": 1.6709700859571366e-05, - "loss": 1.0343, + "learning_rate": 1.3713718883011393e-05, + "loss": 1.2631, "step": 10160 }, { - "epoch": 0.2879367508288702, + "epoch": 0.3975663197433289, "grad_norm": 0.0, - "learning_rate": 1.6709020304873277e-05, - "loss": 0.8695, + "learning_rate": 1.3712542249121573e-05, + "loss": 1.0844, "step": 10161 }, { - "epoch": 0.28796508827113265, + "epoch": 0.39760544643555834, "grad_norm": 0.0, - "learning_rate": 1.670833969366262e-05, - "loss": 0.9185, + "learning_rate": 1.3711365555612974e-05, + "loss": 1.222, "step": 10162 }, { - "epoch": 0.2879934257133951, + "epoch": 0.3976445731277878, "grad_norm": 0.0, - "learning_rate": 1.6707659025945124e-05, - "loss": 1.0072, + "learning_rate": 1.3710188802504498e-05, + "loss": 1.1199, "step": 10163 }, { - "epoch": 0.2880217631556576, + "epoch": 0.3976836998200172, "grad_norm": 0.0, - "learning_rate": 1.6706978301726523e-05, - "loss": 0.9438, + "learning_rate": 1.3709011989815035e-05, + "loss": 1.0455, "step": 10164 }, { - "epoch": 0.28805010059792, + "epoch": 0.39772282651224666, "grad_norm": 0.0, - "learning_rate": 1.6706297521012556e-05, - "loss": 1.1498, + "learning_rate": 1.3707835117563493e-05, + "loss": 1.0597, "step": 10165 }, { - "epoch": 0.2880784380401825, + "epoch": 0.3977619532044761, "grad_norm": 0.0, - "learning_rate": 1.6705616683808955e-05, - "loss": 0.8944, + "learning_rate": 1.370665818576876e-05, + "loss": 0.9389, "step": 10166 }, { - "epoch": 0.28810677548244495, + "epoch": 0.39780107989670555, "grad_norm": 0.0, - "learning_rate": 1.6704935790121456e-05, - "loss": 0.9517, + "learning_rate": 1.3705481194449743e-05, + "loss": 1.0157, "step": 10167 }, { - "epoch": 0.2881351129247074, + "epoch": 0.397840206588935, "grad_norm": 0.0, - "learning_rate": 1.670425483995579e-05, - "loss": 0.9214, + "learning_rate": 1.370430414362534e-05, + "loss": 1.0795, "step": 10168 }, { - "epoch": 0.2881634503669699, + "epoch": 0.3978793332811644, "grad_norm": 0.0, - "learning_rate": 1.6703573833317698e-05, - "loss": 0.9931, + "learning_rate": 1.3703127033314458e-05, + "loss": 1.087, "step": 10169 }, { - "epoch": 0.2881917878092323, + "epoch": 0.39791845997339387, "grad_norm": 0.0, - "learning_rate": 1.670289277021291e-05, - "loss": 1.057, + "learning_rate": 1.3701949863535994e-05, + "loss": 0.9906, "step": 10170 }, { - "epoch": 0.2882201252514948, + "epoch": 0.3979575866656233, "grad_norm": 0.0, - "learning_rate": 1.6702211650647173e-05, - "loss": 1.0695, + "learning_rate": 1.3700772634308852e-05, + "loss": 1.1302, "step": 10171 }, { - "epoch": 0.28824846269375726, + "epoch": 0.39799671335785275, "grad_norm": 0.0, - "learning_rate": 1.6701530474626216e-05, - "loss": 1.0607, + "learning_rate": 1.3699595345651941e-05, + "loss": 1.2292, "step": 10172 }, { - "epoch": 0.2882768001360197, + "epoch": 0.3980358400500822, "grad_norm": 0.0, - "learning_rate": 1.6700849242155778e-05, - "loss": 0.9595, + "learning_rate": 1.3698417997584164e-05, + "loss": 0.9078, "step": 10173 }, { - "epoch": 0.2883051375782822, + "epoch": 0.39807496674231163, "grad_norm": 0.0, - "learning_rate": 1.6700167953241598e-05, - "loss": 0.916, + "learning_rate": 1.369724059012443e-05, + "loss": 1.1234, "step": 10174 }, { - "epoch": 0.2883334750205446, + "epoch": 0.398114093434541, "grad_norm": 0.0, - "learning_rate": 1.6699486607889417e-05, - "loss": 1.0022, + "learning_rate": 1.369606312329164e-05, + "loss": 1.0825, "step": 10175 }, { - "epoch": 0.2883618124628071, + "epoch": 0.39815322012677046, "grad_norm": 0.0, - "learning_rate": 1.6698805206104973e-05, - "loss": 1.0453, + "learning_rate": 1.3694885597104715e-05, + "loss": 0.9744, "step": 10176 }, { - "epoch": 0.28839014990506956, + "epoch": 0.3981923468189999, "grad_norm": 0.0, - "learning_rate": 1.6698123747894004e-05, - "loss": 0.9582, + "learning_rate": 1.3693708011582551e-05, + "loss": 0.9053, "step": 10177 }, { - "epoch": 0.28841848734733205, + "epoch": 0.39823147351122934, "grad_norm": 0.0, - "learning_rate": 1.6697442233262254e-05, - "loss": 0.9098, + "learning_rate": 1.3692530366744068e-05, + "loss": 1.0103, "step": 10178 }, { - "epoch": 0.2884468247895945, + "epoch": 0.3982706002034588, "grad_norm": 0.0, - "learning_rate": 1.6696760662215457e-05, - "loss": 0.9372, + "learning_rate": 1.3691352662608175e-05, + "loss": 1.0925, "step": 10179 }, { - "epoch": 0.28847516223185693, + "epoch": 0.3983097268956882, "grad_norm": 0.0, - "learning_rate": 1.669607903475936e-05, - "loss": 0.9404, + "learning_rate": 1.369017489919378e-05, + "loss": 1.1334, "step": 10180 }, { - "epoch": 0.2885034996741194, + "epoch": 0.39834885358791766, "grad_norm": 0.0, - "learning_rate": 1.6695397350899703e-05, - "loss": 0.9689, + "learning_rate": 1.3688997076519803e-05, + "loss": 1.0931, "step": 10181 }, { - "epoch": 0.28853183711638186, + "epoch": 0.3983879802801471, "grad_norm": 0.0, - "learning_rate": 1.6694715610642226e-05, - "loss": 0.9911, + "learning_rate": 1.3687819194605154e-05, + "loss": 1.0734, "step": 10182 }, { - "epoch": 0.28856017455864436, + "epoch": 0.39842710697237654, "grad_norm": 0.0, - "learning_rate": 1.6694033813992676e-05, - "loss": 0.9288, + "learning_rate": 1.3686641253468754e-05, + "loss": 0.9661, "step": 10183 }, { - "epoch": 0.2885885120009068, + "epoch": 0.398466233664606, "grad_norm": 0.0, - "learning_rate": 1.6693351960956793e-05, - "loss": 1.0219, + "learning_rate": 1.368546325312951e-05, + "loss": 1.0423, "step": 10184 }, { - "epoch": 0.28861684944316923, + "epoch": 0.3985053603568354, "grad_norm": 0.0, - "learning_rate": 1.669267005154032e-05, - "loss": 0.9733, + "learning_rate": 1.3684285193606346e-05, + "loss": 1.0385, "step": 10185 }, { - "epoch": 0.2886451868854317, + "epoch": 0.39854448704906487, "grad_norm": 0.0, - "learning_rate": 1.6691988085749004e-05, - "loss": 0.9123, + "learning_rate": 1.3683107074918179e-05, + "loss": 1.2006, "step": 10186 }, { - "epoch": 0.28867352432769416, + "epoch": 0.3985836137412943, "grad_norm": 0.0, - "learning_rate": 1.6691306063588583e-05, - "loss": 0.9363, + "learning_rate": 1.3681928897083928e-05, + "loss": 1.1589, "step": 10187 }, { - "epoch": 0.28870186176995666, + "epoch": 0.39862274043352375, "grad_norm": 0.0, - "learning_rate": 1.669062398506481e-05, - "loss": 1.0354, + "learning_rate": 1.3680750660122511e-05, + "loss": 1.0334, "step": 10188 }, { - "epoch": 0.2887301992122191, + "epoch": 0.3986618671257532, "grad_norm": 0.0, - "learning_rate": 1.6689941850183425e-05, - "loss": 1.0397, + "learning_rate": 1.3679572364052852e-05, + "loss": 0.9943, "step": 10189 }, { - "epoch": 0.2887585366544816, + "epoch": 0.39870099381798263, "grad_norm": 0.0, - "learning_rate": 1.6689259658950177e-05, - "loss": 1.0143, + "learning_rate": 1.3678394008893871e-05, + "loss": 1.0502, "step": 10190 }, { - "epoch": 0.28878687409674403, + "epoch": 0.39874012051021207, "grad_norm": 0.0, - "learning_rate": 1.668857741137081e-05, - "loss": 0.915, + "learning_rate": 1.3677215594664493e-05, + "loss": 1.0278, "step": 10191 }, { - "epoch": 0.28881521153900647, + "epoch": 0.3987792472024415, "grad_norm": 0.0, - "learning_rate": 1.6687895107451072e-05, - "loss": 0.9629, + "learning_rate": 1.3676037121383638e-05, + "loss": 1.145, "step": 10192 }, { - "epoch": 0.28884354898126896, + "epoch": 0.39881837389467095, "grad_norm": 0.0, - "learning_rate": 1.668721274719671e-05, - "loss": 0.963, + "learning_rate": 1.3674858589070234e-05, + "loss": 0.9107, "step": 10193 }, { - "epoch": 0.2888718864235314, + "epoch": 0.3988575005869004, "grad_norm": 0.0, - "learning_rate": 1.6686530330613472e-05, - "loss": 0.9561, + "learning_rate": 1.3673679997743207e-05, + "loss": 0.9708, "step": 10194 }, { - "epoch": 0.2889002238657939, + "epoch": 0.39889662727912983, "grad_norm": 0.0, - "learning_rate": 1.6685847857707105e-05, - "loss": 0.9743, + "learning_rate": 1.3672501347421481e-05, + "loss": 1.0674, "step": 10195 }, { - "epoch": 0.28892856130805633, + "epoch": 0.3989357539713593, "grad_norm": 0.0, - "learning_rate": 1.6685165328483356e-05, - "loss": 0.9267, + "learning_rate": 1.3671322638123988e-05, + "loss": 1.1199, "step": 10196 }, { - "epoch": 0.28895689875031877, + "epoch": 0.3989748806635887, "grad_norm": 0.0, - "learning_rate": 1.6684482742947984e-05, - "loss": 1.0303, + "learning_rate": 1.3670143869869649e-05, + "loss": 1.1125, "step": 10197 }, { - "epoch": 0.28898523619258126, + "epoch": 0.39901400735581816, "grad_norm": 0.0, - "learning_rate": 1.6683800101106726e-05, - "loss": 1.1169, + "learning_rate": 1.3668965042677403e-05, + "loss": 1.1007, "step": 10198 }, { - "epoch": 0.2890135736348437, + "epoch": 0.3990531340480476, "grad_norm": 0.0, - "learning_rate": 1.668311740296534e-05, - "loss": 0.9539, + "learning_rate": 1.3667786156566175e-05, + "loss": 1.1313, "step": 10199 }, { - "epoch": 0.2890419110771062, + "epoch": 0.39909226074027704, "grad_norm": 0.0, - "learning_rate": 1.6682434648529574e-05, - "loss": 0.9541, + "learning_rate": 1.3666607211554894e-05, + "loss": 1.1055, "step": 10200 }, { - "epoch": 0.28907024851936863, + "epoch": 0.3991313874325065, "grad_norm": 0.0, - "learning_rate": 1.668175183780518e-05, - "loss": 0.9564, + "learning_rate": 1.3665428207662498e-05, + "loss": 1.1662, "step": 10201 }, { - "epoch": 0.2890985859616311, + "epoch": 0.3991705141247359, "grad_norm": 0.0, - "learning_rate": 1.668106897079791e-05, - "loss": 1.0997, + "learning_rate": 1.366424914490792e-05, + "loss": 1.0364, "step": 10202 }, { - "epoch": 0.28912692340389357, + "epoch": 0.39920964081696536, "grad_norm": 0.0, - "learning_rate": 1.6680386047513512e-05, - "loss": 0.899, + "learning_rate": 1.366307002331009e-05, + "loss": 1.0982, "step": 10203 }, { - "epoch": 0.289155260846156, + "epoch": 0.39924876750919475, "grad_norm": 0.0, - "learning_rate": 1.6679703067957745e-05, - "loss": 1.0714, + "learning_rate": 1.3661890842887944e-05, + "loss": 1.0614, "step": 10204 }, { - "epoch": 0.2891835982884185, + "epoch": 0.3992878942014242, "grad_norm": 0.0, - "learning_rate": 1.6679020032136354e-05, - "loss": 0.9253, + "learning_rate": 1.3660711603660422e-05, + "loss": 1.1073, "step": 10205 }, { - "epoch": 0.28921193573068094, + "epoch": 0.39932702089365363, "grad_norm": 0.0, - "learning_rate": 1.6678336940055103e-05, - "loss": 0.9321, + "learning_rate": 1.365953230564646e-05, + "loss": 1.1043, "step": 10206 }, { - "epoch": 0.28924027317294343, + "epoch": 0.39936614758588307, "grad_norm": 0.0, - "learning_rate": 1.6677653791719737e-05, - "loss": 1.0147, + "learning_rate": 1.3658352948864993e-05, + "loss": 1.084, "step": 10207 }, { - "epoch": 0.28926861061520587, + "epoch": 0.3994052742781125, "grad_norm": 0.0, - "learning_rate": 1.6676970587136013e-05, - "loss": 1.0013, + "learning_rate": 1.3657173533334962e-05, + "loss": 1.0842, "step": 10208 }, { - "epoch": 0.2892969480574683, + "epoch": 0.39944440097034195, "grad_norm": 0.0, - "learning_rate": 1.6676287326309684e-05, - "loss": 0.8839, + "learning_rate": 1.3655994059075306e-05, + "loss": 1.159, "step": 10209 }, { - "epoch": 0.2893252854997308, + "epoch": 0.3994835276625714, "grad_norm": 0.0, - "learning_rate": 1.6675604009246514e-05, - "loss": 0.8854, + "learning_rate": 1.3654814526104967e-05, + "loss": 0.9694, "step": 10210 }, { - "epoch": 0.28935362294199324, + "epoch": 0.39952265435480083, "grad_norm": 0.0, - "learning_rate": 1.667492063595225e-05, - "loss": 0.8656, + "learning_rate": 1.3653634934442885e-05, + "loss": 1.0138, "step": 10211 }, { - "epoch": 0.28938196038425573, + "epoch": 0.3995617810470303, "grad_norm": 0.0, - "learning_rate": 1.6674237206432648e-05, - "loss": 0.9126, + "learning_rate": 1.3652455284108009e-05, + "loss": 1.0418, "step": 10212 }, { - "epoch": 0.28941029782651817, + "epoch": 0.3996009077392597, "grad_norm": 0.0, - "learning_rate": 1.667355372069347e-05, - "loss": 0.8677, + "learning_rate": 1.3651275575119272e-05, + "loss": 1.1074, "step": 10213 }, { - "epoch": 0.28943863526878066, + "epoch": 0.39964003443148916, "grad_norm": 0.0, - "learning_rate": 1.6672870178740468e-05, - "loss": 0.8732, + "learning_rate": 1.365009580749563e-05, + "loss": 1.1661, "step": 10214 }, { - "epoch": 0.2894669727110431, + "epoch": 0.3996791611237186, "grad_norm": 0.0, - "learning_rate": 1.6672186580579406e-05, - "loss": 0.921, + "learning_rate": 1.364891598125602e-05, + "loss": 1.04, "step": 10215 }, { - "epoch": 0.28949531015330554, + "epoch": 0.39971828781594804, "grad_norm": 0.0, - "learning_rate": 1.667150292621604e-05, - "loss": 0.9834, + "learning_rate": 1.3647736096419393e-05, + "loss": 1.0885, "step": 10216 }, { - "epoch": 0.28952364759556803, + "epoch": 0.3997574145081775, "grad_norm": 0.0, - "learning_rate": 1.6670819215656125e-05, - "loss": 0.8599, + "learning_rate": 1.3646556153004693e-05, + "loss": 1.1514, "step": 10217 }, { - "epoch": 0.2895519850378305, + "epoch": 0.3997965412004069, "grad_norm": 0.0, - "learning_rate": 1.667013544890542e-05, - "loss": 0.9113, + "learning_rate": 1.3645376151030871e-05, + "loss": 1.0552, "step": 10218 }, { - "epoch": 0.28958032248009297, + "epoch": 0.39983566789263636, "grad_norm": 0.0, - "learning_rate": 1.666945162596969e-05, - "loss": 0.9594, + "learning_rate": 1.364419609051688e-05, + "loss": 1.087, "step": 10219 }, { - "epoch": 0.2896086599223554, + "epoch": 0.3998747945848658, "grad_norm": 0.0, - "learning_rate": 1.6668767746854694e-05, - "loss": 0.8798, + "learning_rate": 1.3643015971481661e-05, + "loss": 0.9102, "step": 10220 }, { - "epoch": 0.28963699736461784, + "epoch": 0.39991392127709524, "grad_norm": 0.0, - "learning_rate": 1.6668083811566188e-05, - "loss": 0.9738, + "learning_rate": 1.3641835793944174e-05, + "loss": 1.1761, "step": 10221 }, { - "epoch": 0.28966533480688034, + "epoch": 0.3999530479693247, "grad_norm": 0.0, - "learning_rate": 1.6667399820109937e-05, - "loss": 0.8831, + "learning_rate": 1.3640655557923365e-05, + "loss": 1.0735, "step": 10222 }, { - "epoch": 0.2896936722491428, + "epoch": 0.3999921746615541, "grad_norm": 0.0, - "learning_rate": 1.6666715772491702e-05, - "loss": 0.9214, + "learning_rate": 1.3639475263438194e-05, + "loss": 1.1187, "step": 10223 }, { - "epoch": 0.28972200969140527, + "epoch": 0.40003130135378356, "grad_norm": 0.0, - "learning_rate": 1.6666031668717246e-05, - "loss": 0.9376, + "learning_rate": 1.3638294910507606e-05, + "loss": 1.1224, "step": 10224 }, { - "epoch": 0.2897503471336677, + "epoch": 0.400070428046013, "grad_norm": 0.0, - "learning_rate": 1.6665347508792325e-05, - "loss": 0.9233, + "learning_rate": 1.3637114499150563e-05, + "loss": 1.0765, "step": 10225 }, { - "epoch": 0.2897786845759302, + "epoch": 0.40010955473824245, "grad_norm": 0.0, - "learning_rate": 1.666466329272271e-05, - "loss": 0.9101, + "learning_rate": 1.3635934029386015e-05, + "loss": 1.1267, "step": 10226 }, { - "epoch": 0.28980702201819264, + "epoch": 0.4001486814304719, "grad_norm": 0.0, - "learning_rate": 1.666397902051416e-05, - "loss": 1.0488, + "learning_rate": 1.3634753501232929e-05, + "loss": 1.1268, "step": 10227 }, { - "epoch": 0.2898353594604551, + "epoch": 0.4001878081227013, "grad_norm": 0.0, - "learning_rate": 1.666329469217244e-05, - "loss": 0.888, + "learning_rate": 1.3633572914710254e-05, + "loss": 1.0362, "step": 10228 }, { - "epoch": 0.28986369690271757, + "epoch": 0.40022693481493077, "grad_norm": 0.0, - "learning_rate": 1.6662610307703318e-05, - "loss": 0.8791, + "learning_rate": 1.363239226983695e-05, + "loss": 1.1519, "step": 10229 }, { - "epoch": 0.28989203434498, + "epoch": 0.4002660615071602, "grad_norm": 0.0, - "learning_rate": 1.6661925867112553e-05, - "loss": 0.9958, + "learning_rate": 1.3631211566631976e-05, + "loss": 1.069, "step": 10230 }, { - "epoch": 0.2899203717872425, + "epoch": 0.40030518819938965, "grad_norm": 0.0, - "learning_rate": 1.666124137040591e-05, - "loss": 0.9351, + "learning_rate": 1.3630030805114297e-05, + "loss": 1.1067, "step": 10231 }, { - "epoch": 0.28994870922950494, + "epoch": 0.40034431489161904, "grad_norm": 0.0, - "learning_rate": 1.666055681758916e-05, - "loss": 0.9251, + "learning_rate": 1.3628849985302873e-05, + "loss": 1.0449, "step": 10232 }, { - "epoch": 0.2899770466717674, + "epoch": 0.4003834415838485, "grad_norm": 0.0, - "learning_rate": 1.6659872208668067e-05, - "loss": 1.0123, + "learning_rate": 1.3627669107216663e-05, + "loss": 1.1066, "step": 10233 }, { - "epoch": 0.2900053841140299, + "epoch": 0.4004225682760779, "grad_norm": 0.0, - "learning_rate": 1.6659187543648398e-05, - "loss": 0.9844, + "learning_rate": 1.3626488170874634e-05, + "loss": 1.1175, "step": 10234 }, { - "epoch": 0.2900337215562923, + "epoch": 0.40046169496830736, "grad_norm": 0.0, - "learning_rate": 1.6658502822535916e-05, - "loss": 0.9518, + "learning_rate": 1.362530717629575e-05, + "loss": 1.0676, "step": 10235 }, { - "epoch": 0.2900620589985548, + "epoch": 0.4005008216605368, "grad_norm": 0.0, - "learning_rate": 1.6657818045336392e-05, - "loss": 0.7718, + "learning_rate": 1.3624126123498976e-05, + "loss": 0.9029, "step": 10236 }, { - "epoch": 0.29009039644081724, + "epoch": 0.40053994835276624, "grad_norm": 0.0, - "learning_rate": 1.66571332120556e-05, - "loss": 0.9076, + "learning_rate": 1.3622945012503275e-05, + "loss": 1.0887, "step": 10237 }, { - "epoch": 0.29011873388307974, + "epoch": 0.4005790750449957, "grad_norm": 0.0, - "learning_rate": 1.66564483226993e-05, - "loss": 1.0634, + "learning_rate": 1.3621763843327618e-05, + "loss": 1.0428, "step": 10238 }, { - "epoch": 0.2901470713253422, + "epoch": 0.4006182017372251, "grad_norm": 0.0, - "learning_rate": 1.6655763377273258e-05, - "loss": 1.0468, + "learning_rate": 1.362058261599097e-05, + "loss": 1.1078, "step": 10239 }, { - "epoch": 0.2901754087676046, + "epoch": 0.40065732842945456, "grad_norm": 0.0, - "learning_rate": 1.665507837578326e-05, - "loss": 0.8574, + "learning_rate": 1.3619401330512307e-05, + "loss": 1.1175, "step": 10240 }, { - "epoch": 0.2902037462098671, + "epoch": 0.400696455121684, "grad_norm": 0.0, - "learning_rate": 1.6654393318235057e-05, - "loss": 0.998, + "learning_rate": 1.3618219986910592e-05, + "loss": 1.078, "step": 10241 }, { - "epoch": 0.29023208365212955, + "epoch": 0.40073558181391344, "grad_norm": 0.0, - "learning_rate": 1.6653708204634434e-05, - "loss": 0.9514, + "learning_rate": 1.3617038585204796e-05, + "loss": 1.0479, "step": 10242 }, { - "epoch": 0.29026042109439204, + "epoch": 0.4007747085061429, "grad_norm": 0.0, - "learning_rate": 1.665302303498715e-05, - "loss": 0.9835, + "learning_rate": 1.3615857125413894e-05, + "loss": 1.1606, "step": 10243 }, { - "epoch": 0.2902887585366545, + "epoch": 0.4008138351983723, "grad_norm": 0.0, - "learning_rate": 1.665233780929899e-05, - "loss": 0.9731, + "learning_rate": 1.3614675607556857e-05, + "loss": 0.9774, "step": 10244 }, { - "epoch": 0.2903170959789169, + "epoch": 0.40085296189060177, "grad_norm": 0.0, - "learning_rate": 1.6651652527575712e-05, - "loss": 0.9853, + "learning_rate": 1.3613494031652659e-05, + "loss": 0.877, "step": 10245 }, { - "epoch": 0.2903454334211794, + "epoch": 0.4008920885828312, "grad_norm": 0.0, - "learning_rate": 1.66509671898231e-05, - "loss": 1.0133, + "learning_rate": 1.3612312397720275e-05, + "loss": 1.0031, "step": 10246 }, { - "epoch": 0.29037377086344185, + "epoch": 0.40093121527506065, "grad_norm": 0.0, - "learning_rate": 1.6650281796046917e-05, - "loss": 1.0052, + "learning_rate": 1.3611130705778682e-05, + "loss": 1.0972, "step": 10247 }, { - "epoch": 0.29040210830570434, + "epoch": 0.4009703419672901, "grad_norm": 0.0, - "learning_rate": 1.664959634625294e-05, - "loss": 0.9648, + "learning_rate": 1.3609948955846855e-05, + "loss": 1.0297, "step": 10248 }, { - "epoch": 0.2904304457479668, + "epoch": 0.40100946865951953, "grad_norm": 0.0, - "learning_rate": 1.6648910840446947e-05, - "loss": 0.9588, + "learning_rate": 1.360876714794377e-05, + "loss": 1.0366, "step": 10249 }, { - "epoch": 0.2904587831902293, + "epoch": 0.40104859535174897, "grad_norm": 0.0, - "learning_rate": 1.664822527863471e-05, - "loss": 0.9835, + "learning_rate": 1.3607585282088405e-05, + "loss": 1.0443, "step": 10250 }, { - "epoch": 0.2904871206324917, + "epoch": 0.4010877220439784, "grad_norm": 0.0, - "learning_rate": 1.6647539660822e-05, - "loss": 1.0674, + "learning_rate": 1.3606403358299742e-05, + "loss": 1.0081, "step": 10251 }, { - "epoch": 0.29051545807475415, + "epoch": 0.40112684873620785, "grad_norm": 0.0, - "learning_rate": 1.6646853987014594e-05, - "loss": 0.9539, + "learning_rate": 1.360522137659676e-05, + "loss": 1.218, "step": 10252 }, { - "epoch": 0.29054379551701665, + "epoch": 0.4011659754284373, "grad_norm": 0.0, - "learning_rate": 1.664616825721827e-05, - "loss": 1.0372, + "learning_rate": 1.360403933699844e-05, + "loss": 1.0835, "step": 10253 }, { - "epoch": 0.2905721329592791, + "epoch": 0.40120510212066673, "grad_norm": 0.0, - "learning_rate": 1.6645482471438805e-05, - "loss": 0.9822, + "learning_rate": 1.3602857239523766e-05, + "loss": 1.0097, "step": 10254 }, { - "epoch": 0.2906004704015416, + "epoch": 0.4012442288128962, "grad_norm": 0.0, - "learning_rate": 1.6644796629681968e-05, - "loss": 0.9701, + "learning_rate": 1.360167508419172e-05, + "loss": 1.0964, "step": 10255 }, { - "epoch": 0.290628807843804, + "epoch": 0.4012833555051256, "grad_norm": 0.0, - "learning_rate": 1.6644110731953546e-05, - "loss": 0.9511, + "learning_rate": 1.3600492871021282e-05, + "loss": 0.9907, "step": 10256 }, { - "epoch": 0.29065714528606645, + "epoch": 0.40132248219735506, "grad_norm": 0.0, - "learning_rate": 1.6643424778259313e-05, - "loss": 0.8437, + "learning_rate": 1.3599310600031443e-05, + "loss": 1.0957, "step": 10257 }, { - "epoch": 0.29068548272832895, + "epoch": 0.4013616088895845, "grad_norm": 0.0, - "learning_rate": 1.6642738768605045e-05, - "loss": 0.9289, + "learning_rate": 1.3598128271241184e-05, + "loss": 1.0016, "step": 10258 }, { - "epoch": 0.2907138201705914, + "epoch": 0.40140073558181394, "grad_norm": 0.0, - "learning_rate": 1.664205270299652e-05, - "loss": 0.9882, + "learning_rate": 1.3596945884669498e-05, + "loss": 1.0467, "step": 10259 }, { - "epoch": 0.2907421576128539, + "epoch": 0.4014398622740434, "grad_norm": 0.0, - "learning_rate": 1.664136658143952e-05, - "loss": 1.0142, + "learning_rate": 1.3595763440335361e-05, + "loss": 1.1675, "step": 10260 }, { - "epoch": 0.2907704950551163, + "epoch": 0.40147898896627277, "grad_norm": 0.0, - "learning_rate": 1.664068040393982e-05, - "loss": 0.8821, + "learning_rate": 1.3594580938257776e-05, + "loss": 1.0559, "step": 10261 }, { - "epoch": 0.2907988324973788, + "epoch": 0.4015181156585022, "grad_norm": 0.0, - "learning_rate": 1.6639994170503206e-05, - "loss": 0.9472, + "learning_rate": 1.359339837845572e-05, + "loss": 1.081, "step": 10262 }, { - "epoch": 0.29082716993964125, + "epoch": 0.40155724235073165, "grad_norm": 0.0, - "learning_rate": 1.6639307881135457e-05, - "loss": 0.9849, + "learning_rate": 1.359221576094819e-05, + "loss": 1.0982, "step": 10263 }, { - "epoch": 0.2908555073819037, + "epoch": 0.4015963690429611, "grad_norm": 0.0, - "learning_rate": 1.6638621535842347e-05, - "loss": 0.8956, + "learning_rate": 1.3591033085754177e-05, + "loss": 1.046, "step": 10264 }, { - "epoch": 0.2908838448241662, + "epoch": 0.40163549573519053, "grad_norm": 0.0, - "learning_rate": 1.6637935134629664e-05, - "loss": 1.0889, + "learning_rate": 1.3589850352892676e-05, + "loss": 1.1126, "step": 10265 }, { - "epoch": 0.2909121822664286, + "epoch": 0.40167462242741997, "grad_norm": 0.0, - "learning_rate": 1.6637248677503194e-05, - "loss": 0.9902, + "learning_rate": 1.358866756238267e-05, + "loss": 1.0807, "step": 10266 }, { - "epoch": 0.2909405197086911, + "epoch": 0.4017137491196494, "grad_norm": 0.0, - "learning_rate": 1.663656216446871e-05, - "loss": 1.0575, + "learning_rate": 1.3587484714243165e-05, + "loss": 1.1158, "step": 10267 }, { - "epoch": 0.29096885715095355, + "epoch": 0.40175287581187885, "grad_norm": 0.0, - "learning_rate": 1.6635875595531995e-05, - "loss": 0.952, + "learning_rate": 1.358630180849315e-05, + "loss": 1.1114, "step": 10268 }, { - "epoch": 0.290997194593216, + "epoch": 0.4017920025041083, "grad_norm": 0.0, - "learning_rate": 1.6635188970698843e-05, - "loss": 1.0129, + "learning_rate": 1.358511884515162e-05, + "loss": 1.161, "step": 10269 }, { - "epoch": 0.2910255320354785, + "epoch": 0.40183112919633773, "grad_norm": 0.0, - "learning_rate": 1.6634502289975025e-05, - "loss": 0.9382, + "learning_rate": 1.3583935824237576e-05, + "loss": 1.0541, "step": 10270 }, { - "epoch": 0.2910538694777409, + "epoch": 0.4018702558885672, "grad_norm": 0.0, - "learning_rate": 1.6633815553366334e-05, - "loss": 0.8974, + "learning_rate": 1.358275274577001e-05, + "loss": 1.1674, "step": 10271 }, { - "epoch": 0.2910822069200034, + "epoch": 0.4019093825807966, "grad_norm": 0.0, - "learning_rate": 1.6633128760878548e-05, - "loss": 1.0214, + "learning_rate": 1.3581569609767927e-05, + "loss": 1.0269, "step": 10272 }, { - "epoch": 0.29111054436226586, + "epoch": 0.40194850927302606, "grad_norm": 0.0, - "learning_rate": 1.6632441912517453e-05, - "loss": 0.8458, + "learning_rate": 1.3580386416250321e-05, + "loss": 1.0979, "step": 10273 }, { - "epoch": 0.29113888180452835, + "epoch": 0.4019876359652555, "grad_norm": 0.0, - "learning_rate": 1.6631755008288843e-05, - "loss": 1.0051, + "learning_rate": 1.3579203165236201e-05, + "loss": 1.1006, "step": 10274 }, { - "epoch": 0.2911672192467908, + "epoch": 0.40202676265748494, "grad_norm": 0.0, - "learning_rate": 1.6631068048198495e-05, - "loss": 0.9073, + "learning_rate": 1.357801985674456e-05, + "loss": 1.0594, "step": 10275 }, { - "epoch": 0.2911955566890532, + "epoch": 0.4020658893497144, "grad_norm": 0.0, - "learning_rate": 1.66303810322522e-05, - "loss": 0.9519, + "learning_rate": 1.3576836490794404e-05, + "loss": 1.072, "step": 10276 }, { - "epoch": 0.2912238941313157, + "epoch": 0.4021050160419438, "grad_norm": 0.0, - "learning_rate": 1.6629693960455743e-05, - "loss": 1.0981, + "learning_rate": 1.3575653067404736e-05, + "loss": 1.0355, "step": 10277 }, { - "epoch": 0.29125223157357816, + "epoch": 0.40214414273417326, "grad_norm": 0.0, - "learning_rate": 1.6629006832814912e-05, - "loss": 0.9946, + "learning_rate": 1.3574469586594558e-05, + "loss": 1.1331, "step": 10278 }, { - "epoch": 0.29128056901584065, + "epoch": 0.4021832694264027, "grad_norm": 0.0, - "learning_rate": 1.662831964933549e-05, - "loss": 0.8854, + "learning_rate": 1.3573286048382884e-05, + "loss": 1.0883, "step": 10279 }, { - "epoch": 0.2913089064581031, + "epoch": 0.40222239611863214, "grad_norm": 0.0, - "learning_rate": 1.6627632410023277e-05, - "loss": 1.0633, + "learning_rate": 1.357210245278871e-05, + "loss": 1.2501, "step": 10280 }, { - "epoch": 0.29133724390036553, + "epoch": 0.4022615228108616, "grad_norm": 0.0, - "learning_rate": 1.662694511488405e-05, - "loss": 0.9594, + "learning_rate": 1.3570918799831044e-05, + "loss": 1.1212, "step": 10281 }, { - "epoch": 0.291365581342628, + "epoch": 0.402300649503091, "grad_norm": 0.0, - "learning_rate": 1.6626257763923605e-05, - "loss": 0.921, + "learning_rate": 1.35697350895289e-05, + "loss": 1.0929, "step": 10282 }, { - "epoch": 0.29139391878489046, + "epoch": 0.40233977619532046, "grad_norm": 0.0, - "learning_rate": 1.6625570357147732e-05, - "loss": 0.9703, + "learning_rate": 1.3568551321901282e-05, + "loss": 1.048, "step": 10283 }, { - "epoch": 0.29142225622715295, + "epoch": 0.4023789028875499, "grad_norm": 0.0, - "learning_rate": 1.662488289456222e-05, - "loss": 0.9631, + "learning_rate": 1.3567367496967201e-05, + "loss": 1.0332, "step": 10284 }, { - "epoch": 0.2914505936694154, + "epoch": 0.40241802957977935, "grad_norm": 0.0, - "learning_rate": 1.6624195376172857e-05, - "loss": 0.9712, + "learning_rate": 1.356618361474567e-05, + "loss": 1.1528, "step": 10285 }, { - "epoch": 0.2914789311116779, + "epoch": 0.4024571562720088, "grad_norm": 0.0, - "learning_rate": 1.6623507801985438e-05, - "loss": 0.9289, + "learning_rate": 1.35649996752557e-05, + "loss": 1.2017, "step": 10286 }, { - "epoch": 0.2915072685539403, + "epoch": 0.40249628296423823, "grad_norm": 0.0, - "learning_rate": 1.662282017200575e-05, - "loss": 0.9912, + "learning_rate": 1.3563815678516296e-05, + "loss": 1.0486, "step": 10287 }, { - "epoch": 0.29153560599620276, + "epoch": 0.40253540965646767, "grad_norm": 0.0, - "learning_rate": 1.6622132486239594e-05, - "loss": 0.9655, + "learning_rate": 1.3562631624546485e-05, + "loss": 1.1389, "step": 10288 }, { - "epoch": 0.29156394343846526, + "epoch": 0.40257453634869705, "grad_norm": 0.0, - "learning_rate": 1.6621444744692753e-05, - "loss": 0.9711, + "learning_rate": 1.3561447513365269e-05, + "loss": 1.1016, "step": 10289 }, { - "epoch": 0.2915922808807277, + "epoch": 0.4026136630409265, "grad_norm": 0.0, - "learning_rate": 1.6620756947371025e-05, - "loss": 0.9472, + "learning_rate": 1.3560263344991673e-05, + "loss": 0.9976, "step": 10290 }, { - "epoch": 0.2916206183229902, + "epoch": 0.40265278973315594, "grad_norm": 0.0, - "learning_rate": 1.66200690942802e-05, - "loss": 1.0047, + "learning_rate": 1.3559079119444705e-05, + "loss": 1.2186, "step": 10291 }, { - "epoch": 0.2916489557652526, + "epoch": 0.4026919164253854, "grad_norm": 0.0, - "learning_rate": 1.661938118542608e-05, - "loss": 0.9745, + "learning_rate": 1.355789483674339e-05, + "loss": 1.102, "step": 10292 }, { - "epoch": 0.29167729320751506, + "epoch": 0.4027310431176148, "grad_norm": 0.0, - "learning_rate": 1.661869322081445e-05, - "loss": 1.1422, + "learning_rate": 1.355671049690674e-05, + "loss": 1.081, "step": 10293 }, { - "epoch": 0.29170563064977756, + "epoch": 0.40277016980984426, "grad_norm": 0.0, - "learning_rate": 1.6618005200451112e-05, - "loss": 0.9593, + "learning_rate": 1.3555526099953778e-05, + "loss": 1.1198, "step": 10294 }, { - "epoch": 0.29173396809204, + "epoch": 0.4028092965020737, "grad_norm": 0.0, - "learning_rate": 1.6617317124341856e-05, - "loss": 0.9769, + "learning_rate": 1.355434164590352e-05, + "loss": 0.9753, "step": 10295 }, { - "epoch": 0.2917623055343025, + "epoch": 0.40284842319430314, "grad_norm": 0.0, - "learning_rate": 1.661662899249248e-05, - "loss": 0.8939, + "learning_rate": 1.355315713477499e-05, + "loss": 1.0294, "step": 10296 }, { - "epoch": 0.29179064297656493, + "epoch": 0.4028875498865326, "grad_norm": 0.0, - "learning_rate": 1.6615940804908787e-05, - "loss": 1.0589, + "learning_rate": 1.3551972566587208e-05, + "loss": 1.1315, "step": 10297 }, { - "epoch": 0.2918189804188274, + "epoch": 0.402926676578762, "grad_norm": 0.0, - "learning_rate": 1.661525256159656e-05, - "loss": 0.8668, + "learning_rate": 1.3550787941359199e-05, + "loss": 1.2324, "step": 10298 }, { - "epoch": 0.29184731786108986, + "epoch": 0.40296580327099146, "grad_norm": 0.0, - "learning_rate": 1.661456426256161e-05, - "loss": 1.1202, + "learning_rate": 1.3549603259109985e-05, + "loss": 1.1551, "step": 10299 }, { - "epoch": 0.2918756553033523, + "epoch": 0.4030049299632209, "grad_norm": 0.0, - "learning_rate": 1.6613875907809728e-05, - "loss": 0.9423, + "learning_rate": 1.3548418519858585e-05, + "loss": 1.1418, "step": 10300 }, { - "epoch": 0.2919039927456148, + "epoch": 0.40304405665545034, "grad_norm": 0.0, - "learning_rate": 1.661318749734671e-05, - "loss": 0.9635, + "learning_rate": 1.3547233723624036e-05, + "loss": 1.1633, "step": 10301 }, { - "epoch": 0.29193233018787723, + "epoch": 0.4030831833476798, "grad_norm": 0.0, - "learning_rate": 1.661249903117836e-05, - "loss": 0.9615, + "learning_rate": 1.3546048870425356e-05, + "loss": 1.1062, "step": 10302 }, { - "epoch": 0.2919606676301397, + "epoch": 0.4031223100399092, "grad_norm": 0.0, - "learning_rate": 1.6611810509310476e-05, - "loss": 1.0733, + "learning_rate": 1.3544863960281578e-05, + "loss": 1.0638, "step": 10303 }, { - "epoch": 0.29198900507240216, + "epoch": 0.40316143673213867, "grad_norm": 0.0, - "learning_rate": 1.6611121931748858e-05, - "loss": 0.9552, + "learning_rate": 1.3543678993211722e-05, + "loss": 1.212, "step": 10304 }, { - "epoch": 0.2920173425146646, + "epoch": 0.4032005634243681, "grad_norm": 0.0, - "learning_rate": 1.66104332984993e-05, - "loss": 0.9881, + "learning_rate": 1.3542493969234825e-05, + "loss": 1.1636, "step": 10305 }, { - "epoch": 0.2920456799569271, + "epoch": 0.40323969011659755, "grad_norm": 0.0, - "learning_rate": 1.6609744609567614e-05, - "loss": 0.9222, + "learning_rate": 1.3541308888369916e-05, + "loss": 1.119, "step": 10306 }, { - "epoch": 0.29207401739918953, + "epoch": 0.403278816808827, "grad_norm": 0.0, - "learning_rate": 1.660905586495959e-05, - "loss": 0.9192, + "learning_rate": 1.3540123750636018e-05, + "loss": 1.1636, "step": 10307 }, { - "epoch": 0.292102354841452, + "epoch": 0.40331794350105643, "grad_norm": 0.0, - "learning_rate": 1.6608367064681033e-05, - "loss": 0.9313, + "learning_rate": 1.3538938556052172e-05, + "loss": 1.1175, "step": 10308 }, { - "epoch": 0.29213069228371447, + "epoch": 0.40335707019328587, "grad_norm": 0.0, - "learning_rate": 1.660767820873775e-05, - "loss": 1.0187, + "learning_rate": 1.3537753304637406e-05, + "loss": 1.1019, "step": 10309 }, { - "epoch": 0.29215902972597696, + "epoch": 0.4033961968855153, "grad_norm": 0.0, - "learning_rate": 1.6606989297135538e-05, - "loss": 0.9543, + "learning_rate": 1.3536567996410756e-05, + "loss": 1.0729, "step": 10310 }, { - "epoch": 0.2921873671682394, + "epoch": 0.40343532357774475, "grad_norm": 0.0, - "learning_rate": 1.6606300329880204e-05, - "loss": 0.9805, + "learning_rate": 1.3535382631391254e-05, + "loss": 1.1099, "step": 10311 }, { - "epoch": 0.29221570461050184, + "epoch": 0.4034744502699742, "grad_norm": 0.0, - "learning_rate": 1.6605611306977546e-05, - "loss": 0.9627, + "learning_rate": 1.3534197209597939e-05, + "loss": 1.1294, "step": 10312 }, { - "epoch": 0.29224404205276433, + "epoch": 0.40351357696220364, "grad_norm": 0.0, - "learning_rate": 1.6604922228433372e-05, - "loss": 0.9372, + "learning_rate": 1.3533011731049843e-05, + "loss": 1.0864, "step": 10313 }, { - "epoch": 0.29227237949502677, + "epoch": 0.4035527036544331, "grad_norm": 0.0, - "learning_rate": 1.660423309425349e-05, - "loss": 1.1082, + "learning_rate": 1.3531826195766008e-05, + "loss": 1.0342, "step": 10314 }, { - "epoch": 0.29230071693728926, + "epoch": 0.4035918303466625, "grad_norm": 0.0, - "learning_rate": 1.6603543904443694e-05, - "loss": 0.9585, + "learning_rate": 1.3530640603765473e-05, + "loss": 1.0873, "step": 10315 }, { - "epoch": 0.2923290543795517, + "epoch": 0.40363095703889196, "grad_norm": 0.0, - "learning_rate": 1.6602854659009797e-05, - "loss": 0.9575, + "learning_rate": 1.3529454955067267e-05, + "loss": 1.0887, "step": 10316 }, { - "epoch": 0.29235739182181414, + "epoch": 0.4036700837311214, "grad_norm": 0.0, - "learning_rate": 1.6602165357957603e-05, - "loss": 1.0516, + "learning_rate": 1.3528269249690441e-05, + "loss": 1.0413, "step": 10317 }, { - "epoch": 0.29238572926407663, + "epoch": 0.4037092104233508, "grad_norm": 0.0, - "learning_rate": 1.660147600129292e-05, - "loss": 0.9727, + "learning_rate": 1.3527083487654032e-05, + "loss": 1.1521, "step": 10318 }, { - "epoch": 0.29241406670633907, + "epoch": 0.4037483371155802, "grad_norm": 0.0, - "learning_rate": 1.6600786589021555e-05, - "loss": 1.0717, + "learning_rate": 1.3525897668977081e-05, + "loss": 1.0522, "step": 10319 }, { - "epoch": 0.29244240414860156, + "epoch": 0.40378746380780967, "grad_norm": 0.0, - "learning_rate": 1.660009712114931e-05, - "loss": 0.8735, + "learning_rate": 1.3524711793678631e-05, + "loss": 1.0716, "step": 10320 }, { - "epoch": 0.292470741590864, + "epoch": 0.4038265905000391, "grad_norm": 0.0, - "learning_rate": 1.6599407597681997e-05, - "loss": 1.0464, + "learning_rate": 1.352352586177773e-05, + "loss": 1.0017, "step": 10321 }, { - "epoch": 0.2924990790331265, + "epoch": 0.40386571719226855, "grad_norm": 0.0, - "learning_rate": 1.6598718018625424e-05, - "loss": 0.8937, + "learning_rate": 1.3522339873293416e-05, + "loss": 1.0891, "step": 10322 }, { - "epoch": 0.29252741647538893, + "epoch": 0.403904843884498, "grad_norm": 0.0, - "learning_rate": 1.6598028383985398e-05, - "loss": 0.9916, + "learning_rate": 1.3521153828244735e-05, + "loss": 1.0245, "step": 10323 }, { - "epoch": 0.2925557539176514, + "epoch": 0.40394397057672743, "grad_norm": 0.0, - "learning_rate": 1.659733869376773e-05, - "loss": 0.9951, + "learning_rate": 1.351996772665074e-05, + "loss": 1.0373, "step": 10324 }, { - "epoch": 0.29258409135991387, + "epoch": 0.40398309726895687, "grad_norm": 0.0, - "learning_rate": 1.6596648947978225e-05, - "loss": 0.9708, + "learning_rate": 1.3518781568530472e-05, + "loss": 1.1153, "step": 10325 }, { - "epoch": 0.2926124288021763, + "epoch": 0.4040222239611863, "grad_norm": 0.0, - "learning_rate": 1.65959591466227e-05, - "loss": 1.0457, + "learning_rate": 1.3517595353902982e-05, + "loss": 1.0367, "step": 10326 }, { - "epoch": 0.2926407662444388, + "epoch": 0.40406135065341575, "grad_norm": 0.0, - "learning_rate": 1.659526928970696e-05, - "loss": 1.0006, + "learning_rate": 1.3516409082787316e-05, + "loss": 1.0229, "step": 10327 }, { - "epoch": 0.29266910368670124, + "epoch": 0.4041004773456452, "grad_norm": 0.0, - "learning_rate": 1.6594579377236817e-05, - "loss": 0.9374, + "learning_rate": 1.351522275520253e-05, + "loss": 1.1323, "step": 10328 }, { - "epoch": 0.2926974411289637, + "epoch": 0.40413960403787463, "grad_norm": 0.0, - "learning_rate": 1.6593889409218084e-05, - "loss": 0.9919, + "learning_rate": 1.3514036371167669e-05, + "loss": 1.0444, "step": 10329 }, { - "epoch": 0.29272577857122617, + "epoch": 0.4041787307301041, "grad_norm": 0.0, - "learning_rate": 1.6593199385656574e-05, - "loss": 1.0253, + "learning_rate": 1.351284993070179e-05, + "loss": 1.0607, "step": 10330 }, { - "epoch": 0.2927541160134886, + "epoch": 0.4042178574223335, "grad_norm": 0.0, - "learning_rate": 1.6592509306558095e-05, - "loss": 1.0315, + "learning_rate": 1.3511663433823938e-05, + "loss": 1.0283, "step": 10331 }, { - "epoch": 0.2927824534557511, + "epoch": 0.40425698411456296, "grad_norm": 0.0, - "learning_rate": 1.6591819171928462e-05, - "loss": 1.0729, + "learning_rate": 1.3510476880553177e-05, + "loss": 1.1338, "step": 10332 }, { - "epoch": 0.29281079089801354, + "epoch": 0.4042961108067924, "grad_norm": 0.0, - "learning_rate": 1.659112898177349e-05, - "loss": 0.8943, + "learning_rate": 1.3509290270908552e-05, + "loss": 1.0244, "step": 10333 }, { - "epoch": 0.29283912834027603, + "epoch": 0.40433523749902184, "grad_norm": 0.0, - "learning_rate": 1.6590438736098987e-05, - "loss": 1.0595, + "learning_rate": 1.3508103604909127e-05, + "loss": 1.0304, "step": 10334 }, { - "epoch": 0.29286746578253847, + "epoch": 0.4043743641912513, "grad_norm": 0.0, - "learning_rate": 1.6589748434910774e-05, - "loss": 0.9966, + "learning_rate": 1.350691688257395e-05, + "loss": 1.1638, "step": 10335 }, { - "epoch": 0.2928958032248009, + "epoch": 0.4044134908834807, "grad_norm": 0.0, - "learning_rate": 1.6589058078214662e-05, - "loss": 1.0048, + "learning_rate": 1.3505730103922083e-05, + "loss": 1.1051, "step": 10336 }, { - "epoch": 0.2929241406670634, + "epoch": 0.40445261757571016, "grad_norm": 0.0, - "learning_rate": 1.658836766601647e-05, - "loss": 1.0554, + "learning_rate": 1.3504543268972585e-05, + "loss": 1.0349, "step": 10337 }, { - "epoch": 0.29295247810932584, + "epoch": 0.4044917442679396, "grad_norm": 0.0, - "learning_rate": 1.6587677198322008e-05, - "loss": 0.9675, + "learning_rate": 1.3503356377744512e-05, + "loss": 1.064, "step": 10338 }, { - "epoch": 0.29298081555158834, + "epoch": 0.40453087096016904, "grad_norm": 0.0, - "learning_rate": 1.6586986675137095e-05, - "loss": 0.7435, + "learning_rate": 1.350216943025693e-05, + "loss": 1.0814, "step": 10339 }, { - "epoch": 0.2930091529938508, + "epoch": 0.4045699976523985, "grad_norm": 0.0, - "learning_rate": 1.6586296096467545e-05, - "loss": 0.9251, + "learning_rate": 1.3500982426528888e-05, + "loss": 1.0298, "step": 10340 }, { - "epoch": 0.2930374904361132, + "epoch": 0.4046091243446279, "grad_norm": 0.0, - "learning_rate": 1.658560546231918e-05, - "loss": 0.86, + "learning_rate": 1.349979536657946e-05, + "loss": 1.131, "step": 10341 }, { - "epoch": 0.2930658278783757, + "epoch": 0.40464825103685736, "grad_norm": 0.0, - "learning_rate": 1.6584914772697816e-05, - "loss": 1.0726, + "learning_rate": 1.34986082504277e-05, + "loss": 1.0137, "step": 10342 }, { - "epoch": 0.29309416532063814, + "epoch": 0.4046873777290868, "grad_norm": 0.0, - "learning_rate": 1.6584224027609268e-05, - "loss": 1.0045, + "learning_rate": 1.349742107809268e-05, + "loss": 1.1024, "step": 10343 }, { - "epoch": 0.29312250276290064, + "epoch": 0.40472650442131625, "grad_norm": 0.0, - "learning_rate": 1.6583533227059353e-05, - "loss": 0.8388, + "learning_rate": 1.3496233849593458e-05, + "loss": 1.0476, "step": 10344 }, { - "epoch": 0.2931508402051631, + "epoch": 0.4047656311135457, "grad_norm": 0.0, - "learning_rate": 1.6582842371053897e-05, - "loss": 1.0746, + "learning_rate": 1.3495046564949102e-05, + "loss": 1.1298, "step": 10345 }, { - "epoch": 0.29317917764742557, + "epoch": 0.4048047578057751, "grad_norm": 0.0, - "learning_rate": 1.6582151459598716e-05, - "loss": 1.0064, + "learning_rate": 1.3493859224178678e-05, + "loss": 1.0953, "step": 10346 }, { - "epoch": 0.293207515089688, + "epoch": 0.4048438844980045, "grad_norm": 0.0, - "learning_rate": 1.6581460492699625e-05, - "loss": 0.9391, + "learning_rate": 1.3492671827301251e-05, + "loss": 1.1395, "step": 10347 }, { - "epoch": 0.29323585253195045, + "epoch": 0.40488301119023395, "grad_norm": 0.0, - "learning_rate": 1.658076947036245e-05, - "loss": 0.9546, + "learning_rate": 1.3491484374335893e-05, + "loss": 1.0111, "step": 10348 }, { - "epoch": 0.29326418997421294, + "epoch": 0.4049221378824634, "grad_norm": 0.0, - "learning_rate": 1.6580078392593012e-05, - "loss": 0.8562, + "learning_rate": 1.3490296865301668e-05, + "loss": 1.0157, "step": 10349 }, { - "epoch": 0.2932925274164754, + "epoch": 0.40496126457469284, "grad_norm": 0.0, - "learning_rate": 1.657938725939713e-05, - "loss": 0.9723, + "learning_rate": 1.3489109300217654e-05, + "loss": 1.0047, "step": 10350 }, { - "epoch": 0.2933208648587379, + "epoch": 0.4050003912669223, "grad_norm": 0.0, - "learning_rate": 1.657869607078062e-05, - "loss": 1.0364, + "learning_rate": 1.3487921679102912e-05, + "loss": 0.8996, "step": 10351 }, { - "epoch": 0.2933492023010003, + "epoch": 0.4050395179591517, "grad_norm": 0.0, - "learning_rate": 1.6578004826749317e-05, - "loss": 1.0085, + "learning_rate": 1.3486734001976523e-05, + "loss": 1.0687, "step": 10352 }, { - "epoch": 0.29337753974326275, + "epoch": 0.40507864465138116, "grad_norm": 0.0, - "learning_rate": 1.6577313527309035e-05, - "loss": 1.0529, + "learning_rate": 1.3485546268857552e-05, + "loss": 0.9986, "step": 10353 }, { - "epoch": 0.29340587718552524, + "epoch": 0.4051177713436106, "grad_norm": 0.0, - "learning_rate": 1.6576622172465598e-05, - "loss": 0.9133, + "learning_rate": 1.3484358479765075e-05, + "loss": 1.0882, "step": 10354 }, { - "epoch": 0.2934342146277877, + "epoch": 0.40515689803584004, "grad_norm": 0.0, - "learning_rate": 1.6575930762224828e-05, - "loss": 1.0285, + "learning_rate": 1.3483170634718169e-05, + "loss": 1.1606, "step": 10355 }, { - "epoch": 0.2934625520700502, + "epoch": 0.4051960247280695, "grad_norm": 0.0, - "learning_rate": 1.6575239296592554e-05, - "loss": 0.9592, + "learning_rate": 1.3481982733735905e-05, + "loss": 1.0023, "step": 10356 }, { - "epoch": 0.2934908895123126, + "epoch": 0.4052351514202989, "grad_norm": 0.0, - "learning_rate": 1.65745477755746e-05, - "loss": 0.9308, + "learning_rate": 1.3480794776837362e-05, + "loss": 1.0098, "step": 10357 }, { - "epoch": 0.2935192269545751, + "epoch": 0.40527427811252836, "grad_norm": 0.0, - "learning_rate": 1.6573856199176782e-05, - "loss": 0.9048, + "learning_rate": 1.3479606764041616e-05, + "loss": 1.1736, "step": 10358 }, { - "epoch": 0.29354756439683755, + "epoch": 0.4053134048047578, "grad_norm": 0.0, - "learning_rate": 1.657316456740494e-05, - "loss": 0.9493, + "learning_rate": 1.347841869536775e-05, + "loss": 1.1562, "step": 10359 }, { - "epoch": 0.2935759018391, + "epoch": 0.40535253149698725, "grad_norm": 0.0, - "learning_rate": 1.6572472880264883e-05, - "loss": 0.8681, + "learning_rate": 1.3477230570834831e-05, + "loss": 1.2156, "step": 10360 }, { - "epoch": 0.2936042392813625, + "epoch": 0.4053916581892167, "grad_norm": 0.0, - "learning_rate": 1.6571781137762456e-05, - "loss": 0.8808, + "learning_rate": 1.3476042390461954e-05, + "loss": 1.0472, "step": 10361 }, { - "epoch": 0.2936325767236249, + "epoch": 0.4054307848814461, "grad_norm": 0.0, - "learning_rate": 1.657108933990347e-05, - "loss": 0.9479, + "learning_rate": 1.3474854154268186e-05, + "loss": 1.0389, "step": 10362 }, { - "epoch": 0.2936609141658874, + "epoch": 0.40546991157367557, "grad_norm": 0.0, - "learning_rate": 1.657039748669376e-05, - "loss": 1.0437, + "learning_rate": 1.3473665862272619e-05, + "loss": 1.0925, "step": 10363 }, { - "epoch": 0.29368925160814985, + "epoch": 0.405509038265905, "grad_norm": 0.0, - "learning_rate": 1.6569705578139152e-05, - "loss": 0.9793, + "learning_rate": 1.3472477514494328e-05, + "loss": 1.0858, "step": 10364 }, { - "epoch": 0.2937175890504123, + "epoch": 0.40554816495813445, "grad_norm": 0.0, - "learning_rate": 1.6569013614245473e-05, - "loss": 0.9602, + "learning_rate": 1.34712891109524e-05, + "loss": 0.9988, "step": 10365 }, { - "epoch": 0.2937459264926748, + "epoch": 0.4055872916503639, "grad_norm": 0.0, - "learning_rate": 1.6568321595018554e-05, - "loss": 0.9572, + "learning_rate": 1.3470100651665921e-05, + "loss": 1.0029, "step": 10366 }, { - "epoch": 0.2937742639349372, + "epoch": 0.40562641834259333, "grad_norm": 0.0, - "learning_rate": 1.6567629520464222e-05, - "loss": 0.9308, + "learning_rate": 1.3468912136653974e-05, + "loss": 1.1203, "step": 10367 }, { - "epoch": 0.2938026013771997, + "epoch": 0.40566554503482277, "grad_norm": 0.0, - "learning_rate": 1.656693739058831e-05, - "loss": 0.8816, + "learning_rate": 1.3467723565935639e-05, + "loss": 1.1071, "step": 10368 }, { - "epoch": 0.29383093881946215, + "epoch": 0.4057046717270522, "grad_norm": 0.0, - "learning_rate": 1.6566245205396647e-05, - "loss": 0.9766, + "learning_rate": 1.3466534939530014e-05, + "loss": 1.2299, "step": 10369 }, { - "epoch": 0.2938592762617246, + "epoch": 0.40574379841928165, "grad_norm": 0.0, - "learning_rate": 1.656555296489506e-05, - "loss": 0.7447, + "learning_rate": 1.3465346257456181e-05, + "loss": 1.0631, "step": 10370 }, { - "epoch": 0.2938876137039871, + "epoch": 0.4057829251115111, "grad_norm": 0.0, - "learning_rate": 1.6564860669089382e-05, - "loss": 0.8925, + "learning_rate": 1.3464157519733228e-05, + "loss": 1.0796, "step": 10371 }, { - "epoch": 0.2939159511462495, + "epoch": 0.40582205180374054, "grad_norm": 0.0, - "learning_rate": 1.6564168317985444e-05, - "loss": 0.9873, + "learning_rate": 1.3462968726380248e-05, + "loss": 1.1522, "step": 10372 }, { - "epoch": 0.293944288588512, + "epoch": 0.40586117849597, "grad_norm": 0.0, - "learning_rate": 1.656347591158908e-05, - "loss": 1.0395, + "learning_rate": 1.3461779877416327e-05, + "loss": 0.882, "step": 10373 }, { - "epoch": 0.29397262603077445, + "epoch": 0.4059003051881994, "grad_norm": 0.0, - "learning_rate": 1.656278344990612e-05, - "loss": 0.9591, + "learning_rate": 1.3460590972860561e-05, + "loss": 1.0824, "step": 10374 }, { - "epoch": 0.29400096347303695, + "epoch": 0.4059394318804288, "grad_norm": 0.0, - "learning_rate": 1.65620909329424e-05, - "loss": 1.0476, + "learning_rate": 1.345940201273204e-05, + "loss": 1.1268, "step": 10375 }, { - "epoch": 0.2940293009152994, + "epoch": 0.40597855857265824, "grad_norm": 0.0, - "learning_rate": 1.656139836070375e-05, - "loss": 0.9746, + "learning_rate": 1.3458212997049855e-05, + "loss": 1.1201, "step": 10376 }, { - "epoch": 0.2940576383575618, + "epoch": 0.4060176852648877, "grad_norm": 0.0, - "learning_rate": 1.6560705733196004e-05, - "loss": 1.0219, + "learning_rate": 1.3457023925833106e-05, + "loss": 1.1545, "step": 10377 }, { - "epoch": 0.2940859757998243, + "epoch": 0.4060568119571171, "grad_norm": 0.0, - "learning_rate": 1.6560013050425003e-05, - "loss": 0.8689, + "learning_rate": 1.3455834799100881e-05, + "loss": 1.1678, "step": 10378 }, { - "epoch": 0.29411431324208676, + "epoch": 0.40609593864934657, "grad_norm": 0.0, - "learning_rate": 1.6559320312396573e-05, - "loss": 0.9971, + "learning_rate": 1.3454645616872286e-05, + "loss": 1.0417, "step": 10379 }, { - "epoch": 0.29414265068434925, + "epoch": 0.406135065341576, "grad_norm": 0.0, - "learning_rate": 1.6558627519116547e-05, - "loss": 0.9519, + "learning_rate": 1.3453456379166405e-05, + "loss": 0.9727, "step": 10380 }, { - "epoch": 0.2941709881266117, + "epoch": 0.40617419203380545, "grad_norm": 0.0, - "learning_rate": 1.655793467059077e-05, - "loss": 1.0473, + "learning_rate": 1.3452267086002345e-05, + "loss": 1.0334, "step": 10381 }, { - "epoch": 0.2941993255688741, + "epoch": 0.4062133187260349, "grad_norm": 0.0, - "learning_rate": 1.6557241766825077e-05, - "loss": 0.8788, + "learning_rate": 1.3451077737399202e-05, + "loss": 1.0258, "step": 10382 }, { - "epoch": 0.2942276630111366, + "epoch": 0.40625244541826433, "grad_norm": 0.0, - "learning_rate": 1.6556548807825298e-05, - "loss": 0.8993, + "learning_rate": 1.3449888333376073e-05, + "loss": 1.1078, "step": 10383 }, { - "epoch": 0.29425600045339906, + "epoch": 0.40629157211049377, "grad_norm": 0.0, - "learning_rate": 1.6555855793597273e-05, - "loss": 0.8869, + "learning_rate": 1.3448698873952063e-05, + "loss": 1.2384, "step": 10384 }, { - "epoch": 0.29428433789566155, + "epoch": 0.4063306988027232, "grad_norm": 0.0, - "learning_rate": 1.6555162724146844e-05, - "loss": 0.8806, + "learning_rate": 1.3447509359146267e-05, + "loss": 1.0285, "step": 10385 }, { - "epoch": 0.294312675337924, + "epoch": 0.40636982549495265, "grad_norm": 0.0, - "learning_rate": 1.655446959947984e-05, - "loss": 0.8719, + "learning_rate": 1.3446319788977793e-05, + "loss": 0.9624, "step": 10386 }, { - "epoch": 0.2943410127801865, + "epoch": 0.4064089521871821, "grad_norm": 0.0, - "learning_rate": 1.655377641960211e-05, - "loss": 0.995, + "learning_rate": 1.3445130163465739e-05, + "loss": 1.0927, "step": 10387 }, { - "epoch": 0.2943693502224489, + "epoch": 0.40644807887941153, "grad_norm": 0.0, - "learning_rate": 1.6553083184519483e-05, - "loss": 0.9643, + "learning_rate": 1.3443940482629214e-05, + "loss": 1.1597, "step": 10388 }, { - "epoch": 0.29439768766471136, + "epoch": 0.406487205571641, "grad_norm": 0.0, - "learning_rate": 1.6552389894237806e-05, - "loss": 0.941, + "learning_rate": 1.3442750746487319e-05, + "loss": 1.0691, "step": 10389 }, { - "epoch": 0.29442602510697385, + "epoch": 0.4065263322638704, "grad_norm": 0.0, - "learning_rate": 1.6551696548762914e-05, - "loss": 0.9611, + "learning_rate": 1.3441560955059163e-05, + "loss": 0.9617, "step": 10390 }, { - "epoch": 0.2944543625492363, + "epoch": 0.40656545895609986, "grad_norm": 0.0, - "learning_rate": 1.6551003148100647e-05, - "loss": 0.8554, + "learning_rate": 1.3440371108363847e-05, + "loss": 1.0219, "step": 10391 }, { - "epoch": 0.2944826999914988, + "epoch": 0.4066045856483293, "grad_norm": 0.0, - "learning_rate": 1.655030969225685e-05, - "loss": 0.9559, + "learning_rate": 1.3439181206420486e-05, + "loss": 1.1863, "step": 10392 }, { - "epoch": 0.2945110374337612, + "epoch": 0.40664371234055874, "grad_norm": 0.0, - "learning_rate": 1.6549616181237365e-05, - "loss": 0.9544, + "learning_rate": 1.3437991249248184e-05, + "loss": 0.9779, "step": 10393 }, { - "epoch": 0.29453937487602366, + "epoch": 0.4066828390327882, "grad_norm": 0.0, - "learning_rate": 1.6548922615048028e-05, - "loss": 1.06, + "learning_rate": 1.3436801236866048e-05, + "loss": 1.0428, "step": 10394 }, { - "epoch": 0.29456771231828616, + "epoch": 0.4067219657250176, "grad_norm": 0.0, - "learning_rate": 1.6548228993694685e-05, - "loss": 1.0025, + "learning_rate": 1.3435611169293194e-05, + "loss": 1.0361, "step": 10395 }, { - "epoch": 0.2945960497605486, + "epoch": 0.40676109241724706, "grad_norm": 0.0, - "learning_rate": 1.6547535317183176e-05, - "loss": 0.9987, + "learning_rate": 1.3434421046548727e-05, + "loss": 1.0794, "step": 10396 }, { - "epoch": 0.2946243872028111, + "epoch": 0.4068002191094765, "grad_norm": 0.0, - "learning_rate": 1.6546841585519346e-05, - "loss": 0.9456, + "learning_rate": 1.3433230868651763e-05, + "loss": 1.2128, "step": 10397 }, { - "epoch": 0.2946527246450735, + "epoch": 0.40683934580170594, "grad_norm": 0.0, - "learning_rate": 1.654614779870904e-05, - "loss": 0.8733, + "learning_rate": 1.343204063562141e-05, + "loss": 0.9736, "step": 10398 }, { - "epoch": 0.294681062087336, + "epoch": 0.4068784724939354, "grad_norm": 0.0, - "learning_rate": 1.6545453956758098e-05, - "loss": 1.0686, + "learning_rate": 1.343085034747679e-05, + "loss": 1.0222, "step": 10399 }, { - "epoch": 0.29470939952959846, + "epoch": 0.4069175991861648, "grad_norm": 0.0, - "learning_rate": 1.654476005967237e-05, - "loss": 1.0109, + "learning_rate": 1.3429660004237008e-05, + "loss": 1.1782, "step": 10400 }, { - "epoch": 0.2947377369718609, + "epoch": 0.40695672587839427, "grad_norm": 0.0, - "learning_rate": 1.6544066107457693e-05, - "loss": 0.949, + "learning_rate": 1.3428469605921189e-05, + "loss": 1.2345, "step": 10401 }, { - "epoch": 0.2947660744141234, + "epoch": 0.4069958525706237, "grad_norm": 0.0, - "learning_rate": 1.654337210011992e-05, - "loss": 1.0089, + "learning_rate": 1.3427279152548442e-05, + "loss": 1.0628, "step": 10402 }, { - "epoch": 0.29479441185638583, + "epoch": 0.4070349792628531, "grad_norm": 0.0, - "learning_rate": 1.6542678037664892e-05, - "loss": 0.8363, + "learning_rate": 1.3426088644137884e-05, + "loss": 1.0222, "step": 10403 }, { - "epoch": 0.2948227492986483, + "epoch": 0.40707410595508253, "grad_norm": 0.0, - "learning_rate": 1.6541983920098462e-05, - "loss": 0.9502, + "learning_rate": 1.3424898080708639e-05, + "loss": 1.1411, "step": 10404 }, { - "epoch": 0.29485108674091076, + "epoch": 0.407113232647312, "grad_norm": 0.0, - "learning_rate": 1.6541289747426467e-05, - "loss": 1.0315, + "learning_rate": 1.342370746227982e-05, + "loss": 1.1421, "step": 10405 }, { - "epoch": 0.2948794241831732, + "epoch": 0.4071523593395414, "grad_norm": 0.0, - "learning_rate": 1.6540595519654762e-05, - "loss": 1.0374, + "learning_rate": 1.342251678887055e-05, + "loss": 1.0836, "step": 10406 }, { - "epoch": 0.2949077616254357, + "epoch": 0.40719148603177086, "grad_norm": 0.0, - "learning_rate": 1.6539901236789192e-05, - "loss": 0.9016, + "learning_rate": 1.3421326060499949e-05, + "loss": 1.058, "step": 10407 }, { - "epoch": 0.29493609906769813, + "epoch": 0.4072306127240003, "grad_norm": 0.0, - "learning_rate": 1.6539206898835604e-05, - "loss": 0.932, + "learning_rate": 1.3420135277187139e-05, + "loss": 1.0319, "step": 10408 }, { - "epoch": 0.2949644365099606, + "epoch": 0.40726973941622974, "grad_norm": 0.0, - "learning_rate": 1.653851250579985e-05, - "loss": 0.8981, + "learning_rate": 1.3418944438951242e-05, + "loss": 1.0165, "step": 10409 }, { - "epoch": 0.29499277395222306, + "epoch": 0.4073088661084592, "grad_norm": 0.0, - "learning_rate": 1.653781805768777e-05, - "loss": 0.8884, + "learning_rate": 1.341775354581138e-05, + "loss": 0.9758, "step": 10410 }, { - "epoch": 0.29502111139448556, + "epoch": 0.4073479928006886, "grad_norm": 0.0, - "learning_rate": 1.653712355450523e-05, - "loss": 1.0133, + "learning_rate": 1.3416562597786683e-05, + "loss": 0.9966, "step": 10411 }, { - "epoch": 0.295049448836748, + "epoch": 0.40738711949291806, "grad_norm": 0.0, - "learning_rate": 1.653642899625807e-05, - "loss": 1.0668, + "learning_rate": 1.3415371594896266e-05, + "loss": 1.1071, "step": 10412 }, { - "epoch": 0.29507778627901043, + "epoch": 0.4074262461851475, "grad_norm": 0.0, - "learning_rate": 1.6535734382952135e-05, - "loss": 1.0262, + "learning_rate": 1.3414180537159265e-05, + "loss": 0.964, "step": 10413 }, { - "epoch": 0.29510612372127293, + "epoch": 0.40746537287737694, "grad_norm": 0.0, - "learning_rate": 1.6535039714593288e-05, - "loss": 0.9505, + "learning_rate": 1.3412989424594803e-05, + "loss": 1.1846, "step": 10414 }, { - "epoch": 0.29513446116353537, + "epoch": 0.4075044995696064, "grad_norm": 0.0, - "learning_rate": 1.6534344991187373e-05, - "loss": 0.9265, + "learning_rate": 1.3411798257222004e-05, + "loss": 1.0651, "step": 10415 }, { - "epoch": 0.29516279860579786, + "epoch": 0.4075436262618358, "grad_norm": 0.0, - "learning_rate": 1.6533650212740243e-05, - "loss": 1.0354, + "learning_rate": 1.3410607035060004e-05, + "loss": 1.0477, "step": 10416 }, { - "epoch": 0.2951911360480603, + "epoch": 0.40758275295406526, "grad_norm": 0.0, - "learning_rate": 1.653295537925775e-05, - "loss": 0.9903, + "learning_rate": 1.3409415758127929e-05, + "loss": 0.9948, "step": 10417 }, { - "epoch": 0.29521947349032274, + "epoch": 0.4076218796462947, "grad_norm": 0.0, - "learning_rate": 1.653226049074575e-05, - "loss": 0.8917, + "learning_rate": 1.3408224426444908e-05, + "loss": 1.006, "step": 10418 }, { - "epoch": 0.29524781093258523, + "epoch": 0.40766100633852415, "grad_norm": 0.0, - "learning_rate": 1.6531565547210095e-05, - "loss": 0.9136, + "learning_rate": 1.3407033040030071e-05, + "loss": 1.1399, "step": 10419 }, { - "epoch": 0.29527614837484767, + "epoch": 0.4077001330307536, "grad_norm": 0.0, - "learning_rate": 1.6530870548656636e-05, - "loss": 0.9614, + "learning_rate": 1.3405841598902553e-05, + "loss": 1.0636, "step": 10420 }, { - "epoch": 0.29530448581711016, + "epoch": 0.407739259722983, "grad_norm": 0.0, - "learning_rate": 1.653017549509123e-05, - "loss": 1.0182, + "learning_rate": 1.340465010308149e-05, + "loss": 1.0922, "step": 10421 }, { - "epoch": 0.2953328232593726, + "epoch": 0.40777838641521247, "grad_norm": 0.0, - "learning_rate": 1.6529480386519728e-05, - "loss": 0.9429, + "learning_rate": 1.340345855258601e-05, + "loss": 1.0223, "step": 10422 }, { - "epoch": 0.2953611607016351, + "epoch": 0.4078175131074419, "grad_norm": 0.0, - "learning_rate": 1.6528785222947995e-05, - "loss": 0.9371, + "learning_rate": 1.3402266947435251e-05, + "loss": 1.0231, "step": 10423 }, { - "epoch": 0.29538949814389753, + "epoch": 0.40785663979967135, "grad_norm": 0.0, - "learning_rate": 1.6528090004381872e-05, - "loss": 1.0205, + "learning_rate": 1.3401075287648348e-05, + "loss": 0.9553, "step": 10424 }, { - "epoch": 0.29541783558615997, + "epoch": 0.4078957664919008, "grad_norm": 0.0, - "learning_rate": 1.6527394730827227e-05, - "loss": 0.9577, + "learning_rate": 1.3399883573244438e-05, + "loss": 1.2144, "step": 10425 }, { - "epoch": 0.29544617302842247, + "epoch": 0.40793489318413023, "grad_norm": 0.0, - "learning_rate": 1.6526699402289912e-05, - "loss": 0.9927, + "learning_rate": 1.3398691804242658e-05, + "loss": 1.099, "step": 10426 }, { - "epoch": 0.2954745104706849, + "epoch": 0.4079740198763597, "grad_norm": 0.0, - "learning_rate": 1.6526004018775785e-05, - "loss": 0.8023, + "learning_rate": 1.3397499980662145e-05, + "loss": 1.1054, "step": 10427 }, { - "epoch": 0.2955028479129474, + "epoch": 0.4080131465685891, "grad_norm": 0.0, - "learning_rate": 1.65253085802907e-05, - "loss": 0.906, + "learning_rate": 1.3396308102522042e-05, + "loss": 1.2227, "step": 10428 }, { - "epoch": 0.29553118535520984, + "epoch": 0.40805227326081855, "grad_norm": 0.0, - "learning_rate": 1.652461308684052e-05, - "loss": 0.9335, + "learning_rate": 1.3395116169841487e-05, + "loss": 1.0613, "step": 10429 }, { - "epoch": 0.2955595227974723, + "epoch": 0.408091399953048, "grad_norm": 0.0, - "learning_rate": 1.65239175384311e-05, - "loss": 1.0198, + "learning_rate": 1.3393924182639619e-05, + "loss": 1.1166, "step": 10430 }, { - "epoch": 0.29558786023973477, + "epoch": 0.4081305266452774, "grad_norm": 0.0, - "learning_rate": 1.6523221935068302e-05, - "loss": 0.9226, + "learning_rate": 1.3392732140935583e-05, + "loss": 1.0591, "step": 10431 }, { - "epoch": 0.2956161976819972, + "epoch": 0.4081696533375068, "grad_norm": 0.0, - "learning_rate": 1.652252627675798e-05, - "loss": 0.9662, + "learning_rate": 1.3391540044748517e-05, + "loss": 1.0499, "step": 10432 }, { - "epoch": 0.2956445351242597, + "epoch": 0.40820878002973626, "grad_norm": 0.0, - "learning_rate": 1.6521830563506e-05, - "loss": 1.0294, + "learning_rate": 1.339034789409757e-05, + "loss": 1.1078, "step": 10433 }, { - "epoch": 0.29567287256652214, + "epoch": 0.4082479067219657, "grad_norm": 0.0, - "learning_rate": 1.6521134795318214e-05, - "loss": 1.0757, + "learning_rate": 1.3389155689001884e-05, + "loss": 1.0277, "step": 10434 }, { - "epoch": 0.29570121000878463, + "epoch": 0.40828703341419514, "grad_norm": 0.0, - "learning_rate": 1.6520438972200496e-05, - "loss": 1.0036, + "learning_rate": 1.3387963429480605e-05, + "loss": 0.9703, "step": 10435 }, { - "epoch": 0.29572954745104707, + "epoch": 0.4083261601064246, "grad_norm": 0.0, - "learning_rate": 1.6519743094158694e-05, - "loss": 1.0041, + "learning_rate": 1.3386771115552876e-05, + "loss": 1.111, "step": 10436 }, { - "epoch": 0.2957578848933095, + "epoch": 0.408365286798654, "grad_norm": 0.0, - "learning_rate": 1.6519047161198675e-05, - "loss": 1.0185, + "learning_rate": 1.338557874723785e-05, + "loss": 1.0869, "step": 10437 }, { - "epoch": 0.295786222335572, + "epoch": 0.40840441349088347, "grad_norm": 0.0, - "learning_rate": 1.6518351173326302e-05, - "loss": 0.9554, + "learning_rate": 1.338438632455467e-05, + "loss": 1.0073, "step": 10438 }, { - "epoch": 0.29581455977783444, + "epoch": 0.4084435401831129, "grad_norm": 0.0, - "learning_rate": 1.6517655130547435e-05, - "loss": 0.9637, + "learning_rate": 1.3383193847522487e-05, + "loss": 1.0193, "step": 10439 }, { - "epoch": 0.29584289722009693, + "epoch": 0.40848266687534235, "grad_norm": 0.0, - "learning_rate": 1.651695903286794e-05, - "loss": 1.0563, + "learning_rate": 1.3382001316160451e-05, + "loss": 1.0436, "step": 10440 }, { - "epoch": 0.2958712346623594, + "epoch": 0.4085217935675718, "grad_norm": 0.0, - "learning_rate": 1.6516262880293684e-05, - "loss": 1.0716, + "learning_rate": 1.3380808730487708e-05, + "loss": 1.0388, "step": 10441 }, { - "epoch": 0.2958995721046218, + "epoch": 0.40856092025980123, "grad_norm": 0.0, - "learning_rate": 1.651556667283052e-05, - "loss": 0.9623, + "learning_rate": 1.337961609052342e-05, + "loss": 1.1454, "step": 10442 }, { - "epoch": 0.2959279095468843, + "epoch": 0.40860004695203067, "grad_norm": 0.0, - "learning_rate": 1.6514870410484317e-05, - "loss": 1.0671, + "learning_rate": 1.3378423396286726e-05, + "loss": 1.1074, "step": 10443 }, { - "epoch": 0.29595624698914674, + "epoch": 0.4086391736442601, "grad_norm": 0.0, - "learning_rate": 1.6514174093260947e-05, - "loss": 0.9761, + "learning_rate": 1.337723064779679e-05, + "loss": 0.9149, "step": 10444 }, { - "epoch": 0.29598458443140924, + "epoch": 0.40867830033648955, "grad_norm": 0.0, - "learning_rate": 1.6513477721166268e-05, - "loss": 1.0181, + "learning_rate": 1.3376037845072759e-05, + "loss": 1.0215, "step": 10445 }, { - "epoch": 0.2960129218736717, + "epoch": 0.408717427028719, "grad_norm": 0.0, - "learning_rate": 1.6512781294206144e-05, - "loss": 0.9743, + "learning_rate": 1.3374844988133791e-05, + "loss": 1.1806, "step": 10446 }, { - "epoch": 0.29604125931593417, + "epoch": 0.40875655372094843, "grad_norm": 0.0, - "learning_rate": 1.6512084812386447e-05, - "loss": 0.9631, + "learning_rate": 1.3373652076999041e-05, + "loss": 1.1587, "step": 10447 }, { - "epoch": 0.2960695967581966, + "epoch": 0.4087956804131779, "grad_norm": 0.0, - "learning_rate": 1.651138827571304e-05, - "loss": 0.985, + "learning_rate": 1.3372459111687668e-05, + "loss": 1.0987, "step": 10448 }, { - "epoch": 0.29609793420045905, + "epoch": 0.4088348071054073, "grad_norm": 0.0, - "learning_rate": 1.6510691684191795e-05, - "loss": 1.047, + "learning_rate": 1.3371266092218824e-05, + "loss": 1.1484, "step": 10449 }, { - "epoch": 0.29612627164272154, + "epoch": 0.40887393379763676, "grad_norm": 0.0, - "learning_rate": 1.6509995037828575e-05, - "loss": 0.8994, + "learning_rate": 1.3370073018611676e-05, + "loss": 0.9866, "step": 10450 }, { - "epoch": 0.296154609084984, + "epoch": 0.4089130604898662, "grad_norm": 0.0, - "learning_rate": 1.6509298336629246e-05, - "loss": 0.8872, + "learning_rate": 1.3368879890885379e-05, + "loss": 1.0665, "step": 10451 }, { - "epoch": 0.29618294652724647, + "epoch": 0.40895218718209564, "grad_norm": 0.0, - "learning_rate": 1.650860158059968e-05, - "loss": 0.9446, + "learning_rate": 1.3367686709059084e-05, + "loss": 1.155, "step": 10452 }, { - "epoch": 0.2962112839695089, + "epoch": 0.4089913138743251, "grad_norm": 0.0, - "learning_rate": 1.650790476974575e-05, - "loss": 0.8313, + "learning_rate": 1.3366493473151966e-05, + "loss": 1.142, "step": 10453 }, { - "epoch": 0.29623962141177135, + "epoch": 0.4090304405665545, "grad_norm": 0.0, - "learning_rate": 1.650720790407332e-05, - "loss": 1.0052, + "learning_rate": 1.3365300183183183e-05, + "loss": 1.063, "step": 10454 }, { - "epoch": 0.29626795885403384, + "epoch": 0.40906956725878396, "grad_norm": 0.0, - "learning_rate": 1.650651098358826e-05, - "loss": 0.8812, + "learning_rate": 1.3364106839171893e-05, + "loss": 1.083, "step": 10455 }, { - "epoch": 0.2962962962962963, + "epoch": 0.4091086939510134, "grad_norm": 0.0, - "learning_rate": 1.6505814008296446e-05, - "loss": 0.9482, + "learning_rate": 1.3362913441137267e-05, + "loss": 1.119, "step": 10456 }, { - "epoch": 0.2963246337385588, + "epoch": 0.40914782064324284, "grad_norm": 0.0, - "learning_rate": 1.650511697820374e-05, - "loss": 0.9662, + "learning_rate": 1.3361719989098458e-05, + "loss": 1.0493, "step": 10457 }, { - "epoch": 0.2963529711808212, + "epoch": 0.4091869473354723, "grad_norm": 0.0, - "learning_rate": 1.650441989331602e-05, - "loss": 0.9495, + "learning_rate": 1.3360526483074643e-05, + "loss": 1.0664, "step": 10458 }, { - "epoch": 0.2963813086230837, + "epoch": 0.4092260740277017, "grad_norm": 0.0, - "learning_rate": 1.6503722753639155e-05, - "loss": 0.8971, + "learning_rate": 1.3359332923084985e-05, + "loss": 0.9941, "step": 10459 }, { - "epoch": 0.29640964606534614, + "epoch": 0.4092652007199311, "grad_norm": 0.0, - "learning_rate": 1.650302555917902e-05, - "loss": 1.018, + "learning_rate": 1.3358139309148645e-05, + "loss": 1.1633, "step": 10460 }, { - "epoch": 0.2964379835076086, + "epoch": 0.40930432741216055, "grad_norm": 0.0, - "learning_rate": 1.6502328309941488e-05, - "loss": 0.9409, + "learning_rate": 1.33569456412848e-05, + "loss": 0.974, "step": 10461 }, { - "epoch": 0.2964663209498711, + "epoch": 0.40934345410439, "grad_norm": 0.0, - "learning_rate": 1.6501631005932425e-05, - "loss": 0.9514, + "learning_rate": 1.3355751919512614e-05, + "loss": 1.1151, "step": 10462 }, { - "epoch": 0.2964946583921335, + "epoch": 0.40938258079661943, "grad_norm": 0.0, - "learning_rate": 1.6500933647157712e-05, - "loss": 0.9128, + "learning_rate": 1.3354558143851252e-05, + "loss": 1.1639, "step": 10463 }, { - "epoch": 0.296522995834396, + "epoch": 0.4094217074888489, "grad_norm": 0.0, - "learning_rate": 1.650023623362322e-05, - "loss": 0.942, + "learning_rate": 1.3353364314319896e-05, + "loss": 1.1454, "step": 10464 }, { - "epoch": 0.29655133327665845, + "epoch": 0.4094608341810783, "grad_norm": 0.0, - "learning_rate": 1.6499538765334825e-05, - "loss": 0.9176, + "learning_rate": 1.3352170430937707e-05, + "loss": 1.0202, "step": 10465 }, { - "epoch": 0.2965796707189209, + "epoch": 0.40949996087330776, "grad_norm": 0.0, - "learning_rate": 1.64988412422984e-05, - "loss": 1.0012, + "learning_rate": 1.3350976493723864e-05, + "loss": 1.1675, "step": 10466 }, { - "epoch": 0.2966080081611834, + "epoch": 0.4095390875655372, "grad_norm": 0.0, - "learning_rate": 1.6498143664519822e-05, - "loss": 0.9506, + "learning_rate": 1.3349782502697535e-05, + "loss": 1.0865, "step": 10467 }, { - "epoch": 0.2966363456034458, + "epoch": 0.40957821425776664, "grad_norm": 0.0, - "learning_rate": 1.649744603200497e-05, - "loss": 0.8983, + "learning_rate": 1.33485884578779e-05, + "loss": 0.9377, "step": 10468 }, { - "epoch": 0.2966646830457083, + "epoch": 0.4096173409499961, "grad_norm": 0.0, - "learning_rate": 1.6496748344759715e-05, - "loss": 1.0838, + "learning_rate": 1.334739435928413e-05, + "loss": 1.0419, "step": 10469 }, { - "epoch": 0.29669302048797075, + "epoch": 0.4096564676422255, "grad_norm": 0.0, - "learning_rate": 1.6496050602789936e-05, - "loss": 0.9658, + "learning_rate": 1.3346200206935398e-05, + "loss": 1.0762, "step": 10470 }, { - "epoch": 0.29672135793023324, + "epoch": 0.40969559433445496, "grad_norm": 0.0, - "learning_rate": 1.649535280610151e-05, - "loss": 0.8289, + "learning_rate": 1.3345006000850887e-05, + "loss": 1.1515, "step": 10471 }, { - "epoch": 0.2967496953724957, + "epoch": 0.4097347210266844, "grad_norm": 0.0, - "learning_rate": 1.649465495470032e-05, - "loss": 0.9332, + "learning_rate": 1.3343811741049768e-05, + "loss": 0.9894, "step": 10472 }, { - "epoch": 0.2967780328147581, + "epoch": 0.40977384771891384, "grad_norm": 0.0, - "learning_rate": 1.6493957048592234e-05, - "loss": 0.8845, + "learning_rate": 1.3342617427551223e-05, + "loss": 1.1159, "step": 10473 }, { - "epoch": 0.2968063702570206, + "epoch": 0.4098129744111433, "grad_norm": 0.0, - "learning_rate": 1.649325908778314e-05, - "loss": 1.0227, + "learning_rate": 1.334142306037443e-05, + "loss": 0.9655, "step": 10474 }, { - "epoch": 0.29683470769928305, + "epoch": 0.4098521011033727, "grad_norm": 0.0, - "learning_rate": 1.649256107227891e-05, - "loss": 1.0481, + "learning_rate": 1.3340228639538572e-05, + "loss": 1.2433, "step": 10475 }, { - "epoch": 0.29686304514154555, + "epoch": 0.40989122779560216, "grad_norm": 0.0, - "learning_rate": 1.6491863002085428e-05, - "loss": 0.8441, + "learning_rate": 1.3339034165062827e-05, + "loss": 1.0582, "step": 10476 }, { - "epoch": 0.296891382583808, + "epoch": 0.4099303544878316, "grad_norm": 0.0, - "learning_rate": 1.6491164877208574e-05, - "loss": 1.006, + "learning_rate": 1.3337839636966377e-05, + "loss": 1.1555, "step": 10477 }, { - "epoch": 0.2969197200260704, + "epoch": 0.40996948118006105, "grad_norm": 0.0, - "learning_rate": 1.649046669765423e-05, - "loss": 1.088, + "learning_rate": 1.3336645055268405e-05, + "loss": 1.0771, "step": 10478 }, { - "epoch": 0.2969480574683329, + "epoch": 0.4100086078722905, "grad_norm": 0.0, - "learning_rate": 1.648976846342827e-05, - "loss": 0.9018, + "learning_rate": 1.3335450419988095e-05, + "loss": 1.1041, "step": 10479 }, { - "epoch": 0.29697639491059535, + "epoch": 0.41004773456451993, "grad_norm": 0.0, - "learning_rate": 1.6489070174536586e-05, - "loss": 0.954, + "learning_rate": 1.3334255731144633e-05, + "loss": 1.2028, "step": 10480 }, { - "epoch": 0.29700473235285785, + "epoch": 0.41008686125674937, "grad_norm": 0.0, - "learning_rate": 1.648837183098505e-05, - "loss": 0.9702, + "learning_rate": 1.3333060988757196e-05, + "loss": 0.982, "step": 10481 }, { - "epoch": 0.2970330697951203, + "epoch": 0.4101259879489788, "grad_norm": 0.0, - "learning_rate": 1.648767343277955e-05, - "loss": 0.9495, + "learning_rate": 1.333186619284498e-05, + "loss": 1.1779, "step": 10482 }, { - "epoch": 0.2970614072373828, + "epoch": 0.41016511464120825, "grad_norm": 0.0, - "learning_rate": 1.6486974979925968e-05, - "loss": 0.8703, + "learning_rate": 1.3330671343427169e-05, + "loss": 1.1104, "step": 10483 }, { - "epoch": 0.2970897446796452, + "epoch": 0.4102042413334377, "grad_norm": 0.0, - "learning_rate": 1.6486276472430186e-05, - "loss": 0.8482, + "learning_rate": 1.3329476440522948e-05, + "loss": 1.0827, "step": 10484 }, { - "epoch": 0.29711808212190766, + "epoch": 0.41024336802566713, "grad_norm": 0.0, - "learning_rate": 1.648557791029809e-05, - "loss": 0.9544, + "learning_rate": 1.3328281484151506e-05, + "loss": 0.87, "step": 10485 }, { - "epoch": 0.29714641956417015, + "epoch": 0.4102824947178966, "grad_norm": 0.0, - "learning_rate": 1.6484879293535562e-05, - "loss": 0.9238, + "learning_rate": 1.3327086474332037e-05, + "loss": 1.097, "step": 10486 }, { - "epoch": 0.2971747570064326, + "epoch": 0.410321621410126, "grad_norm": 0.0, - "learning_rate": 1.648418062214849e-05, - "loss": 0.9958, + "learning_rate": 1.3325891411083727e-05, + "loss": 0.998, "step": 10487 }, { - "epoch": 0.2972030944486951, + "epoch": 0.4103607481023554, "grad_norm": 0.0, - "learning_rate": 1.648348189614275e-05, - "loss": 1.0689, + "learning_rate": 1.3324696294425768e-05, + "loss": 1.197, "step": 10488 }, { - "epoch": 0.2972314318909575, + "epoch": 0.41039987479458484, "grad_norm": 0.0, - "learning_rate": 1.648278311552424e-05, - "loss": 0.9949, + "learning_rate": 1.3323501124377354e-05, + "loss": 1.0221, "step": 10489 }, { - "epoch": 0.29725976933321996, + "epoch": 0.4104390014868143, "grad_norm": 0.0, - "learning_rate": 1.648208428029884e-05, - "loss": 1.0662, + "learning_rate": 1.3322305900957675e-05, + "loss": 1.1158, "step": 10490 }, { - "epoch": 0.29728810677548245, + "epoch": 0.4104781281790437, "grad_norm": 0.0, - "learning_rate": 1.6481385390472438e-05, - "loss": 0.9107, + "learning_rate": 1.3321110624185927e-05, + "loss": 1.2158, "step": 10491 }, { - "epoch": 0.2973164442177449, + "epoch": 0.41051725487127316, "grad_norm": 0.0, - "learning_rate": 1.6480686446050916e-05, - "loss": 1.0672, + "learning_rate": 1.3319915294081303e-05, + "loss": 1.0486, "step": 10492 }, { - "epoch": 0.2973447816600074, + "epoch": 0.4105563815635026, "grad_norm": 0.0, - "learning_rate": 1.647998744704017e-05, - "loss": 0.9094, + "learning_rate": 1.3318719910663001e-05, + "loss": 1.0644, "step": 10493 }, { - "epoch": 0.2973731191022698, + "epoch": 0.41059550825573204, "grad_norm": 0.0, - "learning_rate": 1.647928839344608e-05, - "loss": 1.0302, + "learning_rate": 1.3317524473950214e-05, + "loss": 1.195, "step": 10494 }, { - "epoch": 0.2974014565445323, + "epoch": 0.4106346349479615, "grad_norm": 0.0, - "learning_rate": 1.647858928527454e-05, - "loss": 0.9135, + "learning_rate": 1.3316328983962144e-05, + "loss": 1.0641, "step": 10495 }, { - "epoch": 0.29742979398679475, + "epoch": 0.4106737616401909, "grad_norm": 0.0, - "learning_rate": 1.6477890122531433e-05, - "loss": 1.0013, + "learning_rate": 1.3315133440717982e-05, + "loss": 1.0617, "step": 10496 }, { - "epoch": 0.2974581314290572, + "epoch": 0.41071288833242037, "grad_norm": 0.0, - "learning_rate": 1.647719090522266e-05, - "loss": 0.9521, + "learning_rate": 1.3313937844236935e-05, + "loss": 1.1398, "step": 10497 }, { - "epoch": 0.2974864688713197, + "epoch": 0.4107520150246498, "grad_norm": 0.0, - "learning_rate": 1.6476491633354096e-05, - "loss": 0.8983, + "learning_rate": 1.3312742194538198e-05, + "loss": 0.9958, "step": 10498 }, { - "epoch": 0.2975148063135821, + "epoch": 0.41079114171687925, "grad_norm": 0.0, - "learning_rate": 1.647579230693164e-05, - "loss": 0.9568, + "learning_rate": 1.3311546491640969e-05, + "loss": 0.8824, "step": 10499 }, { - "epoch": 0.2975431437558446, + "epoch": 0.4108302684091087, "grad_norm": 0.0, - "learning_rate": 1.6475092925961177e-05, - "loss": 0.9736, + "learning_rate": 1.3310350735564457e-05, + "loss": 1.0649, "step": 10500 }, { - "epoch": 0.29757148119810706, + "epoch": 0.41086939510133813, "grad_norm": 0.0, - "learning_rate": 1.6474393490448607e-05, - "loss": 0.9756, + "learning_rate": 1.3309154926327859e-05, + "loss": 1.0653, "step": 10501 }, { - "epoch": 0.2975998186403695, + "epoch": 0.41090852179356757, "grad_norm": 0.0, - "learning_rate": 1.6473694000399815e-05, - "loss": 0.9875, + "learning_rate": 1.330795906395038e-05, + "loss": 1.0214, "step": 10502 }, { - "epoch": 0.297628156082632, + "epoch": 0.410947648485797, "grad_norm": 0.0, - "learning_rate": 1.6472994455820694e-05, - "loss": 0.8842, + "learning_rate": 1.3306763148451223e-05, + "loss": 1.1545, "step": 10503 }, { - "epoch": 0.2976564935248944, + "epoch": 0.41098677517802645, "grad_norm": 0.0, - "learning_rate": 1.6472294856717134e-05, - "loss": 1.0721, + "learning_rate": 1.3305567179849594e-05, + "loss": 0.9519, "step": 10504 }, { - "epoch": 0.2976848309671569, + "epoch": 0.4110259018702559, "grad_norm": 0.0, - "learning_rate": 1.6471595203095034e-05, - "loss": 0.9242, + "learning_rate": 1.3304371158164697e-05, + "loss": 1.0484, "step": 10505 }, { - "epoch": 0.29771316840941936, + "epoch": 0.41106502856248534, "grad_norm": 0.0, - "learning_rate": 1.6470895494960286e-05, - "loss": 0.9334, + "learning_rate": 1.330317508341574e-05, + "loss": 1.0089, "step": 10506 }, { - "epoch": 0.29774150585168185, + "epoch": 0.4111041552547148, "grad_norm": 0.0, - "learning_rate": 1.6470195732318784e-05, - "loss": 0.9336, + "learning_rate": 1.330197895562193e-05, + "loss": 1.1211, "step": 10507 }, { - "epoch": 0.2977698432939443, + "epoch": 0.4111432819469442, "grad_norm": 0.0, - "learning_rate": 1.6469495915176414e-05, - "loss": 1.0491, + "learning_rate": 1.3300782774802476e-05, + "loss": 1.1471, "step": 10508 }, { - "epoch": 0.29779818073620673, + "epoch": 0.41118240863917366, "grad_norm": 0.0, - "learning_rate": 1.6468796043539082e-05, - "loss": 0.9518, + "learning_rate": 1.3299586540976588e-05, + "loss": 1.1099, "step": 10509 }, { - "epoch": 0.2978265181784692, + "epoch": 0.4112215353314031, "grad_norm": 0.0, - "learning_rate": 1.6468096117412676e-05, - "loss": 0.8937, + "learning_rate": 1.3298390254163473e-05, + "loss": 1.1584, "step": 10510 }, { - "epoch": 0.29785485562073166, + "epoch": 0.41126066202363254, "grad_norm": 0.0, - "learning_rate": 1.64673961368031e-05, - "loss": 0.9984, + "learning_rate": 1.3297193914382344e-05, + "loss": 1.1017, "step": 10511 }, { - "epoch": 0.29788319306299416, + "epoch": 0.411299788715862, "grad_norm": 0.0, - "learning_rate": 1.646669610171624e-05, - "loss": 0.9524, + "learning_rate": 1.3295997521652413e-05, + "loss": 1.1384, "step": 10512 }, { - "epoch": 0.2979115305052566, + "epoch": 0.4113389154080914, "grad_norm": 0.0, - "learning_rate": 1.6465996012157996e-05, - "loss": 1.0098, + "learning_rate": 1.3294801075992892e-05, + "loss": 1.0331, "step": 10513 }, { - "epoch": 0.29793986794751903, + "epoch": 0.41137804210032086, "grad_norm": 0.0, - "learning_rate": 1.646529586813427e-05, - "loss": 0.9491, + "learning_rate": 1.3293604577422992e-05, + "loss": 1.0646, "step": 10514 }, { - "epoch": 0.2979682053897815, + "epoch": 0.4114171687925503, "grad_norm": 0.0, - "learning_rate": 1.6464595669650954e-05, - "loss": 0.977, + "learning_rate": 1.3292408025961934e-05, + "loss": 0.9855, "step": 10515 }, { - "epoch": 0.29799654283204396, + "epoch": 0.41145629548477974, "grad_norm": 0.0, - "learning_rate": 1.6463895416713952e-05, - "loss": 1.0444, + "learning_rate": 1.3291211421628924e-05, + "loss": 1.0631, "step": 10516 }, { - "epoch": 0.29802488027430646, + "epoch": 0.41149542217700913, "grad_norm": 0.0, - "learning_rate": 1.6463195109329156e-05, - "loss": 0.9176, + "learning_rate": 1.3290014764443186e-05, + "loss": 1.1486, "step": 10517 }, { - "epoch": 0.2980532177165689, + "epoch": 0.41153454886923857, "grad_norm": 0.0, - "learning_rate": 1.6462494747502467e-05, - "loss": 1.0066, + "learning_rate": 1.3288818054423933e-05, + "loss": 1.1797, "step": 10518 }, { - "epoch": 0.2980815551588314, + "epoch": 0.411573675561468, "grad_norm": 0.0, - "learning_rate": 1.6461794331239785e-05, - "loss": 0.8252, + "learning_rate": 1.3287621291590383e-05, + "loss": 1.0273, "step": 10519 }, { - "epoch": 0.29810989260109383, + "epoch": 0.41161280225369745, "grad_norm": 0.0, - "learning_rate": 1.646109386054701e-05, - "loss": 0.8702, + "learning_rate": 1.3286424475961755e-05, + "loss": 1.0822, "step": 10520 }, { - "epoch": 0.29813823004335627, + "epoch": 0.4116519289459269, "grad_norm": 0.0, - "learning_rate": 1.646039333543004e-05, - "loss": 1.067, + "learning_rate": 1.3285227607557265e-05, + "loss": 1.1175, "step": 10521 }, { - "epoch": 0.29816656748561876, + "epoch": 0.41169105563815633, "grad_norm": 0.0, - "learning_rate": 1.645969275589478e-05, - "loss": 0.8951, + "learning_rate": 1.328403068639614e-05, + "loss": 1.0457, "step": 10522 }, { - "epoch": 0.2981949049278812, + "epoch": 0.4117301823303858, "grad_norm": 0.0, - "learning_rate": 1.645899212194713e-05, - "loss": 0.8654, + "learning_rate": 1.3282833712497594e-05, + "loss": 1.1137, "step": 10523 }, { - "epoch": 0.2982232423701437, + "epoch": 0.4117693090226152, "grad_norm": 0.0, - "learning_rate": 1.645829143359299e-05, - "loss": 0.9075, + "learning_rate": 1.3281636685880855e-05, + "loss": 1.0298, "step": 10524 }, { - "epoch": 0.29825157981240613, + "epoch": 0.41180843571484466, "grad_norm": 0.0, - "learning_rate": 1.645759069083826e-05, - "loss": 0.9391, + "learning_rate": 1.3280439606565141e-05, + "loss": 1.1218, "step": 10525 }, { - "epoch": 0.29827991725466857, + "epoch": 0.4118475624070741, "grad_norm": 0.0, - "learning_rate": 1.6456889893688855e-05, - "loss": 0.9211, + "learning_rate": 1.3279242474569678e-05, + "loss": 1.1143, "step": 10526 }, { - "epoch": 0.29830825469693106, + "epoch": 0.41188668909930354, "grad_norm": 0.0, - "learning_rate": 1.645618904215066e-05, - "loss": 0.9199, + "learning_rate": 1.3278045289913693e-05, + "loss": 0.9869, "step": 10527 }, { - "epoch": 0.2983365921391935, + "epoch": 0.411925815791533, "grad_norm": 0.0, - "learning_rate": 1.6455488136229592e-05, - "loss": 1.0012, + "learning_rate": 1.3276848052616405e-05, + "loss": 1.0596, "step": 10528 }, { - "epoch": 0.298364929581456, + "epoch": 0.4119649424837624, "grad_norm": 0.0, - "learning_rate": 1.6454787175931547e-05, - "loss": 1.0029, + "learning_rate": 1.3275650762697043e-05, + "loss": 1.1321, "step": 10529 }, { - "epoch": 0.29839326702371843, + "epoch": 0.41200406917599186, "grad_norm": 0.0, - "learning_rate": 1.6454086161262436e-05, - "loss": 0.9487, + "learning_rate": 1.3274453420174835e-05, + "loss": 1.0134, "step": 10530 }, { - "epoch": 0.2984216044659809, + "epoch": 0.4120431958682213, "grad_norm": 0.0, - "learning_rate": 1.645338509222816e-05, - "loss": 1.0363, + "learning_rate": 1.327325602506901e-05, + "loss": 0.9236, "step": 10531 }, { - "epoch": 0.29844994190824337, + "epoch": 0.41208232256045074, "grad_norm": 0.0, - "learning_rate": 1.645268396883462e-05, - "loss": 0.9528, + "learning_rate": 1.3272058577398792e-05, + "loss": 1.1233, "step": 10532 }, { - "epoch": 0.2984782793505058, + "epoch": 0.4121214492526802, "grad_norm": 0.0, - "learning_rate": 1.645198279108773e-05, - "loss": 0.9228, + "learning_rate": 1.3270861077183416e-05, + "loss": 1.1288, "step": 10533 }, { - "epoch": 0.2985066167927683, + "epoch": 0.4121605759449096, "grad_norm": 0.0, - "learning_rate": 1.6451281558993394e-05, - "loss": 0.9247, + "learning_rate": 1.326966352444211e-05, + "loss": 1.0878, "step": 10534 }, { - "epoch": 0.29853495423503074, + "epoch": 0.41219970263713906, "grad_norm": 0.0, - "learning_rate": 1.6450580272557516e-05, - "loss": 0.9135, + "learning_rate": 1.3268465919194103e-05, + "loss": 1.1001, "step": 10535 }, { - "epoch": 0.29856329167729323, + "epoch": 0.4122388293293685, "grad_norm": 0.0, - "learning_rate": 1.6449878931786007e-05, - "loss": 1.0192, + "learning_rate": 1.3267268261458628e-05, + "loss": 1.1353, "step": 10536 }, { - "epoch": 0.29859162911955567, + "epoch": 0.41227795602159795, "grad_norm": 0.0, - "learning_rate": 1.644917753668477e-05, - "loss": 0.9124, + "learning_rate": 1.3266070551254922e-05, + "loss": 1.1681, "step": 10537 }, { - "epoch": 0.2986199665618181, + "epoch": 0.4123170827138274, "grad_norm": 0.0, - "learning_rate": 1.644847608725972e-05, - "loss": 0.882, + "learning_rate": 1.3264872788602215e-05, + "loss": 0.9735, "step": 10538 }, { - "epoch": 0.2986483040040806, + "epoch": 0.41235620940605683, "grad_norm": 0.0, - "learning_rate": 1.6447774583516756e-05, - "loss": 1.0104, + "learning_rate": 1.3263674973519737e-05, + "loss": 1.1335, "step": 10539 }, { - "epoch": 0.29867664144634304, + "epoch": 0.41239533609828627, "grad_norm": 0.0, - "learning_rate": 1.6447073025461797e-05, - "loss": 0.9783, + "learning_rate": 1.3262477106026737e-05, + "loss": 1.1416, "step": 10540 }, { - "epoch": 0.29870497888860553, + "epoch": 0.4124344627905157, "grad_norm": 0.0, - "learning_rate": 1.6446371413100746e-05, - "loss": 0.8413, + "learning_rate": 1.3261279186142435e-05, + "loss": 0.9856, "step": 10541 }, { - "epoch": 0.29873331633086797, + "epoch": 0.41247358948274515, "grad_norm": 0.0, - "learning_rate": 1.6445669746439514e-05, - "loss": 0.8907, + "learning_rate": 1.326008121388608e-05, + "loss": 0.965, "step": 10542 }, { - "epoch": 0.29876165377313046, + "epoch": 0.4125127161749746, "grad_norm": 0.0, - "learning_rate": 1.6444968025484015e-05, - "loss": 0.8365, + "learning_rate": 1.3258883189276906e-05, + "loss": 1.0952, "step": 10543 }, { - "epoch": 0.2987899912153929, + "epoch": 0.41255184286720403, "grad_norm": 0.0, - "learning_rate": 1.644426625024015e-05, - "loss": 0.8401, + "learning_rate": 1.3257685112334149e-05, + "loss": 1.089, "step": 10544 }, { - "epoch": 0.29881832865765534, + "epoch": 0.4125909695594334, "grad_norm": 0.0, - "learning_rate": 1.6443564420713846e-05, - "loss": 0.8677, + "learning_rate": 1.3256486983077055e-05, + "loss": 1.0134, "step": 10545 }, { - "epoch": 0.29884666609991783, + "epoch": 0.41263009625166286, "grad_norm": 0.0, - "learning_rate": 1.6442862536911e-05, - "loss": 0.9232, + "learning_rate": 1.325528880152486e-05, + "loss": 1.1746, "step": 10546 }, { - "epoch": 0.2988750035421803, + "epoch": 0.4126692229438923, "grad_norm": 0.0, - "learning_rate": 1.6442160598837532e-05, - "loss": 0.9879, + "learning_rate": 1.3254090567696802e-05, + "loss": 1.0415, "step": 10547 }, { - "epoch": 0.29890334098444277, + "epoch": 0.41270834963612174, "grad_norm": 0.0, - "learning_rate": 1.6441458606499355e-05, - "loss": 1.0066, + "learning_rate": 1.325289228161213e-05, + "loss": 1.1668, "step": 10548 }, { - "epoch": 0.2989316784267052, + "epoch": 0.4127474763283512, "grad_norm": 0.0, - "learning_rate": 1.644075655990238e-05, - "loss": 0.9769, + "learning_rate": 1.3251693943290084e-05, + "loss": 1.0587, "step": 10549 }, { - "epoch": 0.29896001586896764, + "epoch": 0.4127866030205806, "grad_norm": 0.0, - "learning_rate": 1.644005445905252e-05, - "loss": 0.803, + "learning_rate": 1.325049555274991e-05, + "loss": 1.1442, "step": 10550 }, { - "epoch": 0.29898835331123014, + "epoch": 0.41282572971281006, "grad_norm": 0.0, - "learning_rate": 1.643935230395569e-05, - "loss": 0.9092, + "learning_rate": 1.3249297110010847e-05, + "loss": 0.9775, "step": 10551 }, { - "epoch": 0.2990166907534926, + "epoch": 0.4128648564050395, "grad_norm": 0.0, - "learning_rate": 1.6438650094617804e-05, - "loss": 0.9991, + "learning_rate": 1.3248098615092144e-05, + "loss": 1.1205, "step": 10552 }, { - "epoch": 0.29904502819575507, + "epoch": 0.41290398309726895, "grad_norm": 0.0, - "learning_rate": 1.6437947831044776e-05, - "loss": 0.9924, + "learning_rate": 1.324690006801305e-05, + "loss": 1.146, "step": 10553 }, { - "epoch": 0.2990733656380175, + "epoch": 0.4129431097894984, "grad_norm": 0.0, - "learning_rate": 1.6437245513242523e-05, - "loss": 0.965, + "learning_rate": 1.3245701468792804e-05, + "loss": 1.126, "step": 10554 }, { - "epoch": 0.29910170308028, + "epoch": 0.4129822364817278, "grad_norm": 0.0, - "learning_rate": 1.6436543141216962e-05, - "loss": 0.8928, + "learning_rate": 1.3244502817450666e-05, + "loss": 1.0894, "step": 10555 }, { - "epoch": 0.29913004052254244, + "epoch": 0.41302136317395727, "grad_norm": 0.0, - "learning_rate": 1.6435840714974008e-05, - "loss": 0.8861, + "learning_rate": 1.3243304114005878e-05, + "loss": 1.1645, "step": 10556 }, { - "epoch": 0.2991583779648049, + "epoch": 0.4130604898661867, "grad_norm": 0.0, - "learning_rate": 1.643513823451958e-05, - "loss": 1.0424, + "learning_rate": 1.3242105358477684e-05, + "loss": 1.0992, "step": 10557 }, { - "epoch": 0.29918671540706737, + "epoch": 0.41309961655841615, "grad_norm": 0.0, - "learning_rate": 1.643443569985959e-05, - "loss": 0.9012, + "learning_rate": 1.3240906550885347e-05, + "loss": 1.0434, "step": 10558 }, { - "epoch": 0.2992150528493298, + "epoch": 0.4131387432506456, "grad_norm": 0.0, - "learning_rate": 1.6433733110999956e-05, - "loss": 0.9148, + "learning_rate": 1.3239707691248107e-05, + "loss": 1.0515, "step": 10559 }, { - "epoch": 0.2992433902915923, + "epoch": 0.41317786994287503, "grad_norm": 0.0, - "learning_rate": 1.6433030467946603e-05, - "loss": 0.9818, + "learning_rate": 1.3238508779585224e-05, + "loss": 0.9972, "step": 10560 }, { - "epoch": 0.29927172773385474, + "epoch": 0.41321699663510447, "grad_norm": 0.0, - "learning_rate": 1.643232777070545e-05, - "loss": 1.0291, + "learning_rate": 1.3237309815915946e-05, + "loss": 1.0951, "step": 10561 }, { - "epoch": 0.2993000651761172, + "epoch": 0.4132561233273339, "grad_norm": 0.0, - "learning_rate": 1.6431625019282402e-05, - "loss": 1.0474, + "learning_rate": 1.3236110800259531e-05, + "loss": 1.1014, "step": 10562 }, { - "epoch": 0.2993284026183797, + "epoch": 0.41329525001956335, "grad_norm": 0.0, - "learning_rate": 1.6430922213683393e-05, - "loss": 1.0383, + "learning_rate": 1.323491173263523e-05, + "loss": 1.1254, "step": 10563 }, { - "epoch": 0.2993567400606421, + "epoch": 0.4133343767117928, "grad_norm": 0.0, - "learning_rate": 1.6430219353914336e-05, - "loss": 1.1289, + "learning_rate": 1.3233712613062301e-05, + "loss": 1.1533, "step": 10564 }, { - "epoch": 0.2993850775029046, + "epoch": 0.41337350340402224, "grad_norm": 0.0, - "learning_rate": 1.6429516439981157e-05, - "loss": 0.9887, + "learning_rate": 1.323251344156e-05, + "loss": 1.0887, "step": 10565 }, { - "epoch": 0.29941341494516704, + "epoch": 0.4134126300962517, "grad_norm": 0.0, - "learning_rate": 1.642881347188977e-05, - "loss": 1.0036, + "learning_rate": 1.3231314218147584e-05, + "loss": 1.0495, "step": 10566 }, { - "epoch": 0.2994417523874295, + "epoch": 0.4134517567884811, "grad_norm": 0.0, - "learning_rate": 1.64281104496461e-05, - "loss": 0.9639, + "learning_rate": 1.323011494284431e-05, + "loss": 1.0406, "step": 10567 }, { - "epoch": 0.299470089829692, + "epoch": 0.41349088348071056, "grad_norm": 0.0, - "learning_rate": 1.6427407373256073e-05, - "loss": 0.962, + "learning_rate": 1.3228915615669436e-05, + "loss": 1.0503, "step": 10568 }, { - "epoch": 0.2994984272719544, + "epoch": 0.41353001017294, "grad_norm": 0.0, - "learning_rate": 1.6426704242725603e-05, - "loss": 1.0018, + "learning_rate": 1.3227716236642226e-05, + "loss": 1.1504, "step": 10569 }, { - "epoch": 0.2995267647142169, + "epoch": 0.41356913686516944, "grad_norm": 0.0, - "learning_rate": 1.6426001058060616e-05, - "loss": 1.0336, + "learning_rate": 1.3226516805781934e-05, + "loss": 1.135, "step": 10570 }, { - "epoch": 0.29955510215647935, + "epoch": 0.4136082635573989, "grad_norm": 0.0, - "learning_rate": 1.642529781926704e-05, - "loss": 1.0369, + "learning_rate": 1.322531732310783e-05, + "loss": 1.1162, "step": 10571 }, { - "epoch": 0.29958343959874184, + "epoch": 0.4136473902496283, "grad_norm": 0.0, - "learning_rate": 1.642459452635079e-05, - "loss": 0.935, + "learning_rate": 1.3224117788639168e-05, + "loss": 1.1016, "step": 10572 }, { - "epoch": 0.2996117770410043, + "epoch": 0.41368651694185776, "grad_norm": 0.0, - "learning_rate": 1.6423891179317796e-05, - "loss": 0.8889, + "learning_rate": 1.3222918202395217e-05, + "loss": 1.1149, "step": 10573 }, { - "epoch": 0.2996401144832667, + "epoch": 0.41372564363408715, "grad_norm": 0.0, - "learning_rate": 1.6423187778173983e-05, - "loss": 1.0221, + "learning_rate": 1.3221718564395234e-05, + "loss": 1.162, "step": 10574 }, { - "epoch": 0.2996684519255292, + "epoch": 0.4137647703263166, "grad_norm": 0.0, - "learning_rate": 1.642248432292527e-05, - "loss": 0.9488, + "learning_rate": 1.3220518874658492e-05, + "loss": 1.0191, "step": 10575 }, { - "epoch": 0.29969678936779165, + "epoch": 0.41380389701854603, "grad_norm": 0.0, - "learning_rate": 1.6421780813577593e-05, - "loss": 0.8737, + "learning_rate": 1.3219319133204251e-05, + "loss": 0.9699, "step": 10576 }, { - "epoch": 0.29972512681005414, + "epoch": 0.41384302371077547, "grad_norm": 0.0, - "learning_rate": 1.6421077250136867e-05, - "loss": 1.1403, + "learning_rate": 1.3218119340051778e-05, + "loss": 1.1317, "step": 10577 }, { - "epoch": 0.2997534642523166, + "epoch": 0.4138821504030049, "grad_norm": 0.0, - "learning_rate": 1.6420373632609026e-05, - "loss": 0.9043, + "learning_rate": 1.3216919495220344e-05, + "loss": 1.1417, "step": 10578 }, { - "epoch": 0.299781801694579, + "epoch": 0.41392127709523435, "grad_norm": 0.0, - "learning_rate": 1.641966996099999e-05, - "loss": 0.9394, + "learning_rate": 1.3215719598729208e-05, + "loss": 1.1302, "step": 10579 }, { - "epoch": 0.2998101391368415, + "epoch": 0.4139604037874638, "grad_norm": 0.0, - "learning_rate": 1.6418966235315694e-05, - "loss": 0.9214, + "learning_rate": 1.321451965059765e-05, + "loss": 1.1019, "step": 10580 }, { - "epoch": 0.29983847657910395, + "epoch": 0.41399953047969323, "grad_norm": 0.0, - "learning_rate": 1.641826245556206e-05, - "loss": 0.9013, + "learning_rate": 1.321331965084493e-05, + "loss": 1.098, "step": 10581 }, { - "epoch": 0.29986681402136645, + "epoch": 0.4140386571719227, "grad_norm": 0.0, - "learning_rate": 1.6417558621745014e-05, - "loss": 0.9654, + "learning_rate": 1.3212119599490327e-05, + "loss": 1.0141, "step": 10582 }, { - "epoch": 0.2998951514636289, + "epoch": 0.4140777838641521, "grad_norm": 0.0, - "learning_rate": 1.641685473387049e-05, - "loss": 0.9369, + "learning_rate": 1.3210919496553107e-05, + "loss": 1.1039, "step": 10583 }, { - "epoch": 0.2999234889058914, + "epoch": 0.41411691055638156, "grad_norm": 0.0, - "learning_rate": 1.6416150791944422e-05, - "loss": 0.9447, + "learning_rate": 1.3209719342052545e-05, + "loss": 1.1494, "step": 10584 }, { - "epoch": 0.2999518263481538, + "epoch": 0.414156037248611, "grad_norm": 0.0, - "learning_rate": 1.641544679597273e-05, - "loss": 1.0223, + "learning_rate": 1.3208519136007912e-05, + "loss": 1.0414, "step": 10585 }, { - "epoch": 0.29998016379041625, + "epoch": 0.41419516394084044, "grad_norm": 0.0, - "learning_rate": 1.6414742745961346e-05, - "loss": 0.9688, + "learning_rate": 1.3207318878438478e-05, + "loss": 1.0368, "step": 10586 }, { - "epoch": 0.30000850123267875, + "epoch": 0.4142342906330699, "grad_norm": 0.0, - "learning_rate": 1.64140386419162e-05, - "loss": 1.0268, + "learning_rate": 1.3206118569363526e-05, + "loss": 1.1293, "step": 10587 }, { - "epoch": 0.3000368386749412, + "epoch": 0.4142734173252993, "grad_norm": 0.0, - "learning_rate": 1.6413334483843225e-05, - "loss": 1.087, + "learning_rate": 1.3204918208802324e-05, + "loss": 1.0925, "step": 10588 }, { - "epoch": 0.3000651761172037, + "epoch": 0.41431254401752876, "grad_norm": 0.0, - "learning_rate": 1.6412630271748354e-05, - "loss": 0.9288, + "learning_rate": 1.3203717796774154e-05, + "loss": 1.0927, "step": 10589 }, { - "epoch": 0.3000935135594661, + "epoch": 0.4143516707097582, "grad_norm": 0.0, - "learning_rate": 1.6411926005637518e-05, - "loss": 0.9505, + "learning_rate": 1.3202517333298292e-05, + "loss": 1.0105, "step": 10590 }, { - "epoch": 0.30012185100172856, + "epoch": 0.41439079740198764, "grad_norm": 0.0, - "learning_rate": 1.641122168551665e-05, - "loss": 0.956, + "learning_rate": 1.3201316818394012e-05, + "loss": 1.1331, "step": 10591 }, { - "epoch": 0.30015018844399105, + "epoch": 0.4144299240942171, "grad_norm": 0.0, - "learning_rate": 1.6410517311391674e-05, - "loss": 1.0135, + "learning_rate": 1.3200116252080596e-05, + "loss": 1.0226, "step": 10592 }, { - "epoch": 0.3001785258862535, + "epoch": 0.4144690507864465, "grad_norm": 0.0, - "learning_rate": 1.6409812883268535e-05, - "loss": 0.9538, + "learning_rate": 1.3198915634377326e-05, + "loss": 1.1069, "step": 10593 }, { - "epoch": 0.300206863328516, + "epoch": 0.41450817747867597, "grad_norm": 0.0, - "learning_rate": 1.6409108401153164e-05, - "loss": 0.8882, + "learning_rate": 1.3197714965303474e-05, + "loss": 1.1693, "step": 10594 }, { - "epoch": 0.3002352007707784, + "epoch": 0.4145473041709054, "grad_norm": 0.0, - "learning_rate": 1.640840386505149e-05, - "loss": 0.9035, + "learning_rate": 1.319651424487833e-05, + "loss": 1.1247, "step": 10595 }, { - "epoch": 0.3002635382130409, + "epoch": 0.41458643086313485, "grad_norm": 0.0, - "learning_rate": 1.640769927496945e-05, - "loss": 0.955, + "learning_rate": 1.3195313473121172e-05, + "loss": 1.1273, "step": 10596 }, { - "epoch": 0.30029187565530335, + "epoch": 0.4146255575553643, "grad_norm": 0.0, - "learning_rate": 1.640699463091298e-05, - "loss": 0.9035, + "learning_rate": 1.3194112650051282e-05, + "loss": 1.1973, "step": 10597 }, { - "epoch": 0.3003202130975658, + "epoch": 0.41466468424759373, "grad_norm": 0.0, - "learning_rate": 1.6406289932888016e-05, - "loss": 0.8732, + "learning_rate": 1.3192911775687949e-05, + "loss": 1.0969, "step": 10598 }, { - "epoch": 0.3003485505398283, + "epoch": 0.41470381093982317, "grad_norm": 0.0, - "learning_rate": 1.640558518090049e-05, - "loss": 0.94, + "learning_rate": 1.319171085005045e-05, + "loss": 1.1185, "step": 10599 }, { - "epoch": 0.3003768879820907, + "epoch": 0.4147429376320526, "grad_norm": 0.0, - "learning_rate": 1.6404880374956347e-05, - "loss": 0.97, + "learning_rate": 1.3190509873158076e-05, + "loss": 1.1541, "step": 10600 }, { - "epoch": 0.3004052254243532, + "epoch": 0.41478206432428205, "grad_norm": 0.0, - "learning_rate": 1.6404175515061514e-05, - "loss": 0.8973, + "learning_rate": 1.3189308845030109e-05, + "loss": 1.1302, "step": 10601 }, { - "epoch": 0.30043356286661566, + "epoch": 0.41482119101651144, "grad_norm": 0.0, - "learning_rate": 1.6403470601221934e-05, - "loss": 0.997, + "learning_rate": 1.3188107765685842e-05, + "loss": 1.0368, "step": 10602 }, { - "epoch": 0.3004619003088781, + "epoch": 0.4148603177087409, "grad_norm": 0.0, - "learning_rate": 1.6402765633443546e-05, - "loss": 0.9746, + "learning_rate": 1.3186906635144556e-05, + "loss": 1.0108, "step": 10603 }, { - "epoch": 0.3004902377511406, + "epoch": 0.4148994444009703, "grad_norm": 0.0, - "learning_rate": 1.640206061173228e-05, - "loss": 1.0455, + "learning_rate": 1.3185705453425546e-05, + "loss": 1.0775, "step": 10604 }, { - "epoch": 0.300518575193403, + "epoch": 0.41493857109319976, "grad_norm": 0.0, - "learning_rate": 1.640135553609408e-05, - "loss": 1.017, + "learning_rate": 1.3184504220548097e-05, + "loss": 0.9739, "step": 10605 }, { - "epoch": 0.3005469126356655, + "epoch": 0.4149776977854292, "grad_norm": 0.0, - "learning_rate": 1.640065040653489e-05, - "loss": 0.985, + "learning_rate": 1.3183302936531499e-05, + "loss": 1.1293, "step": 10606 }, { - "epoch": 0.30057525007792796, + "epoch": 0.41501682447765864, "grad_norm": 0.0, - "learning_rate": 1.639994522306064e-05, - "loss": 0.9168, + "learning_rate": 1.3182101601395047e-05, + "loss": 1.192, "step": 10607 }, { - "epoch": 0.30060358752019045, + "epoch": 0.4150559511698881, "grad_norm": 0.0, - "learning_rate": 1.639923998567728e-05, - "loss": 1.0319, + "learning_rate": 1.3180900215158028e-05, + "loss": 1.0889, "step": 10608 }, { - "epoch": 0.3006319249624529, + "epoch": 0.4150950778621175, "grad_norm": 0.0, - "learning_rate": 1.639853469439074e-05, - "loss": 1.0194, + "learning_rate": 1.3179698777839742e-05, + "loss": 1.1627, "step": 10609 }, { - "epoch": 0.30066026240471533, + "epoch": 0.41513420455434696, "grad_norm": 0.0, - "learning_rate": 1.6397829349206968e-05, - "loss": 1.0215, + "learning_rate": 1.3178497289459474e-05, + "loss": 1.1276, "step": 10610 }, { - "epoch": 0.3006885998469778, + "epoch": 0.4151733312465764, "grad_norm": 0.0, - "learning_rate": 1.6397123950131906e-05, - "loss": 0.9724, + "learning_rate": 1.3177295750036522e-05, + "loss": 1.1494, "step": 10611 }, { - "epoch": 0.30071693728924026, + "epoch": 0.41521245793880585, "grad_norm": 0.0, - "learning_rate": 1.6396418497171488e-05, - "loss": 0.9183, + "learning_rate": 1.3176094159590185e-05, + "loss": 1.1954, "step": 10612 }, { - "epoch": 0.30074527473150275, + "epoch": 0.4152515846310353, "grad_norm": 0.0, - "learning_rate": 1.639571299033167e-05, - "loss": 0.9338, + "learning_rate": 1.3174892518139752e-05, + "loss": 0.9987, "step": 10613 }, { - "epoch": 0.3007736121737652, + "epoch": 0.4152907113232647, "grad_norm": 0.0, - "learning_rate": 1.639500742961838e-05, - "loss": 0.9458, + "learning_rate": 1.3173690825704529e-05, + "loss": 1.0295, "step": 10614 }, { - "epoch": 0.30080194961602763, + "epoch": 0.41532983801549417, "grad_norm": 0.0, - "learning_rate": 1.639430181503757e-05, - "loss": 0.9967, + "learning_rate": 1.3172489082303802e-05, + "loss": 1.1039, "step": 10615 }, { - "epoch": 0.3008302870582901, + "epoch": 0.4153689647077236, "grad_norm": 0.0, - "learning_rate": 1.639359614659518e-05, - "loss": 0.9284, + "learning_rate": 1.317128728795688e-05, + "loss": 1.1323, "step": 10616 }, { - "epoch": 0.30085862450055256, + "epoch": 0.41540809139995305, "grad_norm": 0.0, - "learning_rate": 1.639289042429716e-05, - "loss": 0.9451, + "learning_rate": 1.3170085442683056e-05, + "loss": 1.0254, "step": 10617 }, { - "epoch": 0.30088696194281506, + "epoch": 0.4154472180921825, "grad_norm": 0.0, - "learning_rate": 1.6392184648149443e-05, - "loss": 1.0424, + "learning_rate": 1.3168883546501631e-05, + "loss": 1.0254, "step": 10618 }, { - "epoch": 0.3009152993850775, + "epoch": 0.41548634478441193, "grad_norm": 0.0, - "learning_rate": 1.6391478818157987e-05, - "loss": 0.929, + "learning_rate": 1.3167681599431909e-05, + "loss": 1.0885, "step": 10619 }, { - "epoch": 0.30094363682734, + "epoch": 0.4155254714766414, "grad_norm": 0.0, - "learning_rate": 1.6390772934328728e-05, - "loss": 0.9684, + "learning_rate": 1.316647960149319e-05, + "loss": 1.0959, "step": 10620 }, { - "epoch": 0.3009719742696024, + "epoch": 0.4155645981688708, "grad_norm": 0.0, - "learning_rate": 1.6390066996667617e-05, - "loss": 0.9247, + "learning_rate": 1.3165277552704774e-05, + "loss": 0.9682, "step": 10621 }, { - "epoch": 0.30100031171186487, + "epoch": 0.41560372486110025, "grad_norm": 0.0, - "learning_rate": 1.63893610051806e-05, - "loss": 0.9508, + "learning_rate": 1.316407545308597e-05, + "loss": 0.9606, "step": 10622 }, { - "epoch": 0.30102864915412736, + "epoch": 0.4156428515533297, "grad_norm": 0.0, - "learning_rate": 1.638865495987362e-05, - "loss": 1.031, + "learning_rate": 1.3162873302656077e-05, + "loss": 1.0463, "step": 10623 }, { - "epoch": 0.3010569865963898, + "epoch": 0.41568197824555914, "grad_norm": 0.0, - "learning_rate": 1.638794886075263e-05, - "loss": 0.9285, + "learning_rate": 1.3161671101434403e-05, + "loss": 1.0139, "step": 10624 }, { - "epoch": 0.3010853240386523, + "epoch": 0.4157211049377886, "grad_norm": 0.0, - "learning_rate": 1.638724270782357e-05, - "loss": 1.0356, + "learning_rate": 1.3160468849440253e-05, + "loss": 0.9686, "step": 10625 }, { - "epoch": 0.30111366148091473, + "epoch": 0.415760231630018, "grad_norm": 0.0, - "learning_rate": 1.6386536501092398e-05, - "loss": 0.8371, + "learning_rate": 1.3159266546692933e-05, + "loss": 1.1225, "step": 10626 }, { - "epoch": 0.30114199892317717, + "epoch": 0.41579935832224746, "grad_norm": 0.0, - "learning_rate": 1.6385830240565052e-05, - "loss": 0.8682, + "learning_rate": 1.3158064193211753e-05, + "loss": 1.0477, "step": 10627 }, { - "epoch": 0.30117033636543966, + "epoch": 0.4158384850144769, "grad_norm": 0.0, - "learning_rate": 1.638512392624749e-05, - "loss": 0.9183, + "learning_rate": 1.3156861789016016e-05, + "loss": 1.0999, "step": 10628 }, { - "epoch": 0.3011986738077021, + "epoch": 0.41587761170670634, "grad_norm": 0.0, - "learning_rate": 1.6384417558145654e-05, - "loss": 0.8079, + "learning_rate": 1.3155659334125037e-05, + "loss": 1.0888, "step": 10629 }, { - "epoch": 0.3012270112499646, + "epoch": 0.4159167383989358, "grad_norm": 0.0, - "learning_rate": 1.6383711136265504e-05, - "loss": 0.9611, + "learning_rate": 1.3154456828558119e-05, + "loss": 1.0509, "step": 10630 }, { - "epoch": 0.30125534869222703, + "epoch": 0.41595586509116517, "grad_norm": 0.0, - "learning_rate": 1.6383004660612983e-05, - "loss": 0.9403, + "learning_rate": 1.3153254272334583e-05, + "loss": 0.9993, "step": 10631 }, { - "epoch": 0.3012836861344895, + "epoch": 0.4159949917833946, "grad_norm": 0.0, - "learning_rate": 1.638229813119404e-05, - "loss": 0.9599, + "learning_rate": 1.3152051665473732e-05, + "loss": 0.9445, "step": 10632 }, { - "epoch": 0.30131202357675196, + "epoch": 0.41603411847562405, "grad_norm": 0.0, - "learning_rate": 1.638159154801463e-05, - "loss": 0.9246, + "learning_rate": 1.3150849007994882e-05, + "loss": 1.0575, "step": 10633 }, { - "epoch": 0.3013403610190144, + "epoch": 0.4160732451678535, "grad_norm": 0.0, - "learning_rate": 1.638088491108071e-05, - "loss": 0.9595, + "learning_rate": 1.3149646299917342e-05, + "loss": 1.2632, "step": 10634 }, { - "epoch": 0.3013686984612769, + "epoch": 0.41611237186008293, "grad_norm": 0.0, - "learning_rate": 1.6380178220398226e-05, - "loss": 1.0732, + "learning_rate": 1.3148443541260433e-05, + "loss": 1.0763, "step": 10635 }, { - "epoch": 0.30139703590353933, + "epoch": 0.41615149855231237, "grad_norm": 0.0, - "learning_rate": 1.6379471475973128e-05, - "loss": 0.9871, + "learning_rate": 1.3147240732043466e-05, + "loss": 1.0286, "step": 10636 }, { - "epoch": 0.30142537334580183, + "epoch": 0.4161906252445418, "grad_norm": 0.0, - "learning_rate": 1.6378764677811375e-05, - "loss": 0.9574, + "learning_rate": 1.3146037872285753e-05, + "loss": 0.9878, "step": 10637 }, { - "epoch": 0.30145371078806427, + "epoch": 0.41622975193677125, "grad_norm": 0.0, - "learning_rate": 1.6378057825918917e-05, - "loss": 0.9919, + "learning_rate": 1.3144834962006619e-05, + "loss": 1.063, "step": 10638 }, { - "epoch": 0.3014820482303267, + "epoch": 0.4162688786290007, "grad_norm": 0.0, - "learning_rate": 1.637735092030171e-05, - "loss": 0.9596, + "learning_rate": 1.314363200122537e-05, + "loss": 1.1452, "step": 10639 }, { - "epoch": 0.3015103856725892, + "epoch": 0.41630800532123013, "grad_norm": 0.0, - "learning_rate": 1.6376643960965712e-05, - "loss": 0.9789, + "learning_rate": 1.3142428989961336e-05, + "loss": 1.1417, "step": 10640 }, { - "epoch": 0.30153872311485164, + "epoch": 0.4163471320134596, "grad_norm": 0.0, - "learning_rate": 1.6375936947916867e-05, - "loss": 1.0262, + "learning_rate": 1.3141225928233826e-05, + "loss": 1.2387, "step": 10641 }, { - "epoch": 0.30156706055711413, + "epoch": 0.416386258705689, "grad_norm": 0.0, - "learning_rate": 1.637522988116114e-05, - "loss": 0.9958, + "learning_rate": 1.3140022816062166e-05, + "loss": 1.1024, "step": 10642 }, { - "epoch": 0.30159539799937657, + "epoch": 0.41642538539791846, "grad_norm": 0.0, - "learning_rate": 1.6374522760704485e-05, - "loss": 0.9812, + "learning_rate": 1.3138819653465674e-05, + "loss": 1.0799, "step": 10643 }, { - "epoch": 0.30162373544163906, + "epoch": 0.4164645120901479, "grad_norm": 0.0, - "learning_rate": 1.637381558655286e-05, - "loss": 0.9462, + "learning_rate": 1.313761644046367e-05, + "loss": 1.045, "step": 10644 }, { - "epoch": 0.3016520728839015, + "epoch": 0.41650363878237734, "grad_norm": 0.0, - "learning_rate": 1.6373108358712215e-05, - "loss": 0.9177, + "learning_rate": 1.3136413177075479e-05, + "loss": 1.0896, "step": 10645 }, { - "epoch": 0.30168041032616394, + "epoch": 0.4165427654746068, "grad_norm": 0.0, - "learning_rate": 1.6372401077188515e-05, - "loss": 0.9516, + "learning_rate": 1.3135209863320422e-05, + "loss": 1.0924, "step": 10646 }, { - "epoch": 0.30170874776842643, + "epoch": 0.4165818921668362, "grad_norm": 0.0, - "learning_rate": 1.6371693741987714e-05, - "loss": 0.9693, + "learning_rate": 1.3134006499217824e-05, + "loss": 1.0104, "step": 10647 }, { - "epoch": 0.30173708521068887, + "epoch": 0.41662101885906566, "grad_norm": 0.0, - "learning_rate": 1.637098635311577e-05, - "loss": 0.9492, + "learning_rate": 1.3132803084787008e-05, + "loss": 1.1694, "step": 10648 }, { - "epoch": 0.30176542265295137, + "epoch": 0.4166601455512951, "grad_norm": 0.0, - "learning_rate": 1.6370278910578644e-05, - "loss": 0.9911, + "learning_rate": 1.31315996200473e-05, + "loss": 1.0929, "step": 10649 }, { - "epoch": 0.3017937600952138, + "epoch": 0.41669927224352454, "grad_norm": 0.0, - "learning_rate": 1.6369571414382288e-05, - "loss": 1.0786, + "learning_rate": 1.3130396105018024e-05, + "loss": 1.0343, "step": 10650 }, { - "epoch": 0.30182209753747624, + "epoch": 0.416738398935754, "grad_norm": 0.0, - "learning_rate": 1.636886386453267e-05, - "loss": 0.9583, + "learning_rate": 1.3129192539718514e-05, + "loss": 1.1236, "step": 10651 }, { - "epoch": 0.30185043497973874, + "epoch": 0.4167775256279834, "grad_norm": 0.0, - "learning_rate": 1.6368156261035747e-05, - "loss": 0.9339, + "learning_rate": 1.312798892416809e-05, + "loss": 1.2426, "step": 10652 }, { - "epoch": 0.3018787724220012, + "epoch": 0.41681665232021287, "grad_norm": 0.0, - "learning_rate": 1.636744860389748e-05, - "loss": 1.0394, + "learning_rate": 1.3126785258386083e-05, + "loss": 1.0703, "step": 10653 }, { - "epoch": 0.30190710986426367, + "epoch": 0.4168557790124423, "grad_norm": 0.0, - "learning_rate": 1.6366740893123828e-05, - "loss": 0.9786, + "learning_rate": 1.3125581542391825e-05, + "loss": 1.1004, "step": 10654 }, { - "epoch": 0.3019354473065261, + "epoch": 0.41689490570467175, "grad_norm": 0.0, - "learning_rate": 1.636603312872075e-05, - "loss": 0.9535, + "learning_rate": 1.3124377776204641e-05, + "loss": 0.9923, "step": 10655 }, { - "epoch": 0.3019637847487886, + "epoch": 0.4169340323969012, "grad_norm": 0.0, - "learning_rate": 1.6365325310694215e-05, - "loss": 0.9673, + "learning_rate": 1.312317395984387e-05, + "loss": 1.1691, "step": 10656 }, { - "epoch": 0.30199212219105104, + "epoch": 0.41697315908913063, "grad_norm": 0.0, - "learning_rate": 1.636461743905018e-05, - "loss": 0.9981, + "learning_rate": 1.3121970093328833e-05, + "loss": 1.1575, "step": 10657 }, { - "epoch": 0.3020204596333135, + "epoch": 0.41701228578136007, "grad_norm": 0.0, - "learning_rate": 1.6363909513794606e-05, - "loss": 1.0319, + "learning_rate": 1.3120766176678872e-05, + "loss": 1.1895, "step": 10658 }, { - "epoch": 0.30204879707557597, + "epoch": 0.41705141247358946, "grad_norm": 0.0, - "learning_rate": 1.6363201534933465e-05, - "loss": 1.0042, + "learning_rate": 1.3119562209913314e-05, + "loss": 1.1613, "step": 10659 }, { - "epoch": 0.3020771345178384, + "epoch": 0.4170905391658189, "grad_norm": 0.0, - "learning_rate": 1.6362493502472713e-05, - "loss": 1.016, + "learning_rate": 1.3118358193051499e-05, + "loss": 1.142, "step": 10660 }, { - "epoch": 0.3021054719601009, + "epoch": 0.41712966585804834, "grad_norm": 0.0, - "learning_rate": 1.6361785416418313e-05, - "loss": 1.0329, + "learning_rate": 1.3117154126112755e-05, + "loss": 0.9964, "step": 10661 }, { - "epoch": 0.30213380940236334, + "epoch": 0.4171687925502778, "grad_norm": 0.0, - "learning_rate": 1.636107727677623e-05, - "loss": 0.8716, + "learning_rate": 1.3115950009116425e-05, + "loss": 1.1159, "step": 10662 }, { - "epoch": 0.3021621468446258, + "epoch": 0.4172079192425072, "grad_norm": 0.0, - "learning_rate": 1.6360369083552433e-05, - "loss": 1.0127, + "learning_rate": 1.3114745842081841e-05, + "loss": 1.0351, "step": 10663 }, { - "epoch": 0.3021904842868883, + "epoch": 0.41724704593473666, "grad_norm": 0.0, - "learning_rate": 1.6359660836752887e-05, - "loss": 1.0841, + "learning_rate": 1.3113541625028344e-05, + "loss": 1.0723, "step": 10664 }, { - "epoch": 0.3022188217291507, + "epoch": 0.4172861726269661, "grad_norm": 0.0, - "learning_rate": 1.635895253638356e-05, - "loss": 0.9416, + "learning_rate": 1.311233735797527e-05, + "loss": 1.0436, "step": 10665 }, { - "epoch": 0.3022471591714132, + "epoch": 0.41732529931919554, "grad_norm": 0.0, - "learning_rate": 1.6358244182450408e-05, - "loss": 0.9732, + "learning_rate": 1.3111133040941955e-05, + "loss": 1.0543, "step": 10666 }, { - "epoch": 0.30227549661367564, + "epoch": 0.417364426011425, "grad_norm": 0.0, - "learning_rate": 1.6357535774959405e-05, - "loss": 0.9536, + "learning_rate": 1.3109928673947742e-05, + "loss": 1.0993, "step": 10667 }, { - "epoch": 0.30230383405593814, + "epoch": 0.4174035527036544, "grad_norm": 0.0, - "learning_rate": 1.635682731391652e-05, - "loss": 0.9446, + "learning_rate": 1.3108724257011973e-05, + "loss": 1.1629, "step": 10668 }, { - "epoch": 0.3023321714982006, + "epoch": 0.41744267939588386, "grad_norm": 0.0, - "learning_rate": 1.6356118799327716e-05, - "loss": 0.856, + "learning_rate": 1.3107519790153988e-05, + "loss": 1.1452, "step": 10669 }, { - "epoch": 0.302360508940463, + "epoch": 0.4174818060881133, "grad_norm": 0.0, - "learning_rate": 1.6355410231198964e-05, - "loss": 1.0579, + "learning_rate": 1.3106315273393126e-05, + "loss": 0.9787, "step": 10670 }, { - "epoch": 0.3023888463827255, + "epoch": 0.41752093278034275, "grad_norm": 0.0, - "learning_rate": 1.635470160953623e-05, - "loss": 1.0614, + "learning_rate": 1.3105110706748738e-05, + "loss": 1.0353, "step": 10671 }, { - "epoch": 0.30241718382498795, + "epoch": 0.4175600594725722, "grad_norm": 0.0, - "learning_rate": 1.6353992934345484e-05, - "loss": 0.8782, + "learning_rate": 1.310390609024016e-05, + "loss": 0.9891, "step": 10672 }, { - "epoch": 0.30244552126725044, + "epoch": 0.41759918616480163, "grad_norm": 0.0, - "learning_rate": 1.63532842056327e-05, - "loss": 0.9037, + "learning_rate": 1.310270142388674e-05, + "loss": 1.204, "step": 10673 }, { - "epoch": 0.3024738587095129, + "epoch": 0.41763831285703107, "grad_norm": 0.0, - "learning_rate": 1.635257542340384e-05, - "loss": 0.9387, + "learning_rate": 1.3101496707707825e-05, + "loss": 1.1177, "step": 10674 }, { - "epoch": 0.3025021961517753, + "epoch": 0.4176774395492605, "grad_norm": 0.0, - "learning_rate": 1.635186658766488e-05, - "loss": 0.9781, + "learning_rate": 1.3100291941722756e-05, + "loss": 1.217, "step": 10675 }, { - "epoch": 0.3025305335940378, + "epoch": 0.41771656624148995, "grad_norm": 0.0, - "learning_rate": 1.635115769842179e-05, - "loss": 0.9716, + "learning_rate": 1.3099087125950886e-05, + "loss": 0.9893, "step": 10676 }, { - "epoch": 0.30255887103630025, + "epoch": 0.4177556929337194, "grad_norm": 0.0, - "learning_rate": 1.635044875568054e-05, - "loss": 0.8508, + "learning_rate": 1.3097882260411561e-05, + "loss": 1.1165, "step": 10677 }, { - "epoch": 0.30258720847856274, + "epoch": 0.41779481962594883, "grad_norm": 0.0, - "learning_rate": 1.63497397594471e-05, - "loss": 1.0352, + "learning_rate": 1.3096677345124125e-05, + "loss": 1.0936, "step": 10678 }, { - "epoch": 0.3026155459208252, + "epoch": 0.4178339463181783, "grad_norm": 0.0, - "learning_rate": 1.6349030709727444e-05, - "loss": 1.0347, + "learning_rate": 1.3095472380107934e-05, + "loss": 0.9999, "step": 10679 }, { - "epoch": 0.3026438833630877, + "epoch": 0.4178730730104077, "grad_norm": 0.0, - "learning_rate": 1.6348321606527545e-05, - "loss": 0.9804, + "learning_rate": 1.3094267365382337e-05, + "loss": 1.1743, "step": 10680 }, { - "epoch": 0.3026722208053501, + "epoch": 0.41791219970263715, "grad_norm": 0.0, - "learning_rate": 1.634761244985338e-05, - "loss": 0.9664, + "learning_rate": 1.3093062300966679e-05, + "loss": 1.1009, "step": 10681 }, { - "epoch": 0.30270055824761255, + "epoch": 0.4179513263948666, "grad_norm": 0.0, - "learning_rate": 1.6346903239710913e-05, - "loss": 0.9436, + "learning_rate": 1.3091857186880317e-05, + "loss": 1.0198, "step": 10682 }, { - "epoch": 0.30272889568987504, + "epoch": 0.41799045308709604, "grad_norm": 0.0, - "learning_rate": 1.6346193976106125e-05, - "loss": 0.9948, + "learning_rate": 1.3090652023142606e-05, + "loss": 1.097, "step": 10683 }, { - "epoch": 0.3027572331321375, + "epoch": 0.4180295797793255, "grad_norm": 0.0, - "learning_rate": 1.6345484659044987e-05, - "loss": 0.8708, + "learning_rate": 1.3089446809772892e-05, + "loss": 1.0432, "step": 10684 }, { - "epoch": 0.3027855705744, + "epoch": 0.4180687064715549, "grad_norm": 0.0, - "learning_rate": 1.6344775288533477e-05, - "loss": 0.9246, + "learning_rate": 1.3088241546790538e-05, + "loss": 1.1958, "step": 10685 }, { - "epoch": 0.3028139080166624, + "epoch": 0.41810783316378436, "grad_norm": 0.0, - "learning_rate": 1.634406586457757e-05, - "loss": 0.8941, + "learning_rate": 1.3087036234214892e-05, + "loss": 1.0919, "step": 10686 }, { - "epoch": 0.30284224545892485, + "epoch": 0.4181469598560138, "grad_norm": 0.0, - "learning_rate": 1.634335638718324e-05, - "loss": 0.9928, + "learning_rate": 1.3085830872065313e-05, + "loss": 1.1254, "step": 10687 }, { - "epoch": 0.30287058290118735, + "epoch": 0.4181860865482432, "grad_norm": 0.0, - "learning_rate": 1.634264685635646e-05, - "loss": 0.9937, + "learning_rate": 1.3084625460361155e-05, + "loss": 1.1218, "step": 10688 }, { - "epoch": 0.3028989203434498, + "epoch": 0.4182252132404726, "grad_norm": 0.0, - "learning_rate": 1.6341937272103213e-05, - "loss": 0.8774, + "learning_rate": 1.308341999912178e-05, + "loss": 1.1325, "step": 10689 }, { - "epoch": 0.3029272577857123, + "epoch": 0.41826433993270207, "grad_norm": 0.0, - "learning_rate": 1.6341227634429472e-05, - "loss": 1.0669, + "learning_rate": 1.3082214488366542e-05, + "loss": 0.9181, "step": 10690 }, { - "epoch": 0.3029555952279747, + "epoch": 0.4183034666249315, "grad_norm": 0.0, - "learning_rate": 1.6340517943341217e-05, - "loss": 1.0079, + "learning_rate": 1.3081008928114804e-05, + "loss": 1.1168, "step": 10691 }, { - "epoch": 0.3029839326702372, + "epoch": 0.41834259331716095, "grad_norm": 0.0, - "learning_rate": 1.6339808198844424e-05, - "loss": 1.0011, + "learning_rate": 1.3079803318385922e-05, + "loss": 1.0421, "step": 10692 }, { - "epoch": 0.30301227011249965, + "epoch": 0.4183817200093904, "grad_norm": 0.0, - "learning_rate": 1.6339098400945074e-05, - "loss": 0.9012, + "learning_rate": 1.3078597659199255e-05, + "loss": 1.0855, "step": 10693 }, { - "epoch": 0.3030406075547621, + "epoch": 0.41842084670161983, "grad_norm": 0.0, - "learning_rate": 1.6338388549649146e-05, - "loss": 1.0447, + "learning_rate": 1.3077391950574172e-05, + "loss": 1.0934, "step": 10694 }, { - "epoch": 0.3030689449970246, + "epoch": 0.41845997339384927, "grad_norm": 0.0, - "learning_rate": 1.633767864496261e-05, - "loss": 0.9229, + "learning_rate": 1.3076186192530027e-05, + "loss": 1.0109, "step": 10695 }, { - "epoch": 0.303097282439287, + "epoch": 0.4184991000860787, "grad_norm": 0.0, - "learning_rate": 1.633696868689146e-05, - "loss": 1.0243, + "learning_rate": 1.3074980385086189e-05, + "loss": 0.9843, "step": 10696 }, { - "epoch": 0.3031256198815495, + "epoch": 0.41853822677830815, "grad_norm": 0.0, - "learning_rate": 1.633625867544167e-05, - "loss": 0.8755, + "learning_rate": 1.3073774528262015e-05, + "loss": 1.0175, "step": 10697 }, { - "epoch": 0.30315395732381195, + "epoch": 0.4185773534705376, "grad_norm": 0.0, - "learning_rate": 1.6335548610619215e-05, - "loss": 1.0025, + "learning_rate": 1.3072568622076878e-05, + "loss": 1.1447, "step": 10698 }, { - "epoch": 0.3031822947660744, + "epoch": 0.41861648016276704, "grad_norm": 0.0, - "learning_rate": 1.6334838492430084e-05, - "loss": 0.8971, + "learning_rate": 1.3071362666550136e-05, + "loss": 1.0651, "step": 10699 }, { - "epoch": 0.3032106322083369, + "epoch": 0.4186556068549965, "grad_norm": 0.0, - "learning_rate": 1.6334128320880258e-05, - "loss": 0.9954, + "learning_rate": 1.3070156661701161e-05, + "loss": 1.1971, "step": 10700 }, { - "epoch": 0.3032389696505993, + "epoch": 0.4186947335472259, "grad_norm": 0.0, - "learning_rate": 1.633341809597572e-05, - "loss": 0.9532, + "learning_rate": 1.3068950607549318e-05, + "loss": 1.0298, "step": 10701 }, { - "epoch": 0.3032673070928618, + "epoch": 0.41873386023945536, "grad_norm": 0.0, - "learning_rate": 1.6332707817722446e-05, - "loss": 0.9552, + "learning_rate": 1.306774450411397e-05, + "loss": 1.241, "step": 10702 }, { - "epoch": 0.30329564453512425, + "epoch": 0.4187729869316848, "grad_norm": 0.0, - "learning_rate": 1.6331997486126415e-05, - "loss": 0.936, + "learning_rate": 1.3066538351414493e-05, + "loss": 1.0754, "step": 10703 }, { - "epoch": 0.30332398197738675, + "epoch": 0.41881211362391424, "grad_norm": 0.0, - "learning_rate": 1.6331287101193625e-05, - "loss": 0.8947, + "learning_rate": 1.3065332149470249e-05, + "loss": 1.0004, "step": 10704 }, { - "epoch": 0.3033523194196492, + "epoch": 0.4188512403161437, "grad_norm": 0.0, - "learning_rate": 1.6330576662930052e-05, - "loss": 0.9713, + "learning_rate": 1.3064125898300615e-05, + "loss": 1.2278, "step": 10705 }, { - "epoch": 0.3033806568619116, + "epoch": 0.4188903670083731, "grad_norm": 0.0, - "learning_rate": 1.632986617134168e-05, - "loss": 0.9193, + "learning_rate": 1.3062919597924957e-05, + "loss": 1.0609, "step": 10706 }, { - "epoch": 0.3034089943041741, + "epoch": 0.41892949370060256, "grad_norm": 0.0, - "learning_rate": 1.6329155626434498e-05, - "loss": 0.9758, + "learning_rate": 1.3061713248362648e-05, + "loss": 1.0166, "step": 10707 }, { - "epoch": 0.30343733174643656, + "epoch": 0.418968620392832, "grad_norm": 0.0, - "learning_rate": 1.6328445028214485e-05, - "loss": 1.0058, + "learning_rate": 1.3060506849633062e-05, + "loss": 1.0568, "step": 10708 }, { - "epoch": 0.30346566918869905, + "epoch": 0.41900774708506144, "grad_norm": 0.0, - "learning_rate": 1.632773437668763e-05, - "loss": 0.9764, + "learning_rate": 1.3059300401755571e-05, + "loss": 0.9569, "step": 10709 }, { - "epoch": 0.3034940066309615, + "epoch": 0.4190468737772909, "grad_norm": 0.0, - "learning_rate": 1.632702367185992e-05, - "loss": 0.8968, + "learning_rate": 1.3058093904749547e-05, + "loss": 1.0091, "step": 10710 }, { - "epoch": 0.3035223440732239, + "epoch": 0.4190860004695203, "grad_norm": 0.0, - "learning_rate": 1.632631291373734e-05, - "loss": 0.9447, + "learning_rate": 1.305688735863437e-05, + "loss": 1.0184, "step": 10711 }, { - "epoch": 0.3035506815154864, + "epoch": 0.41912512716174977, "grad_norm": 0.0, - "learning_rate": 1.6325602102325873e-05, - "loss": 1.0292, + "learning_rate": 1.3055680763429411e-05, + "loss": 0.9998, "step": 10712 }, { - "epoch": 0.30357901895774886, + "epoch": 0.4191642538539792, "grad_norm": 0.0, - "learning_rate": 1.6324891237631514e-05, - "loss": 1.0669, + "learning_rate": 1.3054474119154046e-05, + "loss": 1.0585, "step": 10713 }, { - "epoch": 0.30360735640001135, + "epoch": 0.41920338054620865, "grad_norm": 0.0, - "learning_rate": 1.6324180319660247e-05, - "loss": 0.9502, + "learning_rate": 1.3053267425827656e-05, + "loss": 1.066, "step": 10714 }, { - "epoch": 0.3036356938422738, + "epoch": 0.4192425072384381, "grad_norm": 0.0, - "learning_rate": 1.632346934841806e-05, - "loss": 1.0512, + "learning_rate": 1.3052060683469617e-05, + "loss": 0.8908, "step": 10715 }, { - "epoch": 0.3036640312845363, + "epoch": 0.4192816339306675, "grad_norm": 0.0, - "learning_rate": 1.6322758323910943e-05, - "loss": 0.9809, + "learning_rate": 1.3050853892099307e-05, + "loss": 1.0114, "step": 10716 }, { - "epoch": 0.3036923687267987, + "epoch": 0.4193207606228969, "grad_norm": 0.0, - "learning_rate": 1.6322047246144887e-05, - "loss": 0.8369, + "learning_rate": 1.3049647051736108e-05, + "loss": 1.0652, "step": 10717 }, { - "epoch": 0.30372070616906116, + "epoch": 0.41935988731512636, "grad_norm": 0.0, - "learning_rate": 1.6321336115125876e-05, - "loss": 0.9234, + "learning_rate": 1.3048440162399399e-05, + "loss": 1.1071, "step": 10718 }, { - "epoch": 0.30374904361132365, + "epoch": 0.4193990140073558, "grad_norm": 0.0, - "learning_rate": 1.6320624930859905e-05, - "loss": 0.9532, + "learning_rate": 1.3047233224108558e-05, + "loss": 1.0825, "step": 10719 }, { - "epoch": 0.3037773810535861, + "epoch": 0.41943814069958524, "grad_norm": 0.0, - "learning_rate": 1.6319913693352963e-05, - "loss": 0.9477, + "learning_rate": 1.3046026236882972e-05, + "loss": 1.1835, "step": 10720 }, { - "epoch": 0.3038057184958486, + "epoch": 0.4194772673918147, "grad_norm": 0.0, - "learning_rate": 1.631920240261104e-05, - "loss": 1.0042, + "learning_rate": 1.3044819200742022e-05, + "loss": 1.0689, "step": 10721 }, { - "epoch": 0.303834055938111, + "epoch": 0.4195163940840441, "grad_norm": 0.0, - "learning_rate": 1.631849105864013e-05, - "loss": 0.9313, + "learning_rate": 1.3043612115705088e-05, + "loss": 1.02, "step": 10722 }, { - "epoch": 0.30386239338037346, + "epoch": 0.41955552077627356, "grad_norm": 0.0, - "learning_rate": 1.6317779661446223e-05, - "loss": 1.0223, + "learning_rate": 1.304240498179156e-05, + "loss": 1.072, "step": 10723 }, { - "epoch": 0.30389073082263596, + "epoch": 0.419594647468503, "grad_norm": 0.0, - "learning_rate": 1.631706821103531e-05, - "loss": 0.9483, + "learning_rate": 1.304119779902082e-05, + "loss": 1.1179, "step": 10724 }, { - "epoch": 0.3039190682648984, + "epoch": 0.41963377416073244, "grad_norm": 0.0, - "learning_rate": 1.631635670741339e-05, - "loss": 1.0381, + "learning_rate": 1.3039990567412255e-05, + "loss": 1.1559, "step": 10725 }, { - "epoch": 0.3039474057071609, + "epoch": 0.4196729008529619, "grad_norm": 0.0, - "learning_rate": 1.631564515058645e-05, - "loss": 0.9566, + "learning_rate": 1.303878328698525e-05, + "loss": 1.1351, "step": 10726 }, { - "epoch": 0.3039757431494233, + "epoch": 0.4197120275451913, "grad_norm": 0.0, - "learning_rate": 1.6314933540560485e-05, - "loss": 0.9531, + "learning_rate": 1.3037575957759195e-05, + "loss": 1.0342, "step": 10727 }, { - "epoch": 0.3040040805916858, + "epoch": 0.41975115423742076, "grad_norm": 0.0, - "learning_rate": 1.6314221877341488e-05, - "loss": 0.9494, + "learning_rate": 1.3036368579753473e-05, + "loss": 1.1381, "step": 10728 }, { - "epoch": 0.30403241803394826, + "epoch": 0.4197902809296502, "grad_norm": 0.0, - "learning_rate": 1.6313510160935457e-05, - "loss": 0.8624, + "learning_rate": 1.303516115298748e-05, + "loss": 1.0746, "step": 10729 }, { - "epoch": 0.3040607554762107, + "epoch": 0.41982940762187965, "grad_norm": 0.0, - "learning_rate": 1.6312798391348387e-05, - "loss": 0.9043, + "learning_rate": 1.3033953677480603e-05, + "loss": 1.0234, "step": 10730 }, { - "epoch": 0.3040890929184732, + "epoch": 0.4198685343141091, "grad_norm": 0.0, - "learning_rate": 1.6312086568586273e-05, - "loss": 1.0292, + "learning_rate": 1.3032746153252225e-05, + "loss": 1.1991, "step": 10731 }, { - "epoch": 0.30411743036073563, + "epoch": 0.41990766100633853, "grad_norm": 0.0, - "learning_rate": 1.6311374692655107e-05, - "loss": 1.0599, + "learning_rate": 1.3031538580321748e-05, + "loss": 1.056, "step": 10732 }, { - "epoch": 0.3041457678029981, + "epoch": 0.41994678769856797, "grad_norm": 0.0, - "learning_rate": 1.631066276356089e-05, - "loss": 0.9, + "learning_rate": 1.303033095870856e-05, + "loss": 1.159, "step": 10733 }, { - "epoch": 0.30417410524526056, + "epoch": 0.4199859143907974, "grad_norm": 0.0, - "learning_rate": 1.6309950781309612e-05, - "loss": 0.9777, + "learning_rate": 1.3029123288432056e-05, + "loss": 1.1193, "step": 10734 }, { - "epoch": 0.304202442687523, + "epoch": 0.42002504108302685, "grad_norm": 0.0, - "learning_rate": 1.630923874590728e-05, - "loss": 1.071, + "learning_rate": 1.3027915569511622e-05, + "loss": 0.993, "step": 10735 }, { - "epoch": 0.3042307801297855, + "epoch": 0.4200641677752563, "grad_norm": 0.0, - "learning_rate": 1.6308526657359888e-05, - "loss": 0.9996, + "learning_rate": 1.3026707801966665e-05, + "loss": 1.062, "step": 10736 }, { - "epoch": 0.30425911757204793, + "epoch": 0.42010329446748573, "grad_norm": 0.0, - "learning_rate": 1.6307814515673433e-05, - "loss": 0.9539, + "learning_rate": 1.3025499985816568e-05, + "loss": 1.0129, "step": 10737 }, { - "epoch": 0.3042874550143104, + "epoch": 0.4201424211597152, "grad_norm": 0.0, - "learning_rate": 1.6307102320853913e-05, - "loss": 1.0164, + "learning_rate": 1.3024292121080735e-05, + "loss": 1.0791, "step": 10738 }, { - "epoch": 0.30431579245657286, + "epoch": 0.4201815478519446, "grad_norm": 0.0, - "learning_rate": 1.6306390072907327e-05, - "loss": 0.9763, + "learning_rate": 1.3023084207778558e-05, + "loss": 1.0995, "step": 10739 }, { - "epoch": 0.30434412989883536, + "epoch": 0.42022067454417406, "grad_norm": 0.0, - "learning_rate": 1.630567777183968e-05, - "loss": 1.0046, + "learning_rate": 1.302187624592944e-05, + "loss": 1.0145, "step": 10740 }, { - "epoch": 0.3043724673410978, + "epoch": 0.4202598012364035, "grad_norm": 0.0, - "learning_rate": 1.6304965417656962e-05, - "loss": 0.9573, + "learning_rate": 1.3020668235552776e-05, + "loss": 1.0923, "step": 10741 }, { - "epoch": 0.30440080478336023, + "epoch": 0.42029892792863294, "grad_norm": 0.0, - "learning_rate": 1.630425301036518e-05, - "loss": 0.8424, + "learning_rate": 1.3019460176667963e-05, + "loss": 1.0183, "step": 10742 }, { - "epoch": 0.30442914222562273, + "epoch": 0.4203380546208624, "grad_norm": 0.0, - "learning_rate": 1.6303540549970338e-05, - "loss": 0.9625, + "learning_rate": 1.3018252069294404e-05, + "loss": 1.1111, "step": 10743 }, { - "epoch": 0.30445747966788517, + "epoch": 0.4203771813130918, "grad_norm": 0.0, - "learning_rate": 1.630282803647843e-05, - "loss": 0.9141, + "learning_rate": 1.3017043913451498e-05, + "loss": 1.065, "step": 10744 }, { - "epoch": 0.30448581711014766, + "epoch": 0.4204163080053212, "grad_norm": 0.0, - "learning_rate": 1.630211546989546e-05, - "loss": 1.0318, + "learning_rate": 1.3015835709158649e-05, + "loss": 1.1385, "step": 10745 }, { - "epoch": 0.3045141545524101, + "epoch": 0.42045543469755065, "grad_norm": 0.0, - "learning_rate": 1.6301402850227432e-05, - "loss": 1.0289, + "learning_rate": 1.3014627456435257e-05, + "loss": 1.0415, "step": 10746 }, { - "epoch": 0.30454249199467254, + "epoch": 0.4204945613897801, "grad_norm": 0.0, - "learning_rate": 1.630069017748035e-05, - "loss": 1.0095, + "learning_rate": 1.3013419155300725e-05, + "loss": 1.0011, "step": 10747 }, { - "epoch": 0.30457082943693503, + "epoch": 0.4205336880820095, "grad_norm": 0.0, - "learning_rate": 1.629997745166021e-05, - "loss": 1.0097, + "learning_rate": 1.3012210805774456e-05, + "loss": 1.058, "step": 10748 }, { - "epoch": 0.30459916687919747, + "epoch": 0.42057281477423897, "grad_norm": 0.0, - "learning_rate": 1.6299264672773025e-05, - "loss": 1.0266, + "learning_rate": 1.301100240787586e-05, + "loss": 1.0252, "step": 10749 }, { - "epoch": 0.30462750432145996, + "epoch": 0.4206119414664684, "grad_norm": 0.0, - "learning_rate": 1.629855184082479e-05, - "loss": 1.0683, + "learning_rate": 1.3009793961624334e-05, + "loss": 1.1815, "step": 10750 }, { - "epoch": 0.3046558417637224, + "epoch": 0.42065106815869785, "grad_norm": 0.0, - "learning_rate": 1.629783895582152e-05, - "loss": 1.0615, + "learning_rate": 1.3008585467039291e-05, + "loss": 1.1817, "step": 10751 }, { - "epoch": 0.3046841792059849, + "epoch": 0.4206901948509273, "grad_norm": 0.0, - "learning_rate": 1.629712601776921e-05, - "loss": 0.8918, + "learning_rate": 1.3007376924140136e-05, + "loss": 1.1127, "step": 10752 }, { - "epoch": 0.30471251664824733, + "epoch": 0.42072932154315673, "grad_norm": 0.0, - "learning_rate": 1.629641302667387e-05, - "loss": 0.8846, + "learning_rate": 1.3006168332946275e-05, + "loss": 1.1517, "step": 10753 }, { - "epoch": 0.30474085409050977, + "epoch": 0.42076844823538617, "grad_norm": 0.0, - "learning_rate": 1.6295699982541506e-05, - "loss": 0.9464, + "learning_rate": 1.3004959693477117e-05, + "loss": 1.1418, "step": 10754 }, { - "epoch": 0.30476919153277227, + "epoch": 0.4208075749276156, "grad_norm": 0.0, - "learning_rate": 1.6294986885378123e-05, - "loss": 1.0203, + "learning_rate": 1.300375100575207e-05, + "loss": 1.1648, "step": 10755 }, { - "epoch": 0.3047975289750347, + "epoch": 0.42084670161984505, "grad_norm": 0.0, - "learning_rate": 1.6294273735189728e-05, - "loss": 0.962, + "learning_rate": 1.300254226979055e-05, + "loss": 1.1195, "step": 10756 }, { - "epoch": 0.3048258664172972, + "epoch": 0.4208858283120745, "grad_norm": 0.0, - "learning_rate": 1.6293560531982326e-05, - "loss": 1.0446, + "learning_rate": 1.300133348561196e-05, + "loss": 1.0102, "step": 10757 }, { - "epoch": 0.30485420385955964, + "epoch": 0.42092495500430394, "grad_norm": 0.0, - "learning_rate": 1.629284727576193e-05, - "loss": 0.9975, + "learning_rate": 1.3000124653235717e-05, + "loss": 1.0066, "step": 10758 }, { - "epoch": 0.3048825413018221, + "epoch": 0.4209640816965334, "grad_norm": 0.0, - "learning_rate": 1.629213396653454e-05, - "loss": 1.0178, + "learning_rate": 1.2998915772681233e-05, + "loss": 1.0597, "step": 10759 }, { - "epoch": 0.30491087874408457, + "epoch": 0.4210032083887628, "grad_norm": 0.0, - "learning_rate": 1.6291420604306172e-05, - "loss": 0.9075, + "learning_rate": 1.2997706843967915e-05, + "loss": 1.1029, "step": 10760 }, { - "epoch": 0.304939216186347, + "epoch": 0.42104233508099226, "grad_norm": 0.0, - "learning_rate": 1.629070718908283e-05, - "loss": 0.866, + "learning_rate": 1.2996497867115185e-05, + "loss": 1.0511, "step": 10761 }, { - "epoch": 0.3049675536286095, + "epoch": 0.4210814617732217, "grad_norm": 0.0, - "learning_rate": 1.6289993720870526e-05, - "loss": 0.9688, + "learning_rate": 1.2995288842142453e-05, + "loss": 1.1685, "step": 10762 }, { - "epoch": 0.30499589107087194, + "epoch": 0.42112058846545114, "grad_norm": 0.0, - "learning_rate": 1.628928019967527e-05, - "loss": 1.0017, + "learning_rate": 1.2994079769069137e-05, + "loss": 1.0389, "step": 10763 }, { - "epoch": 0.30502422851313443, + "epoch": 0.4211597151576806, "grad_norm": 0.0, - "learning_rate": 1.6288566625503076e-05, - "loss": 0.9869, + "learning_rate": 1.2992870647914648e-05, + "loss": 1.0323, "step": 10764 }, { - "epoch": 0.30505256595539687, + "epoch": 0.42119884184991, "grad_norm": 0.0, - "learning_rate": 1.6287852998359943e-05, - "loss": 0.8995, + "learning_rate": 1.299166147869841e-05, + "loss": 1.0413, "step": 10765 }, { - "epoch": 0.3050809033976593, + "epoch": 0.42123796854213946, "grad_norm": 0.0, - "learning_rate": 1.628713931825189e-05, - "loss": 0.8451, + "learning_rate": 1.2990452261439837e-05, + "loss": 1.0933, "step": 10766 }, { - "epoch": 0.3051092408399218, + "epoch": 0.4212770952343689, "grad_norm": 0.0, - "learning_rate": 1.628642558518493e-05, - "loss": 0.9078, + "learning_rate": 1.2989242996158347e-05, + "loss": 1.062, "step": 10767 }, { - "epoch": 0.30513757828218424, + "epoch": 0.42131622192659834, "grad_norm": 0.0, - "learning_rate": 1.628571179916507e-05, - "loss": 1.0681, + "learning_rate": 1.298803368287336e-05, + "loss": 1.0875, "step": 10768 }, { - "epoch": 0.30516591572444673, + "epoch": 0.4213553486188278, "grad_norm": 0.0, - "learning_rate": 1.628499796019833e-05, - "loss": 0.9273, + "learning_rate": 1.2986824321604298e-05, + "loss": 0.9339, "step": 10769 }, { - "epoch": 0.3051942531667092, + "epoch": 0.4213944753110572, "grad_norm": 0.0, - "learning_rate": 1.6284284068290716e-05, - "loss": 0.9872, + "learning_rate": 1.2985614912370577e-05, + "loss": 1.1023, "step": 10770 }, { - "epoch": 0.3052225906089716, + "epoch": 0.42143360200328667, "grad_norm": 0.0, - "learning_rate": 1.6283570123448244e-05, - "loss": 0.9937, + "learning_rate": 1.2984405455191624e-05, + "loss": 1.1993, "step": 10771 }, { - "epoch": 0.3052509280512341, + "epoch": 0.4214727286955161, "grad_norm": 0.0, - "learning_rate": 1.6282856125676927e-05, - "loss": 1.1097, + "learning_rate": 1.298319595008686e-05, + "loss": 1.0642, "step": 10772 }, { - "epoch": 0.30527926549349654, + "epoch": 0.4215118553877455, "grad_norm": 0.0, - "learning_rate": 1.628214207498278e-05, - "loss": 0.9939, + "learning_rate": 1.2981986397075705e-05, + "loss": 1.0016, "step": 10773 }, { - "epoch": 0.30530760293575904, + "epoch": 0.42155098207997493, "grad_norm": 0.0, - "learning_rate": 1.6281427971371817e-05, - "loss": 1.0587, + "learning_rate": 1.2980776796177588e-05, + "loss": 1.0108, "step": 10774 }, { - "epoch": 0.3053359403780215, + "epoch": 0.4215901087722044, "grad_norm": 0.0, - "learning_rate": 1.6280713814850056e-05, - "loss": 1.0029, + "learning_rate": 1.2979567147411927e-05, + "loss": 1.0598, "step": 10775 }, { - "epoch": 0.3053642778202839, + "epoch": 0.4216292354644338, "grad_norm": 0.0, - "learning_rate": 1.6279999605423508e-05, - "loss": 0.8761, + "learning_rate": 1.2978357450798153e-05, + "loss": 1.1495, "step": 10776 }, { - "epoch": 0.3053926152625464, + "epoch": 0.42166836215666326, "grad_norm": 0.0, - "learning_rate": 1.627928534309819e-05, - "loss": 0.9156, + "learning_rate": 1.2977147706355688e-05, + "loss": 1.1435, "step": 10777 }, { - "epoch": 0.30542095270480885, + "epoch": 0.4217074888488927, "grad_norm": 0.0, - "learning_rate": 1.627857102788012e-05, - "loss": 0.944, + "learning_rate": 1.2975937914103967e-05, + "loss": 1.1392, "step": 10778 }, { - "epoch": 0.30544929014707134, + "epoch": 0.42174661554112214, "grad_norm": 0.0, - "learning_rate": 1.627785665977532e-05, - "loss": 0.9665, + "learning_rate": 1.2974728074062409e-05, + "loss": 1.0334, "step": 10779 }, { - "epoch": 0.3054776275893338, + "epoch": 0.4217857422333516, "grad_norm": 0.0, - "learning_rate": 1.6277142238789798e-05, - "loss": 1.0155, + "learning_rate": 1.2973518186250444e-05, + "loss": 1.2034, "step": 10780 }, { - "epoch": 0.30550596503159627, + "epoch": 0.421824868925581, "grad_norm": 0.0, - "learning_rate": 1.6276427764929576e-05, - "loss": 0.9744, + "learning_rate": 1.2972308250687507e-05, + "loss": 0.9534, "step": 10781 }, { - "epoch": 0.3055343024738587, + "epoch": 0.42186399561781046, "grad_norm": 0.0, - "learning_rate": 1.6275713238200674e-05, - "loss": 0.9739, + "learning_rate": 1.2971098267393019e-05, + "loss": 0.9997, "step": 10782 }, { - "epoch": 0.30556263991612115, + "epoch": 0.4219031223100399, "grad_norm": 0.0, - "learning_rate": 1.6274998658609107e-05, - "loss": 0.8345, + "learning_rate": 1.296988823638642e-05, + "loss": 1.0587, "step": 10783 }, { - "epoch": 0.30559097735838364, + "epoch": 0.42194224900226934, "grad_norm": 0.0, - "learning_rate": 1.6274284026160894e-05, - "loss": 1.0042, + "learning_rate": 1.2968678157687133e-05, + "loss": 1.1497, "step": 10784 }, { - "epoch": 0.3056193148006461, + "epoch": 0.4219813756944988, "grad_norm": 0.0, - "learning_rate": 1.627356934086206e-05, - "loss": 1.0381, + "learning_rate": 1.2967468031314598e-05, + "loss": 0.9773, "step": 10785 }, { - "epoch": 0.3056476522429086, + "epoch": 0.4220205023867282, "grad_norm": 0.0, - "learning_rate": 1.6272854602718622e-05, - "loss": 0.9841, + "learning_rate": 1.2966257857288245e-05, + "loss": 1.1367, "step": 10786 }, { - "epoch": 0.305675989685171, + "epoch": 0.42205962907895767, "grad_norm": 0.0, - "learning_rate": 1.62721398117366e-05, - "loss": 0.9862, + "learning_rate": 1.2965047635627507e-05, + "loss": 1.0803, "step": 10787 }, { - "epoch": 0.30570432712743345, + "epoch": 0.4220987557711871, "grad_norm": 0.0, - "learning_rate": 1.6271424967922015e-05, - "loss": 1.0367, + "learning_rate": 1.2963837366351822e-05, + "loss": 1.0701, "step": 10788 }, { - "epoch": 0.30573266456969594, + "epoch": 0.42213788246341655, "grad_norm": 0.0, - "learning_rate": 1.627071007128089e-05, - "loss": 0.8096, + "learning_rate": 1.2962627049480618e-05, + "loss": 1.0197, "step": 10789 }, { - "epoch": 0.3057610020119584, + "epoch": 0.422177009155646, "grad_norm": 0.0, - "learning_rate": 1.6269995121819243e-05, - "loss": 1.0089, + "learning_rate": 1.2961416685033339e-05, + "loss": 0.9651, "step": 10790 }, { - "epoch": 0.3057893394542209, + "epoch": 0.42221613584787543, "grad_norm": 0.0, - "learning_rate": 1.62692801195431e-05, - "loss": 0.9636, + "learning_rate": 1.2960206273029417e-05, + "loss": 1.2126, "step": 10791 }, { - "epoch": 0.3058176768964833, + "epoch": 0.42225526254010487, "grad_norm": 0.0, - "learning_rate": 1.6268565064458482e-05, - "loss": 0.9367, + "learning_rate": 1.2958995813488293e-05, + "loss": 1.0184, "step": 10792 }, { - "epoch": 0.3058460143387458, + "epoch": 0.4222943892323343, "grad_norm": 0.0, - "learning_rate": 1.6267849956571415e-05, - "loss": 1.0004, + "learning_rate": 1.2957785306429402e-05, + "loss": 1.143, "step": 10793 }, { - "epoch": 0.30587435178100825, + "epoch": 0.42233351592456375, "grad_norm": 0.0, - "learning_rate": 1.6267134795887914e-05, - "loss": 0.93, + "learning_rate": 1.2956574751872188e-05, + "loss": 1.0782, "step": 10794 }, { - "epoch": 0.3059026892232707, + "epoch": 0.4223726426167932, "grad_norm": 0.0, - "learning_rate": 1.6266419582414016e-05, - "loss": 0.955, + "learning_rate": 1.2955364149836088e-05, + "loss": 1.0558, "step": 10795 }, { - "epoch": 0.3059310266655332, + "epoch": 0.42241176930902263, "grad_norm": 0.0, - "learning_rate": 1.6265704316155735e-05, - "loss": 1.0296, + "learning_rate": 1.2954153500340543e-05, + "loss": 1.0712, "step": 10796 }, { - "epoch": 0.3059593641077956, + "epoch": 0.4224508960012521, "grad_norm": 0.0, - "learning_rate": 1.6264988997119103e-05, - "loss": 1.0756, + "learning_rate": 1.2952942803404991e-05, + "loss": 1.0982, "step": 10797 }, { - "epoch": 0.3059877015500581, + "epoch": 0.4224900226934815, "grad_norm": 0.0, - "learning_rate": 1.626427362531014e-05, - "loss": 0.9732, + "learning_rate": 1.2951732059048882e-05, + "loss": 1.0365, "step": 10798 }, { - "epoch": 0.30601603899232055, + "epoch": 0.42252914938571096, "grad_norm": 0.0, - "learning_rate": 1.6263558200734875e-05, - "loss": 0.9999, + "learning_rate": 1.2950521267291656e-05, + "loss": 1.0577, "step": 10799 }, { - "epoch": 0.306044376434583, + "epoch": 0.4225682760779404, "grad_norm": 0.0, - "learning_rate": 1.6262842723399335e-05, - "loss": 0.8965, + "learning_rate": 1.294931042815275e-05, + "loss": 1.1026, "step": 10800 }, { - "epoch": 0.3060727138768455, + "epoch": 0.4226074027701698, "grad_norm": 0.0, - "learning_rate": 1.6262127193309543e-05, - "loss": 1.0754, + "learning_rate": 1.294809954165162e-05, + "loss": 1.1716, "step": 10801 }, { - "epoch": 0.3061010513191079, + "epoch": 0.4226465294623992, "grad_norm": 0.0, - "learning_rate": 1.6261411610471526e-05, - "loss": 0.9333, + "learning_rate": 1.2946888607807702e-05, + "loss": 1.0137, "step": 10802 }, { - "epoch": 0.3061293887613704, + "epoch": 0.42268565615462866, "grad_norm": 0.0, - "learning_rate": 1.626069597489132e-05, - "loss": 0.9942, + "learning_rate": 1.2945677626640447e-05, + "loss": 1.1226, "step": 10803 }, { - "epoch": 0.30615772620363285, + "epoch": 0.4227247828468581, "grad_norm": 0.0, - "learning_rate": 1.6259980286574938e-05, - "loss": 0.9734, + "learning_rate": 1.2944466598169299e-05, + "loss": 1.116, "step": 10804 }, { - "epoch": 0.30618606364589535, + "epoch": 0.42276390953908755, "grad_norm": 0.0, - "learning_rate": 1.6259264545528426e-05, - "loss": 1.0319, + "learning_rate": 1.2943255522413708e-05, + "loss": 1.0054, "step": 10805 }, { - "epoch": 0.3062144010881578, + "epoch": 0.422803036231317, "grad_norm": 0.0, - "learning_rate": 1.6258548751757802e-05, - "loss": 1.0496, + "learning_rate": 1.294204439939312e-05, + "loss": 1.1633, "step": 10806 }, { - "epoch": 0.3062427385304202, + "epoch": 0.4228421629235464, "grad_norm": 0.0, - "learning_rate": 1.6257832905269095e-05, - "loss": 1.0157, + "learning_rate": 1.294083322912699e-05, + "loss": 1.1501, "step": 10807 }, { - "epoch": 0.3062710759726827, + "epoch": 0.42288128961577587, "grad_norm": 0.0, - "learning_rate": 1.6257117006068338e-05, - "loss": 0.9356, + "learning_rate": 1.2939622011634762e-05, + "loss": 1.0437, "step": 10808 }, { - "epoch": 0.30629941341494515, + "epoch": 0.4229204163080053, "grad_norm": 0.0, - "learning_rate": 1.6256401054161565e-05, - "loss": 0.9626, + "learning_rate": 1.2938410746935883e-05, + "loss": 1.0757, "step": 10809 }, { - "epoch": 0.30632775085720765, + "epoch": 0.42295954300023475, "grad_norm": 0.0, - "learning_rate": 1.6255685049554802e-05, - "loss": 0.9413, + "learning_rate": 1.2937199435049816e-05, + "loss": 1.04, "step": 10810 }, { - "epoch": 0.3063560882994701, + "epoch": 0.4229986696924642, "grad_norm": 0.0, - "learning_rate": 1.6254968992254078e-05, - "loss": 1.1578, + "learning_rate": 1.2935988075996004e-05, + "loss": 1.0497, "step": 10811 }, { - "epoch": 0.3063844257417325, + "epoch": 0.42303779638469363, "grad_norm": 0.0, - "learning_rate": 1.6254252882265428e-05, - "loss": 0.9107, + "learning_rate": 1.29347766697939e-05, + "loss": 1.1053, "step": 10812 }, { - "epoch": 0.306412763183995, + "epoch": 0.4230769230769231, "grad_norm": 0.0, - "learning_rate": 1.6253536719594883e-05, - "loss": 0.8949, + "learning_rate": 1.2933565216462965e-05, + "loss": 1.0578, "step": 10813 }, { - "epoch": 0.30644110062625746, + "epoch": 0.4231160497691525, "grad_norm": 0.0, - "learning_rate": 1.6252820504248477e-05, - "loss": 0.9304, + "learning_rate": 1.2932353716022646e-05, + "loss": 0.8955, "step": 10814 }, { - "epoch": 0.30646943806851995, + "epoch": 0.42315517646138195, "grad_norm": 0.0, - "learning_rate": 1.625210423623224e-05, - "loss": 0.9941, + "learning_rate": 1.2931142168492399e-05, + "loss": 0.8147, "step": 10815 }, { - "epoch": 0.3064977755107824, + "epoch": 0.4231943031536114, "grad_norm": 0.0, - "learning_rate": 1.6251387915552213e-05, - "loss": 1.0087, + "learning_rate": 1.2929930573891685e-05, + "loss": 0.9838, "step": 10816 }, { - "epoch": 0.3065261129530449, + "epoch": 0.42323342984584084, "grad_norm": 0.0, - "learning_rate": 1.625067154221442e-05, - "loss": 1.0311, + "learning_rate": 1.2928718932239957e-05, + "loss": 1.0261, "step": 10817 }, { - "epoch": 0.3065544503953073, + "epoch": 0.4232725565380703, "grad_norm": 0.0, - "learning_rate": 1.62499551162249e-05, - "loss": 0.8738, + "learning_rate": 1.2927507243556669e-05, + "loss": 1.1439, "step": 10818 }, { - "epoch": 0.30658278783756976, + "epoch": 0.4233116832302997, "grad_norm": 0.0, - "learning_rate": 1.624923863758969e-05, - "loss": 1.0045, + "learning_rate": 1.2926295507861287e-05, + "loss": 0.9851, "step": 10819 }, { - "epoch": 0.30661112527983225, + "epoch": 0.42335080992252916, "grad_norm": 0.0, - "learning_rate": 1.6248522106314814e-05, - "loss": 1.0147, + "learning_rate": 1.2925083725173264e-05, + "loss": 1.0941, "step": 10820 }, { - "epoch": 0.3066394627220947, + "epoch": 0.4233899366147586, "grad_norm": 0.0, - "learning_rate": 1.6247805522406324e-05, - "loss": 1.0844, + "learning_rate": 1.2923871895512064e-05, + "loss": 1.0995, "step": 10821 }, { - "epoch": 0.3066678001643572, + "epoch": 0.42342906330698804, "grad_norm": 0.0, - "learning_rate": 1.6247088885870244e-05, - "loss": 1.0526, + "learning_rate": 1.292266001889714e-05, + "loss": 1.0377, "step": 10822 }, { - "epoch": 0.3066961376066196, + "epoch": 0.4234681899992175, "grad_norm": 0.0, - "learning_rate": 1.6246372196712615e-05, - "loss": 1.0143, + "learning_rate": 1.2921448095347964e-05, + "loss": 1.0449, "step": 10823 }, { - "epoch": 0.30672447504888206, + "epoch": 0.4235073166914469, "grad_norm": 0.0, - "learning_rate": 1.6245655454939474e-05, - "loss": 0.9933, + "learning_rate": 1.2920236124883989e-05, + "loss": 1.0889, "step": 10824 }, { - "epoch": 0.30675281249114456, + "epoch": 0.42354644338367636, "grad_norm": 0.0, - "learning_rate": 1.624493866055686e-05, - "loss": 0.8859, + "learning_rate": 1.2919024107524683e-05, + "loss": 1.1101, "step": 10825 }, { - "epoch": 0.306781149933407, + "epoch": 0.4235855700759058, "grad_norm": 0.0, - "learning_rate": 1.6244221813570806e-05, - "loss": 0.9902, + "learning_rate": 1.2917812043289506e-05, + "loss": 1.142, "step": 10826 }, { - "epoch": 0.3068094873756695, + "epoch": 0.42362469676813524, "grad_norm": 0.0, - "learning_rate": 1.6243504913987357e-05, - "loss": 0.9955, + "learning_rate": 1.2916599932197924e-05, + "loss": 1.0593, "step": 10827 }, { - "epoch": 0.3068378248179319, + "epoch": 0.4236638234603647, "grad_norm": 0.0, - "learning_rate": 1.6242787961812543e-05, - "loss": 0.9018, + "learning_rate": 1.2915387774269403e-05, + "loss": 1.1215, "step": 10828 }, { - "epoch": 0.3068661622601944, + "epoch": 0.4237029501525941, "grad_norm": 0.0, - "learning_rate": 1.624207095705241e-05, - "loss": 1.1221, + "learning_rate": 1.2914175569523408e-05, + "loss": 1.0074, "step": 10829 }, { - "epoch": 0.30689449970245686, + "epoch": 0.4237420768448235, "grad_norm": 0.0, - "learning_rate": 1.6241353899712994e-05, - "loss": 0.9868, + "learning_rate": 1.2912963317979403e-05, + "loss": 1.0397, "step": 10830 }, { - "epoch": 0.3069228371447193, + "epoch": 0.42378120353705295, "grad_norm": 0.0, - "learning_rate": 1.6240636789800337e-05, - "loss": 0.9694, + "learning_rate": 1.2911751019656858e-05, + "loss": 0.9621, "step": 10831 }, { - "epoch": 0.3069511745869818, + "epoch": 0.4238203302292824, "grad_norm": 0.0, - "learning_rate": 1.6239919627320477e-05, - "loss": 0.9146, + "learning_rate": 1.2910538674575242e-05, + "loss": 1.1629, "step": 10832 }, { - "epoch": 0.30697951202924423, + "epoch": 0.42385945692151183, "grad_norm": 0.0, - "learning_rate": 1.623920241227946e-05, - "loss": 0.922, + "learning_rate": 1.290932628275402e-05, + "loss": 0.9405, "step": 10833 }, { - "epoch": 0.3070078494715067, + "epoch": 0.4238985836137413, "grad_norm": 0.0, - "learning_rate": 1.6238485144683323e-05, - "loss": 1.0804, + "learning_rate": 1.2908113844212665e-05, + "loss": 1.1071, "step": 10834 }, { - "epoch": 0.30703618691376916, + "epoch": 0.4239377103059707, "grad_norm": 0.0, - "learning_rate": 1.6237767824538112e-05, - "loss": 0.9797, + "learning_rate": 1.2906901358970643e-05, + "loss": 0.9796, "step": 10835 }, { - "epoch": 0.3070645243560316, + "epoch": 0.42397683699820016, "grad_norm": 0.0, - "learning_rate": 1.6237050451849862e-05, - "loss": 1.03, + "learning_rate": 1.2905688827047431e-05, + "loss": 1.0019, "step": 10836 }, { - "epoch": 0.3070928617982941, + "epoch": 0.4240159636904296, "grad_norm": 0.0, - "learning_rate": 1.6236333026624623e-05, - "loss": 0.8972, + "learning_rate": 1.2904476248462496e-05, + "loss": 1.0723, "step": 10837 }, { - "epoch": 0.30712119924055653, + "epoch": 0.42405509038265904, "grad_norm": 0.0, - "learning_rate": 1.6235615548868434e-05, - "loss": 1.0463, + "learning_rate": 1.2903263623235312e-05, + "loss": 1.0392, "step": 10838 }, { - "epoch": 0.307149536682819, + "epoch": 0.4240942170748885, "grad_norm": 0.0, - "learning_rate": 1.6234898018587336e-05, - "loss": 1.1473, + "learning_rate": 1.2902050951385353e-05, + "loss": 1.1155, "step": 10839 }, { - "epoch": 0.30717787412508146, + "epoch": 0.4241333437671179, "grad_norm": 0.0, - "learning_rate": 1.6234180435787382e-05, - "loss": 0.9886, + "learning_rate": 1.290083823293209e-05, + "loss": 1.0692, "step": 10840 }, { - "epoch": 0.30720621156734396, + "epoch": 0.42417247045934736, "grad_norm": 0.0, - "learning_rate": 1.6233462800474608e-05, - "loss": 0.9625, + "learning_rate": 1.2899625467895e-05, + "loss": 1.12, "step": 10841 }, { - "epoch": 0.3072345490096064, + "epoch": 0.4242115971515768, "grad_norm": 0.0, - "learning_rate": 1.6232745112655065e-05, - "loss": 0.9618, + "learning_rate": 1.289841265629356e-05, + "loss": 1.1561, "step": 10842 }, { - "epoch": 0.30726288645186883, + "epoch": 0.42425072384380624, "grad_norm": 0.0, - "learning_rate": 1.6232027372334793e-05, - "loss": 0.9722, + "learning_rate": 1.2897199798147243e-05, + "loss": 1.0982, "step": 10843 }, { - "epoch": 0.3072912238941313, + "epoch": 0.4242898505360357, "grad_norm": 0.0, - "learning_rate": 1.623130957951984e-05, - "loss": 0.9586, + "learning_rate": 1.289598689347553e-05, + "loss": 1.0093, "step": 10844 }, { - "epoch": 0.30731956133639377, + "epoch": 0.4243289772282651, "grad_norm": 0.0, - "learning_rate": 1.6230591734216252e-05, - "loss": 0.8907, + "learning_rate": 1.2894773942297896e-05, + "loss": 1.196, "step": 10845 }, { - "epoch": 0.30734789877865626, + "epoch": 0.42436810392049457, "grad_norm": 0.0, - "learning_rate": 1.6229873836430078e-05, - "loss": 0.9048, + "learning_rate": 1.289356094463382e-05, + "loss": 0.9801, "step": 10846 }, { - "epoch": 0.3073762362209187, + "epoch": 0.424407230612724, "grad_norm": 0.0, - "learning_rate": 1.6229155886167364e-05, - "loss": 0.9044, + "learning_rate": 1.289234790050278e-05, + "loss": 1.0156, "step": 10847 }, { - "epoch": 0.30740457366318114, + "epoch": 0.42444635730495345, "grad_norm": 0.0, - "learning_rate": 1.6228437883434158e-05, - "loss": 0.8614, + "learning_rate": 1.2891134809924257e-05, + "loss": 1.013, "step": 10848 }, { - "epoch": 0.30743291110544363, + "epoch": 0.4244854839971829, "grad_norm": 0.0, - "learning_rate": 1.6227719828236503e-05, - "loss": 0.9353, + "learning_rate": 1.2889921672917731e-05, + "loss": 1.1384, "step": 10849 }, { - "epoch": 0.30746124854770607, + "epoch": 0.42452461068941233, "grad_norm": 0.0, - "learning_rate": 1.622700172058045e-05, - "loss": 0.9486, + "learning_rate": 1.2888708489502686e-05, + "loss": 0.9999, "step": 10850 }, { - "epoch": 0.30748958598996856, + "epoch": 0.42456373738164177, "grad_norm": 0.0, - "learning_rate": 1.6226283560472053e-05, - "loss": 0.9714, + "learning_rate": 1.2887495259698602e-05, + "loss": 1.1664, "step": 10851 }, { - "epoch": 0.307517923432231, + "epoch": 0.4246028640738712, "grad_norm": 0.0, - "learning_rate": 1.6225565347917357e-05, - "loss": 1.0287, + "learning_rate": 1.2886281983524962e-05, + "loss": 1.0167, "step": 10852 }, { - "epoch": 0.3075462608744935, + "epoch": 0.42464199076610065, "grad_norm": 0.0, - "learning_rate": 1.622484708292241e-05, - "loss": 1.0154, + "learning_rate": 1.288506866100125e-05, + "loss": 1.0237, "step": 10853 }, { - "epoch": 0.30757459831675593, + "epoch": 0.4246811174583301, "grad_norm": 0.0, - "learning_rate": 1.622412876549327e-05, - "loss": 0.9457, + "learning_rate": 1.288385529214695e-05, + "loss": 1.0122, "step": 10854 }, { - "epoch": 0.30760293575901837, + "epoch": 0.42472024415055953, "grad_norm": 0.0, - "learning_rate": 1.6223410395635976e-05, - "loss": 0.8823, + "learning_rate": 1.288264187698155e-05, + "loss": 1.2428, "step": 10855 }, { - "epoch": 0.30763127320128086, + "epoch": 0.424759370842789, "grad_norm": 0.0, - "learning_rate": 1.6222691973356587e-05, - "loss": 0.8383, + "learning_rate": 1.2881428415524531e-05, + "loss": 1.0507, "step": 10856 }, { - "epoch": 0.3076596106435433, + "epoch": 0.4247984975350184, "grad_norm": 0.0, - "learning_rate": 1.622197349866115e-05, - "loss": 0.977, + "learning_rate": 1.2880214907795383e-05, + "loss": 0.9315, "step": 10857 }, { - "epoch": 0.3076879480858058, + "epoch": 0.4248376242272478, "grad_norm": 0.0, - "learning_rate": 1.6221254971555726e-05, - "loss": 1.1367, + "learning_rate": 1.2879001353813595e-05, + "loss": 1.0302, "step": 10858 }, { - "epoch": 0.30771628552806823, + "epoch": 0.42487675091947724, "grad_norm": 0.0, - "learning_rate": 1.6220536392046357e-05, - "loss": 0.873, + "learning_rate": 1.2877787753598647e-05, + "loss": 1.2037, "step": 10859 }, { - "epoch": 0.3077446229703307, + "epoch": 0.4249158776117067, "grad_norm": 0.0, - "learning_rate": 1.6219817760139103e-05, - "loss": 0.9967, + "learning_rate": 1.287657410717004e-05, + "loss": 1.123, "step": 10860 }, { - "epoch": 0.30777296041259317, + "epoch": 0.4249550043039361, "grad_norm": 0.0, - "learning_rate": 1.621909907584001e-05, - "loss": 0.9908, + "learning_rate": 1.2875360414547256e-05, + "loss": 1.132, "step": 10861 }, { - "epoch": 0.3078012978548556, + "epoch": 0.42499413099616556, "grad_norm": 0.0, - "learning_rate": 1.621838033915514e-05, - "loss": 0.9045, + "learning_rate": 1.2874146675749784e-05, + "loss": 1.0009, "step": 10862 }, { - "epoch": 0.3078296352971181, + "epoch": 0.425033257688395, "grad_norm": 0.0, - "learning_rate": 1.621766155009054e-05, - "loss": 1.0004, + "learning_rate": 1.2872932890797121e-05, + "loss": 0.9825, "step": 10863 }, { - "epoch": 0.30785797273938054, + "epoch": 0.42507238438062445, "grad_norm": 0.0, - "learning_rate": 1.6216942708652276e-05, - "loss": 1.0546, + "learning_rate": 1.2871719059708751e-05, + "loss": 1.0756, "step": 10864 }, { - "epoch": 0.30788631018164303, + "epoch": 0.4251115110728539, "grad_norm": 0.0, - "learning_rate": 1.6216223814846385e-05, - "loss": 0.92, + "learning_rate": 1.2870505182504175e-05, + "loss": 0.9943, "step": 10865 }, { - "epoch": 0.30791464762390547, + "epoch": 0.42515063776508333, "grad_norm": 0.0, - "learning_rate": 1.6215504868678937e-05, - "loss": 0.9829, + "learning_rate": 1.2869291259202886e-05, + "loss": 1.1245, "step": 10866 }, { - "epoch": 0.3079429850661679, + "epoch": 0.42518976445731277, "grad_norm": 0.0, - "learning_rate": 1.6214785870155983e-05, - "loss": 0.9666, + "learning_rate": 1.2868077289824368e-05, + "loss": 0.9968, "step": 10867 }, { - "epoch": 0.3079713225084304, + "epoch": 0.4252288911495422, "grad_norm": 0.0, - "learning_rate": 1.6214066819283577e-05, - "loss": 0.9131, + "learning_rate": 1.2866863274388128e-05, + "loss": 1.0561, "step": 10868 }, { - "epoch": 0.30799965995069284, + "epoch": 0.42526801784177165, "grad_norm": 0.0, - "learning_rate": 1.621334771606778e-05, - "loss": 0.9019, + "learning_rate": 1.2865649212913654e-05, + "loss": 1.0556, "step": 10869 }, { - "epoch": 0.30802799739295533, + "epoch": 0.4253071445340011, "grad_norm": 0.0, - "learning_rate": 1.6212628560514652e-05, - "loss": 0.9864, + "learning_rate": 1.2864435105420442e-05, + "loss": 0.9847, "step": 10870 }, { - "epoch": 0.30805633483521777, + "epoch": 0.42534627122623053, "grad_norm": 0.0, - "learning_rate": 1.6211909352630246e-05, - "loss": 1.0175, + "learning_rate": 1.2863220951927995e-05, + "loss": 1.191, "step": 10871 }, { - "epoch": 0.3080846722774802, + "epoch": 0.42538539791846, "grad_norm": 0.0, - "learning_rate": 1.6211190092420616e-05, - "loss": 0.9957, + "learning_rate": 1.2862006752455806e-05, + "loss": 1.1302, "step": 10872 }, { - "epoch": 0.3081130097197427, + "epoch": 0.4254245246106894, "grad_norm": 0.0, - "learning_rate": 1.621047077989183e-05, - "loss": 0.8391, + "learning_rate": 1.2860792507023374e-05, + "loss": 1.2042, "step": 10873 }, { - "epoch": 0.30814134716200514, + "epoch": 0.42546365130291885, "grad_norm": 0.0, - "learning_rate": 1.6209751415049937e-05, - "loss": 0.7892, + "learning_rate": 1.2859578215650202e-05, + "loss": 1.0479, "step": 10874 }, { - "epoch": 0.30816968460426764, + "epoch": 0.4255027779951483, "grad_norm": 0.0, - "learning_rate": 1.6209031997901006e-05, - "loss": 0.791, + "learning_rate": 1.2858363878355786e-05, + "loss": 1.1042, "step": 10875 }, { - "epoch": 0.3081980220465301, + "epoch": 0.42554190468737774, "grad_norm": 0.0, - "learning_rate": 1.6208312528451094e-05, - "loss": 0.947, + "learning_rate": 1.2857149495159627e-05, + "loss": 1.1573, "step": 10876 }, { - "epoch": 0.30822635948879257, + "epoch": 0.4255810313796072, "grad_norm": 0.0, - "learning_rate": 1.6207593006706256e-05, - "loss": 0.9696, + "learning_rate": 1.2855935066081227e-05, + "loss": 1.1351, "step": 10877 }, { - "epoch": 0.308254696931055, + "epoch": 0.4256201580718366, "grad_norm": 0.0, - "learning_rate": 1.620687343267256e-05, - "loss": 0.986, + "learning_rate": 1.2854720591140088e-05, + "loss": 1.0562, "step": 10878 }, { - "epoch": 0.30828303437331744, + "epoch": 0.42565928476406606, "grad_norm": 0.0, - "learning_rate": 1.6206153806356062e-05, - "loss": 0.9886, + "learning_rate": 1.2853506070355717e-05, + "loss": 1.1557, "step": 10879 }, { - "epoch": 0.30831137181557994, + "epoch": 0.4256984114562955, "grad_norm": 0.0, - "learning_rate": 1.6205434127762827e-05, - "loss": 0.8535, + "learning_rate": 1.285229150374761e-05, + "loss": 1.1676, "step": 10880 }, { - "epoch": 0.3083397092578424, + "epoch": 0.42573753814852494, "grad_norm": 0.0, - "learning_rate": 1.6204714396898916e-05, - "loss": 1.1142, + "learning_rate": 1.2851076891335277e-05, + "loss": 1.0314, "step": 10881 }, { - "epoch": 0.30836804670010487, + "epoch": 0.4257766648407544, "grad_norm": 0.0, - "learning_rate": 1.6203994613770393e-05, - "loss": 1.0273, + "learning_rate": 1.2849862233138222e-05, + "loss": 1.0946, "step": 10882 }, { - "epoch": 0.3083963841423673, + "epoch": 0.4258157915329838, "grad_norm": 0.0, - "learning_rate": 1.620327477838332e-05, - "loss": 1.0607, + "learning_rate": 1.284864752917595e-05, + "loss": 1.0735, "step": 10883 }, { - "epoch": 0.30842472158462975, + "epoch": 0.42585491822521326, "grad_norm": 0.0, - "learning_rate": 1.6202554890743754e-05, - "loss": 0.9409, + "learning_rate": 1.284743277946797e-05, + "loss": 1.0451, "step": 10884 }, { - "epoch": 0.30845305902689224, + "epoch": 0.4258940449174427, "grad_norm": 0.0, - "learning_rate": 1.620183495085777e-05, - "loss": 0.8416, + "learning_rate": 1.2846217984033786e-05, + "loss": 1.0697, "step": 10885 }, { - "epoch": 0.3084813964691547, + "epoch": 0.42593317160967215, "grad_norm": 0.0, - "learning_rate": 1.6201114958731427e-05, - "loss": 0.949, + "learning_rate": 1.284500314289291e-05, + "loss": 1.0024, "step": 10886 }, { - "epoch": 0.30850973391141717, + "epoch": 0.42597229830190153, "grad_norm": 0.0, - "learning_rate": 1.620039491437079e-05, - "loss": 1.071, + "learning_rate": 1.2843788256064844e-05, + "loss": 0.9757, "step": 10887 }, { - "epoch": 0.3085380713536796, + "epoch": 0.42601142499413097, "grad_norm": 0.0, - "learning_rate": 1.6199674817781924e-05, - "loss": 0.9776, + "learning_rate": 1.2842573323569107e-05, + "loss": 1.1214, "step": 10888 }, { - "epoch": 0.3085664087959421, + "epoch": 0.4260505516863604, "grad_norm": 0.0, - "learning_rate": 1.6198954668970893e-05, - "loss": 0.9278, + "learning_rate": 1.2841358345425202e-05, + "loss": 1.0215, "step": 10889 }, { - "epoch": 0.30859474623820454, + "epoch": 0.42608967837858985, "grad_norm": 0.0, - "learning_rate": 1.6198234467943765e-05, - "loss": 1.0067, + "learning_rate": 1.2840143321652642e-05, + "loss": 1.048, "step": 10890 }, { - "epoch": 0.308623083680467, + "epoch": 0.4261288050708193, "grad_norm": 0.0, - "learning_rate": 1.619751421470661e-05, - "loss": 0.9885, + "learning_rate": 1.2838928252270937e-05, + "loss": 1.1561, "step": 10891 }, { - "epoch": 0.3086514211227295, + "epoch": 0.42616793176304874, "grad_norm": 0.0, - "learning_rate": 1.619679390926549e-05, - "loss": 0.8412, + "learning_rate": 1.2837713137299605e-05, + "loss": 1.1036, "step": 10892 }, { - "epoch": 0.3086797585649919, + "epoch": 0.4262070584552782, "grad_norm": 0.0, - "learning_rate": 1.619607355162647e-05, - "loss": 1.036, + "learning_rate": 1.2836497976758156e-05, + "loss": 1.1544, "step": 10893 }, { - "epoch": 0.3087080960072544, + "epoch": 0.4262461851475076, "grad_norm": 0.0, - "learning_rate": 1.619535314179563e-05, - "loss": 0.8943, + "learning_rate": 1.2835282770666101e-05, + "loss": 1.04, "step": 10894 }, { - "epoch": 0.30873643344951684, + "epoch": 0.42628531183973706, "grad_norm": 0.0, - "learning_rate": 1.619463267977902e-05, - "loss": 0.9476, + "learning_rate": 1.2834067519042962e-05, + "loss": 1.0612, "step": 10895 }, { - "epoch": 0.3087647708917793, + "epoch": 0.4263244385319665, "grad_norm": 0.0, - "learning_rate": 1.6193912165582727e-05, - "loss": 1.0009, + "learning_rate": 1.2832852221908247e-05, + "loss": 1.0952, "step": 10896 }, { - "epoch": 0.3087931083340418, + "epoch": 0.42636356522419594, "grad_norm": 0.0, - "learning_rate": 1.6193191599212806e-05, - "loss": 0.9068, + "learning_rate": 1.2831636879281475e-05, + "loss": 0.9871, "step": 10897 }, { - "epoch": 0.3088214457763042, + "epoch": 0.4264026919164254, "grad_norm": 0.0, - "learning_rate": 1.6192470980675335e-05, - "loss": 0.9469, + "learning_rate": 1.2830421491182164e-05, + "loss": 1.075, "step": 10898 }, { - "epoch": 0.3088497832185667, + "epoch": 0.4264418186086548, "grad_norm": 0.0, - "learning_rate": 1.619175030997638e-05, - "loss": 1.0551, + "learning_rate": 1.282920605762983e-05, + "loss": 1.0811, "step": 10899 }, { - "epoch": 0.30887812066082915, + "epoch": 0.42648094530088426, "grad_norm": 0.0, - "learning_rate": 1.6191029587122013e-05, - "loss": 0.9569, + "learning_rate": 1.2827990578643994e-05, + "loss": 0.9326, "step": 10900 }, { - "epoch": 0.30890645810309164, + "epoch": 0.4265200719931137, "grad_norm": 0.0, - "learning_rate": 1.6190308812118305e-05, - "loss": 0.952, + "learning_rate": 1.2826775054244167e-05, + "loss": 1.1078, "step": 10901 }, { - "epoch": 0.3089347955453541, + "epoch": 0.42655919868534314, "grad_norm": 0.0, - "learning_rate": 1.6189587984971327e-05, - "loss": 0.962, + "learning_rate": 1.2825559484449882e-05, + "loss": 1.1224, "step": 10902 }, { - "epoch": 0.3089631329876165, + "epoch": 0.4265983253775726, "grad_norm": 0.0, - "learning_rate": 1.618886710568715e-05, - "loss": 0.9021, + "learning_rate": 1.2824343869280648e-05, + "loss": 1.103, "step": 10903 }, { - "epoch": 0.308991470429879, + "epoch": 0.426637452069802, "grad_norm": 0.0, - "learning_rate": 1.618814617427185e-05, - "loss": 0.9093, + "learning_rate": 1.282312820875599e-05, + "loss": 0.9832, "step": 10904 }, { - "epoch": 0.30901980787214145, + "epoch": 0.42667657876203147, "grad_norm": 0.0, - "learning_rate": 1.6187425190731496e-05, - "loss": 1.0415, + "learning_rate": 1.2821912502895436e-05, + "loss": 1.1079, "step": 10905 }, { - "epoch": 0.30904814531440394, + "epoch": 0.4267157054542609, "grad_norm": 0.0, - "learning_rate": 1.6186704155072162e-05, - "loss": 0.9529, + "learning_rate": 1.28206967517185e-05, + "loss": 1.0634, "step": 10906 }, { - "epoch": 0.3090764827566664, + "epoch": 0.42675483214649035, "grad_norm": 0.0, - "learning_rate": 1.618598306729992e-05, - "loss": 0.8805, + "learning_rate": 1.2819480955244705e-05, + "loss": 0.9874, "step": 10907 }, { - "epoch": 0.3091048201989288, + "epoch": 0.4267939588387198, "grad_norm": 0.0, - "learning_rate": 1.6185261927420845e-05, - "loss": 1.0092, + "learning_rate": 1.2818265113493582e-05, + "loss": 1.1507, "step": 10908 }, { - "epoch": 0.3091331576411913, + "epoch": 0.42683308553094923, "grad_norm": 0.0, - "learning_rate": 1.6184540735441015e-05, - "loss": 0.95, + "learning_rate": 1.2817049226484652e-05, + "loss": 1.048, "step": 10909 }, { - "epoch": 0.30916149508345375, + "epoch": 0.42687221222317867, "grad_norm": 0.0, - "learning_rate": 1.61838194913665e-05, - "loss": 0.9459, + "learning_rate": 1.2815833294237444e-05, + "loss": 1.0288, "step": 10910 }, { - "epoch": 0.30918983252571625, + "epoch": 0.4269113389154081, "grad_norm": 0.0, - "learning_rate": 1.6183098195203376e-05, - "loss": 0.9635, + "learning_rate": 1.2814617316771478e-05, + "loss": 1.0243, "step": 10911 }, { - "epoch": 0.3092181699679787, + "epoch": 0.42695046560763755, "grad_norm": 0.0, - "learning_rate": 1.6182376846957724e-05, - "loss": 0.9201, + "learning_rate": 1.2813401294106286e-05, + "loss": 1.0409, "step": 10912 }, { - "epoch": 0.3092465074102412, + "epoch": 0.426989592299867, "grad_norm": 0.0, - "learning_rate": 1.6181655446635613e-05, - "loss": 0.9852, + "learning_rate": 1.2812185226261396e-05, + "loss": 1.175, "step": 10913 }, { - "epoch": 0.3092748448525036, + "epoch": 0.42702871899209643, "grad_norm": 0.0, - "learning_rate": 1.6180933994243123e-05, - "loss": 0.9204, + "learning_rate": 1.2810969113256335e-05, + "loss": 1.1893, "step": 10914 }, { - "epoch": 0.30930318229476605, + "epoch": 0.4270678456843258, "grad_norm": 0.0, - "learning_rate": 1.618021248978633e-05, - "loss": 0.9068, + "learning_rate": 1.2809752955110632e-05, + "loss": 1.1122, "step": 10915 }, { - "epoch": 0.30933151973702855, + "epoch": 0.42710697237655526, "grad_norm": 0.0, - "learning_rate": 1.6179490933271312e-05, - "loss": 0.992, + "learning_rate": 1.2808536751843816e-05, + "loss": 1.0891, "step": 10916 }, { - "epoch": 0.309359857179291, + "epoch": 0.4271460990687847, "grad_norm": 0.0, - "learning_rate": 1.6178769324704148e-05, - "loss": 0.9733, + "learning_rate": 1.2807320503475422e-05, + "loss": 1.0981, "step": 10917 }, { - "epoch": 0.3093881946215535, + "epoch": 0.42718522576101414, "grad_norm": 0.0, - "learning_rate": 1.6178047664090915e-05, - "loss": 0.8163, + "learning_rate": 1.2806104210024974e-05, + "loss": 1.1289, "step": 10918 }, { - "epoch": 0.3094165320638159, + "epoch": 0.4272243524532436, "grad_norm": 0.0, - "learning_rate": 1.6177325951437693e-05, - "loss": 0.9684, + "learning_rate": 1.2804887871512013e-05, + "loss": 1.1738, "step": 10919 }, { - "epoch": 0.30944486950607836, + "epoch": 0.427263479145473, "grad_norm": 0.0, - "learning_rate": 1.6176604186750563e-05, - "loss": 0.8562, + "learning_rate": 1.2803671487956063e-05, + "loss": 1.1876, "step": 10920 }, { - "epoch": 0.30947320694834085, + "epoch": 0.42730260583770246, "grad_norm": 0.0, - "learning_rate": 1.61758823700356e-05, - "loss": 0.9561, + "learning_rate": 1.280245505937667e-05, + "loss": 1.0464, "step": 10921 }, { - "epoch": 0.3095015443906033, + "epoch": 0.4273417325299319, "grad_norm": 0.0, - "learning_rate": 1.6175160501298887e-05, - "loss": 0.9097, + "learning_rate": 1.2801238585793352e-05, + "loss": 1.1959, "step": 10922 }, { - "epoch": 0.3095298818328658, + "epoch": 0.42738085922216135, "grad_norm": 0.0, - "learning_rate": 1.6174438580546502e-05, - "loss": 0.878, + "learning_rate": 1.2800022067225655e-05, + "loss": 1.114, "step": 10923 }, { - "epoch": 0.3095582192751282, + "epoch": 0.4274199859143908, "grad_norm": 0.0, - "learning_rate": 1.617371660778453e-05, - "loss": 1.055, + "learning_rate": 1.2798805503693115e-05, + "loss": 1.071, "step": 10924 }, { - "epoch": 0.3095865567173907, + "epoch": 0.42745911260662023, "grad_norm": 0.0, - "learning_rate": 1.617299458301905e-05, - "loss": 0.9506, + "learning_rate": 1.2797588895215263e-05, + "loss": 0.9398, "step": 10925 }, { - "epoch": 0.30961489415965315, + "epoch": 0.42749823929884967, "grad_norm": 0.0, - "learning_rate": 1.6172272506256144e-05, - "loss": 1.0059, + "learning_rate": 1.2796372241811642e-05, + "loss": 1.1363, "step": 10926 }, { - "epoch": 0.3096432316019156, + "epoch": 0.4275373659910791, "grad_norm": 0.0, - "learning_rate": 1.6171550377501893e-05, - "loss": 0.9296, + "learning_rate": 1.279515554350178e-05, + "loss": 1.1516, "step": 10927 }, { - "epoch": 0.3096715690441781, + "epoch": 0.42757649268330855, "grad_norm": 0.0, - "learning_rate": 1.6170828196762383e-05, - "loss": 0.9882, + "learning_rate": 1.279393880030523e-05, + "loss": 1.0545, "step": 10928 }, { - "epoch": 0.3096999064864405, + "epoch": 0.427615619375538, "grad_norm": 0.0, - "learning_rate": 1.6170105964043698e-05, - "loss": 0.9766, + "learning_rate": 1.2792722012241519e-05, + "loss": 1.112, "step": 10929 }, { - "epoch": 0.309728243928703, + "epoch": 0.42765474606776743, "grad_norm": 0.0, - "learning_rate": 1.6169383679351915e-05, - "loss": 0.9859, + "learning_rate": 1.2791505179330195e-05, + "loss": 1.1216, "step": 10930 }, { - "epoch": 0.30975658137096546, + "epoch": 0.4276938727599969, "grad_norm": 0.0, - "learning_rate": 1.6168661342693125e-05, - "loss": 1.0067, + "learning_rate": 1.2790288301590793e-05, + "loss": 1.1363, "step": 10931 }, { - "epoch": 0.3097849188132279, + "epoch": 0.4277329994522263, "grad_norm": 0.0, - "learning_rate": 1.6167938954073408e-05, - "loss": 0.9749, + "learning_rate": 1.278907137904286e-05, + "loss": 1.0426, "step": 10932 }, { - "epoch": 0.3098132562554904, + "epoch": 0.42777212614445576, "grad_norm": 0.0, - "learning_rate": 1.6167216513498854e-05, - "loss": 1.048, + "learning_rate": 1.2787854411705935e-05, + "loss": 1.2088, "step": 10933 }, { - "epoch": 0.3098415936977528, + "epoch": 0.4278112528366852, "grad_norm": 0.0, - "learning_rate": 1.6166494020975543e-05, - "loss": 1.0243, + "learning_rate": 1.278663739959956e-05, + "loss": 1.1211, "step": 10934 }, { - "epoch": 0.3098699311400153, + "epoch": 0.42785037952891464, "grad_norm": 0.0, - "learning_rate": 1.6165771476509563e-05, - "loss": 0.8622, + "learning_rate": 1.2785420342743282e-05, + "loss": 1.0108, "step": 10935 }, { - "epoch": 0.30989826858227776, + "epoch": 0.4278895062211441, "grad_norm": 0.0, - "learning_rate": 1.6165048880107002e-05, - "loss": 0.9066, + "learning_rate": 1.2784203241156642e-05, + "loss": 1.135, "step": 10936 }, { - "epoch": 0.30992660602454025, + "epoch": 0.4279286329133735, "grad_norm": 0.0, - "learning_rate": 1.6164326231773942e-05, - "loss": 1.2172, + "learning_rate": 1.2782986094859187e-05, + "loss": 1.1052, "step": 10937 }, { - "epoch": 0.3099549434668027, + "epoch": 0.42796775960560296, "grad_norm": 0.0, - "learning_rate": 1.6163603531516475e-05, - "loss": 0.9906, + "learning_rate": 1.2781768903870462e-05, + "loss": 1.1212, "step": 10938 }, { - "epoch": 0.30998328090906513, + "epoch": 0.4280068862978324, "grad_norm": 0.0, - "learning_rate": 1.6162880779340686e-05, - "loss": 0.933, + "learning_rate": 1.2780551668210016e-05, + "loss": 1.0382, "step": 10939 }, { - "epoch": 0.3100116183513276, + "epoch": 0.42804601299006184, "grad_norm": 0.0, - "learning_rate": 1.6162157975252663e-05, - "loss": 0.9637, + "learning_rate": 1.2779334387897393e-05, + "loss": 1.1395, "step": 10940 }, { - "epoch": 0.31003995579359006, + "epoch": 0.4280851396822913, "grad_norm": 0.0, - "learning_rate": 1.6161435119258495e-05, - "loss": 1.0118, + "learning_rate": 1.2778117062952143e-05, + "loss": 1.157, "step": 10941 }, { - "epoch": 0.31006829323585255, + "epoch": 0.4281242663745207, "grad_norm": 0.0, - "learning_rate": 1.6160712211364274e-05, - "loss": 0.8681, + "learning_rate": 1.2776899693393815e-05, + "loss": 1.049, "step": 10942 }, { - "epoch": 0.310096630678115, + "epoch": 0.42816339306675016, "grad_norm": 0.0, - "learning_rate": 1.615998925157608e-05, - "loss": 1.0239, + "learning_rate": 1.2775682279241956e-05, + "loss": 1.11, "step": 10943 }, { - "epoch": 0.31012496812037743, + "epoch": 0.42820251975897955, "grad_norm": 0.0, - "learning_rate": 1.6159266239900015e-05, - "loss": 0.9491, + "learning_rate": 1.277446482051612e-05, + "loss": 1.1176, "step": 10944 }, { - "epoch": 0.3101533055626399, + "epoch": 0.428241646451209, "grad_norm": 0.0, - "learning_rate": 1.6158543176342162e-05, - "loss": 1.0431, + "learning_rate": 1.2773247317235855e-05, + "loss": 0.9989, "step": 10945 }, { - "epoch": 0.31018164300490236, + "epoch": 0.42828077314343843, "grad_norm": 0.0, - "learning_rate": 1.6157820060908616e-05, - "loss": 0.9329, + "learning_rate": 1.277202976942071e-05, + "loss": 1.0042, "step": 10946 }, { - "epoch": 0.31020998044716486, + "epoch": 0.42831989983566787, "grad_norm": 0.0, - "learning_rate": 1.615709689360546e-05, - "loss": 0.9903, + "learning_rate": 1.2770812177090243e-05, + "loss": 1.1231, "step": 10947 }, { - "epoch": 0.3102383178894273, + "epoch": 0.4283590265278973, "grad_norm": 0.0, - "learning_rate": 1.615637367443879e-05, - "loss": 1.0716, + "learning_rate": 1.2769594540264008e-05, + "loss": 0.8767, "step": 10948 }, { - "epoch": 0.3102666553316898, + "epoch": 0.42839815322012675, "grad_norm": 0.0, - "learning_rate": 1.6155650403414703e-05, - "loss": 0.9943, + "learning_rate": 1.2768376858961549e-05, + "loss": 1.0217, "step": 10949 }, { - "epoch": 0.3102949927739522, + "epoch": 0.4284372799123562, "grad_norm": 0.0, - "learning_rate": 1.615492708053928e-05, - "loss": 1.0526, + "learning_rate": 1.276715913320243e-05, + "loss": 0.8254, "step": 10950 }, { - "epoch": 0.31032333021621467, + "epoch": 0.42847640660458564, "grad_norm": 0.0, - "learning_rate": 1.615420370581863e-05, - "loss": 0.924, + "learning_rate": 1.27659413630062e-05, + "loss": 1.1548, "step": 10951 }, { - "epoch": 0.31035166765847716, + "epoch": 0.4285155332968151, "grad_norm": 0.0, - "learning_rate": 1.615348027925883e-05, - "loss": 0.8665, + "learning_rate": 1.2764723548392421e-05, + "loss": 1.0157, "step": 10952 }, { - "epoch": 0.3103800051007396, + "epoch": 0.4285546599890445, "grad_norm": 0.0, - "learning_rate": 1.6152756800865984e-05, - "loss": 0.9082, + "learning_rate": 1.2763505689380647e-05, + "loss": 1.1308, "step": 10953 }, { - "epoch": 0.3104083425430021, + "epoch": 0.42859378668127396, "grad_norm": 0.0, - "learning_rate": 1.615203327064618e-05, - "loss": 0.9272, + "learning_rate": 1.2762287785990428e-05, + "loss": 0.9818, "step": 10954 }, { - "epoch": 0.31043667998526453, + "epoch": 0.4286329133735034, "grad_norm": 0.0, - "learning_rate": 1.6151309688605518e-05, - "loss": 0.9481, + "learning_rate": 1.2761069838241334e-05, + "loss": 0.9811, "step": 10955 }, { - "epoch": 0.31046501742752697, + "epoch": 0.42867204006573284, "grad_norm": 0.0, - "learning_rate": 1.6150586054750085e-05, - "loss": 1.0185, + "learning_rate": 1.2759851846152914e-05, + "loss": 1.0554, "step": 10956 }, { - "epoch": 0.31049335486978946, + "epoch": 0.4287111667579623, "grad_norm": 0.0, - "learning_rate": 1.6149862369085986e-05, - "loss": 0.9142, + "learning_rate": 1.2758633809744736e-05, + "loss": 1.0678, "step": 10957 }, { - "epoch": 0.3105216923120519, + "epoch": 0.4287502934501917, "grad_norm": 0.0, - "learning_rate": 1.614913863161931e-05, - "loss": 0.939, + "learning_rate": 1.2757415729036354e-05, + "loss": 1.0404, "step": 10958 }, { - "epoch": 0.3105500297543144, + "epoch": 0.42878942014242116, "grad_norm": 0.0, - "learning_rate": 1.614841484235616e-05, - "loss": 0.9969, + "learning_rate": 1.2756197604047333e-05, + "loss": 1.0457, "step": 10959 }, { - "epoch": 0.31057836719657683, + "epoch": 0.4288285468346506, "grad_norm": 0.0, - "learning_rate": 1.6147691001302628e-05, - "loss": 0.8671, + "learning_rate": 1.2754979434797228e-05, + "loss": 1.0552, "step": 10960 }, { - "epoch": 0.3106067046388393, + "epoch": 0.42886767352688004, "grad_norm": 0.0, - "learning_rate": 1.6146967108464808e-05, - "loss": 1.0468, + "learning_rate": 1.2753761221305609e-05, + "loss": 1.1204, "step": 10961 }, { - "epoch": 0.31063504208110176, + "epoch": 0.4289068002191095, "grad_norm": 0.0, - "learning_rate": 1.6146243163848802e-05, - "loss": 0.8605, + "learning_rate": 1.2752542963592033e-05, + "loss": 1.0777, "step": 10962 }, { - "epoch": 0.3106633795233642, + "epoch": 0.4289459269113389, "grad_norm": 0.0, - "learning_rate": 1.614551916746071e-05, - "loss": 1.0749, + "learning_rate": 1.2751324661676068e-05, + "loss": 1.234, "step": 10963 }, { - "epoch": 0.3106917169656267, + "epoch": 0.42898505360356837, "grad_norm": 0.0, - "learning_rate": 1.6144795119306628e-05, - "loss": 1.0392, + "learning_rate": 1.2750106315577276e-05, + "loss": 1.0586, "step": 10964 }, { - "epoch": 0.31072005440788913, + "epoch": 0.4290241802957978, "grad_norm": 0.0, - "learning_rate": 1.6144071019392654e-05, - "loss": 0.9173, + "learning_rate": 1.2748887925315221e-05, + "loss": 0.9859, "step": 10965 }, { - "epoch": 0.31074839185015163, + "epoch": 0.42906330698802725, "grad_norm": 0.0, - "learning_rate": 1.6143346867724886e-05, - "loss": 0.9106, + "learning_rate": 1.2747669490909473e-05, + "loss": 1.1307, "step": 10966 }, { - "epoch": 0.31077672929241407, + "epoch": 0.4291024336802567, "grad_norm": 0.0, - "learning_rate": 1.6142622664309428e-05, - "loss": 1.0377, + "learning_rate": 1.2746451012379593e-05, + "loss": 1.1552, "step": 10967 }, { - "epoch": 0.3108050667346765, + "epoch": 0.42914156037248613, "grad_norm": 0.0, - "learning_rate": 1.614189840915238e-05, - "loss": 0.9091, + "learning_rate": 1.2745232489745153e-05, + "loss": 1.0354, "step": 10968 }, { - "epoch": 0.310833404176939, + "epoch": 0.42918068706471557, "grad_norm": 0.0, - "learning_rate": 1.6141174102259838e-05, - "loss": 1.0069, + "learning_rate": 1.2744013923025717e-05, + "loss": 1.0672, "step": 10969 }, { - "epoch": 0.31086174161920144, + "epoch": 0.429219813756945, "grad_norm": 0.0, - "learning_rate": 1.614044974363791e-05, - "loss": 0.8983, + "learning_rate": 1.2742795312240862e-05, + "loss": 1.0275, "step": 10970 }, { - "epoch": 0.31089007906146393, + "epoch": 0.42925894044917445, "grad_norm": 0.0, - "learning_rate": 1.613972533329269e-05, - "loss": 1.0199, + "learning_rate": 1.2741576657410144e-05, + "loss": 1.0955, "step": 10971 }, { - "epoch": 0.31091841650372637, + "epoch": 0.42929806714140384, "grad_norm": 0.0, - "learning_rate": 1.6139000871230286e-05, - "loss": 0.9725, + "learning_rate": 1.2740357958553144e-05, + "loss": 1.0874, "step": 10972 }, { - "epoch": 0.3109467539459888, + "epoch": 0.4293371938336333, "grad_norm": 0.0, - "learning_rate": 1.6138276357456796e-05, - "loss": 0.9298, + "learning_rate": 1.2739139215689428e-05, + "loss": 1.1138, "step": 10973 }, { - "epoch": 0.3109750913882513, + "epoch": 0.4293763205258627, "grad_norm": 0.0, - "learning_rate": 1.6137551791978325e-05, - "loss": 1.0292, + "learning_rate": 1.273792042883857e-05, + "loss": 1.0673, "step": 10974 }, { - "epoch": 0.31100342883051374, + "epoch": 0.42941544721809216, "grad_norm": 0.0, - "learning_rate": 1.6136827174800978e-05, - "loss": 0.9598, + "learning_rate": 1.273670159802014e-05, + "loss": 1.0771, "step": 10975 }, { - "epoch": 0.31103176627277623, + "epoch": 0.4294545739103216, "grad_norm": 0.0, - "learning_rate": 1.6136102505930857e-05, - "loss": 0.9657, + "learning_rate": 1.2735482723253711e-05, + "loss": 1.1649, "step": 10976 }, { - "epoch": 0.31106010371503867, + "epoch": 0.42949370060255104, "grad_norm": 0.0, - "learning_rate": 1.6135377785374063e-05, - "loss": 0.8861, + "learning_rate": 1.2734263804558858e-05, + "loss": 1.0911, "step": 10977 }, { - "epoch": 0.31108844115730117, + "epoch": 0.4295328272947805, "grad_norm": 0.0, - "learning_rate": 1.6134653013136703e-05, - "loss": 0.9872, + "learning_rate": 1.2733044841955153e-05, + "loss": 1.0431, "step": 10978 }, { - "epoch": 0.3111167785995636, + "epoch": 0.4295719539870099, "grad_norm": 0.0, - "learning_rate": 1.613392818922489e-05, - "loss": 1.0329, + "learning_rate": 1.2731825835462175e-05, + "loss": 1.0438, "step": 10979 }, { - "epoch": 0.31114511604182604, + "epoch": 0.42961108067923937, "grad_norm": 0.0, - "learning_rate": 1.6133203313644714e-05, - "loss": 0.9702, + "learning_rate": 1.2730606785099493e-05, + "loss": 1.0273, "step": 10980 }, { - "epoch": 0.31117345348408854, + "epoch": 0.4296502073714688, "grad_norm": 0.0, - "learning_rate": 1.6132478386402287e-05, - "loss": 0.9061, + "learning_rate": 1.2729387690886692e-05, + "loss": 1.0583, "step": 10981 }, { - "epoch": 0.311201790926351, + "epoch": 0.42968933406369825, "grad_norm": 0.0, - "learning_rate": 1.6131753407503724e-05, - "loss": 1.1038, + "learning_rate": 1.2728168552843345e-05, + "loss": 1.1367, "step": 10982 }, { - "epoch": 0.31123012836861347, + "epoch": 0.4297284607559277, "grad_norm": 0.0, - "learning_rate": 1.6131028376955125e-05, - "loss": 0.9917, + "learning_rate": 1.2726949370989026e-05, + "loss": 1.0068, "step": 10983 }, { - "epoch": 0.3112584658108759, + "epoch": 0.42976758744815713, "grad_norm": 0.0, - "learning_rate": 1.6130303294762595e-05, - "loss": 0.9379, + "learning_rate": 1.272573014534332e-05, + "loss": 0.8564, "step": 10984 }, { - "epoch": 0.31128680325313834, + "epoch": 0.42980671414038657, "grad_norm": 0.0, - "learning_rate": 1.6129578160932244e-05, - "loss": 0.9821, + "learning_rate": 1.2724510875925802e-05, + "loss": 0.886, "step": 10985 }, { - "epoch": 0.31131514069540084, + "epoch": 0.429845840832616, "grad_norm": 0.0, - "learning_rate": 1.6128852975470182e-05, - "loss": 0.9241, + "learning_rate": 1.2723291562756055e-05, + "loss": 1.1152, "step": 10986 }, { - "epoch": 0.3113434781376633, + "epoch": 0.42988496752484545, "grad_norm": 0.0, - "learning_rate": 1.6128127738382513e-05, - "loss": 0.9682, + "learning_rate": 1.2722072205853658e-05, + "loss": 0.9373, "step": 10987 }, { - "epoch": 0.31137181557992577, + "epoch": 0.4299240942170749, "grad_norm": 0.0, - "learning_rate": 1.6127402449675348e-05, - "loss": 0.9625, + "learning_rate": 1.2720852805238191e-05, + "loss": 1.1065, "step": 10988 }, { - "epoch": 0.3114001530221882, + "epoch": 0.42996322090930433, "grad_norm": 0.0, - "learning_rate": 1.61266771093548e-05, - "loss": 0.8186, + "learning_rate": 1.2719633360929238e-05, + "loss": 1.065, "step": 10989 }, { - "epoch": 0.3114284904644507, + "epoch": 0.4300023476015338, "grad_norm": 0.0, - "learning_rate": 1.6125951717426973e-05, - "loss": 1.0032, + "learning_rate": 1.2718413872946381e-05, + "loss": 1.1122, "step": 10990 }, { - "epoch": 0.31145682790671314, + "epoch": 0.4300414742937632, "grad_norm": 0.0, - "learning_rate": 1.612522627389798e-05, - "loss": 0.9676, + "learning_rate": 1.2717194341309203e-05, + "loss": 1.0864, "step": 10991 }, { - "epoch": 0.3114851653489756, + "epoch": 0.43008060098599266, "grad_norm": 0.0, - "learning_rate": 1.6124500778773934e-05, - "loss": 0.9611, + "learning_rate": 1.2715974766037289e-05, + "loss": 0.9189, "step": 10992 }, { - "epoch": 0.3115135027912381, + "epoch": 0.4301197276782221, "grad_norm": 0.0, - "learning_rate": 1.612377523206094e-05, - "loss": 0.9069, + "learning_rate": 1.2714755147150222e-05, + "loss": 1.104, "step": 10993 }, { - "epoch": 0.3115418402335005, + "epoch": 0.43015885437045154, "grad_norm": 0.0, - "learning_rate": 1.6123049633765117e-05, - "loss": 0.9139, + "learning_rate": 1.2713535484667592e-05, + "loss": 1.2152, "step": 10994 }, { - "epoch": 0.311570177675763, + "epoch": 0.430197981062681, "grad_norm": 0.0, - "learning_rate": 1.612232398389257e-05, - "loss": 0.9494, + "learning_rate": 1.2712315778608982e-05, + "loss": 1.1678, "step": 10995 }, { - "epoch": 0.31159851511802544, + "epoch": 0.4302371077549104, "grad_norm": 0.0, - "learning_rate": 1.6121598282449414e-05, - "loss": 1.039, + "learning_rate": 1.2711096028993977e-05, + "loss": 1.1193, "step": 10996 }, { - "epoch": 0.3116268525602879, + "epoch": 0.43027623444713986, "grad_norm": 0.0, - "learning_rate": 1.6120872529441766e-05, - "loss": 1.0298, + "learning_rate": 1.2709876235842166e-05, + "loss": 1.1394, "step": 10997 }, { - "epoch": 0.3116551900025504, + "epoch": 0.4303153611393693, "grad_norm": 0.0, - "learning_rate": 1.6120146724875734e-05, - "loss": 0.9703, + "learning_rate": 1.270865639917314e-05, + "loss": 0.9002, "step": 10998 }, { - "epoch": 0.3116835274448128, + "epoch": 0.43035448783159874, "grad_norm": 0.0, - "learning_rate": 1.6119420868757433e-05, - "loss": 1.0565, + "learning_rate": 1.2707436519006489e-05, + "loss": 1.0664, "step": 10999 }, { - "epoch": 0.3117118648870753, + "epoch": 0.4303936145238282, "grad_norm": 0.0, - "learning_rate": 1.6118694961092976e-05, - "loss": 0.9134, + "learning_rate": 1.2706216595361797e-05, + "loss": 1.1779, "step": 11000 }, { - "epoch": 0.31174020232933775, + "epoch": 0.43043274121605757, "grad_norm": 0.0, - "learning_rate": 1.6117969001888486e-05, - "loss": 1.0758, + "learning_rate": 1.2704996628258659e-05, + "loss": 1.0959, "step": 11001 }, { - "epoch": 0.31176853977160024, + "epoch": 0.430471867908287, "grad_norm": 0.0, - "learning_rate": 1.6117242991150064e-05, - "loss": 0.9, + "learning_rate": 1.2703776617716666e-05, + "loss": 1.0409, "step": 11002 }, { - "epoch": 0.3117968772138627, + "epoch": 0.43051099460051645, "grad_norm": 0.0, - "learning_rate": 1.6116516928883836e-05, - "loss": 1.0276, + "learning_rate": 1.2702556563755406e-05, + "loss": 1.163, "step": 11003 }, { - "epoch": 0.3118252146561251, + "epoch": 0.4305501212927459, "grad_norm": 0.0, - "learning_rate": 1.6115790815095914e-05, - "loss": 1.0281, + "learning_rate": 1.2701336466394476e-05, + "loss": 1.114, "step": 11004 }, { - "epoch": 0.3118535520983876, + "epoch": 0.43058924798497533, "grad_norm": 0.0, - "learning_rate": 1.6115064649792417e-05, - "loss": 0.9449, + "learning_rate": 1.2700116325653464e-05, + "loss": 1.0078, "step": 11005 }, { - "epoch": 0.31188188954065005, + "epoch": 0.4306283746772048, "grad_norm": 0.0, - "learning_rate": 1.6114338432979454e-05, - "loss": 1.0102, + "learning_rate": 1.269889614155197e-05, + "loss": 1.0694, "step": 11006 }, { - "epoch": 0.31191022698291254, + "epoch": 0.4306675013694342, "grad_norm": 0.0, - "learning_rate": 1.611361216466315e-05, - "loss": 1.0991, + "learning_rate": 1.2697675914109583e-05, + "loss": 1.1631, "step": 11007 }, { - "epoch": 0.311938564425175, + "epoch": 0.43070662806166365, "grad_norm": 0.0, - "learning_rate": 1.6112885844849623e-05, - "loss": 0.9757, + "learning_rate": 1.2696455643345906e-05, + "loss": 1.1294, "step": 11008 }, { - "epoch": 0.3119669018674374, + "epoch": 0.4307457547538931, "grad_norm": 0.0, - "learning_rate": 1.611215947354499e-05, - "loss": 0.989, + "learning_rate": 1.2695235329280527e-05, + "loss": 1.0935, "step": 11009 }, { - "epoch": 0.3119952393096999, + "epoch": 0.43078488144612254, "grad_norm": 0.0, - "learning_rate": 1.6111433050755363e-05, - "loss": 0.9679, + "learning_rate": 1.2694014971933047e-05, + "loss": 1.1945, "step": 11010 }, { - "epoch": 0.31202357675196235, + "epoch": 0.430824008138352, "grad_norm": 0.0, - "learning_rate": 1.611070657648687e-05, - "loss": 1.024, + "learning_rate": 1.2692794571323064e-05, + "loss": 1.0269, "step": 11011 }, { - "epoch": 0.31205191419422484, + "epoch": 0.4308631348305814, "grad_norm": 0.0, - "learning_rate": 1.6109980050745627e-05, - "loss": 0.9526, + "learning_rate": 1.269157412747017e-05, + "loss": 1.0667, "step": 11012 }, { - "epoch": 0.3120802516364873, + "epoch": 0.43090226152281086, "grad_norm": 0.0, - "learning_rate": 1.610925347353775e-05, - "loss": 0.8864, + "learning_rate": 1.2690353640393974e-05, + "loss": 1.1419, "step": 11013 }, { - "epoch": 0.3121085890787498, + "epoch": 0.4309413882150403, "grad_norm": 0.0, - "learning_rate": 1.6108526844869365e-05, - "loss": 0.9199, + "learning_rate": 1.2689133110114065e-05, + "loss": 0.9483, "step": 11014 }, { - "epoch": 0.3121369265210122, + "epoch": 0.43098051490726974, "grad_norm": 0.0, - "learning_rate": 1.610780016474659e-05, - "loss": 0.8926, + "learning_rate": 1.2687912536650048e-05, + "loss": 1.1192, "step": 11015 }, { - "epoch": 0.31216526396327465, + "epoch": 0.4310196415994992, "grad_norm": 0.0, - "learning_rate": 1.6107073433175542e-05, - "loss": 0.977, + "learning_rate": 1.2686691920021526e-05, + "loss": 1.0008, "step": 11016 }, { - "epoch": 0.31219360140553715, + "epoch": 0.4310587682917286, "grad_norm": 0.0, - "learning_rate": 1.610634665016235e-05, - "loss": 0.8396, + "learning_rate": 1.2685471260248096e-05, + "loss": 0.9835, "step": 11017 }, { - "epoch": 0.3122219388477996, + "epoch": 0.43109789498395806, "grad_norm": 0.0, - "learning_rate": 1.610561981571313e-05, - "loss": 0.9159, + "learning_rate": 1.2684250557349365e-05, + "loss": 1.1716, "step": 11018 }, { - "epoch": 0.3122502762900621, + "epoch": 0.4311370216761875, "grad_norm": 0.0, - "learning_rate": 1.610489292983401e-05, - "loss": 1.0125, + "learning_rate": 1.2683029811344933e-05, + "loss": 0.9968, "step": 11019 }, { - "epoch": 0.3122786137323245, + "epoch": 0.43117614836841694, "grad_norm": 0.0, - "learning_rate": 1.6104165992531105e-05, - "loss": 1.0168, + "learning_rate": 1.2681809022254404e-05, + "loss": 1.0528, "step": 11020 }, { - "epoch": 0.31230695117458696, + "epoch": 0.4312152750606464, "grad_norm": 0.0, - "learning_rate": 1.610343900381055e-05, - "loss": 0.992, + "learning_rate": 1.2680588190097382e-05, + "loss": 1.0245, "step": 11021 }, { - "epoch": 0.31233528861684945, + "epoch": 0.4312544017528758, "grad_norm": 0.0, - "learning_rate": 1.610271196367846e-05, - "loss": 0.9552, + "learning_rate": 1.2679367314893476e-05, + "loss": 1.0012, "step": 11022 }, { - "epoch": 0.3123636260591119, + "epoch": 0.43129352844510527, "grad_norm": 0.0, - "learning_rate": 1.6101984872140958e-05, - "loss": 0.9365, + "learning_rate": 1.2678146396662282e-05, + "loss": 1.0949, "step": 11023 }, { - "epoch": 0.3123919635013744, + "epoch": 0.4313326551373347, "grad_norm": 0.0, - "learning_rate": 1.6101257729204173e-05, - "loss": 1.1042, + "learning_rate": 1.267692543542342e-05, + "loss": 1.165, "step": 11024 }, { - "epoch": 0.3124203009436368, + "epoch": 0.43137178182956415, "grad_norm": 0.0, - "learning_rate": 1.6100530534874226e-05, - "loss": 0.8976, + "learning_rate": 1.2675704431196483e-05, + "loss": 1.1055, "step": 11025 }, { - "epoch": 0.3124486383858993, + "epoch": 0.4314109085217936, "grad_norm": 0.0, - "learning_rate": 1.609980328915725e-05, - "loss": 0.9414, + "learning_rate": 1.2674483384001091e-05, + "loss": 1.2231, "step": 11026 }, { - "epoch": 0.31247697582816175, + "epoch": 0.43145003521402303, "grad_norm": 0.0, - "learning_rate": 1.609907599205936e-05, - "loss": 1.0251, + "learning_rate": 1.2673262293856845e-05, + "loss": 1.1746, "step": 11027 }, { - "epoch": 0.3125053132704242, + "epoch": 0.43148916190625247, "grad_norm": 0.0, - "learning_rate": 1.6098348643586695e-05, - "loss": 0.8849, + "learning_rate": 1.2672041160783354e-05, + "loss": 1.0871, "step": 11028 }, { - "epoch": 0.3125336507126867, + "epoch": 0.43152828859848186, "grad_norm": 0.0, - "learning_rate": 1.6097621243745373e-05, - "loss": 1.0265, + "learning_rate": 1.2670819984800233e-05, + "loss": 1.0954, "step": 11029 }, { - "epoch": 0.3125619881549491, + "epoch": 0.4315674152907113, "grad_norm": 0.0, - "learning_rate": 1.609689379254152e-05, - "loss": 1.021, + "learning_rate": 1.266959876592709e-05, + "loss": 1.1228, "step": 11030 }, { - "epoch": 0.3125903255972116, + "epoch": 0.43160654198294074, "grad_norm": 0.0, - "learning_rate": 1.609616628998127e-05, - "loss": 0.9147, + "learning_rate": 1.2668377504183537e-05, + "loss": 1.07, "step": 11031 }, { - "epoch": 0.31261866303947405, + "epoch": 0.4316456686751702, "grad_norm": 0.0, - "learning_rate": 1.6095438736070742e-05, - "loss": 0.9983, + "learning_rate": 1.2667156199589183e-05, + "loss": 1.0711, "step": 11032 }, { - "epoch": 0.3126470004817365, + "epoch": 0.4316847953673996, "grad_norm": 0.0, - "learning_rate": 1.6094711130816074e-05, - "loss": 0.8965, + "learning_rate": 1.2665934852163644e-05, + "loss": 1.0912, "step": 11033 }, { - "epoch": 0.312675337923999, + "epoch": 0.43172392205962906, "grad_norm": 0.0, - "learning_rate": 1.6093983474223392e-05, - "loss": 0.985, + "learning_rate": 1.2664713461926533e-05, + "loss": 1.0912, "step": 11034 }, { - "epoch": 0.3127036753662614, + "epoch": 0.4317630487518585, "grad_norm": 0.0, - "learning_rate": 1.609325576629882e-05, - "loss": 0.9488, + "learning_rate": 1.2663492028897459e-05, + "loss": 1.0704, "step": 11035 }, { - "epoch": 0.3127320128085239, + "epoch": 0.43180217544408794, "grad_norm": 0.0, - "learning_rate": 1.6092528007048495e-05, - "loss": 0.9481, + "learning_rate": 1.2662270553096043e-05, + "loss": 1.1631, "step": 11036 }, { - "epoch": 0.31276035025078636, + "epoch": 0.4318413021363174, "grad_norm": 0.0, - "learning_rate": 1.6091800196478546e-05, - "loss": 0.89, + "learning_rate": 1.2661049034541897e-05, + "loss": 1.1113, "step": 11037 }, { - "epoch": 0.31278868769304885, + "epoch": 0.4318804288285468, "grad_norm": 0.0, - "learning_rate": 1.60910723345951e-05, - "loss": 0.9882, + "learning_rate": 1.2659827473254637e-05, + "loss": 1.0601, "step": 11038 }, { - "epoch": 0.3128170251353113, + "epoch": 0.43191955552077627, "grad_norm": 0.0, - "learning_rate": 1.6090344421404286e-05, - "loss": 0.9561, + "learning_rate": 1.2658605869253884e-05, + "loss": 1.1124, "step": 11039 }, { - "epoch": 0.3128453625775737, + "epoch": 0.4319586822130057, "grad_norm": 0.0, - "learning_rate": 1.6089616456912245e-05, - "loss": 0.8335, + "learning_rate": 1.265738422255925e-05, + "loss": 0.9903, "step": 11040 }, { - "epoch": 0.3128737000198362, + "epoch": 0.43199780890523515, "grad_norm": 0.0, - "learning_rate": 1.60888884411251e-05, - "loss": 0.8386, + "learning_rate": 1.2656162533190354e-05, + "loss": 1.1768, "step": 11041 }, { - "epoch": 0.31290203746209866, + "epoch": 0.4320369355974646, "grad_norm": 0.0, - "learning_rate": 1.6088160374048987e-05, - "loss": 0.8978, + "learning_rate": 1.2654940801166819e-05, + "loss": 1.0791, "step": 11042 }, { - "epoch": 0.31293037490436115, + "epoch": 0.43207606228969403, "grad_norm": 0.0, - "learning_rate": 1.608743225569004e-05, - "loss": 0.9733, + "learning_rate": 1.2653719026508258e-05, + "loss": 1.0428, "step": 11043 }, { - "epoch": 0.3129587123466236, + "epoch": 0.43211518898192347, "grad_norm": 0.0, - "learning_rate": 1.608670408605439e-05, - "loss": 1.0139, + "learning_rate": 1.2652497209234299e-05, + "loss": 1.1625, "step": 11044 }, { - "epoch": 0.31298704978888603, + "epoch": 0.4321543156741529, "grad_norm": 0.0, - "learning_rate": 1.608597586514817e-05, - "loss": 1.0588, + "learning_rate": 1.2651275349364553e-05, + "loss": 0.9513, "step": 11045 }, { - "epoch": 0.3130153872311485, + "epoch": 0.43219344236638235, "grad_norm": 0.0, - "learning_rate": 1.608524759297752e-05, - "loss": 0.9202, + "learning_rate": 1.2650053446918653e-05, + "loss": 1.1328, "step": 11046 }, { - "epoch": 0.31304372467341096, + "epoch": 0.4322325690586118, "grad_norm": 0.0, - "learning_rate": 1.6084519269548564e-05, - "loss": 0.8372, + "learning_rate": 1.2648831501916212e-05, + "loss": 0.8956, "step": 11047 }, { - "epoch": 0.31307206211567346, + "epoch": 0.43227169575084123, "grad_norm": 0.0, - "learning_rate": 1.6083790894867445e-05, - "loss": 0.984, + "learning_rate": 1.2647609514376858e-05, + "loss": 1.0944, "step": 11048 }, { - "epoch": 0.3131003995579359, + "epoch": 0.4323108224430707, "grad_norm": 0.0, - "learning_rate": 1.6083062468940297e-05, - "loss": 0.9998, + "learning_rate": 1.2646387484320211e-05, + "loss": 1.1656, "step": 11049 }, { - "epoch": 0.3131287370001984, + "epoch": 0.4323499491353001, "grad_norm": 0.0, - "learning_rate": 1.6082333991773253e-05, - "loss": 0.9621, + "learning_rate": 1.2645165411765899e-05, + "loss": 1.1763, "step": 11050 }, { - "epoch": 0.3131570744424608, + "epoch": 0.43238907582752956, "grad_norm": 0.0, - "learning_rate": 1.6081605463372453e-05, - "loss": 0.9498, + "learning_rate": 1.2643943296733545e-05, + "loss": 1.1261, "step": 11051 }, { - "epoch": 0.31318541188472326, + "epoch": 0.432428202519759, "grad_norm": 0.0, - "learning_rate": 1.6080876883744028e-05, - "loss": 0.9231, + "learning_rate": 1.2642721139242773e-05, + "loss": 1.1068, "step": 11052 }, { - "epoch": 0.31321374932698576, + "epoch": 0.43246732921198844, "grad_norm": 0.0, - "learning_rate": 1.6080148252894124e-05, - "loss": 0.935, + "learning_rate": 1.2641498939313213e-05, + "loss": 1.0533, "step": 11053 }, { - "epoch": 0.3132420867692482, + "epoch": 0.4325064559042179, "grad_norm": 0.0, - "learning_rate": 1.6079419570828872e-05, - "loss": 0.9973, + "learning_rate": 1.2640276696964487e-05, + "loss": 1.1345, "step": 11054 }, { - "epoch": 0.3132704242115107, + "epoch": 0.4325455825964473, "grad_norm": 0.0, - "learning_rate": 1.607869083755441e-05, - "loss": 0.9032, + "learning_rate": 1.2639054412216227e-05, + "loss": 1.1002, "step": 11055 }, { - "epoch": 0.31329876165377313, + "epoch": 0.43258470928867676, "grad_norm": 0.0, - "learning_rate": 1.6077962053076877e-05, - "loss": 0.9346, + "learning_rate": 1.263783208508806e-05, + "loss": 1.2067, "step": 11056 }, { - "epoch": 0.31332709909603557, + "epoch": 0.4326238359809062, "grad_norm": 0.0, - "learning_rate": 1.6077233217402413e-05, - "loss": 1.0061, + "learning_rate": 1.2636609715599617e-05, + "loss": 1.2342, "step": 11057 }, { - "epoch": 0.31335543653829806, + "epoch": 0.4326629626731356, "grad_norm": 0.0, - "learning_rate": 1.607650433053716e-05, - "loss": 1.0763, + "learning_rate": 1.2635387303770523e-05, + "loss": 1.042, "step": 11058 }, { - "epoch": 0.3133837739805605, + "epoch": 0.43270208936536503, "grad_norm": 0.0, - "learning_rate": 1.6075775392487252e-05, - "loss": 1.0427, + "learning_rate": 1.2634164849620414e-05, + "loss": 1.029, "step": 11059 }, { - "epoch": 0.313412111422823, + "epoch": 0.43274121605759447, "grad_norm": 0.0, - "learning_rate": 1.607504640325883e-05, - "loss": 0.9493, + "learning_rate": 1.2632942353168917e-05, + "loss": 1.0508, "step": 11060 }, { - "epoch": 0.31344044886508543, + "epoch": 0.4327803427498239, "grad_norm": 0.0, - "learning_rate": 1.607431736285804e-05, - "loss": 1.0174, + "learning_rate": 1.2631719814435664e-05, + "loss": 1.04, "step": 11061 }, { - "epoch": 0.3134687863073479, + "epoch": 0.43281946944205335, "grad_norm": 0.0, - "learning_rate": 1.6073588271291018e-05, - "loss": 1.0194, + "learning_rate": 1.263049723344029e-05, + "loss": 0.9339, "step": 11062 }, { - "epoch": 0.31349712374961036, + "epoch": 0.4328585961342828, "grad_norm": 0.0, - "learning_rate": 1.6072859128563905e-05, - "loss": 0.9763, + "learning_rate": 1.2629274610202427e-05, + "loss": 1.2342, "step": 11063 }, { - "epoch": 0.3135254611918728, + "epoch": 0.43289772282651223, "grad_norm": 0.0, - "learning_rate": 1.6072129934682847e-05, - "loss": 0.8769, + "learning_rate": 1.2628051944741709e-05, + "loss": 1.1349, "step": 11064 }, { - "epoch": 0.3135537986341353, + "epoch": 0.4329368495187417, "grad_norm": 0.0, - "learning_rate": 1.607140068965398e-05, - "loss": 0.903, + "learning_rate": 1.2626829237077766e-05, + "loss": 1.1714, "step": 11065 }, { - "epoch": 0.31358213607639773, + "epoch": 0.4329759762109711, "grad_norm": 0.0, - "learning_rate": 1.6070671393483456e-05, - "loss": 0.96, + "learning_rate": 1.2625606487230239e-05, + "loss": 1.0874, "step": 11066 }, { - "epoch": 0.3136104735186602, + "epoch": 0.43301510290320055, "grad_norm": 0.0, - "learning_rate": 1.606994204617741e-05, - "loss": 0.9797, + "learning_rate": 1.2624383695218762e-05, + "loss": 1.0945, "step": 11067 }, { - "epoch": 0.31363881096092266, + "epoch": 0.43305422959543, "grad_norm": 0.0, - "learning_rate": 1.6069212647741987e-05, - "loss": 0.8632, + "learning_rate": 1.2623160861062976e-05, + "loss": 1.0724, "step": 11068 }, { - "epoch": 0.3136671484031851, + "epoch": 0.43309335628765944, "grad_norm": 0.0, - "learning_rate": 1.606848319818333e-05, - "loss": 0.9158, + "learning_rate": 1.2621937984782508e-05, + "loss": 0.9877, "step": 11069 }, { - "epoch": 0.3136954858454476, + "epoch": 0.4331324829798889, "grad_norm": 0.0, - "learning_rate": 1.606775369750759e-05, - "loss": 0.989, + "learning_rate": 1.2620715066397002e-05, + "loss": 0.9922, "step": 11070 }, { - "epoch": 0.31372382328771004, + "epoch": 0.4331716096721183, "grad_norm": 0.0, - "learning_rate": 1.6067024145720905e-05, - "loss": 0.9368, + "learning_rate": 1.2619492105926096e-05, + "loss": 1.1637, "step": 11071 }, { - "epoch": 0.31375216072997253, + "epoch": 0.43321073636434776, "grad_norm": 0.0, - "learning_rate": 1.6066294542829425e-05, - "loss": 0.9901, + "learning_rate": 1.261826910338943e-05, + "loss": 0.9438, "step": 11072 }, { - "epoch": 0.31378049817223497, + "epoch": 0.4332498630565772, "grad_norm": 0.0, - "learning_rate": 1.606556488883929e-05, - "loss": 0.9164, + "learning_rate": 1.2617046058806645e-05, + "loss": 1.1483, "step": 11073 }, { - "epoch": 0.31380883561449746, + "epoch": 0.43328898974880664, "grad_norm": 0.0, - "learning_rate": 1.6064835183756654e-05, - "loss": 0.991, + "learning_rate": 1.2615822972197375e-05, + "loss": 1.1512, "step": 11074 }, { - "epoch": 0.3138371730567599, + "epoch": 0.4333281164410361, "grad_norm": 0.0, - "learning_rate": 1.6064105427587654e-05, - "loss": 0.8837, + "learning_rate": 1.261459984358127e-05, + "loss": 1.0882, "step": 11075 }, { - "epoch": 0.31386551049902234, + "epoch": 0.4333672431332655, "grad_norm": 0.0, - "learning_rate": 1.6063375620338448e-05, - "loss": 1.0425, + "learning_rate": 1.2613376672977968e-05, + "loss": 1.2028, "step": 11076 }, { - "epoch": 0.31389384794128483, + "epoch": 0.43340636982549496, "grad_norm": 0.0, - "learning_rate": 1.6062645762015174e-05, - "loss": 0.9214, + "learning_rate": 1.2612153460407109e-05, + "loss": 1.0078, "step": 11077 }, { - "epoch": 0.31392218538354727, + "epoch": 0.4334454965177244, "grad_norm": 0.0, - "learning_rate": 1.6061915852623982e-05, - "loss": 0.8752, + "learning_rate": 1.2610930205888341e-05, + "loss": 1.0672, "step": 11078 }, { - "epoch": 0.31395052282580976, + "epoch": 0.43348462320995385, "grad_norm": 0.0, - "learning_rate": 1.606118589217102e-05, - "loss": 1.1251, + "learning_rate": 1.2609706909441303e-05, + "loss": 1.0926, "step": 11079 }, { - "epoch": 0.3139788602680722, + "epoch": 0.4335237499021833, "grad_norm": 0.0, - "learning_rate": 1.6060455880662446e-05, - "loss": 0.9847, + "learning_rate": 1.2608483571085644e-05, + "loss": 1.1689, "step": 11080 }, { - "epoch": 0.31400719771033464, + "epoch": 0.4335628765944127, "grad_norm": 0.0, - "learning_rate": 1.6059725818104393e-05, - "loss": 0.9699, + "learning_rate": 1.2607260190841007e-05, + "loss": 1.1344, "step": 11081 }, { - "epoch": 0.31403553515259713, + "epoch": 0.43360200328664217, "grad_norm": 0.0, - "learning_rate": 1.6058995704503028e-05, - "loss": 0.9613, + "learning_rate": 1.260603676872704e-05, + "loss": 1.054, "step": 11082 }, { - "epoch": 0.31406387259485957, + "epoch": 0.4336411299788716, "grad_norm": 0.0, - "learning_rate": 1.6058265539864488e-05, - "loss": 0.9725, + "learning_rate": 1.2604813304763383e-05, + "loss": 1.046, "step": 11083 }, { - "epoch": 0.31409221003712207, + "epoch": 0.43368025667110105, "grad_norm": 0.0, - "learning_rate": 1.6057535324194928e-05, - "loss": 0.9831, + "learning_rate": 1.2603589798969693e-05, + "loss": 1.0695, "step": 11084 }, { - "epoch": 0.3141205474793845, + "epoch": 0.4337193833633305, "grad_norm": 0.0, - "learning_rate": 1.6056805057500495e-05, - "loss": 0.9919, + "learning_rate": 1.2602366251365613e-05, + "loss": 1.0017, "step": 11085 }, { - "epoch": 0.314148884921647, + "epoch": 0.4337585100555599, "grad_norm": 0.0, - "learning_rate": 1.6056074739787347e-05, - "loss": 1.036, + "learning_rate": 1.2601142661970789e-05, + "loss": 1.0153, "step": 11086 }, { - "epoch": 0.31417722236390944, + "epoch": 0.4337976367477893, "grad_norm": 0.0, - "learning_rate": 1.6055344371061633e-05, - "loss": 0.9932, + "learning_rate": 1.2599919030804875e-05, + "loss": 1.0852, "step": 11087 }, { - "epoch": 0.3142055598061719, + "epoch": 0.43383676344001876, "grad_norm": 0.0, - "learning_rate": 1.6054613951329506e-05, - "loss": 0.9195, + "learning_rate": 1.259869535788752e-05, + "loss": 1.0906, "step": 11088 }, { - "epoch": 0.31423389724843437, + "epoch": 0.4338758901322482, "grad_norm": 0.0, - "learning_rate": 1.6053883480597115e-05, - "loss": 0.8213, + "learning_rate": 1.2597471643238372e-05, + "loss": 1.0932, "step": 11089 }, { - "epoch": 0.3142622346906968, + "epoch": 0.43391501682447764, "grad_norm": 0.0, - "learning_rate": 1.6053152958870617e-05, - "loss": 0.9507, + "learning_rate": 1.2596247886877086e-05, + "loss": 0.9979, "step": 11090 }, { - "epoch": 0.3142905721329593, + "epoch": 0.4339541435167071, "grad_norm": 0.0, - "learning_rate": 1.6052422386156164e-05, - "loss": 1.0723, + "learning_rate": 1.2595024088823313e-05, + "loss": 0.9064, "step": 11091 }, { - "epoch": 0.31431890957522174, + "epoch": 0.4339932702089365, "grad_norm": 0.0, - "learning_rate": 1.605169176245991e-05, - "loss": 0.9705, + "learning_rate": 1.2593800249096702e-05, + "loss": 1.1193, "step": 11092 }, { - "epoch": 0.3143472470174842, + "epoch": 0.43403239690116596, "grad_norm": 0.0, - "learning_rate": 1.6050961087788005e-05, - "loss": 0.907, + "learning_rate": 1.259257636771691e-05, + "loss": 1.1232, "step": 11093 }, { - "epoch": 0.31437558445974667, + "epoch": 0.4340715235933954, "grad_norm": 0.0, - "learning_rate": 1.605023036214661e-05, - "loss": 1.0142, + "learning_rate": 1.2591352444703591e-05, + "loss": 1.2031, "step": 11094 }, { - "epoch": 0.3144039219020091, + "epoch": 0.43411065028562484, "grad_norm": 0.0, - "learning_rate": 1.604949958554188e-05, - "loss": 0.8886, + "learning_rate": 1.25901284800764e-05, + "loss": 0.9991, "step": 11095 }, { - "epoch": 0.3144322593442716, + "epoch": 0.4341497769778543, "grad_norm": 0.0, - "learning_rate": 1.6048768757979966e-05, - "loss": 1.0178, + "learning_rate": 1.2588904473854988e-05, + "loss": 1.1971, "step": 11096 }, { - "epoch": 0.31446059678653404, + "epoch": 0.4341889036700837, "grad_norm": 0.0, - "learning_rate": 1.6048037879467025e-05, - "loss": 1.0071, + "learning_rate": 1.258768042605902e-05, + "loss": 1.0622, "step": 11097 }, { - "epoch": 0.31448893422879654, + "epoch": 0.43422803036231317, "grad_norm": 0.0, - "learning_rate": 1.6047306950009217e-05, - "loss": 0.8659, + "learning_rate": 1.2586456336708141e-05, + "loss": 1.1382, "step": 11098 }, { - "epoch": 0.314517271671059, + "epoch": 0.4342671570545426, "grad_norm": 0.0, - "learning_rate": 1.60465759696127e-05, - "loss": 1.1271, + "learning_rate": 1.2585232205822015e-05, + "loss": 1.0647, "step": 11099 }, { - "epoch": 0.3145456091133214, + "epoch": 0.43430628374677205, "grad_norm": 0.0, - "learning_rate": 1.604584493828363e-05, - "loss": 0.9526, + "learning_rate": 1.2584008033420304e-05, + "loss": 1.0985, "step": 11100 }, { - "epoch": 0.3145739465555839, + "epoch": 0.4343454104390015, "grad_norm": 0.0, - "learning_rate": 1.6045113856028155e-05, - "loss": 0.9027, + "learning_rate": 1.2582783819522656e-05, + "loss": 1.0086, "step": 11101 }, { - "epoch": 0.31460228399784634, + "epoch": 0.43438453713123093, "grad_norm": 0.0, - "learning_rate": 1.6044382722852445e-05, - "loss": 0.9265, + "learning_rate": 1.258155956414874e-05, + "loss": 1.1724, "step": 11102 }, { - "epoch": 0.31463062144010884, + "epoch": 0.43442366382346037, "grad_norm": 0.0, - "learning_rate": 1.6043651538762658e-05, - "loss": 0.9945, + "learning_rate": 1.2580335267318209e-05, + "loss": 1.1188, "step": 11103 }, { - "epoch": 0.3146589588823713, + "epoch": 0.4344627905156898, "grad_norm": 0.0, - "learning_rate": 1.6042920303764944e-05, - "loss": 0.9255, + "learning_rate": 1.2579110929050731e-05, + "loss": 1.1005, "step": 11104 }, { - "epoch": 0.3146872963246337, + "epoch": 0.43450191720791925, "grad_norm": 0.0, - "learning_rate": 1.6042189017865473e-05, - "loss": 1.0232, + "learning_rate": 1.2577886549365958e-05, + "loss": 0.9999, "step": 11105 }, { - "epoch": 0.3147156337668962, + "epoch": 0.4345410439001487, "grad_norm": 0.0, - "learning_rate": 1.6041457681070395e-05, - "loss": 0.9444, + "learning_rate": 1.2576662128283564e-05, + "loss": 1.0982, "step": 11106 }, { - "epoch": 0.31474397120915865, + "epoch": 0.43458017059237813, "grad_norm": 0.0, - "learning_rate": 1.604072629338588e-05, - "loss": 1.085, + "learning_rate": 1.25754376658232e-05, + "loss": 1.0394, "step": 11107 }, { - "epoch": 0.31477230865142114, + "epoch": 0.4346192972846076, "grad_norm": 0.0, - "learning_rate": 1.603999485481808e-05, - "loss": 0.9457, + "learning_rate": 1.2574213162004536e-05, + "loss": 0.9924, "step": 11108 }, { - "epoch": 0.3148006460936836, + "epoch": 0.434658423976837, "grad_norm": 0.0, - "learning_rate": 1.6039263365373167e-05, - "loss": 0.937, + "learning_rate": 1.2572988616847234e-05, + "loss": 1.0804, "step": 11109 }, { - "epoch": 0.31482898353594607, + "epoch": 0.43469755066906646, "grad_norm": 0.0, - "learning_rate": 1.6038531825057295e-05, - "loss": 0.9064, + "learning_rate": 1.2571764030370958e-05, + "loss": 1.008, "step": 11110 }, { - "epoch": 0.3148573209782085, + "epoch": 0.4347366773612959, "grad_norm": 0.0, - "learning_rate": 1.6037800233876623e-05, - "loss": 0.9475, + "learning_rate": 1.257053940259538e-05, + "loss": 0.9752, "step": 11111 }, { - "epoch": 0.31488565842047095, + "epoch": 0.43477580405352534, "grad_norm": 0.0, - "learning_rate": 1.6037068591837318e-05, - "loss": 0.9488, + "learning_rate": 1.2569314733540153e-05, + "loss": 1.1564, "step": 11112 }, { - "epoch": 0.31491399586273344, + "epoch": 0.4348149307457548, "grad_norm": 0.0, - "learning_rate": 1.6036336898945543e-05, - "loss": 1.0397, + "learning_rate": 1.2568090023224952e-05, + "loss": 1.1071, "step": 11113 }, { - "epoch": 0.3149423333049959, + "epoch": 0.4348540574379842, "grad_norm": 0.0, - "learning_rate": 1.603560515520746e-05, - "loss": 0.9149, + "learning_rate": 1.2566865271669446e-05, + "loss": 1.0349, "step": 11114 }, { - "epoch": 0.3149706707472584, + "epoch": 0.4348931841302136, "grad_norm": 0.0, - "learning_rate": 1.6034873360629238e-05, - "loss": 0.916, + "learning_rate": 1.2565640478893299e-05, + "loss": 0.891, "step": 11115 }, { - "epoch": 0.3149990081895208, + "epoch": 0.43493231082244305, "grad_norm": 0.0, - "learning_rate": 1.603414151521703e-05, - "loss": 0.9226, + "learning_rate": 1.2564415644916179e-05, + "loss": 1.0808, "step": 11116 }, { - "epoch": 0.31502734563178325, + "epoch": 0.4349714375146725, "grad_norm": 0.0, - "learning_rate": 1.6033409618977013e-05, - "loss": 0.8732, + "learning_rate": 1.256319076975776e-05, + "loss": 1.0225, "step": 11117 }, { - "epoch": 0.31505568307404574, + "epoch": 0.43501056420690193, "grad_norm": 0.0, - "learning_rate": 1.6032677671915343e-05, - "loss": 1.0107, + "learning_rate": 1.256196585343771e-05, + "loss": 0.9223, "step": 11118 }, { - "epoch": 0.3150840205163082, + "epoch": 0.43504969089913137, "grad_norm": 0.0, - "learning_rate": 1.603194567403819e-05, - "loss": 0.9371, + "learning_rate": 1.2560740895975694e-05, + "loss": 1.1313, "step": 11119 }, { - "epoch": 0.3151123579585707, + "epoch": 0.4350888175913608, "grad_norm": 0.0, - "learning_rate": 1.603121362535172e-05, - "loss": 0.9312, + "learning_rate": 1.2559515897391392e-05, + "loss": 0.9194, "step": 11120 }, { - "epoch": 0.3151406954008331, + "epoch": 0.43512794428359025, "grad_norm": 0.0, - "learning_rate": 1.6030481525862096e-05, - "loss": 1.0519, + "learning_rate": 1.2558290857704472e-05, + "loss": 0.9732, "step": 11121 }, { - "epoch": 0.3151690328430956, + "epoch": 0.4351670709758197, "grad_norm": 0.0, - "learning_rate": 1.6029749375575487e-05, - "loss": 0.9714, + "learning_rate": 1.2557065776934604e-05, + "loss": 1.0432, "step": 11122 }, { - "epoch": 0.31519737028535805, + "epoch": 0.43520619766804913, "grad_norm": 0.0, - "learning_rate": 1.6029017174498062e-05, - "loss": 0.942, + "learning_rate": 1.2555840655101465e-05, + "loss": 1.1086, "step": 11123 }, { - "epoch": 0.3152257077276205, + "epoch": 0.4352453243602786, "grad_norm": 0.0, - "learning_rate": 1.602828492263598e-05, - "loss": 0.9151, + "learning_rate": 1.2554615492224731e-05, + "loss": 1.0469, "step": 11124 }, { - "epoch": 0.315254045169883, + "epoch": 0.435284451052508, "grad_norm": 0.0, - "learning_rate": 1.6027552619995423e-05, - "loss": 0.9197, + "learning_rate": 1.2553390288324067e-05, + "loss": 1.0204, "step": 11125 }, { - "epoch": 0.3152823826121454, + "epoch": 0.43532357774473746, "grad_norm": 0.0, - "learning_rate": 1.602682026658255e-05, - "loss": 0.9536, + "learning_rate": 1.255216504341916e-05, + "loss": 0.9179, "step": 11126 }, { - "epoch": 0.3153107200544079, + "epoch": 0.4353627044369669, "grad_norm": 0.0, - "learning_rate": 1.602608786240353e-05, - "loss": 1.0097, + "learning_rate": 1.2550939757529678e-05, + "loss": 1.08, "step": 11127 }, { - "epoch": 0.31533905749667035, + "epoch": 0.43540183112919634, "grad_norm": 0.0, - "learning_rate": 1.6025355407464536e-05, - "loss": 0.9339, + "learning_rate": 1.2549714430675299e-05, + "loss": 1.0318, "step": 11128 }, { - "epoch": 0.3153673949389328, + "epoch": 0.4354409578214258, "grad_norm": 0.0, - "learning_rate": 1.6024622901771736e-05, - "loss": 0.9501, + "learning_rate": 1.2548489062875705e-05, + "loss": 1.0738, "step": 11129 }, { - "epoch": 0.3153957323811953, + "epoch": 0.4354800845136552, "grad_norm": 0.0, - "learning_rate": 1.6023890345331297e-05, - "loss": 0.9996, + "learning_rate": 1.2547263654150565e-05, + "loss": 0.9485, "step": 11130 }, { - "epoch": 0.3154240698234577, + "epoch": 0.43551921120588466, "grad_norm": 0.0, - "learning_rate": 1.6023157738149398e-05, - "loss": 0.9197, + "learning_rate": 1.2546038204519567e-05, + "loss": 1.1277, "step": 11131 }, { - "epoch": 0.3154524072657202, + "epoch": 0.4355583378981141, "grad_norm": 0.0, - "learning_rate": 1.60224250802322e-05, - "loss": 0.9652, + "learning_rate": 1.2544812714002381e-05, + "loss": 1.1212, "step": 11132 }, { - "epoch": 0.31548074470798265, + "epoch": 0.43559746459034354, "grad_norm": 0.0, - "learning_rate": 1.6021692371585884e-05, - "loss": 0.9342, + "learning_rate": 1.2543587182618695e-05, + "loss": 1.0975, "step": 11133 }, { - "epoch": 0.31550908215024515, + "epoch": 0.435636591282573, "grad_norm": 0.0, - "learning_rate": 1.6020959612216613e-05, - "loss": 0.9273, + "learning_rate": 1.2542361610388185e-05, + "loss": 1.0438, "step": 11134 }, { - "epoch": 0.3155374195925076, + "epoch": 0.4356757179748024, "grad_norm": 0.0, - "learning_rate": 1.6020226802130566e-05, - "loss": 0.9723, + "learning_rate": 1.2541135997330534e-05, + "loss": 1.1216, "step": 11135 }, { - "epoch": 0.31556575703477, + "epoch": 0.43571484466703186, "grad_norm": 0.0, - "learning_rate": 1.6019493941333907e-05, - "loss": 0.9407, + "learning_rate": 1.2539910343465422e-05, + "loss": 1.1628, "step": 11136 }, { - "epoch": 0.3155940944770325, + "epoch": 0.4357539713592613, "grad_norm": 0.0, - "learning_rate": 1.6018761029832822e-05, - "loss": 1.0473, + "learning_rate": 1.2538684648812535e-05, + "loss": 1.0253, "step": 11137 }, { - "epoch": 0.31562243191929495, + "epoch": 0.43579309805149075, "grad_norm": 0.0, - "learning_rate": 1.6018028067633478e-05, - "loss": 1.0156, + "learning_rate": 1.2537458913391553e-05, + "loss": 1.024, "step": 11138 }, { - "epoch": 0.31565076936155745, + "epoch": 0.4358322247437202, "grad_norm": 0.0, - "learning_rate": 1.6017295054742045e-05, - "loss": 0.978, + "learning_rate": 1.2536233137222159e-05, + "loss": 1.1237, "step": 11139 }, { - "epoch": 0.3156791068038199, + "epoch": 0.4358713514359496, "grad_norm": 0.0, - "learning_rate": 1.6016561991164702e-05, - "loss": 1.0143, + "learning_rate": 1.2535007320324039e-05, + "loss": 1.0467, "step": 11140 }, { - "epoch": 0.3157074442460823, + "epoch": 0.43591047812817907, "grad_norm": 0.0, - "learning_rate": 1.6015828876907623e-05, - "loss": 0.9926, + "learning_rate": 1.2533781462716879e-05, + "loss": 1.0858, "step": 11141 }, { - "epoch": 0.3157357816883448, + "epoch": 0.4359496048204085, "grad_norm": 0.0, - "learning_rate": 1.6015095711976988e-05, - "loss": 0.9002, + "learning_rate": 1.2532555564420363e-05, + "loss": 1.0187, "step": 11142 }, { - "epoch": 0.31576411913060726, + "epoch": 0.4359887315126379, "grad_norm": 0.0, - "learning_rate": 1.6014362496378962e-05, - "loss": 1.0053, + "learning_rate": 1.2531329625454179e-05, + "loss": 1.157, "step": 11143 }, { - "epoch": 0.31579245657286975, + "epoch": 0.43602785820486734, "grad_norm": 0.0, - "learning_rate": 1.601362923011973e-05, - "loss": 0.983, + "learning_rate": 1.2530103645838011e-05, + "loss": 1.0712, "step": 11144 }, { - "epoch": 0.3158207940151322, + "epoch": 0.4360669848970968, "grad_norm": 0.0, - "learning_rate": 1.6012895913205465e-05, - "loss": 0.9566, + "learning_rate": 1.2528877625591552e-05, + "loss": 1.1025, "step": 11145 }, { - "epoch": 0.3158491314573947, + "epoch": 0.4361061115893262, "grad_norm": 0.0, - "learning_rate": 1.6012162545642346e-05, - "loss": 0.8363, + "learning_rate": 1.2527651564734487e-05, + "loss": 0.9718, "step": 11146 }, { - "epoch": 0.3158774688996571, + "epoch": 0.43614523828155566, "grad_norm": 0.0, - "learning_rate": 1.6011429127436547e-05, - "loss": 0.959, + "learning_rate": 1.2526425463286503e-05, + "loss": 1.1348, "step": 11147 }, { - "epoch": 0.31590580634191956, + "epoch": 0.4361843649737851, "grad_norm": 0.0, - "learning_rate": 1.6010695658594255e-05, - "loss": 0.9626, + "learning_rate": 1.2525199321267292e-05, + "loss": 1.0074, "step": 11148 }, { - "epoch": 0.31593414378418205, + "epoch": 0.43622349166601454, "grad_norm": 0.0, - "learning_rate": 1.6009962139121635e-05, - "loss": 1.0798, + "learning_rate": 1.2523973138696546e-05, + "loss": 1.1091, "step": 11149 }, { - "epoch": 0.3159624812264445, + "epoch": 0.436262618358244, "grad_norm": 0.0, - "learning_rate": 1.6009228569024875e-05, - "loss": 1.0369, + "learning_rate": 1.2522746915593951e-05, + "loss": 1.1015, "step": 11150 }, { - "epoch": 0.315990818668707, + "epoch": 0.4363017450504734, "grad_norm": 0.0, - "learning_rate": 1.6008494948310147e-05, - "loss": 0.9391, + "learning_rate": 1.2521520651979205e-05, + "loss": 1.1057, "step": 11151 }, { - "epoch": 0.3160191561109694, + "epoch": 0.43634087174270286, "grad_norm": 0.0, - "learning_rate": 1.600776127698364e-05, - "loss": 0.8828, + "learning_rate": 1.2520294347871993e-05, + "loss": 1.2505, "step": 11152 }, { - "epoch": 0.31604749355323186, + "epoch": 0.4363799984349323, "grad_norm": 0.0, - "learning_rate": 1.6007027555051524e-05, - "loss": 0.912, + "learning_rate": 1.2519068003292017e-05, + "loss": 1.0417, "step": 11153 }, { - "epoch": 0.31607583099549436, + "epoch": 0.43641912512716174, "grad_norm": 0.0, - "learning_rate": 1.6006293782519988e-05, - "loss": 1.0639, + "learning_rate": 1.2517841618258961e-05, + "loss": 1.0262, "step": 11154 }, { - "epoch": 0.3161041684377568, + "epoch": 0.4364582518193912, "grad_norm": 0.0, - "learning_rate": 1.600555995939521e-05, - "loss": 0.9885, + "learning_rate": 1.2516615192792524e-05, + "loss": 1.1627, "step": 11155 }, { - "epoch": 0.3161325058800193, + "epoch": 0.4364973785116206, "grad_norm": 0.0, - "learning_rate": 1.6004826085683367e-05, - "loss": 0.9191, + "learning_rate": 1.2515388726912406e-05, + "loss": 1.0065, "step": 11156 }, { - "epoch": 0.3161608433222817, + "epoch": 0.43653650520385007, "grad_norm": 0.0, - "learning_rate": 1.6004092161390645e-05, - "loss": 1.0953, + "learning_rate": 1.251416222063829e-05, + "loss": 0.9927, "step": 11157 }, { - "epoch": 0.3161891807645442, + "epoch": 0.4365756318960795, "grad_norm": 0.0, - "learning_rate": 1.6003358186523226e-05, - "loss": 0.8805, + "learning_rate": 1.2512935673989884e-05, + "loss": 1.0338, "step": 11158 }, { - "epoch": 0.31621751820680666, + "epoch": 0.43661475858830895, "grad_norm": 0.0, - "learning_rate": 1.6002624161087293e-05, - "loss": 1.0164, + "learning_rate": 1.2511709086986876e-05, + "loss": 1.1105, "step": 11159 }, { - "epoch": 0.3162458556490691, + "epoch": 0.4366538852805384, "grad_norm": 0.0, - "learning_rate": 1.6001890085089026e-05, - "loss": 0.9383, + "learning_rate": 1.2510482459648972e-05, + "loss": 1.1051, "step": 11160 }, { - "epoch": 0.3162741930913316, + "epoch": 0.43669301197276783, "grad_norm": 0.0, - "learning_rate": 1.6001155958534608e-05, - "loss": 0.9854, + "learning_rate": 1.2509255791995863e-05, + "loss": 1.1708, "step": 11161 }, { - "epoch": 0.31630253053359403, + "epoch": 0.43673213866499727, "grad_norm": 0.0, - "learning_rate": 1.600042178143023e-05, - "loss": 0.9732, + "learning_rate": 1.2508029084047251e-05, + "loss": 1.0053, "step": 11162 }, { - "epoch": 0.3163308679758565, + "epoch": 0.4367712653572267, "grad_norm": 0.0, - "learning_rate": 1.5999687553782067e-05, - "loss": 0.8991, + "learning_rate": 1.2506802335822831e-05, + "loss": 1.1135, "step": 11163 }, { - "epoch": 0.31635920541811896, + "epoch": 0.43681039204945615, "grad_norm": 0.0, - "learning_rate": 1.5998953275596307e-05, - "loss": 0.9198, + "learning_rate": 1.250557554734231e-05, + "loss": 1.1047, "step": 11164 }, { - "epoch": 0.3163875428603814, + "epoch": 0.4368495187416856, "grad_norm": 0.0, - "learning_rate": 1.599821894687914e-05, - "loss": 0.9935, + "learning_rate": 1.2504348718625385e-05, + "loss": 1.054, "step": 11165 }, { - "epoch": 0.3164158803026439, + "epoch": 0.43688864543391503, "grad_norm": 0.0, - "learning_rate": 1.5997484567636744e-05, - "loss": 0.9323, + "learning_rate": 1.2503121849691758e-05, + "loss": 0.9965, "step": 11166 }, { - "epoch": 0.31644421774490633, + "epoch": 0.4369277721261445, "grad_norm": 0.0, - "learning_rate": 1.599675013787531e-05, - "loss": 1.034, + "learning_rate": 1.2501894940561133e-05, + "loss": 1.0362, "step": 11167 }, { - "epoch": 0.3164725551871688, + "epoch": 0.4369668988183739, "grad_norm": 0.0, - "learning_rate": 1.5996015657601023e-05, - "loss": 0.7988, + "learning_rate": 1.2500667991253205e-05, + "loss": 1.0319, "step": 11168 }, { - "epoch": 0.31650089262943126, + "epoch": 0.43700602551060336, "grad_norm": 0.0, - "learning_rate": 1.5995281126820067e-05, - "loss": 1.0794, + "learning_rate": 1.2499441001787687e-05, + "loss": 1.0436, "step": 11169 }, { - "epoch": 0.3165292300716937, + "epoch": 0.4370451522028328, "grad_norm": 0.0, - "learning_rate": 1.5994546545538634e-05, - "loss": 0.9459, + "learning_rate": 1.2498213972184277e-05, + "loss": 1.1807, "step": 11170 }, { - "epoch": 0.3165575675139562, + "epoch": 0.4370842788950622, "grad_norm": 0.0, - "learning_rate": 1.599381191376291e-05, - "loss": 0.8625, + "learning_rate": 1.2496986902462683e-05, + "loss": 1.0802, "step": 11171 }, { - "epoch": 0.31658590495621863, + "epoch": 0.4371234055872916, "grad_norm": 0.0, - "learning_rate": 1.599307723149908e-05, - "loss": 0.9401, + "learning_rate": 1.2495759792642603e-05, + "loss": 1.136, "step": 11172 }, { - "epoch": 0.3166142423984811, + "epoch": 0.43716253227952107, "grad_norm": 0.0, - "learning_rate": 1.5992342498753336e-05, - "loss": 0.9321, + "learning_rate": 1.2494532642743753e-05, + "loss": 1.087, "step": 11173 }, { - "epoch": 0.31664257984074357, + "epoch": 0.4372016589717505, "grad_norm": 0.0, - "learning_rate": 1.599160771553187e-05, - "loss": 0.9519, + "learning_rate": 1.249330545278583e-05, + "loss": 1.0092, "step": 11174 }, { - "epoch": 0.31667091728300606, + "epoch": 0.43724078566397995, "grad_norm": 0.0, - "learning_rate": 1.599087288184086e-05, - "loss": 0.9279, + "learning_rate": 1.249207822278855e-05, + "loss": 0.9903, "step": 11175 }, { - "epoch": 0.3166992547252685, + "epoch": 0.4372799123562094, "grad_norm": 0.0, - "learning_rate": 1.5990137997686508e-05, - "loss": 0.9596, + "learning_rate": 1.2490850952771617e-05, + "loss": 1.1094, "step": 11176 }, { - "epoch": 0.31672759216753094, + "epoch": 0.43731903904843883, "grad_norm": 0.0, - "learning_rate": 1.5989403063074998e-05, - "loss": 0.8962, + "learning_rate": 1.2489623642754736e-05, + "loss": 1.059, "step": 11177 }, { - "epoch": 0.31675592960979343, + "epoch": 0.43735816574066827, "grad_norm": 0.0, - "learning_rate": 1.5988668078012525e-05, - "loss": 0.9957, + "learning_rate": 1.248839629275762e-05, + "loss": 1.0119, "step": 11178 }, { - "epoch": 0.31678426705205587, + "epoch": 0.4373972924328977, "grad_norm": 0.0, - "learning_rate": 1.5987933042505272e-05, - "loss": 0.956, + "learning_rate": 1.2487168902799976e-05, + "loss": 1.0246, "step": 11179 }, { - "epoch": 0.31681260449431836, + "epoch": 0.43743641912512715, "grad_norm": 0.0, - "learning_rate": 1.5987197956559434e-05, - "loss": 0.9939, + "learning_rate": 1.2485941472901519e-05, + "loss": 1.0522, "step": 11180 }, { - "epoch": 0.3168409419365808, + "epoch": 0.4374755458173566, "grad_norm": 0.0, - "learning_rate": 1.598646282018121e-05, - "loss": 0.9485, + "learning_rate": 1.2484714003081954e-05, + "loss": 1.1778, "step": 11181 }, { - "epoch": 0.31686927937884324, + "epoch": 0.43751467250958603, "grad_norm": 0.0, - "learning_rate": 1.5985727633376783e-05, - "loss": 1.0709, + "learning_rate": 1.2483486493360996e-05, + "loss": 1.1464, "step": 11182 }, { - "epoch": 0.31689761682110573, + "epoch": 0.4375537992018155, "grad_norm": 0.0, - "learning_rate": 1.598499239615235e-05, - "loss": 0.9456, + "learning_rate": 1.248225894375836e-05, + "loss": 1.0792, "step": 11183 }, { - "epoch": 0.31692595426336817, + "epoch": 0.4375929258940449, "grad_norm": 0.0, - "learning_rate": 1.5984257108514107e-05, - "loss": 0.972, + "learning_rate": 1.2481031354293754e-05, + "loss": 1.1031, "step": 11184 }, { - "epoch": 0.31695429170563066, + "epoch": 0.43763205258627436, "grad_norm": 0.0, - "learning_rate": 1.598352177046824e-05, - "loss": 0.9093, + "learning_rate": 1.2479803724986894e-05, + "loss": 0.9165, "step": 11185 }, { - "epoch": 0.3169826291478931, + "epoch": 0.4376711792785038, "grad_norm": 0.0, - "learning_rate": 1.598278638202095e-05, - "loss": 0.8973, + "learning_rate": 1.2478576055857492e-05, + "loss": 1.137, "step": 11186 }, { - "epoch": 0.3170109665901556, + "epoch": 0.43771030597073324, "grad_norm": 0.0, - "learning_rate": 1.5982050943178428e-05, - "loss": 1.0328, + "learning_rate": 1.2477348346925264e-05, + "loss": 1.0817, "step": 11187 }, { - "epoch": 0.31703930403241803, + "epoch": 0.4377494326629627, "grad_norm": 0.0, - "learning_rate": 1.5981315453946867e-05, - "loss": 1.0303, + "learning_rate": 1.2476120598209926e-05, + "loss": 1.166, "step": 11188 }, { - "epoch": 0.3170676414746805, + "epoch": 0.4377885593551921, "grad_norm": 0.0, - "learning_rate": 1.598057991433247e-05, - "loss": 0.8381, + "learning_rate": 1.2474892809731196e-05, + "loss": 1.0325, "step": 11189 }, { - "epoch": 0.31709597891694297, + "epoch": 0.43782768604742156, "grad_norm": 0.0, - "learning_rate": 1.5979844324341424e-05, - "loss": 0.9502, + "learning_rate": 1.2473664981508786e-05, + "loss": 0.9876, "step": 11190 }, { - "epoch": 0.3171243163592054, + "epoch": 0.437866812739651, "grad_norm": 0.0, - "learning_rate": 1.5979108683979928e-05, - "loss": 1.0017, + "learning_rate": 1.247243711356242e-05, + "loss": 0.9878, "step": 11191 }, { - "epoch": 0.3171526538014679, + "epoch": 0.43790593943188044, "grad_norm": 0.0, - "learning_rate": 1.597837299325418e-05, - "loss": 0.929, + "learning_rate": 1.247120920591181e-05, + "loss": 1.0951, "step": 11192 }, { - "epoch": 0.31718099124373034, + "epoch": 0.4379450661241099, "grad_norm": 0.0, - "learning_rate": 1.5977637252170377e-05, - "loss": 0.9437, + "learning_rate": 1.2469981258576676e-05, + "loss": 1.1052, "step": 11193 }, { - "epoch": 0.3172093286859928, + "epoch": 0.4379841928163393, "grad_norm": 0.0, - "learning_rate": 1.5976901460734714e-05, - "loss": 1.0166, + "learning_rate": 1.2468753271576737e-05, + "loss": 0.9444, "step": 11194 }, { - "epoch": 0.31723766612825527, + "epoch": 0.43802331950856876, "grad_norm": 0.0, - "learning_rate": 1.597616561895339e-05, - "loss": 0.9189, + "learning_rate": 1.2467525244931717e-05, + "loss": 1.0938, "step": 11195 }, { - "epoch": 0.3172660035705177, + "epoch": 0.4380624462007982, "grad_norm": 0.0, - "learning_rate": 1.597542972683261e-05, - "loss": 0.9754, + "learning_rate": 1.2466297178661332e-05, + "loss": 1.0363, "step": 11196 }, { - "epoch": 0.3172943410127802, + "epoch": 0.43810157289302765, "grad_norm": 0.0, - "learning_rate": 1.597469378437856e-05, - "loss": 0.8357, + "learning_rate": 1.2465069072785304e-05, + "loss": 1.0993, "step": 11197 }, { - "epoch": 0.31732267845504264, + "epoch": 0.4381406995852571, "grad_norm": 0.0, - "learning_rate": 1.5973957791597445e-05, - "loss": 0.8558, + "learning_rate": 1.2463840927323358e-05, + "loss": 1.0925, "step": 11198 }, { - "epoch": 0.31735101589730513, + "epoch": 0.43817982627748653, "grad_norm": 0.0, - "learning_rate": 1.5973221748495472e-05, - "loss": 1.0029, + "learning_rate": 1.246261274229521e-05, + "loss": 1.0565, "step": 11199 }, { - "epoch": 0.31737935333956757, + "epoch": 0.4382189529697159, "grad_norm": 0.0, - "learning_rate": 1.5972485655078828e-05, - "loss": 0.9626, + "learning_rate": 1.2461384517720592e-05, + "loss": 0.9464, "step": 11200 }, { - "epoch": 0.31740769078183, + "epoch": 0.43825807966194535, "grad_norm": 0.0, - "learning_rate": 1.597174951135372e-05, - "loss": 0.9936, + "learning_rate": 1.2460156253619218e-05, + "loss": 0.9592, "step": 11201 }, { - "epoch": 0.3174360282240925, + "epoch": 0.4382972063541748, "grad_norm": 0.0, - "learning_rate": 1.597101331732635e-05, - "loss": 0.9205, + "learning_rate": 1.2458927950010821e-05, + "loss": 1.0578, "step": 11202 }, { - "epoch": 0.31746436566635494, + "epoch": 0.43833633304640424, "grad_norm": 0.0, - "learning_rate": 1.597027707300292e-05, - "loss": 1.0005, + "learning_rate": 1.245769960691512e-05, + "loss": 1.2245, "step": 11203 }, { - "epoch": 0.31749270310861744, + "epoch": 0.4383754597386337, "grad_norm": 0.0, - "learning_rate": 1.5969540778389624e-05, - "loss": 0.9344, + "learning_rate": 1.2456471224351847e-05, + "loss": 1.0995, "step": 11204 }, { - "epoch": 0.3175210405508799, + "epoch": 0.4384145864308631, "grad_norm": 0.0, - "learning_rate": 1.596880443349267e-05, - "loss": 0.9344, + "learning_rate": 1.2455242802340721e-05, + "loss": 0.9635, "step": 11205 }, { - "epoch": 0.3175493779931423, + "epoch": 0.43845371312309256, "grad_norm": 0.0, - "learning_rate": 1.5968068038318266e-05, - "loss": 0.9592, + "learning_rate": 1.2454014340901472e-05, + "loss": 1.0962, "step": 11206 }, { - "epoch": 0.3175777154354048, + "epoch": 0.438492839815322, "grad_norm": 0.0, - "learning_rate": 1.5967331592872604e-05, - "loss": 0.8473, + "learning_rate": 1.2452785840053829e-05, + "loss": 1.1951, "step": 11207 }, { - "epoch": 0.31760605287766724, + "epoch": 0.43853196650755144, "grad_norm": 0.0, - "learning_rate": 1.5966595097161893e-05, - "loss": 1.0316, + "learning_rate": 1.2451557299817519e-05, + "loss": 1.0186, "step": 11208 }, { - "epoch": 0.31763439031992974, + "epoch": 0.4385710931997809, "grad_norm": 0.0, - "learning_rate": 1.596585855119233e-05, - "loss": 1.0199, + "learning_rate": 1.245032872021227e-05, + "loss": 1.0833, "step": 11209 }, { - "epoch": 0.3176627277621922, + "epoch": 0.4386102198920103, "grad_norm": 0.0, - "learning_rate": 1.5965121954970134e-05, - "loss": 0.9047, + "learning_rate": 1.2449100101257812e-05, + "loss": 0.9848, "step": 11210 }, { - "epoch": 0.31769106520445467, + "epoch": 0.43864934658423976, "grad_norm": 0.0, - "learning_rate": 1.5964385308501497e-05, - "loss": 0.9802, + "learning_rate": 1.2447871442973876e-05, + "loss": 0.9724, "step": 11211 }, { - "epoch": 0.3177194026467171, + "epoch": 0.4386884732764692, "grad_norm": 0.0, - "learning_rate": 1.5963648611792625e-05, - "loss": 1.0214, + "learning_rate": 1.2446642745380192e-05, + "loss": 0.9958, "step": 11212 }, { - "epoch": 0.31774774008897955, + "epoch": 0.43872759996869864, "grad_norm": 0.0, - "learning_rate": 1.596291186484973e-05, - "loss": 1.0195, + "learning_rate": 1.2445414008496492e-05, + "loss": 1.0258, "step": 11213 }, { - "epoch": 0.31777607753124204, + "epoch": 0.4387667266609281, "grad_norm": 0.0, - "learning_rate": 1.5962175067679013e-05, - "loss": 0.8613, + "learning_rate": 1.2444185232342505e-05, + "loss": 1.0832, "step": 11214 }, { - "epoch": 0.3178044149735045, + "epoch": 0.4388058533531575, "grad_norm": 0.0, - "learning_rate": 1.596143822028668e-05, - "loss": 0.9602, + "learning_rate": 1.2442956416937967e-05, + "loss": 1.1117, "step": 11215 }, { - "epoch": 0.317832752415767, + "epoch": 0.43884498004538697, "grad_norm": 0.0, - "learning_rate": 1.5960701322678943e-05, - "loss": 0.8436, + "learning_rate": 1.2441727562302612e-05, + "loss": 1.0249, "step": 11216 }, { - "epoch": 0.3178610898580294, + "epoch": 0.4388841067376164, "grad_norm": 0.0, - "learning_rate": 1.5959964374862e-05, - "loss": 0.918, + "learning_rate": 1.2440498668456169e-05, + "loss": 1.0551, "step": 11217 }, { - "epoch": 0.31788942730029185, + "epoch": 0.43892323342984585, "grad_norm": 0.0, - "learning_rate": 1.5959227376842067e-05, - "loss": 0.9424, + "learning_rate": 1.2439269735418377e-05, + "loss": 1.2086, "step": 11218 }, { - "epoch": 0.31791776474255434, + "epoch": 0.4389623601220753, "grad_norm": 0.0, - "learning_rate": 1.595849032862535e-05, - "loss": 0.901, + "learning_rate": 1.2438040763208967e-05, + "loss": 1.1995, "step": 11219 }, { - "epoch": 0.3179461021848168, + "epoch": 0.43900148681430473, "grad_norm": 0.0, - "learning_rate": 1.5957753230218052e-05, - "loss": 0.9709, + "learning_rate": 1.2436811751847682e-05, + "loss": 1.1395, "step": 11220 }, { - "epoch": 0.3179744396270793, + "epoch": 0.43904061350653417, "grad_norm": 0.0, - "learning_rate": 1.595701608162639e-05, - "loss": 0.8624, + "learning_rate": 1.243558270135425e-05, + "loss": 1.017, "step": 11221 }, { - "epoch": 0.3180027770693417, + "epoch": 0.4390797401987636, "grad_norm": 0.0, - "learning_rate": 1.595627888285657e-05, - "loss": 0.9634, + "learning_rate": 1.2434353611748415e-05, + "loss": 1.0816, "step": 11222 }, { - "epoch": 0.3180311145116042, + "epoch": 0.43911886689099305, "grad_norm": 0.0, - "learning_rate": 1.5955541633914798e-05, - "loss": 0.9704, + "learning_rate": 1.2433124483049907e-05, + "loss": 1.1348, "step": 11223 }, { - "epoch": 0.31805945195386665, + "epoch": 0.4391579935832225, "grad_norm": 0.0, - "learning_rate": 1.595480433480729e-05, - "loss": 0.9197, + "learning_rate": 1.2431895315278473e-05, + "loss": 1.0285, "step": 11224 }, { - "epoch": 0.3180877893961291, + "epoch": 0.43919712027545194, "grad_norm": 0.0, - "learning_rate": 1.5954066985540257e-05, - "loss": 0.8949, + "learning_rate": 1.2430666108453848e-05, + "loss": 1.062, "step": 11225 }, { - "epoch": 0.3181161268383916, + "epoch": 0.4392362469676814, "grad_norm": 0.0, - "learning_rate": 1.59533295861199e-05, - "loss": 0.9035, + "learning_rate": 1.2429436862595767e-05, + "loss": 0.9428, "step": 11226 }, { - "epoch": 0.318144464280654, + "epoch": 0.4392753736599108, "grad_norm": 0.0, - "learning_rate": 1.595259213655244e-05, - "loss": 1.0166, + "learning_rate": 1.2428207577723977e-05, + "loss": 1.0379, "step": 11227 }, { - "epoch": 0.3181728017229165, + "epoch": 0.4393145003521402, "grad_norm": 0.0, - "learning_rate": 1.5951854636844086e-05, - "loss": 0.9179, + "learning_rate": 1.2426978253858215e-05, + "loss": 1.0898, "step": 11228 }, { - "epoch": 0.31820113916517895, + "epoch": 0.43935362704436964, "grad_norm": 0.0, - "learning_rate": 1.5951117087001048e-05, - "loss": 1.0204, + "learning_rate": 1.2425748891018223e-05, + "loss": 1.1241, "step": 11229 }, { - "epoch": 0.3182294766074414, + "epoch": 0.4393927537365991, "grad_norm": 0.0, - "learning_rate": 1.5950379487029543e-05, - "loss": 1.0002, + "learning_rate": 1.2424519489223743e-05, + "loss": 1.1021, "step": 11230 }, { - "epoch": 0.3182578140497039, + "epoch": 0.4394318804288285, "grad_norm": 0.0, - "learning_rate": 1.5949641836935782e-05, - "loss": 0.9315, + "learning_rate": 1.2423290048494521e-05, + "loss": 1.0667, "step": 11231 }, { - "epoch": 0.3182861514919663, + "epoch": 0.43947100712105797, "grad_norm": 0.0, - "learning_rate": 1.594890413672598e-05, - "loss": 1.0259, + "learning_rate": 1.2422060568850293e-05, + "loss": 1.0898, "step": 11232 }, { - "epoch": 0.3183144889342288, + "epoch": 0.4395101338132874, "grad_norm": 0.0, - "learning_rate": 1.5948166386406345e-05, - "loss": 0.8803, + "learning_rate": 1.2420831050310812e-05, + "loss": 0.9332, "step": 11233 }, { - "epoch": 0.31834282637649125, + "epoch": 0.43954926050551685, "grad_norm": 0.0, - "learning_rate": 1.59474285859831e-05, - "loss": 1.0532, + "learning_rate": 1.2419601492895816e-05, + "loss": 1.0325, "step": 11234 }, { - "epoch": 0.31837116381875374, + "epoch": 0.4395883871977463, "grad_norm": 0.0, - "learning_rate": 1.5946690735462452e-05, - "loss": 0.919, + "learning_rate": 1.2418371896625053e-05, + "loss": 1.0641, "step": 11235 }, { - "epoch": 0.3183995012610162, + "epoch": 0.43962751388997573, "grad_norm": 0.0, - "learning_rate": 1.5945952834850623e-05, - "loss": 0.9473, + "learning_rate": 1.2417142261518265e-05, + "loss": 1.1228, "step": 11236 }, { - "epoch": 0.3184278387032786, + "epoch": 0.43966664058220517, "grad_norm": 0.0, - "learning_rate": 1.5945214884153823e-05, - "loss": 1.0016, + "learning_rate": 1.2415912587595202e-05, + "loss": 0.9694, "step": 11237 }, { - "epoch": 0.3184561761455411, + "epoch": 0.4397057672744346, "grad_norm": 0.0, - "learning_rate": 1.5944476883378274e-05, - "loss": 0.9616, + "learning_rate": 1.2414682874875612e-05, + "loss": 1.092, "step": 11238 }, { - "epoch": 0.31848451358780355, + "epoch": 0.43974489396666405, "grad_norm": 0.0, - "learning_rate": 1.5943738832530183e-05, - "loss": 1.0058, + "learning_rate": 1.2413453123379238e-05, + "loss": 1.1332, "step": 11239 }, { - "epoch": 0.31851285103006605, + "epoch": 0.4397840206588935, "grad_norm": 0.0, - "learning_rate": 1.5943000731615777e-05, - "loss": 0.8985, + "learning_rate": 1.2412223333125833e-05, + "loss": 1.0979, "step": 11240 }, { - "epoch": 0.3185411884723285, + "epoch": 0.43982314735112293, "grad_norm": 0.0, - "learning_rate": 1.594226258064127e-05, - "loss": 0.8387, + "learning_rate": 1.2410993504135143e-05, + "loss": 0.9946, "step": 11241 }, { - "epoch": 0.3185695259145909, + "epoch": 0.4398622740433524, "grad_norm": 0.0, - "learning_rate": 1.5941524379612878e-05, - "loss": 0.9489, + "learning_rate": 1.2409763636426919e-05, + "loss": 1.0765, "step": 11242 }, { - "epoch": 0.3185978633568534, + "epoch": 0.4399014007355818, "grad_norm": 0.0, - "learning_rate": 1.5940786128536813e-05, - "loss": 0.8919, + "learning_rate": 1.240853373002091e-05, + "loss": 1.1046, "step": 11243 }, { - "epoch": 0.31862620079911586, + "epoch": 0.43994052742781126, "grad_norm": 0.0, - "learning_rate": 1.5940047827419305e-05, - "loss": 1.0199, + "learning_rate": 1.2407303784936868e-05, + "loss": 1.0563, "step": 11244 }, { - "epoch": 0.31865453824137835, + "epoch": 0.4399796541200407, "grad_norm": 0.0, - "learning_rate": 1.593930947626657e-05, - "loss": 0.9463, + "learning_rate": 1.2406073801194546e-05, + "loss": 0.9787, "step": 11245 }, { - "epoch": 0.3186828756836408, + "epoch": 0.44001878081227014, "grad_norm": 0.0, - "learning_rate": 1.5938571075084826e-05, - "loss": 0.9337, + "learning_rate": 1.2404843778813689e-05, + "loss": 1.0278, "step": 11246 }, { - "epoch": 0.3187112131259033, + "epoch": 0.4400579075044996, "grad_norm": 0.0, - "learning_rate": 1.593783262388029e-05, - "loss": 0.9965, + "learning_rate": 1.2403613717814058e-05, + "loss": 1.1056, "step": 11247 }, { - "epoch": 0.3187395505681657, + "epoch": 0.440097034196729, "grad_norm": 0.0, - "learning_rate": 1.5937094122659187e-05, - "loss": 1.056, + "learning_rate": 1.24023836182154e-05, + "loss": 0.9845, "step": 11248 }, { - "epoch": 0.31876788801042816, + "epoch": 0.44013616088895846, "grad_norm": 0.0, - "learning_rate": 1.5936355571427734e-05, - "loss": 0.9532, + "learning_rate": 1.2401153480037473e-05, + "loss": 0.9919, "step": 11249 }, { - "epoch": 0.31879622545269065, + "epoch": 0.4401752875811879, "grad_norm": 0.0, - "learning_rate": 1.5935616970192155e-05, - "loss": 0.9928, + "learning_rate": 1.2399923303300028e-05, + "loss": 1.1078, "step": 11250 }, { - "epoch": 0.3188245628949531, + "epoch": 0.44021441427341734, "grad_norm": 0.0, - "learning_rate": 1.5934878318958668e-05, - "loss": 1.1124, + "learning_rate": 1.2398693088022827e-05, + "loss": 1.0676, "step": 11251 }, { - "epoch": 0.3188529003372156, + "epoch": 0.4402535409656468, "grad_norm": 0.0, - "learning_rate": 1.59341396177335e-05, - "loss": 1.0576, + "learning_rate": 1.2397462834225618e-05, + "loss": 1.0704, "step": 11252 }, { - "epoch": 0.318881237779478, + "epoch": 0.4402926676578762, "grad_norm": 0.0, - "learning_rate": 1.593340086652287e-05, - "loss": 0.9164, + "learning_rate": 1.2396232541928157e-05, + "loss": 1.2204, "step": 11253 }, { - "epoch": 0.31890957522174046, + "epoch": 0.44033179435010567, "grad_norm": 0.0, - "learning_rate": 1.5932662065332996e-05, - "loss": 0.9812, + "learning_rate": 1.2395002211150207e-05, + "loss": 1.0214, "step": 11254 }, { - "epoch": 0.31893791266400295, + "epoch": 0.4403709210423351, "grad_norm": 0.0, - "learning_rate": 1.593192321417011e-05, - "loss": 1.0083, + "learning_rate": 1.2393771841911524e-05, + "loss": 1.0669, "step": 11255 }, { - "epoch": 0.3189662501062654, + "epoch": 0.44041004773456455, "grad_norm": 0.0, - "learning_rate": 1.5931184313040437e-05, - "loss": 0.9296, + "learning_rate": 1.2392541434231861e-05, + "loss": 1.1148, "step": 11256 }, { - "epoch": 0.3189945875485279, + "epoch": 0.44044917442679393, "grad_norm": 0.0, - "learning_rate": 1.5930445361950188e-05, - "loss": 0.9393, + "learning_rate": 1.2391310988130983e-05, + "loss": 1.0071, "step": 11257 }, { - "epoch": 0.3190229249907903, + "epoch": 0.4404883011190234, "grad_norm": 0.0, - "learning_rate": 1.59297063609056e-05, - "loss": 0.9196, + "learning_rate": 1.2390080503628647e-05, + "loss": 1.0244, "step": 11258 }, { - "epoch": 0.3190512624330528, + "epoch": 0.4405274278112528, "grad_norm": 0.0, - "learning_rate": 1.592896730991289e-05, - "loss": 1.0142, + "learning_rate": 1.2388849980744613e-05, + "loss": 1.227, "step": 11259 }, { - "epoch": 0.31907959987531526, + "epoch": 0.44056655450348226, "grad_norm": 0.0, - "learning_rate": 1.592822820897829e-05, - "loss": 0.9171, + "learning_rate": 1.2387619419498642e-05, + "loss": 1.086, "step": 11260 }, { - "epoch": 0.3191079373175777, + "epoch": 0.4406056811957117, "grad_norm": 0.0, - "learning_rate": 1.5927489058108025e-05, - "loss": 0.9717, + "learning_rate": 1.2386388819910493e-05, + "loss": 1.1632, "step": 11261 }, { - "epoch": 0.3191362747598402, + "epoch": 0.44064480788794114, "grad_norm": 0.0, - "learning_rate": 1.5926749857308316e-05, - "loss": 0.9772, + "learning_rate": 1.2385158181999933e-05, + "loss": 1.1094, "step": 11262 }, { - "epoch": 0.3191646122021026, + "epoch": 0.4406839345801706, "grad_norm": 0.0, - "learning_rate": 1.5926010606585386e-05, - "loss": 0.929, + "learning_rate": 1.238392750578672e-05, + "loss": 1.0314, "step": 11263 }, { - "epoch": 0.3191929496443651, + "epoch": 0.4407230612724, "grad_norm": 0.0, - "learning_rate": 1.5925271305945474e-05, - "loss": 0.916, + "learning_rate": 1.2382696791290615e-05, + "loss": 1.0054, "step": 11264 }, { - "epoch": 0.31922128708662756, + "epoch": 0.44076218796462946, "grad_norm": 0.0, - "learning_rate": 1.5924531955394802e-05, - "loss": 0.998, + "learning_rate": 1.2381466038531388e-05, + "loss": 0.9568, "step": 11265 }, { - "epoch": 0.31924962452889, + "epoch": 0.4408013146568589, "grad_norm": 0.0, - "learning_rate": 1.5923792554939598e-05, - "loss": 0.9889, + "learning_rate": 1.23802352475288e-05, + "loss": 1.0355, "step": 11266 }, { - "epoch": 0.3192779619711525, + "epoch": 0.44084044134908834, "grad_norm": 0.0, - "learning_rate": 1.5923053104586087e-05, - "loss": 0.9434, + "learning_rate": 1.237900441830262e-05, + "loss": 1.1773, "step": 11267 }, { - "epoch": 0.31930629941341493, + "epoch": 0.4408795680413178, "grad_norm": 0.0, - "learning_rate": 1.59223136043405e-05, - "loss": 0.8755, + "learning_rate": 1.2377773550872605e-05, + "loss": 1.1409, "step": 11268 }, { - "epoch": 0.3193346368556774, + "epoch": 0.4409186947335472, "grad_norm": 0.0, - "learning_rate": 1.5921574054209064e-05, - "loss": 1.0466, + "learning_rate": 1.237654264525853e-05, + "loss": 0.9655, "step": 11269 }, { - "epoch": 0.31936297429793986, + "epoch": 0.44095782142577666, "grad_norm": 0.0, - "learning_rate": 1.5920834454198014e-05, - "loss": 0.9287, + "learning_rate": 1.2375311701480156e-05, + "loss": 0.9987, "step": 11270 }, { - "epoch": 0.31939131174020235, + "epoch": 0.4409969481180061, "grad_norm": 0.0, - "learning_rate": 1.592009480431358e-05, - "loss": 0.97, + "learning_rate": 1.2374080719557253e-05, + "loss": 0.975, "step": 11271 }, { - "epoch": 0.3194196491824648, + "epoch": 0.44103607481023555, "grad_norm": 0.0, - "learning_rate": 1.5919355104561985e-05, - "loss": 1.0255, + "learning_rate": 1.237284969950959e-05, + "loss": 1.0909, "step": 11272 }, { - "epoch": 0.31944798662472723, + "epoch": 0.441075201502465, "grad_norm": 0.0, - "learning_rate": 1.5918615354949463e-05, - "loss": 0.9193, + "learning_rate": 1.2371618641356933e-05, + "loss": 1.161, "step": 11273 }, { - "epoch": 0.3194763240669897, + "epoch": 0.4411143281946944, "grad_norm": 0.0, - "learning_rate": 1.591787555548225e-05, - "loss": 0.9434, + "learning_rate": 1.2370387545119052e-05, + "loss": 1.0427, "step": 11274 }, { - "epoch": 0.31950466150925216, + "epoch": 0.44115345488692387, "grad_norm": 0.0, - "learning_rate": 1.591713570616657e-05, - "loss": 1.1294, + "learning_rate": 1.2369156410815717e-05, + "loss": 1.0964, "step": 11275 }, { - "epoch": 0.31953299895151466, + "epoch": 0.4411925815791533, "grad_norm": 0.0, - "learning_rate": 1.591639580700866e-05, - "loss": 0.8633, + "learning_rate": 1.23679252384667e-05, + "loss": 1.0244, "step": 11276 }, { - "epoch": 0.3195613363937771, + "epoch": 0.44123170827138275, "grad_norm": 0.0, - "learning_rate": 1.591565585801475e-05, - "loss": 0.9369, + "learning_rate": 1.236669402809177e-05, + "loss": 0.9044, "step": 11277 }, { - "epoch": 0.31958967383603953, + "epoch": 0.4412708349636122, "grad_norm": 0.0, - "learning_rate": 1.5914915859191075e-05, - "loss": 0.9152, + "learning_rate": 1.2365462779710699e-05, + "loss": 1.1607, "step": 11278 }, { - "epoch": 0.319618011278302, + "epoch": 0.44130996165584163, "grad_norm": 0.0, - "learning_rate": 1.5914175810543868e-05, - "loss": 1.0558, + "learning_rate": 1.2364231493343262e-05, + "loss": 1.0038, "step": 11279 }, { - "epoch": 0.31964634872056447, + "epoch": 0.4413490883480711, "grad_norm": 0.0, - "learning_rate": 1.591343571207936e-05, - "loss": 1.0499, + "learning_rate": 1.2363000169009228e-05, + "loss": 0.9369, "step": 11280 }, { - "epoch": 0.31967468616282696, + "epoch": 0.4413882150403005, "grad_norm": 0.0, - "learning_rate": 1.591269556380379e-05, - "loss": 0.9017, + "learning_rate": 1.2361768806728372e-05, + "loss": 1.0442, "step": 11281 }, { - "epoch": 0.3197030236050894, + "epoch": 0.44142734173252995, "grad_norm": 0.0, - "learning_rate": 1.5911955365723385e-05, - "loss": 1.0146, + "learning_rate": 1.236053740652047e-05, + "loss": 1.1556, "step": 11282 }, { - "epoch": 0.3197313610473519, + "epoch": 0.4414664684247594, "grad_norm": 0.0, - "learning_rate": 1.5911215117844393e-05, - "loss": 0.8706, + "learning_rate": 1.2359305968405295e-05, + "loss": 1.0883, "step": 11283 }, { - "epoch": 0.31975969848961433, + "epoch": 0.44150559511698884, "grad_norm": 0.0, - "learning_rate": 1.5910474820173033e-05, - "loss": 1.0278, + "learning_rate": 1.235807449240262e-05, + "loss": 0.9235, "step": 11284 }, { - "epoch": 0.31978803593187677, + "epoch": 0.4415447218092182, "grad_norm": 0.0, - "learning_rate": 1.590973447271555e-05, - "loss": 0.9, + "learning_rate": 1.2356842978532227e-05, + "loss": 1.0449, "step": 11285 }, { - "epoch": 0.31981637337413926, + "epoch": 0.44158384850144766, "grad_norm": 0.0, - "learning_rate": 1.590899407547818e-05, - "loss": 0.9561, + "learning_rate": 1.2355611426813886e-05, + "loss": 0.9857, "step": 11286 }, { - "epoch": 0.3198447108164017, + "epoch": 0.4416229751936771, "grad_norm": 0.0, - "learning_rate": 1.590825362846716e-05, - "loss": 0.8811, + "learning_rate": 1.2354379837267378e-05, + "loss": 1.1087, "step": 11287 }, { - "epoch": 0.3198730482586642, + "epoch": 0.44166210188590654, "grad_norm": 0.0, - "learning_rate": 1.5907513131688723e-05, - "loss": 0.9537, + "learning_rate": 1.235314820991248e-05, + "loss": 1.1487, "step": 11288 }, { - "epoch": 0.31990138570092663, + "epoch": 0.441701228578136, "grad_norm": 0.0, - "learning_rate": 1.590677258514911e-05, - "loss": 0.8748, + "learning_rate": 1.2351916544768972e-05, + "loss": 1.0397, "step": 11289 }, { - "epoch": 0.31992972314318907, + "epoch": 0.4417403552703654, "grad_norm": 0.0, - "learning_rate": 1.590603198885456e-05, - "loss": 0.9482, + "learning_rate": 1.235068484185663e-05, + "loss": 1.0497, "step": 11290 }, { - "epoch": 0.31995806058545156, + "epoch": 0.44177948196259487, "grad_norm": 0.0, - "learning_rate": 1.590529134281131e-05, - "loss": 0.9885, + "learning_rate": 1.2349453101195237e-05, + "loss": 1.093, "step": 11291 }, { - "epoch": 0.319986398027714, + "epoch": 0.4418186086548243, "grad_norm": 0.0, - "learning_rate": 1.5904550647025595e-05, - "loss": 1.0429, + "learning_rate": 1.234822132280457e-05, + "loss": 0.9159, "step": 11292 }, { - "epoch": 0.3200147354699765, + "epoch": 0.44185773534705375, "grad_norm": 0.0, - "learning_rate": 1.590380990150366e-05, - "loss": 1.1066, + "learning_rate": 1.234698950670441e-05, + "loss": 1.1516, "step": 11293 }, { - "epoch": 0.32004307291223894, + "epoch": 0.4418968620392832, "grad_norm": 0.0, - "learning_rate": 1.590306910625174e-05, - "loss": 1.0061, + "learning_rate": 1.2345757652914541e-05, + "loss": 1.1576, "step": 11294 }, { - "epoch": 0.32007141035450143, + "epoch": 0.44193598873151263, "grad_norm": 0.0, - "learning_rate": 1.590232826127608e-05, - "loss": 0.9554, + "learning_rate": 1.2344525761454742e-05, + "loss": 0.9969, "step": 11295 }, { - "epoch": 0.32009974779676387, + "epoch": 0.44197511542374207, "grad_norm": 0.0, - "learning_rate": 1.5901587366582915e-05, - "loss": 0.8975, + "learning_rate": 1.2343293832344798e-05, + "loss": 1.0889, "step": 11296 }, { - "epoch": 0.3201280852390263, + "epoch": 0.4420142421159715, "grad_norm": 0.0, - "learning_rate": 1.5900846422178488e-05, - "loss": 0.9268, + "learning_rate": 1.2342061865604492e-05, + "loss": 1.0788, "step": 11297 }, { - "epoch": 0.3201564226812888, + "epoch": 0.44205336880820095, "grad_norm": 0.0, - "learning_rate": 1.590010542806904e-05, - "loss": 1.0606, + "learning_rate": 1.2340829861253605e-05, + "loss": 1.1962, "step": 11298 }, { - "epoch": 0.32018476012355124, + "epoch": 0.4420924955004304, "grad_norm": 0.0, - "learning_rate": 1.5899364384260813e-05, - "loss": 0.9202, + "learning_rate": 1.2339597819311925e-05, + "loss": 1.0164, "step": 11299 }, { - "epoch": 0.32021309756581373, + "epoch": 0.44213162219265983, "grad_norm": 0.0, - "learning_rate": 1.5898623290760048e-05, - "loss": 0.9779, + "learning_rate": 1.2338365739799236e-05, + "loss": 0.9669, "step": 11300 }, { - "epoch": 0.32024143500807617, + "epoch": 0.4421707488848893, "grad_norm": 0.0, - "learning_rate": 1.589788214757299e-05, - "loss": 0.9311, + "learning_rate": 1.2337133622735324e-05, + "loss": 1.0499, "step": 11301 }, { - "epoch": 0.3202697724503386, + "epoch": 0.4422098755771187, "grad_norm": 0.0, - "learning_rate": 1.589714095470588e-05, - "loss": 0.9432, + "learning_rate": 1.2335901468139974e-05, + "loss": 0.941, "step": 11302 }, { - "epoch": 0.3202981098926011, + "epoch": 0.44224900226934816, "grad_norm": 0.0, - "learning_rate": 1.5896399712164966e-05, - "loss": 0.9051, + "learning_rate": 1.2334669276032971e-05, + "loss": 1.0224, "step": 11303 }, { - "epoch": 0.32032644733486354, + "epoch": 0.4422881289615776, "grad_norm": 0.0, - "learning_rate": 1.5895658419956485e-05, - "loss": 1.0087, + "learning_rate": 1.233343704643411e-05, + "loss": 1.1548, "step": 11304 }, { - "epoch": 0.32035478477712603, + "epoch": 0.44232725565380704, "grad_norm": 0.0, - "learning_rate": 1.589491707808668e-05, - "loss": 1.0695, + "learning_rate": 1.2332204779363171e-05, + "loss": 1.1115, "step": 11305 }, { - "epoch": 0.32038312221938847, + "epoch": 0.4423663823460365, "grad_norm": 0.0, - "learning_rate": 1.5894175686561803e-05, - "loss": 0.9005, + "learning_rate": 1.2330972474839944e-05, + "loss": 1.0781, "step": 11306 }, { - "epoch": 0.32041145966165097, + "epoch": 0.4424055090382659, "grad_norm": 0.0, - "learning_rate": 1.5893434245388097e-05, - "loss": 1.0739, + "learning_rate": 1.2329740132884222e-05, + "loss": 1.0044, "step": 11307 }, { - "epoch": 0.3204397971039134, + "epoch": 0.44244463573049536, "grad_norm": 0.0, - "learning_rate": 1.5892692754571802e-05, - "loss": 0.9961, + "learning_rate": 1.2328507753515793e-05, + "loss": 0.9818, "step": 11308 }, { - "epoch": 0.32046813454617584, + "epoch": 0.4424837624227248, "grad_norm": 0.0, - "learning_rate": 1.5891951214119167e-05, - "loss": 1.0215, + "learning_rate": 1.2327275336754448e-05, + "loss": 0.9127, "step": 11309 }, { - "epoch": 0.32049647198843834, + "epoch": 0.44252288911495424, "grad_norm": 0.0, - "learning_rate": 1.5891209624036443e-05, - "loss": 0.8782, + "learning_rate": 1.2326042882619973e-05, + "loss": 0.9927, "step": 11310 }, { - "epoch": 0.3205248094307008, + "epoch": 0.4425620158071837, "grad_norm": 0.0, - "learning_rate": 1.5890467984329872e-05, - "loss": 0.8456, + "learning_rate": 1.232481039113217e-05, + "loss": 1.064, "step": 11311 }, { - "epoch": 0.32055314687296327, + "epoch": 0.4426011424994131, "grad_norm": 0.0, - "learning_rate": 1.58897262950057e-05, - "loss": 0.9303, + "learning_rate": 1.2323577862310823e-05, + "loss": 1.069, "step": 11312 }, { - "epoch": 0.3205814843152257, + "epoch": 0.44264026919164257, "grad_norm": 0.0, - "learning_rate": 1.588898455607018e-05, - "loss": 1.0367, + "learning_rate": 1.2322345296175724e-05, + "loss": 1.1057, "step": 11313 }, { - "epoch": 0.32060982175748814, + "epoch": 0.44267939588387195, "grad_norm": 0.0, - "learning_rate": 1.588824276752955e-05, - "loss": 0.9091, + "learning_rate": 1.2321112692746673e-05, + "loss": 0.9813, "step": 11314 }, { - "epoch": 0.32063815919975064, + "epoch": 0.4427185225761014, "grad_norm": 0.0, - "learning_rate": 1.588750092939007e-05, - "loss": 0.9618, + "learning_rate": 1.2319880052043458e-05, + "loss": 0.995, "step": 11315 }, { - "epoch": 0.3206664966420131, + "epoch": 0.44275764926833083, "grad_norm": 0.0, - "learning_rate": 1.588675904165798e-05, - "loss": 0.99, + "learning_rate": 1.2318647374085878e-05, + "loss": 0.9644, "step": 11316 }, { - "epoch": 0.32069483408427557, + "epoch": 0.4427967759605603, "grad_norm": 0.0, - "learning_rate": 1.5886017104339538e-05, - "loss": 1.0917, + "learning_rate": 1.2317414658893728e-05, + "loss": 1.0301, "step": 11317 }, { - "epoch": 0.320723171526538, + "epoch": 0.4428359026527897, "grad_norm": 0.0, - "learning_rate": 1.5885275117440983e-05, - "loss": 1.0724, + "learning_rate": 1.2316181906486802e-05, + "loss": 1.0699, "step": 11318 }, { - "epoch": 0.3207515089688005, + "epoch": 0.44287502934501916, "grad_norm": 0.0, - "learning_rate": 1.588453308096857e-05, - "loss": 0.9135, + "learning_rate": 1.2314949116884894e-05, + "loss": 1.171, "step": 11319 }, { - "epoch": 0.32077984641106294, + "epoch": 0.4429141560372486, "grad_norm": 0.0, - "learning_rate": 1.5883790994928554e-05, - "loss": 0.8513, + "learning_rate": 1.2313716290107806e-05, + "loss": 1.1104, "step": 11320 }, { - "epoch": 0.3208081838533254, + "epoch": 0.44295328272947804, "grad_norm": 0.0, - "learning_rate": 1.5883048859327178e-05, - "loss": 1.0024, + "learning_rate": 1.2312483426175337e-05, + "loss": 1.0128, "step": 11321 }, { - "epoch": 0.3208365212955879, + "epoch": 0.4429924094217075, "grad_norm": 0.0, - "learning_rate": 1.58823066741707e-05, - "loss": 0.9623, + "learning_rate": 1.2311250525107276e-05, + "loss": 0.9991, "step": 11322 }, { - "epoch": 0.3208648587378503, + "epoch": 0.4430315361139369, "grad_norm": 0.0, - "learning_rate": 1.5881564439465364e-05, - "loss": 1.0873, + "learning_rate": 1.2310017586923431e-05, + "loss": 0.9648, "step": 11323 }, { - "epoch": 0.3208931961801128, + "epoch": 0.44307066280616636, "grad_norm": 0.0, - "learning_rate": 1.588082215521743e-05, - "loss": 1.0479, + "learning_rate": 1.2308784611643597e-05, + "loss": 0.9978, "step": 11324 }, { - "epoch": 0.32092153362237524, + "epoch": 0.4431097894983958, "grad_norm": 0.0, - "learning_rate": 1.5880079821433145e-05, - "loss": 0.9248, + "learning_rate": 1.2307551599287577e-05, + "loss": 1.0854, "step": 11325 }, { - "epoch": 0.3209498710646377, + "epoch": 0.44314891619062524, "grad_norm": 0.0, - "learning_rate": 1.5879337438118766e-05, - "loss": 0.8753, + "learning_rate": 1.2306318549875167e-05, + "loss": 1.108, "step": 11326 }, { - "epoch": 0.3209782085069002, + "epoch": 0.4431880428828547, "grad_norm": 0.0, - "learning_rate": 1.5878595005280543e-05, - "loss": 0.8659, + "learning_rate": 1.2305085463426173e-05, + "loss": 0.995, "step": 11327 }, { - "epoch": 0.3210065459491626, + "epoch": 0.4432271695750841, "grad_norm": 0.0, - "learning_rate": 1.5877852522924733e-05, - "loss": 0.9567, + "learning_rate": 1.2303852339960393e-05, + "loss": 1.1284, "step": 11328 }, { - "epoch": 0.3210348833914251, + "epoch": 0.44326629626731356, "grad_norm": 0.0, - "learning_rate": 1.587710999105759e-05, - "loss": 0.9693, + "learning_rate": 1.2302619179497635e-05, + "loss": 1.1846, "step": 11329 }, { - "epoch": 0.32106322083368755, + "epoch": 0.443305422959543, "grad_norm": 0.0, - "learning_rate": 1.5876367409685363e-05, - "loss": 1.0204, + "learning_rate": 1.2301385982057696e-05, + "loss": 1.0414, "step": 11330 }, { - "epoch": 0.32109155827595004, + "epoch": 0.44334454965177245, "grad_norm": 0.0, - "learning_rate": 1.5875624778814313e-05, - "loss": 0.8656, + "learning_rate": 1.2300152747660382e-05, + "loss": 1.0051, "step": 11331 }, { - "epoch": 0.3211198957182125, + "epoch": 0.4433836763440019, "grad_norm": 0.0, - "learning_rate": 1.5874882098450694e-05, - "loss": 0.9094, + "learning_rate": 1.2298919476325497e-05, + "loss": 1.1392, "step": 11332 }, { - "epoch": 0.3211482331604749, + "epoch": 0.44342280303623133, "grad_norm": 0.0, - "learning_rate": 1.587413936860076e-05, - "loss": 0.99, + "learning_rate": 1.2297686168072844e-05, + "loss": 1.041, "step": 11333 }, { - "epoch": 0.3211765706027374, + "epoch": 0.44346192972846077, "grad_norm": 0.0, - "learning_rate": 1.587339658927077e-05, - "loss": 0.9296, + "learning_rate": 1.2296452822922234e-05, + "loss": 1.029, "step": 11334 }, { - "epoch": 0.32120490804499985, + "epoch": 0.4435010564206902, "grad_norm": 0.0, - "learning_rate": 1.587265376046698e-05, - "loss": 1.075, + "learning_rate": 1.2295219440893467e-05, + "loss": 1.1632, "step": 11335 }, { - "epoch": 0.32123324548726234, + "epoch": 0.44354018311291965, "grad_norm": 0.0, - "learning_rate": 1.5871910882195643e-05, - "loss": 0.9898, + "learning_rate": 1.2293986022006353e-05, + "loss": 1.1598, "step": 11336 }, { - "epoch": 0.3212615829295248, + "epoch": 0.4435793098051491, "grad_norm": 0.0, - "learning_rate": 1.5871167954463028e-05, - "loss": 0.9525, + "learning_rate": 1.2292752566280696e-05, + "loss": 1.0225, "step": 11337 }, { - "epoch": 0.3212899203717872, + "epoch": 0.44361843649737853, "grad_norm": 0.0, - "learning_rate": 1.5870424977275378e-05, - "loss": 0.9522, + "learning_rate": 1.2291519073736308e-05, + "loss": 1.0273, "step": 11338 }, { - "epoch": 0.3213182578140497, + "epoch": 0.443657563189608, "grad_norm": 0.0, - "learning_rate": 1.586968195063896e-05, - "loss": 0.8699, + "learning_rate": 1.2290285544392992e-05, + "loss": 1.0365, "step": 11339 }, { - "epoch": 0.32134659525631215, + "epoch": 0.4436966898818374, "grad_norm": 0.0, - "learning_rate": 1.5868938874560034e-05, - "loss": 0.9564, + "learning_rate": 1.2289051978270565e-05, + "loss": 1.0706, "step": 11340 }, { - "epoch": 0.32137493269857464, + "epoch": 0.44373581657406685, "grad_norm": 0.0, - "learning_rate": 1.5868195749044853e-05, - "loss": 1.0228, + "learning_rate": 1.228781837538883e-05, + "loss": 1.143, "step": 11341 }, { - "epoch": 0.3214032701408371, + "epoch": 0.44377494326629624, "grad_norm": 0.0, - "learning_rate": 1.5867452574099682e-05, - "loss": 1.032, + "learning_rate": 1.2286584735767595e-05, + "loss": 1.1097, "step": 11342 }, { - "epoch": 0.3214316075830996, + "epoch": 0.4438140699585257, "grad_norm": 0.0, - "learning_rate": 1.586670934973078e-05, - "loss": 1.1061, + "learning_rate": 1.228535105942668e-05, + "loss": 1.0047, "step": 11343 }, { - "epoch": 0.321459945025362, + "epoch": 0.4438531966507551, "grad_norm": 0.0, - "learning_rate": 1.5865966075944402e-05, - "loss": 0.9396, + "learning_rate": 1.2284117346385887e-05, + "loss": 1.1125, "step": 11344 }, { - "epoch": 0.32148828246762445, + "epoch": 0.44389232334298456, "grad_norm": 0.0, - "learning_rate": 1.586522275274682e-05, - "loss": 1.0486, + "learning_rate": 1.2282883596665032e-05, + "loss": 0.9536, "step": 11345 }, { - "epoch": 0.32151661990988695, + "epoch": 0.443931450035214, "grad_norm": 0.0, - "learning_rate": 1.5864479380144283e-05, - "loss": 0.9732, + "learning_rate": 1.2281649810283928e-05, + "loss": 0.9402, "step": 11346 }, { - "epoch": 0.3215449573521494, + "epoch": 0.44397057672744344, "grad_norm": 0.0, - "learning_rate": 1.5863735958143064e-05, - "loss": 0.9448, + "learning_rate": 1.2280415987262387e-05, + "loss": 0.9812, "step": 11347 }, { - "epoch": 0.3215732947944119, + "epoch": 0.4440097034196729, "grad_norm": 0.0, - "learning_rate": 1.5862992486749416e-05, - "loss": 0.9967, + "learning_rate": 1.2279182127620221e-05, + "loss": 1.111, "step": 11348 }, { - "epoch": 0.3216016322366743, + "epoch": 0.4440488301119023, "grad_norm": 0.0, - "learning_rate": 1.5862248965969604e-05, - "loss": 1.0774, + "learning_rate": 1.2277948231377247e-05, + "loss": 1.0113, "step": 11349 }, { - "epoch": 0.32162996967893676, + "epoch": 0.44408795680413177, "grad_norm": 0.0, - "learning_rate": 1.5861505395809895e-05, - "loss": 0.9571, + "learning_rate": 1.2276714298553283e-05, + "loss": 1.2053, "step": 11350 }, { - "epoch": 0.32165830712119925, + "epoch": 0.4441270834963612, "grad_norm": 0.0, - "learning_rate": 1.5860761776276547e-05, - "loss": 1.0193, + "learning_rate": 1.2275480329168135e-05, + "loss": 1.1714, "step": 11351 }, { - "epoch": 0.3216866445634617, + "epoch": 0.44416621018859065, "grad_norm": 0.0, - "learning_rate": 1.586001810737583e-05, - "loss": 1.0163, + "learning_rate": 1.2274246323241626e-05, + "loss": 0.9572, "step": 11352 }, { - "epoch": 0.3217149820057242, + "epoch": 0.4442053368808201, "grad_norm": 0.0, - "learning_rate": 1.5859274389114e-05, - "loss": 0.987, + "learning_rate": 1.2273012280793569e-05, + "loss": 1.0803, "step": 11353 }, { - "epoch": 0.3217433194479866, + "epoch": 0.44424446357304953, "grad_norm": 0.0, - "learning_rate": 1.585853062149733e-05, - "loss": 0.9957, + "learning_rate": 1.2271778201843785e-05, + "loss": 1.097, "step": 11354 }, { - "epoch": 0.3217716568902491, + "epoch": 0.44428359026527897, "grad_norm": 0.0, - "learning_rate": 1.5857786804532077e-05, - "loss": 0.8879, + "learning_rate": 1.2270544086412088e-05, + "loss": 1.1775, "step": 11355 }, { - "epoch": 0.32179999433251155, + "epoch": 0.4443227169575084, "grad_norm": 0.0, - "learning_rate": 1.5857042938224513e-05, - "loss": 0.9126, + "learning_rate": 1.22693099345183e-05, + "loss": 1.0875, "step": 11356 }, { - "epoch": 0.321828331774774, + "epoch": 0.44436184364973785, "grad_norm": 0.0, - "learning_rate": 1.5856299022580902e-05, - "loss": 0.9262, + "learning_rate": 1.2268075746182237e-05, + "loss": 0.9283, "step": 11357 }, { - "epoch": 0.3218566692170365, + "epoch": 0.4444009703419673, "grad_norm": 0.0, - "learning_rate": 1.585555505760751e-05, - "loss": 0.9732, + "learning_rate": 1.226684152142372e-05, + "loss": 0.9755, "step": 11358 }, { - "epoch": 0.3218850066592989, + "epoch": 0.44444009703419673, "grad_norm": 0.0, - "learning_rate": 1.58548110433106e-05, - "loss": 1.0598, + "learning_rate": 1.2265607260262571e-05, + "loss": 0.9979, "step": 11359 }, { - "epoch": 0.3219133441015614, + "epoch": 0.4444792237264262, "grad_norm": 0.0, - "learning_rate": 1.585406697969644e-05, - "loss": 0.9066, + "learning_rate": 1.2264372962718602e-05, + "loss": 1.041, "step": 11360 }, { - "epoch": 0.32194168154382385, + "epoch": 0.4445183504186556, "grad_norm": 0.0, - "learning_rate": 1.5853322866771308e-05, - "loss": 0.9659, + "learning_rate": 1.2263138628811648e-05, + "loss": 0.9982, "step": 11361 }, { - "epoch": 0.3219700189860863, + "epoch": 0.44455747711088506, "grad_norm": 0.0, - "learning_rate": 1.585257870454146e-05, - "loss": 0.9876, + "learning_rate": 1.226190425856152e-05, + "loss": 1.137, "step": 11362 }, { - "epoch": 0.3219983564283488, + "epoch": 0.4445966038031145, "grad_norm": 0.0, - "learning_rate": 1.5851834493013168e-05, - "loss": 0.829, + "learning_rate": 1.2260669851988042e-05, + "loss": 1.0964, "step": 11363 }, { - "epoch": 0.3220266938706112, + "epoch": 0.44463573049534394, "grad_norm": 0.0, - "learning_rate": 1.5851090232192704e-05, - "loss": 1.0557, + "learning_rate": 1.225943540911104e-05, + "loss": 1.0452, "step": 11364 }, { - "epoch": 0.3220550313128737, + "epoch": 0.4446748571875734, "grad_norm": 0.0, - "learning_rate": 1.585034592208633e-05, - "loss": 0.9211, + "learning_rate": 1.225820092995034e-05, + "loss": 1.064, "step": 11365 }, { - "epoch": 0.32208336875513616, + "epoch": 0.4447139838798028, "grad_norm": 0.0, - "learning_rate": 1.5849601562700322e-05, - "loss": 0.9935, + "learning_rate": 1.225696641452576e-05, + "loss": 1.0439, "step": 11366 }, { - "epoch": 0.3221117061973986, + "epoch": 0.44475311057203226, "grad_norm": 0.0, - "learning_rate": 1.5848857154040947e-05, - "loss": 0.9061, + "learning_rate": 1.2255731862857127e-05, + "loss": 1.1212, "step": 11367 }, { - "epoch": 0.3221400436396611, + "epoch": 0.4447922372642617, "grad_norm": 0.0, - "learning_rate": 1.5848112696114476e-05, - "loss": 1.0014, + "learning_rate": 1.2254497274964268e-05, + "loss": 1.1243, "step": 11368 }, { - "epoch": 0.3221683810819235, + "epoch": 0.44483136395649114, "grad_norm": 0.0, - "learning_rate": 1.584736818892718e-05, - "loss": 0.9365, + "learning_rate": 1.2253262650867008e-05, + "loss": 1.2158, "step": 11369 }, { - "epoch": 0.322196718524186, + "epoch": 0.4448704906487206, "grad_norm": 0.0, - "learning_rate": 1.5846623632485334e-05, - "loss": 0.8996, + "learning_rate": 1.2252027990585173e-05, + "loss": 0.9609, "step": 11370 }, { - "epoch": 0.32222505596644846, + "epoch": 0.44490961734094997, "grad_norm": 0.0, - "learning_rate": 1.5845879026795202e-05, - "loss": 0.8827, + "learning_rate": 1.225079329413859e-05, + "loss": 1.1245, "step": 11371 }, { - "epoch": 0.32225339340871095, + "epoch": 0.4449487440331794, "grad_norm": 0.0, - "learning_rate": 1.584513437186306e-05, - "loss": 0.9129, + "learning_rate": 1.2249558561547088e-05, + "loss": 1.0922, "step": 11372 }, { - "epoch": 0.3222817308509734, + "epoch": 0.44498787072540885, "grad_norm": 0.0, - "learning_rate": 1.5844389667695185e-05, - "loss": 1.0554, + "learning_rate": 1.2248323792830493e-05, + "loss": 1.062, "step": 11373 }, { - "epoch": 0.32231006829323583, + "epoch": 0.4450269974176383, "grad_norm": 0.0, - "learning_rate": 1.5843644914297838e-05, - "loss": 0.9766, + "learning_rate": 1.2247088988008636e-05, + "loss": 1.0881, "step": 11374 }, { - "epoch": 0.3223384057354983, + "epoch": 0.44506612410986773, "grad_norm": 0.0, - "learning_rate": 1.5842900111677307e-05, - "loss": 0.946, + "learning_rate": 1.2245854147101344e-05, + "loss": 1.0382, "step": 11375 }, { - "epoch": 0.32236674317776076, + "epoch": 0.4451052508020972, "grad_norm": 0.0, - "learning_rate": 1.5842155259839858e-05, - "loss": 0.956, + "learning_rate": 1.2244619270128451e-05, + "loss": 0.9681, "step": 11376 }, { - "epoch": 0.32239508062002326, + "epoch": 0.4451443774943266, "grad_norm": 0.0, - "learning_rate": 1.5841410358791763e-05, - "loss": 0.9263, + "learning_rate": 1.2243384357109785e-05, + "loss": 1.051, "step": 11377 }, { - "epoch": 0.3224234180622857, + "epoch": 0.44518350418655606, "grad_norm": 0.0, - "learning_rate": 1.58406654085393e-05, - "loss": 0.987, + "learning_rate": 1.2242149408065176e-05, + "loss": 0.9847, "step": 11378 }, { - "epoch": 0.32245175550454813, + "epoch": 0.4452226308787855, "grad_norm": 0.0, - "learning_rate": 1.5839920409088743e-05, - "loss": 0.9306, + "learning_rate": 1.2240914423014457e-05, + "loss": 0.9973, "step": 11379 }, { - "epoch": 0.3224800929468106, + "epoch": 0.44526175757101494, "grad_norm": 0.0, - "learning_rate": 1.5839175360446367e-05, - "loss": 0.9884, + "learning_rate": 1.2239679401977462e-05, + "loss": 1.0535, "step": 11380 }, { - "epoch": 0.32250843038907306, + "epoch": 0.4453008842632444, "grad_norm": 0.0, - "learning_rate": 1.583843026261845e-05, - "loss": 1.0358, + "learning_rate": 1.2238444344974024e-05, + "loss": 1.0635, "step": 11381 }, { - "epoch": 0.32253676783133556, + "epoch": 0.4453400109554738, "grad_norm": 0.0, - "learning_rate": 1.583768511561127e-05, - "loss": 1.0287, + "learning_rate": 1.2237209252023969e-05, + "loss": 1.0657, "step": 11382 }, { - "epoch": 0.322565105273598, + "epoch": 0.44537913764770326, "grad_norm": 0.0, - "learning_rate": 1.5836939919431097e-05, - "loss": 0.9527, + "learning_rate": 1.223597412314714e-05, + "loss": 0.9557, "step": 11383 }, { - "epoch": 0.3225934427158605, + "epoch": 0.4454182643399327, "grad_norm": 0.0, - "learning_rate": 1.583619467408421e-05, - "loss": 0.937, + "learning_rate": 1.2234738958363369e-05, + "loss": 1.1657, "step": 11384 }, { - "epoch": 0.32262178015812293, + "epoch": 0.44545739103216214, "grad_norm": 0.0, - "learning_rate": 1.5835449379576892e-05, - "loss": 1.0262, + "learning_rate": 1.2233503757692492e-05, + "loss": 1.0744, "step": 11385 }, { - "epoch": 0.32265011760038537, + "epoch": 0.4454965177243916, "grad_norm": 0.0, - "learning_rate": 1.5834704035915417e-05, - "loss": 0.9677, + "learning_rate": 1.223226852115434e-05, + "loss": 0.9443, "step": 11386 }, { - "epoch": 0.32267845504264786, + "epoch": 0.445535644416621, "grad_norm": 0.0, - "learning_rate": 1.5833958643106058e-05, - "loss": 0.8528, + "learning_rate": 1.2231033248768752e-05, + "loss": 0.9775, "step": 11387 }, { - "epoch": 0.3227067924849103, + "epoch": 0.44557477110885046, "grad_norm": 0.0, - "learning_rate": 1.58332132011551e-05, - "loss": 1.0286, + "learning_rate": 1.222979794055557e-05, + "loss": 1.0324, "step": 11388 }, { - "epoch": 0.3227351299271728, + "epoch": 0.4456138978010799, "grad_norm": 0.0, - "learning_rate": 1.5832467710068825e-05, - "loss": 0.9957, + "learning_rate": 1.2228562596534625e-05, + "loss": 1.2, "step": 11389 }, { - "epoch": 0.32276346736943523, + "epoch": 0.44565302449330935, "grad_norm": 0.0, - "learning_rate": 1.583172216985351e-05, - "loss": 1.0233, + "learning_rate": 1.2227327216725758e-05, + "loss": 1.0108, "step": 11390 }, { - "epoch": 0.32279180481169767, + "epoch": 0.4456921511855388, "grad_norm": 0.0, - "learning_rate": 1.5830976580515432e-05, - "loss": 0.9723, + "learning_rate": 1.2226091801148807e-05, + "loss": 1.0344, "step": 11391 }, { - "epoch": 0.32282014225396016, + "epoch": 0.44573127787776823, "grad_norm": 0.0, - "learning_rate": 1.583023094206087e-05, - "loss": 0.9698, + "learning_rate": 1.2224856349823611e-05, + "loss": 1.0085, "step": 11392 }, { - "epoch": 0.3228484796962226, + "epoch": 0.44577040456999767, "grad_norm": 0.0, - "learning_rate": 1.5829485254496108e-05, - "loss": 1.0318, + "learning_rate": 1.2223620862770007e-05, + "loss": 0.9212, "step": 11393 }, { - "epoch": 0.3228768171384851, + "epoch": 0.4458095312622271, "grad_norm": 0.0, - "learning_rate": 1.5828739517827426e-05, - "loss": 0.9075, + "learning_rate": 1.222238534000784e-05, + "loss": 1.0352, "step": 11394 }, { - "epoch": 0.32290515458074753, + "epoch": 0.44584865795445655, "grad_norm": 0.0, - "learning_rate": 1.5827993732061112e-05, - "loss": 1.0718, + "learning_rate": 1.2221149781556951e-05, + "loss": 1.1642, "step": 11395 }, { - "epoch": 0.32293349202301, + "epoch": 0.445887784646686, "grad_norm": 0.0, - "learning_rate": 1.5827247897203436e-05, - "loss": 0.942, + "learning_rate": 1.2219914187437178e-05, + "loss": 1.1166, "step": 11396 }, { - "epoch": 0.32296182946527247, + "epoch": 0.44592691133891543, "grad_norm": 0.0, - "learning_rate": 1.5826502013260694e-05, - "loss": 0.9977, + "learning_rate": 1.2218678557668365e-05, + "loss": 1.0515, "step": 11397 }, { - "epoch": 0.3229901669075349, + "epoch": 0.4459660380311449, "grad_norm": 0.0, - "learning_rate": 1.582575608023916e-05, - "loss": 0.9027, + "learning_rate": 1.2217442892270355e-05, + "loss": 0.9935, "step": 11398 }, { - "epoch": 0.3230185043497974, + "epoch": 0.44600516472337426, "grad_norm": 0.0, - "learning_rate": 1.5825010098145117e-05, - "loss": 0.9258, + "learning_rate": 1.2216207191262991e-05, + "loss": 1.1476, "step": 11399 }, { - "epoch": 0.32304684179205984, + "epoch": 0.4460442914156037, "grad_norm": 0.0, - "learning_rate": 1.5824264066984848e-05, - "loss": 1.0483, + "learning_rate": 1.2214971454666115e-05, + "loss": 1.0507, "step": 11400 }, { - "epoch": 0.32307517923432233, + "epoch": 0.44608341810783314, "grad_norm": 0.0, - "learning_rate": 1.5823517986764647e-05, - "loss": 0.8611, + "learning_rate": 1.2213735682499578e-05, + "loss": 0.9653, "step": 11401 }, { - "epoch": 0.32310351667658477, + "epoch": 0.4461225448000626, "grad_norm": 0.0, - "learning_rate": 1.582277185749079e-05, - "loss": 1.0096, + "learning_rate": 1.2212499874783213e-05, + "loss": 1.201, "step": 11402 }, { - "epoch": 0.3231318541188472, + "epoch": 0.446161671492292, "grad_norm": 0.0, - "learning_rate": 1.582202567916956e-05, - "loss": 0.9215, + "learning_rate": 1.2211264031536876e-05, + "loss": 1.1285, "step": 11403 }, { - "epoch": 0.3231601915611097, + "epoch": 0.44620079818452146, "grad_norm": 0.0, - "learning_rate": 1.582127945180724e-05, - "loss": 0.8695, + "learning_rate": 1.2210028152780408e-05, + "loss": 0.9949, "step": 11404 }, { - "epoch": 0.32318852900337214, + "epoch": 0.4462399248767509, "grad_norm": 0.0, - "learning_rate": 1.5820533175410134e-05, - "loss": 1.0085, + "learning_rate": 1.220879223853366e-05, + "loss": 1.1611, "step": 11405 }, { - "epoch": 0.32321686644563463, + "epoch": 0.44627905156898034, "grad_norm": 0.0, - "learning_rate": 1.581978684998451e-05, - "loss": 0.9368, + "learning_rate": 1.2207556288816474e-05, + "loss": 0.9774, "step": 11406 }, { - "epoch": 0.32324520388789707, + "epoch": 0.4463181782612098, "grad_norm": 0.0, - "learning_rate": 1.581904047553666e-05, - "loss": 0.932, + "learning_rate": 1.22063203036487e-05, + "loss": 1.0672, "step": 11407 }, { - "epoch": 0.32327354133015956, + "epoch": 0.4463573049534392, "grad_norm": 0.0, - "learning_rate": 1.5818294052072873e-05, - "loss": 0.8698, + "learning_rate": 1.2205084283050188e-05, + "loss": 1.0475, "step": 11408 }, { - "epoch": 0.323301878772422, + "epoch": 0.44639643164566867, "grad_norm": 0.0, - "learning_rate": 1.5817547579599436e-05, - "loss": 0.8243, + "learning_rate": 1.2203848227040784e-05, + "loss": 1.0269, "step": 11409 }, { - "epoch": 0.32333021621468444, + "epoch": 0.4464355583378981, "grad_norm": 0.0, - "learning_rate": 1.5816801058122632e-05, - "loss": 0.9909, + "learning_rate": 1.2202612135640341e-05, + "loss": 0.9963, "step": 11410 }, { - "epoch": 0.32335855365694693, + "epoch": 0.44647468503012755, "grad_norm": 0.0, - "learning_rate": 1.5816054487648753e-05, - "loss": 1.0045, + "learning_rate": 1.2201376008868707e-05, + "loss": 1.0468, "step": 11411 }, { - "epoch": 0.3233868910992094, + "epoch": 0.446513811722357, "grad_norm": 0.0, - "learning_rate": 1.5815307868184085e-05, - "loss": 0.9207, + "learning_rate": 1.220013984674573e-05, + "loss": 1.1161, "step": 11412 }, { - "epoch": 0.32341522854147187, + "epoch": 0.44655293841458643, "grad_norm": 0.0, - "learning_rate": 1.5814561199734922e-05, - "loss": 0.9371, + "learning_rate": 1.2198903649291265e-05, + "loss": 0.9778, "step": 11413 }, { - "epoch": 0.3234435659837343, + "epoch": 0.44659206510681587, "grad_norm": 0.0, - "learning_rate": 1.5813814482307552e-05, - "loss": 0.9282, + "learning_rate": 1.2197667416525165e-05, + "loss": 0.9954, "step": 11414 }, { - "epoch": 0.32347190342599674, + "epoch": 0.4466311917990453, "grad_norm": 0.0, - "learning_rate": 1.5813067715908265e-05, - "loss": 0.906, + "learning_rate": 1.2196431148467278e-05, + "loss": 1.1762, "step": 11415 }, { - "epoch": 0.32350024086825924, + "epoch": 0.44667031849127475, "grad_norm": 0.0, - "learning_rate": 1.5812320900543348e-05, - "loss": 0.9042, + "learning_rate": 1.2195194845137462e-05, + "loss": 1.1075, "step": 11416 }, { - "epoch": 0.3235285783105217, + "epoch": 0.4467094451835042, "grad_norm": 0.0, - "learning_rate": 1.581157403621909e-05, - "loss": 0.8512, + "learning_rate": 1.2193958506555566e-05, + "loss": 1.1302, "step": 11417 }, { - "epoch": 0.32355691575278417, + "epoch": 0.44674857187573364, "grad_norm": 0.0, - "learning_rate": 1.5810827122941792e-05, - "loss": 0.9032, + "learning_rate": 1.2192722132741443e-05, + "loss": 1.1231, "step": 11418 }, { - "epoch": 0.3235852531950466, + "epoch": 0.4467876985679631, "grad_norm": 0.0, - "learning_rate": 1.5810080160717737e-05, - "loss": 1.0433, + "learning_rate": 1.2191485723714953e-05, + "loss": 1.1373, "step": 11419 }, { - "epoch": 0.3236135906373091, + "epoch": 0.4468268252601925, "grad_norm": 0.0, - "learning_rate": 1.580933314955322e-05, - "loss": 0.9099, + "learning_rate": 1.2190249279495947e-05, + "loss": 1.1153, "step": 11420 }, { - "epoch": 0.32364192807957154, + "epoch": 0.44686595195242196, "grad_norm": 0.0, - "learning_rate": 1.580858608945453e-05, - "loss": 0.866, + "learning_rate": 1.2189012800104284e-05, + "loss": 1.0966, "step": 11421 }, { - "epoch": 0.323670265521834, + "epoch": 0.4469050786446514, "grad_norm": 0.0, - "learning_rate": 1.5807838980427967e-05, - "loss": 0.9639, + "learning_rate": 1.2187776285559814e-05, + "loss": 0.9264, "step": 11422 }, { - "epoch": 0.32369860296409647, + "epoch": 0.44694420533688084, "grad_norm": 0.0, - "learning_rate": 1.5807091822479815e-05, - "loss": 0.9269, + "learning_rate": 1.2186539735882402e-05, + "loss": 1.0638, "step": 11423 }, { - "epoch": 0.3237269404063589, + "epoch": 0.4469833320291103, "grad_norm": 0.0, - "learning_rate": 1.5806344615616375e-05, - "loss": 1.0439, + "learning_rate": 1.21853031510919e-05, + "loss": 0.967, "step": 11424 }, { - "epoch": 0.3237552778486214, + "epoch": 0.4470224587213397, "grad_norm": 0.0, - "learning_rate": 1.5805597359843935e-05, - "loss": 0.8914, + "learning_rate": 1.2184066531208169e-05, + "loss": 0.9682, "step": 11425 }, { - "epoch": 0.32378361529088384, + "epoch": 0.44706158541356916, "grad_norm": 0.0, - "learning_rate": 1.5804850055168796e-05, - "loss": 0.8324, + "learning_rate": 1.2182829876251065e-05, + "loss": 1.1525, "step": 11426 }, { - "epoch": 0.3238119527331463, + "epoch": 0.4471007121057986, "grad_norm": 0.0, - "learning_rate": 1.5804102701597247e-05, - "loss": 0.9725, + "learning_rate": 1.218159318624045e-05, + "loss": 1.1353, "step": 11427 }, { - "epoch": 0.3238402901754088, + "epoch": 0.447139838798028, "grad_norm": 0.0, - "learning_rate": 1.5803355299135586e-05, - "loss": 1.023, + "learning_rate": 1.2180356461196183e-05, + "loss": 1.0921, "step": 11428 }, { - "epoch": 0.3238686276176712, + "epoch": 0.44717896549025743, "grad_norm": 0.0, - "learning_rate": 1.580260784779011e-05, - "loss": 0.9989, + "learning_rate": 1.217911970113812e-05, + "loss": 1.1416, "step": 11429 }, { - "epoch": 0.3238969650599337, + "epoch": 0.44721809218248687, "grad_norm": 0.0, - "learning_rate": 1.5801860347567108e-05, - "loss": 1.0304, + "learning_rate": 1.217788290608613e-05, + "loss": 1.023, "step": 11430 }, { - "epoch": 0.32392530250219614, + "epoch": 0.4472572188747163, "grad_norm": 0.0, - "learning_rate": 1.5801112798472887e-05, - "loss": 0.9747, + "learning_rate": 1.2176646076060066e-05, + "loss": 1.0801, "step": 11431 }, { - "epoch": 0.32395363994445864, + "epoch": 0.44729634556694575, "grad_norm": 0.0, - "learning_rate": 1.580036520051374e-05, - "loss": 0.9725, + "learning_rate": 1.2175409211079794e-05, + "loss": 0.9964, "step": 11432 }, { - "epoch": 0.3239819773867211, + "epoch": 0.4473354722591752, "grad_norm": 0.0, - "learning_rate": 1.5799617553695958e-05, - "loss": 0.9219, + "learning_rate": 1.2174172311165178e-05, + "loss": 1.1008, "step": 11433 }, { - "epoch": 0.3240103148289835, + "epoch": 0.44737459895140463, "grad_norm": 0.0, - "learning_rate": 1.5798869858025847e-05, - "loss": 0.9271, + "learning_rate": 1.2172935376336077e-05, + "loss": 1.0669, "step": 11434 }, { - "epoch": 0.324038652271246, + "epoch": 0.4474137256436341, "grad_norm": 0.0, - "learning_rate": 1.5798122113509703e-05, - "loss": 0.893, + "learning_rate": 1.2171698406612356e-05, + "loss": 0.9619, "step": 11435 }, { - "epoch": 0.32406698971350845, + "epoch": 0.4474528523358635, "grad_norm": 0.0, - "learning_rate": 1.579737432015382e-05, - "loss": 0.9628, + "learning_rate": 1.2170461402013883e-05, + "loss": 0.9894, "step": 11436 }, { - "epoch": 0.32409532715577094, + "epoch": 0.44749197902809296, "grad_norm": 0.0, - "learning_rate": 1.5796626477964502e-05, - "loss": 1.0487, + "learning_rate": 1.2169224362560514e-05, + "loss": 1.0341, "step": 11437 }, { - "epoch": 0.3241236645980334, + "epoch": 0.4475311057203224, "grad_norm": 0.0, - "learning_rate": 1.5795878586948047e-05, - "loss": 0.9521, + "learning_rate": 1.2167987288272124e-05, + "loss": 0.9715, "step": 11438 }, { - "epoch": 0.3241520020402958, + "epoch": 0.44757023241255184, "grad_norm": 0.0, - "learning_rate": 1.5795130647110755e-05, - "loss": 0.9659, + "learning_rate": 1.2166750179168576e-05, + "loss": 1.0901, "step": 11439 }, { - "epoch": 0.3241803394825583, + "epoch": 0.4476093591047813, "grad_norm": 0.0, - "learning_rate": 1.5794382658458924e-05, - "loss": 1.0085, + "learning_rate": 1.2165513035269733e-05, + "loss": 1.0694, "step": 11440 }, { - "epoch": 0.32420867692482075, + "epoch": 0.4476484857970107, "grad_norm": 0.0, - "learning_rate": 1.5793634620998858e-05, - "loss": 1.0505, + "learning_rate": 1.2164275856595466e-05, + "loss": 1.0539, "step": 11441 }, { - "epoch": 0.32423701436708324, + "epoch": 0.44768761248924016, "grad_norm": 0.0, - "learning_rate": 1.5792886534736854e-05, - "loss": 0.8635, + "learning_rate": 1.2163038643165636e-05, + "loss": 1.1927, "step": 11442 }, { - "epoch": 0.3242653518093457, + "epoch": 0.4477267391814696, "grad_norm": 0.0, - "learning_rate": 1.5792138399679216e-05, - "loss": 0.9647, + "learning_rate": 1.216180139500012e-05, + "loss": 1.0068, "step": 11443 }, { - "epoch": 0.3242936892516082, + "epoch": 0.44776586587369904, "grad_norm": 0.0, - "learning_rate": 1.5791390215832247e-05, - "loss": 0.911, + "learning_rate": 1.2160564112118781e-05, + "loss": 1.0246, "step": 11444 }, { - "epoch": 0.3243220266938706, + "epoch": 0.4478049925659285, "grad_norm": 0.0, - "learning_rate": 1.5790641983202245e-05, - "loss": 1.0133, + "learning_rate": 1.2159326794541492e-05, + "loss": 1.0836, "step": 11445 }, { - "epoch": 0.32435036413613305, + "epoch": 0.4478441192581579, "grad_norm": 0.0, - "learning_rate": 1.5789893701795515e-05, - "loss": 0.8778, + "learning_rate": 1.2158089442288121e-05, + "loss": 0.9771, "step": 11446 }, { - "epoch": 0.32437870157839555, + "epoch": 0.44788324595038737, "grad_norm": 0.0, - "learning_rate": 1.5789145371618366e-05, - "loss": 0.8438, + "learning_rate": 1.2156852055378534e-05, + "loss": 1.0178, "step": 11447 }, { - "epoch": 0.324407039020658, + "epoch": 0.4479223726426168, "grad_norm": 0.0, - "learning_rate": 1.578839699267709e-05, - "loss": 0.9844, + "learning_rate": 1.2155614633832609e-05, + "loss": 1.0466, "step": 11448 }, { - "epoch": 0.3244353764629205, + "epoch": 0.44796149933484625, "grad_norm": 0.0, - "learning_rate": 1.5787648564978e-05, - "loss": 0.9881, + "learning_rate": 1.2154377177670211e-05, + "loss": 1.0474, "step": 11449 }, { - "epoch": 0.3244637139051829, + "epoch": 0.4480006260270757, "grad_norm": 0.0, - "learning_rate": 1.5786900088527394e-05, - "loss": 0.932, + "learning_rate": 1.2153139686911217e-05, + "loss": 0.9951, "step": 11450 }, { - "epoch": 0.32449205134744535, + "epoch": 0.44803975271930513, "grad_norm": 0.0, - "learning_rate": 1.578615156333158e-05, - "loss": 0.8806, + "learning_rate": 1.2151902161575496e-05, + "loss": 1.1316, "step": 11451 }, { - "epoch": 0.32452038878970785, + "epoch": 0.44807887941153457, "grad_norm": 0.0, - "learning_rate": 1.5785402989396867e-05, - "loss": 1.0128, + "learning_rate": 1.2150664601682924e-05, + "loss": 1.0155, "step": 11452 }, { - "epoch": 0.3245487262319703, + "epoch": 0.448118006103764, "grad_norm": 0.0, - "learning_rate": 1.5784654366729554e-05, - "loss": 1.0132, + "learning_rate": 1.2149427007253372e-05, + "loss": 1.0154, "step": 11453 }, { - "epoch": 0.3245770636742328, + "epoch": 0.44815713279599345, "grad_norm": 0.0, - "learning_rate": 1.5783905695335947e-05, - "loss": 0.9533, + "learning_rate": 1.2148189378306718e-05, + "loss": 1.1202, "step": 11454 }, { - "epoch": 0.3246054011164952, + "epoch": 0.4481962594882229, "grad_norm": 0.0, - "learning_rate": 1.5783156975222356e-05, - "loss": 0.9633, + "learning_rate": 1.2146951714862834e-05, + "loss": 1.1854, "step": 11455 }, { - "epoch": 0.3246337385587577, + "epoch": 0.4482353861804523, "grad_norm": 0.0, - "learning_rate": 1.5782408206395087e-05, - "loss": 0.835, + "learning_rate": 1.2145714016941594e-05, + "loss": 0.9931, "step": 11456 }, { - "epoch": 0.32466207600102015, + "epoch": 0.4482745128726817, "grad_norm": 0.0, - "learning_rate": 1.5781659388860445e-05, - "loss": 0.9184, + "learning_rate": 1.2144476284562878e-05, + "loss": 1.0679, "step": 11457 }, { - "epoch": 0.3246904134432826, + "epoch": 0.44831363956491116, "grad_norm": 0.0, - "learning_rate": 1.578091052262474e-05, - "loss": 0.8413, + "learning_rate": 1.2143238517746558e-05, + "loss": 1.1556, "step": 11458 }, { - "epoch": 0.3247187508855451, + "epoch": 0.4483527662571406, "grad_norm": 0.0, - "learning_rate": 1.5780161607694276e-05, - "loss": 0.9617, + "learning_rate": 1.2142000716512517e-05, + "loss": 1.0948, "step": 11459 }, { - "epoch": 0.3247470883278075, + "epoch": 0.44839189294937004, "grad_norm": 0.0, - "learning_rate": 1.577941264407537e-05, - "loss": 0.925, + "learning_rate": 1.2140762880880623e-05, + "loss": 0.9525, "step": 11460 }, { - "epoch": 0.32477542577007, + "epoch": 0.4484310196415995, "grad_norm": 0.0, - "learning_rate": 1.577866363177432e-05, - "loss": 0.9558, + "learning_rate": 1.2139525010870763e-05, + "loss": 1.049, "step": 11461 }, { - "epoch": 0.32480376321233245, + "epoch": 0.4484701463338289, "grad_norm": 0.0, - "learning_rate": 1.5777914570797443e-05, - "loss": 0.9075, + "learning_rate": 1.213828710650281e-05, + "loss": 1.0693, "step": 11462 }, { - "epoch": 0.3248321006545949, + "epoch": 0.44850927302605836, "grad_norm": 0.0, - "learning_rate": 1.5777165461151045e-05, - "loss": 1.0332, + "learning_rate": 1.2137049167796649e-05, + "loss": 1.1088, "step": 11463 }, { - "epoch": 0.3248604380968574, + "epoch": 0.4485483997182878, "grad_norm": 0.0, - "learning_rate": 1.577641630284144e-05, - "loss": 0.9279, + "learning_rate": 1.2135811194772152e-05, + "loss": 1.1199, "step": 11464 }, { - "epoch": 0.3248887755391198, + "epoch": 0.44858752641051725, "grad_norm": 0.0, - "learning_rate": 1.5775667095874933e-05, - "loss": 0.9307, + "learning_rate": 1.2134573187449206e-05, + "loss": 1.1398, "step": 11465 }, { - "epoch": 0.3249171129813823, + "epoch": 0.4486266531027467, "grad_norm": 0.0, - "learning_rate": 1.5774917840257836e-05, - "loss": 0.9123, + "learning_rate": 1.2133335145847691e-05, + "loss": 0.8933, "step": 11466 }, { - "epoch": 0.32494545042364475, + "epoch": 0.4486657797949761, "grad_norm": 0.0, - "learning_rate": 1.577416853599646e-05, - "loss": 0.931, + "learning_rate": 1.2132097069987483e-05, + "loss": 1.1588, "step": 11467 }, { - "epoch": 0.32497378786590725, + "epoch": 0.44870490648720557, "grad_norm": 0.0, - "learning_rate": 1.5773419183097124e-05, - "loss": 0.9084, + "learning_rate": 1.2130858959888469e-05, + "loss": 1.0988, "step": 11468 }, { - "epoch": 0.3250021253081697, + "epoch": 0.448744033179435, "grad_norm": 0.0, - "learning_rate": 1.577266978156613e-05, - "loss": 0.7945, + "learning_rate": 1.2129620815570531e-05, + "loss": 1.0775, "step": 11469 }, { - "epoch": 0.3250304627504321, + "epoch": 0.44878315987166445, "grad_norm": 0.0, - "learning_rate": 1.57719203314098e-05, - "loss": 0.9261, + "learning_rate": 1.2128382637053552e-05, + "loss": 1.1796, "step": 11470 }, { - "epoch": 0.3250588001926946, + "epoch": 0.4488222865638939, "grad_norm": 0.0, - "learning_rate": 1.5771170832634438e-05, - "loss": 0.9098, + "learning_rate": 1.2127144424357413e-05, + "loss": 1.0488, "step": 11471 }, { - "epoch": 0.32508713763495706, + "epoch": 0.44886141325612333, "grad_norm": 0.0, - "learning_rate": 1.577042128524636e-05, - "loss": 0.9451, + "learning_rate": 1.2125906177502002e-05, + "loss": 1.0204, "step": 11472 }, { - "epoch": 0.32511547507721955, + "epoch": 0.4489005399483528, "grad_norm": 0.0, - "learning_rate": 1.576967168925188e-05, - "loss": 1.0343, + "learning_rate": 1.2124667896507199e-05, + "loss": 0.977, "step": 11473 }, { - "epoch": 0.325143812519482, + "epoch": 0.4489396666405822, "grad_norm": 0.0, - "learning_rate": 1.5768922044657316e-05, - "loss": 0.9302, + "learning_rate": 1.2123429581392894e-05, + "loss": 1.1868, "step": 11474 }, { - "epoch": 0.32517214996174443, + "epoch": 0.44897879333281165, "grad_norm": 0.0, - "learning_rate": 1.5768172351468975e-05, - "loss": 0.7831, + "learning_rate": 1.2122191232178972e-05, + "loss": 1.178, "step": 11475 }, { - "epoch": 0.3252004874040069, + "epoch": 0.4490179200250411, "grad_norm": 0.0, - "learning_rate": 1.576742260969318e-05, - "loss": 1.0311, + "learning_rate": 1.2120952848885315e-05, + "loss": 0.9553, "step": 11476 }, { - "epoch": 0.32522882484626936, + "epoch": 0.44905704671727054, "grad_norm": 0.0, - "learning_rate": 1.5766672819336243e-05, - "loss": 1.1104, + "learning_rate": 1.2119714431531814e-05, + "loss": 1.0613, "step": 11477 }, { - "epoch": 0.32525716228853185, + "epoch": 0.4490961734095, "grad_norm": 0.0, - "learning_rate": 1.576592298040448e-05, - "loss": 0.8591, + "learning_rate": 1.2118475980138358e-05, + "loss": 1.1559, "step": 11478 }, { - "epoch": 0.3252854997307943, + "epoch": 0.4491353001017294, "grad_norm": 0.0, - "learning_rate": 1.5765173092904202e-05, - "loss": 0.9393, + "learning_rate": 1.211723749472483e-05, + "loss": 0.9889, "step": 11479 }, { - "epoch": 0.3253138371730568, + "epoch": 0.44917442679395886, "grad_norm": 0.0, - "learning_rate": 1.5764423156841734e-05, - "loss": 0.9387, + "learning_rate": 1.211599897531112e-05, + "loss": 1.0858, "step": 11480 }, { - "epoch": 0.3253421746153192, + "epoch": 0.4492135534861883, "grad_norm": 0.0, - "learning_rate": 1.576367317222339e-05, - "loss": 1.0251, + "learning_rate": 1.211476042191712e-05, + "loss": 1.162, "step": 11481 }, { - "epoch": 0.32537051205758166, + "epoch": 0.44925268017841774, "grad_norm": 0.0, - "learning_rate": 1.5762923139055485e-05, - "loss": 0.8753, + "learning_rate": 1.2113521834562716e-05, + "loss": 1.1339, "step": 11482 }, { - "epoch": 0.32539884949984416, + "epoch": 0.4492918068706472, "grad_norm": 0.0, - "learning_rate": 1.5762173057344336e-05, - "loss": 0.9954, + "learning_rate": 1.2112283213267801e-05, + "loss": 1.1069, "step": 11483 }, { - "epoch": 0.3254271869421066, + "epoch": 0.4493309335628766, "grad_norm": 0.0, - "learning_rate": 1.5761422927096268e-05, - "loss": 0.9656, + "learning_rate": 1.2111044558052263e-05, + "loss": 1.072, "step": 11484 }, { - "epoch": 0.3254555243843691, + "epoch": 0.449370060255106, "grad_norm": 0.0, - "learning_rate": 1.5760672748317593e-05, - "loss": 1.0256, + "learning_rate": 1.2109805868935995e-05, + "loss": 1.0454, "step": 11485 }, { - "epoch": 0.3254838618266315, + "epoch": 0.44940918694733545, "grad_norm": 0.0, - "learning_rate": 1.5759922521014633e-05, - "loss": 0.9672, + "learning_rate": 1.210856714593889e-05, + "loss": 1.0776, "step": 11486 }, { - "epoch": 0.32551219926889396, + "epoch": 0.4494483136395649, "grad_norm": 0.0, - "learning_rate": 1.5759172245193704e-05, - "loss": 0.9334, + "learning_rate": 1.2107328389080837e-05, + "loss": 1.0894, "step": 11487 }, { - "epoch": 0.32554053671115646, + "epoch": 0.44948744033179433, "grad_norm": 0.0, - "learning_rate": 1.575842192086113e-05, - "loss": 0.9714, + "learning_rate": 1.2106089598381732e-05, + "loss": 1.054, "step": 11488 }, { - "epoch": 0.3255688741534189, + "epoch": 0.44952656702402377, "grad_norm": 0.0, - "learning_rate": 1.575767154802323e-05, - "loss": 0.8259, + "learning_rate": 1.2104850773861466e-05, + "loss": 1.0684, "step": 11489 }, { - "epoch": 0.3255972115956814, + "epoch": 0.4495656937162532, "grad_norm": 0.0, - "learning_rate": 1.575692112668633e-05, - "loss": 0.9045, + "learning_rate": 1.2103611915539934e-05, + "loss": 1.0987, "step": 11490 }, { - "epoch": 0.32562554903794383, + "epoch": 0.44960482040848265, "grad_norm": 0.0, - "learning_rate": 1.575617065685674e-05, - "loss": 0.9696, + "learning_rate": 1.2102373023437031e-05, + "loss": 1.0931, "step": 11491 }, { - "epoch": 0.3256538864802063, + "epoch": 0.4496439471007121, "grad_norm": 0.0, - "learning_rate": 1.5755420138540783e-05, - "loss": 0.8831, + "learning_rate": 1.2101134097572654e-05, + "loss": 0.8803, "step": 11492 }, { - "epoch": 0.32568222392246876, + "epoch": 0.44968307379294153, "grad_norm": 0.0, - "learning_rate": 1.5754669571744792e-05, - "loss": 1.1196, + "learning_rate": 1.209989513796669e-05, + "loss": 1.1414, "step": 11493 }, { - "epoch": 0.3257105613647312, + "epoch": 0.449722200485171, "grad_norm": 0.0, - "learning_rate": 1.575391895647508e-05, - "loss": 0.9387, + "learning_rate": 1.2098656144639047e-05, + "loss": 1.0258, "step": 11494 }, { - "epoch": 0.3257388988069937, + "epoch": 0.4497613271774004, "grad_norm": 0.0, - "learning_rate": 1.5753168292737974e-05, - "loss": 0.9527, + "learning_rate": 1.2097417117609615e-05, + "loss": 1.132, "step": 11495 }, { - "epoch": 0.32576723624925613, + "epoch": 0.44980045386962986, "grad_norm": 0.0, - "learning_rate": 1.575241758053979e-05, - "loss": 0.9836, + "learning_rate": 1.209617805689829e-05, + "loss": 1.0677, "step": 11496 }, { - "epoch": 0.3257955736915186, + "epoch": 0.4498395805618593, "grad_norm": 0.0, - "learning_rate": 1.575166681988686e-05, - "loss": 1.0623, + "learning_rate": 1.2094938962524975e-05, + "loss": 0.993, "step": 11497 }, { - "epoch": 0.32582391113378106, + "epoch": 0.44987870725408874, "grad_norm": 0.0, - "learning_rate": 1.5750916010785503e-05, - "loss": 0.9546, + "learning_rate": 1.2093699834509565e-05, + "loss": 1.0667, "step": 11498 }, { - "epoch": 0.3258522485760435, + "epoch": 0.4499178339463182, "grad_norm": 0.0, - "learning_rate": 1.5750165153242048e-05, - "loss": 0.9708, + "learning_rate": 1.2092460672871959e-05, + "loss": 1.0675, "step": 11499 }, { - "epoch": 0.325880586018306, + "epoch": 0.4499569606385476, "grad_norm": 0.0, - "learning_rate": 1.5749414247262812e-05, - "loss": 1.0635, + "learning_rate": 1.2091221477632056e-05, + "loss": 0.9909, "step": 11500 }, { - "epoch": 0.32590892346056843, + "epoch": 0.44999608733077706, "grad_norm": 0.0, - "learning_rate": 1.5748663292854126e-05, - "loss": 0.953, + "learning_rate": 1.2089982248809755e-05, + "loss": 1.0534, "step": 11501 }, { - "epoch": 0.3259372609028309, + "epoch": 0.4500352140230065, "grad_norm": 0.0, - "learning_rate": 1.5747912290022318e-05, - "loss": 0.8914, + "learning_rate": 1.208874298642496e-05, + "loss": 1.0395, "step": 11502 }, { - "epoch": 0.32596559834509337, + "epoch": 0.45007434071523594, "grad_norm": 0.0, - "learning_rate": 1.5747161238773706e-05, - "loss": 0.968, + "learning_rate": 1.2087503690497571e-05, + "loss": 1.0444, "step": 11503 }, { - "epoch": 0.32599393578735586, + "epoch": 0.4501134674074654, "grad_norm": 0.0, - "learning_rate": 1.5746410139114624e-05, - "loss": 1.019, + "learning_rate": 1.2086264361047487e-05, + "loss": 1.0285, "step": 11504 }, { - "epoch": 0.3260222732296183, + "epoch": 0.4501525940996948, "grad_norm": 0.0, - "learning_rate": 1.5745658991051397e-05, - "loss": 0.9738, + "learning_rate": 1.208502499809461e-05, + "loss": 1.0175, "step": 11505 }, { - "epoch": 0.32605061067188074, + "epoch": 0.45019172079192427, "grad_norm": 0.0, - "learning_rate": 1.5744907794590347e-05, - "loss": 0.8863, + "learning_rate": 1.2083785601658846e-05, + "loss": 0.9543, "step": 11506 }, { - "epoch": 0.32607894811414323, + "epoch": 0.4502308474841537, "grad_norm": 0.0, - "learning_rate": 1.5744156549737807e-05, - "loss": 1.0319, + "learning_rate": 1.2082546171760097e-05, + "loss": 1.0559, "step": 11507 }, { - "epoch": 0.32610728555640567, + "epoch": 0.45026997417638315, "grad_norm": 0.0, - "learning_rate": 1.5743405256500102e-05, - "loss": 0.9209, + "learning_rate": 1.2081306708418266e-05, + "loss": 1.0205, "step": 11508 }, { - "epoch": 0.32613562299866816, + "epoch": 0.4503091008686126, "grad_norm": 0.0, - "learning_rate": 1.574265391488356e-05, - "loss": 0.9839, + "learning_rate": 1.2080067211653255e-05, + "loss": 1.0913, "step": 11509 }, { - "epoch": 0.3261639604409306, + "epoch": 0.45034822756084203, "grad_norm": 0.0, - "learning_rate": 1.5741902524894514e-05, - "loss": 1.0125, + "learning_rate": 1.2078827681484973e-05, + "loss": 1.0822, "step": 11510 }, { - "epoch": 0.32619229788319304, + "epoch": 0.45038735425307147, "grad_norm": 0.0, - "learning_rate": 1.5741151086539293e-05, - "loss": 0.9791, + "learning_rate": 1.2077588117933324e-05, + "loss": 1.2026, "step": 11511 }, { - "epoch": 0.32622063532545553, + "epoch": 0.4504264809453009, "grad_norm": 0.0, - "learning_rate": 1.574039959982422e-05, - "loss": 0.9065, + "learning_rate": 1.207634852101821e-05, + "loss": 1.1325, "step": 11512 }, { - "epoch": 0.32624897276771797, + "epoch": 0.4504656076375303, "grad_norm": 0.0, - "learning_rate": 1.5739648064755634e-05, - "loss": 0.9528, + "learning_rate": 1.2075108890759543e-05, + "loss": 0.9723, "step": 11513 }, { - "epoch": 0.32627731020998046, + "epoch": 0.45050473432975974, "grad_norm": 0.0, - "learning_rate": 1.5738896481339857e-05, - "loss": 0.9144, + "learning_rate": 1.2073869227177228e-05, + "loss": 1.1025, "step": 11514 }, { - "epoch": 0.3263056476522429, + "epoch": 0.4505438610219892, "grad_norm": 0.0, - "learning_rate": 1.573814484958323e-05, - "loss": 0.9912, + "learning_rate": 1.2072629530291171e-05, + "loss": 0.9984, "step": 11515 }, { - "epoch": 0.3263339850945054, + "epoch": 0.4505829877142186, "grad_norm": 0.0, - "learning_rate": 1.5737393169492072e-05, - "loss": 0.9401, + "learning_rate": 1.207138980012128e-05, + "loss": 1.2245, "step": 11516 }, { - "epoch": 0.32636232253676783, + "epoch": 0.45062211440644806, "grad_norm": 0.0, - "learning_rate": 1.5736641441072722e-05, - "loss": 0.9027, + "learning_rate": 1.2070150036687467e-05, + "loss": 1.0398, "step": 11517 }, { - "epoch": 0.3263906599790303, + "epoch": 0.4506612410986775, "grad_norm": 0.0, - "learning_rate": 1.573588966433151e-05, - "loss": 0.8992, + "learning_rate": 1.2068910240009636e-05, + "loss": 1.0813, "step": 11518 }, { - "epoch": 0.32641899742129277, + "epoch": 0.45070036779090694, "grad_norm": 0.0, - "learning_rate": 1.5735137839274775e-05, - "loss": 0.944, + "learning_rate": 1.20676704101077e-05, + "loss": 1.0804, "step": 11519 }, { - "epoch": 0.3264473348635552, + "epoch": 0.4507394944831364, "grad_norm": 0.0, - "learning_rate": 1.573438596590884e-05, - "loss": 1.0525, + "learning_rate": 1.206643054700157e-05, + "loss": 1.0657, "step": 11520 }, { - "epoch": 0.3264756723058177, + "epoch": 0.4507786211753658, "grad_norm": 0.0, - "learning_rate": 1.573363404424004e-05, - "loss": 0.9038, + "learning_rate": 1.2065190650711151e-05, + "loss": 1.0894, "step": 11521 }, { - "epoch": 0.32650400974808014, + "epoch": 0.45081774786759526, "grad_norm": 0.0, - "learning_rate": 1.5732882074274717e-05, - "loss": 0.9935, + "learning_rate": 1.206395072125636e-05, + "loss": 1.0772, "step": 11522 }, { - "epoch": 0.3265323471903426, + "epoch": 0.4508568745598247, "grad_norm": 0.0, - "learning_rate": 1.5732130056019195e-05, - "loss": 0.9523, + "learning_rate": 1.2062710758657109e-05, + "loss": 1.0732, "step": 11523 }, { - "epoch": 0.32656068463260507, + "epoch": 0.45089600125205415, "grad_norm": 0.0, - "learning_rate": 1.5731377989479813e-05, - "loss": 0.9049, + "learning_rate": 1.2061470762933305e-05, + "loss": 1.0503, "step": 11524 }, { - "epoch": 0.3265890220748675, + "epoch": 0.4509351279442836, "grad_norm": 0.0, - "learning_rate": 1.5730625874662908e-05, - "loss": 0.954, + "learning_rate": 1.2060230734104864e-05, + "loss": 1.0126, "step": 11525 }, { - "epoch": 0.32661735951713, + "epoch": 0.45097425463651303, "grad_norm": 0.0, - "learning_rate": 1.572987371157481e-05, - "loss": 0.9388, + "learning_rate": 1.20589906721917e-05, + "loss": 1.0066, "step": 11526 }, { - "epoch": 0.32664569695939244, + "epoch": 0.45101338132874247, "grad_norm": 0.0, - "learning_rate": 1.5729121500221865e-05, - "loss": 0.7944, + "learning_rate": 1.2057750577213726e-05, + "loss": 1.0497, "step": 11527 }, { - "epoch": 0.32667403440165493, + "epoch": 0.4510525080209719, "grad_norm": 0.0, - "learning_rate": 1.5728369240610397e-05, - "loss": 0.9947, + "learning_rate": 1.2056510449190855e-05, + "loss": 1.0501, "step": 11528 }, { - "epoch": 0.32670237184391737, + "epoch": 0.45109163471320135, "grad_norm": 0.0, - "learning_rate": 1.5727616932746748e-05, - "loss": 0.8833, + "learning_rate": 1.2055270288143001e-05, + "loss": 1.0777, "step": 11529 }, { - "epoch": 0.3267307092861798, + "epoch": 0.4511307614054308, "grad_norm": 0.0, - "learning_rate": 1.5726864576637254e-05, - "loss": 0.8772, + "learning_rate": 1.2054030094090086e-05, + "loss": 1.17, "step": 11530 }, { - "epoch": 0.3267590467284423, + "epoch": 0.45116988809766023, "grad_norm": 0.0, - "learning_rate": 1.5726112172288254e-05, - "loss": 0.8702, + "learning_rate": 1.2052789867052018e-05, + "loss": 0.9072, "step": 11531 }, { - "epoch": 0.32678738417070474, + "epoch": 0.4512090147898897, "grad_norm": 0.0, - "learning_rate": 1.572535971970609e-05, - "loss": 1.0116, + "learning_rate": 1.205154960704872e-05, + "loss": 1.0733, "step": 11532 }, { - "epoch": 0.32681572161296724, + "epoch": 0.4512481414821191, "grad_norm": 0.0, - "learning_rate": 1.5724607218897086e-05, - "loss": 0.962, + "learning_rate": 1.2050309314100107e-05, + "loss": 1.0955, "step": 11533 }, { - "epoch": 0.3268440590552297, + "epoch": 0.45128726817434855, "grad_norm": 0.0, - "learning_rate": 1.5723854669867595e-05, - "loss": 0.9404, + "learning_rate": 1.204906898822609e-05, + "loss": 1.0901, "step": 11534 }, { - "epoch": 0.3268723964974921, + "epoch": 0.451326394866578, "grad_norm": 0.0, - "learning_rate": 1.572310207262395e-05, - "loss": 1.149, + "learning_rate": 1.2047828629446597e-05, + "loss": 1.0215, "step": 11535 }, { - "epoch": 0.3269007339397546, + "epoch": 0.45136552155880744, "grad_norm": 0.0, - "learning_rate": 1.572234942717249e-05, - "loss": 1.0229, + "learning_rate": 1.204658823778154e-05, + "loss": 0.9851, "step": 11536 }, { - "epoch": 0.32692907138201704, + "epoch": 0.4514046482510369, "grad_norm": 0.0, - "learning_rate": 1.5721596733519556e-05, - "loss": 1.0388, + "learning_rate": 1.2045347813250842e-05, + "loss": 1.1065, "step": 11537 }, { - "epoch": 0.32695740882427954, + "epoch": 0.4514437749432663, "grad_norm": 0.0, - "learning_rate": 1.5720843991671485e-05, - "loss": 0.9772, + "learning_rate": 1.204410735587442e-05, + "loss": 1.0569, "step": 11538 }, { - "epoch": 0.326985746266542, + "epoch": 0.45148290163549576, "grad_norm": 0.0, - "learning_rate": 1.572009120163463e-05, - "loss": 1.0024, + "learning_rate": 1.2042866865672195e-05, + "loss": 1.0792, "step": 11539 }, { - "epoch": 0.32701408370880447, + "epoch": 0.4515220283277252, "grad_norm": 0.0, - "learning_rate": 1.5719338363415313e-05, - "loss": 0.944, + "learning_rate": 1.204162634266409e-05, + "loss": 1.1027, "step": 11540 }, { - "epoch": 0.3270424211510669, + "epoch": 0.4515611550199546, "grad_norm": 0.0, - "learning_rate": 1.571858547701989e-05, - "loss": 0.9091, + "learning_rate": 1.2040385786870023e-05, + "loss": 0.8517, "step": 11541 }, { - "epoch": 0.32707075859332935, + "epoch": 0.451600281712184, "grad_norm": 0.0, - "learning_rate": 1.5717832542454697e-05, - "loss": 0.8199, + "learning_rate": 1.2039145198309916e-05, + "loss": 1.0419, "step": 11542 }, { - "epoch": 0.32709909603559184, + "epoch": 0.45163940840441347, "grad_norm": 0.0, - "learning_rate": 1.5717079559726075e-05, - "loss": 0.8667, + "learning_rate": 1.2037904577003693e-05, + "loss": 1.1265, "step": 11543 }, { - "epoch": 0.3271274334778543, + "epoch": 0.4516785350966429, "grad_norm": 0.0, - "learning_rate": 1.5716326528840374e-05, - "loss": 0.9541, + "learning_rate": 1.2036663922971279e-05, + "loss": 1.0552, "step": 11544 }, { - "epoch": 0.3271557709201168, + "epoch": 0.45171766178887235, "grad_norm": 0.0, - "learning_rate": 1.5715573449803926e-05, - "loss": 0.9138, + "learning_rate": 1.2035423236232591e-05, + "loss": 1.2568, "step": 11545 }, { - "epoch": 0.3271841083623792, + "epoch": 0.4517567884811018, "grad_norm": 0.0, - "learning_rate": 1.5714820322623085e-05, - "loss": 0.9151, + "learning_rate": 1.2034182516807559e-05, + "loss": 1.0436, "step": 11546 }, { - "epoch": 0.32721244580464165, + "epoch": 0.45179591517333123, "grad_norm": 0.0, - "learning_rate": 1.5714067147304185e-05, - "loss": 1.0722, + "learning_rate": 1.2032941764716102e-05, + "loss": 1.0236, "step": 11547 }, { - "epoch": 0.32724078324690414, + "epoch": 0.45183504186556067, "grad_norm": 0.0, - "learning_rate": 1.5713313923853584e-05, - "loss": 0.9085, + "learning_rate": 1.203170097997815e-05, + "loss": 1.086, "step": 11548 }, { - "epoch": 0.3272691206891666, + "epoch": 0.4518741685577901, "grad_norm": 0.0, - "learning_rate": 1.571256065227761e-05, - "loss": 1.0007, + "learning_rate": 1.2030460162613624e-05, + "loss": 1.0268, "step": 11549 }, { - "epoch": 0.3272974581314291, + "epoch": 0.45191329525001955, "grad_norm": 0.0, - "learning_rate": 1.5711807332582618e-05, - "loss": 0.9613, + "learning_rate": 1.2029219312642453e-05, + "loss": 1.093, "step": 11550 }, { - "epoch": 0.3273257955736915, + "epoch": 0.451952421942249, "grad_norm": 0.0, - "learning_rate": 1.5711053964774956e-05, - "loss": 0.932, + "learning_rate": 1.202797843008456e-05, + "loss": 1.1842, "step": 11551 }, { - "epoch": 0.327354133015954, + "epoch": 0.45199154863447843, "grad_norm": 0.0, - "learning_rate": 1.5710300548860962e-05, - "loss": 0.8883, + "learning_rate": 1.2026737514959881e-05, + "loss": 1.1505, "step": 11552 }, { - "epoch": 0.32738247045821645, + "epoch": 0.4520306753267079, "grad_norm": 0.0, - "learning_rate": 1.570954708484699e-05, - "loss": 0.8514, + "learning_rate": 1.2025496567288335e-05, + "loss": 0.9791, "step": 11553 }, { - "epoch": 0.3274108079004789, + "epoch": 0.4520698020189373, "grad_norm": 0.0, - "learning_rate": 1.5708793572739378e-05, - "loss": 0.9237, + "learning_rate": 1.2024255587089848e-05, + "loss": 1.0903, "step": 11554 }, { - "epoch": 0.3274391453427414, + "epoch": 0.45210892871116676, "grad_norm": 0.0, - "learning_rate": 1.5708040012544482e-05, - "loss": 0.9717, + "learning_rate": 1.2023014574384358e-05, + "loss": 0.9553, "step": 11555 }, { - "epoch": 0.3274674827850038, + "epoch": 0.4521480554033962, "grad_norm": 0.0, - "learning_rate": 1.570728640426864e-05, - "loss": 0.9424, + "learning_rate": 1.2021773529191784e-05, + "loss": 0.9722, "step": 11556 }, { - "epoch": 0.3274958202272663, + "epoch": 0.45218718209562564, "grad_norm": 0.0, - "learning_rate": 1.570653274791821e-05, - "loss": 0.8959, + "learning_rate": 1.2020532451532063e-05, + "loss": 1.0564, "step": 11557 }, { - "epoch": 0.32752415766952875, + "epoch": 0.4522263087878551, "grad_norm": 0.0, - "learning_rate": 1.5705779043499533e-05, - "loss": 0.9045, + "learning_rate": 1.2019291341425121e-05, + "loss": 1.1661, "step": 11558 }, { - "epoch": 0.3275524951117912, + "epoch": 0.4522654354800845, "grad_norm": 0.0, - "learning_rate": 1.570502529101896e-05, - "loss": 0.8912, + "learning_rate": 1.2018050198890894e-05, + "loss": 1.0128, "step": 11559 }, { - "epoch": 0.3275808325540537, + "epoch": 0.45230456217231396, "grad_norm": 0.0, - "learning_rate": 1.5704271490482843e-05, - "loss": 0.988, + "learning_rate": 1.2016809023949307e-05, + "loss": 1.1328, "step": 11560 }, { - "epoch": 0.3276091699963161, + "epoch": 0.4523436888645434, "grad_norm": 0.0, - "learning_rate": 1.570351764189753e-05, - "loss": 0.9578, + "learning_rate": 1.2015567816620296e-05, + "loss": 1.1613, "step": 11561 }, { - "epoch": 0.3276375074385786, + "epoch": 0.45238281555677284, "grad_norm": 0.0, - "learning_rate": 1.5702763745269366e-05, - "loss": 0.9627, + "learning_rate": 1.2014326576923792e-05, + "loss": 1.0013, "step": 11562 }, { - "epoch": 0.32766584488084105, + "epoch": 0.4524219422490023, "grad_norm": 0.0, - "learning_rate": 1.5702009800604704e-05, - "loss": 1.0567, + "learning_rate": 1.2013085304879724e-05, + "loss": 1.1223, "step": 11563 }, { - "epoch": 0.3276941823231035, + "epoch": 0.4524610689412317, "grad_norm": 0.0, - "learning_rate": 1.57012558079099e-05, - "loss": 1.0552, + "learning_rate": 1.2011844000508031e-05, + "loss": 1.0977, "step": 11564 }, { - "epoch": 0.327722519765366, + "epoch": 0.45250019563346117, "grad_norm": 0.0, - "learning_rate": 1.57005017671913e-05, - "loss": 0.9425, + "learning_rate": 1.2010602663828644e-05, + "loss": 1.09, "step": 11565 }, { - "epoch": 0.3277508572076284, + "epoch": 0.4525393223256906, "grad_norm": 0.0, - "learning_rate": 1.5699747678455258e-05, - "loss": 0.7641, + "learning_rate": 1.2009361294861497e-05, + "loss": 1.0037, "step": 11566 }, { - "epoch": 0.3277791946498909, + "epoch": 0.45257844901792005, "grad_norm": 0.0, - "learning_rate": 1.569899354170812e-05, - "loss": 0.9279, + "learning_rate": 1.2008119893626527e-05, + "loss": 1.0367, "step": 11567 }, { - "epoch": 0.32780753209215335, + "epoch": 0.4526175757101495, "grad_norm": 0.0, - "learning_rate": 1.569823935695625e-05, - "loss": 0.9436, + "learning_rate": 1.2006878460143666e-05, + "loss": 1.047, "step": 11568 }, { - "epoch": 0.32783586953441585, + "epoch": 0.45265670240237893, "grad_norm": 0.0, - "learning_rate": 1.569748512420599e-05, - "loss": 0.9999, + "learning_rate": 1.2005636994432849e-05, + "loss": 0.983, "step": 11569 }, { - "epoch": 0.3278642069766783, + "epoch": 0.4526958290946083, "grad_norm": 0.0, - "learning_rate": 1.56967308434637e-05, - "loss": 0.8902, + "learning_rate": 1.2004395496514021e-05, + "loss": 1.091, "step": 11570 }, { - "epoch": 0.3278925444189407, + "epoch": 0.45273495578683776, "grad_norm": 0.0, - "learning_rate": 1.569597651473573e-05, - "loss": 1.0871, + "learning_rate": 1.2003153966407108e-05, + "loss": 1.0183, "step": 11571 }, { - "epoch": 0.3279208818612032, + "epoch": 0.4527740824790672, "grad_norm": 0.0, - "learning_rate": 1.5695222138028432e-05, - "loss": 0.8211, + "learning_rate": 1.2001912404132057e-05, + "loss": 0.9735, "step": 11572 }, { - "epoch": 0.32794921930346566, + "epoch": 0.45281320917129664, "grad_norm": 0.0, - "learning_rate": 1.5694467713348163e-05, - "loss": 0.8113, + "learning_rate": 1.20006708097088e-05, + "loss": 1.0997, "step": 11573 }, { - "epoch": 0.32797755674572815, + "epoch": 0.4528523358635261, "grad_norm": 0.0, - "learning_rate": 1.569371324070128e-05, - "loss": 0.9465, + "learning_rate": 1.1999429183157273e-05, + "loss": 1.0159, "step": 11574 }, { - "epoch": 0.3280058941879906, + "epoch": 0.4528914625557555, "grad_norm": 0.0, - "learning_rate": 1.5692958720094136e-05, - "loss": 1.0049, + "learning_rate": 1.1998187524497422e-05, + "loss": 1.017, "step": 11575 }, { - "epoch": 0.328034231630253, + "epoch": 0.45293058924798496, "grad_norm": 0.0, - "learning_rate": 1.5692204151533083e-05, - "loss": 0.9267, + "learning_rate": 1.1996945833749184e-05, + "loss": 1.0102, "step": 11576 }, { - "epoch": 0.3280625690725155, + "epoch": 0.4529697159402144, "grad_norm": 0.0, - "learning_rate": 1.5691449535024486e-05, - "loss": 0.8756, + "learning_rate": 1.1995704110932499e-05, + "loss": 1.0867, "step": 11577 }, { - "epoch": 0.32809090651477796, + "epoch": 0.45300884263244384, "grad_norm": 0.0, - "learning_rate": 1.5690694870574696e-05, - "loss": 0.9184, + "learning_rate": 1.19944623560673e-05, + "loss": 0.9226, "step": 11578 }, { - "epoch": 0.32811924395704045, + "epoch": 0.4530479693246733, "grad_norm": 0.0, - "learning_rate": 1.5689940158190064e-05, - "loss": 1.0316, + "learning_rate": 1.199322056917354e-05, + "loss": 1.2022, "step": 11579 }, { - "epoch": 0.3281475813993029, + "epoch": 0.4530870960169027, "grad_norm": 0.0, - "learning_rate": 1.5689185397876957e-05, - "loss": 1.0147, + "learning_rate": 1.1991978750271155e-05, + "loss": 1.0206, "step": 11580 }, { - "epoch": 0.3281759188415654, + "epoch": 0.45312622270913216, "grad_norm": 0.0, - "learning_rate": 1.568843058964173e-05, - "loss": 0.9449, + "learning_rate": 1.1990736899380089e-05, + "loss": 1.0587, "step": 11581 }, { - "epoch": 0.3282042562838278, + "epoch": 0.4531653494013616, "grad_norm": 0.0, - "learning_rate": 1.5687675733490736e-05, - "loss": 0.9116, + "learning_rate": 1.198949501652028e-05, + "loss": 1.1339, "step": 11582 }, { - "epoch": 0.32823259372609026, + "epoch": 0.45320447609359105, "grad_norm": 0.0, - "learning_rate": 1.5686920829430337e-05, - "loss": 0.9731, + "learning_rate": 1.1988253101711675e-05, + "loss": 1.1195, "step": 11583 }, { - "epoch": 0.32826093116835275, + "epoch": 0.4532436027858205, "grad_norm": 0.0, - "learning_rate": 1.5686165877466895e-05, - "loss": 0.9861, + "learning_rate": 1.1987011154974218e-05, + "loss": 1.1347, "step": 11584 }, { - "epoch": 0.3282892686106152, + "epoch": 0.45328272947804993, "grad_norm": 0.0, - "learning_rate": 1.568541087760677e-05, - "loss": 0.9166, + "learning_rate": 1.1985769176327848e-05, + "loss": 1.0927, "step": 11585 }, { - "epoch": 0.3283176060528777, + "epoch": 0.45332185617027937, "grad_norm": 0.0, - "learning_rate": 1.568465582985631e-05, - "loss": 0.9305, + "learning_rate": 1.1984527165792517e-05, + "loss": 1.116, "step": 11586 }, { - "epoch": 0.3283459434951401, + "epoch": 0.4533609828625088, "grad_norm": 0.0, - "learning_rate": 1.5683900734221888e-05, - "loss": 0.8619, + "learning_rate": 1.1983285123388163e-05, + "loss": 1.0757, "step": 11587 }, { - "epoch": 0.32837428093740256, + "epoch": 0.45340010955473825, "grad_norm": 0.0, - "learning_rate": 1.568314559070986e-05, - "loss": 0.954, + "learning_rate": 1.1982043049134739e-05, + "loss": 1.0708, "step": 11588 }, { - "epoch": 0.32840261837966506, + "epoch": 0.4534392362469677, "grad_norm": 0.0, - "learning_rate": 1.5682390399326585e-05, - "loss": 0.9384, + "learning_rate": 1.1980800943052184e-05, + "loss": 1.0648, "step": 11589 }, { - "epoch": 0.3284309558219275, + "epoch": 0.45347836293919713, "grad_norm": 0.0, - "learning_rate": 1.5681635160078427e-05, - "loss": 0.9033, + "learning_rate": 1.197955880516045e-05, + "loss": 1.1282, "step": 11590 }, { - "epoch": 0.32845929326419, + "epoch": 0.4535174896314266, "grad_norm": 0.0, - "learning_rate": 1.5680879872971742e-05, - "loss": 0.9017, + "learning_rate": 1.1978316635479483e-05, + "loss": 0.9976, "step": 11591 }, { - "epoch": 0.3284876307064524, + "epoch": 0.453556616323656, "grad_norm": 0.0, - "learning_rate": 1.56801245380129e-05, - "loss": 0.9702, + "learning_rate": 1.1977074434029228e-05, + "loss": 0.9718, "step": 11592 }, { - "epoch": 0.3285159681487149, + "epoch": 0.45359574301588546, "grad_norm": 0.0, - "learning_rate": 1.5679369155208257e-05, - "loss": 0.9354, + "learning_rate": 1.1975832200829635e-05, + "loss": 0.9866, "step": 11593 }, { - "epoch": 0.32854430559097736, + "epoch": 0.4536348697081149, "grad_norm": 0.0, - "learning_rate": 1.567861372456418e-05, - "loss": 0.9807, + "learning_rate": 1.1974589935900651e-05, + "loss": 0.9863, "step": 11594 }, { - "epoch": 0.3285726430332398, + "epoch": 0.45367399640034434, "grad_norm": 0.0, - "learning_rate": 1.5677858246087028e-05, - "loss": 0.9123, + "learning_rate": 1.1973347639262231e-05, + "loss": 1.0758, "step": 11595 }, { - "epoch": 0.3286009804755023, + "epoch": 0.4537131230925738, "grad_norm": 0.0, - "learning_rate": 1.5677102719783172e-05, - "loss": 0.9368, + "learning_rate": 1.1972105310934318e-05, + "loss": 1.0438, "step": 11596 }, { - "epoch": 0.32862931791776473, + "epoch": 0.4537522497848032, "grad_norm": 0.0, - "learning_rate": 1.5676347145658973e-05, - "loss": 1.0012, + "learning_rate": 1.1970862950936866e-05, + "loss": 1.1304, "step": 11597 }, { - "epoch": 0.3286576553600272, + "epoch": 0.4537913764770326, "grad_norm": 0.0, - "learning_rate": 1.567559152372079e-05, - "loss": 0.9102, + "learning_rate": 1.1969620559289824e-05, + "loss": 1.0584, "step": 11598 }, { - "epoch": 0.32868599280228966, + "epoch": 0.45383050316926205, "grad_norm": 0.0, - "learning_rate": 1.5674835853974992e-05, - "loss": 0.9845, + "learning_rate": 1.1968378136013144e-05, + "loss": 0.9836, "step": 11599 }, { - "epoch": 0.3287143302445521, + "epoch": 0.4538696298614915, "grad_norm": 0.0, - "learning_rate": 1.5674080136427946e-05, - "loss": 0.8449, + "learning_rate": 1.1967135681126777e-05, + "loss": 1.0228, "step": 11600 }, { - "epoch": 0.3287426676868146, + "epoch": 0.4539087565537209, "grad_norm": 0.0, - "learning_rate": 1.567332437108602e-05, - "loss": 0.9689, + "learning_rate": 1.1965893194650678e-05, + "loss": 1.1841, "step": 11601 }, { - "epoch": 0.32877100512907703, + "epoch": 0.45394788324595037, "grad_norm": 0.0, - "learning_rate": 1.5672568557955567e-05, - "loss": 0.9358, + "learning_rate": 1.1964650676604796e-05, + "loss": 1.1618, "step": 11602 }, { - "epoch": 0.3287993425713395, + "epoch": 0.4539870099381798, "grad_norm": 0.0, - "learning_rate": 1.567181269704297e-05, - "loss": 1.0231, + "learning_rate": 1.1963408127009087e-05, + "loss": 0.9233, "step": 11603 }, { - "epoch": 0.32882768001360196, + "epoch": 0.45402613663040925, "grad_norm": 0.0, - "learning_rate": 1.5671056788354583e-05, - "loss": 1.0052, + "learning_rate": 1.1962165545883501e-05, + "loss": 1.2534, "step": 11604 }, { - "epoch": 0.32885601745586446, + "epoch": 0.4540652633226387, "grad_norm": 0.0, - "learning_rate": 1.567030083189678e-05, - "loss": 0.9225, + "learning_rate": 1.1960922933247999e-05, + "loss": 1.0164, "step": 11605 }, { - "epoch": 0.3288843548981269, + "epoch": 0.45410439001486813, "grad_norm": 0.0, - "learning_rate": 1.5669544827675934e-05, - "loss": 1.039, + "learning_rate": 1.195968028912253e-05, + "loss": 1.1519, "step": 11606 }, { - "epoch": 0.32891269234038933, + "epoch": 0.45414351670709757, "grad_norm": 0.0, - "learning_rate": 1.56687887756984e-05, - "loss": 0.9901, + "learning_rate": 1.195843761352705e-05, + "loss": 0.8846, "step": 11607 }, { - "epoch": 0.32894102978265183, + "epoch": 0.454182643399327, "grad_norm": 0.0, - "learning_rate": 1.5668032675970555e-05, - "loss": 0.8873, + "learning_rate": 1.1957194906481517e-05, + "loss": 1.0179, "step": 11608 }, { - "epoch": 0.32896936722491427, + "epoch": 0.45422177009155645, "grad_norm": 0.0, - "learning_rate": 1.5667276528498766e-05, - "loss": 0.8644, + "learning_rate": 1.1955952168005889e-05, + "loss": 0.9063, "step": 11609 }, { - "epoch": 0.32899770466717676, + "epoch": 0.4542608967837859, "grad_norm": 0.0, - "learning_rate": 1.56665203332894e-05, - "loss": 0.9015, + "learning_rate": 1.1954709398120117e-05, + "loss": 1.0466, "step": 11610 }, { - "epoch": 0.3290260421094392, + "epoch": 0.45430002347601534, "grad_norm": 0.0, - "learning_rate": 1.5665764090348834e-05, - "loss": 0.8841, + "learning_rate": 1.195346659684416e-05, + "loss": 0.9587, "step": 11611 }, { - "epoch": 0.32905437955170164, + "epoch": 0.4543391501682448, "grad_norm": 0.0, - "learning_rate": 1.566500779968343e-05, - "loss": 0.937, + "learning_rate": 1.1952223764197984e-05, + "loss": 1.1337, "step": 11612 }, { - "epoch": 0.32908271699396413, + "epoch": 0.4543782768604742, "grad_norm": 0.0, - "learning_rate": 1.566425146129956e-05, - "loss": 1.0855, + "learning_rate": 1.1950980900201532e-05, + "loss": 0.9568, "step": 11613 }, { - "epoch": 0.32911105443622657, + "epoch": 0.45441740355270366, "grad_norm": 0.0, - "learning_rate": 1.56634950752036e-05, - "loss": 0.9675, + "learning_rate": 1.1949738004874779e-05, + "loss": 1.1356, "step": 11614 }, { - "epoch": 0.32913939187848906, + "epoch": 0.4544565302449331, "grad_norm": 0.0, - "learning_rate": 1.5662738641401915e-05, - "loss": 0.9912, + "learning_rate": 1.1948495078237673e-05, + "loss": 0.992, "step": 11615 }, { - "epoch": 0.3291677293207515, + "epoch": 0.45449565693716254, "grad_norm": 0.0, - "learning_rate": 1.5661982159900882e-05, - "loss": 0.9639, + "learning_rate": 1.1947252120310175e-05, + "loss": 1.1241, "step": 11616 }, { - "epoch": 0.329196066763014, + "epoch": 0.454534783629392, "grad_norm": 0.0, - "learning_rate": 1.566122563070687e-05, - "loss": 0.8946, + "learning_rate": 1.1946009131112252e-05, + "loss": 0.9921, "step": 11617 }, { - "epoch": 0.32922440420527643, + "epoch": 0.4545739103216214, "grad_norm": 0.0, - "learning_rate": 1.566046905382625e-05, - "loss": 0.8881, + "learning_rate": 1.1944766110663858e-05, + "loss": 1.0079, "step": 11618 }, { - "epoch": 0.32925274164753887, + "epoch": 0.45461303701385086, "grad_norm": 0.0, - "learning_rate": 1.5659712429265403e-05, - "loss": 0.834, + "learning_rate": 1.194352305898496e-05, + "loss": 1.0542, "step": 11619 }, { - "epoch": 0.32928107908980137, + "epoch": 0.4546521637060803, "grad_norm": 0.0, - "learning_rate": 1.565895575703069e-05, - "loss": 1.0162, + "learning_rate": 1.1942279976095518e-05, + "loss": 1.0711, "step": 11620 }, { - "epoch": 0.3293094165320638, + "epoch": 0.45469129039830974, "grad_norm": 0.0, - "learning_rate": 1.5658199037128492e-05, - "loss": 1.0298, + "learning_rate": 1.1941036862015491e-05, + "loss": 0.9493, "step": 11621 }, { - "epoch": 0.3293377539743263, + "epoch": 0.4547304170905392, "grad_norm": 0.0, - "learning_rate": 1.5657442269565185e-05, - "loss": 0.8921, + "learning_rate": 1.1939793716764845e-05, + "loss": 1.0552, "step": 11622 }, { - "epoch": 0.32936609141658874, + "epoch": 0.4547695437827686, "grad_norm": 0.0, - "learning_rate": 1.565668545434714e-05, - "loss": 0.9498, + "learning_rate": 1.193855054036354e-05, + "loss": 1.1012, "step": 11623 }, { - "epoch": 0.3293944288588512, + "epoch": 0.45480867047499807, "grad_norm": 0.0, - "learning_rate": 1.5655928591480732e-05, - "loss": 0.8773, + "learning_rate": 1.1937307332831547e-05, + "loss": 1.0044, "step": 11624 }, { - "epoch": 0.32942276630111367, + "epoch": 0.4548477971672275, "grad_norm": 0.0, - "learning_rate": 1.5655171680972337e-05, - "loss": 0.9383, + "learning_rate": 1.1936064094188821e-05, + "loss": 1.1233, "step": 11625 }, { - "epoch": 0.3294511037433761, + "epoch": 0.45488692385945695, "grad_norm": 0.0, - "learning_rate": 1.5654414722828334e-05, - "loss": 1.0147, + "learning_rate": 1.1934820824455336e-05, + "loss": 1.1003, "step": 11626 }, { - "epoch": 0.3294794411856386, + "epoch": 0.45492605055168633, "grad_norm": 0.0, - "learning_rate": 1.5653657717055096e-05, - "loss": 1.076, + "learning_rate": 1.193357752365105e-05, + "loss": 1.0, "step": 11627 }, { - "epoch": 0.32950777862790104, + "epoch": 0.4549651772439158, "grad_norm": 0.0, - "learning_rate": 1.5652900663658995e-05, - "loss": 1.0791, + "learning_rate": 1.1932334191795934e-05, + "loss": 0.997, "step": 11628 }, { - "epoch": 0.32953611607016353, + "epoch": 0.4550043039361452, "grad_norm": 0.0, - "learning_rate": 1.5652143562646416e-05, - "loss": 0.8111, + "learning_rate": 1.193109082890995e-05, + "loss": 0.9401, "step": 11629 }, { - "epoch": 0.32956445351242597, + "epoch": 0.45504343062837466, "grad_norm": 0.0, - "learning_rate": 1.5651386414023734e-05, - "loss": 0.9628, + "learning_rate": 1.192984743501307e-05, + "loss": 1.02, "step": 11630 }, { - "epoch": 0.3295927909546884, + "epoch": 0.4550825573206041, "grad_norm": 0.0, - "learning_rate": 1.5650629217797322e-05, - "loss": 0.9067, + "learning_rate": 1.1928604010125258e-05, + "loss": 1.1003, "step": 11631 }, { - "epoch": 0.3296211283969509, + "epoch": 0.45512168401283354, "grad_norm": 0.0, - "learning_rate": 1.5649871973973565e-05, - "loss": 1.0182, + "learning_rate": 1.1927360554266478e-05, + "loss": 1.1802, "step": 11632 }, { - "epoch": 0.32964946583921334, + "epoch": 0.455160810705063, "grad_norm": 0.0, - "learning_rate": 1.5649114682558838e-05, - "loss": 1.0859, + "learning_rate": 1.1926117067456709e-05, + "loss": 1.0512, "step": 11633 }, { - "epoch": 0.32967780328147583, + "epoch": 0.4551999373972924, "grad_norm": 0.0, - "learning_rate": 1.5648357343559518e-05, - "loss": 0.9535, + "learning_rate": 1.1924873549715907e-05, + "loss": 0.9959, "step": 11634 }, { - "epoch": 0.3297061407237383, + "epoch": 0.45523906408952186, "grad_norm": 0.0, - "learning_rate": 1.564759995698199e-05, - "loss": 1.0005, + "learning_rate": 1.1923630001064052e-05, + "loss": 1.1052, "step": 11635 }, { - "epoch": 0.3297344781660007, + "epoch": 0.4552781907817513, "grad_norm": 0.0, - "learning_rate": 1.564684252283263e-05, - "loss": 0.9821, + "learning_rate": 1.1922386421521108e-05, + "loss": 1.0558, "step": 11636 }, { - "epoch": 0.3297628156082632, + "epoch": 0.45531731747398074, "grad_norm": 0.0, - "learning_rate": 1.5646085041117817e-05, - "loss": 1.01, + "learning_rate": 1.1921142811107048e-05, + "loss": 1.0563, "step": 11637 }, { - "epoch": 0.32979115305052564, + "epoch": 0.4553564441662102, "grad_norm": 0.0, - "learning_rate": 1.5645327511843932e-05, - "loss": 0.7683, + "learning_rate": 1.191989916984184e-05, + "loss": 1.1472, "step": 11638 }, { - "epoch": 0.32981949049278814, + "epoch": 0.4553955708584396, "grad_norm": 0.0, - "learning_rate": 1.5644569935017357e-05, - "loss": 0.9005, + "learning_rate": 1.1918655497745459e-05, + "loss": 1.0503, "step": 11639 }, { - "epoch": 0.3298478279350506, + "epoch": 0.45543469755066907, "grad_norm": 0.0, - "learning_rate": 1.5643812310644472e-05, - "loss": 1.0652, + "learning_rate": 1.1917411794837876e-05, + "loss": 1.0734, "step": 11640 }, { - "epoch": 0.32987616537731307, + "epoch": 0.4554738242428985, "grad_norm": 0.0, - "learning_rate": 1.5643054638731662e-05, - "loss": 0.9664, + "learning_rate": 1.191616806113906e-05, + "loss": 1.1128, "step": 11641 }, { - "epoch": 0.3299045028195755, + "epoch": 0.45551295093512795, "grad_norm": 0.0, - "learning_rate": 1.5642296919285312e-05, - "loss": 0.9718, + "learning_rate": 1.1914924296668986e-05, + "loss": 1.052, "step": 11642 }, { - "epoch": 0.32993284026183795, + "epoch": 0.4555520776273574, "grad_norm": 0.0, - "learning_rate": 1.5641539152311797e-05, - "loss": 1.1021, + "learning_rate": 1.1913680501447626e-05, + "loss": 1.147, "step": 11643 }, { - "epoch": 0.32996117770410044, + "epoch": 0.45559120431958683, "grad_norm": 0.0, - "learning_rate": 1.5640781337817498e-05, - "loss": 0.9279, + "learning_rate": 1.1912436675494958e-05, + "loss": 0.9645, "step": 11644 }, { - "epoch": 0.3299895151463629, + "epoch": 0.45563033101181627, "grad_norm": 0.0, - "learning_rate": 1.5640023475808807e-05, - "loss": 0.8776, + "learning_rate": 1.1911192818830952e-05, + "loss": 1.0394, "step": 11645 }, { - "epoch": 0.33001785258862537, + "epoch": 0.4556694577040457, "grad_norm": 0.0, - "learning_rate": 1.5639265566292102e-05, - "loss": 0.8444, + "learning_rate": 1.1909948931475585e-05, + "loss": 1.0757, "step": 11646 }, { - "epoch": 0.3300461900308878, + "epoch": 0.45570858439627515, "grad_norm": 0.0, - "learning_rate": 1.5638507609273772e-05, - "loss": 1.0226, + "learning_rate": 1.1908705013448832e-05, + "loss": 1.0029, "step": 11647 }, { - "epoch": 0.33007452747315025, + "epoch": 0.4557477110885046, "grad_norm": 0.0, - "learning_rate": 1.5637749604760198e-05, - "loss": 0.9715, + "learning_rate": 1.1907461064770667e-05, + "loss": 1.0185, "step": 11648 }, { - "epoch": 0.33010286491541274, + "epoch": 0.45578683778073403, "grad_norm": 0.0, - "learning_rate": 1.5636991552757762e-05, - "loss": 0.9547, + "learning_rate": 1.1906217085461068e-05, + "loss": 1.0832, "step": 11649 }, { - "epoch": 0.3301312023576752, + "epoch": 0.4558259644729635, "grad_norm": 0.0, - "learning_rate": 1.5636233453272858e-05, - "loss": 1.0053, + "learning_rate": 1.1904973075540013e-05, + "loss": 0.9672, "step": 11650 }, { - "epoch": 0.3301595397999377, + "epoch": 0.4558650911651929, "grad_norm": 0.0, - "learning_rate": 1.5635475306311865e-05, - "loss": 0.9149, + "learning_rate": 1.1903729035027474e-05, + "loss": 1.1244, "step": 11651 }, { - "epoch": 0.3301878772422001, + "epoch": 0.45590421785742236, "grad_norm": 0.0, - "learning_rate": 1.5634717111881168e-05, - "loss": 0.9858, + "learning_rate": 1.1902484963943433e-05, + "loss": 0.9224, "step": 11652 }, { - "epoch": 0.3302162146844626, + "epoch": 0.4559433445496518, "grad_norm": 0.0, - "learning_rate": 1.563395886998716e-05, - "loss": 1.0, + "learning_rate": 1.1901240862307868e-05, + "loss": 1.0889, "step": 11653 }, { - "epoch": 0.33024455212672504, + "epoch": 0.45598247124188124, "grad_norm": 0.0, - "learning_rate": 1.563320058063622e-05, - "loss": 0.8943, + "learning_rate": 1.189999673014076e-05, + "loss": 1.015, "step": 11654 }, { - "epoch": 0.3302728895689875, + "epoch": 0.4560215979341106, "grad_norm": 0.0, - "learning_rate": 1.5632442243834746e-05, - "loss": 1.0347, + "learning_rate": 1.189875256746208e-05, + "loss": 1.0053, "step": 11655 }, { - "epoch": 0.33030122701125, + "epoch": 0.45606072462634006, "grad_norm": 0.0, - "learning_rate": 1.5631683859589115e-05, - "loss": 0.9778, + "learning_rate": 1.1897508374291817e-05, + "loss": 1.1106, "step": 11656 }, { - "epoch": 0.3303295644535124, + "epoch": 0.4560998513185695, "grad_norm": 0.0, - "learning_rate": 1.563092542790572e-05, - "loss": 0.972, + "learning_rate": 1.1896264150649948e-05, + "loss": 1.1202, "step": 11657 }, { - "epoch": 0.3303579018957749, + "epoch": 0.45613897801079895, "grad_norm": 0.0, - "learning_rate": 1.563016694879095e-05, - "loss": 0.8401, + "learning_rate": 1.1895019896556447e-05, + "loss": 1.0353, "step": 11658 }, { - "epoch": 0.33038623933803735, + "epoch": 0.4561781047030284, "grad_norm": 0.0, - "learning_rate": 1.5629408422251194e-05, - "loss": 0.9293, + "learning_rate": 1.1893775612031306e-05, + "loss": 1.0861, "step": 11659 }, { - "epoch": 0.3304145767802998, + "epoch": 0.4562172313952578, "grad_norm": 0.0, - "learning_rate": 1.5628649848292836e-05, - "loss": 0.9694, + "learning_rate": 1.1892531297094502e-05, + "loss": 0.9599, "step": 11660 }, { - "epoch": 0.3304429142225623, + "epoch": 0.45625635808748727, "grad_norm": 0.0, - "learning_rate": 1.5627891226922277e-05, - "loss": 0.9576, + "learning_rate": 1.1891286951766014e-05, + "loss": 1.0076, "step": 11661 }, { - "epoch": 0.3304712516648247, + "epoch": 0.4562954847797167, "grad_norm": 0.0, - "learning_rate": 1.562713255814589e-05, - "loss": 0.9694, + "learning_rate": 1.189004257606583e-05, + "loss": 1.053, "step": 11662 }, { - "epoch": 0.3304995891070872, + "epoch": 0.45633461147194615, "grad_norm": 0.0, - "learning_rate": 1.5626373841970085e-05, - "loss": 0.8967, + "learning_rate": 1.1888798170013929e-05, + "loss": 1.1465, "step": 11663 }, { - "epoch": 0.33052792654934965, + "epoch": 0.4563737381641756, "grad_norm": 0.0, - "learning_rate": 1.5625615078401244e-05, - "loss": 0.9211, + "learning_rate": 1.1887553733630297e-05, + "loss": 0.9963, "step": 11664 }, { - "epoch": 0.33055626399161214, + "epoch": 0.45641286485640503, "grad_norm": 0.0, - "learning_rate": 1.5624856267445757e-05, - "loss": 0.9401, + "learning_rate": 1.1886309266934916e-05, + "loss": 0.9149, "step": 11665 }, { - "epoch": 0.3305846014338746, + "epoch": 0.4564519915486345, "grad_norm": 0.0, - "learning_rate": 1.5624097409110012e-05, - "loss": 0.9169, + "learning_rate": 1.1885064769947774e-05, + "loss": 1.1378, "step": 11666 }, { - "epoch": 0.330612938876137, + "epoch": 0.4564911182408639, "grad_norm": 0.0, - "learning_rate": 1.562333850340041e-05, - "loss": 0.9229, + "learning_rate": 1.188382024268885e-05, + "loss": 0.785, "step": 11667 }, { - "epoch": 0.3306412763183995, + "epoch": 0.45653024493309335, "grad_norm": 0.0, - "learning_rate": 1.562257955032334e-05, - "loss": 0.9605, + "learning_rate": 1.1882575685178136e-05, + "loss": 1.0427, "step": 11668 }, { - "epoch": 0.33066961376066195, + "epoch": 0.4565693716253228, "grad_norm": 0.0, - "learning_rate": 1.5621820549885192e-05, - "loss": 0.8035, + "learning_rate": 1.1881331097435619e-05, + "loss": 1.0197, "step": 11669 }, { - "epoch": 0.33069795120292445, + "epoch": 0.45660849831755224, "grad_norm": 0.0, - "learning_rate": 1.5621061502092364e-05, - "loss": 0.9191, + "learning_rate": 1.1880086479481277e-05, + "loss": 1.0767, "step": 11670 }, { - "epoch": 0.3307262886451869, + "epoch": 0.4566476250097817, "grad_norm": 0.0, - "learning_rate": 1.5620302406951246e-05, - "loss": 0.8789, + "learning_rate": 1.1878841831335103e-05, + "loss": 1.0859, "step": 11671 }, { - "epoch": 0.3307546260874493, + "epoch": 0.4566867517020111, "grad_norm": 0.0, - "learning_rate": 1.5619543264468236e-05, - "loss": 0.9633, + "learning_rate": 1.1877597153017084e-05, + "loss": 1.179, "step": 11672 }, { - "epoch": 0.3307829635297118, + "epoch": 0.45672587839424056, "grad_norm": 0.0, - "learning_rate": 1.5618784074649725e-05, - "loss": 0.95, + "learning_rate": 1.1876352444547208e-05, + "loss": 1.1669, "step": 11673 }, { - "epoch": 0.33081130097197425, + "epoch": 0.45676500508647, "grad_norm": 0.0, - "learning_rate": 1.561802483750211e-05, - "loss": 0.9304, + "learning_rate": 1.1875107705945461e-05, + "loss": 0.9491, "step": 11674 }, { - "epoch": 0.33083963841423675, + "epoch": 0.45680413177869944, "grad_norm": 0.0, - "learning_rate": 1.5617265553031783e-05, - "loss": 0.9586, + "learning_rate": 1.1873862937231837e-05, + "loss": 0.9946, "step": 11675 }, { - "epoch": 0.3308679758564992, + "epoch": 0.4568432584709289, "grad_norm": 0.0, - "learning_rate": 1.5616506221245143e-05, - "loss": 0.9353, + "learning_rate": 1.187261813842632e-05, + "loss": 1.1338, "step": 11676 }, { - "epoch": 0.3308963132987617, + "epoch": 0.4568823851631583, "grad_norm": 0.0, - "learning_rate": 1.561574684214859e-05, - "loss": 0.9045, + "learning_rate": 1.1871373309548904e-05, + "loss": 1.0739, "step": 11677 }, { - "epoch": 0.3309246507410241, + "epoch": 0.45692151185538776, "grad_norm": 0.0, - "learning_rate": 1.5614987415748514e-05, - "loss": 0.9934, + "learning_rate": 1.1870128450619578e-05, + "loss": 1.0203, "step": 11678 }, { - "epoch": 0.33095298818328656, + "epoch": 0.4569606385476172, "grad_norm": 0.0, - "learning_rate": 1.561422794205131e-05, - "loss": 1.0743, + "learning_rate": 1.186888356165833e-05, + "loss": 1.0668, "step": 11679 }, { - "epoch": 0.33098132562554905, + "epoch": 0.45699976523984664, "grad_norm": 0.0, - "learning_rate": 1.5613468421063383e-05, - "loss": 0.9996, + "learning_rate": 1.1867638642685155e-05, + "loss": 1.1113, "step": 11680 }, { - "epoch": 0.3310096630678115, + "epoch": 0.4570388919320761, "grad_norm": 0.0, - "learning_rate": 1.5612708852791127e-05, - "loss": 0.986, + "learning_rate": 1.1866393693720043e-05, + "loss": 1.0399, "step": 11681 }, { - "epoch": 0.331038000510074, + "epoch": 0.4570780186243055, "grad_norm": 0.0, - "learning_rate": 1.5611949237240938e-05, - "loss": 0.9262, + "learning_rate": 1.1865148714782988e-05, + "loss": 1.1054, "step": 11682 }, { - "epoch": 0.3310663379523364, + "epoch": 0.45711714531653497, "grad_norm": 0.0, - "learning_rate": 1.5611189574419215e-05, - "loss": 0.9117, + "learning_rate": 1.186390370589398e-05, + "loss": 1.1238, "step": 11683 }, { - "epoch": 0.33109467539459886, + "epoch": 0.45715627200876435, "grad_norm": 0.0, - "learning_rate": 1.5610429864332363e-05, - "loss": 1.0544, + "learning_rate": 1.1862658667073018e-05, + "loss": 1.179, "step": 11684 }, { - "epoch": 0.33112301283686135, + "epoch": 0.4571953987009938, "grad_norm": 0.0, - "learning_rate": 1.5609670106986775e-05, - "loss": 0.9071, + "learning_rate": 1.1861413598340086e-05, + "loss": 1.2006, "step": 11685 }, { - "epoch": 0.3311513502791238, + "epoch": 0.45723452539322323, "grad_norm": 0.0, - "learning_rate": 1.5608910302388854e-05, - "loss": 1.0746, + "learning_rate": 1.186016849971519e-05, + "loss": 1.0273, "step": 11686 }, { - "epoch": 0.3311796877213863, + "epoch": 0.4572736520854527, "grad_norm": 0.0, - "learning_rate": 1.5608150450544997e-05, - "loss": 1.0015, + "learning_rate": 1.1858923371218314e-05, + "loss": 1.1581, "step": 11687 }, { - "epoch": 0.3312080251636487, + "epoch": 0.4573127787776821, "grad_norm": 0.0, - "learning_rate": 1.5607390551461605e-05, - "loss": 0.8992, + "learning_rate": 1.185767821286946e-05, + "loss": 1.0727, "step": 11688 }, { - "epoch": 0.3312363626059112, + "epoch": 0.45735190546991156, "grad_norm": 0.0, - "learning_rate": 1.5606630605145084e-05, - "loss": 0.9576, + "learning_rate": 1.1856433024688624e-05, + "loss": 1.1243, "step": 11689 }, { - "epoch": 0.33126470004817365, + "epoch": 0.457391032162141, "grad_norm": 0.0, - "learning_rate": 1.560587061160183e-05, - "loss": 0.9595, + "learning_rate": 1.1855187806695797e-05, + "loss": 1.0587, "step": 11690 }, { - "epoch": 0.3312930374904361, + "epoch": 0.45743015885437044, "grad_norm": 0.0, - "learning_rate": 1.5605110570838246e-05, - "loss": 0.9047, + "learning_rate": 1.185394255891098e-05, + "loss": 1.1766, "step": 11691 }, { - "epoch": 0.3313213749326986, + "epoch": 0.4574692855465999, "grad_norm": 0.0, - "learning_rate": 1.560435048286073e-05, - "loss": 0.9514, + "learning_rate": 1.1852697281354166e-05, + "loss": 1.0822, "step": 11692 }, { - "epoch": 0.331349712374961, + "epoch": 0.4575084122388293, "grad_norm": 0.0, - "learning_rate": 1.5603590347675695e-05, - "loss": 0.927, + "learning_rate": 1.1851451974045357e-05, + "loss": 1.0626, "step": 11693 }, { - "epoch": 0.3313780498172235, + "epoch": 0.45754753893105876, "grad_norm": 0.0, - "learning_rate": 1.5602830165289536e-05, - "loss": 0.9989, + "learning_rate": 1.185020663700455e-05, + "loss": 0.9486, "step": 11694 }, { - "epoch": 0.33140638725948596, + "epoch": 0.4575866656232882, "grad_norm": 0.0, - "learning_rate": 1.5602069935708653e-05, - "loss": 0.9681, + "learning_rate": 1.1848961270251745e-05, + "loss": 1.0094, "step": 11695 }, { - "epoch": 0.3314347247017484, + "epoch": 0.45762579231551764, "grad_norm": 0.0, - "learning_rate": 1.560130965893946e-05, - "loss": 0.8506, + "learning_rate": 1.1847715873806935e-05, + "loss": 1.0673, "step": 11696 }, { - "epoch": 0.3314630621440109, + "epoch": 0.4576649190077471, "grad_norm": 0.0, - "learning_rate": 1.5600549334988356e-05, - "loss": 1.0501, + "learning_rate": 1.1846470447690124e-05, + "loss": 1.0723, "step": 11697 }, { - "epoch": 0.3314913995862733, + "epoch": 0.4577040456999765, "grad_norm": 0.0, - "learning_rate": 1.5599788963861745e-05, - "loss": 0.8708, + "learning_rate": 1.1845224991921313e-05, + "loss": 1.0757, "step": 11698 }, { - "epoch": 0.3315197370285358, + "epoch": 0.45774317239220597, "grad_norm": 0.0, - "learning_rate": 1.5599028545566028e-05, - "loss": 0.9812, + "learning_rate": 1.18439795065205e-05, + "loss": 1.0029, "step": 11699 }, { - "epoch": 0.33154807447079826, + "epoch": 0.4577822990844354, "grad_norm": 0.0, - "learning_rate": 1.5598268080107618e-05, - "loss": 1.0722, + "learning_rate": 1.1842733991507687e-05, + "loss": 1.1169, "step": 11700 }, { - "epoch": 0.33157641191306075, + "epoch": 0.45782142577666485, "grad_norm": 0.0, - "learning_rate": 1.5597507567492915e-05, - "loss": 0.887, + "learning_rate": 1.1841488446902876e-05, + "loss": 0.9718, "step": 11701 }, { - "epoch": 0.3316047493553232, + "epoch": 0.4578605524688943, "grad_norm": 0.0, - "learning_rate": 1.559674700772833e-05, - "loss": 1.0553, + "learning_rate": 1.1840242872726066e-05, + "loss": 1.1167, "step": 11702 }, { - "epoch": 0.33163308679758563, + "epoch": 0.45789967916112373, "grad_norm": 0.0, - "learning_rate": 1.5595986400820266e-05, - "loss": 0.9595, + "learning_rate": 1.183899726899726e-05, + "loss": 1.1106, "step": 11703 }, { - "epoch": 0.3316614242398481, + "epoch": 0.45793880585335317, "grad_norm": 0.0, - "learning_rate": 1.5595225746775127e-05, - "loss": 0.9476, + "learning_rate": 1.1837751635736467e-05, + "loss": 1.0046, "step": 11704 }, { - "epoch": 0.33168976168211056, + "epoch": 0.4579779325455826, "grad_norm": 0.0, - "learning_rate": 1.559446504559933e-05, - "loss": 0.816, + "learning_rate": 1.1836505972963681e-05, + "loss": 1.049, "step": 11705 }, { - "epoch": 0.33171809912437306, + "epoch": 0.45801705923781205, "grad_norm": 0.0, - "learning_rate": 1.559370429729927e-05, - "loss": 1.0574, + "learning_rate": 1.1835260280698915e-05, + "loss": 0.9757, "step": 11706 }, { - "epoch": 0.3317464365666355, + "epoch": 0.4580561859300415, "grad_norm": 0.0, - "learning_rate": 1.5592943501881362e-05, - "loss": 1.0885, + "learning_rate": 1.1834014558962165e-05, + "loss": 0.9051, "step": 11707 }, { - "epoch": 0.33177477400889793, + "epoch": 0.45809531262227093, "grad_norm": 0.0, - "learning_rate": 1.559218265935202e-05, - "loss": 0.8296, + "learning_rate": 1.183276880777344e-05, + "loss": 1.0654, "step": 11708 }, { - "epoch": 0.3318031114511604, + "epoch": 0.4581344393145004, "grad_norm": 0.0, - "learning_rate": 1.5591421769717642e-05, - "loss": 0.9483, + "learning_rate": 1.1831523027152745e-05, + "loss": 1.119, "step": 11709 }, { - "epoch": 0.33183144889342286, + "epoch": 0.4581735660067298, "grad_norm": 0.0, - "learning_rate": 1.5590660832984642e-05, - "loss": 1.0374, + "learning_rate": 1.1830277217120085e-05, + "loss": 0.9816, "step": 11710 }, { - "epoch": 0.33185978633568536, + "epoch": 0.45821269269895926, "grad_norm": 0.0, - "learning_rate": 1.5589899849159432e-05, - "loss": 0.9999, + "learning_rate": 1.1829031377695465e-05, + "loss": 1.0629, "step": 11711 }, { - "epoch": 0.3318881237779478, + "epoch": 0.45825181939118864, "grad_norm": 0.0, - "learning_rate": 1.558913881824842e-05, - "loss": 0.9366, + "learning_rate": 1.1827785508898895e-05, + "loss": 1.1956, "step": 11712 }, { - "epoch": 0.3319164612202103, + "epoch": 0.4582909460834181, "grad_norm": 0.0, - "learning_rate": 1.5588377740258015e-05, - "loss": 0.9573, + "learning_rate": 1.182653961075038e-05, + "loss": 1.1583, "step": 11713 }, { - "epoch": 0.33194479866247273, + "epoch": 0.4583300727756475, "grad_norm": 0.0, - "learning_rate": 1.558761661519463e-05, - "loss": 0.8911, + "learning_rate": 1.1825293683269928e-05, + "loss": 1.1096, "step": 11714 }, { - "epoch": 0.33197313610473517, + "epoch": 0.45836919946787696, "grad_norm": 0.0, - "learning_rate": 1.5586855443064674e-05, - "loss": 0.9584, + "learning_rate": 1.1824047726477546e-05, + "loss": 0.9897, "step": 11715 }, { - "epoch": 0.33200147354699766, + "epoch": 0.4584083261601064, "grad_norm": 0.0, - "learning_rate": 1.558609422387456e-05, - "loss": 0.8878, + "learning_rate": 1.182280174039324e-05, + "loss": 0.9641, "step": 11716 }, { - "epoch": 0.3320298109892601, + "epoch": 0.45844745285233585, "grad_norm": 0.0, - "learning_rate": 1.5585332957630702e-05, - "loss": 0.9498, + "learning_rate": 1.1821555725037025e-05, + "loss": 1.1092, "step": 11717 }, { - "epoch": 0.3320581484315226, + "epoch": 0.4584865795445653, "grad_norm": 0.0, - "learning_rate": 1.558457164433951e-05, - "loss": 0.9247, + "learning_rate": 1.182030968042891e-05, + "loss": 0.9236, "step": 11718 }, { - "epoch": 0.33208648587378503, + "epoch": 0.45852570623679473, "grad_norm": 0.0, - "learning_rate": 1.5583810284007395e-05, - "loss": 0.9453, + "learning_rate": 1.1819063606588898e-05, + "loss": 1.1271, "step": 11719 }, { - "epoch": 0.33211482331604747, + "epoch": 0.45856483292902417, "grad_norm": 0.0, - "learning_rate": 1.5583048876640775e-05, - "loss": 1.0351, + "learning_rate": 1.1817817503537008e-05, + "loss": 1.0781, "step": 11720 }, { - "epoch": 0.33214316075830996, + "epoch": 0.4586039596212536, "grad_norm": 0.0, - "learning_rate": 1.558228742224606e-05, - "loss": 0.9012, + "learning_rate": 1.1816571371293244e-05, + "loss": 1.044, "step": 11721 }, { - "epoch": 0.3321714982005724, + "epoch": 0.45864308631348305, "grad_norm": 0.0, - "learning_rate": 1.5581525920829664e-05, - "loss": 0.9777, + "learning_rate": 1.1815325209877622e-05, + "loss": 1.0512, "step": 11722 }, { - "epoch": 0.3321998356428349, + "epoch": 0.4586822130057125, "grad_norm": 0.0, - "learning_rate": 1.5580764372398e-05, - "loss": 0.9888, + "learning_rate": 1.1814079019310149e-05, + "loss": 1.0509, "step": 11723 }, { - "epoch": 0.33222817308509733, + "epoch": 0.45872133969794193, "grad_norm": 0.0, - "learning_rate": 1.5580002776957493e-05, - "loss": 0.9578, + "learning_rate": 1.1812832799610843e-05, + "loss": 1.1063, "step": 11724 }, { - "epoch": 0.3322565105273598, + "epoch": 0.4587604663901714, "grad_norm": 0.0, - "learning_rate": 1.5579241134514548e-05, - "loss": 0.9235, + "learning_rate": 1.1811586550799712e-05, + "loss": 0.9886, "step": 11725 }, { - "epoch": 0.33228484796962227, + "epoch": 0.4587995930824008, "grad_norm": 0.0, - "learning_rate": 1.557847944507558e-05, - "loss": 0.9277, + "learning_rate": 1.1810340272896772e-05, + "loss": 1.0845, "step": 11726 }, { - "epoch": 0.3323131854118847, + "epoch": 0.45883871977463025, "grad_norm": 0.0, - "learning_rate": 1.557771770864701e-05, - "loss": 0.9879, + "learning_rate": 1.1809093965922035e-05, + "loss": 1.1044, "step": 11727 }, { - "epoch": 0.3323415228541472, + "epoch": 0.4588778464668597, "grad_norm": 0.0, - "learning_rate": 1.557695592523525e-05, - "loss": 0.7788, + "learning_rate": 1.1807847629895517e-05, + "loss": 1.058, "step": 11728 }, { - "epoch": 0.33236986029640964, + "epoch": 0.45891697315908914, "grad_norm": 0.0, - "learning_rate": 1.5576194094846723e-05, - "loss": 0.9639, + "learning_rate": 1.1806601264837232e-05, + "loss": 1.1241, "step": 11729 }, { - "epoch": 0.33239819773867213, + "epoch": 0.4589560998513186, "grad_norm": 0.0, - "learning_rate": 1.5575432217487843e-05, - "loss": 0.8904, + "learning_rate": 1.180535487076719e-05, + "loss": 1.0747, "step": 11730 }, { - "epoch": 0.33242653518093457, + "epoch": 0.458995226543548, "grad_norm": 0.0, - "learning_rate": 1.557467029316502e-05, - "loss": 0.9904, + "learning_rate": 1.1804108447705415e-05, + "loss": 1.0353, "step": 11731 }, { - "epoch": 0.332454872623197, + "epoch": 0.45903435323577746, "grad_norm": 0.0, - "learning_rate": 1.5573908321884685e-05, - "loss": 1.0306, + "learning_rate": 1.1802861995671917e-05, + "loss": 1.0538, "step": 11732 }, { - "epoch": 0.3324832100654595, + "epoch": 0.4590734799280069, "grad_norm": 0.0, - "learning_rate": 1.5573146303653252e-05, - "loss": 1.0038, + "learning_rate": 1.1801615514686718e-05, + "loss": 0.9315, "step": 11733 }, { - "epoch": 0.33251154750772194, + "epoch": 0.45911260662023634, "grad_norm": 0.0, - "learning_rate": 1.5572384238477135e-05, - "loss": 0.957, + "learning_rate": 1.1800369004769827e-05, + "loss": 1.1185, "step": 11734 }, { - "epoch": 0.33253988494998443, + "epoch": 0.4591517333124658, "grad_norm": 0.0, - "learning_rate": 1.557162212636276e-05, - "loss": 0.9029, + "learning_rate": 1.1799122465941268e-05, + "loss": 1.0149, "step": 11735 }, { - "epoch": 0.33256822239224687, + "epoch": 0.4591908600046952, "grad_norm": 0.0, - "learning_rate": 1.5570859967316537e-05, - "loss": 0.9329, + "learning_rate": 1.1797875898221058e-05, + "loss": 0.9973, "step": 11736 }, { - "epoch": 0.33259655983450936, + "epoch": 0.45922998669692466, "grad_norm": 0.0, - "learning_rate": 1.5570097761344892e-05, - "loss": 1.0074, + "learning_rate": 1.1796629301629213e-05, + "loss": 1.1281, "step": 11737 }, { - "epoch": 0.3326248972767718, + "epoch": 0.4592691133891541, "grad_norm": 0.0, - "learning_rate": 1.556933550845425e-05, - "loss": 0.9424, + "learning_rate": 1.1795382676185751e-05, + "loss": 0.9338, "step": 11738 }, { - "epoch": 0.33265323471903424, + "epoch": 0.45930824008138355, "grad_norm": 0.0, - "learning_rate": 1.5568573208651027e-05, - "loss": 1.0646, + "learning_rate": 1.1794136021910694e-05, + "loss": 1.0877, "step": 11739 }, { - "epoch": 0.33268157216129673, + "epoch": 0.459347366773613, "grad_norm": 0.0, - "learning_rate": 1.556781086194164e-05, - "loss": 0.8535, + "learning_rate": 1.179288933882406e-05, + "loss": 1.125, "step": 11740 }, { - "epoch": 0.3327099096035592, + "epoch": 0.45938649346584237, "grad_norm": 0.0, - "learning_rate": 1.5567048468332516e-05, - "loss": 0.9883, + "learning_rate": 1.1791642626945872e-05, + "loss": 0.9859, "step": 11741 }, { - "epoch": 0.33273824704582167, + "epoch": 0.4594256201580718, "grad_norm": 0.0, - "learning_rate": 1.5566286027830076e-05, - "loss": 0.9591, + "learning_rate": 1.1790395886296146e-05, + "loss": 1.0022, "step": 11742 }, { - "epoch": 0.3327665844880841, + "epoch": 0.45946474685030125, "grad_norm": 0.0, - "learning_rate": 1.556552354044074e-05, - "loss": 0.9576, + "learning_rate": 1.1789149116894907e-05, + "loss": 1.1028, "step": 11743 }, { - "epoch": 0.33279492193034654, + "epoch": 0.4595038735425307, "grad_norm": 0.0, - "learning_rate": 1.5564761006170934e-05, - "loss": 0.9952, + "learning_rate": 1.1787902318762174e-05, + "loss": 1.0426, "step": 11744 }, { - "epoch": 0.33282325937260904, + "epoch": 0.45954300023476014, "grad_norm": 0.0, - "learning_rate": 1.556399842502708e-05, - "loss": 1.0276, + "learning_rate": 1.1786655491917971e-05, + "loss": 1.0726, "step": 11745 }, { - "epoch": 0.3328515968148715, + "epoch": 0.4595821269269896, "grad_norm": 0.0, - "learning_rate": 1.55632357970156e-05, - "loss": 0.9673, + "learning_rate": 1.178540863638232e-05, + "loss": 0.8405, "step": 11746 }, { - "epoch": 0.33287993425713397, + "epoch": 0.459621253619219, "grad_norm": 0.0, - "learning_rate": 1.556247312214292e-05, - "loss": 0.8647, + "learning_rate": 1.1784161752175243e-05, + "loss": 1.044, "step": 11747 }, { - "epoch": 0.3329082716993964, + "epoch": 0.45966038031144846, "grad_norm": 0.0, - "learning_rate": 1.556171040041546e-05, - "loss": 0.9696, + "learning_rate": 1.1782914839316764e-05, + "loss": 1.1977, "step": 11748 }, { - "epoch": 0.3329366091416589, + "epoch": 0.4596995070036779, "grad_norm": 0.0, - "learning_rate": 1.5560947631839654e-05, - "loss": 1.046, + "learning_rate": 1.178166789782691e-05, + "loss": 1.0204, "step": 11749 }, { - "epoch": 0.33296494658392134, + "epoch": 0.45973863369590734, "grad_norm": 0.0, - "learning_rate": 1.556018481642192e-05, - "loss": 1.0566, + "learning_rate": 1.1780420927725695e-05, + "loss": 1.0923, "step": 11750 }, { - "epoch": 0.3329932840261838, + "epoch": 0.4597777603881368, "grad_norm": 0.0, - "learning_rate": 1.555942195416868e-05, - "loss": 0.9796, + "learning_rate": 1.1779173929033157e-05, + "loss": 1.0213, "step": 11751 }, { - "epoch": 0.33302162146844627, + "epoch": 0.4598168870803662, "grad_norm": 0.0, - "learning_rate": 1.555865904508637e-05, - "loss": 0.9413, + "learning_rate": 1.177792690176931e-05, + "loss": 1.0481, "step": 11752 }, { - "epoch": 0.3330499589107087, + "epoch": 0.45985601377259566, "grad_norm": 0.0, - "learning_rate": 1.5557896089181403e-05, - "loss": 0.9052, + "learning_rate": 1.177667984595419e-05, + "loss": 0.9439, "step": 11753 }, { - "epoch": 0.3330782963529712, + "epoch": 0.4598951404648251, "grad_norm": 0.0, - "learning_rate": 1.555713308646022e-05, - "loss": 0.9958, + "learning_rate": 1.1775432761607814e-05, + "loss": 1.1952, "step": 11754 }, { - "epoch": 0.33310663379523364, + "epoch": 0.45993426715705454, "grad_norm": 0.0, - "learning_rate": 1.5556370036929237e-05, - "loss": 1.0022, + "learning_rate": 1.1774185648750216e-05, + "loss": 0.9629, "step": 11755 }, { - "epoch": 0.3331349712374961, + "epoch": 0.459973393849284, "grad_norm": 0.0, - "learning_rate": 1.5555606940594892e-05, - "loss": 0.8586, + "learning_rate": 1.1772938507401417e-05, + "loss": 1.1182, "step": 11756 }, { - "epoch": 0.3331633086797586, + "epoch": 0.4600125205415134, "grad_norm": 0.0, - "learning_rate": 1.55548437974636e-05, - "loss": 0.8422, + "learning_rate": 1.1771691337581447e-05, + "loss": 0.9771, "step": 11757 }, { - "epoch": 0.333191646122021, + "epoch": 0.46005164723374287, "grad_norm": 0.0, - "learning_rate": 1.55540806075418e-05, - "loss": 0.9093, + "learning_rate": 1.1770444139310337e-05, + "loss": 1.1081, "step": 11758 }, { - "epoch": 0.3332199835642835, + "epoch": 0.4600907739259723, "grad_norm": 0.0, - "learning_rate": 1.5553317370835916e-05, - "loss": 0.9531, + "learning_rate": 1.176919691260811e-05, + "loss": 0.9228, "step": 11759 }, { - "epoch": 0.33324832100654594, + "epoch": 0.46012990061820175, "grad_norm": 0.0, - "learning_rate": 1.5552554087352382e-05, - "loss": 0.9826, + "learning_rate": 1.1767949657494799e-05, + "loss": 1.1091, "step": 11760 }, { - "epoch": 0.33327665844880844, + "epoch": 0.4601690273104312, "grad_norm": 0.0, - "learning_rate": 1.5551790757097616e-05, - "loss": 0.9211, + "learning_rate": 1.1766702373990431e-05, + "loss": 1.1335, "step": 11761 }, { - "epoch": 0.3333049958910709, + "epoch": 0.46020815400266063, "grad_norm": 0.0, - "learning_rate": 1.5551027380078054e-05, - "loss": 0.9878, + "learning_rate": 1.1765455062115036e-05, + "loss": 1.116, "step": 11762 }, { - "epoch": 0.3333333333333333, + "epoch": 0.46024728069489007, "grad_norm": 0.0, - "learning_rate": 1.555026395630013e-05, - "loss": 0.976, + "learning_rate": 1.1764207721888645e-05, + "loss": 1.1086, "step": 11763 }, { - "epoch": 0.3333616707755958, + "epoch": 0.4602864073871195, "grad_norm": 0.0, - "learning_rate": 1.5549500485770272e-05, - "loss": 1.0221, + "learning_rate": 1.1762960353331293e-05, + "loss": 1.0957, "step": 11764 }, { - "epoch": 0.33339000821785825, + "epoch": 0.46032553407934895, "grad_norm": 0.0, - "learning_rate": 1.554873696849491e-05, - "loss": 0.9569, + "learning_rate": 1.1761712956463003e-05, + "loss": 1.0488, "step": 11765 }, { - "epoch": 0.33341834566012074, + "epoch": 0.4603646607715784, "grad_norm": 0.0, - "learning_rate": 1.554797340448048e-05, - "loss": 0.9542, + "learning_rate": 1.1760465531303808e-05, + "loss": 1.0783, "step": 11766 }, { - "epoch": 0.3334466831023832, + "epoch": 0.46040378746380783, "grad_norm": 0.0, - "learning_rate": 1.5547209793733403e-05, - "loss": 1.0997, + "learning_rate": 1.1759218077873746e-05, + "loss": 1.0706, "step": 11767 }, { - "epoch": 0.3334750205446456, + "epoch": 0.4604429141560373, "grad_norm": 0.0, - "learning_rate": 1.5546446136260123e-05, - "loss": 1.0572, + "learning_rate": 1.1757970596192845e-05, + "loss": 1.0443, "step": 11768 }, { - "epoch": 0.3335033579869081, + "epoch": 0.46048204084826666, "grad_norm": 0.0, - "learning_rate": 1.5545682432067068e-05, - "loss": 0.9961, + "learning_rate": 1.1756723086281141e-05, + "loss": 1.0094, "step": 11769 }, { - "epoch": 0.33353169542917055, + "epoch": 0.4605211675404961, "grad_norm": 0.0, - "learning_rate": 1.5544918681160667e-05, - "loss": 1.1475, + "learning_rate": 1.1755475548158662e-05, + "loss": 1.0422, "step": 11770 }, { - "epoch": 0.33356003287143304, + "epoch": 0.46056029423272554, "grad_norm": 0.0, - "learning_rate": 1.554415488354736e-05, - "loss": 0.9025, + "learning_rate": 1.175422798184545e-05, + "loss": 1.0765, "step": 11771 }, { - "epoch": 0.3335883703136955, + "epoch": 0.460599420924955, "grad_norm": 0.0, - "learning_rate": 1.5543391039233576e-05, - "loss": 0.8648, + "learning_rate": 1.175298038736153e-05, + "loss": 1.011, "step": 11772 }, { - "epoch": 0.3336167077559579, + "epoch": 0.4606385476171844, "grad_norm": 0.0, - "learning_rate": 1.554262714822575e-05, - "loss": 0.9657, + "learning_rate": 1.1751732764726944e-05, + "loss": 1.026, "step": 11773 }, { - "epoch": 0.3336450451982204, + "epoch": 0.46067767430941386, "grad_norm": 0.0, - "learning_rate": 1.554186321053032e-05, - "loss": 1.0203, + "learning_rate": 1.1750485113961725e-05, + "loss": 1.0991, "step": 11774 }, { - "epoch": 0.33367338264048285, + "epoch": 0.4607168010016433, "grad_norm": 0.0, - "learning_rate": 1.5541099226153714e-05, - "loss": 0.952, + "learning_rate": 1.1749237435085909e-05, + "loss": 1.0179, "step": 11775 }, { - "epoch": 0.33370172008274535, + "epoch": 0.46075592769387275, "grad_norm": 0.0, - "learning_rate": 1.5540335195102377e-05, - "loss": 1.0759, + "learning_rate": 1.1747989728119532e-05, + "loss": 1.0591, "step": 11776 }, { - "epoch": 0.3337300575250078, + "epoch": 0.4607950543861022, "grad_norm": 0.0, - "learning_rate": 1.5539571117382734e-05, - "loss": 0.9494, + "learning_rate": 1.1746741993082628e-05, + "loss": 0.9759, "step": 11777 }, { - "epoch": 0.3337583949672703, + "epoch": 0.46083418107833163, "grad_norm": 0.0, - "learning_rate": 1.5538806993001228e-05, - "loss": 0.9773, + "learning_rate": 1.1745494229995237e-05, + "loss": 0.8959, "step": 11778 }, { - "epoch": 0.3337867324095327, + "epoch": 0.46087330777056107, "grad_norm": 0.0, - "learning_rate": 1.5538042821964293e-05, - "loss": 0.9188, + "learning_rate": 1.1744246438877395e-05, + "loss": 1.1124, "step": 11779 }, { - "epoch": 0.33381506985179515, + "epoch": 0.4609124344627905, "grad_norm": 0.0, - "learning_rate": 1.553727860427837e-05, - "loss": 1.0066, + "learning_rate": 1.1742998619749144e-05, + "loss": 1.0185, "step": 11780 }, { - "epoch": 0.33384340729405765, + "epoch": 0.46095156115501995, "grad_norm": 0.0, - "learning_rate": 1.553651433994989e-05, - "loss": 0.9457, + "learning_rate": 1.1741750772630518e-05, + "loss": 1.0821, "step": 11781 }, { - "epoch": 0.3338717447363201, + "epoch": 0.4609906878472494, "grad_norm": 0.0, - "learning_rate": 1.5535750028985296e-05, - "loss": 0.9584, + "learning_rate": 1.1740502897541557e-05, + "loss": 0.963, "step": 11782 }, { - "epoch": 0.3339000821785826, + "epoch": 0.46102981453947883, "grad_norm": 0.0, - "learning_rate": 1.5534985671391025e-05, - "loss": 1.0043, + "learning_rate": 1.1739254994502299e-05, + "loss": 1.1508, "step": 11783 }, { - "epoch": 0.333928419620845, + "epoch": 0.4610689412317083, "grad_norm": 0.0, - "learning_rate": 1.5534221267173513e-05, - "loss": 0.8528, + "learning_rate": 1.1738007063532788e-05, + "loss": 0.9855, "step": 11784 }, { - "epoch": 0.33395675706310746, + "epoch": 0.4611080679239377, "grad_norm": 0.0, - "learning_rate": 1.55334568163392e-05, - "loss": 1.0024, + "learning_rate": 1.1736759104653062e-05, + "loss": 1.0148, "step": 11785 }, { - "epoch": 0.33398509450536995, + "epoch": 0.46114719461616716, "grad_norm": 0.0, - "learning_rate": 1.5532692318894524e-05, - "loss": 0.9117, + "learning_rate": 1.1735511117883156e-05, + "loss": 1.0475, "step": 11786 }, { - "epoch": 0.3340134319476324, + "epoch": 0.4611863213083966, "grad_norm": 0.0, - "learning_rate": 1.5531927774845926e-05, - "loss": 1.0965, + "learning_rate": 1.173426310324312e-05, + "loss": 1.0953, "step": 11787 }, { - "epoch": 0.3340417693898949, + "epoch": 0.46122544800062604, "grad_norm": 0.0, - "learning_rate": 1.553116318419985e-05, - "loss": 0.8403, + "learning_rate": 1.1733015060752993e-05, + "loss": 1.1759, "step": 11788 }, { - "epoch": 0.3340701068321573, + "epoch": 0.4612645746928555, "grad_norm": 0.0, - "learning_rate": 1.553039854696273e-05, - "loss": 1.0665, + "learning_rate": 1.1731766990432812e-05, + "loss": 1.0547, "step": 11789 }, { - "epoch": 0.3340984442744198, + "epoch": 0.4613037013850849, "grad_norm": 0.0, - "learning_rate": 1.5529633863141008e-05, - "loss": 0.9401, + "learning_rate": 1.1730518892302625e-05, + "loss": 1.109, "step": 11790 }, { - "epoch": 0.33412678171668225, + "epoch": 0.46134282807731436, "grad_norm": 0.0, - "learning_rate": 1.5528869132741127e-05, - "loss": 0.9903, + "learning_rate": 1.1729270766382474e-05, + "loss": 1.1371, "step": 11791 }, { - "epoch": 0.3341551191589447, + "epoch": 0.4613819547695438, "grad_norm": 0.0, - "learning_rate": 1.5528104355769527e-05, - "loss": 0.8286, + "learning_rate": 1.1728022612692397e-05, + "loss": 1.0878, "step": 11792 }, { - "epoch": 0.3341834566012072, + "epoch": 0.46142108146177324, "grad_norm": 0.0, - "learning_rate": 1.5527339532232657e-05, - "loss": 0.8073, + "learning_rate": 1.1726774431252447e-05, + "loss": 1.1953, "step": 11793 }, { - "epoch": 0.3342117940434696, + "epoch": 0.4614602081540027, "grad_norm": 0.0, - "learning_rate": 1.5526574662136948e-05, - "loss": 0.961, + "learning_rate": 1.1725526222082663e-05, + "loss": 0.9344, "step": 11794 }, { - "epoch": 0.3342401314857321, + "epoch": 0.4614993348462321, "grad_norm": 0.0, - "learning_rate": 1.552580974548885e-05, - "loss": 1.005, + "learning_rate": 1.1724277985203087e-05, + "loss": 1.113, "step": 11795 }, { - "epoch": 0.33426846892799456, + "epoch": 0.46153846153846156, "grad_norm": 0.0, - "learning_rate": 1.5525044782294804e-05, - "loss": 1.0328, + "learning_rate": 1.1723029720633772e-05, + "loss": 1.0627, "step": 11796 }, { - "epoch": 0.334296806370257, + "epoch": 0.461577588230691, "grad_norm": 0.0, - "learning_rate": 1.5524279772561257e-05, - "loss": 1.0314, + "learning_rate": 1.1721781428394755e-05, + "loss": 1.0675, "step": 11797 }, { - "epoch": 0.3343251438125195, + "epoch": 0.4616167149229204, "grad_norm": 0.0, - "learning_rate": 1.5523514716294648e-05, - "loss": 0.9089, + "learning_rate": 1.1720533108506085e-05, + "loss": 1.0317, "step": 11798 }, { - "epoch": 0.3343534812547819, + "epoch": 0.46165584161514983, "grad_norm": 0.0, - "learning_rate": 1.5522749613501424e-05, - "loss": 0.9888, + "learning_rate": 1.1719284760987811e-05, + "loss": 0.972, "step": 11799 }, { - "epoch": 0.3343818186970444, + "epoch": 0.46169496830737927, "grad_norm": 0.0, - "learning_rate": 1.5521984464188025e-05, - "loss": 0.9289, + "learning_rate": 1.171803638585998e-05, + "loss": 1.1053, "step": 11800 }, { - "epoch": 0.33441015613930686, + "epoch": 0.4617340949996087, "grad_norm": 0.0, - "learning_rate": 1.5521219268360907e-05, - "loss": 0.9842, + "learning_rate": 1.1716787983142633e-05, + "loss": 1.0543, "step": 11801 }, { - "epoch": 0.33443849358156935, + "epoch": 0.46177322169183815, "grad_norm": 0.0, - "learning_rate": 1.5520454026026506e-05, - "loss": 0.9223, + "learning_rate": 1.1715539552855823e-05, + "loss": 0.9856, "step": 11802 }, { - "epoch": 0.3344668310238318, + "epoch": 0.4618123483840676, "grad_norm": 0.0, - "learning_rate": 1.551968873719127e-05, - "loss": 0.8911, + "learning_rate": 1.1714291095019598e-05, + "loss": 1.0536, "step": 11803 }, { - "epoch": 0.33449516846609423, + "epoch": 0.46185147507629704, "grad_norm": 0.0, - "learning_rate": 1.5518923401861647e-05, - "loss": 0.9031, + "learning_rate": 1.1713042609654008e-05, + "loss": 0.9792, "step": 11804 }, { - "epoch": 0.3345235059083567, + "epoch": 0.4618906017685265, "grad_norm": 0.0, - "learning_rate": 1.5518158020044084e-05, - "loss": 0.874, + "learning_rate": 1.1711794096779102e-05, + "loss": 1.1485, "step": 11805 }, { - "epoch": 0.33455184335061916, + "epoch": 0.4619297284607559, "grad_norm": 0.0, - "learning_rate": 1.5517392591745023e-05, - "loss": 0.9903, + "learning_rate": 1.1710545556414922e-05, + "loss": 0.9562, "step": 11806 }, { - "epoch": 0.33458018079288165, + "epoch": 0.46196885515298536, "grad_norm": 0.0, - "learning_rate": 1.5516627116970917e-05, - "loss": 0.8527, + "learning_rate": 1.1709296988581528e-05, + "loss": 1.1322, "step": 11807 }, { - "epoch": 0.3346085182351441, + "epoch": 0.4620079818452148, "grad_norm": 0.0, - "learning_rate": 1.5515861595728214e-05, - "loss": 0.9175, + "learning_rate": 1.1708048393298965e-05, + "loss": 1.0348, "step": 11808 }, { - "epoch": 0.33463685567740653, + "epoch": 0.46204710853744424, "grad_norm": 0.0, - "learning_rate": 1.551509602802336e-05, - "loss": 0.923, + "learning_rate": 1.1706799770587287e-05, + "loss": 1.1039, "step": 11809 }, { - "epoch": 0.334665193119669, + "epoch": 0.4620862352296737, "grad_norm": 0.0, - "learning_rate": 1.5514330413862802e-05, - "loss": 0.8512, + "learning_rate": 1.1705551120466541e-05, + "loss": 1.1593, "step": 11810 }, { - "epoch": 0.33469353056193146, + "epoch": 0.4621253619219031, "grad_norm": 0.0, - "learning_rate": 1.5513564753252995e-05, - "loss": 0.8734, + "learning_rate": 1.1704302442956784e-05, + "loss": 1.0051, "step": 11811 }, { - "epoch": 0.33472186800419396, + "epoch": 0.46216448861413256, "grad_norm": 0.0, - "learning_rate": 1.551279904620038e-05, - "loss": 0.9442, + "learning_rate": 1.170305373807806e-05, + "loss": 1.0981, "step": 11812 }, { - "epoch": 0.3347502054464564, + "epoch": 0.462203615306362, "grad_norm": 0.0, - "learning_rate": 1.5512033292711415e-05, - "loss": 0.8549, + "learning_rate": 1.1701805005850434e-05, + "loss": 1.1074, "step": 11813 }, { - "epoch": 0.3347785428887189, + "epoch": 0.46224274199859144, "grad_norm": 0.0, - "learning_rate": 1.551126749279255e-05, - "loss": 0.9521, + "learning_rate": 1.1700556246293948e-05, + "loss": 1.1061, "step": 11814 }, { - "epoch": 0.3348068803309813, + "epoch": 0.4622818686908209, "grad_norm": 0.0, - "learning_rate": 1.5510501646450222e-05, - "loss": 0.9945, + "learning_rate": 1.1699307459428657e-05, + "loss": 1.1001, "step": 11815 }, { - "epoch": 0.33483521777324377, + "epoch": 0.4623209953830503, "grad_norm": 0.0, - "learning_rate": 1.5509735753690903e-05, - "loss": 0.8501, + "learning_rate": 1.1698058645274623e-05, + "loss": 1.1346, "step": 11816 }, { - "epoch": 0.33486355521550626, + "epoch": 0.46236012207527977, "grad_norm": 0.0, - "learning_rate": 1.5508969814521026e-05, - "loss": 0.9302, + "learning_rate": 1.1696809803851891e-05, + "loss": 0.9414, "step": 11817 }, { - "epoch": 0.3348918926577687, + "epoch": 0.4623992487675092, "grad_norm": 0.0, - "learning_rate": 1.5508203828947056e-05, - "loss": 1.0052, + "learning_rate": 1.1695560935180523e-05, + "loss": 1.1785, "step": 11818 }, { - "epoch": 0.3349202301000312, + "epoch": 0.46243837545973865, "grad_norm": 0.0, - "learning_rate": 1.5507437796975436e-05, - "loss": 0.9578, + "learning_rate": 1.1694312039280567e-05, + "loss": 1.1566, "step": 11819 }, { - "epoch": 0.33494856754229363, + "epoch": 0.4624775021519681, "grad_norm": 0.0, - "learning_rate": 1.5506671718612624e-05, - "loss": 0.9332, + "learning_rate": 1.1693063116172087e-05, + "loss": 0.986, "step": 11820 }, { - "epoch": 0.33497690498455607, + "epoch": 0.46251662884419753, "grad_norm": 0.0, - "learning_rate": 1.5505905593865073e-05, - "loss": 1.0579, + "learning_rate": 1.169181416587513e-05, + "loss": 1.101, "step": 11821 }, { - "epoch": 0.33500524242681856, + "epoch": 0.46255575553642697, "grad_norm": 0.0, - "learning_rate": 1.550513942273923e-05, - "loss": 0.9612, + "learning_rate": 1.1690565188409764e-05, + "loss": 1.1465, "step": 11822 }, { - "epoch": 0.335033579869081, + "epoch": 0.4625948822286564, "grad_norm": 0.0, - "learning_rate": 1.5504373205241558e-05, - "loss": 0.9767, + "learning_rate": 1.1689316183796035e-05, + "loss": 0.9996, "step": 11823 }, { - "epoch": 0.3350619173113435, + "epoch": 0.46263400892088585, "grad_norm": 0.0, - "learning_rate": 1.5503606941378504e-05, - "loss": 0.9403, + "learning_rate": 1.1688067152054005e-05, + "loss": 1.1744, "step": 11824 }, { - "epoch": 0.33509025475360593, + "epoch": 0.4626731356131153, "grad_norm": 0.0, - "learning_rate": 1.5502840631156524e-05, - "loss": 0.8877, + "learning_rate": 1.1686818093203733e-05, + "loss": 1.0582, "step": 11825 }, { - "epoch": 0.3351185921958684, + "epoch": 0.4627122623053447, "grad_norm": 0.0, - "learning_rate": 1.5502074274582075e-05, - "loss": 1.0768, + "learning_rate": 1.1685569007265277e-05, + "loss": 1.1245, "step": 11826 }, { - "epoch": 0.33514692963813086, + "epoch": 0.4627513889975741, "grad_norm": 0.0, - "learning_rate": 1.5501307871661612e-05, - "loss": 0.8656, + "learning_rate": 1.1684319894258693e-05, + "loss": 0.955, "step": 11827 }, { - "epoch": 0.3351752670803933, + "epoch": 0.46279051568980356, "grad_norm": 0.0, - "learning_rate": 1.5500541422401592e-05, - "loss": 1.0331, + "learning_rate": 1.1683070754204043e-05, + "loss": 0.9966, "step": 11828 }, { - "epoch": 0.3352036045226558, + "epoch": 0.462829642382033, "grad_norm": 0.0, - "learning_rate": 1.5499774926808468e-05, - "loss": 0.9233, + "learning_rate": 1.1681821587121385e-05, + "loss": 1.0824, "step": 11829 }, { - "epoch": 0.33523194196491823, + "epoch": 0.46286876907426244, "grad_norm": 0.0, - "learning_rate": 1.5499008384888692e-05, - "loss": 0.9298, + "learning_rate": 1.168057239303078e-05, + "loss": 1.231, "step": 11830 }, { - "epoch": 0.33526027940718073, + "epoch": 0.4629078957664919, "grad_norm": 0.0, - "learning_rate": 1.5498241796648733e-05, - "loss": 0.9908, + "learning_rate": 1.1679323171952287e-05, + "loss": 1.1329, "step": 11831 }, { - "epoch": 0.33528861684944317, + "epoch": 0.4629470224587213, "grad_norm": 0.0, - "learning_rate": 1.549747516209504e-05, - "loss": 0.9191, + "learning_rate": 1.167807392390597e-05, + "loss": 1.034, "step": 11832 }, { - "epoch": 0.3353169542917056, + "epoch": 0.46298614915095077, "grad_norm": 0.0, - "learning_rate": 1.549670848123407e-05, - "loss": 0.9209, + "learning_rate": 1.167682464891189e-05, + "loss": 1.1674, "step": 11833 }, { - "epoch": 0.3353452917339681, + "epoch": 0.4630252758431802, "grad_norm": 0.0, - "learning_rate": 1.5495941754072285e-05, - "loss": 0.9226, + "learning_rate": 1.1675575346990105e-05, + "loss": 1.1732, "step": 11834 }, { - "epoch": 0.33537362917623054, + "epoch": 0.46306440253540965, "grad_norm": 0.0, - "learning_rate": 1.549517498061614e-05, - "loss": 0.9456, + "learning_rate": 1.167432601816068e-05, + "loss": 1.0161, "step": 11835 }, { - "epoch": 0.33540196661849303, + "epoch": 0.4631035292276391, "grad_norm": 0.0, - "learning_rate": 1.5494408160872096e-05, - "loss": 0.9077, + "learning_rate": 1.1673076662443678e-05, + "loss": 1.009, "step": 11836 }, { - "epoch": 0.33543030406075547, + "epoch": 0.46314265591986853, "grad_norm": 0.0, - "learning_rate": 1.5493641294846615e-05, - "loss": 0.9326, + "learning_rate": 1.167182727985916e-05, + "loss": 0.9124, "step": 11837 }, { - "epoch": 0.33545864150301796, + "epoch": 0.46318178261209797, "grad_norm": 0.0, - "learning_rate": 1.549287438254615e-05, - "loss": 1.0007, + "learning_rate": 1.1670577870427191e-05, + "loss": 1.0091, "step": 11838 }, { - "epoch": 0.3354869789452804, + "epoch": 0.4632209093043274, "grad_norm": 0.0, - "learning_rate": 1.5492107423977167e-05, - "loss": 0.9105, + "learning_rate": 1.1669328434167835e-05, + "loss": 1.0956, "step": 11839 }, { - "epoch": 0.33551531638754284, + "epoch": 0.46326003599655685, "grad_norm": 0.0, - "learning_rate": 1.549134041914612e-05, - "loss": 1.0097, + "learning_rate": 1.1668078971101155e-05, + "loss": 1.2191, "step": 11840 }, { - "epoch": 0.33554365382980533, + "epoch": 0.4632991626887863, "grad_norm": 0.0, - "learning_rate": 1.5490573368059475e-05, - "loss": 1.0392, + "learning_rate": 1.166682948124722e-05, + "loss": 1.187, "step": 11841 }, { - "epoch": 0.33557199127206777, + "epoch": 0.46333828938101573, "grad_norm": 0.0, - "learning_rate": 1.5489806270723695e-05, - "loss": 0.9662, + "learning_rate": 1.166557996462609e-05, + "loss": 1.2146, "step": 11842 }, { - "epoch": 0.33560032871433026, + "epoch": 0.4633774160732452, "grad_norm": 0.0, - "learning_rate": 1.5489039127145236e-05, - "loss": 0.9414, + "learning_rate": 1.1664330421257835e-05, + "loss": 1.0748, "step": 11843 }, { - "epoch": 0.3356286661565927, + "epoch": 0.4634165427654746, "grad_norm": 0.0, - "learning_rate": 1.5488271937330562e-05, - "loss": 0.8892, + "learning_rate": 1.1663080851162515e-05, + "loss": 1.0368, "step": 11844 }, { - "epoch": 0.33565700359885514, + "epoch": 0.46345566945770406, "grad_norm": 0.0, - "learning_rate": 1.5487504701286134e-05, - "loss": 0.911, + "learning_rate": 1.1661831254360203e-05, + "loss": 1.0478, "step": 11845 }, { - "epoch": 0.33568534104111764, + "epoch": 0.4634947961499335, "grad_norm": 0.0, - "learning_rate": 1.5486737419018417e-05, - "loss": 0.9669, + "learning_rate": 1.1660581630870962e-05, + "loss": 1.0518, "step": 11846 }, { - "epoch": 0.3357136784833801, + "epoch": 0.46353392284216294, "grad_norm": 0.0, - "learning_rate": 1.5485970090533875e-05, - "loss": 0.9496, + "learning_rate": 1.1659331980714863e-05, + "loss": 0.948, "step": 11847 }, { - "epoch": 0.33574201592564257, + "epoch": 0.4635730495343924, "grad_norm": 0.0, - "learning_rate": 1.5485202715838966e-05, - "loss": 0.9056, + "learning_rate": 1.1658082303911969e-05, + "loss": 0.9818, "step": 11848 }, { - "epoch": 0.335770353367905, + "epoch": 0.4636121762266218, "grad_norm": 0.0, - "learning_rate": 1.548443529494016e-05, - "loss": 0.8697, + "learning_rate": 1.1656832600482354e-05, + "loss": 1.0379, "step": 11849 }, { - "epoch": 0.3357986908101675, + "epoch": 0.46365130291885126, "grad_norm": 0.0, - "learning_rate": 1.548366782784392e-05, - "loss": 0.9557, + "learning_rate": 1.1655582870446081e-05, + "loss": 0.9868, "step": 11850 }, { - "epoch": 0.33582702825242994, + "epoch": 0.4636904296110807, "grad_norm": 0.0, - "learning_rate": 1.5482900314556707e-05, - "loss": 0.9305, + "learning_rate": 1.1654333113823222e-05, + "loss": 1.0886, "step": 11851 }, { - "epoch": 0.3358553656946924, + "epoch": 0.46372955630331014, "grad_norm": 0.0, - "learning_rate": 1.548213275508499e-05, - "loss": 0.9305, + "learning_rate": 1.1653083330633848e-05, + "loss": 0.9806, "step": 11852 }, { - "epoch": 0.33588370313695487, + "epoch": 0.4637686829955396, "grad_norm": 0.0, - "learning_rate": 1.5481365149435235e-05, - "loss": 0.9708, + "learning_rate": 1.1651833520898023e-05, + "loss": 1.0051, "step": 11853 }, { - "epoch": 0.3359120405792173, + "epoch": 0.463807809687769, "grad_norm": 0.0, - "learning_rate": 1.5480597497613903e-05, - "loss": 0.9453, + "learning_rate": 1.1650583684635828e-05, + "loss": 1.0712, "step": 11854 }, { - "epoch": 0.3359403780214798, + "epoch": 0.4638469363799984, "grad_norm": 0.0, - "learning_rate": 1.5479829799627464e-05, - "loss": 0.9876, + "learning_rate": 1.1649333821867321e-05, + "loss": 1.0394, "step": 11855 }, { - "epoch": 0.33596871546374224, + "epoch": 0.46388606307222785, "grad_norm": 0.0, - "learning_rate": 1.5479062055482384e-05, - "loss": 0.899, + "learning_rate": 1.1648083932612584e-05, + "loss": 1.0132, "step": 11856 }, { - "epoch": 0.3359970529060047, + "epoch": 0.4639251897644573, "grad_norm": 0.0, - "learning_rate": 1.547829426518513e-05, - "loss": 0.9095, + "learning_rate": 1.1646834016891682e-05, + "loss": 1.0079, "step": 11857 }, { - "epoch": 0.33602539034826717, + "epoch": 0.46396431645668673, "grad_norm": 0.0, - "learning_rate": 1.547752642874217e-05, - "loss": 0.9083, + "learning_rate": 1.164558407472469e-05, + "loss": 1.0117, "step": 11858 }, { - "epoch": 0.3360537277905296, + "epoch": 0.4640034431489162, "grad_norm": 0.0, - "learning_rate": 1.5476758546159966e-05, - "loss": 0.9303, + "learning_rate": 1.1644334106131678e-05, + "loss": 1.0501, "step": 11859 }, { - "epoch": 0.3360820652327921, + "epoch": 0.4640425698411456, "grad_norm": 0.0, - "learning_rate": 1.5475990617444997e-05, - "loss": 0.9117, + "learning_rate": 1.1643084111132723e-05, + "loss": 1.1385, "step": 11860 }, { - "epoch": 0.33611040267505454, + "epoch": 0.46408169653337505, "grad_norm": 0.0, - "learning_rate": 1.547522264260372e-05, - "loss": 0.9339, + "learning_rate": 1.1641834089747895e-05, + "loss": 0.9388, "step": 11861 }, { - "epoch": 0.33613874011731704, + "epoch": 0.4641208232256045, "grad_norm": 0.0, - "learning_rate": 1.5474454621642613e-05, - "loss": 0.8533, + "learning_rate": 1.1640584041997269e-05, + "loss": 1.1105, "step": 11862 }, { - "epoch": 0.3361670775595795, + "epoch": 0.46415994991783394, "grad_norm": 0.0, - "learning_rate": 1.5473686554568143e-05, - "loss": 1.0122, + "learning_rate": 1.1639333967900921e-05, + "loss": 1.0226, "step": 11863 }, { - "epoch": 0.3361954150018419, + "epoch": 0.4641990766100634, "grad_norm": 0.0, - "learning_rate": 1.5472918441386776e-05, - "loss": 0.973, + "learning_rate": 1.163808386747892e-05, + "loss": 1.0482, "step": 11864 }, { - "epoch": 0.3362237524441044, + "epoch": 0.4642382033022928, "grad_norm": 0.0, - "learning_rate": 1.5472150282104988e-05, - "loss": 0.9374, + "learning_rate": 1.1636833740751348e-05, + "loss": 1.0814, "step": 11865 }, { - "epoch": 0.33625208988636685, + "epoch": 0.46427732999452226, "grad_norm": 0.0, - "learning_rate": 1.547138207672924e-05, - "loss": 0.8057, + "learning_rate": 1.1635583587738276e-05, + "loss": 1.1147, "step": 11866 }, { - "epoch": 0.33628042732862934, + "epoch": 0.4643164566867517, "grad_norm": 0.0, - "learning_rate": 1.5470613825266016e-05, - "loss": 0.9777, + "learning_rate": 1.1634333408459782e-05, + "loss": 0.9775, "step": 11867 }, { - "epoch": 0.3363087647708918, + "epoch": 0.46435558337898114, "grad_norm": 0.0, - "learning_rate": 1.5469845527721776e-05, - "loss": 0.9752, + "learning_rate": 1.1633083202935937e-05, + "loss": 1.1097, "step": 11868 }, { - "epoch": 0.3363371022131542, + "epoch": 0.4643947100712106, "grad_norm": 0.0, - "learning_rate": 1.5469077184103e-05, - "loss": 0.9756, + "learning_rate": 1.1631832971186827e-05, + "loss": 1.1918, "step": 11869 }, { - "epoch": 0.3363654396554167, + "epoch": 0.46443383676344, "grad_norm": 0.0, - "learning_rate": 1.5468308794416152e-05, - "loss": 0.9867, + "learning_rate": 1.1630582713232524e-05, + "loss": 1.0894, "step": 11870 }, { - "epoch": 0.33639377709767915, + "epoch": 0.46447296345566946, "grad_norm": 0.0, - "learning_rate": 1.546754035866771e-05, - "loss": 0.9964, + "learning_rate": 1.1629332429093103e-05, + "loss": 1.0604, "step": 11871 }, { - "epoch": 0.33642211453994164, + "epoch": 0.4645120901478989, "grad_norm": 0.0, - "learning_rate": 1.5466771876864143e-05, - "loss": 0.9619, + "learning_rate": 1.1628082118788648e-05, + "loss": 1.076, "step": 11872 }, { - "epoch": 0.3364504519822041, + "epoch": 0.46455121684012834, "grad_norm": 0.0, - "learning_rate": 1.546600334901193e-05, - "loss": 0.8945, + "learning_rate": 1.1626831782339232e-05, + "loss": 1.0044, "step": 11873 }, { - "epoch": 0.3364787894244666, + "epoch": 0.4645903435323578, "grad_norm": 0.0, - "learning_rate": 1.5465234775117538e-05, - "loss": 1.0291, + "learning_rate": 1.1625581419764938e-05, + "loss": 1.2274, "step": 11874 }, { - "epoch": 0.336507126866729, + "epoch": 0.4646294702245872, "grad_norm": 0.0, - "learning_rate": 1.5464466155187445e-05, - "loss": 1.1409, + "learning_rate": 1.1624331031085843e-05, + "loss": 0.9863, "step": 11875 }, { - "epoch": 0.33653546430899145, + "epoch": 0.46466859691681667, "grad_norm": 0.0, - "learning_rate": 1.5463697489228125e-05, - "loss": 0.962, + "learning_rate": 1.1623080616322024e-05, + "loss": 1.2013, "step": 11876 }, { - "epoch": 0.33656380175125394, + "epoch": 0.4647077236090461, "grad_norm": 0.0, - "learning_rate": 1.546292877724605e-05, - "loss": 0.9522, + "learning_rate": 1.162183017549357e-05, + "loss": 0.9312, "step": 11877 }, { - "epoch": 0.3365921391935164, + "epoch": 0.46474685030127555, "grad_norm": 0.0, - "learning_rate": 1.5462160019247702e-05, - "loss": 0.9489, + "learning_rate": 1.1620579708620551e-05, + "loss": 0.9976, "step": 11878 }, { - "epoch": 0.3366204766357789, + "epoch": 0.464785976993505, "grad_norm": 0.0, - "learning_rate": 1.546139121523955e-05, - "loss": 0.8708, + "learning_rate": 1.1619329215723052e-05, + "loss": 1.1223, "step": 11879 }, { - "epoch": 0.3366488140780413, + "epoch": 0.46482510368573443, "grad_norm": 0.0, - "learning_rate": 1.546062236522807e-05, - "loss": 0.9695, + "learning_rate": 1.1618078696821157e-05, + "loss": 1.0848, "step": 11880 }, { - "epoch": 0.33667715152030375, + "epoch": 0.46486423037796387, "grad_norm": 0.0, - "learning_rate": 1.5459853469219738e-05, - "loss": 1.0096, + "learning_rate": 1.1616828151934944e-05, + "loss": 0.9857, "step": 11881 }, { - "epoch": 0.33670548896256625, + "epoch": 0.4649033570701933, "grad_norm": 0.0, - "learning_rate": 1.5459084527221037e-05, - "loss": 1.0209, + "learning_rate": 1.16155775810845e-05, + "loss": 1.0494, "step": 11882 }, { - "epoch": 0.3367338264048287, + "epoch": 0.4649424837624227, "grad_norm": 0.0, - "learning_rate": 1.545831553923844e-05, - "loss": 0.9272, + "learning_rate": 1.1614326984289902e-05, + "loss": 1.1032, "step": 11883 }, { - "epoch": 0.3367621638470912, + "epoch": 0.46498161045465214, "grad_norm": 0.0, - "learning_rate": 1.545754650527842e-05, - "loss": 0.9933, + "learning_rate": 1.1613076361571236e-05, + "loss": 1.0693, "step": 11884 }, { - "epoch": 0.3367905012893536, + "epoch": 0.4650207371468816, "grad_norm": 0.0, - "learning_rate": 1.5456777425347462e-05, - "loss": 1.0566, + "learning_rate": 1.1611825712948588e-05, + "loss": 0.99, "step": 11885 }, { - "epoch": 0.3368188387316161, + "epoch": 0.465059863839111, "grad_norm": 0.0, - "learning_rate": 1.545600829945204e-05, - "loss": 0.8874, + "learning_rate": 1.1610575038442033e-05, + "loss": 1.1165, "step": 11886 }, { - "epoch": 0.33684717617387855, + "epoch": 0.46509899053134046, "grad_norm": 0.0, - "learning_rate": 1.5455239127598636e-05, - "loss": 0.9, + "learning_rate": 1.1609324338071666e-05, + "loss": 1.171, "step": 11887 }, { - "epoch": 0.336875513616141, + "epoch": 0.4651381172235699, "grad_norm": 0.0, - "learning_rate": 1.5454469909793726e-05, - "loss": 1.0211, + "learning_rate": 1.1608073611857562e-05, + "loss": 1.0317, "step": 11888 }, { - "epoch": 0.3369038510584035, + "epoch": 0.46517724391579934, "grad_norm": 0.0, - "learning_rate": 1.5453700646043793e-05, - "loss": 0.9323, + "learning_rate": 1.1606822859819815e-05, + "loss": 0.9563, "step": 11889 }, { - "epoch": 0.3369321885006659, + "epoch": 0.4652163706080288, "grad_norm": 0.0, - "learning_rate": 1.545293133635531e-05, - "loss": 0.9479, + "learning_rate": 1.1605572081978504e-05, + "loss": 1.121, "step": 11890 }, { - "epoch": 0.3369605259429284, + "epoch": 0.4652554973002582, "grad_norm": 0.0, - "learning_rate": 1.5452161980734764e-05, - "loss": 0.9786, + "learning_rate": 1.1604321278353718e-05, + "loss": 1.0925, "step": 11891 }, { - "epoch": 0.33698886338519085, + "epoch": 0.46529462399248767, "grad_norm": 0.0, - "learning_rate": 1.5451392579188635e-05, - "loss": 0.9918, + "learning_rate": 1.1603070448965543e-05, + "loss": 1.0268, "step": 11892 }, { - "epoch": 0.3370172008274533, + "epoch": 0.4653337506847171, "grad_norm": 0.0, - "learning_rate": 1.54506231317234e-05, - "loss": 0.9104, + "learning_rate": 1.1601819593834066e-05, + "loss": 1.1469, "step": 11893 }, { - "epoch": 0.3370455382697158, + "epoch": 0.46537287737694655, "grad_norm": 0.0, - "learning_rate": 1.5449853638345538e-05, - "loss": 0.9087, + "learning_rate": 1.1600568712979371e-05, + "loss": 1.059, "step": 11894 }, { - "epoch": 0.3370738757119782, + "epoch": 0.465412004069176, "grad_norm": 0.0, - "learning_rate": 1.544908409906154e-05, - "loss": 0.9147, + "learning_rate": 1.1599317806421548e-05, + "loss": 0.9977, "step": 11895 }, { - "epoch": 0.3371022131542407, + "epoch": 0.46545113076140543, "grad_norm": 0.0, - "learning_rate": 1.544831451387788e-05, - "loss": 0.9501, + "learning_rate": 1.1598066874180689e-05, + "loss": 0.9745, "step": 11896 }, { - "epoch": 0.33713055059650315, + "epoch": 0.46549025745363487, "grad_norm": 0.0, - "learning_rate": 1.5447544882801046e-05, - "loss": 0.9679, + "learning_rate": 1.1596815916276876e-05, + "loss": 0.9919, "step": 11897 }, { - "epoch": 0.33715888803876565, + "epoch": 0.4655293841458643, "grad_norm": 0.0, - "learning_rate": 1.5446775205837518e-05, - "loss": 0.9778, + "learning_rate": 1.15955649327302e-05, + "loss": 1.1173, "step": 11898 }, { - "epoch": 0.3371872254810281, + "epoch": 0.46556851083809375, "grad_norm": 0.0, - "learning_rate": 1.5446005482993783e-05, - "loss": 0.907, + "learning_rate": 1.159431392356075e-05, + "loss": 1.065, "step": 11899 }, { - "epoch": 0.3372155629232905, + "epoch": 0.4656076375303232, "grad_norm": 0.0, - "learning_rate": 1.544523571427632e-05, - "loss": 0.983, + "learning_rate": 1.1593062888788617e-05, + "loss": 0.9721, "step": 11900 }, { - "epoch": 0.337243900365553, + "epoch": 0.46564676422255263, "grad_norm": 0.0, - "learning_rate": 1.5444465899691612e-05, - "loss": 0.8768, + "learning_rate": 1.1591811828433892e-05, + "loss": 1.172, "step": 11901 }, { - "epoch": 0.33727223780781546, + "epoch": 0.4656858909147821, "grad_norm": 0.0, - "learning_rate": 1.5443696039246145e-05, - "loss": 0.9589, + "learning_rate": 1.1590560742516662e-05, + "loss": 1.0184, "step": 11902 }, { - "epoch": 0.33730057525007795, + "epoch": 0.4657250176070115, "grad_norm": 0.0, - "learning_rate": 1.544292613294641e-05, - "loss": 0.9532, + "learning_rate": 1.1589309631057019e-05, + "loss": 1.0962, "step": 11903 }, { - "epoch": 0.3373289126923404, + "epoch": 0.46576414429924096, "grad_norm": 0.0, - "learning_rate": 1.5442156180798883e-05, - "loss": 0.9994, + "learning_rate": 1.1588058494075054e-05, + "loss": 1.106, "step": 11904 }, { - "epoch": 0.3373572501346028, + "epoch": 0.4658032709914704, "grad_norm": 0.0, - "learning_rate": 1.5441386182810055e-05, - "loss": 1.0034, + "learning_rate": 1.1586807331590864e-05, + "loss": 0.9529, "step": 11905 }, { - "epoch": 0.3373855875768653, + "epoch": 0.46584239768369984, "grad_norm": 0.0, - "learning_rate": 1.5440616138986407e-05, - "loss": 0.877, + "learning_rate": 1.1585556143624532e-05, + "loss": 1.078, "step": 11906 }, { - "epoch": 0.33741392501912776, + "epoch": 0.4658815243759293, "grad_norm": 0.0, - "learning_rate": 1.5439846049334434e-05, - "loss": 0.9565, + "learning_rate": 1.1584304930196158e-05, + "loss": 1.0146, "step": 11907 }, { - "epoch": 0.33744226246139025, + "epoch": 0.4659206510681587, "grad_norm": 0.0, - "learning_rate": 1.5439075913860615e-05, - "loss": 1.0421, + "learning_rate": 1.1583053691325829e-05, + "loss": 1.0073, "step": 11908 }, { - "epoch": 0.3374705999036527, + "epoch": 0.46595977776038816, "grad_norm": 0.0, - "learning_rate": 1.5438305732571445e-05, - "loss": 0.949, + "learning_rate": 1.1581802427033644e-05, + "loss": 1.0214, "step": 11909 }, { - "epoch": 0.3374989373459152, + "epoch": 0.4659989044526176, "grad_norm": 0.0, - "learning_rate": 1.5437535505473398e-05, - "loss": 0.9176, + "learning_rate": 1.1580551137339696e-05, + "loss": 0.9201, "step": 11910 }, { - "epoch": 0.3375272747881776, + "epoch": 0.466038031144847, "grad_norm": 0.0, - "learning_rate": 1.5436765232572976e-05, - "loss": 0.8717, + "learning_rate": 1.1579299822264073e-05, + "loss": 1.143, "step": 11911 }, { - "epoch": 0.33755561223044006, + "epoch": 0.46607715783707643, "grad_norm": 0.0, - "learning_rate": 1.5435994913876657e-05, - "loss": 0.9365, + "learning_rate": 1.1578048481826875e-05, + "loss": 1.0067, "step": 11912 }, { - "epoch": 0.33758394967270255, + "epoch": 0.46611628452930587, "grad_norm": 0.0, - "learning_rate": 1.543522454939094e-05, - "loss": 0.8515, + "learning_rate": 1.1576797116048195e-05, + "loss": 1.1806, "step": 11913 }, { - "epoch": 0.337612287114965, + "epoch": 0.4661554112215353, "grad_norm": 0.0, - "learning_rate": 1.5434454139122302e-05, - "loss": 0.934, + "learning_rate": 1.1575545724948131e-05, + "loss": 1.1686, "step": 11914 }, { - "epoch": 0.3376406245572275, + "epoch": 0.46619453791376475, "grad_norm": 0.0, - "learning_rate": 1.5433683683077243e-05, - "loss": 1.0717, + "learning_rate": 1.1574294308546776e-05, + "loss": 0.9871, "step": 11915 }, { - "epoch": 0.3376689619994899, + "epoch": 0.4662336646059942, "grad_norm": 0.0, - "learning_rate": 1.543291318126225e-05, - "loss": 0.9634, + "learning_rate": 1.1573042866864229e-05, + "loss": 1.0537, "step": 11916 }, { - "epoch": 0.33769729944175236, + "epoch": 0.46627279129822363, "grad_norm": 0.0, - "learning_rate": 1.543214263368381e-05, - "loss": 0.8601, + "learning_rate": 1.1571791399920582e-05, + "loss": 0.9816, "step": 11917 }, { - "epoch": 0.33772563688401486, + "epoch": 0.4663119179904531, "grad_norm": 0.0, - "learning_rate": 1.5431372040348414e-05, - "loss": 0.9446, + "learning_rate": 1.1570539907735937e-05, + "loss": 1.0626, "step": 11918 }, { - "epoch": 0.3377539743262773, + "epoch": 0.4663510446826825, "grad_norm": 0.0, - "learning_rate": 1.5430601401262554e-05, - "loss": 0.9015, + "learning_rate": 1.1569288390330389e-05, + "loss": 1.1676, "step": 11919 }, { - "epoch": 0.3377823117685398, + "epoch": 0.46639017137491195, "grad_norm": 0.0, - "learning_rate": 1.5429830716432723e-05, - "loss": 0.9427, + "learning_rate": 1.1568036847724033e-05, + "loss": 1.1257, "step": 11920 }, { - "epoch": 0.3378106492108022, + "epoch": 0.4664292980671414, "grad_norm": 0.0, - "learning_rate": 1.5429059985865414e-05, - "loss": 1.0802, + "learning_rate": 1.1566785279936972e-05, + "loss": 1.134, "step": 11921 }, { - "epoch": 0.3378389866530647, + "epoch": 0.46646842475937084, "grad_norm": 0.0, - "learning_rate": 1.5428289209567114e-05, - "loss": 1.0282, + "learning_rate": 1.1565533686989302e-05, + "loss": 0.9774, "step": 11922 }, { - "epoch": 0.33786732409532716, + "epoch": 0.4665075514516003, "grad_norm": 0.0, - "learning_rate": 1.5427518387544316e-05, - "loss": 0.9832, + "learning_rate": 1.1564282068901125e-05, + "loss": 1.0604, "step": 11923 }, { - "epoch": 0.3378956615375896, + "epoch": 0.4665466781438297, "grad_norm": 0.0, - "learning_rate": 1.5426747519803518e-05, - "loss": 1.1251, + "learning_rate": 1.1563030425692536e-05, + "loss": 1.1283, "step": 11924 }, { - "epoch": 0.3379239989798521, + "epoch": 0.46658580483605916, "grad_norm": 0.0, - "learning_rate": 1.542597660635121e-05, - "loss": 0.9839, + "learning_rate": 1.1561778757383639e-05, + "loss": 1.0659, "step": 11925 }, { - "epoch": 0.33795233642211453, + "epoch": 0.4666249315282886, "grad_norm": 0.0, - "learning_rate": 1.5425205647193887e-05, - "loss": 1.025, + "learning_rate": 1.156052706399453e-05, + "loss": 1.1189, "step": 11926 }, { - "epoch": 0.337980673864377, + "epoch": 0.46666405822051804, "grad_norm": 0.0, - "learning_rate": 1.542443464233804e-05, - "loss": 0.9463, + "learning_rate": 1.1559275345545312e-05, + "loss": 1.0831, "step": 11927 }, { - "epoch": 0.33800901130663946, + "epoch": 0.4667031849127475, "grad_norm": 0.0, - "learning_rate": 1.5423663591790168e-05, - "loss": 0.9111, + "learning_rate": 1.1558023602056085e-05, + "loss": 1.0811, "step": 11928 }, { - "epoch": 0.3380373487489019, + "epoch": 0.4667423116049769, "grad_norm": 0.0, - "learning_rate": 1.5422892495556764e-05, - "loss": 0.8407, + "learning_rate": 1.1556771833546954e-05, + "loss": 0.9488, "step": 11929 }, { - "epoch": 0.3380656861911644, + "epoch": 0.46678143829720636, "grad_norm": 0.0, - "learning_rate": 1.5422121353644323e-05, - "loss": 1.0062, + "learning_rate": 1.1555520040038017e-05, + "loss": 0.97, "step": 11930 }, { - "epoch": 0.33809402363342683, + "epoch": 0.4668205649894358, "grad_norm": 0.0, - "learning_rate": 1.5421350166059336e-05, - "loss": 1.1021, + "learning_rate": 1.1554268221549377e-05, + "loss": 0.8829, "step": 11931 }, { - "epoch": 0.3381223610756893, + "epoch": 0.46685969168166525, "grad_norm": 0.0, - "learning_rate": 1.5420578932808307e-05, - "loss": 0.9894, + "learning_rate": 1.1553016378101137e-05, + "loss": 1.078, "step": 11932 }, { - "epoch": 0.33815069851795176, + "epoch": 0.4668988183738947, "grad_norm": 0.0, - "learning_rate": 1.541980765389773e-05, - "loss": 0.9585, + "learning_rate": 1.1551764509713399e-05, + "loss": 1.144, "step": 11933 }, { - "epoch": 0.33817903596021426, + "epoch": 0.4669379450661241, "grad_norm": 0.0, - "learning_rate": 1.54190363293341e-05, - "loss": 0.9982, + "learning_rate": 1.1550512616406269e-05, + "loss": 1.0949, "step": 11934 }, { - "epoch": 0.3382073734024767, + "epoch": 0.46697707175835357, "grad_norm": 0.0, - "learning_rate": 1.541826495912391e-05, - "loss": 0.9142, + "learning_rate": 1.1549260698199846e-05, + "loss": 1.1515, "step": 11935 }, { - "epoch": 0.33823571084473913, + "epoch": 0.467016198450583, "grad_norm": 0.0, - "learning_rate": 1.5417493543273665e-05, - "loss": 0.909, + "learning_rate": 1.1548008755114242e-05, + "loss": 1.0065, "step": 11936 }, { - "epoch": 0.33826404828700163, + "epoch": 0.46705532514281245, "grad_norm": 0.0, - "learning_rate": 1.541672208178986e-05, - "loss": 0.9544, + "learning_rate": 1.1546756787169553e-05, + "loss": 1.1184, "step": 11937 }, { - "epoch": 0.33829238572926407, + "epoch": 0.4670944518350419, "grad_norm": 0.0, - "learning_rate": 1.5415950574678996e-05, - "loss": 0.9419, + "learning_rate": 1.1545504794385893e-05, + "loss": 0.9786, "step": 11938 }, { - "epoch": 0.33832072317152656, + "epoch": 0.46713357852727133, "grad_norm": 0.0, - "learning_rate": 1.5415179021947566e-05, - "loss": 0.871, + "learning_rate": 1.154425277678336e-05, + "loss": 1.1083, "step": 11939 }, { - "epoch": 0.338349060613789, + "epoch": 0.4671727052195007, "grad_norm": 0.0, - "learning_rate": 1.5414407423602073e-05, - "loss": 0.9851, + "learning_rate": 1.154300073438206e-05, + "loss": 1.0342, "step": 11940 }, { - "epoch": 0.33837739805605144, + "epoch": 0.46721183191173016, "grad_norm": 0.0, - "learning_rate": 1.541363577964902e-05, - "loss": 0.9688, + "learning_rate": 1.1541748667202106e-05, + "loss": 1.0582, "step": 11941 }, { - "epoch": 0.33840573549831393, + "epoch": 0.4672509586039596, "grad_norm": 0.0, - "learning_rate": 1.5412864090094898e-05, - "loss": 0.8814, + "learning_rate": 1.1540496575263598e-05, + "loss": 1.0469, "step": 11942 }, { - "epoch": 0.33843407294057637, + "epoch": 0.46729008529618904, "grad_norm": 0.0, - "learning_rate": 1.541209235494621e-05, - "loss": 0.9042, + "learning_rate": 1.1539244458586646e-05, + "loss": 1.0242, "step": 11943 }, { - "epoch": 0.33846241038283886, + "epoch": 0.4673292119884185, "grad_norm": 0.0, - "learning_rate": 1.541132057420946e-05, - "loss": 1.0072, + "learning_rate": 1.1537992317191355e-05, + "loss": 1.0193, "step": 11944 }, { - "epoch": 0.3384907478251013, + "epoch": 0.4673683386806479, "grad_norm": 0.0, - "learning_rate": 1.5410548747891148e-05, - "loss": 0.924, + "learning_rate": 1.1536740151097839e-05, + "loss": 1.1216, "step": 11945 }, { - "epoch": 0.3385190852673638, + "epoch": 0.46740746537287736, "grad_norm": 0.0, - "learning_rate": 1.5409776875997778e-05, - "loss": 0.8966, + "learning_rate": 1.1535487960326197e-05, + "loss": 1.1371, "step": 11946 }, { - "epoch": 0.33854742270962623, + "epoch": 0.4674465920651068, "grad_norm": 0.0, - "learning_rate": 1.5409004958535848e-05, - "loss": 0.9351, + "learning_rate": 1.1534235744896547e-05, + "loss": 1.0314, "step": 11947 }, { - "epoch": 0.33857576015188867, + "epoch": 0.46748571875733624, "grad_norm": 0.0, - "learning_rate": 1.5408232995511853e-05, - "loss": 0.979, + "learning_rate": 1.1532983504828987e-05, + "loss": 0.9639, "step": 11948 }, { - "epoch": 0.33860409759415117, + "epoch": 0.4675248454495657, "grad_norm": 0.0, - "learning_rate": 1.540746098693231e-05, - "loss": 0.8633, + "learning_rate": 1.153173124014364e-05, + "loss": 1.0944, "step": 11949 }, { - "epoch": 0.3386324350364136, + "epoch": 0.4675639721417951, "grad_norm": 0.0, - "learning_rate": 1.5406688932803713e-05, - "loss": 1.0162, + "learning_rate": 1.1530478950860607e-05, + "loss": 0.9853, "step": 11950 }, { - "epoch": 0.3386607724786761, + "epoch": 0.46760309883402457, "grad_norm": 0.0, - "learning_rate": 1.540591683313257e-05, - "loss": 0.9859, + "learning_rate": 1.1529226636999995e-05, + "loss": 1.0977, "step": 11951 }, { - "epoch": 0.33868910992093854, + "epoch": 0.467642225526254, "grad_norm": 0.0, - "learning_rate": 1.5405144687925377e-05, - "loss": 0.9221, + "learning_rate": 1.1527974298581924e-05, + "loss": 1.1288, "step": 11952 }, { - "epoch": 0.338717447363201, + "epoch": 0.46768135221848345, "grad_norm": 0.0, - "learning_rate": 1.5404372497188646e-05, - "loss": 0.9034, + "learning_rate": 1.15267219356265e-05, + "loss": 1.134, "step": 11953 }, { - "epoch": 0.33874578480546347, + "epoch": 0.4677204789107129, "grad_norm": 0.0, - "learning_rate": 1.540360026092888e-05, - "loss": 0.9849, + "learning_rate": 1.1525469548153833e-05, + "loss": 1.0096, "step": 11954 }, { - "epoch": 0.3387741222477259, + "epoch": 0.46775960560294233, "grad_norm": 0.0, - "learning_rate": 1.5402827979152582e-05, - "loss": 0.8653, + "learning_rate": 1.1524217136184037e-05, + "loss": 1.1002, "step": 11955 }, { - "epoch": 0.3388024596899884, + "epoch": 0.46779873229517177, "grad_norm": 0.0, - "learning_rate": 1.5402055651866256e-05, - "loss": 0.9998, + "learning_rate": 1.1522964699737224e-05, + "loss": 1.0753, "step": 11956 }, { - "epoch": 0.33883079713225084, + "epoch": 0.4678378589874012, "grad_norm": 0.0, - "learning_rate": 1.540128327907641e-05, - "loss": 0.8587, + "learning_rate": 1.1521712238833507e-05, + "loss": 1.0923, "step": 11957 }, { - "epoch": 0.33885913457451333, + "epoch": 0.46787698567963065, "grad_norm": 0.0, - "learning_rate": 1.540051086078955e-05, - "loss": 0.893, + "learning_rate": 1.1520459753492999e-05, + "loss": 1.0464, "step": 11958 }, { - "epoch": 0.33888747201677577, + "epoch": 0.4679161123718601, "grad_norm": 0.0, - "learning_rate": 1.5399738397012177e-05, - "loss": 1.0014, + "learning_rate": 1.1519207243735813e-05, + "loss": 1.1895, "step": 11959 }, { - "epoch": 0.3389158094590382, + "epoch": 0.46795523906408953, "grad_norm": 0.0, - "learning_rate": 1.5398965887750807e-05, - "loss": 0.9232, + "learning_rate": 1.1517954709582058e-05, + "loss": 1.0929, "step": 11960 }, { - "epoch": 0.3389441469013007, + "epoch": 0.467994365756319, "grad_norm": 0.0, - "learning_rate": 1.5398193333011944e-05, - "loss": 0.9632, + "learning_rate": 1.1516702151051855e-05, + "loss": 1.1155, "step": 11961 }, { - "epoch": 0.33897248434356314, + "epoch": 0.4680334924485484, "grad_norm": 0.0, - "learning_rate": 1.539742073280209e-05, - "loss": 1.0902, + "learning_rate": 1.1515449568165316e-05, + "loss": 0.9372, "step": 11962 }, { - "epoch": 0.33900082178582563, + "epoch": 0.46807261914077786, "grad_norm": 0.0, - "learning_rate": 1.5396648087127763e-05, - "loss": 0.952, + "learning_rate": 1.1514196960942556e-05, + "loss": 1.1287, "step": 11963 }, { - "epoch": 0.3390291592280881, + "epoch": 0.4681117458330073, "grad_norm": 0.0, - "learning_rate": 1.5395875395995456e-05, - "loss": 1.0112, + "learning_rate": 1.1512944329403693e-05, + "loss": 1.0678, "step": 11964 }, { - "epoch": 0.3390574966703505, + "epoch": 0.46815087252523674, "grad_norm": 0.0, - "learning_rate": 1.539510265941169e-05, - "loss": 0.8631, + "learning_rate": 1.1511691673568835e-05, + "loss": 0.9649, "step": 11965 }, { - "epoch": 0.339085834112613, + "epoch": 0.4681899992174662, "grad_norm": 0.0, - "learning_rate": 1.5394329877382973e-05, - "loss": 0.9117, + "learning_rate": 1.1510438993458106e-05, + "loss": 1.1443, "step": 11966 }, { - "epoch": 0.33911417155487544, + "epoch": 0.4682291259096956, "grad_norm": 0.0, - "learning_rate": 1.539355704991581e-05, - "loss": 0.9531, + "learning_rate": 1.1509186289091621e-05, + "loss": 1.1163, "step": 11967 }, { - "epoch": 0.33914250899713794, + "epoch": 0.468268252601925, "grad_norm": 0.0, - "learning_rate": 1.5392784177016715e-05, - "loss": 0.9283, + "learning_rate": 1.1507933560489491e-05, + "loss": 1.0692, "step": 11968 }, { - "epoch": 0.3391708464394004, + "epoch": 0.46830737929415445, "grad_norm": 0.0, - "learning_rate": 1.5392011258692198e-05, - "loss": 0.9562, + "learning_rate": 1.1506680807671841e-05, + "loss": 1.019, "step": 11969 }, { - "epoch": 0.3391991838816628, + "epoch": 0.4683465059863839, "grad_norm": 0.0, - "learning_rate": 1.5391238294948768e-05, - "loss": 0.9097, + "learning_rate": 1.1505428030658784e-05, + "loss": 1.1066, "step": 11970 }, { - "epoch": 0.3392275213239253, + "epoch": 0.46838563267861333, "grad_norm": 0.0, - "learning_rate": 1.5390465285792933e-05, - "loss": 0.9505, + "learning_rate": 1.1504175229470438e-05, + "loss": 1.1129, "step": 11971 }, { - "epoch": 0.33925585876618775, + "epoch": 0.46842475937084277, "grad_norm": 0.0, - "learning_rate": 1.5389692231231207e-05, - "loss": 0.8503, + "learning_rate": 1.1502922404126924e-05, + "loss": 0.9771, "step": 11972 }, { - "epoch": 0.33928419620845024, + "epoch": 0.4684638860630722, "grad_norm": 0.0, - "learning_rate": 1.5388919131270103e-05, - "loss": 1.0555, + "learning_rate": 1.1501669554648359e-05, + "loss": 1.0567, "step": 11973 }, { - "epoch": 0.3393125336507127, + "epoch": 0.46850301275530165, "grad_norm": 0.0, - "learning_rate": 1.538814598591613e-05, - "loss": 0.8967, + "learning_rate": 1.1500416681054864e-05, + "loss": 1.1606, "step": 11974 }, { - "epoch": 0.33934087109297517, + "epoch": 0.4685421394475311, "grad_norm": 0.0, - "learning_rate": 1.5387372795175806e-05, - "loss": 0.837, + "learning_rate": 1.1499163783366553e-05, + "loss": 0.9371, "step": 11975 }, { - "epoch": 0.3393692085352376, + "epoch": 0.46858126613976053, "grad_norm": 0.0, - "learning_rate": 1.5386599559055643e-05, - "loss": 0.9952, + "learning_rate": 1.1497910861603557e-05, + "loss": 1.0605, "step": 11976 }, { - "epoch": 0.33939754597750005, + "epoch": 0.46862039283199, "grad_norm": 0.0, - "learning_rate": 1.5385826277562145e-05, - "loss": 0.9418, + "learning_rate": 1.1496657915785983e-05, + "loss": 1.0634, "step": 11977 }, { - "epoch": 0.33942588341976254, + "epoch": 0.4686595195242194, "grad_norm": 0.0, - "learning_rate": 1.5385052950701833e-05, - "loss": 0.836, + "learning_rate": 1.1495404945933962e-05, + "loss": 1.0698, "step": 11978 }, { - "epoch": 0.339454220862025, + "epoch": 0.46869864621644886, "grad_norm": 0.0, - "learning_rate": 1.5384279578481223e-05, - "loss": 0.8544, + "learning_rate": 1.1494151952067611e-05, + "loss": 1.0931, "step": 11979 }, { - "epoch": 0.3394825583042875, + "epoch": 0.4687377729086783, "grad_norm": 0.0, - "learning_rate": 1.5383506160906826e-05, - "loss": 0.8636, + "learning_rate": 1.1492898934207052e-05, + "loss": 1.0975, "step": 11980 }, { - "epoch": 0.3395108957465499, + "epoch": 0.46877689960090774, "grad_norm": 0.0, - "learning_rate": 1.5382732697985156e-05, - "loss": 0.9324, + "learning_rate": 1.1491645892372405e-05, + "loss": 1.1495, "step": 11981 }, { - "epoch": 0.33953923318881235, + "epoch": 0.4688160262931372, "grad_norm": 0.0, - "learning_rate": 1.538195918972273e-05, - "loss": 0.9796, + "learning_rate": 1.1490392826583794e-05, + "loss": 1.0168, "step": 11982 }, { - "epoch": 0.33956757063107484, + "epoch": 0.4688551529853666, "grad_norm": 0.0, - "learning_rate": 1.5381185636126067e-05, - "loss": 1.0142, + "learning_rate": 1.1489139736861344e-05, + "loss": 1.0421, "step": 11983 }, { - "epoch": 0.3395959080733373, + "epoch": 0.46889427967759606, "grad_norm": 0.0, - "learning_rate": 1.5380412037201672e-05, - "loss": 0.9851, + "learning_rate": 1.1487886623225174e-05, + "loss": 1.1392, "step": 11984 }, { - "epoch": 0.3396242455155998, + "epoch": 0.4689334063698255, "grad_norm": 0.0, - "learning_rate": 1.5379638392956072e-05, - "loss": 0.9346, + "learning_rate": 1.148663348569541e-05, + "loss": 1.1541, "step": 11985 }, { - "epoch": 0.3396525829578622, + "epoch": 0.46897253306205494, "grad_norm": 0.0, - "learning_rate": 1.5378864703395784e-05, - "loss": 0.9675, + "learning_rate": 1.1485380324292175e-05, + "loss": 1.1152, "step": 11986 }, { - "epoch": 0.3396809204001247, + "epoch": 0.4690116597542844, "grad_norm": 0.0, - "learning_rate": 1.5378090968527318e-05, - "loss": 0.9156, + "learning_rate": 1.1484127139035594e-05, + "loss": 1.1326, "step": 11987 }, { - "epoch": 0.33970925784238715, + "epoch": 0.4690507864465138, "grad_norm": 0.0, - "learning_rate": 1.5377317188357196e-05, - "loss": 0.8882, + "learning_rate": 1.1482873929945793e-05, + "loss": 1.1362, "step": 11988 }, { - "epoch": 0.3397375952846496, + "epoch": 0.46908991313874326, "grad_norm": 0.0, - "learning_rate": 1.5376543362891932e-05, - "loss": 0.928, + "learning_rate": 1.148162069704289e-05, + "loss": 1.0964, "step": 11989 }, { - "epoch": 0.3397659327269121, + "epoch": 0.4691290398309727, "grad_norm": 0.0, - "learning_rate": 1.537576949213805e-05, - "loss": 0.8877, + "learning_rate": 1.1480367440347017e-05, + "loss": 1.1969, "step": 11990 }, { - "epoch": 0.3397942701691745, + "epoch": 0.46916816652320215, "grad_norm": 0.0, - "learning_rate": 1.537499557610206e-05, - "loss": 0.9261, + "learning_rate": 1.1479114159878297e-05, + "loss": 1.0932, "step": 11991 }, { - "epoch": 0.339822607611437, + "epoch": 0.4692072932154316, "grad_norm": 0.0, - "learning_rate": 1.5374221614790493e-05, - "loss": 0.9107, + "learning_rate": 1.1477860855656862e-05, + "loss": 0.9514, "step": 11992 }, { - "epoch": 0.33985094505369945, + "epoch": 0.469246419907661, "grad_norm": 0.0, - "learning_rate": 1.5373447608209856e-05, - "loss": 1.0055, + "learning_rate": 1.1476607527702828e-05, + "loss": 0.9854, "step": 11993 }, { - "epoch": 0.3398792824959619, + "epoch": 0.46928554659989047, "grad_norm": 0.0, - "learning_rate": 1.537267355636668e-05, - "loss": 0.9076, + "learning_rate": 1.147535417603633e-05, + "loss": 1.2301, "step": 11994 }, { - "epoch": 0.3399076199382244, + "epoch": 0.4693246732921199, "grad_norm": 0.0, - "learning_rate": 1.5371899459267473e-05, - "loss": 0.9294, + "learning_rate": 1.147410080067749e-05, + "loss": 1.2125, "step": 11995 }, { - "epoch": 0.3399359573804868, + "epoch": 0.46936379998434935, "grad_norm": 0.0, - "learning_rate": 1.5371125316918767e-05, - "loss": 0.9466, + "learning_rate": 1.147284740164644e-05, + "loss": 1.0749, "step": 11996 }, { - "epoch": 0.3399642948227493, + "epoch": 0.46940292667657874, "grad_norm": 0.0, - "learning_rate": 1.5370351129327074e-05, - "loss": 0.9188, + "learning_rate": 1.1471593978963309e-05, + "loss": 1.0626, "step": 11997 }, { - "epoch": 0.33999263226501175, + "epoch": 0.4694420533688082, "grad_norm": 0.0, - "learning_rate": 1.536957689649892e-05, - "loss": 0.9271, + "learning_rate": 1.147034053264822e-05, + "loss": 1.0165, "step": 11998 }, { - "epoch": 0.34002096970727425, + "epoch": 0.4694811800610376, "grad_norm": 0.0, - "learning_rate": 1.536880261844083e-05, - "loss": 0.9647, + "learning_rate": 1.1469087062721305e-05, + "loss": 1.143, "step": 11999 }, { - "epoch": 0.3400493071495367, + "epoch": 0.46952030675326706, "grad_norm": 0.0, - "learning_rate": 1.536802829515932e-05, - "loss": 0.9196, + "learning_rate": 1.146783356920269e-05, + "loss": 1.0022, "step": 12000 }, { - "epoch": 0.3400776445917991, + "epoch": 0.4695594334454965, "grad_norm": 0.0, - "learning_rate": 1.5367253926660915e-05, - "loss": 0.9431, + "learning_rate": 1.1466580052112511e-05, + "loss": 1.0602, "step": 12001 }, { - "epoch": 0.3401059820340616, + "epoch": 0.46959856013772594, "grad_norm": 0.0, - "learning_rate": 1.5366479512952133e-05, - "loss": 0.9869, + "learning_rate": 1.146532651147089e-05, + "loss": 1.0598, "step": 12002 }, { - "epoch": 0.34013431947632405, + "epoch": 0.4696376868299554, "grad_norm": 0.0, - "learning_rate": 1.5365705054039504e-05, - "loss": 0.9421, + "learning_rate": 1.1464072947297966e-05, + "loss": 0.9972, "step": 12003 }, { - "epoch": 0.34016265691858655, + "epoch": 0.4696768135221848, "grad_norm": 0.0, - "learning_rate": 1.536493054992955e-05, - "loss": 0.9069, + "learning_rate": 1.1462819359613862e-05, + "loss": 0.8667, "step": 12004 }, { - "epoch": 0.340190994360849, + "epoch": 0.46971594021441426, "grad_norm": 0.0, - "learning_rate": 1.536415600062879e-05, - "loss": 0.9473, + "learning_rate": 1.1461565748438715e-05, + "loss": 1.0737, "step": 12005 }, { - "epoch": 0.3402193318031114, + "epoch": 0.4697550669066437, "grad_norm": 0.0, - "learning_rate": 1.5363381406143754e-05, - "loss": 0.9743, + "learning_rate": 1.1460312113792651e-05, + "loss": 1.1503, "step": 12006 }, { - "epoch": 0.3402476692453739, + "epoch": 0.46979419359887314, "grad_norm": 0.0, - "learning_rate": 1.536260676648097e-05, - "loss": 0.8763, + "learning_rate": 1.1459058455695804e-05, + "loss": 1.015, "step": 12007 }, { - "epoch": 0.34027600668763636, + "epoch": 0.4698333202911026, "grad_norm": 0.0, - "learning_rate": 1.536183208164695e-05, - "loss": 0.9168, + "learning_rate": 1.1457804774168308e-05, + "loss": 1.0185, "step": 12008 }, { - "epoch": 0.34030434412989885, + "epoch": 0.469872446983332, "grad_norm": 0.0, - "learning_rate": 1.536105735164823e-05, - "loss": 0.8844, + "learning_rate": 1.1456551069230296e-05, + "loss": 1.1205, "step": 12009 }, { - "epoch": 0.3403326815721613, + "epoch": 0.46991157367556147, "grad_norm": 0.0, - "learning_rate": 1.5360282576491332e-05, - "loss": 0.9901, + "learning_rate": 1.1455297340901895e-05, + "loss": 0.9765, "step": 12010 }, { - "epoch": 0.3403610190144238, + "epoch": 0.4699507003677909, "grad_norm": 0.0, - "learning_rate": 1.5359507756182785e-05, - "loss": 1.056, + "learning_rate": 1.1454043589203243e-05, + "loss": 1.1632, "step": 12011 }, { - "epoch": 0.3403893564566862, + "epoch": 0.46998982706002035, "grad_norm": 0.0, - "learning_rate": 1.5358732890729117e-05, - "loss": 0.9656, + "learning_rate": 1.1452789814154475e-05, + "loss": 1.0315, "step": 12012 }, { - "epoch": 0.34041769389894866, + "epoch": 0.4700289537522498, "grad_norm": 0.0, - "learning_rate": 1.535795798013685e-05, - "loss": 1.0526, + "learning_rate": 1.1451536015775722e-05, + "loss": 1.0966, "step": 12013 }, { - "epoch": 0.34044603134121115, + "epoch": 0.47006808044447923, "grad_norm": 0.0, - "learning_rate": 1.535718302441251e-05, - "loss": 0.9281, + "learning_rate": 1.1450282194087119e-05, + "loss": 1.153, "step": 12014 }, { - "epoch": 0.3404743687834736, + "epoch": 0.47010720713670867, "grad_norm": 0.0, - "learning_rate": 1.5356408023562626e-05, - "loss": 1.0091, + "learning_rate": 1.1449028349108802e-05, + "loss": 0.987, "step": 12015 }, { - "epoch": 0.3405027062257361, + "epoch": 0.4701463338289381, "grad_norm": 0.0, - "learning_rate": 1.5355632977593735e-05, - "loss": 1.0681, + "learning_rate": 1.1447774480860905e-05, + "loss": 1.166, "step": 12016 }, { - "epoch": 0.3405310436679985, + "epoch": 0.47018546052116755, "grad_norm": 0.0, - "learning_rate": 1.5354857886512357e-05, - "loss": 0.9647, + "learning_rate": 1.1446520589363566e-05, + "loss": 0.9974, "step": 12017 }, { - "epoch": 0.34055938111026096, + "epoch": 0.470224587213397, "grad_norm": 0.0, - "learning_rate": 1.535408275032502e-05, - "loss": 0.9907, + "learning_rate": 1.1445266674636915e-05, + "loss": 1.0164, "step": 12018 }, { - "epoch": 0.34058771855252346, + "epoch": 0.47026371390562643, "grad_norm": 0.0, - "learning_rate": 1.5353307569038255e-05, - "loss": 0.8769, + "learning_rate": 1.1444012736701096e-05, + "loss": 1.1218, "step": 12019 }, { - "epoch": 0.3406160559947859, + "epoch": 0.4703028405978559, "grad_norm": 0.0, - "learning_rate": 1.5352532342658597e-05, - "loss": 0.8617, + "learning_rate": 1.1442758775576237e-05, + "loss": 1.1526, "step": 12020 }, { - "epoch": 0.3406443934370484, + "epoch": 0.4703419672900853, "grad_norm": 0.0, - "learning_rate": 1.5351757071192574e-05, - "loss": 0.9357, + "learning_rate": 1.1441504791282485e-05, + "loss": 1.1278, "step": 12021 }, { - "epoch": 0.3406727308793108, + "epoch": 0.47038109398231476, "grad_norm": 0.0, - "learning_rate": 1.5350981754646705e-05, - "loss": 0.8906, + "learning_rate": 1.1440250783839967e-05, + "loss": 0.873, "step": 12022 }, { - "epoch": 0.3407010683215733, + "epoch": 0.4704202206745442, "grad_norm": 0.0, - "learning_rate": 1.5350206393027533e-05, - "loss": 0.9802, + "learning_rate": 1.1438996753268832e-05, + "loss": 1.102, "step": 12023 }, { - "epoch": 0.34072940576383576, + "epoch": 0.47045934736677364, "grad_norm": 0.0, - "learning_rate": 1.5349430986341588e-05, - "loss": 0.9296, + "learning_rate": 1.143774269958921e-05, + "loss": 1.0759, "step": 12024 }, { - "epoch": 0.3407577432060982, + "epoch": 0.470498474059003, "grad_norm": 0.0, - "learning_rate": 1.5348655534595396e-05, - "loss": 0.9074, + "learning_rate": 1.1436488622821243e-05, + "loss": 1.1271, "step": 12025 }, { - "epoch": 0.3407860806483607, + "epoch": 0.47053760075123247, "grad_norm": 0.0, - "learning_rate": 1.5347880037795496e-05, - "loss": 0.9708, + "learning_rate": 1.1435234522985065e-05, + "loss": 1.1082, "step": 12026 }, { - "epoch": 0.34081441809062313, + "epoch": 0.4705767274434619, "grad_norm": 0.0, - "learning_rate": 1.5347104495948414e-05, - "loss": 0.914, + "learning_rate": 1.1433980400100822e-05, + "loss": 1.0537, "step": 12027 }, { - "epoch": 0.3408427555328856, + "epoch": 0.47061585413569135, "grad_norm": 0.0, - "learning_rate": 1.5346328909060688e-05, - "loss": 0.988, + "learning_rate": 1.143272625418865e-05, + "loss": 1.082, "step": 12028 }, { - "epoch": 0.34087109297514806, + "epoch": 0.4706549808279208, "grad_norm": 0.0, - "learning_rate": 1.5345553277138846e-05, - "loss": 0.9425, + "learning_rate": 1.1431472085268688e-05, + "loss": 0.9001, "step": 12029 }, { - "epoch": 0.3408994304174105, + "epoch": 0.47069410752015023, "grad_norm": 0.0, - "learning_rate": 1.5344777600189423e-05, - "loss": 0.8237, + "learning_rate": 1.1430217893361082e-05, + "loss": 1.0369, "step": 12030 }, { - "epoch": 0.340927767859673, + "epoch": 0.47073323421237967, "grad_norm": 0.0, - "learning_rate": 1.5344001878218952e-05, - "loss": 1.0064, + "learning_rate": 1.1428963678485965e-05, + "loss": 1.0162, "step": 12031 }, { - "epoch": 0.34095610530193543, + "epoch": 0.4707723609046091, "grad_norm": 0.0, - "learning_rate": 1.5343226111233973e-05, - "loss": 0.8926, + "learning_rate": 1.1427709440663484e-05, + "loss": 1.0898, "step": 12032 }, { - "epoch": 0.3409844427441979, + "epoch": 0.47081148759683855, "grad_norm": 0.0, - "learning_rate": 1.5342450299241013e-05, - "loss": 0.9707, + "learning_rate": 1.1426455179913777e-05, + "loss": 0.9701, "step": 12033 }, { - "epoch": 0.34101278018646036, + "epoch": 0.470850614289068, "grad_norm": 0.0, - "learning_rate": 1.5341674442246613e-05, - "loss": 1.0317, + "learning_rate": 1.142520089625699e-05, + "loss": 1.1682, "step": 12034 }, { - "epoch": 0.34104111762872286, + "epoch": 0.47088974098129743, "grad_norm": 0.0, - "learning_rate": 1.5340898540257304e-05, - "loss": 0.9613, + "learning_rate": 1.1423946589713259e-05, + "loss": 1.0872, "step": 12035 }, { - "epoch": 0.3410694550709853, + "epoch": 0.4709288676735269, "grad_norm": 0.0, - "learning_rate": 1.5340122593279626e-05, - "loss": 0.8747, + "learning_rate": 1.1422692260302733e-05, + "loss": 1.0182, "step": 12036 }, { - "epoch": 0.34109779251324773, + "epoch": 0.4709679943657563, "grad_norm": 0.0, - "learning_rate": 1.5339346601320107e-05, - "loss": 1.0494, + "learning_rate": 1.1421437908045552e-05, + "loss": 1.1517, "step": 12037 }, { - "epoch": 0.3411261299555102, + "epoch": 0.47100712105798576, "grad_norm": 0.0, - "learning_rate": 1.5338570564385294e-05, - "loss": 0.9862, + "learning_rate": 1.1420183532961855e-05, + "loss": 0.9595, "step": 12038 }, { - "epoch": 0.34115446739777266, + "epoch": 0.4710462477502152, "grad_norm": 0.0, - "learning_rate": 1.5337794482481714e-05, - "loss": 0.9216, + "learning_rate": 1.1418929135071795e-05, + "loss": 0.9382, "step": 12039 }, { - "epoch": 0.34118280484003516, + "epoch": 0.47108537444244464, "grad_norm": 0.0, - "learning_rate": 1.5337018355615912e-05, - "loss": 1.0015, + "learning_rate": 1.1417674714395509e-05, + "loss": 1.0349, "step": 12040 }, { - "epoch": 0.3412111422822976, + "epoch": 0.4711245011346741, "grad_norm": 0.0, - "learning_rate": 1.5336242183794425e-05, - "loss": 0.8511, + "learning_rate": 1.1416420270953144e-05, + "loss": 1.1581, "step": 12041 }, { - "epoch": 0.34123947972456004, + "epoch": 0.4711636278269035, "grad_norm": 0.0, - "learning_rate": 1.5335465967023787e-05, - "loss": 0.9896, + "learning_rate": 1.1415165804764842e-05, + "loss": 1.1264, "step": 12042 }, { - "epoch": 0.34126781716682253, + "epoch": 0.47120275451913296, "grad_norm": 0.0, - "learning_rate": 1.5334689705310533e-05, - "loss": 0.9535, + "learning_rate": 1.1413911315850754e-05, + "loss": 1.093, "step": 12043 }, { - "epoch": 0.34129615460908497, + "epoch": 0.4712418812113624, "grad_norm": 0.0, - "learning_rate": 1.533391339866121e-05, - "loss": 1.0372, + "learning_rate": 1.1412656804231019e-05, + "loss": 1.069, "step": 12044 }, { - "epoch": 0.34132449205134746, + "epoch": 0.47128100790359184, "grad_norm": 0.0, - "learning_rate": 1.5333137047082355e-05, - "loss": 0.9209, + "learning_rate": 1.141140226992579e-05, + "loss": 1.0798, "step": 12045 }, { - "epoch": 0.3413528294936099, + "epoch": 0.4713201345958213, "grad_norm": 0.0, - "learning_rate": 1.5332360650580507e-05, - "loss": 0.7138, + "learning_rate": 1.1410147712955206e-05, + "loss": 0.9701, "step": 12046 }, { - "epoch": 0.3413811669358724, + "epoch": 0.4713592612880507, "grad_norm": 0.0, - "learning_rate": 1.53315842091622e-05, - "loss": 0.9634, + "learning_rate": 1.1408893133339416e-05, + "loss": 1.0009, "step": 12047 }, { - "epoch": 0.34140950437813483, + "epoch": 0.47139838798028016, "grad_norm": 0.0, - "learning_rate": 1.5330807722833985e-05, - "loss": 0.8093, + "learning_rate": 1.140763853109857e-05, + "loss": 1.0406, "step": 12048 }, { - "epoch": 0.34143784182039727, + "epoch": 0.4714375146725096, "grad_norm": 0.0, - "learning_rate": 1.5330031191602395e-05, - "loss": 1.018, + "learning_rate": 1.1406383906252812e-05, + "loss": 1.046, "step": 12049 }, { - "epoch": 0.34146617926265976, + "epoch": 0.47147664136473905, "grad_norm": 0.0, - "learning_rate": 1.5329254615473974e-05, - "loss": 0.8753, + "learning_rate": 1.1405129258822292e-05, + "loss": 0.9676, "step": 12050 }, { - "epoch": 0.3414945167049222, + "epoch": 0.4715157680569685, "grad_norm": 0.0, - "learning_rate": 1.532847799445526e-05, - "loss": 0.9375, + "learning_rate": 1.1403874588827156e-05, + "loss": 1.0452, "step": 12051 }, { - "epoch": 0.3415228541471847, + "epoch": 0.47155489474919793, "grad_norm": 0.0, - "learning_rate": 1.5327701328552796e-05, - "loss": 0.9344, + "learning_rate": 1.1402619896287553e-05, + "loss": 1.1876, "step": 12052 }, { - "epoch": 0.34155119158944713, + "epoch": 0.47159402144142737, "grad_norm": 0.0, - "learning_rate": 1.532692461777313e-05, - "loss": 0.9635, + "learning_rate": 1.140136518122363e-05, + "loss": 1.0607, "step": 12053 }, { - "epoch": 0.3415795290317096, + "epoch": 0.47163314813365675, "grad_norm": 0.0, - "learning_rate": 1.5326147862122796e-05, - "loss": 0.9748, + "learning_rate": 1.1400110443655541e-05, + "loss": 1.1602, "step": 12054 }, { - "epoch": 0.34160786647397207, + "epoch": 0.4716722748258862, "grad_norm": 0.0, - "learning_rate": 1.532537106160834e-05, - "loss": 0.9008, + "learning_rate": 1.1398855683603431e-05, + "loss": 0.9715, "step": 12055 }, { - "epoch": 0.3416362039162345, + "epoch": 0.47171140151811564, "grad_norm": 0.0, - "learning_rate": 1.532459421623631e-05, - "loss": 0.9693, + "learning_rate": 1.1397600901087455e-05, + "loss": 1.0544, "step": 12056 }, { - "epoch": 0.341664541358497, + "epoch": 0.4717505282103451, "grad_norm": 0.0, - "learning_rate": 1.5323817326013244e-05, - "loss": 1.0106, + "learning_rate": 1.1396346096127755e-05, + "loss": 1.0035, "step": 12057 }, { - "epoch": 0.34169287880075944, + "epoch": 0.4717896549025745, "grad_norm": 0.0, - "learning_rate": 1.532304039094569e-05, - "loss": 0.9906, + "learning_rate": 1.1395091268744492e-05, + "loss": 1.1156, "step": 12058 }, { - "epoch": 0.34172121624302193, + "epoch": 0.47182878159480396, "grad_norm": 0.0, - "learning_rate": 1.5322263411040186e-05, - "loss": 1.0258, + "learning_rate": 1.1393836418957806e-05, + "loss": 1.0136, "step": 12059 }, { - "epoch": 0.34174955368528437, + "epoch": 0.4718679082870334, "grad_norm": 0.0, - "learning_rate": 1.532148638630328e-05, - "loss": 1.0378, + "learning_rate": 1.1392581546787855e-05, + "loss": 1.0626, "step": 12060 }, { - "epoch": 0.3417778911275468, + "epoch": 0.47190703497926284, "grad_norm": 0.0, - "learning_rate": 1.532070931674152e-05, - "loss": 0.9915, + "learning_rate": 1.1391326652254792e-05, + "loss": 0.9537, "step": 12061 }, { - "epoch": 0.3418062285698093, + "epoch": 0.4719461616714923, "grad_norm": 0.0, - "learning_rate": 1.5319932202361453e-05, - "loss": 0.9298, + "learning_rate": 1.1390071735378762e-05, + "loss": 1.0349, "step": 12062 }, { - "epoch": 0.34183456601207174, + "epoch": 0.4719852883637217, "grad_norm": 0.0, - "learning_rate": 1.5319155043169618e-05, - "loss": 1.0074, + "learning_rate": 1.1388816796179927e-05, + "loss": 1.1387, "step": 12063 }, { - "epoch": 0.34186290345433423, + "epoch": 0.47202441505595116, "grad_norm": 0.0, - "learning_rate": 1.5318377839172566e-05, - "loss": 1.0351, + "learning_rate": 1.138756183467843e-05, + "loss": 1.1138, "step": 12064 }, { - "epoch": 0.34189124089659667, + "epoch": 0.4720635417481806, "grad_norm": 0.0, - "learning_rate": 1.5317600590376842e-05, - "loss": 0.908, + "learning_rate": 1.1386306850894431e-05, + "loss": 1.0979, "step": 12065 }, { - "epoch": 0.3419195783388591, + "epoch": 0.47210266844041004, "grad_norm": 0.0, - "learning_rate": 1.5316823296788993e-05, - "loss": 0.9521, + "learning_rate": 1.1385051844848083e-05, + "loss": 1.0717, "step": 12066 }, { - "epoch": 0.3419479157811216, + "epoch": 0.4721417951326395, "grad_norm": 0.0, - "learning_rate": 1.531604595841557e-05, - "loss": 0.9876, + "learning_rate": 1.1383796816559534e-05, + "loss": 0.9366, "step": 12067 }, { - "epoch": 0.34197625322338404, + "epoch": 0.4721809218248689, "grad_norm": 0.0, - "learning_rate": 1.5315268575263115e-05, - "loss": 0.9053, + "learning_rate": 1.1382541766048944e-05, + "loss": 1.0554, "step": 12068 }, { - "epoch": 0.34200459066564654, + "epoch": 0.47222004851709837, "grad_norm": 0.0, - "learning_rate": 1.531449114733818e-05, - "loss": 0.903, + "learning_rate": 1.1381286693336465e-05, + "loss": 1.0383, "step": 12069 }, { - "epoch": 0.342032928107909, + "epoch": 0.4722591752093278, "grad_norm": 0.0, - "learning_rate": 1.5313713674647313e-05, - "loss": 0.8668, + "learning_rate": 1.1380031598442254e-05, + "loss": 0.9103, "step": 12070 }, { - "epoch": 0.34206126555017147, + "epoch": 0.47229830190155725, "grad_norm": 0.0, - "learning_rate": 1.531293615719706e-05, - "loss": 1.0119, + "learning_rate": 1.1378776481386462e-05, + "loss": 1.165, "step": 12071 }, { - "epoch": 0.3420896029924339, + "epoch": 0.4723374285937867, "grad_norm": 0.0, - "learning_rate": 1.5312158594993975e-05, - "loss": 0.9271, + "learning_rate": 1.137752134218925e-05, + "loss": 1.1365, "step": 12072 }, { - "epoch": 0.34211794043469634, + "epoch": 0.47237655528601613, "grad_norm": 0.0, - "learning_rate": 1.5311380988044606e-05, - "loss": 0.9405, + "learning_rate": 1.1376266180870769e-05, + "loss": 1.1129, "step": 12073 }, { - "epoch": 0.34214627787695884, + "epoch": 0.47241568197824557, "grad_norm": 0.0, - "learning_rate": 1.53106033363555e-05, - "loss": 0.9377, + "learning_rate": 1.1375010997451182e-05, + "loss": 0.9449, "step": 12074 }, { - "epoch": 0.3421746153192213, + "epoch": 0.472454808670475, "grad_norm": 0.0, - "learning_rate": 1.5309825639933214e-05, - "loss": 1.0141, + "learning_rate": 1.1373755791950638e-05, + "loss": 1.0768, "step": 12075 }, { - "epoch": 0.34220295276148377, + "epoch": 0.47249393536270445, "grad_norm": 0.0, - "learning_rate": 1.530904789878429e-05, - "loss": 1.0494, + "learning_rate": 1.1372500564389298e-05, + "loss": 0.8929, "step": 12076 }, { - "epoch": 0.3422312902037462, + "epoch": 0.4725330620549339, "grad_norm": 0.0, - "learning_rate": 1.5308270112915287e-05, - "loss": 1.0032, + "learning_rate": 1.1371245314787318e-05, + "loss": 1.0992, "step": 12077 }, { - "epoch": 0.34225962764600865, + "epoch": 0.47257218874716334, "grad_norm": 0.0, - "learning_rate": 1.5307492282332754e-05, - "loss": 0.8399, + "learning_rate": 1.1369990043164855e-05, + "loss": 0.999, "step": 12078 }, { - "epoch": 0.34228796508827114, + "epoch": 0.4726113154393928, "grad_norm": 0.0, - "learning_rate": 1.530671440704324e-05, - "loss": 0.9685, + "learning_rate": 1.1368734749542072e-05, + "loss": 1.1586, "step": 12079 }, { - "epoch": 0.3423163025305336, + "epoch": 0.4726504421316222, "grad_norm": 0.0, - "learning_rate": 1.5305936487053303e-05, - "loss": 0.9718, + "learning_rate": 1.1367479433939124e-05, + "loss": 1.053, "step": 12080 }, { - "epoch": 0.34234463997279607, + "epoch": 0.47268956882385166, "grad_norm": 0.0, - "learning_rate": 1.5305158522369493e-05, - "loss": 0.9827, + "learning_rate": 1.1366224096376168e-05, + "loss": 1.1448, "step": 12081 }, { - "epoch": 0.3423729774150585, + "epoch": 0.47272869551608104, "grad_norm": 0.0, - "learning_rate": 1.530438051299836e-05, - "loss": 0.9722, + "learning_rate": 1.1364968736873366e-05, + "loss": 1.0094, "step": 12082 }, { - "epoch": 0.342401314857321, + "epoch": 0.4727678222083105, "grad_norm": 0.0, - "learning_rate": 1.530360245894646e-05, - "loss": 0.9093, + "learning_rate": 1.1363713355450876e-05, + "loss": 1.0153, "step": 12083 }, { - "epoch": 0.34242965229958344, + "epoch": 0.4728069489005399, "grad_norm": 0.0, - "learning_rate": 1.5302824360220352e-05, - "loss": 1.0109, + "learning_rate": 1.136245795212886e-05, + "loss": 1.1046, "step": 12084 }, { - "epoch": 0.3424579897418459, + "epoch": 0.47284607559276937, "grad_norm": 0.0, - "learning_rate": 1.530204621682658e-05, - "loss": 1.0242, + "learning_rate": 1.1361202526927473e-05, + "loss": 1.0616, "step": 12085 }, { - "epoch": 0.3424863271841084, + "epoch": 0.4728852022849988, "grad_norm": 0.0, - "learning_rate": 1.5301268028771708e-05, - "loss": 0.8132, + "learning_rate": 1.1359947079866882e-05, + "loss": 0.9836, "step": 12086 }, { - "epoch": 0.3425146646263708, + "epoch": 0.47292432897722825, "grad_norm": 0.0, - "learning_rate": 1.5300489796062286e-05, - "loss": 0.9173, + "learning_rate": 1.1358691610967242e-05, + "loss": 1.1752, "step": 12087 }, { - "epoch": 0.3425430020686333, + "epoch": 0.4729634556694577, "grad_norm": 0.0, - "learning_rate": 1.5299711518704866e-05, - "loss": 0.8317, + "learning_rate": 1.1357436120248722e-05, + "loss": 0.9716, "step": 12088 }, { - "epoch": 0.34257133951089574, + "epoch": 0.47300258236168713, "grad_norm": 0.0, - "learning_rate": 1.529893319670601e-05, - "loss": 0.9525, + "learning_rate": 1.1356180607731476e-05, + "loss": 1.0239, "step": 12089 }, { - "epoch": 0.3425996769531582, + "epoch": 0.47304170905391657, "grad_norm": 0.0, - "learning_rate": 1.5298154830072274e-05, - "loss": 1.0149, + "learning_rate": 1.135492507343567e-05, + "loss": 1.0633, "step": 12090 }, { - "epoch": 0.3426280143954207, + "epoch": 0.473080835746146, "grad_norm": 0.0, - "learning_rate": 1.529737641881021e-05, - "loss": 0.9422, + "learning_rate": 1.1353669517381463e-05, + "loss": 1.0361, "step": 12091 }, { - "epoch": 0.3426563518376831, + "epoch": 0.47311996243837545, "grad_norm": 0.0, - "learning_rate": 1.5296597962926377e-05, - "loss": 0.9702, + "learning_rate": 1.1352413939589022e-05, + "loss": 1.1422, "step": 12092 }, { - "epoch": 0.3426846892799456, + "epoch": 0.4731590891306049, "grad_norm": 0.0, - "learning_rate": 1.5295819462427336e-05, - "loss": 0.997, + "learning_rate": 1.1351158340078505e-05, + "loss": 1.0468, "step": 12093 }, { - "epoch": 0.34271302672220805, + "epoch": 0.47319821582283433, "grad_norm": 0.0, - "learning_rate": 1.5295040917319637e-05, - "loss": 0.955, + "learning_rate": 1.1349902718870081e-05, + "loss": 1.1537, "step": 12094 }, { - "epoch": 0.34274136416447054, + "epoch": 0.4732373425150638, "grad_norm": 0.0, - "learning_rate": 1.5294262327609843e-05, - "loss": 0.9208, + "learning_rate": 1.1348647075983909e-05, + "loss": 1.0742, "step": 12095 }, { - "epoch": 0.342769701606733, + "epoch": 0.4732764692072932, "grad_norm": 0.0, - "learning_rate": 1.5293483693304513e-05, - "loss": 0.8558, + "learning_rate": 1.1347391411440157e-05, + "loss": 0.9488, "step": 12096 }, { - "epoch": 0.3427980390489954, + "epoch": 0.47331559589952266, "grad_norm": 0.0, - "learning_rate": 1.5292705014410206e-05, - "loss": 1.012, + "learning_rate": 1.1346135725258987e-05, + "loss": 1.064, "step": 12097 }, { - "epoch": 0.3428263764912579, + "epoch": 0.4733547225917521, "grad_norm": 0.0, - "learning_rate": 1.5291926290933476e-05, - "loss": 0.9215, + "learning_rate": 1.1344880017460565e-05, + "loss": 1.0555, "step": 12098 }, { - "epoch": 0.34285471393352035, + "epoch": 0.47339384928398154, "grad_norm": 0.0, - "learning_rate": 1.5291147522880887e-05, - "loss": 0.8224, + "learning_rate": 1.134362428806505e-05, + "loss": 1.0256, "step": 12099 }, { - "epoch": 0.34288305137578284, + "epoch": 0.473432975976211, "grad_norm": 0.0, - "learning_rate": 1.5290368710258998e-05, - "loss": 0.8995, + "learning_rate": 1.1342368537092617e-05, + "loss": 1.1142, "step": 12100 }, { - "epoch": 0.3429113888180453, + "epoch": 0.4734721026684404, "grad_norm": 0.0, - "learning_rate": 1.5289589853074366e-05, - "loss": 1.0267, + "learning_rate": 1.1341112764563428e-05, + "loss": 0.9308, "step": 12101 }, { - "epoch": 0.3429397262603077, + "epoch": 0.47351122936066986, "grad_norm": 0.0, - "learning_rate": 1.5288810951333558e-05, - "loss": 1.0373, + "learning_rate": 1.1339856970497645e-05, + "loss": 1.2094, "step": 12102 }, { - "epoch": 0.3429680637025702, + "epoch": 0.4735503560528993, "grad_norm": 0.0, - "learning_rate": 1.528803200504313e-05, - "loss": 0.938, + "learning_rate": 1.1338601154915441e-05, + "loss": 0.9163, "step": 12103 }, { - "epoch": 0.34299640114483265, + "epoch": 0.47358948274512874, "grad_norm": 0.0, - "learning_rate": 1.5287253014209645e-05, - "loss": 0.9383, + "learning_rate": 1.133734531783698e-05, + "loss": 1.0915, "step": 12104 }, { - "epoch": 0.34302473858709515, + "epoch": 0.4736286094373582, "grad_norm": 0.0, - "learning_rate": 1.5286473978839662e-05, - "loss": 0.9743, + "learning_rate": 1.1336089459282426e-05, + "loss": 1.0325, "step": 12105 }, { - "epoch": 0.3430530760293576, + "epoch": 0.4736677361295876, "grad_norm": 0.0, - "learning_rate": 1.5285694898939748e-05, - "loss": 0.9546, + "learning_rate": 1.133483357927195e-05, + "loss": 0.9743, "step": 12106 }, { - "epoch": 0.3430814134716201, + "epoch": 0.47370686282181707, "grad_norm": 0.0, - "learning_rate": 1.5284915774516465e-05, - "loss": 1.081, + "learning_rate": 1.133357767782572e-05, + "loss": 0.9971, "step": 12107 }, { - "epoch": 0.3431097509138825, + "epoch": 0.4737459895140465, "grad_norm": 0.0, - "learning_rate": 1.5284136605576373e-05, - "loss": 0.92, + "learning_rate": 1.1332321754963907e-05, + "loss": 0.975, "step": 12108 }, { - "epoch": 0.34313808835614495, + "epoch": 0.47378511620627595, "grad_norm": 0.0, - "learning_rate": 1.528335739212603e-05, - "loss": 0.9151, + "learning_rate": 1.133106581070667e-05, + "loss": 0.9757, "step": 12109 }, { - "epoch": 0.34316642579840745, + "epoch": 0.4738242428985054, "grad_norm": 0.0, - "learning_rate": 1.5282578134172013e-05, - "loss": 0.9735, + "learning_rate": 1.132980984507419e-05, + "loss": 1.0609, "step": 12110 }, { - "epoch": 0.3431947632406699, + "epoch": 0.4738633695907348, "grad_norm": 0.0, - "learning_rate": 1.5281798831720876e-05, - "loss": 0.8153, + "learning_rate": 1.1328553858086624e-05, + "loss": 1.1141, "step": 12111 }, { - "epoch": 0.3432231006829324, + "epoch": 0.4739024962829642, "grad_norm": 0.0, - "learning_rate": 1.528101948477919e-05, - "loss": 1.0574, + "learning_rate": 1.1327297849764153e-05, + "loss": 1.011, "step": 12112 }, { - "epoch": 0.3432514381251948, + "epoch": 0.47394162297519365, "grad_norm": 0.0, - "learning_rate": 1.528024009335351e-05, - "loss": 1.0615, + "learning_rate": 1.1326041820126939e-05, + "loss": 1.1588, "step": 12113 }, { - "epoch": 0.34327977556745726, + "epoch": 0.4739807496674231, "grad_norm": 0.0, - "learning_rate": 1.5279460657450408e-05, - "loss": 0.9298, + "learning_rate": 1.1324785769195157e-05, + "loss": 1.0674, "step": 12114 }, { - "epoch": 0.34330811300971975, + "epoch": 0.47401987635965254, "grad_norm": 0.0, - "learning_rate": 1.527868117707645e-05, - "loss": 1.1375, + "learning_rate": 1.1323529696988976e-05, + "loss": 1.0148, "step": 12115 }, { - "epoch": 0.3433364504519822, + "epoch": 0.474059003051882, "grad_norm": 0.0, - "learning_rate": 1.52779016522382e-05, - "loss": 0.9478, + "learning_rate": 1.1322273603528562e-05, + "loss": 1.0102, "step": 12116 }, { - "epoch": 0.3433647878942447, + "epoch": 0.4740981297441114, "grad_norm": 0.0, - "learning_rate": 1.5277122082942225e-05, - "loss": 0.9981, + "learning_rate": 1.1321017488834097e-05, + "loss": 1.0583, "step": 12117 }, { - "epoch": 0.3433931253365071, + "epoch": 0.47413725643634086, "grad_norm": 0.0, - "learning_rate": 1.527634246919509e-05, - "loss": 0.9915, + "learning_rate": 1.131976135292574e-05, + "loss": 1.1145, "step": 12118 }, { - "epoch": 0.3434214627787696, + "epoch": 0.4741763831285703, "grad_norm": 0.0, - "learning_rate": 1.5275562811003363e-05, - "loss": 0.8952, + "learning_rate": 1.1318505195823674e-05, + "loss": 1.0579, "step": 12119 }, { - "epoch": 0.34344980022103205, + "epoch": 0.47421550982079974, "grad_norm": 0.0, - "learning_rate": 1.5274783108373612e-05, - "loss": 0.8108, + "learning_rate": 1.1317249017548064e-05, + "loss": 1.0096, "step": 12120 }, { - "epoch": 0.3434781376632945, + "epoch": 0.4742546365130292, "grad_norm": 0.0, - "learning_rate": 1.5274003361312405e-05, - "loss": 0.8698, + "learning_rate": 1.1315992818119087e-05, + "loss": 1.0582, "step": 12121 }, { - "epoch": 0.343506475105557, + "epoch": 0.4742937632052586, "grad_norm": 0.0, - "learning_rate": 1.5273223569826305e-05, - "loss": 1.0227, + "learning_rate": 1.1314736597556915e-05, + "loss": 1.0293, "step": 12122 }, { - "epoch": 0.3435348125478194, + "epoch": 0.47433288989748806, "grad_norm": 0.0, - "learning_rate": 1.527244373392189e-05, - "loss": 0.9138, + "learning_rate": 1.131348035588172e-05, + "loss": 1.0455, "step": 12123 }, { - "epoch": 0.3435631499900819, + "epoch": 0.4743720165897175, "grad_norm": 0.0, - "learning_rate": 1.5271663853605723e-05, - "loss": 0.9897, + "learning_rate": 1.1312224093113677e-05, + "loss": 1.1715, "step": 12124 }, { - "epoch": 0.34359148743234436, + "epoch": 0.47441114328194695, "grad_norm": 0.0, - "learning_rate": 1.5270883928884373e-05, - "loss": 0.9654, + "learning_rate": 1.1310967809272957e-05, + "loss": 0.9001, "step": 12125 }, { - "epoch": 0.3436198248746068, + "epoch": 0.4744502699741764, "grad_norm": 0.0, - "learning_rate": 1.527010395976441e-05, - "loss": 0.9125, + "learning_rate": 1.1309711504379739e-05, + "loss": 1.0969, "step": 12126 }, { - "epoch": 0.3436481623168693, + "epoch": 0.4744893966664058, "grad_norm": 0.0, - "learning_rate": 1.52693239462524e-05, - "loss": 0.9295, + "learning_rate": 1.1308455178454194e-05, + "loss": 1.0397, "step": 12127 }, { - "epoch": 0.3436764997591317, + "epoch": 0.47452852335863527, "grad_norm": 0.0, - "learning_rate": 1.5268543888354923e-05, - "loss": 1.0061, + "learning_rate": 1.1307198831516502e-05, + "loss": 0.9707, "step": 12128 }, { - "epoch": 0.3437048372013942, + "epoch": 0.4745676500508647, "grad_norm": 0.0, - "learning_rate": 1.5267763786078544e-05, - "loss": 0.9731, + "learning_rate": 1.130594246358683e-05, + "loss": 1.0457, "step": 12129 }, { - "epoch": 0.34373317464365666, + "epoch": 0.47460677674309415, "grad_norm": 0.0, - "learning_rate": 1.5266983639429832e-05, - "loss": 0.9299, + "learning_rate": 1.130468607468536e-05, + "loss": 0.9303, "step": 12130 }, { - "epoch": 0.34376151208591915, + "epoch": 0.4746459034353236, "grad_norm": 0.0, - "learning_rate": 1.526620344841536e-05, - "loss": 0.9172, + "learning_rate": 1.1303429664832266e-05, + "loss": 1.0688, "step": 12131 }, { - "epoch": 0.3437898495281816, + "epoch": 0.47468503012755303, "grad_norm": 0.0, - "learning_rate": 1.526542321304171e-05, - "loss": 0.9887, + "learning_rate": 1.1302173234047727e-05, + "loss": 1.0002, "step": 12132 }, { - "epoch": 0.34381818697044403, + "epoch": 0.4747241568197825, "grad_norm": 0.0, - "learning_rate": 1.5264642933315438e-05, - "loss": 0.9185, + "learning_rate": 1.1300916782351918e-05, + "loss": 1.0148, "step": 12133 }, { - "epoch": 0.3438465244127065, + "epoch": 0.4747632835120119, "grad_norm": 0.0, - "learning_rate": 1.5263862609243122e-05, - "loss": 0.9207, + "learning_rate": 1.1299660309765013e-05, + "loss": 1.082, "step": 12134 }, { - "epoch": 0.34387486185496896, + "epoch": 0.47480241020424135, "grad_norm": 0.0, - "learning_rate": 1.526308224083134e-05, - "loss": 1.0029, + "learning_rate": 1.1298403816307193e-05, + "loss": 0.8997, "step": 12135 }, { - "epoch": 0.34390319929723145, + "epoch": 0.4748415368964708, "grad_norm": 0.0, - "learning_rate": 1.5262301828086657e-05, - "loss": 1.0336, + "learning_rate": 1.1297147301998633e-05, + "loss": 1.058, "step": 12136 }, { - "epoch": 0.3439315367394939, + "epoch": 0.47488066358870024, "grad_norm": 0.0, - "learning_rate": 1.5261521371015657e-05, - "loss": 1.0411, + "learning_rate": 1.1295890766859516e-05, + "loss": 1.0779, "step": 12137 }, { - "epoch": 0.34395987418175633, + "epoch": 0.4749197902809297, "grad_norm": 0.0, - "learning_rate": 1.5260740869624906e-05, - "loss": 0.9642, + "learning_rate": 1.1294634210910015e-05, + "loss": 1.1821, "step": 12138 }, { - "epoch": 0.3439882116240188, + "epoch": 0.47495891697315906, "grad_norm": 0.0, - "learning_rate": 1.525996032392098e-05, - "loss": 0.9396, + "learning_rate": 1.1293377634170311e-05, + "loss": 1.1506, "step": 12139 }, { - "epoch": 0.34401654906628126, + "epoch": 0.4749980436653885, "grad_norm": 0.0, - "learning_rate": 1.5259179733910458e-05, - "loss": 0.8483, + "learning_rate": 1.1292121036660584e-05, + "loss": 1.2045, "step": 12140 }, { - "epoch": 0.34404488650854376, + "epoch": 0.47503717035761794, "grad_norm": 0.0, - "learning_rate": 1.5258399099599909e-05, - "loss": 0.8194, + "learning_rate": 1.129086441840101e-05, + "loss": 1.0454, "step": 12141 }, { - "epoch": 0.3440732239508062, + "epoch": 0.4750762970498474, "grad_norm": 0.0, - "learning_rate": 1.5257618420995917e-05, - "loss": 0.989, + "learning_rate": 1.1289607779411775e-05, + "loss": 1.0414, "step": 12142 }, { - "epoch": 0.3441015613930687, + "epoch": 0.4751154237420768, "grad_norm": 0.0, - "learning_rate": 1.5256837698105047e-05, - "loss": 0.9375, + "learning_rate": 1.1288351119713053e-05, + "loss": 1.043, "step": 12143 }, { - "epoch": 0.3441298988353311, + "epoch": 0.47515455043430627, "grad_norm": 0.0, - "learning_rate": 1.5256056930933884e-05, - "loss": 0.8928, + "learning_rate": 1.1287094439325027e-05, + "loss": 0.9676, "step": 12144 }, { - "epoch": 0.34415823627759357, + "epoch": 0.4751936771265357, "grad_norm": 0.0, - "learning_rate": 1.5255276119489004e-05, - "loss": 0.991, + "learning_rate": 1.1285837738267875e-05, + "loss": 1.0781, "step": 12145 }, { - "epoch": 0.34418657371985606, + "epoch": 0.47523280381876515, "grad_norm": 0.0, - "learning_rate": 1.5254495263776979e-05, - "loss": 1.0661, + "learning_rate": 1.1284581016561781e-05, + "loss": 1.1553, "step": 12146 }, { - "epoch": 0.3442149111621185, + "epoch": 0.4752719305109946, "grad_norm": 0.0, - "learning_rate": 1.5253714363804389e-05, - "loss": 0.9463, + "learning_rate": 1.1283324274226925e-05, + "loss": 0.9576, "step": 12147 }, { - "epoch": 0.344243248604381, + "epoch": 0.47531105720322403, "grad_norm": 0.0, - "learning_rate": 1.5252933419577809e-05, - "loss": 0.9661, + "learning_rate": 1.1282067511283491e-05, + "loss": 1.0867, "step": 12148 }, { - "epoch": 0.34427158604664343, + "epoch": 0.47535018389545347, "grad_norm": 0.0, - "learning_rate": 1.5252152431103824e-05, - "loss": 0.9273, + "learning_rate": 1.1280810727751658e-05, + "loss": 1.1192, "step": 12149 }, { - "epoch": 0.34429992348890587, + "epoch": 0.4753893105876829, "grad_norm": 0.0, - "learning_rate": 1.5251371398389008e-05, - "loss": 0.983, + "learning_rate": 1.1279553923651612e-05, + "loss": 1.0956, "step": 12150 }, { - "epoch": 0.34432826093116836, + "epoch": 0.47542843727991235, "grad_norm": 0.0, - "learning_rate": 1.525059032143994e-05, - "loss": 0.8575, + "learning_rate": 1.1278297099003529e-05, + "loss": 1.0156, "step": 12151 }, { - "epoch": 0.3443565983734308, + "epoch": 0.4754675639721418, "grad_norm": 0.0, - "learning_rate": 1.5249809200263199e-05, - "loss": 0.949, + "learning_rate": 1.12770402538276e-05, + "loss": 1.1134, "step": 12152 }, { - "epoch": 0.3443849358156933, + "epoch": 0.47550669066437123, "grad_norm": 0.0, - "learning_rate": 1.5249028034865368e-05, - "loss": 0.9066, + "learning_rate": 1.1275783388144003e-05, + "loss": 1.0568, "step": 12153 }, { - "epoch": 0.34441327325795573, + "epoch": 0.4755458173566007, "grad_norm": 0.0, - "learning_rate": 1.5248246825253023e-05, - "loss": 0.8061, + "learning_rate": 1.1274526501972924e-05, + "loss": 1.0957, "step": 12154 }, { - "epoch": 0.3444416107002182, + "epoch": 0.4755849440488301, "grad_norm": 0.0, - "learning_rate": 1.5247465571432746e-05, - "loss": 0.9196, + "learning_rate": 1.1273269595334547e-05, + "loss": 0.9365, "step": 12155 }, { - "epoch": 0.34446994814248066, + "epoch": 0.47562407074105956, "grad_norm": 0.0, - "learning_rate": 1.5246684273411121e-05, - "loss": 0.9628, + "learning_rate": 1.1272012668249055e-05, + "loss": 1.1476, "step": 12156 }, { - "epoch": 0.3444982855847431, + "epoch": 0.475663197433289, "grad_norm": 0.0, - "learning_rate": 1.5245902931194723e-05, - "loss": 1.0278, + "learning_rate": 1.1270755720736632e-05, + "loss": 1.0664, "step": 12157 }, { - "epoch": 0.3445266230270056, + "epoch": 0.47570232412551844, "grad_norm": 0.0, - "learning_rate": 1.5245121544790138e-05, - "loss": 0.8242, + "learning_rate": 1.1269498752817466e-05, + "loss": 0.9466, "step": 12158 }, { - "epoch": 0.34455496046926803, + "epoch": 0.4757414508177479, "grad_norm": 0.0, - "learning_rate": 1.5244340114203946e-05, - "loss": 0.9541, + "learning_rate": 1.1268241764511742e-05, + "loss": 1.0349, "step": 12159 }, { - "epoch": 0.34458329791153053, + "epoch": 0.4757805775099773, "grad_norm": 0.0, - "learning_rate": 1.5243558639442728e-05, - "loss": 0.9571, + "learning_rate": 1.1266984755839642e-05, + "loss": 1.0578, "step": 12160 }, { - "epoch": 0.34461163535379297, + "epoch": 0.47581970420220676, "grad_norm": 0.0, - "learning_rate": 1.524277712051307e-05, - "loss": 0.9054, + "learning_rate": 1.1265727726821356e-05, + "loss": 0.939, "step": 12161 }, { - "epoch": 0.3446399727960554, + "epoch": 0.4758588308944362, "grad_norm": 0.0, - "learning_rate": 1.5241995557421555e-05, - "loss": 0.9722, + "learning_rate": 1.126447067747707e-05, + "loss": 1.0845, "step": 12162 }, { - "epoch": 0.3446683102383179, + "epoch": 0.47589795758666564, "grad_norm": 0.0, - "learning_rate": 1.5241213950174763e-05, - "loss": 0.8884, + "learning_rate": 1.1263213607826968e-05, + "loss": 0.9935, "step": 12163 }, { - "epoch": 0.34469664768058034, + "epoch": 0.4759370842788951, "grad_norm": 0.0, - "learning_rate": 1.5240432298779281e-05, - "loss": 0.8615, + "learning_rate": 1.126195651789124e-05, + "loss": 0.993, "step": 12164 }, { - "epoch": 0.34472498512284283, + "epoch": 0.4759762109711245, "grad_norm": 0.0, - "learning_rate": 1.5239650603241692e-05, - "loss": 0.9966, + "learning_rate": 1.1260699407690067e-05, + "loss": 0.8794, "step": 12165 }, { - "epoch": 0.34475332256510527, + "epoch": 0.47601533766335397, "grad_norm": 0.0, - "learning_rate": 1.5238868863568577e-05, - "loss": 0.9196, + "learning_rate": 1.1259442277243646e-05, + "loss": 1.1574, "step": 12166 }, { - "epoch": 0.3447816600073677, + "epoch": 0.4760544643555834, "grad_norm": 0.0, - "learning_rate": 1.5238087079766524e-05, - "loss": 0.8968, + "learning_rate": 1.1258185126572156e-05, + "loss": 1.1661, "step": 12167 }, { - "epoch": 0.3448099974496302, + "epoch": 0.4760935910478128, "grad_norm": 0.0, - "learning_rate": 1.5237305251842122e-05, - "loss": 0.8035, + "learning_rate": 1.1256927955695793e-05, + "loss": 1.2537, "step": 12168 }, { - "epoch": 0.34483833489189264, + "epoch": 0.47613271774004223, "grad_norm": 0.0, - "learning_rate": 1.5236523379801954e-05, - "loss": 0.8547, + "learning_rate": 1.125567076463474e-05, + "loss": 0.8852, "step": 12169 }, { - "epoch": 0.34486667233415513, + "epoch": 0.4761718444322717, "grad_norm": 0.0, - "learning_rate": 1.5235741463652602e-05, - "loss": 0.9056, + "learning_rate": 1.1254413553409189e-05, + "loss": 1.0445, "step": 12170 }, { - "epoch": 0.34489500977641757, + "epoch": 0.4762109711245011, "grad_norm": 0.0, - "learning_rate": 1.5234959503400658e-05, - "loss": 0.8249, + "learning_rate": 1.1253156322039328e-05, + "loss": 1.1283, "step": 12171 }, { - "epoch": 0.34492334721868007, + "epoch": 0.47625009781673056, "grad_norm": 0.0, - "learning_rate": 1.5234177499052703e-05, - "loss": 1.0072, + "learning_rate": 1.125189907054535e-05, + "loss": 1.0106, "step": 12172 }, { - "epoch": 0.3449516846609425, + "epoch": 0.47628922450896, "grad_norm": 0.0, - "learning_rate": 1.5233395450615326e-05, - "loss": 0.8596, + "learning_rate": 1.1250641798947437e-05, + "loss": 1.1216, "step": 12173 }, { - "epoch": 0.34498002210320494, + "epoch": 0.47632835120118944, "grad_norm": 0.0, - "learning_rate": 1.5232613358095121e-05, - "loss": 0.9797, + "learning_rate": 1.1249384507265783e-05, + "loss": 0.9953, "step": 12174 }, { - "epoch": 0.34500835954546744, + "epoch": 0.4763674778934189, "grad_norm": 0.0, - "learning_rate": 1.5231831221498667e-05, - "loss": 0.9186, + "learning_rate": 1.1248127195520583e-05, + "loss": 1.0222, "step": 12175 }, { - "epoch": 0.3450366969877299, + "epoch": 0.4764066045856483, "grad_norm": 0.0, - "learning_rate": 1.5231049040832556e-05, - "loss": 0.9578, + "learning_rate": 1.1246869863732023e-05, + "loss": 0.8782, "step": 12176 }, { - "epoch": 0.34506503442999237, + "epoch": 0.47644573127787776, "grad_norm": 0.0, - "learning_rate": 1.5230266816103379e-05, - "loss": 0.9335, + "learning_rate": 1.1245612511920297e-05, + "loss": 1.0781, "step": 12177 }, { - "epoch": 0.3450933718722548, + "epoch": 0.4764848579701072, "grad_norm": 0.0, - "learning_rate": 1.5229484547317718e-05, - "loss": 1.0912, + "learning_rate": 1.1244355140105591e-05, + "loss": 0.9978, "step": 12178 }, { - "epoch": 0.34512170931451724, + "epoch": 0.47652398466233664, "grad_norm": 0.0, - "learning_rate": 1.5228702234482172e-05, - "loss": 0.8581, + "learning_rate": 1.1243097748308106e-05, + "loss": 1.0622, "step": 12179 }, { - "epoch": 0.34515004675677974, + "epoch": 0.4765631113545661, "grad_norm": 0.0, - "learning_rate": 1.522791987760332e-05, - "loss": 0.8969, + "learning_rate": 1.1241840336548022e-05, + "loss": 1.1345, "step": 12180 }, { - "epoch": 0.3451783841990422, + "epoch": 0.4766022380467955, "grad_norm": 0.0, - "learning_rate": 1.522713747668776e-05, - "loss": 0.8945, + "learning_rate": 1.1240582904845542e-05, + "loss": 1.0077, "step": 12181 }, { - "epoch": 0.34520672164130467, + "epoch": 0.47664136473902496, "grad_norm": 0.0, - "learning_rate": 1.5226355031742081e-05, - "loss": 0.9192, + "learning_rate": 1.1239325453220858e-05, + "loss": 1.0472, "step": 12182 }, { - "epoch": 0.3452350590835671, + "epoch": 0.4766804914312544, "grad_norm": 0.0, - "learning_rate": 1.5225572542772874e-05, - "loss": 0.9588, + "learning_rate": 1.1238067981694157e-05, + "loss": 1.0973, "step": 12183 }, { - "epoch": 0.3452633965258296, + "epoch": 0.47671961812348385, "grad_norm": 0.0, - "learning_rate": 1.5224790009786725e-05, - "loss": 0.9864, + "learning_rate": 1.1236810490285638e-05, + "loss": 1.0756, "step": 12184 }, { - "epoch": 0.34529173396809204, + "epoch": 0.4767587448157133, "grad_norm": 0.0, - "learning_rate": 1.5224007432790234e-05, - "loss": 0.9808, + "learning_rate": 1.123555297901549e-05, + "loss": 0.991, "step": 12185 }, { - "epoch": 0.3453200714103545, + "epoch": 0.4767978715079427, "grad_norm": 0.0, - "learning_rate": 1.5223224811789986e-05, - "loss": 1.0759, + "learning_rate": 1.1234295447903909e-05, + "loss": 1.1052, "step": 12186 }, { - "epoch": 0.345348408852617, + "epoch": 0.47683699820017217, "grad_norm": 0.0, - "learning_rate": 1.5222442146792573e-05, - "loss": 1.0267, + "learning_rate": 1.1233037896971091e-05, + "loss": 1.0473, "step": 12187 }, { - "epoch": 0.3453767462948794, + "epoch": 0.4768761248924016, "grad_norm": 0.0, - "learning_rate": 1.5221659437804594e-05, - "loss": 0.9335, + "learning_rate": 1.1231780326237227e-05, + "loss": 1.0986, "step": 12188 }, { - "epoch": 0.3454050837371419, + "epoch": 0.47691525158463105, "grad_norm": 0.0, - "learning_rate": 1.522087668483264e-05, - "loss": 0.9128, + "learning_rate": 1.1230522735722519e-05, + "loss": 1.0674, "step": 12189 }, { - "epoch": 0.34543342117940434, + "epoch": 0.4769543782768605, "grad_norm": 0.0, - "learning_rate": 1.5220093887883301e-05, - "loss": 0.9695, + "learning_rate": 1.1229265125447155e-05, + "loss": 1.0447, "step": 12190 }, { - "epoch": 0.3454617586216668, + "epoch": 0.47699350496908993, "grad_norm": 0.0, - "learning_rate": 1.5219311046963174e-05, - "loss": 1.0297, + "learning_rate": 1.1228007495431332e-05, + "loss": 1.1258, "step": 12191 }, { - "epoch": 0.3454900960639293, + "epoch": 0.4770326316613194, "grad_norm": 0.0, - "learning_rate": 1.521852816207885e-05, - "loss": 1.0324, + "learning_rate": 1.1226749845695251e-05, + "loss": 1.0289, "step": 12192 }, { - "epoch": 0.3455184335061917, + "epoch": 0.4770717583535488, "grad_norm": 0.0, - "learning_rate": 1.5217745233236922e-05, - "loss": 0.9022, + "learning_rate": 1.1225492176259102e-05, + "loss": 0.9713, "step": 12193 }, { - "epoch": 0.3455467709484542, + "epoch": 0.47711088504577825, "grad_norm": 0.0, - "learning_rate": 1.5216962260443994e-05, - "loss": 0.8764, + "learning_rate": 1.1224234487143085e-05, + "loss": 1.0515, "step": 12194 }, { - "epoch": 0.34557510839071665, + "epoch": 0.4771500117380077, "grad_norm": 0.0, - "learning_rate": 1.5216179243706655e-05, - "loss": 0.9352, + "learning_rate": 1.1222976778367397e-05, + "loss": 1.057, "step": 12195 }, { - "epoch": 0.34560344583297914, + "epoch": 0.4771891384302371, "grad_norm": 0.0, - "learning_rate": 1.52153961830315e-05, - "loss": 0.9281, + "learning_rate": 1.1221719049952232e-05, + "loss": 1.0494, "step": 12196 }, { - "epoch": 0.3456317832752416, + "epoch": 0.4772282651224665, "grad_norm": 0.0, - "learning_rate": 1.5214613078425126e-05, - "loss": 1.0221, + "learning_rate": 1.1220461301917793e-05, + "loss": 0.9911, "step": 12197 }, { - "epoch": 0.345660120717504, + "epoch": 0.47726739181469596, "grad_norm": 0.0, - "learning_rate": 1.5213829929894131e-05, - "loss": 0.9547, + "learning_rate": 1.121920353428427e-05, + "loss": 1.071, "step": 12198 }, { - "epoch": 0.3456884581597665, + "epoch": 0.4773065185069254, "grad_norm": 0.0, - "learning_rate": 1.5213046737445108e-05, - "loss": 0.9229, + "learning_rate": 1.1217945747071871e-05, + "loss": 1.023, "step": 12199 }, { - "epoch": 0.34571679560202895, + "epoch": 0.47734564519915484, "grad_norm": 0.0, - "learning_rate": 1.5212263501084658e-05, - "loss": 0.985, + "learning_rate": 1.1216687940300789e-05, + "loss": 0.9846, "step": 12200 }, { - "epoch": 0.34574513304429144, + "epoch": 0.4773847718913843, "grad_norm": 0.0, - "learning_rate": 1.5211480220819377e-05, - "loss": 0.9174, + "learning_rate": 1.1215430113991222e-05, + "loss": 0.898, "step": 12201 }, { - "epoch": 0.3457734704865539, + "epoch": 0.4774238985836137, "grad_norm": 0.0, - "learning_rate": 1.5210696896655863e-05, - "loss": 0.9703, + "learning_rate": 1.121417226816337e-05, + "loss": 1.1094, "step": 12202 }, { - "epoch": 0.3458018079288163, + "epoch": 0.47746302527584317, "grad_norm": 0.0, - "learning_rate": 1.5209913528600715e-05, - "loss": 1.0961, + "learning_rate": 1.1212914402837434e-05, + "loss": 1.0587, "step": 12203 }, { - "epoch": 0.3458301453710788, + "epoch": 0.4775021519680726, "grad_norm": 0.0, - "learning_rate": 1.5209130116660532e-05, - "loss": 0.988, + "learning_rate": 1.1211656518033612e-05, + "loss": 0.8909, "step": 12204 }, { - "epoch": 0.34585848281334125, + "epoch": 0.47754127866030205, "grad_norm": 0.0, - "learning_rate": 1.5208346660841908e-05, - "loss": 0.803, + "learning_rate": 1.1210398613772105e-05, + "loss": 1.2284, "step": 12205 }, { - "epoch": 0.34588682025560374, + "epoch": 0.4775804053525315, "grad_norm": 0.0, - "learning_rate": 1.5207563161151444e-05, - "loss": 0.9575, + "learning_rate": 1.1209140690073112e-05, + "loss": 1.0344, "step": 12206 }, { - "epoch": 0.3459151576978662, + "epoch": 0.47761953204476093, "grad_norm": 0.0, - "learning_rate": 1.5206779617595748e-05, - "loss": 0.9089, + "learning_rate": 1.1207882746956834e-05, + "loss": 1.1573, "step": 12207 }, { - "epoch": 0.3459434951401287, + "epoch": 0.47765865873699037, "grad_norm": 0.0, - "learning_rate": 1.520599603018141e-05, - "loss": 0.8523, + "learning_rate": 1.1206624784443473e-05, + "loss": 0.9438, "step": 12208 }, { - "epoch": 0.3459718325823911, + "epoch": 0.4776977854292198, "grad_norm": 0.0, - "learning_rate": 1.5205212398915034e-05, - "loss": 0.9896, + "learning_rate": 1.1205366802553231e-05, + "loss": 1.0057, "step": 12209 }, { - "epoch": 0.34600017002465355, + "epoch": 0.47773691212144925, "grad_norm": 0.0, - "learning_rate": 1.5204428723803224e-05, - "loss": 1.0224, + "learning_rate": 1.1204108801306308e-05, + "loss": 0.8779, "step": 12210 }, { - "epoch": 0.34602850746691605, + "epoch": 0.4777760388136787, "grad_norm": 0.0, - "learning_rate": 1.5203645004852577e-05, - "loss": 0.8599, + "learning_rate": 1.120285078072291e-05, + "loss": 1.0717, "step": 12211 }, { - "epoch": 0.3460568449091785, + "epoch": 0.47781516550590813, "grad_norm": 0.0, - "learning_rate": 1.5202861242069693e-05, - "loss": 1.0588, + "learning_rate": 1.1201592740823231e-05, + "loss": 1.0237, "step": 12212 }, { - "epoch": 0.346085182351441, + "epoch": 0.4778542921981376, "grad_norm": 0.0, - "learning_rate": 1.5202077435461178e-05, - "loss": 0.9555, + "learning_rate": 1.120033468162748e-05, + "loss": 1.0033, "step": 12213 }, { - "epoch": 0.3461135197937034, + "epoch": 0.477893418890367, "grad_norm": 0.0, - "learning_rate": 1.5201293585033634e-05, - "loss": 0.9876, + "learning_rate": 1.1199076603155857e-05, + "loss": 1.0391, "step": 12214 }, { - "epoch": 0.34614185723596586, + "epoch": 0.47793254558259646, "grad_norm": 0.0, - "learning_rate": 1.5200509690793665e-05, - "loss": 0.8718, + "learning_rate": 1.1197818505428568e-05, + "loss": 1.1593, "step": 12215 }, { - "epoch": 0.34617019467822835, + "epoch": 0.4779716722748259, "grad_norm": 0.0, - "learning_rate": 1.5199725752747871e-05, - "loss": 0.9413, + "learning_rate": 1.1196560388465811e-05, + "loss": 1.0455, "step": 12216 }, { - "epoch": 0.3461985321204908, + "epoch": 0.47801079896705534, "grad_norm": 0.0, - "learning_rate": 1.519894177090285e-05, - "loss": 0.9136, + "learning_rate": 1.11953022522878e-05, + "loss": 0.9842, "step": 12217 }, { - "epoch": 0.3462268695627533, + "epoch": 0.4780499256592848, "grad_norm": 0.0, - "learning_rate": 1.519815774526522e-05, - "loss": 1.0036, + "learning_rate": 1.1194044096914727e-05, + "loss": 1.1119, "step": 12218 }, { - "epoch": 0.3462552070050157, + "epoch": 0.4780890523515142, "grad_norm": 0.0, - "learning_rate": 1.5197373675841572e-05, - "loss": 0.9309, + "learning_rate": 1.1192785922366805e-05, + "loss": 1.0824, "step": 12219 }, { - "epoch": 0.3462835444472782, + "epoch": 0.47812817904374366, "grad_norm": 0.0, - "learning_rate": 1.5196589562638518e-05, - "loss": 0.8734, + "learning_rate": 1.1191527728664235e-05, + "loss": 1.1323, "step": 12220 }, { - "epoch": 0.34631188188954065, + "epoch": 0.4781673057359731, "grad_norm": 0.0, - "learning_rate": 1.5195805405662655e-05, - "loss": 0.9789, + "learning_rate": 1.119026951582722e-05, + "loss": 0.9396, "step": 12221 }, { - "epoch": 0.3463402193318031, + "epoch": 0.47820643242820254, "grad_norm": 0.0, - "learning_rate": 1.51950212049206e-05, - "loss": 0.9462, + "learning_rate": 1.1189011283875973e-05, + "loss": 1.0708, "step": 12222 }, { - "epoch": 0.3463685567740656, + "epoch": 0.478245559120432, "grad_norm": 0.0, - "learning_rate": 1.519423696041895e-05, - "loss": 1.0181, + "learning_rate": 1.118775303283069e-05, + "loss": 0.9991, "step": 12223 }, { - "epoch": 0.346396894216328, + "epoch": 0.4782846858126614, "grad_norm": 0.0, - "learning_rate": 1.5193452672164316e-05, - "loss": 0.9488, + "learning_rate": 1.1186494762711585e-05, + "loss": 1.1643, "step": 12224 }, { - "epoch": 0.3464252316585905, + "epoch": 0.4783238125048908, "grad_norm": 0.0, - "learning_rate": 1.5192668340163299e-05, - "loss": 1.0793, + "learning_rate": 1.1185236473538861e-05, + "loss": 1.0976, "step": 12225 }, { - "epoch": 0.34645356910085295, + "epoch": 0.47836293919712025, "grad_norm": 0.0, - "learning_rate": 1.5191883964422504e-05, - "loss": 0.8851, + "learning_rate": 1.1183978165332723e-05, + "loss": 0.9603, "step": 12226 }, { - "epoch": 0.3464819065431154, + "epoch": 0.4784020658893497, "grad_norm": 0.0, - "learning_rate": 1.5191099544948552e-05, - "loss": 1.0387, + "learning_rate": 1.1182719838113378e-05, + "loss": 0.8558, "step": 12227 }, { - "epoch": 0.3465102439853779, + "epoch": 0.47844119258157913, "grad_norm": 0.0, - "learning_rate": 1.5190315081748033e-05, - "loss": 0.9748, + "learning_rate": 1.1181461491901038e-05, + "loss": 0.9347, "step": 12228 }, { - "epoch": 0.3465385814276403, + "epoch": 0.4784803192738086, "grad_norm": 0.0, - "learning_rate": 1.5189530574827567e-05, - "loss": 0.9193, + "learning_rate": 1.1180203126715905e-05, + "loss": 1.1669, "step": 12229 }, { - "epoch": 0.3465669188699028, + "epoch": 0.478519445966038, "grad_norm": 0.0, - "learning_rate": 1.5188746024193756e-05, - "loss": 0.9256, + "learning_rate": 1.1178944742578189e-05, + "loss": 1.0831, "step": 12230 }, { - "epoch": 0.34659525631216526, + "epoch": 0.47855857265826746, "grad_norm": 0.0, - "learning_rate": 1.5187961429853211e-05, - "loss": 0.9836, + "learning_rate": 1.1177686339508096e-05, + "loss": 1.0791, "step": 12231 }, { - "epoch": 0.34662359375442775, + "epoch": 0.4785976993504969, "grad_norm": 0.0, - "learning_rate": 1.5187176791812539e-05, - "loss": 1.0011, + "learning_rate": 1.1176427917525839e-05, + "loss": 1.0539, "step": 12232 }, { - "epoch": 0.3466519311966902, + "epoch": 0.47863682604272634, "grad_norm": 0.0, - "learning_rate": 1.5186392110078353e-05, - "loss": 0.8595, + "learning_rate": 1.1175169476651622e-05, + "loss": 0.943, "step": 12233 }, { - "epoch": 0.3466802686389526, + "epoch": 0.4786759527349558, "grad_norm": 0.0, - "learning_rate": 1.5185607384657257e-05, - "loss": 0.9074, + "learning_rate": 1.1173911016905656e-05, + "loss": 0.8899, "step": 12234 }, { - "epoch": 0.3467086060812151, + "epoch": 0.4787150794271852, "grad_norm": 0.0, - "learning_rate": 1.5184822615555867e-05, - "loss": 0.9026, + "learning_rate": 1.1172652538308152e-05, + "loss": 1.0531, "step": 12235 }, { - "epoch": 0.34673694352347756, + "epoch": 0.47875420611941466, "grad_norm": 0.0, - "learning_rate": 1.5184037802780792e-05, - "loss": 0.8981, + "learning_rate": 1.1171394040879312e-05, + "loss": 0.9378, "step": 12236 }, { - "epoch": 0.34676528096574005, + "epoch": 0.4787933328116441, "grad_norm": 0.0, - "learning_rate": 1.518325294633864e-05, - "loss": 1.014, + "learning_rate": 1.1170135524639355e-05, + "loss": 1.0353, "step": 12237 }, { - "epoch": 0.3467936184080025, + "epoch": 0.47883245950387354, "grad_norm": 0.0, - "learning_rate": 1.518246804623602e-05, - "loss": 0.9804, + "learning_rate": 1.1168876989608487e-05, + "loss": 1.1834, "step": 12238 }, { - "epoch": 0.34682195585026493, + "epoch": 0.478871586196103, "grad_norm": 0.0, - "learning_rate": 1.5181683102479553e-05, - "loss": 0.9564, + "learning_rate": 1.1167618435806924e-05, + "loss": 1.0555, "step": 12239 }, { - "epoch": 0.3468502932925274, + "epoch": 0.4789107128883324, "grad_norm": 0.0, - "learning_rate": 1.5180898115075841e-05, - "loss": 0.8395, + "learning_rate": 1.1166359863254868e-05, + "loss": 1.1141, "step": 12240 }, { - "epoch": 0.34687863073478986, + "epoch": 0.47894983958056186, "grad_norm": 0.0, - "learning_rate": 1.5180113084031502e-05, - "loss": 0.9882, + "learning_rate": 1.1165101271972534e-05, + "loss": 1.0858, "step": 12241 }, { - "epoch": 0.34690696817705235, + "epoch": 0.4789889662727913, "grad_norm": 0.0, - "learning_rate": 1.5179328009353147e-05, - "loss": 0.9894, + "learning_rate": 1.1163842661980133e-05, + "loss": 1.0499, "step": 12242 }, { - "epoch": 0.3469353056193148, + "epoch": 0.47902809296502075, "grad_norm": 0.0, - "learning_rate": 1.517854289104739e-05, - "loss": 0.9901, + "learning_rate": 1.1162584033297878e-05, + "loss": 1.0059, "step": 12243 }, { - "epoch": 0.3469636430615773, + "epoch": 0.4790672196572502, "grad_norm": 0.0, - "learning_rate": 1.5177757729120841e-05, - "loss": 0.9082, + "learning_rate": 1.1161325385945981e-05, + "loss": 1.0627, "step": 12244 }, { - "epoch": 0.3469919805038397, + "epoch": 0.47910634634947963, "grad_norm": 0.0, - "learning_rate": 1.5176972523580115e-05, - "loss": 0.9724, + "learning_rate": 1.1160066719944651e-05, + "loss": 0.9973, "step": 12245 }, { - "epoch": 0.34702031794610216, + "epoch": 0.47914547304170907, "grad_norm": 0.0, - "learning_rate": 1.517618727443183e-05, - "loss": 0.8645, + "learning_rate": 1.1158808035314105e-05, + "loss": 1.1213, "step": 12246 }, { - "epoch": 0.34704865538836466, + "epoch": 0.4791845997339385, "grad_norm": 0.0, - "learning_rate": 1.5175401981682597e-05, - "loss": 0.9547, + "learning_rate": 1.1157549332074553e-05, + "loss": 1.0117, "step": 12247 }, { - "epoch": 0.3470769928306271, + "epoch": 0.47922372642616795, "grad_norm": 0.0, - "learning_rate": 1.5174616645339031e-05, - "loss": 1.0012, + "learning_rate": 1.1156290610246213e-05, + "loss": 1.0231, "step": 12248 }, { - "epoch": 0.3471053302728896, + "epoch": 0.4792628531183974, "grad_norm": 0.0, - "learning_rate": 1.5173831265407749e-05, - "loss": 1.0157, + "learning_rate": 1.1155031869849291e-05, + "loss": 1.0317, "step": 12249 }, { - "epoch": 0.34713366771515203, + "epoch": 0.47930197981062683, "grad_norm": 0.0, - "learning_rate": 1.5173045841895362e-05, - "loss": 1.0256, + "learning_rate": 1.1153773110904005e-05, + "loss": 0.9365, "step": 12250 }, { - "epoch": 0.34716200515741447, + "epoch": 0.4793411065028563, "grad_norm": 0.0, - "learning_rate": 1.5172260374808492e-05, - "loss": 0.8691, + "learning_rate": 1.1152514333430571e-05, + "loss": 1.0031, "step": 12251 }, { - "epoch": 0.34719034259967696, + "epoch": 0.4793802331950857, "grad_norm": 0.0, - "learning_rate": 1.5171474864153747e-05, - "loss": 0.9643, + "learning_rate": 1.1151255537449198e-05, + "loss": 1.0782, "step": 12252 }, { - "epoch": 0.3472186800419394, + "epoch": 0.4794193598873151, "grad_norm": 0.0, - "learning_rate": 1.5170689309937751e-05, - "loss": 1.0455, + "learning_rate": 1.1149996722980102e-05, + "loss": 1.1268, "step": 12253 }, { - "epoch": 0.3472470174842019, + "epoch": 0.47945848657954454, "grad_norm": 0.0, - "learning_rate": 1.5169903712167121e-05, - "loss": 0.8381, + "learning_rate": 1.1148737890043503e-05, + "loss": 1.1112, "step": 12254 }, { - "epoch": 0.34727535492646433, + "epoch": 0.479497613271774, "grad_norm": 0.0, - "learning_rate": 1.5169118070848473e-05, - "loss": 0.9005, + "learning_rate": 1.1147479038659614e-05, + "loss": 1.1471, "step": 12255 }, { - "epoch": 0.3473036923687268, + "epoch": 0.4795367399640034, "grad_norm": 0.0, - "learning_rate": 1.5168332385988422e-05, - "loss": 0.9308, + "learning_rate": 1.1146220168848645e-05, + "loss": 1.1523, "step": 12256 }, { - "epoch": 0.34733202981098926, + "epoch": 0.47957586665623286, "grad_norm": 0.0, - "learning_rate": 1.5167546657593587e-05, - "loss": 0.8653, + "learning_rate": 1.1144961280630822e-05, + "loss": 1.0597, "step": 12257 }, { - "epoch": 0.3473603672532517, + "epoch": 0.4796149933484623, "grad_norm": 0.0, - "learning_rate": 1.5166760885670591e-05, - "loss": 0.9008, + "learning_rate": 1.1143702374026351e-05, + "loss": 1.0648, "step": 12258 }, { - "epoch": 0.3473887046955142, + "epoch": 0.47965412004069174, "grad_norm": 0.0, - "learning_rate": 1.5165975070226045e-05, - "loss": 0.9114, + "learning_rate": 1.1142443449055455e-05, + "loss": 1.0917, "step": 12259 }, { - "epoch": 0.34741704213777663, + "epoch": 0.4796932467329212, "grad_norm": 0.0, - "learning_rate": 1.5165189211266573e-05, - "loss": 0.8464, + "learning_rate": 1.1141184505738349e-05, + "loss": 0.8868, "step": 12260 }, { - "epoch": 0.3474453795800391, + "epoch": 0.4797323734251506, "grad_norm": 0.0, - "learning_rate": 1.5164403308798798e-05, - "loss": 0.9625, + "learning_rate": 1.1139925544095249e-05, + "loss": 1.0096, "step": 12261 }, { - "epoch": 0.34747371702230156, + "epoch": 0.47977150011738007, "grad_norm": 0.0, - "learning_rate": 1.5163617362829338e-05, - "loss": 0.953, + "learning_rate": 1.1138666564146375e-05, + "loss": 1.1215, "step": 12262 }, { - "epoch": 0.347502054464564, + "epoch": 0.4798106268096095, "grad_norm": 0.0, - "learning_rate": 1.5162831373364806e-05, - "loss": 1.052, + "learning_rate": 1.113740756591194e-05, + "loss": 1.101, "step": 12263 }, { - "epoch": 0.3475303919068265, + "epoch": 0.47984975350183895, "grad_norm": 0.0, - "learning_rate": 1.5162045340411826e-05, - "loss": 1.0395, + "learning_rate": 1.1136148549412166e-05, + "loss": 1.1234, "step": 12264 }, { - "epoch": 0.34755872934908894, + "epoch": 0.4798888801940684, "grad_norm": 0.0, - "learning_rate": 1.5161259263977028e-05, - "loss": 0.8682, + "learning_rate": 1.1134889514667269e-05, + "loss": 1.019, "step": 12265 }, { - "epoch": 0.34758706679135143, + "epoch": 0.47992800688629783, "grad_norm": 0.0, - "learning_rate": 1.5160473144067026e-05, - "loss": 0.919, + "learning_rate": 1.113363046169747e-05, + "loss": 1.0787, "step": 12266 }, { - "epoch": 0.34761540423361387, + "epoch": 0.47996713357852727, "grad_norm": 0.0, - "learning_rate": 1.5159686980688438e-05, - "loss": 0.9735, + "learning_rate": 1.1132371390522985e-05, + "loss": 1.0706, "step": 12267 }, { - "epoch": 0.34764374167587636, + "epoch": 0.4800062602707567, "grad_norm": 0.0, - "learning_rate": 1.5158900773847891e-05, - "loss": 1.002, + "learning_rate": 1.1131112301164035e-05, + "loss": 1.0904, "step": 12268 }, { - "epoch": 0.3476720791181388, + "epoch": 0.48004538696298615, "grad_norm": 0.0, - "learning_rate": 1.5158114523552011e-05, - "loss": 0.9628, + "learning_rate": 1.112985319364084e-05, + "loss": 1.0413, "step": 12269 }, { - "epoch": 0.34770041656040124, + "epoch": 0.4800845136552156, "grad_norm": 0.0, - "learning_rate": 1.5157328229807412e-05, - "loss": 0.99, + "learning_rate": 1.1128594067973617e-05, + "loss": 1.0192, "step": 12270 }, { - "epoch": 0.34772875400266373, + "epoch": 0.48012364034744504, "grad_norm": 0.0, - "learning_rate": 1.5156541892620725e-05, - "loss": 0.947, + "learning_rate": 1.112733492418259e-05, + "loss": 1.123, "step": 12271 }, { - "epoch": 0.34775709144492617, + "epoch": 0.4801627670396745, "grad_norm": 0.0, - "learning_rate": 1.515575551199857e-05, - "loss": 0.9413, + "learning_rate": 1.1126075762287972e-05, + "loss": 0.8829, "step": 12272 }, { - "epoch": 0.34778542888718866, + "epoch": 0.4802018937319039, "grad_norm": 0.0, - "learning_rate": 1.5154969087947575e-05, - "loss": 0.9671, + "learning_rate": 1.1124816582309992e-05, + "loss": 1.0741, "step": 12273 }, { - "epoch": 0.3478137663294511, + "epoch": 0.48024102042413336, "grad_norm": 0.0, - "learning_rate": 1.5154182620474359e-05, - "loss": 0.8945, + "learning_rate": 1.1123557384268869e-05, + "loss": 1.09, "step": 12274 }, { - "epoch": 0.34784210377171354, + "epoch": 0.4802801471163628, "grad_norm": 0.0, - "learning_rate": 1.5153396109585547e-05, - "loss": 1.0455, + "learning_rate": 1.1122298168184817e-05, + "loss": 1.0677, "step": 12275 }, { - "epoch": 0.34787044121397603, + "epoch": 0.48031927380859224, "grad_norm": 0.0, - "learning_rate": 1.5152609555287767e-05, - "loss": 0.9479, + "learning_rate": 1.1121038934078066e-05, + "loss": 1.0942, "step": 12276 }, { - "epoch": 0.34789877865623847, + "epoch": 0.4803584005008217, "grad_norm": 0.0, - "learning_rate": 1.5151822957587645e-05, - "loss": 1.0159, + "learning_rate": 1.1119779681968834e-05, + "loss": 0.9463, "step": 12277 }, { - "epoch": 0.34792711609850097, + "epoch": 0.4803975271930511, "grad_norm": 0.0, - "learning_rate": 1.5151036316491805e-05, - "loss": 0.9307, + "learning_rate": 1.1118520411877343e-05, + "loss": 1.0401, "step": 12278 }, { - "epoch": 0.3479554535407634, + "epoch": 0.48043665388528056, "grad_norm": 0.0, - "learning_rate": 1.5150249632006871e-05, - "loss": 0.9305, + "learning_rate": 1.1117261123823817e-05, + "loss": 0.8649, "step": 12279 }, { - "epoch": 0.3479837909830259, + "epoch": 0.48047578057751, "grad_norm": 0.0, - "learning_rate": 1.5149462904139474e-05, - "loss": 1.0145, + "learning_rate": 1.1116001817828477e-05, + "loss": 1.076, "step": 12280 }, { - "epoch": 0.34801212842528834, + "epoch": 0.4805149072697394, "grad_norm": 0.0, - "learning_rate": 1.5148676132896238e-05, - "loss": 0.9566, + "learning_rate": 1.1114742493911544e-05, + "loss": 1.0713, "step": 12281 }, { - "epoch": 0.3480404658675508, + "epoch": 0.48055403396196883, "grad_norm": 0.0, - "learning_rate": 1.5147889318283793e-05, - "loss": 0.965, + "learning_rate": 1.1113483152093246e-05, + "loss": 1.0065, "step": 12282 }, { - "epoch": 0.34806880330981327, + "epoch": 0.48059316065419827, "grad_norm": 0.0, - "learning_rate": 1.5147102460308757e-05, - "loss": 0.8919, + "learning_rate": 1.1112223792393801e-05, + "loss": 0.9676, "step": 12283 }, { - "epoch": 0.3480971407520757, + "epoch": 0.4806322873464277, "grad_norm": 0.0, - "learning_rate": 1.5146315558977773e-05, - "loss": 0.847, + "learning_rate": 1.1110964414833438e-05, + "loss": 1.0699, "step": 12284 }, { - "epoch": 0.3481254781943382, + "epoch": 0.48067141403865715, "grad_norm": 0.0, - "learning_rate": 1.514552861429746e-05, - "loss": 1.0659, + "learning_rate": 1.1109705019432378e-05, + "loss": 0.9639, "step": 12285 }, { - "epoch": 0.34815381563660064, + "epoch": 0.4807105407308866, "grad_norm": 0.0, - "learning_rate": 1.5144741626274448e-05, - "loss": 0.9001, + "learning_rate": 1.1108445606210845e-05, + "loss": 0.9771, "step": 12286 }, { - "epoch": 0.3481821530788631, + "epoch": 0.48074966742311603, "grad_norm": 0.0, - "learning_rate": 1.514395459491537e-05, - "loss": 0.9557, + "learning_rate": 1.1107186175189064e-05, + "loss": 1.1089, "step": 12287 }, { - "epoch": 0.34821049052112557, + "epoch": 0.4807887941153455, "grad_norm": 0.0, - "learning_rate": 1.5143167520226849e-05, - "loss": 0.9012, + "learning_rate": 1.1105926726387264e-05, + "loss": 1.02, "step": 12288 }, { - "epoch": 0.348238827963388, + "epoch": 0.4808279208075749, "grad_norm": 0.0, - "learning_rate": 1.5142380402215519e-05, - "loss": 0.9303, + "learning_rate": 1.1104667259825666e-05, + "loss": 1.0085, "step": 12289 }, { - "epoch": 0.3482671654056505, + "epoch": 0.48086704749980436, "grad_norm": 0.0, - "learning_rate": 1.514159324088801e-05, - "loss": 0.9668, + "learning_rate": 1.1103407775524493e-05, + "loss": 1.0582, "step": 12290 }, { - "epoch": 0.34829550284791294, + "epoch": 0.4809061741920338, "grad_norm": 0.0, - "learning_rate": 1.5140806036250952e-05, - "loss": 0.9245, + "learning_rate": 1.1102148273503974e-05, + "loss": 1.0388, "step": 12291 }, { - "epoch": 0.34832384029017543, + "epoch": 0.48094530088426324, "grad_norm": 0.0, - "learning_rate": 1.5140018788310974e-05, - "loss": 1.006, + "learning_rate": 1.1100888753784336e-05, + "loss": 1.1771, "step": 12292 }, { - "epoch": 0.3483521777324379, + "epoch": 0.4809844275764927, "grad_norm": 0.0, - "learning_rate": 1.5139231497074711e-05, - "loss": 0.9783, + "learning_rate": 1.1099629216385805e-05, + "loss": 0.9927, "step": 12293 }, { - "epoch": 0.3483805151747003, + "epoch": 0.4810235542687221, "grad_norm": 0.0, - "learning_rate": 1.5138444162548791e-05, - "loss": 1.0368, + "learning_rate": 1.1098369661328606e-05, + "loss": 1.0826, "step": 12294 }, { - "epoch": 0.3484088526169628, + "epoch": 0.48106268096095156, "grad_norm": 0.0, - "learning_rate": 1.513765678473985e-05, - "loss": 0.9187, + "learning_rate": 1.1097110088632965e-05, + "loss": 1.0704, "step": 12295 }, { - "epoch": 0.34843719005922524, + "epoch": 0.481101807653181, "grad_norm": 0.0, - "learning_rate": 1.5136869363654513e-05, - "loss": 0.9933, + "learning_rate": 1.1095850498319112e-05, + "loss": 1.1493, "step": 12296 }, { - "epoch": 0.34846552750148774, + "epoch": 0.48114093434541044, "grad_norm": 0.0, - "learning_rate": 1.5136081899299422e-05, - "loss": 0.9705, + "learning_rate": 1.1094590890407273e-05, + "loss": 1.017, "step": 12297 }, { - "epoch": 0.3484938649437502, + "epoch": 0.4811800610376399, "grad_norm": 0.0, - "learning_rate": 1.5135294391681207e-05, - "loss": 0.9547, + "learning_rate": 1.1093331264917676e-05, + "loss": 0.9736, "step": 12298 }, { - "epoch": 0.3485222023860126, + "epoch": 0.4812191877298693, "grad_norm": 0.0, - "learning_rate": 1.5134506840806498e-05, - "loss": 0.8447, + "learning_rate": 1.109207162187055e-05, + "loss": 1.1584, "step": 12299 }, { - "epoch": 0.3485505398282751, + "epoch": 0.48125831442209877, "grad_norm": 0.0, - "learning_rate": 1.5133719246681931e-05, - "loss": 1.0399, + "learning_rate": 1.1090811961286124e-05, + "loss": 1.0405, "step": 12300 }, { - "epoch": 0.34857887727053755, + "epoch": 0.4812974411143282, "grad_norm": 0.0, - "learning_rate": 1.513293160931414e-05, - "loss": 1.0482, + "learning_rate": 1.1089552283184619e-05, + "loss": 1.0935, "step": 12301 }, { - "epoch": 0.34860721471280004, + "epoch": 0.48133656780655765, "grad_norm": 0.0, - "learning_rate": 1.5132143928709764e-05, - "loss": 1.0979, + "learning_rate": 1.1088292587586273e-05, + "loss": 0.9096, "step": 12302 }, { - "epoch": 0.3486355521550625, + "epoch": 0.4813756944987871, "grad_norm": 0.0, - "learning_rate": 1.5131356204875428e-05, - "loss": 0.9299, + "learning_rate": 1.108703287451131e-05, + "loss": 1.1302, "step": 12303 }, { - "epoch": 0.34866388959732497, + "epoch": 0.48141482119101653, "grad_norm": 0.0, - "learning_rate": 1.5130568437817776e-05, - "loss": 0.9329, + "learning_rate": 1.1085773143979962e-05, + "loss": 0.8746, "step": 12304 }, { - "epoch": 0.3486922270395874, + "epoch": 0.48145394788324597, "grad_norm": 0.0, - "learning_rate": 1.5129780627543445e-05, - "loss": 0.9706, + "learning_rate": 1.108451339601246e-05, + "loss": 1.055, "step": 12305 }, { - "epoch": 0.34872056448184985, + "epoch": 0.4814930745754754, "grad_norm": 0.0, - "learning_rate": 1.5128992774059063e-05, - "loss": 0.9455, + "learning_rate": 1.1083253630629029e-05, + "loss": 0.9646, "step": 12306 }, { - "epoch": 0.34874890192411234, + "epoch": 0.48153220126770485, "grad_norm": 0.0, - "learning_rate": 1.5128204877371272e-05, - "loss": 1.0142, + "learning_rate": 1.1081993847849906e-05, + "loss": 1.2194, "step": 12307 }, { - "epoch": 0.3487772393663748, + "epoch": 0.4815713279599343, "grad_norm": 0.0, - "learning_rate": 1.5127416937486704e-05, - "loss": 0.9973, + "learning_rate": 1.1080734047695314e-05, + "loss": 1.0803, "step": 12308 }, { - "epoch": 0.3488055768086373, + "epoch": 0.48161045465216373, "grad_norm": 0.0, - "learning_rate": 1.5126628954412002e-05, - "loss": 0.9006, + "learning_rate": 1.107947423018549e-05, + "loss": 1.064, "step": 12309 }, { - "epoch": 0.3488339142508997, + "epoch": 0.4816495813443931, "grad_norm": 0.0, - "learning_rate": 1.5125840928153797e-05, - "loss": 0.8772, + "learning_rate": 1.1078214395340658e-05, + "loss": 1.0248, "step": 12310 }, { - "epoch": 0.34886225169316215, + "epoch": 0.48168870803662256, "grad_norm": 0.0, - "learning_rate": 1.5125052858718735e-05, - "loss": 0.8828, + "learning_rate": 1.1076954543181058e-05, + "loss": 1.0555, "step": 12311 }, { - "epoch": 0.34889058913542464, + "epoch": 0.481727834728852, "grad_norm": 0.0, - "learning_rate": 1.5124264746113446e-05, - "loss": 0.8995, + "learning_rate": 1.1075694673726916e-05, + "loss": 0.9638, "step": 12312 }, { - "epoch": 0.3489189265776871, + "epoch": 0.48176696142108144, "grad_norm": 0.0, - "learning_rate": 1.5123476590344572e-05, - "loss": 1.0906, + "learning_rate": 1.1074434786998467e-05, + "loss": 1.0918, "step": 12313 }, { - "epoch": 0.3489472640199496, + "epoch": 0.4818060881133109, "grad_norm": 0.0, - "learning_rate": 1.5122688391418754e-05, - "loss": 1.0135, + "learning_rate": 1.1073174883015938e-05, + "loss": 1.1724, "step": 12314 }, { - "epoch": 0.348975601462212, + "epoch": 0.4818452148055403, "grad_norm": 0.0, - "learning_rate": 1.512190014934263e-05, - "loss": 1.0524, + "learning_rate": 1.1071914961799569e-05, + "loss": 1.0672, "step": 12315 }, { - "epoch": 0.3490039389044745, + "epoch": 0.48188434149776976, "grad_norm": 0.0, - "learning_rate": 1.5121111864122837e-05, - "loss": 1.0242, + "learning_rate": 1.1070655023369587e-05, + "loss": 1.059, "step": 12316 }, { - "epoch": 0.34903227634673695, + "epoch": 0.4819234681899992, "grad_norm": 0.0, - "learning_rate": 1.5120323535766018e-05, - "loss": 0.9035, + "learning_rate": 1.1069395067746226e-05, + "loss": 1.2188, "step": 12317 }, { - "epoch": 0.3490606137889994, + "epoch": 0.48196259488222865, "grad_norm": 0.0, - "learning_rate": 1.5119535164278812e-05, - "loss": 0.952, + "learning_rate": 1.1068135094949724e-05, + "loss": 1.0081, "step": 12318 }, { - "epoch": 0.3490889512312619, + "epoch": 0.4820017215744581, "grad_norm": 0.0, - "learning_rate": 1.5118746749667862e-05, - "loss": 0.8919, + "learning_rate": 1.106687510500031e-05, + "loss": 0.873, "step": 12319 }, { - "epoch": 0.3491172886735243, + "epoch": 0.4820408482666875, "grad_norm": 0.0, - "learning_rate": 1.5117958291939807e-05, - "loss": 0.9752, + "learning_rate": 1.1065615097918217e-05, + "loss": 1.0433, "step": 12320 }, { - "epoch": 0.3491456261157868, + "epoch": 0.48207997495891697, "grad_norm": 0.0, - "learning_rate": 1.5117169791101285e-05, - "loss": 0.929, + "learning_rate": 1.106435507372368e-05, + "loss": 1.0217, "step": 12321 }, { - "epoch": 0.34917396355804925, + "epoch": 0.4821191016511464, "grad_norm": 0.0, - "learning_rate": 1.5116381247158947e-05, - "loss": 0.9478, + "learning_rate": 1.1063095032436937e-05, + "loss": 1.0776, "step": 12322 }, { - "epoch": 0.3492023010003117, + "epoch": 0.48215822834337585, "grad_norm": 0.0, - "learning_rate": 1.5115592660119425e-05, - "loss": 0.9579, + "learning_rate": 1.1061834974078216e-05, + "loss": 1.1572, "step": 12323 }, { - "epoch": 0.3492306384425742, + "epoch": 0.4821973550356053, "grad_norm": 0.0, - "learning_rate": 1.5114804029989372e-05, - "loss": 0.9679, + "learning_rate": 1.106057489866776e-05, + "loss": 0.9785, "step": 12324 }, { - "epoch": 0.3492589758848366, + "epoch": 0.48223648172783473, "grad_norm": 0.0, - "learning_rate": 1.511401535677542e-05, - "loss": 0.9107, + "learning_rate": 1.1059314806225796e-05, + "loss": 1.0594, "step": 12325 }, { - "epoch": 0.3492873133270991, + "epoch": 0.4822756084200642, "grad_norm": 0.0, - "learning_rate": 1.511322664048422e-05, - "loss": 0.8558, + "learning_rate": 1.1058054696772566e-05, + "loss": 1.0921, "step": 12326 }, { - "epoch": 0.34931565076936155, + "epoch": 0.4823147351122936, "grad_norm": 0.0, - "learning_rate": 1.5112437881122412e-05, - "loss": 0.8492, + "learning_rate": 1.1056794570328304e-05, + "loss": 0.9583, "step": 12327 }, { - "epoch": 0.34934398821162405, + "epoch": 0.48235386180452305, "grad_norm": 0.0, - "learning_rate": 1.5111649078696644e-05, - "loss": 0.8974, + "learning_rate": 1.1055534426913244e-05, + "loss": 1.0763, "step": 12328 }, { - "epoch": 0.3493723256538865, + "epoch": 0.4823929884967525, "grad_norm": 0.0, - "learning_rate": 1.5110860233213556e-05, - "loss": 0.8097, + "learning_rate": 1.1054274266547624e-05, + "loss": 1.1947, "step": 12329 }, { - "epoch": 0.3494006630961489, + "epoch": 0.48243211518898194, "grad_norm": 0.0, - "learning_rate": 1.5110071344679794e-05, - "loss": 0.8562, + "learning_rate": 1.1053014089251681e-05, + "loss": 0.9815, "step": 12330 }, { - "epoch": 0.3494290005384114, + "epoch": 0.4824712418812114, "grad_norm": 0.0, - "learning_rate": 1.5109282413102002e-05, - "loss": 0.9142, + "learning_rate": 1.105175389504565e-05, + "loss": 1.1507, "step": 12331 }, { - "epoch": 0.34945733798067385, + "epoch": 0.4825103685734408, "grad_norm": 0.0, - "learning_rate": 1.5108493438486831e-05, - "loss": 1.0392, + "learning_rate": 1.105049368394977e-05, + "loss": 1.1882, "step": 12332 }, { - "epoch": 0.34948567542293635, + "epoch": 0.48254949526567026, "grad_norm": 0.0, - "learning_rate": 1.5107704420840919e-05, - "loss": 0.8446, + "learning_rate": 1.1049233455984281e-05, + "loss": 0.9546, "step": 12333 }, { - "epoch": 0.3495140128651988, + "epoch": 0.4825886219578997, "grad_norm": 0.0, - "learning_rate": 1.5106915360170917e-05, - "loss": 1.0645, + "learning_rate": 1.1047973211169411e-05, + "loss": 1.0121, "step": 12334 }, { - "epoch": 0.3495423503074612, + "epoch": 0.48262774865012914, "grad_norm": 0.0, - "learning_rate": 1.5106126256483472e-05, - "loss": 0.8444, + "learning_rate": 1.104671294952541e-05, + "loss": 1.058, "step": 12335 }, { - "epoch": 0.3495706877497237, + "epoch": 0.4826668753423586, "grad_norm": 0.0, - "learning_rate": 1.5105337109785228e-05, - "loss": 0.8278, + "learning_rate": 1.104545267107251e-05, + "loss": 0.9872, "step": 12336 }, { - "epoch": 0.34959902519198616, + "epoch": 0.482706002034588, "grad_norm": 0.0, - "learning_rate": 1.5104547920082833e-05, - "loss": 0.8845, + "learning_rate": 1.1044192375830946e-05, + "loss": 1.0864, "step": 12337 }, { - "epoch": 0.34962736263424865, + "epoch": 0.4827451287268174, "grad_norm": 0.0, - "learning_rate": 1.5103758687382937e-05, - "loss": 0.9502, + "learning_rate": 1.1042932063820966e-05, + "loss": 1.1233, "step": 12338 }, { - "epoch": 0.3496557000765111, + "epoch": 0.48278425541904685, "grad_norm": 0.0, - "learning_rate": 1.5102969411692186e-05, - "loss": 0.9437, + "learning_rate": 1.10416717350628e-05, + "loss": 1.1473, "step": 12339 }, { - "epoch": 0.3496840375187736, + "epoch": 0.4828233821112763, "grad_norm": 0.0, - "learning_rate": 1.5102180093017231e-05, - "loss": 0.8087, + "learning_rate": 1.1040411389576692e-05, + "loss": 1.034, "step": 12340 }, { - "epoch": 0.349712374961036, + "epoch": 0.48286250880350573, "grad_norm": 0.0, - "learning_rate": 1.5101390731364715e-05, - "loss": 0.9298, + "learning_rate": 1.1039151027382881e-05, + "loss": 1.0639, "step": 12341 }, { - "epoch": 0.34974071240329846, + "epoch": 0.48290163549573517, "grad_norm": 0.0, - "learning_rate": 1.5100601326741291e-05, - "loss": 1.0818, + "learning_rate": 1.1037890648501608e-05, + "loss": 1.0572, "step": 12342 }, { - "epoch": 0.34976904984556095, + "epoch": 0.4829407621879646, "grad_norm": 0.0, - "learning_rate": 1.5099811879153605e-05, - "loss": 0.8072, + "learning_rate": 1.103663025295311e-05, + "loss": 1.1467, "step": 12343 }, { - "epoch": 0.3497973872878234, + "epoch": 0.48297988888019405, "grad_norm": 0.0, - "learning_rate": 1.5099022388608315e-05, - "loss": 0.9486, + "learning_rate": 1.103536984075763e-05, + "loss": 1.1277, "step": 12344 }, { - "epoch": 0.3498257247300859, + "epoch": 0.4830190155724235, "grad_norm": 0.0, - "learning_rate": 1.5098232855112068e-05, - "loss": 0.9829, + "learning_rate": 1.1034109411935407e-05, + "loss": 1.1335, "step": 12345 }, { - "epoch": 0.3498540621723483, + "epoch": 0.48305814226465293, "grad_norm": 0.0, - "learning_rate": 1.5097443278671505e-05, - "loss": 1.0021, + "learning_rate": 1.1032848966506684e-05, + "loss": 0.9642, "step": 12346 }, { - "epoch": 0.34988239961461076, + "epoch": 0.4830972689568824, "grad_norm": 0.0, - "learning_rate": 1.5096653659293287e-05, - "loss": 1.0815, + "learning_rate": 1.1031588504491702e-05, + "loss": 1.0609, "step": 12347 }, { - "epoch": 0.34991073705687326, + "epoch": 0.4831363956491118, "grad_norm": 0.0, - "learning_rate": 1.5095863996984065e-05, - "loss": 1.02, + "learning_rate": 1.1030328025910699e-05, + "loss": 1.0224, "step": 12348 }, { - "epoch": 0.3499390744991357, + "epoch": 0.48317552234134126, "grad_norm": 0.0, - "learning_rate": 1.5095074291750486e-05, - "loss": 0.9813, + "learning_rate": 1.1029067530783919e-05, + "loss": 0.9938, "step": 12349 }, { - "epoch": 0.3499674119413982, + "epoch": 0.4832146490335707, "grad_norm": 0.0, - "learning_rate": 1.5094284543599205e-05, - "loss": 1.0482, + "learning_rate": 1.1027807019131605e-05, + "loss": 0.9536, "step": 12350 }, { - "epoch": 0.3499957493836606, + "epoch": 0.48325377572580014, "grad_norm": 0.0, - "learning_rate": 1.5093494752536875e-05, - "loss": 1.0147, + "learning_rate": 1.1026546490973997e-05, + "loss": 1.2173, "step": 12351 }, { - "epoch": 0.3500240868259231, + "epoch": 0.4832929024180296, "grad_norm": 0.0, - "learning_rate": 1.5092704918570146e-05, - "loss": 0.8702, + "learning_rate": 1.102528594633134e-05, + "loss": 0.9964, "step": 12352 }, { - "epoch": 0.35005242426818556, + "epoch": 0.483332029110259, "grad_norm": 0.0, - "learning_rate": 1.5091915041705671e-05, - "loss": 1.0565, + "learning_rate": 1.1024025385223872e-05, + "loss": 1.118, "step": 12353 }, { - "epoch": 0.350080761710448, + "epoch": 0.48337115580248846, "grad_norm": 0.0, - "learning_rate": 1.5091125121950105e-05, - "loss": 0.8966, + "learning_rate": 1.1022764807671842e-05, + "loss": 0.9207, "step": 12354 }, { - "epoch": 0.3501090991527105, + "epoch": 0.4834102824947179, "grad_norm": 0.0, - "learning_rate": 1.5090335159310105e-05, - "loss": 1.0127, + "learning_rate": 1.1021504213695493e-05, + "loss": 1.0642, "step": 12355 }, { - "epoch": 0.35013743659497293, + "epoch": 0.48344940918694734, "grad_norm": 0.0, - "learning_rate": 1.5089545153792318e-05, - "loss": 0.9753, + "learning_rate": 1.1020243603315066e-05, + "loss": 1.1888, "step": 12356 }, { - "epoch": 0.3501657740372354, + "epoch": 0.4834885358791768, "grad_norm": 0.0, - "learning_rate": 1.5088755105403405e-05, - "loss": 0.9417, + "learning_rate": 1.10189829765508e-05, + "loss": 1.1865, "step": 12357 }, { - "epoch": 0.35019411147949786, + "epoch": 0.4835276625714062, "grad_norm": 0.0, - "learning_rate": 1.5087965014150015e-05, - "loss": 0.9244, + "learning_rate": 1.101772233342295e-05, + "loss": 1.0617, "step": 12358 }, { - "epoch": 0.3502224489217603, + "epoch": 0.48356678926363567, "grad_norm": 0.0, - "learning_rate": 1.5087174880038808e-05, - "loss": 0.9622, + "learning_rate": 1.1016461673951751e-05, + "loss": 0.9912, "step": 12359 }, { - "epoch": 0.3502507863640228, + "epoch": 0.4836059159558651, "grad_norm": 0.0, - "learning_rate": 1.508638470307644e-05, - "loss": 0.9763, + "learning_rate": 1.1015200998157453e-05, + "loss": 0.9865, "step": 12360 }, { - "epoch": 0.35027912380628523, + "epoch": 0.48364504264809455, "grad_norm": 0.0, - "learning_rate": 1.5085594483269561e-05, - "loss": 0.8886, + "learning_rate": 1.1013940306060298e-05, + "loss": 1.0085, "step": 12361 }, { - "epoch": 0.3503074612485477, + "epoch": 0.483684169340324, "grad_norm": 0.0, - "learning_rate": 1.5084804220624833e-05, - "loss": 0.847, + "learning_rate": 1.1012679597680533e-05, + "loss": 1.0353, "step": 12362 }, { - "epoch": 0.35033579869081016, + "epoch": 0.48372329603255343, "grad_norm": 0.0, - "learning_rate": 1.508401391514891e-05, - "loss": 1.1011, + "learning_rate": 1.1011418873038404e-05, + "loss": 1.1057, "step": 12363 }, { - "epoch": 0.3503641361330726, + "epoch": 0.48376242272478287, "grad_norm": 0.0, - "learning_rate": 1.5083223566848451e-05, - "loss": 0.8887, + "learning_rate": 1.1010158132154153e-05, + "loss": 1.0336, "step": 12364 }, { - "epoch": 0.3503924735753351, + "epoch": 0.4838015494170123, "grad_norm": 0.0, - "learning_rate": 1.5082433175730114e-05, - "loss": 0.8761, + "learning_rate": 1.1008897375048031e-05, + "loss": 1.0436, "step": 12365 }, { - "epoch": 0.35042081101759753, + "epoch": 0.48384067610924175, "grad_norm": 0.0, - "learning_rate": 1.508164274180055e-05, - "loss": 0.8421, + "learning_rate": 1.100763660174028e-05, + "loss": 0.9689, "step": 12366 }, { - "epoch": 0.35044914845986, + "epoch": 0.48387980280147114, "grad_norm": 0.0, - "learning_rate": 1.5080852265066425e-05, - "loss": 1.0662, + "learning_rate": 1.1006375812251144e-05, + "loss": 1.1183, "step": 12367 }, { - "epoch": 0.35047748590212247, + "epoch": 0.4839189294937006, "grad_norm": 0.0, - "learning_rate": 1.5080061745534398e-05, - "loss": 0.9888, + "learning_rate": 1.1005115006600879e-05, + "loss": 1.1691, "step": 12368 }, { - "epoch": 0.35050582334438496, + "epoch": 0.48395805618593, "grad_norm": 0.0, - "learning_rate": 1.5079271183211118e-05, - "loss": 0.9023, + "learning_rate": 1.1003854184809725e-05, + "loss": 1.1825, "step": 12369 }, { - "epoch": 0.3505341607866474, + "epoch": 0.48399718287815946, "grad_norm": 0.0, - "learning_rate": 1.5078480578103256e-05, - "loss": 0.9145, + "learning_rate": 1.100259334689793e-05, + "loss": 0.9419, "step": 12370 }, { - "epoch": 0.35056249822890984, + "epoch": 0.4840363095703889, "grad_norm": 0.0, - "learning_rate": 1.5077689930217462e-05, - "loss": 1.0698, + "learning_rate": 1.1001332492885741e-05, + "loss": 1.0982, "step": 12371 }, { - "epoch": 0.35059083567117233, + "epoch": 0.48407543626261834, "grad_norm": 0.0, - "learning_rate": 1.5076899239560403e-05, - "loss": 0.9891, + "learning_rate": 1.1000071622793406e-05, + "loss": 1.0175, "step": 12372 }, { - "epoch": 0.35061917311343477, + "epoch": 0.4841145629548478, "grad_norm": 0.0, - "learning_rate": 1.5076108506138739e-05, - "loss": 0.9321, + "learning_rate": 1.0998810736641179e-05, + "loss": 1.0251, "step": 12373 }, { - "epoch": 0.35064751055569726, + "epoch": 0.4841536896470772, "grad_norm": 0.0, - "learning_rate": 1.507531772995912e-05, - "loss": 0.9616, + "learning_rate": 1.0997549834449297e-05, + "loss": 1.0525, "step": 12374 }, { - "epoch": 0.3506758479979597, + "epoch": 0.48419281633930666, "grad_norm": 0.0, - "learning_rate": 1.5074526911028222e-05, - "loss": 0.9362, + "learning_rate": 1.099628891623802e-05, + "loss": 0.9992, "step": 12375 }, { - "epoch": 0.35070418544022214, + "epoch": 0.4842319430315361, "grad_norm": 0.0, - "learning_rate": 1.5073736049352696e-05, - "loss": 0.8561, + "learning_rate": 1.0995027982027588e-05, + "loss": 1.1165, "step": 12376 }, { - "epoch": 0.35073252288248463, + "epoch": 0.48427106972376555, "grad_norm": 0.0, - "learning_rate": 1.5072945144939208e-05, - "loss": 0.9666, + "learning_rate": 1.0993767031838253e-05, + "loss": 1.0962, "step": 12377 }, { - "epoch": 0.35076086032474707, + "epoch": 0.484310196415995, "grad_norm": 0.0, - "learning_rate": 1.5072154197794421e-05, - "loss": 0.9846, + "learning_rate": 1.099250606569027e-05, + "loss": 1.0291, "step": 12378 }, { - "epoch": 0.35078919776700956, + "epoch": 0.4843493231082244, "grad_norm": 0.0, - "learning_rate": 1.5071363207924994e-05, - "loss": 0.8839, + "learning_rate": 1.0991245083603877e-05, + "loss": 1.0457, "step": 12379 }, { - "epoch": 0.350817535209272, + "epoch": 0.48438844980045387, "grad_norm": 0.0, - "learning_rate": 1.5070572175337591e-05, - "loss": 1.0108, + "learning_rate": 1.0989984085599335e-05, + "loss": 1.1866, "step": 12380 }, { - "epoch": 0.3508458726515345, + "epoch": 0.4844275764926833, "grad_norm": 0.0, - "learning_rate": 1.5069781100038878e-05, - "loss": 0.8352, + "learning_rate": 1.0988723071696885e-05, + "loss": 1.0265, "step": 12381 }, { - "epoch": 0.35087421009379693, + "epoch": 0.48446670318491275, "grad_norm": 0.0, - "learning_rate": 1.5068989982035516e-05, - "loss": 0.9985, + "learning_rate": 1.0987462041916783e-05, + "loss": 0.9712, "step": 12382 }, { - "epoch": 0.3509025475360594, + "epoch": 0.4845058298771422, "grad_norm": 0.0, - "learning_rate": 1.5068198821334166e-05, - "loss": 0.8884, + "learning_rate": 1.0986200996279277e-05, + "loss": 1.0492, "step": 12383 }, { - "epoch": 0.35093088497832187, + "epoch": 0.48454495656937163, "grad_norm": 0.0, - "learning_rate": 1.5067407617941499e-05, - "loss": 1.0011, + "learning_rate": 1.0984939934804621e-05, + "loss": 0.9456, "step": 12384 }, { - "epoch": 0.3509592224205843, + "epoch": 0.4845840832616011, "grad_norm": 0.0, - "learning_rate": 1.5066616371864174e-05, - "loss": 0.9294, + "learning_rate": 1.0983678857513063e-05, + "loss": 1.0252, "step": 12385 }, { - "epoch": 0.3509875598628468, + "epoch": 0.4846232099538305, "grad_norm": 0.0, - "learning_rate": 1.5065825083108858e-05, - "loss": 0.9888, + "learning_rate": 1.0982417764424853e-05, + "loss": 1.0978, "step": 12386 }, { - "epoch": 0.35101589730510924, + "epoch": 0.48466233664605995, "grad_norm": 0.0, - "learning_rate": 1.5065033751682214e-05, - "loss": 0.8829, + "learning_rate": 1.0981156655560247e-05, + "loss": 1.1857, "step": 12387 }, { - "epoch": 0.3510442347473717, + "epoch": 0.4847014633382894, "grad_norm": 0.0, - "learning_rate": 1.5064242377590912e-05, - "loss": 0.9215, + "learning_rate": 1.0979895530939495e-05, + "loss": 1.0187, "step": 12388 }, { - "epoch": 0.35107257218963417, + "epoch": 0.48474059003051884, "grad_norm": 0.0, - "learning_rate": 1.5063450960841616e-05, - "loss": 0.9186, + "learning_rate": 1.0978634390582847e-05, + "loss": 0.9872, "step": 12389 }, { - "epoch": 0.3511009096318966, + "epoch": 0.4847797167227483, "grad_norm": 0.0, - "learning_rate": 1.5062659501440994e-05, - "loss": 1.0303, + "learning_rate": 1.0977373234510557e-05, + "loss": 0.964, "step": 12390 }, { - "epoch": 0.3511292470741591, + "epoch": 0.4848188434149777, "grad_norm": 0.0, - "learning_rate": 1.5061867999395708e-05, - "loss": 1.0123, + "learning_rate": 1.097611206274288e-05, + "loss": 1.0268, "step": 12391 }, { - "epoch": 0.35115758451642154, + "epoch": 0.48485797010720716, "grad_norm": 0.0, - "learning_rate": 1.506107645471243e-05, - "loss": 0.9511, + "learning_rate": 1.0974850875300063e-05, + "loss": 1.0454, "step": 12392 }, { - "epoch": 0.35118592195868403, + "epoch": 0.4848970967994366, "grad_norm": 0.0, - "learning_rate": 1.5060284867397825e-05, - "loss": 0.9171, + "learning_rate": 1.0973589672202365e-05, + "loss": 1.0661, "step": 12393 }, { - "epoch": 0.35121425940094647, + "epoch": 0.48493622349166604, "grad_norm": 0.0, - "learning_rate": 1.5059493237458563e-05, - "loss": 0.9566, + "learning_rate": 1.0972328453470039e-05, + "loss": 1.1208, "step": 12394 }, { - "epoch": 0.3512425968432089, + "epoch": 0.4849753501838954, "grad_norm": 0.0, - "learning_rate": 1.5058701564901309e-05, - "loss": 0.861, + "learning_rate": 1.0971067219123331e-05, + "loss": 1.1219, "step": 12395 }, { - "epoch": 0.3512709342854714, + "epoch": 0.48501447687612487, "grad_norm": 0.0, - "learning_rate": 1.505790984973273e-05, - "loss": 0.8816, + "learning_rate": 1.0969805969182503e-05, + "loss": 1.0468, "step": 12396 }, { - "epoch": 0.35129927172773384, + "epoch": 0.4850536035683543, "grad_norm": 0.0, - "learning_rate": 1.5057118091959504e-05, - "loss": 0.9, + "learning_rate": 1.0968544703667805e-05, + "loss": 1.1065, "step": 12397 }, { - "epoch": 0.35132760916999634, + "epoch": 0.48509273026058375, "grad_norm": 0.0, - "learning_rate": 1.5056326291588293e-05, - "loss": 0.9547, + "learning_rate": 1.0967283422599495e-05, + "loss": 1.0222, "step": 12398 }, { - "epoch": 0.3513559466122588, + "epoch": 0.4851318569528132, "grad_norm": 0.0, - "learning_rate": 1.5055534448625766e-05, - "loss": 1.02, + "learning_rate": 1.0966022125997825e-05, + "loss": 1.0372, "step": 12399 }, { - "epoch": 0.3513842840545212, + "epoch": 0.48517098364504263, "grad_norm": 0.0, - "learning_rate": 1.5054742563078594e-05, - "loss": 0.9039, + "learning_rate": 1.0964760813883048e-05, + "loss": 1.0981, "step": 12400 }, { - "epoch": 0.3514126214967837, + "epoch": 0.48521011033727207, "grad_norm": 0.0, - "learning_rate": 1.5053950634953451e-05, - "loss": 0.9871, + "learning_rate": 1.0963499486275421e-05, + "loss": 1.1418, "step": 12401 }, { - "epoch": 0.35144095893904614, + "epoch": 0.4852492370295015, "grad_norm": 0.0, - "learning_rate": 1.5053158664257005e-05, - "loss": 0.8749, + "learning_rate": 1.0962238143195203e-05, + "loss": 1.0432, "step": 12402 }, { - "epoch": 0.35146929638130864, + "epoch": 0.48528836372173095, "grad_norm": 0.0, - "learning_rate": 1.5052366650995927e-05, - "loss": 0.8681, + "learning_rate": 1.0960976784662642e-05, + "loss": 1.0609, "step": 12403 }, { - "epoch": 0.3514976338235711, + "epoch": 0.4853274904139604, "grad_norm": 0.0, - "learning_rate": 1.5051574595176886e-05, - "loss": 0.9161, + "learning_rate": 1.0959715410698003e-05, + "loss": 1.0766, "step": 12404 }, { - "epoch": 0.35152597126583357, + "epoch": 0.48536661710618983, "grad_norm": 0.0, - "learning_rate": 1.505078249680656e-05, - "loss": 1.0215, + "learning_rate": 1.0958454021321536e-05, + "loss": 0.935, "step": 12405 }, { - "epoch": 0.351554308708096, + "epoch": 0.4854057437984193, "grad_norm": 0.0, - "learning_rate": 1.5049990355891617e-05, - "loss": 1.032, + "learning_rate": 1.0957192616553494e-05, + "loss": 1.0788, "step": 12406 }, { - "epoch": 0.35158264615035845, + "epoch": 0.4854448704906487, "grad_norm": 0.0, - "learning_rate": 1.5049198172438728e-05, - "loss": 0.888, + "learning_rate": 1.0955931196414143e-05, + "loss": 1.0982, "step": 12407 }, { - "epoch": 0.35161098359262094, + "epoch": 0.48548399718287816, "grad_norm": 0.0, - "learning_rate": 1.5048405946454568e-05, - "loss": 0.8949, + "learning_rate": 1.0954669760923733e-05, + "loss": 0.998, "step": 12408 }, { - "epoch": 0.3516393210348834, + "epoch": 0.4855231238751076, "grad_norm": 0.0, - "learning_rate": 1.504761367794581e-05, - "loss": 1.1101, + "learning_rate": 1.0953408310102522e-05, + "loss": 1.0328, "step": 12409 }, { - "epoch": 0.3516676584771459, + "epoch": 0.48556225056733704, "grad_norm": 0.0, - "learning_rate": 1.5046821366919128e-05, - "loss": 0.9642, + "learning_rate": 1.095214684397077e-05, + "loss": 1.1035, "step": 12410 }, { - "epoch": 0.3516959959194083, + "epoch": 0.4856013772595665, "grad_norm": 0.0, - "learning_rate": 1.5046029013381197e-05, - "loss": 0.9607, + "learning_rate": 1.095088536254873e-05, + "loss": 1.0686, "step": 12411 }, { - "epoch": 0.35172433336167075, + "epoch": 0.4856405039517959, "grad_norm": 0.0, - "learning_rate": 1.5045236617338688e-05, - "loss": 0.925, + "learning_rate": 1.0949623865856664e-05, + "loss": 1.0193, "step": 12412 }, { - "epoch": 0.35175267080393324, + "epoch": 0.48567963064402536, "grad_norm": 0.0, - "learning_rate": 1.5044444178798279e-05, - "loss": 0.9709, + "learning_rate": 1.0948362353914832e-05, + "loss": 1.0833, "step": 12413 }, { - "epoch": 0.3517810082461957, + "epoch": 0.4857187573362548, "grad_norm": 0.0, - "learning_rate": 1.5043651697766642e-05, - "loss": 1.0226, + "learning_rate": 1.0947100826743487e-05, + "loss": 0.9197, "step": 12414 }, { - "epoch": 0.3518093456884582, + "epoch": 0.48575788402848424, "grad_norm": 0.0, - "learning_rate": 1.5042859174250453e-05, - "loss": 0.7856, + "learning_rate": 1.0945839284362885e-05, + "loss": 1.0703, "step": 12415 }, { - "epoch": 0.3518376831307206, + "epoch": 0.4857970107207137, "grad_norm": 0.0, - "learning_rate": 1.5042066608256389e-05, - "loss": 0.9748, + "learning_rate": 1.0944577726793296e-05, + "loss": 0.9691, "step": 12416 }, { - "epoch": 0.3518660205729831, + "epoch": 0.4858361374129431, "grad_norm": 0.0, - "learning_rate": 1.5041273999791128e-05, - "loss": 0.8345, + "learning_rate": 1.0943316154054966e-05, + "loss": 1.001, "step": 12417 }, { - "epoch": 0.35189435801524555, + "epoch": 0.48587526410517257, "grad_norm": 0.0, - "learning_rate": 1.5040481348861345e-05, - "loss": 0.7592, + "learning_rate": 1.0942054566168166e-05, + "loss": 1.077, "step": 12418 }, { - "epoch": 0.351922695457508, + "epoch": 0.485914390797402, "grad_norm": 0.0, - "learning_rate": 1.5039688655473712e-05, - "loss": 0.8776, + "learning_rate": 1.0940792963153146e-05, + "loss": 0.974, "step": 12419 }, { - "epoch": 0.3519510328997705, + "epoch": 0.48595351748963145, "grad_norm": 0.0, - "learning_rate": 1.5038895919634913e-05, - "loss": 1.0882, + "learning_rate": 1.0939531345030173e-05, + "loss": 1.0704, "step": 12420 }, { - "epoch": 0.3519793703420329, + "epoch": 0.4859926441818609, "grad_norm": 0.0, - "learning_rate": 1.5038103141351617e-05, - "loss": 0.9857, + "learning_rate": 1.0938269711819501e-05, + "loss": 1.0197, "step": 12421 }, { - "epoch": 0.3520077077842954, + "epoch": 0.48603177087409033, "grad_norm": 0.0, - "learning_rate": 1.5037310320630512e-05, - "loss": 0.9814, + "learning_rate": 1.0937008063541395e-05, + "loss": 1.0748, "step": 12422 }, { - "epoch": 0.35203604522655785, + "epoch": 0.48607089756631977, "grad_norm": 0.0, - "learning_rate": 1.5036517457478272e-05, - "loss": 0.8741, + "learning_rate": 1.0935746400216114e-05, + "loss": 0.9746, "step": 12423 }, { - "epoch": 0.3520643826688203, + "epoch": 0.48611002425854916, "grad_norm": 0.0, - "learning_rate": 1.5035724551901576e-05, - "loss": 1.0101, + "learning_rate": 1.0934484721863917e-05, + "loss": 1.1205, "step": 12424 }, { - "epoch": 0.3520927201110828, + "epoch": 0.4861491509507786, "grad_norm": 0.0, - "learning_rate": 1.5034931603907099e-05, - "loss": 0.9776, + "learning_rate": 1.0933223028505066e-05, + "loss": 1.1014, "step": 12425 }, { - "epoch": 0.3521210575533452, + "epoch": 0.48618827764300804, "grad_norm": 0.0, - "learning_rate": 1.5034138613501525e-05, - "loss": 1.0027, + "learning_rate": 1.0931961320159822e-05, + "loss": 1.0159, "step": 12426 }, { - "epoch": 0.3521493949956077, + "epoch": 0.4862274043352375, "grad_norm": 0.0, - "learning_rate": 1.503334558069153e-05, - "loss": 0.8293, + "learning_rate": 1.093069959684845e-05, + "loss": 1.0637, "step": 12427 }, { - "epoch": 0.35217773243787015, + "epoch": 0.4862665310274669, "grad_norm": 0.0, - "learning_rate": 1.5032552505483799e-05, - "loss": 0.9215, + "learning_rate": 1.0929437858591207e-05, + "loss": 1.1098, "step": 12428 }, { - "epoch": 0.35220606988013264, + "epoch": 0.48630565771969636, "grad_norm": 0.0, - "learning_rate": 1.5031759387885008e-05, - "loss": 0.9524, + "learning_rate": 1.0928176105408359e-05, + "loss": 1.1262, "step": 12429 }, { - "epoch": 0.3522344073223951, + "epoch": 0.4863447844119258, "grad_norm": 0.0, - "learning_rate": 1.5030966227901842e-05, - "loss": 0.9957, + "learning_rate": 1.0926914337320162e-05, + "loss": 1.1241, "step": 12430 }, { - "epoch": 0.3522627447646575, + "epoch": 0.48638391110415524, "grad_norm": 0.0, - "learning_rate": 1.5030173025540977e-05, - "loss": 1.0103, + "learning_rate": 1.0925652554346884e-05, + "loss": 1.0333, "step": 12431 }, { - "epoch": 0.35229108220692, + "epoch": 0.4864230377963847, "grad_norm": 0.0, - "learning_rate": 1.5029379780809094e-05, - "loss": 0.9588, + "learning_rate": 1.0924390756508785e-05, + "loss": 0.9966, "step": 12432 }, { - "epoch": 0.35231941964918245, + "epoch": 0.4864621644886141, "grad_norm": 0.0, - "learning_rate": 1.5028586493712883e-05, - "loss": 0.9471, + "learning_rate": 1.0923128943826132e-05, + "loss": 1.0479, "step": 12433 }, { - "epoch": 0.35234775709144495, + "epoch": 0.48650129118084356, "grad_norm": 0.0, - "learning_rate": 1.5027793164259014e-05, - "loss": 0.8927, + "learning_rate": 1.0921867116319183e-05, + "loss": 1.077, "step": 12434 }, { - "epoch": 0.3523760945337074, + "epoch": 0.486540417873073, "grad_norm": 0.0, - "learning_rate": 1.502699979245418e-05, - "loss": 0.9183, + "learning_rate": 1.09206052740082e-05, + "loss": 1.0723, "step": 12435 }, { - "epoch": 0.3524044319759698, + "epoch": 0.48657954456530245, "grad_norm": 0.0, - "learning_rate": 1.5026206378305062e-05, - "loss": 1.0034, + "learning_rate": 1.0919343416913455e-05, + "loss": 1.1193, "step": 12436 }, { - "epoch": 0.3524327694182323, + "epoch": 0.4866186712575319, "grad_norm": 0.0, - "learning_rate": 1.5025412921818338e-05, - "loss": 0.9377, + "learning_rate": 1.0918081545055203e-05, + "loss": 1.1361, "step": 12437 }, { - "epoch": 0.35246110686049476, + "epoch": 0.48665779794976133, "grad_norm": 0.0, - "learning_rate": 1.5024619423000695e-05, - "loss": 1.0078, + "learning_rate": 1.0916819658453715e-05, + "loss": 1.1125, "step": 12438 }, { - "epoch": 0.35248944430275725, + "epoch": 0.48669692464199077, "grad_norm": 0.0, - "learning_rate": 1.502382588185882e-05, - "loss": 1.0745, + "learning_rate": 1.0915557757129246e-05, + "loss": 1.1052, "step": 12439 }, { - "epoch": 0.3525177817450197, + "epoch": 0.4867360513342202, "grad_norm": 0.0, - "learning_rate": 1.5023032298399391e-05, - "loss": 0.9322, + "learning_rate": 1.0914295841102075e-05, + "loss": 1.0208, "step": 12440 }, { - "epoch": 0.3525461191872822, + "epoch": 0.48677517802644965, "grad_norm": 0.0, - "learning_rate": 1.5022238672629094e-05, - "loss": 1.0326, + "learning_rate": 1.0913033910392452e-05, + "loss": 1.0955, "step": 12441 }, { - "epoch": 0.3525744566295446, + "epoch": 0.4868143047186791, "grad_norm": 0.0, - "learning_rate": 1.502144500455462e-05, - "loss": 0.8935, + "learning_rate": 1.0911771965020652e-05, + "loss": 0.9963, "step": 12442 }, { - "epoch": 0.35260279407180706, + "epoch": 0.48685343141090853, "grad_norm": 0.0, - "learning_rate": 1.5020651294182646e-05, - "loss": 0.919, + "learning_rate": 1.0910510005006938e-05, + "loss": 1.0346, "step": 12443 }, { - "epoch": 0.35263113151406955, + "epoch": 0.486892558103138, "grad_norm": 0.0, - "learning_rate": 1.5019857541519866e-05, - "loss": 0.9265, + "learning_rate": 1.0909248030371571e-05, + "loss": 1.0563, "step": 12444 }, { - "epoch": 0.352659468956332, + "epoch": 0.4869316847953674, "grad_norm": 0.0, - "learning_rate": 1.501906374657296e-05, - "loss": 0.9425, + "learning_rate": 1.0907986041134821e-05, + "loss": 1.06, "step": 12445 }, { - "epoch": 0.3526878063985945, + "epoch": 0.48697081148759686, "grad_norm": 0.0, - "learning_rate": 1.5018269909348617e-05, - "loss": 0.9238, + "learning_rate": 1.0906724037316952e-05, + "loss": 1.0919, "step": 12446 }, { - "epoch": 0.3527161438408569, + "epoch": 0.4870099381798263, "grad_norm": 0.0, - "learning_rate": 1.501747602985352e-05, - "loss": 0.9808, + "learning_rate": 1.0905462018938234e-05, + "loss": 1.1177, "step": 12447 }, { - "epoch": 0.35274448128311936, + "epoch": 0.48704906487205574, "grad_norm": 0.0, - "learning_rate": 1.5016682108094362e-05, - "loss": 0.8227, + "learning_rate": 1.0904199986018926e-05, + "loss": 1.0178, "step": 12448 }, { - "epoch": 0.35277281872538185, + "epoch": 0.4870881915642852, "grad_norm": 0.0, - "learning_rate": 1.5015888144077826e-05, - "loss": 0.9374, + "learning_rate": 1.0902937938579304e-05, + "loss": 0.9907, "step": 12449 }, { - "epoch": 0.3528011561676443, + "epoch": 0.4871273182565146, "grad_norm": 0.0, - "learning_rate": 1.5015094137810602e-05, - "loss": 0.9331, + "learning_rate": 1.0901675876639628e-05, + "loss": 1.0357, "step": 12450 }, { - "epoch": 0.3528294936099068, + "epoch": 0.48716644494874406, "grad_norm": 0.0, - "learning_rate": 1.5014300089299381e-05, - "loss": 0.9594, + "learning_rate": 1.0900413800220166e-05, + "loss": 1.0623, "step": 12451 }, { - "epoch": 0.3528578310521692, + "epoch": 0.48720557164097344, "grad_norm": 0.0, - "learning_rate": 1.5013505998550846e-05, - "loss": 0.9547, + "learning_rate": 1.0899151709341189e-05, + "loss": 1.1957, "step": 12452 }, { - "epoch": 0.3528861684944317, + "epoch": 0.4872446983332029, "grad_norm": 0.0, - "learning_rate": 1.5012711865571686e-05, - "loss": 0.9311, + "learning_rate": 1.0897889604022962e-05, + "loss": 1.1233, "step": 12453 }, { - "epoch": 0.35291450593669416, + "epoch": 0.4872838250254323, "grad_norm": 0.0, - "learning_rate": 1.5011917690368594e-05, - "loss": 0.8595, + "learning_rate": 1.0896627484285752e-05, + "loss": 1.0987, "step": 12454 }, { - "epoch": 0.3529428433789566, + "epoch": 0.48732295171766177, "grad_norm": 0.0, - "learning_rate": 1.501112347294826e-05, - "loss": 0.9329, + "learning_rate": 1.0895365350149829e-05, + "loss": 1.0948, "step": 12455 }, { - "epoch": 0.3529711808212191, + "epoch": 0.4873620784098912, "grad_norm": 0.0, - "learning_rate": 1.5010329213317372e-05, - "loss": 0.8366, + "learning_rate": 1.0894103201635459e-05, + "loss": 1.1071, "step": 12456 }, { - "epoch": 0.3529995182634815, + "epoch": 0.48740120510212065, "grad_norm": 0.0, - "learning_rate": 1.5009534911482617e-05, - "loss": 0.8923, + "learning_rate": 1.0892841038762913e-05, + "loss": 0.9587, "step": 12457 }, { - "epoch": 0.353027855705744, + "epoch": 0.4874403317943501, "grad_norm": 0.0, - "learning_rate": 1.5008740567450692e-05, - "loss": 0.9283, + "learning_rate": 1.0891578861552461e-05, + "loss": 1.057, "step": 12458 }, { - "epoch": 0.35305619314800646, + "epoch": 0.48747945848657953, "grad_norm": 0.0, - "learning_rate": 1.5007946181228286e-05, - "loss": 0.8659, + "learning_rate": 1.0890316670024366e-05, + "loss": 0.9791, "step": 12459 }, { - "epoch": 0.3530845305902689, + "epoch": 0.48751858517880897, "grad_norm": 0.0, - "learning_rate": 1.5007151752822087e-05, - "loss": 0.8654, + "learning_rate": 1.0889054464198903e-05, + "loss": 1.142, "step": 12460 }, { - "epoch": 0.3531128680325314, + "epoch": 0.4875577118710384, "grad_norm": 0.0, - "learning_rate": 1.5006357282238791e-05, - "loss": 0.9827, + "learning_rate": 1.0887792244096336e-05, + "loss": 1.0201, "step": 12461 }, { - "epoch": 0.35314120547479383, + "epoch": 0.48759683856326785, "grad_norm": 0.0, - "learning_rate": 1.5005562769485087e-05, - "loss": 0.8305, + "learning_rate": 1.0886530009736942e-05, + "loss": 0.9074, "step": 12462 }, { - "epoch": 0.3531695429170563, + "epoch": 0.4876359652554973, "grad_norm": 0.0, - "learning_rate": 1.500476821456767e-05, - "loss": 0.9654, + "learning_rate": 1.0885267761140988e-05, + "loss": 1.0925, "step": 12463 }, { - "epoch": 0.35319788035931876, + "epoch": 0.48767509194772674, "grad_norm": 0.0, - "learning_rate": 1.5003973617493234e-05, - "loss": 0.9541, + "learning_rate": 1.0884005498328737e-05, + "loss": 0.9878, "step": 12464 }, { - "epoch": 0.35322621780158125, + "epoch": 0.4877142186399562, "grad_norm": 0.0, - "learning_rate": 1.5003178978268468e-05, - "loss": 1.0391, + "learning_rate": 1.088274322132047e-05, + "loss": 0.946, "step": 12465 }, { - "epoch": 0.3532545552438437, + "epoch": 0.4877533453321856, "grad_norm": 0.0, - "learning_rate": 1.5002384296900068e-05, - "loss": 0.8593, + "learning_rate": 1.0881480930136452e-05, + "loss": 0.9931, "step": 12466 }, { - "epoch": 0.35328289268610613, + "epoch": 0.48779247202441506, "grad_norm": 0.0, - "learning_rate": 1.5001589573394726e-05, - "loss": 0.9245, + "learning_rate": 1.0880218624796954e-05, + "loss": 1.0647, "step": 12467 }, { - "epoch": 0.3533112301283686, + "epoch": 0.4878315987166445, "grad_norm": 0.0, - "learning_rate": 1.500079480775914e-05, - "loss": 0.9515, + "learning_rate": 1.0878956305322248e-05, + "loss": 1.056, "step": 12468 }, { - "epoch": 0.35333956757063106, + "epoch": 0.48787072540887394, "grad_norm": 0.0, - "learning_rate": 1.5000000000000002e-05, - "loss": 0.9205, + "learning_rate": 1.0877693971732608e-05, + "loss": 1.0277, "step": 12469 }, { - "epoch": 0.35336790501289356, + "epoch": 0.4879098521011034, "grad_norm": 0.0, - "learning_rate": 1.4999205150124005e-05, - "loss": 0.9998, + "learning_rate": 1.0876431624048298e-05, + "loss": 1.0475, "step": 12470 }, { - "epoch": 0.353396242455156, + "epoch": 0.4879489787933328, "grad_norm": 0.0, - "learning_rate": 1.499841025813785e-05, - "loss": 0.9133, + "learning_rate": 1.0875169262289597e-05, + "loss": 1.1679, "step": 12471 }, { - "epoch": 0.35342457989741843, + "epoch": 0.48798810548556226, "grad_norm": 0.0, - "learning_rate": 1.4997615324048229e-05, - "loss": 0.8782, + "learning_rate": 1.0873906886476777e-05, + "loss": 0.8572, "step": 12472 }, { - "epoch": 0.3534529173396809, + "epoch": 0.4880272321777917, "grad_norm": 0.0, - "learning_rate": 1.4996820347861834e-05, - "loss": 1.0087, + "learning_rate": 1.0872644496630103e-05, + "loss": 1.0372, "step": 12473 }, { - "epoch": 0.35348125478194337, + "epoch": 0.48806635887002114, "grad_norm": 0.0, - "learning_rate": 1.4996025329585368e-05, - "loss": 0.9994, + "learning_rate": 1.0871382092769853e-05, + "loss": 1.1256, "step": 12474 }, { - "epoch": 0.35350959222420586, + "epoch": 0.4881054855622506, "grad_norm": 0.0, - "learning_rate": 1.4995230269225526e-05, - "loss": 0.8576, + "learning_rate": 1.0870119674916298e-05, + "loss": 1.0976, "step": 12475 }, { - "epoch": 0.3535379296664683, + "epoch": 0.48814461225448, "grad_norm": 0.0, - "learning_rate": 1.4994435166789006e-05, - "loss": 0.8486, + "learning_rate": 1.0868857243089714e-05, + "loss": 1.0829, "step": 12476 }, { - "epoch": 0.3535662671087308, + "epoch": 0.48818373894670947, "grad_norm": 0.0, - "learning_rate": 1.4993640022282504e-05, - "loss": 0.9511, + "learning_rate": 1.0867594797310368e-05, + "loss": 1.0652, "step": 12477 }, { - "epoch": 0.35359460455099323, + "epoch": 0.4882228656389389, "grad_norm": 0.0, - "learning_rate": 1.4992844835712715e-05, - "loss": 0.9058, + "learning_rate": 1.086633233759854e-05, + "loss": 1.1113, "step": 12478 }, { - "epoch": 0.35362294199325567, + "epoch": 0.48826199233116835, "grad_norm": 0.0, - "learning_rate": 1.499204960708634e-05, - "loss": 0.9693, + "learning_rate": 1.0865069863974496e-05, + "loss": 0.8906, "step": 12479 }, { - "epoch": 0.35365127943551816, + "epoch": 0.4883011190233978, "grad_norm": 0.0, - "learning_rate": 1.499125433641008e-05, - "loss": 0.8618, + "learning_rate": 1.0863807376458516e-05, + "loss": 1.0865, "step": 12480 }, { - "epoch": 0.3536796168777806, + "epoch": 0.4883402457156272, "grad_norm": 0.0, - "learning_rate": 1.4990459023690628e-05, - "loss": 0.9279, + "learning_rate": 1.0862544875070875e-05, + "loss": 0.9153, "step": 12481 }, { - "epoch": 0.3537079543200431, + "epoch": 0.4883793724078566, "grad_norm": 0.0, - "learning_rate": 1.498966366893469e-05, - "loss": 0.983, + "learning_rate": 1.0861282359831842e-05, + "loss": 0.9704, "step": 12482 }, { - "epoch": 0.35373629176230553, + "epoch": 0.48841849910008606, "grad_norm": 0.0, - "learning_rate": 1.4988868272148959e-05, - "loss": 0.8999, + "learning_rate": 1.0860019830761693e-05, + "loss": 1.086, "step": 12483 }, { - "epoch": 0.35376462920456797, + "epoch": 0.4884576257923155, "grad_norm": 0.0, - "learning_rate": 1.498807283334014e-05, - "loss": 1.0083, + "learning_rate": 1.08587572878807e-05, + "loss": 1.0405, "step": 12484 }, { - "epoch": 0.35379296664683046, + "epoch": 0.48849675248454494, "grad_norm": 0.0, - "learning_rate": 1.4987277352514933e-05, - "loss": 0.8878, + "learning_rate": 1.0857494731209148e-05, + "loss": 1.0519, "step": 12485 }, { - "epoch": 0.3538213040890929, + "epoch": 0.4885358791767744, "grad_norm": 0.0, - "learning_rate": 1.4986481829680033e-05, - "loss": 0.9597, + "learning_rate": 1.08562321607673e-05, + "loss": 1.192, "step": 12486 }, { - "epoch": 0.3538496415313554, + "epoch": 0.4885750058690038, "grad_norm": 0.0, - "learning_rate": 1.4985686264842145e-05, - "loss": 1.0198, + "learning_rate": 1.0854969576575438e-05, + "loss": 1.0288, "step": 12487 }, { - "epoch": 0.35387797897361783, + "epoch": 0.48861413256123326, "grad_norm": 0.0, - "learning_rate": 1.498489065800797e-05, - "loss": 0.9684, + "learning_rate": 1.0853706978653835e-05, + "loss": 0.9993, "step": 12488 }, { - "epoch": 0.35390631641588033, + "epoch": 0.4886532592534627, "grad_norm": 0.0, - "learning_rate": 1.4984095009184215e-05, - "loss": 0.9905, + "learning_rate": 1.085244436702277e-05, + "loss": 1.0053, "step": 12489 }, { - "epoch": 0.35393465385814277, + "epoch": 0.48869238594569214, "grad_norm": 0.0, - "learning_rate": 1.4983299318377574e-05, - "loss": 0.8382, + "learning_rate": 1.0851181741702515e-05, + "loss": 0.9828, "step": 12490 }, { - "epoch": 0.3539629913004052, + "epoch": 0.4887315126379216, "grad_norm": 0.0, - "learning_rate": 1.4982503585594748e-05, - "loss": 0.9242, + "learning_rate": 1.084991910271335e-05, + "loss": 1.0571, "step": 12491 }, { - "epoch": 0.3539913287426677, + "epoch": 0.488770639330151, "grad_norm": 0.0, - "learning_rate": 1.4981707810842448e-05, - "loss": 0.9173, + "learning_rate": 1.0848656450075545e-05, + "loss": 0.9471, "step": 12492 }, { - "epoch": 0.35401966618493014, + "epoch": 0.48880976602238047, "grad_norm": 0.0, - "learning_rate": 1.4980911994127372e-05, - "loss": 0.8521, + "learning_rate": 1.0847393783809383e-05, + "loss": 0.9637, "step": 12493 }, { - "epoch": 0.35404800362719263, + "epoch": 0.4888488927146099, "grad_norm": 0.0, - "learning_rate": 1.4980116135456228e-05, - "loss": 0.8878, + "learning_rate": 1.084613110393514e-05, + "loss": 1.0724, "step": 12494 }, { - "epoch": 0.35407634106945507, + "epoch": 0.48888801940683935, "grad_norm": 0.0, - "learning_rate": 1.4979320234835713e-05, - "loss": 1.0935, + "learning_rate": 1.084486841047309e-05, + "loss": 0.9995, "step": 12495 }, { - "epoch": 0.3541046785117175, + "epoch": 0.4889271460990688, "grad_norm": 0.0, - "learning_rate": 1.4978524292272537e-05, - "loss": 0.8258, + "learning_rate": 1.0843605703443511e-05, + "loss": 1.1909, "step": 12496 }, { - "epoch": 0.35413301595398, + "epoch": 0.48896627279129823, "grad_norm": 0.0, - "learning_rate": 1.49777283077734e-05, - "loss": 0.9269, + "learning_rate": 1.0842342982866686e-05, + "loss": 1.1973, "step": 12497 }, { - "epoch": 0.35416135339624244, + "epoch": 0.48900539948352767, "grad_norm": 0.0, - "learning_rate": 1.4976932281345009e-05, - "loss": 0.9595, + "learning_rate": 1.0841080248762883e-05, + "loss": 1.0898, "step": 12498 }, { - "epoch": 0.35418969083850493, + "epoch": 0.4890445261757571, "grad_norm": 0.0, - "learning_rate": 1.497613621299407e-05, - "loss": 0.9813, + "learning_rate": 1.0839817501152387e-05, + "loss": 1.2204, "step": 12499 }, { - "epoch": 0.35421802828076737, + "epoch": 0.48908365286798655, "grad_norm": 0.0, - "learning_rate": 1.4975340102727287e-05, - "loss": 0.9186, + "learning_rate": 1.0838554740055479e-05, + "loss": 1.02, "step": 12500 }, { - "epoch": 0.35424636572302987, + "epoch": 0.489122779560216, "grad_norm": 0.0, - "learning_rate": 1.4974543950551371e-05, - "loss": 1.0547, + "learning_rate": 1.0837291965492425e-05, + "loss": 1.1028, "step": 12501 }, { - "epoch": 0.3542747031652923, + "epoch": 0.48916190625244543, "grad_norm": 0.0, - "learning_rate": 1.497374775647302e-05, - "loss": 0.8641, + "learning_rate": 1.0836029177483516e-05, + "loss": 0.9636, "step": 12502 }, { - "epoch": 0.35430304060755474, + "epoch": 0.4892010329446749, "grad_norm": 0.0, - "learning_rate": 1.4972951520498944e-05, - "loss": 0.9005, + "learning_rate": 1.0834766376049024e-05, + "loss": 0.9967, "step": 12503 }, { - "epoch": 0.35433137804981724, + "epoch": 0.4892401596369043, "grad_norm": 0.0, - "learning_rate": 1.4972155242635853e-05, - "loss": 0.97, + "learning_rate": 1.0833503561209232e-05, + "loss": 1.0104, "step": 12504 }, { - "epoch": 0.3543597154920797, + "epoch": 0.48927928632913376, "grad_norm": 0.0, - "learning_rate": 1.497135892289045e-05, - "loss": 0.9369, + "learning_rate": 1.0832240732984415e-05, + "loss": 0.9753, "step": 12505 }, { - "epoch": 0.35438805293434217, + "epoch": 0.4893184130213632, "grad_norm": 0.0, - "learning_rate": 1.4970562561269447e-05, - "loss": 1.0097, + "learning_rate": 1.0830977891394853e-05, + "loss": 0.9696, "step": 12506 }, { - "epoch": 0.3544163903766046, + "epoch": 0.48935753971359264, "grad_norm": 0.0, - "learning_rate": 1.496976615777955e-05, - "loss": 0.9132, + "learning_rate": 1.0829715036460833e-05, + "loss": 1.1593, "step": 12507 }, { - "epoch": 0.35444472781886704, + "epoch": 0.4893966664058221, "grad_norm": 0.0, - "learning_rate": 1.4968969712427464e-05, - "loss": 0.9386, + "learning_rate": 1.0828452168202624e-05, + "loss": 1.0789, "step": 12508 }, { - "epoch": 0.35447306526112954, + "epoch": 0.48943579309805146, "grad_norm": 0.0, - "learning_rate": 1.4968173225219904e-05, - "loss": 1.0202, + "learning_rate": 1.0827189286640513e-05, + "loss": 0.9511, "step": 12509 }, { - "epoch": 0.354501402703392, + "epoch": 0.4894749197902809, "grad_norm": 0.0, - "learning_rate": 1.4967376696163575e-05, - "loss": 0.9117, + "learning_rate": 1.0825926391794782e-05, + "loss": 0.9657, "step": 12510 }, { - "epoch": 0.35452974014565447, + "epoch": 0.48951404648251035, "grad_norm": 0.0, - "learning_rate": 1.4966580125265187e-05, - "loss": 0.9609, + "learning_rate": 1.0824663483685702e-05, + "loss": 1.1569, "step": 12511 }, { - "epoch": 0.3545580775879169, + "epoch": 0.4895531731747398, "grad_norm": 0.0, - "learning_rate": 1.4965783512531449e-05, - "loss": 0.9468, + "learning_rate": 1.0823400562333563e-05, + "loss": 1.1583, "step": 12512 }, { - "epoch": 0.3545864150301794, + "epoch": 0.4895922998669692, "grad_norm": 0.0, - "learning_rate": 1.4964986857969074e-05, - "loss": 0.9552, + "learning_rate": 1.0822137627758642e-05, + "loss": 1.0711, "step": 12513 }, { - "epoch": 0.35461475247244184, + "epoch": 0.48963142655919867, "grad_norm": 0.0, - "learning_rate": 1.4964190161584773e-05, - "loss": 0.9756, + "learning_rate": 1.0820874679981223e-05, + "loss": 1.1238, "step": 12514 }, { - "epoch": 0.3546430899147043, + "epoch": 0.4896705532514281, "grad_norm": 0.0, - "learning_rate": 1.4963393423385254e-05, - "loss": 1.0095, + "learning_rate": 1.0819611719021584e-05, + "loss": 0.9992, "step": 12515 }, { - "epoch": 0.3546714273569668, + "epoch": 0.48970967994365755, "grad_norm": 0.0, - "learning_rate": 1.4962596643377225e-05, - "loss": 1.0307, + "learning_rate": 1.0818348744900007e-05, + "loss": 1.0827, "step": 12516 }, { - "epoch": 0.3546997647992292, + "epoch": 0.489748806635887, "grad_norm": 0.0, - "learning_rate": 1.4961799821567407e-05, - "loss": 0.9579, + "learning_rate": 1.0817085757636774e-05, + "loss": 1.1075, "step": 12517 }, { - "epoch": 0.3547281022414917, + "epoch": 0.48978793332811643, "grad_norm": 0.0, - "learning_rate": 1.4961002957962503e-05, - "loss": 1.05, + "learning_rate": 1.081582275725217e-05, + "loss": 1.239, "step": 12518 }, { - "epoch": 0.35475643968375414, + "epoch": 0.4898270600203459, "grad_norm": 0.0, - "learning_rate": 1.496020605256923e-05, - "loss": 0.9237, + "learning_rate": 1.081455974376647e-05, + "loss": 1.0881, "step": 12519 }, { - "epoch": 0.3547847771260166, + "epoch": 0.4898661867125753, "grad_norm": 0.0, - "learning_rate": 1.4959409105394298e-05, - "loss": 1.0036, + "learning_rate": 1.0813296717199969e-05, + "loss": 1.1873, "step": 12520 }, { - "epoch": 0.3548131145682791, + "epoch": 0.48990531340480475, "grad_norm": 0.0, - "learning_rate": 1.4958612116444427e-05, - "loss": 1.0678, + "learning_rate": 1.0812033677572937e-05, + "loss": 1.0092, "step": 12521 }, { - "epoch": 0.3548414520105415, + "epoch": 0.4899444400970342, "grad_norm": 0.0, - "learning_rate": 1.495781508572632e-05, - "loss": 0.9538, + "learning_rate": 1.081077062490566e-05, + "loss": 0.9858, "step": 12522 }, { - "epoch": 0.354869789452804, + "epoch": 0.48998356678926364, "grad_norm": 0.0, - "learning_rate": 1.4957018013246698e-05, - "loss": 1.0814, + "learning_rate": 1.0809507559218426e-05, + "loss": 1.1107, "step": 12523 }, { - "epoch": 0.35489812689506645, + "epoch": 0.4900226934814931, "grad_norm": 0.0, - "learning_rate": 1.4956220899012268e-05, - "loss": 0.9874, + "learning_rate": 1.0808244480531513e-05, + "loss": 1.0559, "step": 12524 }, { - "epoch": 0.35492646433732894, + "epoch": 0.4900618201737225, "grad_norm": 0.0, - "learning_rate": 1.4955423743029751e-05, - "loss": 1.0551, + "learning_rate": 1.0806981388865208e-05, + "loss": 1.0873, "step": 12525 }, { - "epoch": 0.3549548017795914, + "epoch": 0.49010094686595196, "grad_norm": 0.0, - "learning_rate": 1.4954626545305861e-05, - "loss": 0.8869, + "learning_rate": 1.0805718284239793e-05, + "loss": 1.001, "step": 12526 }, { - "epoch": 0.3549831392218538, + "epoch": 0.4901400735581814, "grad_norm": 0.0, - "learning_rate": 1.4953829305847314e-05, - "loss": 0.8054, + "learning_rate": 1.0804455166675552e-05, + "loss": 1.1585, "step": 12527 }, { - "epoch": 0.3550114766641163, + "epoch": 0.49017920025041084, "grad_norm": 0.0, - "learning_rate": 1.4953032024660819e-05, - "loss": 0.8144, + "learning_rate": 1.0803192036192767e-05, + "loss": 1.0234, "step": 12528 }, { - "epoch": 0.35503981410637875, + "epoch": 0.4902183269426403, "grad_norm": 0.0, - "learning_rate": 1.4952234701753097e-05, - "loss": 0.8163, + "learning_rate": 1.0801928892811726e-05, + "loss": 1.1548, "step": 12529 }, { - "epoch": 0.35506815154864124, + "epoch": 0.4902574536348697, "grad_norm": 0.0, - "learning_rate": 1.4951437337130865e-05, - "loss": 1.0107, + "learning_rate": 1.0800665736552713e-05, + "loss": 1.0328, "step": 12530 }, { - "epoch": 0.3550964889909037, + "epoch": 0.49029658032709916, "grad_norm": 0.0, - "learning_rate": 1.4950639930800834e-05, - "loss": 0.903, + "learning_rate": 1.0799402567436009e-05, + "loss": 1.1338, "step": 12531 }, { - "epoch": 0.3551248264331661, + "epoch": 0.4903357070193286, "grad_norm": 0.0, - "learning_rate": 1.4949842482769725e-05, - "loss": 0.9411, + "learning_rate": 1.0798139385481903e-05, + "loss": 1.0058, "step": 12532 }, { - "epoch": 0.3551531638754286, + "epoch": 0.49037483371155804, "grad_norm": 0.0, - "learning_rate": 1.4949044993044259e-05, - "loss": 0.9492, + "learning_rate": 1.0796876190710677e-05, + "loss": 1.0867, "step": 12533 }, { - "epoch": 0.35518150131769105, + "epoch": 0.4904139604037875, "grad_norm": 0.0, - "learning_rate": 1.4948247461631148e-05, - "loss": 0.959, + "learning_rate": 1.079561298314262e-05, + "loss": 0.87, "step": 12534 }, { - "epoch": 0.35520983875995354, + "epoch": 0.4904530870960169, "grad_norm": 0.0, - "learning_rate": 1.4947449888537109e-05, - "loss": 0.968, + "learning_rate": 1.0794349762798013e-05, + "loss": 1.1058, "step": 12535 }, { - "epoch": 0.355238176202216, + "epoch": 0.49049221378824637, "grad_norm": 0.0, - "learning_rate": 1.4946652273768864e-05, - "loss": 0.9852, + "learning_rate": 1.0793086529697148e-05, + "loss": 1.076, "step": 12536 }, { - "epoch": 0.3552665136444785, + "epoch": 0.4905313404804758, "grad_norm": 0.0, - "learning_rate": 1.4945854617333129e-05, - "loss": 0.857, + "learning_rate": 1.0791823283860304e-05, + "loss": 0.9811, "step": 12537 }, { - "epoch": 0.3552948510867409, + "epoch": 0.4905704671727052, "grad_norm": 0.0, - "learning_rate": 1.4945056919236622e-05, - "loss": 0.8895, + "learning_rate": 1.0790560025307773e-05, + "loss": 1.0349, "step": 12538 }, { - "epoch": 0.35532318852900335, + "epoch": 0.49060959386493463, "grad_norm": 0.0, - "learning_rate": 1.4944259179486068e-05, - "loss": 1.032, + "learning_rate": 1.0789296754059837e-05, + "loss": 0.9808, "step": 12539 }, { - "epoch": 0.35535152597126585, + "epoch": 0.4906487205571641, "grad_norm": 0.0, - "learning_rate": 1.4943461398088182e-05, - "loss": 1.0034, + "learning_rate": 1.0788033470136783e-05, + "loss": 1.1637, "step": 12540 }, { - "epoch": 0.3553798634135283, + "epoch": 0.4906878472493935, "grad_norm": 0.0, - "learning_rate": 1.4942663575049683e-05, - "loss": 0.9028, + "learning_rate": 1.07867701735589e-05, + "loss": 1.1133, "step": 12541 }, { - "epoch": 0.3554082008557908, + "epoch": 0.49072697394162296, "grad_norm": 0.0, - "learning_rate": 1.4941865710377298e-05, - "loss": 1.0059, + "learning_rate": 1.0785506864346475e-05, + "loss": 1.1018, "step": 12542 }, { - "epoch": 0.3554365382980532, + "epoch": 0.4907661006338524, "grad_norm": 0.0, - "learning_rate": 1.4941067804077738e-05, - "loss": 1.0125, + "learning_rate": 1.0784243542519792e-05, + "loss": 1.0724, "step": 12543 }, { - "epoch": 0.35546487574031566, + "epoch": 0.49080522732608184, "grad_norm": 0.0, - "learning_rate": 1.494026985615773e-05, - "loss": 1.0138, + "learning_rate": 1.0782980208099143e-05, + "loss": 1.1135, "step": 12544 }, { - "epoch": 0.35549321318257815, + "epoch": 0.4908443540183113, "grad_norm": 0.0, - "learning_rate": 1.4939471866623993e-05, - "loss": 0.9894, + "learning_rate": 1.0781716861104812e-05, + "loss": 1.1328, "step": 12545 }, { - "epoch": 0.3555215506248406, + "epoch": 0.4908834807105407, "grad_norm": 0.0, - "learning_rate": 1.4938673835483254e-05, - "loss": 0.9823, + "learning_rate": 1.0780453501557084e-05, + "loss": 0.9926, "step": 12546 }, { - "epoch": 0.3555498880671031, + "epoch": 0.49092260740277016, "grad_norm": 0.0, - "learning_rate": 1.4937875762742232e-05, - "loss": 0.8622, + "learning_rate": 1.0779190129476256e-05, + "loss": 1.0925, "step": 12547 }, { - "epoch": 0.3555782255093655, + "epoch": 0.4909617340949996, "grad_norm": 0.0, - "learning_rate": 1.4937077648407645e-05, - "loss": 1.0002, + "learning_rate": 1.0777926744882607e-05, + "loss": 1.1586, "step": 12548 }, { - "epoch": 0.355606562951628, + "epoch": 0.49100086078722904, "grad_norm": 0.0, - "learning_rate": 1.4936279492486222e-05, - "loss": 1.0066, + "learning_rate": 1.0776663347796433e-05, + "loss": 1.1301, "step": 12549 }, { - "epoch": 0.35563490039389045, + "epoch": 0.4910399874794585, "grad_norm": 0.0, - "learning_rate": 1.4935481294984681e-05, - "loss": 0.8404, + "learning_rate": 1.0775399938238019e-05, + "loss": 1.0211, "step": 12550 }, { - "epoch": 0.3556632378361529, + "epoch": 0.4910791141716879, "grad_norm": 0.0, - "learning_rate": 1.4934683055909751e-05, - "loss": 0.8789, + "learning_rate": 1.077413651622765e-05, + "loss": 1.0264, "step": 12551 }, { - "epoch": 0.3556915752784154, + "epoch": 0.49111824086391737, "grad_norm": 0.0, - "learning_rate": 1.4933884775268153e-05, - "loss": 0.8303, + "learning_rate": 1.077287308178562e-05, + "loss": 1.0174, "step": 12552 }, { - "epoch": 0.3557199127206778, + "epoch": 0.4911573675561468, "grad_norm": 0.0, - "learning_rate": 1.493308645306661e-05, - "loss": 0.8676, + "learning_rate": 1.0771609634932216e-05, + "loss": 1.1001, "step": 12553 }, { - "epoch": 0.3557482501629403, + "epoch": 0.49119649424837625, "grad_norm": 0.0, - "learning_rate": 1.4932288089311848e-05, - "loss": 0.9278, + "learning_rate": 1.0770346175687728e-05, + "loss": 1.0326, "step": 12554 }, { - "epoch": 0.35577658760520275, + "epoch": 0.4912356209406057, "grad_norm": 0.0, - "learning_rate": 1.4931489684010593e-05, - "loss": 0.7941, + "learning_rate": 1.0769082704072447e-05, + "loss": 1.1085, "step": 12555 }, { - "epoch": 0.3558049250474652, + "epoch": 0.49127474763283513, "grad_norm": 0.0, - "learning_rate": 1.493069123716957e-05, - "loss": 0.9755, + "learning_rate": 1.076781922010666e-05, + "loss": 1.1039, "step": 12556 }, { - "epoch": 0.3558332624897277, + "epoch": 0.49131387432506457, "grad_norm": 0.0, - "learning_rate": 1.4929892748795503e-05, - "loss": 0.936, + "learning_rate": 1.0766555723810661e-05, + "loss": 0.9537, "step": 12557 }, { - "epoch": 0.3558615999319901, + "epoch": 0.491353001017294, "grad_norm": 0.0, - "learning_rate": 1.4929094218895117e-05, - "loss": 0.9711, + "learning_rate": 1.0765292215204738e-05, + "loss": 0.9954, "step": 12558 }, { - "epoch": 0.3558899373742526, + "epoch": 0.49139212770952345, "grad_norm": 0.0, - "learning_rate": 1.4928295647475141e-05, - "loss": 0.9492, + "learning_rate": 1.0764028694309179e-05, + "loss": 1.011, "step": 12559 }, { - "epoch": 0.35591827481651506, + "epoch": 0.4914312544017529, "grad_norm": 0.0, - "learning_rate": 1.4927497034542303e-05, - "loss": 0.8193, + "learning_rate": 1.0762765161144275e-05, + "loss": 1.0582, "step": 12560 }, { - "epoch": 0.3559466122587775, + "epoch": 0.49147038109398233, "grad_norm": 0.0, - "learning_rate": 1.4926698380103323e-05, - "loss": 0.9854, + "learning_rate": 1.0761501615730321e-05, + "loss": 1.1107, "step": 12561 }, { - "epoch": 0.35597494970104, + "epoch": 0.4915095077862118, "grad_norm": 0.0, - "learning_rate": 1.4925899684164937e-05, - "loss": 0.9687, + "learning_rate": 1.0760238058087605e-05, + "loss": 1.084, "step": 12562 }, { - "epoch": 0.3560032871433024, + "epoch": 0.4915486344784412, "grad_norm": 0.0, - "learning_rate": 1.4925100946733869e-05, - "loss": 0.995, + "learning_rate": 1.0758974488236418e-05, + "loss": 0.9865, "step": 12563 }, { - "epoch": 0.3560316245855649, + "epoch": 0.49158776117067066, "grad_norm": 0.0, - "learning_rate": 1.4924302167816845e-05, - "loss": 0.8982, + "learning_rate": 1.075771090619705e-05, + "loss": 1.1471, "step": 12564 }, { - "epoch": 0.35605996202782736, + "epoch": 0.4916268878629001, "grad_norm": 0.0, - "learning_rate": 1.4923503347420596e-05, - "loss": 0.9962, + "learning_rate": 1.0756447311989796e-05, + "loss": 1.1162, "step": 12565 }, { - "epoch": 0.35608829947008985, + "epoch": 0.4916660145551295, "grad_norm": 0.0, - "learning_rate": 1.4922704485551847e-05, - "loss": 0.8992, + "learning_rate": 1.0755183705634944e-05, + "loss": 1.0835, "step": 12566 }, { - "epoch": 0.3561166369123523, + "epoch": 0.4917051412473589, "grad_norm": 0.0, - "learning_rate": 1.4921905582217333e-05, - "loss": 0.9803, + "learning_rate": 1.0753920087152792e-05, + "loss": 1.0902, "step": 12567 }, { - "epoch": 0.35614497435461473, + "epoch": 0.49174426793958836, "grad_norm": 0.0, - "learning_rate": 1.4921106637423782e-05, - "loss": 1.0065, + "learning_rate": 1.0752656456563626e-05, + "loss": 1.0973, "step": 12568 }, { - "epoch": 0.3561733117968772, + "epoch": 0.4917833946318178, "grad_norm": 0.0, - "learning_rate": 1.4920307651177921e-05, - "loss": 0.9779, + "learning_rate": 1.075139281388774e-05, + "loss": 1.0315, "step": 12569 }, { - "epoch": 0.35620164923913966, + "epoch": 0.49182252132404725, "grad_norm": 0.0, - "learning_rate": 1.491950862348648e-05, - "loss": 0.9143, + "learning_rate": 1.0750129159145429e-05, + "loss": 1.1763, "step": 12570 }, { - "epoch": 0.35622998668140216, + "epoch": 0.4918616480162767, "grad_norm": 0.0, - "learning_rate": 1.4918709554356192e-05, - "loss": 0.8479, + "learning_rate": 1.0748865492356981e-05, + "loss": 0.8835, "step": 12571 }, { - "epoch": 0.3562583241236646, + "epoch": 0.4919007747085061, "grad_norm": 0.0, - "learning_rate": 1.4917910443793786e-05, - "loss": 0.9832, + "learning_rate": 1.0747601813542694e-05, + "loss": 1.0448, "step": 12572 }, { - "epoch": 0.35628666156592703, + "epoch": 0.49193990140073557, "grad_norm": 0.0, - "learning_rate": 1.4917111291805996e-05, - "loss": 0.904, + "learning_rate": 1.0746338122722854e-05, + "loss": 0.9775, "step": 12573 }, { - "epoch": 0.3563149990081895, + "epoch": 0.491979028092965, "grad_norm": 0.0, - "learning_rate": 1.491631209839955e-05, - "loss": 1.0438, + "learning_rate": 1.0745074419917765e-05, + "loss": 1.0323, "step": 12574 }, { - "epoch": 0.35634333645045196, + "epoch": 0.49201815478519445, "grad_norm": 0.0, - "learning_rate": 1.4915512863581185e-05, - "loss": 0.9141, + "learning_rate": 1.074381070514771e-05, + "loss": 1.0983, "step": 12575 }, { - "epoch": 0.35637167389271446, + "epoch": 0.4920572814774239, "grad_norm": 0.0, - "learning_rate": 1.4914713587357628e-05, - "loss": 0.8863, + "learning_rate": 1.074254697843299e-05, + "loss": 0.9277, "step": 12576 }, { - "epoch": 0.3564000113349769, + "epoch": 0.49209640816965333, "grad_norm": 0.0, - "learning_rate": 1.491391426973561e-05, - "loss": 0.895, + "learning_rate": 1.0741283239793894e-05, + "loss": 1.0296, "step": 12577 }, { - "epoch": 0.3564283487772394, + "epoch": 0.4921355348618828, "grad_norm": 0.0, - "learning_rate": 1.4913114910721869e-05, - "loss": 0.9915, + "learning_rate": 1.0740019489250719e-05, + "loss": 1.0679, "step": 12578 }, { - "epoch": 0.35645668621950183, + "epoch": 0.4921746615541122, "grad_norm": 0.0, - "learning_rate": 1.4912315510323138e-05, - "loss": 1.0413, + "learning_rate": 1.0738755726823759e-05, + "loss": 1.1629, "step": 12579 }, { - "epoch": 0.35648502366176427, + "epoch": 0.49221378824634165, "grad_norm": 0.0, - "learning_rate": 1.491151606854615e-05, - "loss": 0.9396, + "learning_rate": 1.0737491952533305e-05, + "loss": 1.0494, "step": 12580 }, { - "epoch": 0.35651336110402676, + "epoch": 0.4922529149385711, "grad_norm": 0.0, - "learning_rate": 1.491071658539764e-05, - "loss": 1.0767, + "learning_rate": 1.0736228166399659e-05, + "loss": 1.064, "step": 12581 }, { - "epoch": 0.3565416985462892, + "epoch": 0.49229204163080054, "grad_norm": 0.0, - "learning_rate": 1.4909917060884336e-05, - "loss": 1.0459, + "learning_rate": 1.0734964368443106e-05, + "loss": 0.9395, "step": 12582 }, { - "epoch": 0.3565700359885517, + "epoch": 0.49233116832303, "grad_norm": 0.0, - "learning_rate": 1.4909117495012979e-05, - "loss": 1.0636, + "learning_rate": 1.073370055868395e-05, + "loss": 0.9685, "step": 12583 }, { - "epoch": 0.35659837343081413, + "epoch": 0.4923702950152594, "grad_norm": 0.0, - "learning_rate": 1.4908317887790301e-05, - "loss": 0.8696, + "learning_rate": 1.0732436737142482e-05, + "loss": 1.0193, "step": 12584 }, { - "epoch": 0.35662671087307657, + "epoch": 0.49240942170748886, "grad_norm": 0.0, - "learning_rate": 1.4907518239223043e-05, - "loss": 0.9684, + "learning_rate": 1.0731172903838995e-05, + "loss": 1.1364, "step": 12585 }, { - "epoch": 0.35665504831533906, + "epoch": 0.4924485483997183, "grad_norm": 0.0, - "learning_rate": 1.4906718549317934e-05, - "loss": 0.9219, + "learning_rate": 1.072990905879379e-05, + "loss": 1.0422, "step": 12586 }, { - "epoch": 0.3566833857576015, + "epoch": 0.49248767509194774, "grad_norm": 0.0, - "learning_rate": 1.4905918818081713e-05, - "loss": 1.0227, + "learning_rate": 1.0728645202027162e-05, + "loss": 0.913, "step": 12587 }, { - "epoch": 0.356711723199864, + "epoch": 0.4925268017841772, "grad_norm": 0.0, - "learning_rate": 1.4905119045521115e-05, - "loss": 0.9771, + "learning_rate": 1.07273813335594e-05, + "loss": 1.1365, "step": 12588 }, { - "epoch": 0.35674006064212643, + "epoch": 0.4925659284764066, "grad_norm": 0.0, - "learning_rate": 1.4904319231642878e-05, - "loss": 0.9324, + "learning_rate": 1.072611745341081e-05, + "loss": 1.0458, "step": 12589 }, { - "epoch": 0.3567683980843889, + "epoch": 0.49260505516863606, "grad_norm": 0.0, - "learning_rate": 1.4903519376453738e-05, - "loss": 0.9455, + "learning_rate": 1.0724853561601683e-05, + "loss": 1.0574, "step": 12590 }, { - "epoch": 0.35679673552665137, + "epoch": 0.4926441818608655, "grad_norm": 0.0, - "learning_rate": 1.4902719479960434e-05, - "loss": 0.8857, + "learning_rate": 1.0723589658152311e-05, + "loss": 1.0727, "step": 12591 }, { - "epoch": 0.3568250729689138, + "epoch": 0.49268330855309495, "grad_norm": 0.0, - "learning_rate": 1.4901919542169707e-05, - "loss": 0.9534, + "learning_rate": 1.0722325743083001e-05, + "loss": 1.0438, "step": 12592 }, { - "epoch": 0.3568534104111763, + "epoch": 0.4927224352453244, "grad_norm": 0.0, - "learning_rate": 1.4901119563088288e-05, - "loss": 0.9235, + "learning_rate": 1.0721061816414043e-05, + "loss": 1.15, "step": 12593 }, { - "epoch": 0.35688174785343874, + "epoch": 0.4927615619375538, "grad_norm": 0.0, - "learning_rate": 1.4900319542722921e-05, - "loss": 1.0082, + "learning_rate": 1.0719797878165737e-05, + "loss": 1.1071, "step": 12594 }, { - "epoch": 0.35691008529570123, + "epoch": 0.4928006886297832, "grad_norm": 0.0, - "learning_rate": 1.489951948108034e-05, - "loss": 0.9421, + "learning_rate": 1.0718533928358374e-05, + "loss": 1.0326, "step": 12595 }, { - "epoch": 0.35693842273796367, + "epoch": 0.49283981532201265, "grad_norm": 0.0, - "learning_rate": 1.4898719378167287e-05, - "loss": 0.9925, + "learning_rate": 1.0717269967012262e-05, + "loss": 1.0745, "step": 12596 }, { - "epoch": 0.3569667601802261, + "epoch": 0.4928789420142421, "grad_norm": 0.0, - "learning_rate": 1.4897919233990502e-05, - "loss": 0.8455, + "learning_rate": 1.0716005994147694e-05, + "loss": 0.9106, "step": 12597 }, { - "epoch": 0.3569950976224886, + "epoch": 0.49291806870647153, "grad_norm": 0.0, - "learning_rate": 1.4897119048556728e-05, - "loss": 0.9772, + "learning_rate": 1.0714742009784963e-05, + "loss": 0.8962, "step": 12598 }, { - "epoch": 0.35702343506475104, + "epoch": 0.492957195398701, "grad_norm": 0.0, - "learning_rate": 1.4896318821872697e-05, - "loss": 0.9307, + "learning_rate": 1.0713478013944371e-05, + "loss": 1.1254, "step": 12599 }, { - "epoch": 0.35705177250701353, + "epoch": 0.4929963220909304, "grad_norm": 0.0, - "learning_rate": 1.4895518553945159e-05, - "loss": 0.981, + "learning_rate": 1.0712214006646217e-05, + "loss": 1.0101, "step": 12600 }, { - "epoch": 0.35708010994927597, + "epoch": 0.49303544878315986, "grad_norm": 0.0, - "learning_rate": 1.4894718244780845e-05, - "loss": 0.9609, + "learning_rate": 1.07109499879108e-05, + "loss": 1.0568, "step": 12601 }, { - "epoch": 0.35710844739153846, + "epoch": 0.4930745754753893, "grad_norm": 0.0, - "learning_rate": 1.4893917894386505e-05, - "loss": 0.914, + "learning_rate": 1.0709685957758416e-05, + "loss": 1.0387, "step": 12602 }, { - "epoch": 0.3571367848338009, + "epoch": 0.49311370216761874, "grad_norm": 0.0, - "learning_rate": 1.4893117502768877e-05, - "loss": 0.8939, + "learning_rate": 1.0708421916209364e-05, + "loss": 1.0754, "step": 12603 }, { - "epoch": 0.35716512227606334, + "epoch": 0.4931528288598482, "grad_norm": 0.0, - "learning_rate": 1.4892317069934702e-05, - "loss": 0.9507, + "learning_rate": 1.0707157863283944e-05, + "loss": 1.048, "step": 12604 }, { - "epoch": 0.35719345971832583, + "epoch": 0.4931919555520776, "grad_norm": 0.0, - "learning_rate": 1.4891516595890724e-05, - "loss": 0.8882, + "learning_rate": 1.0705893799002455e-05, + "loss": 1.1165, "step": 12605 }, { - "epoch": 0.3572217971605883, + "epoch": 0.49323108224430706, "grad_norm": 0.0, - "learning_rate": 1.4890716080643688e-05, - "loss": 0.9442, + "learning_rate": 1.0704629723385195e-05, + "loss": 1.0836, "step": 12606 }, { - "epoch": 0.35725013460285077, + "epoch": 0.4932702089365365, "grad_norm": 0.0, - "learning_rate": 1.4889915524200331e-05, - "loss": 1.0018, + "learning_rate": 1.0703365636452468e-05, + "loss": 1.0695, "step": 12607 }, { - "epoch": 0.3572784720451132, + "epoch": 0.49330933562876594, "grad_norm": 0.0, - "learning_rate": 1.4889114926567402e-05, - "loss": 0.9525, + "learning_rate": 1.070210153822457e-05, + "loss": 1.0447, "step": 12608 }, { - "epoch": 0.35730680948737564, + "epoch": 0.4933484623209954, "grad_norm": 0.0, - "learning_rate": 1.488831428775164e-05, - "loss": 0.8616, + "learning_rate": 1.0700837428721798e-05, + "loss": 0.9906, "step": 12609 }, { - "epoch": 0.35733514692963814, + "epoch": 0.4933875890132248, "grad_norm": 0.0, - "learning_rate": 1.4887513607759794e-05, - "loss": 0.8571, + "learning_rate": 1.0699573307964457e-05, + "loss": 0.9844, "step": 12610 }, { - "epoch": 0.3573634843719006, + "epoch": 0.49342671570545427, "grad_norm": 0.0, - "learning_rate": 1.4886712886598602e-05, - "loss": 0.7686, + "learning_rate": 1.0698309175972843e-05, + "loss": 1.0652, "step": 12611 }, { - "epoch": 0.35739182181416307, + "epoch": 0.4934658423976837, "grad_norm": 0.0, - "learning_rate": 1.4885912124274818e-05, - "loss": 0.9153, + "learning_rate": 1.0697045032767262e-05, + "loss": 0.9522, "step": 12612 }, { - "epoch": 0.3574201592564255, + "epoch": 0.49350496908991315, "grad_norm": 0.0, - "learning_rate": 1.488511132079518e-05, - "loss": 0.9714, + "learning_rate": 1.0695780878368007e-05, + "loss": 1.0629, "step": 12613 }, { - "epoch": 0.357448496698688, + "epoch": 0.4935440957821426, "grad_norm": 0.0, - "learning_rate": 1.4884310476166435e-05, - "loss": 1.0502, + "learning_rate": 1.0694516712795387e-05, + "loss": 1.0315, "step": 12614 }, { - "epoch": 0.35747683414095044, + "epoch": 0.49358322247437203, "grad_norm": 0.0, - "learning_rate": 1.4883509590395326e-05, - "loss": 1.0028, + "learning_rate": 1.0693252536069695e-05, + "loss": 1.0839, "step": 12615 }, { - "epoch": 0.3575051715832129, + "epoch": 0.49362234916660147, "grad_norm": 0.0, - "learning_rate": 1.4882708663488605e-05, - "loss": 0.9272, + "learning_rate": 1.0691988348211239e-05, + "loss": 1.1247, "step": 12616 }, { - "epoch": 0.35753350902547537, + "epoch": 0.4936614758588309, "grad_norm": 0.0, - "learning_rate": 1.4881907695453018e-05, - "loss": 0.8873, + "learning_rate": 1.0690724149240319e-05, + "loss": 1.0431, "step": 12617 }, { - "epoch": 0.3575618464677378, + "epoch": 0.49370060255106035, "grad_norm": 0.0, - "learning_rate": 1.4881106686295305e-05, - "loss": 0.8226, + "learning_rate": 1.0689459939177231e-05, + "loss": 1.1125, "step": 12618 }, { - "epoch": 0.3575901839100003, + "epoch": 0.4937397292432898, "grad_norm": 0.0, - "learning_rate": 1.4880305636022221e-05, - "loss": 0.8934, + "learning_rate": 1.0688195718042283e-05, + "loss": 1.0888, "step": 12619 }, { - "epoch": 0.35761852135226274, + "epoch": 0.49377885593551923, "grad_norm": 0.0, - "learning_rate": 1.4879504544640511e-05, - "loss": 1.0323, + "learning_rate": 1.0686931485855772e-05, + "loss": 0.8771, "step": 12620 }, { - "epoch": 0.3576468587945252, + "epoch": 0.4938179826277487, "grad_norm": 0.0, - "learning_rate": 1.487870341215692e-05, - "loss": 0.9248, + "learning_rate": 1.0685667242638003e-05, + "loss": 1.0781, "step": 12621 }, { - "epoch": 0.3576751962367877, + "epoch": 0.4938571093199781, "grad_norm": 0.0, - "learning_rate": 1.48779022385782e-05, - "loss": 1.1069, + "learning_rate": 1.0684402988409278e-05, + "loss": 1.1075, "step": 12622 }, { - "epoch": 0.3577035336790501, + "epoch": 0.4938962360122075, "grad_norm": 0.0, - "learning_rate": 1.4877101023911098e-05, - "loss": 0.9811, + "learning_rate": 1.0683138723189897e-05, + "loss": 1.1173, "step": 12623 }, { - "epoch": 0.3577318711213126, + "epoch": 0.49393536270443694, "grad_norm": 0.0, - "learning_rate": 1.4876299768162361e-05, - "loss": 0.8651, + "learning_rate": 1.0681874447000165e-05, + "loss": 1.0892, "step": 12624 }, { - "epoch": 0.35776020856357504, + "epoch": 0.4939744893966664, "grad_norm": 0.0, - "learning_rate": 1.487549847133874e-05, - "loss": 0.8614, + "learning_rate": 1.0680610159860382e-05, + "loss": 1.0517, "step": 12625 }, { - "epoch": 0.35778854600583754, + "epoch": 0.4940136160888958, "grad_norm": 0.0, - "learning_rate": 1.4874697133446988e-05, - "loss": 0.9793, + "learning_rate": 1.0679345861790858e-05, + "loss": 1.0096, "step": 12626 }, { - "epoch": 0.3578168834481, + "epoch": 0.49405274278112526, "grad_norm": 0.0, - "learning_rate": 1.4873895754493852e-05, - "loss": 1.0106, + "learning_rate": 1.0678081552811886e-05, + "loss": 1.0118, "step": 12627 }, { - "epoch": 0.3578452208903624, + "epoch": 0.4940918694733547, "grad_norm": 0.0, - "learning_rate": 1.487309433448608e-05, - "loss": 0.8339, + "learning_rate": 1.0676817232943775e-05, + "loss": 1.0682, "step": 12628 }, { - "epoch": 0.3578735583326249, + "epoch": 0.49413099616558415, "grad_norm": 0.0, - "learning_rate": 1.4872292873430425e-05, - "loss": 0.991, + "learning_rate": 1.0675552902206827e-05, + "loss": 0.9764, "step": 12629 }, { - "epoch": 0.35790189577488735, + "epoch": 0.4941701228578136, "grad_norm": 0.0, - "learning_rate": 1.4871491371333637e-05, - "loss": 0.852, + "learning_rate": 1.0674288560621346e-05, + "loss": 0.9883, "step": 12630 }, { - "epoch": 0.35793023321714984, + "epoch": 0.49420924955004303, "grad_norm": 0.0, - "learning_rate": 1.4870689828202471e-05, - "loss": 0.9223, + "learning_rate": 1.0673024208207636e-05, + "loss": 0.9787, "step": 12631 }, { - "epoch": 0.3579585706594123, + "epoch": 0.49424837624227247, "grad_norm": 0.0, - "learning_rate": 1.4869888244043674e-05, - "loss": 1.0311, + "learning_rate": 1.0671759844986002e-05, + "loss": 0.9981, "step": 12632 }, { - "epoch": 0.3579869081016747, + "epoch": 0.4942875029345019, "grad_norm": 0.0, - "learning_rate": 1.4869086618864e-05, - "loss": 0.9781, + "learning_rate": 1.0670495470976745e-05, + "loss": 0.9565, "step": 12633 }, { - "epoch": 0.3580152455439372, + "epoch": 0.49432662962673135, "grad_norm": 0.0, - "learning_rate": 1.4868284952670205e-05, - "loss": 0.9305, + "learning_rate": 1.066923108620017e-05, + "loss": 1.1302, "step": 12634 }, { - "epoch": 0.35804358298619965, + "epoch": 0.4943657563189608, "grad_norm": 0.0, - "learning_rate": 1.4867483245469031e-05, - "loss": 0.7826, + "learning_rate": 1.0667966690676583e-05, + "loss": 1.1275, "step": 12635 }, { - "epoch": 0.35807192042846214, + "epoch": 0.49440488301119023, "grad_norm": 0.0, - "learning_rate": 1.4866681497267242e-05, - "loss": 0.9232, + "learning_rate": 1.0666702284426289e-05, + "loss": 1.0193, "step": 12636 }, { - "epoch": 0.3581002578707246, + "epoch": 0.4944440097034197, "grad_norm": 0.0, - "learning_rate": 1.4865879708071589e-05, - "loss": 0.8241, + "learning_rate": 1.0665437867469593e-05, + "loss": 1.038, "step": 12637 }, { - "epoch": 0.3581285953129871, + "epoch": 0.4944831363956491, "grad_norm": 0.0, - "learning_rate": 1.4865077877888822e-05, - "loss": 0.8832, + "learning_rate": 1.0664173439826794e-05, + "loss": 1.1118, "step": 12638 }, { - "epoch": 0.3581569327552495, + "epoch": 0.49452226308787856, "grad_norm": 0.0, - "learning_rate": 1.48642760067257e-05, - "loss": 0.8827, + "learning_rate": 1.0662909001518207e-05, + "loss": 1.1549, "step": 12639 }, { - "epoch": 0.35818527019751195, + "epoch": 0.494561389780108, "grad_norm": 0.0, - "learning_rate": 1.486347409458897e-05, - "loss": 0.8248, + "learning_rate": 1.0661644552564127e-05, + "loss": 1.118, "step": 12640 }, { - "epoch": 0.35821360763977445, + "epoch": 0.49460051647233744, "grad_norm": 0.0, - "learning_rate": 1.4862672141485396e-05, - "loss": 0.8154, + "learning_rate": 1.066038009298487e-05, + "loss": 1.0034, "step": 12641 }, { - "epoch": 0.3582419450820369, + "epoch": 0.4946396431645669, "grad_norm": 0.0, - "learning_rate": 1.4861870147421726e-05, - "loss": 1.014, + "learning_rate": 1.0659115622800733e-05, + "loss": 1.0797, "step": 12642 }, { - "epoch": 0.3582702825242994, + "epoch": 0.4946787698567963, "grad_norm": 0.0, - "learning_rate": 1.4861068112404721e-05, - "loss": 0.8642, + "learning_rate": 1.0657851142032026e-05, + "loss": 1.0582, "step": 12643 }, { - "epoch": 0.3582986199665618, + "epoch": 0.49471789654902576, "grad_norm": 0.0, - "learning_rate": 1.4860266036441134e-05, - "loss": 0.9215, + "learning_rate": 1.0656586650699052e-05, + "loss": 0.9374, "step": 12644 }, { - "epoch": 0.35832695740882425, + "epoch": 0.4947570232412552, "grad_norm": 0.0, - "learning_rate": 1.485946391953772e-05, - "loss": 0.8979, + "learning_rate": 1.0655322148822123e-05, + "loss": 1.1814, "step": 12645 }, { - "epoch": 0.35835529485108675, + "epoch": 0.49479614993348464, "grad_norm": 0.0, - "learning_rate": 1.485866176170124e-05, - "loss": 0.9467, + "learning_rate": 1.0654057636421538e-05, + "loss": 1.0325, "step": 12646 }, { - "epoch": 0.3583836322933492, + "epoch": 0.4948352766257141, "grad_norm": 0.0, - "learning_rate": 1.4857859562938444e-05, - "loss": 0.9151, + "learning_rate": 1.0652793113517607e-05, + "loss": 1.1251, "step": 12647 }, { - "epoch": 0.3584119697356117, + "epoch": 0.4948744033179435, "grad_norm": 0.0, - "learning_rate": 1.4857057323256092e-05, - "loss": 0.8865, + "learning_rate": 1.0651528580130639e-05, + "loss": 1.0183, "step": 12648 }, { - "epoch": 0.3584403071778741, + "epoch": 0.49491353001017296, "grad_norm": 0.0, - "learning_rate": 1.4856255042660945e-05, - "loss": 0.9628, + "learning_rate": 1.0650264036280935e-05, + "loss": 1.0054, "step": 12649 }, { - "epoch": 0.3584686446201366, + "epoch": 0.4949526567024024, "grad_norm": 0.0, - "learning_rate": 1.4855452721159757e-05, - "loss": 0.9749, + "learning_rate": 1.064899948198881e-05, + "loss": 0.9792, "step": 12650 }, { - "epoch": 0.35849698206239905, + "epoch": 0.4949917833946318, "grad_norm": 0.0, - "learning_rate": 1.485465035875929e-05, - "loss": 0.9055, + "learning_rate": 1.0647734917274562e-05, + "loss": 0.9113, "step": 12651 }, { - "epoch": 0.3585253195046615, + "epoch": 0.49503091008686123, "grad_norm": 0.0, - "learning_rate": 1.48538479554663e-05, - "loss": 0.8846, + "learning_rate": 1.0646470342158505e-05, + "loss": 1.0181, "step": 12652 }, { - "epoch": 0.358553656946924, + "epoch": 0.49507003677909067, "grad_norm": 0.0, - "learning_rate": 1.4853045511287545e-05, - "loss": 0.8699, + "learning_rate": 1.0645205756660943e-05, + "loss": 1.0692, "step": 12653 }, { - "epoch": 0.3585819943891864, + "epoch": 0.4951091634713201, "grad_norm": 0.0, - "learning_rate": 1.4852243026229787e-05, - "loss": 0.9284, + "learning_rate": 1.0643941160802189e-05, + "loss": 0.9409, "step": 12654 }, { - "epoch": 0.3586103318314489, + "epoch": 0.49514829016354955, "grad_norm": 0.0, - "learning_rate": 1.485144050029978e-05, - "loss": 1.0061, + "learning_rate": 1.0642676554602545e-05, + "loss": 1.0844, "step": 12655 }, { - "epoch": 0.35863866927371135, + "epoch": 0.495187416855779, "grad_norm": 0.0, - "learning_rate": 1.4850637933504292e-05, - "loss": 0.928, + "learning_rate": 1.0641411938082317e-05, + "loss": 1.162, "step": 12656 }, { - "epoch": 0.3586670067159738, + "epoch": 0.49522654354800844, "grad_norm": 0.0, - "learning_rate": 1.4849835325850079e-05, - "loss": 0.9124, + "learning_rate": 1.0640147311261821e-05, + "loss": 1.1071, "step": 12657 }, { - "epoch": 0.3586953441582363, + "epoch": 0.4952656702402379, "grad_norm": 0.0, - "learning_rate": 1.4849032677343902e-05, - "loss": 0.9436, + "learning_rate": 1.063888267416136e-05, + "loss": 0.9933, "step": 12658 }, { - "epoch": 0.3587236816004987, + "epoch": 0.4953047969324673, "grad_norm": 0.0, - "learning_rate": 1.4848229987992522e-05, - "loss": 1.0028, + "learning_rate": 1.0637618026801246e-05, + "loss": 1.0591, "step": 12659 }, { - "epoch": 0.3587520190427612, + "epoch": 0.49534392362469676, "grad_norm": 0.0, - "learning_rate": 1.4847427257802702e-05, - "loss": 0.9264, + "learning_rate": 1.0636353369201782e-05, + "loss": 1.0072, "step": 12660 }, { - "epoch": 0.35878035648502365, + "epoch": 0.4953830503169262, "grad_norm": 0.0, - "learning_rate": 1.4846624486781199e-05, - "loss": 0.9688, + "learning_rate": 1.0635088701383282e-05, + "loss": 1.0267, "step": 12661 }, { - "epoch": 0.35880869392728615, + "epoch": 0.49542217700915564, "grad_norm": 0.0, - "learning_rate": 1.4845821674934779e-05, - "loss": 1.0583, + "learning_rate": 1.0633824023366053e-05, + "loss": 1.1214, "step": 12662 }, { - "epoch": 0.3588370313695486, + "epoch": 0.4954613037013851, "grad_norm": 0.0, - "learning_rate": 1.4845018822270204e-05, - "loss": 0.9027, + "learning_rate": 1.0632559335170405e-05, + "loss": 1.0827, "step": 12663 }, { - "epoch": 0.358865368811811, + "epoch": 0.4955004303936145, "grad_norm": 0.0, - "learning_rate": 1.4844215928794236e-05, - "loss": 0.9083, + "learning_rate": 1.0631294636816645e-05, + "loss": 1.1321, "step": 12664 }, { - "epoch": 0.3588937062540735, + "epoch": 0.49553955708584396, "grad_norm": 0.0, - "learning_rate": 1.484341299451364e-05, - "loss": 1.0117, + "learning_rate": 1.0630029928325087e-05, + "loss": 1.0452, "step": 12665 }, { - "epoch": 0.35892204369633596, + "epoch": 0.4955786837780734, "grad_norm": 0.0, - "learning_rate": 1.4842610019435178e-05, - "loss": 0.9281, + "learning_rate": 1.0628765209716037e-05, + "loss": 0.8669, "step": 12666 }, { - "epoch": 0.35895038113859845, + "epoch": 0.49561781047030284, "grad_norm": 0.0, - "learning_rate": 1.4841807003565612e-05, - "loss": 0.8816, + "learning_rate": 1.0627500481009805e-05, + "loss": 1.0765, "step": 12667 }, { - "epoch": 0.3589787185808609, + "epoch": 0.4956569371625323, "grad_norm": 0.0, - "learning_rate": 1.4841003946911707e-05, - "loss": 0.9586, + "learning_rate": 1.0626235742226702e-05, + "loss": 1.0653, "step": 12668 }, { - "epoch": 0.3590070560231233, + "epoch": 0.4956960638547617, "grad_norm": 0.0, - "learning_rate": 1.4840200849480226e-05, - "loss": 0.8025, + "learning_rate": 1.0624970993387036e-05, + "loss": 0.9873, "step": 12669 }, { - "epoch": 0.3590353934653858, + "epoch": 0.49573519054699117, "grad_norm": 0.0, - "learning_rate": 1.483939771127794e-05, - "loss": 0.8625, + "learning_rate": 1.062370623451112e-05, + "loss": 1.1164, "step": 12670 }, { - "epoch": 0.35906373090764826, + "epoch": 0.4957743172392206, "grad_norm": 0.0, - "learning_rate": 1.483859453231161e-05, - "loss": 0.8034, + "learning_rate": 1.0622441465619265e-05, + "loss": 0.9958, "step": 12671 }, { - "epoch": 0.35909206834991075, + "epoch": 0.49581344393145005, "grad_norm": 0.0, - "learning_rate": 1.4837791312588001e-05, - "loss": 0.9924, + "learning_rate": 1.0621176686731778e-05, + "loss": 1.0907, "step": 12672 }, { - "epoch": 0.3591204057921732, + "epoch": 0.4958525706236795, "grad_norm": 0.0, - "learning_rate": 1.483698805211388e-05, - "loss": 0.9252, + "learning_rate": 1.0619911897868973e-05, + "loss": 1.093, "step": 12673 }, { - "epoch": 0.3591487432344357, + "epoch": 0.49589169731590893, "grad_norm": 0.0, - "learning_rate": 1.483618475089601e-05, - "loss": 0.9814, + "learning_rate": 1.0618647099051158e-05, + "loss": 0.9189, "step": 12674 }, { - "epoch": 0.3591770806766981, + "epoch": 0.49593082400813837, "grad_norm": 0.0, - "learning_rate": 1.483538140894116e-05, - "loss": 0.8194, + "learning_rate": 1.0617382290298649e-05, + "loss": 1.158, "step": 12675 }, { - "epoch": 0.35920541811896056, + "epoch": 0.4959699507003678, "grad_norm": 0.0, - "learning_rate": 1.4834578026256099e-05, - "loss": 1.0115, + "learning_rate": 1.0616117471631753e-05, + "loss": 1.0649, "step": 12676 }, { - "epoch": 0.35923375556122306, + "epoch": 0.49600907739259725, "grad_norm": 0.0, - "learning_rate": 1.483377460284759e-05, - "loss": 0.9441, + "learning_rate": 1.0614852643070779e-05, + "loss": 1.0115, "step": 12677 }, { - "epoch": 0.3592620930034855, + "epoch": 0.4960482040848267, "grad_norm": 0.0, - "learning_rate": 1.4832971138722403e-05, - "loss": 0.9974, + "learning_rate": 1.0613587804636045e-05, + "loss": 1.0885, "step": 12678 }, { - "epoch": 0.359290430445748, + "epoch": 0.49608733077705613, "grad_norm": 0.0, - "learning_rate": 1.4832167633887306e-05, - "loss": 0.9397, + "learning_rate": 1.061232295634786e-05, + "loss": 1.1321, "step": 12679 }, { - "epoch": 0.3593187678880104, + "epoch": 0.4961264574692855, "grad_norm": 0.0, - "learning_rate": 1.483136408834907e-05, - "loss": 0.8693, + "learning_rate": 1.0611058098226534e-05, + "loss": 1.1644, "step": 12680 }, { - "epoch": 0.35934710533027286, + "epoch": 0.49616558416151496, "grad_norm": 0.0, - "learning_rate": 1.4830560502114452e-05, - "loss": 0.8726, + "learning_rate": 1.0609793230292382e-05, + "loss": 1.1075, "step": 12681 }, { - "epoch": 0.35937544277253536, + "epoch": 0.4962047108537444, "grad_norm": 0.0, - "learning_rate": 1.4829756875190236e-05, - "loss": 0.8874, + "learning_rate": 1.0608528352565714e-05, + "loss": 1.0686, "step": 12682 }, { - "epoch": 0.3594037802147978, + "epoch": 0.49624383754597384, "grad_norm": 0.0, - "learning_rate": 1.482895320758318e-05, - "loss": 0.8425, + "learning_rate": 1.0607263465066844e-05, + "loss": 0.937, "step": 12683 }, { - "epoch": 0.3594321176570603, + "epoch": 0.4962829642382033, "grad_norm": 0.0, - "learning_rate": 1.4828149499300061e-05, - "loss": 0.9428, + "learning_rate": 1.0605998567816084e-05, + "loss": 1.0308, "step": 12684 }, { - "epoch": 0.35946045509932273, + "epoch": 0.4963220909304327, "grad_norm": 0.0, - "learning_rate": 1.4827345750347646e-05, - "loss": 0.9718, + "learning_rate": 1.0604733660833744e-05, + "loss": 1.0497, "step": 12685 }, { - "epoch": 0.3594887925415852, + "epoch": 0.49636121762266217, "grad_norm": 0.0, - "learning_rate": 1.4826541960732704e-05, - "loss": 1.0092, + "learning_rate": 1.0603468744140142e-05, + "loss": 1.0295, "step": 12686 }, { - "epoch": 0.35951712998384766, + "epoch": 0.4964003443148916, "grad_norm": 0.0, - "learning_rate": 1.4825738130462008e-05, - "loss": 0.9375, + "learning_rate": 1.0602203817755585e-05, + "loss": 0.9605, "step": 12687 }, { - "epoch": 0.3595454674261101, + "epoch": 0.49643947100712105, "grad_norm": 0.0, - "learning_rate": 1.4824934259542326e-05, - "loss": 0.912, + "learning_rate": 1.0600938881700394e-05, + "loss": 1.1746, "step": 12688 }, { - "epoch": 0.3595738048683726, + "epoch": 0.4964785976993505, "grad_norm": 0.0, - "learning_rate": 1.4824130347980431e-05, - "loss": 0.9895, + "learning_rate": 1.0599673935994872e-05, + "loss": 1.0633, "step": 12689 }, { - "epoch": 0.35960214231063503, + "epoch": 0.49651772439157993, "grad_norm": 0.0, - "learning_rate": 1.4823326395783096e-05, - "loss": 0.9935, + "learning_rate": 1.0598408980659342e-05, + "loss": 0.979, "step": 12690 }, { - "epoch": 0.3596304797528975, + "epoch": 0.49655685108380937, "grad_norm": 0.0, - "learning_rate": 1.4822522402957091e-05, - "loss": 0.9285, + "learning_rate": 1.0597144015714112e-05, + "loss": 1.0737, "step": 12691 }, { - "epoch": 0.35965881719515996, + "epoch": 0.4965959777760388, "grad_norm": 0.0, - "learning_rate": 1.482171836950919e-05, - "loss": 0.9909, + "learning_rate": 1.0595879041179498e-05, + "loss": 1.0186, "step": 12692 }, { - "epoch": 0.3596871546374224, + "epoch": 0.49663510446826825, "grad_norm": 0.0, - "learning_rate": 1.4820914295446165e-05, - "loss": 0.8311, + "learning_rate": 1.0594614057075811e-05, + "loss": 0.9656, "step": 12693 }, { - "epoch": 0.3597154920796849, + "epoch": 0.4966742311604977, "grad_norm": 0.0, - "learning_rate": 1.4820110180774784e-05, - "loss": 0.9739, + "learning_rate": 1.059334906342337e-05, + "loss": 1.1265, "step": 12694 }, { - "epoch": 0.35974382952194733, + "epoch": 0.49671335785272713, "grad_norm": 0.0, - "learning_rate": 1.481930602550183e-05, - "loss": 0.9809, + "learning_rate": 1.0592084060242489e-05, + "loss": 1.0045, "step": 12695 }, { - "epoch": 0.3597721669642098, + "epoch": 0.4967524845449566, "grad_norm": 0.0, - "learning_rate": 1.4818501829634069e-05, - "loss": 0.9996, + "learning_rate": 1.0590819047553476e-05, + "loss": 1.1192, "step": 12696 }, { - "epoch": 0.35980050440647227, + "epoch": 0.496791611237186, "grad_norm": 0.0, - "learning_rate": 1.4817697593178281e-05, - "loss": 0.8955, + "learning_rate": 1.058955402537665e-05, + "loss": 0.9593, "step": 12697 }, { - "epoch": 0.35982884184873476, + "epoch": 0.49683073792941546, "grad_norm": 0.0, - "learning_rate": 1.4816893316141232e-05, - "loss": 0.8002, + "learning_rate": 1.0588288993732324e-05, + "loss": 1.1309, "step": 12698 }, { - "epoch": 0.3598571792909972, + "epoch": 0.4968698646216449, "grad_norm": 0.0, - "learning_rate": 1.4816088998529707e-05, - "loss": 0.9426, + "learning_rate": 1.0587023952640815e-05, + "loss": 1.1733, "step": 12699 }, { - "epoch": 0.35988551673325964, + "epoch": 0.49690899131387434, "grad_norm": 0.0, - "learning_rate": 1.4815284640350476e-05, - "loss": 0.9368, + "learning_rate": 1.0585758902122437e-05, + "loss": 0.8993, "step": 12700 }, { - "epoch": 0.35991385417552213, + "epoch": 0.4969481180061038, "grad_norm": 0.0, - "learning_rate": 1.481448024161031e-05, - "loss": 0.9553, + "learning_rate": 1.0584493842197505e-05, + "loss": 1.0587, "step": 12701 }, { - "epoch": 0.35994219161778457, + "epoch": 0.4969872446983332, "grad_norm": 0.0, - "learning_rate": 1.481367580231599e-05, - "loss": 1.0, + "learning_rate": 1.0583228772886333e-05, + "loss": 1.1295, "step": 12702 }, { - "epoch": 0.35997052906004706, + "epoch": 0.49702637139056266, "grad_norm": 0.0, - "learning_rate": 1.4812871322474294e-05, - "loss": 1.0642, + "learning_rate": 1.058196369420924e-05, + "loss": 1.1218, "step": 12703 }, { - "epoch": 0.3599988665023095, + "epoch": 0.4970654980827921, "grad_norm": 0.0, - "learning_rate": 1.4812066802091995e-05, - "loss": 0.9446, + "learning_rate": 1.0580698606186542e-05, + "loss": 1.0948, "step": 12704 }, { - "epoch": 0.36002720394457194, + "epoch": 0.49710462477502154, "grad_norm": 0.0, - "learning_rate": 1.481126224117587e-05, - "loss": 1.0054, + "learning_rate": 1.0579433508838546e-05, + "loss": 0.9939, "step": 12705 }, { - "epoch": 0.36005554138683443, + "epoch": 0.497143751467251, "grad_norm": 0.0, - "learning_rate": 1.4810457639732696e-05, - "loss": 0.904, + "learning_rate": 1.0578168402185577e-05, + "loss": 1.189, "step": 12706 }, { - "epoch": 0.36008387882909687, + "epoch": 0.4971828781594804, "grad_norm": 0.0, - "learning_rate": 1.480965299776925e-05, - "loss": 1.0002, + "learning_rate": 1.0576903286247947e-05, + "loss": 0.9865, "step": 12707 }, { - "epoch": 0.36011221627135936, + "epoch": 0.4972220048517098, "grad_norm": 0.0, - "learning_rate": 1.4808848315292313e-05, - "loss": 0.9127, + "learning_rate": 1.0575638161045976e-05, + "loss": 1.018, "step": 12708 }, { - "epoch": 0.3601405537136218, + "epoch": 0.49726113154393925, "grad_norm": 0.0, - "learning_rate": 1.4808043592308661e-05, - "loss": 0.8981, + "learning_rate": 1.0574373026599973e-05, + "loss": 0.9939, "step": 12709 }, { - "epoch": 0.3601688911558843, + "epoch": 0.4973002582361687, "grad_norm": 0.0, - "learning_rate": 1.480723882882507e-05, - "loss": 0.9668, + "learning_rate": 1.0573107882930262e-05, + "loss": 1.0832, "step": 12710 }, { - "epoch": 0.36019722859814673, + "epoch": 0.49733938492839813, "grad_norm": 0.0, - "learning_rate": 1.4806434024848322e-05, - "loss": 0.8964, + "learning_rate": 1.0571842730057154e-05, + "loss": 0.9699, "step": 12711 }, { - "epoch": 0.3602255660404092, + "epoch": 0.4973785116206276, "grad_norm": 0.0, - "learning_rate": 1.4805629180385197e-05, - "loss": 0.8804, + "learning_rate": 1.057057756800097e-05, + "loss": 1.0678, "step": 12712 }, { - "epoch": 0.36025390348267167, + "epoch": 0.497417638312857, "grad_norm": 0.0, - "learning_rate": 1.480482429544247e-05, - "loss": 0.9288, + "learning_rate": 1.0569312396782029e-05, + "loss": 0.8745, "step": 12713 }, { - "epoch": 0.3602822409249341, + "epoch": 0.49745676500508645, "grad_norm": 0.0, - "learning_rate": 1.4804019370026927e-05, - "loss": 0.9262, + "learning_rate": 1.0568047216420636e-05, + "loss": 1.1296, "step": 12714 }, { - "epoch": 0.3603105783671966, + "epoch": 0.4974958916973159, "grad_norm": 0.0, - "learning_rate": 1.4803214404145343e-05, - "loss": 0.9459, + "learning_rate": 1.0566782026937124e-05, + "loss": 0.9894, "step": 12715 }, { - "epoch": 0.36033891580945904, + "epoch": 0.49753501838954534, "grad_norm": 0.0, - "learning_rate": 1.4802409397804501e-05, - "loss": 0.9477, + "learning_rate": 1.0565516828351796e-05, + "loss": 1.0974, "step": 12716 }, { - "epoch": 0.3603672532517215, + "epoch": 0.4975741450817748, "grad_norm": 0.0, - "learning_rate": 1.480160435101118e-05, - "loss": 0.9482, + "learning_rate": 1.0564251620684982e-05, + "loss": 1.0371, "step": 12717 }, { - "epoch": 0.36039559069398397, + "epoch": 0.4976132717740042, "grad_norm": 0.0, - "learning_rate": 1.4800799263772168e-05, - "loss": 0.9435, + "learning_rate": 1.0562986403956994e-05, + "loss": 1.1038, "step": 12718 }, { - "epoch": 0.3604239281362464, + "epoch": 0.49765239846623366, "grad_norm": 0.0, - "learning_rate": 1.4799994136094233e-05, - "loss": 0.931, + "learning_rate": 1.0561721178188148e-05, + "loss": 1.0667, "step": 12719 }, { - "epoch": 0.3604522655785089, + "epoch": 0.4976915251584631, "grad_norm": 0.0, - "learning_rate": 1.4799188967984168e-05, - "loss": 0.9448, + "learning_rate": 1.0560455943398763e-05, + "loss": 0.9623, "step": 12720 }, { - "epoch": 0.36048060302077134, + "epoch": 0.49773065185069254, "grad_norm": 0.0, - "learning_rate": 1.4798383759448754e-05, - "loss": 0.8301, + "learning_rate": 1.0559190699609158e-05, + "loss": 0.9362, "step": 12721 }, { - "epoch": 0.36050894046303383, + "epoch": 0.497769778542922, "grad_norm": 0.0, - "learning_rate": 1.4797578510494772e-05, - "loss": 0.8814, + "learning_rate": 1.0557925446839652e-05, + "loss": 1.0612, "step": 12722 }, { - "epoch": 0.36053727790529627, + "epoch": 0.4978089052351514, "grad_norm": 0.0, - "learning_rate": 1.4796773221129001e-05, - "loss": 1.0017, + "learning_rate": 1.0556660185110564e-05, + "loss": 1.0777, "step": 12723 }, { - "epoch": 0.3605656153475587, + "epoch": 0.49784803192738086, "grad_norm": 0.0, - "learning_rate": 1.4795967891358232e-05, - "loss": 0.9027, + "learning_rate": 1.0555394914442207e-05, + "loss": 1.0512, "step": 12724 }, { - "epoch": 0.3605939527898212, + "epoch": 0.4978871586196103, "grad_norm": 0.0, - "learning_rate": 1.4795162521189243e-05, - "loss": 0.9999, + "learning_rate": 1.0554129634854906e-05, + "loss": 1.1687, "step": 12725 }, { - "epoch": 0.36062229023208364, + "epoch": 0.49792628531183974, "grad_norm": 0.0, - "learning_rate": 1.479435711062882e-05, - "loss": 0.8583, + "learning_rate": 1.0552864346368979e-05, + "loss": 1.0208, "step": 12726 }, { - "epoch": 0.36065062767434614, + "epoch": 0.4979654120040692, "grad_norm": 0.0, - "learning_rate": 1.4793551659683743e-05, - "loss": 0.9769, + "learning_rate": 1.0551599049004738e-05, + "loss": 1.0653, "step": 12727 }, { - "epoch": 0.3606789651166086, + "epoch": 0.4980045386962986, "grad_norm": 0.0, - "learning_rate": 1.4792746168360803e-05, - "loss": 0.8508, + "learning_rate": 1.0550333742782512e-05, + "loss": 1.1434, "step": 12728 }, { - "epoch": 0.360707302558871, + "epoch": 0.49804366538852807, "grad_norm": 0.0, - "learning_rate": 1.4791940636666785e-05, - "loss": 1.0013, + "learning_rate": 1.0549068427722613e-05, + "loss": 1.1456, "step": 12729 }, { - "epoch": 0.3607356400011335, + "epoch": 0.4980827920807575, "grad_norm": 0.0, - "learning_rate": 1.479113506460847e-05, - "loss": 0.9443, + "learning_rate": 1.0547803103845366e-05, + "loss": 1.1241, "step": 12730 }, { - "epoch": 0.36076397744339594, + "epoch": 0.49812191877298695, "grad_norm": 0.0, - "learning_rate": 1.4790329452192643e-05, - "loss": 0.987, + "learning_rate": 1.0546537771171087e-05, + "loss": 1.0913, "step": 12731 }, { - "epoch": 0.36079231488565844, + "epoch": 0.4981610454652164, "grad_norm": 0.0, - "learning_rate": 1.4789523799426095e-05, - "loss": 0.8579, + "learning_rate": 1.0545272429720094e-05, + "loss": 1.0573, "step": 12732 }, { - "epoch": 0.3608206523279209, + "epoch": 0.49820017215744583, "grad_norm": 0.0, - "learning_rate": 1.4788718106315605e-05, - "loss": 0.9188, + "learning_rate": 1.0544007079512713e-05, + "loss": 1.0496, "step": 12733 }, { - "epoch": 0.36084898977018337, + "epoch": 0.49823929884967527, "grad_norm": 0.0, - "learning_rate": 1.478791237286797e-05, - "loss": 0.88, + "learning_rate": 1.0542741720569257e-05, + "loss": 1.0272, "step": 12734 }, { - "epoch": 0.3608773272124458, + "epoch": 0.4982784255419047, "grad_norm": 0.0, - "learning_rate": 1.4787106599089969e-05, - "loss": 0.9302, + "learning_rate": 1.054147635291005e-05, + "loss": 1.0571, "step": 12735 }, { - "epoch": 0.36090566465470825, + "epoch": 0.49831755223413415, "grad_norm": 0.0, - "learning_rate": 1.478630078498839e-05, - "loss": 0.9379, + "learning_rate": 1.054021097655541e-05, + "loss": 1.1659, "step": 12736 }, { - "epoch": 0.36093400209697074, + "epoch": 0.49835667892636354, "grad_norm": 0.0, - "learning_rate": 1.4785494930570021e-05, - "loss": 0.8294, + "learning_rate": 1.053894559152566e-05, + "loss": 1.0027, "step": 12737 }, { - "epoch": 0.3609623395392332, + "epoch": 0.498395805618593, "grad_norm": 0.0, - "learning_rate": 1.4784689035841655e-05, - "loss": 0.9296, + "learning_rate": 1.0537680197841116e-05, + "loss": 1.0963, "step": 12738 }, { - "epoch": 0.3609906769814957, + "epoch": 0.4984349323108224, "grad_norm": 0.0, - "learning_rate": 1.4783883100810074e-05, - "loss": 1.0055, + "learning_rate": 1.0536414795522105e-05, + "loss": 1.0113, "step": 12739 }, { - "epoch": 0.3610190144237581, + "epoch": 0.49847405900305186, "grad_norm": 0.0, - "learning_rate": 1.4783077125482068e-05, - "loss": 0.9829, + "learning_rate": 1.0535149384588943e-05, + "loss": 0.9771, "step": 12740 }, { - "epoch": 0.36104735186602055, + "epoch": 0.4985131856952813, "grad_norm": 0.0, - "learning_rate": 1.478227110986443e-05, - "loss": 0.9492, + "learning_rate": 1.0533883965061955e-05, + "loss": 0.9287, "step": 12741 }, { - "epoch": 0.36107568930828304, + "epoch": 0.49855231238751074, "grad_norm": 0.0, - "learning_rate": 1.4781465053963946e-05, - "loss": 0.8827, + "learning_rate": 1.0532618536961459e-05, + "loss": 1.0697, "step": 12742 }, { - "epoch": 0.3611040267505455, + "epoch": 0.4985914390797402, "grad_norm": 0.0, - "learning_rate": 1.4780658957787407e-05, - "loss": 0.9196, + "learning_rate": 1.0531353100307775e-05, + "loss": 0.9219, "step": 12743 }, { - "epoch": 0.361132364192808, + "epoch": 0.4986305657719696, "grad_norm": 0.0, - "learning_rate": 1.47798528213416e-05, - "loss": 0.8888, + "learning_rate": 1.0530087655121227e-05, + "loss": 1.0203, "step": 12744 }, { - "epoch": 0.3611607016350704, + "epoch": 0.49866969246419907, "grad_norm": 0.0, - "learning_rate": 1.477904664463332e-05, - "loss": 0.9277, + "learning_rate": 1.0528822201422133e-05, + "loss": 1.2217, "step": 12745 }, { - "epoch": 0.3611890390773329, + "epoch": 0.4987088191564285, "grad_norm": 0.0, - "learning_rate": 1.4778240427669352e-05, - "loss": 0.9221, + "learning_rate": 1.0527556739230822e-05, + "loss": 1.0342, "step": 12746 }, { - "epoch": 0.36121737651959535, + "epoch": 0.49874794584865795, "grad_norm": 0.0, - "learning_rate": 1.4777434170456495e-05, - "loss": 1.0221, + "learning_rate": 1.0526291268567605e-05, + "loss": 0.9553, "step": 12747 }, { - "epoch": 0.3612457139618578, + "epoch": 0.4987870725408874, "grad_norm": 0.0, - "learning_rate": 1.4776627873001533e-05, - "loss": 0.7685, + "learning_rate": 1.0525025789452815e-05, + "loss": 1.0902, "step": 12748 }, { - "epoch": 0.3612740514041203, + "epoch": 0.49882619923311683, "grad_norm": 0.0, - "learning_rate": 1.477582153531126e-05, - "loss": 1.0567, + "learning_rate": 1.0523760301906764e-05, + "loss": 1.0205, "step": 12749 }, { - "epoch": 0.3613023888463827, + "epoch": 0.49886532592534627, "grad_norm": 0.0, - "learning_rate": 1.4775015157392472e-05, - "loss": 0.907, + "learning_rate": 1.0522494805949784e-05, + "loss": 1.0744, "step": 12750 }, { - "epoch": 0.3613307262886452, + "epoch": 0.4989044526175757, "grad_norm": 0.0, - "learning_rate": 1.4774208739251959e-05, - "loss": 0.9749, + "learning_rate": 1.0521229301602188e-05, + "loss": 1.079, "step": 12751 }, { - "epoch": 0.36135906373090765, + "epoch": 0.49894357930980515, "grad_norm": 0.0, - "learning_rate": 1.4773402280896507e-05, - "loss": 0.9268, + "learning_rate": 1.0519963788884305e-05, + "loss": 1.0073, "step": 12752 }, { - "epoch": 0.3613874011731701, + "epoch": 0.4989827060020346, "grad_norm": 0.0, - "learning_rate": 1.4772595782332916e-05, - "loss": 0.9385, + "learning_rate": 1.0518698267816454e-05, + "loss": 1.1044, "step": 12753 }, { - "epoch": 0.3614157386154326, + "epoch": 0.49902183269426403, "grad_norm": 0.0, - "learning_rate": 1.4771789243567983e-05, - "loss": 0.9232, + "learning_rate": 1.0517432738418957e-05, + "loss": 0.9726, "step": 12754 }, { - "epoch": 0.361444076057695, + "epoch": 0.4990609593864935, "grad_norm": 0.0, - "learning_rate": 1.4770982664608497e-05, - "loss": 0.9914, + "learning_rate": 1.0516167200712144e-05, + "loss": 1.0276, "step": 12755 }, { - "epoch": 0.3614724134999575, + "epoch": 0.4991000860787229, "grad_norm": 0.0, - "learning_rate": 1.4770176045461248e-05, - "loss": 0.9398, + "learning_rate": 1.0514901654716327e-05, + "loss": 1.1074, "step": 12756 }, { - "epoch": 0.36150075094221995, + "epoch": 0.49913921277095236, "grad_norm": 0.0, - "learning_rate": 1.4769369386133038e-05, - "loss": 0.8197, + "learning_rate": 1.0513636100451838e-05, + "loss": 0.9887, "step": 12757 }, { - "epoch": 0.36152908838448244, + "epoch": 0.4991783394631818, "grad_norm": 0.0, - "learning_rate": 1.4768562686630659e-05, - "loss": 1.0095, + "learning_rate": 1.0512370537938994e-05, + "loss": 0.9919, "step": 12758 }, { - "epoch": 0.3615574258267449, + "epoch": 0.49921746615541124, "grad_norm": 0.0, - "learning_rate": 1.4767755946960902e-05, - "loss": 1.0229, + "learning_rate": 1.0511104967198124e-05, + "loss": 1.0724, "step": 12759 }, { - "epoch": 0.3615857632690073, + "epoch": 0.4992565928476407, "grad_norm": 0.0, - "learning_rate": 1.476694916713057e-05, - "loss": 1.0224, + "learning_rate": 1.0509839388249548e-05, + "loss": 0.9935, "step": 12760 }, { - "epoch": 0.3616141007112698, + "epoch": 0.4992957195398701, "grad_norm": 0.0, - "learning_rate": 1.4766142347146452e-05, - "loss": 0.9667, + "learning_rate": 1.0508573801113588e-05, + "loss": 1.0529, "step": 12761 }, { - "epoch": 0.36164243815353225, + "epoch": 0.49933484623209956, "grad_norm": 0.0, - "learning_rate": 1.476533548701535e-05, - "loss": 0.9394, + "learning_rate": 1.0507308205810573e-05, + "loss": 0.9489, "step": 12762 }, { - "epoch": 0.36167077559579475, + "epoch": 0.499373972924329, "grad_norm": 0.0, - "learning_rate": 1.4764528586744058e-05, - "loss": 0.8469, + "learning_rate": 1.0506042602360823e-05, + "loss": 1.1468, "step": 12763 }, { - "epoch": 0.3616991130380572, + "epoch": 0.49941309961655844, "grad_norm": 0.0, - "learning_rate": 1.4763721646339373e-05, - "loss": 0.9893, + "learning_rate": 1.0504776990784661e-05, + "loss": 1.1041, "step": 12764 }, { - "epoch": 0.3617274504803196, + "epoch": 0.49945222630878783, "grad_norm": 0.0, - "learning_rate": 1.4762914665808089e-05, - "loss": 0.9236, + "learning_rate": 1.0503511371102417e-05, + "loss": 0.9355, "step": 12765 }, { - "epoch": 0.3617557879225821, + "epoch": 0.49949135300101727, "grad_norm": 0.0, - "learning_rate": 1.4762107645157005e-05, - "loss": 0.8976, + "learning_rate": 1.0502245743334409e-05, + "loss": 1.1039, "step": 12766 }, { - "epoch": 0.36178412536484456, + "epoch": 0.4995304796932467, "grad_norm": 0.0, - "learning_rate": 1.4761300584392922e-05, - "loss": 0.9155, + "learning_rate": 1.0500980107500965e-05, + "loss": 0.9694, "step": 12767 }, { - "epoch": 0.36181246280710705, + "epoch": 0.49956960638547615, "grad_norm": 0.0, - "learning_rate": 1.4760493483522637e-05, - "loss": 0.8453, + "learning_rate": 1.0499714463622405e-05, + "loss": 1.1009, "step": 12768 }, { - "epoch": 0.3618408002493695, + "epoch": 0.4996087330777056, "grad_norm": 0.0, - "learning_rate": 1.4759686342552945e-05, - "loss": 0.8692, + "learning_rate": 1.049844881171906e-05, + "loss": 1.1695, "step": 12769 }, { - "epoch": 0.3618691376916319, + "epoch": 0.49964785976993503, "grad_norm": 0.0, - "learning_rate": 1.475887916149065e-05, - "loss": 0.9118, + "learning_rate": 1.049718315181125e-05, + "loss": 0.9108, "step": 12770 }, { - "epoch": 0.3618974751338944, + "epoch": 0.4996869864621645, "grad_norm": 0.0, - "learning_rate": 1.4758071940342547e-05, - "loss": 1.0031, + "learning_rate": 1.0495917483919302e-05, + "loss": 1.0485, "step": 12771 }, { - "epoch": 0.36192581257615686, + "epoch": 0.4997261131543939, "grad_norm": 0.0, - "learning_rate": 1.4757264679115437e-05, - "loss": 0.8668, + "learning_rate": 1.049465180806354e-05, + "loss": 1.0883, "step": 12772 }, { - "epoch": 0.36195415001841935, + "epoch": 0.49976523984662335, "grad_norm": 0.0, - "learning_rate": 1.475645737781612e-05, - "loss": 0.9578, + "learning_rate": 1.0493386124264292e-05, + "loss": 1.0978, "step": 12773 }, { - "epoch": 0.3619824874606818, + "epoch": 0.4998043665388528, "grad_norm": 0.0, - "learning_rate": 1.4755650036451397e-05, - "loss": 0.8786, + "learning_rate": 1.049212043254188e-05, + "loss": 1.064, "step": 12774 }, { - "epoch": 0.3620108249029443, + "epoch": 0.49984349323108224, "grad_norm": 0.0, - "learning_rate": 1.4754842655028067e-05, - "loss": 0.9393, + "learning_rate": 1.0490854732916629e-05, + "loss": 1.1299, "step": 12775 }, { - "epoch": 0.3620391623452067, + "epoch": 0.4998826199233117, "grad_norm": 0.0, - "learning_rate": 1.4754035233552935e-05, - "loss": 0.8689, + "learning_rate": 1.0489589025408866e-05, + "loss": 1.0749, "step": 12776 }, { - "epoch": 0.36206749978746916, + "epoch": 0.4999217466155411, "grad_norm": 0.0, - "learning_rate": 1.4753227772032795e-05, - "loss": 0.862, + "learning_rate": 1.048832331003892e-05, + "loss": 1.0062, "step": 12777 }, { - "epoch": 0.36209583722973165, + "epoch": 0.49996087330777056, "grad_norm": 0.0, - "learning_rate": 1.4752420270474455e-05, - "loss": 0.9038, + "learning_rate": 1.048705758682711e-05, + "loss": 1.0033, "step": 12778 }, { - "epoch": 0.3621241746719941, + "epoch": 0.5, "grad_norm": 0.0, - "learning_rate": 1.4751612728884711e-05, - "loss": 0.8278, + "learning_rate": 1.0485791855793768e-05, + "loss": 1.1282, "step": 12779 }, { - "epoch": 0.3621525121142566, + "epoch": 0.5000391266922294, "grad_norm": 0.0, - "learning_rate": 1.4750805147270373e-05, - "loss": 0.9273, + "learning_rate": 1.0484526116959214e-05, + "loss": 1.106, "step": 12780 }, { - "epoch": 0.362180849556519, + "epoch": 0.5000782533844589, "grad_norm": 0.0, - "learning_rate": 1.4749997525638233e-05, - "loss": 0.9798, + "learning_rate": 1.0483260370343781e-05, + "loss": 0.9452, "step": 12781 }, { - "epoch": 0.36220918699878146, + "epoch": 0.5001173800766883, "grad_norm": 0.0, - "learning_rate": 1.4749189863995107e-05, - "loss": 0.8975, + "learning_rate": 1.0481994615967791e-05, + "loss": 0.9466, "step": 12782 }, { - "epoch": 0.36223752444104396, + "epoch": 0.5001565067689178, "grad_norm": 0.0, - "learning_rate": 1.4748382162347788e-05, - "loss": 0.927, + "learning_rate": 1.0480728853851572e-05, + "loss": 1.1118, "step": 12783 }, { - "epoch": 0.3622658618833064, + "epoch": 0.5001956334611471, "grad_norm": 0.0, - "learning_rate": 1.474757442070308e-05, - "loss": 0.8959, + "learning_rate": 1.047946308401545e-05, + "loss": 1.0887, "step": 12784 }, { - "epoch": 0.3622941993255689, + "epoch": 0.5002347601533766, "grad_norm": 0.0, - "learning_rate": 1.4746766639067793e-05, - "loss": 0.9, + "learning_rate": 1.0478197306479752e-05, + "loss": 1.0998, "step": 12785 }, { - "epoch": 0.3623225367678313, + "epoch": 0.500273886845606, "grad_norm": 0.0, - "learning_rate": 1.4745958817448727e-05, - "loss": 1.0125, + "learning_rate": 1.0476931521264804e-05, + "loss": 0.9883, "step": 12786 }, { - "epoch": 0.3623508742100938, + "epoch": 0.5003130135378355, "grad_norm": 0.0, - "learning_rate": 1.4745150955852687e-05, - "loss": 0.8485, + "learning_rate": 1.047566572839093e-05, + "loss": 0.8498, "step": 12787 }, { - "epoch": 0.36237921165235626, + "epoch": 0.5003521402300649, "grad_norm": 0.0, - "learning_rate": 1.474434305428648e-05, - "loss": 0.9481, + "learning_rate": 1.0474399927878464e-05, + "loss": 0.9202, "step": 12788 }, { - "epoch": 0.3624075490946187, + "epoch": 0.5003912669222944, "grad_norm": 0.0, - "learning_rate": 1.474353511275691e-05, - "loss": 0.9448, + "learning_rate": 1.047313411974773e-05, + "loss": 1.0205, "step": 12789 }, { - "epoch": 0.3624358865368812, + "epoch": 0.5004303936145238, "grad_norm": 0.0, - "learning_rate": 1.474272713127078e-05, - "loss": 1.0064, + "learning_rate": 1.0471868304019056e-05, + "loss": 1.0438, "step": 12790 }, { - "epoch": 0.36246422397914363, + "epoch": 0.5004695203067533, "grad_norm": 0.0, - "learning_rate": 1.4741919109834898e-05, - "loss": 1.0251, + "learning_rate": 1.0470602480712766e-05, + "loss": 1.0739, "step": 12791 }, { - "epoch": 0.3624925614214061, + "epoch": 0.5005086469989827, "grad_norm": 0.0, - "learning_rate": 1.4741111048456072e-05, - "loss": 0.9073, + "learning_rate": 1.046933664984919e-05, + "loss": 1.0435, "step": 12792 }, { - "epoch": 0.36252089886366856, + "epoch": 0.5005477736912122, "grad_norm": 0.0, - "learning_rate": 1.4740302947141107e-05, - "loss": 0.9709, + "learning_rate": 1.0468070811448656e-05, + "loss": 0.9573, "step": 12793 }, { - "epoch": 0.362549236305931, + "epoch": 0.5005869003834416, "grad_norm": 0.0, - "learning_rate": 1.473949480589681e-05, - "loss": 1.0637, + "learning_rate": 1.046680496553149e-05, + "loss": 1.0282, "step": 12794 }, { - "epoch": 0.3625775737481935, + "epoch": 0.500626027075671, "grad_norm": 0.0, - "learning_rate": 1.4738686624729987e-05, - "loss": 0.9661, + "learning_rate": 1.0465539112118024e-05, + "loss": 1.0096, "step": 12795 }, { - "epoch": 0.36260591119045593, + "epoch": 0.5006651537679004, "grad_norm": 0.0, - "learning_rate": 1.473787840364745e-05, - "loss": 0.9937, + "learning_rate": 1.046427325122858e-05, + "loss": 1.0089, "step": 12796 }, { - "epoch": 0.3626342486327184, + "epoch": 0.5007042804601299, "grad_norm": 0.0, - "learning_rate": 1.4737070142656003e-05, - "loss": 0.9597, + "learning_rate": 1.0463007382883492e-05, + "loss": 0.9643, "step": 12797 }, { - "epoch": 0.36266258607498086, + "epoch": 0.5007434071523593, "grad_norm": 0.0, - "learning_rate": 1.4736261841762454e-05, - "loss": 1.0858, + "learning_rate": 1.0461741507103083e-05, + "loss": 1.1267, "step": 12798 }, { - "epoch": 0.36269092351724336, + "epoch": 0.5007825338445888, "grad_norm": 0.0, - "learning_rate": 1.4735453500973611e-05, - "loss": 0.7476, + "learning_rate": 1.0460475623907687e-05, + "loss": 1.0444, "step": 12799 }, { - "epoch": 0.3627192609595058, + "epoch": 0.5008216605368182, "grad_norm": 0.0, - "learning_rate": 1.4734645120296284e-05, - "loss": 0.962, + "learning_rate": 1.0459209733317628e-05, + "loss": 1.0107, "step": 12800 }, { - "epoch": 0.36274759840176823, + "epoch": 0.5008607872290477, "grad_norm": 0.0, - "learning_rate": 1.4733836699737287e-05, - "loss": 0.8892, + "learning_rate": 1.0457943835353235e-05, + "loss": 0.9806, "step": 12801 }, { - "epoch": 0.36277593584403073, + "epoch": 0.5008999139212771, "grad_norm": 0.0, - "learning_rate": 1.4733028239303424e-05, - "loss": 1.0192, + "learning_rate": 1.045667793003484e-05, + "loss": 0.9225, "step": 12802 }, { - "epoch": 0.36280427328629317, + "epoch": 0.5009390406135066, "grad_norm": 0.0, - "learning_rate": 1.4732219739001508e-05, - "loss": 1.0182, + "learning_rate": 1.0455412017382767e-05, + "loss": 1.041, "step": 12803 }, { - "epoch": 0.36283261072855566, + "epoch": 0.500978167305736, "grad_norm": 0.0, - "learning_rate": 1.4731411198838346e-05, - "loss": 0.9352, + "learning_rate": 1.045414609741735e-05, + "loss": 1.0968, "step": 12804 }, { - "epoch": 0.3628609481708181, + "epoch": 0.5010172939979655, "grad_norm": 0.0, - "learning_rate": 1.4730602618820751e-05, - "loss": 0.9833, + "learning_rate": 1.0452880170158914e-05, + "loss": 0.9492, "step": 12805 }, { - "epoch": 0.36288928561308054, + "epoch": 0.5010564206901948, "grad_norm": 0.0, - "learning_rate": 1.472979399895553e-05, - "loss": 1.0833, + "learning_rate": 1.0451614235627791e-05, + "loss": 1.1517, "step": 12806 }, { - "epoch": 0.36291762305534303, + "epoch": 0.5010955473824243, "grad_norm": 0.0, - "learning_rate": 1.4728985339249504e-05, - "loss": 0.8367, + "learning_rate": 1.045034829384431e-05, + "loss": 0.9983, "step": 12807 }, { - "epoch": 0.36294596049760547, + "epoch": 0.5011346740746537, "grad_norm": 0.0, - "learning_rate": 1.4728176639709475e-05, - "loss": 1.0014, + "learning_rate": 1.0449082344828799e-05, + "loss": 1.0559, "step": 12808 }, { - "epoch": 0.36297429793986796, + "epoch": 0.5011738007668831, "grad_norm": 0.0, - "learning_rate": 1.472736790034226e-05, - "loss": 0.8832, + "learning_rate": 1.0447816388601588e-05, + "loss": 1.0733, "step": 12809 }, { - "epoch": 0.3630026353821304, + "epoch": 0.5012129274591126, "grad_norm": 0.0, - "learning_rate": 1.4726559121154668e-05, - "loss": 1.0446, + "learning_rate": 1.0446550425183008e-05, + "loss": 1.063, "step": 12810 }, { - "epoch": 0.3630309728243929, + "epoch": 0.501252054151342, "grad_norm": 0.0, - "learning_rate": 1.4725750302153514e-05, - "loss": 0.8717, + "learning_rate": 1.0445284454593387e-05, + "loss": 1.0095, "step": 12811 }, { - "epoch": 0.36305931026665533, + "epoch": 0.5012911808435715, "grad_norm": 0.0, - "learning_rate": 1.472494144334561e-05, - "loss": 0.928, + "learning_rate": 1.0444018476853055e-05, + "loss": 1.0519, "step": 12812 }, { - "epoch": 0.36308764770891777, + "epoch": 0.5013303075358009, "grad_norm": 0.0, - "learning_rate": 1.472413254473777e-05, - "loss": 0.9929, + "learning_rate": 1.0442752491982345e-05, + "loss": 0.9682, "step": 12813 }, { - "epoch": 0.36311598515118026, + "epoch": 0.5013694342280304, "grad_norm": 0.0, - "learning_rate": 1.4723323606336805e-05, - "loss": 0.8989, + "learning_rate": 1.044148650000158e-05, + "loss": 1.0521, "step": 12814 }, { - "epoch": 0.3631443225934427, + "epoch": 0.5014085609202598, "grad_norm": 0.0, - "learning_rate": 1.4722514628149535e-05, - "loss": 0.8916, + "learning_rate": 1.0440220500931099e-05, + "loss": 1.1424, "step": 12815 }, { - "epoch": 0.3631726600357052, + "epoch": 0.5014476876124893, "grad_norm": 0.0, - "learning_rate": 1.4721705610182771e-05, - "loss": 1.0961, + "learning_rate": 1.0438954494791227e-05, + "loss": 1.0152, "step": 12816 }, { - "epoch": 0.36320099747796764, + "epoch": 0.5014868143047186, "grad_norm": 0.0, - "learning_rate": 1.4720896552443327e-05, - "loss": 1.0215, + "learning_rate": 1.0437688481602297e-05, + "loss": 0.9526, "step": 12817 }, { - "epoch": 0.3632293349202301, + "epoch": 0.5015259409969481, "grad_norm": 0.0, - "learning_rate": 1.4720087454938014e-05, - "loss": 0.9657, + "learning_rate": 1.0436422461384636e-05, + "loss": 1.029, "step": 12818 }, { - "epoch": 0.36325767236249257, + "epoch": 0.5015650676891775, "grad_norm": 0.0, - "learning_rate": 1.4719278317673655e-05, - "loss": 0.9584, + "learning_rate": 1.0435156434158581e-05, + "loss": 1.1407, "step": 12819 }, { - "epoch": 0.363286009804755, + "epoch": 0.501604194381407, "grad_norm": 0.0, - "learning_rate": 1.4718469140657061e-05, - "loss": 0.8662, + "learning_rate": 1.0433890399944458e-05, + "loss": 1.0505, "step": 12820 }, { - "epoch": 0.3633143472470175, + "epoch": 0.5016433210736364, "grad_norm": 0.0, - "learning_rate": 1.471765992389505e-05, - "loss": 0.8845, + "learning_rate": 1.0432624358762595e-05, + "loss": 1.0946, "step": 12821 }, { - "epoch": 0.36334268468927994, + "epoch": 0.5016824477658659, "grad_norm": 0.0, - "learning_rate": 1.471685066739444e-05, - "loss": 0.9816, + "learning_rate": 1.0431358310633333e-05, + "loss": 1.0985, "step": 12822 }, { - "epoch": 0.36337102213154243, + "epoch": 0.5017215744580953, "grad_norm": 0.0, - "learning_rate": 1.4716041371162041e-05, - "loss": 1.018, + "learning_rate": 1.0430092255576991e-05, + "loss": 1.0192, "step": 12823 }, { - "epoch": 0.36339935957380487, + "epoch": 0.5017607011503248, "grad_norm": 0.0, - "learning_rate": 1.4715232035204678e-05, - "loss": 0.8882, + "learning_rate": 1.042882619361391e-05, + "loss": 1.0101, "step": 12824 }, { - "epoch": 0.3634276970160673, + "epoch": 0.5017998278425542, "grad_norm": 0.0, - "learning_rate": 1.4714422659529161e-05, - "loss": 0.8708, + "learning_rate": 1.0427560124764415e-05, + "loss": 0.9917, "step": 12825 }, { - "epoch": 0.3634560344583298, + "epoch": 0.5018389545347837, "grad_norm": 0.0, - "learning_rate": 1.4713613244142315e-05, - "loss": 0.9931, + "learning_rate": 1.0426294049048845e-05, + "loss": 1.0109, "step": 12826 }, { - "epoch": 0.36348437190059224, + "epoch": 0.501878081227013, "grad_norm": 0.0, - "learning_rate": 1.471280378905095e-05, - "loss": 0.8696, + "learning_rate": 1.042502796648752e-05, + "loss": 0.9843, "step": 12827 }, { - "epoch": 0.36351270934285473, + "epoch": 0.5019172079192425, "grad_norm": 0.0, - "learning_rate": 1.4711994294261893e-05, - "loss": 0.9864, + "learning_rate": 1.0423761877100784e-05, + "loss": 1.0651, "step": 12828 }, { - "epoch": 0.3635410467851172, + "epoch": 0.5019563346114719, "grad_norm": 0.0, - "learning_rate": 1.4711184759781956e-05, - "loss": 0.8573, + "learning_rate": 1.0422495780908961e-05, + "loss": 0.948, "step": 12829 }, { - "epoch": 0.3635693842273796, + "epoch": 0.5019954613037014, "grad_norm": 0.0, - "learning_rate": 1.471037518561796e-05, - "loss": 0.9368, + "learning_rate": 1.0421229677932384e-05, + "loss": 1.1344, "step": 12830 }, { - "epoch": 0.3635977216696421, + "epoch": 0.5020345879959308, "grad_norm": 0.0, - "learning_rate": 1.4709565571776723e-05, - "loss": 0.9754, + "learning_rate": 1.0419963568191389e-05, + "loss": 1.0093, "step": 12831 }, { - "epoch": 0.36362605911190454, + "epoch": 0.5020737146881603, "grad_norm": 0.0, - "learning_rate": 1.470875591826507e-05, - "loss": 1.0101, + "learning_rate": 1.0418697451706304e-05, + "loss": 1.0506, "step": 12832 }, { - "epoch": 0.36365439655416704, + "epoch": 0.5021128413803897, "grad_norm": 0.0, - "learning_rate": 1.4707946225089815e-05, - "loss": 0.8791, + "learning_rate": 1.0417431328497462e-05, + "loss": 1.0646, "step": 12833 }, { - "epoch": 0.3636827339964295, + "epoch": 0.5021519680726192, "grad_norm": 0.0, - "learning_rate": 1.4707136492257783e-05, - "loss": 0.9584, + "learning_rate": 1.0416165198585195e-05, + "loss": 1.0494, "step": 12834 }, { - "epoch": 0.36371107143869197, + "epoch": 0.5021910947648486, "grad_norm": 0.0, - "learning_rate": 1.470632671977579e-05, - "loss": 0.8811, + "learning_rate": 1.0414899061989838e-05, + "loss": 1.1022, "step": 12835 }, { - "epoch": 0.3637394088809544, + "epoch": 0.502230221457078, "grad_norm": 0.0, - "learning_rate": 1.470551690765066e-05, - "loss": 0.9491, + "learning_rate": 1.0413632918731722e-05, + "loss": 0.9989, "step": 12836 }, { - "epoch": 0.36376774632321685, + "epoch": 0.5022693481493075, "grad_norm": 0.0, - "learning_rate": 1.4704707055889213e-05, - "loss": 0.9027, + "learning_rate": 1.0412366768831178e-05, + "loss": 1.078, "step": 12837 }, { - "epoch": 0.36379608376547934, + "epoch": 0.5023084748415368, "grad_norm": 0.0, - "learning_rate": 1.4703897164498276e-05, - "loss": 0.8453, + "learning_rate": 1.0411100612308543e-05, + "loss": 1.0287, "step": 12838 }, { - "epoch": 0.3638244212077418, + "epoch": 0.5023476015337663, "grad_norm": 0.0, - "learning_rate": 1.470308723348466e-05, - "loss": 0.8604, + "learning_rate": 1.0409834449184146e-05, + "loss": 1.045, "step": 12839 }, { - "epoch": 0.36385275865000427, + "epoch": 0.5023867282259957, "grad_norm": 0.0, - "learning_rate": 1.4702277262855198e-05, - "loss": 0.9952, + "learning_rate": 1.0408568279478324e-05, + "loss": 1.0165, "step": 12840 }, { - "epoch": 0.3638810960922667, + "epoch": 0.5024258549182252, "grad_norm": 0.0, - "learning_rate": 1.4701467252616709e-05, - "loss": 0.9214, + "learning_rate": 1.0407302103211403e-05, + "loss": 1.1536, "step": 12841 }, { - "epoch": 0.36390943353452915, + "epoch": 0.5024649816104546, "grad_norm": 0.0, - "learning_rate": 1.4700657202776014e-05, - "loss": 0.9536, + "learning_rate": 1.0406035920403723e-05, + "loss": 1.0841, "step": 12842 }, { - "epoch": 0.36393777097679164, + "epoch": 0.5025041083026841, "grad_norm": 0.0, - "learning_rate": 1.4699847113339935e-05, - "loss": 0.881, + "learning_rate": 1.0404769731075612e-05, + "loss": 1.0277, "step": 12843 }, { - "epoch": 0.3639661084190541, + "epoch": 0.5025432349949135, "grad_norm": 0.0, - "learning_rate": 1.46990369843153e-05, - "loss": 0.9175, + "learning_rate": 1.040350353524741e-05, + "loss": 1.0117, "step": 12844 }, { - "epoch": 0.3639944458613166, + "epoch": 0.502582361687143, "grad_norm": 0.0, - "learning_rate": 1.4698226815708934e-05, - "loss": 0.9595, + "learning_rate": 1.0402237332939444e-05, + "loss": 0.9842, "step": 12845 }, { - "epoch": 0.364022783303579, + "epoch": 0.5026214883793724, "grad_norm": 0.0, - "learning_rate": 1.4697416607527659e-05, - "loss": 0.895, + "learning_rate": 1.0400971124172055e-05, + "loss": 1.0179, "step": 12846 }, { - "epoch": 0.3640511207458415, + "epoch": 0.5026606150716019, "grad_norm": 0.0, - "learning_rate": 1.4696606359778299e-05, - "loss": 0.974, + "learning_rate": 1.0399704908965566e-05, + "loss": 1.1028, "step": 12847 }, { - "epoch": 0.36407945818810394, + "epoch": 0.5026997417638313, "grad_norm": 0.0, - "learning_rate": 1.4695796072467677e-05, - "loss": 0.8702, + "learning_rate": 1.0398438687340322e-05, + "loss": 0.9818, "step": 12848 }, { - "epoch": 0.3641077956303664, + "epoch": 0.5027388684560608, "grad_norm": 0.0, - "learning_rate": 1.4694985745602623e-05, - "loss": 1.0531, + "learning_rate": 1.0397172459316651e-05, + "loss": 1.0977, "step": 12849 }, { - "epoch": 0.3641361330726289, + "epoch": 0.5027779951482901, "grad_norm": 0.0, - "learning_rate": 1.469417537918996e-05, - "loss": 0.7996, + "learning_rate": 1.0395906224914887e-05, + "loss": 0.9623, "step": 12850 }, { - "epoch": 0.3641644705148913, + "epoch": 0.5028171218405196, "grad_norm": 0.0, - "learning_rate": 1.4693364973236515e-05, - "loss": 0.9209, + "learning_rate": 1.0394639984155366e-05, + "loss": 0.8991, "step": 12851 }, { - "epoch": 0.3641928079571538, + "epoch": 0.502856248532749, "grad_norm": 0.0, - "learning_rate": 1.4692554527749112e-05, - "loss": 0.9277, + "learning_rate": 1.039337373705842e-05, + "loss": 1.1595, "step": 12852 }, { - "epoch": 0.36422114539941625, + "epoch": 0.5028953752249785, "grad_norm": 0.0, - "learning_rate": 1.4691744042734581e-05, - "loss": 0.9734, + "learning_rate": 1.0392107483644386e-05, + "loss": 1.0294, "step": 12853 }, { - "epoch": 0.3642494828416787, + "epoch": 0.5029345019172079, "grad_norm": 0.0, - "learning_rate": 1.469093351819975e-05, - "loss": 0.9396, + "learning_rate": 1.0390841223933596e-05, + "loss": 1.2187, "step": 12854 }, { - "epoch": 0.3642778202839412, + "epoch": 0.5029736286094374, "grad_norm": 0.0, - "learning_rate": 1.4690122954151443e-05, - "loss": 0.9035, + "learning_rate": 1.0389574957946387e-05, + "loss": 1.1143, "step": 12855 }, { - "epoch": 0.3643061577262036, + "epoch": 0.5030127553016668, "grad_norm": 0.0, - "learning_rate": 1.4689312350596488e-05, - "loss": 0.9053, + "learning_rate": 1.038830868570309e-05, + "loss": 1.0785, "step": 12856 }, { - "epoch": 0.3643344951684661, + "epoch": 0.5030518819938963, "grad_norm": 0.0, - "learning_rate": 1.4688501707541711e-05, - "loss": 0.8716, + "learning_rate": 1.0387042407224046e-05, + "loss": 1.1918, "step": 12857 }, { - "epoch": 0.36436283261072855, + "epoch": 0.5030910086861257, "grad_norm": 0.0, - "learning_rate": 1.4687691024993947e-05, - "loss": 0.9087, + "learning_rate": 1.0385776122529583e-05, + "loss": 1.101, "step": 12858 }, { - "epoch": 0.36439117005299104, + "epoch": 0.5031301353783552, "grad_norm": 0.0, - "learning_rate": 1.4686880302960021e-05, - "loss": 0.9071, + "learning_rate": 1.038450983164004e-05, + "loss": 1.0934, "step": 12859 }, { - "epoch": 0.3644195074952535, + "epoch": 0.5031692620705845, "grad_norm": 0.0, - "learning_rate": 1.4686069541446757e-05, - "loss": 0.9107, + "learning_rate": 1.0383243534575751e-05, + "loss": 0.9827, "step": 12860 }, { - "epoch": 0.3644478449375159, + "epoch": 0.503208388762814, "grad_norm": 0.0, - "learning_rate": 1.4685258740460995e-05, - "loss": 0.9247, + "learning_rate": 1.0381977231357048e-05, + "loss": 1.0908, "step": 12861 }, { - "epoch": 0.3644761823797784, + "epoch": 0.5032475154550434, "grad_norm": 0.0, - "learning_rate": 1.4684447900009557e-05, - "loss": 0.9647, + "learning_rate": 1.0380710922004273e-05, + "loss": 1.0246, "step": 12862 }, { - "epoch": 0.36450451982204085, + "epoch": 0.5032866421472729, "grad_norm": 0.0, - "learning_rate": 1.4683637020099273e-05, - "loss": 0.8997, + "learning_rate": 1.0379444606537754e-05, + "loss": 0.996, "step": 12863 }, { - "epoch": 0.36453285726430334, + "epoch": 0.5033257688395023, "grad_norm": 0.0, - "learning_rate": 1.4682826100736973e-05, - "loss": 0.9349, + "learning_rate": 1.0378178284977833e-05, + "loss": 1.0333, "step": 12864 }, { - "epoch": 0.3645611947065658, + "epoch": 0.5033648955317317, "grad_norm": 0.0, - "learning_rate": 1.4682015141929495e-05, - "loss": 0.9373, + "learning_rate": 1.0376911957344842e-05, + "loss": 0.9885, "step": 12865 }, { - "epoch": 0.3645895321488282, + "epoch": 0.5034040222239612, "grad_norm": 0.0, - "learning_rate": 1.4681204143683663e-05, - "loss": 1.0156, + "learning_rate": 1.0375645623659118e-05, + "loss": 1.0274, "step": 12866 }, { - "epoch": 0.3646178695910907, + "epoch": 0.5034431489161906, "grad_norm": 0.0, - "learning_rate": 1.4680393106006312e-05, - "loss": 0.8201, + "learning_rate": 1.0374379283940995e-05, + "loss": 1.0331, "step": 12867 }, { - "epoch": 0.36464620703335315, + "epoch": 0.5034822756084201, "grad_norm": 0.0, - "learning_rate": 1.4679582028904269e-05, - "loss": 0.8941, + "learning_rate": 1.037311293821081e-05, + "loss": 1.1218, "step": 12868 }, { - "epoch": 0.36467454447561565, + "epoch": 0.5035214023006495, "grad_norm": 0.0, - "learning_rate": 1.467877091238437e-05, - "loss": 0.9572, + "learning_rate": 1.0371846586488901e-05, + "loss": 0.9838, "step": 12869 }, { - "epoch": 0.3647028819178781, + "epoch": 0.503560528992879, "grad_norm": 0.0, - "learning_rate": 1.4677959756453443e-05, - "loss": 0.9438, + "learning_rate": 1.0370580228795597e-05, + "loss": 0.9954, "step": 12870 }, { - "epoch": 0.3647312193601406, + "epoch": 0.5035996556851083, "grad_norm": 0.0, - "learning_rate": 1.4677148561118328e-05, - "loss": 1.0001, + "learning_rate": 1.0369313865151243e-05, + "loss": 1.0317, "step": 12871 }, { - "epoch": 0.364759556802403, + "epoch": 0.5036387823773378, "grad_norm": 0.0, - "learning_rate": 1.4676337326385852e-05, - "loss": 0.965, + "learning_rate": 1.0368047495576168e-05, + "loss": 0.9772, "step": 12872 }, { - "epoch": 0.36478789424466546, + "epoch": 0.5036779090695672, "grad_norm": 0.0, - "learning_rate": 1.4675526052262853e-05, - "loss": 1.0119, + "learning_rate": 1.0366781120090714e-05, + "loss": 0.9797, "step": 12873 }, { - "epoch": 0.36481623168692795, + "epoch": 0.5037170357617967, "grad_norm": 0.0, - "learning_rate": 1.467471473875616e-05, - "loss": 0.9844, + "learning_rate": 1.0365514738715215e-05, + "loss": 0.9466, "step": 12874 }, { - "epoch": 0.3648445691291904, + "epoch": 0.5037561624540261, "grad_norm": 0.0, - "learning_rate": 1.467390338587261e-05, - "loss": 0.9213, + "learning_rate": 1.0364248351470005e-05, + "loss": 1.1198, "step": 12875 }, { - "epoch": 0.3648729065714529, + "epoch": 0.5037952891462556, "grad_norm": 0.0, - "learning_rate": 1.4673091993619033e-05, - "loss": 1.0009, + "learning_rate": 1.0362981958375425e-05, + "loss": 1.0249, "step": 12876 }, { - "epoch": 0.3649012440137153, + "epoch": 0.503834415838485, "grad_norm": 0.0, - "learning_rate": 1.4672280562002266e-05, - "loss": 0.9585, + "learning_rate": 1.0361715559451808e-05, + "loss": 1.1524, "step": 12877 }, { - "epoch": 0.36492958145597776, + "epoch": 0.5038735425307145, "grad_norm": 0.0, - "learning_rate": 1.4671469091029149e-05, - "loss": 0.9786, + "learning_rate": 1.0360449154719495e-05, + "loss": 1.0418, "step": 12878 }, { - "epoch": 0.36495791889824025, + "epoch": 0.5039126692229439, "grad_norm": 0.0, - "learning_rate": 1.4670657580706511e-05, - "loss": 0.8563, + "learning_rate": 1.0359182744198817e-05, + "loss": 1.0596, "step": 12879 }, { - "epoch": 0.3649862563405027, + "epoch": 0.5039517959151734, "grad_norm": 0.0, - "learning_rate": 1.4669846031041193e-05, - "loss": 0.9567, + "learning_rate": 1.0357916327910117e-05, + "loss": 1.0971, "step": 12880 }, { - "epoch": 0.3650145937827652, + "epoch": 0.5039909226074027, "grad_norm": 0.0, - "learning_rate": 1.4669034442040021e-05, - "loss": 0.9445, + "learning_rate": 1.0356649905873727e-05, + "loss": 1.0873, "step": 12881 }, { - "epoch": 0.3650429312250276, + "epoch": 0.5040300492996322, "grad_norm": 0.0, - "learning_rate": 1.4668222813709844e-05, - "loss": 0.8891, + "learning_rate": 1.0355383478109986e-05, + "loss": 1.0695, "step": 12882 }, { - "epoch": 0.3650712686672901, + "epoch": 0.5040691759918616, "grad_norm": 0.0, - "learning_rate": 1.466741114605749e-05, - "loss": 0.9662, + "learning_rate": 1.0354117044639232e-05, + "loss": 1.13, "step": 12883 }, { - "epoch": 0.36509960610955255, + "epoch": 0.5041083026840911, "grad_norm": 0.0, - "learning_rate": 1.46665994390898e-05, - "loss": 1.0122, + "learning_rate": 1.0352850605481804e-05, + "loss": 1.1046, "step": 12884 }, { - "epoch": 0.365127943551815, + "epoch": 0.5041474293763205, "grad_norm": 0.0, - "learning_rate": 1.4665787692813608e-05, - "loss": 1.0587, + "learning_rate": 1.0351584160658034e-05, + "loss": 1.0294, "step": 12885 }, { - "epoch": 0.3651562809940775, + "epoch": 0.50418655606855, "grad_norm": 0.0, - "learning_rate": 1.4664975907235757e-05, - "loss": 0.8702, + "learning_rate": 1.0350317710188267e-05, + "loss": 0.9704, "step": 12886 }, { - "epoch": 0.3651846184363399, + "epoch": 0.5042256827607794, "grad_norm": 0.0, - "learning_rate": 1.466416408236308e-05, - "loss": 0.8268, + "learning_rate": 1.0349051254092837e-05, + "loss": 1.0464, "step": 12887 }, { - "epoch": 0.3652129558786024, + "epoch": 0.5042648094530089, "grad_norm": 0.0, - "learning_rate": 1.4663352218202417e-05, - "loss": 0.9541, + "learning_rate": 1.0347784792392077e-05, + "loss": 1.0444, "step": 12888 }, { - "epoch": 0.36524129332086486, + "epoch": 0.5043039361452383, "grad_norm": 0.0, - "learning_rate": 1.4662540314760608e-05, - "loss": 0.9374, + "learning_rate": 1.0346518325106332e-05, + "loss": 1.011, "step": 12889 }, { - "epoch": 0.3652696307631273, + "epoch": 0.5043430628374678, "grad_norm": 0.0, - "learning_rate": 1.4661728372044486e-05, - "loss": 1.017, + "learning_rate": 1.0345251852255934e-05, + "loss": 1.0189, "step": 12890 }, { - "epoch": 0.3652979682053898, + "epoch": 0.5043821895296972, "grad_norm": 0.0, - "learning_rate": 1.46609163900609e-05, - "loss": 0.9117, + "learning_rate": 1.0343985373861227e-05, + "loss": 1.043, "step": 12891 }, { - "epoch": 0.3653263056476522, + "epoch": 0.5044213162219267, "grad_norm": 0.0, - "learning_rate": 1.4660104368816681e-05, - "loss": 0.9539, + "learning_rate": 1.0342718889942543e-05, + "loss": 1.1015, "step": 12892 }, { - "epoch": 0.3653546430899147, + "epoch": 0.504460442914156, "grad_norm": 0.0, - "learning_rate": 1.4659292308318673e-05, - "loss": 0.9224, + "learning_rate": 1.0341452400520227e-05, + "loss": 0.9553, "step": 12893 }, { - "epoch": 0.36538298053217716, + "epoch": 0.5044995696063854, "grad_norm": 0.0, - "learning_rate": 1.4658480208573717e-05, - "loss": 0.9149, + "learning_rate": 1.034018590561461e-05, + "loss": 0.8903, "step": 12894 }, { - "epoch": 0.36541131797443965, + "epoch": 0.5045386962986149, "grad_norm": 0.0, - "learning_rate": 1.4657668069588654e-05, - "loss": 1.0163, + "learning_rate": 1.0338919405246034e-05, + "loss": 0.9267, "step": 12895 }, { - "epoch": 0.3654396554167021, + "epoch": 0.5045778229908443, "grad_norm": 0.0, - "learning_rate": 1.4656855891370318e-05, - "loss": 0.9062, + "learning_rate": 1.033765289943484e-05, + "loss": 1.1019, "step": 12896 }, { - "epoch": 0.36546799285896453, + "epoch": 0.5046169496830738, "grad_norm": 0.0, - "learning_rate": 1.4656043673925557e-05, - "loss": 0.9622, + "learning_rate": 1.0336386388201363e-05, + "loss": 0.9581, "step": 12897 }, { - "epoch": 0.365496330301227, + "epoch": 0.5046560763753032, "grad_norm": 0.0, - "learning_rate": 1.4655231417261213e-05, - "loss": 1.0123, + "learning_rate": 1.0335119871565938e-05, + "loss": 0.9679, "step": 12898 }, { - "epoch": 0.36552466774348946, + "epoch": 0.5046952030675327, "grad_norm": 0.0, - "learning_rate": 1.4654419121384126e-05, - "loss": 0.9024, + "learning_rate": 1.0333853349548912e-05, + "loss": 1.1307, "step": 12899 }, { - "epoch": 0.36555300518575196, + "epoch": 0.5047343297597621, "grad_norm": 0.0, - "learning_rate": 1.465360678630114e-05, - "loss": 0.8857, + "learning_rate": 1.0332586822170618e-05, + "loss": 1.0592, "step": 12900 }, { - "epoch": 0.3655813426280144, + "epoch": 0.5047734564519916, "grad_norm": 0.0, - "learning_rate": 1.4652794412019094e-05, - "loss": 1.0058, + "learning_rate": 1.0331320289451394e-05, + "loss": 1.0372, "step": 12901 }, { - "epoch": 0.36560968007027683, + "epoch": 0.504812583144221, "grad_norm": 0.0, - "learning_rate": 1.4651981998544833e-05, - "loss": 1.055, + "learning_rate": 1.0330053751411587e-05, + "loss": 1.0332, "step": 12902 }, { - "epoch": 0.3656380175125393, + "epoch": 0.5048517098364504, "grad_norm": 0.0, - "learning_rate": 1.46511695458852e-05, - "loss": 0.8933, + "learning_rate": 1.0328787208071524e-05, + "loss": 1.092, "step": 12903 }, { - "epoch": 0.36566635495480176, + "epoch": 0.5048908365286798, "grad_norm": 0.0, - "learning_rate": 1.465035705404704e-05, - "loss": 0.9442, + "learning_rate": 1.0327520659451555e-05, + "loss": 1.0019, "step": 12904 }, { - "epoch": 0.36569469239706426, + "epoch": 0.5049299632209093, "grad_norm": 0.0, - "learning_rate": 1.4649544523037193e-05, - "loss": 0.8465, + "learning_rate": 1.0326254105572012e-05, + "loss": 1.0472, "step": 12905 }, { - "epoch": 0.3657230298393267, + "epoch": 0.5049690899131387, "grad_norm": 0.0, - "learning_rate": 1.4648731952862506e-05, - "loss": 0.8714, + "learning_rate": 1.0324987546453238e-05, + "loss": 1.1186, "step": 12906 }, { - "epoch": 0.3657513672815892, + "epoch": 0.5050082166053682, "grad_norm": 0.0, - "learning_rate": 1.4647919343529825e-05, - "loss": 0.9408, + "learning_rate": 1.0323720982115573e-05, + "loss": 1.0251, "step": 12907 }, { - "epoch": 0.36577970472385163, + "epoch": 0.5050473432975976, "grad_norm": 0.0, - "learning_rate": 1.4647106695045996e-05, - "loss": 0.9174, + "learning_rate": 1.032245441257935e-05, + "loss": 1.0004, "step": 12908 }, { - "epoch": 0.36580804216611407, + "epoch": 0.5050864699898271, "grad_norm": 0.0, - "learning_rate": 1.4646294007417858e-05, - "loss": 0.9167, + "learning_rate": 1.0321187837864917e-05, + "loss": 1.0317, "step": 12909 }, { - "epoch": 0.36583637960837656, + "epoch": 0.5051255966820565, "grad_norm": 0.0, - "learning_rate": 1.464548128065226e-05, - "loss": 0.8993, + "learning_rate": 1.0319921257992607e-05, + "loss": 1.0254, "step": 12910 }, { - "epoch": 0.365864717050639, + "epoch": 0.505164723374286, "grad_norm": 0.0, - "learning_rate": 1.464466851475605e-05, - "loss": 0.9527, + "learning_rate": 1.0318654672982766e-05, + "loss": 1.1149, "step": 12911 }, { - "epoch": 0.3658930544929015, + "epoch": 0.5052038500665154, "grad_norm": 0.0, - "learning_rate": 1.4643855709736071e-05, - "loss": 0.9128, + "learning_rate": 1.0317388082855725e-05, + "loss": 1.2014, "step": 12912 }, { - "epoch": 0.36592139193516393, + "epoch": 0.5052429767587449, "grad_norm": 0.0, - "learning_rate": 1.4643042865599174e-05, - "loss": 0.8878, + "learning_rate": 1.0316121487631837e-05, + "loss": 1.191, "step": 12913 }, { - "epoch": 0.36594972937742637, + "epoch": 0.5052821034509742, "grad_norm": 0.0, - "learning_rate": 1.4642229982352198e-05, - "loss": 1.0282, + "learning_rate": 1.0314854887331427e-05, + "loss": 1.0426, "step": 12914 }, { - "epoch": 0.36597806681968886, + "epoch": 0.5053212301432037, "grad_norm": 0.0, - "learning_rate": 1.4641417060002e-05, - "loss": 1.0508, + "learning_rate": 1.0313588281974845e-05, + "loss": 1.1207, "step": 12915 }, { - "epoch": 0.3660064042619513, + "epoch": 0.5053603568354331, "grad_norm": 0.0, - "learning_rate": 1.4640604098555418e-05, - "loss": 0.9784, + "learning_rate": 1.0312321671582427e-05, + "loss": 1.0956, "step": 12916 }, { - "epoch": 0.3660347417042138, + "epoch": 0.5053994835276626, "grad_norm": 0.0, - "learning_rate": 1.4639791098019307e-05, - "loss": 1.043, + "learning_rate": 1.0311055056174514e-05, + "loss": 1.0751, "step": 12917 }, { - "epoch": 0.36606307914647623, + "epoch": 0.505438610219892, "grad_norm": 0.0, - "learning_rate": 1.463897805840051e-05, - "loss": 1.0098, + "learning_rate": 1.0309788435771451e-05, + "loss": 0.9153, "step": 12918 }, { - "epoch": 0.3660914165887387, + "epoch": 0.5054777369121215, "grad_norm": 0.0, - "learning_rate": 1.4638164979705883e-05, - "loss": 0.936, + "learning_rate": 1.0308521810393569e-05, + "loss": 1.037, "step": 12919 }, { - "epoch": 0.36611975403100117, + "epoch": 0.5055168636043509, "grad_norm": 0.0, - "learning_rate": 1.4637351861942266e-05, - "loss": 0.9373, + "learning_rate": 1.0307255180061216e-05, + "loss": 1.1437, "step": 12920 }, { - "epoch": 0.3661480914732636, + "epoch": 0.5055559902965804, "grad_norm": 0.0, - "learning_rate": 1.4636538705116516e-05, - "loss": 0.9632, + "learning_rate": 1.0305988544794727e-05, + "loss": 0.9764, "step": 12921 }, { - "epoch": 0.3661764289155261, + "epoch": 0.5055951169888098, "grad_norm": 0.0, - "learning_rate": 1.4635725509235474e-05, - "loss": 0.9008, + "learning_rate": 1.0304721904614447e-05, + "loss": 1.1252, "step": 12922 }, { - "epoch": 0.36620476635778854, + "epoch": 0.5056342436810392, "grad_norm": 0.0, - "learning_rate": 1.4634912274305996e-05, - "loss": 0.9646, + "learning_rate": 1.0303455259540716e-05, + "loss": 1.012, "step": 12923 }, { - "epoch": 0.36623310380005103, + "epoch": 0.5056733703732686, "grad_norm": 0.0, - "learning_rate": 1.4634099000334932e-05, - "loss": 0.994, + "learning_rate": 1.0302188609593872e-05, + "loss": 1.0421, "step": 12924 }, { - "epoch": 0.36626144124231347, + "epoch": 0.505712497065498, "grad_norm": 0.0, - "learning_rate": 1.463328568732913e-05, - "loss": 0.9922, + "learning_rate": 1.0300921954794258e-05, + "loss": 1.0371, "step": 12925 }, { - "epoch": 0.3662897786845759, + "epoch": 0.5057516237577275, "grad_norm": 0.0, - "learning_rate": 1.4632472335295442e-05, - "loss": 0.8551, + "learning_rate": 1.0299655295162216e-05, + "loss": 1.0872, "step": 12926 }, { - "epoch": 0.3663181161268384, + "epoch": 0.5057907504499569, "grad_norm": 0.0, - "learning_rate": 1.4631658944240723e-05, - "loss": 0.9613, + "learning_rate": 1.0298388630718087e-05, + "loss": 1.0323, "step": 12927 }, { - "epoch": 0.36634645356910084, + "epoch": 0.5058298771421864, "grad_norm": 0.0, - "learning_rate": 1.4630845514171818e-05, - "loss": 0.9258, + "learning_rate": 1.0297121961482205e-05, + "loss": 1.1174, "step": 12928 }, { - "epoch": 0.36637479101136333, + "epoch": 0.5058690038344158, "grad_norm": 0.0, - "learning_rate": 1.4630032045095582e-05, - "loss": 1.014, + "learning_rate": 1.0295855287474921e-05, + "loss": 1.0214, "step": 12929 }, { - "epoch": 0.36640312845362577, + "epoch": 0.5059081305266453, "grad_norm": 0.0, - "learning_rate": 1.4629218537018866e-05, - "loss": 0.9991, + "learning_rate": 1.0294588608716569e-05, + "loss": 0.9913, "step": 12930 }, { - "epoch": 0.36643146589588826, + "epoch": 0.5059472572188747, "grad_norm": 0.0, - "learning_rate": 1.4628404989948522e-05, - "loss": 0.9452, + "learning_rate": 1.0293321925227494e-05, + "loss": 0.9391, "step": 12931 }, { - "epoch": 0.3664598033381507, + "epoch": 0.5059863839111042, "grad_norm": 0.0, - "learning_rate": 1.4627591403891405e-05, - "loss": 1.0013, + "learning_rate": 1.0292055237028036e-05, + "loss": 1.0889, "step": 12932 }, { - "epoch": 0.36648814078041314, + "epoch": 0.5060255106033336, "grad_norm": 0.0, - "learning_rate": 1.4626777778854372e-05, - "loss": 0.8664, + "learning_rate": 1.029078854413854e-05, + "loss": 1.0441, "step": 12933 }, { - "epoch": 0.36651647822267563, + "epoch": 0.5060646372955631, "grad_norm": 0.0, - "learning_rate": 1.4625964114844266e-05, - "loss": 0.9023, + "learning_rate": 1.028952184657934e-05, + "loss": 0.9281, "step": 12934 }, { - "epoch": 0.3665448156649381, + "epoch": 0.5061037639877924, "grad_norm": 0.0, - "learning_rate": 1.4625150411867948e-05, - "loss": 0.9386, + "learning_rate": 1.0288255144370784e-05, + "loss": 0.9254, "step": 12935 }, { - "epoch": 0.36657315310720057, + "epoch": 0.5061428906800219, "grad_norm": 0.0, - "learning_rate": 1.4624336669932268e-05, - "loss": 0.9152, + "learning_rate": 1.0286988437533214e-05, + "loss": 1.0925, "step": 12936 }, { - "epoch": 0.366601490549463, + "epoch": 0.5061820173722513, "grad_norm": 0.0, - "learning_rate": 1.4623522889044089e-05, - "loss": 0.9654, + "learning_rate": 1.0285721726086966e-05, + "loss": 0.9496, "step": 12937 }, { - "epoch": 0.36662982799172544, + "epoch": 0.5062211440644808, "grad_norm": 0.0, - "learning_rate": 1.4622709069210257e-05, - "loss": 1.0274, + "learning_rate": 1.0284455010052385e-05, + "loss": 1.1144, "step": 12938 }, { - "epoch": 0.36665816543398794, + "epoch": 0.5062602707567102, "grad_norm": 0.0, - "learning_rate": 1.4621895210437627e-05, - "loss": 0.866, + "learning_rate": 1.0283188289449817e-05, + "loss": 1.0891, "step": 12939 }, { - "epoch": 0.3666865028762504, + "epoch": 0.5062993974489397, "grad_norm": 0.0, - "learning_rate": 1.4621081312733061e-05, - "loss": 0.9655, + "learning_rate": 1.0281921564299595e-05, + "loss": 0.9934, "step": 12940 }, { - "epoch": 0.36671484031851287, + "epoch": 0.5063385241411691, "grad_norm": 0.0, - "learning_rate": 1.4620267376103407e-05, - "loss": 0.8601, + "learning_rate": 1.0280654834622069e-05, + "loss": 0.9481, "step": 12941 }, { - "epoch": 0.3667431777607753, + "epoch": 0.5063776508333986, "grad_norm": 0.0, - "learning_rate": 1.461945340055553e-05, - "loss": 0.885, + "learning_rate": 1.0279388100437574e-05, + "loss": 1.0176, "step": 12942 }, { - "epoch": 0.3667715152030378, + "epoch": 0.506416777525628, "grad_norm": 0.0, - "learning_rate": 1.4618639386096278e-05, - "loss": 0.8965, + "learning_rate": 1.0278121361766462e-05, + "loss": 1.1364, "step": 12943 }, { - "epoch": 0.36679985264530024, + "epoch": 0.5064559042178575, "grad_norm": 0.0, - "learning_rate": 1.4617825332732513e-05, - "loss": 0.9103, + "learning_rate": 1.0276854618629067e-05, + "loss": 0.9561, "step": 12944 }, { - "epoch": 0.3668281900875627, + "epoch": 0.5064950309100869, "grad_norm": 0.0, - "learning_rate": 1.4617011240471093e-05, - "loss": 0.9622, + "learning_rate": 1.0275587871045731e-05, + "loss": 1.0571, "step": 12945 }, { - "epoch": 0.36685652752982517, + "epoch": 0.5065341576023163, "grad_norm": 0.0, - "learning_rate": 1.4616197109318871e-05, - "loss": 0.8273, + "learning_rate": 1.0274321119036803e-05, + "loss": 0.9796, "step": 12946 }, { - "epoch": 0.3668848649720876, + "epoch": 0.5065732842945457, "grad_norm": 0.0, - "learning_rate": 1.4615382939282702e-05, - "loss": 0.8863, + "learning_rate": 1.027305436262262e-05, + "loss": 0.9651, "step": 12947 }, { - "epoch": 0.3669132024143501, + "epoch": 0.5066124109867752, "grad_norm": 0.0, - "learning_rate": 1.4614568730369454e-05, - "loss": 0.99, + "learning_rate": 1.0271787601823526e-05, + "loss": 1.0869, "step": 12948 }, { - "epoch": 0.36694153985661254, + "epoch": 0.5066515376790046, "grad_norm": 0.0, - "learning_rate": 1.4613754482585978e-05, - "loss": 0.8518, + "learning_rate": 1.0270520836659866e-05, + "loss": 0.957, "step": 12949 }, { - "epoch": 0.366969877298875, + "epoch": 0.506690664371234, "grad_norm": 0.0, - "learning_rate": 1.4612940195939136e-05, - "loss": 0.933, + "learning_rate": 1.0269254067151975e-05, + "loss": 1.0316, "step": 12950 }, { - "epoch": 0.3669982147411375, + "epoch": 0.5067297910634635, "grad_norm": 0.0, - "learning_rate": 1.4612125870435785e-05, - "loss": 0.8791, + "learning_rate": 1.0267987293320205e-05, + "loss": 1.1483, "step": 12951 }, { - "epoch": 0.3670265521833999, + "epoch": 0.5067689177556929, "grad_norm": 0.0, - "learning_rate": 1.4611311506082784e-05, - "loss": 0.941, + "learning_rate": 1.0266720515184894e-05, + "loss": 1.051, "step": 12952 }, { - "epoch": 0.3670548896256624, + "epoch": 0.5068080444479224, "grad_norm": 0.0, - "learning_rate": 1.4610497102886995e-05, - "loss": 0.9507, + "learning_rate": 1.0265453732766387e-05, + "loss": 1.0117, "step": 12953 }, { - "epoch": 0.36708322706792484, + "epoch": 0.5068471711401518, "grad_norm": 0.0, - "learning_rate": 1.4609682660855277e-05, - "loss": 0.9632, + "learning_rate": 1.0264186946085022e-05, + "loss": 1.0796, "step": 12954 }, { - "epoch": 0.36711156451018734, + "epoch": 0.5068862978323813, "grad_norm": 0.0, - "learning_rate": 1.4608868179994489e-05, - "loss": 0.957, + "learning_rate": 1.026292015516115e-05, + "loss": 1.0631, "step": 12955 }, { - "epoch": 0.3671399019524498, + "epoch": 0.5069254245246106, "grad_norm": 0.0, - "learning_rate": 1.4608053660311495e-05, - "loss": 0.866, + "learning_rate": 1.026165336001511e-05, + "loss": 0.9735, "step": 12956 }, { - "epoch": 0.3671682393947122, + "epoch": 0.5069645512168401, "grad_norm": 0.0, - "learning_rate": 1.4607239101813153e-05, - "loss": 0.8913, + "learning_rate": 1.026038656066724e-05, + "loss": 0.9549, "step": 12957 }, { - "epoch": 0.3671965768369747, + "epoch": 0.5070036779090695, "grad_norm": 0.0, - "learning_rate": 1.4606424504506325e-05, - "loss": 1.0289, + "learning_rate": 1.0259119757137891e-05, + "loss": 1.0533, "step": 12958 }, { - "epoch": 0.36722491427923715, + "epoch": 0.507042804601299, "grad_norm": 0.0, - "learning_rate": 1.4605609868397874e-05, - "loss": 0.927, + "learning_rate": 1.0257852949447404e-05, + "loss": 1.0157, "step": 12959 }, { - "epoch": 0.36725325172149964, + "epoch": 0.5070819312935284, "grad_norm": 0.0, - "learning_rate": 1.4604795193494659e-05, - "loss": 0.9213, + "learning_rate": 1.0256586137616123e-05, + "loss": 1.0038, "step": 12960 }, { - "epoch": 0.3672815891637621, + "epoch": 0.5071210579857579, "grad_norm": 0.0, - "learning_rate": 1.460398047980354e-05, - "loss": 0.8844, + "learning_rate": 1.0255319321664386e-05, + "loss": 0.9925, "step": 12961 }, { - "epoch": 0.3673099266060245, + "epoch": 0.5071601846779873, "grad_norm": 0.0, - "learning_rate": 1.4603165727331392e-05, - "loss": 0.9505, + "learning_rate": 1.0254052501612543e-05, + "loss": 1.0581, "step": 12962 }, { - "epoch": 0.367338264048287, + "epoch": 0.5071993113702168, "grad_norm": 0.0, - "learning_rate": 1.4602350936085066e-05, - "loss": 0.8941, + "learning_rate": 1.0252785677480934e-05, + "loss": 1.0898, "step": 12963 }, { - "epoch": 0.36736660149054945, + "epoch": 0.5072384380624462, "grad_norm": 0.0, - "learning_rate": 1.4601536106071428e-05, - "loss": 0.9351, + "learning_rate": 1.0251518849289905e-05, + "loss": 0.9947, "step": 12964 }, { - "epoch": 0.36739493893281194, + "epoch": 0.5072775647546757, "grad_norm": 0.0, - "learning_rate": 1.4600721237297344e-05, - "loss": 0.8546, + "learning_rate": 1.02502520170598e-05, + "loss": 1.0475, "step": 12965 }, { - "epoch": 0.3674232763750744, + "epoch": 0.507316691446905, "grad_norm": 0.0, - "learning_rate": 1.4599906329769678e-05, - "loss": 0.9717, + "learning_rate": 1.0248985180810958e-05, + "loss": 1.0394, "step": 12966 }, { - "epoch": 0.3674516138173368, + "epoch": 0.5073558181391346, "grad_norm": 0.0, - "learning_rate": 1.459909138349529e-05, - "loss": 0.8861, + "learning_rate": 1.0247718340563728e-05, + "loss": 1.0017, "step": 12967 }, { - "epoch": 0.3674799512595993, + "epoch": 0.5073949448313639, "grad_norm": 0.0, - "learning_rate": 1.4598276398481046e-05, - "loss": 0.7992, + "learning_rate": 1.024645149633845e-05, + "loss": 1.0813, "step": 12968 }, { - "epoch": 0.36750828870186175, + "epoch": 0.5074340715235934, "grad_norm": 0.0, - "learning_rate": 1.4597461374733817e-05, - "loss": 0.9258, + "learning_rate": 1.0245184648155472e-05, + "loss": 1.1146, "step": 12969 }, { - "epoch": 0.36753662614412425, + "epoch": 0.5074731982158228, "grad_norm": 0.0, - "learning_rate": 1.4596646312260462e-05, - "loss": 0.995, + "learning_rate": 1.0243917796035135e-05, + "loss": 0.9334, "step": 12970 }, { - "epoch": 0.3675649635863867, + "epoch": 0.5075123249080523, "grad_norm": 0.0, - "learning_rate": 1.459583121106785e-05, - "loss": 0.9924, + "learning_rate": 1.0242650939997786e-05, + "loss": 0.9893, "step": 12971 }, { - "epoch": 0.3675933010286492, + "epoch": 0.5075514516002817, "grad_norm": 0.0, - "learning_rate": 1.459501607116284e-05, - "loss": 0.9704, + "learning_rate": 1.0241384080063761e-05, + "loss": 0.9288, "step": 12972 }, { - "epoch": 0.3676216384709116, + "epoch": 0.5075905782925112, "grad_norm": 0.0, - "learning_rate": 1.4594200892552308e-05, - "loss": 0.9542, + "learning_rate": 1.0240117216253416e-05, + "loss": 1.05, "step": 12973 }, { - "epoch": 0.36764997591317405, + "epoch": 0.5076297049847406, "grad_norm": 0.0, - "learning_rate": 1.4593385675243113e-05, - "loss": 1.0569, + "learning_rate": 1.0238850348587088e-05, + "loss": 1.1587, "step": 12974 }, { - "epoch": 0.36767831335543655, + "epoch": 0.5076688316769701, "grad_norm": 0.0, - "learning_rate": 1.4592570419242126e-05, - "loss": 0.9545, + "learning_rate": 1.023758347708512e-05, + "loss": 0.9327, "step": 12975 }, { - "epoch": 0.367706650797699, + "epoch": 0.5077079583691995, "grad_norm": 0.0, - "learning_rate": 1.4591755124556214e-05, - "loss": 0.9023, + "learning_rate": 1.0236316601767862e-05, + "loss": 0.9609, "step": 12976 }, { - "epoch": 0.3677349882399615, + "epoch": 0.507747085061429, "grad_norm": 0.0, - "learning_rate": 1.459093979119224e-05, - "loss": 0.9086, + "learning_rate": 1.0235049722655654e-05, + "loss": 1.1088, "step": 12977 }, { - "epoch": 0.3677633256822239, + "epoch": 0.5077862117536583, "grad_norm": 0.0, - "learning_rate": 1.459012441915708e-05, - "loss": 0.8384, + "learning_rate": 1.0233782839768843e-05, + "loss": 1.0167, "step": 12978 }, { - "epoch": 0.36779166312448636, + "epoch": 0.5078253384458877, "grad_norm": 0.0, - "learning_rate": 1.4589309008457594e-05, - "loss": 0.8888, + "learning_rate": 1.0232515953127771e-05, + "loss": 1.073, "step": 12979 }, { - "epoch": 0.36782000056674885, + "epoch": 0.5078644651381172, "grad_norm": 0.0, - "learning_rate": 1.4588493559100653e-05, - "loss": 1.0592, + "learning_rate": 1.0231249062752787e-05, + "loss": 1.1171, "step": 12980 }, { - "epoch": 0.3678483380090113, + "epoch": 0.5079035918303466, "grad_norm": 0.0, - "learning_rate": 1.458767807109313e-05, - "loss": 0.8224, + "learning_rate": 1.0229982168664227e-05, + "loss": 1.1476, "step": 12981 }, { - "epoch": 0.3678766754512738, + "epoch": 0.5079427185225761, "grad_norm": 0.0, - "learning_rate": 1.4586862544441891e-05, - "loss": 1.0131, + "learning_rate": 1.0228715270882448e-05, + "loss": 1.0943, "step": 12982 }, { - "epoch": 0.3679050128935362, + "epoch": 0.5079818452148055, "grad_norm": 0.0, - "learning_rate": 1.4586046979153805e-05, - "loss": 0.9545, + "learning_rate": 1.0227448369427786e-05, + "loss": 1.0712, "step": 12983 }, { - "epoch": 0.3679333503357987, + "epoch": 0.508020971907035, "grad_norm": 0.0, - "learning_rate": 1.458523137523574e-05, - "loss": 0.832, + "learning_rate": 1.0226181464320589e-05, + "loss": 1.0738, "step": 12984 }, { - "epoch": 0.36796168777806115, + "epoch": 0.5080600985992644, "grad_norm": 0.0, - "learning_rate": 1.4584415732694572e-05, - "loss": 0.9331, + "learning_rate": 1.02249145555812e-05, + "loss": 1.1317, "step": 12985 }, { - "epoch": 0.3679900252203236, + "epoch": 0.5080992252914939, "grad_norm": 0.0, - "learning_rate": 1.4583600051537166e-05, - "loss": 1.0511, + "learning_rate": 1.0223647643229966e-05, + "loss": 1.1112, "step": 12986 }, { - "epoch": 0.3680183626625861, + "epoch": 0.5081383519837233, "grad_norm": 0.0, - "learning_rate": 1.4582784331770395e-05, - "loss": 0.9153, + "learning_rate": 1.022238072728723e-05, + "loss": 1.1469, "step": 12987 }, { - "epoch": 0.3680467001048485, + "epoch": 0.5081774786759528, "grad_norm": 0.0, - "learning_rate": 1.4581968573401128e-05, - "loss": 0.8485, + "learning_rate": 1.022111380777334e-05, + "loss": 1.0972, "step": 12988 }, { - "epoch": 0.368075037547111, + "epoch": 0.5082166053681821, "grad_norm": 0.0, - "learning_rate": 1.458115277643624e-05, - "loss": 0.7864, + "learning_rate": 1.0219846884708638e-05, + "loss": 1.0462, "step": 12989 }, { - "epoch": 0.36810337498937346, + "epoch": 0.5082557320604116, "grad_norm": 0.0, - "learning_rate": 1.4580336940882602e-05, - "loss": 0.9552, + "learning_rate": 1.0218579958113468e-05, + "loss": 1.1027, "step": 12990 }, { - "epoch": 0.3681317124316359, + "epoch": 0.508294858752641, "grad_norm": 0.0, - "learning_rate": 1.4579521066747085e-05, - "loss": 1.0022, + "learning_rate": 1.0217313028008183e-05, + "loss": 0.8725, "step": 12991 }, { - "epoch": 0.3681600498738984, + "epoch": 0.5083339854448705, "grad_norm": 0.0, - "learning_rate": 1.457870515403656e-05, - "loss": 1.0155, + "learning_rate": 1.0216046094413117e-05, + "loss": 1.0138, "step": 12992 }, { - "epoch": 0.3681883873161608, + "epoch": 0.5083731121370999, "grad_norm": 0.0, - "learning_rate": 1.4577889202757902e-05, - "loss": 0.8207, + "learning_rate": 1.0214779157348627e-05, + "loss": 1.0509, "step": 12993 }, { - "epoch": 0.3682167247584233, + "epoch": 0.5084122388293294, "grad_norm": 0.0, - "learning_rate": 1.457707321291798e-05, - "loss": 0.9875, + "learning_rate": 1.0213512216835052e-05, + "loss": 1.0953, "step": 12994 }, { - "epoch": 0.36824506220068576, + "epoch": 0.5084513655215588, "grad_norm": 0.0, - "learning_rate": 1.4576257184523677e-05, - "loss": 0.8857, + "learning_rate": 1.0212245272892733e-05, + "loss": 1.1435, "step": 12995 }, { - "epoch": 0.36827339964294825, + "epoch": 0.5084904922137883, "grad_norm": 0.0, - "learning_rate": 1.4575441117581856e-05, - "loss": 0.869, + "learning_rate": 1.0210978325542027e-05, + "loss": 0.9882, "step": 12996 }, { - "epoch": 0.3683017370852107, + "epoch": 0.5085296189060177, "grad_norm": 0.0, - "learning_rate": 1.4574625012099394e-05, - "loss": 0.9129, + "learning_rate": 1.020971137480327e-05, + "loss": 0.921, "step": 12997 }, { - "epoch": 0.36833007452747313, + "epoch": 0.5085687455982472, "grad_norm": 0.0, - "learning_rate": 1.4573808868083172e-05, - "loss": 0.9303, + "learning_rate": 1.0208444420696812e-05, + "loss": 0.9015, "step": 12998 }, { - "epoch": 0.3683584119697356, + "epoch": 0.5086078722904765, "grad_norm": 0.0, - "learning_rate": 1.4572992685540057e-05, - "loss": 0.8846, + "learning_rate": 1.0207177463242998e-05, + "loss": 1.1537, "step": 12999 }, { - "epoch": 0.36838674941199806, + "epoch": 0.508646998982706, "grad_norm": 0.0, - "learning_rate": 1.4572176464476924e-05, - "loss": 1.016, + "learning_rate": 1.0205910502462174e-05, + "loss": 0.9963, "step": 13000 }, { - "epoch": 0.36841508685426055, + "epoch": 0.5086861256749354, "grad_norm": 0.0, - "learning_rate": 1.4571360204900653e-05, - "loss": 0.9986, + "learning_rate": 1.0204643538374685e-05, + "loss": 1.0302, "step": 13001 }, { - "epoch": 0.368443424296523, + "epoch": 0.5087252523671649, "grad_norm": 0.0, - "learning_rate": 1.4570543906818118e-05, - "loss": 0.8977, + "learning_rate": 1.0203376571000879e-05, + "loss": 1.0653, "step": 13002 }, { - "epoch": 0.36847176173878543, + "epoch": 0.5087643790593943, "grad_norm": 0.0, - "learning_rate": 1.4569727570236195e-05, - "loss": 0.7823, + "learning_rate": 1.0202109600361098e-05, + "loss": 0.9597, "step": 13003 }, { - "epoch": 0.3685000991810479, + "epoch": 0.5088035057516238, "grad_norm": 0.0, - "learning_rate": 1.4568911195161758e-05, - "loss": 1.0849, + "learning_rate": 1.0200842626475689e-05, + "loss": 1.0632, "step": 13004 }, { - "epoch": 0.36852843662331036, + "epoch": 0.5088426324438532, "grad_norm": 0.0, - "learning_rate": 1.4568094781601687e-05, - "loss": 0.8989, + "learning_rate": 1.0199575649365002e-05, + "loss": 1.0342, "step": 13005 }, { - "epoch": 0.36855677406557286, + "epoch": 0.5088817591360827, "grad_norm": 0.0, - "learning_rate": 1.4567278329562856e-05, - "loss": 0.9886, + "learning_rate": 1.0198308669049377e-05, + "loss": 1.1478, "step": 13006 }, { - "epoch": 0.3685851115078353, + "epoch": 0.5089208858283121, "grad_norm": 0.0, - "learning_rate": 1.4566461839052144e-05, - "loss": 0.9249, + "learning_rate": 1.0197041685549166e-05, + "loss": 0.9992, "step": 13007 }, { - "epoch": 0.3686134489500978, + "epoch": 0.5089600125205415, "grad_norm": 0.0, - "learning_rate": 1.4565645310076429e-05, - "loss": 1.0153, + "learning_rate": 1.019577469888471e-05, + "loss": 1.0554, "step": 13008 }, { - "epoch": 0.3686417863923602, + "epoch": 0.508999139212771, "grad_norm": 0.0, - "learning_rate": 1.4564828742642586e-05, - "loss": 1.0451, + "learning_rate": 1.019450770907636e-05, + "loss": 0.9401, "step": 13009 }, { - "epoch": 0.36867012383462267, + "epoch": 0.5090382659050003, "grad_norm": 0.0, - "learning_rate": 1.4564012136757497e-05, - "loss": 0.9922, + "learning_rate": 1.019324071614446e-05, + "loss": 0.9413, "step": 13010 }, { - "epoch": 0.36869846127688516, + "epoch": 0.5090773925972298, "grad_norm": 0.0, - "learning_rate": 1.456319549242804e-05, - "loss": 1.0402, + "learning_rate": 1.0191973720109354e-05, + "loss": 1.0015, "step": 13011 }, { - "epoch": 0.3687267987191476, + "epoch": 0.5091165192894592, "grad_norm": 0.0, - "learning_rate": 1.456237880966109e-05, - "loss": 0.88, + "learning_rate": 1.019070672099139e-05, + "loss": 1.1125, "step": 13012 }, { - "epoch": 0.3687551361614101, + "epoch": 0.5091556459816887, "grad_norm": 0.0, - "learning_rate": 1.456156208846353e-05, - "loss": 1.0469, + "learning_rate": 1.0189439718810919e-05, + "loss": 1.0985, "step": 13013 }, { - "epoch": 0.36878347360367253, + "epoch": 0.5091947726739181, "grad_norm": 0.0, - "learning_rate": 1.4560745328842238e-05, - "loss": 0.9627, + "learning_rate": 1.0188172713588282e-05, + "loss": 1.0033, "step": 13014 }, { - "epoch": 0.36881181104593497, + "epoch": 0.5092338993661476, "grad_norm": 0.0, - "learning_rate": 1.4559928530804097e-05, - "loss": 0.9927, + "learning_rate": 1.0186905705343826e-05, + "loss": 1.1419, "step": 13015 }, { - "epoch": 0.36884014848819746, + "epoch": 0.509273026058377, "grad_norm": 0.0, - "learning_rate": 1.4559111694355985e-05, - "loss": 0.9308, + "learning_rate": 1.01856386940979e-05, + "loss": 1.049, "step": 13016 }, { - "epoch": 0.3688684859304599, + "epoch": 0.5093121527506065, "grad_norm": 0.0, - "learning_rate": 1.4558294819504779e-05, - "loss": 0.8397, + "learning_rate": 1.0184371679870845e-05, + "loss": 1.0171, "step": 13017 }, { - "epoch": 0.3688968233727224, + "epoch": 0.5093512794428359, "grad_norm": 0.0, - "learning_rate": 1.4557477906257365e-05, - "loss": 0.9495, + "learning_rate": 1.0183104662683016e-05, + "loss": 0.9807, "step": 13018 }, { - "epoch": 0.36892516081498483, + "epoch": 0.5093904061350654, "grad_norm": 0.0, - "learning_rate": 1.4556660954620622e-05, - "loss": 0.8902, + "learning_rate": 1.0181837642554753e-05, + "loss": 0.996, "step": 13019 }, { - "epoch": 0.3689534982572473, + "epoch": 0.5094295328272948, "grad_norm": 0.0, - "learning_rate": 1.455584396460143e-05, - "loss": 0.9849, + "learning_rate": 1.018057061950641e-05, + "loss": 1.0542, "step": 13020 }, { - "epoch": 0.36898183569950976, + "epoch": 0.5094686595195242, "grad_norm": 0.0, - "learning_rate": 1.4555026936206675e-05, - "loss": 0.9169, + "learning_rate": 1.0179303593558323e-05, + "loss": 0.9233, "step": 13021 }, { - "epoch": 0.3690101731417722, + "epoch": 0.5095077862117536, "grad_norm": 0.0, - "learning_rate": 1.4554209869443235e-05, - "loss": 0.9243, + "learning_rate": 1.0178036564730849e-05, + "loss": 0.8931, "step": 13022 }, { - "epoch": 0.3690385105840347, + "epoch": 0.5095469129039831, "grad_norm": 0.0, - "learning_rate": 1.4553392764317998e-05, - "loss": 0.9177, + "learning_rate": 1.0176769533044331e-05, + "loss": 0.9772, "step": 13023 }, { - "epoch": 0.36906684802629713, + "epoch": 0.5095860395962125, "grad_norm": 0.0, - "learning_rate": 1.4552575620837839e-05, - "loss": 0.8655, + "learning_rate": 1.0175502498519115e-05, + "loss": 1.2129, "step": 13024 }, { - "epoch": 0.36909518546855963, + "epoch": 0.509625166288442, "grad_norm": 0.0, - "learning_rate": 1.4551758439009647e-05, - "loss": 0.9597, + "learning_rate": 1.0174235461175547e-05, + "loss": 0.901, "step": 13025 }, { - "epoch": 0.36912352291082207, + "epoch": 0.5096642929806714, "grad_norm": 0.0, - "learning_rate": 1.45509412188403e-05, - "loss": 0.8143, + "learning_rate": 1.0172968421033977e-05, + "loss": 0.9671, "step": 13026 }, { - "epoch": 0.3691518603530845, + "epoch": 0.5097034196729009, "grad_norm": 0.0, - "learning_rate": 1.4550123960336687e-05, - "loss": 0.874, + "learning_rate": 1.0171701378114751e-05, + "loss": 1.0705, "step": 13027 }, { - "epoch": 0.369180197795347, + "epoch": 0.5097425463651303, "grad_norm": 0.0, - "learning_rate": 1.4549306663505691e-05, - "loss": 0.9329, + "learning_rate": 1.0170434332438217e-05, + "loss": 1.1263, "step": 13028 }, { - "epoch": 0.36920853523760944, + "epoch": 0.5097816730573598, "grad_norm": 0.0, - "learning_rate": 1.4548489328354197e-05, - "loss": 0.7546, + "learning_rate": 1.0169167284024718e-05, + "loss": 1.1561, "step": 13029 }, { - "epoch": 0.36923687267987193, + "epoch": 0.5098207997495892, "grad_norm": 0.0, - "learning_rate": 1.4547671954889085e-05, - "loss": 0.8831, + "learning_rate": 1.016790023289461e-05, + "loss": 0.9768, "step": 13030 }, { - "epoch": 0.36926521012213437, + "epoch": 0.5098599264418187, "grad_norm": 0.0, - "learning_rate": 1.4546854543117243e-05, - "loss": 0.952, + "learning_rate": 1.0166633179068232e-05, + "loss": 0.9872, "step": 13031 }, { - "epoch": 0.36929354756439686, + "epoch": 0.509899053134048, "grad_norm": 0.0, - "learning_rate": 1.4546037093045562e-05, - "loss": 0.9711, + "learning_rate": 1.0165366122565931e-05, + "loss": 1.1153, "step": 13032 }, { - "epoch": 0.3693218850066593, + "epoch": 0.5099381798262775, "grad_norm": 0.0, - "learning_rate": 1.4545219604680918e-05, - "loss": 0.8698, + "learning_rate": 1.0164099063408062e-05, + "loss": 0.9963, "step": 13033 }, { - "epoch": 0.36935022244892174, + "epoch": 0.5099773065185069, "grad_norm": 0.0, - "learning_rate": 1.4544402078030203e-05, - "loss": 0.8672, + "learning_rate": 1.0162832001614966e-05, + "loss": 1.0531, "step": 13034 }, { - "epoch": 0.36937855989118423, + "epoch": 0.5100164332107364, "grad_norm": 0.0, - "learning_rate": 1.45435845131003e-05, - "loss": 0.8348, + "learning_rate": 1.016156493720699e-05, + "loss": 1.0639, "step": 13035 }, { - "epoch": 0.36940689733344667, + "epoch": 0.5100555599029658, "grad_norm": 0.0, - "learning_rate": 1.45427669098981e-05, - "loss": 0.818, + "learning_rate": 1.0160297870204486e-05, + "loss": 0.994, "step": 13036 }, { - "epoch": 0.36943523477570916, + "epoch": 0.5100946865951952, "grad_norm": 0.0, - "learning_rate": 1.4541949268430487e-05, - "loss": 0.8634, + "learning_rate": 1.0159030800627796e-05, + "loss": 1.0744, "step": 13037 }, { - "epoch": 0.3694635722179716, + "epoch": 0.5101338132874247, "grad_norm": 0.0, - "learning_rate": 1.4541131588704346e-05, - "loss": 0.9083, + "learning_rate": 1.0157763728497275e-05, + "loss": 1.1637, "step": 13038 }, { - "epoch": 0.36949190966023404, + "epoch": 0.5101729399796541, "grad_norm": 0.0, - "learning_rate": 1.454031387072657e-05, - "loss": 1.0073, + "learning_rate": 1.015649665383326e-05, + "loss": 0.8987, "step": 13039 }, { - "epoch": 0.36952024710249654, + "epoch": 0.5102120666718836, "grad_norm": 0.0, - "learning_rate": 1.4539496114504044e-05, - "loss": 1.1306, + "learning_rate": 1.0155229576656112e-05, + "loss": 1.039, "step": 13040 }, { - "epoch": 0.369548584544759, + "epoch": 0.510251193364113, "grad_norm": 0.0, - "learning_rate": 1.4538678320043656e-05, - "loss": 0.8898, + "learning_rate": 1.0153962496986166e-05, + "loss": 1.0383, "step": 13041 }, { - "epoch": 0.36957692198702147, + "epoch": 0.5102903200563425, "grad_norm": 0.0, - "learning_rate": 1.4537860487352293e-05, - "loss": 0.922, + "learning_rate": 1.0152695414843778e-05, + "loss": 1.0292, "step": 13042 }, { - "epoch": 0.3696052594292839, + "epoch": 0.5103294467485718, "grad_norm": 0.0, - "learning_rate": 1.453704261643685e-05, - "loss": 0.9312, + "learning_rate": 1.0151428330249294e-05, + "loss": 1.176, "step": 13043 }, { - "epoch": 0.3696335968715464, + "epoch": 0.5103685734408013, "grad_norm": 0.0, - "learning_rate": 1.4536224707304209e-05, - "loss": 0.9658, + "learning_rate": 1.0150161243223058e-05, + "loss": 1.0053, "step": 13044 }, { - "epoch": 0.36966193431380884, + "epoch": 0.5104077001330307, "grad_norm": 0.0, - "learning_rate": 1.4535406759961267e-05, - "loss": 0.8401, + "learning_rate": 1.0148894153785422e-05, + "loss": 1.0469, "step": 13045 }, { - "epoch": 0.3696902717560713, + "epoch": 0.5104468268252602, "grad_norm": 0.0, - "learning_rate": 1.4534588774414905e-05, - "loss": 0.9486, + "learning_rate": 1.0147627061956731e-05, + "loss": 1.1526, "step": 13046 }, { - "epoch": 0.36971860919833377, + "epoch": 0.5104859535174896, "grad_norm": 0.0, - "learning_rate": 1.4533770750672019e-05, - "loss": 0.8763, + "learning_rate": 1.0146359967757337e-05, + "loss": 1.0593, "step": 13047 }, { - "epoch": 0.3697469466405962, + "epoch": 0.5105250802097191, "grad_norm": 0.0, - "learning_rate": 1.45329526887395e-05, - "loss": 0.9417, + "learning_rate": 1.0145092871207583e-05, + "loss": 0.9783, "step": 13048 }, { - "epoch": 0.3697752840828587, + "epoch": 0.5105642069019485, "grad_norm": 0.0, - "learning_rate": 1.4532134588624236e-05, - "loss": 0.9292, + "learning_rate": 1.014382577232782e-05, + "loss": 1.0402, "step": 13049 }, { - "epoch": 0.36980362152512114, + "epoch": 0.510603333594178, "grad_norm": 0.0, - "learning_rate": 1.4531316450333121e-05, - "loss": 0.9294, + "learning_rate": 1.0142558671138394e-05, + "loss": 1.0957, "step": 13050 }, { - "epoch": 0.3698319589673836, + "epoch": 0.5106424602864074, "grad_norm": 0.0, - "learning_rate": 1.4530498273873042e-05, - "loss": 0.9673, + "learning_rate": 1.0141291567659658e-05, + "loss": 1.0865, "step": 13051 }, { - "epoch": 0.36986029640964607, + "epoch": 0.5106815869786369, "grad_norm": 0.0, - "learning_rate": 1.4529680059250894e-05, - "loss": 0.7966, + "learning_rate": 1.0140024461911955e-05, + "loss": 0.9838, "step": 13052 }, { - "epoch": 0.3698886338519085, + "epoch": 0.5107207136708662, "grad_norm": 0.0, - "learning_rate": 1.4528861806473572e-05, - "loss": 0.9898, + "learning_rate": 1.0138757353915632e-05, + "loss": 0.9936, "step": 13053 }, { - "epoch": 0.369916971294171, + "epoch": 0.5107598403630957, "grad_norm": 0.0, - "learning_rate": 1.4528043515547965e-05, - "loss": 1.0658, + "learning_rate": 1.0137490243691044e-05, + "loss": 1.0756, "step": 13054 }, { - "epoch": 0.36994530873643344, + "epoch": 0.5107989670553251, "grad_norm": 0.0, - "learning_rate": 1.4527225186480962e-05, - "loss": 1.0104, + "learning_rate": 1.0136223131258532e-05, + "loss": 0.9828, "step": 13055 }, { - "epoch": 0.36997364617869594, + "epoch": 0.5108380937475546, "grad_norm": 0.0, - "learning_rate": 1.4526406819279464e-05, - "loss": 0.972, + "learning_rate": 1.0134956016638446e-05, + "loss": 1.0713, "step": 13056 }, { - "epoch": 0.3700019836209584, + "epoch": 0.510877220439784, "grad_norm": 0.0, - "learning_rate": 1.4525588413950357e-05, - "loss": 0.9786, + "learning_rate": 1.0133688899851137e-05, + "loss": 0.9582, "step": 13057 }, { - "epoch": 0.3700303210632208, + "epoch": 0.5109163471320135, "grad_norm": 0.0, - "learning_rate": 1.4524769970500541e-05, - "loss": 0.8906, + "learning_rate": 1.0132421780916954e-05, + "loss": 1.1227, "step": 13058 }, { - "epoch": 0.3700586585054833, + "epoch": 0.5109554738242429, "grad_norm": 0.0, - "learning_rate": 1.4523951488936905e-05, - "loss": 0.9127, + "learning_rate": 1.013115465985624e-05, + "loss": 0.9964, "step": 13059 }, { - "epoch": 0.37008699594774574, + "epoch": 0.5109946005164724, "grad_norm": 0.0, - "learning_rate": 1.452313296926635e-05, - "loss": 1.0116, + "learning_rate": 1.012988753668935e-05, + "loss": 1.1448, "step": 13060 }, { - "epoch": 0.37011533339000824, + "epoch": 0.5110337272087018, "grad_norm": 0.0, - "learning_rate": 1.4522314411495763e-05, - "loss": 0.8867, + "learning_rate": 1.0128620411436626e-05, + "loss": 1.1083, "step": 13061 }, { - "epoch": 0.3701436708322707, + "epoch": 0.5110728539009313, "grad_norm": 0.0, - "learning_rate": 1.4521495815632043e-05, - "loss": 0.9961, + "learning_rate": 1.012735328411842e-05, + "loss": 1.0115, "step": 13062 }, { - "epoch": 0.3701720082745331, + "epoch": 0.5111119805931607, "grad_norm": 0.0, - "learning_rate": 1.4520677181682084e-05, - "loss": 0.872, + "learning_rate": 1.0126086154755079e-05, + "loss": 0.9794, "step": 13063 }, { - "epoch": 0.3702003457167956, + "epoch": 0.51115110728539, "grad_norm": 0.0, - "learning_rate": 1.4519858509652785e-05, - "loss": 0.9319, + "learning_rate": 1.0124819023366954e-05, + "loss": 1.0997, "step": 13064 }, { - "epoch": 0.37022868315905805, + "epoch": 0.5111902339776195, "grad_norm": 0.0, - "learning_rate": 1.4519039799551036e-05, - "loss": 0.9579, + "learning_rate": 1.012355188997439e-05, + "loss": 1.0447, "step": 13065 }, { - "epoch": 0.37025702060132054, + "epoch": 0.5112293606698489, "grad_norm": 0.0, - "learning_rate": 1.4518221051383738e-05, - "loss": 0.9434, + "learning_rate": 1.0122284754597738e-05, + "loss": 1.0409, "step": 13066 }, { - "epoch": 0.370285358043583, + "epoch": 0.5112684873620784, "grad_norm": 0.0, - "learning_rate": 1.4517402265157785e-05, - "loss": 1.0053, + "learning_rate": 1.0121017617257349e-05, + "loss": 1.0826, "step": 13067 }, { - "epoch": 0.3703136954858455, + "epoch": 0.5113076140543078, "grad_norm": 0.0, - "learning_rate": 1.451658344088008e-05, - "loss": 0.8735, + "learning_rate": 1.0119750477973564e-05, + "loss": 1.0848, "step": 13068 }, { - "epoch": 0.3703420329281079, + "epoch": 0.5113467407465373, "grad_norm": 0.0, - "learning_rate": 1.4515764578557512e-05, - "loss": 1.0153, + "learning_rate": 1.011848333676674e-05, + "loss": 1.009, "step": 13069 }, { - "epoch": 0.37037037037037035, + "epoch": 0.5113858674387667, "grad_norm": 0.0, - "learning_rate": 1.4514945678196984e-05, - "loss": 0.991, + "learning_rate": 1.0117216193657221e-05, + "loss": 1.0694, "step": 13070 }, { - "epoch": 0.37039870781263284, + "epoch": 0.5114249941309962, "grad_norm": 0.0, - "learning_rate": 1.4514126739805388e-05, - "loss": 0.8524, + "learning_rate": 1.0115949048665356e-05, + "loss": 0.9168, "step": 13071 }, { - "epoch": 0.3704270452548953, + "epoch": 0.5114641208232256, "grad_norm": 0.0, - "learning_rate": 1.4513307763389626e-05, - "loss": 0.9647, + "learning_rate": 1.0114681901811496e-05, + "loss": 1.0407, "step": 13072 }, { - "epoch": 0.3704553826971578, + "epoch": 0.5115032475154551, "grad_norm": 0.0, - "learning_rate": 1.45124887489566e-05, - "loss": 1.0906, + "learning_rate": 1.011341475311599e-05, + "loss": 1.1593, "step": 13073 }, { - "epoch": 0.3704837201394202, + "epoch": 0.5115423742076844, "grad_norm": 0.0, - "learning_rate": 1.4511669696513206e-05, - "loss": 0.9131, + "learning_rate": 1.011214760259918e-05, + "loss": 1.1185, "step": 13074 }, { - "epoch": 0.37051205758168265, + "epoch": 0.5115815008999139, "grad_norm": 0.0, - "learning_rate": 1.4510850606066343e-05, - "loss": 0.874, + "learning_rate": 1.0110880450281424e-05, + "loss": 1.0139, "step": 13075 }, { - "epoch": 0.37054039502394515, + "epoch": 0.5116206275921433, "grad_norm": 0.0, - "learning_rate": 1.4510031477622905e-05, - "loss": 1.0188, + "learning_rate": 1.0109613296183067e-05, + "loss": 1.0094, "step": 13076 }, { - "epoch": 0.3705687324662076, + "epoch": 0.5116597542843728, "grad_norm": 0.0, - "learning_rate": 1.4509212311189803e-05, - "loss": 0.9975, + "learning_rate": 1.0108346140324457e-05, + "loss": 1.0579, "step": 13077 }, { - "epoch": 0.3705970699084701, + "epoch": 0.5116988809766022, "grad_norm": 0.0, - "learning_rate": 1.4508393106773928e-05, - "loss": 0.9459, + "learning_rate": 1.0107078982725942e-05, + "loss": 1.0561, "step": 13078 }, { - "epoch": 0.3706254073507325, + "epoch": 0.5117380076688317, "grad_norm": 0.0, - "learning_rate": 1.4507573864382187e-05, - "loss": 0.928, + "learning_rate": 1.0105811823407874e-05, + "loss": 1.0546, "step": 13079 }, { - "epoch": 0.370653744792995, + "epoch": 0.5117771343610611, "grad_norm": 0.0, - "learning_rate": 1.4506754584021474e-05, - "loss": 0.9155, + "learning_rate": 1.01045446623906e-05, + "loss": 1.0876, "step": 13080 }, { - "epoch": 0.37068208223525745, + "epoch": 0.5118162610532906, "grad_norm": 0.0, - "learning_rate": 1.4505935265698694e-05, - "loss": 0.9577, + "learning_rate": 1.0103277499694472e-05, + "loss": 1.0762, "step": 13081 }, { - "epoch": 0.3707104196775199, + "epoch": 0.51185538774552, "grad_norm": 0.0, - "learning_rate": 1.4505115909420751e-05, - "loss": 0.9417, + "learning_rate": 1.0102010335339835e-05, + "loss": 1.0884, "step": 13082 }, { - "epoch": 0.3707387571197824, + "epoch": 0.5118945144377495, "grad_norm": 0.0, - "learning_rate": 1.4504296515194543e-05, - "loss": 0.7831, + "learning_rate": 1.010074316934704e-05, + "loss": 0.8553, "step": 13083 }, { - "epoch": 0.3707670945620448, + "epoch": 0.5119336411299789, "grad_norm": 0.0, - "learning_rate": 1.4503477083026969e-05, - "loss": 0.8758, + "learning_rate": 1.0099476001736434e-05, + "loss": 1.1041, "step": 13084 }, { - "epoch": 0.3707954320043073, + "epoch": 0.5119727678222084, "grad_norm": 0.0, - "learning_rate": 1.4502657612924938e-05, - "loss": 1.0051, + "learning_rate": 1.0098208832528373e-05, + "loss": 1.0349, "step": 13085 }, { - "epoch": 0.37082376944656975, + "epoch": 0.5120118945144377, "grad_norm": 0.0, - "learning_rate": 1.450183810489535e-05, - "loss": 0.9397, + "learning_rate": 1.0096941661743196e-05, + "loss": 1.023, "step": 13086 }, { - "epoch": 0.3708521068888322, + "epoch": 0.5120510212066672, "grad_norm": 0.0, - "learning_rate": 1.4501018558945109e-05, - "loss": 1.0061, + "learning_rate": 1.0095674489401262e-05, + "loss": 1.0016, "step": 13087 }, { - "epoch": 0.3708804443310947, + "epoch": 0.5120901478988966, "grad_norm": 0.0, - "learning_rate": 1.4500198975081112e-05, - "loss": 1.0419, + "learning_rate": 1.0094407315522912e-05, + "loss": 1.0137, "step": 13088 }, { - "epoch": 0.3709087817733571, + "epoch": 0.5121292745911261, "grad_norm": 0.0, - "learning_rate": 1.4499379353310275e-05, - "loss": 1.0094, + "learning_rate": 1.0093140140128502e-05, + "loss": 1.1196, "step": 13089 }, { - "epoch": 0.3709371192156196, + "epoch": 0.5121684012833555, "grad_norm": 0.0, - "learning_rate": 1.4498559693639492e-05, - "loss": 0.8175, + "learning_rate": 1.0091872963238376e-05, + "loss": 1.1069, "step": 13090 }, { - "epoch": 0.37096545665788205, + "epoch": 0.512207527975585, "grad_norm": 0.0, - "learning_rate": 1.4497739996075669e-05, - "loss": 1.0156, + "learning_rate": 1.0090605784872884e-05, + "loss": 0.987, "step": 13091 }, { - "epoch": 0.37099379410014455, + "epoch": 0.5122466546678144, "grad_norm": 0.0, - "learning_rate": 1.449692026062571e-05, - "loss": 1.0051, + "learning_rate": 1.0089338605052379e-05, + "loss": 0.91, "step": 13092 }, { - "epoch": 0.371022131542407, + "epoch": 0.5122857813600438, "grad_norm": 0.0, - "learning_rate": 1.4496100487296527e-05, - "loss": 0.9149, + "learning_rate": 1.0088071423797206e-05, + "loss": 1.1036, "step": 13093 }, { - "epoch": 0.3710504689846694, + "epoch": 0.5123249080522733, "grad_norm": 0.0, - "learning_rate": 1.4495280676095016e-05, - "loss": 0.9289, + "learning_rate": 1.008680424112772e-05, + "loss": 0.9921, "step": 13094 }, { - "epoch": 0.3710788064269319, + "epoch": 0.5123640347445026, "grad_norm": 0.0, - "learning_rate": 1.449446082702809e-05, - "loss": 0.9275, + "learning_rate": 1.008553705706426e-05, + "loss": 1.024, "step": 13095 }, { - "epoch": 0.37110714386919436, + "epoch": 0.5124031614367321, "grad_norm": 0.0, - "learning_rate": 1.449364094010265e-05, - "loss": 0.8555, + "learning_rate": 1.0084269871627189e-05, + "loss": 0.9695, "step": 13096 }, { - "epoch": 0.37113548131145685, + "epoch": 0.5124422881289615, "grad_norm": 0.0, - "learning_rate": 1.4492821015325603e-05, - "loss": 0.9442, + "learning_rate": 1.0083002684836845e-05, + "loss": 1.1418, "step": 13097 }, { - "epoch": 0.3711638187537193, + "epoch": 0.512481414821191, "grad_norm": 0.0, - "learning_rate": 1.4492001052703854e-05, - "loss": 0.7583, + "learning_rate": 1.0081735496713582e-05, + "loss": 0.9768, "step": 13098 }, { - "epoch": 0.3711921561959817, + "epoch": 0.5125205415134204, "grad_norm": 0.0, - "learning_rate": 1.4491181052244317e-05, - "loss": 1.0307, + "learning_rate": 1.0080468307277751e-05, + "loss": 1.0078, "step": 13099 }, { - "epoch": 0.3712204936382442, + "epoch": 0.5125596682056499, "grad_norm": 0.0, - "learning_rate": 1.4490361013953891e-05, - "loss": 0.9128, + "learning_rate": 1.00792011165497e-05, + "loss": 1.1088, "step": 13100 }, { - "epoch": 0.37124883108050666, + "epoch": 0.5125987948978793, "grad_norm": 0.0, - "learning_rate": 1.4489540937839486e-05, - "loss": 0.8923, + "learning_rate": 1.0077933924549778e-05, + "loss": 1.1411, "step": 13101 }, { - "epoch": 0.37127716852276915, + "epoch": 0.5126379215901088, "grad_norm": 0.0, - "learning_rate": 1.4488720823908013e-05, - "loss": 0.9023, + "learning_rate": 1.0076666731298334e-05, + "loss": 1.0113, "step": 13102 }, { - "epoch": 0.3713055059650316, + "epoch": 0.5126770482823382, "grad_norm": 0.0, - "learning_rate": 1.4487900672166377e-05, - "loss": 0.8781, + "learning_rate": 1.007539953681572e-05, + "loss": 1.0306, "step": 13103 }, { - "epoch": 0.3713338434072941, + "epoch": 0.5127161749745677, "grad_norm": 0.0, - "learning_rate": 1.4487080482621485e-05, - "loss": 0.8537, + "learning_rate": 1.0074132341122282e-05, + "loss": 1.0956, "step": 13104 }, { - "epoch": 0.3713621808495565, + "epoch": 0.5127553016667971, "grad_norm": 0.0, - "learning_rate": 1.448626025528025e-05, - "loss": 0.9259, + "learning_rate": 1.0072865144238373e-05, + "loss": 1.0923, "step": 13105 }, { - "epoch": 0.37139051829181896, + "epoch": 0.5127944283590266, "grad_norm": 0.0, - "learning_rate": 1.4485439990149579e-05, - "loss": 0.8924, + "learning_rate": 1.007159794618434e-05, + "loss": 1.0814, "step": 13106 }, { - "epoch": 0.37141885573408145, + "epoch": 0.5128335550512559, "grad_norm": 0.0, - "learning_rate": 1.448461968723638e-05, - "loss": 0.9253, + "learning_rate": 1.0070330746980534e-05, + "loss": 1.0727, "step": 13107 }, { - "epoch": 0.3714471931763439, + "epoch": 0.5128726817434854, "grad_norm": 0.0, - "learning_rate": 1.4483799346547566e-05, - "loss": 1.0181, + "learning_rate": 1.0069063546647304e-05, + "loss": 1.0804, "step": 13108 }, { - "epoch": 0.3714755306186064, + "epoch": 0.5129118084357148, "grad_norm": 0.0, - "learning_rate": 1.4482978968090044e-05, - "loss": 1.0367, + "learning_rate": 1.0067796345205002e-05, + "loss": 0.9756, "step": 13109 }, { - "epoch": 0.3715038680608688, + "epoch": 0.5129509351279443, "grad_norm": 0.0, - "learning_rate": 1.4482158551870727e-05, - "loss": 0.9053, + "learning_rate": 1.0066529142673976e-05, + "loss": 1.1917, "step": 13110 }, { - "epoch": 0.37153220550313126, + "epoch": 0.5129900618201737, "grad_norm": 0.0, - "learning_rate": 1.448133809789652e-05, - "loss": 0.9858, + "learning_rate": 1.0065261939074571e-05, + "loss": 1.1582, "step": 13111 }, { - "epoch": 0.37156054294539376, + "epoch": 0.5130291885124032, "grad_norm": 0.0, - "learning_rate": 1.4480517606174342e-05, - "loss": 1.0265, + "learning_rate": 1.0063994734427145e-05, + "loss": 1.1423, "step": 13112 }, { - "epoch": 0.3715888803876562, + "epoch": 0.5130683152046326, "grad_norm": 0.0, - "learning_rate": 1.4479697076711098e-05, - "loss": 0.9381, + "learning_rate": 1.0062727528752042e-05, + "loss": 1.0945, "step": 13113 }, { - "epoch": 0.3716172178299187, + "epoch": 0.5131074418968621, "grad_norm": 0.0, - "learning_rate": 1.4478876509513703e-05, - "loss": 0.9594, + "learning_rate": 1.0061460322069613e-05, + "loss": 0.9752, "step": 13114 }, { - "epoch": 0.3716455552721811, + "epoch": 0.5131465685890915, "grad_norm": 0.0, - "learning_rate": 1.447805590458907e-05, - "loss": 0.9227, + "learning_rate": 1.006019311440021e-05, + "loss": 1.0197, "step": 13115 }, { - "epoch": 0.3716738927144436, + "epoch": 0.513185695281321, "grad_norm": 0.0, - "learning_rate": 1.4477235261944107e-05, - "loss": 0.9816, + "learning_rate": 1.005892590576418e-05, + "loss": 1.0098, "step": 13116 }, { - "epoch": 0.37170223015670606, + "epoch": 0.5132248219735503, "grad_norm": 0.0, - "learning_rate": 1.447641458158573e-05, - "loss": 0.951, + "learning_rate": 1.0057658696181875e-05, + "loss": 1.1143, "step": 13117 }, { - "epoch": 0.3717305675989685, + "epoch": 0.5132639486657798, "grad_norm": 0.0, - "learning_rate": 1.4475593863520847e-05, - "loss": 1.0381, + "learning_rate": 1.005639148567364e-05, + "loss": 1.0056, "step": 13118 }, { - "epoch": 0.371758905041231, + "epoch": 0.5133030753580092, "grad_norm": 0.0, - "learning_rate": 1.4474773107756379e-05, - "loss": 0.9987, + "learning_rate": 1.0055124274259833e-05, + "loss": 1.0915, "step": 13119 }, { - "epoch": 0.37178724248349343, + "epoch": 0.5133422020502387, "grad_norm": 0.0, - "learning_rate": 1.4473952314299235e-05, - "loss": 0.8998, + "learning_rate": 1.0053857061960798e-05, + "loss": 0.959, "step": 13120 }, { - "epoch": 0.3718155799257559, + "epoch": 0.5133813287424681, "grad_norm": 0.0, - "learning_rate": 1.4473131483156326e-05, - "loss": 0.8499, + "learning_rate": 1.0052589848796882e-05, + "loss": 1.0749, "step": 13121 }, { - "epoch": 0.37184391736801836, + "epoch": 0.5134204554346975, "grad_norm": 0.0, - "learning_rate": 1.4472310614334575e-05, - "loss": 0.9136, + "learning_rate": 1.0051322634788446e-05, + "loss": 1.0212, "step": 13122 }, { - "epoch": 0.3718722548102808, + "epoch": 0.513459582126927, "grad_norm": 0.0, - "learning_rate": 1.4471489707840887e-05, - "loss": 0.8828, + "learning_rate": 1.0050055419955829e-05, + "loss": 1.0276, "step": 13123 }, { - "epoch": 0.3719005922525433, + "epoch": 0.5134987088191564, "grad_norm": 0.0, - "learning_rate": 1.447066876368218e-05, - "loss": 0.9322, + "learning_rate": 1.0048788204319383e-05, + "loss": 1.0236, "step": 13124 }, { - "epoch": 0.37192892969480573, + "epoch": 0.5135378355113859, "grad_norm": 0.0, - "learning_rate": 1.4469847781865372e-05, - "loss": 0.9694, + "learning_rate": 1.0047520987899463e-05, + "loss": 1.0656, "step": 13125 }, { - "epoch": 0.3719572671370682, + "epoch": 0.5135769622036153, "grad_norm": 0.0, - "learning_rate": 1.4469026762397376e-05, - "loss": 0.8544, + "learning_rate": 1.0046253770716412e-05, + "loss": 0.9253, "step": 13126 }, { - "epoch": 0.37198560457933066, + "epoch": 0.5136160888958448, "grad_norm": 0.0, - "learning_rate": 1.4468205705285108e-05, - "loss": 0.9126, + "learning_rate": 1.0044986552790586e-05, + "loss": 1.0266, "step": 13127 }, { - "epoch": 0.37201394202159316, + "epoch": 0.5136552155880741, "grad_norm": 0.0, - "learning_rate": 1.4467384610535486e-05, - "loss": 0.9478, + "learning_rate": 1.004371933414233e-05, + "loss": 0.9818, "step": 13128 }, { - "epoch": 0.3720422794638556, + "epoch": 0.5136943422803036, "grad_norm": 0.0, - "learning_rate": 1.4466563478155422e-05, - "loss": 0.9424, + "learning_rate": 1.0042452114791998e-05, + "loss": 1.0632, "step": 13129 }, { - "epoch": 0.37207061690611803, + "epoch": 0.513733468972533, "grad_norm": 0.0, - "learning_rate": 1.4465742308151836e-05, - "loss": 1.0516, + "learning_rate": 1.004118489475994e-05, + "loss": 1.0541, "step": 13130 }, { - "epoch": 0.37209895434838053, + "epoch": 0.5137725956647625, "grad_norm": 0.0, - "learning_rate": 1.4464921100531642e-05, - "loss": 0.9096, + "learning_rate": 1.0039917674066499e-05, + "loss": 1.0245, "step": 13131 }, { - "epoch": 0.37212729179064297, + "epoch": 0.5138117223569919, "grad_norm": 0.0, - "learning_rate": 1.4464099855301762e-05, - "loss": 0.9891, + "learning_rate": 1.0038650452732035e-05, + "loss": 1.0775, "step": 13132 }, { - "epoch": 0.37215562923290546, + "epoch": 0.5138508490492214, "grad_norm": 0.0, - "learning_rate": 1.446327857246911e-05, - "loss": 0.996, + "learning_rate": 1.0037383230776891e-05, + "loss": 1.049, "step": 13133 }, { - "epoch": 0.3721839666751679, + "epoch": 0.5138899757414508, "grad_norm": 0.0, - "learning_rate": 1.4462457252040606e-05, - "loss": 0.9367, + "learning_rate": 1.003611600822142e-05, + "loss": 0.9583, "step": 13134 }, { - "epoch": 0.37221230411743034, + "epoch": 0.5139291024336803, "grad_norm": 0.0, - "learning_rate": 1.4461635894023167e-05, - "loss": 0.9619, + "learning_rate": 1.0034848785085969e-05, + "loss": 1.0303, "step": 13135 }, { - "epoch": 0.37224064155969283, + "epoch": 0.5139682291259097, "grad_norm": 0.0, - "learning_rate": 1.4460814498423714e-05, - "loss": 0.9749, + "learning_rate": 1.0033581561390892e-05, + "loss": 1.1414, "step": 13136 }, { - "epoch": 0.37226897900195527, + "epoch": 0.5140073558181392, "grad_norm": 0.0, - "learning_rate": 1.445999306524916e-05, - "loss": 0.7945, + "learning_rate": 1.0032314337156537e-05, + "loss": 1.0495, "step": 13137 }, { - "epoch": 0.37229731644421776, + "epoch": 0.5140464825103686, "grad_norm": 0.0, - "learning_rate": 1.4459171594506431e-05, - "loss": 0.9431, + "learning_rate": 1.0031047112403255e-05, + "loss": 1.0449, "step": 13138 }, { - "epoch": 0.3723256538864802, + "epoch": 0.514085609202598, "grad_norm": 0.0, - "learning_rate": 1.4458350086202443e-05, - "loss": 0.9601, + "learning_rate": 1.0029779887151397e-05, + "loss": 1.076, "step": 13139 }, { - "epoch": 0.3723539913287427, + "epoch": 0.5141247358948274, "grad_norm": 0.0, - "learning_rate": 1.445752854034412e-05, - "loss": 0.9568, + "learning_rate": 1.0028512661421306e-05, + "loss": 1.1614, "step": 13140 }, { - "epoch": 0.37238232877100513, + "epoch": 0.5141638625870569, "grad_norm": 0.0, - "learning_rate": 1.4456706956938376e-05, - "loss": 0.8652, + "learning_rate": 1.002724543523334e-05, + "loss": 1.1102, "step": 13141 }, { - "epoch": 0.37241066621326757, + "epoch": 0.5142029892792863, "grad_norm": 0.0, - "learning_rate": 1.4455885335992133e-05, - "loss": 0.9028, + "learning_rate": 1.0025978208607847e-05, + "loss": 1.0371, "step": 13142 }, { - "epoch": 0.37243900365553007, + "epoch": 0.5142421159715158, "grad_norm": 0.0, - "learning_rate": 1.4455063677512315e-05, - "loss": 0.9883, + "learning_rate": 1.0024710981565177e-05, + "loss": 1.0044, "step": 13143 }, { - "epoch": 0.3724673410977925, + "epoch": 0.5142812426637452, "grad_norm": 0.0, - "learning_rate": 1.4454241981505838e-05, - "loss": 0.8933, + "learning_rate": 1.0023443754125678e-05, + "loss": 0.8911, "step": 13144 }, { - "epoch": 0.372495678540055, + "epoch": 0.5143203693559747, "grad_norm": 0.0, - "learning_rate": 1.445342024797963e-05, - "loss": 0.9725, + "learning_rate": 1.0022176526309703e-05, + "loss": 1.1743, "step": 13145 }, { - "epoch": 0.37252401598231744, + "epoch": 0.5143594960482041, "grad_norm": 0.0, - "learning_rate": 1.445259847694061e-05, - "loss": 0.9609, + "learning_rate": 1.00209092981376e-05, + "loss": 1.0694, "step": 13146 }, { - "epoch": 0.3725523534245799, + "epoch": 0.5143986227404336, "grad_norm": 0.0, - "learning_rate": 1.4451776668395698e-05, - "loss": 1.0083, + "learning_rate": 1.0019642069629723e-05, + "loss": 0.9637, "step": 13147 }, { - "epoch": 0.37258069086684237, + "epoch": 0.514437749432663, "grad_norm": 0.0, - "learning_rate": 1.4450954822351819e-05, - "loss": 1.0323, + "learning_rate": 1.0018374840806416e-05, + "loss": 1.0515, "step": 13148 }, { - "epoch": 0.3726090283091048, + "epoch": 0.5144768761248923, "grad_norm": 0.0, - "learning_rate": 1.4450132938815896e-05, - "loss": 0.8909, + "learning_rate": 1.0017107611688031e-05, + "loss": 1.0713, "step": 13149 }, { - "epoch": 0.3726373657513673, + "epoch": 0.5145160028171218, "grad_norm": 0.0, - "learning_rate": 1.4449311017794847e-05, - "loss": 0.9565, + "learning_rate": 1.0015840382294922e-05, + "loss": 1.1153, "step": 13150 }, { - "epoch": 0.37266570319362974, + "epoch": 0.5145551295093512, "grad_norm": 0.0, - "learning_rate": 1.44484890592956e-05, - "loss": 0.9611, + "learning_rate": 1.0014573152647433e-05, + "loss": 0.9518, "step": 13151 }, { - "epoch": 0.37269404063589223, + "epoch": 0.5145942562015807, "grad_norm": 0.0, - "learning_rate": 1.4447667063325081e-05, - "loss": 0.9388, + "learning_rate": 1.0013305922765919e-05, + "loss": 1.0637, "step": 13152 }, { - "epoch": 0.37272237807815467, + "epoch": 0.5146333828938101, "grad_norm": 0.0, - "learning_rate": 1.4446845029890209e-05, - "loss": 0.9002, + "learning_rate": 1.001203869267073e-05, + "loss": 1.0509, "step": 13153 }, { - "epoch": 0.3727507155204171, + "epoch": 0.5146725095860396, "grad_norm": 0.0, - "learning_rate": 1.4446022958997908e-05, - "loss": 0.9238, + "learning_rate": 1.0010771462382214e-05, + "loss": 1.067, "step": 13154 }, { - "epoch": 0.3727790529626796, + "epoch": 0.514711636278269, "grad_norm": 0.0, - "learning_rate": 1.4445200850655106e-05, - "loss": 0.8789, + "learning_rate": 1.000950423192072e-05, + "loss": 0.9441, "step": 13155 }, { - "epoch": 0.37280739040494204, + "epoch": 0.5147507629704985, "grad_norm": 0.0, - "learning_rate": 1.444437870486873e-05, - "loss": 0.9349, + "learning_rate": 1.0008237001306602e-05, + "loss": 1.0439, "step": 13156 }, { - "epoch": 0.37283572784720453, + "epoch": 0.5147898896627279, "grad_norm": 0.0, - "learning_rate": 1.4443556521645696e-05, - "loss": 0.8801, + "learning_rate": 1.0006969770560207e-05, + "loss": 1.0969, "step": 13157 }, { - "epoch": 0.372864065289467, + "epoch": 0.5148290163549574, "grad_norm": 0.0, - "learning_rate": 1.4442734300992937e-05, - "loss": 0.9666, + "learning_rate": 1.0005702539701888e-05, + "loss": 0.9621, "step": 13158 }, { - "epoch": 0.3728924027317294, + "epoch": 0.5148681430471868, "grad_norm": 0.0, - "learning_rate": 1.4441912042917378e-05, - "loss": 0.9044, + "learning_rate": 1.0004435308751993e-05, + "loss": 0.8561, "step": 13159 }, { - "epoch": 0.3729207401739919, + "epoch": 0.5149072697394163, "grad_norm": 0.0, - "learning_rate": 1.4441089747425946e-05, - "loss": 0.9777, + "learning_rate": 1.0003168077730867e-05, + "loss": 0.9843, "step": 13160 }, { - "epoch": 0.37294907761625434, + "epoch": 0.5149463964316456, "grad_norm": 0.0, - "learning_rate": 1.4440267414525564e-05, - "loss": 1.0392, + "learning_rate": 1.0001900846658872e-05, + "loss": 1.0129, "step": 13161 }, { - "epoch": 0.37297741505851684, + "epoch": 0.5149855231238751, "grad_norm": 0.0, - "learning_rate": 1.443944504422316e-05, - "loss": 0.8611, + "learning_rate": 1.000063361555635e-05, + "loss": 1.0311, "step": 13162 }, { - "epoch": 0.3730057525007793, + "epoch": 0.5150246498161045, "grad_norm": 0.0, - "learning_rate": 1.4438622636525666e-05, - "loss": 0.9203, + "learning_rate": 9.999366384443652e-06, + "loss": 1.0672, "step": 13163 }, { - "epoch": 0.3730340899430417, + "epoch": 0.515063776508334, "grad_norm": 0.0, - "learning_rate": 1.443780019144e-05, - "loss": 0.9614, + "learning_rate": 9.998099153341131e-06, + "loss": 1.1266, "step": 13164 }, { - "epoch": 0.3730624273853042, + "epoch": 0.5151029032005634, "grad_norm": 0.0, - "learning_rate": 1.4436977708973099e-05, - "loss": 0.8287, + "learning_rate": 9.996831922269135e-06, + "loss": 1.0659, "step": 13165 }, { - "epoch": 0.37309076482756665, + "epoch": 0.5151420298927929, "grad_norm": 0.0, - "learning_rate": 1.4436155189131884e-05, - "loss": 1.0367, + "learning_rate": 9.995564691248013e-06, + "loss": 1.0598, "step": 13166 }, { - "epoch": 0.37311910226982914, + "epoch": 0.5151811565850223, "grad_norm": 0.0, - "learning_rate": 1.4435332631923289e-05, - "loss": 0.8479, + "learning_rate": 9.994297460298114e-06, + "loss": 1.0099, "step": 13167 }, { - "epoch": 0.3731474397120916, + "epoch": 0.5152202832772518, "grad_norm": 0.0, - "learning_rate": 1.4434510037354239e-05, - "loss": 0.9672, + "learning_rate": 9.993030229439795e-06, + "loss": 1.1377, "step": 13168 }, { - "epoch": 0.37317577715435407, + "epoch": 0.5152594099694812, "grad_norm": 0.0, - "learning_rate": 1.4433687405431663e-05, - "loss": 1.005, + "learning_rate": 9.991762998693401e-06, + "loss": 1.0806, "step": 13169 }, { - "epoch": 0.3732041145966165, + "epoch": 0.5152985366617107, "grad_norm": 0.0, - "learning_rate": 1.4432864736162493e-05, - "loss": 0.9294, + "learning_rate": 9.990495768079283e-06, + "loss": 1.0884, "step": 13170 }, { - "epoch": 0.37323245203887895, + "epoch": 0.51533766335394, "grad_norm": 0.0, - "learning_rate": 1.4432042029553657e-05, - "loss": 0.982, + "learning_rate": 9.989228537617792e-06, + "loss": 1.0222, "step": 13171 }, { - "epoch": 0.37326078948114144, + "epoch": 0.5153767900461695, "grad_norm": 0.0, - "learning_rate": 1.4431219285612085e-05, - "loss": 1.0604, + "learning_rate": 9.987961307329272e-06, + "loss": 0.9978, "step": 13172 }, { - "epoch": 0.3732891269234039, + "epoch": 0.5154159167383989, "grad_norm": 0.0, - "learning_rate": 1.443039650434471e-05, - "loss": 0.9421, + "learning_rate": 9.986694077234083e-06, + "loss": 1.0732, "step": 13173 }, { - "epoch": 0.3733174643656664, + "epoch": 0.5154550434306284, "grad_norm": 0.0, - "learning_rate": 1.4429573685758458e-05, - "loss": 0.9314, + "learning_rate": 9.985426847352569e-06, + "loss": 0.9064, "step": 13174 }, { - "epoch": 0.3733458018079288, + "epoch": 0.5154941701228578, "grad_norm": 0.0, - "learning_rate": 1.442875082986026e-05, - "loss": 0.916, + "learning_rate": 9.984159617705083e-06, + "loss": 1.0236, "step": 13175 }, { - "epoch": 0.37337413925019125, + "epoch": 0.5155332968150873, "grad_norm": 0.0, - "learning_rate": 1.4427927936657051e-05, - "loss": 0.9722, + "learning_rate": 9.98289238831197e-06, + "loss": 0.9853, "step": 13176 }, { - "epoch": 0.37340247669245374, + "epoch": 0.5155724235073167, "grad_norm": 0.0, - "learning_rate": 1.4427105006155761e-05, - "loss": 0.9334, + "learning_rate": 9.981625159193587e-06, + "loss": 1.0462, "step": 13177 }, { - "epoch": 0.3734308141347162, + "epoch": 0.5156115501995461, "grad_norm": 0.0, - "learning_rate": 1.4426282038363322e-05, - "loss": 0.7676, + "learning_rate": 9.980357930370282e-06, + "loss": 1.1169, "step": 13178 }, { - "epoch": 0.3734591515769787, + "epoch": 0.5156506768917756, "grad_norm": 0.0, - "learning_rate": 1.4425459033286664e-05, - "loss": 0.9277, + "learning_rate": 9.979090701862403e-06, + "loss": 0.9699, "step": 13179 }, { - "epoch": 0.3734874890192411, + "epoch": 0.515689803584005, "grad_norm": 0.0, - "learning_rate": 1.4424635990932723e-05, - "loss": 0.9839, + "learning_rate": 9.977823473690302e-06, + "loss": 1.071, "step": 13180 }, { - "epoch": 0.3735158264615036, + "epoch": 0.5157289302762345, "grad_norm": 0.0, - "learning_rate": 1.442381291130843e-05, - "loss": 0.8687, + "learning_rate": 9.976556245874322e-06, + "loss": 0.9229, "step": 13181 }, { - "epoch": 0.37354416390376605, + "epoch": 0.5157680569684638, "grad_norm": 0.0, - "learning_rate": 1.4422989794420716e-05, - "loss": 1.0162, + "learning_rate": 9.975289018434826e-06, + "loss": 1.0172, "step": 13182 }, { - "epoch": 0.3735725013460285, + "epoch": 0.5158071836606933, "grad_norm": 0.0, - "learning_rate": 1.4422166640276519e-05, - "loss": 1.0353, + "learning_rate": 9.974021791392155e-06, + "loss": 1.0343, "step": 13183 }, { - "epoch": 0.373600838788291, + "epoch": 0.5158463103529227, "grad_norm": 0.0, - "learning_rate": 1.442134344888277e-05, - "loss": 0.9813, + "learning_rate": 9.972754564766663e-06, + "loss": 1.0018, "step": 13184 }, { - "epoch": 0.3736291762305534, + "epoch": 0.5158854370451522, "grad_norm": 0.0, - "learning_rate": 1.4420520220246401e-05, - "loss": 1.0039, + "learning_rate": 9.971487338578694e-06, + "loss": 0.9661, "step": 13185 }, { - "epoch": 0.3736575136728159, + "epoch": 0.5159245637373816, "grad_norm": 0.0, - "learning_rate": 1.4419696954374352e-05, - "loss": 0.9446, + "learning_rate": 9.970220112848607e-06, + "loss": 1.0639, "step": 13186 }, { - "epoch": 0.37368585111507835, + "epoch": 0.5159636904296111, "grad_norm": 0.0, - "learning_rate": 1.4418873651273553e-05, - "loss": 0.9155, + "learning_rate": 9.968952887596748e-06, + "loss": 0.9698, "step": 13187 }, { - "epoch": 0.3737141885573408, + "epoch": 0.5160028171218405, "grad_norm": 0.0, - "learning_rate": 1.4418050310950938e-05, - "loss": 0.971, + "learning_rate": 9.967685662843466e-06, + "loss": 1.1051, "step": 13188 }, { - "epoch": 0.3737425259996033, + "epoch": 0.51604194381407, "grad_norm": 0.0, - "learning_rate": 1.4417226933413446e-05, - "loss": 0.88, + "learning_rate": 9.966418438609111e-06, + "loss": 1.0105, "step": 13189 }, { - "epoch": 0.3737708634418657, + "epoch": 0.5160810705062994, "grad_norm": 0.0, - "learning_rate": 1.4416403518668013e-05, - "loss": 0.9535, + "learning_rate": 9.965151214914031e-06, + "loss": 1.0667, "step": 13190 }, { - "epoch": 0.3737992008841282, + "epoch": 0.5161201971985289, "grad_norm": 0.0, - "learning_rate": 1.4415580066721573e-05, - "loss": 0.875, + "learning_rate": 9.963883991778582e-06, + "loss": 1.1091, "step": 13191 }, { - "epoch": 0.37382753832639065, + "epoch": 0.5161593238907582, "grad_norm": 0.0, - "learning_rate": 1.441475657758106e-05, - "loss": 0.8821, + "learning_rate": 9.962616769223112e-06, + "loss": 0.9001, "step": 13192 }, { - "epoch": 0.37385587576865315, + "epoch": 0.5161984505829877, "grad_norm": 0.0, - "learning_rate": 1.4413933051253414e-05, - "loss": 0.8767, + "learning_rate": 9.96134954726797e-06, + "loss": 1.0483, "step": 13193 }, { - "epoch": 0.3738842132109156, + "epoch": 0.5162375772752171, "grad_norm": 0.0, - "learning_rate": 1.4413109487745571e-05, - "loss": 0.9377, + "learning_rate": 9.9600823259335e-06, + "loss": 1.0121, "step": 13194 }, { - "epoch": 0.373912550653178, + "epoch": 0.5162767039674466, "grad_norm": 0.0, - "learning_rate": 1.4412285887064468e-05, - "loss": 0.9555, + "learning_rate": 9.958815105240064e-06, + "loss": 0.9586, "step": 13195 }, { - "epoch": 0.3739408880954405, + "epoch": 0.516315830659676, "grad_norm": 0.0, - "learning_rate": 1.4411462249217041e-05, - "loss": 0.9056, + "learning_rate": 9.957547885208005e-06, + "loss": 1.1177, "step": 13196 }, { - "epoch": 0.37396922553770295, + "epoch": 0.5163549573519055, "grad_norm": 0.0, - "learning_rate": 1.4410638574210231e-05, - "loss": 0.9087, + "learning_rate": 9.956280665857672e-06, + "loss": 1.1564, "step": 13197 }, { - "epoch": 0.37399756297996545, + "epoch": 0.5163940840441349, "grad_norm": 0.0, - "learning_rate": 1.4409814862050974e-05, - "loss": 0.8743, + "learning_rate": 9.95501344720942e-06, + "loss": 1.0683, "step": 13198 }, { - "epoch": 0.3740259004222279, + "epoch": 0.5164332107363644, "grad_norm": 0.0, - "learning_rate": 1.4408991112746212e-05, - "loss": 0.9362, + "learning_rate": 9.953746229283589e-06, + "loss": 0.9505, "step": 13199 }, { - "epoch": 0.3740542378644903, + "epoch": 0.5164723374285938, "grad_norm": 0.0, - "learning_rate": 1.4408167326302875e-05, - "loss": 0.8774, + "learning_rate": 9.95247901210054e-06, + "loss": 1.0075, "step": 13200 }, { - "epoch": 0.3740825753067528, + "epoch": 0.5165114641208233, "grad_norm": 0.0, - "learning_rate": 1.4407343502727913e-05, - "loss": 0.9076, + "learning_rate": 9.95121179568062e-06, + "loss": 0.9962, "step": 13201 }, { - "epoch": 0.37411091274901526, + "epoch": 0.5165505908130527, "grad_norm": 0.0, - "learning_rate": 1.4406519642028255e-05, - "loss": 0.8568, + "learning_rate": 9.949944580044176e-06, + "loss": 1.0033, "step": 13202 }, { - "epoch": 0.37413925019127775, + "epoch": 0.5165897175052822, "grad_norm": 0.0, - "learning_rate": 1.440569574421085e-05, - "loss": 0.9231, + "learning_rate": 9.948677365211558e-06, + "loss": 1.0122, "step": 13203 }, { - "epoch": 0.3741675876335402, + "epoch": 0.5166288441975115, "grad_norm": 0.0, - "learning_rate": 1.4404871809282632e-05, - "loss": 0.8295, + "learning_rate": 9.94741015120312e-06, + "loss": 1.0294, "step": 13204 }, { - "epoch": 0.3741959250758027, + "epoch": 0.516667970889741, "grad_norm": 0.0, - "learning_rate": 1.4404047837250543e-05, - "loss": 0.9636, + "learning_rate": 9.946142938039205e-06, + "loss": 0.9471, "step": 13205 }, { - "epoch": 0.3742242625180651, + "epoch": 0.5167070975819704, "grad_norm": 0.0, - "learning_rate": 1.4403223828121525e-05, - "loss": 0.9349, + "learning_rate": 9.944875725740169e-06, + "loss": 1.0115, "step": 13206 }, { - "epoch": 0.37425259996032756, + "epoch": 0.5167462242741998, "grad_norm": 0.0, - "learning_rate": 1.4402399781902516e-05, - "loss": 0.91, + "learning_rate": 9.943608514326362e-06, + "loss": 1.0214, "step": 13207 }, { - "epoch": 0.37428093740259005, + "epoch": 0.5167853509664293, "grad_norm": 0.0, - "learning_rate": 1.4401575698600458e-05, - "loss": 0.9708, + "learning_rate": 9.942341303818128e-06, + "loss": 1.0394, "step": 13208 }, { - "epoch": 0.3743092748448525, + "epoch": 0.5168244776586587, "grad_norm": 0.0, - "learning_rate": 1.4400751578222293e-05, - "loss": 0.8684, + "learning_rate": 9.941074094235823e-06, + "loss": 1.1495, "step": 13209 }, { - "epoch": 0.374337612287115, + "epoch": 0.5168636043508882, "grad_norm": 0.0, - "learning_rate": 1.4399927420774966e-05, - "loss": 1.0516, + "learning_rate": 9.939806885599795e-06, + "loss": 0.9718, "step": 13210 }, { - "epoch": 0.3743659497293774, + "epoch": 0.5169027310431176, "grad_norm": 0.0, - "learning_rate": 1.4399103226265418e-05, - "loss": 0.9452, + "learning_rate": 9.938539677930388e-06, + "loss": 0.9319, "step": 13211 }, { - "epoch": 0.37439428717163986, + "epoch": 0.5169418577353471, "grad_norm": 0.0, - "learning_rate": 1.4398278994700587e-05, - "loss": 0.9197, + "learning_rate": 9.93727247124796e-06, + "loss": 1.0192, "step": 13212 }, { - "epoch": 0.37442262461390236, + "epoch": 0.5169809844275765, "grad_norm": 0.0, - "learning_rate": 1.4397454726087418e-05, - "loss": 0.9214, + "learning_rate": 9.936005265572857e-06, + "loss": 0.9645, "step": 13213 }, { - "epoch": 0.3744509620561648, + "epoch": 0.517020111119806, "grad_norm": 0.0, - "learning_rate": 1.4396630420432858e-05, - "loss": 0.9062, + "learning_rate": 9.93473806092543e-06, + "loss": 1.0765, "step": 13214 }, { - "epoch": 0.3744792994984273, + "epoch": 0.5170592378120353, "grad_norm": 0.0, - "learning_rate": 1.4395806077743843e-05, - "loss": 0.9856, + "learning_rate": 9.93347085732603e-06, + "loss": 0.9858, "step": 13215 }, { - "epoch": 0.3745076369406897, + "epoch": 0.5170983645042648, "grad_norm": 0.0, - "learning_rate": 1.4394981698027324e-05, - "loss": 1.0028, + "learning_rate": 9.932203654795001e-06, + "loss": 0.9689, "step": 13216 }, { - "epoch": 0.3745359743829522, + "epoch": 0.5171374911964942, "grad_norm": 0.0, - "learning_rate": 1.4394157281290239e-05, - "loss": 1.0214, + "learning_rate": 9.930936453352696e-06, + "loss": 1.0931, "step": 13217 }, { - "epoch": 0.37456431182521466, + "epoch": 0.5171766178887237, "grad_norm": 0.0, - "learning_rate": 1.4393332827539542e-05, - "loss": 0.876, + "learning_rate": 9.929669253019468e-06, + "loss": 1.079, "step": 13218 }, { - "epoch": 0.3745926492674771, + "epoch": 0.5172157445809531, "grad_norm": 0.0, - "learning_rate": 1.4392508336782167e-05, - "loss": 0.9599, + "learning_rate": 9.928402053815664e-06, + "loss": 1.0094, "step": 13219 }, { - "epoch": 0.3746209867097396, + "epoch": 0.5172548712731826, "grad_norm": 0.0, - "learning_rate": 1.4391683809025063e-05, - "loss": 0.9681, + "learning_rate": 9.92713485576163e-06, + "loss": 0.8955, "step": 13220 }, { - "epoch": 0.37464932415200203, + "epoch": 0.517293997965412, "grad_norm": 0.0, - "learning_rate": 1.4390859244275175e-05, - "loss": 0.9171, + "learning_rate": 9.925867658877718e-06, + "loss": 1.145, "step": 13221 }, { - "epoch": 0.3746776615942645, + "epoch": 0.5173331246576415, "grad_norm": 0.0, - "learning_rate": 1.4390034642539447e-05, - "loss": 0.9116, + "learning_rate": 9.924600463184282e-06, + "loss": 1.0539, "step": 13222 }, { - "epoch": 0.37470599903652696, + "epoch": 0.5173722513498709, "grad_norm": 0.0, - "learning_rate": 1.4389210003824832e-05, - "loss": 0.8242, + "learning_rate": 9.923333268701669e-06, + "loss": 1.0844, "step": 13223 }, { - "epoch": 0.3747343364787894, + "epoch": 0.5174113780421004, "grad_norm": 0.0, - "learning_rate": 1.438838532813827e-05, - "loss": 0.8791, + "learning_rate": 9.922066075450227e-06, + "loss": 0.948, "step": 13224 }, { - "epoch": 0.3747626739210519, + "epoch": 0.5174505047343297, "grad_norm": 0.0, - "learning_rate": 1.4387560615486706e-05, - "loss": 0.885, + "learning_rate": 9.9207988834503e-06, + "loss": 0.9095, "step": 13225 }, { - "epoch": 0.37479101136331433, + "epoch": 0.5174896314265592, "grad_norm": 0.0, - "learning_rate": 1.4386735865877092e-05, - "loss": 0.7761, + "learning_rate": 9.91953169272225e-06, + "loss": 1.1169, "step": 13226 }, { - "epoch": 0.3748193488055768, + "epoch": 0.5175287581187886, "grad_norm": 0.0, - "learning_rate": 1.4385911079316373e-05, - "loss": 0.8899, + "learning_rate": 9.91826450328642e-06, + "loss": 0.9915, "step": 13227 }, { - "epoch": 0.37484768624783926, + "epoch": 0.5175678848110181, "grad_norm": 0.0, - "learning_rate": 1.4385086255811496e-05, - "loss": 0.991, + "learning_rate": 9.916997315163159e-06, + "loss": 0.904, "step": 13228 }, { - "epoch": 0.37487602369010176, + "epoch": 0.5176070115032475, "grad_norm": 0.0, - "learning_rate": 1.4384261395369405e-05, - "loss": 1.0136, + "learning_rate": 9.915730128372816e-06, + "loss": 1.0152, "step": 13229 }, { - "epoch": 0.3749043611323642, + "epoch": 0.517646138195477, "grad_norm": 0.0, - "learning_rate": 1.4383436497997059e-05, - "loss": 0.8904, + "learning_rate": 9.91446294293574e-06, + "loss": 0.9897, "step": 13230 }, { - "epoch": 0.37493269857462663, + "epoch": 0.5176852648877064, "grad_norm": 0.0, - "learning_rate": 1.4382611563701396e-05, - "loss": 0.9982, + "learning_rate": 9.913195758872284e-06, + "loss": 1.0245, "step": 13231 }, { - "epoch": 0.3749610360168891, + "epoch": 0.5177243915799359, "grad_norm": 0.0, - "learning_rate": 1.438178659248937e-05, - "loss": 0.9671, + "learning_rate": 9.911928576202798e-06, + "loss": 1.0941, "step": 13232 }, { - "epoch": 0.37498937345915156, + "epoch": 0.5177635182721653, "grad_norm": 0.0, - "learning_rate": 1.4380961584367927e-05, - "loss": 0.9586, + "learning_rate": 9.910661394947626e-06, + "loss": 0.9499, "step": 13233 }, { - "epoch": 0.37501771090141406, + "epoch": 0.5178026449643948, "grad_norm": 0.0, - "learning_rate": 1.4380136539344018e-05, - "loss": 1.0445, + "learning_rate": 9.909394215127118e-06, + "loss": 0.9503, "step": 13234 }, { - "epoch": 0.3750460483436765, + "epoch": 0.5178417716566242, "grad_norm": 0.0, - "learning_rate": 1.4379311457424591e-05, - "loss": 0.8732, + "learning_rate": 9.908127036761628e-06, + "loss": 1.0142, "step": 13235 }, { - "epoch": 0.37507438578593894, + "epoch": 0.5178808983488535, "grad_norm": 0.0, - "learning_rate": 1.43784863386166e-05, - "loss": 1.1129, + "learning_rate": 9.906859859871503e-06, + "loss": 0.9799, "step": 13236 }, { - "epoch": 0.37510272322820143, + "epoch": 0.517920025041083, "grad_norm": 0.0, - "learning_rate": 1.4377661182926992e-05, - "loss": 0.9562, + "learning_rate": 9.905592684477091e-06, + "loss": 1.0934, "step": 13237 }, { - "epoch": 0.37513106067046387, + "epoch": 0.5179591517333124, "grad_norm": 0.0, - "learning_rate": 1.4376835990362716e-05, - "loss": 0.881, + "learning_rate": 9.904325510598743e-06, + "loss": 1.0683, "step": 13238 }, { - "epoch": 0.37515939811272636, + "epoch": 0.5179982784255419, "grad_norm": 0.0, - "learning_rate": 1.437601076093073e-05, - "loss": 0.9761, + "learning_rate": 9.903058338256804e-06, + "loss": 0.9305, "step": 13239 }, { - "epoch": 0.3751877355549888, + "epoch": 0.5180374051177713, "grad_norm": 0.0, - "learning_rate": 1.4375185494637978e-05, - "loss": 0.9763, + "learning_rate": 9.90179116747163e-06, + "loss": 0.9244, "step": 13240 }, { - "epoch": 0.3752160729972513, + "epoch": 0.5180765318100008, "grad_norm": 0.0, - "learning_rate": 1.4374360191491411e-05, - "loss": 1.0563, + "learning_rate": 9.900523998263567e-06, + "loss": 0.9643, "step": 13241 }, { - "epoch": 0.37524441043951373, + "epoch": 0.5181156585022302, "grad_norm": 0.0, - "learning_rate": 1.4373534851497985e-05, - "loss": 0.8896, + "learning_rate": 9.899256830652965e-06, + "loss": 0.9984, "step": 13242 }, { - "epoch": 0.37527274788177617, + "epoch": 0.5181547851944597, "grad_norm": 0.0, - "learning_rate": 1.4372709474664653e-05, - "loss": 0.9346, + "learning_rate": 9.897989664660168e-06, + "loss": 1.0249, "step": 13243 }, { - "epoch": 0.37530108532403866, + "epoch": 0.5181939118866891, "grad_norm": 0.0, - "learning_rate": 1.4371884060998364e-05, - "loss": 0.9816, + "learning_rate": 9.89672250030553e-06, + "loss": 0.9832, "step": 13244 }, { - "epoch": 0.3753294227663011, + "epoch": 0.5182330385789186, "grad_norm": 0.0, - "learning_rate": 1.4371058610506075e-05, - "loss": 0.94, + "learning_rate": 9.895455337609402e-06, + "loss": 0.968, "step": 13245 }, { - "epoch": 0.3753577602085636, + "epoch": 0.5182721652711479, "grad_norm": 0.0, - "learning_rate": 1.437023312319473e-05, - "loss": 0.9503, + "learning_rate": 9.894188176592129e-06, + "loss": 1.0574, "step": 13246 }, { - "epoch": 0.37538609765082603, + "epoch": 0.5183112919633774, "grad_norm": 0.0, - "learning_rate": 1.4369407599071295e-05, - "loss": 0.8475, + "learning_rate": 9.892921017274062e-06, + "loss": 1.0354, "step": 13247 }, { - "epoch": 0.37541443509308847, + "epoch": 0.5183504186556068, "grad_norm": 0.0, - "learning_rate": 1.4368582038142713e-05, - "loss": 0.9913, + "learning_rate": 9.891653859675548e-06, + "loss": 1.0286, "step": 13248 }, { - "epoch": 0.37544277253535097, + "epoch": 0.5183895453478363, "grad_norm": 0.0, - "learning_rate": 1.4367756440415943e-05, - "loss": 0.9581, + "learning_rate": 9.890386703816936e-06, + "loss": 0.9356, "step": 13249 }, { - "epoch": 0.3754711099776134, + "epoch": 0.5184286720400657, "grad_norm": 0.0, - "learning_rate": 1.4366930805897939e-05, - "loss": 1.059, + "learning_rate": 9.889119549718577e-06, + "loss": 0.9376, "step": 13250 }, { - "epoch": 0.3754994474198759, + "epoch": 0.5184677987322952, "grad_norm": 0.0, - "learning_rate": 1.4366105134595656e-05, - "loss": 0.9448, + "learning_rate": 9.887852397400823e-06, + "loss": 1.0309, "step": 13251 }, { - "epoch": 0.37552778486213834, + "epoch": 0.5185069254245246, "grad_norm": 0.0, - "learning_rate": 1.4365279426516048e-05, - "loss": 0.8966, + "learning_rate": 9.886585246884014e-06, + "loss": 1.1088, "step": 13252 }, { - "epoch": 0.37555612230440083, + "epoch": 0.5185460521167541, "grad_norm": 0.0, - "learning_rate": 1.436445368166607e-05, - "loss": 0.9602, + "learning_rate": 9.885318098188507e-06, + "loss": 1.1053, "step": 13253 }, { - "epoch": 0.37558445974666327, + "epoch": 0.5185851788089835, "grad_norm": 0.0, - "learning_rate": 1.4363627900052676e-05, - "loss": 1.0005, + "learning_rate": 9.884050951334645e-06, + "loss": 1.101, "step": 13254 }, { - "epoch": 0.3756127971889257, + "epoch": 0.518624305501213, "grad_norm": 0.0, - "learning_rate": 1.4362802081682826e-05, - "loss": 0.9626, + "learning_rate": 9.88278380634278e-06, + "loss": 1.1295, "step": 13255 }, { - "epoch": 0.3756411346311882, + "epoch": 0.5186634321934424, "grad_norm": 0.0, - "learning_rate": 1.4361976226563475e-05, - "loss": 0.8894, + "learning_rate": 9.881516663233263e-06, + "loss": 1.11, "step": 13256 }, { - "epoch": 0.37566947207345064, + "epoch": 0.5187025588856718, "grad_norm": 0.0, - "learning_rate": 1.4361150334701575e-05, - "loss": 0.9914, + "learning_rate": 9.880249522026436e-06, + "loss": 1.0479, "step": 13257 }, { - "epoch": 0.37569780951571313, + "epoch": 0.5187416855779012, "grad_norm": 0.0, - "learning_rate": 1.4360324406104086e-05, - "loss": 0.9372, + "learning_rate": 9.878982382742654e-06, + "loss": 1.1081, "step": 13258 }, { - "epoch": 0.37572614695797557, + "epoch": 0.5187808122701307, "grad_norm": 0.0, - "learning_rate": 1.435949844077797e-05, - "loss": 1.0135, + "learning_rate": 9.877715245402263e-06, + "loss": 1.0085, "step": 13259 }, { - "epoch": 0.375754484400238, + "epoch": 0.5188199389623601, "grad_norm": 0.0, - "learning_rate": 1.4358672438730178e-05, - "loss": 0.8389, + "learning_rate": 9.876448110025615e-06, + "loss": 1.0284, "step": 13260 }, { - "epoch": 0.3757828218425005, + "epoch": 0.5188590656545896, "grad_norm": 0.0, - "learning_rate": 1.4357846399967668e-05, - "loss": 1.0269, + "learning_rate": 9.875180976633047e-06, + "loss": 1.0051, "step": 13261 }, { - "epoch": 0.37581115928476294, + "epoch": 0.518898192346819, "grad_norm": 0.0, - "learning_rate": 1.43570203244974e-05, - "loss": 0.9464, + "learning_rate": 9.873913845244923e-06, + "loss": 1.0608, "step": 13262 }, { - "epoch": 0.37583949672702543, + "epoch": 0.5189373190390484, "grad_norm": 0.0, - "learning_rate": 1.4356194212326333e-05, - "loss": 0.9683, + "learning_rate": 9.872646715881585e-06, + "loss": 0.8269, "step": 13263 }, { - "epoch": 0.3758678341692879, + "epoch": 0.5189764457312779, "grad_norm": 0.0, - "learning_rate": 1.4355368063461423e-05, - "loss": 0.9147, + "learning_rate": 9.871379588563379e-06, + "loss": 1.0156, "step": 13264 }, { - "epoch": 0.37589617161155037, + "epoch": 0.5190155724235073, "grad_norm": 0.0, - "learning_rate": 1.4354541877909634e-05, - "loss": 0.9709, + "learning_rate": 9.870112463310656e-06, + "loss": 1.1178, "step": 13265 }, { - "epoch": 0.3759245090538128, + "epoch": 0.5190546991157368, "grad_norm": 0.0, - "learning_rate": 1.4353715655677919e-05, - "loss": 0.8683, + "learning_rate": 9.868845340143762e-06, + "loss": 0.9594, "step": 13266 }, { - "epoch": 0.37595284649607524, + "epoch": 0.5190938258079661, "grad_norm": 0.0, - "learning_rate": 1.4352889396773239e-05, - "loss": 0.9453, + "learning_rate": 9.867578219083049e-06, + "loss": 0.9404, "step": 13267 }, { - "epoch": 0.37598118393833774, + "epoch": 0.5191329525001956, "grad_norm": 0.0, - "learning_rate": 1.4352063101202556e-05, - "loss": 0.8909, + "learning_rate": 9.866311100148865e-06, + "loss": 1.1342, "step": 13268 }, { - "epoch": 0.3760095213806002, + "epoch": 0.519172079192425, "grad_norm": 0.0, - "learning_rate": 1.435123676897283e-05, - "loss": 0.8617, + "learning_rate": 9.865043983361557e-06, + "loss": 1.1282, "step": 13269 }, { - "epoch": 0.37603785882286267, + "epoch": 0.5192112058846545, "grad_norm": 0.0, - "learning_rate": 1.4350410400091022e-05, - "loss": 0.8751, + "learning_rate": 9.86377686874147e-06, + "loss": 1.0995, "step": 13270 }, { - "epoch": 0.3760661962651251, + "epoch": 0.5192503325768839, "grad_norm": 0.0, - "learning_rate": 1.4349583994564088e-05, - "loss": 0.9659, + "learning_rate": 9.86250975630896e-06, + "loss": 1.0315, "step": 13271 }, { - "epoch": 0.37609453370738755, + "epoch": 0.5192894592691134, "grad_norm": 0.0, - "learning_rate": 1.4348757552398997e-05, - "loss": 0.9612, + "learning_rate": 9.86124264608437e-06, + "loss": 1.0271, "step": 13272 }, { - "epoch": 0.37612287114965004, + "epoch": 0.5193285859613428, "grad_norm": 0.0, - "learning_rate": 1.4347931073602706e-05, - "loss": 1.0051, + "learning_rate": 9.85997553808805e-06, + "loss": 0.9989, "step": 13273 }, { - "epoch": 0.3761512085919125, + "epoch": 0.5193677126535723, "grad_norm": 0.0, - "learning_rate": 1.4347104558182174e-05, - "loss": 0.8153, + "learning_rate": 9.858708432340347e-06, + "loss": 1.1517, "step": 13274 }, { - "epoch": 0.37617954603417497, + "epoch": 0.5194068393458017, "grad_norm": 0.0, - "learning_rate": 1.4346278006144365e-05, - "loss": 0.8473, + "learning_rate": 9.857441328861606e-06, + "loss": 1.0674, "step": 13275 }, { - "epoch": 0.3762078834764374, + "epoch": 0.5194459660380312, "grad_norm": 0.0, - "learning_rate": 1.4345451417496246e-05, - "loss": 0.9475, + "learning_rate": 9.856174227672183e-06, + "loss": 1.0123, "step": 13276 }, { - "epoch": 0.3762362209186999, + "epoch": 0.5194850927302606, "grad_norm": 0.0, - "learning_rate": 1.4344624792244776e-05, - "loss": 0.9852, + "learning_rate": 9.85490712879242e-06, + "loss": 1.035, "step": 13277 }, { - "epoch": 0.37626455836096234, + "epoch": 0.51952421942249, "grad_norm": 0.0, - "learning_rate": 1.434379813039692e-05, - "loss": 0.9446, + "learning_rate": 9.853640032242668e-06, + "loss": 1.0551, "step": 13278 }, { - "epoch": 0.3762928958032248, + "epoch": 0.5195633461147194, "grad_norm": 0.0, - "learning_rate": 1.4342971431959634e-05, - "loss": 0.9765, + "learning_rate": 9.85237293804327e-06, + "loss": 1.0, "step": 13279 }, { - "epoch": 0.3763212332454873, + "epoch": 0.5196024728069489, "grad_norm": 0.0, - "learning_rate": 1.4342144696939888e-05, - "loss": 0.8934, + "learning_rate": 9.85110584621458e-06, + "loss": 1.0133, "step": 13280 }, { - "epoch": 0.3763495706877497, + "epoch": 0.5196415994991783, "grad_norm": 0.0, - "learning_rate": 1.4341317925344646e-05, - "loss": 0.91, + "learning_rate": 9.849838756776946e-06, + "loss": 1.0622, "step": 13281 }, { - "epoch": 0.3763779081300122, + "epoch": 0.5196807261914078, "grad_norm": 0.0, - "learning_rate": 1.4340491117180872e-05, - "loss": 0.9615, + "learning_rate": 9.848571669750711e-06, + "loss": 0.9855, "step": 13282 }, { - "epoch": 0.37640624557227464, + "epoch": 0.5197198528836372, "grad_norm": 0.0, - "learning_rate": 1.433966427245553e-05, - "loss": 0.846, + "learning_rate": 9.847304585156222e-06, + "loss": 1.0373, "step": 13283 }, { - "epoch": 0.3764345830145371, + "epoch": 0.5197589795758667, "grad_norm": 0.0, - "learning_rate": 1.4338837391175582e-05, - "loss": 0.9342, + "learning_rate": 9.846037503013834e-06, + "loss": 1.0197, "step": 13284 }, { - "epoch": 0.3764629204567996, + "epoch": 0.5197981062680961, "grad_norm": 0.0, - "learning_rate": 1.4338010473348e-05, - "loss": 0.8499, + "learning_rate": 9.844770423343893e-06, + "loss": 1.0547, "step": 13285 }, { - "epoch": 0.376491257899062, + "epoch": 0.5198372329603256, "grad_norm": 0.0, - "learning_rate": 1.4337183518979739e-05, - "loss": 0.9824, + "learning_rate": 9.843503346166741e-06, + "loss": 1.0652, "step": 13286 }, { - "epoch": 0.3765195953413245, + "epoch": 0.519876359652555, "grad_norm": 0.0, - "learning_rate": 1.433635652807777e-05, - "loss": 0.9536, + "learning_rate": 9.84223627150273e-06, + "loss": 1.0619, "step": 13287 }, { - "epoch": 0.37654793278358695, + "epoch": 0.5199154863447845, "grad_norm": 0.0, - "learning_rate": 1.4335529500649065e-05, - "loss": 0.9361, + "learning_rate": 9.840969199372204e-06, + "loss": 1.0455, "step": 13288 }, { - "epoch": 0.37657627022584944, + "epoch": 0.5199546130370138, "grad_norm": 0.0, - "learning_rate": 1.4334702436700583e-05, - "loss": 0.9619, + "learning_rate": 9.839702129795518e-06, + "loss": 1.0558, "step": 13289 }, { - "epoch": 0.3766046076681119, + "epoch": 0.5199937397292433, "grad_norm": 0.0, - "learning_rate": 1.4333875336239293e-05, - "loss": 1.0034, + "learning_rate": 9.838435062793013e-06, + "loss": 1.1156, "step": 13290 }, { - "epoch": 0.3766329451103743, + "epoch": 0.5200328664214727, "grad_norm": 0.0, - "learning_rate": 1.4333048199272161e-05, - "loss": 0.9419, + "learning_rate": 9.83716799838504e-06, + "loss": 1.0486, "step": 13291 }, { - "epoch": 0.3766612825526368, + "epoch": 0.5200719931137021, "grad_norm": 0.0, - "learning_rate": 1.4332221025806157e-05, - "loss": 0.9728, + "learning_rate": 9.835900936591941e-06, + "loss": 1.1653, "step": 13292 }, { - "epoch": 0.37668961999489925, + "epoch": 0.5201111198059316, "grad_norm": 0.0, - "learning_rate": 1.4331393815848242e-05, - "loss": 0.8792, + "learning_rate": 9.83463387743407e-06, + "loss": 1.1414, "step": 13293 }, { - "epoch": 0.37671795743716174, + "epoch": 0.520150246498161, "grad_norm": 0.0, - "learning_rate": 1.4330566569405393e-05, - "loss": 0.8996, + "learning_rate": 9.833366820931771e-06, + "loss": 1.0617, "step": 13294 }, { - "epoch": 0.3767462948794242, + "epoch": 0.5201893731903905, "grad_norm": 0.0, - "learning_rate": 1.432973928648457e-05, - "loss": 1.0204, + "learning_rate": 9.832099767105393e-06, + "loss": 1.1719, "step": 13295 }, { - "epoch": 0.3767746323216866, + "epoch": 0.5202284998826199, "grad_norm": 0.0, - "learning_rate": 1.4328911967092748e-05, - "loss": 1.0206, + "learning_rate": 9.830832715975283e-06, + "loss": 1.0641, "step": 13296 }, { - "epoch": 0.3768029697639491, + "epoch": 0.5202676265748494, "grad_norm": 0.0, - "learning_rate": 1.4328084611236892e-05, - "loss": 0.9265, + "learning_rate": 9.829565667561786e-06, + "loss": 1.1523, "step": 13297 }, { - "epoch": 0.37683130720621155, + "epoch": 0.5203067532670788, "grad_norm": 0.0, - "learning_rate": 1.4327257218923976e-05, - "loss": 1.0078, + "learning_rate": 9.828298621885252e-06, + "loss": 1.047, "step": 13298 }, { - "epoch": 0.37685964464847405, + "epoch": 0.5203458799593083, "grad_norm": 0.0, - "learning_rate": 1.4326429790160958e-05, - "loss": 0.8563, + "learning_rate": 9.827031578966026e-06, + "loss": 1.0435, "step": 13299 }, { - "epoch": 0.3768879820907365, + "epoch": 0.5203850066515376, "grad_norm": 0.0, - "learning_rate": 1.432560232495482e-05, - "loss": 0.9353, + "learning_rate": 9.825764538824454e-06, + "loss": 0.9037, "step": 13300 }, { - "epoch": 0.376916319532999, + "epoch": 0.5204241333437671, "grad_norm": 0.0, - "learning_rate": 1.4324774823312526e-05, - "loss": 0.9602, + "learning_rate": 9.824497501480887e-06, + "loss": 1.0692, "step": 13301 }, { - "epoch": 0.3769446569752614, + "epoch": 0.5204632600359965, "grad_norm": 0.0, - "learning_rate": 1.4323947285241049e-05, - "loss": 1.0549, + "learning_rate": 9.823230466955672e-06, + "loss": 0.8978, "step": 13302 }, { - "epoch": 0.37697299441752385, + "epoch": 0.520502386728226, "grad_norm": 0.0, - "learning_rate": 1.4323119710747357e-05, - "loss": 0.9128, + "learning_rate": 9.821963435269155e-06, + "loss": 0.997, "step": 13303 }, { - "epoch": 0.37700133185978635, + "epoch": 0.5205415134204554, "grad_norm": 0.0, - "learning_rate": 1.4322292099838425e-05, - "loss": 0.9816, + "learning_rate": 9.82069640644168e-06, + "loss": 1.0482, "step": 13304 }, { - "epoch": 0.3770296693020488, + "epoch": 0.5205806401126849, "grad_norm": 0.0, - "learning_rate": 1.432146445252122e-05, - "loss": 1.0025, + "learning_rate": 9.819429380493597e-06, + "loss": 1.0984, "step": 13305 }, { - "epoch": 0.3770580067443113, + "epoch": 0.5206197668049143, "grad_norm": 0.0, - "learning_rate": 1.4320636768802712e-05, - "loss": 0.9252, + "learning_rate": 9.818162357445247e-06, + "loss": 1.06, "step": 13306 }, { - "epoch": 0.3770863441865737, + "epoch": 0.5206588934971438, "grad_norm": 0.0, - "learning_rate": 1.4319809048689879e-05, - "loss": 0.9631, + "learning_rate": 9.816895337316985e-06, + "loss": 0.9708, "step": 13307 }, { - "epoch": 0.37711468162883616, + "epoch": 0.5206980201893732, "grad_norm": 0.0, - "learning_rate": 1.4318981292189688e-05, - "loss": 0.8575, + "learning_rate": 9.815628320129156e-06, + "loss": 1.0683, "step": 13308 }, { - "epoch": 0.37714301907109865, + "epoch": 0.5207371468816027, "grad_norm": 0.0, - "learning_rate": 1.4318153499309118e-05, - "loss": 0.8632, + "learning_rate": 9.814361305902105e-06, + "loss": 1.1952, "step": 13309 }, { - "epoch": 0.3771713565133611, + "epoch": 0.520776273573832, "grad_norm": 0.0, - "learning_rate": 1.4317325670055136e-05, - "loss": 1.0614, + "learning_rate": 9.813094294656175e-06, + "loss": 0.9686, "step": 13310 }, { - "epoch": 0.3771996939556236, + "epoch": 0.5208154002660615, "grad_norm": 0.0, - "learning_rate": 1.4316497804434714e-05, - "loss": 0.8663, + "learning_rate": 9.811827286411721e-06, + "loss": 0.9705, "step": 13311 }, { - "epoch": 0.377228031397886, + "epoch": 0.5208545269582909, "grad_norm": 0.0, - "learning_rate": 1.4315669902454832e-05, - "loss": 0.9248, + "learning_rate": 9.810560281189085e-06, + "loss": 0.8941, "step": 13312 }, { - "epoch": 0.3772563688401485, + "epoch": 0.5208936536505204, "grad_norm": 0.0, - "learning_rate": 1.4314841964122455e-05, - "loss": 0.9196, + "learning_rate": 9.809293279008613e-06, + "loss": 1.1019, "step": 13313 }, { - "epoch": 0.37728470628241095, + "epoch": 0.5209327803427498, "grad_norm": 0.0, - "learning_rate": 1.4314013989444566e-05, - "loss": 1.019, + "learning_rate": 9.808026279890651e-06, + "loss": 1.0503, "step": 13314 }, { - "epoch": 0.3773130437246734, + "epoch": 0.5209719070349793, "grad_norm": 0.0, - "learning_rate": 1.4313185978428135e-05, - "loss": 0.9443, + "learning_rate": 9.806759283855542e-06, + "loss": 0.9835, "step": 13315 }, { - "epoch": 0.3773413811669359, + "epoch": 0.5210110337272087, "grad_norm": 0.0, - "learning_rate": 1.4312357931080135e-05, - "loss": 0.9187, + "learning_rate": 9.805492290923643e-06, + "loss": 1.1437, "step": 13316 }, { - "epoch": 0.3773697186091983, + "epoch": 0.5210501604194382, "grad_norm": 0.0, - "learning_rate": 1.4311529847407544e-05, - "loss": 0.9116, + "learning_rate": 9.804225301115292e-06, + "loss": 1.0007, "step": 13317 }, { - "epoch": 0.3773980560514608, + "epoch": 0.5210892871116676, "grad_norm": 0.0, - "learning_rate": 1.4310701727417336e-05, - "loss": 0.913, + "learning_rate": 9.802958314450839e-06, + "loss": 0.9272, "step": 13318 }, { - "epoch": 0.37742639349372326, + "epoch": 0.5211284138038971, "grad_norm": 0.0, - "learning_rate": 1.4309873571116486e-05, - "loss": 0.8489, + "learning_rate": 9.801691330950623e-06, + "loss": 1.0532, "step": 13319 }, { - "epoch": 0.3774547309359857, + "epoch": 0.5211675404961265, "grad_norm": 0.0, - "learning_rate": 1.430904537851197e-05, - "loss": 0.998, + "learning_rate": 9.800424350635e-06, + "loss": 1.1468, "step": 13320 }, { - "epoch": 0.3774830683782482, + "epoch": 0.5212066671883558, "grad_norm": 0.0, - "learning_rate": 1.4308217149610767e-05, - "loss": 0.9741, + "learning_rate": 9.799157373524313e-06, + "loss": 1.1008, "step": 13321 }, { - "epoch": 0.3775114058205106, + "epoch": 0.5212457938805853, "grad_norm": 0.0, - "learning_rate": 1.4307388884419848e-05, - "loss": 0.9803, + "learning_rate": 9.797890399638907e-06, + "loss": 1.0078, "step": 13322 }, { - "epoch": 0.3775397432627731, + "epoch": 0.5212849205728147, "grad_norm": 0.0, - "learning_rate": 1.4306560582946194e-05, - "loss": 0.8823, + "learning_rate": 9.796623428999126e-06, + "loss": 1.084, "step": 13323 }, { - "epoch": 0.37756808070503556, + "epoch": 0.5213240472650442, "grad_norm": 0.0, - "learning_rate": 1.4305732245196782e-05, - "loss": 1.0541, + "learning_rate": 9.795356461625317e-06, + "loss": 1.1194, "step": 13324 }, { - "epoch": 0.37759641814729805, + "epoch": 0.5213631739572736, "grad_norm": 0.0, - "learning_rate": 1.4304903871178588e-05, - "loss": 1.0651, + "learning_rate": 9.794089497537827e-06, + "loss": 1.0184, "step": 13325 }, { - "epoch": 0.3776247555895605, + "epoch": 0.5214023006495031, "grad_norm": 0.0, - "learning_rate": 1.430407546089859e-05, - "loss": 0.8833, + "learning_rate": 9.792822536757004e-06, + "loss": 0.9861, "step": 13326 }, { - "epoch": 0.37765309303182293, + "epoch": 0.5214414273417325, "grad_norm": 0.0, - "learning_rate": 1.4303247014363765e-05, - "loss": 0.9199, + "learning_rate": 9.791555579303192e-06, + "loss": 1.1956, "step": 13327 }, { - "epoch": 0.3776814304740854, + "epoch": 0.521480554033962, "grad_norm": 0.0, - "learning_rate": 1.4302418531581094e-05, - "loss": 0.8779, + "learning_rate": 9.79028862519673e-06, + "loss": 0.9299, "step": 13328 }, { - "epoch": 0.37770976791634786, + "epoch": 0.5215196807261914, "grad_norm": 0.0, - "learning_rate": 1.4301590012557553e-05, - "loss": 0.9939, + "learning_rate": 9.789021674457977e-06, + "loss": 0.9513, "step": 13329 }, { - "epoch": 0.37773810535861035, + "epoch": 0.5215588074184209, "grad_norm": 0.0, - "learning_rate": 1.4300761457300122e-05, - "loss": 0.9136, + "learning_rate": 9.787754727107269e-06, + "loss": 0.884, "step": 13330 }, { - "epoch": 0.3777664428008728, + "epoch": 0.5215979341106503, "grad_norm": 0.0, - "learning_rate": 1.4299932865815782e-05, - "loss": 0.9818, + "learning_rate": 9.786487783164953e-06, + "loss": 0.9143, "step": 13331 }, { - "epoch": 0.37779478024313523, + "epoch": 0.5216370608028797, "grad_norm": 0.0, - "learning_rate": 1.429910423811151e-05, - "loss": 0.9471, + "learning_rate": 9.785220842651378e-06, + "loss": 1.1066, "step": 13332 }, { - "epoch": 0.3778231176853977, + "epoch": 0.5216761874951091, "grad_norm": 0.0, - "learning_rate": 1.4298275574194285e-05, - "loss": 0.968, + "learning_rate": 9.783953905586883e-06, + "loss": 1.0132, "step": 13333 }, { - "epoch": 0.37785145512766016, + "epoch": 0.5217153141873386, "grad_norm": 0.0, - "learning_rate": 1.429744687407109e-05, - "loss": 0.9655, + "learning_rate": 9.78268697199182e-06, + "loss": 1.0398, "step": 13334 }, { - "epoch": 0.37787979256992266, + "epoch": 0.521754440879568, "grad_norm": 0.0, - "learning_rate": 1.4296618137748903e-05, - "loss": 0.9868, + "learning_rate": 9.781420041886535e-06, + "loss": 1.1384, "step": 13335 }, { - "epoch": 0.3779081300121851, + "epoch": 0.5217935675717975, "grad_norm": 0.0, - "learning_rate": 1.429578936523471e-05, - "loss": 0.886, + "learning_rate": 9.780153115291367e-06, + "loss": 0.9434, "step": 13336 }, { - "epoch": 0.3779364674544476, + "epoch": 0.5218326942640269, "grad_norm": 0.0, - "learning_rate": 1.4294960556535482e-05, - "loss": 0.8686, + "learning_rate": 9.778886192226664e-06, + "loss": 1.0681, "step": 13337 }, { - "epoch": 0.37796480489671, + "epoch": 0.5218718209562564, "grad_norm": 0.0, - "learning_rate": 1.429413171165821e-05, - "loss": 1.0647, + "learning_rate": 9.777619272712774e-06, + "loss": 1.0912, "step": 13338 }, { - "epoch": 0.37799314233897247, + "epoch": 0.5219109476484858, "grad_norm": 0.0, - "learning_rate": 1.4293302830609869e-05, - "loss": 0.8346, + "learning_rate": 9.776352356770037e-06, + "loss": 1.0007, "step": 13339 }, { - "epoch": 0.37802147978123496, + "epoch": 0.5219500743407153, "grad_norm": 0.0, - "learning_rate": 1.4292473913397449e-05, - "loss": 0.797, + "learning_rate": 9.775085444418802e-06, + "loss": 1.0829, "step": 13340 }, { - "epoch": 0.3780498172234974, + "epoch": 0.5219892010329447, "grad_norm": 0.0, - "learning_rate": 1.4291644960027921e-05, - "loss": 1.0209, + "learning_rate": 9.773818535679413e-06, + "loss": 1.0835, "step": 13341 }, { - "epoch": 0.3780781546657599, + "epoch": 0.5220283277251742, "grad_norm": 0.0, - "learning_rate": 1.4290815970508279e-05, - "loss": 1.0938, + "learning_rate": 9.772551630572215e-06, + "loss": 1.0306, "step": 13342 }, { - "epoch": 0.37810649210802233, + "epoch": 0.5220674544174035, "grad_norm": 0.0, - "learning_rate": 1.42899869448455e-05, - "loss": 0.8782, + "learning_rate": 9.771284729117555e-06, + "loss": 0.9677, "step": 13343 }, { - "epoch": 0.37813482955028477, + "epoch": 0.522106581109633, "grad_norm": 0.0, - "learning_rate": 1.4289157883046567e-05, - "loss": 0.7729, + "learning_rate": 9.770017831335774e-06, + "loss": 0.9496, "step": 13344 }, { - "epoch": 0.37816316699254726, + "epoch": 0.5221457078018624, "grad_norm": 0.0, - "learning_rate": 1.4288328785118464e-05, - "loss": 0.9319, + "learning_rate": 9.768750937247216e-06, + "loss": 0.973, "step": 13345 }, { - "epoch": 0.3781915044348097, + "epoch": 0.5221848344940919, "grad_norm": 0.0, - "learning_rate": 1.4287499651068172e-05, - "loss": 0.9633, + "learning_rate": 9.76748404687223e-06, + "loss": 1.0079, "step": 13346 }, { - "epoch": 0.3782198418770722, + "epoch": 0.5222239611863213, "grad_norm": 0.0, - "learning_rate": 1.4286670480902684e-05, - "loss": 0.9284, + "learning_rate": 9.766217160231159e-06, + "loss": 1.1025, "step": 13347 }, { - "epoch": 0.37824817931933463, + "epoch": 0.5222630878785508, "grad_norm": 0.0, - "learning_rate": 1.4285841274628978e-05, - "loss": 0.9524, + "learning_rate": 9.764950277344349e-06, + "loss": 1.0383, "step": 13348 }, { - "epoch": 0.3782765167615971, + "epoch": 0.5223022145707802, "grad_norm": 0.0, - "learning_rate": 1.4285012032254035e-05, - "loss": 0.9793, + "learning_rate": 9.763683398232143e-06, + "loss": 0.9902, "step": 13349 }, { - "epoch": 0.37830485420385956, + "epoch": 0.5223413412630096, "grad_norm": 0.0, - "learning_rate": 1.4284182753784847e-05, - "loss": 0.8743, + "learning_rate": 9.76241652291488e-06, + "loss": 1.0086, "step": 13350 }, { - "epoch": 0.378333191646122, + "epoch": 0.5223804679552391, "grad_norm": 0.0, - "learning_rate": 1.42833534392284e-05, - "loss": 0.8888, + "learning_rate": 9.761149651412915e-06, + "loss": 0.9881, "step": 13351 }, { - "epoch": 0.3783615290883845, + "epoch": 0.5224195946474685, "grad_norm": 0.0, - "learning_rate": 1.4282524088591672e-05, - "loss": 0.8817, + "learning_rate": 9.759882783746588e-06, + "loss": 1.1271, "step": 13352 }, { - "epoch": 0.37838986653064693, + "epoch": 0.522458721339698, "grad_norm": 0.0, - "learning_rate": 1.4281694701881657e-05, - "loss": 0.9336, + "learning_rate": 9.75861591993624e-06, + "loss": 0.9572, "step": 13353 }, { - "epoch": 0.37841820397290943, + "epoch": 0.5224978480319273, "grad_norm": 0.0, - "learning_rate": 1.4280865279105333e-05, - "loss": 0.8349, + "learning_rate": 9.757349060002221e-06, + "loss": 1.0349, "step": 13354 }, { - "epoch": 0.37844654141517187, + "epoch": 0.5225369747241568, "grad_norm": 0.0, - "learning_rate": 1.4280035820269692e-05, - "loss": 0.9802, + "learning_rate": 9.756082203964867e-06, + "loss": 1.0276, "step": 13355 }, { - "epoch": 0.3784748788574343, + "epoch": 0.5225761014163862, "grad_norm": 0.0, - "learning_rate": 1.4279206325381725e-05, - "loss": 0.9141, + "learning_rate": 9.75481535184453e-06, + "loss": 0.9853, "step": 13356 }, { - "epoch": 0.3785032162996968, + "epoch": 0.5226152281086157, "grad_norm": 0.0, - "learning_rate": 1.4278376794448411e-05, - "loss": 0.9021, + "learning_rate": 9.753548503661552e-06, + "loss": 1.0629, "step": 13357 }, { - "epoch": 0.37853155374195924, + "epoch": 0.5226543548008451, "grad_norm": 0.0, - "learning_rate": 1.4277547227476738e-05, - "loss": 0.9084, + "learning_rate": 9.752281659436277e-06, + "loss": 1.1473, "step": 13358 }, { - "epoch": 0.37855989118422173, + "epoch": 0.5226934814930746, "grad_norm": 0.0, - "learning_rate": 1.4276717624473697e-05, - "loss": 0.9978, + "learning_rate": 9.751014819189042e-06, + "loss": 1.0525, "step": 13359 }, { - "epoch": 0.37858822862648417, + "epoch": 0.522732608185304, "grad_norm": 0.0, - "learning_rate": 1.4275887985446279e-05, - "loss": 0.8109, + "learning_rate": 9.749747982940203e-06, + "loss": 0.9429, "step": 13360 }, { - "epoch": 0.3786165660687466, + "epoch": 0.5227717348775335, "grad_norm": 0.0, - "learning_rate": 1.4275058310401466e-05, - "loss": 0.8697, + "learning_rate": 9.748481150710097e-06, + "loss": 1.0714, "step": 13361 }, { - "epoch": 0.3786449035110091, + "epoch": 0.5228108615697629, "grad_norm": 0.0, - "learning_rate": 1.4274228599346249e-05, - "loss": 0.8902, + "learning_rate": 9.747214322519069e-06, + "loss": 1.0406, "step": 13362 }, { - "epoch": 0.37867324095327154, + "epoch": 0.5228499882619924, "grad_norm": 0.0, - "learning_rate": 1.4273398852287619e-05, - "loss": 0.9407, + "learning_rate": 9.745947498387462e-06, + "loss": 1.0464, "step": 13363 }, { - "epoch": 0.37870157839553403, + "epoch": 0.5228891149542217, "grad_norm": 0.0, - "learning_rate": 1.4272569069232563e-05, - "loss": 0.8585, + "learning_rate": 9.744680678335614e-06, + "loss": 0.9787, "step": 13364 }, { - "epoch": 0.37872991583779647, + "epoch": 0.5229282416464512, "grad_norm": 0.0, - "learning_rate": 1.427173925018807e-05, - "loss": 0.9808, + "learning_rate": 9.74341386238388e-06, + "loss": 1.0802, "step": 13365 }, { - "epoch": 0.37875825328005897, + "epoch": 0.5229673683386806, "grad_norm": 0.0, - "learning_rate": 1.427090939516113e-05, - "loss": 0.9787, + "learning_rate": 9.7421470505526e-06, + "loss": 0.9481, "step": 13366 }, { - "epoch": 0.3787865907223214, + "epoch": 0.5230064950309101, "grad_norm": 0.0, - "learning_rate": 1.4270079504158738e-05, - "loss": 0.945, + "learning_rate": 9.740880242862112e-06, + "loss": 0.965, "step": 13367 }, { - "epoch": 0.37881492816458384, + "epoch": 0.5230456217231395, "grad_norm": 0.0, - "learning_rate": 1.4269249577187877e-05, - "loss": 0.9392, + "learning_rate": 9.73961343933276e-06, + "loss": 1.0358, "step": 13368 }, { - "epoch": 0.37884326560684634, + "epoch": 0.523084748415369, "grad_norm": 0.0, - "learning_rate": 1.4268419614255545e-05, - "loss": 0.935, + "learning_rate": 9.738346639984893e-06, + "loss": 1.0222, "step": 13369 }, { - "epoch": 0.3788716030491088, + "epoch": 0.5231238751075984, "grad_norm": 0.0, - "learning_rate": 1.4267589615368727e-05, - "loss": 1.0149, + "learning_rate": 9.737079844838852e-06, + "loss": 1.0225, "step": 13370 }, { - "epoch": 0.37889994049137127, + "epoch": 0.5231630017998279, "grad_norm": 0.0, - "learning_rate": 1.4266759580534417e-05, - "loss": 0.9401, + "learning_rate": 9.73581305391498e-06, + "loss": 1.0321, "step": 13371 }, { - "epoch": 0.3789282779336337, + "epoch": 0.5232021284920573, "grad_norm": 0.0, - "learning_rate": 1.4265929509759606e-05, - "loss": 0.9642, + "learning_rate": 9.734546267233618e-06, + "loss": 1.1486, "step": 13372 }, { - "epoch": 0.37895661537589614, + "epoch": 0.5232412551842868, "grad_norm": 0.0, - "learning_rate": 1.4265099403051291e-05, - "loss": 0.9737, + "learning_rate": 9.733279484815108e-06, + "loss": 0.9899, "step": 13373 }, { - "epoch": 0.37898495281815864, + "epoch": 0.5232803818765162, "grad_norm": 0.0, - "learning_rate": 1.4264269260416455e-05, - "loss": 0.9244, + "learning_rate": 9.732012706679797e-06, + "loss": 1.0353, "step": 13374 }, { - "epoch": 0.3790132902604211, + "epoch": 0.5233195085687457, "grad_norm": 0.0, - "learning_rate": 1.4263439081862095e-05, - "loss": 0.9953, + "learning_rate": 9.730745932848027e-06, + "loss": 1.0204, "step": 13375 }, { - "epoch": 0.37904162770268357, + "epoch": 0.523358635260975, "grad_norm": 0.0, - "learning_rate": 1.426260886739521e-05, - "loss": 0.9547, + "learning_rate": 9.72947916334014e-06, + "loss": 1.0664, "step": 13376 }, { - "epoch": 0.379069965144946, + "epoch": 0.5233977619532044, "grad_norm": 0.0, - "learning_rate": 1.4261778617022786e-05, - "loss": 0.7752, + "learning_rate": 9.728212398176476e-06, + "loss": 1.0165, "step": 13377 }, { - "epoch": 0.3790983025872085, + "epoch": 0.5234368886454339, "grad_norm": 0.0, - "learning_rate": 1.4260948330751814e-05, - "loss": 0.959, + "learning_rate": 9.726945637377381e-06, + "loss": 0.9115, "step": 13378 }, { - "epoch": 0.37912664002947094, + "epoch": 0.5234760153376633, "grad_norm": 0.0, - "learning_rate": 1.4260118008589294e-05, - "loss": 0.9411, + "learning_rate": 9.7256788809632e-06, + "loss": 0.9649, "step": 13379 }, { - "epoch": 0.3791549774717334, + "epoch": 0.5235151420298928, "grad_norm": 0.0, - "learning_rate": 1.4259287650542217e-05, - "loss": 0.9288, + "learning_rate": 9.72441212895427e-06, + "loss": 1.0578, "step": 13380 }, { - "epoch": 0.3791833149139959, + "epoch": 0.5235542687221222, "grad_norm": 0.0, - "learning_rate": 1.4258457256617581e-05, - "loss": 0.9741, + "learning_rate": 9.723145381370938e-06, + "loss": 1.0457, "step": 13381 }, { - "epoch": 0.3792116523562583, + "epoch": 0.5235933954143517, "grad_norm": 0.0, - "learning_rate": 1.425762682682238e-05, - "loss": 0.8005, + "learning_rate": 9.721878638233541e-06, + "loss": 0.9982, "step": 13382 }, { - "epoch": 0.3792399897985208, + "epoch": 0.5236325221065811, "grad_norm": 0.0, - "learning_rate": 1.4256796361163603e-05, - "loss": 0.8415, + "learning_rate": 9.720611899562427e-06, + "loss": 1.0658, "step": 13383 }, { - "epoch": 0.37926832724078324, + "epoch": 0.5236716487988106, "grad_norm": 0.0, - "learning_rate": 1.425596585964825e-05, - "loss": 0.9186, + "learning_rate": 9.719345165377933e-06, + "loss": 1.1505, "step": 13384 }, { - "epoch": 0.3792966646830457, + "epoch": 0.52371077549104, "grad_norm": 0.0, - "learning_rate": 1.4255135322283318e-05, - "loss": 0.8815, + "learning_rate": 9.718078435700408e-06, + "loss": 1.0157, "step": 13385 }, { - "epoch": 0.3793250021253082, + "epoch": 0.5237499021832694, "grad_norm": 0.0, - "learning_rate": 1.42543047490758e-05, - "loss": 0.9653, + "learning_rate": 9.716811710550186e-06, + "loss": 1.1049, "step": 13386 }, { - "epoch": 0.3793533395675706, + "epoch": 0.5237890288754988, "grad_norm": 0.0, - "learning_rate": 1.425347414003269e-05, - "loss": 1.1187, + "learning_rate": 9.715544989947616e-06, + "loss": 1.0383, "step": 13387 }, { - "epoch": 0.3793816770098331, + "epoch": 0.5238281555677283, "grad_norm": 0.0, - "learning_rate": 1.4252643495160994e-05, - "loss": 0.8374, + "learning_rate": 9.714278273913038e-06, + "loss": 0.9782, "step": 13388 }, { - "epoch": 0.37941001445209555, + "epoch": 0.5238672822599577, "grad_norm": 0.0, - "learning_rate": 1.4251812814467701e-05, - "loss": 0.8257, + "learning_rate": 9.713011562466789e-06, + "loss": 1.0641, "step": 13389 }, { - "epoch": 0.37943835189435804, + "epoch": 0.5239064089521872, "grad_norm": 0.0, - "learning_rate": 1.4250982097959806e-05, - "loss": 0.8778, + "learning_rate": 9.711744855629218e-06, + "loss": 1.0073, "step": 13390 }, { - "epoch": 0.3794666893366205, + "epoch": 0.5239455356444166, "grad_norm": 0.0, - "learning_rate": 1.4250151345644314e-05, - "loss": 0.9938, + "learning_rate": 9.71047815342066e-06, + "loss": 0.969, "step": 13391 }, { - "epoch": 0.3794950267788829, + "epoch": 0.5239846623366461, "grad_norm": 0.0, - "learning_rate": 1.4249320557528217e-05, - "loss": 0.9743, + "learning_rate": 9.709211455861464e-06, + "loss": 0.9543, "step": 13392 }, { - "epoch": 0.3795233642211454, + "epoch": 0.5240237890288755, "grad_norm": 0.0, - "learning_rate": 1.4248489733618516e-05, - "loss": 1.0204, + "learning_rate": 9.707944762971965e-06, + "loss": 0.9366, "step": 13393 }, { - "epoch": 0.37955170166340785, + "epoch": 0.524062915721105, "grad_norm": 0.0, - "learning_rate": 1.424765887392221e-05, - "loss": 0.7941, + "learning_rate": 9.70667807477251e-06, + "loss": 1.1548, "step": 13394 }, { - "epoch": 0.37958003910567034, + "epoch": 0.5241020424133344, "grad_norm": 0.0, - "learning_rate": 1.4246827978446293e-05, - "loss": 1.0048, + "learning_rate": 9.705411391283433e-06, + "loss": 0.9674, "step": 13395 }, { - "epoch": 0.3796083765479328, + "epoch": 0.5241411691055639, "grad_norm": 0.0, - "learning_rate": 1.4245997047197767e-05, - "loss": 0.9844, + "learning_rate": 9.704144712525082e-06, + "loss": 1.0587, "step": 13396 }, { - "epoch": 0.3796367139901952, + "epoch": 0.5241802957977932, "grad_norm": 0.0, - "learning_rate": 1.4245166080183633e-05, - "loss": 1.0286, + "learning_rate": 9.702878038517798e-06, + "loss": 1.0317, "step": 13397 }, { - "epoch": 0.3796650514324577, + "epoch": 0.5242194224900227, "grad_norm": 0.0, - "learning_rate": 1.4244335077410889e-05, - "loss": 1.014, + "learning_rate": 9.70161136928192e-06, + "loss": 1.0591, "step": 13398 }, { - "epoch": 0.37969338887472015, + "epoch": 0.5242585491822521, "grad_norm": 0.0, - "learning_rate": 1.4243504038886531e-05, - "loss": 1.051, + "learning_rate": 9.700344704837786e-06, + "loss": 1.0352, "step": 13399 }, { - "epoch": 0.37972172631698264, + "epoch": 0.5242976758744816, "grad_norm": 0.0, - "learning_rate": 1.4242672964617565e-05, - "loss": 1.0588, + "learning_rate": 9.699078045205743e-06, + "loss": 1.0163, "step": 13400 }, { - "epoch": 0.3797500637592451, + "epoch": 0.524336802566711, "grad_norm": 0.0, - "learning_rate": 1.4241841854610992e-05, - "loss": 0.883, + "learning_rate": 9.69781139040613e-06, + "loss": 1.0769, "step": 13401 }, { - "epoch": 0.3797784012015076, + "epoch": 0.5243759292589405, "grad_norm": 0.0, - "learning_rate": 1.4241010708873808e-05, - "loss": 0.9061, + "learning_rate": 9.696544740459289e-06, + "loss": 1.0408, "step": 13402 }, { - "epoch": 0.37980673864377, + "epoch": 0.5244150559511699, "grad_norm": 0.0, - "learning_rate": 1.4240179527413014e-05, - "loss": 0.909, + "learning_rate": 9.695278095385558e-06, + "loss": 1.0383, "step": 13403 }, { - "epoch": 0.37983507608603245, + "epoch": 0.5244541826433994, "grad_norm": 0.0, - "learning_rate": 1.4239348310235613e-05, - "loss": 0.9006, + "learning_rate": 9.694011455205273e-06, + "loss": 0.9219, "step": 13404 }, { - "epoch": 0.37986341352829495, + "epoch": 0.5244933093356288, "grad_norm": 0.0, - "learning_rate": 1.4238517057348609e-05, - "loss": 0.949, + "learning_rate": 9.692744819938787e-06, + "loss": 0.9687, "step": 13405 }, { - "epoch": 0.3798917509705574, + "epoch": 0.5245324360278582, "grad_norm": 0.0, - "learning_rate": 1.4237685768759002e-05, - "loss": 0.9909, + "learning_rate": 9.691478189606433e-06, + "loss": 1.0564, "step": 13406 }, { - "epoch": 0.3799200884128199, + "epoch": 0.5245715627200876, "grad_norm": 0.0, - "learning_rate": 1.4236854444473793e-05, - "loss": 0.977, + "learning_rate": 9.690211564228554e-06, + "loss": 1.0135, "step": 13407 }, { - "epoch": 0.3799484258550823, + "epoch": 0.524610689412317, "grad_norm": 0.0, - "learning_rate": 1.4236023084499987e-05, - "loss": 1.08, + "learning_rate": 9.688944943825484e-06, + "loss": 0.9753, "step": 13408 }, { - "epoch": 0.37997676329734476, + "epoch": 0.5246498161045465, "grad_norm": 0.0, - "learning_rate": 1.4235191688844585e-05, - "loss": 1.0083, + "learning_rate": 9.687678328417574e-06, + "loss": 1.0159, "step": 13409 }, { - "epoch": 0.38000510073960725, + "epoch": 0.5246889427967759, "grad_norm": 0.0, - "learning_rate": 1.423436025751459e-05, - "loss": 0.9315, + "learning_rate": 9.686411718025157e-06, + "loss": 0.9781, "step": 13410 }, { - "epoch": 0.3800334381818697, + "epoch": 0.5247280694890054, "grad_norm": 0.0, - "learning_rate": 1.4233528790517007e-05, - "loss": 1.0037, + "learning_rate": 9.685145112668577e-06, + "loss": 0.94, "step": 13411 }, { - "epoch": 0.3800617756241322, + "epoch": 0.5247671961812348, "grad_norm": 0.0, - "learning_rate": 1.4232697287858836e-05, - "loss": 0.8962, + "learning_rate": 9.68387851236817e-06, + "loss": 1.0387, "step": 13412 }, { - "epoch": 0.3800901130663946, + "epoch": 0.5248063228734643, "grad_norm": 0.0, - "learning_rate": 1.4231865749547086e-05, - "loss": 0.9298, + "learning_rate": 9.682611917144273e-06, + "loss": 1.0047, "step": 13413 }, { - "epoch": 0.3801184505086571, + "epoch": 0.5248454495656937, "grad_norm": 0.0, - "learning_rate": 1.4231034175588762e-05, - "loss": 0.9254, + "learning_rate": 9.681345327017237e-06, + "loss": 0.9439, "step": 13414 }, { - "epoch": 0.38014678795091955, + "epoch": 0.5248845762579232, "grad_norm": 0.0, - "learning_rate": 1.4230202565990865e-05, - "loss": 0.9585, + "learning_rate": 9.680078742007395e-06, + "loss": 1.0185, "step": 13415 }, { - "epoch": 0.380175125393182, + "epoch": 0.5249237029501526, "grad_norm": 0.0, - "learning_rate": 1.4229370920760398e-05, - "loss": 0.8935, + "learning_rate": 9.678812162135087e-06, + "loss": 0.9486, "step": 13416 }, { - "epoch": 0.3802034628354445, + "epoch": 0.5249628296423821, "grad_norm": 0.0, - "learning_rate": 1.422853923990437e-05, - "loss": 1.0182, + "learning_rate": 9.67754558742065e-06, + "loss": 1.0072, "step": 13417 }, { - "epoch": 0.3802318002777069, + "epoch": 0.5250019563346114, "grad_norm": 0.0, - "learning_rate": 1.4227707523429788e-05, - "loss": 0.9208, + "learning_rate": 9.676279017884432e-06, + "loss": 1.086, "step": 13418 }, { - "epoch": 0.3802601377199694, + "epoch": 0.5250410830268409, "grad_norm": 0.0, - "learning_rate": 1.4226875771343656e-05, - "loss": 0.9949, + "learning_rate": 9.675012453546766e-06, + "loss": 1.0746, "step": 13419 }, { - "epoch": 0.38028847516223185, + "epoch": 0.5250802097190703, "grad_norm": 0.0, - "learning_rate": 1.4226043983652975e-05, - "loss": 0.9004, + "learning_rate": 9.673745894427991e-06, + "loss": 1.0146, "step": 13420 }, { - "epoch": 0.3803168126044943, + "epoch": 0.5251193364112998, "grad_norm": 0.0, - "learning_rate": 1.422521216036476e-05, - "loss": 1.0569, + "learning_rate": 9.672479340548451e-06, + "loss": 1.0673, "step": 13421 }, { - "epoch": 0.3803451500467568, + "epoch": 0.5251584631035292, "grad_norm": 0.0, - "learning_rate": 1.4224380301486013e-05, - "loss": 0.9335, + "learning_rate": 9.671212791928476e-06, + "loss": 0.9528, "step": 13422 }, { - "epoch": 0.3803734874890192, + "epoch": 0.5251975897957587, "grad_norm": 0.0, - "learning_rate": 1.4223548407023743e-05, - "loss": 0.8833, + "learning_rate": 9.669946248588418e-06, + "loss": 1.0627, "step": 13423 }, { - "epoch": 0.3804018249312817, + "epoch": 0.5252367164879881, "grad_norm": 0.0, - "learning_rate": 1.4222716476984953e-05, - "loss": 0.9942, + "learning_rate": 9.668679710548608e-06, + "loss": 1.0623, "step": 13424 }, { - "epoch": 0.38043016237354416, + "epoch": 0.5252758431802176, "grad_norm": 0.0, - "learning_rate": 1.4221884511376658e-05, - "loss": 0.9292, + "learning_rate": 9.667413177829387e-06, + "loss": 1.0921, "step": 13425 }, { - "epoch": 0.38045849981580665, + "epoch": 0.525314969872447, "grad_norm": 0.0, - "learning_rate": 1.4221052510205861e-05, - "loss": 0.815, + "learning_rate": 9.666146650451091e-06, + "loss": 1.0302, "step": 13426 }, { - "epoch": 0.3804868372580691, + "epoch": 0.5253540965646765, "grad_norm": 0.0, - "learning_rate": 1.4220220473479574e-05, - "loss": 0.9119, + "learning_rate": 9.664880128434064e-06, + "loss": 1.0195, "step": 13427 }, { - "epoch": 0.3805151747003315, + "epoch": 0.5253932232569059, "grad_norm": 0.0, - "learning_rate": 1.4219388401204796e-05, - "loss": 0.7516, + "learning_rate": 9.66361361179864e-06, + "loss": 1.0056, "step": 13428 }, { - "epoch": 0.380543512142594, + "epoch": 0.5254323499491353, "grad_norm": 0.0, - "learning_rate": 1.4218556293388548e-05, - "loss": 0.9294, + "learning_rate": 9.662347100565163e-06, + "loss": 1.1539, "step": 13429 }, { - "epoch": 0.38057184958485646, + "epoch": 0.5254714766413647, "grad_norm": 0.0, - "learning_rate": 1.4217724150037831e-05, - "loss": 0.9559, + "learning_rate": 9.661080594753967e-06, + "loss": 1.0822, "step": 13430 }, { - "epoch": 0.38060018702711895, + "epoch": 0.5255106033335942, "grad_norm": 0.0, - "learning_rate": 1.4216891971159659e-05, - "loss": 0.8686, + "learning_rate": 9.659814094385391e-06, + "loss": 1.103, "step": 13431 }, { - "epoch": 0.3806285244693814, + "epoch": 0.5255497300258236, "grad_norm": 0.0, - "learning_rate": 1.4216059756761038e-05, - "loss": 0.9278, + "learning_rate": 9.658547599479777e-06, + "loss": 1.0127, "step": 13432 }, { - "epoch": 0.38065686191164383, + "epoch": 0.5255888567180531, "grad_norm": 0.0, - "learning_rate": 1.4215227506848982e-05, - "loss": 1.0894, + "learning_rate": 9.657281110057459e-06, + "loss": 0.9816, "step": 13433 }, { - "epoch": 0.3806851993539063, + "epoch": 0.5256279834102825, "grad_norm": 0.0, - "learning_rate": 1.4214395221430501e-05, - "loss": 0.7963, + "learning_rate": 9.656014626138776e-06, + "loss": 1.1124, "step": 13434 }, { - "epoch": 0.38071353679616876, + "epoch": 0.5256671101025119, "grad_norm": 0.0, - "learning_rate": 1.4213562900512603e-05, - "loss": 0.8544, + "learning_rate": 9.654748147744066e-06, + "loss": 1.2607, "step": 13435 }, { - "epoch": 0.38074187423843125, + "epoch": 0.5257062367947414, "grad_norm": 0.0, - "learning_rate": 1.4212730544102297e-05, - "loss": 0.9548, + "learning_rate": 9.653481674893672e-06, + "loss": 1.121, "step": 13436 }, { - "epoch": 0.3807702116806937, + "epoch": 0.5257453634869708, "grad_norm": 0.0, - "learning_rate": 1.4211898152206598e-05, - "loss": 0.9058, + "learning_rate": 9.652215207607926e-06, + "loss": 1.0402, "step": 13437 }, { - "epoch": 0.3807985491229562, + "epoch": 0.5257844901792003, "grad_norm": 0.0, - "learning_rate": 1.421106572483252e-05, - "loss": 0.9878, + "learning_rate": 9.65094874590717e-06, + "loss": 1.0877, "step": 13438 }, { - "epoch": 0.3808268865652186, + "epoch": 0.5258236168714296, "grad_norm": 0.0, - "learning_rate": 1.421023326198707e-05, - "loss": 0.8499, + "learning_rate": 9.649682289811738e-06, + "loss": 1.0526, "step": 13439 }, { - "epoch": 0.38085522400748106, + "epoch": 0.5258627435636591, "grad_norm": 0.0, - "learning_rate": 1.4209400763677263e-05, - "loss": 0.9434, + "learning_rate": 9.648415839341966e-06, + "loss": 1.0598, "step": 13440 }, { - "epoch": 0.38088356144974356, + "epoch": 0.5259018702558885, "grad_norm": 0.0, - "learning_rate": 1.4208568229910106e-05, - "loss": 0.9415, + "learning_rate": 9.6471493945182e-06, + "loss": 1.0188, "step": 13441 }, { - "epoch": 0.380911898892006, + "epoch": 0.525940996948118, "grad_norm": 0.0, - "learning_rate": 1.4207735660692621e-05, - "loss": 0.9389, + "learning_rate": 9.645882955360771e-06, + "loss": 1.136, "step": 13442 }, { - "epoch": 0.3809402363342685, + "epoch": 0.5259801236403474, "grad_norm": 0.0, - "learning_rate": 1.4206903056031813e-05, - "loss": 0.9811, + "learning_rate": 9.644616521890019e-06, + "loss": 0.9977, "step": 13443 }, { - "epoch": 0.3809685737765309, + "epoch": 0.5260192503325769, "grad_norm": 0.0, - "learning_rate": 1.4206070415934701e-05, - "loss": 0.9686, + "learning_rate": 9.643350094126275e-06, + "loss": 1.137, "step": 13444 }, { - "epoch": 0.38099691121879337, + "epoch": 0.5260583770248063, "grad_norm": 0.0, - "learning_rate": 1.4205237740408291e-05, - "loss": 1.0095, + "learning_rate": 9.642083672089887e-06, + "loss": 0.9895, "step": 13445 }, { - "epoch": 0.38102524866105586, + "epoch": 0.5260975037170358, "grad_norm": 0.0, - "learning_rate": 1.4204405029459607e-05, - "loss": 0.8549, + "learning_rate": 9.640817255801188e-06, + "loss": 1.0454, "step": 13446 }, { - "epoch": 0.3810535861033183, + "epoch": 0.5261366304092652, "grad_norm": 0.0, - "learning_rate": 1.4203572283095657e-05, - "loss": 0.9744, + "learning_rate": 9.63955084528051e-06, + "loss": 1.1128, "step": 13447 }, { - "epoch": 0.3810819235455808, + "epoch": 0.5261757571014947, "grad_norm": 0.0, - "learning_rate": 1.4202739501323457e-05, - "loss": 0.944, + "learning_rate": 9.638284440548197e-06, + "loss": 1.0148, "step": 13448 }, { - "epoch": 0.38111026098784323, + "epoch": 0.526214883793724, "grad_norm": 0.0, - "learning_rate": 1.420190668415002e-05, - "loss": 0.9939, + "learning_rate": 9.637018041624577e-06, + "loss": 0.9518, "step": 13449 }, { - "epoch": 0.3811385984301057, + "epoch": 0.5262540104859535, "grad_norm": 0.0, - "learning_rate": 1.4201073831582361e-05, - "loss": 0.94, + "learning_rate": 9.635751648529998e-06, + "loss": 0.9245, "step": 13450 }, { - "epoch": 0.38116693587236816, + "epoch": 0.5262931371781829, "grad_norm": 0.0, - "learning_rate": 1.4200240943627501e-05, - "loss": 0.8456, + "learning_rate": 9.63448526128479e-06, + "loss": 0.9378, "step": 13451 }, { - "epoch": 0.3811952733146306, + "epoch": 0.5263322638704124, "grad_norm": 0.0, - "learning_rate": 1.4199408020292451e-05, - "loss": 0.9214, + "learning_rate": 9.633218879909291e-06, + "loss": 0.8673, "step": 13452 }, { - "epoch": 0.3812236107568931, + "epoch": 0.5263713905626418, "grad_norm": 0.0, - "learning_rate": 1.4198575061584225e-05, - "loss": 0.9135, + "learning_rate": 9.631952504423832e-06, + "loss": 1.0368, "step": 13453 }, { - "epoch": 0.38125194819915553, + "epoch": 0.5264105172548713, "grad_norm": 0.0, - "learning_rate": 1.4197742067509845e-05, - "loss": 1.0019, + "learning_rate": 9.630686134848759e-06, + "loss": 1.0889, "step": 13454 }, { - "epoch": 0.381280285641418, + "epoch": 0.5264496439471007, "grad_norm": 0.0, - "learning_rate": 1.4196909038076326e-05, - "loss": 0.9522, + "learning_rate": 9.629419771204405e-06, + "loss": 1.0806, "step": 13455 }, { - "epoch": 0.38130862308368046, + "epoch": 0.5264887706393302, "grad_norm": 0.0, - "learning_rate": 1.419607597329068e-05, - "loss": 0.9001, + "learning_rate": 9.628153413511104e-06, + "loss": 1.0566, "step": 13456 }, { - "epoch": 0.3813369605259429, + "epoch": 0.5265278973315596, "grad_norm": 0.0, - "learning_rate": 1.4195242873159928e-05, - "loss": 0.8361, + "learning_rate": 9.62688706178919e-06, + "loss": 1.0268, "step": 13457 }, { - "epoch": 0.3813652979682054, + "epoch": 0.5265670240237891, "grad_norm": 0.0, - "learning_rate": 1.419440973769109e-05, - "loss": 0.9945, + "learning_rate": 9.625620716059007e-06, + "loss": 0.9509, "step": 13458 }, { - "epoch": 0.38139363541046784, + "epoch": 0.5266061507160185, "grad_norm": 0.0, - "learning_rate": 1.4193576566891181e-05, - "loss": 0.9872, + "learning_rate": 9.624354376340884e-06, + "loss": 1.103, "step": 13459 }, { - "epoch": 0.38142197285273033, + "epoch": 0.526645277408248, "grad_norm": 0.0, - "learning_rate": 1.4192743360767219e-05, - "loss": 0.8165, + "learning_rate": 9.623088042655161e-06, + "loss": 1.0038, "step": 13460 }, { - "epoch": 0.38145031029499277, + "epoch": 0.5266844041004773, "grad_norm": 0.0, - "learning_rate": 1.4191910119326222e-05, - "loss": 0.9708, + "learning_rate": 9.62182171502217e-06, + "loss": 1.0791, "step": 13461 }, { - "epoch": 0.38147864773725526, + "epoch": 0.5267235307927068, "grad_norm": 0.0, - "learning_rate": 1.4191076842575209e-05, - "loss": 0.9453, + "learning_rate": 9.620555393462245e-06, + "loss": 1.1892, "step": 13462 }, { - "epoch": 0.3815069851795177, + "epoch": 0.5267626574849362, "grad_norm": 0.0, - "learning_rate": 1.41902435305212e-05, - "loss": 0.9013, + "learning_rate": 9.61928907799573e-06, + "loss": 0.9563, "step": 13463 }, { - "epoch": 0.38153532262178014, + "epoch": 0.5268017841771656, "grad_norm": 0.0, - "learning_rate": 1.4189410183171214e-05, - "loss": 1.0092, + "learning_rate": 9.618022768642955e-06, + "loss": 1.0838, "step": 13464 }, { - "epoch": 0.38156366006404263, + "epoch": 0.5268409108693951, "grad_norm": 0.0, - "learning_rate": 1.4188576800532268e-05, - "loss": 1.0293, + "learning_rate": 9.616756465424256e-06, + "loss": 1.1167, "step": 13465 }, { - "epoch": 0.38159199750630507, + "epoch": 0.5268800375616245, "grad_norm": 0.0, - "learning_rate": 1.4187743382611388e-05, - "loss": 0.9474, + "learning_rate": 9.615490168359964e-06, + "loss": 1.1088, "step": 13466 }, { - "epoch": 0.38162033494856756, + "epoch": 0.526919164253854, "grad_norm": 0.0, - "learning_rate": 1.418690992941559e-05, - "loss": 0.9033, + "learning_rate": 9.614223877470419e-06, + "loss": 1.0851, "step": 13467 }, { - "epoch": 0.38164867239083, + "epoch": 0.5269582909460834, "grad_norm": 0.0, - "learning_rate": 1.4186076440951895e-05, - "loss": 1.0513, + "learning_rate": 9.612957592775957e-06, + "loss": 1.1522, "step": 13468 }, { - "epoch": 0.38167700983309244, + "epoch": 0.5269974176383129, "grad_norm": 0.0, - "learning_rate": 1.418524291722732e-05, - "loss": 0.9814, + "learning_rate": 9.611691314296913e-06, + "loss": 1.0685, "step": 13469 }, { - "epoch": 0.38170534727535493, + "epoch": 0.5270365443305423, "grad_norm": 0.0, - "learning_rate": 1.4184409358248893e-05, - "loss": 1.0141, + "learning_rate": 9.610425042053618e-06, + "loss": 1.1625, "step": 13470 }, { - "epoch": 0.38173368471761737, + "epoch": 0.5270756710227718, "grad_norm": 0.0, - "learning_rate": 1.418357576402363e-05, - "loss": 0.9221, + "learning_rate": 9.609158776066405e-06, + "loss": 0.9156, "step": 13471 }, { - "epoch": 0.38176202215987987, + "epoch": 0.5271147977150011, "grad_norm": 0.0, - "learning_rate": 1.4182742134558555e-05, - "loss": 0.9165, + "learning_rate": 9.607892516355618e-06, + "loss": 1.1102, "step": 13472 }, { - "epoch": 0.3817903596021423, + "epoch": 0.5271539244072306, "grad_norm": 0.0, - "learning_rate": 1.4181908469860695e-05, - "loss": 0.8625, + "learning_rate": 9.606626262941582e-06, + "loss": 0.9443, "step": 13473 }, { - "epoch": 0.3818186970444048, + "epoch": 0.52719305109946, "grad_norm": 0.0, - "learning_rate": 1.418107476993706e-05, - "loss": 1.0002, + "learning_rate": 9.605360015844637e-06, + "loss": 1.0542, "step": 13474 }, { - "epoch": 0.38184703448666724, + "epoch": 0.5272321777916895, "grad_norm": 0.0, - "learning_rate": 1.4180241034794684e-05, - "loss": 0.939, + "learning_rate": 9.604093775085114e-06, + "loss": 1.0313, "step": 13475 }, { - "epoch": 0.3818753719289297, + "epoch": 0.5272713044839189, "grad_norm": 0.0, - "learning_rate": 1.4179407264440582e-05, - "loss": 1.0015, + "learning_rate": 9.60282754068335e-06, + "loss": 0.9282, "step": 13476 }, { - "epoch": 0.38190370937119217, + "epoch": 0.5273104311761484, "grad_norm": 0.0, - "learning_rate": 1.4178573458881784e-05, - "loss": 0.923, + "learning_rate": 9.601561312659681e-06, + "loss": 0.8982, "step": 13477 }, { - "epoch": 0.3819320468134546, + "epoch": 0.5273495578683778, "grad_norm": 0.0, - "learning_rate": 1.4177739618125305e-05, - "loss": 0.9131, + "learning_rate": 9.600295091034436e-06, + "loss": 1.0988, "step": 13478 }, { - "epoch": 0.3819603842557171, + "epoch": 0.5273886845606073, "grad_norm": 0.0, - "learning_rate": 1.417690574217818e-05, - "loss": 0.9747, + "learning_rate": 9.59902887582795e-06, + "loss": 1.0674, "step": 13479 }, { - "epoch": 0.38198872169797954, + "epoch": 0.5274278112528367, "grad_norm": 0.0, - "learning_rate": 1.4176071831047425e-05, - "loss": 0.89, + "learning_rate": 9.597762667060556e-06, + "loss": 0.9875, "step": 13480 }, { - "epoch": 0.382017059140242, + "epoch": 0.5274669379450662, "grad_norm": 0.0, - "learning_rate": 1.4175237884740068e-05, - "loss": 0.8652, + "learning_rate": 9.596496464752593e-06, + "loss": 1.0382, "step": 13481 }, { - "epoch": 0.38204539658250447, + "epoch": 0.5275060646372955, "grad_norm": 0.0, - "learning_rate": 1.4174403903263127e-05, - "loss": 0.9425, + "learning_rate": 9.59523026892439e-06, + "loss": 1.0419, "step": 13482 }, { - "epoch": 0.3820737340247669, + "epoch": 0.527545191329525, "grad_norm": 0.0, - "learning_rate": 1.4173569886623633e-05, - "loss": 0.9192, + "learning_rate": 9.593964079596282e-06, + "loss": 1.0866, "step": 13483 }, { - "epoch": 0.3821020714670294, + "epoch": 0.5275843180217544, "grad_norm": 0.0, - "learning_rate": 1.4172735834828613e-05, - "loss": 0.9513, + "learning_rate": 9.592697896788598e-06, + "loss": 0.9091, "step": 13484 }, { - "epoch": 0.38213040890929184, + "epoch": 0.5276234447139839, "grad_norm": 0.0, - "learning_rate": 1.4171901747885088e-05, - "loss": 0.9537, + "learning_rate": 9.591431720521681e-06, + "loss": 0.9369, "step": 13485 }, { - "epoch": 0.38215874635155433, + "epoch": 0.5276625714062133, "grad_norm": 0.0, - "learning_rate": 1.4171067625800083e-05, - "loss": 0.9394, + "learning_rate": 9.590165550815857e-06, + "loss": 1.1633, "step": 13486 }, { - "epoch": 0.3821870837938168, + "epoch": 0.5277016980984428, "grad_norm": 0.0, - "learning_rate": 1.417023346858063e-05, - "loss": 0.9761, + "learning_rate": 9.58889938769146e-06, + "loss": 1.0678, "step": 13487 }, { - "epoch": 0.3822154212360792, + "epoch": 0.5277408247906722, "grad_norm": 0.0, - "learning_rate": 1.4169399276233754e-05, - "loss": 0.9384, + "learning_rate": 9.587633231168825e-06, + "loss": 1.0544, "step": 13488 }, { - "epoch": 0.3822437586783417, + "epoch": 0.5277799514829017, "grad_norm": 0.0, - "learning_rate": 1.4168565048766475e-05, - "loss": 0.9808, + "learning_rate": 9.58636708126828e-06, + "loss": 0.946, "step": 13489 }, { - "epoch": 0.38227209612060414, + "epoch": 0.5278190781751311, "grad_norm": 0.0, - "learning_rate": 1.4167730786185822e-05, - "loss": 1.0336, + "learning_rate": 9.585100938010163e-06, + "loss": 1.0435, "step": 13490 }, { - "epoch": 0.38230043356286664, + "epoch": 0.5278582048673605, "grad_norm": 0.0, - "learning_rate": 1.4166896488498831e-05, - "loss": 0.9343, + "learning_rate": 9.583834801414808e-06, + "loss": 0.9287, "step": 13491 }, { - "epoch": 0.3823287710051291, + "epoch": 0.52789733155959, "grad_norm": 0.0, - "learning_rate": 1.416606215571252e-05, - "loss": 0.9509, + "learning_rate": 9.582568671502543e-06, + "loss": 0.9665, "step": 13492 }, { - "epoch": 0.3823571084473915, + "epoch": 0.5279364582518193, "grad_norm": 0.0, - "learning_rate": 1.4165227787833925e-05, - "loss": 1.0146, + "learning_rate": 9.581302548293698e-06, + "loss": 1.1407, "step": 13493 }, { - "epoch": 0.382385445889654, + "epoch": 0.5279755849440488, "grad_norm": 0.0, - "learning_rate": 1.4164393384870065e-05, - "loss": 1.009, + "learning_rate": 9.580036431808614e-06, + "loss": 1.0204, "step": 13494 }, { - "epoch": 0.38241378333191645, + "epoch": 0.5280147116362782, "grad_norm": 0.0, - "learning_rate": 1.4163558946827975e-05, - "loss": 1.0209, + "learning_rate": 9.578770322067619e-06, + "loss": 1.008, "step": 13495 }, { - "epoch": 0.38244212077417894, + "epoch": 0.5280538383285077, "grad_norm": 0.0, - "learning_rate": 1.416272447371468e-05, - "loss": 0.9907, + "learning_rate": 9.577504219091044e-06, + "loss": 1.0522, "step": 13496 }, { - "epoch": 0.3824704582164414, + "epoch": 0.5280929650207371, "grad_norm": 0.0, - "learning_rate": 1.4161889965537213e-05, - "loss": 0.9553, + "learning_rate": 9.576238122899221e-06, + "loss": 1.1509, "step": 13497 }, { - "epoch": 0.38249879565870387, + "epoch": 0.5281320917129666, "grad_norm": 0.0, - "learning_rate": 1.41610554223026e-05, - "loss": 1.0167, + "learning_rate": 9.574972033512482e-06, + "loss": 0.9294, "step": 13498 }, { - "epoch": 0.3825271331009663, + "epoch": 0.528171218405196, "grad_norm": 0.0, - "learning_rate": 1.4160220844017874e-05, - "loss": 0.9295, + "learning_rate": 9.57370595095116e-06, + "loss": 0.9634, "step": 13499 }, { - "epoch": 0.38255547054322875, + "epoch": 0.5282103450974255, "grad_norm": 0.0, - "learning_rate": 1.4159386230690062e-05, - "loss": 0.9285, + "learning_rate": 9.572439875235587e-06, + "loss": 1.0211, "step": 13500 }, { - "epoch": 0.38258380798549124, + "epoch": 0.5282494717896549, "grad_norm": 0.0, - "learning_rate": 1.4158551582326193e-05, - "loss": 0.9617, + "learning_rate": 9.571173806386095e-06, + "loss": 1.0934, "step": 13501 }, { - "epoch": 0.3826121454277537, + "epoch": 0.5282885984818844, "grad_norm": 0.0, - "learning_rate": 1.4157716898933302e-05, - "loss": 0.9228, + "learning_rate": 9.569907744423009e-06, + "loss": 0.9563, "step": 13502 }, { - "epoch": 0.3826404828700162, + "epoch": 0.5283277251741137, "grad_norm": 0.0, - "learning_rate": 1.4156882180518417e-05, - "loss": 0.8342, + "learning_rate": 9.56864168936667e-06, + "loss": 0.9789, "step": 13503 }, { - "epoch": 0.3826688203122786, + "epoch": 0.5283668518663432, "grad_norm": 0.0, - "learning_rate": 1.415604742708857e-05, - "loss": 0.9393, + "learning_rate": 9.567375641237407e-06, + "loss": 1.1208, "step": 13504 }, { - "epoch": 0.38269715775454105, + "epoch": 0.5284059785585726, "grad_norm": 0.0, - "learning_rate": 1.4155212638650793e-05, - "loss": 0.8634, + "learning_rate": 9.566109600055547e-06, + "loss": 1.1661, "step": 13505 }, { - "epoch": 0.38272549519680354, + "epoch": 0.5284451052508021, "grad_norm": 0.0, - "learning_rate": 1.4154377815212117e-05, - "loss": 0.8627, + "learning_rate": 9.564843565841424e-06, + "loss": 0.9811, "step": 13506 }, { - "epoch": 0.382753832639066, + "epoch": 0.5284842319430315, "grad_norm": 0.0, - "learning_rate": 1.4153542956779573e-05, - "loss": 0.9501, + "learning_rate": 9.563577538615363e-06, + "loss": 1.0604, "step": 13507 }, { - "epoch": 0.3827821700813285, + "epoch": 0.528523358635261, "grad_norm": 0.0, - "learning_rate": 1.4152708063360195e-05, - "loss": 0.9744, + "learning_rate": 9.562311518397704e-06, + "loss": 1.2902, "step": 13508 }, { - "epoch": 0.3828105075235909, + "epoch": 0.5285624853274904, "grad_norm": 0.0, - "learning_rate": 1.4151873134961014e-05, - "loss": 0.9151, + "learning_rate": 9.561045505208775e-06, + "loss": 1.1618, "step": 13509 }, { - "epoch": 0.3828388449658534, + "epoch": 0.5286016120197199, "grad_norm": 0.0, - "learning_rate": 1.4151038171589064e-05, - "loss": 0.963, + "learning_rate": 9.559779499068904e-06, + "loss": 0.8938, "step": 13510 }, { - "epoch": 0.38286718240811585, + "epoch": 0.5286407387119493, "grad_norm": 0.0, - "learning_rate": 1.4150203173251377e-05, - "loss": 0.946, + "learning_rate": 9.558513499998421e-06, + "loss": 1.1041, "step": 13511 }, { - "epoch": 0.3828955198503783, + "epoch": 0.5286798654041788, "grad_norm": 0.0, - "learning_rate": 1.414936813995499e-05, - "loss": 0.9201, + "learning_rate": 9.557247508017657e-06, + "loss": 1.0024, "step": 13512 }, { - "epoch": 0.3829238572926408, + "epoch": 0.5287189920964082, "grad_norm": 0.0, - "learning_rate": 1.4148533071706933e-05, - "loss": 0.9, + "learning_rate": 9.555981523146946e-06, + "loss": 0.9979, "step": 13513 }, { - "epoch": 0.3829521947349032, + "epoch": 0.5287581187886377, "grad_norm": 0.0, - "learning_rate": 1.4147697968514242e-05, - "loss": 0.8984, + "learning_rate": 9.554715545406617e-06, + "loss": 1.0248, "step": 13514 }, { - "epoch": 0.3829805321771657, + "epoch": 0.528797245480867, "grad_norm": 0.0, - "learning_rate": 1.414686283038395e-05, - "loss": 0.893, + "learning_rate": 9.553449574816995e-06, + "loss": 0.9194, "step": 13515 }, { - "epoch": 0.38300886961942815, + "epoch": 0.5288363721730965, "grad_norm": 0.0, - "learning_rate": 1.4146027657323092e-05, - "loss": 0.9118, + "learning_rate": 9.552183611398415e-06, + "loss": 1.0566, "step": 13516 }, { - "epoch": 0.3830372070616906, + "epoch": 0.5288754988653259, "grad_norm": 0.0, - "learning_rate": 1.4145192449338704e-05, - "loss": 0.9496, + "learning_rate": 9.550917655171205e-06, + "loss": 0.9846, "step": 13517 }, { - "epoch": 0.3830655445039531, + "epoch": 0.5289146255575554, "grad_norm": 0.0, - "learning_rate": 1.4144357206437822e-05, - "loss": 0.8913, + "learning_rate": 9.549651706155692e-06, + "loss": 1.0242, "step": 13518 }, { - "epoch": 0.3830938819462155, + "epoch": 0.5289537522497848, "grad_norm": 0.0, - "learning_rate": 1.4143521928627479e-05, - "loss": 0.9341, + "learning_rate": 9.54838576437221e-06, + "loss": 0.8621, "step": 13519 }, { - "epoch": 0.383122219388478, + "epoch": 0.5289928789420142, "grad_norm": 0.0, - "learning_rate": 1.4142686615914713e-05, - "loss": 0.9817, + "learning_rate": 9.547119829841088e-06, + "loss": 1.075, "step": 13520 }, { - "epoch": 0.38315055683074045, + "epoch": 0.5290320056342437, "grad_norm": 0.0, - "learning_rate": 1.414185126830656e-05, - "loss": 0.9772, + "learning_rate": 9.545853902582653e-06, + "loss": 0.9848, "step": 13521 }, { - "epoch": 0.38317889427300295, + "epoch": 0.5290711323264731, "grad_norm": 0.0, - "learning_rate": 1.4141015885810055e-05, - "loss": 0.9688, + "learning_rate": 9.544587982617236e-06, + "loss": 0.9596, "step": 13522 }, { - "epoch": 0.3832072317152654, + "epoch": 0.5291102590187026, "grad_norm": 0.0, - "learning_rate": 1.4140180468432235e-05, - "loss": 0.8198, + "learning_rate": 9.543322069965163e-06, + "loss": 1.0768, "step": 13523 }, { - "epoch": 0.3832355691575278, + "epoch": 0.529149385710932, "grad_norm": 0.0, - "learning_rate": 1.4139345016180135e-05, - "loss": 1.0248, + "learning_rate": 9.542056164646765e-06, + "loss": 1.0572, "step": 13524 }, { - "epoch": 0.3832639065997903, + "epoch": 0.5291885124031614, "grad_norm": 0.0, - "learning_rate": 1.41385095290608e-05, - "loss": 0.8819, + "learning_rate": 9.540790266682375e-06, + "loss": 1.0775, "step": 13525 }, { - "epoch": 0.38329224404205275, + "epoch": 0.5292276390953908, "grad_norm": 0.0, - "learning_rate": 1.4137674007081259e-05, - "loss": 0.937, + "learning_rate": 9.539524376092317e-06, + "loss": 1.0075, "step": 13526 }, { - "epoch": 0.38332058148431525, + "epoch": 0.5292667657876203, "grad_norm": 0.0, - "learning_rate": 1.4136838450248553e-05, - "loss": 0.9214, + "learning_rate": 9.53825849289692e-06, + "loss": 1.1107, "step": 13527 }, { - "epoch": 0.3833489189265777, + "epoch": 0.5293058924798497, "grad_norm": 0.0, - "learning_rate": 1.413600285856972e-05, - "loss": 0.8555, + "learning_rate": 9.536992617116515e-06, + "loss": 0.9786, "step": 13528 }, { - "epoch": 0.3833772563688401, + "epoch": 0.5293450191720792, "grad_norm": 0.0, - "learning_rate": 1.4135167232051802e-05, - "loss": 0.9553, + "learning_rate": 9.535726748771422e-06, + "loss": 1.1302, "step": 13529 }, { - "epoch": 0.3834055938111026, + "epoch": 0.5293841458643086, "grad_norm": 0.0, - "learning_rate": 1.4134331570701834e-05, - "loss": 0.8994, + "learning_rate": 9.53446088788198e-06, + "loss": 1.0727, "step": 13530 }, { - "epoch": 0.38343393125336506, + "epoch": 0.5294232725565381, "grad_norm": 0.0, - "learning_rate": 1.4133495874526857e-05, - "loss": 0.9723, + "learning_rate": 9.533195034468513e-06, + "loss": 0.8994, "step": 13531 }, { - "epoch": 0.38346226869562755, + "epoch": 0.5294623992487675, "grad_norm": 0.0, - "learning_rate": 1.4132660143533907e-05, - "loss": 0.925, + "learning_rate": 9.531929188551349e-06, + "loss": 0.9664, "step": 13532 }, { - "epoch": 0.38349060613789, + "epoch": 0.529501525940997, "grad_norm": 0.0, - "learning_rate": 1.4131824377730026e-05, - "loss": 0.8553, + "learning_rate": 9.530663350150812e-06, + "loss": 1.04, "step": 13533 }, { - "epoch": 0.3835189435801525, + "epoch": 0.5295406526332264, "grad_norm": 0.0, - "learning_rate": 1.4130988577122253e-05, - "loss": 0.8538, + "learning_rate": 9.529397519287237e-06, + "loss": 0.9979, "step": 13534 }, { - "epoch": 0.3835472810224149, + "epoch": 0.5295797793254559, "grad_norm": 0.0, - "learning_rate": 1.4130152741717634e-05, - "loss": 0.9663, + "learning_rate": 9.528131695980948e-06, + "loss": 1.0309, "step": 13535 }, { - "epoch": 0.38357561846467736, + "epoch": 0.5296189060176852, "grad_norm": 0.0, - "learning_rate": 1.41293168715232e-05, - "loss": 0.9235, + "learning_rate": 9.526865880252273e-06, + "loss": 1.1, "step": 13536 }, { - "epoch": 0.38360395590693985, + "epoch": 0.5296580327099147, "grad_norm": 0.0, - "learning_rate": 1.4128480966545998e-05, - "loss": 0.9751, + "learning_rate": 9.52560007212154e-06, + "loss": 1.1292, "step": 13537 }, { - "epoch": 0.3836322933492023, + "epoch": 0.5296971594021441, "grad_norm": 0.0, - "learning_rate": 1.4127645026793068e-05, - "loss": 0.8712, + "learning_rate": 9.524334271609069e-06, + "loss": 1.1414, "step": 13538 }, { - "epoch": 0.3836606307914648, + "epoch": 0.5297362860943736, "grad_norm": 0.0, - "learning_rate": 1.4126809052271453e-05, - "loss": 0.9425, + "learning_rate": 9.5230684787352e-06, + "loss": 1.1035, "step": 13539 }, { - "epoch": 0.3836889682337272, + "epoch": 0.529775412786603, "grad_norm": 0.0, - "learning_rate": 1.412597304298819e-05, - "loss": 1.0613, + "learning_rate": 9.521802693520253e-06, + "loss": 0.996, "step": 13540 }, { - "epoch": 0.38371730567598966, + "epoch": 0.5298145394788325, "grad_norm": 0.0, - "learning_rate": 1.4125136998950324e-05, - "loss": 0.8188, + "learning_rate": 9.520536915984555e-06, + "loss": 1.029, "step": 13541 }, { - "epoch": 0.38374564311825216, + "epoch": 0.5298536661710619, "grad_norm": 0.0, - "learning_rate": 1.4124300920164897e-05, - "loss": 0.934, + "learning_rate": 9.51927114614843e-06, + "loss": 1.0722, "step": 13542 }, { - "epoch": 0.3837739805605146, + "epoch": 0.5298927928632914, "grad_norm": 0.0, - "learning_rate": 1.4123464806638955e-05, - "loss": 0.9199, + "learning_rate": 9.51800538403221e-06, + "loss": 1.0411, "step": 13543 }, { - "epoch": 0.3838023180027771, + "epoch": 0.5299319195555208, "grad_norm": 0.0, - "learning_rate": 1.4122628658379536e-05, - "loss": 0.9045, + "learning_rate": 9.51673962965622e-06, + "loss": 0.9385, "step": 13544 }, { - "epoch": 0.3838306554450395, + "epoch": 0.5299710462477503, "grad_norm": 0.0, - "learning_rate": 1.4121792475393685e-05, - "loss": 1.0098, + "learning_rate": 9.515473883040789e-06, + "loss": 1.0413, "step": 13545 }, { - "epoch": 0.383858992887302, + "epoch": 0.5300101729399797, "grad_norm": 0.0, - "learning_rate": 1.4120956257688445e-05, - "loss": 0.9098, + "learning_rate": 9.514208144206237e-06, + "loss": 0.9799, "step": 13546 }, { - "epoch": 0.38388733032956446, + "epoch": 0.5300492996322091, "grad_norm": 0.0, - "learning_rate": 1.412012000527086e-05, - "loss": 0.9524, + "learning_rate": 9.512942413172892e-06, + "loss": 1.0057, "step": 13547 }, { - "epoch": 0.3839156677718269, + "epoch": 0.5300884263244385, "grad_norm": 0.0, - "learning_rate": 1.4119283718147974e-05, - "loss": 0.9019, + "learning_rate": 9.511676689961084e-06, + "loss": 1.0799, "step": 13548 }, { - "epoch": 0.3839440052140894, + "epoch": 0.5301275530166679, "grad_norm": 0.0, - "learning_rate": 1.4118447396326832e-05, - "loss": 0.8302, + "learning_rate": 9.510410974591137e-06, + "loss": 1.0667, "step": 13549 }, { - "epoch": 0.38397234265635183, + "epoch": 0.5301666797088974, "grad_norm": 0.0, - "learning_rate": 1.4117611039814479e-05, - "loss": 0.8639, + "learning_rate": 9.509145267083374e-06, + "loss": 1.0775, "step": 13550 }, { - "epoch": 0.3840006800986143, + "epoch": 0.5302058064011268, "grad_norm": 0.0, - "learning_rate": 1.4116774648617958e-05, - "loss": 0.9913, + "learning_rate": 9.507879567458122e-06, + "loss": 1.0273, "step": 13551 }, { - "epoch": 0.38402901754087676, + "epoch": 0.5302449330933563, "grad_norm": 0.0, - "learning_rate": 1.4115938222744317e-05, - "loss": 0.8033, + "learning_rate": 9.506613875735711e-06, + "loss": 1.0048, "step": 13552 }, { - "epoch": 0.3840573549831392, + "epoch": 0.5302840597855857, "grad_norm": 0.0, - "learning_rate": 1.4115101762200598e-05, - "loss": 0.8727, + "learning_rate": 9.505348191936461e-06, + "loss": 1.0757, "step": 13553 }, { - "epoch": 0.3840856924254017, + "epoch": 0.5303231864778152, "grad_norm": 0.0, - "learning_rate": 1.4114265266993847e-05, - "loss": 1.0612, + "learning_rate": 9.504082516080702e-06, + "loss": 0.9892, "step": 13554 }, { - "epoch": 0.38411402986766413, + "epoch": 0.5303623131700446, "grad_norm": 0.0, - "learning_rate": 1.4113428737131116e-05, - "loss": 0.8648, + "learning_rate": 9.502816848188755e-06, + "loss": 1.1455, "step": 13555 }, { - "epoch": 0.3841423673099266, + "epoch": 0.5304014398622741, "grad_norm": 0.0, - "learning_rate": 1.4112592172619449e-05, - "loss": 1.0116, + "learning_rate": 9.501551188280942e-06, + "loss": 0.9993, "step": 13556 }, { - "epoch": 0.38417070475218906, + "epoch": 0.5304405665545034, "grad_norm": 0.0, - "learning_rate": 1.4111755573465884e-05, - "loss": 1.0346, + "learning_rate": 9.500285536377597e-06, + "loss": 1.1294, "step": 13557 }, { - "epoch": 0.3841990421944515, + "epoch": 0.5304796932467329, "grad_norm": 0.0, - "learning_rate": 1.411091893967748e-05, - "loss": 0.987, + "learning_rate": 9.49901989249904e-06, + "loss": 0.9031, "step": 13558 }, { - "epoch": 0.384227379636714, + "epoch": 0.5305188199389623, "grad_norm": 0.0, - "learning_rate": 1.4110082271261278e-05, - "loss": 0.8989, + "learning_rate": 9.497754256665596e-06, + "loss": 1.0398, "step": 13559 }, { - "epoch": 0.38425571707897643, + "epoch": 0.5305579466311918, "grad_norm": 0.0, - "learning_rate": 1.4109245568224326e-05, - "loss": 0.9483, + "learning_rate": 9.496488628897586e-06, + "loss": 0.9794, "step": 13560 }, { - "epoch": 0.3842840545212389, + "epoch": 0.5305970733234212, "grad_norm": 0.0, - "learning_rate": 1.4108408830573673e-05, - "loss": 0.9203, + "learning_rate": 9.49522300921534e-06, + "loss": 1.1222, "step": 13561 }, { - "epoch": 0.38431239196350137, + "epoch": 0.5306362000156507, "grad_norm": 0.0, - "learning_rate": 1.4107572058316365e-05, - "loss": 0.8813, + "learning_rate": 9.493957397639178e-06, + "loss": 1.0179, "step": 13562 }, { - "epoch": 0.38434072940576386, + "epoch": 0.5306753267078801, "grad_norm": 0.0, - "learning_rate": 1.4106735251459456e-05, - "loss": 0.9827, + "learning_rate": 9.49269179418943e-06, + "loss": 1.1594, "step": 13563 }, { - "epoch": 0.3843690668480263, + "epoch": 0.5307144534001096, "grad_norm": 0.0, - "learning_rate": 1.410589841000999e-05, - "loss": 0.8827, + "learning_rate": 9.491426198886414e-06, + "loss": 1.0235, "step": 13564 }, { - "epoch": 0.38439740429028874, + "epoch": 0.530753580092339, "grad_norm": 0.0, - "learning_rate": 1.4105061533975015e-05, - "loss": 0.8581, + "learning_rate": 9.490160611750456e-06, + "loss": 1.0468, "step": 13565 }, { - "epoch": 0.38442574173255123, + "epoch": 0.5307927067845685, "grad_norm": 0.0, - "learning_rate": 1.4104224623361584e-05, - "loss": 1.0416, + "learning_rate": 9.488895032801879e-06, + "loss": 1.1302, "step": 13566 }, { - "epoch": 0.38445407917481367, + "epoch": 0.5308318334767979, "grad_norm": 0.0, - "learning_rate": 1.4103387678176745e-05, - "loss": 1.0139, + "learning_rate": 9.48762946206101e-06, + "loss": 1.0311, "step": 13567 }, { - "epoch": 0.38448241661707616, + "epoch": 0.5308709601690274, "grad_norm": 0.0, - "learning_rate": 1.4102550698427548e-05, - "loss": 1.0436, + "learning_rate": 9.486363899548165e-06, + "loss": 1.0802, "step": 13568 }, { - "epoch": 0.3845107540593386, + "epoch": 0.5309100868612567, "grad_norm": 0.0, - "learning_rate": 1.4101713684121042e-05, - "loss": 0.9055, + "learning_rate": 9.485098345283675e-06, + "loss": 0.9382, "step": 13569 }, { - "epoch": 0.38453909150160104, + "epoch": 0.5309492135534862, "grad_norm": 0.0, - "learning_rate": 1.4100876635264279e-05, - "loss": 0.9241, + "learning_rate": 9.48383279928786e-06, + "loss": 1.1183, "step": 13570 }, { - "epoch": 0.38456742894386353, + "epoch": 0.5309883402457156, "grad_norm": 0.0, - "learning_rate": 1.410003955186431e-05, - "loss": 0.9613, + "learning_rate": 9.482567261581044e-06, + "loss": 1.0244, "step": 13571 }, { - "epoch": 0.38459576638612597, + "epoch": 0.5310274669379451, "grad_norm": 0.0, - "learning_rate": 1.4099202433928185e-05, - "loss": 0.8289, + "learning_rate": 9.48130173218355e-06, + "loss": 1.093, "step": 13572 }, { - "epoch": 0.38462410382838846, + "epoch": 0.5310665936301745, "grad_norm": 0.0, - "learning_rate": 1.4098365281462953e-05, - "loss": 0.9455, + "learning_rate": 9.480036211115697e-06, + "loss": 1.1202, "step": 13573 }, { - "epoch": 0.3846524412706509, + "epoch": 0.531105720322404, "grad_norm": 0.0, - "learning_rate": 1.409752809447567e-05, - "loss": 0.8609, + "learning_rate": 9.478770698397814e-06, + "loss": 0.9898, "step": 13574 }, { - "epoch": 0.3846807787129134, + "epoch": 0.5311448470146334, "grad_norm": 0.0, - "learning_rate": 1.4096690872973388e-05, - "loss": 0.945, + "learning_rate": 9.47750519405022e-06, + "loss": 0.9807, "step": 13575 }, { - "epoch": 0.38470911615517583, + "epoch": 0.5311839737068628, "grad_norm": 0.0, - "learning_rate": 1.4095853616963157e-05, - "loss": 0.9637, + "learning_rate": 9.476239698093238e-06, + "loss": 1.1049, "step": 13576 }, { - "epoch": 0.3847374535974383, + "epoch": 0.5312231003990923, "grad_norm": 0.0, - "learning_rate": 1.4095016326452027e-05, - "loss": 1.1015, + "learning_rate": 9.47497421054719e-06, + "loss": 1.0919, "step": 13577 }, { - "epoch": 0.38476579103970077, + "epoch": 0.5312622270913216, "grad_norm": 0.0, - "learning_rate": 1.409417900144706e-05, - "loss": 1.0692, + "learning_rate": 9.473708731432395e-06, + "loss": 0.9095, "step": 13578 }, { - "epoch": 0.3847941284819632, + "epoch": 0.5313013537835511, "grad_norm": 0.0, - "learning_rate": 1.4093341641955298e-05, - "loss": 0.9226, + "learning_rate": 9.472443260769181e-06, + "loss": 1.009, "step": 13579 }, { - "epoch": 0.3848224659242257, + "epoch": 0.5313404804757805, "grad_norm": 0.0, - "learning_rate": 1.4092504247983798e-05, - "loss": 0.8813, + "learning_rate": 9.471177798577869e-06, + "loss": 1.1621, "step": 13580 }, { - "epoch": 0.38485080336648814, + "epoch": 0.53137960716801, "grad_norm": 0.0, - "learning_rate": 1.409166681953962e-05, - "loss": 0.931, + "learning_rate": 9.469912344878779e-06, + "loss": 1.0852, "step": 13581 }, { - "epoch": 0.3848791408087506, + "epoch": 0.5314187338602394, "grad_norm": 0.0, - "learning_rate": 1.4090829356629809e-05, - "loss": 0.9372, + "learning_rate": 9.468646899692227e-06, + "loss": 1.1156, "step": 13582 }, { - "epoch": 0.38490747825101307, + "epoch": 0.5314578605524689, "grad_norm": 0.0, - "learning_rate": 1.4089991859261426e-05, - "loss": 0.9625, + "learning_rate": 9.467381463038545e-06, + "loss": 1.0002, "step": 13583 }, { - "epoch": 0.3849358156932755, + "epoch": 0.5314969872446983, "grad_norm": 0.0, - "learning_rate": 1.408915432744152e-05, - "loss": 0.8995, + "learning_rate": 9.466116034938047e-06, + "loss": 0.9239, "step": 13584 }, { - "epoch": 0.384964153135538, + "epoch": 0.5315361139369278, "grad_norm": 0.0, - "learning_rate": 1.4088316761177151e-05, - "loss": 0.7821, + "learning_rate": 9.464850615411059e-06, + "loss": 0.9817, "step": 13585 }, { - "epoch": 0.38499249057780044, + "epoch": 0.5315752406291572, "grad_norm": 0.0, - "learning_rate": 1.408747916047537e-05, - "loss": 0.975, + "learning_rate": 9.463585204477898e-06, + "loss": 0.9485, "step": 13586 }, { - "epoch": 0.38502082802006293, + "epoch": 0.5316143673213867, "grad_norm": 0.0, - "learning_rate": 1.4086641525343234e-05, - "loss": 0.8654, + "learning_rate": 9.462319802158884e-06, + "loss": 1.1289, "step": 13587 }, { - "epoch": 0.38504916546232537, + "epoch": 0.5316534940136161, "grad_norm": 0.0, - "learning_rate": 1.40858038557878e-05, - "loss": 0.9546, + "learning_rate": 9.461054408474343e-06, + "loss": 0.9135, "step": 13588 }, { - "epoch": 0.3850775029045878, + "epoch": 0.5316926207058456, "grad_norm": 0.0, - "learning_rate": 1.4084966151816124e-05, - "loss": 0.8261, + "learning_rate": 9.459789023444595e-06, + "loss": 1.1118, "step": 13589 }, { - "epoch": 0.3851058403468503, + "epoch": 0.5317317473980749, "grad_norm": 0.0, - "learning_rate": 1.4084128413435258e-05, - "loss": 0.9099, + "learning_rate": 9.458523647089955e-06, + "loss": 1.0647, "step": 13590 }, { - "epoch": 0.38513417778911274, + "epoch": 0.5317708740903044, "grad_norm": 0.0, - "learning_rate": 1.4083290640652267e-05, - "loss": 0.8389, + "learning_rate": 9.457258279430745e-06, + "loss": 0.9483, "step": 13591 }, { - "epoch": 0.38516251523137524, + "epoch": 0.5318100007825338, "grad_norm": 0.0, - "learning_rate": 1.4082452833474198e-05, - "loss": 0.7971, + "learning_rate": 9.45599292048729e-06, + "loss": 1.0948, "step": 13592 }, { - "epoch": 0.3851908526736377, + "epoch": 0.5318491274747633, "grad_norm": 0.0, - "learning_rate": 1.4081614991908115e-05, - "loss": 0.7895, + "learning_rate": 9.454727570279907e-06, + "loss": 1.1513, "step": 13593 }, { - "epoch": 0.3852191901159001, + "epoch": 0.5318882541669927, "grad_norm": 0.0, - "learning_rate": 1.408077711596107e-05, - "loss": 0.9288, + "learning_rate": 9.453462228828917e-06, + "loss": 0.8948, "step": 13594 }, { - "epoch": 0.3852475275581626, + "epoch": 0.5319273808592222, "grad_norm": 0.0, - "learning_rate": 1.4079939205640127e-05, - "loss": 0.983, + "learning_rate": 9.452196896154639e-06, + "loss": 1.0361, "step": 13595 }, { - "epoch": 0.38527586500042504, + "epoch": 0.5319665075514516, "grad_norm": 0.0, - "learning_rate": 1.4079101260952342e-05, - "loss": 0.9028, + "learning_rate": 9.450931572277387e-06, + "loss": 1.1232, "step": 13596 }, { - "epoch": 0.38530420244268754, + "epoch": 0.5320056342436811, "grad_norm": 0.0, - "learning_rate": 1.4078263281904771e-05, - "loss": 0.9046, + "learning_rate": 9.44966625721749e-06, + "loss": 0.9742, "step": 13597 }, { - "epoch": 0.38533253988495, + "epoch": 0.5320447609359105, "grad_norm": 0.0, - "learning_rate": 1.4077425268504474e-05, - "loss": 1.0396, + "learning_rate": 9.448400950995265e-06, + "loss": 1.0248, "step": 13598 }, { - "epoch": 0.38536087732721247, + "epoch": 0.53208388762814, "grad_norm": 0.0, - "learning_rate": 1.407658722075851e-05, - "loss": 0.9457, + "learning_rate": 9.447135653631028e-06, + "loss": 1.0617, "step": 13599 }, { - "epoch": 0.3853892147694749, + "epoch": 0.5321230143203693, "grad_norm": 0.0, - "learning_rate": 1.4075749138673937e-05, - "loss": 0.8406, + "learning_rate": 9.445870365145097e-06, + "loss": 1.0415, "step": 13600 }, { - "epoch": 0.38541755221173735, + "epoch": 0.5321621410125988, "grad_norm": 0.0, - "learning_rate": 1.4074911022257815e-05, - "loss": 0.867, + "learning_rate": 9.444605085557795e-06, + "loss": 1.0541, "step": 13601 }, { - "epoch": 0.38544588965399984, + "epoch": 0.5322012677048282, "grad_norm": 0.0, - "learning_rate": 1.4074072871517205e-05, - "loss": 0.9465, + "learning_rate": 9.443339814889441e-06, + "loss": 1.1744, "step": 13602 }, { - "epoch": 0.3854742270962623, + "epoch": 0.5322403943970577, "grad_norm": 0.0, - "learning_rate": 1.4073234686459167e-05, - "loss": 0.8787, + "learning_rate": 9.442074553160353e-06, + "loss": 1.1078, "step": 13603 }, { - "epoch": 0.3855025645385248, + "epoch": 0.5322795210892871, "grad_norm": 0.0, - "learning_rate": 1.4072396467090764e-05, - "loss": 0.8706, + "learning_rate": 9.440809300390847e-06, + "loss": 1.1352, "step": 13604 }, { - "epoch": 0.3855309019807872, + "epoch": 0.5323186477815165, "grad_norm": 0.0, - "learning_rate": 1.407155821341905e-05, - "loss": 0.9133, + "learning_rate": 9.43954405660124e-06, + "loss": 1.022, "step": 13605 }, { - "epoch": 0.38555923942304965, + "epoch": 0.532357774473746, "grad_norm": 0.0, - "learning_rate": 1.4070719925451086e-05, - "loss": 1.0011, + "learning_rate": 9.438278821811857e-06, + "loss": 1.0635, "step": 13606 }, { - "epoch": 0.38558757686531214, + "epoch": 0.5323969011659754, "grad_norm": 0.0, - "learning_rate": 1.406988160319394e-05, - "loss": 0.9757, + "learning_rate": 9.43701359604301e-06, + "loss": 0.9712, "step": 13607 }, { - "epoch": 0.3856159143075746, + "epoch": 0.5324360278582049, "grad_norm": 0.0, - "learning_rate": 1.406904324665467e-05, - "loss": 1.0314, + "learning_rate": 9.435748379315021e-06, + "loss": 1.0207, "step": 13608 }, { - "epoch": 0.3856442517498371, + "epoch": 0.5324751545504343, "grad_norm": 0.0, - "learning_rate": 1.4068204855840338e-05, - "loss": 1.0781, + "learning_rate": 9.434483171648204e-06, + "loss": 1.0606, "step": 13609 }, { - "epoch": 0.3856725891920995, + "epoch": 0.5325142812426638, "grad_norm": 0.0, - "learning_rate": 1.4067366430758004e-05, - "loss": 0.9732, + "learning_rate": 9.43321797306288e-06, + "loss": 1.0169, "step": 13610 }, { - "epoch": 0.385700926634362, + "epoch": 0.5325534079348931, "grad_norm": 0.0, - "learning_rate": 1.4066527971414732e-05, - "loss": 0.8183, + "learning_rate": 9.431952783579365e-06, + "loss": 1.0511, "step": 13611 }, { - "epoch": 0.38572926407662445, + "epoch": 0.5325925346271226, "grad_norm": 0.0, - "learning_rate": 1.4065689477817587e-05, - "loss": 0.9676, + "learning_rate": 9.430687603217978e-06, + "loss": 1.0502, "step": 13612 }, { - "epoch": 0.3857576015188869, + "epoch": 0.532631661319352, "grad_norm": 0.0, - "learning_rate": 1.4064850949973627e-05, - "loss": 0.9429, + "learning_rate": 9.429422431999033e-06, + "loss": 1.0103, "step": 13613 }, { - "epoch": 0.3857859389611494, + "epoch": 0.5326707880115815, "grad_norm": 0.0, - "learning_rate": 1.406401238788992e-05, - "loss": 0.9098, + "learning_rate": 9.428157269942847e-06, + "loss": 0.9537, "step": 13614 }, { - "epoch": 0.3858142764034118, + "epoch": 0.5327099147038109, "grad_norm": 0.0, - "learning_rate": 1.4063173791573528e-05, - "loss": 0.9512, + "learning_rate": 9.426892117069741e-06, + "loss": 0.9737, "step": 13615 }, { - "epoch": 0.3858426138456743, + "epoch": 0.5327490413960404, "grad_norm": 0.0, - "learning_rate": 1.4062335161031512e-05, - "loss": 1.0017, + "learning_rate": 9.42562697340003e-06, + "loss": 1.124, "step": 13616 }, { - "epoch": 0.38587095128793675, + "epoch": 0.5327881680882698, "grad_norm": 0.0, - "learning_rate": 1.4061496496270944e-05, - "loss": 0.9334, + "learning_rate": 9.42436183895403e-06, + "loss": 1.1382, "step": 13617 }, { - "epoch": 0.3858992887301992, + "epoch": 0.5328272947804993, "grad_norm": 0.0, - "learning_rate": 1.4060657797298876e-05, - "loss": 0.9239, + "learning_rate": 9.423096713752054e-06, + "loss": 0.9677, "step": 13618 }, { - "epoch": 0.3859276261724617, + "epoch": 0.5328664214727287, "grad_norm": 0.0, - "learning_rate": 1.4059819064122382e-05, - "loss": 0.9151, + "learning_rate": 9.421831597814424e-06, + "loss": 0.9651, "step": 13619 }, { - "epoch": 0.3859559636147241, + "epoch": 0.5329055481649582, "grad_norm": 0.0, - "learning_rate": 1.4058980296748526e-05, - "loss": 0.9656, + "learning_rate": 9.420566491161456e-06, + "loss": 1.0671, "step": 13620 }, { - "epoch": 0.3859843010569866, + "epoch": 0.5329446748571876, "grad_norm": 0.0, - "learning_rate": 1.4058141495184369e-05, - "loss": 0.9133, + "learning_rate": 9.419301393813463e-06, + "loss": 1.0661, "step": 13621 }, { - "epoch": 0.38601263849924905, + "epoch": 0.532983801549417, "grad_norm": 0.0, - "learning_rate": 1.4057302659436981e-05, - "loss": 0.8901, + "learning_rate": 9.418036305790763e-06, + "loss": 1.0079, "step": 13622 }, { - "epoch": 0.38604097594151154, + "epoch": 0.5330229282416464, "grad_norm": 0.0, - "learning_rate": 1.4056463789513425e-05, - "loss": 0.8901, + "learning_rate": 9.416771227113665e-06, + "loss": 1.0992, "step": 13623 }, { - "epoch": 0.386069313383774, + "epoch": 0.5330620549338759, "grad_norm": 0.0, - "learning_rate": 1.405562488542077e-05, - "loss": 0.934, + "learning_rate": 9.415506157802497e-06, + "loss": 1.0562, "step": 13624 }, { - "epoch": 0.3860976508260364, + "epoch": 0.5331011816261053, "grad_norm": 0.0, - "learning_rate": 1.4054785947166079e-05, - "loss": 0.8967, + "learning_rate": 9.414241097877565e-06, + "loss": 1.0333, "step": 13625 }, { - "epoch": 0.3861259882682989, + "epoch": 0.5331403083183348, "grad_norm": 0.0, - "learning_rate": 1.405394697475642e-05, - "loss": 0.9272, + "learning_rate": 9.41297604735919e-06, + "loss": 0.888, "step": 13626 }, { - "epoch": 0.38615432571056135, + "epoch": 0.5331794350105642, "grad_norm": 0.0, - "learning_rate": 1.4053107968198862e-05, - "loss": 0.8993, + "learning_rate": 9.411711006267676e-06, + "loss": 1.0673, "step": 13627 }, { - "epoch": 0.38618266315282385, + "epoch": 0.5332185617027937, "grad_norm": 0.0, - "learning_rate": 1.405226892750047e-05, - "loss": 0.8665, + "learning_rate": 9.410445974623353e-06, + "loss": 1.0023, "step": 13628 }, { - "epoch": 0.3862110005950863, + "epoch": 0.5332576883950231, "grad_norm": 0.0, - "learning_rate": 1.4051429852668312e-05, - "loss": 1.0268, + "learning_rate": 9.409180952446528e-06, + "loss": 1.0005, "step": 13629 }, { - "epoch": 0.3862393380373487, + "epoch": 0.5332968150872526, "grad_norm": 0.0, - "learning_rate": 1.4050590743709456e-05, - "loss": 0.9514, + "learning_rate": 9.407915939757516e-06, + "loss": 1.0672, "step": 13630 }, { - "epoch": 0.3862676754796112, + "epoch": 0.533335941779482, "grad_norm": 0.0, - "learning_rate": 1.4049751600630968e-05, - "loss": 1.025, + "learning_rate": 9.40665093657663e-06, + "loss": 0.9802, "step": 13631 }, { - "epoch": 0.38629601292187365, + "epoch": 0.5333750684717115, "grad_norm": 0.0, - "learning_rate": 1.4048912423439917e-05, - "loss": 0.8795, + "learning_rate": 9.405385942924189e-06, + "loss": 0.9369, "step": 13632 }, { - "epoch": 0.38632435036413615, + "epoch": 0.5334141951639408, "grad_norm": 0.0, - "learning_rate": 1.4048073212143379e-05, - "loss": 0.8649, + "learning_rate": 9.404120958820505e-06, + "loss": 0.9611, "step": 13633 }, { - "epoch": 0.3863526878063986, + "epoch": 0.5334533218561702, "grad_norm": 0.0, - "learning_rate": 1.4047233966748415e-05, - "loss": 0.8284, + "learning_rate": 9.402855984285891e-06, + "loss": 0.9766, "step": 13634 }, { - "epoch": 0.3863810252486611, + "epoch": 0.5334924485483997, "grad_norm": 0.0, - "learning_rate": 1.4046394687262095e-05, - "loss": 0.8531, + "learning_rate": 9.401591019340663e-06, + "loss": 1.0134, "step": 13635 }, { - "epoch": 0.3864093626909235, + "epoch": 0.5335315752406291, "grad_norm": 0.0, - "learning_rate": 1.404555537369149e-05, - "loss": 1.0927, + "learning_rate": 9.400326064005128e-06, + "loss": 1.0601, "step": 13636 }, { - "epoch": 0.38643770013318596, + "epoch": 0.5335707019328586, "grad_norm": 0.0, - "learning_rate": 1.404471602604367e-05, - "loss": 0.9728, + "learning_rate": 9.39906111829961e-06, + "loss": 0.9261, "step": 13637 }, { - "epoch": 0.38646603757544845, + "epoch": 0.533609828625088, "grad_norm": 0.0, - "learning_rate": 1.4043876644325705e-05, - "loss": 0.931, + "learning_rate": 9.397796182244416e-06, + "loss": 1.1107, "step": 13638 }, { - "epoch": 0.3864943750177109, + "epoch": 0.5336489553173175, "grad_norm": 0.0, - "learning_rate": 1.4043037228544667e-05, - "loss": 0.9721, + "learning_rate": 9.396531255859863e-06, + "loss": 1.0085, "step": 13639 }, { - "epoch": 0.3865227124599734, + "epoch": 0.5336880820095469, "grad_norm": 0.0, - "learning_rate": 1.4042197778707622e-05, - "loss": 0.9466, + "learning_rate": 9.395266339166256e-06, + "loss": 0.9876, "step": 13640 }, { - "epoch": 0.3865510499022358, + "epoch": 0.5337272087017764, "grad_norm": 0.0, - "learning_rate": 1.4041358294821646e-05, - "loss": 0.9527, + "learning_rate": 9.394001432183919e-06, + "loss": 1.0137, "step": 13641 }, { - "epoch": 0.38657938734449826, + "epoch": 0.5337663353940058, "grad_norm": 0.0, - "learning_rate": 1.404051877689381e-05, - "loss": 0.9292, + "learning_rate": 9.392736534933159e-06, + "loss": 0.9056, "step": 13642 }, { - "epoch": 0.38660772478676075, + "epoch": 0.5338054620862352, "grad_norm": 0.0, - "learning_rate": 1.4039679224931183e-05, - "loss": 0.9655, + "learning_rate": 9.391471647434289e-06, + "loss": 0.9743, "step": 13643 }, { - "epoch": 0.3866360622290232, + "epoch": 0.5338445887784646, "grad_norm": 0.0, - "learning_rate": 1.4038839638940835e-05, - "loss": 0.9444, + "learning_rate": 9.390206769707623e-06, + "loss": 1.0939, "step": 13644 }, { - "epoch": 0.3866643996712857, + "epoch": 0.5338837154706941, "grad_norm": 0.0, - "learning_rate": 1.403800001892984e-05, - "loss": 1.0105, + "learning_rate": 9.38894190177347e-06, + "loss": 0.9458, "step": 13645 }, { - "epoch": 0.3866927371135481, + "epoch": 0.5339228421629235, "grad_norm": 0.0, - "learning_rate": 1.4037160364905276e-05, - "loss": 0.8611, + "learning_rate": 9.387677043652141e-06, + "loss": 0.9635, "step": 13646 }, { - "epoch": 0.3867210745558106, + "epoch": 0.533961968855153, "grad_norm": 0.0, - "learning_rate": 1.403632067687421e-05, - "loss": 0.8508, + "learning_rate": 9.386412195363958e-06, + "loss": 0.9268, "step": 13647 }, { - "epoch": 0.38674941199807306, + "epoch": 0.5340010955473824, "grad_norm": 0.0, - "learning_rate": 1.4035480954843714e-05, - "loss": 1.009, + "learning_rate": 9.385147356929224e-06, + "loss": 1.0383, "step": 13648 }, { - "epoch": 0.3867777494403355, + "epoch": 0.5340402222396119, "grad_norm": 0.0, - "learning_rate": 1.4034641198820866e-05, - "loss": 0.9417, + "learning_rate": 9.38388252836825e-06, + "loss": 0.9886, "step": 13649 }, { - "epoch": 0.386806086882598, + "epoch": 0.5340793489318413, "grad_norm": 0.0, - "learning_rate": 1.4033801408812738e-05, - "loss": 0.9733, + "learning_rate": 9.382617709701355e-06, + "loss": 1.0131, "step": 13650 }, { - "epoch": 0.3868344243248604, + "epoch": 0.5341184756240708, "grad_norm": 0.0, - "learning_rate": 1.4032961584826396e-05, - "loss": 0.9963, + "learning_rate": 9.381352900948844e-06, + "loss": 1.0201, "step": 13651 }, { - "epoch": 0.3868627617671229, + "epoch": 0.5341576023163002, "grad_norm": 0.0, - "learning_rate": 1.4032121726868926e-05, - "loss": 0.8775, + "learning_rate": 9.38008810213103e-06, + "loss": 1.0999, "step": 13652 }, { - "epoch": 0.38689109920938536, + "epoch": 0.5341967290085297, "grad_norm": 0.0, - "learning_rate": 1.4031281834947397e-05, - "loss": 0.9271, + "learning_rate": 9.378823313268226e-06, + "loss": 1.0101, "step": 13653 }, { - "epoch": 0.3869194366516478, + "epoch": 0.534235855700759, "grad_norm": 0.0, - "learning_rate": 1.4030441909068886e-05, - "loss": 0.7835, + "learning_rate": 9.377558534380737e-06, + "loss": 1.1396, "step": 13654 }, { - "epoch": 0.3869477740939103, + "epoch": 0.5342749823929885, "grad_norm": 0.0, - "learning_rate": 1.4029601949240464e-05, - "loss": 0.902, + "learning_rate": 9.376293765488882e-06, + "loss": 1.0915, "step": 13655 }, { - "epoch": 0.38697611153617273, + "epoch": 0.5343141090852179, "grad_norm": 0.0, - "learning_rate": 1.4028761955469206e-05, - "loss": 0.9352, + "learning_rate": 9.375029006612966e-06, + "loss": 1.1273, "step": 13656 }, { - "epoch": 0.3870044489784352, + "epoch": 0.5343532357774474, "grad_norm": 0.0, - "learning_rate": 1.4027921927762193e-05, - "loss": 0.8247, + "learning_rate": 9.373764257773303e-06, + "loss": 0.9174, "step": 13657 }, { - "epoch": 0.38703278642069766, + "epoch": 0.5343923624696768, "grad_norm": 0.0, - "learning_rate": 1.4027081866126498e-05, - "loss": 0.9828, + "learning_rate": 9.372499518990197e-06, + "loss": 1.0577, "step": 13658 }, { - "epoch": 0.38706112386296015, + "epoch": 0.5344314891619063, "grad_norm": 0.0, - "learning_rate": 1.4026241770569198e-05, - "loss": 0.8597, + "learning_rate": 9.371234790283965e-06, + "loss": 1.0675, "step": 13659 }, { - "epoch": 0.3870894613052226, + "epoch": 0.5344706158541357, "grad_norm": 0.0, - "learning_rate": 1.4025401641097365e-05, - "loss": 0.9111, + "learning_rate": 9.369970071674916e-06, + "loss": 0.9195, "step": 13660 }, { - "epoch": 0.38711779874748503, + "epoch": 0.5345097425463652, "grad_norm": 0.0, - "learning_rate": 1.4024561477718081e-05, - "loss": 1.0266, + "learning_rate": 9.368705363183356e-06, + "loss": 1.036, "step": 13661 }, { - "epoch": 0.3871461361897475, + "epoch": 0.5345488692385946, "grad_norm": 0.0, - "learning_rate": 1.4023721280438423e-05, - "loss": 0.8994, + "learning_rate": 9.3674406648296e-06, + "loss": 1.0926, "step": 13662 }, { - "epoch": 0.38717447363200996, + "epoch": 0.534587995930824, "grad_norm": 0.0, - "learning_rate": 1.4022881049265465e-05, - "loss": 0.946, + "learning_rate": 9.366175976633949e-06, + "loss": 0.9832, "step": 13663 }, { - "epoch": 0.38720281107427246, + "epoch": 0.5346271226230535, "grad_norm": 0.0, - "learning_rate": 1.4022040784206284e-05, - "loss": 0.868, + "learning_rate": 9.36491129861672e-06, + "loss": 1.0529, "step": 13664 }, { - "epoch": 0.3872311485165349, + "epoch": 0.5346662493152828, "grad_norm": 0.0, - "learning_rate": 1.4021200485267961e-05, - "loss": 0.9808, + "learning_rate": 9.363646630798221e-06, + "loss": 1.0908, "step": 13665 }, { - "epoch": 0.38725948595879733, + "epoch": 0.5347053760075123, "grad_norm": 0.0, - "learning_rate": 1.4020360152457575e-05, - "loss": 0.9437, + "learning_rate": 9.36238197319876e-06, + "loss": 1.0123, "step": 13666 }, { - "epoch": 0.3872878234010598, + "epoch": 0.5347445026997417, "grad_norm": 0.0, - "learning_rate": 1.4019519785782201e-05, - "loss": 0.9401, + "learning_rate": 9.36111732583864e-06, + "loss": 0.9594, "step": 13667 }, { - "epoch": 0.38731616084332227, + "epoch": 0.5347836293919712, "grad_norm": 0.0, - "learning_rate": 1.401867938524892e-05, - "loss": 0.9012, + "learning_rate": 9.35985268873818e-06, + "loss": 1.0404, "step": 13668 }, { - "epoch": 0.38734449828558476, + "epoch": 0.5348227560842006, "grad_norm": 0.0, - "learning_rate": 1.4017838950864808e-05, - "loss": 1.0231, + "learning_rate": 9.358588061917684e-06, + "loss": 1.0403, "step": 13669 }, { - "epoch": 0.3873728357278472, + "epoch": 0.5348618827764301, "grad_norm": 0.0, - "learning_rate": 1.401699848263695e-05, - "loss": 0.9781, + "learning_rate": 9.35732344539746e-06, + "loss": 1.1019, "step": 13670 }, { - "epoch": 0.3874011731701097, + "epoch": 0.5349010094686595, "grad_norm": 0.0, - "learning_rate": 1.4016157980572418e-05, - "loss": 1.0325, + "learning_rate": 9.356058839197816e-06, + "loss": 0.9508, "step": 13671 }, { - "epoch": 0.38742951061237213, + "epoch": 0.534940136160889, "grad_norm": 0.0, - "learning_rate": 1.40153174446783e-05, - "loss": 0.9737, + "learning_rate": 9.354794243339056e-06, + "loss": 1.0029, "step": 13672 }, { - "epoch": 0.38745784805463457, + "epoch": 0.5349792628531184, "grad_norm": 0.0, - "learning_rate": 1.4014476874961669e-05, - "loss": 0.8338, + "learning_rate": 9.353529657841497e-06, + "loss": 1.084, "step": 13673 }, { - "epoch": 0.38748618549689706, + "epoch": 0.5350183895453479, "grad_norm": 0.0, - "learning_rate": 1.4013636271429612e-05, - "loss": 0.8778, + "learning_rate": 9.35226508272544e-06, + "loss": 0.937, "step": 13674 }, { - "epoch": 0.3875145229391595, + "epoch": 0.5350575162375772, "grad_norm": 0.0, - "learning_rate": 1.4012795634089205e-05, - "loss": 0.874, + "learning_rate": 9.351000518011196e-06, + "loss": 1.0872, "step": 13675 }, { - "epoch": 0.387542860381422, + "epoch": 0.5350966429298067, "grad_norm": 0.0, - "learning_rate": 1.4011954962947529e-05, - "loss": 1.0082, + "learning_rate": 9.349735963719065e-06, + "loss": 0.9786, "step": 13676 }, { - "epoch": 0.38757119782368443, + "epoch": 0.5351357696220361, "grad_norm": 0.0, - "learning_rate": 1.4011114258011667e-05, - "loss": 0.8958, + "learning_rate": 9.348471419869364e-06, + "loss": 1.0353, "step": 13677 }, { - "epoch": 0.38759953526594687, + "epoch": 0.5351748963142656, "grad_norm": 0.0, - "learning_rate": 1.4010273519288698e-05, - "loss": 0.797, + "learning_rate": 9.347206886482394e-06, + "loss": 1.0739, "step": 13678 }, { - "epoch": 0.38762787270820936, + "epoch": 0.535214023006495, "grad_norm": 0.0, - "learning_rate": 1.400943274678571e-05, - "loss": 1.1114, + "learning_rate": 9.345942363578467e-06, + "loss": 1.0254, "step": 13679 }, { - "epoch": 0.3876562101504718, + "epoch": 0.5352531496987245, "grad_norm": 0.0, - "learning_rate": 1.400859194050978e-05, - "loss": 0.8456, + "learning_rate": 9.344677851177884e-06, + "loss": 0.9784, "step": 13680 }, { - "epoch": 0.3876845475927343, + "epoch": 0.5352922763909539, "grad_norm": 0.0, - "learning_rate": 1.4007751100467988e-05, - "loss": 0.9935, + "learning_rate": 9.343413349300948e-06, + "loss": 1.0517, "step": 13681 }, { - "epoch": 0.38771288503499673, + "epoch": 0.5353314030831834, "grad_norm": 0.0, - "learning_rate": 1.4006910226667425e-05, - "loss": 0.7843, + "learning_rate": 9.342148857967978e-06, + "loss": 1.0021, "step": 13682 }, { - "epoch": 0.38774122247725923, + "epoch": 0.5353705297754128, "grad_norm": 0.0, - "learning_rate": 1.4006069319115168e-05, - "loss": 1.0035, + "learning_rate": 9.34088437719927e-06, + "loss": 1.1639, "step": 13683 }, { - "epoch": 0.38776955991952167, + "epoch": 0.5354096564676423, "grad_norm": 0.0, - "learning_rate": 1.4005228377818298e-05, - "loss": 0.9869, + "learning_rate": 9.339619907015135e-06, + "loss": 1.0463, "step": 13684 }, { - "epoch": 0.3877978973617841, + "epoch": 0.5354487831598717, "grad_norm": 0.0, - "learning_rate": 1.4004387402783906e-05, - "loss": 0.8164, + "learning_rate": 9.338355447435871e-06, + "loss": 1.054, "step": 13685 }, { - "epoch": 0.3878262348040466, + "epoch": 0.5354879098521012, "grad_norm": 0.0, - "learning_rate": 1.4003546394019071e-05, - "loss": 0.9493, + "learning_rate": 9.337090998481796e-06, + "loss": 1.0684, "step": 13686 }, { - "epoch": 0.38785457224630904, + "epoch": 0.5355270365443305, "grad_norm": 0.0, - "learning_rate": 1.4002705351530878e-05, - "loss": 0.9518, + "learning_rate": 9.335826560173207e-06, + "loss": 1.1258, "step": 13687 }, { - "epoch": 0.38788290968857153, + "epoch": 0.53556616323656, "grad_norm": 0.0, - "learning_rate": 1.4001864275326412e-05, - "loss": 1.0576, + "learning_rate": 9.334562132530412e-06, + "loss": 1.074, "step": 13688 }, { - "epoch": 0.38791124713083397, + "epoch": 0.5356052899287894, "grad_norm": 0.0, - "learning_rate": 1.4001023165412754e-05, - "loss": 0.9084, + "learning_rate": 9.333297715573713e-06, + "loss": 1.1237, "step": 13689 }, { - "epoch": 0.3879395845730964, + "epoch": 0.5356444166210188, "grad_norm": 0.0, - "learning_rate": 1.4000182021796995e-05, - "loss": 0.9953, + "learning_rate": 9.33203330932342e-06, + "loss": 1.0502, "step": 13690 }, { - "epoch": 0.3879679220153589, + "epoch": 0.5356835433132483, "grad_norm": 0.0, - "learning_rate": 1.3999340844486218e-05, - "loss": 0.9043, + "learning_rate": 9.330768913799831e-06, + "loss": 1.0811, "step": 13691 }, { - "epoch": 0.38799625945762134, + "epoch": 0.5357226700054777, "grad_norm": 0.0, - "learning_rate": 1.3998499633487509e-05, - "loss": 0.9038, + "learning_rate": 9.329504529023259e-06, + "loss": 1.1392, "step": 13692 }, { - "epoch": 0.38802459689988383, + "epoch": 0.5357617966977072, "grad_norm": 0.0, - "learning_rate": 1.3997658388807948e-05, - "loss": 0.9988, + "learning_rate": 9.328240155014001e-06, + "loss": 1.0505, "step": 13693 }, { - "epoch": 0.38805293434214627, + "epoch": 0.5358009233899366, "grad_norm": 0.0, - "learning_rate": 1.3996817110454627e-05, - "loss": 0.8049, + "learning_rate": 9.326975791792366e-06, + "loss": 1.0159, "step": 13694 }, { - "epoch": 0.38808127178440877, + "epoch": 0.5358400500821661, "grad_norm": 0.0, - "learning_rate": 1.3995975798434636e-05, - "loss": 0.8892, + "learning_rate": 9.325711439378658e-06, + "loss": 1.0266, "step": 13695 }, { - "epoch": 0.3881096092266712, + "epoch": 0.5358791767743954, "grad_norm": 0.0, - "learning_rate": 1.3995134452755055e-05, - "loss": 0.99, + "learning_rate": 9.324447097793174e-06, + "loss": 1.0518, "step": 13696 }, { - "epoch": 0.38813794666893364, + "epoch": 0.535918303466625, "grad_norm": 0.0, - "learning_rate": 1.399429307342297e-05, - "loss": 1.0135, + "learning_rate": 9.323182767056228e-06, + "loss": 1.0317, "step": 13697 }, { - "epoch": 0.38816628411119614, + "epoch": 0.5359574301588543, "grad_norm": 0.0, - "learning_rate": 1.3993451660445472e-05, - "loss": 0.9403, + "learning_rate": 9.321918447188116e-06, + "loss": 1.1213, "step": 13698 }, { - "epoch": 0.3881946215534586, + "epoch": 0.5359965568510838, "grad_norm": 0.0, - "learning_rate": 1.3992610213829649e-05, - "loss": 0.8184, + "learning_rate": 9.320654138209146e-06, + "loss": 0.9915, "step": 13699 }, { - "epoch": 0.38822295899572107, + "epoch": 0.5360356835433132, "grad_norm": 0.0, - "learning_rate": 1.3991768733582589e-05, - "loss": 0.9822, + "learning_rate": 9.31938984013962e-06, + "loss": 1.0435, "step": 13700 }, { - "epoch": 0.3882512964379835, + "epoch": 0.5360748102355427, "grad_norm": 0.0, - "learning_rate": 1.3990927219711377e-05, - "loss": 0.9311, + "learning_rate": 9.318125552999839e-06, + "loss": 1.0106, "step": 13701 }, { - "epoch": 0.38827963388024594, + "epoch": 0.5361139369277721, "grad_norm": 0.0, - "learning_rate": 1.3990085672223102e-05, - "loss": 0.9012, + "learning_rate": 9.316861276810105e-06, + "loss": 1.0161, "step": 13702 }, { - "epoch": 0.38830797132250844, + "epoch": 0.5361530636200016, "grad_norm": 0.0, - "learning_rate": 1.3989244091124853e-05, - "loss": 0.9263, + "learning_rate": 9.315597011590724e-06, + "loss": 0.9383, "step": 13703 }, { - "epoch": 0.3883363087647709, + "epoch": 0.536192190312231, "grad_norm": 0.0, - "learning_rate": 1.3988402476423722e-05, - "loss": 0.8907, + "learning_rate": 9.314332757361998e-06, + "loss": 1.0061, "step": 13704 }, { - "epoch": 0.38836464620703337, + "epoch": 0.5362313170044605, "grad_norm": 0.0, - "learning_rate": 1.3987560828126796e-05, - "loss": 0.8995, + "learning_rate": 9.313068514144232e-06, + "loss": 1.1442, "step": 13705 }, { - "epoch": 0.3883929836492958, + "epoch": 0.5362704436966899, "grad_norm": 0.0, - "learning_rate": 1.3986719146241163e-05, - "loss": 0.96, + "learning_rate": 9.31180428195772e-06, + "loss": 1.0478, "step": 13706 }, { - "epoch": 0.3884213210915583, + "epoch": 0.5363095703889194, "grad_norm": 0.0, - "learning_rate": 1.3985877430773916e-05, - "loss": 1.0428, + "learning_rate": 9.310540060822769e-06, + "loss": 1.0141, "step": 13707 }, { - "epoch": 0.38844965853382074, + "epoch": 0.5363486970811487, "grad_norm": 0.0, - "learning_rate": 1.3985035681732141e-05, - "loss": 0.8971, + "learning_rate": 9.309275850759683e-06, + "loss": 0.9849, "step": 13708 }, { - "epoch": 0.3884779959760832, + "epoch": 0.5363878237733782, "grad_norm": 0.0, - "learning_rate": 1.3984193899122932e-05, - "loss": 0.9667, + "learning_rate": 9.308011651788763e-06, + "loss": 1.0684, "step": 13709 }, { - "epoch": 0.3885063334183457, + "epoch": 0.5364269504656076, "grad_norm": 0.0, - "learning_rate": 1.3983352082953378e-05, - "loss": 1.0591, + "learning_rate": 9.306747463930307e-06, + "loss": 1.0515, "step": 13710 }, { - "epoch": 0.3885346708606081, + "epoch": 0.5364660771578371, "grad_norm": 0.0, - "learning_rate": 1.3982510233230569e-05, - "loss": 0.975, + "learning_rate": 9.305483287204618e-06, + "loss": 1.0223, "step": 13711 }, { - "epoch": 0.3885630083028706, + "epoch": 0.5365052038500665, "grad_norm": 0.0, - "learning_rate": 1.3981668349961599e-05, - "loss": 0.8703, + "learning_rate": 9.304219121631993e-06, + "loss": 1.064, "step": 13712 }, { - "epoch": 0.38859134574513304, + "epoch": 0.536544330542296, "grad_norm": 0.0, - "learning_rate": 1.3980826433153558e-05, - "loss": 0.8595, + "learning_rate": 9.302954967232741e-06, + "loss": 0.9938, "step": 13713 }, { - "epoch": 0.3886196831873955, + "epoch": 0.5365834572345254, "grad_norm": 0.0, - "learning_rate": 1.3979984482813538e-05, - "loss": 0.9631, + "learning_rate": 9.30169082402716e-06, + "loss": 1.1079, "step": 13714 }, { - "epoch": 0.388648020629658, + "epoch": 0.5366225839267549, "grad_norm": 0.0, - "learning_rate": 1.397914249894863e-05, - "loss": 0.9213, + "learning_rate": 9.30042669203555e-06, + "loss": 1.0517, "step": 13715 }, { - "epoch": 0.3886763580719204, + "epoch": 0.5366617106189843, "grad_norm": 0.0, - "learning_rate": 1.3978300481565928e-05, - "loss": 0.8781, + "learning_rate": 9.299162571278203e-06, + "loss": 0.9772, "step": 13716 }, { - "epoch": 0.3887046955141829, + "epoch": 0.5367008373112138, "grad_norm": 0.0, - "learning_rate": 1.3977458430672521e-05, - "loss": 0.9669, + "learning_rate": 9.297898461775435e-06, + "loss": 1.1067, "step": 13717 }, { - "epoch": 0.38873303295644535, + "epoch": 0.5367399640034431, "grad_norm": 0.0, - "learning_rate": 1.3976616346275505e-05, - "loss": 0.8518, + "learning_rate": 9.296634363547535e-06, + "loss": 0.9614, "step": 13718 }, { - "epoch": 0.38876137039870784, + "epoch": 0.5367790906956725, "grad_norm": 0.0, - "learning_rate": 1.3975774228381975e-05, - "loss": 0.9113, + "learning_rate": 9.295370276614806e-06, + "loss": 1.0656, "step": 13719 }, { - "epoch": 0.3887897078409703, + "epoch": 0.536818217387902, "grad_norm": 0.0, - "learning_rate": 1.3974932076999023e-05, - "loss": 1.0074, + "learning_rate": 9.294106200997548e-06, + "loss": 1.0857, "step": 13720 }, { - "epoch": 0.3888180452832327, + "epoch": 0.5368573440801314, "grad_norm": 0.0, - "learning_rate": 1.3974089892133742e-05, - "loss": 0.9429, + "learning_rate": 9.292842136716058e-06, + "loss": 0.9975, "step": 13721 }, { - "epoch": 0.3888463827254952, + "epoch": 0.5368964707723609, "grad_norm": 0.0, - "learning_rate": 1.3973247673793226e-05, - "loss": 0.9314, + "learning_rate": 9.29157808379064e-06, + "loss": 0.9545, "step": 13722 }, { - "epoch": 0.38887472016775765, + "epoch": 0.5369355974645903, "grad_norm": 0.0, - "learning_rate": 1.3972405421984568e-05, - "loss": 0.9099, + "learning_rate": 9.290314042241589e-06, + "loss": 1.0452, "step": 13723 }, { - "epoch": 0.38890305761002014, + "epoch": 0.5369747241568198, "grad_norm": 0.0, - "learning_rate": 1.397156313671486e-05, - "loss": 0.9226, + "learning_rate": 9.289050012089205e-06, + "loss": 1.0679, "step": 13724 }, { - "epoch": 0.3889313950522826, + "epoch": 0.5370138508490492, "grad_norm": 0.0, - "learning_rate": 1.3970720817991208e-05, - "loss": 0.9129, + "learning_rate": 9.287785993353784e-06, + "loss": 0.9583, "step": 13725 }, { - "epoch": 0.388959732494545, + "epoch": 0.5370529775412787, "grad_norm": 0.0, - "learning_rate": 1.3969878465820697e-05, - "loss": 1.006, + "learning_rate": 9.28652198605563e-06, + "loss": 1.0697, "step": 13726 }, { - "epoch": 0.3889880699368075, + "epoch": 0.5370921042335081, "grad_norm": 0.0, - "learning_rate": 1.3969036080210425e-05, - "loss": 0.9505, + "learning_rate": 9.28525799021504e-06, + "loss": 1.1141, "step": 13727 }, { - "epoch": 0.38901640737906995, + "epoch": 0.5371312309257376, "grad_norm": 0.0, - "learning_rate": 1.396819366116749e-05, - "loss": 0.956, + "learning_rate": 9.283994005852313e-06, + "loss": 0.9716, "step": 13728 }, { - "epoch": 0.38904474482133244, + "epoch": 0.5371703576179669, "grad_norm": 0.0, - "learning_rate": 1.3967351208698985e-05, - "loss": 0.9182, + "learning_rate": 9.282730032987743e-06, + "loss": 0.9705, "step": 13729 }, { - "epoch": 0.3890730822635949, + "epoch": 0.5372094843101964, "grad_norm": 0.0, - "learning_rate": 1.3966508722812009e-05, - "loss": 1.0106, + "learning_rate": 9.281466071641624e-06, + "loss": 0.8954, "step": 13730 }, { - "epoch": 0.3891014197058574, + "epoch": 0.5372486110024258, "grad_norm": 0.0, - "learning_rate": 1.3965666203513653e-05, - "loss": 0.8907, + "learning_rate": 9.280202121834268e-06, + "loss": 1.0353, "step": 13731 }, { - "epoch": 0.3891297571481198, + "epoch": 0.5372877376946553, "grad_norm": 0.0, - "learning_rate": 1.3964823650811021e-05, - "loss": 0.9812, + "learning_rate": 9.27893818358596e-06, + "loss": 1.0075, "step": 13732 }, { - "epoch": 0.38915809459038225, + "epoch": 0.5373268643868847, "grad_norm": 0.0, - "learning_rate": 1.396398106471121e-05, - "loss": 0.9588, + "learning_rate": 9.277674256917004e-06, + "loss": 1.0808, "step": 13733 }, { - "epoch": 0.38918643203264475, + "epoch": 0.5373659910791142, "grad_norm": 0.0, - "learning_rate": 1.3963138445221311e-05, - "loss": 0.9173, + "learning_rate": 9.27641034184769e-06, + "loss": 1.0539, "step": 13734 }, { - "epoch": 0.3892147694749072, + "epoch": 0.5374051177713436, "grad_norm": 0.0, - "learning_rate": 1.3962295792348424e-05, - "loss": 0.8969, + "learning_rate": 9.275146438398322e-06, + "loss": 1.0207, "step": 13735 }, { - "epoch": 0.3892431069171697, + "epoch": 0.5374442444635731, "grad_norm": 0.0, - "learning_rate": 1.396145310609965e-05, - "loss": 0.9332, + "learning_rate": 9.273882546589194e-06, + "loss": 1.1016, "step": 13736 }, { - "epoch": 0.3892714443594321, + "epoch": 0.5374833711558025, "grad_norm": 0.0, - "learning_rate": 1.3960610386482085e-05, - "loss": 0.8614, + "learning_rate": 9.2726186664406e-06, + "loss": 0.9505, "step": 13737 }, { - "epoch": 0.38929978180169456, + "epoch": 0.537522497848032, "grad_norm": 0.0, - "learning_rate": 1.3959767633502827e-05, - "loss": 0.8787, + "learning_rate": 9.271354797972841e-06, + "loss": 0.9882, "step": 13738 }, { - "epoch": 0.38932811924395705, + "epoch": 0.5375616245402614, "grad_norm": 0.0, - "learning_rate": 1.3958924847168977e-05, - "loss": 0.8562, + "learning_rate": 9.270090941206211e-06, + "loss": 1.0784, "step": 13739 }, { - "epoch": 0.3893564566862195, + "epoch": 0.5376007512324908, "grad_norm": 0.0, - "learning_rate": 1.3958082027487634e-05, - "loss": 0.8196, + "learning_rate": 9.268827096161007e-06, + "loss": 0.9548, "step": 13740 }, { - "epoch": 0.389384794128482, + "epoch": 0.5376398779247202, "grad_norm": 0.0, - "learning_rate": 1.3957239174465898e-05, - "loss": 0.8618, + "learning_rate": 9.26756326285752e-06, + "loss": 1.0412, "step": 13741 }, { - "epoch": 0.3894131315707444, + "epoch": 0.5376790046169497, "grad_norm": 0.0, - "learning_rate": 1.395639628811086e-05, - "loss": 0.9422, + "learning_rate": 9.266299441316053e-06, + "loss": 0.9771, "step": 13742 }, { - "epoch": 0.3894414690130069, + "epoch": 0.5377181313091791, "grad_norm": 0.0, - "learning_rate": 1.395555336842963e-05, - "loss": 0.9406, + "learning_rate": 9.265035631556894e-06, + "loss": 0.9653, "step": 13743 }, { - "epoch": 0.38946980645526935, + "epoch": 0.5377572580014086, "grad_norm": 0.0, - "learning_rate": 1.3954710415429307e-05, - "loss": 0.9426, + "learning_rate": 9.263771833600345e-06, + "loss": 0.9927, "step": 13744 }, { - "epoch": 0.3894981438975318, + "epoch": 0.537796384693638, "grad_norm": 0.0, - "learning_rate": 1.3953867429116991e-05, - "loss": 0.9608, + "learning_rate": 9.262508047466698e-06, + "loss": 1.0782, "step": 13745 }, { - "epoch": 0.3895264813397943, + "epoch": 0.5378355113858675, "grad_norm": 0.0, - "learning_rate": 1.395302440949978e-05, - "loss": 0.959, + "learning_rate": 9.261244273176246e-06, + "loss": 1.053, "step": 13746 }, { - "epoch": 0.3895548187820567, + "epoch": 0.5378746380780969, "grad_norm": 0.0, - "learning_rate": 1.3952181356584773e-05, - "loss": 0.922, + "learning_rate": 9.259980510749281e-06, + "loss": 1.0736, "step": 13747 }, { - "epoch": 0.3895831562243192, + "epoch": 0.5379137647703263, "grad_norm": 0.0, - "learning_rate": 1.395133827037908e-05, - "loss": 0.8969, + "learning_rate": 9.258716760206107e-06, + "loss": 1.0127, "step": 13748 }, { - "epoch": 0.38961149366658165, + "epoch": 0.5379528914625558, "grad_norm": 0.0, - "learning_rate": 1.3950495150889793e-05, - "loss": 0.8736, + "learning_rate": 9.257453021567013e-06, + "loss": 0.9558, "step": 13749 }, { - "epoch": 0.3896398311088441, + "epoch": 0.5379920181547851, "grad_norm": 0.0, - "learning_rate": 1.394965199812402e-05, - "loss": 0.9618, + "learning_rate": 9.256189294852294e-06, + "loss": 1.1007, "step": 13750 }, { - "epoch": 0.3896681685511066, + "epoch": 0.5380311448470146, "grad_norm": 0.0, - "learning_rate": 1.3948808812088863e-05, - "loss": 0.9999, + "learning_rate": 9.254925580082242e-06, + "loss": 1.1175, "step": 13751 }, { - "epoch": 0.389696505993369, + "epoch": 0.538070271539244, "grad_norm": 0.0, - "learning_rate": 1.394796559279142e-05, - "loss": 0.9297, + "learning_rate": 9.253661877277145e-06, + "loss": 1.0451, "step": 13752 }, { - "epoch": 0.3897248434356315, + "epoch": 0.5381093982314735, "grad_norm": 0.0, - "learning_rate": 1.39471223402388e-05, - "loss": 0.8285, + "learning_rate": 9.25239818645731e-06, + "loss": 1.0419, "step": 13753 }, { - "epoch": 0.38975318087789396, + "epoch": 0.5381485249237029, "grad_norm": 0.0, - "learning_rate": 1.3946279054438103e-05, - "loss": 0.9725, + "learning_rate": 9.251134507643022e-06, + "loss": 1.053, "step": 13754 }, { - "epoch": 0.38978151832015645, + "epoch": 0.5381876516159324, "grad_norm": 0.0, - "learning_rate": 1.3945435735396428e-05, - "loss": 0.8223, + "learning_rate": 9.249870840854576e-06, + "loss": 0.9881, "step": 13755 }, { - "epoch": 0.3898098557624189, + "epoch": 0.5382267783081618, "grad_norm": 0.0, - "learning_rate": 1.3944592383120885e-05, - "loss": 0.9855, + "learning_rate": 9.24860718611226e-06, + "loss": 1.2278, "step": 13756 }, { - "epoch": 0.3898381932046813, + "epoch": 0.5382659050003913, "grad_norm": 0.0, - "learning_rate": 1.3943748997618576e-05, - "loss": 0.825, + "learning_rate": 9.247343543436376e-06, + "loss": 1.1267, "step": 13757 }, { - "epoch": 0.3898665306469438, + "epoch": 0.5383050316926207, "grad_norm": 0.0, - "learning_rate": 1.3942905578896606e-05, - "loss": 1.0316, + "learning_rate": 9.246079912847211e-06, + "loss": 0.9176, "step": 13758 }, { - "epoch": 0.38989486808920626, + "epoch": 0.5383441583848502, "grad_norm": 0.0, - "learning_rate": 1.3942062126962078e-05, - "loss": 0.8985, + "learning_rate": 9.244816294365058e-06, + "loss": 1.0432, "step": 13759 }, { - "epoch": 0.38992320553146875, + "epoch": 0.5383832850770796, "grad_norm": 0.0, - "learning_rate": 1.3941218641822094e-05, - "loss": 0.9991, + "learning_rate": 9.243552688010209e-06, + "loss": 0.9976, "step": 13760 }, { - "epoch": 0.3899515429737312, + "epoch": 0.538422411769309, "grad_norm": 0.0, - "learning_rate": 1.3940375123483764e-05, - "loss": 0.9038, + "learning_rate": 9.24228909380295e-06, + "loss": 1.15, "step": 13761 }, { - "epoch": 0.38997988041599363, + "epoch": 0.5384615384615384, "grad_norm": 0.0, - "learning_rate": 1.393953157195419e-05, - "loss": 0.9136, + "learning_rate": 9.241025511763587e-06, + "loss": 1.0229, "step": 13762 }, { - "epoch": 0.3900082178582561, + "epoch": 0.5385006651537679, "grad_norm": 0.0, - "learning_rate": 1.393868798724048e-05, - "loss": 0.8847, + "learning_rate": 9.2397619419124e-06, + "loss": 0.9476, "step": 13763 }, { - "epoch": 0.39003655530051856, + "epoch": 0.5385397918459973, "grad_norm": 0.0, - "learning_rate": 1.3937844369349736e-05, - "loss": 1.0137, + "learning_rate": 9.238498384269684e-06, + "loss": 1.0185, "step": 13764 }, { - "epoch": 0.39006489274278106, + "epoch": 0.5385789185382268, "grad_norm": 0.0, - "learning_rate": 1.393700071828907e-05, - "loss": 0.9993, + "learning_rate": 9.237234838855725e-06, + "loss": 1.1294, "step": 13765 }, { - "epoch": 0.3900932301850435, + "epoch": 0.5386180452304562, "grad_norm": 0.0, - "learning_rate": 1.3936157034065583e-05, - "loss": 0.9804, + "learning_rate": 9.235971305690825e-06, + "loss": 1.0139, "step": 13766 }, { - "epoch": 0.39012156762730593, + "epoch": 0.5386571719226857, "grad_norm": 0.0, - "learning_rate": 1.3935313316686385e-05, - "loss": 0.8647, + "learning_rate": 9.234707784795266e-06, + "loss": 0.9816, "step": 13767 }, { - "epoch": 0.3901499050695684, + "epoch": 0.5386962986149151, "grad_norm": 0.0, - "learning_rate": 1.3934469566158579e-05, - "loss": 0.9755, + "learning_rate": 9.233444276189342e-06, + "loss": 1.0958, "step": 13768 }, { - "epoch": 0.39017824251183086, + "epoch": 0.5387354253071446, "grad_norm": 0.0, - "learning_rate": 1.3933625782489275e-05, - "loss": 0.8684, + "learning_rate": 9.232180779893343e-06, + "loss": 0.8993, "step": 13769 }, { - "epoch": 0.39020657995409336, + "epoch": 0.538774551999374, "grad_norm": 0.0, - "learning_rate": 1.3932781965685583e-05, - "loss": 0.9484, + "learning_rate": 9.230917295927553e-06, + "loss": 1.0141, "step": 13770 }, { - "epoch": 0.3902349173963558, + "epoch": 0.5388136786916035, "grad_norm": 0.0, - "learning_rate": 1.3931938115754606e-05, - "loss": 0.9286, + "learning_rate": 9.229653824312273e-06, + "loss": 1.0136, "step": 13771 }, { - "epoch": 0.3902632548386183, + "epoch": 0.5388528053838328, "grad_norm": 0.0, - "learning_rate": 1.3931094232703456e-05, - "loss": 0.9002, + "learning_rate": 9.228390365067787e-06, + "loss": 1.0901, "step": 13772 }, { - "epoch": 0.39029159228088073, + "epoch": 0.5388919320760623, "grad_norm": 0.0, - "learning_rate": 1.3930250316539237e-05, - "loss": 0.9959, + "learning_rate": 9.227126918214385e-06, + "loss": 0.9493, "step": 13773 }, { - "epoch": 0.39031992972314317, + "epoch": 0.5389310587682917, "grad_norm": 0.0, - "learning_rate": 1.3929406367269063e-05, - "loss": 0.9783, + "learning_rate": 9.22586348377235e-06, + "loss": 1.0197, "step": 13774 }, { - "epoch": 0.39034826716540566, + "epoch": 0.5389701854605212, "grad_norm": 0.0, - "learning_rate": 1.3928562384900037e-05, - "loss": 1.0173, + "learning_rate": 9.224600061761986e-06, + "loss": 0.901, "step": 13775 }, { - "epoch": 0.3903766046076681, + "epoch": 0.5390093121527506, "grad_norm": 0.0, - "learning_rate": 1.3927718369439274e-05, - "loss": 0.8376, + "learning_rate": 9.22333665220357e-06, + "loss": 1.0986, "step": 13776 }, { - "epoch": 0.3904049420499306, + "epoch": 0.53904843884498, "grad_norm": 0.0, - "learning_rate": 1.3926874320893878e-05, - "loss": 1.0363, + "learning_rate": 9.222073255117395e-06, + "loss": 0.9938, "step": 13777 }, { - "epoch": 0.39043327949219303, + "epoch": 0.5390875655372095, "grad_norm": 0.0, - "learning_rate": 1.3926030239270962e-05, - "loss": 0.7351, + "learning_rate": 9.220809870523749e-06, + "loss": 0.9299, "step": 13778 }, { - "epoch": 0.39046161693445547, + "epoch": 0.5391266922294389, "grad_norm": 0.0, - "learning_rate": 1.3925186124577639e-05, - "loss": 0.9918, + "learning_rate": 9.219546498442917e-06, + "loss": 1.1349, "step": 13779 }, { - "epoch": 0.39048995437671796, + "epoch": 0.5391658189216684, "grad_norm": 0.0, - "learning_rate": 1.3924341976821013e-05, - "loss": 0.829, + "learning_rate": 9.218283138895192e-06, + "loss": 1.0095, "step": 13780 }, { - "epoch": 0.3905182918189804, + "epoch": 0.5392049456138978, "grad_norm": 0.0, - "learning_rate": 1.3923497796008196e-05, - "loss": 0.8241, + "learning_rate": 9.21701979190086e-06, + "loss": 1.0012, "step": 13781 }, { - "epoch": 0.3905466292612429, + "epoch": 0.5392440723061273, "grad_norm": 0.0, - "learning_rate": 1.39226535821463e-05, - "loss": 0.9079, + "learning_rate": 9.21575645748021e-06, + "loss": 1.1046, "step": 13782 }, { - "epoch": 0.39057496670350533, + "epoch": 0.5392831989983566, "grad_norm": 0.0, - "learning_rate": 1.3921809335242438e-05, - "loss": 0.8628, + "learning_rate": 9.214493135653526e-06, + "loss": 0.9871, "step": 13783 }, { - "epoch": 0.3906033041457678, + "epoch": 0.5393223256905861, "grad_norm": 0.0, - "learning_rate": 1.3920965055303718e-05, - "loss": 1.0205, + "learning_rate": 9.213229826441103e-06, + "loss": 1.0946, "step": 13784 }, { - "epoch": 0.39063164158803027, + "epoch": 0.5393614523828155, "grad_norm": 0.0, - "learning_rate": 1.3920120742337255e-05, - "loss": 0.8557, + "learning_rate": 9.21196652986322e-06, + "loss": 1.1327, "step": 13785 }, { - "epoch": 0.3906599790302927, + "epoch": 0.539400579075045, "grad_norm": 0.0, - "learning_rate": 1.391927639635016e-05, - "loss": 0.9757, + "learning_rate": 9.210703245940166e-06, + "loss": 1.0066, "step": 13786 }, { - "epoch": 0.3906883164725552, + "epoch": 0.5394397057672744, "grad_norm": 0.0, - "learning_rate": 1.3918432017349543e-05, - "loss": 0.8494, + "learning_rate": 9.20943997469223e-06, + "loss": 1.1328, "step": 13787 }, { - "epoch": 0.39071665391481764, + "epoch": 0.5394788324595039, "grad_norm": 0.0, - "learning_rate": 1.3917587605342517e-05, - "loss": 0.9463, + "learning_rate": 9.208176716139698e-06, + "loss": 0.9021, "step": 13788 }, { - "epoch": 0.39074499135708013, + "epoch": 0.5395179591517333, "grad_norm": 0.0, - "learning_rate": 1.3916743160336197e-05, - "loss": 0.7977, + "learning_rate": 9.206913470302856e-06, + "loss": 1.1418, "step": 13789 }, { - "epoch": 0.39077332879934257, + "epoch": 0.5395570858439628, "grad_norm": 0.0, - "learning_rate": 1.3915898682337697e-05, - "loss": 0.8536, + "learning_rate": 9.205650237201989e-06, + "loss": 1.0627, "step": 13790 }, { - "epoch": 0.390801666241605, + "epoch": 0.5395962125361922, "grad_norm": 0.0, - "learning_rate": 1.3915054171354126e-05, - "loss": 0.8854, + "learning_rate": 9.204387016857384e-06, + "loss": 1.0664, "step": 13791 }, { - "epoch": 0.3908300036838675, + "epoch": 0.5396353392284217, "grad_norm": 0.0, - "learning_rate": 1.39142096273926e-05, - "loss": 0.956, + "learning_rate": 9.203123809289323e-06, + "loss": 1.2209, "step": 13792 }, { - "epoch": 0.39085834112612994, + "epoch": 0.539674465920651, "grad_norm": 0.0, - "learning_rate": 1.3913365050460232e-05, - "loss": 1.0444, + "learning_rate": 9.201860614518098e-06, + "loss": 1.1523, "step": 13793 }, { - "epoch": 0.39088667856839243, + "epoch": 0.5397135926128805, "grad_norm": 0.0, - "learning_rate": 1.3912520440564139e-05, - "loss": 0.7748, + "learning_rate": 9.200597432563993e-06, + "loss": 1.0886, "step": 13794 }, { - "epoch": 0.39091501601065487, + "epoch": 0.5397527193051099, "grad_norm": 0.0, - "learning_rate": 1.3911675797711431e-05, - "loss": 0.9086, + "learning_rate": 9.199334263447292e-06, + "loss": 1.0876, "step": 13795 }, { - "epoch": 0.39094335345291736, + "epoch": 0.5397918459973394, "grad_norm": 0.0, - "learning_rate": 1.3910831121909228e-05, - "loss": 0.9731, + "learning_rate": 9.198071107188274e-06, + "loss": 1.1551, "step": 13796 }, { - "epoch": 0.3909716908951798, + "epoch": 0.5398309726895688, "grad_norm": 0.0, - "learning_rate": 1.3909986413164637e-05, - "loss": 0.974, + "learning_rate": 9.196807963807234e-06, + "loss": 0.8626, "step": 13797 }, { - "epoch": 0.39100002833744224, + "epoch": 0.5398700993817983, "grad_norm": 0.0, - "learning_rate": 1.3909141671484785e-05, - "loss": 0.942, + "learning_rate": 9.195544833324452e-06, + "loss": 0.9677, "step": 13798 }, { - "epoch": 0.39102836577970473, + "epoch": 0.5399092260740277, "grad_norm": 0.0, - "learning_rate": 1.3908296896876778e-05, - "loss": 0.8351, + "learning_rate": 9.194281715760212e-06, + "loss": 1.0512, "step": 13799 }, { - "epoch": 0.3910567032219672, + "epoch": 0.5399483527662572, "grad_norm": 0.0, - "learning_rate": 1.3907452089347738e-05, - "loss": 0.9597, + "learning_rate": 9.193018611134796e-06, + "loss": 0.9995, "step": 13800 }, { - "epoch": 0.39108504066422967, + "epoch": 0.5399874794584866, "grad_norm": 0.0, - "learning_rate": 1.3906607248904773e-05, - "loss": 0.7528, + "learning_rate": 9.191755519468487e-06, + "loss": 0.8924, "step": 13801 }, { - "epoch": 0.3911133781064921, + "epoch": 0.5400266061507161, "grad_norm": 0.0, - "learning_rate": 1.3905762375555006e-05, - "loss": 1.1571, + "learning_rate": 9.190492440781576e-06, + "loss": 1.0669, "step": 13802 }, { - "epoch": 0.39114171554875454, + "epoch": 0.5400657328429455, "grad_norm": 0.0, - "learning_rate": 1.3904917469305555e-05, - "loss": 0.7882, + "learning_rate": 9.189229375094342e-06, + "loss": 1.0043, "step": 13803 }, { - "epoch": 0.39117005299101704, + "epoch": 0.5401048595351748, "grad_norm": 0.0, - "learning_rate": 1.3904072530163532e-05, - "loss": 0.9209, + "learning_rate": 9.187966322427068e-06, + "loss": 0.962, "step": 13804 }, { - "epoch": 0.3911983904332795, + "epoch": 0.5401439862274043, "grad_norm": 0.0, - "learning_rate": 1.3903227558136057e-05, - "loss": 0.9428, + "learning_rate": 9.186703282800033e-06, + "loss": 1.0222, "step": 13805 }, { - "epoch": 0.39122672787554197, + "epoch": 0.5401831129196337, "grad_norm": 0.0, - "learning_rate": 1.3902382553230245e-05, - "loss": 0.8962, + "learning_rate": 9.18544025623353e-06, + "loss": 0.9715, "step": 13806 }, { - "epoch": 0.3912550653178044, + "epoch": 0.5402222396118632, "grad_norm": 0.0, - "learning_rate": 1.3901537515453218e-05, - "loss": 0.9579, + "learning_rate": 9.184177242747833e-06, + "loss": 1.0483, "step": 13807 }, { - "epoch": 0.3912834027600669, + "epoch": 0.5402613663040926, "grad_norm": 0.0, - "learning_rate": 1.390069244481209e-05, - "loss": 0.8192, + "learning_rate": 9.18291424236323e-06, + "loss": 1.0875, "step": 13808 }, { - "epoch": 0.39131174020232934, + "epoch": 0.5403004929963221, "grad_norm": 0.0, - "learning_rate": 1.3899847341313982e-05, - "loss": 0.8841, + "learning_rate": 9.181651255099998e-06, + "loss": 0.9515, "step": 13809 }, { - "epoch": 0.3913400776445918, + "epoch": 0.5403396196885515, "grad_norm": 0.0, - "learning_rate": 1.389900220496601e-05, - "loss": 0.9464, + "learning_rate": 9.180388280978418e-06, + "loss": 1.1049, "step": 13810 }, { - "epoch": 0.39136841508685427, + "epoch": 0.540378746380781, "grad_norm": 0.0, - "learning_rate": 1.3898157035775296e-05, - "loss": 0.8945, + "learning_rate": 9.17912532001878e-06, + "loss": 0.9233, "step": 13811 }, { - "epoch": 0.3913967525291167, + "epoch": 0.5404178730730104, "grad_norm": 0.0, - "learning_rate": 1.3897311833748958e-05, - "loss": 1.095, + "learning_rate": 9.177862372241361e-06, + "loss": 1.0028, "step": 13812 }, { - "epoch": 0.3914250899713792, + "epoch": 0.5404569997652399, "grad_norm": 0.0, - "learning_rate": 1.3896466598894115e-05, - "loss": 0.8892, + "learning_rate": 9.17659943766644e-06, + "loss": 1.0605, "step": 13813 }, { - "epoch": 0.39145342741364164, + "epoch": 0.5404961264574693, "grad_norm": 0.0, - "learning_rate": 1.3895621331217887e-05, - "loss": 0.8766, + "learning_rate": 9.175336516314298e-06, + "loss": 1.0841, "step": 13814 }, { - "epoch": 0.3914817648559041, + "epoch": 0.5405352531496987, "grad_norm": 0.0, - "learning_rate": 1.3894776030727393e-05, - "loss": 0.9882, + "learning_rate": 9.174073608205222e-06, + "loss": 1.1268, "step": 13815 }, { - "epoch": 0.3915101022981666, + "epoch": 0.5405743798419281, "grad_norm": 0.0, - "learning_rate": 1.3893930697429756e-05, - "loss": 0.9829, + "learning_rate": 9.172810713359488e-06, + "loss": 1.1251, "step": 13816 }, { - "epoch": 0.391538439740429, + "epoch": 0.5406135065341576, "grad_norm": 0.0, - "learning_rate": 1.3893085331332094e-05, - "loss": 0.8982, + "learning_rate": 9.17154783179738e-06, + "loss": 1.0309, "step": 13817 }, { - "epoch": 0.3915667771826915, + "epoch": 0.540652633226387, "grad_norm": 0.0, - "learning_rate": 1.3892239932441526e-05, - "loss": 0.8803, + "learning_rate": 9.170284963539174e-06, + "loss": 1.034, "step": 13818 }, { - "epoch": 0.39159511462495394, + "epoch": 0.5406917599186165, "grad_norm": 0.0, - "learning_rate": 1.3891394500765181e-05, - "loss": 0.9166, + "learning_rate": 9.169022108605147e-06, + "loss": 1.0634, "step": 13819 }, { - "epoch": 0.39162345206721644, + "epoch": 0.5407308866108459, "grad_norm": 0.0, - "learning_rate": 1.389054903631017e-05, - "loss": 0.9916, + "learning_rate": 9.167759267015588e-06, + "loss": 0.8905, "step": 13820 }, { - "epoch": 0.3916517895094789, + "epoch": 0.5407700133030754, "grad_norm": 0.0, - "learning_rate": 1.3889703539083621e-05, - "loss": 0.9184, + "learning_rate": 9.166496438790773e-06, + "loss": 0.9737, "step": 13821 }, { - "epoch": 0.3916801269517413, + "epoch": 0.5408091399953048, "grad_norm": 0.0, - "learning_rate": 1.3888858009092653e-05, - "loss": 0.8757, + "learning_rate": 9.16523362395098e-06, + "loss": 1.0361, "step": 13822 }, { - "epoch": 0.3917084643940038, + "epoch": 0.5408482666875343, "grad_norm": 0.0, - "learning_rate": 1.3888012446344394e-05, - "loss": 1.0008, + "learning_rate": 9.163970822516487e-06, + "loss": 0.9975, "step": 13823 }, { - "epoch": 0.39173680183626625, + "epoch": 0.5408873933797637, "grad_norm": 0.0, - "learning_rate": 1.3887166850845963e-05, - "loss": 1.0313, + "learning_rate": 9.162708034507578e-06, + "loss": 1.0989, "step": 13824 }, { - "epoch": 0.39176513927852874, + "epoch": 0.5409265200719932, "grad_norm": 0.0, - "learning_rate": 1.388632122260448e-05, - "loss": 0.8447, + "learning_rate": 9.161445259944526e-06, + "loss": 1.0347, "step": 13825 }, { - "epoch": 0.3917934767207912, + "epoch": 0.5409656467642225, "grad_norm": 0.0, - "learning_rate": 1.3885475561627069e-05, - "loss": 0.9427, + "learning_rate": 9.160182498847615e-06, + "loss": 1.0369, "step": 13826 }, { - "epoch": 0.3918218141630536, + "epoch": 0.541004773456452, "grad_norm": 0.0, - "learning_rate": 1.3884629867920856e-05, - "loss": 0.926, + "learning_rate": 9.158919751237119e-06, + "loss": 1.0941, "step": 13827 }, { - "epoch": 0.3918501516053161, + "epoch": 0.5410439001486814, "grad_norm": 0.0, - "learning_rate": 1.3883784141492961e-05, - "loss": 0.9752, + "learning_rate": 9.157657017133318e-06, + "loss": 1.0098, "step": 13828 }, { - "epoch": 0.39187848904757855, + "epoch": 0.5410830268409109, "grad_norm": 0.0, - "learning_rate": 1.3882938382350513e-05, - "loss": 0.9836, + "learning_rate": 9.15639429655649e-06, + "loss": 1.0495, "step": 13829 }, { - "epoch": 0.39190682648984104, + "epoch": 0.5411221535331403, "grad_norm": 0.0, - "learning_rate": 1.388209259050063e-05, - "loss": 0.8741, + "learning_rate": 9.155131589526913e-06, + "loss": 1.0641, "step": 13830 }, { - "epoch": 0.3919351639321035, + "epoch": 0.5411612802253698, "grad_norm": 0.0, - "learning_rate": 1.3881246765950442e-05, - "loss": 0.8936, + "learning_rate": 9.153868896064864e-06, + "loss": 1.0338, "step": 13831 }, { - "epoch": 0.391963501374366, + "epoch": 0.5412004069175992, "grad_norm": 0.0, - "learning_rate": 1.3880400908707073e-05, - "loss": 0.9603, + "learning_rate": 9.152606216190619e-06, + "loss": 1.1258, "step": 13832 }, { - "epoch": 0.3919918388166284, + "epoch": 0.5412395336098286, "grad_norm": 0.0, - "learning_rate": 1.3879555018777643e-05, - "loss": 1.0058, + "learning_rate": 9.151343549924456e-06, + "loss": 1.0642, "step": 13833 }, { - "epoch": 0.39202017625889085, + "epoch": 0.5412786603020581, "grad_norm": 0.0, - "learning_rate": 1.3878709096169281e-05, - "loss": 0.839, + "learning_rate": 9.150080897286656e-06, + "loss": 0.9393, "step": 13834 }, { - "epoch": 0.39204851370115334, + "epoch": 0.5413177869942875, "grad_norm": 0.0, - "learning_rate": 1.3877863140889111e-05, - "loss": 0.9523, + "learning_rate": 9.14881825829749e-06, + "loss": 1.0073, "step": 13835 }, { - "epoch": 0.3920768511434158, + "epoch": 0.541356913686517, "grad_norm": 0.0, - "learning_rate": 1.3877017152944261e-05, - "loss": 1.0726, + "learning_rate": 9.147555632977232e-06, + "loss": 1.0507, "step": 13836 }, { - "epoch": 0.3921051885856783, + "epoch": 0.5413960403787463, "grad_norm": 0.0, - "learning_rate": 1.3876171132341858e-05, - "loss": 0.9275, + "learning_rate": 9.146293021346165e-06, + "loss": 1.0047, "step": 13837 }, { - "epoch": 0.3921335260279407, + "epoch": 0.5414351670709758, "grad_norm": 0.0, - "learning_rate": 1.3875325079089024e-05, - "loss": 1.0164, + "learning_rate": 9.145030423424564e-06, + "loss": 1.0336, "step": 13838 }, { - "epoch": 0.39216186347020315, + "epoch": 0.5414742937632052, "grad_norm": 0.0, - "learning_rate": 1.3874478993192886e-05, - "loss": 0.8804, + "learning_rate": 9.143767839232704e-06, + "loss": 1.0057, "step": 13839 }, { - "epoch": 0.39219020091246565, + "epoch": 0.5415134204554347, "grad_norm": 0.0, - "learning_rate": 1.3873632874660575e-05, - "loss": 1.0111, + "learning_rate": 9.142505268790857e-06, + "loss": 0.9709, "step": 13840 }, { - "epoch": 0.3922185383547281, + "epoch": 0.5415525471476641, "grad_norm": 0.0, - "learning_rate": 1.3872786723499215e-05, - "loss": 0.7907, + "learning_rate": 9.141242712119298e-06, + "loss": 0.9865, "step": 13841 }, { - "epoch": 0.3922468757969906, + "epoch": 0.5415916738398936, "grad_norm": 0.0, - "learning_rate": 1.3871940539715936e-05, - "loss": 0.9369, + "learning_rate": 9.139980169238309e-06, + "loss": 0.996, "step": 13842 }, { - "epoch": 0.392275213239253, + "epoch": 0.541630800532123, "grad_norm": 0.0, - "learning_rate": 1.387109432331786e-05, - "loss": 0.903, + "learning_rate": 9.138717640168161e-06, + "loss": 1.0922, "step": 13843 }, { - "epoch": 0.3923035506815155, + "epoch": 0.5416699272243525, "grad_norm": 0.0, - "learning_rate": 1.3870248074312123e-05, - "loss": 0.968, + "learning_rate": 9.13745512492913e-06, + "loss": 0.9292, "step": 13844 }, { - "epoch": 0.39233188812377795, + "epoch": 0.5417090539165819, "grad_norm": 0.0, - "learning_rate": 1.3869401792705847e-05, - "loss": 0.8985, + "learning_rate": 9.136192623541487e-06, + "loss": 1.1266, "step": 13845 }, { - "epoch": 0.3923602255660404, + "epoch": 0.5417481806088114, "grad_norm": 0.0, - "learning_rate": 1.3868555478506162e-05, - "loss": 0.9347, + "learning_rate": 9.134930136025504e-06, + "loss": 1.0321, "step": 13846 }, { - "epoch": 0.3923885630083029, + "epoch": 0.5417873073010407, "grad_norm": 0.0, - "learning_rate": 1.38677091317202e-05, - "loss": 0.9309, + "learning_rate": 9.133667662401464e-06, + "loss": 0.9983, "step": 13847 }, { - "epoch": 0.3924169004505653, + "epoch": 0.5418264339932702, "grad_norm": 0.0, - "learning_rate": 1.3866862752355088e-05, - "loss": 0.9613, + "learning_rate": 9.132405202689636e-06, + "loss": 1.1008, "step": 13848 }, { - "epoch": 0.3924452378928278, + "epoch": 0.5418655606854996, "grad_norm": 0.0, - "learning_rate": 1.3866016340417953e-05, - "loss": 1.0031, + "learning_rate": 9.131142756910291e-06, + "loss": 0.9078, "step": 13849 }, { - "epoch": 0.39247357533509025, + "epoch": 0.5419046873777291, "grad_norm": 0.0, - "learning_rate": 1.3865169895915931e-05, - "loss": 0.9284, + "learning_rate": 9.129880325083702e-06, + "loss": 0.9934, "step": 13850 }, { - "epoch": 0.3925019127773527, + "epoch": 0.5419438140699585, "grad_norm": 0.0, - "learning_rate": 1.3864323418856142e-05, - "loss": 1.0554, + "learning_rate": 9.12861790723015e-06, + "loss": 1.0345, "step": 13851 }, { - "epoch": 0.3925302502196152, + "epoch": 0.541982940762188, "grad_norm": 0.0, - "learning_rate": 1.3863476909245726e-05, - "loss": 0.9657, + "learning_rate": 9.1273555033699e-06, + "loss": 0.9314, "step": 13852 }, { - "epoch": 0.3925585876618776, + "epoch": 0.5420220674544174, "grad_norm": 0.0, - "learning_rate": 1.3862630367091808e-05, - "loss": 1.0656, + "learning_rate": 9.12609311352323e-06, + "loss": 1.0776, "step": 13853 }, { - "epoch": 0.3925869251041401, + "epoch": 0.5420611941466469, "grad_norm": 0.0, - "learning_rate": 1.386178379240152e-05, - "loss": 0.8966, + "learning_rate": 9.124830737710403e-06, + "loss": 1.0712, "step": 13854 }, { - "epoch": 0.39261526254640255, + "epoch": 0.5421003208388763, "grad_norm": 0.0, - "learning_rate": 1.3860937185181991e-05, - "loss": 0.9983, + "learning_rate": 9.123568375951702e-06, + "loss": 1.0922, "step": 13855 }, { - "epoch": 0.39264359998866505, + "epoch": 0.5421394475311058, "grad_norm": 0.0, - "learning_rate": 1.3860090545440357e-05, - "loss": 1.0278, + "learning_rate": 9.122306028267396e-06, + "loss": 1.0207, "step": 13856 }, { - "epoch": 0.3926719374309275, + "epoch": 0.5421785742233352, "grad_norm": 0.0, - "learning_rate": 1.3859243873183748e-05, - "loss": 0.98, + "learning_rate": 9.121043694677755e-06, + "loss": 0.9907, "step": 13857 }, { - "epoch": 0.3927002748731899, + "epoch": 0.5422177009155646, "grad_norm": 0.0, - "learning_rate": 1.3858397168419292e-05, - "loss": 0.9621, + "learning_rate": 9.11978137520305e-06, + "loss": 0.9869, "step": 13858 }, { - "epoch": 0.3927286123154524, + "epoch": 0.542256827607794, "grad_norm": 0.0, - "learning_rate": 1.3857550431154123e-05, - "loss": 1.0134, + "learning_rate": 9.11851906986355e-06, + "loss": 1.0592, "step": 13859 }, { - "epoch": 0.39275694975771486, + "epoch": 0.5422959543000235, "grad_norm": 0.0, - "learning_rate": 1.3856703661395376e-05, - "loss": 0.9272, + "learning_rate": 9.117256778679533e-06, + "loss": 1.0186, "step": 13860 }, { - "epoch": 0.39278528719997735, + "epoch": 0.5423350809922529, "grad_norm": 0.0, - "learning_rate": 1.3855856859150182e-05, - "loss": 0.9537, + "learning_rate": 9.115994501671264e-06, + "loss": 0.9934, "step": 13861 }, { - "epoch": 0.3928136246422398, + "epoch": 0.5423742076844823, "grad_norm": 0.0, - "learning_rate": 1.3855010024425677e-05, - "loss": 0.9222, + "learning_rate": 9.114732238859019e-06, + "loss": 1.075, "step": 13862 }, { - "epoch": 0.3928419620845022, + "epoch": 0.5424133343767118, "grad_norm": 0.0, - "learning_rate": 1.3854163157228987e-05, - "loss": 0.9466, + "learning_rate": 9.113469990263061e-06, + "loss": 1.0388, "step": 13863 }, { - "epoch": 0.3928702995267647, + "epoch": 0.5424524610689412, "grad_norm": 0.0, - "learning_rate": 1.3853316257567251e-05, - "loss": 0.9022, + "learning_rate": 9.112207755903664e-06, + "loss": 1.0428, "step": 13864 }, { - "epoch": 0.39289863696902716, + "epoch": 0.5424915877611707, "grad_norm": 0.0, - "learning_rate": 1.3852469325447599e-05, - "loss": 1.0635, + "learning_rate": 9.110945535801102e-06, + "loss": 1.096, "step": 13865 }, { - "epoch": 0.39292697441128965, + "epoch": 0.5425307144534001, "grad_norm": 0.0, - "learning_rate": 1.3851622360877169e-05, - "loss": 0.7807, + "learning_rate": 9.109683329975639e-06, + "loss": 1.0418, "step": 13866 }, { - "epoch": 0.3929553118535521, + "epoch": 0.5425698411456296, "grad_norm": 0.0, - "learning_rate": 1.3850775363863094e-05, - "loss": 0.8193, + "learning_rate": 9.108421138447545e-06, + "loss": 1.0133, "step": 13867 }, { - "epoch": 0.3929836492958146, + "epoch": 0.542608967837859, "grad_norm": 0.0, - "learning_rate": 1.3849928334412508e-05, - "loss": 0.9419, + "learning_rate": 9.10715896123709e-06, + "loss": 0.9009, "step": 13868 }, { - "epoch": 0.393011986738077, + "epoch": 0.5426480945300884, "grad_norm": 0.0, - "learning_rate": 1.3849081272532545e-05, - "loss": 0.9453, + "learning_rate": 9.105896798364543e-06, + "loss": 0.9436, "step": 13869 }, { - "epoch": 0.39304032418033946, + "epoch": 0.5426872212223178, "grad_norm": 0.0, - "learning_rate": 1.3848234178230344e-05, - "loss": 0.824, + "learning_rate": 9.104634649850174e-06, + "loss": 1.0194, "step": 13870 }, { - "epoch": 0.39306866162260196, + "epoch": 0.5427263479145473, "grad_norm": 0.0, - "learning_rate": 1.3847387051513035e-05, - "loss": 0.9944, + "learning_rate": 9.103372515714252e-06, + "loss": 1.0811, "step": 13871 }, { - "epoch": 0.3930969990648644, + "epoch": 0.5427654746067767, "grad_norm": 0.0, - "learning_rate": 1.3846539892387755e-05, - "loss": 0.8713, + "learning_rate": 9.10211039597704e-06, + "loss": 0.9495, "step": 13872 }, { - "epoch": 0.3931253365071269, + "epoch": 0.5428046012990062, "grad_norm": 0.0, - "learning_rate": 1.3845692700861642e-05, - "loss": 0.9542, + "learning_rate": 9.100848290658814e-06, + "loss": 0.897, "step": 13873 }, { - "epoch": 0.3931536739493893, + "epoch": 0.5428437279912356, "grad_norm": 0.0, - "learning_rate": 1.3844845476941833e-05, - "loss": 0.8991, + "learning_rate": 9.099586199779836e-06, + "loss": 1.041, "step": 13874 }, { - "epoch": 0.39318201139165176, + "epoch": 0.5428828546834651, "grad_norm": 0.0, - "learning_rate": 1.3843998220635462e-05, - "loss": 0.9056, + "learning_rate": 9.098324123360375e-06, + "loss": 1.2017, "step": 13875 }, { - "epoch": 0.39321034883391426, + "epoch": 0.5429219813756945, "grad_norm": 0.0, - "learning_rate": 1.3843150931949665e-05, - "loss": 0.7999, + "learning_rate": 9.0970620614207e-06, + "loss": 0.9596, "step": 13876 }, { - "epoch": 0.3932386862761767, + "epoch": 0.542961108067924, "grad_norm": 0.0, - "learning_rate": 1.3842303610891582e-05, - "loss": 0.918, + "learning_rate": 9.095800013981074e-06, + "loss": 0.9247, "step": 13877 }, { - "epoch": 0.3932670237184392, + "epoch": 0.5430002347601534, "grad_norm": 0.0, - "learning_rate": 1.384145625746835e-05, - "loss": 0.9252, + "learning_rate": 9.094537981061771e-06, + "loss": 1.0772, "step": 13878 }, { - "epoch": 0.39329536116070163, + "epoch": 0.5430393614523829, "grad_norm": 0.0, - "learning_rate": 1.3840608871687104e-05, - "loss": 0.8298, + "learning_rate": 9.093275962683051e-06, + "loss": 0.9388, "step": 13879 }, { - "epoch": 0.3933236986029641, + "epoch": 0.5430784881446122, "grad_norm": 0.0, - "learning_rate": 1.383976145355498e-05, - "loss": 0.8673, + "learning_rate": 9.092013958865185e-06, + "loss": 1.062, "step": 13880 }, { - "epoch": 0.39335203604522656, + "epoch": 0.5431176148368417, "grad_norm": 0.0, - "learning_rate": 1.3838914003079125e-05, - "loss": 0.8796, + "learning_rate": 9.09075196962843e-06, + "loss": 1.0436, "step": 13881 }, { - "epoch": 0.393380373487489, + "epoch": 0.5431567415290711, "grad_norm": 0.0, - "learning_rate": 1.3838066520266672e-05, - "loss": 0.9536, + "learning_rate": 9.089489994993066e-06, + "loss": 1.0699, "step": 13882 }, { - "epoch": 0.3934087109297515, + "epoch": 0.5431958682213006, "grad_norm": 0.0, - "learning_rate": 1.3837219005124758e-05, - "loss": 0.9321, + "learning_rate": 9.08822803497935e-06, + "loss": 0.8737, "step": 13883 }, { - "epoch": 0.39343704837201393, + "epoch": 0.54323499491353, "grad_norm": 0.0, - "learning_rate": 1.383637145766052e-05, - "loss": 1.0394, + "learning_rate": 9.086966089607551e-06, + "loss": 1.0631, "step": 13884 }, { - "epoch": 0.3934653858142764, + "epoch": 0.5432741216057595, "grad_norm": 0.0, - "learning_rate": 1.3835523877881106e-05, - "loss": 0.9401, + "learning_rate": 9.08570415889793e-06, + "loss": 1.0602, "step": 13885 }, { - "epoch": 0.39349372325653886, + "epoch": 0.5433132482979889, "grad_norm": 0.0, - "learning_rate": 1.3834676265793646e-05, - "loss": 0.9547, + "learning_rate": 9.084442242870752e-06, + "loss": 1.0956, "step": 13886 }, { - "epoch": 0.3935220606988013, + "epoch": 0.5433523749902184, "grad_norm": 0.0, - "learning_rate": 1.3833828621405286e-05, - "loss": 0.9684, + "learning_rate": 9.08318034154629e-06, + "loss": 1.0983, "step": 13887 }, { - "epoch": 0.3935503981410638, + "epoch": 0.5433915016824478, "grad_norm": 0.0, - "learning_rate": 1.383298094472316e-05, - "loss": 1.011, + "learning_rate": 9.0819184549448e-06, + "loss": 1.0834, "step": 13888 }, { - "epoch": 0.39357873558332623, + "epoch": 0.5434306283746771, "grad_norm": 0.0, - "learning_rate": 1.3832133235754417e-05, - "loss": 0.8092, + "learning_rate": 9.08065658308655e-06, + "loss": 1.0459, "step": 13889 }, { - "epoch": 0.3936070730255887, + "epoch": 0.5434697550669066, "grad_norm": 0.0, - "learning_rate": 1.383128549450619e-05, - "loss": 0.8995, + "learning_rate": 9.079394725991799e-06, + "loss": 1.019, "step": 13890 }, { - "epoch": 0.39363541046785117, + "epoch": 0.543508881759136, "grad_norm": 0.0, - "learning_rate": 1.3830437720985626e-05, - "loss": 1.0587, + "learning_rate": 9.07813288368082e-06, + "loss": 0.9958, "step": 13891 }, { - "epoch": 0.39366374791011366, + "epoch": 0.5435480084513655, "grad_norm": 0.0, - "learning_rate": 1.3829589915199859e-05, - "loss": 0.8361, + "learning_rate": 9.076871056173872e-06, + "loss": 0.9623, "step": 13892 }, { - "epoch": 0.3936920853523761, + "epoch": 0.5435871351435949, "grad_norm": 0.0, - "learning_rate": 1.3828742077156035e-05, - "loss": 0.9079, + "learning_rate": 9.075609243491218e-06, + "loss": 0.9775, "step": 13893 }, { - "epoch": 0.39372042279463854, + "epoch": 0.5436262618358244, "grad_norm": 0.0, - "learning_rate": 1.3827894206861294e-05, - "loss": 0.8567, + "learning_rate": 9.07434744565312e-06, + "loss": 0.9057, "step": 13894 }, { - "epoch": 0.39374876023690103, + "epoch": 0.5436653885280538, "grad_norm": 0.0, - "learning_rate": 1.3827046304322779e-05, - "loss": 0.8966, + "learning_rate": 9.07308566267984e-06, + "loss": 1.0917, "step": 13895 }, { - "epoch": 0.39377709767916347, + "epoch": 0.5437045152202833, "grad_norm": 0.0, - "learning_rate": 1.3826198369547635e-05, - "loss": 0.9261, + "learning_rate": 9.071823894591645e-06, + "loss": 1.2031, "step": 13896 }, { - "epoch": 0.39380543512142596, + "epoch": 0.5437436419125127, "grad_norm": 0.0, - "learning_rate": 1.3825350402542999e-05, - "loss": 0.9285, + "learning_rate": 9.070562141408795e-06, + "loss": 0.9759, "step": 13897 }, { - "epoch": 0.3938337725636884, + "epoch": 0.5437827686047422, "grad_norm": 0.0, - "learning_rate": 1.3824502403316015e-05, - "loss": 0.928, + "learning_rate": 9.069300403151555e-06, + "loss": 1.1367, "step": 13898 }, { - "epoch": 0.39386211000595084, + "epoch": 0.5438218952969716, "grad_norm": 0.0, - "learning_rate": 1.3823654371873827e-05, - "loss": 0.9431, + "learning_rate": 9.068038679840176e-06, + "loss": 1.0526, "step": 13899 }, { - "epoch": 0.39389044744821333, + "epoch": 0.543861021989201, "grad_norm": 0.0, - "learning_rate": 1.3822806308223579e-05, - "loss": 0.8949, + "learning_rate": 9.066776971494935e-06, + "loss": 1.0146, "step": 13900 }, { - "epoch": 0.39391878489047577, + "epoch": 0.5439001486814304, "grad_norm": 0.0, - "learning_rate": 1.3821958212372413e-05, - "loss": 0.9617, + "learning_rate": 9.065515278136086e-06, + "loss": 0.8638, "step": 13901 }, { - "epoch": 0.39394712233273826, + "epoch": 0.5439392753736599, "grad_norm": 0.0, - "learning_rate": 1.3821110084327476e-05, - "loss": 0.8206, + "learning_rate": 9.064253599783891e-06, + "loss": 1.0715, "step": 13902 }, { - "epoch": 0.3939754597750007, + "epoch": 0.5439784020658893, "grad_norm": 0.0, - "learning_rate": 1.382026192409591e-05, - "loss": 0.8789, + "learning_rate": 9.06299193645861e-06, + "loss": 1.1452, "step": 13903 }, { - "epoch": 0.3940037972172632, + "epoch": 0.5440175287581188, "grad_norm": 0.0, - "learning_rate": 1.3819413731684858e-05, - "loss": 0.938, + "learning_rate": 9.0617302881805e-06, + "loss": 1.1572, "step": 13904 }, { - "epoch": 0.39403213465952563, + "epoch": 0.5440566554503482, "grad_norm": 0.0, - "learning_rate": 1.3818565507101464e-05, - "loss": 0.9071, + "learning_rate": 9.06046865496983e-06, + "loss": 0.9764, "step": 13905 }, { - "epoch": 0.3940604721017881, + "epoch": 0.5440957821425777, "grad_norm": 0.0, - "learning_rate": 1.3817717250352876e-05, - "loss": 0.9559, + "learning_rate": 9.059207036846857e-06, + "loss": 0.9972, "step": 13906 }, { - "epoch": 0.39408880954405057, + "epoch": 0.5441349088348071, "grad_norm": 0.0, - "learning_rate": 1.381686896144624e-05, - "loss": 0.8876, + "learning_rate": 9.05794543383184e-06, + "loss": 1.064, "step": 13907 }, { - "epoch": 0.394117146986313, + "epoch": 0.5441740355270366, "grad_norm": 0.0, - "learning_rate": 1.3816020640388698e-05, - "loss": 0.9615, + "learning_rate": 9.056683845945034e-06, + "loss": 0.9745, "step": 13908 }, { - "epoch": 0.3941454844285755, + "epoch": 0.544213162219266, "grad_norm": 0.0, - "learning_rate": 1.3815172287187394e-05, - "loss": 0.8942, + "learning_rate": 9.05542227320671e-06, + "loss": 1.0582, "step": 13909 }, { - "epoch": 0.39417382187083794, + "epoch": 0.5442522889114955, "grad_norm": 0.0, - "learning_rate": 1.3814323901849483e-05, - "loss": 0.9014, + "learning_rate": 9.054160715637117e-06, + "loss": 0.9819, "step": 13910 }, { - "epoch": 0.3942021593131004, + "epoch": 0.5442914156037248, "grad_norm": 0.0, - "learning_rate": 1.3813475484382102e-05, - "loss": 0.9185, + "learning_rate": 9.05289917325652e-06, + "loss": 1.0514, "step": 13911 }, { - "epoch": 0.39423049675536287, + "epoch": 0.5443305422959543, "grad_norm": 0.0, - "learning_rate": 1.3812627034792401e-05, - "loss": 0.9075, + "learning_rate": 9.051637646085171e-06, + "loss": 0.9831, "step": 13912 }, { - "epoch": 0.3942588341976253, + "epoch": 0.5443696689881837, "grad_norm": 0.0, - "learning_rate": 1.3811778553087524e-05, - "loss": 0.8466, + "learning_rate": 9.050376134143339e-06, + "loss": 0.978, "step": 13913 }, { - "epoch": 0.3942871716398878, + "epoch": 0.5444087956804132, "grad_norm": 0.0, - "learning_rate": 1.3810930039274626e-05, - "loss": 0.9329, + "learning_rate": 9.049114637451271e-06, + "loss": 0.9439, "step": 13914 }, { - "epoch": 0.39431550908215024, + "epoch": 0.5444479223726426, "grad_norm": 0.0, - "learning_rate": 1.3810081493360847e-05, - "loss": 0.8359, + "learning_rate": 9.047853156029234e-06, + "loss": 0.9121, "step": 13915 }, { - "epoch": 0.39434384652441273, + "epoch": 0.5444870490648721, "grad_norm": 0.0, - "learning_rate": 1.3809232915353336e-05, - "loss": 0.8698, + "learning_rate": 9.046591689897481e-06, + "loss": 1.1735, "step": 13916 }, { - "epoch": 0.39437218396667517, + "epoch": 0.5445261757571015, "grad_norm": 0.0, - "learning_rate": 1.3808384305259244e-05, - "loss": 0.9389, + "learning_rate": 9.045330239076269e-06, + "loss": 0.9662, "step": 13917 }, { - "epoch": 0.3944005214089376, + "epoch": 0.5445653024493309, "grad_norm": 0.0, - "learning_rate": 1.3807535663085714e-05, - "loss": 0.8634, + "learning_rate": 9.04406880358586e-06, + "loss": 1.0955, "step": 13918 }, { - "epoch": 0.3944288588512001, + "epoch": 0.5446044291415604, "grad_norm": 0.0, - "learning_rate": 1.3806686988839898e-05, - "loss": 0.9609, + "learning_rate": 9.042807383446508e-06, + "loss": 1.0791, "step": 13919 }, { - "epoch": 0.39445719629346254, + "epoch": 0.5446435558337898, "grad_norm": 0.0, - "learning_rate": 1.3805838282528946e-05, - "loss": 0.8684, + "learning_rate": 9.041545978678467e-06, + "loss": 0.9577, "step": 13920 }, { - "epoch": 0.39448553373572504, + "epoch": 0.5446826825260193, "grad_norm": 0.0, - "learning_rate": 1.380498954416e-05, - "loss": 0.934, + "learning_rate": 9.040284589301997e-06, + "loss": 0.9628, "step": 13921 }, { - "epoch": 0.3945138711779875, + "epoch": 0.5447218092182486, "grad_norm": 0.0, - "learning_rate": 1.3804140773740218e-05, - "loss": 1.0133, + "learning_rate": 9.039023215337357e-06, + "loss": 1.0717, "step": 13922 }, { - "epoch": 0.3945422086202499, + "epoch": 0.5447609359104781, "grad_norm": 0.0, - "learning_rate": 1.3803291971276744e-05, - "loss": 0.9144, + "learning_rate": 9.0377618568048e-06, + "loss": 0.955, "step": 13923 }, { - "epoch": 0.3945705460625124, + "epoch": 0.5448000626027075, "grad_norm": 0.0, - "learning_rate": 1.380244313677673e-05, - "loss": 0.9792, + "learning_rate": 9.03650051372458e-06, + "loss": 0.9854, "step": 13924 }, { - "epoch": 0.39459888350477484, + "epoch": 0.544839189294937, "grad_norm": 0.0, - "learning_rate": 1.3801594270247328e-05, - "loss": 0.9306, + "learning_rate": 9.035239186116957e-06, + "loss": 1.1236, "step": 13925 }, { - "epoch": 0.39462722094703734, + "epoch": 0.5448783159871664, "grad_norm": 0.0, - "learning_rate": 1.380074537169568e-05, - "loss": 0.9453, + "learning_rate": 9.033977874002177e-06, + "loss": 1.0331, "step": 13926 }, { - "epoch": 0.3946555583892998, + "epoch": 0.5449174426793959, "grad_norm": 0.0, - "learning_rate": 1.3799896441128949e-05, - "loss": 0.9247, + "learning_rate": 9.032716577400508e-06, + "loss": 1.0962, "step": 13927 }, { - "epoch": 0.39468389583156227, + "epoch": 0.5449565693716253, "grad_norm": 0.0, - "learning_rate": 1.3799047478554275e-05, - "loss": 0.9038, + "learning_rate": 9.031455296332196e-06, + "loss": 0.9542, "step": 13928 }, { - "epoch": 0.3947122332738247, + "epoch": 0.5449956960638548, "grad_norm": 0.0, - "learning_rate": 1.3798198483978816e-05, - "loss": 0.995, + "learning_rate": 9.0301940308175e-06, + "loss": 1.0916, "step": 13929 }, { - "epoch": 0.39474057071608715, + "epoch": 0.5450348227560842, "grad_norm": 0.0, - "learning_rate": 1.3797349457409716e-05, - "loss": 0.8909, + "learning_rate": 9.028932780876669e-06, + "loss": 1.15, "step": 13930 }, { - "epoch": 0.39476890815834964, + "epoch": 0.5450739494483137, "grad_norm": 0.0, - "learning_rate": 1.3796500398854136e-05, - "loss": 0.923, + "learning_rate": 9.027671546529965e-06, + "loss": 1.0938, "step": 13931 }, { - "epoch": 0.3947972456006121, + "epoch": 0.545113076140543, "grad_norm": 0.0, - "learning_rate": 1.379565130831922e-05, - "loss": 0.8679, + "learning_rate": 9.026410327797637e-06, + "loss": 1.0528, "step": 13932 }, { - "epoch": 0.3948255830428746, + "epoch": 0.5451522028327725, "grad_norm": 0.0, - "learning_rate": 1.3794802185812126e-05, - "loss": 1.0472, + "learning_rate": 9.025149124699938e-06, + "loss": 0.9017, "step": 13933 }, { - "epoch": 0.394853920485137, + "epoch": 0.5451913295250019, "grad_norm": 0.0, - "learning_rate": 1.3793953031340004e-05, - "loss": 0.9562, + "learning_rate": 9.023887937257126e-06, + "loss": 1.1002, "step": 13934 }, { - "epoch": 0.39488225792739945, + "epoch": 0.5452304562172314, "grad_norm": 0.0, - "learning_rate": 1.3793103844910005e-05, - "loss": 0.9748, + "learning_rate": 9.022626765489443e-06, + "loss": 1.0236, "step": 13935 }, { - "epoch": 0.39491059536966194, + "epoch": 0.5452695829094608, "grad_norm": 0.0, - "learning_rate": 1.3792254626529286e-05, - "loss": 0.8986, + "learning_rate": 9.021365609417155e-06, + "loss": 1.0284, "step": 13936 }, { - "epoch": 0.3949389328119244, + "epoch": 0.5453087096016903, "grad_norm": 0.0, - "learning_rate": 1.3791405376204998e-05, - "loss": 0.963, + "learning_rate": 9.020104469060508e-06, + "loss": 1.0844, "step": 13937 }, { - "epoch": 0.3949672702541869, + "epoch": 0.5453478362939197, "grad_norm": 0.0, - "learning_rate": 1.3790556093944291e-05, - "loss": 0.9826, + "learning_rate": 9.018843344439756e-06, + "loss": 1.1092, "step": 13938 }, { - "epoch": 0.3949956076964493, + "epoch": 0.5453869629861492, "grad_norm": 0.0, - "learning_rate": 1.3789706779754326e-05, - "loss": 0.9334, + "learning_rate": 9.017582235575147e-06, + "loss": 1.149, "step": 13939 }, { - "epoch": 0.3950239451387118, + "epoch": 0.5454260896783786, "grad_norm": 0.0, - "learning_rate": 1.3788857433642253e-05, - "loss": 0.9761, + "learning_rate": 9.016321142486938e-06, + "loss": 1.1561, "step": 13940 }, { - "epoch": 0.39505228258097425, + "epoch": 0.5454652163706081, "grad_norm": 0.0, - "learning_rate": 1.3788008055615227e-05, - "loss": 0.9733, + "learning_rate": 9.015060065195382e-06, + "loss": 1.0457, "step": 13941 }, { - "epoch": 0.3950806200232367, + "epoch": 0.5455043430628375, "grad_norm": 0.0, - "learning_rate": 1.3787158645680401e-05, - "loss": 0.9676, + "learning_rate": 9.013799003720725e-06, + "loss": 1.1006, "step": 13942 }, { - "epoch": 0.3951089574654992, + "epoch": 0.545543469755067, "grad_norm": 0.0, - "learning_rate": 1.3786309203844932e-05, - "loss": 0.8725, + "learning_rate": 9.012537958083222e-06, + "loss": 1.0466, "step": 13943 }, { - "epoch": 0.3951372949077616, + "epoch": 0.5455825964472963, "grad_norm": 0.0, - "learning_rate": 1.3785459730115975e-05, - "loss": 0.994, + "learning_rate": 9.011276928303116e-06, + "loss": 1.1091, "step": 13944 }, { - "epoch": 0.3951656323500241, + "epoch": 0.5456217231395258, "grad_norm": 0.0, - "learning_rate": 1.3784610224500685e-05, - "loss": 0.889, + "learning_rate": 9.010015914400669e-06, + "loss": 1.031, "step": 13945 }, { - "epoch": 0.39519396979228655, + "epoch": 0.5456608498317552, "grad_norm": 0.0, - "learning_rate": 1.3783760687006218e-05, - "loss": 0.849, + "learning_rate": 9.008754916396125e-06, + "loss": 1.1178, "step": 13946 }, { - "epoch": 0.395222307234549, + "epoch": 0.5456999765239846, "grad_norm": 0.0, - "learning_rate": 1.3782911117639729e-05, - "loss": 0.9955, + "learning_rate": 9.007493934309737e-06, + "loss": 0.9441, "step": 13947 }, { - "epoch": 0.3952506446768115, + "epoch": 0.5457391032162141, "grad_norm": 0.0, - "learning_rate": 1.3782061516408376e-05, - "loss": 0.8753, + "learning_rate": 9.006232968161745e-06, + "loss": 1.1081, "step": 13948 }, { - "epoch": 0.3952789821190739, + "epoch": 0.5457782299084435, "grad_norm": 0.0, - "learning_rate": 1.3781211883319315e-05, - "loss": 0.9515, + "learning_rate": 9.004972017972414e-06, + "loss": 1.0279, "step": 13949 }, { - "epoch": 0.3953073195613364, + "epoch": 0.545817356600673, "grad_norm": 0.0, - "learning_rate": 1.3780362218379697e-05, - "loss": 0.9596, + "learning_rate": 9.003711083761984e-06, + "loss": 1.0737, "step": 13950 }, { - "epoch": 0.39533565700359885, + "epoch": 0.5458564832929024, "grad_norm": 0.0, - "learning_rate": 1.3779512521596689e-05, - "loss": 0.9047, + "learning_rate": 9.002450165550705e-06, + "loss": 0.9675, "step": 13951 }, { - "epoch": 0.39536399444586134, + "epoch": 0.5458956099851319, "grad_norm": 0.0, - "learning_rate": 1.3778662792977443e-05, - "loss": 0.8696, + "learning_rate": 9.001189263358828e-06, + "loss": 0.9937, "step": 13952 }, { - "epoch": 0.3953923318881238, + "epoch": 0.5459347366773613, "grad_norm": 0.0, - "learning_rate": 1.3777813032529117e-05, - "loss": 1.0237, + "learning_rate": 8.999928377206594e-06, + "loss": 0.9131, "step": 13953 }, { - "epoch": 0.3954206693303862, + "epoch": 0.5459738633695908, "grad_norm": 0.0, - "learning_rate": 1.3776963240258869e-05, - "loss": 0.9081, + "learning_rate": 8.998667507114262e-06, + "loss": 1.0584, "step": 13954 }, { - "epoch": 0.3954490067726487, + "epoch": 0.5460129900618201, "grad_norm": 0.0, - "learning_rate": 1.3776113416173856e-05, - "loss": 0.9243, + "learning_rate": 8.997406653102075e-06, + "loss": 0.937, "step": 13955 }, { - "epoch": 0.39547734421491115, + "epoch": 0.5460521167540496, "grad_norm": 0.0, - "learning_rate": 1.3775263560281238e-05, - "loss": 0.982, + "learning_rate": 8.99614581519028e-06, + "loss": 1.0851, "step": 13956 }, { - "epoch": 0.39550568165717365, + "epoch": 0.546091243446279, "grad_norm": 0.0, - "learning_rate": 1.3774413672588174e-05, - "loss": 1.0152, + "learning_rate": 8.994884993399125e-06, + "loss": 1.008, "step": 13957 }, { - "epoch": 0.3955340190994361, + "epoch": 0.5461303701385085, "grad_norm": 0.0, - "learning_rate": 1.377356375310182e-05, - "loss": 0.9432, + "learning_rate": 8.993624187748858e-06, + "loss": 1.0831, "step": 13958 }, { - "epoch": 0.3955623565416985, + "epoch": 0.5461694968307379, "grad_norm": 0.0, - "learning_rate": 1.3772713801829338e-05, - "loss": 0.9023, + "learning_rate": 8.992363398259724e-06, + "loss": 1.1571, "step": 13959 }, { - "epoch": 0.395590693983961, + "epoch": 0.5462086235229674, "grad_norm": 0.0, - "learning_rate": 1.3771863818777888e-05, - "loss": 0.8457, + "learning_rate": 8.991102624951972e-06, + "loss": 0.939, "step": 13960 }, { - "epoch": 0.39561903142622346, + "epoch": 0.5462477502151968, "grad_norm": 0.0, - "learning_rate": 1.377101380395463e-05, - "loss": 0.8816, + "learning_rate": 8.98984186784585e-06, + "loss": 1.1198, "step": 13961 }, { - "epoch": 0.39564736886848595, + "epoch": 0.5462868769074263, "grad_norm": 0.0, - "learning_rate": 1.3770163757366718e-05, - "loss": 0.998, + "learning_rate": 8.9885811269616e-06, + "loss": 1.0524, "step": 13962 }, { - "epoch": 0.3956757063107484, + "epoch": 0.5463260035996557, "grad_norm": 0.0, - "learning_rate": 1.3769313679021319e-05, - "loss": 0.9239, + "learning_rate": 8.987320402319468e-06, + "loss": 1.0292, "step": 13963 }, { - "epoch": 0.3957040437530108, + "epoch": 0.5463651302918852, "grad_norm": 0.0, - "learning_rate": 1.3768463568925589e-05, - "loss": 0.9984, + "learning_rate": 8.986059693939706e-06, + "loss": 0.9425, "step": 13964 }, { - "epoch": 0.3957323811952733, + "epoch": 0.5464042569841145, "grad_norm": 0.0, - "learning_rate": 1.3767613427086694e-05, - "loss": 0.9084, + "learning_rate": 8.984799001842549e-06, + "loss": 0.9934, "step": 13965 }, { - "epoch": 0.39576071863753576, + "epoch": 0.546443383676344, "grad_norm": 0.0, - "learning_rate": 1.3766763253511793e-05, - "loss": 0.8954, + "learning_rate": 8.983538326048249e-06, + "loss": 1.0714, "step": 13966 }, { - "epoch": 0.39578905607979825, + "epoch": 0.5464825103685734, "grad_norm": 0.0, - "learning_rate": 1.3765913048208042e-05, - "loss": 0.8342, + "learning_rate": 8.982277666577053e-06, + "loss": 0.9616, "step": 13967 }, { - "epoch": 0.3958173935220607, + "epoch": 0.5465216370608029, "grad_norm": 0.0, - "learning_rate": 1.376506281118261e-05, - "loss": 0.9246, + "learning_rate": 8.981017023449202e-06, + "loss": 1.0385, "step": 13968 }, { - "epoch": 0.3958457309643232, + "epoch": 0.5465607637530323, "grad_norm": 0.0, - "learning_rate": 1.3764212542442656e-05, - "loss": 0.92, + "learning_rate": 8.97975639668494e-06, + "loss": 1.0858, "step": 13969 }, { - "epoch": 0.3958740684065856, + "epoch": 0.5465998904452618, "grad_norm": 0.0, - "learning_rate": 1.376336224199534e-05, - "loss": 0.8947, + "learning_rate": 8.978495786304507e-06, + "loss": 0.9478, "step": 13970 }, { - "epoch": 0.39590240584884806, + "epoch": 0.5466390171374912, "grad_norm": 0.0, - "learning_rate": 1.3762511909847827e-05, - "loss": 0.9052, + "learning_rate": 8.977235192328158e-06, + "loss": 1.205, "step": 13971 }, { - "epoch": 0.39593074329111055, + "epoch": 0.5466781438297207, "grad_norm": 0.0, - "learning_rate": 1.376166154600728e-05, - "loss": 0.9988, + "learning_rate": 8.97597461477613e-06, + "loss": 1.1362, "step": 13972 }, { - "epoch": 0.395959080733373, + "epoch": 0.5467172705219501, "grad_norm": 0.0, - "learning_rate": 1.376081115048086e-05, - "loss": 0.9306, + "learning_rate": 8.974714053668665e-06, + "loss": 0.9889, "step": 13973 }, { - "epoch": 0.3959874181756355, + "epoch": 0.5467563972141796, "grad_norm": 0.0, - "learning_rate": 1.375996072327573e-05, - "loss": 1.0051, + "learning_rate": 8.973453509026008e-06, + "loss": 1.2443, "step": 13974 }, { - "epoch": 0.3960157556178979, + "epoch": 0.546795523906409, "grad_norm": 0.0, - "learning_rate": 1.3759110264399058e-05, - "loss": 0.8922, + "learning_rate": 8.972192980868397e-06, + "loss": 0.9931, "step": 13975 }, { - "epoch": 0.39604409306016036, + "epoch": 0.5468346505986383, "grad_norm": 0.0, - "learning_rate": 1.3758259773858003e-05, - "loss": 0.8643, + "learning_rate": 8.970932469216083e-06, + "loss": 0.833, "step": 13976 }, { - "epoch": 0.39607243050242286, + "epoch": 0.5468737772908678, "grad_norm": 0.0, - "learning_rate": 1.3757409251659727e-05, - "loss": 0.9366, + "learning_rate": 8.969671974089304e-06, + "loss": 0.9948, "step": 13977 }, { - "epoch": 0.3961007679446853, + "epoch": 0.5469129039830972, "grad_norm": 0.0, - "learning_rate": 1.3756558697811402e-05, - "loss": 0.9777, + "learning_rate": 8.968411495508303e-06, + "loss": 1.0385, "step": 13978 }, { - "epoch": 0.3961291053869478, + "epoch": 0.5469520306753267, "grad_norm": 0.0, - "learning_rate": 1.3755708112320187e-05, - "loss": 0.9289, + "learning_rate": 8.967151033493315e-06, + "loss": 1.1031, "step": 13979 }, { - "epoch": 0.3961574428292102, + "epoch": 0.5469911573675561, "grad_norm": 0.0, - "learning_rate": 1.3754857495193245e-05, - "loss": 1.0257, + "learning_rate": 8.965890588064593e-06, + "loss": 1.0814, "step": 13980 }, { - "epoch": 0.3961857802714727, + "epoch": 0.5470302840597856, "grad_norm": 0.0, - "learning_rate": 1.3754006846437748e-05, - "loss": 0.9959, + "learning_rate": 8.964630159242373e-06, + "loss": 1.0807, "step": 13981 }, { - "epoch": 0.39621411771373516, + "epoch": 0.547069410752015, "grad_norm": 0.0, - "learning_rate": 1.3753156166060857e-05, - "loss": 0.9063, + "learning_rate": 8.963369747046893e-06, + "loss": 0.9818, "step": 13982 }, { - "epoch": 0.3962424551559976, + "epoch": 0.5471085374442445, "grad_norm": 0.0, - "learning_rate": 1.3752305454069734e-05, - "loss": 0.9559, + "learning_rate": 8.962109351498397e-06, + "loss": 1.0367, "step": 13983 }, { - "epoch": 0.3962707925982601, + "epoch": 0.5471476641364739, "grad_norm": 0.0, - "learning_rate": 1.375145471047155e-05, - "loss": 0.889, + "learning_rate": 8.960848972617119e-06, + "loss": 0.9427, "step": 13984 }, { - "epoch": 0.39629913004052253, + "epoch": 0.5471867908287034, "grad_norm": 0.0, - "learning_rate": 1.3750603935273472e-05, - "loss": 0.872, + "learning_rate": 8.95958861042331e-06, + "loss": 0.9736, "step": 13985 }, { - "epoch": 0.396327467482785, + "epoch": 0.5472259175209327, "grad_norm": 0.0, - "learning_rate": 1.3749753128482665e-05, - "loss": 0.9163, + "learning_rate": 8.958328264937203e-06, + "loss": 1.0189, "step": 13986 }, { - "epoch": 0.39635580492504746, + "epoch": 0.5472650442131622, "grad_norm": 0.0, - "learning_rate": 1.3748902290106294e-05, - "loss": 0.9585, + "learning_rate": 8.95706793617904e-06, + "loss": 1.1244, "step": 13987 }, { - "epoch": 0.3963841423673099, + "epoch": 0.5473041709053916, "grad_norm": 0.0, - "learning_rate": 1.3748051420151524e-05, - "loss": 0.911, + "learning_rate": 8.955807624169054e-06, + "loss": 0.9852, "step": 13988 }, { - "epoch": 0.3964124798095724, + "epoch": 0.5473432975976211, "grad_norm": 0.0, - "learning_rate": 1.374720051862553e-05, - "loss": 0.9722, + "learning_rate": 8.954547328927494e-06, + "loss": 1.0691, "step": 13989 }, { - "epoch": 0.39644081725183483, + "epoch": 0.5473824242898505, "grad_norm": 0.0, - "learning_rate": 1.374634958553547e-05, - "loss": 0.7516, + "learning_rate": 8.953287050474592e-06, + "loss": 1.0767, "step": 13990 }, { - "epoch": 0.3964691546940973, + "epoch": 0.54742155098208, "grad_norm": 0.0, - "learning_rate": 1.374549862088852e-05, - "loss": 0.9249, + "learning_rate": 8.95202678883059e-06, + "loss": 1.0398, "step": 13991 }, { - "epoch": 0.39649749213635976, + "epoch": 0.5474606776743094, "grad_norm": 0.0, - "learning_rate": 1.3744647624691841e-05, - "loss": 0.8629, + "learning_rate": 8.950766544015726e-06, + "loss": 0.8929, "step": 13992 }, { - "epoch": 0.39652582957862226, + "epoch": 0.5474998043665389, "grad_norm": 0.0, - "learning_rate": 1.3743796596952608e-05, - "loss": 0.9427, + "learning_rate": 8.94950631605023e-06, + "loss": 1.044, "step": 13993 }, { - "epoch": 0.3965541670208847, + "epoch": 0.5475389310587683, "grad_norm": 0.0, - "learning_rate": 1.3742945537677983e-05, - "loss": 1.0183, + "learning_rate": 8.948246104954351e-06, + "loss": 1.0642, "step": 13994 }, { - "epoch": 0.39658250446314713, + "epoch": 0.5475780577509978, "grad_norm": 0.0, - "learning_rate": 1.374209444687514e-05, - "loss": 0.9243, + "learning_rate": 8.946985910748322e-06, + "loss": 1.0749, "step": 13995 }, { - "epoch": 0.39661084190540963, + "epoch": 0.5476171844432272, "grad_norm": 0.0, - "learning_rate": 1.3741243324551246e-05, - "loss": 0.8712, + "learning_rate": 8.94572573345238e-06, + "loss": 1.0612, "step": 13996 }, { - "epoch": 0.39663917934767207, + "epoch": 0.5476563111354567, "grad_norm": 0.0, - "learning_rate": 1.3740392170713466e-05, - "loss": 0.912, + "learning_rate": 8.944465573086757e-06, + "loss": 1.232, "step": 13997 }, { - "epoch": 0.39666751678993456, + "epoch": 0.547695437827686, "grad_norm": 0.0, - "learning_rate": 1.3739540985368978e-05, - "loss": 0.944, + "learning_rate": 8.943205429671697e-06, + "loss": 1.0278, "step": 13998 }, { - "epoch": 0.396695854232197, + "epoch": 0.5477345645199155, "grad_norm": 0.0, - "learning_rate": 1.3738689768524946e-05, - "loss": 0.9994, + "learning_rate": 8.941945303227436e-06, + "loss": 1.1161, "step": 13999 }, { - "epoch": 0.39672419167445944, + "epoch": 0.5477736912121449, "grad_norm": 0.0, - "learning_rate": 1.3737838520188542e-05, - "loss": 0.9184, + "learning_rate": 8.940685193774207e-06, + "loss": 1.0292, "step": 14000 }, { - "epoch": 0.39675252911672193, + "epoch": 0.5478128179043744, "grad_norm": 0.0, - "learning_rate": 1.3736987240366937e-05, - "loss": 0.856, + "learning_rate": 8.939425101332245e-06, + "loss": 1.0168, "step": 14001 }, { - "epoch": 0.39678086655898437, + "epoch": 0.5478519445966038, "grad_norm": 0.0, - "learning_rate": 1.37361359290673e-05, - "loss": 0.8748, + "learning_rate": 8.938165025921786e-06, + "loss": 1.0742, "step": 14002 }, { - "epoch": 0.39680920400124686, + "epoch": 0.5478910712888332, "grad_norm": 0.0, - "learning_rate": 1.3735284586296802e-05, - "loss": 0.9212, + "learning_rate": 8.936904967563066e-06, + "loss": 1.1814, "step": 14003 }, { - "epoch": 0.3968375414435093, + "epoch": 0.5479301979810627, "grad_norm": 0.0, - "learning_rate": 1.3734433212062617e-05, - "loss": 0.9183, + "learning_rate": 8.935644926276322e-06, + "loss": 1.0023, "step": 14004 }, { - "epoch": 0.3968658788857718, + "epoch": 0.5479693246732921, "grad_norm": 0.0, - "learning_rate": 1.3733581806371911e-05, - "loss": 1.0013, + "learning_rate": 8.934384902081788e-06, + "loss": 0.9787, "step": 14005 }, { - "epoch": 0.39689421632803423, + "epoch": 0.5480084513655216, "grad_norm": 0.0, - "learning_rate": 1.3732730369231862e-05, - "loss": 0.774, + "learning_rate": 8.933124894999693e-06, + "loss": 1.0224, "step": 14006 }, { - "epoch": 0.39692255377029667, + "epoch": 0.548047578057751, "grad_norm": 0.0, - "learning_rate": 1.3731878900649638e-05, - "loss": 0.9114, + "learning_rate": 8.931864905050277e-06, + "loss": 1.006, "step": 14007 }, { - "epoch": 0.39695089121255916, + "epoch": 0.5480867047499804, "grad_norm": 0.0, - "learning_rate": 1.3731027400632413e-05, - "loss": 0.9683, + "learning_rate": 8.930604932253776e-06, + "loss": 1.0541, "step": 14008 }, { - "epoch": 0.3969792286548216, + "epoch": 0.5481258314422098, "grad_norm": 0.0, - "learning_rate": 1.373017586918736e-05, - "loss": 0.8992, + "learning_rate": 8.929344976630414e-06, + "loss": 1.0238, "step": 14009 }, { - "epoch": 0.3970075660970841, + "epoch": 0.5481649581344393, "grad_norm": 0.0, - "learning_rate": 1.3729324306321645e-05, - "loss": 0.9613, + "learning_rate": 8.928085038200433e-06, + "loss": 1.051, "step": 14010 }, { - "epoch": 0.39703590353934654, + "epoch": 0.5482040848266687, "grad_norm": 0.0, - "learning_rate": 1.372847271204245e-05, - "loss": 0.9634, + "learning_rate": 8.926825116984063e-06, + "loss": 1.0793, "step": 14011 }, { - "epoch": 0.397064240981609, + "epoch": 0.5482432115188982, "grad_norm": 0.0, - "learning_rate": 1.3727621086356947e-05, - "loss": 0.8946, + "learning_rate": 8.925565213001536e-06, + "loss": 1.075, "step": 14012 }, { - "epoch": 0.39709257842387147, + "epoch": 0.5482823382111276, "grad_norm": 0.0, - "learning_rate": 1.3726769429272302e-05, - "loss": 0.9119, + "learning_rate": 8.924305326273087e-06, + "loss": 1.1647, "step": 14013 }, { - "epoch": 0.3971209158661339, + "epoch": 0.5483214649033571, "grad_norm": 0.0, - "learning_rate": 1.3725917740795698e-05, - "loss": 1.0227, + "learning_rate": 8.923045456818947e-06, + "loss": 1.0405, "step": 14014 }, { - "epoch": 0.3971492533083964, + "epoch": 0.5483605915955865, "grad_norm": 0.0, - "learning_rate": 1.3725066020934306e-05, - "loss": 0.9269, + "learning_rate": 8.921785604659342e-06, + "loss": 1.0606, "step": 14015 }, { - "epoch": 0.39717759075065884, + "epoch": 0.548399718287816, "grad_norm": 0.0, - "learning_rate": 1.3724214269695297e-05, - "loss": 0.8811, + "learning_rate": 8.920525769814514e-06, + "loss": 1.0524, "step": 14016 }, { - "epoch": 0.39720592819292133, + "epoch": 0.5484388449800454, "grad_norm": 0.0, - "learning_rate": 1.3723362487085847e-05, - "loss": 0.8801, + "learning_rate": 8.91926595230469e-06, + "loss": 1.1039, "step": 14017 }, { - "epoch": 0.39723426563518377, + "epoch": 0.5484779716722749, "grad_norm": 0.0, - "learning_rate": 1.3722510673113136e-05, - "loss": 0.8836, + "learning_rate": 8.918006152150099e-06, + "loss": 0.9402, "step": 14018 }, { - "epoch": 0.3972626030774462, + "epoch": 0.5485170983645042, "grad_norm": 0.0, - "learning_rate": 1.3721658827784335e-05, - "loss": 0.8707, + "learning_rate": 8.916746369370975e-06, + "loss": 1.0022, "step": 14019 }, { - "epoch": 0.3972909405197087, + "epoch": 0.5485562250567337, "grad_norm": 0.0, - "learning_rate": 1.372080695110662e-05, - "loss": 0.942, + "learning_rate": 8.91548660398754e-06, + "loss": 1.0623, "step": 14020 }, { - "epoch": 0.39731927796197114, + "epoch": 0.5485953517489631, "grad_norm": 0.0, - "learning_rate": 1.3719955043087163e-05, - "loss": 0.9242, + "learning_rate": 8.91422685602004e-06, + "loss": 1.0594, "step": 14021 }, { - "epoch": 0.39734761540423363, + "epoch": 0.5486344784411926, "grad_norm": 0.0, - "learning_rate": 1.3719103103733147e-05, - "loss": 0.8948, + "learning_rate": 8.912967125488692e-06, + "loss": 1.1069, "step": 14022 }, { - "epoch": 0.39737595284649607, + "epoch": 0.548673605133422, "grad_norm": 0.0, - "learning_rate": 1.371825113305174e-05, - "loss": 0.7824, + "learning_rate": 8.911707412413732e-06, + "loss": 1.0364, "step": 14023 }, { - "epoch": 0.3974042902887585, + "epoch": 0.5487127318256515, "grad_norm": 0.0, - "learning_rate": 1.3717399131050128e-05, - "loss": 0.9423, + "learning_rate": 8.910447716815383e-06, + "loss": 1.1122, "step": 14024 }, { - "epoch": 0.397432627731021, + "epoch": 0.5487518585178809, "grad_norm": 0.0, - "learning_rate": 1.371654709773548e-05, - "loss": 0.9844, + "learning_rate": 8.909188038713881e-06, + "loss": 1.0823, "step": 14025 }, { - "epoch": 0.39746096517328344, + "epoch": 0.5487909852101104, "grad_norm": 0.0, - "learning_rate": 1.3715695033114974e-05, - "loss": 0.8696, + "learning_rate": 8.907928378129453e-06, + "loss": 1.025, "step": 14026 }, { - "epoch": 0.39748930261554594, + "epoch": 0.5488301119023398, "grad_norm": 0.0, - "learning_rate": 1.3714842937195794e-05, - "loss": 0.9614, + "learning_rate": 8.906668735082327e-06, + "loss": 1.0514, "step": 14027 }, { - "epoch": 0.3975176400578084, + "epoch": 0.5488692385945693, "grad_norm": 0.0, - "learning_rate": 1.3713990809985109e-05, - "loss": 0.8737, + "learning_rate": 8.905409109592727e-06, + "loss": 1.023, "step": 14028 }, { - "epoch": 0.39754597750007087, + "epoch": 0.5489083652867986, "grad_norm": 0.0, - "learning_rate": 1.37131386514901e-05, - "loss": 0.7734, + "learning_rate": 8.904149501680888e-06, + "loss": 1.0928, "step": 14029 }, { - "epoch": 0.3975743149423333, + "epoch": 0.5489474919790281, "grad_norm": 0.0, - "learning_rate": 1.3712286461717945e-05, - "loss": 0.8367, + "learning_rate": 8.902889911367038e-06, + "loss": 1.1962, "step": 14030 }, { - "epoch": 0.39760265238459574, + "epoch": 0.5489866186712575, "grad_norm": 0.0, - "learning_rate": 1.3711434240675825e-05, - "loss": 0.925, + "learning_rate": 8.9016303386714e-06, + "loss": 1.0835, "step": 14031 }, { - "epoch": 0.39763098982685824, + "epoch": 0.5490257453634869, "grad_norm": 0.0, - "learning_rate": 1.3710581988370915e-05, - "loss": 0.8752, + "learning_rate": 8.900370783614201e-06, + "loss": 0.9064, "step": 14032 }, { - "epoch": 0.3976593272691207, + "epoch": 0.5490648720557164, "grad_norm": 0.0, - "learning_rate": 1.3709729704810396e-05, - "loss": 0.8803, + "learning_rate": 8.899111246215665e-06, + "loss": 0.9001, "step": 14033 }, { - "epoch": 0.39768766471138317, + "epoch": 0.5491039987479458, "grad_norm": 0.0, - "learning_rate": 1.3708877390001442e-05, - "loss": 0.8501, + "learning_rate": 8.897851726496028e-06, + "loss": 1.0424, "step": 14034 }, { - "epoch": 0.3977160021536456, + "epoch": 0.5491431254401753, "grad_norm": 0.0, - "learning_rate": 1.370802504395124e-05, - "loss": 0.8881, + "learning_rate": 8.89659222447551e-06, + "loss": 1.0434, "step": 14035 }, { - "epoch": 0.39774433959590805, + "epoch": 0.5491822521324047, "grad_norm": 0.0, - "learning_rate": 1.3707172666666966e-05, - "loss": 0.9327, + "learning_rate": 8.89533274017434e-06, + "loss": 0.9904, "step": 14036 }, { - "epoch": 0.39777267703817054, + "epoch": 0.5492213788246342, "grad_norm": 0.0, - "learning_rate": 1.37063202581558e-05, - "loss": 0.972, + "learning_rate": 8.894073273612738e-06, + "loss": 0.9098, "step": 14037 }, { - "epoch": 0.397801014480433, + "epoch": 0.5492605055168636, "grad_norm": 0.0, - "learning_rate": 1.370546781842492e-05, - "loss": 0.8451, + "learning_rate": 8.892813824810936e-06, + "loss": 0.9964, "step": 14038 }, { - "epoch": 0.3978293519226955, + "epoch": 0.5492996322090931, "grad_norm": 0.0, - "learning_rate": 1.3704615347481511e-05, - "loss": 0.8783, + "learning_rate": 8.891554393789157e-06, + "loss": 0.9648, "step": 14039 }, { - "epoch": 0.3978576893649579, + "epoch": 0.5493387589013224, "grad_norm": 0.0, - "learning_rate": 1.370376284533275e-05, - "loss": 0.9165, + "learning_rate": 8.890294980567626e-06, + "loss": 1.0398, "step": 14040 }, { - "epoch": 0.3978860268072204, + "epoch": 0.5493778855935519, "grad_norm": 0.0, - "learning_rate": 1.3702910311985822e-05, - "loss": 0.9142, + "learning_rate": 8.889035585166567e-06, + "loss": 1.0208, "step": 14041 }, { - "epoch": 0.39791436424948284, + "epoch": 0.5494170122857813, "grad_norm": 0.0, - "learning_rate": 1.3702057747447903e-05, - "loss": 0.9826, + "learning_rate": 8.8877762076062e-06, + "loss": 1.0488, "step": 14042 }, { - "epoch": 0.3979427016917453, + "epoch": 0.5494561389780108, "grad_norm": 0.0, - "learning_rate": 1.3701205151726177e-05, - "loss": 0.8494, + "learning_rate": 8.886516847906757e-06, + "loss": 1.0201, "step": 14043 }, { - "epoch": 0.3979710391340078, + "epoch": 0.5494952656702402, "grad_norm": 0.0, - "learning_rate": 1.3700352524827825e-05, - "loss": 0.9485, + "learning_rate": 8.885257506088459e-06, + "loss": 0.9932, "step": 14044 }, { - "epoch": 0.3979993765762702, + "epoch": 0.5495343923624697, "grad_norm": 0.0, - "learning_rate": 1.3699499866760032e-05, - "loss": 0.9487, + "learning_rate": 8.88399818217153e-06, + "loss": 1.0342, "step": 14045 }, { - "epoch": 0.3980277140185327, + "epoch": 0.5495735190546991, "grad_norm": 0.0, - "learning_rate": 1.3698647177529974e-05, - "loss": 0.9937, + "learning_rate": 8.882738876176188e-06, + "loss": 1.1172, "step": 14046 }, { - "epoch": 0.39805605146079515, + "epoch": 0.5496126457469286, "grad_norm": 0.0, - "learning_rate": 1.369779445714484e-05, - "loss": 0.9229, + "learning_rate": 8.881479588122662e-06, + "loss": 1.0766, "step": 14047 }, { - "epoch": 0.3980843889030576, + "epoch": 0.549651772439158, "grad_norm": 0.0, - "learning_rate": 1.3696941705611811e-05, - "loss": 0.9255, + "learning_rate": 8.880220318031168e-06, + "loss": 1.002, "step": 14048 }, { - "epoch": 0.3981127263453201, + "epoch": 0.5496908991313875, "grad_norm": 0.0, - "learning_rate": 1.3696088922938065e-05, - "loss": 0.9365, + "learning_rate": 8.878961065921937e-06, + "loss": 1.1501, "step": 14049 }, { - "epoch": 0.3981410637875825, + "epoch": 0.5497300258236169, "grad_norm": 0.0, - "learning_rate": 1.3695236109130792e-05, - "loss": 1.2091, + "learning_rate": 8.877701831815186e-06, + "loss": 1.081, "step": 14050 }, { - "epoch": 0.398169401229845, + "epoch": 0.5497691525158463, "grad_norm": 0.0, - "learning_rate": 1.3694383264197173e-05, - "loss": 0.9106, + "learning_rate": 8.876442615731136e-06, + "loss": 1.0541, "step": 14051 }, { - "epoch": 0.39819773867210745, + "epoch": 0.5498082792080757, "grad_norm": 0.0, - "learning_rate": 1.3693530388144394e-05, - "loss": 0.9239, + "learning_rate": 8.875183417690011e-06, + "loss": 0.9972, "step": 14052 }, { - "epoch": 0.39822607611436994, + "epoch": 0.5498474059003052, "grad_norm": 0.0, - "learning_rate": 1.3692677480979635e-05, - "loss": 0.8606, + "learning_rate": 8.87392423771203e-06, + "loss": 1.0917, "step": 14053 }, { - "epoch": 0.3982544135566324, + "epoch": 0.5498865325925346, "grad_norm": 0.0, - "learning_rate": 1.3691824542710082e-05, - "loss": 1.0208, + "learning_rate": 8.872665075817414e-06, + "loss": 1.0477, "step": 14054 }, { - "epoch": 0.3982827509988948, + "epoch": 0.5499256592847641, "grad_norm": 0.0, - "learning_rate": 1.3690971573342921e-05, - "loss": 0.8534, + "learning_rate": 8.871405932026383e-06, + "loss": 1.0081, "step": 14055 }, { - "epoch": 0.3983110884411573, + "epoch": 0.5499647859769935, "grad_norm": 0.0, - "learning_rate": 1.3690118572885334e-05, - "loss": 0.9748, + "learning_rate": 8.870146806359163e-06, + "loss": 1.0796, "step": 14056 }, { - "epoch": 0.39833942588341975, + "epoch": 0.550003912669223, "grad_norm": 0.0, - "learning_rate": 1.368926554134451e-05, - "loss": 0.903, + "learning_rate": 8.868887698835968e-06, + "loss": 1.0818, "step": 14057 }, { - "epoch": 0.39836776332568224, + "epoch": 0.5500430393614524, "grad_norm": 0.0, - "learning_rate": 1.3688412478727633e-05, - "loss": 0.871, + "learning_rate": 8.867628609477019e-06, + "loss": 1.1707, "step": 14058 }, { - "epoch": 0.3983961007679447, + "epoch": 0.5500821660536819, "grad_norm": 0.0, - "learning_rate": 1.3687559385041884e-05, - "loss": 1.0474, + "learning_rate": 8.866369538302535e-06, + "loss": 1.0934, "step": 14059 }, { - "epoch": 0.3984244382102071, + "epoch": 0.5501212927459113, "grad_norm": 0.0, - "learning_rate": 1.3686706260294458e-05, - "loss": 0.9211, + "learning_rate": 8.865110485332731e-06, + "loss": 1.0588, "step": 14060 }, { - "epoch": 0.3984527756524696, + "epoch": 0.5501604194381406, "grad_norm": 0.0, - "learning_rate": 1.3685853104492534e-05, - "loss": 0.9422, + "learning_rate": 8.863851450587837e-06, + "loss": 1.0598, "step": 14061 }, { - "epoch": 0.39848111309473205, + "epoch": 0.5501995461303701, "grad_norm": 0.0, - "learning_rate": 1.36849999176433e-05, - "loss": 0.8847, + "learning_rate": 8.862592434088063e-06, + "loss": 0.9261, "step": 14062 }, { - "epoch": 0.39850945053699455, + "epoch": 0.5502386728225995, "grad_norm": 0.0, - "learning_rate": 1.3684146699753942e-05, - "loss": 0.8538, + "learning_rate": 8.86133343585363e-06, + "loss": 1.0607, "step": 14063 }, { - "epoch": 0.398537787979257, + "epoch": 0.550277799514829, "grad_norm": 0.0, - "learning_rate": 1.3683293450831649e-05, - "loss": 0.712, + "learning_rate": 8.860074455904753e-06, + "loss": 1.014, "step": 14064 }, { - "epoch": 0.3985661254215195, + "epoch": 0.5503169262070584, "grad_norm": 0.0, - "learning_rate": 1.368244017088361e-05, - "loss": 1.0855, + "learning_rate": 8.858815494261653e-06, + "loss": 1.1822, "step": 14065 }, { - "epoch": 0.3985944628637819, + "epoch": 0.5503560528992879, "grad_norm": 0.0, - "learning_rate": 1.3681586859917011e-05, - "loss": 0.9648, + "learning_rate": 8.857556550944548e-06, + "loss": 1.0945, "step": 14066 }, { - "epoch": 0.39862280030604436, + "epoch": 0.5503951795915173, "grad_norm": 0.0, - "learning_rate": 1.3680733517939034e-05, - "loss": 0.8914, + "learning_rate": 8.856297625973652e-06, + "loss": 1.0049, "step": 14067 }, { - "epoch": 0.39865113774830685, + "epoch": 0.5504343062837468, "grad_norm": 0.0, - "learning_rate": 1.3679880144956875e-05, - "loss": 0.9557, + "learning_rate": 8.855038719369185e-06, + "loss": 0.9844, "step": 14068 }, { - "epoch": 0.3986794751905693, + "epoch": 0.5504734329759762, "grad_norm": 0.0, - "learning_rate": 1.3679026740977717e-05, - "loss": 0.9586, + "learning_rate": 8.853779831151355e-06, + "loss": 0.9807, "step": 14069 }, { - "epoch": 0.3987078126328318, + "epoch": 0.5505125596682057, "grad_norm": 0.0, - "learning_rate": 1.3678173306008753e-05, - "loss": 0.9272, + "learning_rate": 8.852520961340389e-06, + "loss": 1.0112, "step": 14070 }, { - "epoch": 0.3987361500750942, + "epoch": 0.550551686360435, "grad_norm": 0.0, - "learning_rate": 1.3677319840057166e-05, - "loss": 0.9431, + "learning_rate": 8.8512621099565e-06, + "loss": 1.092, "step": 14071 }, { - "epoch": 0.39876448751735666, + "epoch": 0.5505908130526646, "grad_norm": 0.0, - "learning_rate": 1.367646634313015e-05, - "loss": 0.895, + "learning_rate": 8.850003277019901e-06, + "loss": 1.0909, "step": 14072 }, { - "epoch": 0.39879282495961915, + "epoch": 0.5506299397448939, "grad_norm": 0.0, - "learning_rate": 1.3675612815234896e-05, - "loss": 0.8965, + "learning_rate": 8.848744462550804e-06, + "loss": 0.8909, "step": 14073 }, { - "epoch": 0.3988211624018816, + "epoch": 0.5506690664371234, "grad_norm": 0.0, - "learning_rate": 1.3674759256378585e-05, - "loss": 1.0139, + "learning_rate": 8.847485666569434e-06, + "loss": 1.1405, "step": 14074 }, { - "epoch": 0.3988494998441441, + "epoch": 0.5507081931293528, "grad_norm": 0.0, - "learning_rate": 1.3673905666568414e-05, - "loss": 0.882, + "learning_rate": 8.846226889095998e-06, + "loss": 1.0898, "step": 14075 }, { - "epoch": 0.3988778372864065, + "epoch": 0.5507473198215823, "grad_norm": 0.0, - "learning_rate": 1.3673052045811572e-05, - "loss": 0.9701, + "learning_rate": 8.844968130150714e-06, + "loss": 0.9536, "step": 14076 }, { - "epoch": 0.398906174728669, + "epoch": 0.5507864465138117, "grad_norm": 0.0, - "learning_rate": 1.367219839411525e-05, - "loss": 0.9451, + "learning_rate": 8.843709389753792e-06, + "loss": 0.9403, "step": 14077 }, { - "epoch": 0.39893451217093145, + "epoch": 0.5508255732060412, "grad_norm": 0.0, - "learning_rate": 1.3671344711486638e-05, - "loss": 1.0214, + "learning_rate": 8.842450667925446e-06, + "loss": 0.9451, "step": 14078 }, { - "epoch": 0.3989628496131939, + "epoch": 0.5508646998982706, "grad_norm": 0.0, - "learning_rate": 1.3670490997932922e-05, - "loss": 0.994, + "learning_rate": 8.841191964685896e-06, + "loss": 0.983, "step": 14079 }, { - "epoch": 0.3989911870554564, + "epoch": 0.5509038265905001, "grad_norm": 0.0, - "learning_rate": 1.36696372534613e-05, - "loss": 0.9168, + "learning_rate": 8.839933280055352e-06, + "loss": 1.0579, "step": 14080 }, { - "epoch": 0.3990195244977188, + "epoch": 0.5509429532827295, "grad_norm": 0.0, - "learning_rate": 1.366878347807896e-05, - "loss": 0.9272, + "learning_rate": 8.838674614054024e-06, + "loss": 0.9144, "step": 14081 }, { - "epoch": 0.3990478619399813, + "epoch": 0.550982079974959, "grad_norm": 0.0, - "learning_rate": 1.3667929671793094e-05, - "loss": 0.9213, + "learning_rate": 8.837415966702123e-06, + "loss": 1.0603, "step": 14082 }, { - "epoch": 0.39907619938224376, + "epoch": 0.5510212066671883, "grad_norm": 0.0, - "learning_rate": 1.3667075834610894e-05, - "loss": 0.8736, + "learning_rate": 8.836157338019869e-06, + "loss": 1.0349, "step": 14083 }, { - "epoch": 0.3991045368245062, + "epoch": 0.5510603333594178, "grad_norm": 0.0, - "learning_rate": 1.3666221966539554e-05, - "loss": 0.9565, + "learning_rate": 8.83489872802747e-06, + "loss": 1.0541, "step": 14084 }, { - "epoch": 0.3991328742667687, + "epoch": 0.5510994600516472, "grad_norm": 0.0, - "learning_rate": 1.3665368067586267e-05, - "loss": 0.8549, + "learning_rate": 8.833640136745137e-06, + "loss": 1.0095, "step": 14085 }, { - "epoch": 0.3991612117090311, + "epoch": 0.5511385867438767, "grad_norm": 0.0, - "learning_rate": 1.366451413775822e-05, - "loss": 0.9449, + "learning_rate": 8.83238156419308e-06, + "loss": 1.2173, "step": 14086 }, { - "epoch": 0.3991895491512936, + "epoch": 0.5511777134361061, "grad_norm": 0.0, - "learning_rate": 1.366366017706261e-05, - "loss": 0.7923, + "learning_rate": 8.83112301039151e-06, + "loss": 0.9911, "step": 14087 }, { - "epoch": 0.39921788659355606, + "epoch": 0.5512168401283356, "grad_norm": 0.0, - "learning_rate": 1.366280618550663e-05, - "loss": 0.9202, + "learning_rate": 8.829864475360646e-06, + "loss": 1.14, "step": 14088 }, { - "epoch": 0.39924622403581855, + "epoch": 0.551255966820565, "grad_norm": 0.0, - "learning_rate": 1.3661952163097474e-05, - "loss": 0.8476, + "learning_rate": 8.82860595912069e-06, + "loss": 1.1001, "step": 14089 }, { - "epoch": 0.399274561478081, + "epoch": 0.5512950935127944, "grad_norm": 0.0, - "learning_rate": 1.3661098109842336e-05, - "loss": 0.9194, + "learning_rate": 8.827347461691853e-06, + "loss": 1.1255, "step": 14090 }, { - "epoch": 0.39930289892034343, + "epoch": 0.5513342202050239, "grad_norm": 0.0, - "learning_rate": 1.366024402574841e-05, - "loss": 0.9679, + "learning_rate": 8.826088983094347e-06, + "loss": 0.9696, "step": 14091 }, { - "epoch": 0.3993312363626059, + "epoch": 0.5513733468972533, "grad_norm": 0.0, - "learning_rate": 1.3659389910822887e-05, - "loss": 1.0038, + "learning_rate": 8.824830523348383e-06, + "loss": 1.0543, "step": 14092 }, { - "epoch": 0.39935957380486836, + "epoch": 0.5514124735894828, "grad_norm": 0.0, - "learning_rate": 1.3658535765072964e-05, - "loss": 0.9896, + "learning_rate": 8.823572082474165e-06, + "loss": 0.9575, "step": 14093 }, { - "epoch": 0.39938791124713086, + "epoch": 0.5514516002817121, "grad_norm": 0.0, - "learning_rate": 1.3657681588505835e-05, - "loss": 0.9421, + "learning_rate": 8.822313660491905e-06, + "loss": 1.0554, "step": 14094 }, { - "epoch": 0.3994162486893933, + "epoch": 0.5514907269739416, "grad_norm": 0.0, - "learning_rate": 1.3656827381128697e-05, - "loss": 0.913, + "learning_rate": 8.821055257421813e-06, + "loss": 1.0309, "step": 14095 }, { - "epoch": 0.39944458613165573, + "epoch": 0.551529853666171, "grad_norm": 0.0, - "learning_rate": 1.3655973142948743e-05, - "loss": 1.038, + "learning_rate": 8.819796873284098e-06, + "loss": 0.9818, "step": 14096 }, { - "epoch": 0.3994729235739182, + "epoch": 0.5515689803584005, "grad_norm": 0.0, - "learning_rate": 1.3655118873973172e-05, - "loss": 0.896, + "learning_rate": 8.818538508098965e-06, + "loss": 0.9736, "step": 14097 }, { - "epoch": 0.39950126101618066, + "epoch": 0.5516081070506299, "grad_norm": 0.0, - "learning_rate": 1.3654264574209175e-05, - "loss": 0.9427, + "learning_rate": 8.817280161886624e-06, + "loss": 1.0132, "step": 14098 }, { - "epoch": 0.39952959845844316, + "epoch": 0.5516472337428594, "grad_norm": 0.0, - "learning_rate": 1.3653410243663953e-05, - "loss": 1.0154, + "learning_rate": 8.81602183466728e-06, + "loss": 0.9031, "step": 14099 }, { - "epoch": 0.3995579359007056, + "epoch": 0.5516863604350888, "grad_norm": 0.0, - "learning_rate": 1.3652555882344696e-05, - "loss": 0.8216, + "learning_rate": 8.81476352646114e-06, + "loss": 1.058, "step": 14100 }, { - "epoch": 0.3995862733429681, + "epoch": 0.5517254871273183, "grad_norm": 0.0, - "learning_rate": 1.3651701490258607e-05, - "loss": 0.8541, + "learning_rate": 8.813505237288416e-06, + "loss": 1.009, "step": 14101 }, { - "epoch": 0.39961461078523053, + "epoch": 0.5517646138195477, "grad_norm": 0.0, - "learning_rate": 1.365084706741288e-05, - "loss": 0.8982, + "learning_rate": 8.812246967169312e-06, + "loss": 1.0047, "step": 14102 }, { - "epoch": 0.39964294822749297, + "epoch": 0.5518037405117772, "grad_norm": 0.0, - "learning_rate": 1.3649992613814714e-05, - "loss": 0.8966, + "learning_rate": 8.810988716124032e-06, + "loss": 1.0386, "step": 14103 }, { - "epoch": 0.39967128566975546, + "epoch": 0.5518428672040065, "grad_norm": 0.0, - "learning_rate": 1.3649138129471302e-05, - "loss": 0.9821, + "learning_rate": 8.80973048417278e-06, + "loss": 1.1073, "step": 14104 }, { - "epoch": 0.3996996231120179, + "epoch": 0.551881993896236, "grad_norm": 0.0, - "learning_rate": 1.3648283614389846e-05, - "loss": 1.02, + "learning_rate": 8.808472271335767e-06, + "loss": 1.0067, "step": 14105 }, { - "epoch": 0.3997279605542804, + "epoch": 0.5519211205884654, "grad_norm": 0.0, - "learning_rate": 1.3647429068577544e-05, - "loss": 0.9926, + "learning_rate": 8.807214077633198e-06, + "loss": 1.1812, "step": 14106 }, { - "epoch": 0.39975629799654283, + "epoch": 0.5519602472806949, "grad_norm": 0.0, - "learning_rate": 1.364657449204159e-05, - "loss": 0.813, + "learning_rate": 8.805955903085277e-06, + "loss": 1.1749, "step": 14107 }, { - "epoch": 0.39978463543880527, + "epoch": 0.5519993739729243, "grad_norm": 0.0, - "learning_rate": 1.3645719884789184e-05, - "loss": 0.867, + "learning_rate": 8.804697747712206e-06, + "loss": 0.9899, "step": 14108 }, { - "epoch": 0.39981297288106776, + "epoch": 0.5520385006651538, "grad_norm": 0.0, - "learning_rate": 1.3644865246827528e-05, - "loss": 0.9881, + "learning_rate": 8.803439611534187e-06, + "loss": 1.0935, "step": 14109 }, { - "epoch": 0.3998413103233302, + "epoch": 0.5520776273573832, "grad_norm": 0.0, - "learning_rate": 1.3644010578163818e-05, - "loss": 0.8351, + "learning_rate": 8.802181494571435e-06, + "loss": 1.1671, "step": 14110 }, { - "epoch": 0.3998696477655927, + "epoch": 0.5521167540496127, "grad_norm": 0.0, - "learning_rate": 1.3643155878805255e-05, - "loss": 0.8945, + "learning_rate": 8.800923396844147e-06, + "loss": 1.1059, "step": 14111 }, { - "epoch": 0.39989798520785513, + "epoch": 0.5521558807418421, "grad_norm": 0.0, - "learning_rate": 1.3642301148759034e-05, - "loss": 0.9022, + "learning_rate": 8.799665318372526e-06, + "loss": 1.1186, "step": 14112 }, { - "epoch": 0.3999263226501176, + "epoch": 0.5521950074340716, "grad_norm": 0.0, - "learning_rate": 1.364144638803236e-05, - "loss": 0.9179, + "learning_rate": 8.79840725917677e-06, + "loss": 0.8793, "step": 14113 }, { - "epoch": 0.39995466009238007, + "epoch": 0.552234134126301, "grad_norm": 0.0, - "learning_rate": 1.3640591596632429e-05, - "loss": 1.0213, + "learning_rate": 8.797149219277094e-06, + "loss": 1.0879, "step": 14114 }, { - "epoch": 0.3999829975346425, + "epoch": 0.5522732608185305, "grad_norm": 0.0, - "learning_rate": 1.3639736774566445e-05, - "loss": 0.938, + "learning_rate": 8.795891198693694e-06, + "loss": 1.0959, "step": 14115 }, { - "epoch": 0.400011334976905, + "epoch": 0.5523123875107598, "grad_norm": 0.0, - "learning_rate": 1.3638881921841607e-05, - "loss": 0.931, + "learning_rate": 8.79463319744677e-06, + "loss": 0.9131, "step": 14116 }, { - "epoch": 0.40003967241916744, + "epoch": 0.5523515142029892, "grad_norm": 0.0, - "learning_rate": 1.3638027038465114e-05, - "loss": 0.9362, + "learning_rate": 8.79337521555653e-06, + "loss": 1.1047, "step": 14117 }, { - "epoch": 0.40006800986142993, + "epoch": 0.5523906408952187, "grad_norm": 0.0, - "learning_rate": 1.3637172124444169e-05, - "loss": 0.9259, + "learning_rate": 8.792117253043166e-06, + "loss": 0.8925, "step": 14118 }, { - "epoch": 0.40009634730369237, + "epoch": 0.5524297675874481, "grad_norm": 0.0, - "learning_rate": 1.3636317179785972e-05, - "loss": 0.8566, + "learning_rate": 8.790859309926891e-06, + "loss": 1.1133, "step": 14119 }, { - "epoch": 0.4001246847459548, + "epoch": 0.5524688942796776, "grad_norm": 0.0, - "learning_rate": 1.3635462204497724e-05, - "loss": 0.8392, + "learning_rate": 8.789601386227899e-06, + "loss": 1.0943, "step": 14120 }, { - "epoch": 0.4001530221882173, + "epoch": 0.552508020971907, "grad_norm": 0.0, - "learning_rate": 1.363460719858663e-05, - "loss": 0.9932, + "learning_rate": 8.788343481966393e-06, + "loss": 0.9841, "step": 14121 }, { - "epoch": 0.40018135963047974, + "epoch": 0.5525471476641365, "grad_norm": 0.0, - "learning_rate": 1.3633752162059888e-05, - "loss": 0.963, + "learning_rate": 8.787085597162568e-06, + "loss": 1.005, "step": 14122 }, { - "epoch": 0.40020969707274223, + "epoch": 0.5525862743563659, "grad_norm": 0.0, - "learning_rate": 1.3632897094924704e-05, - "loss": 0.9416, + "learning_rate": 8.785827731836631e-06, + "loss": 0.8887, "step": 14123 }, { - "epoch": 0.40023803451500467, + "epoch": 0.5526254010485954, "grad_norm": 0.0, - "learning_rate": 1.3632041997188278e-05, - "loss": 0.9725, + "learning_rate": 8.784569886008781e-06, + "loss": 0.9661, "step": 14124 }, { - "epoch": 0.40026637195726716, + "epoch": 0.5526645277408248, "grad_norm": 0.0, - "learning_rate": 1.3631186868857813e-05, - "loss": 1.0133, + "learning_rate": 8.783312059699215e-06, + "loss": 1.0652, "step": 14125 }, { - "epoch": 0.4002947093995296, + "epoch": 0.5527036544330542, "grad_norm": 0.0, - "learning_rate": 1.3630331709940514e-05, - "loss": 0.9045, + "learning_rate": 8.782054252928132e-06, + "loss": 1.1127, "step": 14126 }, { - "epoch": 0.40032304684179204, + "epoch": 0.5527427811252836, "grad_norm": 0.0, - "learning_rate": 1.362947652044358e-05, - "loss": 0.8735, + "learning_rate": 8.78079646571573e-06, + "loss": 0.9003, "step": 14127 }, { - "epoch": 0.40035138428405453, + "epoch": 0.5527819078175131, "grad_norm": 0.0, - "learning_rate": 1.362862130037422e-05, - "loss": 0.9176, + "learning_rate": 8.77953869808221e-06, + "loss": 1.0944, "step": 14128 }, { - "epoch": 0.400379721726317, + "epoch": 0.5528210345097425, "grad_norm": 0.0, - "learning_rate": 1.3627766049739635e-05, - "loss": 0.863, + "learning_rate": 8.77828095004777e-06, + "loss": 1.0978, "step": 14129 }, { - "epoch": 0.40040805916857947, + "epoch": 0.552860161201972, "grad_norm": 0.0, - "learning_rate": 1.362691076854703e-05, - "loss": 0.8693, + "learning_rate": 8.777023221632609e-06, + "loss": 1.1934, "step": 14130 }, { - "epoch": 0.4004363966108419, + "epoch": 0.5528992878942014, "grad_norm": 0.0, - "learning_rate": 1.3626055456803608e-05, - "loss": 0.9711, + "learning_rate": 8.775765512856919e-06, + "loss": 0.9171, "step": 14131 }, { - "epoch": 0.40046473405310434, + "epoch": 0.5529384145864309, "grad_norm": 0.0, - "learning_rate": 1.3625200114516574e-05, - "loss": 0.9105, + "learning_rate": 8.7745078237409e-06, + "loss": 1.1401, "step": 14132 }, { - "epoch": 0.40049307149536684, + "epoch": 0.5529775412786603, "grad_norm": 0.0, - "learning_rate": 1.3624344741693134e-05, - "loss": 0.8067, + "learning_rate": 8.773250154304754e-06, + "loss": 0.9334, "step": 14133 }, { - "epoch": 0.4005214089376293, + "epoch": 0.5530166679708898, "grad_norm": 0.0, - "learning_rate": 1.3623489338340491e-05, - "loss": 0.9022, + "learning_rate": 8.77199250456867e-06, + "loss": 1.0674, "step": 14134 }, { - "epoch": 0.40054974637989177, + "epoch": 0.5530557946631192, "grad_norm": 0.0, - "learning_rate": 1.3622633904465855e-05, - "loss": 1.0006, + "learning_rate": 8.770734874552851e-06, + "loss": 1.0483, "step": 14135 }, { - "epoch": 0.4005780838221542, + "epoch": 0.5530949213553487, "grad_norm": 0.0, - "learning_rate": 1.3621778440076426e-05, - "loss": 0.8679, + "learning_rate": 8.769477264277485e-06, + "loss": 1.2802, "step": 14136 }, { - "epoch": 0.4006064212644167, + "epoch": 0.553134048047578, "grad_norm": 0.0, - "learning_rate": 1.3620922945179411e-05, - "loss": 0.8361, + "learning_rate": 8.768219673762775e-06, + "loss": 0.9799, "step": 14137 }, { - "epoch": 0.40063475870667914, + "epoch": 0.5531731747398075, "grad_norm": 0.0, - "learning_rate": 1.3620067419782019e-05, - "loss": 0.9717, + "learning_rate": 8.766962103028912e-06, + "loss": 1.0598, "step": 14138 }, { - "epoch": 0.4006630961489416, + "epoch": 0.5532123014320369, "grad_norm": 0.0, - "learning_rate": 1.3619211863891458e-05, - "loss": 0.9219, + "learning_rate": 8.765704552096094e-06, + "loss": 1.0288, "step": 14139 }, { - "epoch": 0.40069143359120407, + "epoch": 0.5532514281242664, "grad_norm": 0.0, - "learning_rate": 1.3618356277514924e-05, - "loss": 0.94, + "learning_rate": 8.764447020984513e-06, + "loss": 1.0553, "step": 14140 }, { - "epoch": 0.4007197710334665, + "epoch": 0.5532905548164958, "grad_norm": 0.0, - "learning_rate": 1.3617500660659635e-05, - "loss": 0.8554, + "learning_rate": 8.763189509714367e-06, + "loss": 1.0483, "step": 14141 }, { - "epoch": 0.400748108475729, + "epoch": 0.5533296815087253, "grad_norm": 0.0, - "learning_rate": 1.3616645013332796e-05, - "loss": 0.8416, + "learning_rate": 8.761932018305846e-06, + "loss": 0.9946, "step": 14142 }, { - "epoch": 0.40077644591799144, + "epoch": 0.5533688082009547, "grad_norm": 0.0, - "learning_rate": 1.3615789335541613e-05, - "loss": 0.8813, + "learning_rate": 8.760674546779144e-06, + "loss": 0.9258, "step": 14143 }, { - "epoch": 0.4008047833602539, + "epoch": 0.5534079348931842, "grad_norm": 0.0, - "learning_rate": 1.3614933627293294e-05, - "loss": 1.0104, + "learning_rate": 8.759417095154456e-06, + "loss": 1.0838, "step": 14144 }, { - "epoch": 0.4008331208025164, + "epoch": 0.5534470615854136, "grad_norm": 0.0, - "learning_rate": 1.3614077888595047e-05, - "loss": 0.9317, + "learning_rate": 8.758159663451978e-06, + "loss": 1.028, "step": 14145 }, { - "epoch": 0.4008614582447788, + "epoch": 0.553486188277643, "grad_norm": 0.0, - "learning_rate": 1.3613222119454077e-05, - "loss": 0.9232, + "learning_rate": 8.7569022516919e-06, + "loss": 0.9513, "step": 14146 }, { - "epoch": 0.4008897956870413, + "epoch": 0.5535253149698725, "grad_norm": 0.0, - "learning_rate": 1.3612366319877597e-05, - "loss": 0.9533, + "learning_rate": 8.755644859894412e-06, + "loss": 1.0444, "step": 14147 }, { - "epoch": 0.40091813312930374, + "epoch": 0.5535644416621018, "grad_norm": 0.0, - "learning_rate": 1.3611510489872815e-05, - "loss": 0.8813, + "learning_rate": 8.754387488079708e-06, + "loss": 1.0585, "step": 14148 }, { - "epoch": 0.40094647057156624, + "epoch": 0.5536035683543313, "grad_norm": 0.0, - "learning_rate": 1.3610654629446938e-05, - "loss": 0.8627, + "learning_rate": 8.753130136267979e-06, + "loss": 1.0353, "step": 14149 }, { - "epoch": 0.4009748080138287, + "epoch": 0.5536426950465607, "grad_norm": 0.0, - "learning_rate": 1.3609798738607176e-05, - "loss": 0.8726, + "learning_rate": 8.751872804479418e-06, + "loss": 1.0261, "step": 14150 }, { - "epoch": 0.4010031454560911, + "epoch": 0.5536818217387902, "grad_norm": 0.0, - "learning_rate": 1.360894281736074e-05, - "loss": 0.8224, + "learning_rate": 8.750615492734219e-06, + "loss": 1.1213, "step": 14151 }, { - "epoch": 0.4010314828983536, + "epoch": 0.5537209484310196, "grad_norm": 0.0, - "learning_rate": 1.3608086865714838e-05, - "loss": 0.9429, + "learning_rate": 8.749358201052568e-06, + "loss": 0.9806, "step": 14152 }, { - "epoch": 0.40105982034061605, + "epoch": 0.5537600751232491, "grad_norm": 0.0, - "learning_rate": 1.3607230883676678e-05, - "loss": 0.7753, + "learning_rate": 8.748100929454652e-06, + "loss": 1.1255, "step": 14153 }, { - "epoch": 0.40108815778287854, + "epoch": 0.5537992018154785, "grad_norm": 0.0, - "learning_rate": 1.3606374871253474e-05, - "loss": 1.0494, + "learning_rate": 8.746843677960673e-06, + "loss": 1.0076, "step": 14154 }, { - "epoch": 0.401116495225141, + "epoch": 0.553838328507708, "grad_norm": 0.0, - "learning_rate": 1.3605518828452437e-05, - "loss": 0.8298, + "learning_rate": 8.745586446590813e-06, + "loss": 0.9826, "step": 14155 }, { - "epoch": 0.4011448326674034, + "epoch": 0.5538774551999374, "grad_norm": 0.0, - "learning_rate": 1.3604662755280775e-05, - "loss": 0.8623, + "learning_rate": 8.744329235365263e-06, + "loss": 1.0724, "step": 14156 }, { - "epoch": 0.4011731701096659, + "epoch": 0.5539165818921669, "grad_norm": 0.0, - "learning_rate": 1.3603806651745701e-05, - "loss": 0.9326, + "learning_rate": 8.743072044304212e-06, + "loss": 1.0153, "step": 14157 }, { - "epoch": 0.40120150755192835, + "epoch": 0.5539557085843962, "grad_norm": 0.0, - "learning_rate": 1.3602950517854426e-05, - "loss": 0.9349, + "learning_rate": 8.741814873427844e-06, + "loss": 1.0083, "step": 14158 }, { - "epoch": 0.40122984499419084, + "epoch": 0.5539948352766257, "grad_norm": 0.0, - "learning_rate": 1.360209435361416e-05, - "loss": 0.8934, + "learning_rate": 8.740557722756358e-06, + "loss": 1.1329, "step": 14159 }, { - "epoch": 0.4012581824364533, + "epoch": 0.5540339619688551, "grad_norm": 0.0, - "learning_rate": 1.3601238159032113e-05, - "loss": 0.8932, + "learning_rate": 8.739300592309934e-06, + "loss": 1.0607, "step": 14160 }, { - "epoch": 0.4012865198787157, + "epoch": 0.5540730886610846, "grad_norm": 0.0, - "learning_rate": 1.3600381934115502e-05, - "loss": 0.9014, + "learning_rate": 8.738043482108767e-06, + "loss": 0.9339, "step": 14161 }, { - "epoch": 0.4013148573209782, + "epoch": 0.554112215353314, "grad_norm": 0.0, - "learning_rate": 1.3599525678871536e-05, - "loss": 0.9355, + "learning_rate": 8.736786392173033e-06, + "loss": 0.9541, "step": 14162 }, { - "epoch": 0.40134319476324065, + "epoch": 0.5541513420455435, "grad_norm": 0.0, - "learning_rate": 1.3598669393307429e-05, - "loss": 0.9135, + "learning_rate": 8.735529322522932e-06, + "loss": 0.9958, "step": 14163 }, { - "epoch": 0.40137153220550315, + "epoch": 0.5541904687377729, "grad_norm": 0.0, - "learning_rate": 1.3597813077430395e-05, - "loss": 0.9896, + "learning_rate": 8.734272273178646e-06, + "loss": 0.9473, "step": 14164 }, { - "epoch": 0.4013998696477656, + "epoch": 0.5542295954300024, "grad_norm": 0.0, - "learning_rate": 1.3596956731247646e-05, - "loss": 0.8488, + "learning_rate": 8.73301524416036e-06, + "loss": 1.076, "step": 14165 }, { - "epoch": 0.4014282070900281, + "epoch": 0.5542687221222318, "grad_norm": 0.0, - "learning_rate": 1.359610035476639e-05, - "loss": 0.9747, + "learning_rate": 8.731758235488263e-06, + "loss": 0.8716, "step": 14166 }, { - "epoch": 0.4014565445322905, + "epoch": 0.5543078488144613, "grad_norm": 0.0, - "learning_rate": 1.3595243947993848e-05, - "loss": 0.9061, + "learning_rate": 8.730501247182535e-06, + "loss": 1.0671, "step": 14167 }, { - "epoch": 0.40148488197455295, + "epoch": 0.5543469755066907, "grad_norm": 0.0, - "learning_rate": 1.3594387510937232e-05, - "loss": 1.0276, + "learning_rate": 8.72924427926337e-06, + "loss": 1.0364, "step": 14168 }, { - "epoch": 0.40151321941681545, + "epoch": 0.5543861021989201, "grad_norm": 0.0, - "learning_rate": 1.3593531043603756e-05, - "loss": 0.9131, + "learning_rate": 8.727987331750948e-06, + "loss": 1.0105, "step": 14169 }, { - "epoch": 0.4015415568590779, + "epoch": 0.5544252288911495, "grad_norm": 0.0, - "learning_rate": 1.3592674546000631e-05, - "loss": 0.8212, + "learning_rate": 8.726730404665458e-06, + "loss": 1.0177, "step": 14170 }, { - "epoch": 0.4015698943013404, + "epoch": 0.554464355583379, "grad_norm": 0.0, - "learning_rate": 1.3591818018135077e-05, - "loss": 0.9278, + "learning_rate": 8.725473498027078e-06, + "loss": 1.0448, "step": 14171 }, { - "epoch": 0.4015982317436028, + "epoch": 0.5545034822756084, "grad_norm": 0.0, - "learning_rate": 1.3590961460014304e-05, - "loss": 0.9939, + "learning_rate": 8.724216611855998e-06, + "loss": 1.0278, "step": 14172 }, { - "epoch": 0.40162656918586526, + "epoch": 0.5545426089678379, "grad_norm": 0.0, - "learning_rate": 1.3590104871645529e-05, - "loss": 1.0382, + "learning_rate": 8.722959746172403e-06, + "loss": 0.9863, "step": 14173 }, { - "epoch": 0.40165490662812775, + "epoch": 0.5545817356600673, "grad_norm": 0.0, - "learning_rate": 1.3589248253035967e-05, - "loss": 0.9325, + "learning_rate": 8.721702900996473e-06, + "loss": 1.0102, "step": 14174 }, { - "epoch": 0.4016832440703902, + "epoch": 0.5546208623522967, "grad_norm": 0.0, - "learning_rate": 1.3588391604192834e-05, - "loss": 0.8144, + "learning_rate": 8.720446076348395e-06, + "loss": 1.0319, "step": 14175 }, { - "epoch": 0.4017115815126527, + "epoch": 0.5546599890445262, "grad_norm": 0.0, - "learning_rate": 1.3587534925123349e-05, - "loss": 0.9324, + "learning_rate": 8.719189272248344e-06, + "loss": 1.0919, "step": 14176 }, { - "epoch": 0.4017399189549151, + "epoch": 0.5546991157367556, "grad_norm": 0.0, - "learning_rate": 1.3586678215834725e-05, - "loss": 0.89, + "learning_rate": 8.717932488716512e-06, + "loss": 0.8966, "step": 14177 }, { - "epoch": 0.4017682563971776, + "epoch": 0.5547382424289851, "grad_norm": 0.0, - "learning_rate": 1.3585821476334176e-05, - "loss": 0.8007, + "learning_rate": 8.716675725773077e-06, + "loss": 0.8908, "step": 14178 }, { - "epoch": 0.40179659383944005, + "epoch": 0.5547773691212144, "grad_norm": 0.0, - "learning_rate": 1.3584964706628923e-05, - "loss": 0.9769, + "learning_rate": 8.715418983438222e-06, + "loss": 0.9846, "step": 14179 }, { - "epoch": 0.4018249312817025, + "epoch": 0.5548164958134439, "grad_norm": 0.0, - "learning_rate": 1.3584107906726178e-05, - "loss": 0.8991, + "learning_rate": 8.714162261732128e-06, + "loss": 1.0683, "step": 14180 }, { - "epoch": 0.401853268723965, + "epoch": 0.5548556225056733, "grad_norm": 0.0, - "learning_rate": 1.3583251076633163e-05, - "loss": 0.8006, + "learning_rate": 8.712905560674978e-06, + "loss": 0.9637, "step": 14181 }, { - "epoch": 0.4018816061662274, + "epoch": 0.5548947491979028, "grad_norm": 0.0, - "learning_rate": 1.3582394216357095e-05, - "loss": 0.8233, + "learning_rate": 8.711648880286949e-06, + "loss": 1.077, "step": 14182 }, { - "epoch": 0.4019099436084899, + "epoch": 0.5549338758901322, "grad_norm": 0.0, - "learning_rate": 1.3581537325905188e-05, - "loss": 0.9228, + "learning_rate": 8.710392220588229e-06, + "loss": 0.8438, "step": 14183 }, { - "epoch": 0.40193828105075236, + "epoch": 0.5549730025823617, "grad_norm": 0.0, - "learning_rate": 1.3580680405284666e-05, - "loss": 0.9182, + "learning_rate": 8.709135581598992e-06, + "loss": 1.0508, "step": 14184 }, { - "epoch": 0.4019666184930148, + "epoch": 0.5550121292745911, "grad_norm": 0.0, - "learning_rate": 1.357982345450274e-05, - "loss": 0.8699, + "learning_rate": 8.707878963339417e-06, + "loss": 1.073, "step": 14185 }, { - "epoch": 0.4019949559352773, + "epoch": 0.5550512559668206, "grad_norm": 0.0, - "learning_rate": 1.3578966473566631e-05, - "loss": 0.962, + "learning_rate": 8.70662236582969e-06, + "loss": 1.0164, "step": 14186 }, { - "epoch": 0.4020232933775397, + "epoch": 0.55509038265905, "grad_norm": 0.0, - "learning_rate": 1.3578109462483558e-05, - "loss": 0.9676, + "learning_rate": 8.705365789089989e-06, + "loss": 1.048, "step": 14187 }, { - "epoch": 0.4020516308198022, + "epoch": 0.5551295093512795, "grad_norm": 0.0, - "learning_rate": 1.3577252421260742e-05, - "loss": 0.9103, + "learning_rate": 8.704109233140487e-06, + "loss": 1.1735, "step": 14188 }, { - "epoch": 0.40207996826206466, + "epoch": 0.5551686360435089, "grad_norm": 0.0, - "learning_rate": 1.3576395349905403e-05, - "loss": 0.8462, + "learning_rate": 8.702852698001367e-06, + "loss": 0.968, "step": 14189 }, { - "epoch": 0.40210830570432715, + "epoch": 0.5552077627357384, "grad_norm": 0.0, - "learning_rate": 1.3575538248424756e-05, - "loss": 0.9589, + "learning_rate": 8.70159618369281e-06, + "loss": 1.0692, "step": 14190 }, { - "epoch": 0.4021366431465896, + "epoch": 0.5552468894279677, "grad_norm": 0.0, - "learning_rate": 1.3574681116826018e-05, - "loss": 0.8332, + "learning_rate": 8.70033969023499e-06, + "loss": 1.0731, "step": 14191 }, { - "epoch": 0.40216498058885203, + "epoch": 0.5552860161201972, "grad_norm": 0.0, - "learning_rate": 1.3573823955116416e-05, - "loss": 0.9613, + "learning_rate": 8.699083217648088e-06, + "loss": 0.9432, "step": 14192 }, { - "epoch": 0.4021933180311145, + "epoch": 0.5553251428124266, "grad_norm": 0.0, - "learning_rate": 1.357296676330317e-05, - "loss": 0.8397, + "learning_rate": 8.697826765952278e-06, + "loss": 1.1046, "step": 14193 }, { - "epoch": 0.40222165547337696, + "epoch": 0.5553642695046561, "grad_norm": 0.0, - "learning_rate": 1.3572109541393498e-05, - "loss": 0.9776, + "learning_rate": 8.696570335167734e-06, + "loss": 1.0015, "step": 14194 }, { - "epoch": 0.40224999291563945, + "epoch": 0.5554033961968855, "grad_norm": 0.0, - "learning_rate": 1.357125228939462e-05, - "loss": 0.8405, + "learning_rate": 8.695313925314643e-06, + "loss": 1.0477, "step": 14195 }, { - "epoch": 0.4022783303579019, + "epoch": 0.555442522889115, "grad_norm": 0.0, - "learning_rate": 1.357039500731376e-05, - "loss": 0.9564, + "learning_rate": 8.694057536413175e-06, + "loss": 0.9739, "step": 14196 }, { - "epoch": 0.40230666780016433, + "epoch": 0.5554816495813444, "grad_norm": 0.0, - "learning_rate": 1.3569537695158135e-05, - "loss": 0.8991, + "learning_rate": 8.692801168483505e-06, + "loss": 1.1174, "step": 14197 }, { - "epoch": 0.4023350052424268, + "epoch": 0.5555207762735739, "grad_norm": 0.0, - "learning_rate": 1.3568680352934966e-05, - "loss": 0.9195, + "learning_rate": 8.691544821545807e-06, + "loss": 1.1335, "step": 14198 }, { - "epoch": 0.40236334268468926, + "epoch": 0.5555599029658033, "grad_norm": 0.0, - "learning_rate": 1.3567822980651481e-05, - "loss": 1.0309, + "learning_rate": 8.690288495620263e-06, + "loss": 1.1272, "step": 14199 }, { - "epoch": 0.40239168012695176, + "epoch": 0.5555990296580328, "grad_norm": 0.0, - "learning_rate": 1.3566965578314897e-05, - "loss": 0.8263, + "learning_rate": 8.689032190727046e-06, + "loss": 1.0729, "step": 14200 }, { - "epoch": 0.4024200175692142, + "epoch": 0.5556381563502621, "grad_norm": 0.0, - "learning_rate": 1.3566108145932437e-05, - "loss": 1.0235, + "learning_rate": 8.687775906886328e-06, + "loss": 1.1806, "step": 14201 }, { - "epoch": 0.4024483550114767, + "epoch": 0.5556772830424916, "grad_norm": 0.0, - "learning_rate": 1.3565250683511324e-05, - "loss": 0.9621, + "learning_rate": 8.68651964411828e-06, + "loss": 1.193, "step": 14202 }, { - "epoch": 0.4024766924537391, + "epoch": 0.555716409734721, "grad_norm": 0.0, - "learning_rate": 1.3564393191058782e-05, - "loss": 0.8431, + "learning_rate": 8.685263402443087e-06, + "loss": 1.1261, "step": 14203 }, { - "epoch": 0.40250502989600156, + "epoch": 0.5557555364269504, "grad_norm": 0.0, - "learning_rate": 1.356353566858203e-05, - "loss": 0.9212, + "learning_rate": 8.684007181880914e-06, + "loss": 0.9805, "step": 14204 }, { - "epoch": 0.40253336733826406, + "epoch": 0.5557946631191799, "grad_norm": 0.0, - "learning_rate": 1.3562678116088294e-05, - "loss": 0.9762, + "learning_rate": 8.682750982451939e-06, + "loss": 0.956, "step": 14205 }, { - "epoch": 0.4025617047805265, + "epoch": 0.5558337898114093, "grad_norm": 0.0, - "learning_rate": 1.35618205335848e-05, - "loss": 0.9276, + "learning_rate": 8.681494804176331e-06, + "loss": 1.0879, "step": 14206 }, { - "epoch": 0.402590042222789, + "epoch": 0.5558729165036388, "grad_norm": 0.0, - "learning_rate": 1.3560962921078766e-05, - "loss": 0.9356, + "learning_rate": 8.68023864707426e-06, + "loss": 1.0683, "step": 14207 }, { - "epoch": 0.40261837966505143, + "epoch": 0.5559120431958682, "grad_norm": 0.0, - "learning_rate": 1.356010527857742e-05, - "loss": 0.938, + "learning_rate": 8.678982511165908e-06, + "loss": 1.0555, "step": 14208 }, { - "epoch": 0.40264671710731387, + "epoch": 0.5559511698880977, "grad_norm": 0.0, - "learning_rate": 1.3559247606087987e-05, - "loss": 0.9287, + "learning_rate": 8.67772639647144e-06, + "loss": 1.1007, "step": 14209 }, { - "epoch": 0.40267505454957636, + "epoch": 0.5559902965803271, "grad_norm": 0.0, - "learning_rate": 1.3558389903617688e-05, - "loss": 1.0397, + "learning_rate": 8.676470303011031e-06, + "loss": 1.0076, "step": 14210 }, { - "epoch": 0.4027033919918388, + "epoch": 0.5560294232725566, "grad_norm": 0.0, - "learning_rate": 1.3557532171173749e-05, - "loss": 1.0251, + "learning_rate": 8.675214230804844e-06, + "loss": 0.9379, "step": 14211 }, { - "epoch": 0.4027317294341013, + "epoch": 0.5560685499647859, "grad_norm": 0.0, - "learning_rate": 1.3556674408763395e-05, - "loss": 1.0469, + "learning_rate": 8.673958179873063e-06, + "loss": 1.0742, "step": 14212 }, { - "epoch": 0.40276006687636373, + "epoch": 0.5561076766570154, "grad_norm": 0.0, - "learning_rate": 1.3555816616393852e-05, - "loss": 0.9745, + "learning_rate": 8.67270215023585e-06, + "loss": 1.1001, "step": 14213 }, { - "epoch": 0.4027884043186262, + "epoch": 0.5561468033492448, "grad_norm": 0.0, - "learning_rate": 1.3554958794072346e-05, - "loss": 0.7762, + "learning_rate": 8.671446141913378e-06, + "loss": 0.9689, "step": 14214 }, { - "epoch": 0.40281674176088866, + "epoch": 0.5561859300414743, "grad_norm": 0.0, - "learning_rate": 1.3554100941806103e-05, - "loss": 0.9626, + "learning_rate": 8.670190154925816e-06, + "loss": 0.91, "step": 14215 }, { - "epoch": 0.4028450792031511, + "epoch": 0.5562250567337037, "grad_norm": 0.0, - "learning_rate": 1.3553243059602346e-05, - "loss": 1.0765, + "learning_rate": 8.66893418929333e-06, + "loss": 1.2691, "step": 14216 }, { - "epoch": 0.4028734166454136, + "epoch": 0.5562641834259332, "grad_norm": 0.0, - "learning_rate": 1.3552385147468306e-05, - "loss": 0.9432, + "learning_rate": 8.667678245036098e-06, + "loss": 0.9892, "step": 14217 }, { - "epoch": 0.40290175408767603, + "epoch": 0.5563033101181626, "grad_norm": 0.0, - "learning_rate": 1.3551527205411203e-05, - "loss": 0.9427, + "learning_rate": 8.666422322174281e-06, + "loss": 1.099, "step": 14218 }, { - "epoch": 0.4029300915299385, + "epoch": 0.5563424368103921, "grad_norm": 0.0, - "learning_rate": 1.3550669233438271e-05, - "loss": 1.0587, + "learning_rate": 8.665166420728053e-06, + "loss": 1.0594, "step": 14219 }, { - "epoch": 0.40295842897220097, + "epoch": 0.5563815635026215, "grad_norm": 0.0, - "learning_rate": 1.354981123155673e-05, - "loss": 0.869, + "learning_rate": 8.663910540717577e-06, + "loss": 0.9984, "step": 14220 }, { - "epoch": 0.4029867664144634, + "epoch": 0.556420690194851, "grad_norm": 0.0, - "learning_rate": 1.3548953199773813e-05, - "loss": 1.0157, + "learning_rate": 8.662654682163022e-06, + "loss": 1.0406, "step": 14221 }, { - "epoch": 0.4030151038567259, + "epoch": 0.5564598168870803, "grad_norm": 0.0, - "learning_rate": 1.3548095138096746e-05, - "loss": 0.8989, + "learning_rate": 8.661398845084562e-06, + "loss": 1.0535, "step": 14222 }, { - "epoch": 0.40304344129898834, + "epoch": 0.5564989435793098, "grad_norm": 0.0, - "learning_rate": 1.3547237046532757e-05, - "loss": 0.8862, + "learning_rate": 8.660143029502359e-06, + "loss": 1.0245, "step": 14223 }, { - "epoch": 0.40307177874125083, + "epoch": 0.5565380702715392, "grad_norm": 0.0, - "learning_rate": 1.3546378925089069e-05, - "loss": 0.9902, + "learning_rate": 8.658887235436577e-06, + "loss": 0.9255, "step": 14224 }, { - "epoch": 0.40310011618351327, + "epoch": 0.5565771969637687, "grad_norm": 0.0, - "learning_rate": 1.3545520773772916e-05, - "loss": 0.8855, + "learning_rate": 8.657631462907385e-06, + "loss": 1.0707, "step": 14225 }, { - "epoch": 0.40312845362577576, + "epoch": 0.5566163236559981, "grad_norm": 0.0, - "learning_rate": 1.3544662592591526e-05, - "loss": 0.9354, + "learning_rate": 8.656375711934952e-06, + "loss": 0.9704, "step": 14226 }, { - "epoch": 0.4031567910680382, + "epoch": 0.5566554503482276, "grad_norm": 0.0, - "learning_rate": 1.3543804381552129e-05, - "loss": 1.015, + "learning_rate": 8.65511998253944e-06, + "loss": 1.0281, "step": 14227 }, { - "epoch": 0.40318512851030064, + "epoch": 0.556694577040457, "grad_norm": 0.0, - "learning_rate": 1.3542946140661948e-05, - "loss": 0.8431, + "learning_rate": 8.653864274741017e-06, + "loss": 0.8887, "step": 14228 }, { - "epoch": 0.40321346595256313, + "epoch": 0.5567337037326865, "grad_norm": 0.0, - "learning_rate": 1.3542087869928215e-05, - "loss": 0.8925, + "learning_rate": 8.652608588559845e-06, + "loss": 1.0106, "step": 14229 }, { - "epoch": 0.40324180339482557, + "epoch": 0.5567728304249159, "grad_norm": 0.0, - "learning_rate": 1.3541229569358165e-05, - "loss": 0.8867, + "learning_rate": 8.651352924016093e-06, + "loss": 1.0158, "step": 14230 }, { - "epoch": 0.40327014083708806, + "epoch": 0.5568119571171453, "grad_norm": 0.0, - "learning_rate": 1.354037123895902e-05, - "loss": 0.9425, + "learning_rate": 8.650097281129922e-06, + "loss": 1.0491, "step": 14231 }, { - "epoch": 0.4032984782793505, + "epoch": 0.5568510838093748, "grad_norm": 0.0, - "learning_rate": 1.3539512878738015e-05, - "loss": 0.8467, + "learning_rate": 8.648841659921499e-06, + "loss": 0.9177, "step": 14232 }, { - "epoch": 0.40332681572161294, + "epoch": 0.5568902105016041, "grad_norm": 0.0, - "learning_rate": 1.3538654488702376e-05, - "loss": 0.992, + "learning_rate": 8.647586060410981e-06, + "loss": 0.9804, "step": 14233 }, { - "epoch": 0.40335515316387544, + "epoch": 0.5569293371938336, "grad_norm": 0.0, - "learning_rate": 1.3537796068859339e-05, - "loss": 0.9846, + "learning_rate": 8.646330482618539e-06, + "loss": 1.1868, "step": 14234 }, { - "epoch": 0.4033834906061379, + "epoch": 0.556968463886063, "grad_norm": 0.0, - "learning_rate": 1.3536937619216133e-05, - "loss": 0.9659, + "learning_rate": 8.645074926564334e-06, + "loss": 1.0065, "step": 14235 }, { - "epoch": 0.40341182804840037, + "epoch": 0.5570075905782925, "grad_norm": 0.0, - "learning_rate": 1.3536079139779987e-05, - "loss": 1.0065, + "learning_rate": 8.643819392268527e-06, + "loss": 1.0844, "step": 14236 }, { - "epoch": 0.4034401654906628, + "epoch": 0.5570467172705219, "grad_norm": 0.0, - "learning_rate": 1.353522063055813e-05, - "loss": 0.9823, + "learning_rate": 8.642563879751283e-06, + "loss": 0.9967, "step": 14237 }, { - "epoch": 0.4034685029329253, + "epoch": 0.5570858439627514, "grad_norm": 0.0, - "learning_rate": 1.3534362091557798e-05, - "loss": 0.8812, + "learning_rate": 8.641308389032758e-06, + "loss": 1.0937, "step": 14238 }, { - "epoch": 0.40349684037518774, + "epoch": 0.5571249706549808, "grad_norm": 0.0, - "learning_rate": 1.3533503522786224e-05, - "loss": 0.9356, + "learning_rate": 8.640052920133121e-06, + "loss": 0.8242, "step": 14239 }, { - "epoch": 0.4035251778174502, + "epoch": 0.5571640973472103, "grad_norm": 0.0, - "learning_rate": 1.3532644924250638e-05, - "loss": 0.9821, + "learning_rate": 8.63879747307253e-06, + "loss": 1.0317, "step": 14240 }, { - "epoch": 0.40355351525971267, + "epoch": 0.5572032240394397, "grad_norm": 0.0, - "learning_rate": 1.3531786295958268e-05, - "loss": 0.8544, + "learning_rate": 8.637542047871146e-06, + "loss": 1.0528, "step": 14241 }, { - "epoch": 0.4035818527019751, + "epoch": 0.5572423507316692, "grad_norm": 0.0, - "learning_rate": 1.3530927637916356e-05, - "loss": 0.97, + "learning_rate": 8.636286644549129e-06, + "loss": 1.0027, "step": 14242 }, { - "epoch": 0.4036101901442376, + "epoch": 0.5572814774238986, "grad_norm": 0.0, - "learning_rate": 1.3530068950132127e-05, - "loss": 0.8834, + "learning_rate": 8.635031263126636e-06, + "loss": 1.0286, "step": 14243 }, { - "epoch": 0.40363852758650004, + "epoch": 0.557320604116128, "grad_norm": 0.0, - "learning_rate": 1.3529210232612815e-05, - "loss": 0.9998, + "learning_rate": 8.633775903623834e-06, + "loss": 1.0751, "step": 14244 }, { - "epoch": 0.4036668650287625, + "epoch": 0.5573597308083574, "grad_norm": 0.0, - "learning_rate": 1.3528351485365654e-05, - "loss": 0.9494, + "learning_rate": 8.63252056606088e-06, + "loss": 1.0457, "step": 14245 }, { - "epoch": 0.40369520247102497, + "epoch": 0.5573988575005869, "grad_norm": 0.0, - "learning_rate": 1.3527492708397881e-05, - "loss": 1.0204, + "learning_rate": 8.631265250457931e-06, + "loss": 1.0201, "step": 14246 }, { - "epoch": 0.4037235399132874, + "epoch": 0.5574379841928163, "grad_norm": 0.0, - "learning_rate": 1.3526633901716726e-05, - "loss": 1.0113, + "learning_rate": 8.630009956835144e-06, + "loss": 1.0656, "step": 14247 }, { - "epoch": 0.4037518773555499, + "epoch": 0.5574771108850458, "grad_norm": 0.0, - "learning_rate": 1.3525775065329425e-05, - "loss": 0.9446, + "learning_rate": 8.628754685212685e-06, + "loss": 1.0587, "step": 14248 }, { - "epoch": 0.40378021479781234, + "epoch": 0.5575162375772752, "grad_norm": 0.0, - "learning_rate": 1.352491619924321e-05, - "loss": 0.9407, + "learning_rate": 8.627499435610707e-06, + "loss": 0.9998, "step": 14249 }, { - "epoch": 0.40380855224007484, + "epoch": 0.5575553642695047, "grad_norm": 0.0, - "learning_rate": 1.3524057303465317e-05, - "loss": 0.9367, + "learning_rate": 8.626244208049367e-06, + "loss": 1.0048, "step": 14250 }, { - "epoch": 0.4038368896823373, + "epoch": 0.5575944909617341, "grad_norm": 0.0, - "learning_rate": 1.352319837800298e-05, - "loss": 0.8657, + "learning_rate": 8.624989002548825e-06, + "loss": 1.1704, "step": 14251 }, { - "epoch": 0.4038652271245997, + "epoch": 0.5576336176539636, "grad_norm": 0.0, - "learning_rate": 1.3522339422863438e-05, - "loss": 0.9727, + "learning_rate": 8.623733819129233e-06, + "loss": 0.9976, "step": 14252 }, { - "epoch": 0.4038935645668622, + "epoch": 0.557672744346193, "grad_norm": 0.0, - "learning_rate": 1.3521480438053918e-05, - "loss": 0.9473, + "learning_rate": 8.622478657810753e-06, + "loss": 0.9797, "step": 14253 }, { - "epoch": 0.40392190200912464, + "epoch": 0.5577118710384225, "grad_norm": 0.0, - "learning_rate": 1.3520621423581663e-05, - "loss": 0.8968, + "learning_rate": 8.621223518613541e-06, + "loss": 1.0954, "step": 14254 }, { - "epoch": 0.40395023945138714, + "epoch": 0.5577509977306518, "grad_norm": 0.0, - "learning_rate": 1.351976237945391e-05, - "loss": 1.006, + "learning_rate": 8.619968401557752e-06, + "loss": 1.0877, "step": 14255 }, { - "epoch": 0.4039785768936496, + "epoch": 0.5577901244228813, "grad_norm": 0.0, - "learning_rate": 1.3518903305677889e-05, - "loss": 0.8476, + "learning_rate": 8.618713306663537e-06, + "loss": 0.9327, "step": 14256 }, { - "epoch": 0.404006914335912, + "epoch": 0.5578292511151107, "grad_norm": 0.0, - "learning_rate": 1.3518044202260835e-05, - "loss": 0.8603, + "learning_rate": 8.617458233951058e-06, + "loss": 1.0221, "step": 14257 }, { - "epoch": 0.4040352517781745, + "epoch": 0.5578683778073402, "grad_norm": 0.0, - "learning_rate": 1.351718506920999e-05, - "loss": 0.8063, + "learning_rate": 8.616203183440469e-06, + "loss": 0.9467, "step": 14258 }, { - "epoch": 0.40406358922043695, + "epoch": 0.5579075044995696, "grad_norm": 0.0, - "learning_rate": 1.3516325906532592e-05, - "loss": 0.8687, + "learning_rate": 8.614948155151924e-06, + "loss": 0.9485, "step": 14259 }, { - "epoch": 0.40409192666269944, + "epoch": 0.557946631191799, "grad_norm": 0.0, - "learning_rate": 1.3515466714235874e-05, - "loss": 0.9338, + "learning_rate": 8.613693149105569e-06, + "loss": 1.0164, "step": 14260 }, { - "epoch": 0.4041202641049619, + "epoch": 0.5579857578840285, "grad_norm": 0.0, - "learning_rate": 1.3514607492327074e-05, - "loss": 0.9456, + "learning_rate": 8.612438165321571e-06, + "loss": 1.1205, "step": 14261 }, { - "epoch": 0.4041486015472244, + "epoch": 0.5580248845762579, "grad_norm": 0.0, - "learning_rate": 1.3513748240813429e-05, - "loss": 0.8959, + "learning_rate": 8.611183203820076e-06, + "loss": 1.0929, "step": 14262 }, { - "epoch": 0.4041769389894868, + "epoch": 0.5580640112684874, "grad_norm": 0.0, - "learning_rate": 1.351288895970218e-05, - "loss": 0.9014, + "learning_rate": 8.60992826462124e-06, + "loss": 0.9688, "step": 14263 }, { - "epoch": 0.40420527643174925, + "epoch": 0.5581031379607168, "grad_norm": 0.0, - "learning_rate": 1.351202964900056e-05, - "loss": 0.8218, + "learning_rate": 8.608673347745213e-06, + "loss": 0.9634, "step": 14264 }, { - "epoch": 0.40423361387401174, + "epoch": 0.5581422646529463, "grad_norm": 0.0, - "learning_rate": 1.3511170308715811e-05, - "loss": 1.0488, + "learning_rate": 8.607418453212147e-06, + "loss": 1.044, "step": 14265 }, { - "epoch": 0.4042619513162742, + "epoch": 0.5581813913451756, "grad_norm": 0.0, - "learning_rate": 1.3510310938855172e-05, - "loss": 0.88, + "learning_rate": 8.606163581042196e-06, + "loss": 0.9111, "step": 14266 }, { - "epoch": 0.4042902887585367, + "epoch": 0.5582205180374051, "grad_norm": 0.0, - "learning_rate": 1.350945153942588e-05, - "loss": 0.851, + "learning_rate": 8.604908731255512e-06, + "loss": 1.1496, "step": 14267 }, { - "epoch": 0.4043186262007991, + "epoch": 0.5582596447296345, "grad_norm": 0.0, - "learning_rate": 1.3508592110435173e-05, - "loss": 1.0424, + "learning_rate": 8.603653903872246e-06, + "loss": 1.0931, "step": 14268 }, { - "epoch": 0.40434696364306155, + "epoch": 0.558298771421864, "grad_norm": 0.0, - "learning_rate": 1.3507732651890294e-05, - "loss": 0.9047, + "learning_rate": 8.602399098912548e-06, + "loss": 1.0375, "step": 14269 }, { - "epoch": 0.40437530108532405, + "epoch": 0.5583378981140934, "grad_norm": 0.0, - "learning_rate": 1.3506873163798478e-05, - "loss": 0.8366, + "learning_rate": 8.60114431639657e-06, + "loss": 0.975, "step": 14270 }, { - "epoch": 0.4044036385275865, + "epoch": 0.5583770248063229, "grad_norm": 0.0, - "learning_rate": 1.3506013646166966e-05, - "loss": 1.0422, + "learning_rate": 8.599889556344462e-06, + "loss": 0.9763, "step": 14271 }, { - "epoch": 0.404431975969849, + "epoch": 0.5584161514985523, "grad_norm": 0.0, - "learning_rate": 1.3505154099003003e-05, - "loss": 0.8707, + "learning_rate": 8.598634818776371e-06, + "loss": 0.9597, "step": 14272 }, { - "epoch": 0.4044603134121114, + "epoch": 0.5584552781907818, "grad_norm": 0.0, - "learning_rate": 1.3504294522313825e-05, - "loss": 0.9231, + "learning_rate": 8.59738010371245e-06, + "loss": 1.0124, "step": 14273 }, { - "epoch": 0.4044886508543739, + "epoch": 0.5584944048830112, "grad_norm": 0.0, - "learning_rate": 1.350343491610667e-05, - "loss": 0.9289, + "learning_rate": 8.596125411172846e-06, + "loss": 0.8925, "step": 14274 }, { - "epoch": 0.40451698829663635, + "epoch": 0.5585335315752407, "grad_norm": 0.0, - "learning_rate": 1.3502575280388785e-05, - "loss": 0.9281, + "learning_rate": 8.594870741177713e-06, + "loss": 0.9989, "step": 14275 }, { - "epoch": 0.4045453257388988, + "epoch": 0.55857265826747, "grad_norm": 0.0, - "learning_rate": 1.3501715615167408e-05, - "loss": 0.9347, + "learning_rate": 8.593616093747191e-06, + "loss": 0.945, "step": 14276 }, { - "epoch": 0.4045736631811613, + "epoch": 0.5586117849596995, "grad_norm": 0.0, - "learning_rate": 1.3500855920449775e-05, - "loss": 0.8075, + "learning_rate": 8.592361468901432e-06, + "loss": 1.06, "step": 14277 }, { - "epoch": 0.4046020006234237, + "epoch": 0.5586509116519289, "grad_norm": 0.0, - "learning_rate": 1.3499996196243135e-05, - "loss": 0.8424, + "learning_rate": 8.591106866660584e-06, + "loss": 1.1763, "step": 14278 }, { - "epoch": 0.4046303380656862, + "epoch": 0.5586900383441584, "grad_norm": 0.0, - "learning_rate": 1.349913644255473e-05, - "loss": 0.9561, + "learning_rate": 8.589852287044796e-06, + "loss": 1.0659, "step": 14279 }, { - "epoch": 0.40465867550794865, + "epoch": 0.5587291650363878, "grad_norm": 0.0, - "learning_rate": 1.3498276659391799e-05, - "loss": 0.812, + "learning_rate": 8.588597730074214e-06, + "loss": 1.0352, "step": 14280 }, { - "epoch": 0.4046870129502111, + "epoch": 0.5587682917286173, "grad_norm": 0.0, - "learning_rate": 1.3497416846761584e-05, - "loss": 0.8979, + "learning_rate": 8.587343195768983e-06, + "loss": 1.0565, "step": 14281 }, { - "epoch": 0.4047153503924736, + "epoch": 0.5588074184208467, "grad_norm": 0.0, - "learning_rate": 1.3496557004671325e-05, - "loss": 0.954, + "learning_rate": 8.586088684149251e-06, + "loss": 1.0417, "step": 14282 }, { - "epoch": 0.404743687834736, + "epoch": 0.5588465451130762, "grad_norm": 0.0, - "learning_rate": 1.3495697133128272e-05, - "loss": 0.8581, + "learning_rate": 8.584834195235158e-06, + "loss": 1.0191, "step": 14283 }, { - "epoch": 0.4047720252769985, + "epoch": 0.5588856718053056, "grad_norm": 0.0, - "learning_rate": 1.349483723213966e-05, - "loss": 0.9764, + "learning_rate": 8.58357972904686e-06, + "loss": 0.967, "step": 14284 }, { - "epoch": 0.40480036271926095, + "epoch": 0.5589247984975351, "grad_norm": 0.0, - "learning_rate": 1.349397730171274e-05, - "loss": 0.8729, + "learning_rate": 8.582325285604494e-06, + "loss": 1.0541, "step": 14285 }, { - "epoch": 0.40482870016152345, + "epoch": 0.5589639251897645, "grad_norm": 0.0, - "learning_rate": 1.3493117341854748e-05, - "loss": 0.8253, + "learning_rate": 8.58107086492821e-06, + "loss": 1.0594, "step": 14286 }, { - "epoch": 0.4048570376037859, + "epoch": 0.559003051881994, "grad_norm": 0.0, - "learning_rate": 1.3492257352572935e-05, - "loss": 0.8345, + "learning_rate": 8.579816467038144e-06, + "loss": 0.9733, "step": 14287 }, { - "epoch": 0.4048853750460483, + "epoch": 0.5590421785742233, "grad_norm": 0.0, - "learning_rate": 1.349139733387454e-05, - "loss": 1.0814, + "learning_rate": 8.578562091954451e-06, + "loss": 0.9545, "step": 14288 }, { - "epoch": 0.4049137124883108, + "epoch": 0.5590813052664527, "grad_norm": 0.0, - "learning_rate": 1.3490537285766809e-05, - "loss": 0.9914, + "learning_rate": 8.57730773969727e-06, + "loss": 0.9301, "step": 14289 }, { - "epoch": 0.40494204993057326, + "epoch": 0.5591204319586822, "grad_norm": 0.0, - "learning_rate": 1.3489677208256987e-05, - "loss": 0.9244, + "learning_rate": 8.576053410286744e-06, + "loss": 0.9515, "step": 14290 }, { - "epoch": 0.40497038737283575, + "epoch": 0.5591595586509116, "grad_norm": 0.0, - "learning_rate": 1.3488817101352315e-05, - "loss": 0.9932, + "learning_rate": 8.574799103743015e-06, + "loss": 1.1573, "step": 14291 }, { - "epoch": 0.4049987248150982, + "epoch": 0.5591986853431411, "grad_norm": 0.0, - "learning_rate": 1.3487956965060044e-05, - "loss": 1.0271, + "learning_rate": 8.573544820086224e-06, + "loss": 0.8688, "step": 14292 }, { - "epoch": 0.4050270622573606, + "epoch": 0.5592378120353705, "grad_norm": 0.0, - "learning_rate": 1.3487096799387418e-05, - "loss": 0.8943, + "learning_rate": 8.572290559336518e-06, + "loss": 1.0422, "step": 14293 }, { - "epoch": 0.4050553996996231, + "epoch": 0.5592769387276, "grad_norm": 0.0, - "learning_rate": 1.3486236604341679e-05, - "loss": 0.9293, + "learning_rate": 8.571036321514039e-06, + "loss": 1.0271, "step": 14294 }, { - "epoch": 0.40508373714188556, + "epoch": 0.5593160654198294, "grad_norm": 0.0, - "learning_rate": 1.3485376379930071e-05, - "loss": 0.8642, + "learning_rate": 8.569782106638923e-06, + "loss": 1.0726, "step": 14295 }, { - "epoch": 0.40511207458414805, + "epoch": 0.5593551921120589, "grad_norm": 0.0, - "learning_rate": 1.3484516126159845e-05, - "loss": 0.89, + "learning_rate": 8.568527914731312e-06, + "loss": 1.0275, "step": 14296 }, { - "epoch": 0.4051404120264105, + "epoch": 0.5593943188042882, "grad_norm": 0.0, - "learning_rate": 1.3483655843038248e-05, - "loss": 0.9377, + "learning_rate": 8.567273745811353e-06, + "loss": 1.0396, "step": 14297 }, { - "epoch": 0.405168749468673, + "epoch": 0.5594334454965177, "grad_norm": 0.0, - "learning_rate": 1.3482795530572523e-05, - "loss": 0.9405, + "learning_rate": 8.566019599899182e-06, + "loss": 1.0513, "step": 14298 }, { - "epoch": 0.4051970869109354, + "epoch": 0.5594725721887471, "grad_norm": 0.0, - "learning_rate": 1.348193518876992e-05, - "loss": 0.961, + "learning_rate": 8.564765477014938e-06, + "loss": 1.0504, "step": 14299 }, { - "epoch": 0.40522542435319786, + "epoch": 0.5595116988809766, "grad_norm": 0.0, - "learning_rate": 1.3481074817637681e-05, - "loss": 0.8062, + "learning_rate": 8.563511377178764e-06, + "loss": 0.9372, "step": 14300 }, { - "epoch": 0.40525376179546035, + "epoch": 0.559550825573206, "grad_norm": 0.0, - "learning_rate": 1.3480214417183058e-05, - "loss": 0.9516, + "learning_rate": 8.562257300410792e-06, + "loss": 1.0527, "step": 14301 }, { - "epoch": 0.4052820992377228, + "epoch": 0.5595899522654355, "grad_norm": 0.0, - "learning_rate": 1.3479353987413295e-05, - "loss": 1.0335, + "learning_rate": 8.561003246731172e-06, + "loss": 0.9477, "step": 14302 }, { - "epoch": 0.4053104366799853, + "epoch": 0.5596290789576649, "grad_norm": 0.0, - "learning_rate": 1.3478493528335641e-05, - "loss": 1.012, + "learning_rate": 8.559749216160034e-06, + "loss": 1.0129, "step": 14303 }, { - "epoch": 0.4053387741222477, + "epoch": 0.5596682056498944, "grad_norm": 0.0, - "learning_rate": 1.3477633039957346e-05, - "loss": 0.8355, + "learning_rate": 8.558495208717521e-06, + "loss": 0.9782, "step": 14304 }, { - "epoch": 0.40536711156451016, + "epoch": 0.5597073323421238, "grad_norm": 0.0, - "learning_rate": 1.3476772522285656e-05, - "loss": 0.8658, + "learning_rate": 8.557241224423763e-06, + "loss": 1.0777, "step": 14305 }, { - "epoch": 0.40539544900677266, + "epoch": 0.5597464590343533, "grad_norm": 0.0, - "learning_rate": 1.3475911975327823e-05, - "loss": 0.8368, + "learning_rate": 8.555987263298908e-06, + "loss": 1.121, "step": 14306 }, { - "epoch": 0.4054237864490351, + "epoch": 0.5597855857265827, "grad_norm": 0.0, - "learning_rate": 1.3475051399091088e-05, - "loss": 0.9402, + "learning_rate": 8.554733325363088e-06, + "loss": 0.9975, "step": 14307 }, { - "epoch": 0.4054521238912976, + "epoch": 0.5598247124188122, "grad_norm": 0.0, - "learning_rate": 1.3474190793582707e-05, - "loss": 0.9727, + "learning_rate": 8.553479410636441e-06, + "loss": 1.0668, "step": 14308 }, { - "epoch": 0.40548046133356, + "epoch": 0.5598638391110415, "grad_norm": 0.0, - "learning_rate": 1.3473330158809925e-05, - "loss": 0.8483, + "learning_rate": 8.5522255191391e-06, + "loss": 1.0676, "step": 14309 }, { - "epoch": 0.4055087987758225, + "epoch": 0.559902965803271, "grad_norm": 0.0, - "learning_rate": 1.3472469494779994e-05, - "loss": 0.9522, + "learning_rate": 8.5509716508912e-06, + "loss": 0.9653, "step": 14310 }, { - "epoch": 0.40553713621808496, + "epoch": 0.5599420924955004, "grad_norm": 0.0, - "learning_rate": 1.3471608801500163e-05, - "loss": 0.9787, + "learning_rate": 8.549717805912883e-06, + "loss": 1.0878, "step": 14311 }, { - "epoch": 0.4055654736603474, + "epoch": 0.5599812191877299, "grad_norm": 0.0, - "learning_rate": 1.3470748078977682e-05, - "loss": 0.9282, + "learning_rate": 8.548463984224282e-06, + "loss": 1.0572, "step": 14312 }, { - "epoch": 0.4055938111026099, + "epoch": 0.5600203458799593, "grad_norm": 0.0, - "learning_rate": 1.3469887327219802e-05, - "loss": 0.8576, + "learning_rate": 8.54721018584553e-06, + "loss": 1.0021, "step": 14313 }, { - "epoch": 0.40562214854487233, + "epoch": 0.5600594725721888, "grad_norm": 0.0, - "learning_rate": 1.3469026546233774e-05, - "loss": 0.8857, + "learning_rate": 8.545956410796758e-06, + "loss": 1.1065, "step": 14314 }, { - "epoch": 0.4056504859871348, + "epoch": 0.5600985992644182, "grad_norm": 0.0, - "learning_rate": 1.3468165736026844e-05, - "loss": 0.8746, + "learning_rate": 8.544702659098109e-06, + "loss": 1.0273, "step": 14315 }, { - "epoch": 0.40567882342939726, + "epoch": 0.5601377259566476, "grad_norm": 0.0, - "learning_rate": 1.3467304896606265e-05, - "loss": 0.8487, + "learning_rate": 8.543448930769708e-06, + "loss": 1.0272, "step": 14316 }, { - "epoch": 0.4057071608716597, + "epoch": 0.5601768526488771, "grad_norm": 0.0, - "learning_rate": 1.3466444027979293e-05, - "loss": 0.815, + "learning_rate": 8.542195225831695e-06, + "loss": 0.9725, "step": 14317 }, { - "epoch": 0.4057354983139222, + "epoch": 0.5602159793411065, "grad_norm": 0.0, - "learning_rate": 1.3465583130153174e-05, - "loss": 0.9503, + "learning_rate": 8.540941544304197e-06, + "loss": 0.9758, "step": 14318 }, { - "epoch": 0.40576383575618463, + "epoch": 0.560255106033336, "grad_norm": 0.0, - "learning_rate": 1.3464722203135164e-05, - "loss": 0.8856, + "learning_rate": 8.539687886207352e-06, + "loss": 1.0482, "step": 14319 }, { - "epoch": 0.4057921731984471, + "epoch": 0.5602942327255653, "grad_norm": 0.0, - "learning_rate": 1.3463861246932508e-05, - "loss": 0.9974, + "learning_rate": 8.53843425156129e-06, + "loss": 1.0171, "step": 14320 }, { - "epoch": 0.40582051064070956, + "epoch": 0.5603333594177948, "grad_norm": 0.0, - "learning_rate": 1.3463000261552466e-05, - "loss": 0.8274, + "learning_rate": 8.537180640386141e-06, + "loss": 1.1097, "step": 14321 }, { - "epoch": 0.40584884808297206, + "epoch": 0.5603724861100242, "grad_norm": 0.0, - "learning_rate": 1.3462139247002284e-05, - "loss": 0.8517, + "learning_rate": 8.535927052702037e-06, + "loss": 1.0898, "step": 14322 }, { - "epoch": 0.4058771855252345, + "epoch": 0.5604116128022537, "grad_norm": 0.0, - "learning_rate": 1.346127820328922e-05, - "loss": 0.9512, + "learning_rate": 8.53467348852911e-06, + "loss": 1.028, "step": 14323 }, { - "epoch": 0.40590552296749693, + "epoch": 0.5604507394944831, "grad_norm": 0.0, - "learning_rate": 1.346041713042052e-05, - "loss": 1.0073, + "learning_rate": 8.533419947887492e-06, + "loss": 1.0284, "step": 14324 }, { - "epoch": 0.40593386040975943, + "epoch": 0.5604898661867126, "grad_norm": 0.0, - "learning_rate": 1.3459556028403446e-05, - "loss": 0.8286, + "learning_rate": 8.532166430797313e-06, + "loss": 1.0928, "step": 14325 }, { - "epoch": 0.40596219785202187, + "epoch": 0.560528992878942, "grad_norm": 0.0, - "learning_rate": 1.3458694897245246e-05, - "loss": 0.9041, + "learning_rate": 8.530912937278702e-06, + "loss": 1.1245, "step": 14326 }, { - "epoch": 0.40599053529428436, + "epoch": 0.5605681195711715, "grad_norm": 0.0, - "learning_rate": 1.3457833736953174e-05, - "loss": 0.9724, + "learning_rate": 8.529659467351782e-06, + "loss": 1.0579, "step": 14327 }, { - "epoch": 0.4060188727365468, + "epoch": 0.5606072462634009, "grad_norm": 0.0, - "learning_rate": 1.3456972547534484e-05, - "loss": 0.931, + "learning_rate": 8.528406021036694e-06, + "loss": 1.0627, "step": 14328 }, { - "epoch": 0.40604721017880924, + "epoch": 0.5606463729556304, "grad_norm": 0.0, - "learning_rate": 1.3456111328996431e-05, - "loss": 0.9071, + "learning_rate": 8.527152598353561e-06, + "loss": 1.0237, "step": 14329 }, { - "epoch": 0.40607554762107173, + "epoch": 0.5606854996478597, "grad_norm": 0.0, - "learning_rate": 1.3455250081346269e-05, - "loss": 0.9726, + "learning_rate": 8.525899199322512e-06, + "loss": 1.0119, "step": 14330 }, { - "epoch": 0.40610388506333417, + "epoch": 0.5607246263400892, "grad_norm": 0.0, - "learning_rate": 1.3454388804591253e-05, - "loss": 1.0044, + "learning_rate": 8.524645823963676e-06, + "loss": 1.0319, "step": 14331 }, { - "epoch": 0.40613222250559666, + "epoch": 0.5607637530323186, "grad_norm": 0.0, - "learning_rate": 1.3453527498738637e-05, - "loss": 0.8868, + "learning_rate": 8.523392472297174e-06, + "loss": 0.9562, "step": 14332 }, { - "epoch": 0.4061605599478591, + "epoch": 0.5608028797245481, "grad_norm": 0.0, - "learning_rate": 1.3452666163795675e-05, - "loss": 0.8512, + "learning_rate": 8.522139144343143e-06, + "loss": 1.0924, "step": 14333 }, { - "epoch": 0.4061888973901216, + "epoch": 0.5608420064167775, "grad_norm": 0.0, - "learning_rate": 1.3451804799769625e-05, - "loss": 0.9476, + "learning_rate": 8.520885840121705e-06, + "loss": 1.0404, "step": 14334 }, { - "epoch": 0.40621723483238403, + "epoch": 0.560881133109007, "grad_norm": 0.0, - "learning_rate": 1.3450943406667741e-05, - "loss": 0.9003, + "learning_rate": 8.519632559652988e-06, + "loss": 0.9724, "step": 14335 }, { - "epoch": 0.40624557227464647, + "epoch": 0.5609202598012364, "grad_norm": 0.0, - "learning_rate": 1.3450081984497279e-05, - "loss": 0.9231, + "learning_rate": 8.518379302957111e-06, + "loss": 1.0334, "step": 14336 }, { - "epoch": 0.40627390971690897, + "epoch": 0.5609593864934659, "grad_norm": 0.0, - "learning_rate": 1.3449220533265497e-05, - "loss": 0.9957, + "learning_rate": 8.517126070054212e-06, + "loss": 1.0748, "step": 14337 }, { - "epoch": 0.4063022471591714, + "epoch": 0.5609985131856953, "grad_norm": 0.0, - "learning_rate": 1.3448359052979649e-05, - "loss": 0.9639, + "learning_rate": 8.515872860964407e-06, + "loss": 0.9958, "step": 14338 }, { - "epoch": 0.4063305846014339, + "epoch": 0.5610376398779248, "grad_norm": 0.0, - "learning_rate": 1.3447497543646992e-05, - "loss": 0.8988, + "learning_rate": 8.514619675707828e-06, + "loss": 1.1555, "step": 14339 }, { - "epoch": 0.40635892204369634, + "epoch": 0.5610767665701542, "grad_norm": 0.0, - "learning_rate": 1.3446636005274782e-05, - "loss": 0.9182, + "learning_rate": 8.513366514304595e-06, + "loss": 1.0065, "step": 14340 }, { - "epoch": 0.4063872594859588, + "epoch": 0.5611158932623836, "grad_norm": 0.0, - "learning_rate": 1.3445774437870278e-05, - "loss": 1.0747, + "learning_rate": 8.512113376774827e-06, + "loss": 1.0795, "step": 14341 }, { - "epoch": 0.40641559692822127, + "epoch": 0.561155019954613, "grad_norm": 0.0, - "learning_rate": 1.3444912841440734e-05, - "loss": 0.8787, + "learning_rate": 8.510860263138658e-06, + "loss": 0.9537, "step": 14342 }, { - "epoch": 0.4064439343704837, + "epoch": 0.5611941466468425, "grad_norm": 0.0, - "learning_rate": 1.3444051215993412e-05, - "loss": 0.9957, + "learning_rate": 8.509607173416208e-06, + "loss": 0.9203, "step": 14343 }, { - "epoch": 0.4064722718127462, + "epoch": 0.5612332733390719, "grad_norm": 0.0, - "learning_rate": 1.3443189561535568e-05, - "loss": 0.9202, + "learning_rate": 8.5083541076276e-06, + "loss": 0.9858, "step": 14344 }, { - "epoch": 0.40650060925500864, + "epoch": 0.5612724000313013, "grad_norm": 0.0, - "learning_rate": 1.3442327878074459e-05, - "loss": 0.9549, + "learning_rate": 8.50710106579295e-06, + "loss": 1.1699, "step": 14345 }, { - "epoch": 0.40652894669727113, + "epoch": 0.5613115267235308, "grad_norm": 0.0, - "learning_rate": 1.3441466165617346e-05, - "loss": 1.0007, + "learning_rate": 8.505848047932392e-06, + "loss": 1.0164, "step": 14346 }, { - "epoch": 0.40655728413953357, + "epoch": 0.5613506534157602, "grad_norm": 0.0, - "learning_rate": 1.3440604424171483e-05, - "loss": 0.9227, + "learning_rate": 8.50459505406604e-06, + "loss": 1.153, "step": 14347 }, { - "epoch": 0.406585621581796, + "epoch": 0.5613897801079897, "grad_norm": 0.0, - "learning_rate": 1.3439742653744133e-05, - "loss": 1.0018, + "learning_rate": 8.50334208421402e-06, + "loss": 0.9422, "step": 14348 }, { - "epoch": 0.4066139590240585, + "epoch": 0.5614289068002191, "grad_norm": 0.0, - "learning_rate": 1.3438880854342552e-05, - "loss": 0.9507, + "learning_rate": 8.50208913839645e-06, + "loss": 0.9223, "step": 14349 }, { - "epoch": 0.40664229646632094, + "epoch": 0.5614680334924486, "grad_norm": 0.0, - "learning_rate": 1.3438019025974e-05, - "loss": 0.8573, + "learning_rate": 8.500836216633447e-06, + "loss": 1.1452, "step": 14350 }, { - "epoch": 0.40667063390858343, + "epoch": 0.561507160184678, "grad_norm": 0.0, - "learning_rate": 1.3437157168645738e-05, - "loss": 0.8943, + "learning_rate": 8.49958331894514e-06, + "loss": 0.9946, "step": 14351 }, { - "epoch": 0.4066989713508459, + "epoch": 0.5615462868769074, "grad_norm": 0.0, - "learning_rate": 1.3436295282365026e-05, - "loss": 0.9073, + "learning_rate": 8.498330445351643e-06, + "loss": 1.0504, "step": 14352 }, { - "epoch": 0.4067273087931083, + "epoch": 0.5615854135691368, "grad_norm": 0.0, - "learning_rate": 1.3435433367139122e-05, - "loss": 0.8694, + "learning_rate": 8.49707759587308e-06, + "loss": 1.1821, "step": 14353 }, { - "epoch": 0.4067556462353708, + "epoch": 0.5616245402613663, "grad_norm": 0.0, - "learning_rate": 1.3434571422975286e-05, - "loss": 0.9339, + "learning_rate": 8.495824770529565e-06, + "loss": 1.0698, "step": 14354 }, { - "epoch": 0.40678398367763324, + "epoch": 0.5616636669535957, "grad_norm": 0.0, - "learning_rate": 1.3433709449880778e-05, - "loss": 0.9235, + "learning_rate": 8.494571969341219e-06, + "loss": 1.0833, "step": 14355 }, { - "epoch": 0.40681232111989574, + "epoch": 0.5617027936458252, "grad_norm": 0.0, - "learning_rate": 1.3432847447862865e-05, - "loss": 1.0526, + "learning_rate": 8.493319192328162e-06, + "loss": 1.011, "step": 14356 }, { - "epoch": 0.4068406585621582, + "epoch": 0.5617419203380546, "grad_norm": 0.0, - "learning_rate": 1.3431985416928799e-05, - "loss": 0.9836, + "learning_rate": 8.49206643951051e-06, + "loss": 1.0599, "step": 14357 }, { - "epoch": 0.4068689960044206, + "epoch": 0.5617810470302841, "grad_norm": 0.0, - "learning_rate": 1.3431123357085847e-05, - "loss": 0.9102, + "learning_rate": 8.490813710908384e-06, + "loss": 1.1204, "step": 14358 }, { - "epoch": 0.4068973334466831, + "epoch": 0.5618201737225135, "grad_norm": 0.0, - "learning_rate": 1.3430261268341272e-05, - "loss": 0.9798, + "learning_rate": 8.489561006541895e-06, + "loss": 1.0614, "step": 14359 }, { - "epoch": 0.40692567088894555, + "epoch": 0.561859300414743, "grad_norm": 0.0, - "learning_rate": 1.342939915070233e-05, - "loss": 0.8918, + "learning_rate": 8.488308326431166e-06, + "loss": 1.0686, "step": 14360 }, { - "epoch": 0.40695400833120804, + "epoch": 0.5618984271069724, "grad_norm": 0.0, - "learning_rate": 1.3428537004176282e-05, - "loss": 0.8807, + "learning_rate": 8.48705567059631e-06, + "loss": 1.2256, "step": 14361 }, { - "epoch": 0.4069823457734705, + "epoch": 0.5619375537992018, "grad_norm": 0.0, - "learning_rate": 1.3427674828770396e-05, - "loss": 0.8798, + "learning_rate": 8.485803039057446e-06, + "loss": 1.0804, "step": 14362 }, { - "epoch": 0.40701068321573297, + "epoch": 0.5619766804914312, "grad_norm": 0.0, - "learning_rate": 1.3426812624491935e-05, - "loss": 1.0294, + "learning_rate": 8.484550431834684e-06, + "loss": 0.9182, "step": 14363 }, { - "epoch": 0.4070390206579954, + "epoch": 0.5620158071836607, "grad_norm": 0.0, - "learning_rate": 1.3425950391348154e-05, - "loss": 0.9335, + "learning_rate": 8.483297848948147e-06, + "loss": 1.0452, "step": 14364 }, { - "epoch": 0.40706735810025785, + "epoch": 0.5620549338758901, "grad_norm": 0.0, - "learning_rate": 1.3425088129346322e-05, - "loss": 0.88, + "learning_rate": 8.482045290417946e-06, + "loss": 1.0079, "step": 14365 }, { - "epoch": 0.40709569554252034, + "epoch": 0.5620940605681196, "grad_norm": 0.0, - "learning_rate": 1.3424225838493703e-05, - "loss": 0.9077, + "learning_rate": 8.480792756264194e-06, + "loss": 0.9325, "step": 14366 }, { - "epoch": 0.4071240329847828, + "epoch": 0.562133187260349, "grad_norm": 0.0, - "learning_rate": 1.3423363518797558e-05, - "loss": 0.8942, + "learning_rate": 8.479540246507005e-06, + "loss": 0.9969, "step": 14367 }, { - "epoch": 0.4071523704270453, + "epoch": 0.5621723139525785, "grad_norm": 0.0, - "learning_rate": 1.342250117026515e-05, - "loss": 0.9065, + "learning_rate": 8.478287761166494e-06, + "loss": 1.1315, "step": 14368 }, { - "epoch": 0.4071807078693077, + "epoch": 0.5622114406448079, "grad_norm": 0.0, - "learning_rate": 1.3421638792903743e-05, - "loss": 1.0184, + "learning_rate": 8.477035300262778e-06, + "loss": 1.1556, "step": 14369 }, { - "epoch": 0.40720904531157015, + "epoch": 0.5622505673370374, "grad_norm": 0.0, - "learning_rate": 1.3420776386720601e-05, - "loss": 0.8463, + "learning_rate": 8.475782863815967e-06, + "loss": 0.9642, "step": 14370 }, { - "epoch": 0.40723738275383264, + "epoch": 0.5622896940292668, "grad_norm": 0.0, - "learning_rate": 1.3419913951722991e-05, - "loss": 0.9178, + "learning_rate": 8.474530451846172e-06, + "loss": 0.9632, "step": 14371 }, { - "epoch": 0.4072657201960951, + "epoch": 0.5623288207214963, "grad_norm": 0.0, - "learning_rate": 1.3419051487918178e-05, - "loss": 1.0535, + "learning_rate": 8.473278064373502e-06, + "loss": 1.1002, "step": 14372 }, { - "epoch": 0.4072940576383576, + "epoch": 0.5623679474137256, "grad_norm": 0.0, - "learning_rate": 1.3418188995313424e-05, - "loss": 0.8791, + "learning_rate": 8.472025701418078e-06, + "loss": 0.8677, "step": 14373 }, { - "epoch": 0.40732239508062, + "epoch": 0.562407074105955, "grad_norm": 0.0, - "learning_rate": 1.341732647391599e-05, - "loss": 0.8384, + "learning_rate": 8.470773363000006e-06, + "loss": 0.9234, "step": 14374 }, { - "epoch": 0.4073507325228825, + "epoch": 0.5624462007981845, "grad_norm": 0.0, - "learning_rate": 1.341646392373315e-05, - "loss": 0.8537, + "learning_rate": 8.4695210491394e-06, + "loss": 1.1023, "step": 14375 }, { - "epoch": 0.40737906996514495, + "epoch": 0.5624853274904139, "grad_norm": 0.0, - "learning_rate": 1.3415601344772164e-05, - "loss": 0.9484, + "learning_rate": 8.468268759856361e-06, + "loss": 1.009, "step": 14376 }, { - "epoch": 0.4074074074074074, + "epoch": 0.5625244541826434, "grad_norm": 0.0, - "learning_rate": 1.3414738737040303e-05, - "loss": 0.9166, + "learning_rate": 8.467016495171012e-06, + "loss": 1.027, "step": 14377 }, { - "epoch": 0.4074357448496699, + "epoch": 0.5625635808748728, "grad_norm": 0.0, - "learning_rate": 1.3413876100544825e-05, - "loss": 0.8721, + "learning_rate": 8.465764255103457e-06, + "loss": 1.1677, "step": 14378 }, { - "epoch": 0.4074640822919323, + "epoch": 0.5626027075671023, "grad_norm": 0.0, - "learning_rate": 1.3413013435293004e-05, - "loss": 0.9469, + "learning_rate": 8.464512039673806e-06, + "loss": 0.9966, "step": 14379 }, { - "epoch": 0.4074924197341948, + "epoch": 0.5626418342593317, "grad_norm": 0.0, - "learning_rate": 1.3412150741292102e-05, - "loss": 0.9266, + "learning_rate": 8.463259848902166e-06, + "loss": 1.0157, "step": 14380 }, { - "epoch": 0.40752075717645725, + "epoch": 0.5626809609515612, "grad_norm": 0.0, - "learning_rate": 1.3411288018549387e-05, - "loss": 0.8357, + "learning_rate": 8.462007682808645e-06, + "loss": 1.0365, "step": 14381 }, { - "epoch": 0.4075490946187197, + "epoch": 0.5627200876437906, "grad_norm": 0.0, - "learning_rate": 1.3410425267072124e-05, - "loss": 0.9104, + "learning_rate": 8.460755541413355e-06, + "loss": 1.0355, "step": 14382 }, { - "epoch": 0.4075774320609822, + "epoch": 0.56275921433602, "grad_norm": 0.0, - "learning_rate": 1.3409562486867586e-05, - "loss": 0.9746, + "learning_rate": 8.459503424736405e-06, + "loss": 1.0767, "step": 14383 }, { - "epoch": 0.4076057695032446, + "epoch": 0.5627983410282494, "grad_norm": 0.0, - "learning_rate": 1.3408699677943038e-05, - "loss": 0.8897, + "learning_rate": 8.458251332797899e-06, + "loss": 1.0564, "step": 14384 }, { - "epoch": 0.4076341069455071, + "epoch": 0.5628374677204789, "grad_norm": 0.0, - "learning_rate": 1.3407836840305746e-05, - "loss": 0.8917, + "learning_rate": 8.45699926561794e-06, + "loss": 0.9964, "step": 14385 }, { - "epoch": 0.40766244438776955, + "epoch": 0.5628765944127083, "grad_norm": 0.0, - "learning_rate": 1.3406973973962977e-05, - "loss": 0.976, + "learning_rate": 8.455747223216642e-06, + "loss": 1.067, "step": 14386 }, { - "epoch": 0.40769078183003205, + "epoch": 0.5629157211049378, "grad_norm": 0.0, - "learning_rate": 1.3406111078922002e-05, - "loss": 0.91, + "learning_rate": 8.45449520561411e-06, + "loss": 0.9916, "step": 14387 }, { - "epoch": 0.4077191192722945, + "epoch": 0.5629548477971672, "grad_norm": 0.0, - "learning_rate": 1.3405248155190086e-05, - "loss": 0.9409, + "learning_rate": 8.453243212830448e-06, + "loss": 0.9787, "step": 14388 }, { - "epoch": 0.4077474567145569, + "epoch": 0.5629939744893967, "grad_norm": 0.0, - "learning_rate": 1.3404385202774506e-05, - "loss": 0.92, + "learning_rate": 8.451991244885763e-06, + "loss": 1.1859, "step": 14389 }, { - "epoch": 0.4077757941568194, + "epoch": 0.5630331011816261, "grad_norm": 0.0, - "learning_rate": 1.3403522221682522e-05, - "loss": 0.9391, + "learning_rate": 8.450739301800153e-06, + "loss": 1.0577, "step": 14390 }, { - "epoch": 0.40780413159908185, + "epoch": 0.5630722278738556, "grad_norm": 0.0, - "learning_rate": 1.3402659211921407e-05, - "loss": 1.0269, + "learning_rate": 8.449487383593734e-06, + "loss": 1.0805, "step": 14391 }, { - "epoch": 0.40783246904134435, + "epoch": 0.563111354566085, "grad_norm": 0.0, - "learning_rate": 1.340179617349843e-05, - "loss": 0.9572, + "learning_rate": 8.448235490286604e-06, + "loss": 1.0423, "step": 14392 }, { - "epoch": 0.4078608064836068, + "epoch": 0.5631504812583145, "grad_norm": 0.0, - "learning_rate": 1.3400933106420861e-05, - "loss": 1.0267, + "learning_rate": 8.446983621898868e-06, + "loss": 1.0499, "step": 14393 }, { - "epoch": 0.4078891439258692, + "epoch": 0.5631896079505438, "grad_norm": 0.0, - "learning_rate": 1.3400070010695966e-05, - "loss": 0.9612, + "learning_rate": 8.445731778450625e-06, + "loss": 1.2064, "step": 14394 }, { - "epoch": 0.4079174813681317, + "epoch": 0.5632287346427733, "grad_norm": 0.0, - "learning_rate": 1.3399206886331022e-05, - "loss": 0.8665, + "learning_rate": 8.444479959961986e-06, + "loss": 1.179, "step": 14395 }, { - "epoch": 0.40794581881039416, + "epoch": 0.5632678613350027, "grad_norm": 0.0, - "learning_rate": 1.3398343733333295e-05, - "loss": 0.959, + "learning_rate": 8.443228166453049e-06, + "loss": 0.9624, "step": 14396 }, { - "epoch": 0.40797415625265665, + "epoch": 0.5633069880272322, "grad_norm": 0.0, - "learning_rate": 1.3397480551710059e-05, - "loss": 1.001, + "learning_rate": 8.441976397943918e-06, + "loss": 1.0954, "step": 14397 }, { - "epoch": 0.4080024936949191, + "epoch": 0.5633461147194616, "grad_norm": 0.0, - "learning_rate": 1.3396617341468581e-05, - "loss": 0.9692, + "learning_rate": 8.440724654454693e-06, + "loss": 1.0694, "step": 14398 }, { - "epoch": 0.4080308311371816, + "epoch": 0.5633852414116911, "grad_norm": 0.0, - "learning_rate": 1.3395754102616135e-05, - "loss": 0.9499, + "learning_rate": 8.439472936005474e-06, + "loss": 1.0818, "step": 14399 }, { - "epoch": 0.408059168579444, + "epoch": 0.5634243681039205, "grad_norm": 0.0, - "learning_rate": 1.339489083515999e-05, - "loss": 0.9793, + "learning_rate": 8.438221242616364e-06, + "loss": 1.0116, "step": 14400 }, { - "epoch": 0.40808750602170646, + "epoch": 0.56346349479615, "grad_norm": 0.0, - "learning_rate": 1.3394027539107417e-05, - "loss": 0.9471, + "learning_rate": 8.436969574307467e-06, + "loss": 1.0218, "step": 14401 }, { - "epoch": 0.40811584346396895, + "epoch": 0.5635026214883794, "grad_norm": 0.0, - "learning_rate": 1.3393164214465692e-05, - "loss": 0.934, + "learning_rate": 8.43571793109888e-06, + "loss": 1.092, "step": 14402 }, { - "epoch": 0.4081441809062314, + "epoch": 0.5635417481806088, "grad_norm": 0.0, - "learning_rate": 1.3392300861242085e-05, - "loss": 0.9088, + "learning_rate": 8.4344663130107e-06, + "loss": 1.0556, "step": 14403 }, { - "epoch": 0.4081725183484939, + "epoch": 0.5635808748728383, "grad_norm": 0.0, - "learning_rate": 1.3391437479443867e-05, - "loss": 0.9152, + "learning_rate": 8.43321472006303e-06, + "loss": 0.9848, "step": 14404 }, { - "epoch": 0.4082008557907563, + "epoch": 0.5636200015650676, "grad_norm": 0.0, - "learning_rate": 1.3390574069078312e-05, - "loss": 0.9747, + "learning_rate": 8.43196315227597e-06, + "loss": 0.9005, "step": 14405 }, { - "epoch": 0.40822919323301876, + "epoch": 0.5636591282572971, "grad_norm": 0.0, - "learning_rate": 1.3389710630152693e-05, - "loss": 0.961, + "learning_rate": 8.430711609669615e-06, + "loss": 1.0349, "step": 14406 }, { - "epoch": 0.40825753067528125, + "epoch": 0.5636982549495265, "grad_norm": 0.0, - "learning_rate": 1.3388847162674282e-05, - "loss": 0.9398, + "learning_rate": 8.429460092264067e-06, + "loss": 0.9782, "step": 14407 }, { - "epoch": 0.4082858681175437, + "epoch": 0.563737381641756, "grad_norm": 0.0, - "learning_rate": 1.3387983666650352e-05, - "loss": 0.9095, + "learning_rate": 8.42820860007942e-06, + "loss": 0.8971, "step": 14408 }, { - "epoch": 0.4083142055598062, + "epoch": 0.5637765083339854, "grad_norm": 0.0, - "learning_rate": 1.3387120142088182e-05, - "loss": 0.9073, + "learning_rate": 8.426957133135775e-06, + "loss": 0.9527, "step": 14409 }, { - "epoch": 0.4083425430020686, + "epoch": 0.5638156350262149, "grad_norm": 0.0, - "learning_rate": 1.3386256588995036e-05, - "loss": 0.9864, + "learning_rate": 8.425705691453227e-06, + "loss": 1.1094, "step": 14410 }, { - "epoch": 0.4083708804443311, + "epoch": 0.5638547617184443, "grad_norm": 0.0, - "learning_rate": 1.3385393007378195e-05, - "loss": 0.9625, + "learning_rate": 8.42445427505187e-06, + "loss": 1.0426, "step": 14411 }, { - "epoch": 0.40839921788659356, + "epoch": 0.5638938884106738, "grad_norm": 0.0, - "learning_rate": 1.3384529397244935e-05, - "loss": 0.912, + "learning_rate": 8.423202883951805e-06, + "loss": 0.9212, "step": 14412 }, { - "epoch": 0.408427555328856, + "epoch": 0.5639330151029032, "grad_norm": 0.0, - "learning_rate": 1.3383665758602522e-05, - "loss": 1.043, + "learning_rate": 8.421951518173126e-06, + "loss": 1.0049, "step": 14413 }, { - "epoch": 0.4084558927711185, + "epoch": 0.5639721417951327, "grad_norm": 0.0, - "learning_rate": 1.3382802091458237e-05, - "loss": 0.9056, + "learning_rate": 8.42070017773593e-06, + "loss": 1.0466, "step": 14414 }, { - "epoch": 0.4084842302133809, + "epoch": 0.564011268487362, "grad_norm": 0.0, - "learning_rate": 1.3381938395819354e-05, - "loss": 0.8969, + "learning_rate": 8.419448862660311e-06, + "loss": 0.9762, "step": 14415 }, { - "epoch": 0.4085125676556434, + "epoch": 0.5640503951795915, "grad_norm": 0.0, - "learning_rate": 1.338107467169315e-05, - "loss": 0.9274, + "learning_rate": 8.41819757296636e-06, + "loss": 0.9355, "step": 14416 }, { - "epoch": 0.40854090509790586, + "epoch": 0.5640895218718209, "grad_norm": 0.0, - "learning_rate": 1.3380210919086898e-05, - "loss": 0.9019, + "learning_rate": 8.416946308674173e-06, + "loss": 1.0594, "step": 14417 }, { - "epoch": 0.4085692425401683, + "epoch": 0.5641286485640504, "grad_norm": 0.0, - "learning_rate": 1.3379347138007874e-05, - "loss": 0.9293, + "learning_rate": 8.415695069803846e-06, + "loss": 0.9859, "step": 14418 }, { - "epoch": 0.4085975799824308, + "epoch": 0.5641677752562798, "grad_norm": 0.0, - "learning_rate": 1.3378483328463352e-05, - "loss": 0.9554, + "learning_rate": 8.414443856375471e-06, + "loss": 1.0414, "step": 14419 }, { - "epoch": 0.40862591742469323, + "epoch": 0.5642069019485093, "grad_norm": 0.0, - "learning_rate": 1.3377619490460612e-05, - "loss": 0.9305, + "learning_rate": 8.413192668409143e-06, + "loss": 1.022, "step": 14420 }, { - "epoch": 0.4086542548669557, + "epoch": 0.5642460286407387, "grad_norm": 0.0, - "learning_rate": 1.337675562400693e-05, - "loss": 0.9024, + "learning_rate": 8.411941505924945e-06, + "loss": 1.0327, "step": 14421 }, { - "epoch": 0.40868259230921816, + "epoch": 0.5642851553329682, "grad_norm": 0.0, - "learning_rate": 1.337589172910958e-05, - "loss": 0.9631, + "learning_rate": 8.410690368942983e-06, + "loss": 1.0475, "step": 14422 }, { - "epoch": 0.40871092975148066, + "epoch": 0.5643242820251976, "grad_norm": 0.0, - "learning_rate": 1.337502780577584e-05, - "loss": 0.8453, + "learning_rate": 8.409439257483341e-06, + "loss": 1.0089, "step": 14423 }, { - "epoch": 0.4087392671937431, + "epoch": 0.5643634087174271, "grad_norm": 0.0, - "learning_rate": 1.3374163854012987e-05, - "loss": 0.8331, + "learning_rate": 8.408188171566113e-06, + "loss": 1.0283, "step": 14424 }, { - "epoch": 0.40876760463600553, + "epoch": 0.5644025354096565, "grad_norm": 0.0, - "learning_rate": 1.3373299873828303e-05, - "loss": 0.9182, + "learning_rate": 8.406937111211386e-06, + "loss": 1.0414, "step": 14425 }, { - "epoch": 0.408795942078268, + "epoch": 0.564441662101886, "grad_norm": 0.0, - "learning_rate": 1.3372435865229056e-05, - "loss": 0.8409, + "learning_rate": 8.40568607643925e-06, + "loss": 0.8507, "step": 14426 }, { - "epoch": 0.40882427952053046, + "epoch": 0.5644807887941153, "grad_norm": 0.0, - "learning_rate": 1.3371571828222534e-05, - "loss": 0.9672, + "learning_rate": 8.404435067269803e-06, + "loss": 0.9833, "step": 14427 }, { - "epoch": 0.40885261696279296, + "epoch": 0.5645199154863448, "grad_norm": 0.0, - "learning_rate": 1.3370707762816007e-05, - "loss": 0.9077, + "learning_rate": 8.403184083723128e-06, + "loss": 0.9556, "step": 14428 }, { - "epoch": 0.4088809544050554, + "epoch": 0.5645590421785742, "grad_norm": 0.0, - "learning_rate": 1.3369843669016757e-05, - "loss": 0.9945, + "learning_rate": 8.401933125819316e-06, + "loss": 1.0612, "step": 14429 }, { - "epoch": 0.40890929184731784, + "epoch": 0.5645981688708036, "grad_norm": 0.0, - "learning_rate": 1.3368979546832066e-05, - "loss": 0.8911, + "learning_rate": 8.400682193578451e-06, + "loss": 0.9446, "step": 14430 }, { - "epoch": 0.40893762928958033, + "epoch": 0.5646372955630331, "grad_norm": 0.0, - "learning_rate": 1.3368115396269209e-05, - "loss": 0.8039, + "learning_rate": 8.39943128702063e-06, + "loss": 1.0078, "step": 14431 }, { - "epoch": 0.40896596673184277, + "epoch": 0.5646764222552625, "grad_norm": 0.0, - "learning_rate": 1.336725121733546e-05, - "loss": 0.932, + "learning_rate": 8.39818040616594e-06, + "loss": 0.9083, "step": 14432 }, { - "epoch": 0.40899430417410526, + "epoch": 0.564715548947492, "grad_norm": 0.0, - "learning_rate": 1.3366387010038107e-05, - "loss": 1.0126, + "learning_rate": 8.39692955103446e-06, + "loss": 1.1149, "step": 14433 }, { - "epoch": 0.4090226416163677, + "epoch": 0.5647546756397214, "grad_norm": 0.0, - "learning_rate": 1.3365522774384425e-05, - "loss": 0.9045, + "learning_rate": 8.395678721646282e-06, + "loss": 1.0388, "step": 14434 }, { - "epoch": 0.4090509790586302, + "epoch": 0.5647938023319509, "grad_norm": 0.0, - "learning_rate": 1.3364658510381699e-05, - "loss": 0.9882, + "learning_rate": 8.394427918021497e-06, + "loss": 0.9523, "step": 14435 }, { - "epoch": 0.40907931650089263, + "epoch": 0.5648329290241803, "grad_norm": 0.0, - "learning_rate": 1.3363794218037198e-05, - "loss": 1.0084, + "learning_rate": 8.393177140180189e-06, + "loss": 0.9773, "step": 14436 }, { - "epoch": 0.40910765394315507, + "epoch": 0.5648720557164097, "grad_norm": 0.0, - "learning_rate": 1.3362929897358215e-05, - "loss": 0.8549, + "learning_rate": 8.391926388142441e-06, + "loss": 1.1493, "step": 14437 }, { - "epoch": 0.40913599138541756, + "epoch": 0.5649111824086391, "grad_norm": 0.0, - "learning_rate": 1.3362065548352023e-05, - "loss": 0.9351, + "learning_rate": 8.390675661928341e-06, + "loss": 1.0149, "step": 14438 }, { - "epoch": 0.40916432882768, + "epoch": 0.5649503091008686, "grad_norm": 0.0, - "learning_rate": 1.33612011710259e-05, - "loss": 1.0374, + "learning_rate": 8.389424961557967e-06, + "loss": 1.0255, "step": 14439 }, { - "epoch": 0.4091926662699425, + "epoch": 0.564989435793098, "grad_norm": 0.0, - "learning_rate": 1.3360336765387136e-05, - "loss": 1.0158, + "learning_rate": 8.388174287051417e-06, + "loss": 0.906, "step": 14440 }, { - "epoch": 0.40922100371220493, + "epoch": 0.5650285624853275, "grad_norm": 0.0, - "learning_rate": 1.3359472331443008e-05, - "loss": 0.9321, + "learning_rate": 8.386923638428765e-06, + "loss": 1.1225, "step": 14441 }, { - "epoch": 0.40924934115446737, + "epoch": 0.5650676891775569, "grad_norm": 0.0, - "learning_rate": 1.3358607869200798e-05, - "loss": 0.9203, + "learning_rate": 8.3856730157101e-06, + "loss": 0.9416, "step": 14442 }, { - "epoch": 0.40927767859672987, + "epoch": 0.5651068158697864, "grad_norm": 0.0, - "learning_rate": 1.3357743378667785e-05, - "loss": 0.9074, + "learning_rate": 8.384422418915503e-06, + "loss": 0.9547, "step": 14443 }, { - "epoch": 0.4093060160389923, + "epoch": 0.5651459425620158, "grad_norm": 0.0, - "learning_rate": 1.335687885985125e-05, - "loss": 0.874, + "learning_rate": 8.383171848065059e-06, + "loss": 0.943, "step": 14444 }, { - "epoch": 0.4093343534812548, + "epoch": 0.5651850692542453, "grad_norm": 0.0, - "learning_rate": 1.3356014312758483e-05, - "loss": 0.8615, + "learning_rate": 8.381921303178844e-06, + "loss": 1.0699, "step": 14445 }, { - "epoch": 0.40936269092351724, + "epoch": 0.5652241959464747, "grad_norm": 0.0, - "learning_rate": 1.3355149737396756e-05, - "loss": 0.9725, + "learning_rate": 8.38067078427695e-06, + "loss": 0.9682, "step": 14446 }, { - "epoch": 0.40939102836577973, + "epoch": 0.5652633226387042, "grad_norm": 0.0, - "learning_rate": 1.3354285133773361e-05, - "loss": 1.006, + "learning_rate": 8.379420291379454e-06, + "loss": 1.0558, "step": 14447 }, { - "epoch": 0.40941936580804217, + "epoch": 0.5653024493309335, "grad_norm": 0.0, - "learning_rate": 1.3353420501895573e-05, - "loss": 0.9084, + "learning_rate": 8.378169824506435e-06, + "loss": 1.0519, "step": 14448 }, { - "epoch": 0.4094477032503046, + "epoch": 0.565341576023163, "grad_norm": 0.0, - "learning_rate": 1.3352555841770682e-05, - "loss": 0.9883, + "learning_rate": 8.376919383677978e-06, + "loss": 1.1121, "step": 14449 }, { - "epoch": 0.4094760406925671, + "epoch": 0.5653807027153924, "grad_norm": 0.0, - "learning_rate": 1.3351691153405968e-05, - "loss": 0.912, + "learning_rate": 8.37566896891416e-06, + "loss": 1.0742, "step": 14450 }, { - "epoch": 0.40950437813482954, + "epoch": 0.5654198294076219, "grad_norm": 0.0, - "learning_rate": 1.3350826436808715e-05, - "loss": 1.0001, + "learning_rate": 8.374418580235065e-06, + "loss": 1.0709, "step": 14451 }, { - "epoch": 0.40953271557709203, + "epoch": 0.5654589560998513, "grad_norm": 0.0, - "learning_rate": 1.3349961691986205e-05, - "loss": 0.9031, + "learning_rate": 8.37316821766077e-06, + "loss": 1.0016, "step": 14452 }, { - "epoch": 0.40956105301935447, + "epoch": 0.5654980827920808, "grad_norm": 0.0, - "learning_rate": 1.334909691894572e-05, - "loss": 0.9571, + "learning_rate": 8.371917881211354e-06, + "loss": 1.0665, "step": 14453 }, { - "epoch": 0.4095893904616169, + "epoch": 0.5655372094843102, "grad_norm": 0.0, - "learning_rate": 1.3348232117694555e-05, - "loss": 0.9033, + "learning_rate": 8.370667570906899e-06, + "loss": 0.9715, "step": 14454 }, { - "epoch": 0.4096177279038794, + "epoch": 0.5655763361765397, "grad_norm": 0.0, - "learning_rate": 1.3347367288239986e-05, - "loss": 0.925, + "learning_rate": 8.36941728676748e-06, + "loss": 1.1239, "step": 14455 }, { - "epoch": 0.40964606534614184, + "epoch": 0.5656154628687691, "grad_norm": 0.0, - "learning_rate": 1.33465024305893e-05, - "loss": 0.811, + "learning_rate": 8.368167028813176e-06, + "loss": 0.9477, "step": 14456 }, { - "epoch": 0.40967440278840433, + "epoch": 0.5656545895609986, "grad_norm": 0.0, - "learning_rate": 1.3345637544749776e-05, - "loss": 1.0598, + "learning_rate": 8.366916797064061e-06, + "loss": 1.0273, "step": 14457 }, { - "epoch": 0.4097027402306668, + "epoch": 0.565693716253228, "grad_norm": 0.0, - "learning_rate": 1.3344772630728708e-05, - "loss": 0.9491, + "learning_rate": 8.365666591540223e-06, + "loss": 1.0836, "step": 14458 }, { - "epoch": 0.40973107767292927, + "epoch": 0.5657328429454573, "grad_norm": 0.0, - "learning_rate": 1.3343907688533378e-05, - "loss": 0.9026, + "learning_rate": 8.364416412261728e-06, + "loss": 1.0516, "step": 14459 }, { - "epoch": 0.4097594151151917, + "epoch": 0.5657719696376868, "grad_norm": 0.0, - "learning_rate": 1.3343042718171073e-05, - "loss": 0.8842, + "learning_rate": 8.363166259248657e-06, + "loss": 1.1552, "step": 14460 }, { - "epoch": 0.40978775255745414, + "epoch": 0.5658110963299162, "grad_norm": 0.0, - "learning_rate": 1.3342177719649075e-05, - "loss": 1.0154, + "learning_rate": 8.36191613252108e-06, + "loss": 1.0216, "step": 14461 }, { - "epoch": 0.40981608999971664, + "epoch": 0.5658502230221457, "grad_norm": 0.0, - "learning_rate": 1.3341312692974674e-05, - "loss": 0.8295, + "learning_rate": 8.360666032099082e-06, + "loss": 1.0911, "step": 14462 }, { - "epoch": 0.4098444274419791, + "epoch": 0.5658893497143751, "grad_norm": 0.0, - "learning_rate": 1.3340447638155158e-05, - "loss": 1.0001, + "learning_rate": 8.359415958002733e-06, + "loss": 0.97, "step": 14463 }, { - "epoch": 0.40987276488424157, + "epoch": 0.5659284764066046, "grad_norm": 0.0, - "learning_rate": 1.3339582555197809e-05, - "loss": 0.9886, + "learning_rate": 8.358165910252108e-06, + "loss": 1.0289, "step": 14464 }, { - "epoch": 0.409901102326504, + "epoch": 0.565967603098834, "grad_norm": 0.0, - "learning_rate": 1.3338717444109916e-05, - "loss": 1.0014, + "learning_rate": 8.356915888867282e-06, + "loss": 0.909, "step": 14465 }, { - "epoch": 0.40992943976876645, + "epoch": 0.5660067297910635, "grad_norm": 0.0, - "learning_rate": 1.3337852304898766e-05, - "loss": 0.9807, + "learning_rate": 8.355665893868322e-06, + "loss": 1.1116, "step": 14466 }, { - "epoch": 0.40995777721102894, + "epoch": 0.5660458564832929, "grad_norm": 0.0, - "learning_rate": 1.3336987137571646e-05, - "loss": 0.821, + "learning_rate": 8.354415925275314e-06, + "loss": 1.0386, "step": 14467 }, { - "epoch": 0.4099861146532914, + "epoch": 0.5660849831755224, "grad_norm": 0.0, - "learning_rate": 1.3336121942135847e-05, - "loss": 0.9705, + "learning_rate": 8.353165983108322e-06, + "loss": 1.0081, "step": 14468 }, { - "epoch": 0.41001445209555387, + "epoch": 0.5661241098677517, "grad_norm": 0.0, - "learning_rate": 1.333525671859865e-05, - "loss": 0.9638, + "learning_rate": 8.351916067387421e-06, + "loss": 1.0725, "step": 14469 }, { - "epoch": 0.4100427895378163, + "epoch": 0.5661632365599812, "grad_norm": 0.0, - "learning_rate": 1.333439146696735e-05, - "loss": 0.8821, + "learning_rate": 8.350666178132679e-06, + "loss": 0.932, "step": 14470 }, { - "epoch": 0.4100711269800788, + "epoch": 0.5662023632522106, "grad_norm": 0.0, - "learning_rate": 1.3333526187249235e-05, - "loss": 0.8779, + "learning_rate": 8.349416315364177e-06, + "loss": 1.0057, "step": 14471 }, { - "epoch": 0.41009946442234124, + "epoch": 0.5662414899444401, "grad_norm": 0.0, - "learning_rate": 1.3332660879451584e-05, - "loss": 0.9242, + "learning_rate": 8.348166479101979e-06, + "loss": 1.0204, "step": 14472 }, { - "epoch": 0.4101278018646037, + "epoch": 0.5662806166366695, "grad_norm": 0.0, - "learning_rate": 1.3331795543581696e-05, - "loss": 0.9685, + "learning_rate": 8.346916669366157e-06, + "loss": 1.0339, "step": 14473 }, { - "epoch": 0.4101561393068662, + "epoch": 0.566319743328899, "grad_norm": 0.0, - "learning_rate": 1.3330930179646859e-05, - "loss": 0.9527, + "learning_rate": 8.345666886176783e-06, + "loss": 1.0472, "step": 14474 }, { - "epoch": 0.4101844767491286, + "epoch": 0.5663588700211284, "grad_norm": 0.0, - "learning_rate": 1.333006478765436e-05, - "loss": 0.9019, + "learning_rate": 8.34441712955392e-06, + "loss": 0.9381, "step": 14475 }, { - "epoch": 0.4102128141913911, + "epoch": 0.5663979967133579, "grad_norm": 0.0, - "learning_rate": 1.3329199367611488e-05, - "loss": 0.8382, + "learning_rate": 8.34316739951765e-06, + "loss": 1.0477, "step": 14476 }, { - "epoch": 0.41024115163365354, + "epoch": 0.5664371234055873, "grad_norm": 0.0, - "learning_rate": 1.3328333919525531e-05, - "loss": 0.9577, + "learning_rate": 8.341917696088034e-06, + "loss": 1.0753, "step": 14477 }, { - "epoch": 0.410269489075916, + "epoch": 0.5664762500978168, "grad_norm": 0.0, - "learning_rate": 1.3327468443403784e-05, - "loss": 1.0001, + "learning_rate": 8.340668019285143e-06, + "loss": 0.993, "step": 14478 }, { - "epoch": 0.4102978265181785, + "epoch": 0.5665153767900462, "grad_norm": 0.0, - "learning_rate": 1.3326602939253532e-05, - "loss": 0.967, + "learning_rate": 8.339418369129038e-06, + "loss": 0.9598, "step": 14479 }, { - "epoch": 0.4103261639604409, + "epoch": 0.5665545034822757, "grad_norm": 0.0, - "learning_rate": 1.3325737407082074e-05, - "loss": 0.9058, + "learning_rate": 8.3381687456398e-06, + "loss": 1.0438, "step": 14480 }, { - "epoch": 0.4103545014027034, + "epoch": 0.566593630174505, "grad_norm": 0.0, - "learning_rate": 1.332487184689669e-05, - "loss": 1.0414, + "learning_rate": 8.336919148837487e-06, + "loss": 1.1016, "step": 14481 }, { - "epoch": 0.41038283884496585, + "epoch": 0.5666327568667345, "grad_norm": 0.0, - "learning_rate": 1.3324006258704677e-05, - "loss": 0.9156, + "learning_rate": 8.335669578742172e-06, + "loss": 1.0991, "step": 14482 }, { - "epoch": 0.41041117628722834, + "epoch": 0.5666718835589639, "grad_norm": 0.0, - "learning_rate": 1.3323140642513324e-05, - "loss": 0.9636, + "learning_rate": 8.334420035373916e-06, + "loss": 0.9836, "step": 14483 }, { - "epoch": 0.4104395137294908, + "epoch": 0.5667110102511934, "grad_norm": 0.0, - "learning_rate": 1.3322274998329925e-05, - "loss": 0.9106, + "learning_rate": 8.333170518752782e-06, + "loss": 1.014, "step": 14484 }, { - "epoch": 0.4104678511717532, + "epoch": 0.5667501369434228, "grad_norm": 0.0, - "learning_rate": 1.3321409326161767e-05, - "loss": 0.9594, + "learning_rate": 8.331921028898846e-06, + "loss": 1.0696, "step": 14485 }, { - "epoch": 0.4104961886140157, + "epoch": 0.5667892636356523, "grad_norm": 0.0, - "learning_rate": 1.3320543626016147e-05, - "loss": 1.0056, + "learning_rate": 8.330671565832168e-06, + "loss": 1.0396, "step": 14486 }, { - "epoch": 0.41052452605627815, + "epoch": 0.5668283903278817, "grad_norm": 0.0, - "learning_rate": 1.3319677897900357e-05, - "loss": 0.8478, + "learning_rate": 8.329422129572812e-06, + "loss": 0.9387, "step": 14487 }, { - "epoch": 0.41055286349854064, + "epoch": 0.5668675170201111, "grad_norm": 0.0, - "learning_rate": 1.3318812141821684e-05, - "loss": 0.9501, + "learning_rate": 8.328172720140843e-06, + "loss": 0.9728, "step": 14488 }, { - "epoch": 0.4105812009408031, + "epoch": 0.5669066437123406, "grad_norm": 0.0, - "learning_rate": 1.3317946357787426e-05, - "loss": 0.9401, + "learning_rate": 8.326923337556324e-06, + "loss": 0.9819, "step": 14489 }, { - "epoch": 0.4106095383830655, + "epoch": 0.56694577040457, "grad_norm": 0.0, - "learning_rate": 1.3317080545804872e-05, - "loss": 1.0015, + "learning_rate": 8.325673981839322e-06, + "loss": 1.0529, "step": 14490 }, { - "epoch": 0.410637875825328, + "epoch": 0.5669848970967994, "grad_norm": 0.0, - "learning_rate": 1.3316214705881318e-05, - "loss": 0.899, + "learning_rate": 8.324424653009898e-06, + "loss": 1.1757, "step": 14491 }, { - "epoch": 0.41066621326759045, + "epoch": 0.5670240237890288, "grad_norm": 0.0, - "learning_rate": 1.3315348838024056e-05, - "loss": 0.9187, + "learning_rate": 8.323175351088113e-06, + "loss": 0.9348, "step": 14492 }, { - "epoch": 0.41069455070985295, + "epoch": 0.5670631504812583, "grad_norm": 0.0, - "learning_rate": 1.3314482942240379e-05, - "loss": 0.8495, + "learning_rate": 8.321926076094032e-06, + "loss": 1.0822, "step": 14493 }, { - "epoch": 0.4107228881521154, + "epoch": 0.5671022771734877, "grad_norm": 0.0, - "learning_rate": 1.3313617018537581e-05, - "loss": 0.9079, + "learning_rate": 8.320676828047716e-06, + "loss": 0.9859, "step": 14494 }, { - "epoch": 0.4107512255943779, + "epoch": 0.5671414038657172, "grad_norm": 0.0, - "learning_rate": 1.3312751066922958e-05, - "loss": 0.88, + "learning_rate": 8.319427606969223e-06, + "loss": 1.0478, "step": 14495 }, { - "epoch": 0.4107795630366403, + "epoch": 0.5671805305579466, "grad_norm": 0.0, - "learning_rate": 1.3311885087403801e-05, - "loss": 0.907, + "learning_rate": 8.318178412878618e-06, + "loss": 1.063, "step": 14496 }, { - "epoch": 0.41080790047890275, + "epoch": 0.5672196572501761, "grad_norm": 0.0, - "learning_rate": 1.3311019079987409e-05, - "loss": 0.9012, + "learning_rate": 8.316929245795959e-06, + "loss": 1.0534, "step": 14497 }, { - "epoch": 0.41083623792116525, + "epoch": 0.5672587839424055, "grad_norm": 0.0, - "learning_rate": 1.331015304468107e-05, - "loss": 0.8427, + "learning_rate": 8.31568010574131e-06, + "loss": 0.9846, "step": 14498 }, { - "epoch": 0.4108645753634277, + "epoch": 0.567297910634635, "grad_norm": 0.0, - "learning_rate": 1.3309286981492084e-05, - "loss": 0.8009, + "learning_rate": 8.314430992734728e-06, + "loss": 0.9746, "step": 14499 }, { - "epoch": 0.4108929128056902, + "epoch": 0.5673370373268644, "grad_norm": 0.0, - "learning_rate": 1.3308420890427747e-05, - "loss": 0.7848, + "learning_rate": 8.313181906796272e-06, + "loss": 1.0601, "step": 14500 }, { - "epoch": 0.4109212502479526, + "epoch": 0.5673761640190939, "grad_norm": 0.0, - "learning_rate": 1.330755477149535e-05, - "loss": 0.8627, + "learning_rate": 8.311932847945996e-06, + "loss": 1.0378, "step": 14501 }, { - "epoch": 0.41094958769021506, + "epoch": 0.5674152907113232, "grad_norm": 0.0, - "learning_rate": 1.330668862470219e-05, - "loss": 1.0197, + "learning_rate": 8.310683816203967e-06, + "loss": 1.0204, "step": 14502 }, { - "epoch": 0.41097792513247755, + "epoch": 0.5674544174035527, "grad_norm": 0.0, - "learning_rate": 1.3305822450055565e-05, - "loss": 0.9145, + "learning_rate": 8.30943481159024e-06, + "loss": 0.9874, "step": 14503 }, { - "epoch": 0.41100626257474, + "epoch": 0.5674935440957821, "grad_norm": 0.0, - "learning_rate": 1.3304956247562772e-05, - "loss": 0.986, + "learning_rate": 8.308185834124872e-06, + "loss": 0.9571, "step": 14504 }, { - "epoch": 0.4110346000170025, + "epoch": 0.5675326707880116, "grad_norm": 0.0, - "learning_rate": 1.3304090017231101e-05, - "loss": 0.8608, + "learning_rate": 8.306936883827918e-06, + "loss": 0.9955, "step": 14505 }, { - "epoch": 0.4110629374592649, + "epoch": 0.567571797480241, "grad_norm": 0.0, - "learning_rate": 1.3303223759067855e-05, - "loss": 0.962, + "learning_rate": 8.305687960719433e-06, + "loss": 1.002, "step": 14506 }, { - "epoch": 0.4110912749015274, + "epoch": 0.5676109241724705, "grad_norm": 0.0, - "learning_rate": 1.330235747308033e-05, - "loss": 0.9366, + "learning_rate": 8.30443906481948e-06, + "loss": 1.0273, "step": 14507 }, { - "epoch": 0.41111961234378985, + "epoch": 0.5676500508646999, "grad_norm": 0.0, - "learning_rate": 1.3301491159275821e-05, - "loss": 0.9582, + "learning_rate": 8.303190196148112e-06, + "loss": 0.8959, "step": 14508 }, { - "epoch": 0.4111479497860523, + "epoch": 0.5676891775569294, "grad_norm": 0.0, - "learning_rate": 1.3300624817661627e-05, - "loss": 0.9922, + "learning_rate": 8.301941354725382e-06, + "loss": 0.9465, "step": 14509 }, { - "epoch": 0.4111762872283148, + "epoch": 0.5677283042491588, "grad_norm": 0.0, - "learning_rate": 1.3299758448245044e-05, - "loss": 1.0339, + "learning_rate": 8.300692540571343e-06, + "loss": 0.9797, "step": 14510 }, { - "epoch": 0.4112046246705772, + "epoch": 0.5677674309413883, "grad_norm": 0.0, - "learning_rate": 1.3298892051033368e-05, - "loss": 0.7693, + "learning_rate": 8.299443753706056e-06, + "loss": 1.0178, "step": 14511 }, { - "epoch": 0.4112329621128397, + "epoch": 0.5678065576336176, "grad_norm": 0.0, - "learning_rate": 1.32980256260339e-05, - "loss": 0.8875, + "learning_rate": 8.298194994149571e-06, + "loss": 1.0038, "step": 14512 }, { - "epoch": 0.41126129955510216, + "epoch": 0.5678456843258471, "grad_norm": 0.0, - "learning_rate": 1.3297159173253937e-05, - "loss": 0.8925, + "learning_rate": 8.296946261921941e-06, + "loss": 1.0441, "step": 14513 }, { - "epoch": 0.4112896369973646, + "epoch": 0.5678848110180765, "grad_norm": 0.0, - "learning_rate": 1.3296292692700781e-05, - "loss": 0.8749, + "learning_rate": 8.295697557043223e-06, + "loss": 1.0194, "step": 14514 }, { - "epoch": 0.4113179744396271, + "epoch": 0.567923937710306, "grad_norm": 0.0, - "learning_rate": 1.3295426184381723e-05, - "loss": 0.9144, + "learning_rate": 8.29444887953346e-06, + "loss": 1.0917, "step": 14515 }, { - "epoch": 0.4113463118818895, + "epoch": 0.5679630644025354, "grad_norm": 0.0, - "learning_rate": 1.329455964830407e-05, - "loss": 0.8898, + "learning_rate": 8.293200229412716e-06, + "loss": 0.9003, "step": 14516 }, { - "epoch": 0.411374649324152, + "epoch": 0.5680021910947648, "grad_norm": 0.0, - "learning_rate": 1.3293693084475116e-05, - "loss": 0.9653, + "learning_rate": 8.291951606701037e-06, + "loss": 0.9928, "step": 14517 }, { - "epoch": 0.41140298676641446, + "epoch": 0.5680413177869943, "grad_norm": 0.0, - "learning_rate": 1.3292826492902164e-05, - "loss": 0.918, + "learning_rate": 8.290703011418475e-06, + "loss": 1.0444, "step": 14518 }, { - "epoch": 0.41143132420867695, + "epoch": 0.5680804444792237, "grad_norm": 0.0, - "learning_rate": 1.3291959873592508e-05, - "loss": 0.8887, + "learning_rate": 8.289454443585076e-06, + "loss": 1.0664, "step": 14519 }, { - "epoch": 0.4114596616509394, + "epoch": 0.5681195711714532, "grad_norm": 0.0, - "learning_rate": 1.3291093226553456e-05, - "loss": 1.0022, + "learning_rate": 8.2882059032209e-06, + "loss": 1.0488, "step": 14520 }, { - "epoch": 0.41148799909320183, + "epoch": 0.5681586978636826, "grad_norm": 0.0, - "learning_rate": 1.3290226551792302e-05, - "loss": 0.9092, + "learning_rate": 8.286957390345994e-06, + "loss": 0.9223, "step": 14521 }, { - "epoch": 0.4115163365354643, + "epoch": 0.5681978245559121, "grad_norm": 0.0, - "learning_rate": 1.328935984931635e-05, - "loss": 0.8812, + "learning_rate": 8.285708904980404e-06, + "loss": 1.0441, "step": 14522 }, { - "epoch": 0.41154467397772676, + "epoch": 0.5682369512481414, "grad_norm": 0.0, - "learning_rate": 1.3288493119132894e-05, - "loss": 0.915, + "learning_rate": 8.28446044714418e-06, + "loss": 0.9194, "step": 14523 }, { - "epoch": 0.41157301141998925, + "epoch": 0.5682760779403709, "grad_norm": 0.0, - "learning_rate": 1.328762636124924e-05, - "loss": 0.8622, + "learning_rate": 8.283212016857369e-06, + "loss": 0.9645, "step": 14524 }, { - "epoch": 0.4116013488622517, + "epoch": 0.5683152046326003, "grad_norm": 0.0, - "learning_rate": 1.3286759575672692e-05, - "loss": 0.9074, + "learning_rate": 8.281963614140026e-06, + "loss": 0.9916, "step": 14525 }, { - "epoch": 0.41162968630451413, + "epoch": 0.5683543313248298, "grad_norm": 0.0, - "learning_rate": 1.3285892762410547e-05, - "loss": 0.9242, + "learning_rate": 8.280715239012192e-06, + "loss": 1.1539, "step": 14526 }, { - "epoch": 0.4116580237467766, + "epoch": 0.5683934580170592, "grad_norm": 0.0, - "learning_rate": 1.3285025921470103e-05, - "loss": 0.957, + "learning_rate": 8.27946689149392e-06, + "loss": 0.9712, "step": 14527 }, { - "epoch": 0.41168636118903906, + "epoch": 0.5684325847092887, "grad_norm": 0.0, - "learning_rate": 1.3284159052858668e-05, - "loss": 0.8677, + "learning_rate": 8.278218571605247e-06, + "loss": 1.1527, "step": 14528 }, { - "epoch": 0.41171469863130156, + "epoch": 0.5684717114015181, "grad_norm": 0.0, - "learning_rate": 1.3283292156583542e-05, - "loss": 0.8262, + "learning_rate": 8.276970279366232e-06, + "loss": 0.8885, "step": 14529 }, { - "epoch": 0.411743036073564, + "epoch": 0.5685108380937476, "grad_norm": 0.0, - "learning_rate": 1.3282425232652027e-05, - "loss": 0.8595, + "learning_rate": 8.275722014796915e-06, + "loss": 0.9656, "step": 14530 }, { - "epoch": 0.4117713735158265, + "epoch": 0.568549964785977, "grad_norm": 0.0, - "learning_rate": 1.3281558281071422e-05, - "loss": 1.0, + "learning_rate": 8.274473777917342e-06, + "loss": 1.1003, "step": 14531 }, { - "epoch": 0.4117997109580889, + "epoch": 0.5685890914782065, "grad_norm": 0.0, - "learning_rate": 1.3280691301849037e-05, - "loss": 1.0372, + "learning_rate": 8.273225568747558e-06, + "loss": 1.1003, "step": 14532 }, { - "epoch": 0.41182804840035137, + "epoch": 0.5686282181704359, "grad_norm": 0.0, - "learning_rate": 1.3279824294992172e-05, - "loss": 0.9891, + "learning_rate": 8.271977387307604e-06, + "loss": 0.9822, "step": 14533 }, { - "epoch": 0.41185638584261386, + "epoch": 0.5686673448626653, "grad_norm": 0.0, - "learning_rate": 1.3278957260508129e-05, - "loss": 0.8712, + "learning_rate": 8.27072923361753e-06, + "loss": 1.0361, "step": 14534 }, { - "epoch": 0.4118847232848763, + "epoch": 0.5687064715548947, "grad_norm": 0.0, - "learning_rate": 1.3278090198404207e-05, - "loss": 0.8672, + "learning_rate": 8.269481107697379e-06, + "loss": 0.958, "step": 14535 }, { - "epoch": 0.4119130607271388, + "epoch": 0.5687455982471242, "grad_norm": 0.0, - "learning_rate": 1.3277223108687717e-05, - "loss": 0.9208, + "learning_rate": 8.268233009567192e-06, + "loss": 1.0364, "step": 14536 }, { - "epoch": 0.41194139816940123, + "epoch": 0.5687847249393536, "grad_norm": 0.0, - "learning_rate": 1.327635599136596e-05, - "loss": 0.9005, + "learning_rate": 8.266984939247012e-06, + "loss": 0.9988, "step": 14537 }, { - "epoch": 0.41196973561166367, + "epoch": 0.5688238516315831, "grad_norm": 0.0, - "learning_rate": 1.327548884644624e-05, - "loss": 0.8399, + "learning_rate": 8.265736896756883e-06, + "loss": 0.9667, "step": 14538 }, { - "epoch": 0.41199807305392616, + "epoch": 0.5688629783238125, "grad_norm": 0.0, - "learning_rate": 1.3274621673935861e-05, - "loss": 0.8275, + "learning_rate": 8.264488882116846e-06, + "loss": 1.0322, "step": 14539 }, { - "epoch": 0.4120264104961886, + "epoch": 0.568902105016042, "grad_norm": 0.0, - "learning_rate": 1.3273754473842127e-05, - "loss": 0.8787, + "learning_rate": 8.263240895346943e-06, + "loss": 1.1116, "step": 14540 }, { - "epoch": 0.4120547479384511, + "epoch": 0.5689412317082714, "grad_norm": 0.0, - "learning_rate": 1.3272887246172344e-05, - "loss": 0.9328, + "learning_rate": 8.261992936467215e-06, + "loss": 1.058, "step": 14541 }, { - "epoch": 0.41208308538071353, + "epoch": 0.5689803584005009, "grad_norm": 0.0, - "learning_rate": 1.3272019990933816e-05, - "loss": 0.9034, + "learning_rate": 8.260745005497701e-06, + "loss": 1.06, "step": 14542 }, { - "epoch": 0.412111422822976, + "epoch": 0.5690194850927303, "grad_norm": 0.0, - "learning_rate": 1.3271152708133848e-05, - "loss": 0.9681, + "learning_rate": 8.259497102458447e-06, + "loss": 1.0143, "step": 14543 }, { - "epoch": 0.41213976026523846, + "epoch": 0.5690586117849596, "grad_norm": 0.0, - "learning_rate": 1.3270285397779743e-05, - "loss": 0.9388, + "learning_rate": 8.258249227369485e-06, + "loss": 0.9551, "step": 14544 }, { - "epoch": 0.4121680977075009, + "epoch": 0.5690977384771891, "grad_norm": 0.0, - "learning_rate": 1.3269418059878815e-05, - "loss": 0.9005, + "learning_rate": 8.25700138025086e-06, + "loss": 0.9528, "step": 14545 }, { - "epoch": 0.4121964351497634, + "epoch": 0.5691368651694185, "grad_norm": 0.0, - "learning_rate": 1.3268550694438363e-05, - "loss": 0.9016, + "learning_rate": 8.255753561122603e-06, + "loss": 1.01, "step": 14546 }, { - "epoch": 0.41222477259202583, + "epoch": 0.569175991861648, "grad_norm": 0.0, - "learning_rate": 1.3267683301465697e-05, - "loss": 0.858, + "learning_rate": 8.254505770004764e-06, + "loss": 1.0287, "step": 14547 }, { - "epoch": 0.41225311003428833, + "epoch": 0.5692151185538774, "grad_norm": 0.0, - "learning_rate": 1.3266815880968115e-05, - "loss": 0.791, + "learning_rate": 8.253258006917375e-06, + "loss": 1.0687, "step": 14548 }, { - "epoch": 0.41228144747655077, + "epoch": 0.5692542452461069, "grad_norm": 0.0, - "learning_rate": 1.3265948432952935e-05, - "loss": 0.8664, + "learning_rate": 8.252010271880473e-06, + "loss": 1.0891, "step": 14549 }, { - "epoch": 0.4123097849188132, + "epoch": 0.5692933719383363, "grad_norm": 0.0, - "learning_rate": 1.3265080957427456e-05, - "loss": 0.8253, + "learning_rate": 8.250762564914093e-06, + "loss": 1.0847, "step": 14550 }, { - "epoch": 0.4123381223610757, + "epoch": 0.5693324986305658, "grad_norm": 0.0, - "learning_rate": 1.3264213454398988e-05, - "loss": 0.9642, + "learning_rate": 8.249514886038277e-06, + "loss": 1.0212, "step": 14551 }, { - "epoch": 0.41236645980333814, + "epoch": 0.5693716253227952, "grad_norm": 0.0, - "learning_rate": 1.3263345923874838e-05, - "loss": 0.9013, + "learning_rate": 8.248267235273057e-06, + "loss": 1.0479, "step": 14552 }, { - "epoch": 0.41239479724560063, + "epoch": 0.5694107520150247, "grad_norm": 0.0, - "learning_rate": 1.3262478365862314e-05, - "loss": 0.8849, + "learning_rate": 8.247019612638473e-06, + "loss": 0.9913, "step": 14553 }, { - "epoch": 0.41242313468786307, + "epoch": 0.569449878707254, "grad_norm": 0.0, - "learning_rate": 1.3261610780368726e-05, - "loss": 0.9799, + "learning_rate": 8.245772018154557e-06, + "loss": 1.0654, "step": 14554 }, { - "epoch": 0.4124514721301255, + "epoch": 0.5694890053994835, "grad_norm": 0.0, - "learning_rate": 1.3260743167401375e-05, - "loss": 0.9844, + "learning_rate": 8.244524451841338e-06, + "loss": 1.0466, "step": 14555 }, { - "epoch": 0.412479809572388, + "epoch": 0.5695281320917129, "grad_norm": 0.0, - "learning_rate": 1.3259875526967574e-05, - "loss": 0.8939, + "learning_rate": 8.243276913718862e-06, + "loss": 1.0284, "step": 14556 }, { - "epoch": 0.41250814701465044, + "epoch": 0.5695672587839424, "grad_norm": 0.0, - "learning_rate": 1.325900785907463e-05, - "loss": 0.8281, + "learning_rate": 8.242029403807158e-06, + "loss": 0.9438, "step": 14557 }, { - "epoch": 0.41253648445691293, + "epoch": 0.5696063854761718, "grad_norm": 0.0, - "learning_rate": 1.3258140163729856e-05, - "loss": 0.8385, + "learning_rate": 8.240781922126257e-06, + "loss": 1.0757, "step": 14558 }, { - "epoch": 0.41256482189917537, + "epoch": 0.5696455121684013, "grad_norm": 0.0, - "learning_rate": 1.3257272440940559e-05, - "loss": 0.8736, + "learning_rate": 8.239534468696192e-06, + "loss": 0.9429, "step": 14559 }, { - "epoch": 0.41259315934143787, + "epoch": 0.5696846388606307, "grad_norm": 0.0, - "learning_rate": 1.325640469071404e-05, - "loss": 0.8884, + "learning_rate": 8.238287043537e-06, + "loss": 0.9542, "step": 14560 }, { - "epoch": 0.4126214967837003, + "epoch": 0.5697237655528602, "grad_norm": 0.0, - "learning_rate": 1.3255536913057621e-05, - "loss": 0.9095, + "learning_rate": 8.237039646668712e-06, + "loss": 0.9702, "step": 14561 }, { - "epoch": 0.41264983422596274, + "epoch": 0.5697628922450896, "grad_norm": 0.0, - "learning_rate": 1.3254669107978604e-05, - "loss": 0.8714, + "learning_rate": 8.235792278111357e-06, + "loss": 1.0679, "step": 14562 }, { - "epoch": 0.41267817166822524, + "epoch": 0.5698020189373191, "grad_norm": 0.0, - "learning_rate": 1.3253801275484298e-05, - "loss": 1.0532, + "learning_rate": 8.234544937884969e-06, + "loss": 1.0654, "step": 14563 }, { - "epoch": 0.4127065091104877, + "epoch": 0.5698411456295485, "grad_norm": 0.0, - "learning_rate": 1.3252933415582016e-05, - "loss": 0.8569, + "learning_rate": 8.23329762600957e-06, + "loss": 0.9847, "step": 14564 }, { - "epoch": 0.41273484655275017, + "epoch": 0.569880272321778, "grad_norm": 0.0, - "learning_rate": 1.325206552827907e-05, - "loss": 0.9928, + "learning_rate": 8.232050342505204e-06, + "loss": 1.0222, "step": 14565 }, { - "epoch": 0.4127631839950126, + "epoch": 0.5699193990140073, "grad_norm": 0.0, - "learning_rate": 1.3251197613582769e-05, - "loss": 0.7522, + "learning_rate": 8.230803087391893e-06, + "loss": 1.0392, "step": 14566 }, { - "epoch": 0.41279152143727504, + "epoch": 0.5699585257062368, "grad_norm": 0.0, - "learning_rate": 1.3250329671500421e-05, - "loss": 0.955, + "learning_rate": 8.229555860689668e-06, + "loss": 1.0278, "step": 14567 }, { - "epoch": 0.41281985887953754, + "epoch": 0.5699976523984662, "grad_norm": 0.0, - "learning_rate": 1.3249461702039344e-05, - "loss": 1.0627, + "learning_rate": 8.228308662418553e-06, + "loss": 0.9933, "step": 14568 }, { - "epoch": 0.4128481963218, + "epoch": 0.5700367790906957, "grad_norm": 0.0, - "learning_rate": 1.3248593705206838e-05, - "loss": 0.9467, + "learning_rate": 8.227061492598585e-06, + "loss": 1.1186, "step": 14569 }, { - "epoch": 0.41287653376406247, + "epoch": 0.5700759057829251, "grad_norm": 0.0, - "learning_rate": 1.3247725681010223e-05, - "loss": 0.9739, + "learning_rate": 8.225814351249788e-06, + "loss": 1.0593, "step": 14570 }, { - "epoch": 0.4129048712063249, + "epoch": 0.5701150324751546, "grad_norm": 0.0, - "learning_rate": 1.3246857629456808e-05, - "loss": 0.9498, + "learning_rate": 8.224567238392189e-06, + "loss": 0.9024, "step": 14571 }, { - "epoch": 0.4129332086485874, + "epoch": 0.570154159167384, "grad_norm": 0.0, - "learning_rate": 1.3245989550553909e-05, - "loss": 0.9001, + "learning_rate": 8.223320154045816e-06, + "loss": 1.0242, "step": 14572 }, { - "epoch": 0.41296154609084984, + "epoch": 0.5701932858596134, "grad_norm": 0.0, - "learning_rate": 1.324512144430883e-05, - "loss": 1.0912, + "learning_rate": 8.22207309823069e-06, + "loss": 1.1578, "step": 14573 }, { - "epoch": 0.4129898835331123, + "epoch": 0.5702324125518429, "grad_norm": 0.0, - "learning_rate": 1.324425331072889e-05, - "loss": 0.8878, + "learning_rate": 8.220826070966847e-06, + "loss": 1.0078, "step": 14574 }, { - "epoch": 0.4130182209753748, + "epoch": 0.5702715392440723, "grad_norm": 0.0, - "learning_rate": 1.3243385149821402e-05, - "loss": 0.8798, + "learning_rate": 8.219579072274307e-06, + "loss": 0.9871, "step": 14575 }, { - "epoch": 0.4130465584176372, + "epoch": 0.5703106659363018, "grad_norm": 0.0, - "learning_rate": 1.3242516961593672e-05, - "loss": 0.969, + "learning_rate": 8.218332102173097e-06, + "loss": 0.9317, "step": 14576 }, { - "epoch": 0.4130748958598997, + "epoch": 0.5703497926285311, "grad_norm": 0.0, - "learning_rate": 1.324164874605302e-05, - "loss": 0.8483, + "learning_rate": 8.217085160683238e-06, + "loss": 1.0254, "step": 14577 }, { - "epoch": 0.41310323330216214, + "epoch": 0.5703889193207606, "grad_norm": 0.0, - "learning_rate": 1.3240780503206755e-05, - "loss": 0.8179, + "learning_rate": 8.21583824782476e-06, + "loss": 0.9164, "step": 14578 }, { - "epoch": 0.4131315707444246, + "epoch": 0.57042804601299, "grad_norm": 0.0, - "learning_rate": 1.3239912233062198e-05, - "loss": 0.9998, + "learning_rate": 8.214591363617683e-06, + "loss": 1.0424, "step": 14579 }, { - "epoch": 0.4131599081866871, + "epoch": 0.5704671727052195, "grad_norm": 0.0, - "learning_rate": 1.3239043935626652e-05, - "loss": 0.9204, + "learning_rate": 8.21334450808203e-06, + "loss": 1.0819, "step": 14580 }, { - "epoch": 0.4131882456289495, + "epoch": 0.5705062993974489, "grad_norm": 0.0, - "learning_rate": 1.3238175610907437e-05, - "loss": 0.8704, + "learning_rate": 8.212097681237829e-06, + "loss": 1.1301, "step": 14581 }, { - "epoch": 0.413216583071212, + "epoch": 0.5705454260896784, "grad_norm": 0.0, - "learning_rate": 1.3237307258911867e-05, - "loss": 0.8183, + "learning_rate": 8.210850883105095e-06, + "loss": 0.9762, "step": 14582 }, { - "epoch": 0.41324492051347445, + "epoch": 0.5705845527819078, "grad_norm": 0.0, - "learning_rate": 1.3236438879647256e-05, - "loss": 1.0214, + "learning_rate": 8.209604113703857e-06, + "loss": 0.9563, "step": 14583 }, { - "epoch": 0.41327325795573694, + "epoch": 0.5706236794741373, "grad_norm": 0.0, - "learning_rate": 1.3235570473120917e-05, - "loss": 0.7542, + "learning_rate": 8.20835737305413e-06, + "loss": 1.0072, "step": 14584 }, { - "epoch": 0.4133015953979994, + "epoch": 0.5706628061663667, "grad_norm": 0.0, - "learning_rate": 1.3234702039340167e-05, - "loss": 0.9714, + "learning_rate": 8.207110661175942e-06, + "loss": 1.0565, "step": 14585 }, { - "epoch": 0.4133299328402618, + "epoch": 0.5707019328585962, "grad_norm": 0.0, - "learning_rate": 1.3233833578312321e-05, - "loss": 0.799, + "learning_rate": 8.205863978089308e-06, + "loss": 0.9651, "step": 14586 }, { - "epoch": 0.4133582702825243, + "epoch": 0.5707410595508255, "grad_norm": 0.0, - "learning_rate": 1.3232965090044694e-05, - "loss": 0.9702, + "learning_rate": 8.20461732381425e-06, + "loss": 1.092, "step": 14587 }, { - "epoch": 0.41338660772478675, + "epoch": 0.570780186243055, "grad_norm": 0.0, - "learning_rate": 1.3232096574544602e-05, - "loss": 0.9645, + "learning_rate": 8.203370698370792e-06, + "loss": 1.1127, "step": 14588 }, { - "epoch": 0.41341494516704924, + "epoch": 0.5708193129352844, "grad_norm": 0.0, - "learning_rate": 1.3231228031819358e-05, - "loss": 0.8651, + "learning_rate": 8.202124101778947e-06, + "loss": 0.9667, "step": 14589 }, { - "epoch": 0.4134432826093117, + "epoch": 0.5708584396275139, "grad_norm": 0.0, - "learning_rate": 1.3230359461876282e-05, - "loss": 0.8705, + "learning_rate": 8.200877534058734e-06, + "loss": 1.1448, "step": 14590 }, { - "epoch": 0.4134716200515741, + "epoch": 0.5708975663197433, "grad_norm": 0.0, - "learning_rate": 1.322949086472269e-05, - "loss": 0.9885, + "learning_rate": 8.199630995230173e-06, + "loss": 1.0595, "step": 14591 }, { - "epoch": 0.4134999574938366, + "epoch": 0.5709366930119728, "grad_norm": 0.0, - "learning_rate": 1.3228622240365896e-05, - "loss": 1.118, + "learning_rate": 8.198384485313286e-06, + "loss": 0.9977, "step": 14592 }, { - "epoch": 0.41352829493609905, + "epoch": 0.5709758197042022, "grad_norm": 0.0, - "learning_rate": 1.3227753588813217e-05, - "loss": 0.9714, + "learning_rate": 8.197138004328085e-06, + "loss": 1.0912, "step": 14593 }, { - "epoch": 0.41355663237836154, + "epoch": 0.5710149463964317, "grad_norm": 0.0, - "learning_rate": 1.3226884910071973e-05, - "loss": 0.8923, + "learning_rate": 8.19589155229459e-06, + "loss": 0.9971, "step": 14594 }, { - "epoch": 0.413584969820624, + "epoch": 0.5710540730886611, "grad_norm": 0.0, - "learning_rate": 1.322601620414948e-05, - "loss": 0.9936, + "learning_rate": 8.19464512923281e-06, + "loss": 0.887, "step": 14595 }, { - "epoch": 0.4136133072628865, + "epoch": 0.5710931997808906, "grad_norm": 0.0, - "learning_rate": 1.322514747105305e-05, - "loss": 0.9746, + "learning_rate": 8.193398735162771e-06, + "loss": 0.9594, "step": 14596 }, { - "epoch": 0.4136416447051489, + "epoch": 0.57113232647312, "grad_norm": 0.0, - "learning_rate": 1.3224278710790008e-05, - "loss": 0.896, + "learning_rate": 8.192152370104487e-06, + "loss": 1.0787, "step": 14597 }, { - "epoch": 0.41366998214741135, + "epoch": 0.5711714531653495, "grad_norm": 0.0, - "learning_rate": 1.3223409923367669e-05, - "loss": 0.9105, + "learning_rate": 8.19090603407797e-06, + "loss": 1.0515, "step": 14598 }, { - "epoch": 0.41369831958967385, + "epoch": 0.5712105798575788, "grad_norm": 0.0, - "learning_rate": 1.3222541108793352e-05, - "loss": 0.9519, + "learning_rate": 8.189659727103233e-06, + "loss": 1.1615, "step": 14599 }, { - "epoch": 0.4137266570319363, + "epoch": 0.5712497065498083, "grad_norm": 0.0, - "learning_rate": 1.3221672267074375e-05, - "loss": 0.9199, + "learning_rate": 8.18841344920029e-06, + "loss": 1.002, "step": 14600 }, { - "epoch": 0.4137549944741988, + "epoch": 0.5712888332420377, "grad_norm": 0.0, - "learning_rate": 1.3220803398218056e-05, - "loss": 0.9057, + "learning_rate": 8.18716720038916e-06, + "loss": 0.9864, "step": 14601 }, { - "epoch": 0.4137833319164612, + "epoch": 0.5713279599342671, "grad_norm": 0.0, - "learning_rate": 1.3219934502231711e-05, - "loss": 0.928, + "learning_rate": 8.185920980689854e-06, + "loss": 0.9349, "step": 14602 }, { - "epoch": 0.41381166935872365, + "epoch": 0.5713670866264966, "grad_norm": 0.0, - "learning_rate": 1.3219065579122663e-05, - "loss": 1.0167, + "learning_rate": 8.184674790122384e-06, + "loss": 0.964, "step": 14603 }, { - "epoch": 0.41384000680098615, + "epoch": 0.571406213318726, "grad_norm": 0.0, - "learning_rate": 1.3218196628898232e-05, - "loss": 0.9532, + "learning_rate": 8.183428628706758e-06, + "loss": 1.0193, "step": 14604 }, { - "epoch": 0.4138683442432486, + "epoch": 0.5714453400109555, "grad_norm": 0.0, - "learning_rate": 1.3217327651565734e-05, - "loss": 0.8729, + "learning_rate": 8.182182496462995e-06, + "loss": 0.9842, "step": 14605 }, { - "epoch": 0.4138966816855111, + "epoch": 0.5714844667031849, "grad_norm": 0.0, - "learning_rate": 1.321645864713249e-05, - "loss": 0.8807, + "learning_rate": 8.180936393411103e-06, + "loss": 1.0623, "step": 14606 }, { - "epoch": 0.4139250191277735, + "epoch": 0.5715235933954144, "grad_norm": 0.0, - "learning_rate": 1.3215589615605824e-05, - "loss": 0.9831, + "learning_rate": 8.179690319571096e-06, + "loss": 1.1099, "step": 14607 }, { - "epoch": 0.413953356570036, + "epoch": 0.5715627200876437, "grad_norm": 0.0, - "learning_rate": 1.321472055699305e-05, - "loss": 0.8785, + "learning_rate": 8.178444274962975e-06, + "loss": 1.0088, "step": 14608 }, { - "epoch": 0.41398169401229845, + "epoch": 0.5716018467798732, "grad_norm": 0.0, - "learning_rate": 1.3213851471301492e-05, - "loss": 0.9304, + "learning_rate": 8.17719825960676e-06, + "loss": 0.9729, "step": 14609 }, { - "epoch": 0.4140100314545609, + "epoch": 0.5716409734721026, "grad_norm": 0.0, - "learning_rate": 1.3212982358538467e-05, - "loss": 0.9401, + "learning_rate": 8.175952273522458e-06, + "loss": 1.0494, "step": 14610 }, { - "epoch": 0.4140383688968234, + "epoch": 0.5716801001643321, "grad_norm": 0.0, - "learning_rate": 1.3212113218711302e-05, - "loss": 0.9074, + "learning_rate": 8.174706316730076e-06, + "loss": 1.1235, "step": 14611 }, { - "epoch": 0.4140667063390858, + "epoch": 0.5717192268565615, "grad_norm": 0.0, - "learning_rate": 1.3211244051827312e-05, - "loss": 0.9814, + "learning_rate": 8.173460389249625e-06, + "loss": 0.9689, "step": 14612 }, { - "epoch": 0.4140950437813483, + "epoch": 0.571758353548791, "grad_norm": 0.0, - "learning_rate": 1.3210374857893824e-05, - "loss": 0.871, + "learning_rate": 8.172214491101107e-06, + "loss": 1.0067, "step": 14613 }, { - "epoch": 0.41412338122361075, + "epoch": 0.5717974802410204, "grad_norm": 0.0, - "learning_rate": 1.3209505636918154e-05, - "loss": 0.9156, + "learning_rate": 8.170968622304536e-06, + "loss": 0.9592, "step": 14614 }, { - "epoch": 0.4141517186658732, + "epoch": 0.5718366069332499, "grad_norm": 0.0, - "learning_rate": 1.3208636388907627e-05, - "loss": 0.9991, + "learning_rate": 8.169722782879918e-06, + "loss": 1.0465, "step": 14615 }, { - "epoch": 0.4141800561081357, + "epoch": 0.5718757336254793, "grad_norm": 0.0, - "learning_rate": 1.320776711386956e-05, - "loss": 0.9353, + "learning_rate": 8.16847697284726e-06, + "loss": 0.9689, "step": 14616 }, { - "epoch": 0.4142083935503981, + "epoch": 0.5719148603177088, "grad_norm": 0.0, - "learning_rate": 1.3206897811811285e-05, - "loss": 1.0045, + "learning_rate": 8.167231192226562e-06, + "loss": 1.0082, "step": 14617 }, { - "epoch": 0.4142367309926606, + "epoch": 0.5719539870099382, "grad_norm": 0.0, - "learning_rate": 1.3206028482740116e-05, - "loss": 0.9669, + "learning_rate": 8.165985441037836e-06, + "loss": 0.8974, "step": 14618 }, { - "epoch": 0.41426506843492306, + "epoch": 0.5719931137021677, "grad_norm": 0.0, - "learning_rate": 1.320515912666338e-05, - "loss": 0.9522, + "learning_rate": 8.164739719301089e-06, + "loss": 1.0593, "step": 14619 }, { - "epoch": 0.41429340587718555, + "epoch": 0.572032240394397, "grad_norm": 0.0, - "learning_rate": 1.32042897435884e-05, - "loss": 0.881, + "learning_rate": 8.163494027036322e-06, + "loss": 0.887, "step": 14620 }, { - "epoch": 0.414321743319448, + "epoch": 0.5720713670866265, "grad_norm": 0.0, - "learning_rate": 1.3203420333522497e-05, - "loss": 0.9684, + "learning_rate": 8.162248364263538e-06, + "loss": 1.0574, "step": 14621 }, { - "epoch": 0.4143500807617104, + "epoch": 0.5721104937788559, "grad_norm": 0.0, - "learning_rate": 1.3202550896472993e-05, - "loss": 0.9576, + "learning_rate": 8.161002731002741e-06, + "loss": 1.0342, "step": 14622 }, { - "epoch": 0.4143784182039729, + "epoch": 0.5721496204710854, "grad_norm": 0.0, - "learning_rate": 1.320168143244721e-05, - "loss": 0.8521, + "learning_rate": 8.159757127273936e-06, + "loss": 1.099, "step": 14623 }, { - "epoch": 0.41440675564623536, + "epoch": 0.5721887471633148, "grad_norm": 0.0, - "learning_rate": 1.320081194145248e-05, - "loss": 0.9291, + "learning_rate": 8.158511553097127e-06, + "loss": 0.9916, "step": 14624 }, { - "epoch": 0.41443509308849785, + "epoch": 0.5722278738555443, "grad_norm": 0.0, - "learning_rate": 1.3199942423496123e-05, - "loss": 0.9854, + "learning_rate": 8.157266008492318e-06, + "loss": 0.9485, "step": 14625 }, { - "epoch": 0.4144634305307603, + "epoch": 0.5722670005477737, "grad_norm": 0.0, - "learning_rate": 1.3199072878585464e-05, - "loss": 0.9481, + "learning_rate": 8.156020493479502e-06, + "loss": 0.9342, "step": 14626 }, { - "epoch": 0.41449176797302273, + "epoch": 0.5723061272400032, "grad_norm": 0.0, - "learning_rate": 1.3198203306727822e-05, - "loss": 0.959, + "learning_rate": 8.15477500807869e-06, + "loss": 1.0062, "step": 14627 }, { - "epoch": 0.4145201054152852, + "epoch": 0.5723452539322326, "grad_norm": 0.0, - "learning_rate": 1.3197333707930527e-05, - "loss": 0.8732, + "learning_rate": 8.15352955230988e-06, + "loss": 1.0207, "step": 14628 }, { - "epoch": 0.41454844285754766, + "epoch": 0.572384380624462, "grad_norm": 0.0, - "learning_rate": 1.3196464082200903e-05, - "loss": 1.0775, + "learning_rate": 8.152284126193067e-06, + "loss": 0.8397, "step": 14629 }, { - "epoch": 0.41457678029981015, + "epoch": 0.5724235073166914, "grad_norm": 0.0, - "learning_rate": 1.3195594429546279e-05, - "loss": 0.9302, + "learning_rate": 8.15103872974826e-06, + "loss": 1.0481, "step": 14630 }, { - "epoch": 0.4146051177420726, + "epoch": 0.5724626340089208, "grad_norm": 0.0, - "learning_rate": 1.3194724749973971e-05, - "loss": 0.8775, + "learning_rate": 8.149793362995451e-06, + "loss": 0.8882, "step": 14631 }, { - "epoch": 0.4146334551843351, + "epoch": 0.5725017607011503, "grad_norm": 0.0, - "learning_rate": 1.3193855043491313e-05, - "loss": 0.8422, + "learning_rate": 8.148548025954644e-06, + "loss": 1.0765, "step": 14632 }, { - "epoch": 0.4146617926265975, + "epoch": 0.5725408873933797, "grad_norm": 0.0, - "learning_rate": 1.3192985310105628e-05, - "loss": 0.8469, + "learning_rate": 8.147302718645835e-06, + "loss": 1.1025, "step": 14633 }, { - "epoch": 0.41469013006885996, + "epoch": 0.5725800140856092, "grad_norm": 0.0, - "learning_rate": 1.319211554982424e-05, - "loss": 0.9095, + "learning_rate": 8.146057441089025e-06, + "loss": 0.8862, "step": 14634 }, { - "epoch": 0.41471846751112246, + "epoch": 0.5726191407778386, "grad_norm": 0.0, - "learning_rate": 1.319124576265448e-05, - "loss": 0.9782, + "learning_rate": 8.144812193304204e-06, + "loss": 1.0159, "step": 14635 }, { - "epoch": 0.4147468049533849, + "epoch": 0.5726582674700681, "grad_norm": 0.0, - "learning_rate": 1.3190375948603668e-05, - "loss": 0.8675, + "learning_rate": 8.143566975311379e-06, + "loss": 1.0131, "step": 14636 }, { - "epoch": 0.4147751423956474, + "epoch": 0.5726973941622975, "grad_norm": 0.0, - "learning_rate": 1.3189506107679139e-05, - "loss": 0.951, + "learning_rate": 8.14232178713054e-06, + "loss": 1.0263, "step": 14637 }, { - "epoch": 0.4148034798379098, + "epoch": 0.572736520854527, "grad_norm": 0.0, - "learning_rate": 1.3188636239888216e-05, - "loss": 1.0543, + "learning_rate": 8.14107662878169e-06, + "loss": 1.081, "step": 14638 }, { - "epoch": 0.41483181728017227, + "epoch": 0.5727756475467564, "grad_norm": 0.0, - "learning_rate": 1.3187766345238222e-05, - "loss": 0.9154, + "learning_rate": 8.139831500284816e-06, + "loss": 1.0895, "step": 14639 }, { - "epoch": 0.41486015472243476, + "epoch": 0.5728147742389859, "grad_norm": 0.0, - "learning_rate": 1.318689642373649e-05, - "loss": 0.9108, + "learning_rate": 8.138586401659914e-06, + "loss": 1.0316, "step": 14640 }, { - "epoch": 0.4148884921646972, + "epoch": 0.5728539009312152, "grad_norm": 0.0, - "learning_rate": 1.3186026475390345e-05, - "loss": 0.8769, + "learning_rate": 8.137341332926986e-06, + "loss": 1.1065, "step": 14641 }, { - "epoch": 0.4149168296069597, + "epoch": 0.5728930276234447, "grad_norm": 0.0, - "learning_rate": 1.318515650020712e-05, - "loss": 0.9787, + "learning_rate": 8.136096294106023e-06, + "loss": 1.0612, "step": 14642 }, { - "epoch": 0.41494516704922213, + "epoch": 0.5729321543156741, "grad_norm": 0.0, - "learning_rate": 1.3184286498194134e-05, - "loss": 0.9555, + "learning_rate": 8.134851285217017e-06, + "loss": 1.0179, "step": 14643 }, { - "epoch": 0.4149735044914846, + "epoch": 0.5729712810079036, "grad_norm": 0.0, - "learning_rate": 1.3183416469358724e-05, - "loss": 0.9551, + "learning_rate": 8.133606306279957e-06, + "loss": 1.0401, "step": 14644 }, { - "epoch": 0.41500184193374706, + "epoch": 0.573010407700133, "grad_norm": 0.0, - "learning_rate": 1.3182546413708212e-05, - "loss": 0.9263, + "learning_rate": 8.132361357314847e-06, + "loss": 0.991, "step": 14645 }, { - "epoch": 0.4150301793760095, + "epoch": 0.5730495343923625, "grad_norm": 0.0, - "learning_rate": 1.3181676331249932e-05, - "loss": 0.9146, + "learning_rate": 8.131116438341672e-06, + "loss": 1.1265, "step": 14646 }, { - "epoch": 0.415058516818272, + "epoch": 0.5730886610845919, "grad_norm": 0.0, - "learning_rate": 1.3180806221991209e-05, - "loss": 0.8249, + "learning_rate": 8.129871549380429e-06, + "loss": 0.9186, "step": 14647 }, { - "epoch": 0.41508685426053443, + "epoch": 0.5731277877768214, "grad_norm": 0.0, - "learning_rate": 1.3179936085939372e-05, - "loss": 1.0562, + "learning_rate": 8.1286266904511e-06, + "loss": 1.0639, "step": 14648 }, { - "epoch": 0.4151151917027969, + "epoch": 0.5731669144690508, "grad_norm": 0.0, - "learning_rate": 1.3179065923101759e-05, - "loss": 0.8568, + "learning_rate": 8.12738186157368e-06, + "loss": 1.0809, "step": 14649 }, { - "epoch": 0.41514352914505936, + "epoch": 0.5732060411612803, "grad_norm": 0.0, - "learning_rate": 1.3178195733485689e-05, - "loss": 0.9341, + "learning_rate": 8.126137062768165e-06, + "loss": 1.0995, "step": 14650 }, { - "epoch": 0.4151718665873218, + "epoch": 0.5732451678535097, "grad_norm": 0.0, - "learning_rate": 1.3177325517098498e-05, - "loss": 0.9143, + "learning_rate": 8.12489229405454e-06, + "loss": 1.0043, "step": 14651 }, { - "epoch": 0.4152002040295843, + "epoch": 0.5732842945457391, "grad_norm": 0.0, - "learning_rate": 1.3176455273947513e-05, - "loss": 0.8711, + "learning_rate": 8.123647555452795e-06, + "loss": 0.9574, "step": 14652 }, { - "epoch": 0.41522854147184673, + "epoch": 0.5733234212379685, "grad_norm": 0.0, - "learning_rate": 1.3175585004040066e-05, - "loss": 0.9309, + "learning_rate": 8.122402846982916e-06, + "loss": 0.9559, "step": 14653 }, { - "epoch": 0.41525687891410923, + "epoch": 0.573362547930198, "grad_norm": 0.0, - "learning_rate": 1.3174714707383485e-05, - "loss": 0.9681, + "learning_rate": 8.121158168664898e-06, + "loss": 1.0435, "step": 14654 }, { - "epoch": 0.41528521635637167, + "epoch": 0.5734016746224274, "grad_norm": 0.0, - "learning_rate": 1.3173844383985107e-05, - "loss": 0.9103, + "learning_rate": 8.119913520518726e-06, + "loss": 0.9895, "step": 14655 }, { - "epoch": 0.41531355379863416, + "epoch": 0.5734408013146569, "grad_norm": 0.0, - "learning_rate": 1.3172974033852254e-05, - "loss": 0.9442, + "learning_rate": 8.118668902564386e-06, + "loss": 0.9315, "step": 14656 }, { - "epoch": 0.4153418912408966, + "epoch": 0.5734799280068863, "grad_norm": 0.0, - "learning_rate": 1.3172103656992268e-05, - "loss": 0.9551, + "learning_rate": 8.117424314821867e-06, + "loss": 1.0412, "step": 14657 }, { - "epoch": 0.41537022868315904, + "epoch": 0.5735190546991157, "grad_norm": 0.0, - "learning_rate": 1.3171233253412475e-05, - "loss": 0.8315, + "learning_rate": 8.11617975731115e-06, + "loss": 0.9806, "step": 14658 }, { - "epoch": 0.41539856612542153, + "epoch": 0.5735581813913452, "grad_norm": 0.0, - "learning_rate": 1.3170362823120204e-05, - "loss": 0.8382, + "learning_rate": 8.11493523005223e-06, + "loss": 1.0367, "step": 14659 }, { - "epoch": 0.41542690356768397, + "epoch": 0.5735973080835746, "grad_norm": 0.0, - "learning_rate": 1.316949236612279e-05, - "loss": 0.8366, + "learning_rate": 8.113690733065087e-06, + "loss": 0.9929, "step": 14660 }, { - "epoch": 0.41545524100994646, + "epoch": 0.5736364347758041, "grad_norm": 0.0, - "learning_rate": 1.3168621882427561e-05, - "loss": 0.9436, + "learning_rate": 8.112446266369708e-06, + "loss": 0.8964, "step": 14661 }, { - "epoch": 0.4154835784522089, + "epoch": 0.5736755614680334, "grad_norm": 0.0, - "learning_rate": 1.316775137204186e-05, - "loss": 0.9875, + "learning_rate": 8.111201829986071e-06, + "loss": 0.9442, "step": 14662 }, { - "epoch": 0.41551191589447134, + "epoch": 0.5737146881602629, "grad_norm": 0.0, - "learning_rate": 1.316688083497301e-05, - "loss": 0.9606, + "learning_rate": 8.109957423934172e-06, + "loss": 1.1802, "step": 14663 }, { - "epoch": 0.41554025333673383, + "epoch": 0.5737538148524923, "grad_norm": 0.0, - "learning_rate": 1.3166010271228347e-05, - "loss": 0.8723, + "learning_rate": 8.108713048233988e-06, + "loss": 0.8762, "step": 14664 }, { - "epoch": 0.41556859077899627, + "epoch": 0.5737929415447218, "grad_norm": 0.0, - "learning_rate": 1.3165139680815202e-05, - "loss": 0.9078, + "learning_rate": 8.107468702905503e-06, + "loss": 1.0709, "step": 14665 }, { - "epoch": 0.41559692822125877, + "epoch": 0.5738320682369512, "grad_norm": 0.0, - "learning_rate": 1.3164269063740914e-05, - "loss": 0.7781, + "learning_rate": 8.106224387968696e-06, + "loss": 1.0439, "step": 14666 }, { - "epoch": 0.4156252656635212, + "epoch": 0.5738711949291807, "grad_norm": 0.0, - "learning_rate": 1.316339842001281e-05, - "loss": 0.9487, + "learning_rate": 8.104980103443555e-06, + "loss": 1.0302, "step": 14667 }, { - "epoch": 0.4156536031057837, + "epoch": 0.5739103216214101, "grad_norm": 0.0, - "learning_rate": 1.3162527749638226e-05, - "loss": 0.9253, + "learning_rate": 8.103735849350056e-06, + "loss": 1.0657, "step": 14668 }, { - "epoch": 0.41568194054804614, + "epoch": 0.5739494483136396, "grad_norm": 0.0, - "learning_rate": 1.3161657052624497e-05, - "loss": 0.9385, + "learning_rate": 8.102491625708186e-06, + "loss": 1.0994, "step": 14669 }, { - "epoch": 0.4157102779903086, + "epoch": 0.573988575005869, "grad_norm": 0.0, - "learning_rate": 1.3160786328978956e-05, - "loss": 0.93, + "learning_rate": 8.101247432537922e-06, + "loss": 0.9942, "step": 14670 }, { - "epoch": 0.41573861543257107, + "epoch": 0.5740277016980985, "grad_norm": 0.0, - "learning_rate": 1.3159915578708939e-05, - "loss": 0.8801, + "learning_rate": 8.100003269859244e-06, + "loss": 0.9049, "step": 14671 }, { - "epoch": 0.4157669528748335, + "epoch": 0.5740668283903279, "grad_norm": 0.0, - "learning_rate": 1.3159044801821779e-05, - "loss": 0.9132, + "learning_rate": 8.098759137692133e-06, + "loss": 0.885, "step": 14672 }, { - "epoch": 0.415795290317096, + "epoch": 0.5741059550825574, "grad_norm": 0.0, - "learning_rate": 1.3158173998324816e-05, - "loss": 0.9365, + "learning_rate": 8.09751503605657e-06, + "loss": 0.9363, "step": 14673 }, { - "epoch": 0.41582362775935844, + "epoch": 0.5741450817747867, "grad_norm": 0.0, - "learning_rate": 1.3157303168225373e-05, - "loss": 0.944, + "learning_rate": 8.096270964972528e-06, + "loss": 1.1305, "step": 14674 }, { - "epoch": 0.4158519652016209, + "epoch": 0.5741842084670162, "grad_norm": 0.0, - "learning_rate": 1.3156432311530797e-05, - "loss": 0.8902, + "learning_rate": 8.095026924459989e-06, + "loss": 1.0545, "step": 14675 }, { - "epoch": 0.41588030264388337, + "epoch": 0.5742233351592456, "grad_norm": 0.0, - "learning_rate": 1.3155561428248418e-05, - "loss": 0.9224, + "learning_rate": 8.093782914538933e-06, + "loss": 1.1376, "step": 14676 }, { - "epoch": 0.4159086400861458, + "epoch": 0.5742624618514751, "grad_norm": 0.0, - "learning_rate": 1.3154690518385575e-05, - "loss": 0.8667, + "learning_rate": 8.092538935229336e-06, + "loss": 1.075, "step": 14677 }, { - "epoch": 0.4159369775284083, + "epoch": 0.5743015885437045, "grad_norm": 0.0, - "learning_rate": 1.3153819581949603e-05, - "loss": 0.9237, + "learning_rate": 8.091294986551173e-06, + "loss": 0.8517, "step": 14678 }, { - "epoch": 0.41596531497067074, + "epoch": 0.574340715235934, "grad_norm": 0.0, - "learning_rate": 1.3152948618947839e-05, - "loss": 0.8977, + "learning_rate": 8.090051068524418e-06, + "loss": 0.905, "step": 14679 }, { - "epoch": 0.41599365241293323, + "epoch": 0.5743798419281634, "grad_norm": 0.0, - "learning_rate": 1.3152077629387612e-05, - "loss": 0.8994, + "learning_rate": 8.088807181169048e-06, + "loss": 0.9192, "step": 14680 }, { - "epoch": 0.4160219898551957, + "epoch": 0.5744189686203929, "grad_norm": 0.0, - "learning_rate": 1.3151206613276265e-05, - "loss": 0.9151, + "learning_rate": 8.087563324505043e-06, + "loss": 0.9177, "step": 14681 }, { - "epoch": 0.4160503272974581, + "epoch": 0.5744580953126223, "grad_norm": 0.0, - "learning_rate": 1.315033557062114e-05, - "loss": 0.9497, + "learning_rate": 8.086319498552376e-06, + "loss": 0.9863, "step": 14682 }, { - "epoch": 0.4160786647397206, + "epoch": 0.5744972220048518, "grad_norm": 0.0, - "learning_rate": 1.3149464501429568e-05, - "loss": 0.9111, + "learning_rate": 8.085075703331017e-06, + "loss": 1.0848, "step": 14683 }, { - "epoch": 0.41610700218198304, + "epoch": 0.5745363486970811, "grad_norm": 0.0, - "learning_rate": 1.3148593405708886e-05, - "loss": 0.9225, + "learning_rate": 8.083831938860941e-06, + "loss": 0.9236, "step": 14684 }, { - "epoch": 0.41613533962424554, + "epoch": 0.5745754753893106, "grad_norm": 0.0, - "learning_rate": 1.3147722283466428e-05, - "loss": 0.9906, + "learning_rate": 8.082588205162127e-06, + "loss": 0.994, "step": 14685 }, { - "epoch": 0.416163677066508, + "epoch": 0.57461460208154, "grad_norm": 0.0, - "learning_rate": 1.3146851134709542e-05, - "loss": 0.8315, + "learning_rate": 8.081344502254543e-06, + "loss": 0.9886, "step": 14686 }, { - "epoch": 0.4161920145087704, + "epoch": 0.5746537287737694, "grad_norm": 0.0, - "learning_rate": 1.3145979959445556e-05, - "loss": 0.9189, + "learning_rate": 8.080100830158163e-06, + "loss": 1.0237, "step": 14687 }, { - "epoch": 0.4162203519510329, + "epoch": 0.5746928554659989, "grad_norm": 0.0, - "learning_rate": 1.3145108757681818e-05, - "loss": 0.9939, + "learning_rate": 8.078857188892957e-06, + "loss": 0.923, "step": 14688 }, { - "epoch": 0.41624868939329535, + "epoch": 0.5747319821582283, "grad_norm": 0.0, - "learning_rate": 1.3144237529425655e-05, - "loss": 0.9575, + "learning_rate": 8.077613578478894e-06, + "loss": 1.0873, "step": 14689 }, { - "epoch": 0.41627702683555784, + "epoch": 0.5747711088504578, "grad_norm": 0.0, - "learning_rate": 1.3143366274684415e-05, - "loss": 0.922, + "learning_rate": 8.076369998935951e-06, + "loss": 1.0358, "step": 14690 }, { - "epoch": 0.4163053642778203, + "epoch": 0.5748102355426872, "grad_norm": 0.0, - "learning_rate": 1.3142494993465435e-05, - "loss": 0.9132, + "learning_rate": 8.075126450284095e-06, + "loss": 1.0073, "step": 14691 }, { - "epoch": 0.41633370172008277, + "epoch": 0.5748493622349167, "grad_norm": 0.0, - "learning_rate": 1.314162368577605e-05, - "loss": 0.8454, + "learning_rate": 8.073882932543298e-06, + "loss": 1.0633, "step": 14692 }, { - "epoch": 0.4163620391623452, + "epoch": 0.5748884889271461, "grad_norm": 0.0, - "learning_rate": 1.3140752351623602e-05, - "loss": 0.8733, + "learning_rate": 8.07263944573352e-06, + "loss": 1.0352, "step": 14693 }, { - "epoch": 0.41639037660460765, + "epoch": 0.5749276156193756, "grad_norm": 0.0, - "learning_rate": 1.3139880991015432e-05, - "loss": 0.8475, + "learning_rate": 8.071395989874747e-06, + "loss": 1.0515, "step": 14694 }, { - "epoch": 0.41641871404687014, + "epoch": 0.5749667423116049, "grad_norm": 0.0, - "learning_rate": 1.313900960395888e-05, - "loss": 0.9833, + "learning_rate": 8.070152564986934e-06, + "loss": 1.0595, "step": 14695 }, { - "epoch": 0.4164470514891326, + "epoch": 0.5750058690038344, "grad_norm": 0.0, - "learning_rate": 1.313813819046128e-05, - "loss": 0.9968, + "learning_rate": 8.068909171090053e-06, + "loss": 0.852, "step": 14696 }, { - "epoch": 0.4164753889313951, + "epoch": 0.5750449956960638, "grad_norm": 0.0, - "learning_rate": 1.313726675052998e-05, - "loss": 0.8618, + "learning_rate": 8.067665808204071e-06, + "loss": 1.0461, "step": 14697 }, { - "epoch": 0.4165037263736575, + "epoch": 0.5750841223882933, "grad_norm": 0.0, - "learning_rate": 1.3136395284172317e-05, - "loss": 0.9378, + "learning_rate": 8.06642247634895e-06, + "loss": 1.0513, "step": 14698 }, { - "epoch": 0.41653206381591995, + "epoch": 0.5751232490805227, "grad_norm": 0.0, - "learning_rate": 1.3135523791395632e-05, - "loss": 0.9222, + "learning_rate": 8.065179175544666e-06, + "loss": 1.0874, "step": 14699 }, { - "epoch": 0.41656040125818244, + "epoch": 0.5751623757727522, "grad_norm": 0.0, - "learning_rate": 1.3134652272207265e-05, - "loss": 0.9491, + "learning_rate": 8.06393590581118e-06, + "loss": 1.0842, "step": 14700 }, { - "epoch": 0.4165887387004449, + "epoch": 0.5752015024649816, "grad_norm": 0.0, - "learning_rate": 1.3133780726614556e-05, - "loss": 0.8164, + "learning_rate": 8.062692667168458e-06, + "loss": 0.9963, "step": 14701 }, { - "epoch": 0.4166170761427074, + "epoch": 0.5752406291572111, "grad_norm": 0.0, - "learning_rate": 1.313290915462485e-05, - "loss": 0.9477, + "learning_rate": 8.06144945963646e-06, + "loss": 1.0443, "step": 14702 }, { - "epoch": 0.4166454135849698, + "epoch": 0.5752797558494405, "grad_norm": 0.0, - "learning_rate": 1.313203755624549e-05, - "loss": 0.8708, + "learning_rate": 8.060206283235159e-06, + "loss": 1.02, "step": 14703 }, { - "epoch": 0.4166737510272323, + "epoch": 0.57531888254167, "grad_norm": 0.0, - "learning_rate": 1.313116593148381e-05, - "loss": 0.9983, + "learning_rate": 8.058963137984512e-06, + "loss": 1.1952, "step": 14704 }, { - "epoch": 0.41670208846949475, + "epoch": 0.5753580092338993, "grad_norm": 0.0, - "learning_rate": 1.3130294280347157e-05, - "loss": 0.924, + "learning_rate": 8.057720023904487e-06, + "loss": 1.0076, "step": 14705 }, { - "epoch": 0.4167304259117572, + "epoch": 0.5753971359261288, "grad_norm": 0.0, - "learning_rate": 1.3129422602842876e-05, - "loss": 0.8177, + "learning_rate": 8.056476941015043e-06, + "loss": 1.2063, "step": 14706 }, { - "epoch": 0.4167587633540197, + "epoch": 0.5754362626183582, "grad_norm": 0.0, - "learning_rate": 1.3128550898978303e-05, - "loss": 0.9699, + "learning_rate": 8.055233889336142e-06, + "loss": 1.0664, "step": 14707 }, { - "epoch": 0.4167871007962821, + "epoch": 0.5754753893105877, "grad_norm": 0.0, - "learning_rate": 1.3127679168760785e-05, - "loss": 0.9586, + "learning_rate": 8.05399086888775e-06, + "loss": 0.9406, "step": 14708 }, { - "epoch": 0.4168154382385446, + "epoch": 0.5755145160028171, "grad_norm": 0.0, - "learning_rate": 1.3126807412197666e-05, - "loss": 0.9514, + "learning_rate": 8.052747879689827e-06, + "loss": 1.0655, "step": 14709 }, { - "epoch": 0.41684377568080705, + "epoch": 0.5755536426950466, "grad_norm": 0.0, - "learning_rate": 1.3125935629296284e-05, - "loss": 0.9214, + "learning_rate": 8.051504921762332e-06, + "loss": 0.9167, "step": 14710 }, { - "epoch": 0.4168721131230695, + "epoch": 0.575592769387276, "grad_norm": 0.0, - "learning_rate": 1.3125063820063989e-05, - "loss": 0.9026, + "learning_rate": 8.050261995125226e-06, + "loss": 1.1458, "step": 14711 }, { - "epoch": 0.416900450565332, + "epoch": 0.5756318960795055, "grad_norm": 0.0, - "learning_rate": 1.3124191984508118e-05, - "loss": 0.9022, + "learning_rate": 8.04901909979847e-06, + "loss": 1.1512, "step": 14712 }, { - "epoch": 0.4169287880075944, + "epoch": 0.5756710227717349, "grad_norm": 0.0, - "learning_rate": 1.3123320122636019e-05, - "loss": 0.9966, + "learning_rate": 8.047776235802021e-06, + "loss": 1.0797, "step": 14713 }, { - "epoch": 0.4169571254498569, + "epoch": 0.5757101494639644, "grad_norm": 0.0, - "learning_rate": 1.3122448234455031e-05, - "loss": 0.9313, + "learning_rate": 8.046533403155841e-06, + "loss": 1.0018, "step": 14714 }, { - "epoch": 0.41698546289211935, + "epoch": 0.5757492761561938, "grad_norm": 0.0, - "learning_rate": 1.3121576319972507e-05, - "loss": 0.9514, + "learning_rate": 8.045290601879888e-06, + "loss": 0.9493, "step": 14715 }, { - "epoch": 0.41701380033438185, + "epoch": 0.5757884028484231, "grad_norm": 0.0, - "learning_rate": 1.3120704379195785e-05, - "loss": 0.8656, + "learning_rate": 8.044047831994114e-06, + "loss": 1.0704, "step": 14716 }, { - "epoch": 0.4170421377766443, + "epoch": 0.5758275295406526, "grad_norm": 0.0, - "learning_rate": 1.3119832412132212e-05, - "loss": 0.8713, + "learning_rate": 8.042805093518484e-06, + "loss": 1.1395, "step": 14717 }, { - "epoch": 0.4170704752189067, + "epoch": 0.575866656232882, "grad_norm": 0.0, - "learning_rate": 1.311896041878913e-05, - "loss": 0.8707, + "learning_rate": 8.041562386472953e-06, + "loss": 0.9831, "step": 14718 }, { - "epoch": 0.4170988126611692, + "epoch": 0.5759057829251115, "grad_norm": 0.0, - "learning_rate": 1.3118088399173888e-05, - "loss": 1.0044, + "learning_rate": 8.040319710877473e-06, + "loss": 1.0236, "step": 14719 }, { - "epoch": 0.41712715010343165, + "epoch": 0.5759449096173409, "grad_norm": 0.0, - "learning_rate": 1.311721635329383e-05, - "loss": 0.9565, + "learning_rate": 8.039077066752003e-06, + "loss": 1.0986, "step": 14720 }, { - "epoch": 0.41715548754569415, + "epoch": 0.5759840363095704, "grad_norm": 0.0, - "learning_rate": 1.3116344281156299e-05, - "loss": 0.9215, + "learning_rate": 8.0378344541165e-06, + "loss": 1.0848, "step": 14721 }, { - "epoch": 0.4171838249879566, + "epoch": 0.5760231630017998, "grad_norm": 0.0, - "learning_rate": 1.3115472182768643e-05, - "loss": 0.8991, + "learning_rate": 8.036591872990918e-06, + "loss": 0.9984, "step": 14722 }, { - "epoch": 0.417212162430219, + "epoch": 0.5760622896940293, "grad_norm": 0.0, - "learning_rate": 1.311460005813821e-05, - "loss": 0.8891, + "learning_rate": 8.035349323395209e-06, + "loss": 1.0071, "step": 14723 }, { - "epoch": 0.4172404998724815, + "epoch": 0.5761014163862587, "grad_norm": 0.0, - "learning_rate": 1.3113727907272341e-05, - "loss": 0.9422, + "learning_rate": 8.034106805349324e-06, + "loss": 1.0004, "step": 14724 }, { - "epoch": 0.41726883731474396, + "epoch": 0.5761405430784882, "grad_norm": 0.0, - "learning_rate": 1.3112855730178389e-05, - "loss": 0.9919, + "learning_rate": 8.032864318873224e-06, + "loss": 0.8923, "step": 14725 }, { - "epoch": 0.41729717475700645, + "epoch": 0.5761796697707176, "grad_norm": 0.0, - "learning_rate": 1.3111983526863696e-05, - "loss": 0.9757, + "learning_rate": 8.031621863986857e-06, + "loss": 1.0256, "step": 14726 }, { - "epoch": 0.4173255121992689, + "epoch": 0.576218796462947, "grad_norm": 0.0, - "learning_rate": 1.3111111297335608e-05, - "loss": 0.9324, + "learning_rate": 8.03037944071018e-06, + "loss": 1.1369, "step": 14727 }, { - "epoch": 0.4173538496415314, + "epoch": 0.5762579231551764, "grad_norm": 0.0, - "learning_rate": 1.3110239041601478e-05, - "loss": 0.9849, + "learning_rate": 8.029137049063139e-06, + "loss": 1.0672, "step": 14728 }, { - "epoch": 0.4173821870837938, + "epoch": 0.5762970498474059, "grad_norm": 0.0, - "learning_rate": 1.3109366759668647e-05, - "loss": 0.9619, + "learning_rate": 8.027894689065684e-06, + "loss": 1.0034, "step": 14729 }, { - "epoch": 0.41741052452605626, + "epoch": 0.5763361765396353, "grad_norm": 0.0, - "learning_rate": 1.3108494451544464e-05, - "loss": 0.8814, + "learning_rate": 8.02665236073777e-06, + "loss": 1.0831, "step": 14730 }, { - "epoch": 0.41743886196831875, + "epoch": 0.5763753032318648, "grad_norm": 0.0, - "learning_rate": 1.310762211723628e-05, - "loss": 0.8766, + "learning_rate": 8.02541006409935e-06, + "loss": 0.9118, "step": 14731 }, { - "epoch": 0.4174671994105812, + "epoch": 0.5764144299240942, "grad_norm": 0.0, - "learning_rate": 1.3106749756751443e-05, - "loss": 0.7656, + "learning_rate": 8.02416779917037e-06, + "loss": 1.0627, "step": 14732 }, { - "epoch": 0.4174955368528437, + "epoch": 0.5764535566163237, "grad_norm": 0.0, - "learning_rate": 1.3105877370097295e-05, - "loss": 0.8741, + "learning_rate": 8.022925565970774e-06, + "loss": 1.117, "step": 14733 }, { - "epoch": 0.4175238742951061, + "epoch": 0.5764926833085531, "grad_norm": 0.0, - "learning_rate": 1.3105004957281189e-05, - "loss": 0.8655, + "learning_rate": 8.021683364520519e-06, + "loss": 1.1006, "step": 14734 }, { - "epoch": 0.41755221173736856, + "epoch": 0.5765318100007826, "grad_norm": 0.0, - "learning_rate": 1.3104132518310477e-05, - "loss": 0.9426, + "learning_rate": 8.020441194839552e-06, + "loss": 0.9209, "step": 14735 }, { - "epoch": 0.41758054917963106, + "epoch": 0.576570936693012, "grad_norm": 0.0, - "learning_rate": 1.3103260053192501e-05, - "loss": 0.9067, + "learning_rate": 8.019199056947819e-06, + "loss": 0.9649, "step": 14736 }, { - "epoch": 0.4176088866218935, + "epoch": 0.5766100633852415, "grad_norm": 0.0, - "learning_rate": 1.3102387561934615e-05, - "loss": 0.8624, + "learning_rate": 8.017956950865266e-06, + "loss": 1.0986, "step": 14737 }, { - "epoch": 0.417637224064156, + "epoch": 0.5766491900774708, "grad_norm": 0.0, - "learning_rate": 1.3101515044544164e-05, - "loss": 0.9032, + "learning_rate": 8.016714876611838e-06, + "loss": 1.1412, "step": 14738 }, { - "epoch": 0.4176655615064184, + "epoch": 0.5766883167697003, "grad_norm": 0.0, - "learning_rate": 1.3100642501028502e-05, - "loss": 0.9307, + "learning_rate": 8.015472834207486e-06, + "loss": 0.945, "step": 14739 }, { - "epoch": 0.4176938989486809, + "epoch": 0.5767274434619297, "grad_norm": 0.0, - "learning_rate": 1.3099769931394977e-05, - "loss": 0.8508, + "learning_rate": 8.014230823672154e-06, + "loss": 1.1036, "step": 14740 }, { - "epoch": 0.41772223639094336, + "epoch": 0.5767665701541592, "grad_norm": 0.0, - "learning_rate": 1.3098897335650938e-05, - "loss": 0.8581, + "learning_rate": 8.012988845025787e-06, + "loss": 1.1095, "step": 14741 }, { - "epoch": 0.4177505738332058, + "epoch": 0.5768056968463886, "grad_norm": 0.0, - "learning_rate": 1.3098024713803736e-05, - "loss": 0.8662, + "learning_rate": 8.011746898288326e-06, + "loss": 0.9283, "step": 14742 }, { - "epoch": 0.4177789112754683, + "epoch": 0.576844823538618, "grad_norm": 0.0, - "learning_rate": 1.309715206586072e-05, - "loss": 1.0246, + "learning_rate": 8.010504983479723e-06, + "loss": 1.0, "step": 14743 }, { - "epoch": 0.41780724871773073, + "epoch": 0.5768839502308475, "grad_norm": 0.0, - "learning_rate": 1.3096279391829245e-05, - "loss": 0.9074, + "learning_rate": 8.009263100619915e-06, + "loss": 1.0207, "step": 14744 }, { - "epoch": 0.4178355861599932, + "epoch": 0.5769230769230769, "grad_norm": 0.0, - "learning_rate": 1.3095406691716657e-05, - "loss": 0.8242, + "learning_rate": 8.008021249728848e-06, + "loss": 1.1321, "step": 14745 }, { - "epoch": 0.41786392360225566, + "epoch": 0.5769622036153064, "grad_norm": 0.0, - "learning_rate": 1.3094533965530309e-05, - "loss": 0.9795, + "learning_rate": 8.006779430826463e-06, + "loss": 0.9825, "step": 14746 }, { - "epoch": 0.4178922610445181, + "epoch": 0.5770013303075358, "grad_norm": 0.0, - "learning_rate": 1.3093661213277553e-05, - "loss": 1.0282, + "learning_rate": 8.0055376439327e-06, + "loss": 1.0848, "step": 14747 }, { - "epoch": 0.4179205984867806, + "epoch": 0.5770404569997652, "grad_norm": 0.0, - "learning_rate": 1.309278843496574e-05, - "loss": 1.0496, + "learning_rate": 8.004295889067506e-06, + "loss": 1.1436, "step": 14748 }, { - "epoch": 0.41794893592904303, + "epoch": 0.5770795836919946, "grad_norm": 0.0, - "learning_rate": 1.3091915630602223e-05, - "loss": 0.9019, + "learning_rate": 8.00305416625082e-06, + "loss": 0.9608, "step": 14749 }, { - "epoch": 0.4179772733713055, + "epoch": 0.5771187103842241, "grad_norm": 0.0, - "learning_rate": 1.3091042800194352e-05, - "loss": 0.8275, + "learning_rate": 8.001812475502582e-06, + "loss": 0.9341, "step": 14750 }, { - "epoch": 0.41800561081356796, + "epoch": 0.5771578370764535, "grad_norm": 0.0, - "learning_rate": 1.3090169943749475e-05, - "loss": 0.9051, + "learning_rate": 8.000570816842728e-06, + "loss": 0.9626, "step": 14751 }, { - "epoch": 0.41803394825583046, + "epoch": 0.577196963768683, "grad_norm": 0.0, - "learning_rate": 1.3089297061274953e-05, - "loss": 1.0233, + "learning_rate": 7.999329190291202e-06, + "loss": 1.0557, "step": 14752 }, { - "epoch": 0.4180622856980929, + "epoch": 0.5772360904609124, "grad_norm": 0.0, - "learning_rate": 1.3088424152778134e-05, - "loss": 0.8324, + "learning_rate": 7.998087595867946e-06, + "loss": 1.0567, "step": 14753 }, { - "epoch": 0.41809062314035533, + "epoch": 0.5772752171531419, "grad_norm": 0.0, - "learning_rate": 1.3087551218266373e-05, - "loss": 0.8015, + "learning_rate": 7.996846033592893e-06, + "loss": 1.0065, "step": 14754 }, { - "epoch": 0.4181189605826178, + "epoch": 0.5773143438453713, "grad_norm": 0.0, - "learning_rate": 1.308667825774702e-05, - "loss": 0.965, + "learning_rate": 7.995604503485984e-06, + "loss": 1.1538, "step": 14755 }, { - "epoch": 0.41814729802488027, + "epoch": 0.5773534705376008, "grad_norm": 0.0, - "learning_rate": 1.308580527122743e-05, - "loss": 0.8531, + "learning_rate": 7.994363005567153e-06, + "loss": 0.9249, "step": 14756 }, { - "epoch": 0.41817563546714276, + "epoch": 0.5773925972298302, "grad_norm": 0.0, - "learning_rate": 1.3084932258714955e-05, - "loss": 0.9893, + "learning_rate": 7.993121539856336e-06, + "loss": 1.0037, "step": 14757 }, { - "epoch": 0.4182039729094052, + "epoch": 0.5774317239220597, "grad_norm": 0.0, - "learning_rate": 1.3084059220216952e-05, - "loss": 0.8882, + "learning_rate": 7.991880106373478e-06, + "loss": 1.2276, "step": 14758 }, { - "epoch": 0.41823231035166764, + "epoch": 0.577470850614289, "grad_norm": 0.0, - "learning_rate": 1.308318615574077e-05, - "loss": 0.8542, + "learning_rate": 7.990638705138506e-06, + "loss": 1.0343, "step": 14759 }, { - "epoch": 0.41826064779393013, + "epoch": 0.5775099773065185, "grad_norm": 0.0, - "learning_rate": 1.3082313065293767e-05, - "loss": 0.9144, + "learning_rate": 7.989397336171358e-06, + "loss": 0.9831, "step": 14760 }, { - "epoch": 0.41828898523619257, + "epoch": 0.5775491039987479, "grad_norm": 0.0, - "learning_rate": 1.3081439948883299e-05, - "loss": 0.9117, + "learning_rate": 7.988155999491972e-06, + "loss": 0.9455, "step": 14761 }, { - "epoch": 0.41831732267845506, + "epoch": 0.5775882306909774, "grad_norm": 0.0, - "learning_rate": 1.3080566806516719e-05, - "loss": 0.9206, + "learning_rate": 7.98691469512028e-06, + "loss": 1.0512, "step": 14762 }, { - "epoch": 0.4183456601207175, + "epoch": 0.5776273573832068, "grad_norm": 0.0, - "learning_rate": 1.3079693638201379e-05, - "loss": 1.0374, + "learning_rate": 7.985673423076213e-06, + "loss": 1.0592, "step": 14763 }, { - "epoch": 0.41837399756297994, + "epoch": 0.5776664840754363, "grad_norm": 0.0, - "learning_rate": 1.3078820443944635e-05, - "loss": 0.8311, + "learning_rate": 7.984432183379706e-06, + "loss": 1.0486, "step": 14764 }, { - "epoch": 0.41840233500524243, + "epoch": 0.5777056107676657, "grad_norm": 0.0, - "learning_rate": 1.3077947223753842e-05, - "loss": 0.8251, + "learning_rate": 7.983190976050694e-06, + "loss": 0.9075, "step": 14765 }, { - "epoch": 0.41843067244750487, + "epoch": 0.5777447374598952, "grad_norm": 0.0, - "learning_rate": 1.307707397763636e-05, - "loss": 0.8556, + "learning_rate": 7.981949801109107e-06, + "loss": 1.0377, "step": 14766 }, { - "epoch": 0.41845900988976736, + "epoch": 0.5777838641521246, "grad_norm": 0.0, - "learning_rate": 1.307620070559954e-05, - "loss": 0.9569, + "learning_rate": 7.98070865857488e-06, + "loss": 1.0094, "step": 14767 }, { - "epoch": 0.4184873473320298, + "epoch": 0.5778229908443541, "grad_norm": 0.0, - "learning_rate": 1.3075327407650736e-05, - "loss": 0.9505, + "learning_rate": 7.97946754846794e-06, + "loss": 0.9344, "step": 14768 }, { - "epoch": 0.4185156847742923, + "epoch": 0.5778621175365835, "grad_norm": 0.0, - "learning_rate": 1.307445408379731e-05, - "loss": 0.8743, + "learning_rate": 7.978226470808217e-06, + "loss": 0.9687, "step": 14769 }, { - "epoch": 0.41854402221655473, + "epoch": 0.577901244228813, "grad_norm": 0.0, - "learning_rate": 1.3073580734046618e-05, - "loss": 0.9876, + "learning_rate": 7.976985425615647e-06, + "loss": 0.8944, "step": 14770 }, { - "epoch": 0.4185723596588172, + "epoch": 0.5779403709210423, "grad_norm": 0.0, - "learning_rate": 1.307270735840601e-05, - "loss": 0.8493, + "learning_rate": 7.975744412910155e-06, + "loss": 1.0373, "step": 14771 }, { - "epoch": 0.41860069710107967, + "epoch": 0.5779794976132717, "grad_norm": 0.0, - "learning_rate": 1.3071833956882847e-05, - "loss": 0.956, + "learning_rate": 7.974503432711671e-06, + "loss": 1.0491, "step": 14772 }, { - "epoch": 0.4186290345433421, + "epoch": 0.5780186243055012, "grad_norm": 0.0, - "learning_rate": 1.307096052948449e-05, - "loss": 0.9423, + "learning_rate": 7.973262485040125e-06, + "loss": 1.0297, "step": 14773 }, { - "epoch": 0.4186573719856046, + "epoch": 0.5780577509977306, "grad_norm": 0.0, - "learning_rate": 1.3070087076218288e-05, - "loss": 0.9222, + "learning_rate": 7.972021569915437e-06, + "loss": 1.039, "step": 14774 }, { - "epoch": 0.41868570942786704, + "epoch": 0.5780968776899601, "grad_norm": 0.0, - "learning_rate": 1.3069213597091603e-05, - "loss": 0.9198, + "learning_rate": 7.970780687357549e-06, + "loss": 1.072, "step": 14775 }, { - "epoch": 0.4187140468701295, + "epoch": 0.5781360043821895, "grad_norm": 0.0, - "learning_rate": 1.3068340092111793e-05, - "loss": 1.0395, + "learning_rate": 7.96953983738638e-06, + "loss": 1.0848, "step": 14776 }, { - "epoch": 0.41874238431239197, + "epoch": 0.578175131074419, "grad_norm": 0.0, - "learning_rate": 1.3067466561286217e-05, - "loss": 0.9049, + "learning_rate": 7.968299020021855e-06, + "loss": 0.9927, "step": 14777 }, { - "epoch": 0.4187707217546544, + "epoch": 0.5782142577666484, "grad_norm": 0.0, - "learning_rate": 1.3066593004622225e-05, - "loss": 0.8577, + "learning_rate": 7.967058235283898e-06, + "loss": 1.0194, "step": 14778 }, { - "epoch": 0.4187990591969169, + "epoch": 0.5782533844588779, "grad_norm": 0.0, - "learning_rate": 1.3065719422127188e-05, - "loss": 0.918, + "learning_rate": 7.965817483192444e-06, + "loss": 1.0001, "step": 14779 }, { - "epoch": 0.41882739663917934, + "epoch": 0.5782925111511072, "grad_norm": 0.0, - "learning_rate": 1.3064845813808455e-05, - "loss": 0.8457, + "learning_rate": 7.96457676376741e-06, + "loss": 1.0118, "step": 14780 }, { - "epoch": 0.41885573408144183, + "epoch": 0.5783316378433367, "grad_norm": 0.0, - "learning_rate": 1.3063972179673388e-05, - "loss": 0.945, + "learning_rate": 7.963336077028725e-06, + "loss": 1.0305, "step": 14781 }, { - "epoch": 0.41888407152370427, + "epoch": 0.5783707645355661, "grad_norm": 0.0, - "learning_rate": 1.3063098519729347e-05, - "loss": 0.9124, + "learning_rate": 7.962095422996305e-06, + "loss": 1.0829, "step": 14782 }, { - "epoch": 0.4189124089659667, + "epoch": 0.5784098912277956, "grad_norm": 0.0, - "learning_rate": 1.3062224833983687e-05, - "loss": 0.8836, + "learning_rate": 7.960854801690084e-06, + "loss": 0.9945, "step": 14783 }, { - "epoch": 0.4189407464082292, + "epoch": 0.578449017920025, "grad_norm": 0.0, - "learning_rate": 1.3061351122443774e-05, - "loss": 1.0758, + "learning_rate": 7.959614213129979e-06, + "loss": 1.0077, "step": 14784 }, { - "epoch": 0.41896908385049164, + "epoch": 0.5784881446122545, "grad_norm": 0.0, - "learning_rate": 1.3060477385116958e-05, - "loss": 0.9099, + "learning_rate": 7.958373657335913e-06, + "loss": 1.0385, "step": 14785 }, { - "epoch": 0.41899742129275414, + "epoch": 0.5785272713044839, "grad_norm": 0.0, - "learning_rate": 1.3059603622010607e-05, - "loss": 0.8922, + "learning_rate": 7.957133134327808e-06, + "loss": 0.8807, "step": 14786 }, { - "epoch": 0.4190257587350166, + "epoch": 0.5785663979967134, "grad_norm": 0.0, - "learning_rate": 1.305872983313208e-05, - "loss": 0.9253, + "learning_rate": 7.95589264412558e-06, + "loss": 1.0089, "step": 14787 }, { - "epoch": 0.419054096177279, + "epoch": 0.5786055246889428, "grad_norm": 0.0, - "learning_rate": 1.3057856018488734e-05, - "loss": 0.8229, + "learning_rate": 7.954652186749161e-06, + "loss": 1.0164, "step": 14788 }, { - "epoch": 0.4190824336195415, + "epoch": 0.5786446513811723, "grad_norm": 0.0, - "learning_rate": 1.3056982178087933e-05, - "loss": 0.9007, + "learning_rate": 7.953411762218463e-06, + "loss": 0.9388, "step": 14789 }, { - "epoch": 0.41911077106180394, + "epoch": 0.5786837780734017, "grad_norm": 0.0, - "learning_rate": 1.3056108311937034e-05, - "loss": 0.8729, + "learning_rate": 7.952171370553408e-06, + "loss": 1.0468, "step": 14790 }, { - "epoch": 0.41913910850406644, + "epoch": 0.5787229047656312, "grad_norm": 0.0, - "learning_rate": 1.3055234420043401e-05, - "loss": 0.8894, + "learning_rate": 7.95093101177391e-06, + "loss": 1.1084, "step": 14791 }, { - "epoch": 0.4191674459463289, + "epoch": 0.5787620314578605, "grad_norm": 0.0, - "learning_rate": 1.3054360502414393e-05, - "loss": 0.9233, + "learning_rate": 7.949690685899898e-06, + "loss": 1.1609, "step": 14792 }, { - "epoch": 0.41919578338859137, + "epoch": 0.57880115815009, "grad_norm": 0.0, - "learning_rate": 1.3053486559057373e-05, - "loss": 0.9066, + "learning_rate": 7.948450392951283e-06, + "loss": 0.919, "step": 14793 }, { - "epoch": 0.4192241208308538, + "epoch": 0.5788402848423194, "grad_norm": 0.0, - "learning_rate": 1.3052612589979704e-05, - "loss": 0.9067, + "learning_rate": 7.947210132947984e-06, + "loss": 1.0941, "step": 14794 }, { - "epoch": 0.41925245827311625, + "epoch": 0.5788794115345489, "grad_norm": 0.0, - "learning_rate": 1.3051738595188744e-05, - "loss": 0.9475, + "learning_rate": 7.94596990590992e-06, + "loss": 0.9974, "step": 14795 }, { - "epoch": 0.41928079571537874, + "epoch": 0.5789185382267783, "grad_norm": 0.0, - "learning_rate": 1.3050864574691857e-05, - "loss": 0.8803, + "learning_rate": 7.944729711856999e-06, + "loss": 1.1338, "step": 14796 }, { - "epoch": 0.4193091331576412, + "epoch": 0.5789576649190078, "grad_norm": 0.0, - "learning_rate": 1.3049990528496404e-05, - "loss": 0.9828, + "learning_rate": 7.943489550809148e-06, + "loss": 1.0983, "step": 14797 }, { - "epoch": 0.41933747059990367, + "epoch": 0.5789967916112372, "grad_norm": 0.0, - "learning_rate": 1.3049116456609745e-05, - "loss": 0.938, + "learning_rate": 7.94224942278628e-06, + "loss": 0.9953, "step": 14798 }, { - "epoch": 0.4193658080421661, + "epoch": 0.5790359183034667, "grad_norm": 0.0, - "learning_rate": 1.304824235903925e-05, - "loss": 0.9704, + "learning_rate": 7.941009327808305e-06, + "loss": 1.0745, "step": 14799 }, { - "epoch": 0.41939414548442855, + "epoch": 0.5790750449956961, "grad_norm": 0.0, - "learning_rate": 1.3047368235792277e-05, - "loss": 0.9405, + "learning_rate": 7.939769265895138e-06, + "loss": 1.024, "step": 14800 }, { - "epoch": 0.41942248292669104, + "epoch": 0.5791141716879254, "grad_norm": 0.0, - "learning_rate": 1.304649408687619e-05, - "loss": 0.9483, + "learning_rate": 7.938529237066698e-06, + "loss": 0.9947, "step": 14801 }, { - "epoch": 0.4194508203689535, + "epoch": 0.579153298380155, "grad_norm": 0.0, - "learning_rate": 1.304561991229835e-05, - "loss": 0.9824, + "learning_rate": 7.937289241342893e-06, + "loss": 0.933, "step": 14802 }, { - "epoch": 0.419479157811216, + "epoch": 0.5791924250723843, "grad_norm": 0.0, - "learning_rate": 1.3044745712066125e-05, - "loss": 1.0047, + "learning_rate": 7.936049278743641e-06, + "loss": 0.9707, "step": 14803 }, { - "epoch": 0.4195074952534784, + "epoch": 0.5792315517646138, "grad_norm": 0.0, - "learning_rate": 1.3043871486186874e-05, - "loss": 0.9479, + "learning_rate": 7.93480934928885e-06, + "loss": 0.9493, "step": 14804 }, { - "epoch": 0.4195358326957409, + "epoch": 0.5792706784568432, "grad_norm": 0.0, - "learning_rate": 1.3042997234667963e-05, - "loss": 0.9497, + "learning_rate": 7.933569452998433e-06, + "loss": 1.1125, "step": 14805 }, { - "epoch": 0.41956417013800335, + "epoch": 0.5793098051490727, "grad_norm": 0.0, - "learning_rate": 1.3042122957516759e-05, - "loss": 0.9018, + "learning_rate": 7.932329589892303e-06, + "loss": 0.8813, "step": 14806 }, { - "epoch": 0.4195925075802658, + "epoch": 0.5793489318413021, "grad_norm": 0.0, - "learning_rate": 1.3041248654740621e-05, - "loss": 0.9792, + "learning_rate": 7.931089759990367e-06, + "loss": 1.1058, "step": 14807 }, { - "epoch": 0.4196208450225283, + "epoch": 0.5793880585335316, "grad_norm": 0.0, - "learning_rate": 1.304037432634692e-05, - "loss": 0.8547, + "learning_rate": 7.929849963312536e-06, + "loss": 0.9962, "step": 14808 }, { - "epoch": 0.4196491824647907, + "epoch": 0.579427185225761, "grad_norm": 0.0, - "learning_rate": 1.3039499972343013e-05, - "loss": 0.7577, + "learning_rate": 7.92861019987872e-06, + "loss": 0.9979, "step": 14809 }, { - "epoch": 0.4196775199070532, + "epoch": 0.5794663119179905, "grad_norm": 0.0, - "learning_rate": 1.3038625592736271e-05, - "loss": 0.8856, + "learning_rate": 7.92737046970883e-06, + "loss": 1.052, "step": 14810 }, { - "epoch": 0.41970585734931565, + "epoch": 0.5795054386102199, "grad_norm": 0.0, - "learning_rate": 1.3037751187534055e-05, - "loss": 0.8813, + "learning_rate": 7.926130772822775e-06, + "loss": 0.8022, "step": 14811 }, { - "epoch": 0.4197341947915781, + "epoch": 0.5795445653024494, "grad_norm": 0.0, - "learning_rate": 1.3036876756743734e-05, - "loss": 0.8101, + "learning_rate": 7.92489110924046e-06, + "loss": 1.0893, "step": 14812 }, { - "epoch": 0.4197625322338406, + "epoch": 0.5795836919946787, "grad_norm": 0.0, - "learning_rate": 1.3036002300372675e-05, - "loss": 0.8688, + "learning_rate": 7.923651478981793e-06, + "loss": 0.8496, "step": 14813 }, { - "epoch": 0.419790869676103, + "epoch": 0.5796228186869082, "grad_norm": 0.0, - "learning_rate": 1.3035127818428239e-05, - "loss": 0.9768, + "learning_rate": 7.922411882066678e-06, + "loss": 1.141, "step": 14814 }, { - "epoch": 0.4198192071183655, + "epoch": 0.5796619453791376, "grad_norm": 0.0, - "learning_rate": 1.3034253310917795e-05, - "loss": 0.8828, + "learning_rate": 7.921172318515028e-06, + "loss": 0.8797, "step": 14815 }, { - "epoch": 0.41984754456062795, + "epoch": 0.5797010720713671, "grad_norm": 0.0, - "learning_rate": 1.3033378777848708e-05, - "loss": 0.9519, + "learning_rate": 7.919932788346748e-06, + "loss": 0.9657, "step": 14816 }, { - "epoch": 0.41987588200289044, + "epoch": 0.5797401987635965, "grad_norm": 0.0, - "learning_rate": 1.3032504219228344e-05, - "loss": 0.9152, + "learning_rate": 7.91869329158174e-06, + "loss": 1.1117, "step": 14817 }, { - "epoch": 0.4199042194451529, + "epoch": 0.579779325455826, "grad_norm": 0.0, - "learning_rate": 1.3031629635064072e-05, - "loss": 0.8517, + "learning_rate": 7.917453828239905e-06, + "loss": 0.9694, "step": 14818 }, { - "epoch": 0.4199325568874153, + "epoch": 0.5798184521480554, "grad_norm": 0.0, - "learning_rate": 1.3030755025363257e-05, - "loss": 0.8437, + "learning_rate": 7.916214398341156e-06, + "loss": 1.1052, "step": 14819 }, { - "epoch": 0.4199608943296778, + "epoch": 0.5798575788402849, "grad_norm": 0.0, - "learning_rate": 1.302988039013327e-05, - "loss": 1.0388, + "learning_rate": 7.914975001905393e-06, + "loss": 1.1338, "step": 14820 }, { - "epoch": 0.41998923177194025, + "epoch": 0.5798967055325143, "grad_norm": 0.0, - "learning_rate": 1.3029005729381474e-05, - "loss": 0.9473, + "learning_rate": 7.913735638952518e-06, + "loss": 1.0018, "step": 14821 }, { - "epoch": 0.42001756921420275, + "epoch": 0.5799358322247438, "grad_norm": 0.0, - "learning_rate": 1.3028131043115235e-05, - "loss": 1.0499, + "learning_rate": 7.912496309502435e-06, + "loss": 1.0328, "step": 14822 }, { - "epoch": 0.4200459066564652, + "epoch": 0.5799749589169731, "grad_norm": 0.0, - "learning_rate": 1.3027256331341926e-05, - "loss": 0.8809, + "learning_rate": 7.91125701357504e-06, + "loss": 1.0598, "step": 14823 }, { - "epoch": 0.4200742440987276, + "epoch": 0.5800140856092026, "grad_norm": 0.0, - "learning_rate": 1.3026381594068913e-05, - "loss": 0.9528, + "learning_rate": 7.910017751190246e-06, + "loss": 1.0187, "step": 14824 }, { - "epoch": 0.4201025815409901, + "epoch": 0.580053212301432, "grad_norm": 0.0, - "learning_rate": 1.3025506831303565e-05, - "loss": 0.9006, + "learning_rate": 7.908778522367949e-06, + "loss": 0.9214, "step": 14825 }, { - "epoch": 0.42013091898325255, + "epoch": 0.5800923389936615, "grad_norm": 0.0, - "learning_rate": 1.3024632043053246e-05, - "loss": 0.8685, + "learning_rate": 7.907539327128046e-06, + "loss": 1.1336, "step": 14826 }, { - "epoch": 0.42015925642551505, + "epoch": 0.5801314656858909, "grad_norm": 0.0, - "learning_rate": 1.302375722932533e-05, - "loss": 0.9622, + "learning_rate": 7.906300165490437e-06, + "loss": 1.0968, "step": 14827 }, { - "epoch": 0.4201875938677775, + "epoch": 0.5801705923781204, "grad_norm": 0.0, - "learning_rate": 1.3022882390127185e-05, - "loss": 0.945, + "learning_rate": 7.905061037475026e-06, + "loss": 1.0202, "step": 14828 }, { - "epoch": 0.42021593131004, + "epoch": 0.5802097190703498, "grad_norm": 0.0, - "learning_rate": 1.302200752546618e-05, - "loss": 0.8399, + "learning_rate": 7.903821943101711e-06, + "loss": 1.0207, "step": 14829 }, { - "epoch": 0.4202442687523024, + "epoch": 0.5802488457625792, "grad_norm": 0.0, - "learning_rate": 1.302113263534968e-05, - "loss": 0.952, + "learning_rate": 7.90258288239039e-06, + "loss": 1.0438, "step": 14830 }, { - "epoch": 0.42027260619456486, + "epoch": 0.5802879724548087, "grad_norm": 0.0, - "learning_rate": 1.3020257719785058e-05, - "loss": 0.9392, + "learning_rate": 7.901343855360954e-06, + "loss": 1.017, "step": 14831 }, { - "epoch": 0.42030094363682735, + "epoch": 0.5803270991470381, "grad_norm": 0.0, - "learning_rate": 1.3019382778779688e-05, - "loss": 0.9099, + "learning_rate": 7.90010486203331e-06, + "loss": 0.9474, "step": 14832 }, { - "epoch": 0.4203292810790898, + "epoch": 0.5803662258392676, "grad_norm": 0.0, - "learning_rate": 1.3018507812340932e-05, - "loss": 0.9133, + "learning_rate": 7.898865902427351e-06, + "loss": 1.028, "step": 14833 }, { - "epoch": 0.4203576185213523, + "epoch": 0.5804053525314969, "grad_norm": 0.0, - "learning_rate": 1.3017632820476165e-05, - "loss": 0.9109, + "learning_rate": 7.897626976562974e-06, + "loss": 0.9259, "step": 14834 }, { - "epoch": 0.4203859559636147, + "epoch": 0.5804444792237264, "grad_norm": 0.0, - "learning_rate": 1.3016757803192756e-05, - "loss": 0.9223, + "learning_rate": 7.896388084460071e-06, + "loss": 1.1435, "step": 14835 }, { - "epoch": 0.42041429340587716, + "epoch": 0.5804836059159558, "grad_norm": 0.0, - "learning_rate": 1.3015882760498077e-05, - "loss": 0.9745, + "learning_rate": 7.895149226138536e-06, + "loss": 1.0178, "step": 14836 }, { - "epoch": 0.42044263084813965, + "epoch": 0.5805227326081853, "grad_norm": 0.0, - "learning_rate": 1.3015007692399496e-05, - "loss": 0.9292, + "learning_rate": 7.893910401618271e-06, + "loss": 0.9725, "step": 14837 }, { - "epoch": 0.4204709682904021, + "epoch": 0.5805618593004147, "grad_norm": 0.0, - "learning_rate": 1.3014132598904383e-05, - "loss": 1.0183, + "learning_rate": 7.892671610919166e-06, + "loss": 1.0193, "step": 14838 }, { - "epoch": 0.4204993057326646, + "epoch": 0.5806009859926442, "grad_norm": 0.0, - "learning_rate": 1.3013257480020116e-05, - "loss": 0.9584, + "learning_rate": 7.891432854061115e-06, + "loss": 0.9424, "step": 14839 }, { - "epoch": 0.420527643174927, + "epoch": 0.5806401126848736, "grad_norm": 0.0, - "learning_rate": 1.3012382335754064e-05, - "loss": 0.9675, + "learning_rate": 7.890194131064008e-06, + "loss": 1.0496, "step": 14840 }, { - "epoch": 0.4205559806171895, + "epoch": 0.5806792393771031, "grad_norm": 0.0, - "learning_rate": 1.3011507166113595e-05, - "loss": 0.8584, + "learning_rate": 7.888955441947738e-06, + "loss": 1.01, "step": 14841 }, { - "epoch": 0.42058431805945196, + "epoch": 0.5807183660693325, "grad_norm": 0.0, - "learning_rate": 1.301063197110608e-05, - "loss": 0.9116, + "learning_rate": 7.887716786732202e-06, + "loss": 0.9812, "step": 14842 }, { - "epoch": 0.4206126555017144, + "epoch": 0.580757492761562, "grad_norm": 0.0, - "learning_rate": 1.3009756750738896e-05, - "loss": 0.9051, + "learning_rate": 7.886478165437288e-06, + "loss": 1.0488, "step": 14843 }, { - "epoch": 0.4206409929439769, + "epoch": 0.5807966194537914, "grad_norm": 0.0, - "learning_rate": 1.3008881505019413e-05, - "loss": 0.943, + "learning_rate": 7.885239578082885e-06, + "loss": 1.0331, "step": 14844 }, { - "epoch": 0.4206693303862393, + "epoch": 0.5808357461460208, "grad_norm": 0.0, - "learning_rate": 1.3008006233955004e-05, - "loss": 1.0089, + "learning_rate": 7.884001024688881e-06, + "loss": 0.9454, "step": 14845 }, { - "epoch": 0.4206976678285018, + "epoch": 0.5808748728382502, "grad_norm": 0.0, - "learning_rate": 1.300713093755304e-05, - "loss": 0.9687, + "learning_rate": 7.882762505275175e-06, + "loss": 1.0976, "step": 14846 }, { - "epoch": 0.42072600527076426, + "epoch": 0.5809139995304797, "grad_norm": 0.0, - "learning_rate": 1.3006255615820898e-05, - "loss": 0.8255, + "learning_rate": 7.881524019861645e-06, + "loss": 0.946, "step": 14847 }, { - "epoch": 0.4207543427130267, + "epoch": 0.5809531262227091, "grad_norm": 0.0, - "learning_rate": 1.3005380268765947e-05, - "loss": 0.987, + "learning_rate": 7.880285568468187e-06, + "loss": 0.9852, "step": 14848 }, { - "epoch": 0.4207826801552892, + "epoch": 0.5809922529149386, "grad_norm": 0.0, - "learning_rate": 1.3004504896395564e-05, - "loss": 0.9317, + "learning_rate": 7.879047151114686e-06, + "loss": 0.9877, "step": 14849 }, { - "epoch": 0.42081101759755163, + "epoch": 0.581031379607168, "grad_norm": 0.0, - "learning_rate": 1.3003629498717119e-05, - "loss": 0.8883, + "learning_rate": 7.877808767821031e-06, + "loss": 0.9866, "step": 14850 }, { - "epoch": 0.4208393550398141, + "epoch": 0.5810705062993975, "grad_norm": 0.0, - "learning_rate": 1.3002754075737984e-05, - "loss": 0.864, + "learning_rate": 7.876570418607108e-06, + "loss": 0.9919, "step": 14851 }, { - "epoch": 0.42086769248207656, + "epoch": 0.5811096329916269, "grad_norm": 0.0, - "learning_rate": 1.300187862746554e-05, - "loss": 0.8874, + "learning_rate": 7.875332103492803e-06, + "loss": 0.9722, "step": 14852 }, { - "epoch": 0.42089602992433905, + "epoch": 0.5811487596838564, "grad_norm": 0.0, - "learning_rate": 1.3001003153907158e-05, - "loss": 0.9428, + "learning_rate": 7.874093822498002e-06, + "loss": 0.9928, "step": 14853 }, { - "epoch": 0.4209243673666015, + "epoch": 0.5811878863760858, "grad_norm": 0.0, - "learning_rate": 1.3000127655070214e-05, - "loss": 0.8921, + "learning_rate": 7.872855575642589e-06, + "loss": 1.0182, "step": 14854 }, { - "epoch": 0.42095270480886393, + "epoch": 0.5812270130683153, "grad_norm": 0.0, - "learning_rate": 1.2999252130962076e-05, - "loss": 0.9237, + "learning_rate": 7.871617362946451e-06, + "loss": 0.8826, "step": 14855 }, { - "epoch": 0.4209810422511264, + "epoch": 0.5812661397605446, "grad_norm": 0.0, - "learning_rate": 1.2998376581590125e-05, - "loss": 0.8722, + "learning_rate": 7.870379184429472e-06, + "loss": 1.0818, "step": 14856 }, { - "epoch": 0.42100937969338886, + "epoch": 0.581305266452774, "grad_norm": 0.0, - "learning_rate": 1.2997501006961737e-05, - "loss": 0.9412, + "learning_rate": 7.869141040111534e-06, + "loss": 0.9293, "step": 14857 }, { - "epoch": 0.42103771713565136, + "epoch": 0.5813443931450035, "grad_norm": 0.0, - "learning_rate": 1.2996625407084282e-05, - "loss": 1.0172, + "learning_rate": 7.867902930012518e-06, + "loss": 1.0094, "step": 14858 }, { - "epoch": 0.4210660545779138, + "epoch": 0.5813835198372329, "grad_norm": 0.0, - "learning_rate": 1.2995749781965139e-05, - "loss": 0.8829, + "learning_rate": 7.866664854152312e-06, + "loss": 1.0567, "step": 14859 }, { - "epoch": 0.42109439202017623, + "epoch": 0.5814226465294624, "grad_norm": 0.0, - "learning_rate": 1.2994874131611685e-05, - "loss": 0.929, + "learning_rate": 7.865426812550795e-06, + "loss": 1.0122, "step": 14860 }, { - "epoch": 0.4211227294624387, + "epoch": 0.5814617732216918, "grad_norm": 0.0, - "learning_rate": 1.2993998456031294e-05, - "loss": 0.8929, + "learning_rate": 7.864188805227852e-06, + "loss": 1.0421, "step": 14861 }, { - "epoch": 0.42115106690470117, + "epoch": 0.5815008999139213, "grad_norm": 0.0, - "learning_rate": 1.299312275523134e-05, - "loss": 0.8353, + "learning_rate": 7.862950832203358e-06, + "loss": 0.9866, "step": 14862 }, { - "epoch": 0.42117940434696366, + "epoch": 0.5815400266061507, "grad_norm": 0.0, - "learning_rate": 1.29922470292192e-05, - "loss": 1.0248, + "learning_rate": 7.861712893497191e-06, + "loss": 0.9598, "step": 14863 }, { - "epoch": 0.4212077417892261, + "epoch": 0.5815791532983802, "grad_norm": 0.0, - "learning_rate": 1.2991371278002256e-05, - "loss": 0.9352, + "learning_rate": 7.86047498912924e-06, + "loss": 1.0609, "step": 14864 }, { - "epoch": 0.4212360792314886, + "epoch": 0.5816182799906096, "grad_norm": 0.0, - "learning_rate": 1.2990495501587882e-05, - "loss": 0.9531, + "learning_rate": 7.85923711911938e-06, + "loss": 1.0602, "step": 14865 }, { - "epoch": 0.42126441667375103, + "epoch": 0.581657406682839, "grad_norm": 0.0, - "learning_rate": 1.298961969998345e-05, - "loss": 0.9933, + "learning_rate": 7.85799928348749e-06, + "loss": 1.0525, "step": 14866 }, { - "epoch": 0.42129275411601347, + "epoch": 0.5816965333750684, "grad_norm": 0.0, - "learning_rate": 1.2988743873196344e-05, - "loss": 0.9461, + "learning_rate": 7.856761482253442e-06, + "loss": 0.9302, "step": 14867 }, { - "epoch": 0.42132109155827596, + "epoch": 0.5817356600672979, "grad_norm": 0.0, - "learning_rate": 1.2987868021233936e-05, - "loss": 0.8544, + "learning_rate": 7.855523715437123e-06, + "loss": 0.9716, "step": 14868 }, { - "epoch": 0.4213494290005384, + "epoch": 0.5817747867595273, "grad_norm": 0.0, - "learning_rate": 1.2986992144103607e-05, - "loss": 0.9267, + "learning_rate": 7.854285983058408e-06, + "loss": 0.9952, "step": 14869 }, { - "epoch": 0.4213777664428009, + "epoch": 0.5818139134517568, "grad_norm": 0.0, - "learning_rate": 1.2986116241812734e-05, - "loss": 0.8944, + "learning_rate": 7.85304828513717e-06, + "loss": 0.943, "step": 14870 }, { - "epoch": 0.42140610388506333, + "epoch": 0.5818530401439862, "grad_norm": 0.0, - "learning_rate": 1.2985240314368694e-05, - "loss": 0.867, + "learning_rate": 7.851810621693287e-06, + "loss": 1.0405, "step": 14871 }, { - "epoch": 0.42143444132732577, + "epoch": 0.5818921668362157, "grad_norm": 0.0, - "learning_rate": 1.298436436177887e-05, - "loss": 0.8665, + "learning_rate": 7.850572992746628e-06, + "loss": 0.9554, "step": 14872 }, { - "epoch": 0.42146277876958826, + "epoch": 0.5819312935284451, "grad_norm": 0.0, - "learning_rate": 1.2983488384050633e-05, - "loss": 0.9218, + "learning_rate": 7.849335398317078e-06, + "loss": 1.0016, "step": 14873 }, { - "epoch": 0.4214911162118507, + "epoch": 0.5819704202206746, "grad_norm": 0.0, - "learning_rate": 1.2982612381191368e-05, - "loss": 1.0295, + "learning_rate": 7.848097838424506e-06, + "loss": 1.0335, "step": 14874 }, { - "epoch": 0.4215194536541132, + "epoch": 0.582009546912904, "grad_norm": 0.0, - "learning_rate": 1.298173635320845e-05, - "loss": 0.9092, + "learning_rate": 7.846860313088788e-06, + "loss": 1.0828, "step": 14875 }, { - "epoch": 0.42154779109637563, + "epoch": 0.5820486736051335, "grad_norm": 0.0, - "learning_rate": 1.2980860300109257e-05, - "loss": 0.8244, + "learning_rate": 7.84562282232979e-06, + "loss": 1.0724, "step": 14876 }, { - "epoch": 0.42157612853863813, + "epoch": 0.5820878002973628, "grad_norm": 0.0, - "learning_rate": 1.2979984221901174e-05, - "loss": 0.8946, + "learning_rate": 7.844385366167396e-06, + "loss": 0.8804, "step": 14877 }, { - "epoch": 0.42160446598090057, + "epoch": 0.5821269269895923, "grad_norm": 0.0, - "learning_rate": 1.297910811859158e-05, - "loss": 1.0026, + "learning_rate": 7.843147944621468e-06, + "loss": 1.0128, "step": 14878 }, { - "epoch": 0.421632803423163, + "epoch": 0.5821660536818217, "grad_norm": 0.0, - "learning_rate": 1.2978231990187848e-05, - "loss": 0.9569, + "learning_rate": 7.841910557711884e-06, + "loss": 1.0388, "step": 14879 }, { - "epoch": 0.4216611408654255, + "epoch": 0.5822051803740512, "grad_norm": 0.0, - "learning_rate": 1.297735583669736e-05, - "loss": 0.9195, + "learning_rate": 7.840673205458513e-06, + "loss": 1.1364, "step": 14880 }, { - "epoch": 0.42168947830768794, + "epoch": 0.5822443070662806, "grad_norm": 0.0, - "learning_rate": 1.2976479658127503e-05, - "loss": 0.8879, + "learning_rate": 7.839435887881218e-06, + "loss": 1.0844, "step": 14881 }, { - "epoch": 0.42171781574995043, + "epoch": 0.5822834337585101, "grad_norm": 0.0, - "learning_rate": 1.2975603454485648e-05, - "loss": 0.8721, + "learning_rate": 7.838198604999881e-06, + "loss": 1.0306, "step": 14882 }, { - "epoch": 0.42174615319221287, + "epoch": 0.5823225604507395, "grad_norm": 0.0, - "learning_rate": 1.2974727225779185e-05, - "loss": 0.9812, + "learning_rate": 7.836961356834365e-06, + "loss": 1.0464, "step": 14883 }, { - "epoch": 0.4217744906344753, + "epoch": 0.582361687142969, "grad_norm": 0.0, - "learning_rate": 1.2973850972015485e-05, - "loss": 0.9503, + "learning_rate": 7.835724143404539e-06, + "loss": 0.8782, "step": 14884 }, { - "epoch": 0.4218028280767378, + "epoch": 0.5824008138351984, "grad_norm": 0.0, - "learning_rate": 1.2972974693201938e-05, - "loss": 0.947, + "learning_rate": 7.83448696473027e-06, + "loss": 0.9532, "step": 14885 }, { - "epoch": 0.42183116551900024, + "epoch": 0.5824399405274278, "grad_norm": 0.0, - "learning_rate": 1.2972098389345921e-05, - "loss": 0.9106, + "learning_rate": 7.833249820831425e-06, + "loss": 1.0603, "step": 14886 }, { - "epoch": 0.42185950296126273, + "epoch": 0.5824790672196573, "grad_norm": 0.0, - "learning_rate": 1.2971222060454816e-05, - "loss": 0.945, + "learning_rate": 7.832012711727877e-06, + "loss": 0.8919, "step": 14887 }, { - "epoch": 0.42188784040352517, + "epoch": 0.5825181939118866, "grad_norm": 0.0, - "learning_rate": 1.2970345706536e-05, - "loss": 1.0499, + "learning_rate": 7.830775637439487e-06, + "loss": 1.0671, "step": 14888 }, { - "epoch": 0.42191617784578767, + "epoch": 0.5825573206041161, "grad_norm": 0.0, - "learning_rate": 1.296946932759686e-05, - "loss": 1.0541, + "learning_rate": 7.82953859798612e-06, + "loss": 1.0281, "step": 14889 }, { - "epoch": 0.4219445152880501, + "epoch": 0.5825964472963455, "grad_norm": 0.0, - "learning_rate": 1.296859292364478e-05, - "loss": 0.7853, + "learning_rate": 7.828301593387646e-06, + "loss": 0.9454, "step": 14890 }, { - "epoch": 0.42197285273031254, + "epoch": 0.582635573988575, "grad_norm": 0.0, - "learning_rate": 1.296771649468714e-05, - "loss": 0.9568, + "learning_rate": 7.827064623663927e-06, + "loss": 0.9764, "step": 14891 }, { - "epoch": 0.42200119017257504, + "epoch": 0.5826747006808044, "grad_norm": 0.0, - "learning_rate": 1.2966840040731316e-05, - "loss": 0.9146, + "learning_rate": 7.825827688834826e-06, + "loss": 1.0491, "step": 14892 }, { - "epoch": 0.4220295276148375, + "epoch": 0.5827138273730339, "grad_norm": 0.0, - "learning_rate": 1.2965963561784705e-05, - "loss": 0.8981, + "learning_rate": 7.82459078892021e-06, + "loss": 0.9635, "step": 14893 }, { - "epoch": 0.42205786505709997, + "epoch": 0.5827529540652633, "grad_norm": 0.0, - "learning_rate": 1.2965087057854678e-05, - "loss": 0.951, + "learning_rate": 7.823353923939936e-06, + "loss": 0.9598, "step": 14894 }, { - "epoch": 0.4220862024993624, + "epoch": 0.5827920807574928, "grad_norm": 0.0, - "learning_rate": 1.2964210528948617e-05, - "loss": 0.9643, + "learning_rate": 7.822117093913874e-06, + "loss": 1.0487, "step": 14895 }, { - "epoch": 0.42211453994162484, + "epoch": 0.5828312074497222, "grad_norm": 0.0, - "learning_rate": 1.2963333975073912e-05, - "loss": 0.7735, + "learning_rate": 7.820880298861881e-06, + "loss": 0.9902, "step": 14896 }, { - "epoch": 0.42214287738388734, + "epoch": 0.5828703341419517, "grad_norm": 0.0, - "learning_rate": 1.296245739623795e-05, - "loss": 0.8358, + "learning_rate": 7.819643538803819e-06, + "loss": 1.0645, "step": 14897 }, { - "epoch": 0.4221712148261498, + "epoch": 0.582909460834181, "grad_norm": 0.0, - "learning_rate": 1.2961580792448104e-05, - "loss": 0.978, + "learning_rate": 7.81840681375955e-06, + "loss": 1.046, "step": 14898 }, { - "epoch": 0.42219955226841227, + "epoch": 0.5829485875264105, "grad_norm": 0.0, - "learning_rate": 1.2960704163711769e-05, - "loss": 0.8811, + "learning_rate": 7.817170123748935e-06, + "loss": 0.995, "step": 14899 }, { - "epoch": 0.4222278897106747, + "epoch": 0.5829877142186399, "grad_norm": 0.0, - "learning_rate": 1.2959827510036318e-05, - "loss": 0.9362, + "learning_rate": 7.815933468791833e-06, + "loss": 1.0717, "step": 14900 }, { - "epoch": 0.4222562271529372, + "epoch": 0.5830268409108694, "grad_norm": 0.0, - "learning_rate": 1.2958950831429142e-05, - "loss": 0.97, + "learning_rate": 7.814696848908103e-06, + "loss": 1.0099, "step": 14901 }, { - "epoch": 0.42228456459519964, + "epoch": 0.5830659676030988, "grad_norm": 0.0, - "learning_rate": 1.2958074127897624e-05, - "loss": 0.9243, + "learning_rate": 7.813460264117603e-06, + "loss": 1.0328, "step": 14902 }, { - "epoch": 0.4223129020374621, + "epoch": 0.5831050942953283, "grad_norm": 0.0, - "learning_rate": 1.295719739944915e-05, - "loss": 0.9466, + "learning_rate": 7.812223714440188e-06, + "loss": 0.9736, "step": 14903 }, { - "epoch": 0.4223412394797246, + "epoch": 0.5831442209875577, "grad_norm": 0.0, - "learning_rate": 1.2956320646091106e-05, - "loss": 0.8786, + "learning_rate": 7.810987199895721e-06, + "loss": 1.0914, "step": 14904 }, { - "epoch": 0.422369576921987, + "epoch": 0.5831833476797872, "grad_norm": 0.0, - "learning_rate": 1.295544386783087e-05, - "loss": 0.9759, + "learning_rate": 7.809750720504058e-06, + "loss": 0.9891, "step": 14905 }, { - "epoch": 0.4223979143642495, + "epoch": 0.5832224743720166, "grad_norm": 0.0, - "learning_rate": 1.2954567064675839e-05, - "loss": 1.018, + "learning_rate": 7.808514276285052e-06, + "loss": 1.0317, "step": 14906 }, { - "epoch": 0.42242625180651194, + "epoch": 0.5832616010642461, "grad_norm": 0.0, - "learning_rate": 1.2953690236633389e-05, - "loss": 0.874, + "learning_rate": 7.807277867258559e-06, + "loss": 1.0272, "step": 14907 }, { - "epoch": 0.4224545892487744, + "epoch": 0.5833007277564755, "grad_norm": 0.0, - "learning_rate": 1.2952813383710909e-05, - "loss": 0.9826, + "learning_rate": 7.806041493444439e-06, + "loss": 1.108, "step": 14908 }, { - "epoch": 0.4224829266910369, + "epoch": 0.583339854448705, "grad_norm": 0.0, - "learning_rate": 1.2951936505915783e-05, - "loss": 0.9442, + "learning_rate": 7.804805154862543e-06, + "loss": 0.9422, "step": 14909 }, { - "epoch": 0.4225112641332993, + "epoch": 0.5833789811409343, "grad_norm": 0.0, - "learning_rate": 1.2951059603255405e-05, - "loss": 0.9455, + "learning_rate": 7.803568851532725e-06, + "loss": 0.9902, "step": 14910 }, { - "epoch": 0.4225396015755618, + "epoch": 0.5834181078331638, "grad_norm": 0.0, - "learning_rate": 1.2950182675737155e-05, - "loss": 0.946, + "learning_rate": 7.80233258347484e-06, + "loss": 0.9871, "step": 14911 }, { - "epoch": 0.42256793901782425, + "epoch": 0.5834572345253932, "grad_norm": 0.0, - "learning_rate": 1.2949305723368419e-05, - "loss": 0.8754, + "learning_rate": 7.801096350708735e-06, + "loss": 0.8786, "step": 14912 }, { - "epoch": 0.42259627646008674, + "epoch": 0.5834963612176227, "grad_norm": 0.0, - "learning_rate": 1.2948428746156588e-05, - "loss": 0.9474, + "learning_rate": 7.799860153254271e-06, + "loss": 1.0021, "step": 14913 }, { - "epoch": 0.4226246139023492, + "epoch": 0.5835354879098521, "grad_norm": 0.0, - "learning_rate": 1.2947551744109044e-05, - "loss": 0.9245, + "learning_rate": 7.798623991131298e-06, + "loss": 0.94, "step": 14914 }, { - "epoch": 0.4226529513446116, + "epoch": 0.5835746146020815, "grad_norm": 0.0, - "learning_rate": 1.294667471723318e-05, - "loss": 0.942, + "learning_rate": 7.797387864359664e-06, + "loss": 0.9993, "step": 14915 }, { - "epoch": 0.4226812887868741, + "epoch": 0.583613741294311, "grad_norm": 0.0, - "learning_rate": 1.2945797665536378e-05, - "loss": 0.9074, + "learning_rate": 7.796151772959216e-06, + "loss": 0.9926, "step": 14916 }, { - "epoch": 0.42270962622913655, + "epoch": 0.5836528679865404, "grad_norm": 0.0, - "learning_rate": 1.2944920589026029e-05, - "loss": 0.9416, + "learning_rate": 7.794915716949815e-06, + "loss": 1.0233, "step": 14917 }, { - "epoch": 0.42273796367139904, + "epoch": 0.5836919946787699, "grad_norm": 0.0, - "learning_rate": 1.2944043487709519e-05, - "loss": 0.8819, + "learning_rate": 7.793679696351302e-06, + "loss": 1.0522, "step": 14918 }, { - "epoch": 0.4227663011136615, + "epoch": 0.5837311213709993, "grad_norm": 0.0, - "learning_rate": 1.2943166361594242e-05, - "loss": 0.8791, + "learning_rate": 7.792443711183531e-06, + "loss": 0.9427, "step": 14919 }, { - "epoch": 0.4227946385559239, + "epoch": 0.5837702480632287, "grad_norm": 0.0, - "learning_rate": 1.2942289210687577e-05, - "loss": 0.8729, + "learning_rate": 7.791207761466346e-06, + "loss": 0.9737, "step": 14920 }, { - "epoch": 0.4228229759981864, + "epoch": 0.5838093747554581, "grad_norm": 0.0, - "learning_rate": 1.294141203499692e-05, - "loss": 0.9701, + "learning_rate": 7.789971847219593e-06, + "loss": 1.0359, "step": 14921 }, { - "epoch": 0.42285131344044885, + "epoch": 0.5838485014476876, "grad_norm": 0.0, - "learning_rate": 1.2940534834529654e-05, - "loss": 0.9637, + "learning_rate": 7.788735968463128e-06, + "loss": 0.9734, "step": 14922 }, { - "epoch": 0.42287965088271134, + "epoch": 0.583887628139917, "grad_norm": 0.0, - "learning_rate": 1.2939657609293174e-05, - "loss": 0.8541, + "learning_rate": 7.78750012521679e-06, + "loss": 1.0008, "step": 14923 }, { - "epoch": 0.4229079883249738, + "epoch": 0.5839267548321465, "grad_norm": 0.0, - "learning_rate": 1.2938780359294868e-05, - "loss": 0.8455, + "learning_rate": 7.786264317500429e-06, + "loss": 0.9366, "step": 14924 }, { - "epoch": 0.4229363257672363, + "epoch": 0.5839658815243759, "grad_norm": 0.0, - "learning_rate": 1.293790308454212e-05, - "loss": 1.0225, + "learning_rate": 7.785028545333885e-06, + "loss": 1.0407, "step": 14925 }, { - "epoch": 0.4229646632094987, + "epoch": 0.5840050082166054, "grad_norm": 0.0, - "learning_rate": 1.2937025785042328e-05, - "loss": 0.8172, + "learning_rate": 7.78379280873701e-06, + "loss": 1.0479, "step": 14926 }, { - "epoch": 0.42299300065176115, + "epoch": 0.5840441349088348, "grad_norm": 0.0, - "learning_rate": 1.2936148460802875e-05, - "loss": 1.0214, + "learning_rate": 7.782557107729646e-06, + "loss": 1.1412, "step": 14927 }, { - "epoch": 0.42302133809402365, + "epoch": 0.5840832616010643, "grad_norm": 0.0, - "learning_rate": 1.293527111183115e-05, - "loss": 0.9437, + "learning_rate": 7.781321442331637e-06, + "loss": 0.9135, "step": 14928 }, { - "epoch": 0.4230496755362861, + "epoch": 0.5841223882932937, "grad_norm": 0.0, - "learning_rate": 1.2934393738134548e-05, - "loss": 0.9341, + "learning_rate": 7.780085812562827e-06, + "loss": 1.0151, "step": 14929 }, { - "epoch": 0.4230780129785486, + "epoch": 0.5841615149855232, "grad_norm": 0.0, - "learning_rate": 1.2933516339720459e-05, - "loss": 0.7699, + "learning_rate": 7.77885021844305e-06, + "loss": 1.019, "step": 14930 }, { - "epoch": 0.423106350420811, + "epoch": 0.5842006416777525, "grad_norm": 0.0, - "learning_rate": 1.2932638916596275e-05, - "loss": 0.8827, + "learning_rate": 7.777614659992162e-06, + "loss": 0.9999, "step": 14931 }, { - "epoch": 0.42313468786307346, + "epoch": 0.584239768369982, "grad_norm": 0.0, - "learning_rate": 1.2931761468769382e-05, - "loss": 0.9607, + "learning_rate": 7.776379137229996e-06, + "loss": 0.9857, "step": 14932 }, { - "epoch": 0.42316302530533595, + "epoch": 0.5842788950622114, "grad_norm": 0.0, - "learning_rate": 1.2930883996247174e-05, - "loss": 0.8069, + "learning_rate": 7.775143650176394e-06, + "loss": 1.0081, "step": 14933 }, { - "epoch": 0.4231913627475984, + "epoch": 0.5843180217544409, "grad_norm": 0.0, - "learning_rate": 1.293000649903704e-05, - "loss": 1.0074, + "learning_rate": 7.773908198851197e-06, + "loss": 1.0282, "step": 14934 }, { - "epoch": 0.4232197001898609, + "epoch": 0.5843571484466703, "grad_norm": 0.0, - "learning_rate": 1.2929128977146372e-05, - "loss": 0.8481, + "learning_rate": 7.772672783274246e-06, + "loss": 0.8732, "step": 14935 }, { - "epoch": 0.4232480376321233, + "epoch": 0.5843962751388998, "grad_norm": 0.0, - "learning_rate": 1.2928251430582565e-05, - "loss": 0.8414, + "learning_rate": 7.771437403465377e-06, + "loss": 1.035, "step": 14936 }, { - "epoch": 0.4232763750743858, + "epoch": 0.5844354018311292, "grad_norm": 0.0, - "learning_rate": 1.292737385935301e-05, - "loss": 0.918, + "learning_rate": 7.770202059444433e-06, + "loss": 0.9, "step": 14937 }, { - "epoch": 0.42330471251664825, + "epoch": 0.5844745285233587, "grad_norm": 0.0, - "learning_rate": 1.2926496263465095e-05, - "loss": 0.8343, + "learning_rate": 7.76896675123125e-06, + "loss": 0.9838, "step": 14938 }, { - "epoch": 0.4233330499589107, + "epoch": 0.5845136552155881, "grad_norm": 0.0, - "learning_rate": 1.292561864292622e-05, - "loss": 1.0363, + "learning_rate": 7.767731478845662e-06, + "loss": 0.9917, "step": 14939 }, { - "epoch": 0.4233613874011732, + "epoch": 0.5845527819078176, "grad_norm": 0.0, - "learning_rate": 1.2924740997743769e-05, - "loss": 0.8292, + "learning_rate": 7.766496242307513e-06, + "loss": 0.937, "step": 14940 }, { - "epoch": 0.4233897248434356, + "epoch": 0.584591908600047, "grad_norm": 0.0, - "learning_rate": 1.2923863327925138e-05, - "loss": 0.938, + "learning_rate": 7.765261041636635e-06, + "loss": 1.0875, "step": 14941 }, { - "epoch": 0.4234180622856981, + "epoch": 0.5846310352922764, "grad_norm": 0.0, - "learning_rate": 1.2922985633477722e-05, - "loss": 0.9102, + "learning_rate": 7.764025876852861e-06, + "loss": 1.0004, "step": 14942 }, { - "epoch": 0.42344639972796055, + "epoch": 0.5846701619845058, "grad_norm": 0.0, - "learning_rate": 1.2922107914408913e-05, - "loss": 0.9088, + "learning_rate": 7.762790747976031e-06, + "loss": 0.9567, "step": 14943 }, { - "epoch": 0.423474737170223, + "epoch": 0.5847092886767352, "grad_norm": 0.0, - "learning_rate": 1.2921230170726102e-05, - "loss": 0.8451, + "learning_rate": 7.761555655025981e-06, + "loss": 1.0412, "step": 14944 }, { - "epoch": 0.4235030746124855, + "epoch": 0.5847484153689647, "grad_norm": 0.0, - "learning_rate": 1.292035240243669e-05, - "loss": 0.9684, + "learning_rate": 7.76032059802254e-06, + "loss": 0.8301, "step": 14945 }, { - "epoch": 0.4235314120547479, + "epoch": 0.5847875420611941, "grad_norm": 0.0, - "learning_rate": 1.291947460954806e-05, - "loss": 0.8687, + "learning_rate": 7.759085576985546e-06, + "loss": 1.0795, "step": 14946 }, { - "epoch": 0.4235597494970104, + "epoch": 0.5848266687534236, "grad_norm": 0.0, - "learning_rate": 1.2918596792067613e-05, - "loss": 0.9239, + "learning_rate": 7.757850591934825e-06, + "loss": 1.1171, "step": 14947 }, { - "epoch": 0.42358808693927286, + "epoch": 0.584865795445653, "grad_norm": 0.0, - "learning_rate": 1.2917718950002737e-05, - "loss": 0.8966, + "learning_rate": 7.756615642890217e-06, + "loss": 0.9671, "step": 14948 }, { - "epoch": 0.42361642438153535, + "epoch": 0.5849049221378825, "grad_norm": 0.0, - "learning_rate": 1.2916841083360836e-05, - "loss": 0.7635, + "learning_rate": 7.75538072987155e-06, + "loss": 0.9569, "step": 14949 }, { - "epoch": 0.4236447618237978, + "epoch": 0.5849440488301119, "grad_norm": 0.0, - "learning_rate": 1.2915963192149297e-05, - "loss": 0.9277, + "learning_rate": 7.754145852898658e-06, + "loss": 0.9977, "step": 14950 }, { - "epoch": 0.4236730992660602, + "epoch": 0.5849831755223414, "grad_norm": 0.0, - "learning_rate": 1.2915085276375519e-05, - "loss": 0.9445, + "learning_rate": 7.75291101199137e-06, + "loss": 0.9866, "step": 14951 }, { - "epoch": 0.4237014367083227, + "epoch": 0.5850223022145707, "grad_norm": 0.0, - "learning_rate": 1.2914207336046896e-05, - "loss": 0.8743, + "learning_rate": 7.751676207169509e-06, + "loss": 0.9726, "step": 14952 }, { - "epoch": 0.42372977415058516, + "epoch": 0.5850614289068002, "grad_norm": 0.0, - "learning_rate": 1.291332937117082e-05, - "loss": 0.9917, + "learning_rate": 7.750441438452915e-06, + "loss": 0.991, "step": 14953 }, { - "epoch": 0.42375811159284765, + "epoch": 0.5851005555990296, "grad_norm": 0.0, - "learning_rate": 1.291245138175469e-05, - "loss": 0.841, + "learning_rate": 7.749206705861413e-06, + "loss": 0.9962, "step": 14954 }, { - "epoch": 0.4237864490351101, + "epoch": 0.5851396822912591, "grad_norm": 0.0, - "learning_rate": 1.2911573367805898e-05, - "loss": 0.8241, + "learning_rate": 7.747972009414832e-06, + "loss": 0.9873, "step": 14955 }, { - "epoch": 0.42381478647737253, + "epoch": 0.5851788089834885, "grad_norm": 0.0, - "learning_rate": 1.2910695329331846e-05, - "loss": 0.8838, + "learning_rate": 7.746737349132994e-06, + "loss": 0.9697, "step": 14956 }, { - "epoch": 0.423843123919635, + "epoch": 0.585217935675718, "grad_norm": 0.0, - "learning_rate": 1.2909817266339926e-05, - "loss": 0.8282, + "learning_rate": 7.745502725035733e-06, + "loss": 1.0257, "step": 14957 }, { - "epoch": 0.42387146136189746, + "epoch": 0.5852570623679474, "grad_norm": 0.0, - "learning_rate": 1.2908939178837532e-05, - "loss": 1.0311, + "learning_rate": 7.744268137142875e-06, + "loss": 0.9275, "step": 14958 }, { - "epoch": 0.42389979880415996, + "epoch": 0.5852961890601769, "grad_norm": 0.0, - "learning_rate": 1.2908061066832064e-05, - "loss": 1.0024, + "learning_rate": 7.743033585474244e-06, + "loss": 0.9925, "step": 14959 }, { - "epoch": 0.4239281362464224, + "epoch": 0.5853353157524063, "grad_norm": 0.0, - "learning_rate": 1.2907182930330921e-05, - "loss": 0.8759, + "learning_rate": 7.741799070049665e-06, + "loss": 1.0747, "step": 14960 }, { - "epoch": 0.42395647368868483, + "epoch": 0.5853744424446358, "grad_norm": 0.0, - "learning_rate": 1.2906304769341493e-05, - "loss": 0.8822, + "learning_rate": 7.740564590888959e-06, + "loss": 0.9594, "step": 14961 }, { - "epoch": 0.4239848111309473, + "epoch": 0.5854135691368652, "grad_norm": 0.0, - "learning_rate": 1.290542658387118e-05, - "loss": 0.8721, + "learning_rate": 7.73933014801196e-06, + "loss": 1.0217, "step": 14962 }, { - "epoch": 0.42401314857320976, + "epoch": 0.5854526958290946, "grad_norm": 0.0, - "learning_rate": 1.2904548373927383e-05, - "loss": 0.896, + "learning_rate": 7.738095741438485e-06, + "loss": 0.9212, "step": 14963 }, { - "epoch": 0.42404148601547226, + "epoch": 0.585491822521324, "grad_norm": 0.0, - "learning_rate": 1.2903670139517495e-05, - "loss": 0.9234, + "learning_rate": 7.736861371188357e-06, + "loss": 0.9805, "step": 14964 }, { - "epoch": 0.4240698234577347, + "epoch": 0.5855309492135535, "grad_norm": 0.0, - "learning_rate": 1.2902791880648917e-05, - "loss": 0.9931, + "learning_rate": 7.735627037281396e-06, + "loss": 0.9787, "step": 14965 }, { - "epoch": 0.4240981608999972, + "epoch": 0.5855700759057829, "grad_norm": 0.0, - "learning_rate": 1.290191359732904e-05, - "loss": 1.0248, + "learning_rate": 7.734392739737434e-06, + "loss": 1.0333, "step": 14966 }, { - "epoch": 0.42412649834225963, + "epoch": 0.5856092025980124, "grad_norm": 0.0, - "learning_rate": 1.2901035289565274e-05, - "loss": 0.9618, + "learning_rate": 7.733158478576283e-06, + "loss": 0.9836, "step": 14967 }, { - "epoch": 0.42415483578452207, + "epoch": 0.5856483292902418, "grad_norm": 0.0, - "learning_rate": 1.2900156957365004e-05, - "loss": 0.933, + "learning_rate": 7.731924253817767e-06, + "loss": 1.0982, "step": 14968 }, { - "epoch": 0.42418317322678456, + "epoch": 0.5856874559824713, "grad_norm": 0.0, - "learning_rate": 1.2899278600735641e-05, - "loss": 0.995, + "learning_rate": 7.730690065481704e-06, + "loss": 0.9933, "step": 14969 }, { - "epoch": 0.424211510669047, + "epoch": 0.5857265826747007, "grad_norm": 0.0, - "learning_rate": 1.2898400219684575e-05, - "loss": 0.8129, + "learning_rate": 7.729455913587914e-06, + "loss": 1.007, "step": 14970 }, { - "epoch": 0.4242398481113095, + "epoch": 0.5857657093669301, "grad_norm": 0.0, - "learning_rate": 1.2897521814219207e-05, - "loss": 1.0269, + "learning_rate": 7.728221798156218e-06, + "loss": 1.0045, "step": 14971 }, { - "epoch": 0.42426818555357193, + "epoch": 0.5858048360591596, "grad_norm": 0.0, - "learning_rate": 1.2896643384346936e-05, - "loss": 0.8738, + "learning_rate": 7.726987719206433e-06, + "loss": 0.898, "step": 14972 }, { - "epoch": 0.42429652299583437, + "epoch": 0.585843962751389, "grad_norm": 0.0, - "learning_rate": 1.2895764930075164e-05, - "loss": 0.8605, + "learning_rate": 7.725753676758379e-06, + "loss": 1.0058, "step": 14973 }, { - "epoch": 0.42432486043809686, + "epoch": 0.5858830894436184, "grad_norm": 0.0, - "learning_rate": 1.2894886451411289e-05, - "loss": 0.948, + "learning_rate": 7.72451967083187e-06, + "loss": 1.1218, "step": 14974 }, { - "epoch": 0.4243531978803593, + "epoch": 0.5859222161358478, "grad_norm": 0.0, - "learning_rate": 1.2894007948362707e-05, - "loss": 0.897, + "learning_rate": 7.723285701446722e-06, + "loss": 1.1329, "step": 14975 }, { - "epoch": 0.4243815353226218, + "epoch": 0.5859613428280773, "grad_norm": 0.0, - "learning_rate": 1.2893129420936826e-05, - "loss": 0.8536, + "learning_rate": 7.722051768622754e-06, + "loss": 0.9922, "step": 14976 }, { - "epoch": 0.42440987276488423, + "epoch": 0.5860004695203067, "grad_norm": 0.0, - "learning_rate": 1.289225086914104e-05, - "loss": 0.8349, + "learning_rate": 7.720817872379782e-06, + "loss": 1.0451, "step": 14977 }, { - "epoch": 0.4244382102071467, + "epoch": 0.5860395962125362, "grad_norm": 0.0, - "learning_rate": 1.2891372292982749e-05, - "loss": 0.9819, + "learning_rate": 7.719584012737618e-06, + "loss": 1.0847, "step": 14978 }, { - "epoch": 0.42446654764940916, + "epoch": 0.5860787229047656, "grad_norm": 0.0, - "learning_rate": 1.2890493692469357e-05, - "loss": 0.9785, + "learning_rate": 7.718350189716075e-06, + "loss": 1.0176, "step": 14979 }, { - "epoch": 0.4244948850916716, + "epoch": 0.5861178495969951, "grad_norm": 0.0, - "learning_rate": 1.2889615067608261e-05, - "loss": 0.9573, + "learning_rate": 7.717116403334972e-06, + "loss": 0.9041, "step": 14980 }, { - "epoch": 0.4245232225339341, + "epoch": 0.5861569762892245, "grad_norm": 0.0, - "learning_rate": 1.2888736418406869e-05, - "loss": 0.8978, + "learning_rate": 7.715882653614115e-06, + "loss": 0.931, "step": 14981 }, { - "epoch": 0.42455155997619654, + "epoch": 0.586196102981454, "grad_norm": 0.0, - "learning_rate": 1.2887857744872574e-05, - "loss": 0.9991, + "learning_rate": 7.714648940573323e-06, + "loss": 1.0347, "step": 14982 }, { - "epoch": 0.42457989741845903, + "epoch": 0.5862352296736834, "grad_norm": 0.0, - "learning_rate": 1.288697904701278e-05, - "loss": 0.9209, + "learning_rate": 7.713415264232405e-06, + "loss": 0.985, "step": 14983 }, { - "epoch": 0.42460823486072147, + "epoch": 0.5862743563659129, "grad_norm": 0.0, - "learning_rate": 1.288610032483489e-05, - "loss": 0.9276, + "learning_rate": 7.712181624611174e-06, + "loss": 0.9733, "step": 14984 }, { - "epoch": 0.4246365723029839, + "epoch": 0.5863134830581422, "grad_norm": 0.0, - "learning_rate": 1.288522157834631e-05, - "loss": 0.9153, + "learning_rate": 7.710948021729438e-06, + "loss": 0.967, "step": 14985 }, { - "epoch": 0.4246649097452464, + "epoch": 0.5863526097503717, "grad_norm": 0.0, - "learning_rate": 1.2884342807554433e-05, - "loss": 0.8968, + "learning_rate": 7.70971445560701e-06, + "loss": 1.0057, "step": 14986 }, { - "epoch": 0.42469324718750884, + "epoch": 0.5863917364426011, "grad_norm": 0.0, - "learning_rate": 1.2883464012466664e-05, - "loss": 0.8749, + "learning_rate": 7.708480926263695e-06, + "loss": 1.0541, "step": 14987 }, { - "epoch": 0.42472158462977133, + "epoch": 0.5864308631348306, "grad_norm": 0.0, - "learning_rate": 1.2882585193090412e-05, - "loss": 0.8926, + "learning_rate": 7.707247433719306e-06, + "loss": 1.0395, "step": 14988 }, { - "epoch": 0.42474992207203377, + "epoch": 0.58646998982706, "grad_norm": 0.0, - "learning_rate": 1.288170634943307e-05, - "loss": 0.9907, + "learning_rate": 7.70601397799365e-06, + "loss": 1.0538, "step": 14989 }, { - "epoch": 0.42477825951429626, + "epoch": 0.5865091165192895, "grad_norm": 0.0, - "learning_rate": 1.2880827481502048e-05, - "loss": 0.8737, + "learning_rate": 7.704780559106538e-06, + "loss": 0.8425, "step": 14990 }, { - "epoch": 0.4248065969565587, + "epoch": 0.5865482432115189, "grad_norm": 0.0, - "learning_rate": 1.2879948589304745e-05, - "loss": 0.8313, + "learning_rate": 7.70354717707777e-06, + "loss": 0.8053, "step": 14991 }, { - "epoch": 0.42483493439882114, + "epoch": 0.5865873699037484, "grad_norm": 0.0, - "learning_rate": 1.2879069672848565e-05, - "loss": 0.8825, + "learning_rate": 7.702313831927156e-06, + "loss": 0.9855, "step": 14992 }, { - "epoch": 0.42486327184108363, + "epoch": 0.5866264965959778, "grad_norm": 0.0, - "learning_rate": 1.2878190732140911e-05, - "loss": 0.8635, + "learning_rate": 7.701080523674506e-06, + "loss": 1.0744, "step": 14993 }, { - "epoch": 0.4248916092833461, + "epoch": 0.5866656232882073, "grad_norm": 0.0, - "learning_rate": 1.2877311767189192e-05, - "loss": 0.8578, + "learning_rate": 7.699847252339623e-06, + "loss": 0.9629, "step": 14994 }, { - "epoch": 0.42491994672560857, + "epoch": 0.5867047499804366, "grad_norm": 0.0, - "learning_rate": 1.2876432778000804e-05, - "loss": 0.869, + "learning_rate": 7.69861401794231e-06, + "loss": 1.1638, "step": 14995 }, { - "epoch": 0.424948284167871, + "epoch": 0.5867438766726661, "grad_norm": 0.0, - "learning_rate": 1.2875553764583156e-05, - "loss": 0.9055, + "learning_rate": 7.697380820502372e-06, + "loss": 0.9742, "step": 14996 }, { - "epoch": 0.42497662161013344, + "epoch": 0.5867830033648955, "grad_norm": 0.0, - "learning_rate": 1.287467472694365e-05, - "loss": 0.8723, + "learning_rate": 7.696147660039609e-06, + "loss": 0.9822, "step": 14997 }, { - "epoch": 0.42500495905239594, + "epoch": 0.586822130057125, "grad_norm": 0.0, - "learning_rate": 1.2873795665089692e-05, - "loss": 0.7701, + "learning_rate": 7.69491453657383e-06, + "loss": 1.006, "step": 14998 }, { - "epoch": 0.4250332964946584, + "epoch": 0.5868612567493544, "grad_norm": 0.0, - "learning_rate": 1.2872916579028684e-05, - "loss": 0.9019, + "learning_rate": 7.693681450124837e-06, + "loss": 0.9246, "step": 14999 }, { - "epoch": 0.42506163393692087, + "epoch": 0.5869003834415838, "grad_norm": 0.0, - "learning_rate": 1.2872037468768032e-05, - "loss": 0.8637, + "learning_rate": 7.69244840071243e-06, + "loss": 1.0175, "step": 15000 }, { - "epoch": 0.4250899713791833, + "epoch": 0.5869395101338133, "grad_norm": 0.0, - "learning_rate": 1.2871158334315146e-05, - "loss": 0.9766, + "learning_rate": 7.691215388356403e-06, + "loss": 1.1064, "step": 15001 }, { - "epoch": 0.4251183088214458, + "epoch": 0.5869786368260427, "grad_norm": 0.0, - "learning_rate": 1.2870279175677427e-05, - "loss": 0.9997, + "learning_rate": 7.68998241307657e-06, + "loss": 0.9902, "step": 15002 }, { - "epoch": 0.42514664626370824, + "epoch": 0.5870177635182722, "grad_norm": 0.0, - "learning_rate": 1.286939999286228e-05, - "loss": 0.8098, + "learning_rate": 7.688749474892727e-06, + "loss": 0.9189, "step": 15003 }, { - "epoch": 0.4251749837059707, + "epoch": 0.5870568902105016, "grad_norm": 0.0, - "learning_rate": 1.2868520785877108e-05, - "loss": 1.1765, + "learning_rate": 7.68751657382467e-06, + "loss": 1.0858, "step": 15004 }, { - "epoch": 0.42520332114823317, + "epoch": 0.587096016902731, "grad_norm": 0.0, - "learning_rate": 1.2867641554729325e-05, - "loss": 0.8933, + "learning_rate": 7.686283709892192e-06, + "loss": 0.9214, "step": 15005 }, { - "epoch": 0.4252316585904956, + "epoch": 0.5871351435949604, "grad_norm": 0.0, - "learning_rate": 1.2866762299426328e-05, - "loss": 0.8285, + "learning_rate": 7.685050883115106e-06, + "loss": 1.0814, "step": 15006 }, { - "epoch": 0.4252599960327581, + "epoch": 0.5871742702871899, "grad_norm": 0.0, - "learning_rate": 1.2865883019975529e-05, - "loss": 0.9221, + "learning_rate": 7.683818093513201e-06, + "loss": 1.0109, "step": 15007 }, { - "epoch": 0.42528833347502054, + "epoch": 0.5872133969794193, "grad_norm": 0.0, - "learning_rate": 1.2865003716384332e-05, - "loss": 0.7478, + "learning_rate": 7.682585341106276e-06, + "loss": 0.9903, "step": 15008 }, { - "epoch": 0.425316670917283, + "epoch": 0.5872525236716488, "grad_norm": 0.0, - "learning_rate": 1.2864124388660148e-05, - "loss": 0.9544, + "learning_rate": 7.681352625914125e-06, + "loss": 1.1365, "step": 15009 }, { - "epoch": 0.4253450083595455, + "epoch": 0.5872916503638782, "grad_norm": 0.0, - "learning_rate": 1.286324503681038e-05, - "loss": 0.8784, + "learning_rate": 7.680119947956542e-06, + "loss": 1.1121, "step": 15010 }, { - "epoch": 0.4253733458018079, + "epoch": 0.5873307770561077, "grad_norm": 0.0, - "learning_rate": 1.2862365660842437e-05, - "loss": 0.8333, + "learning_rate": 7.678887307253329e-06, + "loss": 1.0026, "step": 15011 }, { - "epoch": 0.4254016832440704, + "epoch": 0.5873699037483371, "grad_norm": 0.0, - "learning_rate": 1.286148626076372e-05, - "loss": 0.9322, + "learning_rate": 7.677654703824279e-06, + "loss": 0.8855, "step": 15012 }, { - "epoch": 0.42543002068633284, + "epoch": 0.5874090304405666, "grad_norm": 0.0, - "learning_rate": 1.2860606836581643e-05, - "loss": 0.9273, + "learning_rate": 7.676422137689183e-06, + "loss": 1.0473, "step": 15013 }, { - "epoch": 0.42545835812859534, + "epoch": 0.587448157132796, "grad_norm": 0.0, - "learning_rate": 1.2859727388303615e-05, - "loss": 1.0628, + "learning_rate": 7.675189608867832e-06, + "loss": 1.0113, "step": 15014 }, { - "epoch": 0.4254866955708578, + "epoch": 0.5874872838250255, "grad_norm": 0.0, - "learning_rate": 1.2858847915937043e-05, - "loss": 0.9332, + "learning_rate": 7.673957117380027e-06, + "loss": 1.0743, "step": 15015 }, { - "epoch": 0.4255150330131202, + "epoch": 0.5875264105172548, "grad_norm": 0.0, - "learning_rate": 1.2857968419489329e-05, - "loss": 0.9592, + "learning_rate": 7.672724663245555e-06, + "loss": 1.0853, "step": 15016 }, { - "epoch": 0.4255433704553827, + "epoch": 0.5875655372094843, "grad_norm": 0.0, - "learning_rate": 1.285708889896789e-05, - "loss": 0.8137, + "learning_rate": 7.67149224648421e-06, + "loss": 1.028, "step": 15017 }, { - "epoch": 0.42557170789764515, + "epoch": 0.5876046639017137, "grad_norm": 0.0, - "learning_rate": 1.2856209354380127e-05, - "loss": 0.8022, + "learning_rate": 7.670259867115781e-06, + "loss": 1.0241, "step": 15018 }, { - "epoch": 0.42560004533990764, + "epoch": 0.5876437905939432, "grad_norm": 0.0, - "learning_rate": 1.2855329785733452e-05, - "loss": 0.9691, + "learning_rate": 7.669027525160057e-06, + "loss": 1.0899, "step": 15019 }, { - "epoch": 0.4256283827821701, + "epoch": 0.5876829172861726, "grad_norm": 0.0, - "learning_rate": 1.2854450193035275e-05, - "loss": 0.9463, + "learning_rate": 7.66779522063683e-06, + "loss": 1.0408, "step": 15020 }, { - "epoch": 0.4256567202244325, + "epoch": 0.5877220439784021, "grad_norm": 0.0, - "learning_rate": 1.2853570576293002e-05, - "loss": 0.9034, + "learning_rate": 7.666562953565894e-06, + "loss": 0.9573, "step": 15021 }, { - "epoch": 0.425685057666695, + "epoch": 0.5877611706706315, "grad_norm": 0.0, - "learning_rate": 1.2852690935514047e-05, - "loss": 0.8638, + "learning_rate": 7.66533072396703e-06, + "loss": 1.0384, "step": 15022 }, { - "epoch": 0.42571339510895745, + "epoch": 0.587800297362861, "grad_norm": 0.0, - "learning_rate": 1.2851811270705819e-05, - "loss": 0.758, + "learning_rate": 7.664098531860029e-06, + "loss": 1.0288, "step": 15023 }, { - "epoch": 0.42574173255121994, + "epoch": 0.5878394240550904, "grad_norm": 0.0, - "learning_rate": 1.2850931581875723e-05, - "loss": 0.9872, + "learning_rate": 7.66286637726468e-06, + "loss": 1.0319, "step": 15024 }, { - "epoch": 0.4257700699934824, + "epoch": 0.5878785507473199, "grad_norm": 0.0, - "learning_rate": 1.2850051869031169e-05, - "loss": 0.9059, + "learning_rate": 7.661634260200767e-06, + "loss": 1.0372, "step": 15025 }, { - "epoch": 0.4257984074357449, + "epoch": 0.5879176774395493, "grad_norm": 0.0, - "learning_rate": 1.284917213217957e-05, - "loss": 0.846, + "learning_rate": 7.660402180688076e-06, + "loss": 1.0788, "step": 15026 }, { - "epoch": 0.4258267448780073, + "epoch": 0.5879568041317788, "grad_norm": 0.0, - "learning_rate": 1.284829237132834e-05, - "loss": 0.7744, + "learning_rate": 7.659170138746398e-06, + "loss": 1.1038, "step": 15027 }, { - "epoch": 0.42585508232026975, + "epoch": 0.5879959308240081, "grad_norm": 0.0, - "learning_rate": 1.2847412586484884e-05, - "loss": 0.8729, + "learning_rate": 7.65793813439551e-06, + "loss": 1.0429, "step": 15028 }, { - "epoch": 0.42588341976253224, + "epoch": 0.5880350575162375, "grad_norm": 0.0, - "learning_rate": 1.2846532777656613e-05, - "loss": 0.9197, + "learning_rate": 7.656706167655205e-06, + "loss": 0.9806, "step": 15029 }, { - "epoch": 0.4259117572047947, + "epoch": 0.588074184208467, "grad_norm": 0.0, - "learning_rate": 1.2845652944850941e-05, - "loss": 0.806, + "learning_rate": 7.655474238545261e-06, + "loss": 0.8874, "step": 15030 }, { - "epoch": 0.4259400946470572, + "epoch": 0.5881133109006964, "grad_norm": 0.0, - "learning_rate": 1.284477308807528e-05, - "loss": 0.9603, + "learning_rate": 7.654242347085462e-06, + "loss": 1.0573, "step": 15031 }, { - "epoch": 0.4259684320893196, + "epoch": 0.5881524375929259, "grad_norm": 0.0, - "learning_rate": 1.2843893207337033e-05, - "loss": 0.921, + "learning_rate": 7.653010493295592e-06, + "loss": 1.1005, "step": 15032 }, { - "epoch": 0.42599676953158205, + "epoch": 0.5881915642851553, "grad_norm": 0.0, - "learning_rate": 1.284301330264362e-05, - "loss": 1.0023, + "learning_rate": 7.651778677195433e-06, + "loss": 1.0077, "step": 15033 }, { - "epoch": 0.42602510697384455, + "epoch": 0.5882306909773848, "grad_norm": 0.0, - "learning_rate": 1.2842133374002452e-05, - "loss": 0.9028, + "learning_rate": 7.650546898804766e-06, + "loss": 0.921, "step": 15034 }, { - "epoch": 0.426053444416107, + "epoch": 0.5882698176696142, "grad_norm": 0.0, - "learning_rate": 1.2841253421420938e-05, - "loss": 0.9351, + "learning_rate": 7.649315158143373e-06, + "loss": 1.0207, "step": 15035 }, { - "epoch": 0.4260817818583695, + "epoch": 0.5883089443618437, "grad_norm": 0.0, - "learning_rate": 1.2840373444906493e-05, - "loss": 0.9067, + "learning_rate": 7.648083455231033e-06, + "loss": 0.9543, "step": 15036 }, { - "epoch": 0.4261101193006319, + "epoch": 0.588348071054073, "grad_norm": 0.0, - "learning_rate": 1.2839493444466525e-05, - "loss": 0.9706, + "learning_rate": 7.646851790087519e-06, + "loss": 0.9447, "step": 15037 }, { - "epoch": 0.4261384567428944, + "epoch": 0.5883871977463025, "grad_norm": 0.0, - "learning_rate": 1.283861342010845e-05, - "loss": 0.845, + "learning_rate": 7.645620162732624e-06, + "loss": 1.1102, "step": 15038 }, { - "epoch": 0.42616679418515685, + "epoch": 0.5884263244385319, "grad_norm": 0.0, - "learning_rate": 1.283773337183968e-05, - "loss": 1.0184, + "learning_rate": 7.644388573186116e-06, + "loss": 1.0356, "step": 15039 }, { - "epoch": 0.4261951316274193, + "epoch": 0.5884654511307614, "grad_norm": 0.0, - "learning_rate": 1.2836853299667628e-05, - "loss": 0.8751, + "learning_rate": 7.643157021467778e-06, + "loss": 1.0572, "step": 15040 }, { - "epoch": 0.4262234690696818, + "epoch": 0.5885045778229908, "grad_norm": 0.0, - "learning_rate": 1.2835973203599707e-05, - "loss": 0.9962, + "learning_rate": 7.641925507597381e-06, + "loss": 1.0853, "step": 15041 }, { - "epoch": 0.4262518065119442, + "epoch": 0.5885437045152203, "grad_norm": 0.0, - "learning_rate": 1.2835093083643332e-05, - "loss": 0.8703, + "learning_rate": 7.640694031594708e-06, + "loss": 0.9858, "step": 15042 }, { - "epoch": 0.4262801439542067, + "epoch": 0.5885828312074497, "grad_norm": 0.0, - "learning_rate": 1.2834212939805917e-05, - "loss": 0.9417, + "learning_rate": 7.639462593479533e-06, + "loss": 0.9847, "step": 15043 }, { - "epoch": 0.42630848139646915, + "epoch": 0.5886219578996792, "grad_norm": 0.0, - "learning_rate": 1.283333277209487e-05, - "loss": 0.9944, + "learning_rate": 7.638231193271632e-06, + "loss": 1.0113, "step": 15044 }, { - "epoch": 0.4263368188387316, + "epoch": 0.5886610845919086, "grad_norm": 0.0, - "learning_rate": 1.283245258051761e-05, - "loss": 0.8687, + "learning_rate": 7.636999830990777e-06, + "loss": 0.982, "step": 15045 }, { - "epoch": 0.4263651562809941, + "epoch": 0.5887002112841381, "grad_norm": 0.0, - "learning_rate": 1.2831572365081549e-05, - "loss": 0.9171, + "learning_rate": 7.63576850665674e-06, + "loss": 1.0435, "step": 15046 }, { - "epoch": 0.4263934937232565, + "epoch": 0.5887393379763675, "grad_norm": 0.0, - "learning_rate": 1.2830692125794104e-05, - "loss": 1.0519, + "learning_rate": 7.634537220289303e-06, + "loss": 1.0305, "step": 15047 }, { - "epoch": 0.426421831165519, + "epoch": 0.588778464668597, "grad_norm": 0.0, - "learning_rate": 1.282981186266269e-05, - "loss": 0.9073, + "learning_rate": 7.633305971908234e-06, + "loss": 1.0338, "step": 15048 }, { - "epoch": 0.42645016860778145, + "epoch": 0.5888175913608263, "grad_norm": 0.0, - "learning_rate": 1.2828931575694718e-05, - "loss": 0.9661, + "learning_rate": 7.632074761533304e-06, + "loss": 1.0348, "step": 15049 }, { - "epoch": 0.42647850605004395, + "epoch": 0.5888567180530558, "grad_norm": 0.0, - "learning_rate": 1.2828051264897604e-05, - "loss": 0.9336, + "learning_rate": 7.630843589184284e-06, + "loss": 0.9609, "step": 15050 }, { - "epoch": 0.4265068434923064, + "epoch": 0.5888958447452852, "grad_norm": 0.0, - "learning_rate": 1.2827170930278765e-05, - "loss": 0.97, + "learning_rate": 7.62961245488095e-06, + "loss": 0.9808, "step": 15051 }, { - "epoch": 0.4265351809345688, + "epoch": 0.5889349714375147, "grad_norm": 0.0, - "learning_rate": 1.2826290571845614e-05, - "loss": 0.8047, + "learning_rate": 7.628381358643071e-06, + "loss": 1.1631, "step": 15052 }, { - "epoch": 0.4265635183768313, + "epoch": 0.5889740981297441, "grad_norm": 0.0, - "learning_rate": 1.2825410189605569e-05, - "loss": 0.8402, + "learning_rate": 7.6271503004904144e-06, + "loss": 0.955, "step": 15053 }, { - "epoch": 0.42659185581909376, + "epoch": 0.5890132248219736, "grad_norm": 0.0, - "learning_rate": 1.2824529783566044e-05, - "loss": 0.9097, + "learning_rate": 7.625919280442751e-06, + "loss": 1.1194, "step": 15054 }, { - "epoch": 0.42662019326135625, + "epoch": 0.589052351514203, "grad_norm": 0.0, - "learning_rate": 1.2823649353734458e-05, - "loss": 0.8985, + "learning_rate": 7.624688298519844e-06, + "loss": 0.9555, "step": 15055 }, { - "epoch": 0.4266485307036187, + "epoch": 0.5890914782064324, "grad_norm": 0.0, - "learning_rate": 1.2822768900118226e-05, - "loss": 1.0072, + "learning_rate": 7.623457354741472e-06, + "loss": 1.1046, "step": 15056 }, { - "epoch": 0.4266768681458811, + "epoch": 0.5891306048986619, "grad_norm": 0.0, - "learning_rate": 1.282188842272476e-05, - "loss": 0.9352, + "learning_rate": 7.6222264491273965e-06, + "loss": 1.0042, "step": 15057 }, { - "epoch": 0.4267052055881436, + "epoch": 0.5891697315908913, "grad_norm": 0.0, - "learning_rate": 1.2821007921561481e-05, - "loss": 0.9535, + "learning_rate": 7.6209955816973854e-06, + "loss": 1.0311, "step": 15058 }, { - "epoch": 0.42673354303040606, + "epoch": 0.5892088582831208, "grad_norm": 0.0, - "learning_rate": 1.2820127396635802e-05, - "loss": 1.0181, + "learning_rate": 7.6197647524712e-06, + "loss": 1.0815, "step": 15059 }, { - "epoch": 0.42676188047266855, + "epoch": 0.5892479849753501, "grad_norm": 0.0, - "learning_rate": 1.2819246847955148e-05, - "loss": 1.0278, + "learning_rate": 7.618533961468612e-06, + "loss": 1.0426, "step": 15060 }, { - "epoch": 0.426790217914931, + "epoch": 0.5892871116675796, "grad_norm": 0.0, - "learning_rate": 1.2818366275526927e-05, - "loss": 0.9217, + "learning_rate": 7.617303208709388e-06, + "loss": 1.0604, "step": 15061 }, { - "epoch": 0.4268185553571935, + "epoch": 0.589326238359809, "grad_norm": 0.0, - "learning_rate": 1.2817485679358562e-05, - "loss": 0.9263, + "learning_rate": 7.616072494213286e-06, + "loss": 1.0126, "step": 15062 }, { - "epoch": 0.4268468927994559, + "epoch": 0.5893653650520385, "grad_norm": 0.0, - "learning_rate": 1.2816605059457468e-05, - "loss": 0.8556, + "learning_rate": 7.614841818000071e-06, + "loss": 1.0109, "step": 15063 }, { - "epoch": 0.42687523024171836, + "epoch": 0.5894044917442679, "grad_norm": 0.0, - "learning_rate": 1.2815724415831065e-05, - "loss": 0.8426, + "learning_rate": 7.613611180089508e-06, + "loss": 1.1554, "step": 15064 }, { - "epoch": 0.42690356768398086, + "epoch": 0.5894436184364974, "grad_norm": 0.0, - "learning_rate": 1.2814843748486767e-05, - "loss": 0.9094, + "learning_rate": 7.612380580501362e-06, + "loss": 1.2014, "step": 15065 }, { - "epoch": 0.4269319051262433, + "epoch": 0.5894827451287268, "grad_norm": 0.0, - "learning_rate": 1.2813963057431995e-05, - "loss": 0.8773, + "learning_rate": 7.611150019255391e-06, + "loss": 0.8036, "step": 15066 }, { - "epoch": 0.4269602425685058, + "epoch": 0.5895218718209563, "grad_norm": 0.0, - "learning_rate": 1.281308234267417e-05, - "loss": 0.979, + "learning_rate": 7.609919496371357e-06, + "loss": 1.0198, "step": 15067 }, { - "epoch": 0.4269885800107682, + "epoch": 0.5895609985131857, "grad_norm": 0.0, - "learning_rate": 1.2812201604220706e-05, - "loss": 0.9813, + "learning_rate": 7.608689011869019e-06, + "loss": 1.0695, "step": 15068 }, { - "epoch": 0.42701691745303066, + "epoch": 0.5896001252054152, "grad_norm": 0.0, - "learning_rate": 1.2811320842079026e-05, - "loss": 1.0477, + "learning_rate": 7.607458565768142e-06, + "loss": 0.9174, "step": 15069 }, { - "epoch": 0.42704525489529316, + "epoch": 0.5896392518976445, "grad_norm": 0.0, - "learning_rate": 1.2810440056256543e-05, - "loss": 0.8624, + "learning_rate": 7.60622815808848e-06, + "loss": 0.9894, "step": 15070 }, { - "epoch": 0.4270735923375556, + "epoch": 0.589678378589874, "grad_norm": 0.0, - "learning_rate": 1.2809559246760684e-05, - "loss": 1.0059, + "learning_rate": 7.604997788849795e-06, + "loss": 0.8874, "step": 15071 }, { - "epoch": 0.4271019297798181, + "epoch": 0.5897175052821034, "grad_norm": 0.0, - "learning_rate": 1.2808678413598861e-05, - "loss": 0.9836, + "learning_rate": 7.603767458071843e-06, + "loss": 1.1353, "step": 15072 }, { - "epoch": 0.42713026722208053, + "epoch": 0.5897566319743329, "grad_norm": 0.0, - "learning_rate": 1.2807797556778497e-05, - "loss": 0.9385, + "learning_rate": 7.602537165774386e-06, + "loss": 1.1398, "step": 15073 }, { - "epoch": 0.427158604664343, + "epoch": 0.5897957586665623, "grad_norm": 0.0, - "learning_rate": 1.2806916676307012e-05, - "loss": 0.9844, + "learning_rate": 7.601306911977178e-06, + "loss": 0.9827, "step": 15074 }, { - "epoch": 0.42718694210660546, + "epoch": 0.5898348853587918, "grad_norm": 0.0, - "learning_rate": 1.2806035772191825e-05, - "loss": 0.901, + "learning_rate": 7.600076696699974e-06, + "loss": 1.0462, "step": 15075 }, { - "epoch": 0.4272152795488679, + "epoch": 0.5898740120510212, "grad_norm": 0.0, - "learning_rate": 1.2805154844440359e-05, - "loss": 0.9718, + "learning_rate": 7.598846519962529e-06, + "loss": 0.9607, "step": 15076 }, { - "epoch": 0.4272436169911304, + "epoch": 0.5899131387432507, "grad_norm": 0.0, - "learning_rate": 1.2804273893060028e-05, - "loss": 0.86, + "learning_rate": 7.597616381784601e-06, + "loss": 1.0627, "step": 15077 }, { - "epoch": 0.42727195443339283, + "epoch": 0.5899522654354801, "grad_norm": 0.0, - "learning_rate": 1.2803392918058259e-05, - "loss": 0.9723, + "learning_rate": 7.5963862821859456e-06, + "loss": 1.007, "step": 15078 }, { - "epoch": 0.4273002918756553, + "epoch": 0.5899913921277096, "grad_norm": 0.0, - "learning_rate": 1.280251191944247e-05, - "loss": 0.9387, + "learning_rate": 7.595156221186314e-06, + "loss": 0.8423, "step": 15079 }, { - "epoch": 0.42732862931791776, + "epoch": 0.590030518819939, "grad_norm": 0.0, - "learning_rate": 1.2801630897220083e-05, - "loss": 0.9339, + "learning_rate": 7.59392619880546e-06, + "loss": 1.0223, "step": 15080 }, { - "epoch": 0.4273569667601802, + "epoch": 0.5900696455121685, "grad_norm": 0.0, - "learning_rate": 1.280074985139852e-05, - "loss": 0.9016, + "learning_rate": 7.5926962150631324e-06, + "loss": 0.9266, "step": 15081 }, { - "epoch": 0.4273853042024427, + "epoch": 0.5901087722043978, "grad_norm": 0.0, - "learning_rate": 1.2799868781985201e-05, - "loss": 0.9362, + "learning_rate": 7.591466269979091e-06, + "loss": 1.1006, "step": 15082 }, { - "epoch": 0.42741364164470513, + "epoch": 0.5901478988966273, "grad_norm": 0.0, - "learning_rate": 1.2798987688987543e-05, - "loss": 0.9308, + "learning_rate": 7.5902363635730835e-06, + "loss": 0.9928, "step": 15083 }, { - "epoch": 0.4274419790869676, + "epoch": 0.5901870255888567, "grad_norm": 0.0, - "learning_rate": 1.2798106572412973e-05, - "loss": 0.8358, + "learning_rate": 7.589006495864861e-06, + "loss": 0.978, "step": 15084 }, { - "epoch": 0.42747031652923007, + "epoch": 0.5902261522810861, "grad_norm": 0.0, - "learning_rate": 1.2797225432268916e-05, - "loss": 0.8898, + "learning_rate": 7.587776666874171e-06, + "loss": 0.9857, "step": 15085 }, { - "epoch": 0.42749865397149256, + "epoch": 0.5902652789733156, "grad_norm": 0.0, - "learning_rate": 1.279634426856279e-05, - "loss": 0.8964, + "learning_rate": 7.586546876620763e-06, + "loss": 1.1048, "step": 15086 }, { - "epoch": 0.427526991413755, + "epoch": 0.590304405665545, "grad_norm": 0.0, - "learning_rate": 1.2795463081302017e-05, - "loss": 0.9532, + "learning_rate": 7.585317125124392e-06, + "loss": 0.9101, "step": 15087 }, { - "epoch": 0.42755532885601744, + "epoch": 0.5903435323577745, "grad_norm": 0.0, - "learning_rate": 1.2794581870494021e-05, - "loss": 0.8124, + "learning_rate": 7.584087412404802e-06, + "loss": 0.9908, "step": 15088 }, { - "epoch": 0.42758366629827993, + "epoch": 0.5903826590500039, "grad_norm": 0.0, - "learning_rate": 1.2793700636146222e-05, - "loss": 0.9048, + "learning_rate": 7.5828577384817395e-06, + "loss": 1.0252, "step": 15089 }, { - "epoch": 0.42761200374054237, + "epoch": 0.5904217857422334, "grad_norm": 0.0, - "learning_rate": 1.2792819378266047e-05, - "loss": 0.9429, + "learning_rate": 7.58162810337495e-06, + "loss": 1.0608, "step": 15090 }, { - "epoch": 0.42764034118280486, + "epoch": 0.5904609124344627, "grad_norm": 0.0, - "learning_rate": 1.2791938096860914e-05, - "loss": 0.9685, + "learning_rate": 7.580398507104186e-06, + "loss": 0.9947, "step": 15091 }, { - "epoch": 0.4276686786250673, + "epoch": 0.5905000391266922, "grad_norm": 0.0, - "learning_rate": 1.2791056791938255e-05, - "loss": 0.9178, + "learning_rate": 7.579168949689191e-06, + "loss": 0.9401, "step": 15092 }, { - "epoch": 0.42769701606732974, + "epoch": 0.5905391658189216, "grad_norm": 0.0, - "learning_rate": 1.2790175463505485e-05, - "loss": 0.9344, + "learning_rate": 7.577939431149709e-06, + "loss": 1.1121, "step": 15093 }, { - "epoch": 0.42772535350959223, + "epoch": 0.5905782925111511, "grad_norm": 0.0, - "learning_rate": 1.2789294111570035e-05, - "loss": 0.8793, + "learning_rate": 7.576709951505484e-06, + "loss": 0.9522, "step": 15094 }, { - "epoch": 0.42775369095185467, + "epoch": 0.5906174192033805, "grad_norm": 0.0, - "learning_rate": 1.278841273613932e-05, - "loss": 0.8997, + "learning_rate": 7.575480510776256e-06, + "loss": 1.0207, "step": 15095 }, { - "epoch": 0.42778202839411716, + "epoch": 0.59065654589561, "grad_norm": 0.0, - "learning_rate": 1.2787531337220771e-05, - "loss": 0.8721, + "learning_rate": 7.5742511089817795e-06, + "loss": 1.0263, "step": 15096 }, { - "epoch": 0.4278103658363796, + "epoch": 0.5906956725878394, "grad_norm": 0.0, - "learning_rate": 1.2786649914821807e-05, - "loss": 0.905, + "learning_rate": 7.573021746141789e-06, + "loss": 0.9495, "step": 15097 }, { - "epoch": 0.4278387032786421, + "epoch": 0.5907347992800689, "grad_norm": 0.0, - "learning_rate": 1.278576846894986e-05, - "loss": 0.8111, + "learning_rate": 7.571792422276028e-06, + "loss": 0.9988, "step": 15098 }, { - "epoch": 0.42786704072090453, + "epoch": 0.5907739259722983, "grad_norm": 0.0, - "learning_rate": 1.278488699961235e-05, - "loss": 0.9303, + "learning_rate": 7.570563137404234e-06, + "loss": 1.0814, "step": 15099 }, { - "epoch": 0.427895378163167, + "epoch": 0.5908130526645278, "grad_norm": 0.0, - "learning_rate": 1.2784005506816701e-05, - "loss": 0.9128, + "learning_rate": 7.569333891546156e-06, + "loss": 0.9705, "step": 15100 }, { - "epoch": 0.42792371560542947, + "epoch": 0.5908521793567572, "grad_norm": 0.0, - "learning_rate": 1.2783123990570343e-05, - "loss": 0.9796, + "learning_rate": 7.568104684721529e-06, + "loss": 0.9499, "step": 15101 }, { - "epoch": 0.4279520530476919, + "epoch": 0.5908913060489867, "grad_norm": 0.0, - "learning_rate": 1.2782242450880697e-05, - "loss": 0.8833, + "learning_rate": 7.566875516950095e-06, + "loss": 0.9882, "step": 15102 }, { - "epoch": 0.4279803904899544, + "epoch": 0.590930432741216, "grad_norm": 0.0, - "learning_rate": 1.2781360887755188e-05, - "loss": 0.9817, + "learning_rate": 7.565646388251591e-06, + "loss": 0.9771, "step": 15103 }, { - "epoch": 0.42800872793221684, + "epoch": 0.5909695594334455, "grad_norm": 0.0, - "learning_rate": 1.2780479301201243e-05, - "loss": 0.9124, + "learning_rate": 7.56441729864575e-06, + "loss": 0.9786, "step": 15104 }, { - "epoch": 0.4280370653744793, + "epoch": 0.5910086861256749, "grad_norm": 0.0, - "learning_rate": 1.277959769122629e-05, - "loss": 0.8594, + "learning_rate": 7.5631882481523215e-06, + "loss": 1.0377, "step": 15105 }, { - "epoch": 0.42806540281674177, + "epoch": 0.5910478128179044, "grad_norm": 0.0, - "learning_rate": 1.2778716057837755e-05, - "loss": 0.884, + "learning_rate": 7.5619592367910345e-06, + "loss": 1.0407, "step": 15106 }, { - "epoch": 0.4280937402590042, + "epoch": 0.5910869395101338, "grad_norm": 0.0, - "learning_rate": 1.2777834401043061e-05, - "loss": 0.8769, + "learning_rate": 7.560730264581629e-06, + "loss": 0.9973, "step": 15107 }, { - "epoch": 0.4281220777012667, + "epoch": 0.5911260662023633, "grad_norm": 0.0, - "learning_rate": 1.2776952720849636e-05, - "loss": 0.9793, + "learning_rate": 7.559501331543835e-06, + "loss": 0.9309, "step": 15108 }, { - "epoch": 0.42815041514352914, + "epoch": 0.5911651928945927, "grad_norm": 0.0, - "learning_rate": 1.2776071017264908e-05, - "loss": 1.0332, + "learning_rate": 7.558272437697392e-06, + "loss": 0.9395, "step": 15109 }, { - "epoch": 0.42817875258579163, + "epoch": 0.5912043195868222, "grad_norm": 0.0, - "learning_rate": 1.27751892902963e-05, - "loss": 0.802, + "learning_rate": 7.557043583062036e-06, + "loss": 1.037, "step": 15110 }, { - "epoch": 0.42820709002805407, + "epoch": 0.5912434462790516, "grad_norm": 0.0, - "learning_rate": 1.2774307539951245e-05, - "loss": 1.01, + "learning_rate": 7.555814767657499e-06, + "loss": 0.882, "step": 15111 }, { - "epoch": 0.4282354274703165, + "epoch": 0.5912825729712811, "grad_norm": 0.0, - "learning_rate": 1.2773425766237167e-05, - "loss": 0.9665, + "learning_rate": 7.554585991503514e-06, + "loss": 1.1528, "step": 15112 }, { - "epoch": 0.428263764912579, + "epoch": 0.5913216996635104, "grad_norm": 0.0, - "learning_rate": 1.2772543969161493e-05, - "loss": 0.8732, + "learning_rate": 7.553357254619811e-06, + "loss": 0.9468, "step": 15113 }, { - "epoch": 0.42829210235484144, + "epoch": 0.5913608263557398, "grad_norm": 0.0, - "learning_rate": 1.2771662148731653e-05, - "loss": 0.9554, + "learning_rate": 7.552128557026129e-06, + "loss": 0.9975, "step": 15114 }, { - "epoch": 0.42832043979710394, + "epoch": 0.5913999530479693, "grad_norm": 0.0, - "learning_rate": 1.2770780304955075e-05, - "loss": 0.9447, + "learning_rate": 7.55089989874219e-06, + "loss": 1.1814, "step": 15115 }, { - "epoch": 0.4283487772393664, + "epoch": 0.5914390797401987, "grad_norm": 0.0, - "learning_rate": 1.2769898437839181e-05, - "loss": 0.9251, + "learning_rate": 7.5496712797877334e-06, + "loss": 1.0488, "step": 15116 }, { - "epoch": 0.4283771146816288, + "epoch": 0.5914782064324282, "grad_norm": 0.0, - "learning_rate": 1.2769016547391405e-05, - "loss": 0.9929, + "learning_rate": 7.548442700182483e-06, + "loss": 1.0252, "step": 15117 }, { - "epoch": 0.4284054521238913, + "epoch": 0.5915173331246576, "grad_norm": 0.0, - "learning_rate": 1.2768134633619176e-05, - "loss": 1.0217, + "learning_rate": 7.547214159946174e-06, + "loss": 1.0175, "step": 15118 }, { - "epoch": 0.42843378956615374, + "epoch": 0.5915564598168871, "grad_norm": 0.0, - "learning_rate": 1.2767252696529922e-05, - "loss": 0.983, + "learning_rate": 7.545985659098531e-06, + "loss": 0.9728, "step": 15119 }, { - "epoch": 0.42846212700841624, + "epoch": 0.5915955865091165, "grad_norm": 0.0, - "learning_rate": 1.2766370736131069e-05, - "loss": 0.9884, + "learning_rate": 7.544757197659284e-06, + "loss": 1.0792, "step": 15120 }, { - "epoch": 0.4284904644506787, + "epoch": 0.591634713201346, "grad_norm": 0.0, - "learning_rate": 1.2765488752430049e-05, - "loss": 0.8562, + "learning_rate": 7.5435287756481544e-06, + "loss": 1.0389, "step": 15121 }, { - "epoch": 0.42851880189294117, + "epoch": 0.5916738398935754, "grad_norm": 0.0, - "learning_rate": 1.2764606745434289e-05, - "loss": 0.8706, + "learning_rate": 7.54230039308488e-06, + "loss": 0.986, "step": 15122 }, { - "epoch": 0.4285471393352036, + "epoch": 0.5917129665858049, "grad_norm": 0.0, - "learning_rate": 1.276372471515122e-05, - "loss": 0.948, + "learning_rate": 7.5410720499891806e-06, + "loss": 1.067, "step": 15123 }, { - "epoch": 0.42857547677746605, + "epoch": 0.5917520932780342, "grad_norm": 0.0, - "learning_rate": 1.276284266158827e-05, - "loss": 0.9292, + "learning_rate": 7.539843746380784e-06, + "loss": 1.018, "step": 15124 }, { - "epoch": 0.42860381421972854, + "epoch": 0.5917912199702637, "grad_norm": 0.0, - "learning_rate": 1.2761960584752874e-05, - "loss": 0.9285, + "learning_rate": 7.5386154822794135e-06, + "loss": 0.9583, "step": 15125 }, { - "epoch": 0.428632151661991, + "epoch": 0.5918303466624931, "grad_norm": 0.0, - "learning_rate": 1.2761078484652458e-05, - "loss": 0.9298, + "learning_rate": 7.537387257704789e-06, + "loss": 0.9913, "step": 15126 }, { - "epoch": 0.4286604891042535, + "epoch": 0.5918694733547226, "grad_norm": 0.0, - "learning_rate": 1.2760196361294452e-05, - "loss": 0.8524, + "learning_rate": 7.536159072676645e-06, + "loss": 0.8984, "step": 15127 }, { - "epoch": 0.4286888265465159, + "epoch": 0.591908600046952, "grad_norm": 0.0, - "learning_rate": 1.2759314214686284e-05, - "loss": 0.8262, + "learning_rate": 7.534930927214699e-06, + "loss": 0.9017, "step": 15128 }, { - "epoch": 0.42871716398877835, + "epoch": 0.5919477267391815, "grad_norm": 0.0, - "learning_rate": 1.275843204483539e-05, - "loss": 0.8167, + "learning_rate": 7.533702821338672e-06, + "loss": 0.9379, "step": 15129 }, { - "epoch": 0.42874550143104084, + "epoch": 0.5919868534314109, "grad_norm": 0.0, - "learning_rate": 1.27575498517492e-05, - "loss": 0.9428, + "learning_rate": 7.532474755068284e-06, + "loss": 0.9358, "step": 15130 }, { - "epoch": 0.4287738388733033, + "epoch": 0.5920259801236404, "grad_norm": 0.0, - "learning_rate": 1.2756667635435143e-05, - "loss": 0.9072, + "learning_rate": 7.531246728423264e-06, + "loss": 1.1133, "step": 15131 }, { - "epoch": 0.4288021763155658, + "epoch": 0.5920651068158698, "grad_norm": 0.0, - "learning_rate": 1.2755785395900651e-05, - "loss": 1.0307, + "learning_rate": 7.530018741423328e-06, + "loss": 1.1135, "step": 15132 }, { - "epoch": 0.4288305137578282, + "epoch": 0.5921042335080993, "grad_norm": 0.0, - "learning_rate": 1.2754903133153154e-05, - "loss": 0.9587, + "learning_rate": 7.528790794088194e-06, + "loss": 1.0091, "step": 15133 }, { - "epoch": 0.4288588512000907, + "epoch": 0.5921433602003287, "grad_norm": 0.0, - "learning_rate": 1.2754020847200085e-05, - "loss": 1.009, + "learning_rate": 7.527562886437585e-06, + "loss": 0.9333, "step": 15134 }, { - "epoch": 0.42888718864235315, + "epoch": 0.5921824868925581, "grad_norm": 0.0, - "learning_rate": 1.2753138538048878e-05, - "loss": 1.022, + "learning_rate": 7.526335018491213e-06, + "loss": 1.0569, "step": 15135 }, { - "epoch": 0.4289155260846156, + "epoch": 0.5922216135847875, "grad_norm": 0.0, - "learning_rate": 1.2752256205706958e-05, - "loss": 0.8698, + "learning_rate": 7.525107190268805e-06, + "loss": 1.0542, "step": 15136 }, { - "epoch": 0.4289438635268781, + "epoch": 0.592260740277017, "grad_norm": 0.0, - "learning_rate": 1.2751373850181766e-05, - "loss": 0.9065, + "learning_rate": 7.523879401790076e-06, + "loss": 1.1255, "step": 15137 }, { - "epoch": 0.4289722009691405, + "epoch": 0.5922998669692464, "grad_norm": 0.0, - "learning_rate": 1.2750491471480729e-05, - "loss": 0.9385, + "learning_rate": 7.5226516530747395e-06, + "loss": 1.1525, "step": 15138 }, { - "epoch": 0.429000538411403, + "epoch": 0.5923389936614759, "grad_norm": 0.0, - "learning_rate": 1.2749609069611282e-05, - "loss": 0.9365, + "learning_rate": 7.52142394414251e-06, + "loss": 1.1224, "step": 15139 }, { - "epoch": 0.42902887585366545, + "epoch": 0.5923781203537053, "grad_norm": 0.0, - "learning_rate": 1.2748726644580856e-05, - "loss": 0.909, + "learning_rate": 7.520196275013109e-06, + "loss": 1.0424, "step": 15140 }, { - "epoch": 0.4290572132959279, + "epoch": 0.5924172470459348, "grad_norm": 0.0, - "learning_rate": 1.2747844196396883e-05, - "loss": 0.7948, + "learning_rate": 7.518968645706249e-06, + "loss": 1.0531, "step": 15141 }, { - "epoch": 0.4290855507381904, + "epoch": 0.5924563737381642, "grad_norm": 0.0, - "learning_rate": 1.27469617250668e-05, - "loss": 0.9081, + "learning_rate": 7.517741056241644e-06, + "loss": 0.955, "step": 15142 }, { - "epoch": 0.4291138881804528, + "epoch": 0.5924955004303936, "grad_norm": 0.0, - "learning_rate": 1.2746079230598036e-05, - "loss": 0.8507, + "learning_rate": 7.516513506639007e-06, + "loss": 1.0042, "step": 15143 }, { - "epoch": 0.4291422256227153, + "epoch": 0.5925346271226231, "grad_norm": 0.0, - "learning_rate": 1.2745196712998032e-05, - "loss": 0.8611, + "learning_rate": 7.515285996918047e-06, + "loss": 1.0518, "step": 15144 }, { - "epoch": 0.42917056306497775, + "epoch": 0.5925737538148524, "grad_norm": 0.0, - "learning_rate": 1.274431417227421e-05, - "loss": 0.8632, + "learning_rate": 7.514058527098484e-06, + "loss": 1.0494, "step": 15145 }, { - "epoch": 0.42919890050724024, + "epoch": 0.5926128805070819, "grad_norm": 0.0, - "learning_rate": 1.2743431608434016e-05, - "loss": 0.9424, + "learning_rate": 7.512831097200026e-06, + "loss": 1.1577, "step": 15146 }, { - "epoch": 0.4292272379495027, + "epoch": 0.5926520071993113, "grad_norm": 0.0, - "learning_rate": 1.2742549021484878e-05, - "loss": 0.7851, + "learning_rate": 7.5116037072423855e-06, + "loss": 0.9191, "step": 15147 }, { - "epoch": 0.4292555753917651, + "epoch": 0.5926911338915408, "grad_norm": 0.0, - "learning_rate": 1.274166641143423e-05, - "loss": 0.9836, + "learning_rate": 7.510376357245266e-06, + "loss": 1.0131, "step": 15148 }, { - "epoch": 0.4292839128340276, + "epoch": 0.5927302605837702, "grad_norm": 0.0, - "learning_rate": 1.2740783778289507e-05, - "loss": 0.8689, + "learning_rate": 7.509149047228387e-06, + "loss": 0.9728, "step": 15149 }, { - "epoch": 0.42931225027629005, + "epoch": 0.5927693872759997, "grad_norm": 0.0, - "learning_rate": 1.2739901122058145e-05, - "loss": 0.8784, + "learning_rate": 7.507921777211452e-06, + "loss": 1.0034, "step": 15150 }, { - "epoch": 0.42934058771855255, + "epoch": 0.5928085139682291, "grad_norm": 0.0, - "learning_rate": 1.273901844274758e-05, - "loss": 0.9137, + "learning_rate": 7.506694547214172e-06, + "loss": 1.0879, "step": 15151 }, { - "epoch": 0.429368925160815, + "epoch": 0.5928476406604586, "grad_norm": 0.0, - "learning_rate": 1.2738135740365243e-05, - "loss": 0.9659, + "learning_rate": 7.505467357256252e-06, + "loss": 0.9851, "step": 15152 }, { - "epoch": 0.4293972626030774, + "epoch": 0.592886767352688, "grad_norm": 0.0, - "learning_rate": 1.2737253014918573e-05, - "loss": 0.9537, + "learning_rate": 7.5042402073573995e-06, + "loss": 0.951, "step": 15153 }, { - "epoch": 0.4294256000453399, + "epoch": 0.5929258940449175, "grad_norm": 0.0, - "learning_rate": 1.2736370266415006e-05, - "loss": 0.9402, + "learning_rate": 7.503013097537322e-06, + "loss": 1.0856, "step": 15154 }, { - "epoch": 0.42945393748760236, + "epoch": 0.5929650207371469, "grad_norm": 0.0, - "learning_rate": 1.2735487494861975e-05, - "loss": 0.9139, + "learning_rate": 7.501786027815726e-06, + "loss": 0.9867, "step": 15155 }, { - "epoch": 0.42948227492986485, + "epoch": 0.5930041474293763, "grad_norm": 0.0, - "learning_rate": 1.2734604700266914e-05, - "loss": 0.9717, + "learning_rate": 7.500558998212318e-06, + "loss": 0.9962, "step": 15156 }, { - "epoch": 0.4295106123721273, + "epoch": 0.5930432741216057, "grad_norm": 0.0, - "learning_rate": 1.2733721882637265e-05, - "loss": 1.0779, + "learning_rate": 7.4993320087467955e-06, + "loss": 1.0944, "step": 15157 }, { - "epoch": 0.4295389498143897, + "epoch": 0.5930824008138352, "grad_norm": 0.0, - "learning_rate": 1.2732839041980463e-05, - "loss": 0.9425, + "learning_rate": 7.4981050594388716e-06, + "loss": 1.0618, "step": 15158 }, { - "epoch": 0.4295672872566522, + "epoch": 0.5931215275060646, "grad_norm": 0.0, - "learning_rate": 1.2731956178303941e-05, - "loss": 0.8367, + "learning_rate": 7.4968781503082445e-06, + "loss": 1.0229, "step": 15159 }, { - "epoch": 0.42959562469891466, + "epoch": 0.5931606541982941, "grad_norm": 0.0, - "learning_rate": 1.273107329161514e-05, - "loss": 0.9494, + "learning_rate": 7.495651281374616e-06, + "loss": 1.0635, "step": 15160 }, { - "epoch": 0.42962396214117715, + "epoch": 0.5931997808905235, "grad_norm": 0.0, - "learning_rate": 1.2730190381921492e-05, - "loss": 0.9234, + "learning_rate": 7.494424452657691e-06, + "loss": 0.8964, "step": 15161 }, { - "epoch": 0.4296522995834396, + "epoch": 0.593238907582753, "grad_norm": 0.0, - "learning_rate": 1.2729307449230435e-05, - "loss": 0.854, + "learning_rate": 7.49319766417717e-06, + "loss": 0.9132, "step": 15162 }, { - "epoch": 0.4296806370257021, + "epoch": 0.5932780342749824, "grad_norm": 0.0, - "learning_rate": 1.2728424493549409e-05, - "loss": 1.0057, + "learning_rate": 7.491970915952753e-06, + "loss": 0.9935, "step": 15163 }, { - "epoch": 0.4297089744679645, + "epoch": 0.5933171609672119, "grad_norm": 0.0, - "learning_rate": 1.2727541514885853e-05, - "loss": 0.7941, + "learning_rate": 7.4907442080041415e-06, + "loss": 0.9718, "step": 15164 }, { - "epoch": 0.42973731191022696, + "epoch": 0.5933562876594413, "grad_norm": 0.0, - "learning_rate": 1.2726658513247202e-05, - "loss": 0.8622, + "learning_rate": 7.489517540351032e-06, + "loss": 0.9717, "step": 15165 }, { - "epoch": 0.42976564935248945, + "epoch": 0.5933954143516708, "grad_norm": 0.0, - "learning_rate": 1.2725775488640887e-05, - "loss": 0.9827, + "learning_rate": 7.488290913013123e-06, + "loss": 0.9404, "step": 15166 }, { - "epoch": 0.4297939867947519, + "epoch": 0.5934345410439001, "grad_norm": 0.0, - "learning_rate": 1.272489244107436e-05, - "loss": 0.9236, + "learning_rate": 7.487064326010118e-06, + "loss": 1.0717, "step": 15167 }, { - "epoch": 0.4298223242370144, + "epoch": 0.5934736677361296, "grad_norm": 0.0, - "learning_rate": 1.2724009370555051e-05, - "loss": 0.9212, + "learning_rate": 7.485837779361712e-06, + "loss": 1.1149, "step": 15168 }, { - "epoch": 0.4298506616792768, + "epoch": 0.593512794428359, "grad_norm": 0.0, - "learning_rate": 1.2723126277090396e-05, - "loss": 1.048, + "learning_rate": 7.484611273087601e-06, + "loss": 1.0531, "step": 15169 }, { - "epoch": 0.42987899912153926, + "epoch": 0.5935519211205884, "grad_norm": 0.0, - "learning_rate": 1.272224316068784e-05, - "loss": 0.8835, + "learning_rate": 7.483384807207479e-06, + "loss": 1.0187, "step": 15170 }, { - "epoch": 0.42990733656380176, + "epoch": 0.5935910478128179, "grad_norm": 0.0, - "learning_rate": 1.2721360021354817e-05, - "loss": 0.89, + "learning_rate": 7.48215838174104e-06, + "loss": 1.0042, "step": 15171 }, { - "epoch": 0.4299356740060642, + "epoch": 0.5936301745050473, "grad_norm": 0.0, - "learning_rate": 1.2720476859098771e-05, - "loss": 0.9912, + "learning_rate": 7.480931996707988e-06, + "loss": 0.9085, "step": 15172 }, { - "epoch": 0.4299640114483267, + "epoch": 0.5936693011972768, "grad_norm": 0.0, - "learning_rate": 1.271959367392714e-05, - "loss": 0.9101, + "learning_rate": 7.479705652128009e-06, + "loss": 1.0447, "step": 15173 }, { - "epoch": 0.4299923488905891, + "epoch": 0.5937084278895062, "grad_norm": 0.0, - "learning_rate": 1.2718710465847355e-05, - "loss": 1.0406, + "learning_rate": 7.4784793480208e-06, + "loss": 1.0001, "step": 15174 }, { - "epoch": 0.4300206863328516, + "epoch": 0.5937475545817357, "grad_norm": 0.0, - "learning_rate": 1.2717827234866867e-05, - "loss": 0.9995, + "learning_rate": 7.47725308440605e-06, + "loss": 1.1386, "step": 15175 }, { - "epoch": 0.43004902377511406, + "epoch": 0.5937866812739651, "grad_norm": 0.0, - "learning_rate": 1.2716943980993108e-05, - "loss": 0.7602, + "learning_rate": 7.476026861303458e-06, + "loss": 1.0282, "step": 15176 }, { - "epoch": 0.4300773612173765, + "epoch": 0.5938258079661946, "grad_norm": 0.0, - "learning_rate": 1.2716060704233523e-05, - "loss": 0.8528, + "learning_rate": 7.474800678732712e-06, + "loss": 0.8961, "step": 15177 }, { - "epoch": 0.430105698659639, + "epoch": 0.5938649346584239, "grad_norm": 0.0, - "learning_rate": 1.2715177404595548e-05, - "loss": 0.872, + "learning_rate": 7.4735745367135014e-06, + "loss": 1.151, "step": 15178 }, { - "epoch": 0.43013403610190143, + "epoch": 0.5939040613506534, "grad_norm": 0.0, - "learning_rate": 1.2714294082086628e-05, - "loss": 0.9291, + "learning_rate": 7.472348435265515e-06, + "loss": 1.0198, "step": 15179 }, { - "epoch": 0.4301623735441639, + "epoch": 0.5939431880428828, "grad_norm": 0.0, - "learning_rate": 1.2713410736714202e-05, - "loss": 0.8966, + "learning_rate": 7.471122374408451e-06, + "loss": 1.0624, "step": 15180 }, { - "epoch": 0.43019071098642636, + "epoch": 0.5939823147351123, "grad_norm": 0.0, - "learning_rate": 1.2712527368485708e-05, - "loss": 0.8771, + "learning_rate": 7.4698963541619895e-06, + "loss": 0.9554, "step": 15181 }, { - "epoch": 0.4302190484286888, + "epoch": 0.5940214414273417, "grad_norm": 0.0, - "learning_rate": 1.2711643977408587e-05, - "loss": 0.8781, + "learning_rate": 7.468670374545826e-06, + "loss": 0.938, "step": 15182 }, { - "epoch": 0.4302473858709513, + "epoch": 0.5940605681195712, "grad_norm": 0.0, - "learning_rate": 1.271076056349028e-05, - "loss": 0.8566, + "learning_rate": 7.46744443557964e-06, + "loss": 1.0226, "step": 15183 }, { - "epoch": 0.43027572331321373, + "epoch": 0.5940996948118006, "grad_norm": 0.0, - "learning_rate": 1.2709877126738235e-05, - "loss": 0.9824, + "learning_rate": 7.466218537283122e-06, + "loss": 1.0303, "step": 15184 }, { - "epoch": 0.4303040607554762, + "epoch": 0.5941388215040301, "grad_norm": 0.0, - "learning_rate": 1.2708993667159887e-05, - "loss": 0.9946, + "learning_rate": 7.464992679675962e-06, + "loss": 1.0529, "step": 15185 }, { - "epoch": 0.43033239819773866, + "epoch": 0.5941779481962595, "grad_norm": 0.0, - "learning_rate": 1.2708110184762684e-05, - "loss": 0.9818, + "learning_rate": 7.463766862777844e-06, + "loss": 1.0038, "step": 15186 }, { - "epoch": 0.43036073564000116, + "epoch": 0.594217074888489, "grad_norm": 0.0, - "learning_rate": 1.2707226679554054e-05, - "loss": 0.9394, + "learning_rate": 7.462541086608453e-06, + "loss": 1.029, "step": 15187 }, { - "epoch": 0.4303890730822636, + "epoch": 0.5942562015807183, "grad_norm": 0.0, - "learning_rate": 1.2706343151541457e-05, - "loss": 1.0038, + "learning_rate": 7.461315351187466e-06, + "loss": 0.938, "step": 15188 }, { - "epoch": 0.43041741052452603, + "epoch": 0.5942953282729478, "grad_norm": 0.0, - "learning_rate": 1.2705459600732319e-05, - "loss": 0.9022, + "learning_rate": 7.460089656534578e-06, + "loss": 0.9913, "step": 15189 }, { - "epoch": 0.43044574796678853, + "epoch": 0.5943344549651772, "grad_norm": 0.0, - "learning_rate": 1.2704576027134095e-05, - "loss": 0.8275, + "learning_rate": 7.458864002669468e-06, + "loss": 0.9481, "step": 15190 }, { - "epoch": 0.43047408540905097, + "epoch": 0.5943735816574067, "grad_norm": 0.0, - "learning_rate": 1.2703692430754223e-05, - "loss": 0.8125, + "learning_rate": 7.457638389611818e-06, + "loss": 0.9599, "step": 15191 }, { - "epoch": 0.43050242285131346, + "epoch": 0.5944127083496361, "grad_norm": 0.0, - "learning_rate": 1.2702808811600144e-05, - "loss": 0.9633, + "learning_rate": 7.4564128173813085e-06, + "loss": 1.0039, "step": 15192 }, { - "epoch": 0.4305307602935759, + "epoch": 0.5944518350418656, "grad_norm": 0.0, - "learning_rate": 1.2701925169679303e-05, - "loss": 0.9159, + "learning_rate": 7.455187285997619e-06, + "loss": 1.0114, "step": 15193 }, { - "epoch": 0.43055909773583834, + "epoch": 0.594490961734095, "grad_norm": 0.0, - "learning_rate": 1.2701041504999144e-05, - "loss": 0.9879, + "learning_rate": 7.453961795480438e-06, + "loss": 0.9614, "step": 15194 }, { - "epoch": 0.43058743517810083, + "epoch": 0.5945300884263245, "grad_norm": 0.0, - "learning_rate": 1.2700157817567105e-05, - "loss": 0.8629, + "learning_rate": 7.452736345849438e-06, + "loss": 1.0544, "step": 15195 }, { - "epoch": 0.43061577262036327, + "epoch": 0.5945692151185539, "grad_norm": 0.0, - "learning_rate": 1.2699274107390638e-05, - "loss": 0.9197, + "learning_rate": 7.451510937124301e-06, + "loss": 1.0187, "step": 15196 }, { - "epoch": 0.43064411006262576, + "epoch": 0.5946083418107834, "grad_norm": 0.0, - "learning_rate": 1.2698390374477186e-05, - "loss": 0.9184, + "learning_rate": 7.450285569324703e-06, + "loss": 0.9794, "step": 15197 }, { - "epoch": 0.4306724475048882, + "epoch": 0.5946474685030128, "grad_norm": 0.0, - "learning_rate": 1.2697506618834185e-05, - "loss": 0.8672, + "learning_rate": 7.449060242470324e-06, + "loss": 0.9075, "step": 15198 }, { - "epoch": 0.4307007849471507, + "epoch": 0.5946865951952421, "grad_norm": 0.0, - "learning_rate": 1.2696622840469084e-05, - "loss": 0.9158, + "learning_rate": 7.447834956580844e-06, + "loss": 1.0702, "step": 15199 }, { - "epoch": 0.43072912238941313, + "epoch": 0.5947257218874716, "grad_norm": 0.0, - "learning_rate": 1.269573903938933e-05, - "loss": 0.9047, + "learning_rate": 7.446609711675935e-06, + "loss": 1.1049, "step": 15200 }, { - "epoch": 0.43075745983167557, + "epoch": 0.594764848579701, "grad_norm": 0.0, - "learning_rate": 1.2694855215602362e-05, - "loss": 0.8201, + "learning_rate": 7.445384507775275e-06, + "loss": 1.0581, "step": 15201 }, { - "epoch": 0.43078579727393806, + "epoch": 0.5948039752719305, "grad_norm": 0.0, - "learning_rate": 1.269397136911563e-05, - "loss": 0.9418, + "learning_rate": 7.4441593448985365e-06, + "loss": 1.1204, "step": 15202 }, { - "epoch": 0.4308141347162005, + "epoch": 0.5948431019641599, "grad_norm": 0.0, - "learning_rate": 1.2693087499936575e-05, - "loss": 0.8763, + "learning_rate": 7.442934223065399e-06, + "loss": 1.026, "step": 15203 }, { - "epoch": 0.430842472158463, + "epoch": 0.5948822286563894, "grad_norm": 0.0, - "learning_rate": 1.2692203608072646e-05, - "loss": 0.944, + "learning_rate": 7.44170914229553e-06, + "loss": 1.0743, "step": 15204 }, { - "epoch": 0.43087080960072544, + "epoch": 0.5949213553486188, "grad_norm": 0.0, - "learning_rate": 1.2691319693531287e-05, - "loss": 0.8328, + "learning_rate": 7.4404841026086096e-06, + "loss": 1.0043, "step": 15205 }, { - "epoch": 0.4308991470429879, + "epoch": 0.5949604820408483, "grad_norm": 0.0, - "learning_rate": 1.269043575631994e-05, - "loss": 0.9497, + "learning_rate": 7.4392591040243056e-06, + "loss": 1.0301, "step": 15206 }, { - "epoch": 0.43092748448525037, + "epoch": 0.5949996087330777, "grad_norm": 0.0, - "learning_rate": 1.2689551796446057e-05, - "loss": 0.8818, + "learning_rate": 7.438034146562294e-06, + "loss": 1.0211, "step": 15207 }, { - "epoch": 0.4309558219275128, + "epoch": 0.5950387354253072, "grad_norm": 0.0, - "learning_rate": 1.2688667813917075e-05, - "loss": 0.9961, + "learning_rate": 7.4368092302422424e-06, + "loss": 1.0489, "step": 15208 }, { - "epoch": 0.4309841593697753, + "epoch": 0.5950778621175365, "grad_norm": 0.0, - "learning_rate": 1.268778380874045e-05, - "loss": 0.8893, + "learning_rate": 7.435584355083822e-06, + "loss": 1.0335, "step": 15209 }, { - "epoch": 0.43101249681203774, + "epoch": 0.595116988809766, "grad_norm": 0.0, - "learning_rate": 1.2686899780923624e-05, - "loss": 0.8859, + "learning_rate": 7.4343595211067045e-06, + "loss": 0.9238, "step": 15210 }, { - "epoch": 0.43104083425430023, + "epoch": 0.5951561155019954, "grad_norm": 0.0, - "learning_rate": 1.2686015730474042e-05, - "loss": 0.9131, + "learning_rate": 7.433134728330555e-06, + "loss": 0.8416, "step": 15211 }, { - "epoch": 0.43106917169656267, + "epoch": 0.5951952421942249, "grad_norm": 0.0, - "learning_rate": 1.2685131657399153e-05, - "loss": 0.8472, + "learning_rate": 7.431909976775049e-06, + "loss": 1.0624, "step": 15212 }, { - "epoch": 0.4310975091388251, + "epoch": 0.5952343688864543, "grad_norm": 0.0, - "learning_rate": 1.2684247561706402e-05, - "loss": 0.886, + "learning_rate": 7.43068526645985e-06, + "loss": 1.0134, "step": 15213 }, { - "epoch": 0.4311258465810876, + "epoch": 0.5952734955786838, "grad_norm": 0.0, - "learning_rate": 1.2683363443403235e-05, - "loss": 0.9239, + "learning_rate": 7.4294605974046275e-06, + "loss": 1.0087, "step": 15214 }, { - "epoch": 0.43115418402335004, + "epoch": 0.5953126222709132, "grad_norm": 0.0, - "learning_rate": 1.2682479302497106e-05, - "loss": 0.9348, + "learning_rate": 7.42823596962904e-06, + "loss": 0.891, "step": 15215 }, { - "epoch": 0.43118252146561253, + "epoch": 0.5953517489631427, "grad_norm": 0.0, - "learning_rate": 1.2681595138995456e-05, - "loss": 0.8823, + "learning_rate": 7.427011383152767e-06, + "loss": 0.9882, "step": 15216 }, { - "epoch": 0.43121085890787497, + "epoch": 0.5953908756553721, "grad_norm": 0.0, - "learning_rate": 1.2680710952905733e-05, - "loss": 1.028, + "learning_rate": 7.425786837995466e-06, + "loss": 1.1603, "step": 15217 }, { - "epoch": 0.4312391963501374, + "epoch": 0.5954300023476016, "grad_norm": 0.0, - "learning_rate": 1.2679826744235388e-05, - "loss": 0.9538, + "learning_rate": 7.424562334176804e-06, + "loss": 0.9848, "step": 15218 }, { - "epoch": 0.4312675337923999, + "epoch": 0.595469129039831, "grad_norm": 0.0, - "learning_rate": 1.2678942512991865e-05, - "loss": 0.9286, + "learning_rate": 7.423337871716442e-06, + "loss": 0.9871, "step": 15219 }, { - "epoch": 0.43129587123466234, + "epoch": 0.5955082557320605, "grad_norm": 0.0, - "learning_rate": 1.2678058259182615e-05, - "loss": 0.9234, + "learning_rate": 7.4221134506340405e-06, + "loss": 0.9852, "step": 15220 }, { - "epoch": 0.43132420867692484, + "epoch": 0.5955473824242898, "grad_norm": 0.0, - "learning_rate": 1.2677173982815086e-05, - "loss": 0.9665, + "learning_rate": 7.420889070949272e-06, + "loss": 1.0323, "step": 15221 }, { - "epoch": 0.4313525461191873, + "epoch": 0.5955865091165193, "grad_norm": 0.0, - "learning_rate": 1.2676289683896727e-05, - "loss": 0.9443, + "learning_rate": 7.419664732681793e-06, + "loss": 0.9406, "step": 15222 }, { - "epoch": 0.43138088356144977, + "epoch": 0.5956256358087487, "grad_norm": 0.0, - "learning_rate": 1.2675405362434987e-05, - "loss": 0.9467, + "learning_rate": 7.418440435851265e-06, + "loss": 0.9583, "step": 15223 }, { - "epoch": 0.4314092210037122, + "epoch": 0.5956647625009782, "grad_norm": 0.0, - "learning_rate": 1.2674521018437311e-05, - "loss": 0.8233, + "learning_rate": 7.417216180477344e-06, + "loss": 1.0328, "step": 15224 }, { - "epoch": 0.43143755844597464, + "epoch": 0.5957038891932076, "grad_norm": 0.0, - "learning_rate": 1.2673636651911154e-05, - "loss": 0.9021, + "learning_rate": 7.4159919665797006e-06, + "loss": 0.9999, "step": 15225 }, { - "epoch": 0.43146589588823714, + "epoch": 0.5957430158854371, "grad_norm": 0.0, - "learning_rate": 1.2672752262863963e-05, - "loss": 0.9919, + "learning_rate": 7.414767794177986e-06, + "loss": 0.9625, "step": 15226 }, { - "epoch": 0.4314942333304996, + "epoch": 0.5957821425776665, "grad_norm": 0.0, - "learning_rate": 1.2671867851303185e-05, - "loss": 0.9814, + "learning_rate": 7.413543663291864e-06, + "loss": 1.005, "step": 15227 }, { - "epoch": 0.43152257077276207, + "epoch": 0.5958212692698959, "grad_norm": 0.0, - "learning_rate": 1.2670983417236271e-05, - "loss": 0.9169, + "learning_rate": 7.412319573940987e-06, + "loss": 1.0138, "step": 15228 }, { - "epoch": 0.4315509082150245, + "epoch": 0.5958603959621254, "grad_norm": 0.0, - "learning_rate": 1.2670098960670676e-05, - "loss": 0.9569, + "learning_rate": 7.411095526145011e-06, + "loss": 0.8246, "step": 15229 }, { - "epoch": 0.43157924565728695, + "epoch": 0.5958995226543548, "grad_norm": 0.0, - "learning_rate": 1.2669214481613846e-05, - "loss": 0.8642, + "learning_rate": 7.4098715199236036e-06, + "loss": 0.9174, "step": 15230 }, { - "epoch": 0.43160758309954944, + "epoch": 0.5959386493465842, "grad_norm": 0.0, - "learning_rate": 1.2668329980073229e-05, - "loss": 0.8379, + "learning_rate": 7.408647555296411e-06, + "loss": 1.0371, "step": 15231 }, { - "epoch": 0.4316359205418119, + "epoch": 0.5959777760388136, "grad_norm": 0.0, - "learning_rate": 1.2667445456056276e-05, - "loss": 0.8932, + "learning_rate": 7.4074236322830926e-06, + "loss": 0.9975, "step": 15232 }, { - "epoch": 0.4316642579840744, + "epoch": 0.5960169027310431, "grad_norm": 0.0, - "learning_rate": 1.2666560909570442e-05, - "loss": 0.9057, + "learning_rate": 7.406199750903299e-06, + "loss": 0.8653, "step": 15233 }, { - "epoch": 0.4316925954263368, + "epoch": 0.5960560294232725, "grad_norm": 0.0, - "learning_rate": 1.2665676340623172e-05, - "loss": 0.8628, + "learning_rate": 7.404975911176691e-06, + "loss": 0.9625, "step": 15234 }, { - "epoch": 0.4317209328685993, + "epoch": 0.596095156115502, "grad_norm": 0.0, - "learning_rate": 1.2664791749221923e-05, - "loss": 0.8203, + "learning_rate": 7.403752113122918e-06, + "loss": 0.9506, "step": 15235 }, { - "epoch": 0.43174927031086174, + "epoch": 0.5961342828077314, "grad_norm": 0.0, - "learning_rate": 1.2663907135374142e-05, - "loss": 0.9889, + "learning_rate": 7.4025283567616315e-06, + "loss": 1.0472, "step": 15236 }, { - "epoch": 0.4317776077531242, + "epoch": 0.5961734094999609, "grad_norm": 0.0, - "learning_rate": 1.2663022499087285e-05, - "loss": 0.9354, + "learning_rate": 7.401304642112481e-06, + "loss": 1.0507, "step": 15237 }, { - "epoch": 0.4318059451953867, + "epoch": 0.5962125361921903, "grad_norm": 0.0, - "learning_rate": 1.26621378403688e-05, - "loss": 0.9871, + "learning_rate": 7.4000809691951255e-06, + "loss": 1.0331, "step": 15238 }, { - "epoch": 0.4318342826376491, + "epoch": 0.5962516628844198, "grad_norm": 0.0, - "learning_rate": 1.266125315922614e-05, - "loss": 0.9005, + "learning_rate": 7.398857338029213e-06, + "loss": 1.028, "step": 15239 }, { - "epoch": 0.4318626200799116, + "epoch": 0.5962907895766492, "grad_norm": 0.0, - "learning_rate": 1.2660368455666752e-05, - "loss": 0.9861, + "learning_rate": 7.397633748634392e-06, + "loss": 1.0505, "step": 15240 }, { - "epoch": 0.43189095752217405, + "epoch": 0.5963299162688787, "grad_norm": 0.0, - "learning_rate": 1.2659483729698094e-05, - "loss": 0.9912, + "learning_rate": 7.39641020103031e-06, + "loss": 0.9152, "step": 15241 }, { - "epoch": 0.4319192949644365, + "epoch": 0.596369042961108, "grad_norm": 0.0, - "learning_rate": 1.265859898132762e-05, - "loss": 0.8273, + "learning_rate": 7.395186695236618e-06, + "loss": 0.9446, "step": 15242 }, { - "epoch": 0.431947632406699, + "epoch": 0.5964081696533375, "grad_norm": 0.0, - "learning_rate": 1.265771421056278e-05, - "loss": 0.9474, + "learning_rate": 7.393963231272964e-06, + "loss": 1.0972, "step": 15243 }, { - "epoch": 0.4319759698489614, + "epoch": 0.5964472963455669, "grad_norm": 0.0, - "learning_rate": 1.2656829417411023e-05, - "loss": 0.8564, + "learning_rate": 7.392739809158995e-06, + "loss": 1.0809, "step": 15244 }, { - "epoch": 0.4320043072912239, + "epoch": 0.5964864230377964, "grad_norm": 0.0, - "learning_rate": 1.2655944601879805e-05, - "loss": 0.9165, + "learning_rate": 7.3915164289143595e-06, + "loss": 0.9773, "step": 15245 }, { - "epoch": 0.43203264473348635, + "epoch": 0.5965255497300258, "grad_norm": 0.0, - "learning_rate": 1.265505976397658e-05, - "loss": 0.8515, + "learning_rate": 7.390293090558698e-06, + "loss": 1.088, "step": 15246 }, { - "epoch": 0.43206098217574884, + "epoch": 0.5965646764222553, "grad_norm": 0.0, - "learning_rate": 1.2654174903708803e-05, - "loss": 0.9607, + "learning_rate": 7.389069794111663e-06, + "loss": 0.9355, "step": 15247 }, { - "epoch": 0.4320893196180113, + "epoch": 0.5966038031144847, "grad_norm": 0.0, - "learning_rate": 1.2653290021083925e-05, - "loss": 0.9034, + "learning_rate": 7.387846539592894e-06, + "loss": 1.0496, "step": 15248 }, { - "epoch": 0.4321176570602737, + "epoch": 0.5966429298067142, "grad_norm": 0.0, - "learning_rate": 1.2652405116109394e-05, - "loss": 0.9803, + "learning_rate": 7.386623327022034e-06, + "loss": 1.0395, "step": 15249 }, { - "epoch": 0.4321459945025362, + "epoch": 0.5966820564989436, "grad_norm": 0.0, - "learning_rate": 1.2651520188792677e-05, - "loss": 0.9012, + "learning_rate": 7.385400156418731e-06, + "loss": 1.0049, "step": 15250 }, { - "epoch": 0.43217433194479865, + "epoch": 0.5967211831911731, "grad_norm": 0.0, - "learning_rate": 1.2650635239141217e-05, - "loss": 0.9844, + "learning_rate": 7.384177027802624e-06, + "loss": 1.0305, "step": 15251 }, { - "epoch": 0.43220266938706114, + "epoch": 0.5967603098834025, "grad_norm": 0.0, - "learning_rate": 1.2649750267162474e-05, - "loss": 0.9809, + "learning_rate": 7.382953941193358e-06, + "loss": 1.0231, "step": 15252 }, { - "epoch": 0.4322310068293236, + "epoch": 0.596799436575632, "grad_norm": 0.0, - "learning_rate": 1.26488652728639e-05, - "loss": 0.9106, + "learning_rate": 7.381730896610573e-06, + "loss": 1.0525, "step": 15253 }, { - "epoch": 0.432259344271586, + "epoch": 0.5968385632678613, "grad_norm": 0.0, - "learning_rate": 1.2647980256252947e-05, - "loss": 1.0056, + "learning_rate": 7.380507894073907e-06, + "loss": 1.12, "step": 15254 }, { - "epoch": 0.4322876817138485, + "epoch": 0.5968776899600908, "grad_norm": 0.0, - "learning_rate": 1.2647095217337078e-05, - "loss": 0.812, + "learning_rate": 7.3792849336029995e-06, + "loss": 1.0123, "step": 15255 }, { - "epoch": 0.43231601915611095, + "epoch": 0.5969168166523202, "grad_norm": 0.0, - "learning_rate": 1.2646210156123742e-05, - "loss": 0.9457, + "learning_rate": 7.378062015217494e-06, + "loss": 1.0625, "step": 15256 }, { - "epoch": 0.43234435659837345, + "epoch": 0.5969559433445496, "grad_norm": 0.0, - "learning_rate": 1.264532507262039e-05, - "loss": 0.9421, + "learning_rate": 7.376839138937028e-06, + "loss": 1.0973, "step": 15257 }, { - "epoch": 0.4323726940406359, + "epoch": 0.5969950700367791, "grad_norm": 0.0, - "learning_rate": 1.264443996683449e-05, - "loss": 0.8821, + "learning_rate": 7.375616304781239e-06, + "loss": 1.1131, "step": 15258 }, { - "epoch": 0.4324010314828984, + "epoch": 0.5970341967290085, "grad_norm": 0.0, - "learning_rate": 1.2643554838773486e-05, - "loss": 0.9589, + "learning_rate": 7.374393512769764e-06, + "loss": 1.0703, "step": 15259 }, { - "epoch": 0.4324293689251608, + "epoch": 0.597073323421238, "grad_norm": 0.0, - "learning_rate": 1.2642669688444837e-05, - "loss": 0.922, + "learning_rate": 7.373170762922235e-06, + "loss": 1.1244, "step": 15260 }, { - "epoch": 0.43245770636742326, + "epoch": 0.5971124501134674, "grad_norm": 0.0, - "learning_rate": 1.2641784515856002e-05, - "loss": 0.8501, + "learning_rate": 7.371948055258296e-06, + "loss": 0.9366, "step": 15261 }, { - "epoch": 0.43248604380968575, + "epoch": 0.5971515768056969, "grad_norm": 0.0, - "learning_rate": 1.2640899321014435e-05, - "loss": 0.9033, + "learning_rate": 7.370725389797577e-06, + "loss": 0.886, "step": 15262 }, { - "epoch": 0.4325143812519482, + "epoch": 0.5971907034979262, "grad_norm": 0.0, - "learning_rate": 1.2640014103927594e-05, - "loss": 0.9571, + "learning_rate": 7.369502766559713e-06, + "loss": 1.0796, "step": 15263 }, { - "epoch": 0.4325427186942107, + "epoch": 0.5972298301901557, "grad_norm": 0.0, - "learning_rate": 1.2639128864602932e-05, - "loss": 0.8204, + "learning_rate": 7.368280185564336e-06, + "loss": 1.0585, "step": 15264 }, { - "epoch": 0.4325710561364731, + "epoch": 0.5972689568823851, "grad_norm": 0.0, - "learning_rate": 1.2638243603047907e-05, - "loss": 0.8857, + "learning_rate": 7.367057646831085e-06, + "loss": 0.8812, "step": 15265 }, { - "epoch": 0.43259939357873556, + "epoch": 0.5973080835746146, "grad_norm": 0.0, - "learning_rate": 1.2637358319269976e-05, - "loss": 0.8686, + "learning_rate": 7.365835150379589e-06, + "loss": 1.0365, "step": 15266 }, { - "epoch": 0.43262773102099805, + "epoch": 0.597347210266844, "grad_norm": 0.0, - "learning_rate": 1.2636473013276596e-05, - "loss": 0.9536, + "learning_rate": 7.364612696229479e-06, + "loss": 0.9293, "step": 15267 }, { - "epoch": 0.4326560684632605, + "epoch": 0.5973863369590735, "grad_norm": 0.0, - "learning_rate": 1.2635587685075227e-05, - "loss": 0.8722, + "learning_rate": 7.363390284400388e-06, + "loss": 1.0561, "step": 15268 }, { - "epoch": 0.432684405905523, + "epoch": 0.5974254636513029, "grad_norm": 0.0, - "learning_rate": 1.263470233467332e-05, - "loss": 0.8689, + "learning_rate": 7.362167914911939e-06, + "loss": 1.0461, "step": 15269 }, { - "epoch": 0.4327127433477854, + "epoch": 0.5974645903435324, "grad_norm": 0.0, - "learning_rate": 1.2633816962078342e-05, - "loss": 0.9085, + "learning_rate": 7.360945587783774e-06, + "loss": 1.0399, "step": 15270 }, { - "epoch": 0.4327410807900479, + "epoch": 0.5975037170357618, "grad_norm": 0.0, - "learning_rate": 1.2632931567297745e-05, - "loss": 0.9559, + "learning_rate": 7.3597233030355165e-06, + "loss": 0.96, "step": 15271 }, { - "epoch": 0.43276941823231035, + "epoch": 0.5975428437279913, "grad_norm": 0.0, - "learning_rate": 1.2632046150338988e-05, - "loss": 0.9333, + "learning_rate": 7.358501060686794e-06, + "loss": 0.9967, "step": 15272 }, { - "epoch": 0.4327977556745728, + "epoch": 0.5975819704202207, "grad_norm": 0.0, - "learning_rate": 1.2631160711209528e-05, - "loss": 0.77, + "learning_rate": 7.357278860757229e-06, + "loss": 0.9627, "step": 15273 }, { - "epoch": 0.4328260931168353, + "epoch": 0.5976210971124502, "grad_norm": 0.0, - "learning_rate": 1.2630275249916822e-05, - "loss": 0.9364, + "learning_rate": 7.356056703266459e-06, + "loss": 1.0131, "step": 15274 }, { - "epoch": 0.4328544305590977, + "epoch": 0.5976602238046795, "grad_norm": 0.0, - "learning_rate": 1.2629389766468331e-05, - "loss": 0.8314, + "learning_rate": 7.354834588234105e-06, + "loss": 1.0469, "step": 15275 }, { - "epoch": 0.4328827680013602, + "epoch": 0.597699350496909, "grad_norm": 0.0, - "learning_rate": 1.2628504260871517e-05, - "loss": 0.9476, + "learning_rate": 7.353612515679792e-06, + "loss": 1.0279, "step": 15276 }, { - "epoch": 0.43291110544362266, + "epoch": 0.5977384771891384, "grad_norm": 0.0, - "learning_rate": 1.2627618733133835e-05, - "loss": 0.8926, + "learning_rate": 7.352390485623146e-06, + "loss": 0.98, "step": 15277 }, { - "epoch": 0.4329394428858851, + "epoch": 0.5977776038813679, "grad_norm": 0.0, - "learning_rate": 1.2626733183262743e-05, - "loss": 0.9657, + "learning_rate": 7.351168498083789e-06, + "loss": 1.0372, "step": 15278 }, { - "epoch": 0.4329677803281476, + "epoch": 0.5978167305735973, "grad_norm": 0.0, - "learning_rate": 1.2625847611265703e-05, - "loss": 0.954, + "learning_rate": 7.349946553081349e-06, + "loss": 1.0416, "step": 15279 }, { - "epoch": 0.43299611777041, + "epoch": 0.5978558572658268, "grad_norm": 0.0, - "learning_rate": 1.262496201715017e-05, - "loss": 0.8195, + "learning_rate": 7.348724650635448e-06, + "loss": 1.0475, "step": 15280 }, { - "epoch": 0.4330244552126725, + "epoch": 0.5978949839580562, "grad_norm": 0.0, - "learning_rate": 1.262407640092361e-05, - "loss": 1.0616, + "learning_rate": 7.347502790765706e-06, + "loss": 0.9157, "step": 15281 }, { - "epoch": 0.43305279265493496, + "epoch": 0.5979341106502857, "grad_norm": 0.0, - "learning_rate": 1.262319076259348e-05, - "loss": 0.9384, + "learning_rate": 7.346280973491741e-06, + "loss": 1.0994, "step": 15282 }, { - "epoch": 0.43308113009719745, + "epoch": 0.5979732373425151, "grad_norm": 0.0, - "learning_rate": 1.262230510216724e-05, - "loss": 0.9387, + "learning_rate": 7.345059198833185e-06, + "loss": 0.9894, "step": 15283 }, { - "epoch": 0.4331094675394599, + "epoch": 0.5980123640347444, "grad_norm": 0.0, - "learning_rate": 1.2621419419652353e-05, - "loss": 0.8972, + "learning_rate": 7.3438374668096475e-06, + "loss": 1.0256, "step": 15284 }, { - "epoch": 0.43313780498172233, + "epoch": 0.5980514907269739, "grad_norm": 0.0, - "learning_rate": 1.2620533715056275e-05, - "loss": 1.0116, + "learning_rate": 7.342615777440753e-06, + "loss": 1.1091, "step": 15285 }, { - "epoch": 0.4331661424239848, + "epoch": 0.5980906174192033, "grad_norm": 0.0, - "learning_rate": 1.2619647988386468e-05, - "loss": 1.0665, + "learning_rate": 7.341394130746122e-06, + "loss": 1.1323, "step": 15286 }, { - "epoch": 0.43319447986624726, + "epoch": 0.5981297441114328, "grad_norm": 0.0, - "learning_rate": 1.2618762239650391e-05, - "loss": 0.9064, + "learning_rate": 7.340172526745366e-06, + "loss": 0.9855, "step": 15287 }, { - "epoch": 0.43322281730850976, + "epoch": 0.5981688708036622, "grad_norm": 0.0, - "learning_rate": 1.261787646885551e-05, - "loss": 0.816, + "learning_rate": 7.3389509654581045e-06, + "loss": 1.0368, "step": 15288 }, { - "epoch": 0.4332511547507722, + "epoch": 0.5982079974958917, "grad_norm": 0.0, - "learning_rate": 1.2616990676009283e-05, - "loss": 0.9007, + "learning_rate": 7.337729446903961e-06, + "loss": 1.0459, "step": 15289 }, { - "epoch": 0.43327949219303463, + "epoch": 0.5982471241881211, "grad_norm": 0.0, - "learning_rate": 1.261610486111917e-05, - "loss": 0.8598, + "learning_rate": 7.3365079711025445e-06, + "loss": 0.9427, "step": 15290 }, { - "epoch": 0.4333078296352971, + "epoch": 0.5982862508803506, "grad_norm": 0.0, - "learning_rate": 1.2615219024192636e-05, - "loss": 0.9833, + "learning_rate": 7.335286538073472e-06, + "loss": 0.9904, "step": 15291 }, { - "epoch": 0.43333616707755956, + "epoch": 0.59832537757258, "grad_norm": 0.0, - "learning_rate": 1.261433316523714e-05, - "loss": 0.9844, + "learning_rate": 7.334065147836359e-06, + "loss": 0.827, "step": 15292 }, { - "epoch": 0.43336450451982206, + "epoch": 0.5983645042648095, "grad_norm": 0.0, - "learning_rate": 1.2613447284260144e-05, - "loss": 0.88, + "learning_rate": 7.33284380041082e-06, + "loss": 1.0227, "step": 15293 }, { - "epoch": 0.4333928419620845, + "epoch": 0.5984036309570389, "grad_norm": 0.0, - "learning_rate": 1.2612561381269113e-05, - "loss": 0.917, + "learning_rate": 7.3316224958164654e-06, + "loss": 1.0283, "step": 15294 }, { - "epoch": 0.433421179404347, + "epoch": 0.5984427576492684, "grad_norm": 0.0, - "learning_rate": 1.2611675456271505e-05, - "loss": 0.8994, + "learning_rate": 7.33040123407291e-06, + "loss": 1.0678, "step": 15295 }, { - "epoch": 0.43344951684660943, + "epoch": 0.5984818843414977, "grad_norm": 0.0, - "learning_rate": 1.261078950927479e-05, - "loss": 0.8637, + "learning_rate": 7.329180015199767e-06, + "loss": 1.029, "step": 15296 }, { - "epoch": 0.43347785428887187, + "epoch": 0.5985210110337272, "grad_norm": 0.0, - "learning_rate": 1.2609903540286424e-05, - "loss": 0.9579, + "learning_rate": 7.327958839216647e-06, + "loss": 1.0587, "step": 15297 }, { - "epoch": 0.43350619173113436, + "epoch": 0.5985601377259566, "grad_norm": 0.0, - "learning_rate": 1.2609017549313867e-05, - "loss": 1.0674, + "learning_rate": 7.326737706143159e-06, + "loss": 0.9697, "step": 15298 }, { - "epoch": 0.4335345291733968, + "epoch": 0.5985992644181861, "grad_norm": 0.0, - "learning_rate": 1.260813153636459e-05, - "loss": 0.9268, + "learning_rate": 7.325516615998915e-06, + "loss": 1.0176, "step": 15299 }, { - "epoch": 0.4335628666156593, + "epoch": 0.5986383911104155, "grad_norm": 0.0, - "learning_rate": 1.2607245501446051e-05, - "loss": 1.0044, + "learning_rate": 7.324295568803517e-06, + "loss": 1.0544, "step": 15300 }, { - "epoch": 0.43359120405792173, + "epoch": 0.598677517802645, "grad_norm": 0.0, - "learning_rate": 1.2606359444565715e-05, - "loss": 0.8106, + "learning_rate": 7.3230745645765845e-06, + "loss": 0.9903, "step": 15301 }, { - "epoch": 0.43361954150018417, + "epoch": 0.5987166444948744, "grad_norm": 0.0, - "learning_rate": 1.2605473365731047e-05, - "loss": 0.9335, + "learning_rate": 7.321853603337719e-06, + "loss": 1.0142, "step": 15302 }, { - "epoch": 0.43364787894244666, + "epoch": 0.5987557711871039, "grad_norm": 0.0, - "learning_rate": 1.2604587264949506e-05, - "loss": 0.9384, + "learning_rate": 7.32063268510653e-06, + "loss": 1.0018, "step": 15303 }, { - "epoch": 0.4336762163847091, + "epoch": 0.5987948978793333, "grad_norm": 0.0, - "learning_rate": 1.2603701142228564e-05, - "loss": 0.9219, + "learning_rate": 7.3194118099026175e-06, + "loss": 1.0961, "step": 15304 }, { - "epoch": 0.4337045538269716, + "epoch": 0.5988340245715628, "grad_norm": 0.0, - "learning_rate": 1.2602814997575677e-05, - "loss": 0.9185, + "learning_rate": 7.318190977745598e-06, + "loss": 0.8494, "step": 15305 }, { - "epoch": 0.43373289126923403, + "epoch": 0.5988731512637921, "grad_norm": 0.0, - "learning_rate": 1.2601928830998314e-05, - "loss": 0.9109, + "learning_rate": 7.316970188655069e-06, + "loss": 1.0252, "step": 15306 }, { - "epoch": 0.4337612287114965, + "epoch": 0.5989122779560216, "grad_norm": 0.0, - "learning_rate": 1.2601042642503935e-05, - "loss": 0.9655, + "learning_rate": 7.315749442650638e-06, + "loss": 1.0198, "step": 15307 }, { - "epoch": 0.43378956615375897, + "epoch": 0.598951404648251, "grad_norm": 0.0, - "learning_rate": 1.2600156432100012e-05, - "loss": 1.0187, + "learning_rate": 7.314528739751907e-06, + "loss": 1.1005, "step": 15308 }, { - "epoch": 0.4338179035960214, + "epoch": 0.5989905313404805, "grad_norm": 0.0, - "learning_rate": 1.2599270199794008e-05, - "loss": 0.8582, + "learning_rate": 7.3133080799784765e-06, + "loss": 0.9572, "step": 15309 }, { - "epoch": 0.4338462410382839, + "epoch": 0.5990296580327099, "grad_norm": 0.0, - "learning_rate": 1.2598383945593382e-05, - "loss": 0.9933, + "learning_rate": 7.312087463349954e-06, + "loss": 1.0054, "step": 15310 }, { - "epoch": 0.43387457848054634, + "epoch": 0.5990687847249394, "grad_norm": 0.0, - "learning_rate": 1.2597497669505603e-05, - "loss": 0.8854, + "learning_rate": 7.310866889885939e-06, + "loss": 0.9822, "step": 15311 }, { - "epoch": 0.43390291592280883, + "epoch": 0.5991079114171688, "grad_norm": 0.0, - "learning_rate": 1.2596611371538135e-05, - "loss": 0.9075, + "learning_rate": 7.309646359606033e-06, + "loss": 0.9486, "step": 15312 }, { - "epoch": 0.43393125336507127, + "epoch": 0.5991470381093982, "grad_norm": 0.0, - "learning_rate": 1.2595725051698448e-05, - "loss": 0.8768, + "learning_rate": 7.308425872529829e-06, + "loss": 1.1118, "step": 15313 }, { - "epoch": 0.4339595908073337, + "epoch": 0.5991861648016277, "grad_norm": 0.0, - "learning_rate": 1.2594838709994007e-05, - "loss": 0.8938, + "learning_rate": 7.307205428676939e-06, + "loss": 1.0546, "step": 15314 }, { - "epoch": 0.4339879282495962, + "epoch": 0.5992252914938571, "grad_norm": 0.0, - "learning_rate": 1.2593952346432273e-05, - "loss": 0.9626, + "learning_rate": 7.305985028066955e-06, + "loss": 1.1009, "step": 15315 }, { - "epoch": 0.43401626569185864, + "epoch": 0.5992644181860866, "grad_norm": 0.0, - "learning_rate": 1.2593065961020714e-05, - "loss": 0.8257, + "learning_rate": 7.304764670719476e-06, + "loss": 0.9735, "step": 15316 }, { - "epoch": 0.43404460313412113, + "epoch": 0.5993035448783159, "grad_norm": 0.0, - "learning_rate": 1.25921795537668e-05, - "loss": 1.0307, + "learning_rate": 7.303544356654098e-06, + "loss": 1.0572, "step": 15317 }, { - "epoch": 0.43407294057638357, + "epoch": 0.5993426715705454, "grad_norm": 0.0, - "learning_rate": 1.2591293124677992e-05, - "loss": 0.923, + "learning_rate": 7.302324085890416e-06, + "loss": 1.0873, "step": 15318 }, { - "epoch": 0.43410127801864606, + "epoch": 0.5993817982627748, "grad_norm": 0.0, - "learning_rate": 1.2590406673761762e-05, - "loss": 0.9345, + "learning_rate": 7.301103858448032e-06, + "loss": 1.0533, "step": 15319 }, { - "epoch": 0.4341296154609085, + "epoch": 0.5994209249550043, "grad_norm": 0.0, - "learning_rate": 1.2589520201025576e-05, - "loss": 0.9254, + "learning_rate": 7.299883674346538e-06, + "loss": 1.001, "step": 15320 }, { - "epoch": 0.43415795290317094, + "epoch": 0.5994600516472337, "grad_norm": 0.0, - "learning_rate": 1.2588633706476898e-05, - "loss": 0.8756, + "learning_rate": 7.29866353360553e-06, + "loss": 0.7968, "step": 15321 }, { - "epoch": 0.43418629034543343, + "epoch": 0.5994991783394632, "grad_norm": 0.0, - "learning_rate": 1.2587747190123198e-05, - "loss": 0.7559, + "learning_rate": 7.297443436244595e-06, + "loss": 0.9496, "step": 15322 }, { - "epoch": 0.4342146277876959, + "epoch": 0.5995383050316926, "grad_norm": 0.0, - "learning_rate": 1.2586860651971941e-05, - "loss": 0.8808, + "learning_rate": 7.296223382283336e-06, + "loss": 0.905, "step": 15323 }, { - "epoch": 0.43424296522995837, + "epoch": 0.5995774317239221, "grad_norm": 0.0, - "learning_rate": 1.2585974092030597e-05, - "loss": 0.8639, + "learning_rate": 7.295003371741343e-06, + "loss": 0.9558, "step": 15324 }, { - "epoch": 0.4342713026722208, + "epoch": 0.5996165584161515, "grad_norm": 0.0, - "learning_rate": 1.2585087510306633e-05, - "loss": 1.0055, + "learning_rate": 7.293783404638206e-06, + "loss": 1.0091, "step": 15325 }, { - "epoch": 0.43429964011448324, + "epoch": 0.599655685108381, "grad_norm": 0.0, - "learning_rate": 1.2584200906807517e-05, - "loss": 0.9754, + "learning_rate": 7.292563480993515e-06, + "loss": 0.9851, "step": 15326 }, { - "epoch": 0.43432797755674574, + "epoch": 0.5996948118006104, "grad_norm": 0.0, - "learning_rate": 1.2583314281540718e-05, - "loss": 0.878, + "learning_rate": 7.291343600826859e-06, + "loss": 0.9469, "step": 15327 }, { - "epoch": 0.4343563149990082, + "epoch": 0.5997339384928398, "grad_norm": 0.0, - "learning_rate": 1.2582427634513701e-05, - "loss": 0.9861, + "learning_rate": 7.2901237641578345e-06, + "loss": 0.9205, "step": 15328 }, { - "epoch": 0.43438465244127067, + "epoch": 0.5997730651850692, "grad_norm": 0.0, - "learning_rate": 1.258154096573394e-05, - "loss": 0.8573, + "learning_rate": 7.288903971006027e-06, + "loss": 1.0097, "step": 15329 }, { - "epoch": 0.4344129898835331, + "epoch": 0.5998121918772987, "grad_norm": 0.0, - "learning_rate": 1.25806542752089e-05, - "loss": 0.9108, + "learning_rate": 7.287684221391025e-06, + "loss": 1.0263, "step": 15330 }, { - "epoch": 0.4344413273257956, + "epoch": 0.5998513185695281, "grad_norm": 0.0, - "learning_rate": 1.2579767562946048e-05, - "loss": 0.7834, + "learning_rate": 7.286464515332412e-06, + "loss": 1.2154, "step": 15331 }, { - "epoch": 0.43446966476805804, + "epoch": 0.5998904452617576, "grad_norm": 0.0, - "learning_rate": 1.2578880828952857e-05, - "loss": 0.9277, + "learning_rate": 7.285244852849782e-06, + "loss": 1.0469, "step": 15332 }, { - "epoch": 0.4344980022103205, + "epoch": 0.599929571953987, "grad_norm": 0.0, - "learning_rate": 1.2577994073236797e-05, - "loss": 0.8307, + "learning_rate": 7.284025233962715e-06, + "loss": 1.0066, "step": 15333 }, { - "epoch": 0.43452633965258297, + "epoch": 0.5999686986462165, "grad_norm": 0.0, - "learning_rate": 1.2577107295805334e-05, - "loss": 0.8806, + "learning_rate": 7.282805658690801e-06, + "loss": 1.0831, "step": 15334 }, { - "epoch": 0.4345546770948454, + "epoch": 0.6000078253384459, "grad_norm": 0.0, - "learning_rate": 1.2576220496665942e-05, - "loss": 0.9711, + "learning_rate": 7.281586127053625e-06, + "loss": 0.965, "step": 15335 }, { - "epoch": 0.4345830145371079, + "epoch": 0.6000469520306754, "grad_norm": 0.0, - "learning_rate": 1.2575333675826084e-05, - "loss": 1.0025, + "learning_rate": 7.280366639070766e-06, + "loss": 1.0441, "step": 15336 }, { - "epoch": 0.43461135197937034, + "epoch": 0.6000860787229048, "grad_norm": 0.0, - "learning_rate": 1.257444683329324e-05, - "loss": 0.8928, + "learning_rate": 7.279147194761813e-06, + "loss": 1.1962, "step": 15337 }, { - "epoch": 0.4346396894216328, + "epoch": 0.6001252054151343, "grad_norm": 0.0, - "learning_rate": 1.257355996907487e-05, - "loss": 0.9359, + "learning_rate": 7.277927794146345e-06, + "loss": 0.9602, "step": 15338 }, { - "epoch": 0.4346680268638953, + "epoch": 0.6001643321073636, "grad_norm": 0.0, - "learning_rate": 1.2572673083178448e-05, - "loss": 0.9091, + "learning_rate": 7.276708437243949e-06, + "loss": 0.9677, "step": 15339 }, { - "epoch": 0.4346963643061577, + "epoch": 0.6002034587995931, "grad_norm": 0.0, - "learning_rate": 1.2571786175611445e-05, - "loss": 0.879, + "learning_rate": 7.275489124074198e-06, + "loss": 0.9831, "step": 15340 }, { - "epoch": 0.4347247017484202, + "epoch": 0.6002425854918225, "grad_norm": 0.0, - "learning_rate": 1.2570899246381334e-05, - "loss": 0.7887, + "learning_rate": 7.274269854656682e-06, + "loss": 1.0626, "step": 15341 }, { - "epoch": 0.43475303919068264, + "epoch": 0.6002817121840519, "grad_norm": 0.0, - "learning_rate": 1.2570012295495583e-05, - "loss": 0.8942, + "learning_rate": 7.273050629010976e-06, + "loss": 1.1107, "step": 15342 }, { - "epoch": 0.43478137663294514, + "epoch": 0.6003208388762814, "grad_norm": 0.0, - "learning_rate": 1.2569125322961667e-05, - "loss": 0.9888, + "learning_rate": 7.271831447156661e-06, + "loss": 1.1134, "step": 15343 }, { - "epoch": 0.4348097140752076, + "epoch": 0.6003599655685108, "grad_norm": 0.0, - "learning_rate": 1.256823832878705e-05, - "loss": 0.8392, + "learning_rate": 7.270612309113309e-06, + "loss": 0.9542, "step": 15344 }, { - "epoch": 0.43483805151747, + "epoch": 0.6003990922607403, "grad_norm": 0.0, - "learning_rate": 1.256735131297921e-05, - "loss": 0.8488, + "learning_rate": 7.2693932149005065e-06, + "loss": 1.0117, "step": 15345 }, { - "epoch": 0.4348663889597325, + "epoch": 0.6004382189529697, "grad_norm": 0.0, - "learning_rate": 1.2566464275545615e-05, - "loss": 0.7903, + "learning_rate": 7.268174164537829e-06, + "loss": 0.9781, "step": 15346 }, { - "epoch": 0.43489472640199495, + "epoch": 0.6004773456451992, "grad_norm": 0.0, - "learning_rate": 1.2565577216493743e-05, - "loss": 0.8483, + "learning_rate": 7.266955158044851e-06, + "loss": 0.9253, "step": 15347 }, { - "epoch": 0.43492306384425744, + "epoch": 0.6005164723374286, "grad_norm": 0.0, - "learning_rate": 1.2564690135831057e-05, - "loss": 0.9429, + "learning_rate": 7.2657361954411465e-06, + "loss": 1.015, "step": 15348 }, { - "epoch": 0.4349514012865199, + "epoch": 0.600555599029658, "grad_norm": 0.0, - "learning_rate": 1.2563803033565034e-05, - "loss": 1.0251, + "learning_rate": 7.26451727674629e-06, + "loss": 0.9551, "step": 15349 }, { - "epoch": 0.4349797387287823, + "epoch": 0.6005947257218874, "grad_norm": 0.0, - "learning_rate": 1.2562915909703149e-05, - "loss": 0.8687, + "learning_rate": 7.263298401979862e-06, + "loss": 0.9953, "step": 15350 }, { - "epoch": 0.4350080761710448, + "epoch": 0.6006338524141169, "grad_norm": 0.0, - "learning_rate": 1.2562028764252867e-05, - "loss": 0.9628, + "learning_rate": 7.2620795711614335e-06, + "loss": 0.9272, "step": 15351 }, { - "epoch": 0.43503641361330725, + "epoch": 0.6006729791063463, "grad_norm": 0.0, - "learning_rate": 1.2561141597221667e-05, - "loss": 0.8214, + "learning_rate": 7.260860784310575e-06, + "loss": 0.9571, "step": 15352 }, { - "epoch": 0.43506475105556974, + "epoch": 0.6007121057985758, "grad_norm": 0.0, - "learning_rate": 1.2560254408617022e-05, - "loss": 0.8424, + "learning_rate": 7.259642041446856e-06, + "loss": 0.9416, "step": 15353 }, { - "epoch": 0.4350930884978322, + "epoch": 0.6007512324908052, "grad_norm": 0.0, - "learning_rate": 1.2559367198446401e-05, - "loss": 0.9645, + "learning_rate": 7.258423342589857e-06, + "loss": 1.1089, "step": 15354 }, { - "epoch": 0.4351214259400946, + "epoch": 0.6007903591830347, "grad_norm": 0.0, - "learning_rate": 1.2558479966717282e-05, - "loss": 0.9078, + "learning_rate": 7.257204687759143e-06, + "loss": 1.0948, "step": 15355 }, { - "epoch": 0.4351497633823571, + "epoch": 0.6008294858752641, "grad_norm": 0.0, - "learning_rate": 1.2557592713437137e-05, - "loss": 0.8429, + "learning_rate": 7.255986076974284e-06, + "loss": 0.9968, "step": 15356 }, { - "epoch": 0.43517810082461955, + "epoch": 0.6008686125674936, "grad_norm": 0.0, - "learning_rate": 1.2556705438613437e-05, - "loss": 0.9093, + "learning_rate": 7.254767510254852e-06, + "loss": 0.9562, "step": 15357 }, { - "epoch": 0.43520643826688205, + "epoch": 0.600907739259723, "grad_norm": 0.0, - "learning_rate": 1.2555818142253656e-05, - "loss": 0.8918, + "learning_rate": 7.253548987620408e-06, + "loss": 0.9631, "step": 15358 }, { - "epoch": 0.4352347757091445, + "epoch": 0.6009468659519525, "grad_norm": 0.0, - "learning_rate": 1.2554930824365273e-05, - "loss": 0.9569, + "learning_rate": 7.2523305090905305e-06, + "loss": 1.1223, "step": 15359 }, { - "epoch": 0.435263113151407, + "epoch": 0.6009859926441818, "grad_norm": 0.0, - "learning_rate": 1.2554043484955757e-05, - "loss": 0.931, + "learning_rate": 7.251112074684783e-06, + "loss": 1.1151, "step": 15360 }, { - "epoch": 0.4352914505936694, + "epoch": 0.6010251193364113, "grad_norm": 0.0, - "learning_rate": 1.2553156124032585e-05, - "loss": 0.9728, + "learning_rate": 7.24989368442273e-06, + "loss": 0.8951, "step": 15361 }, { - "epoch": 0.43531978803593185, + "epoch": 0.6010642460286407, "grad_norm": 0.0, - "learning_rate": 1.2552268741603232e-05, - "loss": 0.8788, + "learning_rate": 7.248675338323934e-06, + "loss": 1.0047, "step": 15362 }, { - "epoch": 0.43534812547819435, + "epoch": 0.6011033727208702, "grad_norm": 0.0, - "learning_rate": 1.2551381337675168e-05, - "loss": 1.0237, + "learning_rate": 7.247457036407968e-06, + "loss": 1.0082, "step": 15363 }, { - "epoch": 0.4353764629204568, + "epoch": 0.6011424994130996, "grad_norm": 0.0, - "learning_rate": 1.2550493912255872e-05, - "loss": 1.0065, + "learning_rate": 7.246238778694394e-06, + "loss": 0.9167, "step": 15364 }, { - "epoch": 0.4354048003627193, + "epoch": 0.6011816261053291, "grad_norm": 0.0, - "learning_rate": 1.2549606465352819e-05, - "loss": 0.9178, + "learning_rate": 7.245020565202775e-06, + "loss": 1.0746, "step": 15365 }, { - "epoch": 0.4354331378049817, + "epoch": 0.6012207527975585, "grad_norm": 0.0, - "learning_rate": 1.2548718996973488e-05, - "loss": 0.9855, + "learning_rate": 7.243802395952673e-06, + "loss": 1.0626, "step": 15366 }, { - "epoch": 0.43546147524724416, + "epoch": 0.601259879489788, "grad_norm": 0.0, - "learning_rate": 1.2547831507125348e-05, - "loss": 0.9644, + "learning_rate": 7.242584270963646e-06, + "loss": 1.017, "step": 15367 }, { - "epoch": 0.43548981268950665, + "epoch": 0.6012990061820174, "grad_norm": 0.0, - "learning_rate": 1.2546943995815876e-05, - "loss": 0.9025, + "learning_rate": 7.2413661902552654e-06, + "loss": 1.0165, "step": 15368 }, { - "epoch": 0.4355181501317691, + "epoch": 0.6013381328742468, "grad_norm": 0.0, - "learning_rate": 1.254605646305255e-05, - "loss": 0.9124, + "learning_rate": 7.240148153847086e-06, + "loss": 0.9869, "step": 15369 }, { - "epoch": 0.4355464875740316, + "epoch": 0.6013772595664763, "grad_norm": 0.0, - "learning_rate": 1.2545168908842846e-05, - "loss": 0.8205, + "learning_rate": 7.23893016175867e-06, + "loss": 1.065, "step": 15370 }, { - "epoch": 0.435574825016294, + "epoch": 0.6014163862587056, "grad_norm": 0.0, - "learning_rate": 1.2544281333194238e-05, - "loss": 0.9337, + "learning_rate": 7.237712214009571e-06, + "loss": 1.1438, "step": 15371 }, { - "epoch": 0.4356031624585565, + "epoch": 0.6014555129509351, "grad_norm": 0.0, - "learning_rate": 1.2543393736114205e-05, - "loss": 0.9464, + "learning_rate": 7.236494310619357e-06, + "loss": 0.9609, "step": 15372 }, { - "epoch": 0.43563149990081895, + "epoch": 0.6014946396431645, "grad_norm": 0.0, - "learning_rate": 1.2542506117610218e-05, - "loss": 0.9239, + "learning_rate": 7.235276451607582e-06, + "loss": 0.963, "step": 15373 }, { - "epoch": 0.4356598373430814, + "epoch": 0.601533766335394, "grad_norm": 0.0, - "learning_rate": 1.2541618477689761e-05, - "loss": 0.9926, + "learning_rate": 7.234058636993803e-06, + "loss": 1.0334, "step": 15374 }, { - "epoch": 0.4356881747853439, + "epoch": 0.6015728930276234, "grad_norm": 0.0, - "learning_rate": 1.254073081636031e-05, - "loss": 0.866, + "learning_rate": 7.232840866797575e-06, + "loss": 1.1028, "step": 15375 }, { - "epoch": 0.4357165122276063, + "epoch": 0.6016120197198529, "grad_norm": 0.0, - "learning_rate": 1.2539843133629335e-05, - "loss": 0.9422, + "learning_rate": 7.231623141038454e-06, + "loss": 0.939, "step": 15376 }, { - "epoch": 0.4357448496698688, + "epoch": 0.6016511464120823, "grad_norm": 0.0, - "learning_rate": 1.253895542950432e-05, - "loss": 0.9955, + "learning_rate": 7.230405459735996e-06, + "loss": 1.1015, "step": 15377 }, { - "epoch": 0.43577318711213126, + "epoch": 0.6016902731043118, "grad_norm": 0.0, - "learning_rate": 1.2538067703992738e-05, - "loss": 0.8563, + "learning_rate": 7.229187822909758e-06, + "loss": 1.0042, "step": 15378 }, { - "epoch": 0.4358015245543937, + "epoch": 0.6017293997965412, "grad_norm": 0.0, - "learning_rate": 1.2537179957102075e-05, - "loss": 0.9509, + "learning_rate": 7.227970230579292e-06, + "loss": 0.9468, "step": 15379 }, { - "epoch": 0.4358298619966562, + "epoch": 0.6017685264887707, "grad_norm": 0.0, - "learning_rate": 1.25362921888398e-05, - "loss": 0.8967, + "learning_rate": 7.226752682764149e-06, + "loss": 1.1544, "step": 15380 }, { - "epoch": 0.4358581994389186, + "epoch": 0.601807653181, "grad_norm": 0.0, - "learning_rate": 1.2535404399213394e-05, - "loss": 0.955, + "learning_rate": 7.225535179483882e-06, + "loss": 1.0125, "step": 15381 }, { - "epoch": 0.4358865368811811, + "epoch": 0.6018467798732295, "grad_norm": 0.0, - "learning_rate": 1.2534516588230335e-05, - "loss": 0.9649, + "learning_rate": 7.224317720758047e-06, + "loss": 0.9489, "step": 15382 }, { - "epoch": 0.43591487432344356, + "epoch": 0.6018859065654589, "grad_norm": 0.0, - "learning_rate": 1.2533628755898102e-05, - "loss": 0.9938, + "learning_rate": 7.223100306606188e-06, + "loss": 1.0533, "step": 15383 }, { - "epoch": 0.43594321176570605, + "epoch": 0.6019250332576884, "grad_norm": 0.0, - "learning_rate": 1.2532740902224171e-05, - "loss": 0.9862, + "learning_rate": 7.2218829370478596e-06, + "loss": 0.8505, "step": 15384 }, { - "epoch": 0.4359715492079685, + "epoch": 0.6019641599499178, "grad_norm": 0.0, - "learning_rate": 1.2531853027216028e-05, - "loss": 0.9429, + "learning_rate": 7.220665612102608e-06, + "loss": 1.0451, "step": 15385 }, { - "epoch": 0.43599988665023093, + "epoch": 0.6020032866421473, "grad_norm": 0.0, - "learning_rate": 1.253096513088114e-05, - "loss": 0.8684, + "learning_rate": 7.219448331789987e-06, + "loss": 1.1368, "step": 15386 }, { - "epoch": 0.4360282240924934, + "epoch": 0.6020424133343767, "grad_norm": 0.0, - "learning_rate": 1.2530077213226998e-05, - "loss": 0.9898, + "learning_rate": 7.218231096129541e-06, + "loss": 0.9502, "step": 15387 }, { - "epoch": 0.43605656153475586, + "epoch": 0.6020815400266062, "grad_norm": 0.0, - "learning_rate": 1.2529189274261078e-05, - "loss": 0.9728, + "learning_rate": 7.217013905140817e-06, + "loss": 1.0927, "step": 15388 }, { - "epoch": 0.43608489897701835, + "epoch": 0.6021206667188356, "grad_norm": 0.0, - "learning_rate": 1.2528301313990854e-05, - "loss": 0.9774, + "learning_rate": 7.215796758843361e-06, + "loss": 0.9666, "step": 15389 }, { - "epoch": 0.4361132364192808, + "epoch": 0.6021597934110651, "grad_norm": 0.0, - "learning_rate": 1.2527413332423808e-05, - "loss": 0.9383, + "learning_rate": 7.214579657256722e-06, + "loss": 0.9743, "step": 15390 }, { - "epoch": 0.43614157386154323, + "epoch": 0.6021989201032945, "grad_norm": 0.0, - "learning_rate": 1.2526525329567422e-05, - "loss": 1.0667, + "learning_rate": 7.213362600400444e-06, + "loss": 1.043, "step": 15391 }, { - "epoch": 0.4361699113038057, + "epoch": 0.602238046795524, "grad_norm": 0.0, - "learning_rate": 1.2525637305429176e-05, - "loss": 0.9288, + "learning_rate": 7.212145588294071e-06, + "loss": 1.0961, "step": 15392 }, { - "epoch": 0.43619824874606816, + "epoch": 0.6022771734877533, "grad_norm": 0.0, - "learning_rate": 1.252474926001655e-05, - "loss": 0.8821, + "learning_rate": 7.210928620957146e-06, + "loss": 1.0425, "step": 15393 }, { - "epoch": 0.43622658618833066, + "epoch": 0.6023163001799828, "grad_norm": 0.0, - "learning_rate": 1.2523861193337019e-05, - "loss": 0.7997, + "learning_rate": 7.209711698409209e-06, + "loss": 1.0359, "step": 15394 }, { - "epoch": 0.4362549236305931, + "epoch": 0.6023554268722122, "grad_norm": 0.0, - "learning_rate": 1.2522973105398073e-05, - "loss": 0.891, + "learning_rate": 7.208494820669809e-06, + "loss": 1.076, "step": 15395 }, { - "epoch": 0.4362832610728556, + "epoch": 0.6023945535644417, "grad_norm": 0.0, - "learning_rate": 1.2522084996207187e-05, - "loss": 0.9697, + "learning_rate": 7.2072779877584844e-06, + "loss": 0.9077, "step": 15396 }, { - "epoch": 0.436311598515118, + "epoch": 0.6024336802566711, "grad_norm": 0.0, - "learning_rate": 1.2521196865771839e-05, - "loss": 0.8749, + "learning_rate": 7.206061199694776e-06, + "loss": 1.062, "step": 15397 }, { - "epoch": 0.43633993595738046, + "epoch": 0.6024728069489005, "grad_norm": 0.0, - "learning_rate": 1.2520308714099513e-05, - "loss": 0.9608, + "learning_rate": 7.204844456498219e-06, + "loss": 1.0625, "step": 15398 }, { - "epoch": 0.43636827339964296, + "epoch": 0.60251193364113, "grad_norm": 0.0, - "learning_rate": 1.2519420541197696e-05, - "loss": 0.9923, + "learning_rate": 7.203627758188363e-06, + "loss": 1.0242, "step": 15399 }, { - "epoch": 0.4363966108419054, + "epoch": 0.6025510603333594, "grad_norm": 0.0, - "learning_rate": 1.2518532347073862e-05, - "loss": 0.9574, + "learning_rate": 7.202411104784741e-06, + "loss": 1.029, "step": 15400 }, { - "epoch": 0.4364249482841679, + "epoch": 0.6025901870255889, "grad_norm": 0.0, - "learning_rate": 1.2517644131735496e-05, - "loss": 0.8951, + "learning_rate": 7.20119449630689e-06, + "loss": 0.9541, "step": 15401 }, { - "epoch": 0.43645328572643033, + "epoch": 0.6026293137178182, "grad_norm": 0.0, - "learning_rate": 1.2516755895190076e-05, - "loss": 0.9068, + "learning_rate": 7.199977932774349e-06, + "loss": 0.8856, "step": 15402 }, { - "epoch": 0.43648162316869277, + "epoch": 0.6026684404100477, "grad_norm": 0.0, - "learning_rate": 1.2515867637445088e-05, - "loss": 0.8787, + "learning_rate": 7.198761414206649e-06, + "loss": 1.0279, "step": 15403 }, { - "epoch": 0.43650996061095526, + "epoch": 0.6027075671022771, "grad_norm": 0.0, - "learning_rate": 1.251497935850801e-05, - "loss": 1.0282, + "learning_rate": 7.197544940623336e-06, + "loss": 0.9492, "step": 15404 }, { - "epoch": 0.4365382980532177, + "epoch": 0.6027466937945066, "grad_norm": 0.0, - "learning_rate": 1.2514091058386331e-05, - "loss": 0.821, + "learning_rate": 7.196328512043938e-06, + "loss": 1.0249, "step": 15405 }, { - "epoch": 0.4365666354954802, + "epoch": 0.602785820486736, "grad_norm": 0.0, - "learning_rate": 1.2513202737087525e-05, - "loss": 0.9423, + "learning_rate": 7.1951121284879925e-06, + "loss": 1.1247, "step": 15406 }, { - "epoch": 0.43659497293774263, + "epoch": 0.6028249471789655, "grad_norm": 0.0, - "learning_rate": 1.2512314394619083e-05, - "loss": 1.0834, + "learning_rate": 7.193895789975025e-06, + "loss": 1.1632, "step": 15407 }, { - "epoch": 0.4366233103800051, + "epoch": 0.6028640738711949, "grad_norm": 0.0, - "learning_rate": 1.2511426030988483e-05, - "loss": 0.9046, + "learning_rate": 7.192679496524582e-06, + "loss": 1.014, "step": 15408 }, { - "epoch": 0.43665164782226756, + "epoch": 0.6029032005634244, "grad_norm": 0.0, - "learning_rate": 1.2510537646203209e-05, - "loss": 0.8727, + "learning_rate": 7.191463248156186e-06, + "loss": 1.0482, "step": 15409 }, { - "epoch": 0.43667998526453, + "epoch": 0.6029423272556538, "grad_norm": 0.0, - "learning_rate": 1.2509649240270742e-05, - "loss": 0.856, + "learning_rate": 7.190247044889372e-06, + "loss": 1.1488, "step": 15410 }, { - "epoch": 0.4367083227067925, + "epoch": 0.6029814539478833, "grad_norm": 0.0, - "learning_rate": 1.2508760813198569e-05, - "loss": 0.9843, + "learning_rate": 7.189030886743667e-06, + "loss": 0.9671, "step": 15411 }, { - "epoch": 0.43673666014905493, + "epoch": 0.6030205806401127, "grad_norm": 0.0, - "learning_rate": 1.2507872364994174e-05, - "loss": 0.8723, + "learning_rate": 7.187814773738605e-06, + "loss": 0.9042, "step": 15412 }, { - "epoch": 0.4367649975913174, + "epoch": 0.6030597073323422, "grad_norm": 0.0, - "learning_rate": 1.2506983895665036e-05, - "loss": 0.931, + "learning_rate": 7.186598705893714e-06, + "loss": 0.9302, "step": 15413 }, { - "epoch": 0.43679333503357987, + "epoch": 0.6030988340245715, "grad_norm": 0.0, - "learning_rate": 1.2506095405218646e-05, - "loss": 0.9344, + "learning_rate": 7.185382683228524e-06, + "loss": 0.9143, "step": 15414 }, { - "epoch": 0.4368216724758423, + "epoch": 0.603137960716801, "grad_norm": 0.0, - "learning_rate": 1.2505206893662478e-05, - "loss": 0.975, + "learning_rate": 7.1841667057625605e-06, + "loss": 1.0498, "step": 15415 }, { - "epoch": 0.4368500099181048, + "epoch": 0.6031770874090304, "grad_norm": 0.0, - "learning_rate": 1.2504318361004022e-05, - "loss": 1.0267, + "learning_rate": 7.182950773515348e-06, + "loss": 1.0494, "step": 15416 }, { - "epoch": 0.43687834736036724, + "epoch": 0.6032162141012599, "grad_norm": 0.0, - "learning_rate": 1.2503429807250766e-05, - "loss": 0.9072, + "learning_rate": 7.18173488650642e-06, + "loss": 0.9138, "step": 15417 }, { - "epoch": 0.43690668480262973, + "epoch": 0.6032553407934893, "grad_norm": 0.0, - "learning_rate": 1.2502541232410192e-05, - "loss": 0.9418, + "learning_rate": 7.180519044755298e-06, + "loss": 1.0837, "step": 15418 }, { - "epoch": 0.43693502224489217, + "epoch": 0.6032944674857188, "grad_norm": 0.0, - "learning_rate": 1.250165263648978e-05, - "loss": 0.9572, + "learning_rate": 7.179303248281506e-06, + "loss": 1.0708, "step": 15419 }, { - "epoch": 0.43696335968715466, + "epoch": 0.6033335941779482, "grad_norm": 0.0, - "learning_rate": 1.2500764019497022e-05, - "loss": 0.967, + "learning_rate": 7.1780874971045685e-06, + "loss": 1.0148, "step": 15420 }, { - "epoch": 0.4369916971294171, + "epoch": 0.6033727208701777, "grad_norm": 0.0, - "learning_rate": 1.24998753814394e-05, - "loss": 0.9857, + "learning_rate": 7.176871791244011e-06, + "loss": 0.9164, "step": 15421 }, { - "epoch": 0.43702003457167954, + "epoch": 0.6034118475624071, "grad_norm": 0.0, - "learning_rate": 1.2498986722324398e-05, - "loss": 0.8508, + "learning_rate": 7.175656130719354e-06, + "loss": 1.0842, "step": 15422 }, { - "epoch": 0.43704837201394203, + "epoch": 0.6034509742546366, "grad_norm": 0.0, - "learning_rate": 1.24980980421595e-05, - "loss": 0.958, + "learning_rate": 7.174440515550122e-06, + "loss": 1.0114, "step": 15423 }, { - "epoch": 0.43707670945620447, + "epoch": 0.603490100946866, "grad_norm": 0.0, - "learning_rate": 1.24972093409522e-05, - "loss": 0.8562, + "learning_rate": 7.173224945755834e-06, + "loss": 1.0162, "step": 15424 }, { - "epoch": 0.43710504689846696, + "epoch": 0.6035292276390954, "grad_norm": 0.0, - "learning_rate": 1.2496320618709979e-05, - "loss": 0.9083, + "learning_rate": 7.17200942135601e-06, + "loss": 1.1, "step": 15425 }, { - "epoch": 0.4371333843407294, + "epoch": 0.6035683543313248, "grad_norm": 0.0, - "learning_rate": 1.2495431875440319e-05, - "loss": 1.0081, + "learning_rate": 7.170793942370173e-06, + "loss": 1.09, "step": 15426 }, { - "epoch": 0.43716172178299184, + "epoch": 0.6036074810235542, "grad_norm": 0.0, - "learning_rate": 1.2494543111150707e-05, - "loss": 0.9828, + "learning_rate": 7.16957850881784e-06, + "loss": 1.0688, "step": 15427 }, { - "epoch": 0.43719005922525433, + "epoch": 0.6036466077157837, "grad_norm": 0.0, - "learning_rate": 1.249365432584864e-05, - "loss": 0.8555, + "learning_rate": 7.168363120718527e-06, + "loss": 1.134, "step": 15428 }, { - "epoch": 0.4372183966675168, + "epoch": 0.6036857344080131, "grad_norm": 0.0, - "learning_rate": 1.249276551954159e-05, - "loss": 1.0226, + "learning_rate": 7.167147778091754e-06, + "loss": 1.0267, "step": 15429 }, { - "epoch": 0.43724673410977927, + "epoch": 0.6037248611002426, "grad_norm": 0.0, - "learning_rate": 1.249187669223705e-05, - "loss": 0.8901, + "learning_rate": 7.165932480957041e-06, + "loss": 1.063, "step": 15430 }, { - "epoch": 0.4372750715520417, + "epoch": 0.603763987792472, "grad_norm": 0.0, - "learning_rate": 1.2490987843942511e-05, - "loss": 0.8178, + "learning_rate": 7.1647172293338995e-06, + "loss": 0.9891, "step": 15431 }, { - "epoch": 0.4373034089943042, + "epoch": 0.6038031144847015, "grad_norm": 0.0, - "learning_rate": 1.2490098974665454e-05, - "loss": 0.9847, + "learning_rate": 7.163502023241849e-06, + "loss": 1.0178, "step": 15432 }, { - "epoch": 0.43733174643656664, + "epoch": 0.6038422411769309, "grad_norm": 0.0, - "learning_rate": 1.2489210084413372e-05, - "loss": 0.8901, + "learning_rate": 7.162286862700398e-06, + "loss": 1.0269, "step": 15433 }, { - "epoch": 0.4373600838788291, + "epoch": 0.6038813678691604, "grad_norm": 0.0, - "learning_rate": 1.2488321173193748e-05, - "loss": 0.9344, + "learning_rate": 7.1610717477290625e-06, + "loss": 0.8747, "step": 15434 }, { - "epoch": 0.43738842132109157, + "epoch": 0.6039204945613897, "grad_norm": 0.0, - "learning_rate": 1.2487432241014068e-05, - "loss": 0.9496, + "learning_rate": 7.159856678347361e-06, + "loss": 1.0713, "step": 15435 }, { - "epoch": 0.437416758763354, + "epoch": 0.6039596212536192, "grad_norm": 0.0, - "learning_rate": 1.2486543287881822e-05, - "loss": 0.9375, + "learning_rate": 7.1586416545748026e-06, + "loss": 1.0666, "step": 15436 }, { - "epoch": 0.4374450962056165, + "epoch": 0.6039987479458486, "grad_norm": 0.0, - "learning_rate": 1.2485654313804501e-05, - "loss": 0.9937, + "learning_rate": 7.1574266764308984e-06, + "loss": 0.9949, "step": 15437 }, { - "epoch": 0.43747343364787894, + "epoch": 0.6040378746380781, "grad_norm": 0.0, - "learning_rate": 1.2484765318789591e-05, - "loss": 0.9531, + "learning_rate": 7.156211743935157e-06, + "loss": 1.0777, "step": 15438 }, { - "epoch": 0.4375017710901414, + "epoch": 0.6040770013303075, "grad_norm": 0.0, - "learning_rate": 1.2483876302844579e-05, - "loss": 0.9721, + "learning_rate": 7.154996857107094e-06, + "loss": 0.8936, "step": 15439 }, { - "epoch": 0.43753010853240387, + "epoch": 0.604116128022537, "grad_norm": 0.0, - "learning_rate": 1.2482987265976956e-05, - "loss": 0.8639, + "learning_rate": 7.153782015966218e-06, + "loss": 1.0284, "step": 15440 }, { - "epoch": 0.4375584459746663, + "epoch": 0.6041552547147664, "grad_norm": 0.0, - "learning_rate": 1.2482098208194208e-05, - "loss": 0.9125, + "learning_rate": 7.152567220532034e-06, + "loss": 0.853, "step": 15441 }, { - "epoch": 0.4375867834169288, + "epoch": 0.6041943814069959, "grad_norm": 0.0, - "learning_rate": 1.2481209129503824e-05, - "loss": 0.923, + "learning_rate": 7.151352470824053e-06, + "loss": 0.9888, "step": 15442 }, { - "epoch": 0.43761512085919124, + "epoch": 0.6042335080992253, "grad_norm": 0.0, - "learning_rate": 1.2480320029913295e-05, - "loss": 0.9025, + "learning_rate": 7.15013776686178e-06, + "loss": 0.8924, "step": 15443 }, { - "epoch": 0.43764345830145374, + "epoch": 0.6042726347914548, "grad_norm": 0.0, - "learning_rate": 1.2479430909430109e-05, - "loss": 0.9652, + "learning_rate": 7.148923108664725e-06, + "loss": 0.9446, "step": 15444 }, { - "epoch": 0.4376717957437162, + "epoch": 0.6043117614836842, "grad_norm": 0.0, - "learning_rate": 1.2478541768061758e-05, - "loss": 0.8444, + "learning_rate": 7.1477084962523935e-06, + "loss": 1.0906, "step": 15445 }, { - "epoch": 0.4377001331859786, + "epoch": 0.6043508881759136, "grad_norm": 0.0, - "learning_rate": 1.2477652605815729e-05, - "loss": 0.9021, + "learning_rate": 7.146493929644289e-06, + "loss": 0.9798, "step": 15446 }, { - "epoch": 0.4377284706282411, + "epoch": 0.604390014868143, "grad_norm": 0.0, - "learning_rate": 1.247676342269951e-05, - "loss": 0.8612, + "learning_rate": 7.145279408859912e-06, + "loss": 1.0176, "step": 15447 }, { - "epoch": 0.43775680807050354, + "epoch": 0.6044291415603725, "grad_norm": 0.0, - "learning_rate": 1.2475874218720594e-05, - "loss": 0.8646, + "learning_rate": 7.1440649339187755e-06, + "loss": 1.0325, "step": 15448 }, { - "epoch": 0.43778514551276604, + "epoch": 0.6044682682526019, "grad_norm": 0.0, - "learning_rate": 1.2474984993886467e-05, - "loss": 0.8963, + "learning_rate": 7.142850504840378e-06, + "loss": 1.0544, "step": 15449 }, { - "epoch": 0.4378134829550285, + "epoch": 0.6045073949448314, "grad_norm": 0.0, - "learning_rate": 1.2474095748204627e-05, - "loss": 0.9786, + "learning_rate": 7.141636121644219e-06, + "loss": 0.9237, "step": 15450 }, { - "epoch": 0.4378418203972909, + "epoch": 0.6045465216370608, "grad_norm": 0.0, - "learning_rate": 1.2473206481682559e-05, - "loss": 0.8729, + "learning_rate": 7.140421784349802e-06, + "loss": 1.021, "step": 15451 }, { - "epoch": 0.4378701578395534, + "epoch": 0.6045856483292903, "grad_norm": 0.0, - "learning_rate": 1.247231719432775e-05, - "loss": 0.9942, + "learning_rate": 7.1392074929766254e-06, + "loss": 1.0703, "step": 15452 }, { - "epoch": 0.43789849528181585, + "epoch": 0.6046247750215197, "grad_norm": 0.0, - "learning_rate": 1.24714278861477e-05, - "loss": 0.8646, + "learning_rate": 7.137993247544196e-06, + "loss": 0.9818, "step": 15453 }, { - "epoch": 0.43792683272407834, + "epoch": 0.6046639017137492, "grad_norm": 0.0, - "learning_rate": 1.2470538557149895e-05, - "loss": 0.9117, + "learning_rate": 7.136779048072009e-06, + "loss": 1.0215, "step": 15454 }, { - "epoch": 0.4379551701663408, + "epoch": 0.6047030284059786, "grad_norm": 0.0, - "learning_rate": 1.2469649207341823e-05, - "loss": 0.8415, + "learning_rate": 7.135564894579561e-06, + "loss": 0.9484, "step": 15455 }, { - "epoch": 0.4379835076086033, + "epoch": 0.604742155098208, "grad_norm": 0.0, - "learning_rate": 1.246875983673098e-05, - "loss": 0.8503, + "learning_rate": 7.134350787086348e-06, + "loss": 0.9944, "step": 15456 }, { - "epoch": 0.4380118450508657, + "epoch": 0.6047812817904374, "grad_norm": 0.0, - "learning_rate": 1.2467870445324857e-05, - "loss": 0.9318, + "learning_rate": 7.133136725611876e-06, + "loss": 1.1058, "step": 15457 }, { - "epoch": 0.43804018249312815, + "epoch": 0.6048204084826668, "grad_norm": 0.0, - "learning_rate": 1.2466981033130944e-05, - "loss": 0.8536, + "learning_rate": 7.131922710175634e-06, + "loss": 1.0232, "step": 15458 }, { - "epoch": 0.43806851993539064, + "epoch": 0.6048595351748963, "grad_norm": 0.0, - "learning_rate": 1.2466091600156736e-05, - "loss": 0.8574, + "learning_rate": 7.13070874079712e-06, + "loss": 0.976, "step": 15459 }, { - "epoch": 0.4380968573776531, + "epoch": 0.6048986618671257, "grad_norm": 0.0, - "learning_rate": 1.246520214640972e-05, - "loss": 0.933, + "learning_rate": 7.129494817495828e-06, + "loss": 0.9761, "step": 15460 }, { - "epoch": 0.4381251948199156, + "epoch": 0.6049377885593552, "grad_norm": 0.0, - "learning_rate": 1.2464312671897391e-05, - "loss": 0.9204, + "learning_rate": 7.128280940291248e-06, + "loss": 0.9953, "step": 15461 }, { - "epoch": 0.438153532262178, + "epoch": 0.6049769152515846, "grad_norm": 0.0, - "learning_rate": 1.2463423176627243e-05, - "loss": 0.8277, + "learning_rate": 7.127067109202883e-06, + "loss": 0.9208, "step": 15462 }, { - "epoch": 0.43818186970444045, + "epoch": 0.6050160419438141, "grad_norm": 0.0, - "learning_rate": 1.2462533660606766e-05, - "loss": 0.9818, + "learning_rate": 7.125853324250218e-06, + "loss": 1.0801, "step": 15463 }, { - "epoch": 0.43821020714670295, + "epoch": 0.6050551686360435, "grad_norm": 0.0, - "learning_rate": 1.2461644123843452e-05, - "loss": 0.9078, + "learning_rate": 7.1246395854527496e-06, + "loss": 1.0894, "step": 15464 }, { - "epoch": 0.4382385445889654, + "epoch": 0.605094295328273, "grad_norm": 0.0, - "learning_rate": 1.2460754566344797e-05, - "loss": 0.8755, + "learning_rate": 7.1234258928299624e-06, + "loss": 0.9593, "step": 15465 }, { - "epoch": 0.4382668820312279, + "epoch": 0.6051334220205024, "grad_norm": 0.0, - "learning_rate": 1.2459864988118293e-05, - "loss": 0.8844, + "learning_rate": 7.122212246401355e-06, + "loss": 1.025, "step": 15466 }, { - "epoch": 0.4382952194734903, + "epoch": 0.6051725487127319, "grad_norm": 0.0, - "learning_rate": 1.2458975389171434e-05, - "loss": 0.877, + "learning_rate": 7.120998646186409e-06, + "loss": 1.1063, "step": 15467 }, { - "epoch": 0.4383235569157528, + "epoch": 0.6052116754049612, "grad_norm": 0.0, - "learning_rate": 1.245808576951171e-05, - "loss": 0.8839, + "learning_rate": 7.11978509220462e-06, + "loss": 1.0483, "step": 15468 }, { - "epoch": 0.43835189435801525, + "epoch": 0.6052508020971907, "grad_norm": 0.0, - "learning_rate": 1.2457196129146616e-05, - "loss": 0.8385, + "learning_rate": 7.118571584475471e-06, + "loss": 1.0776, "step": 15469 }, { - "epoch": 0.4383802318002777, + "epoch": 0.6052899287894201, "grad_norm": 0.0, - "learning_rate": 1.2456306468083647e-05, - "loss": 0.933, + "learning_rate": 7.117358123018454e-06, + "loss": 1.0816, "step": 15470 }, { - "epoch": 0.4384085692425402, + "epoch": 0.6053290554816496, "grad_norm": 0.0, - "learning_rate": 1.2455416786330299e-05, - "loss": 0.7124, + "learning_rate": 7.116144707853052e-06, + "loss": 1.1025, "step": 15471 }, { - "epoch": 0.4384369066848026, + "epoch": 0.605368182173879, "grad_norm": 0.0, - "learning_rate": 1.2454527083894061e-05, - "loss": 0.9511, + "learning_rate": 7.114931338998752e-06, + "loss": 0.9943, "step": 15472 }, { - "epoch": 0.4384652441270651, + "epoch": 0.6054073088661085, "grad_norm": 0.0, - "learning_rate": 1.2453637360782432e-05, - "loss": 0.8651, + "learning_rate": 7.113718016475041e-06, + "loss": 1.0019, "step": 15473 }, { - "epoch": 0.43849358156932755, + "epoch": 0.6054464355583379, "grad_norm": 0.0, - "learning_rate": 1.2452747617002902e-05, - "loss": 0.9698, + "learning_rate": 7.1125047403014005e-06, + "loss": 0.8967, "step": 15474 }, { - "epoch": 0.43852191901159, + "epoch": 0.6054855622505674, "grad_norm": 0.0, - "learning_rate": 1.2451857852562967e-05, - "loss": 1.0294, + "learning_rate": 7.111291510497316e-06, + "loss": 0.9877, "step": 15475 }, { - "epoch": 0.4385502564538525, + "epoch": 0.6055246889427968, "grad_norm": 0.0, - "learning_rate": 1.2450968067470126e-05, - "loss": 0.8765, + "learning_rate": 7.110078327082271e-06, + "loss": 0.9562, "step": 15476 }, { - "epoch": 0.4385785938961149, + "epoch": 0.6055638156350263, "grad_norm": 0.0, - "learning_rate": 1.2450078261731869e-05, - "loss": 0.9255, + "learning_rate": 7.108865190075747e-06, + "loss": 1.1375, "step": 15477 }, { - "epoch": 0.4386069313383774, + "epoch": 0.6056029423272556, "grad_norm": 0.0, - "learning_rate": 1.2449188435355695e-05, - "loss": 0.947, + "learning_rate": 7.107652099497222e-06, + "loss": 0.998, "step": 15478 }, { - "epoch": 0.43863526878063985, + "epoch": 0.6056420690194851, "grad_norm": 0.0, - "learning_rate": 1.2448298588349097e-05, - "loss": 0.9204, + "learning_rate": 7.106439055366182e-06, + "loss": 1.024, "step": 15479 }, { - "epoch": 0.43866360622290235, + "epoch": 0.6056811957117145, "grad_norm": 0.0, - "learning_rate": 1.244740872071957e-05, - "loss": 0.9698, + "learning_rate": 7.105226057702106e-06, + "loss": 1.0534, "step": 15480 }, { - "epoch": 0.4386919436651648, + "epoch": 0.605720322403944, "grad_norm": 0.0, - "learning_rate": 1.2446518832474609e-05, - "loss": 0.8581, + "learning_rate": 7.104013106524472e-06, + "loss": 1.03, "step": 15481 }, { - "epoch": 0.4387202811074272, + "epoch": 0.6057594490961734, "grad_norm": 0.0, - "learning_rate": 1.244562892362171e-05, - "loss": 0.8491, + "learning_rate": 7.10280020185276e-06, + "loss": 1.0192, "step": 15482 }, { - "epoch": 0.4387486185496897, + "epoch": 0.6057985757884028, "grad_norm": 0.0, - "learning_rate": 1.2444738994168374e-05, - "loss": 0.8758, + "learning_rate": 7.101587343706441e-06, + "loss": 1.117, "step": 15483 }, { - "epoch": 0.43877695599195216, + "epoch": 0.6058377024806323, "grad_norm": 0.0, - "learning_rate": 1.2443849044122094e-05, - "loss": 0.9331, + "learning_rate": 7.100374532105001e-06, + "loss": 1.0377, "step": 15484 }, { - "epoch": 0.43880529343421465, + "epoch": 0.6058768291728617, "grad_norm": 0.0, - "learning_rate": 1.2442959073490365e-05, - "loss": 0.9197, + "learning_rate": 7.099161767067914e-06, + "loss": 1.0241, "step": 15485 }, { - "epoch": 0.4388336308764771, + "epoch": 0.6059159558650912, "grad_norm": 0.0, - "learning_rate": 1.2442069082280683e-05, - "loss": 0.8001, + "learning_rate": 7.097949048614653e-06, + "loss": 1.0181, "step": 15486 }, { - "epoch": 0.4388619683187395, + "epoch": 0.6059550825573206, "grad_norm": 0.0, - "learning_rate": 1.2441179070500549e-05, - "loss": 0.9076, + "learning_rate": 7.09673637676469e-06, + "loss": 1.1035, "step": 15487 }, { - "epoch": 0.438890305761002, + "epoch": 0.60599420924955, "grad_norm": 0.0, - "learning_rate": 1.2440289038157455e-05, - "loss": 0.9167, + "learning_rate": 7.095523751537506e-06, + "loss": 0.9808, "step": 15488 }, { - "epoch": 0.43891864320326446, + "epoch": 0.6060333359417794, "grad_norm": 0.0, - "learning_rate": 1.24393989852589e-05, - "loss": 0.9606, + "learning_rate": 7.094311172952573e-06, + "loss": 0.9968, "step": 15489 }, { - "epoch": 0.43894698064552695, + "epoch": 0.6060724626340089, "grad_norm": 0.0, - "learning_rate": 1.2438508911812384e-05, - "loss": 0.8447, + "learning_rate": 7.093098641029359e-06, + "loss": 1.0497, "step": 15490 }, { - "epoch": 0.4389753180877894, + "epoch": 0.6061115893262383, "grad_norm": 0.0, - "learning_rate": 1.2437618817825402e-05, - "loss": 0.7817, + "learning_rate": 7.09188615578734e-06, + "loss": 1.0144, "step": 15491 }, { - "epoch": 0.4390036555300519, + "epoch": 0.6061507160184678, "grad_norm": 0.0, - "learning_rate": 1.243672870330545e-05, - "loss": 0.9172, + "learning_rate": 7.090673717245982e-06, + "loss": 1.01, "step": 15492 }, { - "epoch": 0.4390319929723143, + "epoch": 0.6061898427106972, "grad_norm": 0.0, - "learning_rate": 1.2435838568260026e-05, - "loss": 0.8129, + "learning_rate": 7.08946132542476e-06, + "loss": 1.0027, "step": 15493 }, { - "epoch": 0.43906033041457676, + "epoch": 0.6062289694029267, "grad_norm": 0.0, - "learning_rate": 1.243494841269663e-05, - "loss": 0.9411, + "learning_rate": 7.0882489803431444e-06, + "loss": 1.0525, "step": 15494 }, { - "epoch": 0.43908866785683925, + "epoch": 0.6062680960951561, "grad_norm": 0.0, - "learning_rate": 1.2434058236622759e-05, - "loss": 0.8756, + "learning_rate": 7.087036682020601e-06, + "loss": 1.0443, "step": 15495 }, { - "epoch": 0.4391170052991017, + "epoch": 0.6063072227873856, "grad_norm": 0.0, - "learning_rate": 1.2433168040045912e-05, - "loss": 1.0544, + "learning_rate": 7.085824430476593e-06, + "loss": 0.9262, "step": 15496 }, { - "epoch": 0.4391453427413642, + "epoch": 0.606346349479615, "grad_norm": 0.0, - "learning_rate": 1.2432277822973588e-05, - "loss": 0.9207, + "learning_rate": 7.084612225730599e-06, + "loss": 0.9485, "step": 15497 }, { - "epoch": 0.4391736801836266, + "epoch": 0.6063854761718445, "grad_norm": 0.0, - "learning_rate": 1.2431387585413283e-05, - "loss": 0.8758, + "learning_rate": 7.083400067802078e-06, + "loss": 0.9993, "step": 15498 }, { - "epoch": 0.43920201762588906, + "epoch": 0.6064246028640738, "grad_norm": 0.0, - "learning_rate": 1.2430497327372502e-05, - "loss": 0.9276, + "learning_rate": 7.082187956710498e-06, + "loss": 0.9903, "step": 15499 }, { - "epoch": 0.43923035506815156, + "epoch": 0.6064637295563033, "grad_norm": 0.0, - "learning_rate": 1.2429607048858737e-05, - "loss": 0.8621, + "learning_rate": 7.080975892475322e-06, + "loss": 1.0499, "step": 15500 }, { - "epoch": 0.439258692510414, + "epoch": 0.6065028562485327, "grad_norm": 0.0, - "learning_rate": 1.2428716749879485e-05, - "loss": 0.838, + "learning_rate": 7.079763875116012e-06, + "loss": 0.9592, "step": 15501 }, { - "epoch": 0.4392870299526765, + "epoch": 0.6065419829407622, "grad_norm": 0.0, - "learning_rate": 1.2427826430442253e-05, - "loss": 0.9741, + "learning_rate": 7.078551904652039e-06, + "loss": 0.9802, "step": 15502 }, { - "epoch": 0.4393153673949389, + "epoch": 0.6065811096329916, "grad_norm": 0.0, - "learning_rate": 1.242693609055454e-05, - "loss": 0.9081, + "learning_rate": 7.077339981102862e-06, + "loss": 1.097, "step": 15503 }, { - "epoch": 0.4393437048372014, + "epoch": 0.6066202363252211, "grad_norm": 0.0, - "learning_rate": 1.2426045730223842e-05, - "loss": 0.8723, + "learning_rate": 7.076128104487943e-06, + "loss": 0.9677, "step": 15504 }, { - "epoch": 0.43937204227946386, + "epoch": 0.6066593630174505, "grad_norm": 0.0, - "learning_rate": 1.242515534945766e-05, - "loss": 0.8718, + "learning_rate": 7.074916274826737e-06, + "loss": 0.9329, "step": 15505 }, { - "epoch": 0.4394003797217263, + "epoch": 0.60669848970968, "grad_norm": 0.0, - "learning_rate": 1.2424264948263492e-05, - "loss": 1.0234, + "learning_rate": 7.0737044921387155e-06, + "loss": 1.1151, "step": 15506 }, { - "epoch": 0.4394287171639888, + "epoch": 0.6067376164019094, "grad_norm": 0.0, - "learning_rate": 1.2423374526648841e-05, - "loss": 0.928, + "learning_rate": 7.072492756443333e-06, + "loss": 1.0127, "step": 15507 }, { - "epoch": 0.43945705460625123, + "epoch": 0.6067767430941389, "grad_norm": 0.0, - "learning_rate": 1.2422484084621205e-05, - "loss": 0.9221, + "learning_rate": 7.071281067760048e-06, + "loss": 0.8442, "step": 15508 }, { - "epoch": 0.4394853920485137, + "epoch": 0.6068158697863683, "grad_norm": 0.0, - "learning_rate": 1.2421593622188088e-05, - "loss": 0.8167, + "learning_rate": 7.07006942610832e-06, + "loss": 1.1017, "step": 15509 }, { - "epoch": 0.43951372949077616, + "epoch": 0.6068549964785978, "grad_norm": 0.0, - "learning_rate": 1.2420703139356987e-05, - "loss": 0.947, + "learning_rate": 7.0688578315076025e-06, + "loss": 0.9815, "step": 15510 }, { - "epoch": 0.4395420669330386, + "epoch": 0.6068941231708271, "grad_norm": 0.0, - "learning_rate": 1.2419812636135406e-05, - "loss": 0.9158, + "learning_rate": 7.067646283977356e-06, + "loss": 0.9568, "step": 15511 }, { - "epoch": 0.4395704043753011, + "epoch": 0.6069332498630565, "grad_norm": 0.0, - "learning_rate": 1.2418922112530847e-05, - "loss": 0.8859, + "learning_rate": 7.066434783537039e-06, + "loss": 1.0534, "step": 15512 }, { - "epoch": 0.43959874181756353, + "epoch": 0.606972376555286, "grad_norm": 0.0, - "learning_rate": 1.2418031568550805e-05, - "loss": 0.9984, + "learning_rate": 7.065223330206101e-06, + "loss": 0.9933, "step": 15513 }, { - "epoch": 0.439627079259826, + "epoch": 0.6070115032475154, "grad_norm": 0.0, - "learning_rate": 1.2417141004202787e-05, - "loss": 0.9337, + "learning_rate": 7.064011924004e-06, + "loss": 0.9559, "step": 15514 }, { - "epoch": 0.43965541670208846, + "epoch": 0.6070506299397449, "grad_norm": 0.0, - "learning_rate": 1.2416250419494292e-05, - "loss": 0.8895, + "learning_rate": 7.0628005649501875e-06, + "loss": 0.9718, "step": 15515 }, { - "epoch": 0.43968375414435096, + "epoch": 0.6070897566319743, "grad_norm": 0.0, - "learning_rate": 1.2415359814432822e-05, - "loss": 0.9171, + "learning_rate": 7.061589253064118e-06, + "loss": 0.9789, "step": 15516 }, { - "epoch": 0.4397120915866134, + "epoch": 0.6071288833242038, "grad_norm": 0.0, - "learning_rate": 1.2414469189025881e-05, - "loss": 0.9199, + "learning_rate": 7.060377988365241e-06, + "loss": 1.0289, "step": 15517 }, { - "epoch": 0.43974042902887583, + "epoch": 0.6071680100164332, "grad_norm": 0.0, - "learning_rate": 1.2413578543280967e-05, - "loss": 1.0067, + "learning_rate": 7.059166770873013e-06, + "loss": 1.0103, "step": 15518 }, { - "epoch": 0.43976876647113833, + "epoch": 0.6072071367086627, "grad_norm": 0.0, - "learning_rate": 1.2412687877205587e-05, - "loss": 0.8593, + "learning_rate": 7.057955600606879e-06, + "loss": 1.1068, "step": 15519 }, { - "epoch": 0.43979710391340077, + "epoch": 0.607246263400892, "grad_norm": 0.0, - "learning_rate": 1.241179719080724e-05, - "loss": 0.8446, + "learning_rate": 7.056744477586293e-06, + "loss": 0.953, "step": 15520 }, { - "epoch": 0.43982544135566326, + "epoch": 0.6072853900931215, "grad_norm": 0.0, - "learning_rate": 1.241090648409343e-05, - "loss": 0.9809, + "learning_rate": 7.055533401830703e-06, + "loss": 0.9868, "step": 15521 }, { - "epoch": 0.4398537787979257, + "epoch": 0.6073245167853509, "grad_norm": 0.0, - "learning_rate": 1.2410015757071656e-05, - "loss": 0.8432, + "learning_rate": 7.054322373359557e-06, + "loss": 1.0473, "step": 15522 }, { - "epoch": 0.43988211624018814, + "epoch": 0.6073636434775804, "grad_norm": 0.0, - "learning_rate": 1.240912500974943e-05, - "loss": 0.8872, + "learning_rate": 7.053111392192299e-06, + "loss": 1.0236, "step": 15523 }, { - "epoch": 0.43991045368245063, + "epoch": 0.6074027701698098, "grad_norm": 0.0, - "learning_rate": 1.2408234242134247e-05, - "loss": 0.9364, + "learning_rate": 7.051900458348383e-06, + "loss": 1.0075, "step": 15524 }, { - "epoch": 0.43993879112471307, + "epoch": 0.6074418968620393, "grad_norm": 0.0, - "learning_rate": 1.2407343454233613e-05, - "loss": 0.8666, + "learning_rate": 7.050689571847251e-06, + "loss": 0.9993, "step": 15525 }, { - "epoch": 0.43996712856697556, + "epoch": 0.6074810235542687, "grad_norm": 0.0, - "learning_rate": 1.240645264605503e-05, - "loss": 0.8916, + "learning_rate": 7.04947873270835e-06, + "loss": 1.0619, "step": 15526 }, { - "epoch": 0.439995466009238, + "epoch": 0.6075201502464982, "grad_norm": 0.0, - "learning_rate": 1.2405561817606005e-05, - "loss": 0.9899, + "learning_rate": 7.048267940951118e-06, + "loss": 1.0655, "step": 15527 }, { - "epoch": 0.4400238034515005, + "epoch": 0.6075592769387276, "grad_norm": 0.0, - "learning_rate": 1.2404670968894037e-05, - "loss": 0.976, + "learning_rate": 7.0470571965950085e-06, + "loss": 1.1453, "step": 15528 }, { - "epoch": 0.44005214089376293, + "epoch": 0.6075984036309571, "grad_norm": 0.0, - "learning_rate": 1.2403780099926635e-05, - "loss": 0.8135, + "learning_rate": 7.0458464996594615e-06, + "loss": 0.9669, "step": 15529 }, { - "epoch": 0.44008047833602537, + "epoch": 0.6076375303231865, "grad_norm": 0.0, - "learning_rate": 1.24028892107113e-05, - "loss": 0.8536, + "learning_rate": 7.044635850163916e-06, + "loss": 0.9771, "step": 15530 }, { - "epoch": 0.44010881577828787, + "epoch": 0.607676657015416, "grad_norm": 0.0, - "learning_rate": 1.2401998301255533e-05, - "loss": 0.8851, + "learning_rate": 7.043425248127815e-06, + "loss": 1.112, "step": 15531 }, { - "epoch": 0.4401371532205503, + "epoch": 0.6077157837076453, "grad_norm": 0.0, - "learning_rate": 1.2401107371566847e-05, - "loss": 0.9225, + "learning_rate": 7.042214693570597e-06, + "loss": 1.0685, "step": 15532 }, { - "epoch": 0.4401654906628128, + "epoch": 0.6077549103998748, "grad_norm": 0.0, - "learning_rate": 1.240021642165274e-05, - "loss": 0.9075, + "learning_rate": 7.041004186511709e-06, + "loss": 1.0076, "step": 15533 }, { - "epoch": 0.44019382810507524, + "epoch": 0.6077940370921042, "grad_norm": 0.0, - "learning_rate": 1.2399325451520718e-05, - "loss": 0.934, + "learning_rate": 7.0397937269705865e-06, + "loss": 1.0459, "step": 15534 }, { - "epoch": 0.4402221655473377, + "epoch": 0.6078331637843337, "grad_norm": 0.0, - "learning_rate": 1.2398434461178289e-05, - "loss": 0.7906, + "learning_rate": 7.038583314966666e-06, + "loss": 0.9941, "step": 15535 }, { - "epoch": 0.44025050298960017, + "epoch": 0.6078722904765631, "grad_norm": 0.0, - "learning_rate": 1.2397543450632953e-05, - "loss": 0.9293, + "learning_rate": 7.037372950519383e-06, + "loss": 1.0838, "step": 15536 }, { - "epoch": 0.4402788404318626, + "epoch": 0.6079114171687926, "grad_norm": 0.0, - "learning_rate": 1.2396652419892221e-05, - "loss": 0.9398, + "learning_rate": 7.036162633648183e-06, + "loss": 0.9063, "step": 15537 }, { - "epoch": 0.4403071778741251, + "epoch": 0.607950543861022, "grad_norm": 0.0, - "learning_rate": 1.2395761368963597e-05, - "loss": 0.9344, + "learning_rate": 7.034952364372496e-06, + "loss": 1.0494, "step": 15538 }, { - "epoch": 0.44033551531638754, + "epoch": 0.6079896705532515, "grad_norm": 0.0, - "learning_rate": 1.2394870297854582e-05, - "loss": 0.9199, + "learning_rate": 7.033742142711759e-06, + "loss": 0.9787, "step": 15539 }, { - "epoch": 0.44036385275865003, + "epoch": 0.6080287972454809, "grad_norm": 0.0, - "learning_rate": 1.2393979206572684e-05, - "loss": 0.988, + "learning_rate": 7.0325319686854054e-06, + "loss": 0.8264, "step": 15540 }, { - "epoch": 0.44039219020091247, + "epoch": 0.6080679239377103, "grad_norm": 0.0, - "learning_rate": 1.2393088095125414e-05, - "loss": 0.8712, + "learning_rate": 7.031321842312867e-06, + "loss": 0.9507, "step": 15541 }, { - "epoch": 0.4404205276431749, + "epoch": 0.6081070506299397, "grad_norm": 0.0, - "learning_rate": 1.2392196963520273e-05, - "loss": 0.9147, + "learning_rate": 7.030111763613583e-06, + "loss": 1.0214, "step": 15542 }, { - "epoch": 0.4404488650854374, + "epoch": 0.6081461773221691, "grad_norm": 0.0, - "learning_rate": 1.2391305811764767e-05, - "loss": 0.9763, + "learning_rate": 7.028901732606984e-06, + "loss": 0.9249, "step": 15543 }, { - "epoch": 0.44047720252769984, + "epoch": 0.6081853040143986, "grad_norm": 0.0, - "learning_rate": 1.2390414639866406e-05, - "loss": 0.8596, + "learning_rate": 7.027691749312499e-06, + "loss": 1.0002, "step": 15544 }, { - "epoch": 0.44050553996996233, + "epoch": 0.608224430706628, "grad_norm": 0.0, - "learning_rate": 1.2389523447832696e-05, - "loss": 0.8864, + "learning_rate": 7.026481813749557e-06, + "loss": 0.9124, "step": 15545 }, { - "epoch": 0.4405338774122248, + "epoch": 0.6082635573988575, "grad_norm": 0.0, - "learning_rate": 1.2388632235671139e-05, - "loss": 0.9591, + "learning_rate": 7.0252719259375935e-06, + "loss": 1.0989, "step": 15546 }, { - "epoch": 0.4405622148544872, + "epoch": 0.6083026840910869, "grad_norm": 0.0, - "learning_rate": 1.2387741003389247e-05, - "loss": 0.914, + "learning_rate": 7.024062085896037e-06, + "loss": 0.9763, "step": 15547 }, { - "epoch": 0.4405905522967497, + "epoch": 0.6083418107833164, "grad_norm": 0.0, - "learning_rate": 1.2386849750994528e-05, - "loss": 0.9028, + "learning_rate": 7.022852293644313e-06, + "loss": 1.0672, "step": 15548 }, { - "epoch": 0.44061888973901214, + "epoch": 0.6083809374755458, "grad_norm": 0.0, - "learning_rate": 1.2385958478494487e-05, - "loss": 0.914, + "learning_rate": 7.021642549201851e-06, + "loss": 1.0218, "step": 15549 }, { - "epoch": 0.44064722718127464, + "epoch": 0.6084200641677753, "grad_norm": 0.0, - "learning_rate": 1.2385067185896631e-05, - "loss": 0.9076, + "learning_rate": 7.0204328525880745e-06, + "loss": 0.9871, "step": 15550 }, { - "epoch": 0.4406755646235371, + "epoch": 0.6084591908600047, "grad_norm": 0.0, - "learning_rate": 1.2384175873208467e-05, - "loss": 0.9158, + "learning_rate": 7.0192232038224165e-06, + "loss": 1.0761, "step": 15551 }, { - "epoch": 0.4407039020657995, + "epoch": 0.6084983175522342, "grad_norm": 0.0, - "learning_rate": 1.2383284540437505e-05, - "loss": 0.8918, + "learning_rate": 7.018013602924297e-06, + "loss": 0.9889, "step": 15552 }, { - "epoch": 0.440732239508062, + "epoch": 0.6085374442444635, "grad_norm": 0.0, - "learning_rate": 1.2382393187591251e-05, - "loss": 0.9303, + "learning_rate": 7.016804049913144e-06, + "loss": 1.0074, "step": 15553 }, { - "epoch": 0.44076057695032445, + "epoch": 0.608576570936693, "grad_norm": 0.0, - "learning_rate": 1.2381501814677216e-05, - "loss": 0.8129, + "learning_rate": 7.015594544808378e-06, + "loss": 0.947, "step": 15554 }, { - "epoch": 0.44078891439258694, + "epoch": 0.6086156976289224, "grad_norm": 0.0, - "learning_rate": 1.238061042170291e-05, - "loss": 0.8792, + "learning_rate": 7.014385087629425e-06, + "loss": 1.0344, "step": 15555 }, { - "epoch": 0.4408172518348494, + "epoch": 0.6086548243211519, "grad_norm": 0.0, - "learning_rate": 1.2379719008675833e-05, - "loss": 1.0576, + "learning_rate": 7.013175678395705e-06, + "loss": 1.016, "step": 15556 }, { - "epoch": 0.44084558927711187, + "epoch": 0.6086939510133813, "grad_norm": 0.0, - "learning_rate": 1.2378827575603502e-05, - "loss": 0.9366, + "learning_rate": 7.011966317126642e-06, + "loss": 0.9463, "step": 15557 }, { - "epoch": 0.4408739267193743, + "epoch": 0.6087330777056108, "grad_norm": 0.0, - "learning_rate": 1.2377936122493423e-05, - "loss": 0.9553, + "learning_rate": 7.0107570038416575e-06, + "loss": 1.0153, "step": 15558 }, { - "epoch": 0.44090226416163675, + "epoch": 0.6087722043978402, "grad_norm": 0.0, - "learning_rate": 1.2377044649353103e-05, - "loss": 0.9238, + "learning_rate": 7.0095477385601655e-06, + "loss": 1.1345, "step": 15559 }, { - "epoch": 0.44093060160389924, + "epoch": 0.6088113310900697, "grad_norm": 0.0, - "learning_rate": 1.2376153156190053e-05, - "loss": 0.807, + "learning_rate": 7.0083385213015935e-06, + "loss": 0.9994, "step": 15560 }, { - "epoch": 0.4409589390461617, + "epoch": 0.6088504577822991, "grad_norm": 0.0, - "learning_rate": 1.2375261643011787e-05, - "loss": 0.947, + "learning_rate": 7.0071293520853555e-06, + "loss": 1.007, "step": 15561 }, { - "epoch": 0.4409872764884242, + "epoch": 0.6088895844745286, "grad_norm": 0.0, - "learning_rate": 1.2374370109825807e-05, - "loss": 0.8928, + "learning_rate": 7.005920230930867e-06, + "loss": 1.1077, "step": 15562 }, { - "epoch": 0.4410156139306866, + "epoch": 0.608928711166758, "grad_norm": 0.0, - "learning_rate": 1.2373478556639627e-05, - "loss": 0.9187, + "learning_rate": 7.004711157857549e-06, + "loss": 1.0882, "step": 15563 }, { - "epoch": 0.44104395137294905, + "epoch": 0.6089678378589874, "grad_norm": 0.0, - "learning_rate": 1.2372586983460755e-05, - "loss": 0.7808, + "learning_rate": 7.003502132884817e-06, + "loss": 0.9176, "step": 15564 }, { - "epoch": 0.44107228881521154, + "epoch": 0.6090069645512168, "grad_norm": 0.0, - "learning_rate": 1.23716953902967e-05, - "loss": 0.9603, + "learning_rate": 7.002293156032087e-06, + "loss": 0.9604, "step": 15565 }, { - "epoch": 0.441100626257474, + "epoch": 0.6090460912434463, "grad_norm": 0.0, - "learning_rate": 1.2370803777154976e-05, - "loss": 0.9685, + "learning_rate": 7.001084227318773e-06, + "loss": 0.9596, "step": 15566 }, { - "epoch": 0.4411289636997365, + "epoch": 0.6090852179356757, "grad_norm": 0.0, - "learning_rate": 1.2369912144043092e-05, - "loss": 0.9615, + "learning_rate": 6.999875346764288e-06, + "loss": 0.9842, "step": 15567 }, { - "epoch": 0.4411573011419989, + "epoch": 0.6091243446279052, "grad_norm": 0.0, - "learning_rate": 1.2369020490968556e-05, - "loss": 0.9595, + "learning_rate": 6.998666514388041e-06, + "loss": 1.0664, "step": 15568 }, { - "epoch": 0.4411856385842614, + "epoch": 0.6091634713201346, "grad_norm": 0.0, - "learning_rate": 1.2368128817938883e-05, - "loss": 0.8541, + "learning_rate": 6.9974577302094536e-06, + "loss": 0.9974, "step": 15569 }, { - "epoch": 0.44121397602652385, + "epoch": 0.609202598012364, "grad_norm": 0.0, - "learning_rate": 1.2367237124961582e-05, - "loss": 0.8663, + "learning_rate": 6.996248994247931e-06, + "loss": 1.0267, "step": 15570 }, { - "epoch": 0.4412423134687863, + "epoch": 0.6092417247045935, "grad_norm": 0.0, - "learning_rate": 1.2366345412044161e-05, - "loss": 0.9609, + "learning_rate": 6.995040306522889e-06, + "loss": 1.1597, "step": 15571 }, { - "epoch": 0.4412706509110488, + "epoch": 0.6092808513968229, "grad_norm": 0.0, - "learning_rate": 1.2365453679194135e-05, - "loss": 0.903, + "learning_rate": 6.993831667053728e-06, + "loss": 1.097, "step": 15572 }, { - "epoch": 0.4412989883533112, + "epoch": 0.6093199780890524, "grad_norm": 0.0, - "learning_rate": 1.2364561926419011e-05, - "loss": 0.9102, + "learning_rate": 6.992623075859867e-06, + "loss": 0.9649, "step": 15573 }, { - "epoch": 0.4413273257955737, + "epoch": 0.6093591047812817, "grad_norm": 0.0, - "learning_rate": 1.2363670153726308e-05, - "loss": 0.9299, + "learning_rate": 6.991414532960712e-06, + "loss": 1.0272, "step": 15574 }, { - "epoch": 0.44135566323783615, + "epoch": 0.6093982314735112, "grad_norm": 0.0, - "learning_rate": 1.2362778361123535e-05, - "loss": 0.9427, + "learning_rate": 6.990206038375669e-06, + "loss": 1.0543, "step": 15575 }, { - "epoch": 0.4413840006800986, + "epoch": 0.6094373581657406, "grad_norm": 0.0, - "learning_rate": 1.2361886548618198e-05, - "loss": 0.9537, + "learning_rate": 6.988997592124146e-06, + "loss": 0.9205, "step": 15576 }, { - "epoch": 0.4414123381223611, + "epoch": 0.6094764848579701, "grad_norm": 0.0, - "learning_rate": 1.2360994716217819e-05, - "loss": 0.8909, + "learning_rate": 6.987789194225545e-06, + "loss": 1.0333, "step": 15577 }, { - "epoch": 0.4414406755646235, + "epoch": 0.6095156115501995, "grad_norm": 0.0, - "learning_rate": 1.2360102863929902e-05, - "loss": 0.9949, + "learning_rate": 6.986580844699278e-06, + "loss": 1.0144, "step": 15578 }, { - "epoch": 0.441469013006886, + "epoch": 0.609554738242429, "grad_norm": 0.0, - "learning_rate": 1.2359210991761958e-05, - "loss": 0.9533, + "learning_rate": 6.985372543564748e-06, + "loss": 0.9719, "step": 15579 }, { - "epoch": 0.44149735044914845, + "epoch": 0.6095938649346584, "grad_norm": 0.0, - "learning_rate": 1.2358319099721508e-05, - "loss": 0.9448, + "learning_rate": 6.984164290841355e-06, + "loss": 1.0553, "step": 15580 }, { - "epoch": 0.44152568789141095, + "epoch": 0.6096329916268879, "grad_norm": 0.0, - "learning_rate": 1.235742718781606e-05, - "loss": 0.9516, + "learning_rate": 6.982956086548502e-06, + "loss": 0.9002, "step": 15581 }, { - "epoch": 0.4415540253336734, + "epoch": 0.6096721183191173, "grad_norm": 0.0, - "learning_rate": 1.2356535256053129e-05, - "loss": 0.845, + "learning_rate": 6.981747930705598e-06, + "loss": 1.0669, "step": 15582 }, { - "epoch": 0.4415823627759358, + "epoch": 0.6097112450113468, "grad_norm": 0.0, - "learning_rate": 1.2355643304440223e-05, - "loss": 0.7995, + "learning_rate": 6.980539823332041e-06, + "loss": 1.0528, "step": 15583 }, { - "epoch": 0.4416107002181983, + "epoch": 0.6097503717035762, "grad_norm": 0.0, - "learning_rate": 1.2354751332984862e-05, - "loss": 0.8688, + "learning_rate": 6.97933176444723e-06, + "loss": 1.0627, "step": 15584 }, { - "epoch": 0.44163903766046075, + "epoch": 0.6097894983958057, "grad_norm": 0.0, - "learning_rate": 1.2353859341694555e-05, - "loss": 0.9426, + "learning_rate": 6.97812375407056e-06, + "loss": 0.9439, "step": 15585 }, { - "epoch": 0.44166737510272325, + "epoch": 0.609828625088035, "grad_norm": 0.0, - "learning_rate": 1.2352967330576813e-05, - "loss": 1.0575, + "learning_rate": 6.976915792221441e-06, + "loss": 1.0573, "step": 15586 }, { - "epoch": 0.4416957125449857, + "epoch": 0.6098677517802645, "grad_norm": 0.0, - "learning_rate": 1.2352075299639157e-05, - "loss": 0.9341, + "learning_rate": 6.975707878919268e-06, + "loss": 0.9646, "step": 15587 }, { - "epoch": 0.4417240499872481, + "epoch": 0.6099068784724939, "grad_norm": 0.0, - "learning_rate": 1.2351183248889098e-05, - "loss": 0.8394, + "learning_rate": 6.974500014183435e-06, + "loss": 1.0026, "step": 15588 }, { - "epoch": 0.4417523874295106, + "epoch": 0.6099460051647234, "grad_norm": 0.0, - "learning_rate": 1.2350291178334145e-05, - "loss": 0.9972, + "learning_rate": 6.973292198033341e-06, + "loss": 0.9902, "step": 15589 }, { - "epoch": 0.44178072487177306, + "epoch": 0.6099851318569528, "grad_norm": 0.0, - "learning_rate": 1.2349399087981823e-05, - "loss": 0.9941, + "learning_rate": 6.972084430488377e-06, + "loss": 0.9453, "step": 15590 }, { - "epoch": 0.44180906231403555, + "epoch": 0.6100242585491823, "grad_norm": 0.0, - "learning_rate": 1.2348506977839639e-05, - "loss": 0.9293, + "learning_rate": 6.970876711567949e-06, + "loss": 1.0629, "step": 15591 }, { - "epoch": 0.441837399756298, + "epoch": 0.6100633852414117, "grad_norm": 0.0, - "learning_rate": 1.2347614847915103e-05, - "loss": 0.9936, + "learning_rate": 6.969669041291442e-06, + "loss": 0.978, "step": 15592 }, { - "epoch": 0.4418657371985605, + "epoch": 0.6101025119336412, "grad_norm": 0.0, - "learning_rate": 1.2346722698215738e-05, - "loss": 0.9171, + "learning_rate": 6.968461419678256e-06, + "loss": 1.0141, "step": 15593 }, { - "epoch": 0.4418940746408229, + "epoch": 0.6101416386258706, "grad_norm": 0.0, - "learning_rate": 1.2345830528749059e-05, - "loss": 0.9639, + "learning_rate": 6.967253846747778e-06, + "loss": 1.1974, "step": 15594 }, { - "epoch": 0.44192241208308536, + "epoch": 0.6101807653181001, "grad_norm": 0.0, - "learning_rate": 1.2344938339522576e-05, - "loss": 0.8658, + "learning_rate": 6.966046322519402e-06, + "loss": 0.9556, "step": 15595 }, { - "epoch": 0.44195074952534785, + "epoch": 0.6102198920103294, "grad_norm": 0.0, - "learning_rate": 1.234404613054381e-05, - "loss": 0.9202, + "learning_rate": 6.964838847012523e-06, + "loss": 1.0565, "step": 15596 }, { - "epoch": 0.4419790869676103, + "epoch": 0.6102590187025588, "grad_norm": 0.0, - "learning_rate": 1.2343153901820269e-05, - "loss": 1.0048, + "learning_rate": 6.963631420246529e-06, + "loss": 1.0729, "step": 15597 }, { - "epoch": 0.4420074244098728, + "epoch": 0.6102981453947883, "grad_norm": 0.0, - "learning_rate": 1.2342261653359472e-05, - "loss": 0.9016, + "learning_rate": 6.96242404224081e-06, + "loss": 1.0092, "step": 15598 }, { - "epoch": 0.4420357618521352, + "epoch": 0.6103372720870177, "grad_norm": 0.0, - "learning_rate": 1.2341369385168938e-05, - "loss": 1.0021, + "learning_rate": 6.961216713014751e-06, + "loss": 1.0224, "step": 15599 }, { - "epoch": 0.44206409929439766, + "epoch": 0.6103763987792472, "grad_norm": 0.0, - "learning_rate": 1.2340477097256181e-05, - "loss": 0.8852, + "learning_rate": 6.960009432587747e-06, + "loss": 1.1131, "step": 15600 }, { - "epoch": 0.44209243673666015, + "epoch": 0.6104155254714766, "grad_norm": 0.0, - "learning_rate": 1.2339584789628712e-05, - "loss": 0.8692, + "learning_rate": 6.95880220097918e-06, + "loss": 0.946, "step": 15601 }, { - "epoch": 0.4421207741789226, + "epoch": 0.6104546521637061, "grad_norm": 0.0, - "learning_rate": 1.2338692462294054e-05, - "loss": 0.8587, + "learning_rate": 6.957595018208441e-06, + "loss": 1.0773, "step": 15602 }, { - "epoch": 0.4421491116211851, + "epoch": 0.6104937788559355, "grad_norm": 0.0, - "learning_rate": 1.2337800115259724e-05, - "loss": 0.9178, + "learning_rate": 6.95638788429491e-06, + "loss": 0.9791, "step": 15603 }, { - "epoch": 0.4421774490634475, + "epoch": 0.610532905548165, "grad_norm": 0.0, - "learning_rate": 1.2336907748533234e-05, - "loss": 0.8486, + "learning_rate": 6.955180799257981e-06, + "loss": 0.9726, "step": 15604 }, { - "epoch": 0.44220578650571, + "epoch": 0.6105720322403944, "grad_norm": 0.0, - "learning_rate": 1.2336015362122099e-05, - "loss": 0.9595, + "learning_rate": 6.95397376311703e-06, + "loss": 1.0073, "step": 15605 }, { - "epoch": 0.44223412394797246, + "epoch": 0.6106111589326239, "grad_norm": 0.0, - "learning_rate": 1.2335122956033838e-05, - "loss": 0.9318, + "learning_rate": 6.9527667758914454e-06, + "loss": 1.0393, "step": 15606 }, { - "epoch": 0.4422624613902349, + "epoch": 0.6106502856248532, "grad_norm": 0.0, - "learning_rate": 1.2334230530275974e-05, - "loss": 0.9226, + "learning_rate": 6.951559837600605e-06, + "loss": 0.942, "step": 15607 }, { - "epoch": 0.4422907988324974, + "epoch": 0.6106894123170827, "grad_norm": 0.0, - "learning_rate": 1.233333808485602e-05, - "loss": 0.9371, + "learning_rate": 6.950352948263894e-06, + "loss": 1.1526, "step": 15608 }, { - "epoch": 0.4423191362747598, + "epoch": 0.6107285390093121, "grad_norm": 0.0, - "learning_rate": 1.233244561978149e-05, - "loss": 0.8981, + "learning_rate": 6.949146107900695e-06, + "loss": 1.0297, "step": 15609 }, { - "epoch": 0.4423474737170223, + "epoch": 0.6107676657015416, "grad_norm": 0.0, - "learning_rate": 1.2331553135059904e-05, - "loss": 0.8611, + "learning_rate": 6.947939316530387e-06, + "loss": 0.8666, "step": 15610 }, { - "epoch": 0.44237581115928476, + "epoch": 0.610806792393771, "grad_norm": 0.0, - "learning_rate": 1.233066063069878e-05, - "loss": 1.0205, + "learning_rate": 6.946732574172349e-06, + "loss": 0.9858, "step": 15611 }, { - "epoch": 0.4424041486015472, + "epoch": 0.6108459190860005, "grad_norm": 0.0, - "learning_rate": 1.2329768106705637e-05, - "loss": 0.9133, + "learning_rate": 6.945525880845956e-06, + "loss": 0.9693, "step": 15612 }, { - "epoch": 0.4424324860438097, + "epoch": 0.6108850457782299, "grad_norm": 0.0, - "learning_rate": 1.2328875563087994e-05, - "loss": 0.8805, + "learning_rate": 6.944319236570593e-06, + "loss": 1.0367, "step": 15613 }, { - "epoch": 0.44246082348607213, + "epoch": 0.6109241724704594, "grad_norm": 0.0, - "learning_rate": 1.2327982999853363e-05, - "loss": 0.9153, + "learning_rate": 6.9431126413656346e-06, + "loss": 1.0149, "step": 15614 }, { - "epoch": 0.4424891609283346, + "epoch": 0.6109632991626888, "grad_norm": 0.0, - "learning_rate": 1.232709041700927e-05, - "loss": 0.9528, + "learning_rate": 6.941906095250457e-06, + "loss": 1.027, "step": 15615 }, { - "epoch": 0.44251749837059706, + "epoch": 0.6110024258549183, "grad_norm": 0.0, - "learning_rate": 1.2326197814563233e-05, - "loss": 0.9012, + "learning_rate": 6.940699598244436e-06, + "loss": 1.0388, "step": 15616 }, { - "epoch": 0.44254583581285956, + "epoch": 0.6110415525471476, "grad_norm": 0.0, - "learning_rate": 1.2325305192522763e-05, - "loss": 0.9059, + "learning_rate": 6.939493150366941e-06, + "loss": 1.0303, "step": 15617 }, { - "epoch": 0.442574173255122, + "epoch": 0.6110806792393771, "grad_norm": 0.0, - "learning_rate": 1.2324412550895383e-05, - "loss": 0.918, + "learning_rate": 6.938286751637355e-06, + "loss": 1.0905, "step": 15618 }, { - "epoch": 0.44260251069738443, + "epoch": 0.6111198059316065, "grad_norm": 0.0, - "learning_rate": 1.2323519889688615e-05, - "loss": 0.8588, + "learning_rate": 6.937080402075047e-06, + "loss": 1.0752, "step": 15619 }, { - "epoch": 0.4426308481396469, + "epoch": 0.611158932623836, "grad_norm": 0.0, - "learning_rate": 1.2322627208909979e-05, - "loss": 0.9101, + "learning_rate": 6.93587410169939e-06, + "loss": 0.8599, "step": 15620 }, { - "epoch": 0.44265918558190936, + "epoch": 0.6111980593160654, "grad_norm": 0.0, - "learning_rate": 1.232173450856699e-05, - "loss": 0.901, + "learning_rate": 6.934667850529751e-06, + "loss": 1.0084, "step": 15621 }, { - "epoch": 0.44268752302417186, + "epoch": 0.6112371860082949, "grad_norm": 0.0, - "learning_rate": 1.2320841788667166e-05, - "loss": 0.8354, + "learning_rate": 6.933461648585511e-06, + "loss": 0.9404, "step": 15622 }, { - "epoch": 0.4427158604664343, + "epoch": 0.6112763127005243, "grad_norm": 0.0, - "learning_rate": 1.2319949049218031e-05, - "loss": 0.7143, + "learning_rate": 6.9322554958860334e-06, + "loss": 1.0513, "step": 15623 }, { - "epoch": 0.44274419790869673, + "epoch": 0.6113154393927538, "grad_norm": 0.0, - "learning_rate": 1.2319056290227106e-05, - "loss": 0.8059, + "learning_rate": 6.931049392450687e-06, + "loss": 0.9519, "step": 15624 }, { - "epoch": 0.44277253535095923, + "epoch": 0.6113545660849832, "grad_norm": 0.0, - "learning_rate": 1.2318163511701906e-05, - "loss": 1.0715, + "learning_rate": 6.929843338298843e-06, + "loss": 1.0064, "step": 15625 }, { - "epoch": 0.44280087279322167, + "epoch": 0.6113936927772126, "grad_norm": 0.0, - "learning_rate": 1.2317270713649955e-05, - "loss": 0.8682, + "learning_rate": 6.928637333449863e-06, + "loss": 0.8574, "step": 15626 }, { - "epoch": 0.44282921023548416, + "epoch": 0.6114328194694421, "grad_norm": 0.0, - "learning_rate": 1.2316377896078772e-05, - "loss": 0.8197, + "learning_rate": 6.927431377923124e-06, + "loss": 1.0236, "step": 15627 }, { - "epoch": 0.4428575476777466, + "epoch": 0.6114719461616714, "grad_norm": 0.0, - "learning_rate": 1.231548505899588e-05, - "loss": 1.0112, + "learning_rate": 6.926225471737986e-06, + "loss": 1.0028, "step": 15628 }, { - "epoch": 0.4428858851200091, + "epoch": 0.6115110728539009, "grad_norm": 0.0, - "learning_rate": 1.2314592202408798e-05, - "loss": 0.9539, + "learning_rate": 6.925019614913817e-06, + "loss": 1.0468, "step": 15629 }, { - "epoch": 0.44291422256227153, + "epoch": 0.6115501995461303, "grad_norm": 0.0, - "learning_rate": 1.2313699326325041e-05, - "loss": 0.9926, + "learning_rate": 6.923813807469975e-06, + "loss": 1.1277, "step": 15630 }, { - "epoch": 0.44294256000453397, + "epoch": 0.6115893262383598, "grad_norm": 0.0, - "learning_rate": 1.231280643075214e-05, - "loss": 0.9855, + "learning_rate": 6.922608049425832e-06, + "loss": 0.9636, "step": 15631 }, { - "epoch": 0.44297089744679646, + "epoch": 0.6116284529305892, "grad_norm": 0.0, - "learning_rate": 1.231191351569761e-05, - "loss": 0.9853, + "learning_rate": 6.921402340800747e-06, + "loss": 0.9011, "step": 15632 }, { - "epoch": 0.4429992348890589, + "epoch": 0.6116675796228187, "grad_norm": 0.0, - "learning_rate": 1.2311020581168972e-05, - "loss": 0.7982, + "learning_rate": 6.920196681614083e-06, + "loss": 0.9925, "step": 15633 }, { - "epoch": 0.4430275723313214, + "epoch": 0.6117067063150481, "grad_norm": 0.0, - "learning_rate": 1.2310127627173753e-05, - "loss": 0.8868, + "learning_rate": 6.918991071885202e-06, + "loss": 0.9249, "step": 15634 }, { - "epoch": 0.44305590977358383, + "epoch": 0.6117458330072776, "grad_norm": 0.0, - "learning_rate": 1.230923465371947e-05, - "loss": 0.8631, + "learning_rate": 6.917785511633459e-06, + "loss": 0.9514, "step": 15635 }, { - "epoch": 0.44308424721584627, + "epoch": 0.611784959699507, "grad_norm": 0.0, - "learning_rate": 1.2308341660813647e-05, - "loss": 1.0134, + "learning_rate": 6.916580000878221e-06, + "loss": 0.9862, "step": 15636 }, { - "epoch": 0.44311258465810877, + "epoch": 0.6118240863917365, "grad_norm": 0.0, - "learning_rate": 1.2307448648463804e-05, - "loss": 0.8622, + "learning_rate": 6.9153745396388475e-06, + "loss": 1.0481, "step": 15637 }, { - "epoch": 0.4431409221003712, + "epoch": 0.6118632130839659, "grad_norm": 0.0, - "learning_rate": 1.2306555616677462e-05, - "loss": 0.9725, + "learning_rate": 6.914169127934691e-06, + "loss": 1.0066, "step": 15638 }, { - "epoch": 0.4431692595426337, + "epoch": 0.6119023397761953, "grad_norm": 0.0, - "learning_rate": 1.2305662565462146e-05, - "loss": 1.0006, + "learning_rate": 6.912963765785111e-06, + "loss": 1.0186, "step": 15639 }, { - "epoch": 0.44319759698489614, + "epoch": 0.6119414664684247, "grad_norm": 0.0, - "learning_rate": 1.2304769494825382e-05, - "loss": 0.9823, + "learning_rate": 6.911758453209465e-06, + "loss": 0.9159, "step": 15640 }, { - "epoch": 0.44322593442715863, + "epoch": 0.6119805931606542, "grad_norm": 0.0, - "learning_rate": 1.2303876404774686e-05, - "loss": 0.9058, + "learning_rate": 6.910553190227109e-06, + "loss": 0.9586, "step": 15641 }, { - "epoch": 0.44325427186942107, + "epoch": 0.6120197198528836, "grad_norm": 0.0, - "learning_rate": 1.2302983295317586e-05, - "loss": 0.9502, + "learning_rate": 6.909347976857399e-06, + "loss": 1.0792, "step": 15642 }, { - "epoch": 0.4432826093116835, + "epoch": 0.6120588465451131, "grad_norm": 0.0, - "learning_rate": 1.2302090166461598e-05, - "loss": 0.9603, + "learning_rate": 6.908142813119685e-06, + "loss": 1.0501, "step": 15643 }, { - "epoch": 0.443310946753946, + "epoch": 0.6120979732373425, "grad_norm": 0.0, - "learning_rate": 1.2301197018214251e-05, - "loss": 0.8648, + "learning_rate": 6.906937699033325e-06, + "loss": 0.9802, "step": 15644 }, { - "epoch": 0.44333928419620844, + "epoch": 0.612137099929572, "grad_norm": 0.0, - "learning_rate": 1.2300303850583069e-05, - "loss": 0.8779, + "learning_rate": 6.905732634617668e-06, + "loss": 1.0747, "step": 15645 }, { - "epoch": 0.44336762163847093, + "epoch": 0.6121762266218014, "grad_norm": 0.0, - "learning_rate": 1.2299410663575572e-05, - "loss": 0.9422, + "learning_rate": 6.904527619892069e-06, + "loss": 1.0261, "step": 15646 }, { - "epoch": 0.44339595908073337, + "epoch": 0.6122153533140309, "grad_norm": 0.0, - "learning_rate": 1.2298517457199283e-05, - "loss": 0.8512, + "learning_rate": 6.903322654875878e-06, + "loss": 0.9243, "step": 15647 }, { - "epoch": 0.4434242965229958, + "epoch": 0.6122544800062603, "grad_norm": 0.0, - "learning_rate": 1.2297624231461734e-05, - "loss": 0.9563, + "learning_rate": 6.902117739588443e-06, + "loss": 1.109, "step": 15648 }, { - "epoch": 0.4434526339652583, + "epoch": 0.6122936066984898, "grad_norm": 0.0, - "learning_rate": 1.2296730986370437e-05, - "loss": 0.8526, + "learning_rate": 6.900912874049117e-06, + "loss": 1.0967, "step": 15649 }, { - "epoch": 0.44348097140752074, + "epoch": 0.6123327333907191, "grad_norm": 0.0, - "learning_rate": 1.2295837721932925e-05, - "loss": 0.9628, + "learning_rate": 6.899708058277246e-06, + "loss": 1.0032, "step": 15650 }, { - "epoch": 0.44350930884978323, + "epoch": 0.6123718600829486, "grad_norm": 0.0, - "learning_rate": 1.2294944438156717e-05, - "loss": 0.9486, + "learning_rate": 6.898503292292178e-06, + "loss": 1.0214, "step": 15651 }, { - "epoch": 0.4435376462920457, + "epoch": 0.612410986775178, "grad_norm": 0.0, - "learning_rate": 1.2294051135049343e-05, - "loss": 0.8751, + "learning_rate": 6.89729857611326e-06, + "loss": 1.0993, "step": 15652 }, { - "epoch": 0.44356598373430817, + "epoch": 0.6124501134674075, "grad_norm": 0.0, - "learning_rate": 1.2293157812618324e-05, - "loss": 0.9773, + "learning_rate": 6.896093909759841e-06, + "loss": 0.9168, "step": 15653 }, { - "epoch": 0.4435943211765706, + "epoch": 0.6124892401596369, "grad_norm": 0.0, - "learning_rate": 1.2292264470871183e-05, - "loss": 0.9191, + "learning_rate": 6.894889293251265e-06, + "loss": 1.0021, "step": 15654 }, { - "epoch": 0.44362265861883304, + "epoch": 0.6125283668518663, "grad_norm": 0.0, - "learning_rate": 1.2291371109815446e-05, - "loss": 0.8879, + "learning_rate": 6.893684726606875e-06, + "loss": 1.06, "step": 15655 }, { - "epoch": 0.44365099606109554, + "epoch": 0.6125674935440958, "grad_norm": 0.0, - "learning_rate": 1.2290477729458642e-05, - "loss": 0.9308, + "learning_rate": 6.8924802098460175e-06, + "loss": 1.0131, "step": 15656 }, { - "epoch": 0.443679333503358, + "epoch": 0.6126066202363252, "grad_norm": 0.0, - "learning_rate": 1.2289584329808293e-05, - "loss": 0.9099, + "learning_rate": 6.891275742988028e-06, + "loss": 1.1216, "step": 15657 }, { - "epoch": 0.44370767094562047, + "epoch": 0.6126457469285547, "grad_norm": 0.0, - "learning_rate": 1.2288690910871926e-05, - "loss": 0.796, + "learning_rate": 6.89007132605226e-06, + "loss": 0.8682, "step": 15658 }, { - "epoch": 0.4437360083878829, + "epoch": 0.612684873620784, "grad_norm": 0.0, - "learning_rate": 1.2287797472657064e-05, - "loss": 0.8855, + "learning_rate": 6.888866959058049e-06, + "loss": 1.1094, "step": 15659 }, { - "epoch": 0.44376434583014535, + "epoch": 0.6127240003130136, "grad_norm": 0.0, - "learning_rate": 1.2286904015171234e-05, - "loss": 0.8052, + "learning_rate": 6.887662642024736e-06, + "loss": 1.037, "step": 15660 }, { - "epoch": 0.44379268327240784, + "epoch": 0.6127631270052429, "grad_norm": 0.0, - "learning_rate": 1.2286010538421964e-05, - "loss": 0.9815, + "learning_rate": 6.886458374971658e-06, + "loss": 0.9942, "step": 15661 }, { - "epoch": 0.4438210207146703, + "epoch": 0.6128022536974724, "grad_norm": 0.0, - "learning_rate": 1.228511704241678e-05, - "loss": 0.9589, + "learning_rate": 6.8852541579181596e-06, + "loss": 1.0462, "step": 15662 }, { - "epoch": 0.44384935815693277, + "epoch": 0.6128413803897018, "grad_norm": 0.0, - "learning_rate": 1.22842235271632e-05, - "loss": 0.9131, + "learning_rate": 6.8840499908835764e-06, + "loss": 1.071, "step": 15663 }, { - "epoch": 0.4438776955991952, + "epoch": 0.6128805070819313, "grad_norm": 0.0, - "learning_rate": 1.2283329992668762e-05, - "loss": 0.881, + "learning_rate": 6.882845873887247e-06, + "loss": 0.8469, "step": 15664 }, { - "epoch": 0.4439060330414577, + "epoch": 0.6129196337741607, "grad_norm": 0.0, - "learning_rate": 1.2282436438940987e-05, - "loss": 0.8492, + "learning_rate": 6.881641806948507e-06, + "loss": 0.9977, "step": 15665 }, { - "epoch": 0.44393437048372014, + "epoch": 0.6129587604663902, "grad_norm": 0.0, - "learning_rate": 1.2281542865987404e-05, - "loss": 0.8585, + "learning_rate": 6.880437790086687e-06, + "loss": 0.98, "step": 15666 }, { - "epoch": 0.4439627079259826, + "epoch": 0.6129978871586196, "grad_norm": 0.0, - "learning_rate": 1.2280649273815536e-05, - "loss": 0.9499, + "learning_rate": 6.879233823321132e-06, + "loss": 1.0536, "step": 15667 }, { - "epoch": 0.4439910453682451, + "epoch": 0.6130370138508491, "grad_norm": 0.0, - "learning_rate": 1.227975566243291e-05, - "loss": 0.9621, + "learning_rate": 6.878029906671171e-06, + "loss": 1.0443, "step": 15668 }, { - "epoch": 0.4440193828105075, + "epoch": 0.6130761405430785, "grad_norm": 0.0, - "learning_rate": 1.2278862031847061e-05, - "loss": 0.7985, + "learning_rate": 6.876826040156137e-06, + "loss": 1.1004, "step": 15669 }, { - "epoch": 0.44404772025277, + "epoch": 0.613115267235308, "grad_norm": 0.0, - "learning_rate": 1.2277968382065506e-05, - "loss": 0.7682, + "learning_rate": 6.875622223795359e-06, + "loss": 0.926, "step": 15670 }, { - "epoch": 0.44407605769503244, + "epoch": 0.6131543939275373, "grad_norm": 0.0, - "learning_rate": 1.227707471309578e-05, - "loss": 0.9326, + "learning_rate": 6.874418457608178e-06, + "loss": 1.0233, "step": 15671 }, { - "epoch": 0.4441043951372949, + "epoch": 0.6131935206197668, "grad_norm": 0.0, - "learning_rate": 1.2276181024945406e-05, - "loss": 0.8987, + "learning_rate": 6.87321474161392e-06, + "loss": 1.0561, "step": 15672 }, { - "epoch": 0.4441327325795574, + "epoch": 0.6132326473119962, "grad_norm": 0.0, - "learning_rate": 1.2275287317621916e-05, - "loss": 1.0, + "learning_rate": 6.872011075831914e-06, + "loss": 0.963, "step": 15673 }, { - "epoch": 0.4441610700218198, + "epoch": 0.6132717740042257, "grad_norm": 0.0, - "learning_rate": 1.2274393591132837e-05, - "loss": 0.9014, + "learning_rate": 6.870807460281493e-06, + "loss": 1.0594, "step": 15674 }, { - "epoch": 0.4441894074640823, + "epoch": 0.6133109006964551, "grad_norm": 0.0, - "learning_rate": 1.2273499845485695e-05, - "loss": 0.9007, + "learning_rate": 6.869603894981976e-06, + "loss": 1.0297, "step": 15675 }, { - "epoch": 0.44421774490634475, + "epoch": 0.6133500273886846, "grad_norm": 0.0, - "learning_rate": 1.2272606080688016e-05, - "loss": 0.9668, + "learning_rate": 6.868400379952704e-06, + "loss": 0.9895, "step": 15676 }, { - "epoch": 0.44424608234860724, + "epoch": 0.613389154080914, "grad_norm": 0.0, - "learning_rate": 1.2271712296747335e-05, - "loss": 0.9427, + "learning_rate": 6.867196915212997e-06, + "loss": 1.0444, "step": 15677 }, { - "epoch": 0.4442744197908697, + "epoch": 0.6134282807731435, "grad_norm": 0.0, - "learning_rate": 1.227081849367118e-05, - "loss": 0.9031, + "learning_rate": 6.865993500782182e-06, + "loss": 1.099, "step": 15678 }, { - "epoch": 0.4443027572331321, + "epoch": 0.6134674074653729, "grad_norm": 0.0, - "learning_rate": 1.2269924671467075e-05, - "loss": 0.8925, + "learning_rate": 6.86479013667958e-06, + "loss": 1.0645, "step": 15679 }, { - "epoch": 0.4443310946753946, + "epoch": 0.6135065341576024, "grad_norm": 0.0, - "learning_rate": 1.2269030830142552e-05, - "loss": 0.911, + "learning_rate": 6.863586822924524e-06, + "loss": 1.0627, "step": 15680 }, { - "epoch": 0.44435943211765705, + "epoch": 0.6135456608498318, "grad_norm": 0.0, - "learning_rate": 1.226813696970514e-05, - "loss": 0.9235, + "learning_rate": 6.862383559536333e-06, + "loss": 1.0726, "step": 15681 }, { - "epoch": 0.44438776955991954, + "epoch": 0.6135847875420612, "grad_norm": 0.0, - "learning_rate": 1.2267243090162369e-05, - "loss": 0.9129, + "learning_rate": 6.8611803465343305e-06, + "loss": 1.004, "step": 15682 }, { - "epoch": 0.444416107002182, + "epoch": 0.6136239142342906, "grad_norm": 0.0, - "learning_rate": 1.2266349191521765e-05, - "loss": 0.9571, + "learning_rate": 6.8599771839378394e-06, + "loss": 1.0295, "step": 15683 }, { - "epoch": 0.4444444444444444, + "epoch": 0.61366304092652, "grad_norm": 0.0, - "learning_rate": 1.226545527379086e-05, - "loss": 0.8977, + "learning_rate": 6.858774071766175e-06, + "loss": 0.9203, "step": 15684 }, { - "epoch": 0.4444727818867069, + "epoch": 0.6137021676187495, "grad_norm": 0.0, - "learning_rate": 1.2264561336977185e-05, - "loss": 0.8478, + "learning_rate": 6.857571010038669e-06, + "loss": 0.975, "step": 15685 }, { - "epoch": 0.44450111932896935, + "epoch": 0.6137412943109789, "grad_norm": 0.0, - "learning_rate": 1.2263667381088272e-05, - "loss": 0.8179, + "learning_rate": 6.856367998774631e-06, + "loss": 0.9863, "step": 15686 }, { - "epoch": 0.44452945677123185, + "epoch": 0.6137804210032084, "grad_norm": 0.0, - "learning_rate": 1.2262773406131644e-05, - "loss": 0.829, + "learning_rate": 6.855165037993388e-06, + "loss": 1.0489, "step": 15687 }, { - "epoch": 0.4445577942134943, + "epoch": 0.6138195476954378, "grad_norm": 0.0, - "learning_rate": 1.2261879412114837e-05, - "loss": 0.8594, + "learning_rate": 6.853962127714249e-06, + "loss": 1.0893, "step": 15688 }, { - "epoch": 0.4445861316557568, + "epoch": 0.6138586743876673, "grad_norm": 0.0, - "learning_rate": 1.2260985399045379e-05, - "loss": 0.9618, + "learning_rate": 6.8527592679565395e-06, + "loss": 1.0808, "step": 15689 }, { - "epoch": 0.4446144690980192, + "epoch": 0.6138978010798967, "grad_norm": 0.0, - "learning_rate": 1.2260091366930802e-05, - "loss": 0.8968, + "learning_rate": 6.851556458739569e-06, + "loss": 1.0467, "step": 15690 }, { - "epoch": 0.44464280654028165, + "epoch": 0.6139369277721262, "grad_norm": 0.0, - "learning_rate": 1.2259197315778638e-05, - "loss": 0.9249, + "learning_rate": 6.850353700082659e-06, + "loss": 1.0591, "step": 15691 }, { - "epoch": 0.44467114398254415, + "epoch": 0.6139760544643555, "grad_norm": 0.0, - "learning_rate": 1.2258303245596413e-05, - "loss": 0.9498, + "learning_rate": 6.8491509920051225e-06, + "loss": 0.9169, "step": 15692 }, { - "epoch": 0.4446994814248066, + "epoch": 0.614015181156585, "grad_norm": 0.0, - "learning_rate": 1.2257409156391662e-05, - "loss": 0.7866, + "learning_rate": 6.8479483345262695e-06, + "loss": 1.0459, "step": 15693 }, { - "epoch": 0.4447278188670691, + "epoch": 0.6140543078488144, "grad_norm": 0.0, - "learning_rate": 1.2256515048171917e-05, - "loss": 0.8571, + "learning_rate": 6.84674572766542e-06, + "loss": 1.0758, "step": 15694 }, { - "epoch": 0.4447561563093315, + "epoch": 0.6140934345410439, "grad_norm": 0.0, - "learning_rate": 1.2255620920944708e-05, - "loss": 0.9619, + "learning_rate": 6.845543171441882e-06, + "loss": 0.9554, "step": 15695 }, { - "epoch": 0.44478449375159396, + "epoch": 0.6141325612332733, "grad_norm": 0.0, - "learning_rate": 1.2254726774717564e-05, - "loss": 0.9433, + "learning_rate": 6.844340665874967e-06, + "loss": 0.9985, "step": 15696 }, { - "epoch": 0.44481283119385645, + "epoch": 0.6141716879255028, "grad_norm": 0.0, - "learning_rate": 1.2253832609498018e-05, - "loss": 1.0363, + "learning_rate": 6.843138210983986e-06, + "loss": 1.0243, "step": 15697 }, { - "epoch": 0.4448411686361189, + "epoch": 0.6142108146177322, "grad_norm": 0.0, - "learning_rate": 1.2252938425293606e-05, - "loss": 0.8735, + "learning_rate": 6.8419358067882516e-06, + "loss": 0.8582, "step": 15698 }, { - "epoch": 0.4448695060783814, + "epoch": 0.6142499413099617, "grad_norm": 0.0, - "learning_rate": 1.2252044222111859e-05, - "loss": 0.9529, + "learning_rate": 6.84073345330707e-06, + "loss": 0.9988, "step": 15699 }, { - "epoch": 0.4448978435206438, + "epoch": 0.6142890680021911, "grad_norm": 0.0, - "learning_rate": 1.2251149999960303e-05, - "loss": 0.908, + "learning_rate": 6.839531150559751e-06, + "loss": 1.107, "step": 15700 }, { - "epoch": 0.4449261809629063, + "epoch": 0.6143281946944206, "grad_norm": 0.0, - "learning_rate": 1.2250255758846477e-05, - "loss": 0.9645, + "learning_rate": 6.8383288985655985e-06, + "loss": 0.9671, "step": 15701 }, { - "epoch": 0.44495451840516875, + "epoch": 0.61436732138665, "grad_norm": 0.0, - "learning_rate": 1.2249361498777909e-05, - "loss": 0.8447, + "learning_rate": 6.837126697343924e-06, + "loss": 1.0087, "step": 15702 }, { - "epoch": 0.4449828558474312, + "epoch": 0.6144064480788795, "grad_norm": 0.0, - "learning_rate": 1.2248467219762135e-05, - "loss": 0.9191, + "learning_rate": 6.835924546914032e-06, + "loss": 1.0147, "step": 15703 }, { - "epoch": 0.4450111932896937, + "epoch": 0.6144455747711088, "grad_norm": 0.0, - "learning_rate": 1.2247572921806688e-05, - "loss": 0.9227, + "learning_rate": 6.834722447295228e-06, + "loss": 0.8592, "step": 15704 }, { - "epoch": 0.4450395307319561, + "epoch": 0.6144847014633383, "grad_norm": 0.0, - "learning_rate": 1.2246678604919095e-05, - "loss": 0.9031, + "learning_rate": 6.833520398506814e-06, + "loss": 0.9309, "step": 15705 }, { - "epoch": 0.4450678681742186, + "epoch": 0.6145238281555677, "grad_norm": 0.0, - "learning_rate": 1.2245784269106897e-05, - "loss": 0.981, + "learning_rate": 6.832318400568092e-06, + "loss": 1.0326, "step": 15706 }, { - "epoch": 0.44509620561648106, + "epoch": 0.6145629548477972, "grad_norm": 0.0, - "learning_rate": 1.2244889914377627e-05, - "loss": 0.7576, + "learning_rate": 6.83111645349837e-06, + "loss": 1.035, "step": 15707 }, { - "epoch": 0.4451245430587435, + "epoch": 0.6146020815400266, "grad_norm": 0.0, - "learning_rate": 1.2243995540738813e-05, - "loss": 0.8462, + "learning_rate": 6.829914557316948e-06, + "loss": 1.0412, "step": 15708 }, { - "epoch": 0.445152880501006, + "epoch": 0.6146412082322561, "grad_norm": 0.0, - "learning_rate": 1.2243101148197991e-05, - "loss": 0.9057, + "learning_rate": 6.828712712043125e-06, + "loss": 0.9209, "step": 15709 }, { - "epoch": 0.4451812179432684, + "epoch": 0.6146803349244855, "grad_norm": 0.0, - "learning_rate": 1.2242206736762694e-05, - "loss": 0.7917, + "learning_rate": 6.827510917696198e-06, + "loss": 1.1135, "step": 15710 }, { - "epoch": 0.4452095553855309, + "epoch": 0.6147194616167149, "grad_norm": 0.0, - "learning_rate": 1.2241312306440458e-05, - "loss": 0.8229, + "learning_rate": 6.826309174295475e-06, + "loss": 0.9608, "step": 15711 }, { - "epoch": 0.44523789282779336, + "epoch": 0.6147585883089444, "grad_norm": 0.0, - "learning_rate": 1.2240417857238817e-05, - "loss": 0.9306, + "learning_rate": 6.825107481860249e-06, + "loss": 0.9681, "step": 15712 }, { - "epoch": 0.44526623027005585, + "epoch": 0.6147977150011738, "grad_norm": 0.0, - "learning_rate": 1.2239523389165301e-05, - "loss": 0.8898, + "learning_rate": 6.823905840409819e-06, + "loss": 0.9518, "step": 15713 }, { - "epoch": 0.4452945677123183, + "epoch": 0.6148368416934032, "grad_norm": 0.0, - "learning_rate": 1.2238628902227454e-05, - "loss": 0.951, + "learning_rate": 6.822704249963481e-06, + "loss": 0.9437, "step": 15714 }, { - "epoch": 0.44532290515458073, + "epoch": 0.6148759683856326, "grad_norm": 0.0, - "learning_rate": 1.2237734396432801e-05, - "loss": 0.8866, + "learning_rate": 6.821502710540527e-06, + "loss": 0.9812, "step": 15715 }, { - "epoch": 0.4453512425968432, + "epoch": 0.6149150950778621, "grad_norm": 0.0, - "learning_rate": 1.2236839871788879e-05, - "loss": 0.955, + "learning_rate": 6.820301222160261e-06, + "loss": 0.9355, "step": 15716 }, { - "epoch": 0.44537958003910566, + "epoch": 0.6149542217700915, "grad_norm": 0.0, - "learning_rate": 1.2235945328303225e-05, - "loss": 0.9425, + "learning_rate": 6.819099784841974e-06, + "loss": 1.0158, "step": 15717 }, { - "epoch": 0.44540791748136815, + "epoch": 0.614993348462321, "grad_norm": 0.0, - "learning_rate": 1.2235050765983374e-05, - "loss": 0.93, + "learning_rate": 6.817898398604958e-06, + "loss": 1.0945, "step": 15718 }, { - "epoch": 0.4454362549236306, + "epoch": 0.6150324751545504, "grad_norm": 0.0, - "learning_rate": 1.223415618483686e-05, - "loss": 0.9428, + "learning_rate": 6.816697063468501e-06, + "loss": 1.025, "step": 15719 }, { - "epoch": 0.44546459236589303, + "epoch": 0.6150716018467799, "grad_norm": 0.0, - "learning_rate": 1.223326158487122e-05, - "loss": 0.8643, + "learning_rate": 6.815495779451906e-06, + "loss": 0.9852, "step": 15720 }, { - "epoch": 0.4454929298081555, + "epoch": 0.6151107285390093, "grad_norm": 0.0, - "learning_rate": 1.2232366966093989e-05, - "loss": 0.9875, + "learning_rate": 6.814294546574458e-06, + "loss": 0.9937, "step": 15721 }, { - "epoch": 0.44552126725041796, + "epoch": 0.6151498552312388, "grad_norm": 0.0, - "learning_rate": 1.2231472328512699e-05, - "loss": 0.9602, + "learning_rate": 6.813093364855447e-06, + "loss": 1.0085, "step": 15722 }, { - "epoch": 0.44554960469268046, + "epoch": 0.6151889819234682, "grad_norm": 0.0, - "learning_rate": 1.2230577672134889e-05, - "loss": 0.8322, + "learning_rate": 6.811892234314163e-06, + "loss": 0.9117, "step": 15723 }, { - "epoch": 0.4455779421349429, + "epoch": 0.6152281086156977, "grad_norm": 0.0, - "learning_rate": 1.2229682996968097e-05, - "loss": 0.8588, + "learning_rate": 6.810691154969891e-06, + "loss": 1.0978, "step": 15724 }, { - "epoch": 0.4456062795772054, + "epoch": 0.615267235307927, "grad_norm": 0.0, - "learning_rate": 1.2228788303019858e-05, - "loss": 0.867, + "learning_rate": 6.809490126841927e-06, + "loss": 0.9459, "step": 15725 }, { - "epoch": 0.4456346170194678, + "epoch": 0.6153063620001565, "grad_norm": 0.0, - "learning_rate": 1.2227893590297706e-05, - "loss": 0.8819, + "learning_rate": 6.8082891499495545e-06, + "loss": 1.0084, "step": 15726 }, { - "epoch": 0.44566295446173027, + "epoch": 0.6153454886923859, "grad_norm": 0.0, - "learning_rate": 1.2226998858809178e-05, - "loss": 0.8426, + "learning_rate": 6.807088224312057e-06, + "loss": 1.0443, "step": 15727 }, { - "epoch": 0.44569129190399276, + "epoch": 0.6153846153846154, "grad_norm": 0.0, - "learning_rate": 1.2226104108561813e-05, - "loss": 0.8733, + "learning_rate": 6.805887349948721e-06, + "loss": 1.0014, "step": 15728 }, { - "epoch": 0.4457196293462552, + "epoch": 0.6154237420768448, "grad_norm": 0.0, - "learning_rate": 1.2225209339563144e-05, - "loss": 0.937, + "learning_rate": 6.80468652687883e-06, + "loss": 1.0165, "step": 15729 }, { - "epoch": 0.4457479667885177, + "epoch": 0.6154628687690743, "grad_norm": 0.0, - "learning_rate": 1.2224314551820712e-05, - "loss": 0.9988, + "learning_rate": 6.803485755121672e-06, + "loss": 0.9803, "step": 15730 }, { - "epoch": 0.44577630423078013, + "epoch": 0.6155019954613037, "grad_norm": 0.0, - "learning_rate": 1.2223419745342055e-05, - "loss": 0.7993, + "learning_rate": 6.802285034696529e-06, + "loss": 0.9968, "step": 15731 }, { - "epoch": 0.44580464167304257, + "epoch": 0.6155411221535332, "grad_norm": 0.0, - "learning_rate": 1.2222524920134707e-05, - "loss": 0.8732, + "learning_rate": 6.801084365622681e-06, + "loss": 0.9223, "step": 15732 }, { - "epoch": 0.44583297911530506, + "epoch": 0.6155802488457626, "grad_norm": 0.0, - "learning_rate": 1.2221630076206206e-05, - "loss": 0.8729, + "learning_rate": 6.799883747919407e-06, + "loss": 1.0386, "step": 15733 }, { - "epoch": 0.4458613165575675, + "epoch": 0.6156193755379921, "grad_norm": 0.0, - "learning_rate": 1.2220735213564088e-05, - "loss": 0.9279, + "learning_rate": 6.798683181605992e-06, + "loss": 1.0268, "step": 15734 }, { - "epoch": 0.44588965399983, + "epoch": 0.6156585022302214, "grad_norm": 0.0, - "learning_rate": 1.2219840332215894e-05, - "loss": 0.85, + "learning_rate": 6.797482666701711e-06, + "loss": 1.0495, "step": 15735 }, { - "epoch": 0.44591799144209243, + "epoch": 0.615697628922451, "grad_norm": 0.0, - "learning_rate": 1.2218945432169158e-05, - "loss": 0.7936, + "learning_rate": 6.796282203225848e-06, + "loss": 1.0673, "step": 15736 }, { - "epoch": 0.4459463288843549, + "epoch": 0.6157367556146803, "grad_norm": 0.0, - "learning_rate": 1.2218050513431424e-05, - "loss": 0.9229, + "learning_rate": 6.7950817911976755e-06, + "loss": 0.9863, "step": 15737 }, { - "epoch": 0.44597466632661736, + "epoch": 0.6157758823069098, "grad_norm": 0.0, - "learning_rate": 1.2217155576010225e-05, - "loss": 0.9552, + "learning_rate": 6.7938814306364776e-06, + "loss": 0.9724, "step": 15738 }, { - "epoch": 0.4460030037688798, + "epoch": 0.6158150089991392, "grad_norm": 0.0, - "learning_rate": 1.2216260619913103e-05, - "loss": 0.938, + "learning_rate": 6.792681121561524e-06, + "loss": 1.0577, "step": 15739 }, { - "epoch": 0.4460313412111423, + "epoch": 0.6158541356913686, "grad_norm": 0.0, - "learning_rate": 1.2215365645147594e-05, - "loss": 0.8995, + "learning_rate": 6.791480863992095e-06, + "loss": 0.9434, "step": 15740 }, { - "epoch": 0.44605967865340473, + "epoch": 0.6158932623835981, "grad_norm": 0.0, - "learning_rate": 1.2214470651721237e-05, - "loss": 0.9174, + "learning_rate": 6.790280657947459e-06, + "loss": 1.0008, "step": 15741 }, { - "epoch": 0.44608801609566723, + "epoch": 0.6159323890758275, "grad_norm": 0.0, - "learning_rate": 1.2213575639641571e-05, - "loss": 0.8065, + "learning_rate": 6.789080503446895e-06, + "loss": 0.9651, "step": 15742 }, { - "epoch": 0.44611635353792967, + "epoch": 0.615971515768057, "grad_norm": 0.0, - "learning_rate": 1.2212680608916134e-05, - "loss": 0.909, + "learning_rate": 6.787880400509674e-06, + "loss": 1.0832, "step": 15743 }, { - "epoch": 0.4461446909801921, + "epoch": 0.6160106424602864, "grad_norm": 0.0, - "learning_rate": 1.2211785559552472e-05, - "loss": 0.9712, + "learning_rate": 6.78668034915507e-06, + "loss": 1.0043, "step": 15744 }, { - "epoch": 0.4461730284224546, + "epoch": 0.6160497691525159, "grad_norm": 0.0, - "learning_rate": 1.2210890491558117e-05, - "loss": 0.8131, + "learning_rate": 6.7854803494023545e-06, + "loss": 1.0585, "step": 15745 }, { - "epoch": 0.44620136586471704, + "epoch": 0.6160888958447452, "grad_norm": 0.0, - "learning_rate": 1.2209995404940607e-05, - "loss": 0.9241, + "learning_rate": 6.7842804012707904e-06, + "loss": 1.0727, "step": 15746 }, { - "epoch": 0.44622970330697953, + "epoch": 0.6161280225369747, "grad_norm": 0.0, - "learning_rate": 1.2209100299707485e-05, - "loss": 0.9439, + "learning_rate": 6.78308050477966e-06, + "loss": 0.9259, "step": 15747 }, { - "epoch": 0.44625804074924197, + "epoch": 0.6161671492292041, "grad_norm": 0.0, - "learning_rate": 1.2208205175866295e-05, - "loss": 0.8645, + "learning_rate": 6.781880659948224e-06, + "loss": 0.985, "step": 15748 }, { - "epoch": 0.44628637819150446, + "epoch": 0.6162062759214336, "grad_norm": 0.0, - "learning_rate": 1.2207310033424569e-05, - "loss": 0.8073, + "learning_rate": 6.780680866795753e-06, + "loss": 1.051, "step": 15749 }, { - "epoch": 0.4463147156337669, + "epoch": 0.616245402613663, "grad_norm": 0.0, - "learning_rate": 1.2206414872389853e-05, - "loss": 0.9189, + "learning_rate": 6.779481125341513e-06, + "loss": 0.9348, "step": 15750 }, { - "epoch": 0.44634305307602934, + "epoch": 0.6162845293058925, "grad_norm": 0.0, - "learning_rate": 1.2205519692769685e-05, - "loss": 1.0338, + "learning_rate": 6.778281435604765e-06, + "loss": 1.048, "step": 15751 }, { - "epoch": 0.44637139051829183, + "epoch": 0.6163236559981219, "grad_norm": 0.0, - "learning_rate": 1.2204624494571607e-05, - "loss": 0.9427, + "learning_rate": 6.777081797604786e-06, + "loss": 1.0716, "step": 15752 }, { - "epoch": 0.44639972796055427, + "epoch": 0.6163627826903514, "grad_norm": 0.0, - "learning_rate": 1.2203729277803157e-05, - "loss": 0.8365, + "learning_rate": 6.775882211360835e-06, + "loss": 0.8939, "step": 15753 }, { - "epoch": 0.44642806540281676, + "epoch": 0.6164019093825808, "grad_norm": 0.0, - "learning_rate": 1.2202834042471874e-05, - "loss": 1.0544, + "learning_rate": 6.774682676892175e-06, + "loss": 1.0364, "step": 15754 }, { - "epoch": 0.4464564028450792, + "epoch": 0.6164410360748103, "grad_norm": 0.0, - "learning_rate": 1.2201938788585305e-05, - "loss": 0.8485, + "learning_rate": 6.773483194218065e-06, + "loss": 1.0817, "step": 15755 }, { - "epoch": 0.44648474028734164, + "epoch": 0.6164801627670397, "grad_norm": 0.0, - "learning_rate": 1.2201043516150988e-05, - "loss": 1.0335, + "learning_rate": 6.7722837633577766e-06, + "loss": 1.0536, "step": 15756 }, { - "epoch": 0.44651307772960414, + "epoch": 0.6165192894592691, "grad_norm": 0.0, - "learning_rate": 1.2200148225176462e-05, - "loss": 0.8006, + "learning_rate": 6.771084384330566e-06, + "loss": 1.1309, "step": 15757 }, { - "epoch": 0.4465414151718666, + "epoch": 0.6165584161514985, "grad_norm": 0.0, - "learning_rate": 1.2199252915669274e-05, - "loss": 0.8148, + "learning_rate": 6.769885057155694e-06, + "loss": 1.0394, "step": 15758 }, { - "epoch": 0.44656975261412907, + "epoch": 0.616597542843728, "grad_norm": 0.0, - "learning_rate": 1.2198357587636958e-05, - "loss": 0.9624, + "learning_rate": 6.768685781852418e-06, + "loss": 1.1026, "step": 15759 }, { - "epoch": 0.4465980900563915, + "epoch": 0.6166366695359574, "grad_norm": 0.0, - "learning_rate": 1.2197462241087063e-05, - "loss": 0.9929, + "learning_rate": 6.767486558440002e-06, + "loss": 1.0128, "step": 15760 }, { - "epoch": 0.44662642749865394, + "epoch": 0.6166757962281869, "grad_norm": 0.0, - "learning_rate": 1.2196566876027125e-05, - "loss": 0.9539, + "learning_rate": 6.7662873869377e-06, + "loss": 0.959, "step": 15761 }, { - "epoch": 0.44665476494091644, + "epoch": 0.6167149229204163, "grad_norm": 0.0, - "learning_rate": 1.2195671492464691e-05, - "loss": 0.9349, + "learning_rate": 6.765088267364772e-06, + "loss": 0.8943, "step": 15762 }, { - "epoch": 0.4466831023831789, + "epoch": 0.6167540496126458, "grad_norm": 0.0, - "learning_rate": 1.21947760904073e-05, - "loss": 0.9084, + "learning_rate": 6.763889199740473e-06, + "loss": 0.9699, "step": 15763 }, { - "epoch": 0.44671143982544137, + "epoch": 0.6167931763048752, "grad_norm": 0.0, - "learning_rate": 1.2193880669862492e-05, - "loss": 0.8586, + "learning_rate": 6.762690184084054e-06, + "loss": 1.0674, "step": 15764 }, { - "epoch": 0.4467397772677038, + "epoch": 0.6168323029971047, "grad_norm": 0.0, - "learning_rate": 1.2192985230837817e-05, - "loss": 0.8756, + "learning_rate": 6.761491220414779e-06, + "loss": 1.1023, "step": 15765 }, { - "epoch": 0.4467681147099663, + "epoch": 0.6168714296893341, "grad_norm": 0.0, - "learning_rate": 1.2192089773340811e-05, - "loss": 0.9665, + "learning_rate": 6.760292308751896e-06, + "loss": 0.9729, "step": 15766 }, { - "epoch": 0.44679645215222874, + "epoch": 0.6169105563815636, "grad_norm": 0.0, - "learning_rate": 1.2191194297379019e-05, - "loss": 0.8773, + "learning_rate": 6.759093449114659e-06, + "loss": 1.0169, "step": 15767 }, { - "epoch": 0.4468247895944912, + "epoch": 0.6169496830737929, "grad_norm": 0.0, - "learning_rate": 1.2190298802959982e-05, - "loss": 0.9641, + "learning_rate": 6.757894641522316e-06, + "loss": 1.121, "step": 15768 }, { - "epoch": 0.4468531270367537, + "epoch": 0.6169888097660223, "grad_norm": 0.0, - "learning_rate": 1.2189403290091246e-05, - "loss": 0.9874, + "learning_rate": 6.756695885994126e-06, + "loss": 0.9906, "step": 15769 }, { - "epoch": 0.4468814644790161, + "epoch": 0.6170279364582518, "grad_norm": 0.0, - "learning_rate": 1.2188507758780355e-05, - "loss": 0.8259, + "learning_rate": 6.755497182549337e-06, + "loss": 1.0303, "step": 15770 }, { - "epoch": 0.4469098019212786, + "epoch": 0.6170670631504812, "grad_norm": 0.0, - "learning_rate": 1.2187612209034847e-05, - "loss": 0.9602, + "learning_rate": 6.7542985312071975e-06, + "loss": 0.9244, "step": 15771 }, { - "epoch": 0.44693813936354104, + "epoch": 0.6171061898427107, "grad_norm": 0.0, - "learning_rate": 1.218671664086227e-05, - "loss": 0.9431, + "learning_rate": 6.7530999319869565e-06, + "loss": 1.0079, "step": 15772 }, { - "epoch": 0.4469664768058035, + "epoch": 0.6171453165349401, "grad_norm": 0.0, - "learning_rate": 1.218582105427017e-05, - "loss": 0.8703, + "learning_rate": 6.75190138490786e-06, + "loss": 0.9617, "step": 15773 }, { - "epoch": 0.446994814248066, + "epoch": 0.6171844432271696, "grad_norm": 0.0, - "learning_rate": 1.2184925449266083e-05, - "loss": 0.9427, + "learning_rate": 6.750702889989156e-06, + "loss": 1.019, "step": 15774 }, { - "epoch": 0.4470231516903284, + "epoch": 0.617223569919399, "grad_norm": 0.0, - "learning_rate": 1.2184029825857559e-05, - "loss": 0.9741, + "learning_rate": 6.749504447250095e-06, + "loss": 0.9448, "step": 15775 }, { - "epoch": 0.4470514891325909, + "epoch": 0.6172626966116285, "grad_norm": 0.0, - "learning_rate": 1.2183134184052143e-05, - "loss": 0.8993, + "learning_rate": 6.748306056709919e-06, + "loss": 1.0099, "step": 15776 }, { - "epoch": 0.44707982657485335, + "epoch": 0.6173018233038579, "grad_norm": 0.0, - "learning_rate": 1.2182238523857378e-05, - "loss": 0.9053, + "learning_rate": 6.747107718387872e-06, + "loss": 1.0231, "step": 15777 }, { - "epoch": 0.44710816401711584, + "epoch": 0.6173409499960874, "grad_norm": 0.0, - "learning_rate": 1.2181342845280803e-05, - "loss": 0.8384, + "learning_rate": 6.7459094323032e-06, + "loss": 1.0885, "step": 15778 }, { - "epoch": 0.4471365014593783, + "epoch": 0.6173800766883167, "grad_norm": 0.0, - "learning_rate": 1.2180447148329972e-05, - "loss": 0.9262, + "learning_rate": 6.744711198475143e-06, + "loss": 0.9733, "step": 15779 }, { - "epoch": 0.4471648389016407, + "epoch": 0.6174192033805462, "grad_norm": 0.0, - "learning_rate": 1.217955143301242e-05, - "loss": 0.965, + "learning_rate": 6.743513016922948e-06, + "loss": 1.0822, "step": 15780 }, { - "epoch": 0.4471931763439032, + "epoch": 0.6174583300727756, "grad_norm": 0.0, - "learning_rate": 1.2178655699335698e-05, - "loss": 1.0247, + "learning_rate": 6.742314887665853e-06, + "loss": 0.9774, "step": 15781 }, { - "epoch": 0.44722151378616565, + "epoch": 0.6174974567650051, "grad_norm": 0.0, - "learning_rate": 1.2177759947307352e-05, - "loss": 0.9599, + "learning_rate": 6.741116810723096e-06, + "loss": 1.006, "step": 15782 }, { - "epoch": 0.44724985122842814, + "epoch": 0.6175365834572345, "grad_norm": 0.0, - "learning_rate": 1.2176864176934925e-05, - "loss": 0.9683, + "learning_rate": 6.7399187861139215e-06, + "loss": 1.1207, "step": 15783 }, { - "epoch": 0.4472781886706906, + "epoch": 0.617575710149464, "grad_norm": 0.0, - "learning_rate": 1.2175968388225963e-05, - "loss": 0.9408, + "learning_rate": 6.738720813857566e-06, + "loss": 1.0378, "step": 15784 }, { - "epoch": 0.447306526112953, + "epoch": 0.6176148368416934, "grad_norm": 0.0, - "learning_rate": 1.217507258118801e-05, - "loss": 0.8267, + "learning_rate": 6.737522893973267e-06, + "loss": 1.0463, "step": 15785 }, { - "epoch": 0.4473348635552155, + "epoch": 0.6176539635339229, "grad_norm": 0.0, - "learning_rate": 1.2174176755828616e-05, - "loss": 0.9904, + "learning_rate": 6.736325026480262e-06, + "loss": 1.0749, "step": 15786 }, { - "epoch": 0.44736320099747795, + "epoch": 0.6176930902261523, "grad_norm": 0.0, - "learning_rate": 1.2173280912155318e-05, - "loss": 0.8653, + "learning_rate": 6.735127211397789e-06, + "loss": 1.0132, "step": 15787 }, { - "epoch": 0.44739153843974044, + "epoch": 0.6177322169183818, "grad_norm": 0.0, - "learning_rate": 1.217238505017567e-05, - "loss": 0.8335, + "learning_rate": 6.7339294487450825e-06, + "loss": 0.9603, "step": 15788 }, { - "epoch": 0.4474198758820029, + "epoch": 0.6177713436106111, "grad_norm": 0.0, - "learning_rate": 1.2171489169897217e-05, - "loss": 1.0144, + "learning_rate": 6.732731738541375e-06, + "loss": 1.1393, "step": 15789 }, { - "epoch": 0.4474482133242654, + "epoch": 0.6178104703028406, "grad_norm": 0.0, - "learning_rate": 1.2170593271327507e-05, - "loss": 0.9024, + "learning_rate": 6.7315340808059025e-06, + "loss": 0.9539, "step": 15790 }, { - "epoch": 0.4474765507665278, + "epoch": 0.61784959699507, "grad_norm": 0.0, - "learning_rate": 1.2169697354474081e-05, - "loss": 0.8569, + "learning_rate": 6.730336475557892e-06, + "loss": 0.9453, "step": 15791 }, { - "epoch": 0.44750488820879025, + "epoch": 0.6178887236872995, "grad_norm": 0.0, - "learning_rate": 1.216880141934449e-05, - "loss": 0.923, + "learning_rate": 6.729138922816587e-06, + "loss": 1.0442, "step": 15792 }, { - "epoch": 0.44753322565105275, + "epoch": 0.6179278503795289, "grad_norm": 0.0, - "learning_rate": 1.2167905465946276e-05, - "loss": 0.9492, + "learning_rate": 6.72794142260121e-06, + "loss": 1.0603, "step": 15793 }, { - "epoch": 0.4475615630933152, + "epoch": 0.6179669770717584, "grad_norm": 0.0, - "learning_rate": 1.2167009494286991e-05, - "loss": 0.9045, + "learning_rate": 6.726743974930995e-06, + "loss": 1.0568, "step": 15794 }, { - "epoch": 0.4475899005355777, + "epoch": 0.6180061037639878, "grad_norm": 0.0, - "learning_rate": 1.2166113504374182e-05, - "loss": 0.7371, + "learning_rate": 6.725546579825165e-06, + "loss": 0.9621, "step": 15795 }, { - "epoch": 0.4476182379778401, + "epoch": 0.6180452304562172, "grad_norm": 0.0, - "learning_rate": 1.2165217496215392e-05, - "loss": 0.8858, + "learning_rate": 6.724349237302958e-06, + "loss": 1.046, "step": 15796 }, { - "epoch": 0.44764657542010255, + "epoch": 0.6180843571484467, "grad_norm": 0.0, - "learning_rate": 1.2164321469818172e-05, - "loss": 0.8747, + "learning_rate": 6.723151947383599e-06, + "loss": 0.9601, "step": 15797 }, { - "epoch": 0.44767491286236505, + "epoch": 0.6181234838406761, "grad_norm": 0.0, - "learning_rate": 1.216342542519007e-05, - "loss": 0.8939, + "learning_rate": 6.721954710086312e-06, + "loss": 1.065, "step": 15798 }, { - "epoch": 0.4477032503046275, + "epoch": 0.6181626105329056, "grad_norm": 0.0, - "learning_rate": 1.2162529362338633e-05, - "loss": 0.8537, + "learning_rate": 6.720757525430326e-06, + "loss": 0.9536, "step": 15799 }, { - "epoch": 0.44773158774689, + "epoch": 0.6182017372251349, "grad_norm": 0.0, - "learning_rate": 1.2161633281271403e-05, - "loss": 1.0275, + "learning_rate": 6.71956039343486e-06, + "loss": 1.1377, "step": 15800 }, { - "epoch": 0.4477599251891524, + "epoch": 0.6182408639173644, "grad_norm": 0.0, - "learning_rate": 1.2160737181995937e-05, - "loss": 0.8924, + "learning_rate": 6.7183633141191475e-06, + "loss": 1.0309, "step": 15801 }, { - "epoch": 0.4477882626314149, + "epoch": 0.6182799906095938, "grad_norm": 0.0, - "learning_rate": 1.2159841064519777e-05, - "loss": 0.8258, + "learning_rate": 6.717166287502408e-06, + "loss": 1.0656, "step": 15802 }, { - "epoch": 0.44781660007367735, + "epoch": 0.6183191173018233, "grad_norm": 0.0, - "learning_rate": 1.2158944928850479e-05, - "loss": 0.93, + "learning_rate": 6.715969313603865e-06, + "loss": 0.9034, "step": 15803 }, { - "epoch": 0.4478449375159398, + "epoch": 0.6183582439940527, "grad_norm": 0.0, - "learning_rate": 1.215804877499558e-05, - "loss": 0.8407, + "learning_rate": 6.714772392442735e-06, + "loss": 1.0241, "step": 15804 }, { - "epoch": 0.4478732749582023, + "epoch": 0.6183973706862822, "grad_norm": 0.0, - "learning_rate": 1.2157152602962638e-05, - "loss": 0.9913, + "learning_rate": 6.71357552403825e-06, + "loss": 1.0395, "step": 15805 }, { - "epoch": 0.4479016124004647, + "epoch": 0.6184364973785116, "grad_norm": 0.0, - "learning_rate": 1.21562564127592e-05, - "loss": 0.8895, + "learning_rate": 6.7123787084096215e-06, + "loss": 1.0184, "step": 15806 }, { - "epoch": 0.4479299498427272, + "epoch": 0.6184756240707411, "grad_norm": 0.0, - "learning_rate": 1.215536020439281e-05, - "loss": 0.8469, + "learning_rate": 6.711181945576071e-06, + "loss": 1.1585, "step": 15807 }, { - "epoch": 0.44795828728498965, + "epoch": 0.6185147507629705, "grad_norm": 0.0, - "learning_rate": 1.2154463977871022e-05, - "loss": 0.7427, + "learning_rate": 6.709985235556819e-06, + "loss": 1.0706, "step": 15808 }, { - "epoch": 0.4479866247272521, + "epoch": 0.6185538774552, "grad_norm": 0.0, - "learning_rate": 1.2153567733201383e-05, - "loss": 0.8919, + "learning_rate": 6.708788578371077e-06, + "loss": 0.9945, "step": 15809 }, { - "epoch": 0.4480149621695146, + "epoch": 0.6185930041474293, "grad_norm": 0.0, - "learning_rate": 1.2152671470391443e-05, - "loss": 0.8806, + "learning_rate": 6.70759197403807e-06, + "loss": 1.0538, "step": 15810 }, { - "epoch": 0.448043299611777, + "epoch": 0.6186321308396588, "grad_norm": 0.0, - "learning_rate": 1.2151775189448755e-05, - "loss": 0.8617, + "learning_rate": 6.70639542257701e-06, + "loss": 1.1129, "step": 15811 }, { - "epoch": 0.4480716370540395, + "epoch": 0.6186712575318882, "grad_norm": 0.0, - "learning_rate": 1.2150878890380865e-05, - "loss": 0.919, + "learning_rate": 6.705198924007112e-06, + "loss": 0.9729, "step": 15812 }, { - "epoch": 0.44809997449630196, + "epoch": 0.6187103842241177, "grad_norm": 0.0, - "learning_rate": 1.214998257319532e-05, - "loss": 0.9367, + "learning_rate": 6.704002478347589e-06, + "loss": 1.1283, "step": 15813 }, { - "epoch": 0.44812831193856445, + "epoch": 0.6187495109163471, "grad_norm": 0.0, - "learning_rate": 1.2149086237899675e-05, - "loss": 0.8776, + "learning_rate": 6.702806085617657e-06, + "loss": 0.9905, "step": 15814 }, { - "epoch": 0.4481566493808269, + "epoch": 0.6187886376085766, "grad_norm": 0.0, - "learning_rate": 1.214818988450148e-05, - "loss": 0.9206, + "learning_rate": 6.70160974583653e-06, + "loss": 0.9376, "step": 15815 }, { - "epoch": 0.4481849868230893, + "epoch": 0.618827764300806, "grad_norm": 0.0, - "learning_rate": 1.2147293513008283e-05, - "loss": 0.7861, + "learning_rate": 6.700413459023416e-06, + "loss": 0.9528, "step": 15816 }, { - "epoch": 0.4482133242653518, + "epoch": 0.6188668909930355, "grad_norm": 0.0, - "learning_rate": 1.2146397123427635e-05, - "loss": 0.8482, + "learning_rate": 6.699217225197527e-06, + "loss": 1.0768, "step": 15817 }, { - "epoch": 0.44824166170761426, + "epoch": 0.6189060176852649, "grad_norm": 0.0, - "learning_rate": 1.2145500715767087e-05, - "loss": 0.9211, + "learning_rate": 6.698021044378071e-06, + "loss": 0.9881, "step": 15818 }, { - "epoch": 0.44826999914987675, + "epoch": 0.6189451443774944, "grad_norm": 0.0, - "learning_rate": 1.2144604290034193e-05, - "loss": 0.879, + "learning_rate": 6.696824916584262e-06, + "loss": 1.0523, "step": 15819 }, { - "epoch": 0.4482983365921392, + "epoch": 0.6189842710697238, "grad_norm": 0.0, - "learning_rate": 1.2143707846236495e-05, - "loss": 0.9783, + "learning_rate": 6.6956288418353064e-06, + "loss": 1.1032, "step": 15820 }, { - "epoch": 0.44832667403440163, + "epoch": 0.6190233977619533, "grad_norm": 0.0, - "learning_rate": 1.214281138438155e-05, - "loss": 0.9152, + "learning_rate": 6.694432820150412e-06, + "loss": 1.0618, "step": 15821 }, { - "epoch": 0.4483550114766641, + "epoch": 0.6190625244541826, "grad_norm": 0.0, - "learning_rate": 1.2141914904476912e-05, - "loss": 0.9227, + "learning_rate": 6.69323685154878e-06, + "loss": 0.9218, "step": 15822 }, { - "epoch": 0.44838334891892656, + "epoch": 0.6191016511464121, "grad_norm": 0.0, - "learning_rate": 1.2141018406530131e-05, - "loss": 0.8308, + "learning_rate": 6.692040936049624e-06, + "loss": 0.9838, "step": 15823 }, { - "epoch": 0.44841168636118905, + "epoch": 0.6191407778386415, "grad_norm": 0.0, - "learning_rate": 1.2140121890548755e-05, - "loss": 0.9151, + "learning_rate": 6.690845073672143e-06, + "loss": 0.9908, "step": 15824 }, { - "epoch": 0.4484400238034515, + "epoch": 0.6191799045308709, "grad_norm": 0.0, - "learning_rate": 1.2139225356540336e-05, - "loss": 1.0087, + "learning_rate": 6.689649264435546e-06, + "loss": 1.0477, "step": 15825 }, { - "epoch": 0.448468361245714, + "epoch": 0.6192190312231004, "grad_norm": 0.0, - "learning_rate": 1.2138328804512429e-05, - "loss": 1.0189, + "learning_rate": 6.688453508359032e-06, + "loss": 0.9762, "step": 15826 }, { - "epoch": 0.4484966986879764, + "epoch": 0.6192581579153298, "grad_norm": 0.0, - "learning_rate": 1.2137432234472583e-05, - "loss": 0.9682, + "learning_rate": 6.687257805461806e-06, + "loss": 0.9655, "step": 15827 }, { - "epoch": 0.44852503613023886, + "epoch": 0.6192972846075593, "grad_norm": 0.0, - "learning_rate": 1.2136535646428356e-05, - "loss": 0.9673, + "learning_rate": 6.6860621557630685e-06, + "loss": 0.9758, "step": 15828 }, { - "epoch": 0.44855337357250136, + "epoch": 0.6193364112997887, "grad_norm": 0.0, - "learning_rate": 1.2135639040387291e-05, - "loss": 0.9809, + "learning_rate": 6.684866559282022e-06, + "loss": 0.948, "step": 15829 }, { - "epoch": 0.4485817110147638, + "epoch": 0.6193755379920182, "grad_norm": 0.0, - "learning_rate": 1.2134742416356945e-05, - "loss": 0.7605, + "learning_rate": 6.683671016037861e-06, + "loss": 0.916, "step": 15830 }, { - "epoch": 0.4486100484570263, + "epoch": 0.6194146646842476, "grad_norm": 0.0, - "learning_rate": 1.2133845774344875e-05, - "loss": 0.9366, + "learning_rate": 6.682475526049787e-06, + "loss": 1.0708, "step": 15831 }, { - "epoch": 0.4486383858992887, + "epoch": 0.619453791376477, "grad_norm": 0.0, - "learning_rate": 1.2132949114358627e-05, - "loss": 0.8179, + "learning_rate": 6.681280089337002e-06, + "loss": 1.029, "step": 15832 }, { - "epoch": 0.44866672334155117, + "epoch": 0.6194929180687064, "grad_norm": 0.0, - "learning_rate": 1.2132052436405756e-05, - "loss": 0.9332, + "learning_rate": 6.6800847059187e-06, + "loss": 1.0771, "step": 15833 }, { - "epoch": 0.44869506078381366, + "epoch": 0.6195320447609359, "grad_norm": 0.0, - "learning_rate": 1.2131155740493816e-05, - "loss": 0.8812, + "learning_rate": 6.678889375814077e-06, + "loss": 0.9789, "step": 15834 }, { - "epoch": 0.4487233982260761, + "epoch": 0.6195711714531653, "grad_norm": 0.0, - "learning_rate": 1.2130259026630363e-05, - "loss": 0.7816, + "learning_rate": 6.6776940990423266e-06, + "loss": 0.9743, "step": 15835 }, { - "epoch": 0.4487517356683386, + "epoch": 0.6196102981453948, "grad_norm": 0.0, - "learning_rate": 1.2129362294822943e-05, - "loss": 0.8243, + "learning_rate": 6.676498875622649e-06, + "loss": 1.012, "step": 15836 }, { - "epoch": 0.44878007311060103, + "epoch": 0.6196494248376242, "grad_norm": 0.0, - "learning_rate": 1.2128465545079117e-05, - "loss": 0.9156, + "learning_rate": 6.675303705574235e-06, + "loss": 1.0325, "step": 15837 }, { - "epoch": 0.4488084105528635, + "epoch": 0.6196885515298537, "grad_norm": 0.0, - "learning_rate": 1.2127568777406434e-05, - "loss": 0.7908, + "learning_rate": 6.6741085889162775e-06, + "loss": 0.9851, "step": 15838 }, { - "epoch": 0.44883674799512596, + "epoch": 0.6197276782220831, "grad_norm": 0.0, - "learning_rate": 1.212667199181245e-05, - "loss": 0.8226, + "learning_rate": 6.6729135256679676e-06, + "loss": 1.0398, "step": 15839 }, { - "epoch": 0.4488650854373884, + "epoch": 0.6197668049143126, "grad_norm": 0.0, - "learning_rate": 1.2125775188304714e-05, - "loss": 0.9115, + "learning_rate": 6.6717185158484944e-06, + "loss": 1.0474, "step": 15840 }, { - "epoch": 0.4488934228796509, + "epoch": 0.619805931606542, "grad_norm": 0.0, - "learning_rate": 1.212487836689079e-05, - "loss": 0.7928, + "learning_rate": 6.670523559477055e-06, + "loss": 1.0078, "step": 15841 }, { - "epoch": 0.44892176032191333, + "epoch": 0.6198450582987715, "grad_norm": 0.0, - "learning_rate": 1.2123981527578221e-05, - "loss": 0.9259, + "learning_rate": 6.669328656572835e-06, + "loss": 1.0042, "step": 15842 }, { - "epoch": 0.4489500977641758, + "epoch": 0.6198841849910008, "grad_norm": 0.0, - "learning_rate": 1.212308467037457e-05, - "loss": 0.9559, + "learning_rate": 6.668133807155024e-06, + "loss": 1.0587, "step": 15843 }, { - "epoch": 0.44897843520643826, + "epoch": 0.6199233116832303, "grad_norm": 0.0, - "learning_rate": 1.2122187795287388e-05, - "loss": 1.0161, + "learning_rate": 6.6669390112428035e-06, + "loss": 1.0992, "step": 15844 }, { - "epoch": 0.4490067726487007, + "epoch": 0.6199624383754597, "grad_norm": 0.0, - "learning_rate": 1.2121290902324232e-05, - "loss": 1.0376, + "learning_rate": 6.665744268855372e-06, + "loss": 1.1153, "step": 15845 }, { - "epoch": 0.4490351100909632, + "epoch": 0.6200015650676892, "grad_norm": 0.0, - "learning_rate": 1.2120393991492652e-05, - "loss": 0.9129, + "learning_rate": 6.664549580011908e-06, + "loss": 0.8049, "step": 15846 }, { - "epoch": 0.44906344753322563, + "epoch": 0.6200406917599186, "grad_norm": 0.0, - "learning_rate": 1.2119497062800205e-05, - "loss": 0.8635, + "learning_rate": 6.663354944731598e-06, + "loss": 1.0272, "step": 15847 }, { - "epoch": 0.44909178497548813, + "epoch": 0.6200798184521481, "grad_norm": 0.0, - "learning_rate": 1.2118600116254452e-05, - "loss": 0.9606, + "learning_rate": 6.662160363033627e-06, + "loss": 1.0092, "step": 15848 }, { - "epoch": 0.44912012241775057, + "epoch": 0.6201189451443775, "grad_norm": 0.0, - "learning_rate": 1.211770315186294e-05, - "loss": 0.9961, + "learning_rate": 6.660965834937174e-06, + "loss": 1.1431, "step": 15849 }, { - "epoch": 0.44914845986001306, + "epoch": 0.620158071836607, "grad_norm": 0.0, - "learning_rate": 1.2116806169633227e-05, - "loss": 1.0363, + "learning_rate": 6.659771360461429e-06, + "loss": 0.9839, "step": 15850 }, { - "epoch": 0.4491767973022755, + "epoch": 0.6201971985288364, "grad_norm": 0.0, - "learning_rate": 1.2115909169572872e-05, - "loss": 0.8701, + "learning_rate": 6.6585769396255715e-06, + "loss": 0.9809, "step": 15851 }, { - "epoch": 0.44920513474453794, + "epoch": 0.6202363252210659, "grad_norm": 0.0, - "learning_rate": 1.211501215168943e-05, - "loss": 0.9956, + "learning_rate": 6.657382572448781e-06, + "loss": 1.1047, "step": 15852 }, { - "epoch": 0.44923347218680043, + "epoch": 0.6202754519132953, "grad_norm": 0.0, - "learning_rate": 1.211411511599045e-05, - "loss": 0.9339, + "learning_rate": 6.656188258950233e-06, + "loss": 0.8454, "step": 15853 }, { - "epoch": 0.44926180962906287, + "epoch": 0.6203145786055246, "grad_norm": 0.0, - "learning_rate": 1.2113218062483493e-05, - "loss": 0.9543, + "learning_rate": 6.654993999149117e-06, + "loss": 1.0455, "step": 15854 }, { - "epoch": 0.44929014707132536, + "epoch": 0.6203537052977541, "grad_norm": 0.0, - "learning_rate": 1.211232099117612e-05, - "loss": 0.8681, + "learning_rate": 6.653799793064606e-06, + "loss": 0.9623, "step": 15855 }, { - "epoch": 0.4493184845135878, + "epoch": 0.6203928319899835, "grad_norm": 0.0, - "learning_rate": 1.2111423902075883e-05, - "loss": 0.899, + "learning_rate": 6.652605640715876e-06, + "loss": 0.9433, "step": 15856 }, { - "epoch": 0.44934682195585024, + "epoch": 0.620431958682213, "grad_norm": 0.0, - "learning_rate": 1.2110526795190338e-05, - "loss": 0.9017, + "learning_rate": 6.651411542122105e-06, + "loss": 0.9573, "step": 15857 }, { - "epoch": 0.44937515939811273, + "epoch": 0.6204710853744424, "grad_norm": 0.0, - "learning_rate": 1.210962967052704e-05, - "loss": 0.9218, + "learning_rate": 6.650217497302465e-06, + "loss": 1.0304, "step": 15858 }, { - "epoch": 0.44940349684037517, + "epoch": 0.6205102120666719, "grad_norm": 0.0, - "learning_rate": 1.2108732528093549e-05, - "loss": 0.8952, + "learning_rate": 6.6490235062761375e-06, + "loss": 0.9356, "step": 15859 }, { - "epoch": 0.44943183428263767, + "epoch": 0.6205493387589013, "grad_norm": 0.0, - "learning_rate": 1.210783536789742e-05, - "loss": 0.8964, + "learning_rate": 6.647829569062295e-06, + "loss": 1.1093, "step": 15860 }, { - "epoch": 0.4494601717249001, + "epoch": 0.6205884654511308, "grad_norm": 0.0, - "learning_rate": 1.2106938189946213e-05, - "loss": 0.9239, + "learning_rate": 6.646635685680109e-06, + "loss": 0.9265, "step": 15861 }, { - "epoch": 0.4494885091671626, + "epoch": 0.6206275921433602, "grad_norm": 0.0, - "learning_rate": 1.2106040994247484e-05, - "loss": 0.9437, + "learning_rate": 6.6454418561487485e-06, + "loss": 0.8776, "step": 15862 }, { - "epoch": 0.44951684660942504, + "epoch": 0.6206667188355897, "grad_norm": 0.0, - "learning_rate": 1.2105143780808786e-05, - "loss": 0.8776, + "learning_rate": 6.64424808048739e-06, + "loss": 1.1038, "step": 15863 }, { - "epoch": 0.4495451840516875, + "epoch": 0.620705845527819, "grad_norm": 0.0, - "learning_rate": 1.2104246549637683e-05, - "loss": 0.8477, + "learning_rate": 6.643054358715203e-06, + "loss": 0.9614, "step": 15864 }, { - "epoch": 0.44957352149394997, + "epoch": 0.6207449722200485, "grad_norm": 0.0, - "learning_rate": 1.2103349300741727e-05, - "loss": 1.1548, + "learning_rate": 6.641860690851357e-06, + "loss": 0.9657, "step": 15865 }, { - "epoch": 0.4496018589362124, + "epoch": 0.6207840989122779, "grad_norm": 0.0, - "learning_rate": 1.2102452034128482e-05, - "loss": 0.8816, + "learning_rate": 6.6406670769150186e-06, + "loss": 1.0549, "step": 15866 }, { - "epoch": 0.4496301963784749, + "epoch": 0.6208232256045074, "grad_norm": 0.0, - "learning_rate": 1.21015547498055e-05, - "loss": 0.9595, + "learning_rate": 6.639473516925359e-06, + "loss": 0.92, "step": 15867 }, { - "epoch": 0.44965853382073734, + "epoch": 0.6208623522967368, "grad_norm": 0.0, - "learning_rate": 1.2100657447780344e-05, - "loss": 0.7792, + "learning_rate": 6.638280010901544e-06, + "loss": 0.932, "step": 15868 }, { - "epoch": 0.4496868712629998, + "epoch": 0.6209014789889663, "grad_norm": 0.0, - "learning_rate": 1.2099760128060571e-05, - "loss": 0.8218, + "learning_rate": 6.637086558862738e-06, + "loss": 1.0261, "step": 15869 }, { - "epoch": 0.44971520870526227, + "epoch": 0.6209406056811957, "grad_norm": 0.0, - "learning_rate": 1.2098862790653738e-05, - "loss": 1.0775, + "learning_rate": 6.6358931608281086e-06, + "loss": 1.0906, "step": 15870 }, { - "epoch": 0.4497435461475247, + "epoch": 0.6209797323734252, "grad_norm": 0.0, - "learning_rate": 1.2097965435567402e-05, - "loss": 0.8271, + "learning_rate": 6.634699816816819e-06, + "loss": 1.1072, "step": 15871 }, { - "epoch": 0.4497718835897872, + "epoch": 0.6210188590656546, "grad_norm": 0.0, - "learning_rate": 1.2097068062809125e-05, - "loss": 0.9853, + "learning_rate": 6.633506526848034e-06, + "loss": 1.2038, "step": 15872 }, { - "epoch": 0.44980022103204964, + "epoch": 0.6210579857578841, "grad_norm": 0.0, - "learning_rate": 1.2096170672386467e-05, - "loss": 0.9701, + "learning_rate": 6.632313290940917e-06, + "loss": 0.9836, "step": 15873 }, { - "epoch": 0.44982855847431213, + "epoch": 0.6210971124501135, "grad_norm": 0.0, - "learning_rate": 1.2095273264306984e-05, - "loss": 0.944, + "learning_rate": 6.631120109114628e-06, + "loss": 1.0056, "step": 15874 }, { - "epoch": 0.4498568959165746, + "epoch": 0.621136239142343, "grad_norm": 0.0, - "learning_rate": 1.2094375838578234e-05, - "loss": 1.0469, + "learning_rate": 6.629926981388325e-06, + "loss": 0.9617, "step": 15875 }, { - "epoch": 0.449885233358837, + "epoch": 0.6211753658345723, "grad_norm": 0.0, - "learning_rate": 1.209347839520778e-05, - "loss": 0.9325, + "learning_rate": 6.628733907781175e-06, + "loss": 1.0027, "step": 15876 }, { - "epoch": 0.4499135708010995, + "epoch": 0.6212144925268018, "grad_norm": 0.0, - "learning_rate": 1.2092580934203183e-05, - "loss": 0.8779, + "learning_rate": 6.627540888312335e-06, + "loss": 1.0449, "step": 15877 }, { - "epoch": 0.44994190824336194, + "epoch": 0.6212536192190312, "grad_norm": 0.0, - "learning_rate": 1.2091683455571997e-05, - "loss": 1.0103, + "learning_rate": 6.626347923000962e-06, + "loss": 0.9805, "step": 15878 }, { - "epoch": 0.44997024568562444, + "epoch": 0.6212927459112607, "grad_norm": 0.0, - "learning_rate": 1.2090785959321783e-05, - "loss": 1.0134, + "learning_rate": 6.625155011866213e-06, + "loss": 1.0129, "step": 15879 }, { - "epoch": 0.4499985831278869, + "epoch": 0.6213318726034901, "grad_norm": 0.0, - "learning_rate": 1.2089888445460105e-05, - "loss": 0.961, + "learning_rate": 6.623962154927243e-06, + "loss": 0.993, "step": 15880 }, { - "epoch": 0.4500269205701493, + "epoch": 0.6213709992957196, "grad_norm": 0.0, - "learning_rate": 1.208899091399452e-05, - "loss": 0.766, + "learning_rate": 6.6227693522032135e-06, + "loss": 0.9509, "step": 15881 }, { - "epoch": 0.4500552580124118, + "epoch": 0.621410125987949, "grad_norm": 0.0, - "learning_rate": 1.2088093364932591e-05, - "loss": 0.9531, + "learning_rate": 6.6215766037132765e-06, + "loss": 0.9492, "step": 15882 }, { - "epoch": 0.45008359545467425, + "epoch": 0.6214492526801784, "grad_norm": 0.0, - "learning_rate": 1.2087195798281873e-05, - "loss": 0.9024, + "learning_rate": 6.620383909476587e-06, + "loss": 0.8539, "step": 15883 }, { - "epoch": 0.45011193289693674, + "epoch": 0.6214883793724079, "grad_norm": 0.0, - "learning_rate": 1.208629821404993e-05, - "loss": 0.9169, + "learning_rate": 6.61919126951229e-06, + "loss": 1.1132, "step": 15884 }, { - "epoch": 0.4501402703391992, + "epoch": 0.6215275060646372, "grad_norm": 0.0, - "learning_rate": 1.2085400612244323e-05, - "loss": 0.9118, + "learning_rate": 6.6179986838395515e-06, + "loss": 1.0205, "step": 15885 }, { - "epoch": 0.45016860778146167, + "epoch": 0.6215666327568667, "grad_norm": 0.0, - "learning_rate": 1.2084502992872613e-05, - "loss": 0.7976, + "learning_rate": 6.616806152477515e-06, + "loss": 0.9329, "step": 15886 }, { - "epoch": 0.4501969452237241, + "epoch": 0.6216057594490961, "grad_norm": 0.0, - "learning_rate": 1.2083605355942358e-05, - "loss": 0.8895, + "learning_rate": 6.615613675445334e-06, + "loss": 0.9807, "step": 15887 }, { - "epoch": 0.45022528266598655, + "epoch": 0.6216448861413256, "grad_norm": 0.0, - "learning_rate": 1.2082707701461122e-05, - "loss": 0.9134, + "learning_rate": 6.614421252762155e-06, + "loss": 1.0249, "step": 15888 }, { - "epoch": 0.45025362010824904, + "epoch": 0.621684012833555, "grad_norm": 0.0, - "learning_rate": 1.208181002943647e-05, - "loss": 0.9623, + "learning_rate": 6.613228884447125e-06, + "loss": 0.9524, "step": 15889 }, { - "epoch": 0.4502819575505115, + "epoch": 0.6217231395257845, "grad_norm": 0.0, - "learning_rate": 1.2080912339875957e-05, - "loss": 1.0251, + "learning_rate": 6.612036570519398e-06, + "loss": 0.965, "step": 15890 }, { - "epoch": 0.450310294992774, + "epoch": 0.6217622662180139, "grad_norm": 0.0, - "learning_rate": 1.2080014632787142e-05, - "loss": 0.8155, + "learning_rate": 6.610844310998119e-06, + "loss": 1.1245, "step": 15891 }, { - "epoch": 0.4503386324350364, + "epoch": 0.6218013929102434, "grad_norm": 0.0, - "learning_rate": 1.2079116908177592e-05, - "loss": 0.8267, + "learning_rate": 6.609652105902433e-06, + "loss": 1.2169, "step": 15892 }, { - "epoch": 0.45036696987729885, + "epoch": 0.6218405196024728, "grad_norm": 0.0, - "learning_rate": 1.2078219166054873e-05, - "loss": 0.9398, + "learning_rate": 6.608459955251482e-06, + "loss": 1.0087, "step": 15893 }, { - "epoch": 0.45039530731956134, + "epoch": 0.6218796462947023, "grad_norm": 0.0, - "learning_rate": 1.2077321406426542e-05, - "loss": 0.9197, + "learning_rate": 6.60726785906442e-06, + "loss": 1.0652, "step": 15894 }, { - "epoch": 0.4504236447618238, + "epoch": 0.6219187729869317, "grad_norm": 0.0, - "learning_rate": 1.207642362930016e-05, - "loss": 0.7981, + "learning_rate": 6.6060758173603846e-06, + "loss": 1.0626, "step": 15895 }, { - "epoch": 0.4504519822040863, + "epoch": 0.6219578996791612, "grad_norm": 0.0, - "learning_rate": 1.2075525834683288e-05, - "loss": 1.0255, + "learning_rate": 6.604883830158517e-06, + "loss": 1.0342, "step": 15896 }, { - "epoch": 0.4504803196463487, + "epoch": 0.6219970263713905, "grad_norm": 0.0, - "learning_rate": 1.2074628022583494e-05, - "loss": 0.9439, + "learning_rate": 6.603691897477962e-06, + "loss": 0.9667, "step": 15897 }, { - "epoch": 0.4505086570886112, + "epoch": 0.62203615306362, "grad_norm": 0.0, - "learning_rate": 1.2073730193008336e-05, - "loss": 0.8805, + "learning_rate": 6.602500019337854e-06, + "loss": 1.134, "step": 15898 }, { - "epoch": 0.45053699453087365, + "epoch": 0.6220752797558494, "grad_norm": 0.0, - "learning_rate": 1.2072832345965381e-05, - "loss": 0.8577, + "learning_rate": 6.601308195757343e-06, + "loss": 1.1468, "step": 15899 }, { - "epoch": 0.4505653319731361, + "epoch": 0.6221144064480789, "grad_norm": 0.0, - "learning_rate": 1.2071934481462186e-05, - "loss": 0.9228, + "learning_rate": 6.600116426755565e-06, + "loss": 0.9979, "step": 15900 }, { - "epoch": 0.4505936694153986, + "epoch": 0.6221535331403083, "grad_norm": 0.0, - "learning_rate": 1.207103659950632e-05, - "loss": 0.8702, + "learning_rate": 6.598924712351655e-06, + "loss": 0.9857, "step": 15901 }, { - "epoch": 0.450622006857661, + "epoch": 0.6221926598325378, "grad_norm": 0.0, - "learning_rate": 1.2070138700105346e-05, - "loss": 0.9279, + "learning_rate": 6.5977330525647495e-06, + "loss": 1.0133, "step": 15902 }, { - "epoch": 0.4506503442999235, + "epoch": 0.6222317865247672, "grad_norm": 0.0, - "learning_rate": 1.2069240783266822e-05, - "loss": 0.9618, + "learning_rate": 6.596541447413991e-06, + "loss": 1.155, "step": 15903 }, { - "epoch": 0.45067868174218595, + "epoch": 0.6222709132169967, "grad_norm": 0.0, - "learning_rate": 1.2068342848998314e-05, - "loss": 0.9484, + "learning_rate": 6.595349896918512e-06, + "loss": 1.0346, "step": 15904 }, { - "epoch": 0.4507070191844484, + "epoch": 0.6223100399092261, "grad_norm": 0.0, - "learning_rate": 1.2067444897307386e-05, - "loss": 0.9649, + "learning_rate": 6.594158401097449e-06, + "loss": 1.0177, "step": 15905 }, { - "epoch": 0.4507353566267109, + "epoch": 0.6223491666014556, "grad_norm": 0.0, - "learning_rate": 1.2066546928201602e-05, - "loss": 0.7873, + "learning_rate": 6.592966959969933e-06, + "loss": 0.9808, "step": 15906 }, { - "epoch": 0.4507636940689733, + "epoch": 0.622388293293685, "grad_norm": 0.0, - "learning_rate": 1.2065648941688528e-05, - "loss": 0.9371, + "learning_rate": 6.591775573555097e-06, + "loss": 0.9026, "step": 15907 }, { - "epoch": 0.4507920315112358, + "epoch": 0.6224274199859144, "grad_norm": 0.0, - "learning_rate": 1.2064750937775722e-05, - "loss": 1.0005, + "learning_rate": 6.590584241872075e-06, + "loss": 1.103, "step": 15908 }, { - "epoch": 0.45082036895349825, + "epoch": 0.6224665466781438, "grad_norm": 0.0, - "learning_rate": 1.2063852916470755e-05, - "loss": 1.015, + "learning_rate": 6.589392964939998e-06, + "loss": 1.0646, "step": 15909 }, { - "epoch": 0.45084870639576075, + "epoch": 0.6225056733703732, "grad_norm": 0.0, - "learning_rate": 1.206295487778119e-05, - "loss": 0.8575, + "learning_rate": 6.588201742777998e-06, + "loss": 1.0456, "step": 15910 }, { - "epoch": 0.4508770438380232, + "epoch": 0.6225448000626027, "grad_norm": 0.0, - "learning_rate": 1.2062056821714588e-05, - "loss": 0.9178, + "learning_rate": 6.587010575405199e-06, + "loss": 1.0159, "step": 15911 }, { - "epoch": 0.4509053812802856, + "epoch": 0.6225839267548321, "grad_norm": 0.0, - "learning_rate": 1.2061158748278512e-05, - "loss": 0.9866, + "learning_rate": 6.585819462840737e-06, + "loss": 0.9175, "step": 15912 }, { - "epoch": 0.4509337187225481, + "epoch": 0.6226230534470616, "grad_norm": 0.0, - "learning_rate": 1.2060260657480536e-05, - "loss": 0.9473, + "learning_rate": 6.584628405103737e-06, + "loss": 0.9226, "step": 15913 }, { - "epoch": 0.45096205616481055, + "epoch": 0.622662180139291, "grad_norm": 0.0, - "learning_rate": 1.205936254932822e-05, - "loss": 0.943, + "learning_rate": 6.5834374022133205e-06, + "loss": 0.8837, "step": 15914 }, { - "epoch": 0.45099039360707305, + "epoch": 0.6227013068315205, "grad_norm": 0.0, - "learning_rate": 1.2058464423829127e-05, - "loss": 0.8391, + "learning_rate": 6.582246454188621e-06, + "loss": 1.0241, "step": 15915 }, { - "epoch": 0.4510187310493355, + "epoch": 0.6227404335237499, "grad_norm": 0.0, - "learning_rate": 1.205756628099082e-05, - "loss": 0.8942, + "learning_rate": 6.58105556104876e-06, + "loss": 1.0451, "step": 15916 }, { - "epoch": 0.4510470684915979, + "epoch": 0.6227795602159794, "grad_norm": 0.0, - "learning_rate": 1.2056668120820871e-05, - "loss": 0.8257, + "learning_rate": 6.579864722812863e-06, + "loss": 1.0115, "step": 15917 }, { - "epoch": 0.4510754059338604, + "epoch": 0.6228186869082087, "grad_norm": 0.0, - "learning_rate": 1.2055769943326844e-05, - "loss": 0.9604, + "learning_rate": 6.578673939500054e-06, + "loss": 0.9978, "step": 15918 }, { - "epoch": 0.45110374337612286, + "epoch": 0.6228578136004382, "grad_norm": 0.0, - "learning_rate": 1.2054871748516301e-05, - "loss": 0.8708, + "learning_rate": 6.5774832111294515e-06, + "loss": 1.0499, "step": 15919 }, { - "epoch": 0.45113208081838535, + "epoch": 0.6228969402926676, "grad_norm": 0.0, - "learning_rate": 1.2053973536396812e-05, - "loss": 0.8766, + "learning_rate": 6.576292537720182e-06, + "loss": 1.1064, "step": 15920 }, { - "epoch": 0.4511604182606478, + "epoch": 0.6229360669848971, "grad_norm": 0.0, - "learning_rate": 1.205307530697594e-05, - "loss": 0.9378, + "learning_rate": 6.575101919291365e-06, + "loss": 0.9352, "step": 15921 }, { - "epoch": 0.4511887557029103, + "epoch": 0.6229751936771265, "grad_norm": 0.0, - "learning_rate": 1.2052177060261254e-05, - "loss": 0.9553, + "learning_rate": 6.573911355862118e-06, + "loss": 0.9856, "step": 15922 }, { - "epoch": 0.4512170931451727, + "epoch": 0.623014320369356, "grad_norm": 0.0, - "learning_rate": 1.2051278796260318e-05, - "loss": 0.9778, + "learning_rate": 6.572720847451564e-06, + "loss": 0.989, "step": 15923 }, { - "epoch": 0.45124543058743516, + "epoch": 0.6230534470615854, "grad_norm": 0.0, - "learning_rate": 1.2050380514980697e-05, - "loss": 0.812, + "learning_rate": 6.571530394078813e-06, + "loss": 1.1107, "step": 15924 }, { - "epoch": 0.45127376802969765, + "epoch": 0.6230925737538149, "grad_norm": 0.0, - "learning_rate": 1.204948221642996e-05, - "loss": 0.804, + "learning_rate": 6.570339995762991e-06, + "loss": 0.9074, "step": 15925 }, { - "epoch": 0.4513021054719601, + "epoch": 0.6231317004460443, "grad_norm": 0.0, - "learning_rate": 1.2048583900615674e-05, - "loss": 0.9165, + "learning_rate": 6.569149652523213e-06, + "loss": 0.9539, "step": 15926 }, { - "epoch": 0.4513304429142226, + "epoch": 0.6231708271382738, "grad_norm": 0.0, - "learning_rate": 1.2047685567545406e-05, - "loss": 0.8703, + "learning_rate": 6.567959364378591e-06, + "loss": 1.0271, "step": 15927 }, { - "epoch": 0.451358780356485, + "epoch": 0.6232099538305031, "grad_norm": 0.0, - "learning_rate": 1.2046787217226723e-05, - "loss": 0.9851, + "learning_rate": 6.566769131348242e-06, + "loss": 1.0583, "step": 15928 }, { - "epoch": 0.45138711779874746, + "epoch": 0.6232490805227326, "grad_norm": 0.0, - "learning_rate": 1.2045888849667187e-05, - "loss": 0.8454, + "learning_rate": 6.565578953451273e-06, + "loss": 1.046, "step": 15929 }, { - "epoch": 0.45141545524100996, + "epoch": 0.623288207214962, "grad_norm": 0.0, - "learning_rate": 1.2044990464874373e-05, - "loss": 0.925, + "learning_rate": 6.56438883070681e-06, + "loss": 0.9151, "step": 15930 }, { - "epoch": 0.4514437926832724, + "epoch": 0.6233273339071915, "grad_norm": 0.0, - "learning_rate": 1.2044092062855844e-05, - "loss": 0.8238, + "learning_rate": 6.563198763133953e-06, + "loss": 0.9921, "step": 15931 }, { - "epoch": 0.4514721301255349, + "epoch": 0.6233664605994209, "grad_norm": 0.0, - "learning_rate": 1.2043193643619168e-05, - "loss": 0.8787, + "learning_rate": 6.56200875075182e-06, + "loss": 0.9866, "step": 15932 }, { - "epoch": 0.4515004675677973, + "epoch": 0.6234055872916504, "grad_norm": 0.0, - "learning_rate": 1.2042295207171912e-05, - "loss": 0.8623, + "learning_rate": 6.5608187935795135e-06, + "loss": 0.9267, "step": 15933 }, { - "epoch": 0.4515288050100598, + "epoch": 0.6234447139838798, "grad_norm": 0.0, - "learning_rate": 1.2041396753521649e-05, - "loss": 0.7976, + "learning_rate": 6.559628891636152e-06, + "loss": 1.04, "step": 15934 }, { - "epoch": 0.45155714245232226, + "epoch": 0.6234838406761093, "grad_norm": 0.0, - "learning_rate": 1.204049828267594e-05, - "loss": 0.9113, + "learning_rate": 6.55843904494084e-06, + "loss": 0.9543, "step": 15935 }, { - "epoch": 0.4515854798945847, + "epoch": 0.6235229673683387, "grad_norm": 0.0, - "learning_rate": 1.2039599794642358e-05, - "loss": 0.8099, + "learning_rate": 6.557249253512683e-06, + "loss": 1.0098, "step": 15936 }, { - "epoch": 0.4516138173368472, + "epoch": 0.6235620940605682, "grad_norm": 0.0, - "learning_rate": 1.2038701289428468e-05, - "loss": 0.8628, + "learning_rate": 6.55605951737079e-06, + "loss": 0.9964, "step": 15937 }, { - "epoch": 0.45164215477910963, + "epoch": 0.6236012207527976, "grad_norm": 0.0, - "learning_rate": 1.203780276704184e-05, - "loss": 0.7835, + "learning_rate": 6.554869836534261e-06, + "loss": 0.9254, "step": 15938 }, { - "epoch": 0.4516704922213721, + "epoch": 0.6236403474450269, "grad_norm": 0.0, - "learning_rate": 1.2036904227490043e-05, - "loss": 0.9836, + "learning_rate": 6.553680211022211e-06, + "loss": 1.0323, "step": 15939 }, { - "epoch": 0.45169882966363456, + "epoch": 0.6236794741372564, "grad_norm": 0.0, - "learning_rate": 1.2036005670780646e-05, - "loss": 0.925, + "learning_rate": 6.552490640853737e-06, + "loss": 0.9607, "step": 15940 }, { - "epoch": 0.451727167105897, + "epoch": 0.6237186008294858, "grad_norm": 0.0, - "learning_rate": 1.2035107096921215e-05, - "loss": 0.8752, + "learning_rate": 6.551301126047943e-06, + "loss": 0.974, "step": 15941 }, { - "epoch": 0.4517555045481595, + "epoch": 0.6237577275217153, "grad_norm": 0.0, - "learning_rate": 1.2034208505919324e-05, - "loss": 0.9293, + "learning_rate": 6.550111666623929e-06, + "loss": 1.0066, "step": 15942 }, { - "epoch": 0.45178384199042193, + "epoch": 0.6237968542139447, "grad_norm": 0.0, - "learning_rate": 1.2033309897782538e-05, - "loss": 0.8958, + "learning_rate": 6.548922262600801e-06, + "loss": 1.1016, "step": 15943 }, { - "epoch": 0.4518121794326844, + "epoch": 0.6238359809061742, "grad_norm": 0.0, - "learning_rate": 1.2032411272518428e-05, - "loss": 1.0143, + "learning_rate": 6.547732913997658e-06, + "loss": 1.0617, "step": 15944 }, { - "epoch": 0.45184051687494686, + "epoch": 0.6238751075984036, "grad_norm": 0.0, - "learning_rate": 1.2031512630134562e-05, - "loss": 0.935, + "learning_rate": 6.546543620833598e-06, + "loss": 1.0399, "step": 15945 }, { - "epoch": 0.45186885431720936, + "epoch": 0.6239142342906331, "grad_norm": 0.0, - "learning_rate": 1.2030613970638512e-05, - "loss": 0.8683, + "learning_rate": 6.54535438312772e-06, + "loss": 1.0532, "step": 15946 }, { - "epoch": 0.4518971917594718, + "epoch": 0.6239533609828625, "grad_norm": 0.0, - "learning_rate": 1.2029715294037847e-05, - "loss": 0.9948, + "learning_rate": 6.544165200899118e-06, + "loss": 0.9602, "step": 15947 }, { - "epoch": 0.45192552920173423, + "epoch": 0.623992487675092, "grad_norm": 0.0, - "learning_rate": 1.2028816600340137e-05, - "loss": 0.9092, + "learning_rate": 6.542976074166896e-06, + "loss": 1.0429, "step": 15948 }, { - "epoch": 0.4519538666439967, + "epoch": 0.6240316143673214, "grad_norm": 0.0, - "learning_rate": 1.2027917889552951e-05, - "loss": 0.8984, + "learning_rate": 6.541787002950146e-06, + "loss": 1.0681, "step": 15949 }, { - "epoch": 0.45198220408625916, + "epoch": 0.6240707410595508, "grad_norm": 0.0, - "learning_rate": 1.2027019161683857e-05, - "loss": 0.9216, + "learning_rate": 6.540597987267965e-06, + "loss": 1.0444, "step": 15950 }, { - "epoch": 0.45201054152852166, + "epoch": 0.6241098677517802, "grad_norm": 0.0, - "learning_rate": 1.2026120416740428e-05, - "loss": 0.8632, + "learning_rate": 6.539409027139443e-06, + "loss": 1.0644, "step": 15951 }, { - "epoch": 0.4520388789707841, + "epoch": 0.6241489944440097, "grad_norm": 0.0, - "learning_rate": 1.2025221654730238e-05, - "loss": 0.8881, + "learning_rate": 6.538220122583674e-06, + "loss": 1.0091, "step": 15952 }, { - "epoch": 0.45206721641304654, + "epoch": 0.6241881211362391, "grad_norm": 0.0, - "learning_rate": 1.2024322875660853e-05, - "loss": 0.9427, + "learning_rate": 6.537031273619756e-06, + "loss": 0.9958, "step": 15953 }, { - "epoch": 0.45209555385530903, + "epoch": 0.6242272478284686, "grad_norm": 0.0, - "learning_rate": 1.2023424079539841e-05, - "loss": 0.9106, + "learning_rate": 6.535842480266776e-06, + "loss": 1.0227, "step": 15954 }, { - "epoch": 0.45212389129757147, + "epoch": 0.624266374520698, "grad_norm": 0.0, - "learning_rate": 1.2022525266374778e-05, - "loss": 0.9388, + "learning_rate": 6.5346537425438235e-06, + "loss": 1.0922, "step": 15955 }, { - "epoch": 0.45215222873983396, + "epoch": 0.6243055012129275, "grad_norm": 0.0, - "learning_rate": 1.2021626436173238e-05, - "loss": 0.9399, + "learning_rate": 6.533465060469989e-06, + "loss": 1.0261, "step": 15956 }, { - "epoch": 0.4521805661820964, + "epoch": 0.6243446279051569, "grad_norm": 0.0, - "learning_rate": 1.2020727588942783e-05, - "loss": 0.8932, + "learning_rate": 6.532276434064364e-06, + "loss": 1.0559, "step": 15957 }, { - "epoch": 0.45220890362435884, + "epoch": 0.6243837545973864, "grad_norm": 0.0, - "learning_rate": 1.2019828724690988e-05, - "loss": 0.8767, + "learning_rate": 6.5310878633460305e-06, + "loss": 1.0176, "step": 15958 }, { - "epoch": 0.45223724106662133, + "epoch": 0.6244228812896158, "grad_norm": 0.0, - "learning_rate": 1.201892984342543e-05, - "loss": 0.8317, + "learning_rate": 6.529899348334083e-06, + "loss": 1.035, "step": 15959 }, { - "epoch": 0.45226557850888377, + "epoch": 0.6244620079818453, "grad_norm": 0.0, - "learning_rate": 1.2018030945153674e-05, - "loss": 0.8916, + "learning_rate": 6.5287108890476006e-06, + "loss": 0.9873, "step": 15960 }, { - "epoch": 0.45229391595114626, + "epoch": 0.6245011346740746, "grad_norm": 0.0, - "learning_rate": 1.2017132029883297e-05, - "loss": 0.9695, + "learning_rate": 6.527522485505673e-06, + "loss": 0.9144, "step": 15961 }, { - "epoch": 0.4523222533934087, + "epoch": 0.6245402613663041, "grad_norm": 0.0, - "learning_rate": 1.2016233097621864e-05, - "loss": 0.9675, + "learning_rate": 6.526334137727384e-06, + "loss": 0.9499, "step": 15962 }, { - "epoch": 0.4523505908356712, + "epoch": 0.6245793880585335, "grad_norm": 0.0, - "learning_rate": 1.2015334148376951e-05, - "loss": 0.8441, + "learning_rate": 6.525145845731816e-06, + "loss": 1.0219, "step": 15963 }, { - "epoch": 0.45237892827793363, + "epoch": 0.624618514750763, "grad_norm": 0.0, - "learning_rate": 1.201443518215613e-05, - "loss": 1.0005, + "learning_rate": 6.523957609538049e-06, + "loss": 1.0253, "step": 15964 }, { - "epoch": 0.4524072657201961, + "epoch": 0.6246576414429924, "grad_norm": 0.0, - "learning_rate": 1.2013536198966977e-05, - "loss": 0.9208, + "learning_rate": 6.522769429165168e-06, + "loss": 1.0414, "step": 15965 }, { - "epoch": 0.45243560316245857, + "epoch": 0.6246967681352219, "grad_norm": 0.0, - "learning_rate": 1.2012637198817056e-05, - "loss": 1.0438, + "learning_rate": 6.521581304632254e-06, + "loss": 1.0703, "step": 15966 }, { - "epoch": 0.452463940604721, + "epoch": 0.6247358948274513, "grad_norm": 0.0, - "learning_rate": 1.2011738181713947e-05, - "loss": 0.8757, + "learning_rate": 6.520393235958387e-06, + "loss": 1.0316, "step": 15967 }, { - "epoch": 0.4524922780469835, + "epoch": 0.6247750215196807, "grad_norm": 0.0, - "learning_rate": 1.2010839147665222e-05, - "loss": 1.0094, + "learning_rate": 6.5192052231626415e-06, + "loss": 0.9325, "step": 15968 }, { - "epoch": 0.45252061548924594, + "epoch": 0.6248141482119102, "grad_norm": 0.0, - "learning_rate": 1.2009940096678451e-05, - "loss": 0.7423, + "learning_rate": 6.518017266264096e-06, + "loss": 1.0509, "step": 15969 }, { - "epoch": 0.4525489529315084, + "epoch": 0.6248532749041396, "grad_norm": 0.0, - "learning_rate": 1.2009041028761207e-05, - "loss": 0.8654, + "learning_rate": 6.516829365281834e-06, + "loss": 0.992, "step": 15970 }, { - "epoch": 0.45257729037377087, + "epoch": 0.624892401596369, "grad_norm": 0.0, - "learning_rate": 1.2008141943921063e-05, - "loss": 0.9039, + "learning_rate": 6.515641520234928e-06, + "loss": 1.0055, "step": 15971 }, { - "epoch": 0.4526056278160333, + "epoch": 0.6249315282885984, "grad_norm": 0.0, - "learning_rate": 1.2007242842165599e-05, - "loss": 0.8933, + "learning_rate": 6.514453731142454e-06, + "loss": 0.9911, "step": 15972 }, { - "epoch": 0.4526339652582958, + "epoch": 0.6249706549808279, "grad_norm": 0.0, - "learning_rate": 1.200634372350238e-05, - "loss": 0.8416, + "learning_rate": 6.513265998023484e-06, + "loss": 1.0225, "step": 15973 }, { - "epoch": 0.45266230270055824, + "epoch": 0.6250097816730573, "grad_norm": 0.0, - "learning_rate": 1.2005444587938981e-05, - "loss": 0.942, + "learning_rate": 6.512078320897088e-06, + "loss": 0.9539, "step": 15974 }, { - "epoch": 0.45269064014282073, + "epoch": 0.6250489083652868, "grad_norm": 0.0, - "learning_rate": 1.2004545435482979e-05, - "loss": 0.9835, + "learning_rate": 6.510890699782349e-06, + "loss": 0.9781, "step": 15975 }, { - "epoch": 0.45271897758508317, + "epoch": 0.6250880350575162, "grad_norm": 0.0, - "learning_rate": 1.2003646266141949e-05, - "loss": 0.9683, + "learning_rate": 6.509703134698333e-06, + "loss": 0.9384, "step": 15976 }, { - "epoch": 0.4527473150273456, + "epoch": 0.6251271617497457, "grad_norm": 0.0, - "learning_rate": 1.2002747079923459e-05, - "loss": 0.8935, + "learning_rate": 6.508515625664111e-06, + "loss": 1.0887, "step": 15977 }, { - "epoch": 0.4527756524696081, + "epoch": 0.6251662884419751, "grad_norm": 0.0, - "learning_rate": 1.200184787683509e-05, - "loss": 0.8456, + "learning_rate": 6.50732817269875e-06, + "loss": 0.9521, "step": 15978 }, { - "epoch": 0.45280398991187054, + "epoch": 0.6252054151342046, "grad_norm": 0.0, - "learning_rate": 1.2000948656884408e-05, - "loss": 0.8465, + "learning_rate": 6.506140775821326e-06, + "loss": 1.0176, "step": 15979 }, { - "epoch": 0.45283232735413304, + "epoch": 0.625244541826434, "grad_norm": 0.0, - "learning_rate": 1.2000049420078996e-05, - "loss": 0.9527, + "learning_rate": 6.504953435050902e-06, + "loss": 1.0703, "step": 15980 }, { - "epoch": 0.4528606647963955, + "epoch": 0.6252836685186635, "grad_norm": 0.0, - "learning_rate": 1.1999150166426426e-05, - "loss": 0.8966, + "learning_rate": 6.503766150406545e-06, + "loss": 1.0203, "step": 15981 }, { - "epoch": 0.4528890022386579, + "epoch": 0.6253227952108928, "grad_norm": 0.0, - "learning_rate": 1.1998250895934267e-05, - "loss": 0.8372, + "learning_rate": 6.502578921907321e-06, + "loss": 0.9147, "step": 15982 }, { - "epoch": 0.4529173396809204, + "epoch": 0.6253619219031223, "grad_norm": 0.0, - "learning_rate": 1.1997351608610102e-05, - "loss": 0.8178, + "learning_rate": 6.5013917495723e-06, + "loss": 1.0882, "step": 15983 }, { - "epoch": 0.45294567712318284, + "epoch": 0.6254010485953517, "grad_norm": 0.0, - "learning_rate": 1.1996452304461502e-05, - "loss": 0.8383, + "learning_rate": 6.500204633420544e-06, + "loss": 0.9211, "step": 15984 }, { - "epoch": 0.45297401456544534, + "epoch": 0.6254401752875812, "grad_norm": 0.0, - "learning_rate": 1.1995552983496043e-05, - "loss": 1.0205, + "learning_rate": 6.4990175734711156e-06, + "loss": 1.0219, "step": 15985 }, { - "epoch": 0.4530023520077078, + "epoch": 0.6254793019798106, "grad_norm": 0.0, - "learning_rate": 1.1994653645721304e-05, - "loss": 0.8902, + "learning_rate": 6.4978305697430775e-06, + "loss": 1.0114, "step": 15986 }, { - "epoch": 0.45303068944997027, + "epoch": 0.6255184286720401, "grad_norm": 0.0, - "learning_rate": 1.199375429114485e-05, - "loss": 0.948, + "learning_rate": 6.496643622255489e-06, + "loss": 0.9694, "step": 15987 }, { - "epoch": 0.4530590268922327, + "epoch": 0.6255575553642695, "grad_norm": 0.0, - "learning_rate": 1.1992854919774269e-05, - "loss": 0.8264, + "learning_rate": 6.495456731027417e-06, + "loss": 0.9257, "step": 15988 }, { - "epoch": 0.45308736433449515, + "epoch": 0.625596682056499, "grad_norm": 0.0, - "learning_rate": 1.1991955531617126e-05, - "loss": 0.8883, + "learning_rate": 6.494269896077919e-06, + "loss": 1.1044, "step": 15989 }, { - "epoch": 0.45311570177675764, + "epoch": 0.6256358087487284, "grad_norm": 0.0, - "learning_rate": 1.1991056126681005e-05, - "loss": 1.008, + "learning_rate": 6.4930831174260535e-06, + "loss": 0.9136, "step": 15990 }, { - "epoch": 0.4531440392190201, + "epoch": 0.6256749354409579, "grad_norm": 0.0, - "learning_rate": 1.1990156704973478e-05, - "loss": 0.8871, + "learning_rate": 6.491896395090875e-06, + "loss": 1.0498, "step": 15991 }, { - "epoch": 0.45317237666128257, + "epoch": 0.6257140621331873, "grad_norm": 0.0, - "learning_rate": 1.1989257266502121e-05, - "loss": 0.8676, + "learning_rate": 6.490709729091449e-06, + "loss": 0.9413, "step": 15992 }, { - "epoch": 0.453200714103545, + "epoch": 0.6257531888254168, "grad_norm": 0.0, - "learning_rate": 1.1988357811274514e-05, - "loss": 1.0244, + "learning_rate": 6.489523119446826e-06, + "loss": 1.0186, "step": 15993 }, { - "epoch": 0.45322905154580745, + "epoch": 0.6257923155176461, "grad_norm": 0.0, - "learning_rate": 1.198745833929823e-05, - "loss": 0.8786, + "learning_rate": 6.4883365661760635e-06, + "loss": 0.836, "step": 15994 }, { - "epoch": 0.45325738898806994, + "epoch": 0.6258314422098756, "grad_norm": 0.0, - "learning_rate": 1.1986558850580843e-05, - "loss": 0.799, + "learning_rate": 6.487150069298216e-06, + "loss": 1.0198, "step": 15995 }, { - "epoch": 0.4532857264303324, + "epoch": 0.625870568902105, "grad_norm": 0.0, - "learning_rate": 1.1985659345129936e-05, - "loss": 0.7992, + "learning_rate": 6.4859636288323336e-06, + "loss": 1.1603, "step": 15996 }, { - "epoch": 0.4533140638725949, + "epoch": 0.6259096955943344, "grad_norm": 0.0, - "learning_rate": 1.1984759822953083e-05, - "loss": 0.93, + "learning_rate": 6.484777244797472e-06, + "loss": 0.967, "step": 15997 }, { - "epoch": 0.4533424013148573, + "epoch": 0.6259488222865639, "grad_norm": 0.0, - "learning_rate": 1.1983860284057862e-05, - "loss": 1.0229, + "learning_rate": 6.483590917212686e-06, + "loss": 0.8976, "step": 15998 }, { - "epoch": 0.4533707387571198, + "epoch": 0.6259879489787933, "grad_norm": 0.0, - "learning_rate": 1.1982960728451847e-05, - "loss": 0.9218, + "learning_rate": 6.482404646097023e-06, + "loss": 0.9888, "step": 15999 }, { - "epoch": 0.45339907619938224, + "epoch": 0.6260270756710228, "grad_norm": 0.0, - "learning_rate": 1.1982061156142618e-05, - "loss": 0.8706, + "learning_rate": 6.48121843146953e-06, + "loss": 0.9592, "step": 16000 }, { - "epoch": 0.4534274136416447, + "epoch": 0.6260662023632522, "grad_norm": 0.0, - "learning_rate": 1.1981161567137754e-05, - "loss": 0.7808, + "learning_rate": 6.480032273349264e-06, + "loss": 1.004, "step": 16001 }, { - "epoch": 0.4534557510839072, + "epoch": 0.6261053290554817, "grad_norm": 0.0, - "learning_rate": 1.1980261961444826e-05, - "loss": 0.9306, + "learning_rate": 6.478846171755268e-06, + "loss": 1.016, "step": 16002 }, { - "epoch": 0.4534840885261696, + "epoch": 0.626144455747711, "grad_norm": 0.0, - "learning_rate": 1.1979362339071421e-05, - "loss": 0.8721, + "learning_rate": 6.477660126706588e-06, + "loss": 0.9677, "step": 16003 }, { - "epoch": 0.4535124259684321, + "epoch": 0.6261835824399405, "grad_norm": 0.0, - "learning_rate": 1.1978462700025109e-05, - "loss": 0.991, + "learning_rate": 6.476474138222275e-06, + "loss": 1.0622, "step": 16004 }, { - "epoch": 0.45354076341069455, + "epoch": 0.6262227091321699, "grad_norm": 0.0, - "learning_rate": 1.1977563044313471e-05, - "loss": 0.9902, + "learning_rate": 6.4752882063213705e-06, + "loss": 1.0114, "step": 16005 }, { - "epoch": 0.453569100852957, + "epoch": 0.6262618358243994, "grad_norm": 0.0, - "learning_rate": 1.1976663371944085e-05, - "loss": 0.8786, + "learning_rate": 6.474102331022922e-06, + "loss": 1.1185, "step": 16006 }, { - "epoch": 0.4535974382952195, + "epoch": 0.6263009625166288, "grad_norm": 0.0, - "learning_rate": 1.1975763682924532e-05, - "loss": 0.8515, + "learning_rate": 6.472916512345972e-06, + "loss": 0.9285, "step": 16007 }, { - "epoch": 0.4536257757374819, + "epoch": 0.6263400892088583, "grad_norm": 0.0, - "learning_rate": 1.1974863977262386e-05, - "loss": 0.8674, + "learning_rate": 6.471730750309564e-06, + "loss": 1.1323, "step": 16008 }, { - "epoch": 0.4536541131797444, + "epoch": 0.6263792159010877, "grad_norm": 0.0, - "learning_rate": 1.1973964254965224e-05, - "loss": 0.9705, + "learning_rate": 6.470545044932734e-06, + "loss": 0.9116, "step": 16009 }, { - "epoch": 0.45368245062200685, + "epoch": 0.6264183425933172, "grad_norm": 0.0, - "learning_rate": 1.1973064516040634e-05, - "loss": 0.8296, + "learning_rate": 6.4693593962345315e-06, + "loss": 1.0142, "step": 16010 }, { - "epoch": 0.45371078806426934, + "epoch": 0.6264574692855466, "grad_norm": 0.0, - "learning_rate": 1.1972164760496187e-05, - "loss": 0.8255, + "learning_rate": 6.468173804233993e-06, + "loss": 0.8548, "step": 16011 }, { - "epoch": 0.4537391255065318, + "epoch": 0.6264965959777761, "grad_norm": 0.0, - "learning_rate": 1.197126498833946e-05, - "loss": 0.8721, + "learning_rate": 6.466988268950159e-06, + "loss": 0.9907, "step": 16012 }, { - "epoch": 0.4537674629487942, + "epoch": 0.6265357226700055, "grad_norm": 0.0, - "learning_rate": 1.1970365199578043e-05, - "loss": 0.959, + "learning_rate": 6.465802790402065e-06, + "loss": 0.872, "step": 16013 }, { - "epoch": 0.4537958003910567, + "epoch": 0.626574849362235, "grad_norm": 0.0, - "learning_rate": 1.1969465394219503e-05, - "loss": 0.8485, + "learning_rate": 6.464617368608747e-06, + "loss": 0.9715, "step": 16014 }, { - "epoch": 0.45382413783331915, + "epoch": 0.6266139760544643, "grad_norm": 0.0, - "learning_rate": 1.1968565572271426e-05, - "loss": 0.7058, + "learning_rate": 6.463432003589247e-06, + "loss": 1.0211, "step": 16015 }, { - "epoch": 0.45385247527558165, + "epoch": 0.6266531027466938, "grad_norm": 0.0, - "learning_rate": 1.196766573374139e-05, - "loss": 1.0334, + "learning_rate": 6.462246695362597e-06, + "loss": 0.9928, "step": 16016 }, { - "epoch": 0.4538808127178441, + "epoch": 0.6266922294389232, "grad_norm": 0.0, - "learning_rate": 1.1966765878636974e-05, - "loss": 1.0446, + "learning_rate": 6.461061443947832e-06, + "loss": 1.0953, "step": 16017 }, { - "epoch": 0.4539091501601065, + "epoch": 0.6267313561311527, "grad_norm": 0.0, - "learning_rate": 1.1965866006965762e-05, - "loss": 0.9327, + "learning_rate": 6.459876249363983e-06, + "loss": 0.9517, "step": 16018 }, { - "epoch": 0.453937487602369, + "epoch": 0.6267704828233821, "grad_norm": 0.0, - "learning_rate": 1.196496611873533e-05, - "loss": 1.0987, + "learning_rate": 6.4586911116300885e-06, + "loss": 1.0454, "step": 16019 }, { - "epoch": 0.45396582504463145, + "epoch": 0.6268096095156116, "grad_norm": 0.0, - "learning_rate": 1.1964066213953256e-05, - "loss": 0.8869, + "learning_rate": 6.457506030765177e-06, + "loss": 1.1168, "step": 16020 }, { - "epoch": 0.45399416248689395, + "epoch": 0.626848736207841, "grad_norm": 0.0, - "learning_rate": 1.1963166292627125e-05, - "loss": 0.9589, + "learning_rate": 6.45632100678828e-06, + "loss": 1.0807, "step": 16021 }, { - "epoch": 0.4540224999291564, + "epoch": 0.6268878629000705, "grad_norm": 0.0, - "learning_rate": 1.1962266354764513e-05, - "loss": 0.9998, + "learning_rate": 6.455136039718428e-06, + "loss": 1.0438, "step": 16022 }, { - "epoch": 0.4540508373714189, + "epoch": 0.6269269895922999, "grad_norm": 0.0, - "learning_rate": 1.1961366400373005e-05, - "loss": 0.9937, + "learning_rate": 6.453951129574644e-06, + "loss": 0.9792, "step": 16023 }, { - "epoch": 0.4540791748136813, + "epoch": 0.6269661162845293, "grad_norm": 0.0, - "learning_rate": 1.1960466429460179e-05, - "loss": 0.9682, + "learning_rate": 6.452766276375966e-06, + "loss": 0.954, "step": 16024 }, { - "epoch": 0.45410751225594376, + "epoch": 0.6270052429767587, "grad_norm": 0.0, - "learning_rate": 1.1959566442033615e-05, - "loss": 0.9127, + "learning_rate": 6.4515814801414155e-06, + "loss": 0.9794, "step": 16025 }, { - "epoch": 0.45413584969820625, + "epoch": 0.6270443696689881, "grad_norm": 0.0, - "learning_rate": 1.1958666438100897e-05, - "loss": 0.8721, + "learning_rate": 6.450396740890021e-06, + "loss": 1.0998, "step": 16026 }, { - "epoch": 0.4541641871404687, + "epoch": 0.6270834963612176, "grad_norm": 0.0, - "learning_rate": 1.1957766417669605e-05, - "loss": 0.8615, + "learning_rate": 6.449212058640804e-06, + "loss": 0.988, "step": 16027 }, { - "epoch": 0.4541925245827312, + "epoch": 0.627122623053447, "grad_norm": 0.0, - "learning_rate": 1.1956866380747316e-05, - "loss": 0.8828, + "learning_rate": 6.448027433412794e-06, + "loss": 0.9595, "step": 16028 }, { - "epoch": 0.4542208620249936, + "epoch": 0.6271617497456765, "grad_norm": 0.0, - "learning_rate": 1.1955966327341614e-05, - "loss": 0.8864, + "learning_rate": 6.446842865225014e-06, + "loss": 0.9421, "step": 16029 }, { - "epoch": 0.45424919946725606, + "epoch": 0.6272008764379059, "grad_norm": 0.0, - "learning_rate": 1.1955066257460086e-05, - "loss": 0.7939, + "learning_rate": 6.445658354096484e-06, + "loss": 1.0643, "step": 16030 }, { - "epoch": 0.45427753690951855, + "epoch": 0.6272400031301354, "grad_norm": 0.0, - "learning_rate": 1.1954166171110308e-05, - "loss": 0.9968, + "learning_rate": 6.444473900046229e-06, + "loss": 1.0347, "step": 16031 }, { - "epoch": 0.454305874351781, + "epoch": 0.6272791298223648, "grad_norm": 0.0, - "learning_rate": 1.1953266068299863e-05, - "loss": 0.851, + "learning_rate": 6.443289503093262e-06, + "loss": 1.0892, "step": 16032 }, { - "epoch": 0.4543342117940435, + "epoch": 0.6273182565145943, "grad_norm": 0.0, - "learning_rate": 1.195236594903633e-05, - "loss": 0.8604, + "learning_rate": 6.442105163256614e-06, + "loss": 0.9782, "step": 16033 }, { - "epoch": 0.4543625492363059, + "epoch": 0.6273573832068237, "grad_norm": 0.0, - "learning_rate": 1.1951465813327294e-05, - "loss": 0.8206, + "learning_rate": 6.440920880555297e-06, + "loss": 1.0042, "step": 16034 }, { - "epoch": 0.4543908866785684, + "epoch": 0.6273965098990532, "grad_norm": 0.0, - "learning_rate": 1.1950565661180337e-05, - "loss": 0.9493, + "learning_rate": 6.4397366550083314e-06, + "loss": 1.155, "step": 16035 }, { - "epoch": 0.45441922412083086, + "epoch": 0.6274356365912825, "grad_norm": 0.0, - "learning_rate": 1.1949665492603045e-05, - "loss": 0.9279, + "learning_rate": 6.43855248663473e-06, + "loss": 1.0143, "step": 16036 }, { - "epoch": 0.4544475615630933, + "epoch": 0.627474763283512, "grad_norm": 0.0, - "learning_rate": 1.194876530760299e-05, - "loss": 0.894, + "learning_rate": 6.437368375453517e-06, + "loss": 0.9019, "step": 16037 }, { - "epoch": 0.4544758990053558, + "epoch": 0.6275138899757414, "grad_norm": 0.0, - "learning_rate": 1.1947865106187766e-05, - "loss": 0.8639, + "learning_rate": 6.436184321483705e-06, + "loss": 1.0129, "step": 16038 }, { - "epoch": 0.4545042364476182, + "epoch": 0.6275530166679709, "grad_norm": 0.0, - "learning_rate": 1.1946964888364949e-05, - "loss": 0.9453, + "learning_rate": 6.435000324744306e-06, + "loss": 0.9596, "step": 16039 }, { - "epoch": 0.4545325738898807, + "epoch": 0.6275921433602003, "grad_norm": 0.0, - "learning_rate": 1.1946064654142124e-05, - "loss": 0.8707, + "learning_rate": 6.433816385254332e-06, + "loss": 0.9971, "step": 16040 }, { - "epoch": 0.45456091133214316, + "epoch": 0.6276312700524298, "grad_norm": 0.0, - "learning_rate": 1.1945164403526874e-05, - "loss": 0.9455, + "learning_rate": 6.4326325030328e-06, + "loss": 0.9771, "step": 16041 }, { - "epoch": 0.4545892487744056, + "epoch": 0.6276703967446592, "grad_norm": 0.0, - "learning_rate": 1.194426413652678e-05, - "loss": 0.8567, + "learning_rate": 6.43144867809872e-06, + "loss": 0.9852, "step": 16042 }, { - "epoch": 0.4546175862166681, + "epoch": 0.6277095234368887, "grad_norm": 0.0, - "learning_rate": 1.194336385314943e-05, - "loss": 0.8056, + "learning_rate": 6.430264910471104e-06, + "loss": 1.1017, "step": 16043 }, { - "epoch": 0.45464592365893053, + "epoch": 0.6277486501291181, "grad_norm": 0.0, - "learning_rate": 1.1942463553402407e-05, - "loss": 0.873, + "learning_rate": 6.429081200168958e-06, + "loss": 1.0986, "step": 16044 }, { - "epoch": 0.454674261101193, + "epoch": 0.6277877768213476, "grad_norm": 0.0, - "learning_rate": 1.1941563237293285e-05, - "loss": 0.996, + "learning_rate": 6.4278975472112945e-06, + "loss": 0.9375, "step": 16045 }, { - "epoch": 0.45470259854345546, + "epoch": 0.627826903513577, "grad_norm": 0.0, - "learning_rate": 1.1940662904829661e-05, - "loss": 0.8824, + "learning_rate": 6.42671395161712e-06, + "loss": 1.0563, "step": 16046 }, { - "epoch": 0.45473093598571795, + "epoch": 0.6278660302058064, "grad_norm": 0.0, - "learning_rate": 1.193976255601911e-05, - "loss": 0.8484, + "learning_rate": 6.425530413405443e-06, + "loss": 0.9373, "step": 16047 }, { - "epoch": 0.4547592734279804, + "epoch": 0.6279051568980358, "grad_norm": 0.0, - "learning_rate": 1.1938862190869218e-05, - "loss": 0.8727, + "learning_rate": 6.424346932595267e-06, + "loss": 1.061, "step": 16048 }, { - "epoch": 0.45478761087024283, + "epoch": 0.6279442835902653, "grad_norm": 0.0, - "learning_rate": 1.1937961809387569e-05, - "loss": 0.9152, + "learning_rate": 6.423163509205596e-06, + "loss": 0.9518, "step": 16049 }, { - "epoch": 0.4548159483125053, + "epoch": 0.6279834102824947, "grad_norm": 0.0, - "learning_rate": 1.1937061411581752e-05, - "loss": 0.9261, + "learning_rate": 6.421980143255442e-06, + "loss": 1.0548, "step": 16050 }, { - "epoch": 0.45484428575476776, + "epoch": 0.6280225369747242, "grad_norm": 0.0, - "learning_rate": 1.1936160997459344e-05, - "loss": 0.875, + "learning_rate": 6.420796834763802e-06, + "loss": 1.0944, "step": 16051 }, { - "epoch": 0.45487262319703026, + "epoch": 0.6280616636669536, "grad_norm": 0.0, - "learning_rate": 1.1935260567027936e-05, - "loss": 0.9808, + "learning_rate": 6.419613583749679e-06, + "loss": 1.0241, "step": 16052 }, { - "epoch": 0.4549009606392927, + "epoch": 0.628100790359183, "grad_norm": 0.0, - "learning_rate": 1.1934360120295105e-05, - "loss": 0.9431, + "learning_rate": 6.418430390232076e-06, + "loss": 1.0797, "step": 16053 }, { - "epoch": 0.45492929808155513, + "epoch": 0.6281399170514125, "grad_norm": 0.0, - "learning_rate": 1.1933459657268444e-05, - "loss": 0.8605, + "learning_rate": 6.4172472542299905e-06, + "loss": 1.0175, "step": 16054 }, { - "epoch": 0.4549576355238176, + "epoch": 0.6281790437436419, "grad_norm": 0.0, - "learning_rate": 1.1932559177955533e-05, - "loss": 0.8879, + "learning_rate": 6.416064175762428e-06, + "loss": 0.9451, "step": 16055 }, { - "epoch": 0.45498597296608007, + "epoch": 0.6282181704358714, "grad_norm": 0.0, - "learning_rate": 1.193165868236396e-05, - "loss": 0.8257, + "learning_rate": 6.414881154848383e-06, + "loss": 0.9606, "step": 16056 }, { - "epoch": 0.45501431040834256, + "epoch": 0.6282572971281007, "grad_norm": 0.0, - "learning_rate": 1.1930758170501306e-05, - "loss": 0.9856, + "learning_rate": 6.413698191506855e-06, + "loss": 1.0303, "step": 16057 }, { - "epoch": 0.455042647850605, + "epoch": 0.6282964238203302, "grad_norm": 0.0, - "learning_rate": 1.1929857642375162e-05, - "loss": 0.9903, + "learning_rate": 6.412515285756836e-06, + "loss": 1.0866, "step": 16058 }, { - "epoch": 0.4550709852928675, + "epoch": 0.6283355505125596, "grad_norm": 0.0, - "learning_rate": 1.192895709799311e-05, - "loss": 1.0271, + "learning_rate": 6.41133243761733e-06, + "loss": 1.0291, "step": 16059 }, { - "epoch": 0.45509932273512993, + "epoch": 0.6283746772047891, "grad_norm": 0.0, - "learning_rate": 1.1928056537362736e-05, - "loss": 0.9309, + "learning_rate": 6.4101496471073284e-06, + "loss": 1.0421, "step": 16060 }, { - "epoch": 0.45512766017739237, + "epoch": 0.6284138038970185, "grad_norm": 0.0, - "learning_rate": 1.1927155960491623e-05, - "loss": 0.8491, + "learning_rate": 6.408966914245824e-06, + "loss": 0.965, "step": 16061 }, { - "epoch": 0.45515599761965486, + "epoch": 0.628452930589248, "grad_norm": 0.0, - "learning_rate": 1.1926255367387361e-05, - "loss": 0.971, + "learning_rate": 6.407784239051813e-06, + "loss": 0.9789, "step": 16062 }, { - "epoch": 0.4551843350619173, + "epoch": 0.6284920572814774, "grad_norm": 0.0, - "learning_rate": 1.1925354758057535e-05, - "loss": 0.9035, + "learning_rate": 6.406601621544282e-06, + "loss": 1.0038, "step": 16063 }, { - "epoch": 0.4552126725041798, + "epoch": 0.6285311839737069, "grad_norm": 0.0, - "learning_rate": 1.1924454132509733e-05, - "loss": 0.8164, + "learning_rate": 6.405419061742229e-06, + "loss": 1.0609, "step": 16064 }, { - "epoch": 0.45524100994644223, + "epoch": 0.6285703106659363, "grad_norm": 0.0, - "learning_rate": 1.1923553490751539e-05, - "loss": 0.8525, + "learning_rate": 6.40423655966464e-06, + "loss": 0.9911, "step": 16065 }, { - "epoch": 0.45526934738870467, + "epoch": 0.6286094373581658, "grad_norm": 0.0, - "learning_rate": 1.1922652832790536e-05, - "loss": 0.8377, + "learning_rate": 6.403054115330509e-06, + "loss": 1.0623, "step": 16066 }, { - "epoch": 0.45529768483096716, + "epoch": 0.6286485640503952, "grad_norm": 0.0, - "learning_rate": 1.1921752158634316e-05, - "loss": 0.8116, + "learning_rate": 6.401871728758816e-06, + "loss": 1.0312, "step": 16067 }, { - "epoch": 0.4553260222732296, + "epoch": 0.6286876907426246, "grad_norm": 0.0, - "learning_rate": 1.1920851468290465e-05, - "loss": 0.8761, + "learning_rate": 6.400689399968559e-06, + "loss": 1.0137, "step": 16068 }, { - "epoch": 0.4553543597154921, + "epoch": 0.628726817434854, "grad_norm": 0.0, - "learning_rate": 1.1919950761766568e-05, - "loss": 0.863, + "learning_rate": 6.39950712897872e-06, + "loss": 0.965, "step": 16069 }, { - "epoch": 0.45538269715775453, + "epoch": 0.6287659441270835, "grad_norm": 0.0, - "learning_rate": 1.191905003907021e-05, - "loss": 0.8622, + "learning_rate": 6.398324915808285e-06, + "loss": 0.9839, "step": 16070 }, { - "epoch": 0.45541103460001703, + "epoch": 0.6288050708193129, "grad_norm": 0.0, - "learning_rate": 1.1918149300208986e-05, - "loss": 0.9459, + "learning_rate": 6.397142760476239e-06, + "loss": 0.9818, "step": 16071 }, { - "epoch": 0.45543937204227947, + "epoch": 0.6288441975115424, "grad_norm": 0.0, - "learning_rate": 1.1917248545190476e-05, - "loss": 0.8581, + "learning_rate": 6.395960663001561e-06, + "loss": 1.0377, "step": 16072 }, { - "epoch": 0.4554677094845419, + "epoch": 0.6288833242037718, "grad_norm": 0.0, - "learning_rate": 1.191634777402227e-05, - "loss": 0.9023, + "learning_rate": 6.394778623403241e-06, + "loss": 0.9784, "step": 16073 }, { - "epoch": 0.4554960469268044, + "epoch": 0.6289224508960013, "grad_norm": 0.0, - "learning_rate": 1.1915446986711953e-05, - "loss": 0.9535, + "learning_rate": 6.393596641700262e-06, + "loss": 1.0571, "step": 16074 }, { - "epoch": 0.45552438436906684, + "epoch": 0.6289615775882307, "grad_norm": 0.0, - "learning_rate": 1.1914546183267115e-05, - "loss": 0.9868, + "learning_rate": 6.392414717911599e-06, + "loss": 1.0916, "step": 16075 }, { - "epoch": 0.45555272181132933, + "epoch": 0.6290007042804602, "grad_norm": 0.0, - "learning_rate": 1.1913645363695345e-05, - "loss": 1.0224, + "learning_rate": 6.391232852056233e-06, + "loss": 1.046, "step": 16076 }, { - "epoch": 0.45558105925359177, + "epoch": 0.6290398309726896, "grad_norm": 0.0, - "learning_rate": 1.1912744528004233e-05, - "loss": 0.9698, + "learning_rate": 6.390051044153149e-06, + "loss": 0.9133, "step": 16077 }, { - "epoch": 0.4556093966958542, + "epoch": 0.6290789576649191, "grad_norm": 0.0, - "learning_rate": 1.1911843676201356e-05, - "loss": 0.9394, + "learning_rate": 6.388869294221322e-06, + "loss": 1.0619, "step": 16078 }, { - "epoch": 0.4556377341381167, + "epoch": 0.6291180843571484, "grad_norm": 0.0, - "learning_rate": 1.1910942808294315e-05, - "loss": 0.8529, + "learning_rate": 6.387687602279729e-06, + "loss": 0.9836, "step": 16079 }, { - "epoch": 0.45566607158037914, + "epoch": 0.6291572110493779, "grad_norm": 0.0, - "learning_rate": 1.1910041924290692e-05, - "loss": 0.8785, + "learning_rate": 6.3865059683473456e-06, + "loss": 0.9947, "step": 16080 }, { - "epoch": 0.45569440902264163, + "epoch": 0.6291963377416073, "grad_norm": 0.0, - "learning_rate": 1.1909141024198076e-05, - "loss": 0.8729, + "learning_rate": 6.385324392443144e-06, + "loss": 1.0653, "step": 16081 }, { - "epoch": 0.45572274646490407, + "epoch": 0.6292354644338367, "grad_norm": 0.0, - "learning_rate": 1.1908240108024053e-05, - "loss": 0.9141, + "learning_rate": 6.384142874586108e-06, + "loss": 0.9482, "step": 16082 }, { - "epoch": 0.45575108390716657, + "epoch": 0.6292745911260662, "grad_norm": 0.0, - "learning_rate": 1.1907339175776222e-05, - "loss": 0.8034, + "learning_rate": 6.382961414795208e-06, + "loss": 1.0212, "step": 16083 }, { - "epoch": 0.455779421349429, + "epoch": 0.6293137178182956, "grad_norm": 0.0, - "learning_rate": 1.1906438227462162e-05, - "loss": 0.9013, + "learning_rate": 6.3817800130894135e-06, + "loss": 1.0184, "step": 16084 }, { - "epoch": 0.45580775879169144, + "epoch": 0.6293528445105251, "grad_norm": 0.0, - "learning_rate": 1.1905537263089464e-05, - "loss": 0.8752, + "learning_rate": 6.380598669487697e-06, + "loss": 1.0645, "step": 16085 }, { - "epoch": 0.45583609623395394, + "epoch": 0.6293919712027545, "grad_norm": 0.0, - "learning_rate": 1.1904636282665718e-05, - "loss": 0.8646, + "learning_rate": 6.379417384009031e-06, + "loss": 1.0189, "step": 16086 }, { - "epoch": 0.4558644336762164, + "epoch": 0.629431097894984, "grad_norm": 0.0, - "learning_rate": 1.1903735286198514e-05, - "loss": 0.8929, + "learning_rate": 6.378236156672384e-06, + "loss": 0.9919, "step": 16087 }, { - "epoch": 0.45589277111847887, + "epoch": 0.6294702245872134, "grad_norm": 0.0, - "learning_rate": 1.1902834273695438e-05, - "loss": 0.8952, + "learning_rate": 6.3770549874967295e-06, + "loss": 0.9966, "step": 16088 }, { - "epoch": 0.4559211085607413, + "epoch": 0.6295093512794429, "grad_norm": 0.0, - "learning_rate": 1.1901933245164085e-05, - "loss": 0.9343, + "learning_rate": 6.37587387650103e-06, + "loss": 0.8989, "step": 16089 }, { - "epoch": 0.45594944600300374, + "epoch": 0.6295484779716722, "grad_norm": 0.0, - "learning_rate": 1.1901032200612043e-05, - "loss": 0.9105, + "learning_rate": 6.374692823704254e-06, + "loss": 0.9993, "step": 16090 }, { - "epoch": 0.45597778344526624, + "epoch": 0.6295876046639017, "grad_norm": 0.0, - "learning_rate": 1.19001311400469e-05, - "loss": 1.0044, + "learning_rate": 6.37351182912537e-06, + "loss": 0.9843, "step": 16091 }, { - "epoch": 0.4560061208875287, + "epoch": 0.6296267313561311, "grad_norm": 0.0, - "learning_rate": 1.1899230063476246e-05, - "loss": 0.8618, + "learning_rate": 6.372330892783338e-06, + "loss": 1.0389, "step": 16092 }, { - "epoch": 0.45603445832979117, + "epoch": 0.6296658580483606, "grad_norm": 0.0, - "learning_rate": 1.1898328970907671e-05, - "loss": 1.0372, + "learning_rate": 6.37115001469713e-06, + "loss": 1.0863, "step": 16093 }, { - "epoch": 0.4560627957720536, + "epoch": 0.62970498474059, "grad_norm": 0.0, - "learning_rate": 1.1897427862348768e-05, - "loss": 0.9026, + "learning_rate": 6.369969194885703e-06, + "loss": 1.0067, "step": 16094 }, { - "epoch": 0.4560911332143161, + "epoch": 0.6297441114328195, "grad_norm": 0.0, - "learning_rate": 1.1896526737807124e-05, - "loss": 0.9729, + "learning_rate": 6.3687884333680246e-06, + "loss": 1.1848, "step": 16095 }, { - "epoch": 0.45611947065657854, + "epoch": 0.6297832381250489, "grad_norm": 0.0, - "learning_rate": 1.1895625597290333e-05, - "loss": 0.93, + "learning_rate": 6.367607730163054e-06, + "loss": 0.9759, "step": 16096 }, { - "epoch": 0.456147808098841, + "epoch": 0.6298223648172784, "grad_norm": 0.0, - "learning_rate": 1.1894724440805982e-05, - "loss": 0.9549, + "learning_rate": 6.366427085289752e-06, + "loss": 1.117, "step": 16097 }, { - "epoch": 0.4561761455411035, + "epoch": 0.6298614915095078, "grad_norm": 0.0, - "learning_rate": 1.1893823268361667e-05, - "loss": 0.8341, + "learning_rate": 6.3652464987670726e-06, + "loss": 1.0203, "step": 16098 }, { - "epoch": 0.4562044829833659, + "epoch": 0.6299006182017373, "grad_norm": 0.0, - "learning_rate": 1.189292207996497e-05, - "loss": 0.8259, + "learning_rate": 6.364065970613984e-06, + "loss": 0.9505, "step": 16099 }, { - "epoch": 0.4562328204256284, + "epoch": 0.6299397448939666, "grad_norm": 0.0, - "learning_rate": 1.189202087562349e-05, - "loss": 0.8616, + "learning_rate": 6.36288550084944e-06, + "loss": 1.0341, "step": 16100 }, { - "epoch": 0.45626115786789084, + "epoch": 0.6299788715861961, "grad_norm": 0.0, - "learning_rate": 1.1891119655344815e-05, - "loss": 0.9559, + "learning_rate": 6.361705089492398e-06, + "loss": 1.0213, "step": 16101 }, { - "epoch": 0.4562894953101533, + "epoch": 0.6300179982784255, "grad_norm": 0.0, - "learning_rate": 1.1890218419136538e-05, - "loss": 0.8984, + "learning_rate": 6.360524736561813e-06, + "loss": 1.0201, "step": 16102 }, { - "epoch": 0.4563178327524158, + "epoch": 0.630057124970655, "grad_norm": 0.0, - "learning_rate": 1.1889317167006247e-05, - "loss": 0.8753, + "learning_rate": 6.359344442076636e-06, + "loss": 1.0513, "step": 16103 }, { - "epoch": 0.4563461701946782, + "epoch": 0.6300962516628844, "grad_norm": 0.0, - "learning_rate": 1.1888415898961538e-05, - "loss": 0.8469, + "learning_rate": 6.358164206055829e-06, + "loss": 1.0346, "step": 16104 }, { - "epoch": 0.4563745076369407, + "epoch": 0.6301353783551139, "grad_norm": 0.0, - "learning_rate": 1.188751461501e-05, - "loss": 1.0107, + "learning_rate": 6.356984028518341e-06, + "loss": 0.9529, "step": 16105 }, { - "epoch": 0.45640284507920315, + "epoch": 0.6301745050473433, "grad_norm": 0.0, - "learning_rate": 1.1886613315159226e-05, - "loss": 0.8968, + "learning_rate": 6.355803909483125e-06, + "loss": 0.9538, "step": 16106 }, { - "epoch": 0.45643118252146564, + "epoch": 0.6302136317395728, "grad_norm": 0.0, - "learning_rate": 1.1885711999416804e-05, - "loss": 0.9781, + "learning_rate": 6.354623848969128e-06, + "loss": 0.9014, "step": 16107 }, { - "epoch": 0.4564595199637281, + "epoch": 0.6302527584318022, "grad_norm": 0.0, - "learning_rate": 1.1884810667790334e-05, - "loss": 0.9047, + "learning_rate": 6.353443846995309e-06, + "loss": 0.9243, "step": 16108 }, { - "epoch": 0.4564878574059905, + "epoch": 0.6302918851240316, "grad_norm": 0.0, - "learning_rate": 1.1883909320287406e-05, - "loss": 0.9324, + "learning_rate": 6.352263903580611e-06, + "loss": 0.9463, "step": 16109 }, { - "epoch": 0.456516194848253, + "epoch": 0.630331011816261, "grad_norm": 0.0, - "learning_rate": 1.1883007956915606e-05, - "loss": 0.8895, + "learning_rate": 6.351084018743984e-06, + "loss": 0.9221, "step": 16110 }, { - "epoch": 0.45654453229051545, + "epoch": 0.6303701385084904, "grad_norm": 0.0, - "learning_rate": 1.188210657768253e-05, - "loss": 0.9059, + "learning_rate": 6.349904192504375e-06, + "loss": 0.9879, "step": 16111 }, { - "epoch": 0.45657286973277794, + "epoch": 0.6304092652007199, "grad_norm": 0.0, - "learning_rate": 1.1881205182595774e-05, - "loss": 0.9421, + "learning_rate": 6.3487244248807275e-06, + "loss": 1.0399, "step": 16112 }, { - "epoch": 0.4566012071750404, + "epoch": 0.6304483918929493, "grad_norm": 0.0, - "learning_rate": 1.1880303771662926e-05, - "loss": 0.9235, + "learning_rate": 6.347544715891995e-06, + "loss": 0.8994, "step": 16113 }, { - "epoch": 0.4566295446173028, + "epoch": 0.6304875185851788, "grad_norm": 0.0, - "learning_rate": 1.1879402344891581e-05, - "loss": 0.8156, + "learning_rate": 6.346365065557116e-06, + "loss": 0.8392, "step": 16114 }, { - "epoch": 0.4566578820595653, + "epoch": 0.6305266452774082, "grad_norm": 0.0, - "learning_rate": 1.1878500902289335e-05, - "loss": 0.8903, + "learning_rate": 6.345185473895037e-06, + "loss": 0.9848, "step": 16115 }, { - "epoch": 0.45668621950182775, + "epoch": 0.6305657719696377, "grad_norm": 0.0, - "learning_rate": 1.1877599443863777e-05, - "loss": 0.8981, + "learning_rate": 6.344005940924695e-06, + "loss": 1.1571, "step": 16116 }, { - "epoch": 0.45671455694409024, + "epoch": 0.6306048986618671, "grad_norm": 0.0, - "learning_rate": 1.1876697969622502e-05, - "loss": 0.8897, + "learning_rate": 6.342826466665041e-06, + "loss": 1.1139, "step": 16117 }, { - "epoch": 0.4567428943863527, + "epoch": 0.6306440253540966, "grad_norm": 0.0, - "learning_rate": 1.1875796479573104e-05, - "loss": 0.8852, + "learning_rate": 6.341647051135011e-06, + "loss": 1.0972, "step": 16118 }, { - "epoch": 0.4567712318286152, + "epoch": 0.630683152046326, "grad_norm": 0.0, - "learning_rate": 1.1874894973723173e-05, - "loss": 0.8961, + "learning_rate": 6.340467694353545e-06, + "loss": 1.0001, "step": 16119 }, { - "epoch": 0.4567995692708776, + "epoch": 0.6307222787385555, "grad_norm": 0.0, - "learning_rate": 1.1873993452080304e-05, - "loss": 0.943, + "learning_rate": 6.3392883963395826e-06, + "loss": 0.9645, "step": 16120 }, { - "epoch": 0.45682790671314005, + "epoch": 0.6307614054307848, "grad_norm": 0.0, - "learning_rate": 1.1873091914652096e-05, - "loss": 0.747, + "learning_rate": 6.338109157112055e-06, + "loss": 1.0274, "step": 16121 }, { - "epoch": 0.45685624415540255, + "epoch": 0.6308005321230143, "grad_norm": 0.0, - "learning_rate": 1.1872190361446139e-05, - "loss": 0.9544, + "learning_rate": 6.336929976689914e-06, + "loss": 1.049, "step": 16122 }, { - "epoch": 0.456884581597665, + "epoch": 0.6308396588152437, "grad_norm": 0.0, - "learning_rate": 1.1871288792470026e-05, - "loss": 0.9588, + "learning_rate": 6.335750855092084e-06, + "loss": 0.9929, "step": 16123 }, { - "epoch": 0.4569129190399275, + "epoch": 0.6308787855074732, "grad_norm": 0.0, - "learning_rate": 1.1870387207731353e-05, - "loss": 0.8467, + "learning_rate": 6.334571792337505e-06, + "loss": 1.0141, "step": 16124 }, { - "epoch": 0.4569412564821899, + "epoch": 0.6309179121997026, "grad_norm": 0.0, - "learning_rate": 1.1869485607237714e-05, - "loss": 0.7643, + "learning_rate": 6.333392788445106e-06, + "loss": 1.0708, "step": 16125 }, { - "epoch": 0.45696959392445236, + "epoch": 0.6309570388919321, "grad_norm": 0.0, - "learning_rate": 1.1868583990996702e-05, - "loss": 0.9395, + "learning_rate": 6.332213843433829e-06, + "loss": 0.9851, "step": 16126 }, { - "epoch": 0.45699793136671485, + "epoch": 0.6309961655841615, "grad_norm": 0.0, - "learning_rate": 1.1867682359015915e-05, - "loss": 0.7845, + "learning_rate": 6.331034957322601e-06, + "loss": 1.0234, "step": 16127 }, { - "epoch": 0.4570262688089773, + "epoch": 0.631035292276391, "grad_norm": 0.0, - "learning_rate": 1.1866780711302946e-05, - "loss": 0.9131, + "learning_rate": 6.329856130130354e-06, + "loss": 0.9099, "step": 16128 }, { - "epoch": 0.4570546062512398, + "epoch": 0.6310744189686204, "grad_norm": 0.0, - "learning_rate": 1.186587904786539e-05, - "loss": 0.9476, + "learning_rate": 6.328677361876019e-06, + "loss": 1.1183, "step": 16129 }, { - "epoch": 0.4570829436935022, + "epoch": 0.6311135456608499, "grad_norm": 0.0, - "learning_rate": 1.1864977368710841e-05, - "loss": 0.9056, + "learning_rate": 6.327498652578522e-06, + "loss": 0.874, "step": 16130 }, { - "epoch": 0.4571112811357647, + "epoch": 0.6311526723530793, "grad_norm": 0.0, - "learning_rate": 1.1864075673846897e-05, - "loss": 0.8388, + "learning_rate": 6.326320002256796e-06, + "loss": 0.8577, "step": 16131 }, { - "epoch": 0.45713961857802715, + "epoch": 0.6311917990453088, "grad_norm": 0.0, - "learning_rate": 1.186317396328115e-05, - "loss": 1.066, + "learning_rate": 6.325141410929769e-06, + "loss": 1.0115, "step": 16132 }, { - "epoch": 0.4571679560202896, + "epoch": 0.6312309257375381, "grad_norm": 0.0, - "learning_rate": 1.1862272237021193e-05, - "loss": 0.974, + "learning_rate": 6.323962878616366e-06, + "loss": 0.8949, "step": 16133 }, { - "epoch": 0.4571962934625521, + "epoch": 0.6312700524297676, "grad_norm": 0.0, - "learning_rate": 1.1861370495074631e-05, - "loss": 0.9903, + "learning_rate": 6.3227844053355105e-06, + "loss": 1.0025, "step": 16134 }, { - "epoch": 0.4572246309048145, + "epoch": 0.631309179121997, "grad_norm": 0.0, - "learning_rate": 1.1860468737449054e-05, - "loss": 0.9017, + "learning_rate": 6.321605991106132e-06, + "loss": 0.9573, "step": 16135 }, { - "epoch": 0.457252968347077, + "epoch": 0.6313483058142265, "grad_norm": 0.0, - "learning_rate": 1.1859566964152053e-05, - "loss": 0.875, + "learning_rate": 6.3204276359471515e-06, + "loss": 0.8552, "step": 16136 }, { - "epoch": 0.45728130578933945, + "epoch": 0.6313874325064559, "grad_norm": 0.0, - "learning_rate": 1.1858665175191233e-05, - "loss": 0.8519, + "learning_rate": 6.319249339877491e-06, + "loss": 1.0238, "step": 16137 }, { - "epoch": 0.4573096432316019, + "epoch": 0.6314265591986853, "grad_norm": 0.0, - "learning_rate": 1.1857763370574188e-05, - "loss": 0.7549, + "learning_rate": 6.3180711029160756e-06, + "loss": 1.2097, "step": 16138 }, { - "epoch": 0.4573379806738644, + "epoch": 0.6314656858909148, "grad_norm": 0.0, - "learning_rate": 1.1856861550308507e-05, - "loss": 0.8711, + "learning_rate": 6.3168929250818235e-06, + "loss": 1.0167, "step": 16139 }, { - "epoch": 0.4573663181161268, + "epoch": 0.6315048125831442, "grad_norm": 0.0, - "learning_rate": 1.1855959714401792e-05, - "loss": 0.9433, + "learning_rate": 6.315714806393656e-06, + "loss": 0.7971, "step": 16140 }, { - "epoch": 0.4573946555583893, + "epoch": 0.6315439392753737, "grad_norm": 0.0, - "learning_rate": 1.1855057862861643e-05, - "loss": 0.9499, + "learning_rate": 6.3145367468704944e-06, + "loss": 0.9585, "step": 16141 }, { - "epoch": 0.45742299300065176, + "epoch": 0.631583065967603, "grad_norm": 0.0, - "learning_rate": 1.1854155995695652e-05, - "loss": 0.8597, + "learning_rate": 6.313358746531253e-06, + "loss": 0.8983, "step": 16142 }, { - "epoch": 0.45745133044291425, + "epoch": 0.6316221926598325, "grad_norm": 0.0, - "learning_rate": 1.1853254112911416e-05, - "loss": 0.8213, + "learning_rate": 6.312180805394845e-06, + "loss": 1.0, "step": 16143 }, { - "epoch": 0.4574796678851767, + "epoch": 0.6316613193520619, "grad_norm": 0.0, - "learning_rate": 1.185235221451653e-05, - "loss": 0.9201, + "learning_rate": 6.311002923480198e-06, + "loss": 1.0995, "step": 16144 }, { - "epoch": 0.4575080053274391, + "epoch": 0.6317004460442914, "grad_norm": 0.0, - "learning_rate": 1.1851450300518597e-05, - "loss": 0.9129, + "learning_rate": 6.309825100806221e-06, + "loss": 1.0114, "step": 16145 }, { - "epoch": 0.4575363427697016, + "epoch": 0.6317395727365208, "grad_norm": 0.0, - "learning_rate": 1.1850548370925208e-05, - "loss": 0.803, + "learning_rate": 6.308647337391831e-06, + "loss": 1.0479, "step": 16146 }, { - "epoch": 0.45756468021196406, + "epoch": 0.6317786994287503, "grad_norm": 0.0, - "learning_rate": 1.1849646425743964e-05, - "loss": 1.0156, + "learning_rate": 6.307469633255937e-06, + "loss": 1.0981, "step": 16147 }, { - "epoch": 0.45759301765422655, + "epoch": 0.6318178261209797, "grad_norm": 0.0, - "learning_rate": 1.1848744464982463e-05, - "loss": 0.8864, + "learning_rate": 6.30629198841745e-06, + "loss": 1.045, "step": 16148 }, { - "epoch": 0.457621355096489, + "epoch": 0.6318569528132092, "grad_norm": 0.0, - "learning_rate": 1.1847842488648296e-05, - "loss": 0.7607, + "learning_rate": 6.30511440289529e-06, + "loss": 0.9473, "step": 16149 }, { - "epoch": 0.45764969253875143, + "epoch": 0.6318960795054386, "grad_norm": 0.0, - "learning_rate": 1.1846940496749068e-05, - "loss": 1.0029, + "learning_rate": 6.303936876708361e-06, + "loss": 0.9172, "step": 16150 }, { - "epoch": 0.4576780299810139, + "epoch": 0.6319352061976681, "grad_norm": 0.0, - "learning_rate": 1.1846038489292377e-05, - "loss": 0.9861, + "learning_rate": 6.302759409875576e-06, + "loss": 0.9706, "step": 16151 }, { - "epoch": 0.45770636742327636, + "epoch": 0.6319743328898975, "grad_norm": 0.0, - "learning_rate": 1.1845136466285816e-05, - "loss": 0.9149, + "learning_rate": 6.301582002415837e-06, + "loss": 1.0221, "step": 16152 }, { - "epoch": 0.45773470486553886, + "epoch": 0.632013459582127, "grad_norm": 0.0, - "learning_rate": 1.1844234427736982e-05, - "loss": 0.9686, + "learning_rate": 6.30040465434806e-06, + "loss": 0.9111, "step": 16153 }, { - "epoch": 0.4577630423078013, + "epoch": 0.6320525862743563, "grad_norm": 0.0, - "learning_rate": 1.1843332373653482e-05, - "loss": 0.8677, + "learning_rate": 6.299227365691151e-06, + "loss": 1.0692, "step": 16154 }, { - "epoch": 0.45779137975006373, + "epoch": 0.6320917129665858, "grad_norm": 0.0, - "learning_rate": 1.1842430304042909e-05, - "loss": 0.8554, + "learning_rate": 6.298050136464011e-06, + "loss": 1.0414, "step": 16155 }, { - "epoch": 0.4578197171923262, + "epoch": 0.6321308396588152, "grad_norm": 0.0, - "learning_rate": 1.1841528218912858e-05, - "loss": 0.914, + "learning_rate": 6.296872966685543e-06, + "loss": 0.9988, "step": 16156 }, { - "epoch": 0.45784805463458866, + "epoch": 0.6321699663510447, "grad_norm": 0.0, - "learning_rate": 1.1840626118270932e-05, - "loss": 1.0046, + "learning_rate": 6.2956958563746595e-06, + "loss": 0.9483, "step": 16157 }, { - "epoch": 0.45787639207685116, + "epoch": 0.6322090930432741, "grad_norm": 0.0, - "learning_rate": 1.183972400212473e-05, - "loss": 0.8687, + "learning_rate": 6.29451880555026e-06, + "loss": 1.0569, "step": 16158 }, { - "epoch": 0.4579047295191136, + "epoch": 0.6322482197355036, "grad_norm": 0.0, - "learning_rate": 1.1838821870481847e-05, - "loss": 0.7632, + "learning_rate": 6.293341814231244e-06, + "loss": 1.0002, "step": 16159 }, { - "epoch": 0.4579330669613761, + "epoch": 0.632287346427733, "grad_norm": 0.0, - "learning_rate": 1.1837919723349888e-05, - "loss": 0.9561, + "learning_rate": 6.292164882436513e-06, + "loss": 1.0029, "step": 16160 }, { - "epoch": 0.45796140440363853, + "epoch": 0.6323264731199625, "grad_norm": 0.0, - "learning_rate": 1.1837017560736447e-05, - "loss": 0.9528, + "learning_rate": 6.290988010184964e-06, + "loss": 1.017, "step": 16161 }, { - "epoch": 0.45798974184590097, + "epoch": 0.6323655998121919, "grad_norm": 0.0, - "learning_rate": 1.1836115382649126e-05, - "loss": 0.8771, + "learning_rate": 6.289811197495506e-06, + "loss": 1.0668, "step": 16162 }, { - "epoch": 0.45801807928816346, + "epoch": 0.6324047265044214, "grad_norm": 0.0, - "learning_rate": 1.183521318909552e-05, - "loss": 0.9095, + "learning_rate": 6.288634444387028e-06, + "loss": 1.0312, "step": 16163 }, { - "epoch": 0.4580464167304259, + "epoch": 0.6324438531966508, "grad_norm": 0.0, - "learning_rate": 1.1834310980083234e-05, - "loss": 0.8732, + "learning_rate": 6.287457750878431e-06, + "loss": 1.007, "step": 16164 }, { - "epoch": 0.4580747541726884, + "epoch": 0.6324829798888802, "grad_norm": 0.0, - "learning_rate": 1.1833408755619867e-05, - "loss": 0.9937, + "learning_rate": 6.2862811169886085e-06, + "loss": 1.1759, "step": 16165 }, { - "epoch": 0.45810309161495083, + "epoch": 0.6325221065811096, "grad_norm": 0.0, - "learning_rate": 1.1832506515713014e-05, - "loss": 0.8262, + "learning_rate": 6.2851045427364595e-06, + "loss": 1.003, "step": 16166 }, { - "epoch": 0.45813142905721327, + "epoch": 0.632561233273339, "grad_norm": 0.0, - "learning_rate": 1.1831604260370279e-05, - "loss": 0.7859, + "learning_rate": 6.283928028140877e-06, + "loss": 0.9034, "step": 16167 }, { - "epoch": 0.45815976649947576, + "epoch": 0.6326003599655685, "grad_norm": 0.0, - "learning_rate": 1.1830701989599263e-05, - "loss": 0.8711, + "learning_rate": 6.282751573220755e-06, + "loss": 0.9864, "step": 16168 }, { - "epoch": 0.4581881039417382, + "epoch": 0.6326394866577979, "grad_norm": 0.0, - "learning_rate": 1.1829799703407563e-05, - "loss": 0.8583, + "learning_rate": 6.281575177994985e-06, + "loss": 0.8424, "step": 16169 }, { - "epoch": 0.4582164413840007, + "epoch": 0.6326786133500274, "grad_norm": 0.0, - "learning_rate": 1.1828897401802782e-05, - "loss": 0.9995, + "learning_rate": 6.280398842482454e-06, + "loss": 0.9376, "step": 16170 }, { - "epoch": 0.45824477882626313, + "epoch": 0.6327177400422568, "grad_norm": 0.0, - "learning_rate": 1.1827995084792518e-05, - "loss": 0.9217, + "learning_rate": 6.279222566702059e-06, + "loss": 1.0267, "step": 16171 }, { - "epoch": 0.4582731162685256, + "epoch": 0.6327568667344863, "grad_norm": 0.0, - "learning_rate": 1.182709275238437e-05, - "loss": 0.914, + "learning_rate": 6.278046350672689e-06, + "loss": 1.0216, "step": 16172 }, { - "epoch": 0.45830145371078806, + "epoch": 0.6327959934267157, "grad_norm": 0.0, - "learning_rate": 1.1826190404585946e-05, - "loss": 0.9296, + "learning_rate": 6.276870194413229e-06, + "loss": 0.9593, "step": 16173 }, { - "epoch": 0.4583297911530505, + "epoch": 0.6328351201189452, "grad_norm": 0.0, - "learning_rate": 1.1825288041404838e-05, - "loss": 0.9399, + "learning_rate": 6.275694097942567e-06, + "loss": 0.9666, "step": 16174 }, { - "epoch": 0.458358128595313, + "epoch": 0.6328742468111745, "grad_norm": 0.0, - "learning_rate": 1.1824385662848654e-05, - "loss": 0.8598, + "learning_rate": 6.274518061279593e-06, + "loss": 0.9469, "step": 16175 }, { - "epoch": 0.45838646603757544, + "epoch": 0.632913373503404, "grad_norm": 0.0, - "learning_rate": 1.1823483268924992e-05, - "loss": 0.8663, + "learning_rate": 6.273342084443189e-06, + "loss": 1.0751, "step": 16176 }, { - "epoch": 0.45841480347983793, + "epoch": 0.6329525001956334, "grad_norm": 0.0, - "learning_rate": 1.1822580859641452e-05, - "loss": 0.9436, + "learning_rate": 6.272166167452242e-06, + "loss": 1.0839, "step": 16177 }, { - "epoch": 0.45844314092210037, + "epoch": 0.6329916268878629, "grad_norm": 0.0, - "learning_rate": 1.1821678435005637e-05, - "loss": 0.9696, + "learning_rate": 6.270990310325637e-06, + "loss": 1.0327, "step": 16178 }, { - "epoch": 0.4584714783643628, + "epoch": 0.6330307535800923, "grad_norm": 0.0, - "learning_rate": 1.1820775995025147e-05, - "loss": 0.8059, + "learning_rate": 6.269814513082253e-06, + "loss": 0.9974, "step": 16179 }, { - "epoch": 0.4584998158066253, + "epoch": 0.6330698802723218, "grad_norm": 0.0, - "learning_rate": 1.1819873539707584e-05, - "loss": 0.8713, + "learning_rate": 6.2686387757409736e-06, + "loss": 1.1146, "step": 16180 }, { - "epoch": 0.45852815324888774, + "epoch": 0.6331090069645512, "grad_norm": 0.0, - "learning_rate": 1.1818971069060553e-05, - "loss": 0.9528, + "learning_rate": 6.267463098320682e-06, + "loss": 0.9786, "step": 16181 }, { - "epoch": 0.45855649069115023, + "epoch": 0.6331481336567807, "grad_norm": 0.0, - "learning_rate": 1.1818068583091654e-05, - "loss": 0.9101, + "learning_rate": 6.266287480840253e-06, + "loss": 0.922, "step": 16182 }, { - "epoch": 0.45858482813341267, + "epoch": 0.6331872603490101, "grad_norm": 0.0, - "learning_rate": 1.1817166081808486e-05, - "loss": 0.8974, + "learning_rate": 6.265111923318569e-06, + "loss": 1.077, "step": 16183 }, { - "epoch": 0.45861316557567516, + "epoch": 0.6332263870412396, "grad_norm": 0.0, - "learning_rate": 1.1816263565218654e-05, - "loss": 0.8386, + "learning_rate": 6.263936425774508e-06, + "loss": 0.9805, "step": 16184 }, { - "epoch": 0.4586415030179376, + "epoch": 0.633265513733469, "grad_norm": 0.0, - "learning_rate": 1.1815361033329758e-05, - "loss": 0.8852, + "learning_rate": 6.262760988226948e-06, + "loss": 0.9597, "step": 16185 }, { - "epoch": 0.45866984046020004, + "epoch": 0.6333046404256985, "grad_norm": 0.0, - "learning_rate": 1.1814458486149402e-05, - "loss": 0.8741, + "learning_rate": 6.261585610694762e-06, + "loss": 0.9751, "step": 16186 }, { - "epoch": 0.45869817790246253, + "epoch": 0.6333437671179278, "grad_norm": 0.0, - "learning_rate": 1.1813555923685189e-05, - "loss": 0.9708, + "learning_rate": 6.260410293196828e-06, + "loss": 0.9068, "step": 16187 }, { - "epoch": 0.45872651534472497, + "epoch": 0.6333828938101573, "grad_norm": 0.0, - "learning_rate": 1.1812653345944725e-05, - "loss": 0.8853, + "learning_rate": 6.259235035752014e-06, + "loss": 0.9369, "step": 16188 }, { - "epoch": 0.45875485278698747, + "epoch": 0.6334220205023867, "grad_norm": 0.0, - "learning_rate": 1.1811750752935604e-05, - "loss": 0.8707, + "learning_rate": 6.258059838379203e-06, + "loss": 0.9804, "step": 16189 }, { - "epoch": 0.4587831902292499, + "epoch": 0.6334611471946162, "grad_norm": 0.0, - "learning_rate": 1.1810848144665435e-05, - "loss": 0.9193, + "learning_rate": 6.256884701097261e-06, + "loss": 1.0746, "step": 16190 }, { - "epoch": 0.45881152767151234, + "epoch": 0.6335002738868456, "grad_norm": 0.0, - "learning_rate": 1.1809945521141818e-05, - "loss": 0.8826, + "learning_rate": 6.255709623925061e-06, + "loss": 0.7877, "step": 16191 }, { - "epoch": 0.45883986511377484, + "epoch": 0.6335394005790751, "grad_norm": 0.0, - "learning_rate": 1.180904288237236e-05, - "loss": 1.0337, + "learning_rate": 6.254534606881467e-06, + "loss": 1.0609, "step": 16192 }, { - "epoch": 0.4588682025560373, + "epoch": 0.6335785272713045, "grad_norm": 0.0, - "learning_rate": 1.1808140228364662e-05, - "loss": 0.8063, + "learning_rate": 6.253359649985359e-06, + "loss": 0.9143, "step": 16193 }, { - "epoch": 0.45889653999829977, + "epoch": 0.633617653963534, "grad_norm": 0.0, - "learning_rate": 1.1807237559126325e-05, - "loss": 0.9673, + "learning_rate": 6.2521847532556e-06, + "loss": 0.9536, "step": 16194 }, { - "epoch": 0.4589248774405622, + "epoch": 0.6336567806557634, "grad_norm": 0.0, - "learning_rate": 1.1806334874664958e-05, - "loss": 0.9106, + "learning_rate": 6.251009916711059e-06, + "loss": 1.0678, "step": 16195 }, { - "epoch": 0.4589532148828247, + "epoch": 0.6336959073479927, "grad_norm": 0.0, - "learning_rate": 1.180543217498816e-05, - "loss": 0.9443, + "learning_rate": 6.2498351403706e-06, + "loss": 1.1379, "step": 16196 }, { - "epoch": 0.45898155232508714, + "epoch": 0.6337350340402222, "grad_norm": 0.0, - "learning_rate": 1.180452946010354e-05, - "loss": 0.8922, + "learning_rate": 6.248660424253084e-06, + "loss": 0.9762, "step": 16197 }, { - "epoch": 0.4590098897673496, + "epoch": 0.6337741607324516, "grad_norm": 0.0, - "learning_rate": 1.1803626730018694e-05, - "loss": 0.8087, + "learning_rate": 6.247485768377386e-06, + "loss": 0.902, "step": 16198 }, { - "epoch": 0.45903822720961207, + "epoch": 0.6338132874246811, "grad_norm": 0.0, - "learning_rate": 1.1802723984741229e-05, - "loss": 0.9427, + "learning_rate": 6.2463111727623645e-06, + "loss": 1.0421, "step": 16199 }, { - "epoch": 0.4590665646518745, + "epoch": 0.6338524141169105, "grad_norm": 0.0, - "learning_rate": 1.1801821224278753e-05, - "loss": 0.8758, + "learning_rate": 6.245136637426882e-06, + "loss": 1.0612, "step": 16200 }, { - "epoch": 0.459094902094137, + "epoch": 0.63389154080914, "grad_norm": 0.0, - "learning_rate": 1.180091844863887e-05, - "loss": 0.837, + "learning_rate": 6.2439621623897965e-06, + "loss": 1.0927, "step": 16201 }, { - "epoch": 0.45912323953639944, + "epoch": 0.6339306675013694, "grad_norm": 0.0, - "learning_rate": 1.1800015657829178e-05, - "loss": 0.9589, + "learning_rate": 6.242787747669974e-06, + "loss": 1.0726, "step": 16202 }, { - "epoch": 0.4591515769786619, + "epoch": 0.6339697941935989, "grad_norm": 0.0, - "learning_rate": 1.1799112851857286e-05, - "loss": 0.8028, + "learning_rate": 6.2416133932862746e-06, + "loss": 1.0259, "step": 16203 }, { - "epoch": 0.4591799144209244, + "epoch": 0.6340089208858283, "grad_norm": 0.0, - "learning_rate": 1.1798210030730798e-05, - "loss": 0.8079, + "learning_rate": 6.240439099257554e-06, + "loss": 0.97, "step": 16204 }, { - "epoch": 0.4592082518631868, + "epoch": 0.6340480475780578, "grad_norm": 0.0, - "learning_rate": 1.1797307194457323e-05, - "loss": 0.8509, + "learning_rate": 6.23926486560267e-06, + "loss": 0.9549, "step": 16205 }, { - "epoch": 0.4592365893054493, + "epoch": 0.6340871742702872, "grad_norm": 0.0, - "learning_rate": 1.1796404343044461e-05, - "loss": 0.8733, + "learning_rate": 6.238090692340475e-06, + "loss": 0.9617, "step": 16206 }, { - "epoch": 0.45926492674771174, + "epoch": 0.6341263009625167, "grad_norm": 0.0, - "learning_rate": 1.1795501476499816e-05, - "loss": 0.994, + "learning_rate": 6.236916579489835e-06, + "loss": 1.0099, "step": 16207 }, { - "epoch": 0.45929326418997424, + "epoch": 0.634165427654746, "grad_norm": 0.0, - "learning_rate": 1.1794598594830996e-05, - "loss": 0.9367, + "learning_rate": 6.235742527069599e-06, + "loss": 1.0494, "step": 16208 }, { - "epoch": 0.4593216016322367, + "epoch": 0.6342045543469755, "grad_norm": 0.0, - "learning_rate": 1.1793695698045606e-05, - "loss": 0.8473, + "learning_rate": 6.23456853509862e-06, + "loss": 0.9351, "step": 16209 }, { - "epoch": 0.4593499390744991, + "epoch": 0.6342436810392049, "grad_norm": 0.0, - "learning_rate": 1.1792792786151251e-05, - "loss": 0.8179, + "learning_rate": 6.233394603595751e-06, + "loss": 0.8688, "step": 16210 }, { - "epoch": 0.4593782765167616, + "epoch": 0.6342828077314344, "grad_norm": 0.0, - "learning_rate": 1.1791889859155537e-05, - "loss": 0.9508, + "learning_rate": 6.232220732579845e-06, + "loss": 1.0677, "step": 16211 }, { - "epoch": 0.45940661395902405, + "epoch": 0.6343219344236638, "grad_norm": 0.0, - "learning_rate": 1.1790986917066068e-05, - "loss": 0.8578, + "learning_rate": 6.231046922069755e-06, + "loss": 0.9811, "step": 16212 }, { - "epoch": 0.45943495140128654, + "epoch": 0.6343610611158933, "grad_norm": 0.0, - "learning_rate": 1.1790083959890453e-05, - "loss": 0.8245, + "learning_rate": 6.229873172084329e-06, + "loss": 1.1663, "step": 16213 }, { - "epoch": 0.459463288843549, + "epoch": 0.6344001878081227, "grad_norm": 0.0, - "learning_rate": 1.1789180987636297e-05, - "loss": 0.9158, + "learning_rate": 6.228699482642412e-06, + "loss": 1.116, "step": 16214 }, { - "epoch": 0.4594916262858114, + "epoch": 0.6344393145003522, "grad_norm": 0.0, - "learning_rate": 1.1788278000311202e-05, - "loss": 0.9183, + "learning_rate": 6.227525853762857e-06, + "loss": 1.0886, "step": 16215 }, { - "epoch": 0.4595199637280739, + "epoch": 0.6344784411925816, "grad_norm": 0.0, - "learning_rate": 1.1787374997922779e-05, - "loss": 0.9957, + "learning_rate": 6.22635228546451e-06, + "loss": 0.9691, "step": 16216 }, { - "epoch": 0.45954830117033635, + "epoch": 0.6345175678848111, "grad_norm": 0.0, - "learning_rate": 1.1786471980478631e-05, - "loss": 0.9627, + "learning_rate": 6.225178777766219e-06, + "loss": 0.9657, "step": 16217 }, { - "epoch": 0.45957663861259884, + "epoch": 0.6345566945770404, "grad_norm": 0.0, - "learning_rate": 1.1785568947986368e-05, - "loss": 0.8055, + "learning_rate": 6.224005330686824e-06, + "loss": 1.0466, "step": 16218 }, { - "epoch": 0.4596049760548613, + "epoch": 0.6345958212692699, "grad_norm": 0.0, - "learning_rate": 1.1784665900453594e-05, - "loss": 1.0907, + "learning_rate": 6.222831944245171e-06, + "loss": 0.8994, "step": 16219 }, { - "epoch": 0.4596333134971238, + "epoch": 0.6346349479614993, "grad_norm": 0.0, - "learning_rate": 1.1783762837887915e-05, - "loss": 0.8394, + "learning_rate": 6.221658618460107e-06, + "loss": 0.9472, "step": 16220 }, { - "epoch": 0.4596616509393862, + "epoch": 0.6346740746537288, "grad_norm": 0.0, - "learning_rate": 1.1782859760296943e-05, - "loss": 0.9496, + "learning_rate": 6.220485353350467e-06, + "loss": 1.0699, "step": 16221 }, { - "epoch": 0.45968998838164865, + "epoch": 0.6347132013459582, "grad_norm": 0.0, - "learning_rate": 1.1781956667688279e-05, - "loss": 0.8946, + "learning_rate": 6.2193121489351e-06, + "loss": 0.9035, "step": 16222 }, { - "epoch": 0.45971832582391114, + "epoch": 0.6347523280381876, "grad_norm": 0.0, - "learning_rate": 1.1781053560069531e-05, - "loss": 0.8484, + "learning_rate": 6.21813900523284e-06, + "loss": 0.9825, "step": 16223 }, { - "epoch": 0.4597466632661736, + "epoch": 0.6347914547304171, "grad_norm": 0.0, - "learning_rate": 1.1780150437448308e-05, - "loss": 0.9666, + "learning_rate": 6.21696592226253e-06, + "loss": 0.9397, "step": 16224 }, { - "epoch": 0.4597750007084361, + "epoch": 0.6348305814226465, "grad_norm": 0.0, - "learning_rate": 1.1779247299832219e-05, - "loss": 0.994, + "learning_rate": 6.215792900043007e-06, + "loss": 0.9868, "step": 16225 }, { - "epoch": 0.4598033381506985, + "epoch": 0.634869708114876, "grad_norm": 0.0, - "learning_rate": 1.1778344147228869e-05, - "loss": 0.9765, + "learning_rate": 6.214619938593108e-06, + "loss": 1.038, "step": 16226 }, { - "epoch": 0.45983167559296095, + "epoch": 0.6349088348071054, "grad_norm": 0.0, - "learning_rate": 1.1777440979645862e-05, - "loss": 0.9982, + "learning_rate": 6.213447037931669e-06, + "loss": 1.0684, "step": 16227 }, { - "epoch": 0.45986001303522345, + "epoch": 0.6349479614993349, "grad_norm": 0.0, - "learning_rate": 1.1776537797090815e-05, - "loss": 0.9286, + "learning_rate": 6.212274198077526e-06, + "loss": 0.9431, "step": 16228 }, { - "epoch": 0.4598883504774859, + "epoch": 0.6349870881915642, "grad_norm": 0.0, - "learning_rate": 1.1775634599571326e-05, - "loss": 0.7762, + "learning_rate": 6.211101419049512e-06, + "loss": 1.0055, "step": 16229 }, { - "epoch": 0.4599166879197484, + "epoch": 0.6350262148837937, "grad_norm": 0.0, - "learning_rate": 1.1774731387095008e-05, - "loss": 0.9323, + "learning_rate": 6.209928700866463e-06, + "loss": 1.0514, "step": 16230 }, { - "epoch": 0.4599450253620108, + "epoch": 0.6350653415760231, "grad_norm": 0.0, - "learning_rate": 1.1773828159669472e-05, - "loss": 0.933, + "learning_rate": 6.208756043547211e-06, + "loss": 1.0526, "step": 16231 }, { - "epoch": 0.4599733628042733, + "epoch": 0.6351044682682526, "grad_norm": 0.0, - "learning_rate": 1.177292491730232e-05, - "loss": 1.001, + "learning_rate": 6.207583447110581e-06, + "loss": 1.0097, "step": 16232 }, { - "epoch": 0.46000170024653575, + "epoch": 0.635143594960482, "grad_norm": 0.0, - "learning_rate": 1.1772021660001163e-05, - "loss": 0.8284, + "learning_rate": 6.206410911575413e-06, + "loss": 0.9831, "step": 16233 }, { - "epoch": 0.4600300376887982, + "epoch": 0.6351827216527115, "grad_norm": 0.0, - "learning_rate": 1.177111838777361e-05, - "loss": 0.8582, + "learning_rate": 6.205238436960532e-06, + "loss": 0.9677, "step": 16234 }, { - "epoch": 0.4600583751310607, + "epoch": 0.6352218483449409, "grad_norm": 0.0, - "learning_rate": 1.177021510062727e-05, - "loss": 0.9698, + "learning_rate": 6.204066023284767e-06, + "loss": 1.0332, "step": 16235 }, { - "epoch": 0.4600867125733231, + "epoch": 0.6352609750371704, "grad_norm": 0.0, - "learning_rate": 1.176931179856975e-05, - "loss": 0.8966, + "learning_rate": 6.202893670566945e-06, + "loss": 1.0758, "step": 16236 }, { - "epoch": 0.4601150500155856, + "epoch": 0.6353001017293998, "grad_norm": 0.0, - "learning_rate": 1.1768408481608657e-05, - "loss": 0.8651, + "learning_rate": 6.201721378825889e-06, + "loss": 0.9635, "step": 16237 }, { - "epoch": 0.46014338745784805, + "epoch": 0.6353392284216293, "grad_norm": 0.0, - "learning_rate": 1.1767505149751606e-05, - "loss": 0.9046, + "learning_rate": 6.20054914808043e-06, + "loss": 0.9709, "step": 16238 }, { - "epoch": 0.4601717249001105, + "epoch": 0.6353783551138587, "grad_norm": 0.0, - "learning_rate": 1.1766601803006204e-05, - "loss": 0.822, + "learning_rate": 6.199376978349394e-06, + "loss": 0.9457, "step": 16239 }, { - "epoch": 0.460200062342373, + "epoch": 0.6354174818060881, "grad_norm": 0.0, - "learning_rate": 1.1765698441380056e-05, - "loss": 0.9106, + "learning_rate": 6.198204869651599e-06, + "loss": 1.0611, "step": 16240 }, { - "epoch": 0.4602283997846354, + "epoch": 0.6354566084983175, "grad_norm": 0.0, - "learning_rate": 1.1764795064880777e-05, - "loss": 0.8113, + "learning_rate": 6.1970328220058665e-06, + "loss": 0.9906, "step": 16241 }, { - "epoch": 0.4602567372268979, + "epoch": 0.635495735190547, "grad_norm": 0.0, - "learning_rate": 1.1763891673515973e-05, - "loss": 1.021, + "learning_rate": 6.195860835431024e-06, + "loss": 0.9953, "step": 16242 }, { - "epoch": 0.46028507466916035, + "epoch": 0.6355348618827764, "grad_norm": 0.0, - "learning_rate": 1.1762988267293252e-05, - "loss": 0.9376, + "learning_rate": 6.1946889099458915e-06, + "loss": 1.0386, "step": 16243 }, { - "epoch": 0.46031341211142285, + "epoch": 0.6355739885750059, "grad_norm": 0.0, - "learning_rate": 1.1762084846220226e-05, - "loss": 0.9314, + "learning_rate": 6.1935170455692874e-06, + "loss": 0.9199, "step": 16244 }, { - "epoch": 0.4603417495536853, + "epoch": 0.6356131152672353, "grad_norm": 0.0, - "learning_rate": 1.1761181410304508e-05, - "loss": 0.8061, + "learning_rate": 6.192345242320028e-06, + "loss": 0.9479, "step": 16245 }, { - "epoch": 0.4603700869959477, + "epoch": 0.6356522419594648, "grad_norm": 0.0, - "learning_rate": 1.1760277959553706e-05, - "loss": 0.8875, + "learning_rate": 6.1911735002169295e-06, + "loss": 0.9633, "step": 16246 }, { - "epoch": 0.4603984244382102, + "epoch": 0.6356913686516942, "grad_norm": 0.0, - "learning_rate": 1.1759374493975425e-05, - "loss": 0.8102, + "learning_rate": 6.190001819278817e-06, + "loss": 0.9576, "step": 16247 }, { - "epoch": 0.46042676188047266, + "epoch": 0.6357304953439237, "grad_norm": 0.0, - "learning_rate": 1.1758471013577283e-05, - "loss": 0.8779, + "learning_rate": 6.1888301995245e-06, + "loss": 1.1973, "step": 16248 }, { - "epoch": 0.46045509932273515, + "epoch": 0.6357696220361531, "grad_norm": 0.0, - "learning_rate": 1.1757567518366883e-05, - "loss": 0.9662, + "learning_rate": 6.187658640972794e-06, + "loss": 1.1147, "step": 16249 }, { - "epoch": 0.4604834367649976, + "epoch": 0.6358087487283826, "grad_norm": 0.0, - "learning_rate": 1.1756664008351842e-05, - "loss": 0.885, + "learning_rate": 6.186487143642508e-06, + "loss": 0.8904, "step": 16250 }, { - "epoch": 0.46051177420726, + "epoch": 0.6358478754206119, "grad_norm": 0.0, - "learning_rate": 1.1755760483539767e-05, - "loss": 0.9943, + "learning_rate": 6.1853157075524664e-06, + "loss": 1.036, "step": 16251 }, { - "epoch": 0.4605401116495225, + "epoch": 0.6358870021128413, "grad_norm": 0.0, - "learning_rate": 1.1754856943938266e-05, - "loss": 0.8764, + "learning_rate": 6.184144332721473e-06, + "loss": 0.9683, "step": 16252 }, { - "epoch": 0.46056844909178496, + "epoch": 0.6359261288050708, "grad_norm": 0.0, - "learning_rate": 1.175395338955496e-05, - "loss": 0.864, + "learning_rate": 6.18297301916834e-06, + "loss": 0.9791, "step": 16253 }, { - "epoch": 0.46059678653404745, + "epoch": 0.6359652554973002, "grad_norm": 0.0, - "learning_rate": 1.1753049820397449e-05, - "loss": 0.8849, + "learning_rate": 6.181801766911878e-06, + "loss": 0.8203, "step": 16254 }, { - "epoch": 0.4606251239763099, + "epoch": 0.6360043821895297, "grad_norm": 0.0, - "learning_rate": 1.175214623647335e-05, - "loss": 0.8925, + "learning_rate": 6.1806305759708906e-06, + "loss": 0.8343, "step": 16255 }, { - "epoch": 0.4606534614185724, + "epoch": 0.6360435088817591, "grad_norm": 0.0, - "learning_rate": 1.175124263779027e-05, - "loss": 0.8728, + "learning_rate": 6.179459446364194e-06, + "loss": 0.9378, "step": 16256 }, { - "epoch": 0.4606817988608348, + "epoch": 0.6360826355739886, "grad_norm": 0.0, - "learning_rate": 1.1750339024355824e-05, - "loss": 0.8719, + "learning_rate": 6.178288378110593e-06, + "loss": 0.9266, "step": 16257 }, { - "epoch": 0.46071013630309726, + "epoch": 0.636121762266218, "grad_norm": 0.0, - "learning_rate": 1.1749435396177623e-05, - "loss": 0.9334, + "learning_rate": 6.177117371228889e-06, + "loss": 1.0191, "step": 16258 }, { - "epoch": 0.46073847374535976, + "epoch": 0.6361608889584475, "grad_norm": 0.0, - "learning_rate": 1.1748531753263282e-05, - "loss": 0.8685, + "learning_rate": 6.1759464257378874e-06, + "loss": 0.9271, "step": 16259 }, { - "epoch": 0.4607668111876222, + "epoch": 0.6362000156506769, "grad_norm": 0.0, - "learning_rate": 1.1747628095620405e-05, - "loss": 0.9574, + "learning_rate": 6.1747755416563996e-06, + "loss": 0.853, "step": 16260 }, { - "epoch": 0.4607951486298847, + "epoch": 0.6362391423429063, "grad_norm": 0.0, - "learning_rate": 1.1746724423256605e-05, - "loss": 0.8864, + "learning_rate": 6.173604719003221e-06, + "loss": 1.0435, "step": 16261 }, { - "epoch": 0.4608234860721471, + "epoch": 0.6362782690351357, "grad_norm": 0.0, - "learning_rate": 1.17458207361795e-05, - "loss": 0.8466, + "learning_rate": 6.172433957797157e-06, + "loss": 1.129, "step": 16262 }, { - "epoch": 0.46085182351440956, + "epoch": 0.6363173957273652, "grad_norm": 0.0, - "learning_rate": 1.1744917034396697e-05, - "loss": 0.9254, + "learning_rate": 6.171263258057008e-06, + "loss": 1.0555, "step": 16263 }, { - "epoch": 0.46088016095667206, + "epoch": 0.6363565224195946, "grad_norm": 0.0, - "learning_rate": 1.1744013317915812e-05, - "loss": 1.0328, + "learning_rate": 6.1700926198015685e-06, + "loss": 0.9437, "step": 16264 }, { - "epoch": 0.4609084983989345, + "epoch": 0.6363956491118241, "grad_norm": 0.0, - "learning_rate": 1.1743109586744451e-05, - "loss": 0.9656, + "learning_rate": 6.168922043049645e-06, + "loss": 1.0564, "step": 16265 }, { - "epoch": 0.460936835841197, + "epoch": 0.6364347758040535, "grad_norm": 0.0, - "learning_rate": 1.1742205840890235e-05, - "loss": 0.8697, + "learning_rate": 6.167751527820034e-06, + "loss": 1.0464, "step": 16266 }, { - "epoch": 0.46096517328345943, + "epoch": 0.636473902496283, "grad_norm": 0.0, - "learning_rate": 1.1741302080360773e-05, - "loss": 0.956, + "learning_rate": 6.1665810741315325e-06, + "loss": 0.9135, "step": 16267 }, { - "epoch": 0.4609935107257219, + "epoch": 0.6365130291885124, "grad_norm": 0.0, - "learning_rate": 1.1740398305163673e-05, - "loss": 0.9249, + "learning_rate": 6.165410682002932e-06, + "loss": 0.9938, "step": 16268 }, { - "epoch": 0.46102184816798436, + "epoch": 0.6365521558807419, "grad_norm": 0.0, - "learning_rate": 1.1739494515306553e-05, - "loss": 0.9856, + "learning_rate": 6.1642403514530344e-06, + "loss": 0.9698, "step": 16269 }, { - "epoch": 0.4610501856102468, + "epoch": 0.6365912825729713, "grad_norm": 0.0, - "learning_rate": 1.1738590710797024e-05, - "loss": 0.8623, + "learning_rate": 6.163070082500629e-06, + "loss": 1.0412, "step": 16270 }, { - "epoch": 0.4610785230525093, + "epoch": 0.6366304092652008, "grad_norm": 0.0, - "learning_rate": 1.1737686891642703e-05, - "loss": 0.9627, + "learning_rate": 6.161899875164509e-06, + "loss": 0.9734, "step": 16271 }, { - "epoch": 0.46110686049477173, + "epoch": 0.6366695359574301, "grad_norm": 0.0, - "learning_rate": 1.1736783057851198e-05, - "loss": 0.8194, + "learning_rate": 6.1607297294634675e-06, + "loss": 0.9026, "step": 16272 }, { - "epoch": 0.4611351979370342, + "epoch": 0.6367086626496596, "grad_norm": 0.0, - "learning_rate": 1.1735879209430123e-05, - "loss": 0.7484, + "learning_rate": 6.1595596454162975e-06, + "loss": 1.0257, "step": 16273 }, { - "epoch": 0.46116353537929666, + "epoch": 0.636747789341889, "grad_norm": 0.0, - "learning_rate": 1.1734975346387097e-05, - "loss": 0.9194, + "learning_rate": 6.158389623041787e-06, + "loss": 0.9179, "step": 16274 }, { - "epoch": 0.4611918728215591, + "epoch": 0.6367869160341185, "grad_norm": 0.0, - "learning_rate": 1.1734071468729726e-05, - "loss": 0.9013, + "learning_rate": 6.157219662358724e-06, + "loss": 0.981, "step": 16275 }, { - "epoch": 0.4612202102638216, + "epoch": 0.6368260427263479, "grad_norm": 0.0, - "learning_rate": 1.1733167576465627e-05, - "loss": 0.9271, + "learning_rate": 6.156049763385901e-06, + "loss": 1.0577, "step": 16276 }, { - "epoch": 0.46124854770608403, + "epoch": 0.6368651694185774, "grad_norm": 0.0, - "learning_rate": 1.1732263669602413e-05, - "loss": 0.8885, + "learning_rate": 6.154879926142095e-06, + "loss": 1.0583, "step": 16277 }, { - "epoch": 0.4612768851483465, + "epoch": 0.6369042961108068, "grad_norm": 0.0, - "learning_rate": 1.1731359748147702e-05, - "loss": 0.9292, + "learning_rate": 6.153710150646106e-06, + "loss": 0.9635, "step": 16278 }, { - "epoch": 0.46130522259060897, + "epoch": 0.6369434228030363, "grad_norm": 0.0, - "learning_rate": 1.1730455812109102e-05, - "loss": 0.8937, + "learning_rate": 6.1525404369167096e-06, + "loss": 0.9804, "step": 16279 }, { - "epoch": 0.46133356003287146, + "epoch": 0.6369825494952657, "grad_norm": 0.0, - "learning_rate": 1.1729551861494235e-05, - "loss": 0.9903, + "learning_rate": 6.1513707849726945e-06, + "loss": 0.9578, "step": 16280 }, { - "epoch": 0.4613618974751339, + "epoch": 0.6370216761874951, "grad_norm": 0.0, - "learning_rate": 1.1728647896310705e-05, - "loss": 1.0288, + "learning_rate": 6.150201194832837e-06, + "loss": 0.8574, "step": 16281 }, { - "epoch": 0.46139023491739634, + "epoch": 0.6370608028797246, "grad_norm": 0.0, - "learning_rate": 1.1727743916566135e-05, - "loss": 0.8502, + "learning_rate": 6.14903166651593e-06, + "loss": 1.0204, "step": 16282 }, { - "epoch": 0.46141857235965883, + "epoch": 0.6370999295719539, "grad_norm": 0.0, - "learning_rate": 1.1726839922268134e-05, - "loss": 1.0019, + "learning_rate": 6.1478622000407475e-06, + "loss": 0.8912, "step": 16283 }, { - "epoch": 0.46144690980192127, + "epoch": 0.6371390562641834, "grad_norm": 0.0, - "learning_rate": 1.172593591342432e-05, - "loss": 0.9929, + "learning_rate": 6.1466927954260705e-06, + "loss": 0.9347, "step": 16284 }, { - "epoch": 0.46147524724418376, + "epoch": 0.6371781829564128, "grad_norm": 0.0, - "learning_rate": 1.1725031890042309e-05, - "loss": 0.7688, + "learning_rate": 6.145523452690681e-06, + "loss": 1.1616, "step": 16285 }, { - "epoch": 0.4615035846864462, + "epoch": 0.6372173096486423, "grad_norm": 0.0, - "learning_rate": 1.172412785212971e-05, - "loss": 0.9323, + "learning_rate": 6.144354171853348e-06, + "loss": 1.1168, "step": 16286 }, { - "epoch": 0.46153192212870864, + "epoch": 0.6372564363408717, "grad_norm": 0.0, - "learning_rate": 1.1723223799694145e-05, - "loss": 0.8407, + "learning_rate": 6.1431849529328615e-06, + "loss": 1.0999, "step": 16287 }, { - "epoch": 0.46156025957097113, + "epoch": 0.6372955630331012, "grad_norm": 0.0, - "learning_rate": 1.1722319732743225e-05, - "loss": 0.8402, + "learning_rate": 6.142015795947991e-06, + "loss": 0.9818, "step": 16288 }, { - "epoch": 0.46158859701323357, + "epoch": 0.6373346897253306, "grad_norm": 0.0, - "learning_rate": 1.1721415651284567e-05, - "loss": 0.907, + "learning_rate": 6.140846700917513e-06, + "loss": 1.0265, "step": 16289 }, { - "epoch": 0.46161693445549606, + "epoch": 0.6373738164175601, "grad_norm": 0.0, - "learning_rate": 1.1720511555325782e-05, - "loss": 0.831, + "learning_rate": 6.139677667860197e-06, + "loss": 1.0602, "step": 16290 }, { - "epoch": 0.4616452718977585, + "epoch": 0.6374129431097895, "grad_norm": 0.0, - "learning_rate": 1.1719607444874495e-05, - "loss": 0.9395, + "learning_rate": 6.1385086967948235e-06, + "loss": 0.9579, "step": 16291 }, { - "epoch": 0.461673609340021, + "epoch": 0.637452069802019, "grad_norm": 0.0, - "learning_rate": 1.1718703319938313e-05, - "loss": 1.0137, + "learning_rate": 6.137339787740163e-06, + "loss": 0.967, "step": 16292 }, { - "epoch": 0.46170194678228343, + "epoch": 0.6374911964942483, "grad_norm": 0.0, - "learning_rate": 1.1717799180524856e-05, - "loss": 0.7655, + "learning_rate": 6.136170940714985e-06, + "loss": 0.9873, "step": 16293 }, { - "epoch": 0.4617302842245459, + "epoch": 0.6375303231864778, "grad_norm": 0.0, - "learning_rate": 1.1716895026641735e-05, - "loss": 0.8608, + "learning_rate": 6.135002155738059e-06, + "loss": 0.9475, "step": 16294 }, { - "epoch": 0.46175862166680837, + "epoch": 0.6375694498787072, "grad_norm": 0.0, - "learning_rate": 1.1715990858296573e-05, - "loss": 0.8705, + "learning_rate": 6.133833432828153e-06, + "loss": 0.9659, "step": 16295 }, { - "epoch": 0.4617869591090708, + "epoch": 0.6376085765709367, "grad_norm": 0.0, - "learning_rate": 1.171508667549698e-05, - "loss": 0.8524, + "learning_rate": 6.13266477200404e-06, + "loss": 0.9767, "step": 16296 }, { - "epoch": 0.4618152965513333, + "epoch": 0.6376477032631661, "grad_norm": 0.0, - "learning_rate": 1.171418247825058e-05, - "loss": 0.8287, + "learning_rate": 6.131496173284485e-06, + "loss": 1.0473, "step": 16297 }, { - "epoch": 0.46184363399359574, + "epoch": 0.6376868299553956, "grad_norm": 0.0, - "learning_rate": 1.1713278266564978e-05, - "loss": 0.9402, + "learning_rate": 6.130327636688254e-06, + "loss": 0.8708, "step": 16298 }, { - "epoch": 0.4618719714358582, + "epoch": 0.637725956647625, "grad_norm": 0.0, - "learning_rate": 1.1712374040447802e-05, - "loss": 0.9553, + "learning_rate": 6.129159162234109e-06, + "loss": 0.9352, "step": 16299 }, { - "epoch": 0.46190030887812067, + "epoch": 0.6377650833398545, "grad_norm": 0.0, - "learning_rate": 1.1711469799906663e-05, - "loss": 0.8834, + "learning_rate": 6.127990749940822e-06, + "loss": 0.8936, "step": 16300 }, { - "epoch": 0.4619286463203831, + "epoch": 0.6378042100320839, "grad_norm": 0.0, - "learning_rate": 1.1710565544949177e-05, - "loss": 0.886, + "learning_rate": 6.1268223998271505e-06, + "loss": 1.0054, "step": 16301 }, { - "epoch": 0.4619569837626456, + "epoch": 0.6378433367243134, "grad_norm": 0.0, - "learning_rate": 1.1709661275582961e-05, - "loss": 0.8271, + "learning_rate": 6.125654111911857e-06, + "loss": 0.998, "step": 16302 }, { - "epoch": 0.46198532120490804, + "epoch": 0.6378824634165428, "grad_norm": 0.0, - "learning_rate": 1.1708756991815635e-05, - "loss": 0.8917, + "learning_rate": 6.124485886213705e-06, + "loss": 1.0864, "step": 16303 }, { - "epoch": 0.46201365864717053, + "epoch": 0.6379215901087723, "grad_norm": 0.0, - "learning_rate": 1.1707852693654815e-05, - "loss": 0.8665, + "learning_rate": 6.123317722751449e-06, + "loss": 1.1811, "step": 16304 }, { - "epoch": 0.46204199608943297, + "epoch": 0.6379607168010016, "grad_norm": 0.0, - "learning_rate": 1.1706948381108117e-05, - "loss": 1.0609, + "learning_rate": 6.122149621543856e-06, + "loss": 1.0444, "step": 16305 }, { - "epoch": 0.4620703335316954, + "epoch": 0.6379998434932311, "grad_norm": 0.0, - "learning_rate": 1.1706044054183156e-05, - "loss": 0.8464, + "learning_rate": 6.120981582609682e-06, + "loss": 1.0622, "step": 16306 }, { - "epoch": 0.4620986709739579, + "epoch": 0.6380389701854605, "grad_norm": 0.0, - "learning_rate": 1.1705139712887556e-05, - "loss": 0.8971, + "learning_rate": 6.119813605967681e-06, + "loss": 1.0276, "step": 16307 }, { - "epoch": 0.46212700841622034, + "epoch": 0.63807809687769, "grad_norm": 0.0, - "learning_rate": 1.1704235357228932e-05, - "loss": 0.9507, + "learning_rate": 6.118645691636612e-06, + "loss": 0.9851, "step": 16308 }, { - "epoch": 0.46215534585848284, + "epoch": 0.6381172235699194, "grad_norm": 0.0, - "learning_rate": 1.1703330987214898e-05, - "loss": 0.8675, + "learning_rate": 6.117477839635231e-06, + "loss": 1.1006, "step": 16309 }, { - "epoch": 0.4621836833007453, + "epoch": 0.6381563502621488, "grad_norm": 0.0, - "learning_rate": 1.1702426602853074e-05, - "loss": 0.9902, + "learning_rate": 6.116310049982288e-06, + "loss": 1.0277, "step": 16310 }, { - "epoch": 0.4622120207430077, + "epoch": 0.6381954769543783, "grad_norm": 0.0, - "learning_rate": 1.170152220415108e-05, - "loss": 0.9926, + "learning_rate": 6.1151423226965416e-06, + "loss": 1.0138, "step": 16311 }, { - "epoch": 0.4622403581852702, + "epoch": 0.6382346036466077, "grad_norm": 0.0, - "learning_rate": 1.1700617791116532e-05, - "loss": 0.9043, + "learning_rate": 6.1139746577967405e-06, + "loss": 1.0628, "step": 16312 }, { - "epoch": 0.46226869562753264, + "epoch": 0.6382737303388372, "grad_norm": 0.0, - "learning_rate": 1.169971336375705e-05, - "loss": 0.9382, + "learning_rate": 6.112807055301635e-06, + "loss": 0.9718, "step": 16313 }, { - "epoch": 0.46229703306979514, + "epoch": 0.6383128570310665, "grad_norm": 0.0, - "learning_rate": 1.1698808922080248e-05, - "loss": 0.9043, + "learning_rate": 6.111639515229981e-06, + "loss": 0.9781, "step": 16314 }, { - "epoch": 0.4623253705120576, + "epoch": 0.638351983723296, "grad_norm": 0.0, - "learning_rate": 1.1697904466093753e-05, - "loss": 0.8864, + "learning_rate": 6.110472037600523e-06, + "loss": 1.1393, "step": 16315 }, { - "epoch": 0.46235370795432007, + "epoch": 0.6383911104155254, "grad_norm": 0.0, - "learning_rate": 1.1696999995805174e-05, - "loss": 0.882, + "learning_rate": 6.109304622432008e-06, + "loss": 1.0207, "step": 16316 }, { - "epoch": 0.4623820453965825, + "epoch": 0.6384302371077549, "grad_norm": 0.0, - "learning_rate": 1.1696095511222137e-05, - "loss": 0.9059, + "learning_rate": 6.1081372697431854e-06, + "loss": 1.0076, "step": 16317 }, { - "epoch": 0.46241038283884495, + "epoch": 0.6384693637999843, "grad_norm": 0.0, - "learning_rate": 1.1695191012352256e-05, - "loss": 0.9148, + "learning_rate": 6.106969979552804e-06, + "loss": 0.9116, "step": 16318 }, { - "epoch": 0.46243872028110744, + "epoch": 0.6385084904922138, "grad_norm": 0.0, - "learning_rate": 1.169428649920315e-05, - "loss": 0.7141, + "learning_rate": 6.105802751879606e-06, + "loss": 0.9599, "step": 16319 }, { - "epoch": 0.4624670577233699, + "epoch": 0.6385476171844432, "grad_norm": 0.0, - "learning_rate": 1.1693381971782442e-05, - "loss": 0.7818, + "learning_rate": 6.1046355867423355e-06, + "loss": 1.024, "step": 16320 }, { - "epoch": 0.4624953951656324, + "epoch": 0.6385867438766727, "grad_norm": 0.0, - "learning_rate": 1.1692477430097746e-05, - "loss": 1.0057, + "learning_rate": 6.103468484159738e-06, + "loss": 0.9828, "step": 16321 }, { - "epoch": 0.4625237326078948, + "epoch": 0.6386258705689021, "grad_norm": 0.0, - "learning_rate": 1.1691572874156687e-05, - "loss": 0.9906, + "learning_rate": 6.102301444150549e-06, + "loss": 1.0272, "step": 16322 }, { - "epoch": 0.46255207005015725, + "epoch": 0.6386649972611316, "grad_norm": 0.0, - "learning_rate": 1.169066830396688e-05, - "loss": 0.8587, + "learning_rate": 6.101134466733518e-06, + "loss": 0.9992, "step": 16323 }, { - "epoch": 0.46258040749241974, + "epoch": 0.638704123953361, "grad_norm": 0.0, - "learning_rate": 1.1689763719535947e-05, - "loss": 0.8514, + "learning_rate": 6.099967551927384e-06, + "loss": 1.1217, "step": 16324 }, { - "epoch": 0.4626087449346822, + "epoch": 0.6387432506455905, "grad_norm": 0.0, - "learning_rate": 1.1688859120871507e-05, - "loss": 0.9071, + "learning_rate": 6.098800699750882e-06, + "loss": 1.0367, "step": 16325 }, { - "epoch": 0.4626370823769447, + "epoch": 0.6387823773378198, "grad_norm": 0.0, - "learning_rate": 1.168795450798118e-05, - "loss": 1.0174, + "learning_rate": 6.097633910222749e-06, + "loss": 1.068, "step": 16326 }, { - "epoch": 0.4626654198192071, + "epoch": 0.6388215040300493, "grad_norm": 0.0, - "learning_rate": 1.1687049880872583e-05, - "loss": 0.7572, + "learning_rate": 6.09646718336173e-06, + "loss": 1.1642, "step": 16327 }, { - "epoch": 0.4626937572614696, + "epoch": 0.6388606307222787, "grad_norm": 0.0, - "learning_rate": 1.1686145239553336e-05, - "loss": 1.013, + "learning_rate": 6.095300519186555e-06, + "loss": 1.0878, "step": 16328 }, { - "epoch": 0.46272209470373205, + "epoch": 0.6388997574145082, "grad_norm": 0.0, - "learning_rate": 1.1685240584031068e-05, - "loss": 0.9441, + "learning_rate": 6.094133917715963e-06, + "loss": 1.0206, "step": 16329 }, { - "epoch": 0.4627504321459945, + "epoch": 0.6389388841067376, "grad_norm": 0.0, - "learning_rate": 1.168433591431339e-05, - "loss": 0.884, + "learning_rate": 6.09296737896868e-06, + "loss": 0.9232, "step": 16330 }, { - "epoch": 0.462778769588257, + "epoch": 0.6389780107989671, "grad_norm": 0.0, - "learning_rate": 1.1683431230407924e-05, - "loss": 0.8763, + "learning_rate": 6.091800902963451e-06, + "loss": 1.0024, "step": 16331 }, { - "epoch": 0.4628071070305194, + "epoch": 0.6390171374911965, "grad_norm": 0.0, - "learning_rate": 1.1682526532322294e-05, - "loss": 0.8317, + "learning_rate": 6.090634489719e-06, + "loss": 0.8987, "step": 16332 }, { - "epoch": 0.4628354444727819, + "epoch": 0.639056264183426, "grad_norm": 0.0, - "learning_rate": 1.1681621820064117e-05, - "loss": 0.9714, + "learning_rate": 6.089468139254062e-06, + "loss": 1.1493, "step": 16333 }, { - "epoch": 0.46286378191504435, + "epoch": 0.6390953908756554, "grad_norm": 0.0, - "learning_rate": 1.1680717093641012e-05, - "loss": 0.9538, + "learning_rate": 6.088301851587366e-06, + "loss": 0.9518, "step": 16334 }, { - "epoch": 0.4628921193573068, + "epoch": 0.6391345175678849, "grad_norm": 0.0, - "learning_rate": 1.1679812353060604e-05, - "loss": 1.0069, + "learning_rate": 6.087135626737636e-06, + "loss": 1.0645, "step": 16335 }, { - "epoch": 0.4629204567995693, + "epoch": 0.6391736442601142, "grad_norm": 0.0, - "learning_rate": 1.1678907598330515e-05, - "loss": 0.7647, + "learning_rate": 6.085969464723609e-06, + "loss": 0.919, "step": 16336 }, { - "epoch": 0.4629487942418317, + "epoch": 0.6392127709523436, "grad_norm": 0.0, - "learning_rate": 1.1678002829458367e-05, - "loss": 0.843, + "learning_rate": 6.084803365564007e-06, + "loss": 0.9962, "step": 16337 }, { - "epoch": 0.4629771316840942, + "epoch": 0.6392518976445731, "grad_norm": 0.0, - "learning_rate": 1.1677098046451773e-05, - "loss": 0.873, + "learning_rate": 6.083637329277557e-06, + "loss": 1.1123, "step": 16338 }, { - "epoch": 0.46300546912635665, + "epoch": 0.6392910243368025, "grad_norm": 0.0, - "learning_rate": 1.1676193249318359e-05, - "loss": 0.8168, + "learning_rate": 6.08247135588298e-06, + "loss": 1.0887, "step": 16339 }, { - "epoch": 0.46303380656861914, + "epoch": 0.639330151029032, "grad_norm": 0.0, - "learning_rate": 1.167528843806575e-05, - "loss": 0.9845, + "learning_rate": 6.081305445399008e-06, + "loss": 1.1124, "step": 16340 }, { - "epoch": 0.4630621440108816, + "epoch": 0.6393692777212614, "grad_norm": 0.0, - "learning_rate": 1.1674383612701562e-05, - "loss": 1.0043, + "learning_rate": 6.080139597844361e-06, + "loss": 0.8588, "step": 16341 }, { - "epoch": 0.463090481453144, + "epoch": 0.6394084044134909, "grad_norm": 0.0, - "learning_rate": 1.167347877323342e-05, - "loss": 0.8996, + "learning_rate": 6.078973813237761e-06, + "loss": 0.8532, "step": 16342 }, { - "epoch": 0.4631188188954065, + "epoch": 0.6394475311057203, "grad_norm": 0.0, - "learning_rate": 1.1672573919668947e-05, - "loss": 0.8942, + "learning_rate": 6.077808091597927e-06, + "loss": 0.9852, "step": 16343 }, { - "epoch": 0.46314715633766895, + "epoch": 0.6394866577979498, "grad_norm": 0.0, - "learning_rate": 1.1671669052015757e-05, - "loss": 0.841, + "learning_rate": 6.076642432943576e-06, + "loss": 0.9949, "step": 16344 }, { - "epoch": 0.46317549377993145, + "epoch": 0.6395257844901792, "grad_norm": 0.0, - "learning_rate": 1.1670764170281483e-05, - "loss": 0.8811, + "learning_rate": 6.0754768372934345e-06, + "loss": 0.9696, "step": 16345 }, { - "epoch": 0.4632038312221939, + "epoch": 0.6395649111824087, "grad_norm": 0.0, - "learning_rate": 1.166985927447374e-05, - "loss": 0.9739, + "learning_rate": 6.074311304666219e-06, + "loss": 1.0019, "step": 16346 }, { - "epoch": 0.4632321686644563, + "epoch": 0.639604037874638, "grad_norm": 0.0, - "learning_rate": 1.1668954364600154e-05, - "loss": 0.8332, + "learning_rate": 6.073145835080645e-06, + "loss": 0.9931, "step": 16347 }, { - "epoch": 0.4632605061067188, + "epoch": 0.6396431645668675, "grad_norm": 0.0, - "learning_rate": 1.1668049440668342e-05, - "loss": 0.898, + "learning_rate": 6.071980428555425e-06, + "loss": 1.1149, "step": 16348 }, { - "epoch": 0.46328884354898126, + "epoch": 0.6396822912590969, "grad_norm": 0.0, - "learning_rate": 1.1667144502685932e-05, - "loss": 0.9032, + "learning_rate": 6.070815085109278e-06, + "loss": 1.004, "step": 16349 }, { - "epoch": 0.46331718099124375, + "epoch": 0.6397214179513264, "grad_norm": 0.0, - "learning_rate": 1.1666239550660548e-05, - "loss": 0.9866, + "learning_rate": 6.069649804760919e-06, + "loss": 1.0825, "step": 16350 }, { - "epoch": 0.4633455184335062, + "epoch": 0.6397605446435558, "grad_norm": 0.0, - "learning_rate": 1.1665334584599805e-05, - "loss": 0.9358, + "learning_rate": 6.068484587529059e-06, + "loss": 0.9873, "step": 16351 }, { - "epoch": 0.4633738558757686, + "epoch": 0.6397996713357853, "grad_norm": 0.0, - "learning_rate": 1.1664429604511332e-05, - "loss": 0.7131, + "learning_rate": 6.06731943343241e-06, + "loss": 0.9815, "step": 16352 }, { - "epoch": 0.4634021933180311, + "epoch": 0.6398387980280147, "grad_norm": 0.0, - "learning_rate": 1.1663524610402752e-05, - "loss": 1.0197, + "learning_rate": 6.066154342489681e-06, + "loss": 0.9571, "step": 16353 }, { - "epoch": 0.46343053076029356, + "epoch": 0.6398779247202442, "grad_norm": 0.0, - "learning_rate": 1.1662619602281682e-05, - "loss": 0.7843, + "learning_rate": 6.0649893147195845e-06, + "loss": 0.9958, "step": 16354 }, { - "epoch": 0.46345886820255605, + "epoch": 0.6399170514124736, "grad_norm": 0.0, - "learning_rate": 1.1661714580155755e-05, - "loss": 1.0147, + "learning_rate": 6.06382435014083e-06, + "loss": 1.0685, "step": 16355 }, { - "epoch": 0.4634872056448185, + "epoch": 0.6399561781047031, "grad_norm": 0.0, - "learning_rate": 1.1660809544032584e-05, - "loss": 0.8258, + "learning_rate": 6.062659448772124e-06, + "loss": 0.9602, "step": 16356 }, { - "epoch": 0.463515543087081, + "epoch": 0.6399953047969325, "grad_norm": 0.0, - "learning_rate": 1.16599044939198e-05, - "loss": 1.0189, + "learning_rate": 6.061494610632171e-06, + "loss": 1.0688, "step": 16357 }, { - "epoch": 0.4635438805293434, + "epoch": 0.640034431489162, "grad_norm": 0.0, - "learning_rate": 1.1658999429825026e-05, - "loss": 0.9538, + "learning_rate": 6.060329835739682e-06, + "loss": 0.6856, "step": 16358 }, { - "epoch": 0.46357221797160586, + "epoch": 0.6400735581813913, "grad_norm": 0.0, - "learning_rate": 1.1658094351755883e-05, - "loss": 0.9462, + "learning_rate": 6.05916512411336e-06, + "loss": 1.0289, "step": 16359 }, { - "epoch": 0.46360055541386835, + "epoch": 0.6401126848736208, "grad_norm": 0.0, - "learning_rate": 1.1657189259719992e-05, - "loss": 0.801, + "learning_rate": 6.058000475771906e-06, + "loss": 1.0959, "step": 16360 }, { - "epoch": 0.4636288928561308, + "epoch": 0.6401518115658502, "grad_norm": 0.0, - "learning_rate": 1.1656284153724982e-05, - "loss": 1.0239, + "learning_rate": 6.056835890734025e-06, + "loss": 1.0509, "step": 16361 }, { - "epoch": 0.4636572302983933, + "epoch": 0.6401909382580797, "grad_norm": 0.0, - "learning_rate": 1.1655379033778477e-05, - "loss": 0.9379, + "learning_rate": 6.055671369018418e-06, + "loss": 1.0793, "step": 16362 }, { - "epoch": 0.4636855677406557, + "epoch": 0.6402300649503091, "grad_norm": 0.0, - "learning_rate": 1.16544738998881e-05, - "loss": 0.8694, + "learning_rate": 6.054506910643787e-06, + "loss": 0.9165, "step": 16363 }, { - "epoch": 0.46371390518291816, + "epoch": 0.6402691916425386, "grad_norm": 0.0, - "learning_rate": 1.1653568752061472e-05, - "loss": 1.0167, + "learning_rate": 6.0533425156288325e-06, + "loss": 0.9056, "step": 16364 }, { - "epoch": 0.46374224262518066, + "epoch": 0.640308318334768, "grad_norm": 0.0, - "learning_rate": 1.1652663590306226e-05, - "loss": 0.925, + "learning_rate": 6.05217818399225e-06, + "loss": 0.9701, "step": 16365 }, { - "epoch": 0.4637705800674431, + "epoch": 0.6403474450269974, "grad_norm": 0.0, - "learning_rate": 1.1651758414629977e-05, - "loss": 0.9031, + "learning_rate": 6.051013915752737e-06, + "loss": 1.0755, "step": 16366 }, { - "epoch": 0.4637989175097056, + "epoch": 0.6403865717192269, "grad_norm": 0.0, - "learning_rate": 1.1650853225040352e-05, - "loss": 0.9428, + "learning_rate": 6.049849710928995e-06, + "loss": 0.8748, "step": 16367 }, { - "epoch": 0.463827254951968, + "epoch": 0.6404256984114562, "grad_norm": 0.0, - "learning_rate": 1.1649948021544979e-05, - "loss": 0.9519, + "learning_rate": 6.048685569539717e-06, + "loss": 1.0241, "step": 16368 }, { - "epoch": 0.4638555923942305, + "epoch": 0.6404648251036857, "grad_norm": 0.0, - "learning_rate": 1.164904280415148e-05, - "loss": 0.9258, + "learning_rate": 6.0475214916035985e-06, + "loss": 0.9764, "step": 16369 }, { - "epoch": 0.46388392983649296, + "epoch": 0.6405039517959151, "grad_norm": 0.0, - "learning_rate": 1.1648137572867484e-05, - "loss": 0.8613, + "learning_rate": 6.046357477139331e-06, + "loss": 1.0464, "step": 16370 }, { - "epoch": 0.4639122672787554, + "epoch": 0.6405430784881446, "grad_norm": 0.0, - "learning_rate": 1.1647232327700613e-05, - "loss": 0.9639, + "learning_rate": 6.045193526165604e-06, + "loss": 0.9805, "step": 16371 }, { - "epoch": 0.4639406047210179, + "epoch": 0.640582205180374, "grad_norm": 0.0, - "learning_rate": 1.1646327068658491e-05, - "loss": 0.9171, + "learning_rate": 6.044029638701117e-06, + "loss": 0.9451, "step": 16372 }, { - "epoch": 0.46396894216328033, + "epoch": 0.6406213318726035, "grad_norm": 0.0, - "learning_rate": 1.1645421795748744e-05, - "loss": 0.8219, + "learning_rate": 6.042865814764558e-06, + "loss": 1.1023, "step": 16373 }, { - "epoch": 0.4639972796055428, + "epoch": 0.6406604585648329, "grad_norm": 0.0, - "learning_rate": 1.1644516508978998e-05, - "loss": 0.8959, + "learning_rate": 6.041702054374615e-06, + "loss": 1.0242, "step": 16374 }, { - "epoch": 0.46402561704780526, + "epoch": 0.6406995852570624, "grad_norm": 0.0, - "learning_rate": 1.164361120835688e-05, - "loss": 0.9621, + "learning_rate": 6.0405383575499735e-06, + "loss": 1.0295, "step": 16375 }, { - "epoch": 0.4640539544900677, + "epoch": 0.6407387119492918, "grad_norm": 0.0, - "learning_rate": 1.1642705893890014e-05, - "loss": 0.7941, + "learning_rate": 6.039374724309328e-06, + "loss": 0.9569, "step": 16376 }, { - "epoch": 0.4640822919323302, + "epoch": 0.6407778386415213, "grad_norm": 0.0, - "learning_rate": 1.1641800565586026e-05, - "loss": 0.8596, + "learning_rate": 6.0382111546713605e-06, + "loss": 1.0005, "step": 16377 }, { - "epoch": 0.46411062937459263, + "epoch": 0.6408169653337507, "grad_norm": 0.0, - "learning_rate": 1.1640895223452543e-05, - "loss": 0.9346, + "learning_rate": 6.037047648654759e-06, + "loss": 1.0244, "step": 16378 }, { - "epoch": 0.4641389668168551, + "epoch": 0.6408560920259802, "grad_norm": 0.0, - "learning_rate": 1.163998986749719e-05, - "loss": 0.9653, + "learning_rate": 6.035884206278206e-06, + "loss": 0.9105, "step": 16379 }, { - "epoch": 0.46416730425911756, + "epoch": 0.6408952187182095, "grad_norm": 0.0, - "learning_rate": 1.1639084497727593e-05, - "loss": 0.921, + "learning_rate": 6.034720827560381e-06, + "loss": 0.9719, "step": 16380 }, { - "epoch": 0.46419564170138006, + "epoch": 0.640934345410439, "grad_norm": 0.0, - "learning_rate": 1.1638179114151378e-05, - "loss": 0.9155, + "learning_rate": 6.033557512519975e-06, + "loss": 0.8893, "step": 16381 }, { - "epoch": 0.4642239791436425, + "epoch": 0.6409734721026684, "grad_norm": 0.0, - "learning_rate": 1.1637273716776172e-05, - "loss": 0.9258, + "learning_rate": 6.032394261175665e-06, + "loss": 0.8642, "step": 16382 }, { - "epoch": 0.46425231658590493, + "epoch": 0.6410125987948979, "grad_norm": 0.0, - "learning_rate": 1.1636368305609604e-05, - "loss": 0.8847, + "learning_rate": 6.031231073546131e-06, + "loss": 0.9669, "step": 16383 }, { - "epoch": 0.4642806540281674, + "epoch": 0.6410517254871273, "grad_norm": 0.0, - "learning_rate": 1.1635462880659296e-05, - "loss": 0.8547, + "learning_rate": 6.03006794965005e-06, + "loss": 1.1372, "step": 16384 }, { - "epoch": 0.46430899147042987, + "epoch": 0.6410908521793568, "grad_norm": 0.0, - "learning_rate": 1.1634557441932877e-05, - "loss": 0.8986, + "learning_rate": 6.028904889506107e-06, + "loss": 0.9453, "step": 16385 }, { - "epoch": 0.46433732891269236, + "epoch": 0.6411299788715862, "grad_norm": 0.0, - "learning_rate": 1.1633651989437976e-05, - "loss": 0.9625, + "learning_rate": 6.027741893132974e-06, + "loss": 1.0634, "step": 16386 }, { - "epoch": 0.4643656663549548, + "epoch": 0.6411691055638157, "grad_norm": 0.0, - "learning_rate": 1.1632746523182213e-05, - "loss": 0.9317, + "learning_rate": 6.02657896054933e-06, + "loss": 1.0417, "step": 16387 }, { - "epoch": 0.46439400379721724, + "epoch": 0.6412082322560451, "grad_norm": 0.0, - "learning_rate": 1.1631841043173222e-05, - "loss": 0.8608, + "learning_rate": 6.025416091773845e-06, + "loss": 1.0897, "step": 16388 }, { - "epoch": 0.46442234123947973, + "epoch": 0.6412473589482746, "grad_norm": 0.0, - "learning_rate": 1.1630935549418627e-05, - "loss": 0.9264, + "learning_rate": 6.0242532868252e-06, + "loss": 1.0469, "step": 16389 }, { - "epoch": 0.46445067868174217, + "epoch": 0.641286485640504, "grad_norm": 0.0, - "learning_rate": 1.163003004192606e-05, - "loss": 0.7909, + "learning_rate": 6.023090545722066e-06, + "loss": 0.985, "step": 16390 }, { - "epoch": 0.46447901612400466, + "epoch": 0.6413256123327334, "grad_norm": 0.0, - "learning_rate": 1.1629124520703141e-05, - "loss": 0.8927, + "learning_rate": 6.021927868483115e-06, + "loss": 0.987, "step": 16391 }, { - "epoch": 0.4645073535662671, + "epoch": 0.6413647390249628, "grad_norm": 0.0, - "learning_rate": 1.1628218985757504e-05, - "loss": 0.8256, + "learning_rate": 6.020765255127017e-06, + "loss": 1.0316, "step": 16392 }, { - "epoch": 0.4645356910085296, + "epoch": 0.6414038657171923, "grad_norm": 0.0, - "learning_rate": 1.162731343709677e-05, - "loss": 0.9439, + "learning_rate": 6.019602705672441e-06, + "loss": 1.1577, "step": 16393 }, { - "epoch": 0.46456402845079203, + "epoch": 0.6414429924094217, "grad_norm": 0.0, - "learning_rate": 1.1626407874728572e-05, - "loss": 0.9807, + "learning_rate": 6.0184402201380575e-06, + "loss": 0.9888, "step": 16394 }, { - "epoch": 0.46459236589305447, + "epoch": 0.6414821191016511, "grad_norm": 0.0, - "learning_rate": 1.1625502298660539e-05, - "loss": 0.9839, + "learning_rate": 6.017277798542536e-06, + "loss": 0.9632, "step": 16395 }, { - "epoch": 0.46462070333531696, + "epoch": 0.6415212457938806, "grad_norm": 0.0, - "learning_rate": 1.1624596708900296e-05, - "loss": 0.8483, + "learning_rate": 6.016115440904544e-06, + "loss": 1.1407, "step": 16396 }, { - "epoch": 0.4646490407775794, + "epoch": 0.64156037248611, "grad_norm": 0.0, - "learning_rate": 1.1623691105455469e-05, - "loss": 0.8993, + "learning_rate": 6.014953147242744e-06, + "loss": 1.0155, "step": 16397 }, { - "epoch": 0.4646773782198419, + "epoch": 0.6415994991783395, "grad_norm": 0.0, - "learning_rate": 1.162278548833369e-05, - "loss": 0.8864, + "learning_rate": 6.013790917575804e-06, + "loss": 0.9564, "step": 16398 }, { - "epoch": 0.46470571566210434, + "epoch": 0.6416386258705689, "grad_norm": 0.0, - "learning_rate": 1.1621879857542587e-05, - "loss": 0.9677, + "learning_rate": 6.012628751922385e-06, + "loss": 0.9928, "step": 16399 }, { - "epoch": 0.4647340531043668, + "epoch": 0.6416777525627984, "grad_norm": 0.0, - "learning_rate": 1.1620974213089786e-05, - "loss": 0.952, + "learning_rate": 6.011466650301154e-06, + "loss": 1.0542, "step": 16400 }, { - "epoch": 0.46476239054662927, + "epoch": 0.6417168792550277, "grad_norm": 0.0, - "learning_rate": 1.1620068554982919e-05, - "loss": 0.9364, + "learning_rate": 6.010304612730771e-06, + "loss": 0.9495, "step": 16401 }, { - "epoch": 0.4647907279888917, + "epoch": 0.6417560059472572, "grad_norm": 0.0, - "learning_rate": 1.1619162883229611e-05, - "loss": 0.9718, + "learning_rate": 6.009142639229893e-06, + "loss": 1.1446, "step": 16402 }, { - "epoch": 0.4648190654311542, + "epoch": 0.6417951326394866, "grad_norm": 0.0, - "learning_rate": 1.1618257197837495e-05, - "loss": 0.9026, + "learning_rate": 6.007980729817187e-06, + "loss": 0.9922, "step": 16403 }, { - "epoch": 0.46484740287341664, + "epoch": 0.6418342593317161, "grad_norm": 0.0, - "learning_rate": 1.1617351498814199e-05, - "loss": 0.8893, + "learning_rate": 6.006818884511307e-06, + "loss": 1.0087, "step": 16404 }, { - "epoch": 0.46487574031567913, + "epoch": 0.6418733860239455, "grad_norm": 0.0, - "learning_rate": 1.161644578616735e-05, - "loss": 0.9803, + "learning_rate": 6.005657103330911e-06, + "loss": 0.932, "step": 16405 }, { - "epoch": 0.46490407775794157, + "epoch": 0.641912512716175, "grad_norm": 0.0, - "learning_rate": 1.1615540059904572e-05, - "loss": 0.8864, + "learning_rate": 6.004495386294657e-06, + "loss": 1.0807, "step": 16406 }, { - "epoch": 0.464932415200204, + "epoch": 0.6419516394084044, "grad_norm": 0.0, - "learning_rate": 1.1614634320033505e-05, - "loss": 0.7975, + "learning_rate": 6.003333733421202e-06, + "loss": 1.0647, "step": 16407 }, { - "epoch": 0.4649607526424665, + "epoch": 0.6419907661006339, "grad_norm": 0.0, - "learning_rate": 1.1613728566561775e-05, - "loss": 0.9393, + "learning_rate": 6.002172144729199e-06, + "loss": 0.9278, "step": 16408 }, { - "epoch": 0.46498909008472894, + "epoch": 0.6420298927928633, "grad_norm": 0.0, - "learning_rate": 1.1612822799497008e-05, - "loss": 0.8015, + "learning_rate": 6.001010620237302e-06, + "loss": 0.9456, "step": 16409 }, { - "epoch": 0.46501742752699143, + "epoch": 0.6420690194850928, "grad_norm": 0.0, - "learning_rate": 1.1611917018846835e-05, - "loss": 0.8018, + "learning_rate": 5.999849159964164e-06, + "loss": 0.983, "step": 16410 }, { - "epoch": 0.46504576496925387, + "epoch": 0.6421081461773221, "grad_norm": 0.0, - "learning_rate": 1.1611011224618888e-05, - "loss": 0.9509, + "learning_rate": 5.99868776392843e-06, + "loss": 0.8174, "step": 16411 }, { - "epoch": 0.4650741024115163, + "epoch": 0.6421472728695516, "grad_norm": 0.0, - "learning_rate": 1.1610105416820796e-05, - "loss": 0.8767, + "learning_rate": 5.997526432148763e-06, + "loss": 1.0322, "step": 16412 }, { - "epoch": 0.4651024398537788, + "epoch": 0.642186399561781, "grad_norm": 0.0, - "learning_rate": 1.1609199595460185e-05, - "loss": 0.9534, + "learning_rate": 5.9963651646438045e-06, + "loss": 0.9782, "step": 16413 }, { - "epoch": 0.46513077729604124, + "epoch": 0.6422255262540105, "grad_norm": 0.0, - "learning_rate": 1.160829376054469e-05, - "loss": 0.9165, + "learning_rate": 5.995203961432205e-06, + "loss": 0.9724, "step": 16414 }, { - "epoch": 0.46515911473830374, + "epoch": 0.6422646529462399, "grad_norm": 0.0, - "learning_rate": 1.160738791208194e-05, - "loss": 0.9869, + "learning_rate": 5.994042822532608e-06, + "loss": 0.9381, "step": 16415 }, { - "epoch": 0.4651874521805662, + "epoch": 0.6423037796384694, "grad_norm": 0.0, - "learning_rate": 1.1606482050079563e-05, - "loss": 0.8881, + "learning_rate": 5.992881747963667e-06, + "loss": 0.9651, "step": 16416 }, { - "epoch": 0.46521578962282867, + "epoch": 0.6423429063306988, "grad_norm": 0.0, - "learning_rate": 1.1605576174545195e-05, - "loss": 0.8807, + "learning_rate": 5.991720737744024e-06, + "loss": 1.0706, "step": 16417 }, { - "epoch": 0.4652441270650911, + "epoch": 0.6423820330229283, "grad_norm": 0.0, - "learning_rate": 1.1604670285486457e-05, - "loss": 0.9662, + "learning_rate": 5.990559791892323e-06, + "loss": 0.9788, "step": 16418 }, { - "epoch": 0.46527246450735354, + "epoch": 0.6424211597151577, "grad_norm": 0.0, - "learning_rate": 1.1603764382910989e-05, - "loss": 0.8907, + "learning_rate": 5.989398910427209e-06, + "loss": 1.0131, "step": 16419 }, { - "epoch": 0.46530080194961604, + "epoch": 0.6424602864073872, "grad_norm": 0.0, - "learning_rate": 1.1602858466826417e-05, - "loss": 0.9044, + "learning_rate": 5.988238093367318e-06, + "loss": 0.993, "step": 16420 }, { - "epoch": 0.4653291393918785, + "epoch": 0.6424994130996166, "grad_norm": 0.0, - "learning_rate": 1.1601952537240373e-05, - "loss": 0.8896, + "learning_rate": 5.9870773407313e-06, + "loss": 0.9033, "step": 16421 }, { - "epoch": 0.46535747683414097, + "epoch": 0.642538539791846, "grad_norm": 0.0, - "learning_rate": 1.1601046594160487e-05, - "loss": 0.8685, + "learning_rate": 5.985916652537791e-06, + "loss": 0.9233, "step": 16422 }, { - "epoch": 0.4653858142764034, + "epoch": 0.6425776664840754, "grad_norm": 0.0, - "learning_rate": 1.1600140637594392e-05, - "loss": 0.8871, + "learning_rate": 5.984756028805432e-06, + "loss": 0.9426, "step": 16423 }, { - "epoch": 0.46541415171866585, + "epoch": 0.6426167931763048, "grad_norm": 0.0, - "learning_rate": 1.1599234667549722e-05, - "loss": 0.9533, + "learning_rate": 5.983595469552855e-06, + "loss": 0.8038, "step": 16424 }, { - "epoch": 0.46544248916092834, + "epoch": 0.6426559198685343, "grad_norm": 0.0, - "learning_rate": 1.15983286840341e-05, - "loss": 0.9324, + "learning_rate": 5.982434974798705e-06, + "loss": 0.964, "step": 16425 }, { - "epoch": 0.4654708266031908, + "epoch": 0.6426950465607637, "grad_norm": 0.0, - "learning_rate": 1.1597422687055161e-05, - "loss": 0.9401, + "learning_rate": 5.981274544561617e-06, + "loss": 1.1541, "step": 16426 }, { - "epoch": 0.4654991640454533, + "epoch": 0.6427341732529932, "grad_norm": 0.0, - "learning_rate": 1.1596516676620539e-05, - "loss": 0.866, + "learning_rate": 5.980114178860224e-06, + "loss": 0.8937, "step": 16427 }, { - "epoch": 0.4655275014877157, + "epoch": 0.6427732999452226, "grad_norm": 0.0, - "learning_rate": 1.1595610652737865e-05, - "loss": 0.8407, + "learning_rate": 5.978953877713159e-06, + "loss": 1.0223, "step": 16428 }, { - "epoch": 0.4655558389299782, + "epoch": 0.6428124266374521, "grad_norm": 0.0, - "learning_rate": 1.159470461541477e-05, - "loss": 0.8492, + "learning_rate": 5.977793641139051e-06, + "loss": 1.0811, "step": 16429 }, { - "epoch": 0.46558417637224064, + "epoch": 0.6428515533296815, "grad_norm": 0.0, - "learning_rate": 1.1593798564658887e-05, - "loss": 0.9148, + "learning_rate": 5.976633469156543e-06, + "loss": 0.9023, "step": 16430 }, { - "epoch": 0.4656125138145031, + "epoch": 0.642890680021911, "grad_norm": 0.0, - "learning_rate": 1.1592892500477843e-05, - "loss": 0.8969, + "learning_rate": 5.9754733617842585e-06, + "loss": 1.1102, "step": 16431 }, { - "epoch": 0.4656408512567656, + "epoch": 0.6429298067141404, "grad_norm": 0.0, - "learning_rate": 1.1591986422879276e-05, - "loss": 1.0537, + "learning_rate": 5.97431331904083e-06, + "loss": 0.9621, "step": 16432 }, { - "epoch": 0.465669188699028, + "epoch": 0.6429689334063698, "grad_norm": 0.0, - "learning_rate": 1.1591080331870816e-05, - "loss": 0.9045, + "learning_rate": 5.973153340944882e-06, + "loss": 1.0077, "step": 16433 }, { - "epoch": 0.4656975261412905, + "epoch": 0.6430080600985992, "grad_norm": 0.0, - "learning_rate": 1.1590174227460098e-05, - "loss": 1.0135, + "learning_rate": 5.971993427515047e-06, + "loss": 0.9922, "step": 16434 }, { - "epoch": 0.46572586358355295, + "epoch": 0.6430471867908287, "grad_norm": 0.0, - "learning_rate": 1.1589268109654748e-05, - "loss": 0.9251, + "learning_rate": 5.970833578769951e-06, + "loss": 1.0744, "step": 16435 }, { - "epoch": 0.4657542010258154, + "epoch": 0.6430863134830581, "grad_norm": 0.0, - "learning_rate": 1.1588361978462405e-05, - "loss": 0.8653, + "learning_rate": 5.969673794728219e-06, + "loss": 0.9746, "step": 16436 }, { - "epoch": 0.4657825384680779, + "epoch": 0.6431254401752876, "grad_norm": 0.0, - "learning_rate": 1.15874558338907e-05, - "loss": 0.8542, + "learning_rate": 5.968514075408476e-06, + "loss": 1.0675, "step": 16437 }, { - "epoch": 0.4658108759103403, + "epoch": 0.643164566867517, "grad_norm": 0.0, - "learning_rate": 1.1586549675947261e-05, - "loss": 0.913, + "learning_rate": 5.967354420829341e-06, + "loss": 0.9796, "step": 16438 }, { - "epoch": 0.4658392133526028, + "epoch": 0.6432036935597465, "grad_norm": 0.0, - "learning_rate": 1.1585643504639728e-05, - "loss": 0.8567, + "learning_rate": 5.9661948310094446e-06, + "loss": 0.9672, "step": 16439 }, { - "epoch": 0.46586755079486525, + "epoch": 0.6432428202519759, "grad_norm": 0.0, - "learning_rate": 1.158473731997573e-05, - "loss": 0.9141, + "learning_rate": 5.965035305967405e-06, + "loss": 0.9576, "step": 16440 }, { - "epoch": 0.46589588823712774, + "epoch": 0.6432819469442054, "grad_norm": 0.0, - "learning_rate": 1.1583831121962902e-05, - "loss": 0.9339, + "learning_rate": 5.9638758457218425e-06, + "loss": 1.0323, "step": 16441 }, { - "epoch": 0.4659242256793902, + "epoch": 0.6433210736364348, "grad_norm": 0.0, - "learning_rate": 1.1582924910608877e-05, - "loss": 0.9971, + "learning_rate": 5.962716450291372e-06, + "loss": 1.0099, "step": 16442 }, { - "epoch": 0.4659525631216526, + "epoch": 0.6433602003286643, "grad_norm": 0.0, - "learning_rate": 1.1582018685921287e-05, - "loss": 0.9284, + "learning_rate": 5.961557119694621e-06, + "loss": 1.0543, "step": 16443 }, { - "epoch": 0.4659809005639151, + "epoch": 0.6433993270208936, "grad_norm": 0.0, - "learning_rate": 1.1581112447907766e-05, - "loss": 0.9504, + "learning_rate": 5.960397853950199e-06, + "loss": 0.9954, "step": 16444 }, { - "epoch": 0.46600923800617755, + "epoch": 0.6434384537131231, "grad_norm": 0.0, - "learning_rate": 1.1580206196575945e-05, - "loss": 0.8419, + "learning_rate": 5.9592386530767285e-06, + "loss": 1.0575, "step": 16445 }, { - "epoch": 0.46603757544844004, + "epoch": 0.6434775804053525, "grad_norm": 0.0, - "learning_rate": 1.1579299931933465e-05, - "loss": 0.8319, + "learning_rate": 5.9580795170928195e-06, + "loss": 1.0145, "step": 16446 }, { - "epoch": 0.4660659128907025, + "epoch": 0.643516707097582, "grad_norm": 0.0, - "learning_rate": 1.1578393653987952e-05, - "loss": 1.0134, + "learning_rate": 5.95692044601709e-06, + "loss": 0.8987, "step": 16447 }, { - "epoch": 0.4660942503329649, + "epoch": 0.6435558337898114, "grad_norm": 0.0, - "learning_rate": 1.1577487362747043e-05, - "loss": 0.8409, + "learning_rate": 5.955761439868153e-06, + "loss": 1.1647, "step": 16448 }, { - "epoch": 0.4661225877752274, + "epoch": 0.6435949604820409, "grad_norm": 0.0, - "learning_rate": 1.1576581058218375e-05, - "loss": 0.9248, + "learning_rate": 5.9546024986646186e-06, + "loss": 1.1093, "step": 16449 }, { - "epoch": 0.46615092521748985, + "epoch": 0.6436340871742703, "grad_norm": 0.0, - "learning_rate": 1.1575674740409579e-05, - "loss": 0.8539, + "learning_rate": 5.953443622425097e-06, + "loss": 0.9493, "step": 16450 }, { - "epoch": 0.46617926265975235, + "epoch": 0.6436732138664997, "grad_norm": 0.0, - "learning_rate": 1.1574768409328288e-05, - "loss": 0.945, + "learning_rate": 5.952284811168201e-06, + "loss": 1.0493, "step": 16451 }, { - "epoch": 0.4662076001020148, + "epoch": 0.6437123405587292, "grad_norm": 0.0, - "learning_rate": 1.1573862064982135e-05, - "loss": 0.9357, + "learning_rate": 5.95112606491254e-06, + "loss": 1.0005, "step": 16452 }, { - "epoch": 0.4662359375442773, + "epoch": 0.6437514672509586, "grad_norm": 0.0, - "learning_rate": 1.157295570737876e-05, - "loss": 0.9081, + "learning_rate": 5.9499673836767215e-06, + "loss": 1.0298, "step": 16453 }, { - "epoch": 0.4662642749865397, + "epoch": 0.643790593943188, "grad_norm": 0.0, - "learning_rate": 1.15720493365258e-05, - "loss": 0.8888, + "learning_rate": 5.948808767479352e-06, + "loss": 1.0637, "step": 16454 }, { - "epoch": 0.46629261242880216, + "epoch": 0.6438297206354174, "grad_norm": 0.0, - "learning_rate": 1.1571142952430878e-05, - "loss": 0.8629, + "learning_rate": 5.947650216339031e-06, + "loss": 0.9817, "step": 16455 }, { - "epoch": 0.46632094987106465, + "epoch": 0.6438688473276469, "grad_norm": 0.0, - "learning_rate": 1.1570236555101638e-05, - "loss": 0.8727, + "learning_rate": 5.946491730274376e-06, + "loss": 1.0445, "step": 16456 }, { - "epoch": 0.4663492873133271, + "epoch": 0.6439079740198763, "grad_norm": 0.0, - "learning_rate": 1.1569330144545712e-05, - "loss": 0.9285, + "learning_rate": 5.945333309303984e-06, + "loss": 0.9435, "step": 16457 }, { - "epoch": 0.4663776247555896, + "epoch": 0.6439471007121058, "grad_norm": 0.0, - "learning_rate": 1.1568423720770734e-05, - "loss": 0.863, + "learning_rate": 5.944174953446457e-06, + "loss": 1.0427, "step": 16458 }, { - "epoch": 0.466405962197852, + "epoch": 0.6439862274043352, "grad_norm": 0.0, - "learning_rate": 1.1567517283784344e-05, - "loss": 0.9023, + "learning_rate": 5.9430166627204e-06, + "loss": 1.0209, "step": 16459 }, { - "epoch": 0.46643429964011446, + "epoch": 0.6440253540965647, "grad_norm": 0.0, - "learning_rate": 1.156661083359417e-05, - "loss": 0.8894, + "learning_rate": 5.941858437144408e-06, + "loss": 1.2066, "step": 16460 }, { - "epoch": 0.46646263708237695, + "epoch": 0.6440644807887941, "grad_norm": 0.0, - "learning_rate": 1.1565704370207853e-05, - "loss": 0.8436, + "learning_rate": 5.940700276737087e-06, + "loss": 1.0336, "step": 16461 }, { - "epoch": 0.4664909745246394, + "epoch": 0.6441036074810236, "grad_norm": 0.0, - "learning_rate": 1.1564797893633029e-05, - "loss": 0.9509, + "learning_rate": 5.939542181517033e-06, + "loss": 0.9209, "step": 16462 }, { - "epoch": 0.4665193119669019, + "epoch": 0.644142734173253, "grad_norm": 0.0, - "learning_rate": 1.156389140387733e-05, - "loss": 0.7923, + "learning_rate": 5.938384151502844e-06, + "loss": 0.7725, "step": 16463 }, { - "epoch": 0.4665476494091643, + "epoch": 0.6441818608654825, "grad_norm": 0.0, - "learning_rate": 1.156298490094839e-05, - "loss": 0.8419, + "learning_rate": 5.937226186713112e-06, + "loss": 0.9896, "step": 16464 }, { - "epoch": 0.4665759868514268, + "epoch": 0.6442209875577118, "grad_norm": 0.0, - "learning_rate": 1.156207838485385e-05, - "loss": 0.8318, + "learning_rate": 5.93606828716644e-06, + "loss": 1.1304, "step": 16465 }, { - "epoch": 0.46660432429368925, + "epoch": 0.6442601142499413, "grad_norm": 0.0, - "learning_rate": 1.1561171855601344e-05, - "loss": 0.9776, + "learning_rate": 5.934910452881419e-06, + "loss": 1.0737, "step": 16466 }, { - "epoch": 0.4666326617359517, + "epoch": 0.6442992409421707, "grad_norm": 0.0, - "learning_rate": 1.1560265313198507e-05, - "loss": 0.8494, + "learning_rate": 5.933752683876642e-06, + "loss": 0.8894, "step": 16467 }, { - "epoch": 0.4666609991782142, + "epoch": 0.6443383676344002, "grad_norm": 0.0, - "learning_rate": 1.1559358757652973e-05, - "loss": 0.8714, + "learning_rate": 5.932594980170703e-06, + "loss": 1.0076, "step": 16468 }, { - "epoch": 0.4666893366204766, + "epoch": 0.6443774943266296, "grad_norm": 0.0, - "learning_rate": 1.1558452188972386e-05, - "loss": 0.9316, + "learning_rate": 5.931437341782187e-06, + "loss": 0.8882, "step": 16469 }, { - "epoch": 0.4667176740627391, + "epoch": 0.6444166210188591, "grad_norm": 0.0, - "learning_rate": 1.1557545607164378e-05, - "loss": 0.9064, + "learning_rate": 5.930279768729692e-06, + "loss": 1.0278, "step": 16470 }, { - "epoch": 0.46674601150500156, + "epoch": 0.6444557477110885, "grad_norm": 0.0, - "learning_rate": 1.155663901223658e-05, - "loss": 0.8824, + "learning_rate": 5.929122261031806e-06, + "loss": 1.0162, "step": 16471 }, { - "epoch": 0.466774348947264, + "epoch": 0.644494874403318, "grad_norm": 0.0, - "learning_rate": 1.1555732404196636e-05, - "loss": 0.891, + "learning_rate": 5.927964818707115e-06, + "loss": 0.9332, "step": 16472 }, { - "epoch": 0.4668026863895265, + "epoch": 0.6445340010955474, "grad_norm": 0.0, - "learning_rate": 1.1554825783052181e-05, - "loss": 0.8941, + "learning_rate": 5.926807441774202e-06, + "loss": 1.0438, "step": 16473 }, { - "epoch": 0.4668310238317889, + "epoch": 0.6445731277877769, "grad_norm": 0.0, - "learning_rate": 1.1553919148810853e-05, - "loss": 0.8615, + "learning_rate": 5.925650130251661e-06, + "loss": 0.9486, "step": 16474 }, { - "epoch": 0.4668593612740514, + "epoch": 0.6446122544800063, "grad_norm": 0.0, - "learning_rate": 1.1553012501480285e-05, - "loss": 0.8406, + "learning_rate": 5.924492884158075e-06, + "loss": 0.9182, "step": 16475 }, { - "epoch": 0.46688769871631386, + "epoch": 0.6446513811722357, "grad_norm": 0.0, - "learning_rate": 1.1552105841068114e-05, - "loss": 0.8397, + "learning_rate": 5.923335703512026e-06, + "loss": 0.9587, "step": 16476 }, { - "epoch": 0.46691603615857635, + "epoch": 0.6446905078644651, "grad_norm": 0.0, - "learning_rate": 1.1551199167581982e-05, - "loss": 0.9052, + "learning_rate": 5.922178588332096e-06, + "loss": 1.012, "step": 16477 }, { - "epoch": 0.4669443736008388, + "epoch": 0.6447296345566946, "grad_norm": 0.0, - "learning_rate": 1.155029248102952e-05, - "loss": 1.0063, + "learning_rate": 5.921021538636864e-06, + "loss": 1.0789, "step": 16478 }, { - "epoch": 0.46697271104310123, + "epoch": 0.644768761248924, "grad_norm": 0.0, - "learning_rate": 1.1549385781418372e-05, - "loss": 0.901, + "learning_rate": 5.919864554444921e-06, + "loss": 0.9925, "step": 16479 }, { - "epoch": 0.4670010484853637, + "epoch": 0.6448078879411534, "grad_norm": 0.0, - "learning_rate": 1.1548479068756169e-05, - "loss": 0.8325, + "learning_rate": 5.918707635774838e-06, + "loss": 1.0032, "step": 16480 }, { - "epoch": 0.46702938592762616, + "epoch": 0.6448470146333829, "grad_norm": 0.0, - "learning_rate": 1.1547572343050556e-05, - "loss": 0.9574, + "learning_rate": 5.917550782645198e-06, + "loss": 1.011, "step": 16481 }, { - "epoch": 0.46705772336988866, + "epoch": 0.6448861413256123, "grad_norm": 0.0, - "learning_rate": 1.1546665604309165e-05, - "loss": 0.9, + "learning_rate": 5.916393995074574e-06, + "loss": 1.08, "step": 16482 }, { - "epoch": 0.4670860608121511, + "epoch": 0.6449252680178418, "grad_norm": 0.0, - "learning_rate": 1.1545758852539634e-05, - "loss": 0.8215, + "learning_rate": 5.915237273081547e-06, + "loss": 1.0334, "step": 16483 }, { - "epoch": 0.46711439825441353, + "epoch": 0.6449643947100712, "grad_norm": 0.0, - "learning_rate": 1.1544852087749604e-05, - "loss": 0.928, + "learning_rate": 5.914080616684691e-06, + "loss": 1.0352, "step": 16484 }, { - "epoch": 0.467142735696676, + "epoch": 0.6450035214023007, "grad_norm": 0.0, - "learning_rate": 1.1543945309946707e-05, - "loss": 0.8909, + "learning_rate": 5.912924025902581e-06, + "loss": 0.8888, "step": 16485 }, { - "epoch": 0.46717107313893846, + "epoch": 0.64504264809453, "grad_norm": 0.0, - "learning_rate": 1.1543038519138589e-05, - "loss": 0.9444, + "learning_rate": 5.911767500753789e-06, + "loss": 1.0896, "step": 16486 }, { - "epoch": 0.46719941058120096, + "epoch": 0.6450817747867595, "grad_norm": 0.0, - "learning_rate": 1.1542131715332886e-05, - "loss": 0.8653, + "learning_rate": 5.910611041256888e-06, + "loss": 1.0234, "step": 16487 }, { - "epoch": 0.4672277480234634, + "epoch": 0.6451209014789889, "grad_norm": 0.0, - "learning_rate": 1.154122489853723e-05, - "loss": 0.8593, + "learning_rate": 5.9094546474304505e-06, + "loss": 1.0506, "step": 16488 }, { - "epoch": 0.4672560854657259, + "epoch": 0.6451600281712184, "grad_norm": 0.0, - "learning_rate": 1.1540318068759268e-05, - "loss": 0.8497, + "learning_rate": 5.908298319293042e-06, + "loss": 0.8873, "step": 16489 }, { - "epoch": 0.46728442290798833, + "epoch": 0.6451991548634478, "grad_norm": 0.0, - "learning_rate": 1.1539411226006636e-05, - "loss": 0.8527, + "learning_rate": 5.90714205686324e-06, + "loss": 1.105, "step": 16490 }, { - "epoch": 0.46731276035025077, + "epoch": 0.6452382815556773, "grad_norm": 0.0, - "learning_rate": 1.1538504370286967e-05, - "loss": 0.8956, + "learning_rate": 5.9059858601596045e-06, + "loss": 0.9482, "step": 16491 }, { - "epoch": 0.46734109779251326, + "epoch": 0.6452774082479067, "grad_norm": 0.0, - "learning_rate": 1.1537597501607908e-05, - "loss": 0.8839, + "learning_rate": 5.904829729200707e-06, + "loss": 0.8935, "step": 16492 }, { - "epoch": 0.4673694352347757, + "epoch": 0.6453165349401362, "grad_norm": 0.0, - "learning_rate": 1.153669061997709e-05, - "loss": 0.9237, + "learning_rate": 5.903673664005114e-06, + "loss": 0.8631, "step": 16493 }, { - "epoch": 0.4673977726770382, + "epoch": 0.6453556616323656, "grad_norm": 0.0, - "learning_rate": 1.1535783725402163e-05, - "loss": 0.8785, + "learning_rate": 5.902517664591389e-06, + "loss": 1.0424, "step": 16494 }, { - "epoch": 0.46742611011930063, + "epoch": 0.6453947883245951, "grad_norm": 0.0, - "learning_rate": 1.1534876817890756e-05, - "loss": 0.9309, + "learning_rate": 5.9013617309780925e-06, + "loss": 1.1584, "step": 16495 }, { - "epoch": 0.46745444756156307, + "epoch": 0.6454339150168245, "grad_norm": 0.0, - "learning_rate": 1.1533969897450512e-05, - "loss": 0.9159, + "learning_rate": 5.900205863183792e-06, + "loss": 0.965, "step": 16496 }, { - "epoch": 0.46748278500382556, + "epoch": 0.645473041709054, "grad_norm": 0.0, - "learning_rate": 1.1533062964089068e-05, - "loss": 0.918, + "learning_rate": 5.89905006122705e-06, + "loss": 0.9332, "step": 16497 }, { - "epoch": 0.467511122446088, + "epoch": 0.6455121684012833, "grad_norm": 0.0, - "learning_rate": 1.1532156017814068e-05, - "loss": 0.9536, + "learning_rate": 5.897894325126424e-06, + "loss": 0.9998, "step": 16498 }, { - "epoch": 0.4675394598883505, + "epoch": 0.6455512950935128, "grad_norm": 0.0, - "learning_rate": 1.1531249058633147e-05, - "loss": 0.7664, + "learning_rate": 5.896738654900476e-06, + "loss": 0.954, "step": 16499 }, { - "epoch": 0.46756779733061293, + "epoch": 0.6455904217857422, "grad_norm": 0.0, - "learning_rate": 1.1530342086553947e-05, - "loss": 0.8873, + "learning_rate": 5.8955830505677595e-06, + "loss": 1.0778, "step": 16500 }, { - "epoch": 0.4675961347728754, + "epoch": 0.6456295484779717, "grad_norm": 0.0, - "learning_rate": 1.1529435101584108e-05, - "loss": 0.9182, + "learning_rate": 5.89442751214684e-06, + "loss": 0.8014, "step": 16501 }, { - "epoch": 0.46762447221513787, + "epoch": 0.6456686751702011, "grad_norm": 0.0, - "learning_rate": 1.152852810373127e-05, - "loss": 0.7964, + "learning_rate": 5.89327203965627e-06, + "loss": 1.0487, "step": 16502 }, { - "epoch": 0.4676528096574003, + "epoch": 0.6457078018624306, "grad_norm": 0.0, - "learning_rate": 1.1527621093003071e-05, - "loss": 0.8431, + "learning_rate": 5.892116633114605e-06, + "loss": 1.0351, "step": 16503 }, { - "epoch": 0.4676811470996628, + "epoch": 0.64574692855466, "grad_norm": 0.0, - "learning_rate": 1.152671406940715e-05, - "loss": 0.8106, + "learning_rate": 5.890961292540396e-06, + "loss": 1.0569, "step": 16504 }, { - "epoch": 0.46770948454192524, + "epoch": 0.6457860552468895, "grad_norm": 0.0, - "learning_rate": 1.152580703295115e-05, - "loss": 0.9075, + "learning_rate": 5.889806017952202e-06, + "loss": 0.8649, "step": 16505 }, { - "epoch": 0.46773782198418773, + "epoch": 0.6458251819391189, "grad_norm": 0.0, - "learning_rate": 1.1524899983642715e-05, - "loss": 1.0591, + "learning_rate": 5.888650809368574e-06, + "loss": 1.0071, "step": 16506 }, { - "epoch": 0.46776615942645017, + "epoch": 0.6458643086313484, "grad_norm": 0.0, - "learning_rate": 1.152399292148948e-05, - "loss": 0.978, + "learning_rate": 5.887495666808063e-06, + "loss": 1.0562, "step": 16507 }, { - "epoch": 0.4677944968687126, + "epoch": 0.6459034353235777, "grad_norm": 0.0, - "learning_rate": 1.1523085846499085e-05, - "loss": 0.9245, + "learning_rate": 5.886340590289218e-06, + "loss": 0.9454, "step": 16508 }, { - "epoch": 0.4678228343109751, + "epoch": 0.6459425620158071, "grad_norm": 0.0, - "learning_rate": 1.1522178758679172e-05, - "loss": 0.9284, + "learning_rate": 5.885185579830584e-06, + "loss": 1.0156, "step": 16509 }, { - "epoch": 0.46785117175323754, + "epoch": 0.6459816887080366, "grad_norm": 0.0, - "learning_rate": 1.1521271658037383e-05, - "loss": 0.8252, + "learning_rate": 5.884030635450717e-06, + "loss": 0.9623, "step": 16510 }, { - "epoch": 0.46787950919550003, + "epoch": 0.646020815400266, "grad_norm": 0.0, - "learning_rate": 1.1520364544581357e-05, - "loss": 0.8414, + "learning_rate": 5.882875757168161e-06, + "loss": 0.9402, "step": 16511 }, { - "epoch": 0.46790784663776247, + "epoch": 0.6460599420924955, "grad_norm": 0.0, - "learning_rate": 1.1519457418318738e-05, - "loss": 0.8581, + "learning_rate": 5.881720945001461e-06, + "loss": 1.0029, "step": 16512 }, { - "epoch": 0.46793618408002496, + "epoch": 0.6460990687847249, "grad_norm": 0.0, - "learning_rate": 1.1518550279257164e-05, - "loss": 0.8508, + "learning_rate": 5.880566198969159e-06, + "loss": 0.901, "step": 16513 }, { - "epoch": 0.4679645215222874, + "epoch": 0.6461381954769544, "grad_norm": 0.0, - "learning_rate": 1.1517643127404274e-05, - "loss": 0.9369, + "learning_rate": 5.879411519089805e-06, + "loss": 0.9996, "step": 16514 }, { - "epoch": 0.46799285896454984, + "epoch": 0.6461773221691838, "grad_norm": 0.0, - "learning_rate": 1.1516735962767716e-05, - "loss": 0.9283, + "learning_rate": 5.878256905381938e-06, + "loss": 1.0534, "step": 16515 }, { - "epoch": 0.46802119640681233, + "epoch": 0.6462164488614133, "grad_norm": 0.0, - "learning_rate": 1.1515828785355128e-05, - "loss": 0.8762, + "learning_rate": 5.8771023578641015e-06, + "loss": 0.9221, "step": 16516 }, { - "epoch": 0.4680495338490748, + "epoch": 0.6462555755536427, "grad_norm": 0.0, - "learning_rate": 1.151492159517415e-05, - "loss": 0.9257, + "learning_rate": 5.875947876554835e-06, + "loss": 1.1082, "step": 16517 }, { - "epoch": 0.46807787129133727, + "epoch": 0.6462947022458722, "grad_norm": 0.0, - "learning_rate": 1.1514014392232422e-05, - "loss": 0.8985, + "learning_rate": 5.874793461472673e-06, + "loss": 1.0726, "step": 16518 }, { - "epoch": 0.4681062087335997, + "epoch": 0.6463338289381015, "grad_norm": 0.0, - "learning_rate": 1.1513107176537593e-05, - "loss": 1.0114, + "learning_rate": 5.873639112636164e-06, + "loss": 0.9022, "step": 16519 }, { - "epoch": 0.46813454617586214, + "epoch": 0.646372955630331, "grad_norm": 0.0, - "learning_rate": 1.15121999480973e-05, - "loss": 0.9555, + "learning_rate": 5.872484830063838e-06, + "loss": 1.0026, "step": 16520 }, { - "epoch": 0.46816288361812464, + "epoch": 0.6464120823225604, "grad_norm": 0.0, - "learning_rate": 1.1511292706919184e-05, - "loss": 0.9048, + "learning_rate": 5.8713306137742355e-06, + "loss": 1.0073, "step": 16521 }, { - "epoch": 0.4681912210603871, + "epoch": 0.6464512090147899, "grad_norm": 0.0, - "learning_rate": 1.1510385453010886e-05, - "loss": 0.8688, + "learning_rate": 5.870176463785886e-06, + "loss": 0.9791, "step": 16522 }, { - "epoch": 0.46821955850264957, + "epoch": 0.6464903357070193, "grad_norm": 0.0, - "learning_rate": 1.1509478186380054e-05, - "loss": 0.8499, + "learning_rate": 5.8690223801173305e-06, + "loss": 1.0732, "step": 16523 }, { - "epoch": 0.468247895944912, + "epoch": 0.6465294623992488, "grad_norm": 0.0, - "learning_rate": 1.1508570907034325e-05, - "loss": 0.8765, + "learning_rate": 5.867868362787099e-06, + "loss": 1.0745, "step": 16524 }, { - "epoch": 0.4682762333871745, + "epoch": 0.6465685890914782, "grad_norm": 0.0, - "learning_rate": 1.1507663614981343e-05, - "loss": 0.9143, + "learning_rate": 5.866714411813725e-06, + "loss": 0.9632, "step": 16525 }, { - "epoch": 0.46830457082943694, + "epoch": 0.6466077157837077, "grad_norm": 0.0, - "learning_rate": 1.150675631022875e-05, - "loss": 0.8753, + "learning_rate": 5.865560527215737e-06, + "loss": 1.0612, "step": 16526 }, { - "epoch": 0.4683329082716994, + "epoch": 0.6466468424759371, "grad_norm": 0.0, - "learning_rate": 1.1505848992784192e-05, - "loss": 0.9589, + "learning_rate": 5.864406709011665e-06, + "loss": 1.0698, "step": 16527 }, { - "epoch": 0.46836124571396187, + "epoch": 0.6466859691681666, "grad_norm": 0.0, - "learning_rate": 1.1504941662655309e-05, - "loss": 0.9031, + "learning_rate": 5.863252957220038e-06, + "loss": 1.0794, "step": 16528 }, { - "epoch": 0.4683895831562243, + "epoch": 0.646725095860396, "grad_norm": 0.0, - "learning_rate": 1.1504034319849741e-05, - "loss": 0.8396, + "learning_rate": 5.862099271859388e-06, + "loss": 0.9004, "step": 16529 }, { - "epoch": 0.4684179205984868, + "epoch": 0.6467642225526254, "grad_norm": 0.0, - "learning_rate": 1.1503126964375133e-05, - "loss": 0.9385, + "learning_rate": 5.860945652948239e-06, + "loss": 1.1013, "step": 16530 }, { - "epoch": 0.46844625804074924, + "epoch": 0.6468033492448548, "grad_norm": 0.0, - "learning_rate": 1.1502219596239128e-05, - "loss": 0.9869, + "learning_rate": 5.859792100505113e-06, + "loss": 1.0376, "step": 16531 }, { - "epoch": 0.4684745954830117, + "epoch": 0.6468424759370843, "grad_norm": 0.0, - "learning_rate": 1.1501312215449372e-05, - "loss": 0.8837, + "learning_rate": 5.858638614548541e-06, + "loss": 1.0644, "step": 16532 }, { - "epoch": 0.4685029329252742, + "epoch": 0.6468816026293137, "grad_norm": 0.0, - "learning_rate": 1.1500404822013505e-05, - "loss": 1.0148, + "learning_rate": 5.857485195097041e-06, + "loss": 0.9814, "step": 16533 }, { - "epoch": 0.4685312703675366, + "epoch": 0.6469207293215432, "grad_norm": 0.0, - "learning_rate": 1.149949741593917e-05, - "loss": 0.955, + "learning_rate": 5.856331842169139e-06, + "loss": 0.9962, "step": 16534 }, { - "epoch": 0.4685596078097991, + "epoch": 0.6469598560137726, "grad_norm": 0.0, - "learning_rate": 1.149858999723401e-05, - "loss": 0.8503, + "learning_rate": 5.855178555783356e-06, + "loss": 0.9116, "step": 16535 }, { - "epoch": 0.46858794525206154, + "epoch": 0.646998982706002, "grad_norm": 0.0, - "learning_rate": 1.1497682565905674e-05, - "loss": 0.9692, + "learning_rate": 5.85402533595821e-06, + "loss": 1.0168, "step": 16536 }, { - "epoch": 0.46861628269432404, + "epoch": 0.6470381093982315, "grad_norm": 0.0, - "learning_rate": 1.1496775121961799e-05, - "loss": 0.8685, + "learning_rate": 5.852872182712224e-06, + "loss": 0.9243, "step": 16537 }, { - "epoch": 0.4686446201365865, + "epoch": 0.6470772360904609, "grad_norm": 0.0, - "learning_rate": 1.149586766541003e-05, - "loss": 0.8971, + "learning_rate": 5.851719096063914e-06, + "loss": 0.9452, "step": 16538 }, { - "epoch": 0.4686729575788489, + "epoch": 0.6471163627826904, "grad_norm": 0.0, - "learning_rate": 1.1494960196258016e-05, - "loss": 0.8644, + "learning_rate": 5.850566076031794e-06, + "loss": 1.0829, "step": 16539 }, { - "epoch": 0.4687012950211114, + "epoch": 0.6471554894749197, "grad_norm": 0.0, - "learning_rate": 1.1494052714513395e-05, - "loss": 0.8857, + "learning_rate": 5.849413122634384e-06, + "loss": 0.9836, "step": 16540 }, { - "epoch": 0.46872963246337385, + "epoch": 0.6471946161671492, "grad_norm": 0.0, - "learning_rate": 1.1493145220183814e-05, - "loss": 0.8413, + "learning_rate": 5.848260235890202e-06, + "loss": 0.9472, "step": 16541 }, { - "epoch": 0.46875796990563634, + "epoch": 0.6472337428593786, "grad_norm": 0.0, - "learning_rate": 1.1492237713276915e-05, - "loss": 0.866, + "learning_rate": 5.847107415817757e-06, + "loss": 0.953, "step": 16542 }, { - "epoch": 0.4687863073478988, + "epoch": 0.6472728695516081, "grad_norm": 0.0, - "learning_rate": 1.1491330193800345e-05, - "loss": 0.9505, + "learning_rate": 5.845954662435563e-06, + "loss": 1.0005, "step": 16543 }, { - "epoch": 0.4688146447901612, + "epoch": 0.6473119962438375, "grad_norm": 0.0, - "learning_rate": 1.1490422661761744e-05, - "loss": 0.9675, + "learning_rate": 5.844801975762131e-06, + "loss": 1.0279, "step": 16544 }, { - "epoch": 0.4688429822324237, + "epoch": 0.647351122936067, "grad_norm": 0.0, - "learning_rate": 1.1489515117168763e-05, - "loss": 0.9015, + "learning_rate": 5.843649355815968e-06, + "loss": 0.9425, "step": 16545 }, { - "epoch": 0.46887131967468615, + "epoch": 0.6473902496282964, "grad_norm": 0.0, - "learning_rate": 1.1488607560029043e-05, - "loss": 0.9888, + "learning_rate": 5.8424968026155935e-06, + "loss": 1.0876, "step": 16546 }, { - "epoch": 0.46889965711694864, + "epoch": 0.6474293763205259, "grad_norm": 0.0, - "learning_rate": 1.1487699990350228e-05, - "loss": 0.9417, + "learning_rate": 5.841344316179511e-06, + "loss": 0.9945, "step": 16547 }, { - "epoch": 0.4689279945592111, + "epoch": 0.6474685030127553, "grad_norm": 0.0, - "learning_rate": 1.1486792408139962e-05, - "loss": 0.9686, + "learning_rate": 5.840191896526226e-06, + "loss": 0.9627, "step": 16548 }, { - "epoch": 0.4689563320014735, + "epoch": 0.6475076297049848, "grad_norm": 0.0, - "learning_rate": 1.1485884813405893e-05, - "loss": 0.8448, + "learning_rate": 5.839039543674245e-06, + "loss": 0.9417, "step": 16549 }, { - "epoch": 0.468984669443736, + "epoch": 0.6475467563972142, "grad_norm": 0.0, - "learning_rate": 1.1484977206155662e-05, - "loss": 0.9247, + "learning_rate": 5.837887257642075e-06, + "loss": 0.9534, "step": 16550 }, { - "epoch": 0.46901300688599845, + "epoch": 0.6475858830894436, "grad_norm": 0.0, - "learning_rate": 1.1484069586396919e-05, - "loss": 0.9569, + "learning_rate": 5.836735038448225e-06, + "loss": 0.9843, "step": 16551 }, { - "epoch": 0.46904134432826095, + "epoch": 0.647625009781673, "grad_norm": 0.0, - "learning_rate": 1.1483161954137308e-05, - "loss": 0.8893, + "learning_rate": 5.83558288611119e-06, + "loss": 1.0379, "step": 16552 }, { - "epoch": 0.4690696817705234, + "epoch": 0.6476641364739025, "grad_norm": 0.0, - "learning_rate": 1.148225430938447e-05, - "loss": 0.94, + "learning_rate": 5.834430800649477e-06, + "loss": 1.0869, "step": 16553 }, { - "epoch": 0.4690980192127859, + "epoch": 0.6477032631661319, "grad_norm": 0.0, - "learning_rate": 1.1481346652146057e-05, - "loss": 0.9102, + "learning_rate": 5.833278782081585e-06, + "loss": 0.9055, "step": 16554 }, { - "epoch": 0.4691263566550483, + "epoch": 0.6477423898583614, "grad_norm": 0.0, - "learning_rate": 1.1480438982429707e-05, - "loss": 0.8906, + "learning_rate": 5.832126830426012e-06, + "loss": 0.9981, "step": 16555 }, { - "epoch": 0.46915469409731075, + "epoch": 0.6477815165505908, "grad_norm": 0.0, - "learning_rate": 1.1479531300243072e-05, - "loss": 0.8867, + "learning_rate": 5.830974945701262e-06, + "loss": 1.046, "step": 16556 }, { - "epoch": 0.46918303153957325, + "epoch": 0.6478206432428203, "grad_norm": 0.0, - "learning_rate": 1.1478623605593795e-05, - "loss": 0.9583, + "learning_rate": 5.829823127925832e-06, + "loss": 0.9351, "step": 16557 }, { - "epoch": 0.4692113689818357, + "epoch": 0.6478597699350497, "grad_norm": 0.0, - "learning_rate": 1.1477715898489522e-05, - "loss": 0.8214, + "learning_rate": 5.828671377118213e-06, + "loss": 1.0073, "step": 16558 }, { - "epoch": 0.4692397064240982, + "epoch": 0.6478988966272792, "grad_norm": 0.0, - "learning_rate": 1.1476808178937899e-05, - "loss": 0.9832, + "learning_rate": 5.827519693296906e-06, + "loss": 0.9592, "step": 16559 }, { - "epoch": 0.4692680438663606, + "epoch": 0.6479380233195086, "grad_norm": 0.0, - "learning_rate": 1.1475900446946575e-05, - "loss": 0.9703, + "learning_rate": 5.826368076480409e-06, + "loss": 1.0582, "step": 16560 }, { - "epoch": 0.46929638130862306, + "epoch": 0.6479771500117381, "grad_norm": 0.0, - "learning_rate": 1.1474992702523191e-05, - "loss": 0.9962, + "learning_rate": 5.825216526687205e-06, + "loss": 1.0951, "step": 16561 }, { - "epoch": 0.46932471875088555, + "epoch": 0.6480162767039674, "grad_norm": 0.0, - "learning_rate": 1.1474084945675396e-05, - "loss": 0.8878, + "learning_rate": 5.824065043935794e-06, + "loss": 1.0748, "step": 16562 }, { - "epoch": 0.469353056193148, + "epoch": 0.6480554033961969, "grad_norm": 0.0, - "learning_rate": 1.1473177176410835e-05, - "loss": 0.8756, + "learning_rate": 5.822913628244664e-06, + "loss": 0.8537, "step": 16563 }, { - "epoch": 0.4693813936354105, + "epoch": 0.6480945300884263, "grad_norm": 0.0, - "learning_rate": 1.147226939473716e-05, - "loss": 0.9269, + "learning_rate": 5.821762279632312e-06, + "loss": 0.9517, "step": 16564 }, { - "epoch": 0.4694097310776729, + "epoch": 0.6481336567806557, "grad_norm": 0.0, - "learning_rate": 1.147136160066201e-05, - "loss": 0.8438, + "learning_rate": 5.820610998117218e-06, + "loss": 1.0408, "step": 16565 }, { - "epoch": 0.4694380685199354, + "epoch": 0.6481727834728852, "grad_norm": 0.0, - "learning_rate": 1.1470453794193034e-05, - "loss": 0.845, + "learning_rate": 5.819459783717878e-06, + "loss": 0.9558, "step": 16566 }, { - "epoch": 0.46946640596219785, + "epoch": 0.6482119101651146, "grad_norm": 0.0, - "learning_rate": 1.1469545975337884e-05, - "loss": 0.8371, + "learning_rate": 5.818308636452767e-06, + "loss": 0.9091, "step": 16567 }, { - "epoch": 0.4694947434044603, + "epoch": 0.6482510368573441, "grad_norm": 0.0, - "learning_rate": 1.1468638144104195e-05, - "loss": 0.8283, + "learning_rate": 5.817157556340387e-06, + "loss": 0.9577, "step": 16568 }, { - "epoch": 0.4695230808467228, + "epoch": 0.6482901635495735, "grad_norm": 0.0, - "learning_rate": 1.1467730300499626e-05, - "loss": 0.8376, + "learning_rate": 5.816006543399213e-06, + "loss": 0.9907, "step": 16569 }, { - "epoch": 0.4695514182889852, + "epoch": 0.648329290241803, "grad_norm": 0.0, - "learning_rate": 1.146682244453182e-05, - "loss": 0.9609, + "learning_rate": 5.8148555976477335e-06, + "loss": 0.8533, "step": 16570 }, { - "epoch": 0.4695797557312477, + "epoch": 0.6483684169340324, "grad_norm": 0.0, - "learning_rate": 1.1465914576208423e-05, - "loss": 0.9418, + "learning_rate": 5.813704719104422e-06, + "loss": 0.9814, "step": 16571 }, { - "epoch": 0.46960809317351015, + "epoch": 0.6484075436262619, "grad_norm": 0.0, - "learning_rate": 1.1465006695537082e-05, - "loss": 0.9512, + "learning_rate": 5.812553907787774e-06, + "loss": 0.9489, "step": 16572 }, { - "epoch": 0.4696364306157726, + "epoch": 0.6484466703184912, "grad_norm": 0.0, - "learning_rate": 1.1464098802525449e-05, - "loss": 0.8946, + "learning_rate": 5.811403163716259e-06, + "loss": 0.9885, "step": 16573 }, { - "epoch": 0.4696647680580351, + "epoch": 0.6484857970107207, "grad_norm": 0.0, - "learning_rate": 1.1463190897181167e-05, - "loss": 0.8302, + "learning_rate": 5.810252486908366e-06, + "loss": 0.9118, "step": 16574 }, { - "epoch": 0.4696931055002975, + "epoch": 0.6485249237029501, "grad_norm": 0.0, - "learning_rate": 1.1462282979511883e-05, - "loss": 0.8057, + "learning_rate": 5.809101877382562e-06, + "loss": 1.0162, "step": 16575 }, { - "epoch": 0.46972144294256, + "epoch": 0.6485640503951796, "grad_norm": 0.0, - "learning_rate": 1.1461375049525246e-05, - "loss": 0.786, + "learning_rate": 5.807951335157332e-06, + "loss": 1.0195, "step": 16576 }, { - "epoch": 0.46974978038482246, + "epoch": 0.648603177087409, "grad_norm": 0.0, - "learning_rate": 1.1460467107228906e-05, - "loss": 1.038, + "learning_rate": 5.80680086025115e-06, + "loss": 1.0288, "step": 16577 }, { - "epoch": 0.46977811782708495, + "epoch": 0.6486423037796385, "grad_norm": 0.0, - "learning_rate": 1.145955915263051e-05, - "loss": 0.9966, + "learning_rate": 5.805650452682496e-06, + "loss": 0.96, "step": 16578 }, { - "epoch": 0.4698064552693474, + "epoch": 0.6486814304718679, "grad_norm": 0.0, - "learning_rate": 1.1458651185737703e-05, - "loss": 0.7929, + "learning_rate": 5.804500112469835e-06, + "loss": 0.861, "step": 16579 }, { - "epoch": 0.4698347927116098, + "epoch": 0.6487205571640974, "grad_norm": 0.0, - "learning_rate": 1.1457743206558137e-05, - "loss": 0.9134, + "learning_rate": 5.803349839631647e-06, + "loss": 1.0897, "step": 16580 }, { - "epoch": 0.4698631301538723, + "epoch": 0.6487596838563268, "grad_norm": 0.0, - "learning_rate": 1.1456835215099457e-05, - "loss": 0.883, + "learning_rate": 5.802199634186401e-06, + "loss": 1.0559, "step": 16581 }, { - "epoch": 0.46989146759613476, + "epoch": 0.6487988105485563, "grad_norm": 0.0, - "learning_rate": 1.1455927211369314e-05, - "loss": 0.9065, + "learning_rate": 5.801049496152571e-06, + "loss": 0.9442, "step": 16582 }, { - "epoch": 0.46991980503839725, + "epoch": 0.6488379372407856, "grad_norm": 0.0, - "learning_rate": 1.1455019195375356e-05, - "loss": 0.8351, + "learning_rate": 5.799899425548623e-06, + "loss": 1.0341, "step": 16583 }, { - "epoch": 0.4699481424806597, + "epoch": 0.6488770639330151, "grad_norm": 0.0, - "learning_rate": 1.1454111167125231e-05, - "loss": 0.8991, + "learning_rate": 5.79874942239303e-06, + "loss": 1.0006, "step": 16584 }, { - "epoch": 0.46997647992292213, + "epoch": 0.6489161906252445, "grad_norm": 0.0, - "learning_rate": 1.1453203126626586e-05, - "loss": 0.8073, + "learning_rate": 5.797599486704249e-06, + "loss": 0.8445, "step": 16585 }, { - "epoch": 0.4700048173651846, + "epoch": 0.648955317317474, "grad_norm": 0.0, - "learning_rate": 1.1452295073887074e-05, - "loss": 0.9213, + "learning_rate": 5.796449618500761e-06, + "loss": 1.0887, "step": 16586 }, { - "epoch": 0.47003315480744706, + "epoch": 0.6489944440097034, "grad_norm": 0.0, - "learning_rate": 1.145138700891434e-05, - "loss": 0.7989, + "learning_rate": 5.795299817801024e-06, + "loss": 0.941, "step": 16587 }, { - "epoch": 0.47006149224970956, + "epoch": 0.6490335707019329, "grad_norm": 0.0, - "learning_rate": 1.1450478931716033e-05, - "loss": 1.0092, + "learning_rate": 5.7941500846235045e-06, + "loss": 0.9322, "step": 16588 }, { - "epoch": 0.470089829691972, + "epoch": 0.6490726973941623, "grad_norm": 0.0, - "learning_rate": 1.1449570842299804e-05, - "loss": 0.8834, + "learning_rate": 5.793000418986656e-06, + "loss": 0.9408, "step": 16589 }, { - "epoch": 0.4701181671342345, + "epoch": 0.6491118240863918, "grad_norm": 0.0, - "learning_rate": 1.1448662740673304e-05, - "loss": 0.8573, + "learning_rate": 5.791850820908959e-06, + "loss": 1.0439, "step": 16590 }, { - "epoch": 0.4701465045764969, + "epoch": 0.6491509507786212, "grad_norm": 0.0, - "learning_rate": 1.1447754626844178e-05, - "loss": 0.9522, + "learning_rate": 5.79070129040886e-06, + "loss": 1.0487, "step": 16591 }, { - "epoch": 0.47017484201875936, + "epoch": 0.6491900774708507, "grad_norm": 0.0, - "learning_rate": 1.1446846500820076e-05, - "loss": 0.9365, + "learning_rate": 5.789551827504827e-06, + "loss": 1.0054, "step": 16592 }, { - "epoch": 0.47020317946102186, + "epoch": 0.64922920416308, "grad_norm": 0.0, - "learning_rate": 1.144593836260865e-05, - "loss": 0.9214, + "learning_rate": 5.788402432215312e-06, + "loss": 0.965, "step": 16593 }, { - "epoch": 0.4702315169032843, + "epoch": 0.6492683308553094, "grad_norm": 0.0, - "learning_rate": 1.1445030212217549e-05, - "loss": 0.9507, + "learning_rate": 5.787253104558776e-06, + "loss": 1.0035, "step": 16594 }, { - "epoch": 0.4702598543455468, + "epoch": 0.6493074575475389, "grad_norm": 0.0, - "learning_rate": 1.1444122049654421e-05, - "loss": 0.8446, + "learning_rate": 5.786103844553679e-06, + "loss": 0.9769, "step": 16595 }, { - "epoch": 0.47028819178780923, + "epoch": 0.6493465842397683, "grad_norm": 0.0, - "learning_rate": 1.1443213874926914e-05, - "loss": 1.0171, + "learning_rate": 5.784954652218475e-06, + "loss": 1.0087, "step": 16596 }, { - "epoch": 0.47031652923007167, + "epoch": 0.6493857109319978, "grad_norm": 0.0, - "learning_rate": 1.1442305688042685e-05, - "loss": 0.8046, + "learning_rate": 5.783805527571616e-06, + "loss": 0.9897, "step": 16597 }, { - "epoch": 0.47034486667233416, + "epoch": 0.6494248376242272, "grad_norm": 0.0, - "learning_rate": 1.1441397489009378e-05, - "loss": 0.9695, + "learning_rate": 5.782656470631553e-06, + "loss": 1.0418, "step": 16598 }, { - "epoch": 0.4703732041145966, + "epoch": 0.6494639643164567, "grad_norm": 0.0, - "learning_rate": 1.1440489277834645e-05, - "loss": 1.032, + "learning_rate": 5.78150748141675e-06, + "loss": 0.9574, "step": 16599 }, { - "epoch": 0.4704015415568591, + "epoch": 0.6495030910086861, "grad_norm": 0.0, - "learning_rate": 1.1439581054526136e-05, - "loss": 0.8951, + "learning_rate": 5.780358559945647e-06, + "loss": 1.0104, "step": 16600 }, { - "epoch": 0.47042987899912153, + "epoch": 0.6495422177009156, "grad_norm": 0.0, - "learning_rate": 1.1438672819091502e-05, - "loss": 0.8786, + "learning_rate": 5.779209706236696e-06, + "loss": 1.0524, "step": 16601 }, { - "epoch": 0.470458216441384, + "epoch": 0.649581344393145, "grad_norm": 0.0, - "learning_rate": 1.1437764571538387e-05, - "loss": 0.8909, + "learning_rate": 5.7780609203083525e-06, + "loss": 1.0135, "step": 16602 }, { - "epoch": 0.47048655388364646, + "epoch": 0.6496204710853745, "grad_norm": 0.0, - "learning_rate": 1.1436856311874453e-05, - "loss": 0.8096, + "learning_rate": 5.7769122021790565e-06, + "loss": 0.9566, "step": 16603 }, { - "epoch": 0.4705148913259089, + "epoch": 0.6496595977776038, "grad_norm": 0.0, - "learning_rate": 1.1435948040107343e-05, - "loss": 0.8961, + "learning_rate": 5.7757635518672595e-06, + "loss": 0.8969, "step": 16604 }, { - "epoch": 0.4705432287681714, + "epoch": 0.6496987244698333, "grad_norm": 0.0, - "learning_rate": 1.1435039756244708e-05, - "loss": 0.8612, + "learning_rate": 5.77461496939141e-06, + "loss": 0.9298, "step": 16605 }, { - "epoch": 0.47057156621043383, + "epoch": 0.6497378511620627, "grad_norm": 0.0, - "learning_rate": 1.1434131460294205e-05, - "loss": 0.8348, + "learning_rate": 5.773466454769946e-06, + "loss": 0.9804, "step": 16606 }, { - "epoch": 0.4705999036526963, + "epoch": 0.6497769778542922, "grad_norm": 0.0, - "learning_rate": 1.1433223152263475e-05, - "loss": 1.0648, + "learning_rate": 5.772318008021314e-06, + "loss": 0.9997, "step": 16607 }, { - "epoch": 0.47062824109495877, + "epoch": 0.6498161045465216, "grad_norm": 0.0, - "learning_rate": 1.1432314832160176e-05, - "loss": 0.905, + "learning_rate": 5.771169629163957e-06, + "loss": 0.9073, "step": 16608 }, { - "epoch": 0.4706565785372212, + "epoch": 0.6498552312387511, "grad_norm": 0.0, - "learning_rate": 1.1431406499991955e-05, - "loss": 0.8924, + "learning_rate": 5.7700213182163215e-06, + "loss": 0.9798, "step": 16609 }, { - "epoch": 0.4706849159794837, + "epoch": 0.6498943579309805, "grad_norm": 0.0, - "learning_rate": 1.143049815576647e-05, - "loss": 0.8976, + "learning_rate": 5.768873075196838e-06, + "loss": 0.9027, "step": 16610 }, { - "epoch": 0.47071325342174614, + "epoch": 0.64993348462321, "grad_norm": 0.0, - "learning_rate": 1.1429589799491364e-05, - "loss": 0.9874, + "learning_rate": 5.7677249001239565e-06, + "loss": 0.9003, "step": 16611 }, { - "epoch": 0.47074159086400863, + "epoch": 0.6499726113154394, "grad_norm": 0.0, - "learning_rate": 1.1428681431174296e-05, - "loss": 0.8588, + "learning_rate": 5.766576793016101e-06, + "loss": 0.8256, "step": 16612 }, { - "epoch": 0.47076992830627107, + "epoch": 0.6500117380076689, "grad_norm": 0.0, - "learning_rate": 1.142777305082291e-05, - "loss": 0.8463, + "learning_rate": 5.7654287538917244e-06, + "loss": 1.0099, "step": 16613 }, { - "epoch": 0.47079826574853356, + "epoch": 0.6500508646998983, "grad_norm": 0.0, - "learning_rate": 1.1426864658444865e-05, - "loss": 0.9233, + "learning_rate": 5.764280782769254e-06, + "loss": 0.9669, "step": 16614 }, { - "epoch": 0.470826603190796, + "epoch": 0.6500899913921278, "grad_norm": 0.0, - "learning_rate": 1.1425956254047805e-05, - "loss": 0.9109, + "learning_rate": 5.7631328796671285e-06, + "loss": 1.0211, "step": 16615 }, { - "epoch": 0.47085494063305844, + "epoch": 0.6501291180843571, "grad_norm": 0.0, - "learning_rate": 1.142504783763939e-05, - "loss": 0.8919, + "learning_rate": 5.761985044603773e-06, + "loss": 1.1481, "step": 16616 }, { - "epoch": 0.47088327807532093, + "epoch": 0.6501682447765866, "grad_norm": 0.0, - "learning_rate": 1.1424139409227265e-05, - "loss": 0.974, + "learning_rate": 5.7608372775976355e-06, + "loss": 0.872, "step": 16617 }, { - "epoch": 0.47091161551758337, + "epoch": 0.650207371468816, "grad_norm": 0.0, - "learning_rate": 1.1423230968819085e-05, - "loss": 0.9168, + "learning_rate": 5.7596895786671335e-06, + "loss": 1.0822, "step": 16618 }, { - "epoch": 0.47093995295984586, + "epoch": 0.6502464981610455, "grad_norm": 0.0, - "learning_rate": 1.1422322516422506e-05, - "loss": 0.9721, + "learning_rate": 5.75854194783071e-06, + "loss": 0.9386, "step": 16619 }, { - "epoch": 0.4709682904021083, + "epoch": 0.6502856248532749, "grad_norm": 0.0, - "learning_rate": 1.1421414052045174e-05, - "loss": 0.8681, + "learning_rate": 5.757394385106778e-06, + "loss": 1.0089, "step": 16620 }, { - "epoch": 0.47099662784437074, + "epoch": 0.6503247515455044, "grad_norm": 0.0, - "learning_rate": 1.142050557569474e-05, - "loss": 0.9148, + "learning_rate": 5.756246890513784e-06, + "loss": 0.9001, "step": 16621 }, { - "epoch": 0.47102496528663323, + "epoch": 0.6503638782377338, "grad_norm": 0.0, - "learning_rate": 1.1419597087378862e-05, - "loss": 0.8661, + "learning_rate": 5.755099464070144e-06, + "loss": 0.9517, "step": 16622 }, { - "epoch": 0.4710533027288957, + "epoch": 0.6504030049299632, "grad_norm": 0.0, - "learning_rate": 1.1418688587105194e-05, - "loss": 0.9184, + "learning_rate": 5.753952105794291e-06, + "loss": 0.9662, "step": 16623 }, { - "epoch": 0.47108164017115817, + "epoch": 0.6504421316221927, "grad_norm": 0.0, - "learning_rate": 1.1417780074881382e-05, - "loss": 0.886, + "learning_rate": 5.752804815704642e-06, + "loss": 1.0283, "step": 16624 }, { - "epoch": 0.4711099776134206, + "epoch": 0.650481258314422, "grad_norm": 0.0, - "learning_rate": 1.1416871550715086e-05, - "loss": 0.898, + "learning_rate": 5.751657593819627e-06, + "loss": 1.0173, "step": 16625 }, { - "epoch": 0.4711383150556831, + "epoch": 0.6505203850066515, "grad_norm": 0.0, - "learning_rate": 1.1415963014613948e-05, - "loss": 1.0055, + "learning_rate": 5.7505104401576676e-06, + "loss": 0.8837, "step": 16626 }, { - "epoch": 0.47116665249794554, + "epoch": 0.6505595116988809, "grad_norm": 0.0, - "learning_rate": 1.1415054466585633e-05, - "loss": 0.8754, + "learning_rate": 5.749363354737188e-06, + "loss": 0.9841, "step": 16627 }, { - "epoch": 0.471194989940208, + "epoch": 0.6505986383911104, "grad_norm": 0.0, - "learning_rate": 1.1414145906637786e-05, - "loss": 0.9803, + "learning_rate": 5.748216337576604e-06, + "loss": 1.0646, "step": 16628 }, { - "epoch": 0.47122332738247047, + "epoch": 0.6506377650833398, "grad_norm": 0.0, - "learning_rate": 1.1413237334778064e-05, - "loss": 0.8541, + "learning_rate": 5.747069388694337e-06, + "loss": 0.9779, "step": 16629 }, { - "epoch": 0.4712516648247329, + "epoch": 0.6506768917755693, "grad_norm": 0.0, - "learning_rate": 1.1412328751014116e-05, - "loss": 0.9574, + "learning_rate": 5.7459225081088076e-06, + "loss": 0.8804, "step": 16630 }, { - "epoch": 0.4712800022669954, + "epoch": 0.6507160184677987, "grad_norm": 0.0, - "learning_rate": 1.1411420155353605e-05, - "loss": 0.8768, + "learning_rate": 5.744775695838435e-06, + "loss": 1.174, "step": 16631 }, { - "epoch": 0.47130833970925784, + "epoch": 0.6507551451600282, "grad_norm": 0.0, - "learning_rate": 1.1410511547804175e-05, - "loss": 0.7891, + "learning_rate": 5.7436289519016285e-06, + "loss": 0.9639, "step": 16632 }, { - "epoch": 0.4713366771515203, + "epoch": 0.6507942718522576, "grad_norm": 0.0, - "learning_rate": 1.1409602928373483e-05, - "loss": 0.8902, + "learning_rate": 5.742482276316812e-06, + "loss": 1.0008, "step": 16633 }, { - "epoch": 0.47136501459378277, + "epoch": 0.6508333985444871, "grad_norm": 0.0, - "learning_rate": 1.1408694297069178e-05, - "loss": 0.8084, + "learning_rate": 5.741335669102388e-06, + "loss": 1.0335, "step": 16634 }, { - "epoch": 0.4713933520360452, + "epoch": 0.6508725252367165, "grad_norm": 0.0, - "learning_rate": 1.140778565389892e-05, - "loss": 0.8914, + "learning_rate": 5.740189130276783e-06, + "loss": 1.0644, "step": 16635 }, { - "epoch": 0.4714216894783077, + "epoch": 0.650911651928946, "grad_norm": 0.0, - "learning_rate": 1.1406876998870363e-05, - "loss": 0.9052, + "learning_rate": 5.7390426598584e-06, + "loss": 1.1005, "step": 16636 }, { - "epoch": 0.47145002692057014, + "epoch": 0.6509507786211753, "grad_norm": 0.0, - "learning_rate": 1.1405968331991157e-05, - "loss": 0.7807, + "learning_rate": 5.737896257865656e-06, + "loss": 1.0087, "step": 16637 }, { - "epoch": 0.47147836436283264, + "epoch": 0.6509899053134048, "grad_norm": 0.0, - "learning_rate": 1.1405059653268958e-05, - "loss": 0.9301, + "learning_rate": 5.736749924316954e-06, + "loss": 1.0836, "step": 16638 }, { - "epoch": 0.4715067018050951, + "epoch": 0.6510290320056342, "grad_norm": 0.0, - "learning_rate": 1.1404150962711419e-05, - "loss": 0.9042, + "learning_rate": 5.735603659230705e-06, + "loss": 0.9849, "step": 16639 }, { - "epoch": 0.4715350392473575, + "epoch": 0.6510681586978637, "grad_norm": 0.0, - "learning_rate": 1.1403242260326197e-05, - "loss": 0.9454, + "learning_rate": 5.734457462625318e-06, + "loss": 1.0388, "step": 16640 }, { - "epoch": 0.47156337668962, + "epoch": 0.6511072853900931, "grad_norm": 0.0, - "learning_rate": 1.140233354612094e-05, - "loss": 0.9795, + "learning_rate": 5.733311334519204e-06, + "loss": 1.0641, "step": 16641 }, { - "epoch": 0.47159171413188244, + "epoch": 0.6511464120823226, "grad_norm": 0.0, - "learning_rate": 1.1401424820103308e-05, - "loss": 0.8879, + "learning_rate": 5.732165274930759e-06, + "loss": 0.913, "step": 16642 }, { - "epoch": 0.47162005157414494, + "epoch": 0.651185538774552, "grad_norm": 0.0, - "learning_rate": 1.1400516082280957e-05, - "loss": 0.8882, + "learning_rate": 5.731019283878391e-06, + "loss": 1.0015, "step": 16643 }, { - "epoch": 0.4716483890164074, + "epoch": 0.6512246654667815, "grad_norm": 0.0, - "learning_rate": 1.139960733266154e-05, - "loss": 0.8321, + "learning_rate": 5.7298733613805094e-06, + "loss": 1.1379, "step": 16644 }, { - "epoch": 0.4716767264586698, + "epoch": 0.6512637921590109, "grad_norm": 0.0, - "learning_rate": 1.1398698571252709e-05, - "loss": 0.8762, + "learning_rate": 5.7287275074555045e-06, + "loss": 0.9323, "step": 16645 }, { - "epoch": 0.4717050639009323, + "epoch": 0.6513029188512404, "grad_norm": 0.0, - "learning_rate": 1.139778979806212e-05, - "loss": 0.8644, + "learning_rate": 5.727581722121787e-06, + "loss": 0.9561, "step": 16646 }, { - "epoch": 0.47173340134319475, + "epoch": 0.6513420455434697, "grad_norm": 0.0, - "learning_rate": 1.1396881013097429e-05, - "loss": 0.9282, + "learning_rate": 5.7264360053977506e-06, + "loss": 0.9591, "step": 16647 }, { - "epoch": 0.47176173878545724, + "epoch": 0.6513811722356992, "grad_norm": 0.0, - "learning_rate": 1.1395972216366288e-05, - "loss": 0.8491, + "learning_rate": 5.725290357301803e-06, + "loss": 0.7559, "step": 16648 }, { - "epoch": 0.4717900762277197, + "epoch": 0.6514202989279286, "grad_norm": 0.0, - "learning_rate": 1.1395063407876358e-05, - "loss": 0.9763, + "learning_rate": 5.7241447778523295e-06, + "loss": 0.9471, "step": 16649 }, { - "epoch": 0.4718184136699822, + "epoch": 0.651459425620158, "grad_norm": 0.0, - "learning_rate": 1.1394154587635288e-05, - "loss": 0.7763, + "learning_rate": 5.722999267067738e-06, + "loss": 0.8928, "step": 16650 }, { - "epoch": 0.4718467511122446, + "epoch": 0.6514985523123875, "grad_norm": 0.0, - "learning_rate": 1.139324575565074e-05, - "loss": 0.8843, + "learning_rate": 5.721853824966414e-06, + "loss": 0.9519, "step": 16651 }, { - "epoch": 0.47187508855450705, + "epoch": 0.6515376790046169, "grad_norm": 0.0, - "learning_rate": 1.1392336911930363e-05, - "loss": 0.9623, + "learning_rate": 5.720708451566759e-06, + "loss": 1.0066, "step": 16652 }, { - "epoch": 0.47190342599676954, + "epoch": 0.6515768056968464, "grad_norm": 0.0, - "learning_rate": 1.1391428056481814e-05, - "loss": 0.9012, + "learning_rate": 5.719563146887163e-06, + "loss": 0.9353, "step": 16653 }, { - "epoch": 0.471931763439032, + "epoch": 0.6516159323890758, "grad_norm": 0.0, - "learning_rate": 1.1390519189312754e-05, - "loss": 1.0048, + "learning_rate": 5.7184179109460224e-06, + "loss": 1.0592, "step": 16654 }, { - "epoch": 0.4719601008812945, + "epoch": 0.6516550590813053, "grad_norm": 0.0, - "learning_rate": 1.138961031043083e-05, - "loss": 0.9865, + "learning_rate": 5.717272743761722e-06, + "loss": 0.9064, "step": 16655 }, { - "epoch": 0.4719884383235569, + "epoch": 0.6516941857735347, "grad_norm": 0.0, - "learning_rate": 1.1388701419843706e-05, - "loss": 0.9288, + "learning_rate": 5.716127645352654e-06, + "loss": 0.9878, "step": 16656 }, { - "epoch": 0.47201677576581935, + "epoch": 0.6517333124657642, "grad_norm": 0.0, - "learning_rate": 1.1387792517559033e-05, - "loss": 0.8828, + "learning_rate": 5.714982615737208e-06, + "loss": 0.9684, "step": 16657 }, { - "epoch": 0.47204511320808185, + "epoch": 0.6517724391579935, "grad_norm": 0.0, - "learning_rate": 1.1386883603584468e-05, - "loss": 1.0392, + "learning_rate": 5.713837654933776e-06, + "loss": 0.9334, "step": 16658 }, { - "epoch": 0.4720734506503443, + "epoch": 0.651811565850223, "grad_norm": 0.0, - "learning_rate": 1.1385974677927667e-05, - "loss": 0.9342, + "learning_rate": 5.712692762960736e-06, + "loss": 1.0466, "step": 16659 }, { - "epoch": 0.4721017880926068, + "epoch": 0.6518506925424524, "grad_norm": 0.0, - "learning_rate": 1.1385065740596286e-05, - "loss": 0.7767, + "learning_rate": 5.711547939836484e-06, + "loss": 1.0942, "step": 16660 }, { - "epoch": 0.4721301255348692, + "epoch": 0.6518898192346819, "grad_norm": 0.0, - "learning_rate": 1.1384156791597983e-05, - "loss": 0.9146, + "learning_rate": 5.710403185579389e-06, + "loss": 1.0214, "step": 16661 }, { - "epoch": 0.4721584629771317, + "epoch": 0.6519289459269113, "grad_norm": 0.0, - "learning_rate": 1.1383247830940414e-05, - "loss": 0.9052, + "learning_rate": 5.70925850020785e-06, + "loss": 0.9553, "step": 16662 }, { - "epoch": 0.47218680041939415, + "epoch": 0.6519680726191408, "grad_norm": 0.0, - "learning_rate": 1.1382338858631232e-05, - "loss": 0.8645, + "learning_rate": 5.708113883740241e-06, + "loss": 0.8993, "step": 16663 }, { - "epoch": 0.4722151378616566, + "epoch": 0.6520071993113702, "grad_norm": 0.0, - "learning_rate": 1.13814298746781e-05, - "loss": 0.91, + "learning_rate": 5.706969336194948e-06, + "loss": 0.9963, "step": 16664 }, { - "epoch": 0.4722434753039191, + "epoch": 0.6520463260035997, "grad_norm": 0.0, - "learning_rate": 1.138052087908867e-05, - "loss": 0.794, + "learning_rate": 5.705824857590341e-06, + "loss": 0.957, "step": 16665 }, { - "epoch": 0.4722718127461815, + "epoch": 0.6520854526958291, "grad_norm": 0.0, - "learning_rate": 1.13796118718706e-05, - "loss": 0.9919, + "learning_rate": 5.704680447944812e-06, + "loss": 0.9829, "step": 16666 }, { - "epoch": 0.472300150188444, + "epoch": 0.6521245793880586, "grad_norm": 0.0, - "learning_rate": 1.1378702853031545e-05, - "loss": 0.9374, + "learning_rate": 5.70353610727673e-06, + "loss": 0.9966, "step": 16667 }, { - "epoch": 0.47232848763070645, + "epoch": 0.652163706080288, "grad_norm": 0.0, - "learning_rate": 1.1377793822579166e-05, - "loss": 0.9751, + "learning_rate": 5.702391835604479e-06, + "loss": 0.9139, "step": 16668 }, { - "epoch": 0.4723568250729689, + "epoch": 0.6522028327725174, "grad_norm": 0.0, - "learning_rate": 1.1376884780521117e-05, - "loss": 0.9023, + "learning_rate": 5.701247632946425e-06, + "loss": 1.1136, "step": 16669 }, { - "epoch": 0.4723851625152314, + "epoch": 0.6522419594647468, "grad_norm": 0.0, - "learning_rate": 1.1375975726865058e-05, - "loss": 0.921, + "learning_rate": 5.700103499320948e-06, + "loss": 1.0999, "step": 16670 }, { - "epoch": 0.4724134999574938, + "epoch": 0.6522810861569763, "grad_norm": 0.0, - "learning_rate": 1.137506666161864e-05, - "loss": 0.8396, + "learning_rate": 5.698959434746421e-06, + "loss": 1.0325, "step": 16671 }, { - "epoch": 0.4724418373997563, + "epoch": 0.6523202128492057, "grad_norm": 0.0, - "learning_rate": 1.1374157584789532e-05, - "loss": 0.9434, + "learning_rate": 5.6978154392412186e-06, + "loss": 1.0729, "step": 16672 }, { - "epoch": 0.47247017484201875, + "epoch": 0.6523593395414352, "grad_norm": 0.0, - "learning_rate": 1.137324849638538e-05, - "loss": 0.9353, + "learning_rate": 5.696671512823706e-06, + "loss": 1.0416, "step": 16673 }, { - "epoch": 0.47249851228428125, + "epoch": 0.6523984662336646, "grad_norm": 0.0, - "learning_rate": 1.1372339396413845e-05, - "loss": 0.8521, + "learning_rate": 5.695527655512256e-06, + "loss": 1.0412, "step": 16674 }, { - "epoch": 0.4725268497265437, + "epoch": 0.6524375929258941, "grad_norm": 0.0, - "learning_rate": 1.1371430284882586e-05, - "loss": 0.843, + "learning_rate": 5.694383867325238e-06, + "loss": 0.937, "step": 16675 }, { - "epoch": 0.4725551871688061, + "epoch": 0.6524767196181235, "grad_norm": 0.0, - "learning_rate": 1.1370521161799264e-05, - "loss": 0.8866, + "learning_rate": 5.693240148281025e-06, + "loss": 0.9878, "step": 16676 }, { - "epoch": 0.4725835246110686, + "epoch": 0.652515846310353, "grad_norm": 0.0, - "learning_rate": 1.1369612027171531e-05, - "loss": 0.8412, + "learning_rate": 5.692096498397974e-06, + "loss": 0.9389, "step": 16677 }, { - "epoch": 0.47261186205333106, + "epoch": 0.6525549730025824, "grad_norm": 0.0, - "learning_rate": 1.1368702881007048e-05, - "loss": 0.9322, + "learning_rate": 5.690952917694455e-06, + "loss": 1.13, "step": 16678 }, { - "epoch": 0.47264019949559355, + "epoch": 0.6525940996948117, "grad_norm": 0.0, - "learning_rate": 1.1367793723313469e-05, - "loss": 1.0048, + "learning_rate": 5.689809406188832e-06, + "loss": 1.0785, "step": 16679 }, { - "epoch": 0.472668536937856, + "epoch": 0.6526332263870412, "grad_norm": 0.0, - "learning_rate": 1.1366884554098458e-05, - "loss": 0.8453, + "learning_rate": 5.688665963899473e-06, + "loss": 0.9812, "step": 16680 }, { - "epoch": 0.4726968743801184, + "epoch": 0.6526723530792706, "grad_norm": 0.0, - "learning_rate": 1.1365975373369671e-05, - "loss": 0.8153, + "learning_rate": 5.687522590844734e-06, + "loss": 0.8878, "step": 16681 }, { - "epoch": 0.4727252118223809, + "epoch": 0.6527114797715001, "grad_norm": 0.0, - "learning_rate": 1.1365066181134768e-05, - "loss": 0.9182, + "learning_rate": 5.68637928704298e-06, + "loss": 0.9825, "step": 16682 }, { - "epoch": 0.47275354926464336, + "epoch": 0.6527506064637295, "grad_norm": 0.0, - "learning_rate": 1.1364156977401404e-05, - "loss": 0.9431, + "learning_rate": 5.6852360525125686e-06, + "loss": 0.9097, "step": 16683 }, { - "epoch": 0.47278188670690585, + "epoch": 0.652789733155959, "grad_norm": 0.0, - "learning_rate": 1.1363247762177236e-05, - "loss": 0.9373, + "learning_rate": 5.684092887271857e-06, + "loss": 0.9178, "step": 16684 }, { - "epoch": 0.4728102241491683, + "epoch": 0.6528288598481884, "grad_norm": 0.0, - "learning_rate": 1.136233853546993e-05, - "loss": 0.9487, + "learning_rate": 5.6829497913392055e-06, + "loss": 1.0805, "step": 16685 }, { - "epoch": 0.4728385615914308, + "epoch": 0.6528679865404179, "grad_norm": 0.0, - "learning_rate": 1.1361429297287138e-05, - "loss": 0.7957, + "learning_rate": 5.681806764732976e-06, + "loss": 1.2478, "step": 16686 }, { - "epoch": 0.4728668990336932, + "epoch": 0.6529071132326473, "grad_norm": 0.0, - "learning_rate": 1.1360520047636525e-05, - "loss": 0.8317, + "learning_rate": 5.680663807471514e-06, + "loss": 0.9857, "step": 16687 }, { - "epoch": 0.47289523647595566, + "epoch": 0.6529462399248768, "grad_norm": 0.0, - "learning_rate": 1.1359610786525743e-05, - "loss": 0.951, + "learning_rate": 5.6795209195731785e-06, + "loss": 1.0363, "step": 16688 }, { - "epoch": 0.47292357391821815, + "epoch": 0.6529853666171062, "grad_norm": 0.0, - "learning_rate": 1.1358701513962457e-05, - "loss": 0.8726, + "learning_rate": 5.6783781010563275e-06, + "loss": 0.9389, "step": 16689 }, { - "epoch": 0.4729519113604806, + "epoch": 0.6530244933093357, "grad_norm": 0.0, - "learning_rate": 1.1357792229954324e-05, - "loss": 0.9705, + "learning_rate": 5.677235351939305e-06, + "loss": 0.9882, "step": 16690 }, { - "epoch": 0.4729802488027431, + "epoch": 0.653063620001565, "grad_norm": 0.0, - "learning_rate": 1.1356882934509001e-05, - "loss": 0.77, + "learning_rate": 5.676092672240465e-06, + "loss": 1.0833, "step": 16691 }, { - "epoch": 0.4730085862450055, + "epoch": 0.6531027466937945, "grad_norm": 0.0, - "learning_rate": 1.1355973627634147e-05, - "loss": 0.8158, + "learning_rate": 5.6749500619781595e-06, + "loss": 0.9275, "step": 16692 }, { - "epoch": 0.47303692368726796, + "epoch": 0.6531418733860239, "grad_norm": 0.0, - "learning_rate": 1.1355064309337424e-05, - "loss": 0.9037, + "learning_rate": 5.673807521170741e-06, + "loss": 1.0975, "step": 16693 }, { - "epoch": 0.47306526112953046, + "epoch": 0.6531810000782534, "grad_norm": 0.0, - "learning_rate": 1.1354154979626495e-05, - "loss": 0.951, + "learning_rate": 5.672665049836548e-06, + "loss": 0.9933, "step": 16694 }, { - "epoch": 0.4730935985717929, + "epoch": 0.6532201267704828, "grad_norm": 0.0, - "learning_rate": 1.1353245638509016e-05, - "loss": 0.9165, + "learning_rate": 5.671522647993937e-06, + "loss": 1.0527, "step": 16695 }, { - "epoch": 0.4731219360140554, + "epoch": 0.6532592534627123, "grad_norm": 0.0, - "learning_rate": 1.1352336285992643e-05, - "loss": 0.9127, + "learning_rate": 5.6703803156612395e-06, + "loss": 1.1046, "step": 16696 }, { - "epoch": 0.4731502734563178, + "epoch": 0.6532983801549417, "grad_norm": 0.0, - "learning_rate": 1.135142692208504e-05, - "loss": 1.0042, + "learning_rate": 5.669238052856816e-06, + "loss": 0.8994, "step": 16697 }, { - "epoch": 0.4731786108985803, + "epoch": 0.6533375068471712, "grad_norm": 0.0, - "learning_rate": 1.135051754679387e-05, - "loss": 0.9057, + "learning_rate": 5.668095859598999e-06, + "loss": 0.9842, "step": 16698 }, { - "epoch": 0.47320694834084276, + "epoch": 0.6533766335394006, "grad_norm": 0.0, - "learning_rate": 1.1349608160126784e-05, - "loss": 0.8516, + "learning_rate": 5.666953735906138e-06, + "loss": 1.0343, "step": 16699 }, { - "epoch": 0.4732352857831052, + "epoch": 0.6534157602316301, "grad_norm": 0.0, - "learning_rate": 1.1348698762091448e-05, - "loss": 0.9977, + "learning_rate": 5.665811681796567e-06, + "loss": 0.9095, "step": 16700 }, { - "epoch": 0.4732636232253677, + "epoch": 0.6534548869238594, "grad_norm": 0.0, - "learning_rate": 1.1347789352695524e-05, - "loss": 0.8432, + "learning_rate": 5.66466969728863e-06, + "loss": 1.013, "step": 16701 }, { - "epoch": 0.47329196066763013, + "epoch": 0.6534940136160889, "grad_norm": 0.0, - "learning_rate": 1.1346879931946668e-05, - "loss": 0.8877, + "learning_rate": 5.663527782400664e-06, + "loss": 1.0244, "step": 16702 }, { - "epoch": 0.4733202981098926, + "epoch": 0.6535331403083183, "grad_norm": 0.0, - "learning_rate": 1.1345970499852546e-05, - "loss": 0.9734, + "learning_rate": 5.662385937151013e-06, + "loss": 0.9279, "step": 16703 }, { - "epoch": 0.47334863555215506, + "epoch": 0.6535722670005478, "grad_norm": 0.0, - "learning_rate": 1.134506105642081e-05, - "loss": 0.8781, + "learning_rate": 5.661244161558004e-06, + "loss": 1.0458, "step": 16704 }, { - "epoch": 0.4733769729944175, + "epoch": 0.6536113936927772, "grad_norm": 0.0, - "learning_rate": 1.1344151601659125e-05, - "loss": 0.8754, + "learning_rate": 5.660102455639978e-06, + "loss": 1.1083, "step": 16705 }, { - "epoch": 0.47340531043668, + "epoch": 0.6536505203850067, "grad_norm": 0.0, - "learning_rate": 1.1343242135575155e-05, - "loss": 0.9433, + "learning_rate": 5.658960819415267e-06, + "loss": 0.9049, "step": 16706 }, { - "epoch": 0.47343364787894243, + "epoch": 0.6536896470772361, "grad_norm": 0.0, - "learning_rate": 1.1342332658176556e-05, - "loss": 0.8731, + "learning_rate": 5.657819252902209e-06, + "loss": 1.0211, "step": 16707 }, { - "epoch": 0.4734619853212049, + "epoch": 0.6537287737694655, "grad_norm": 0.0, - "learning_rate": 1.134142316947099e-05, - "loss": 0.9902, + "learning_rate": 5.65667775611913e-06, + "loss": 0.8492, "step": 16708 }, { - "epoch": 0.47349032276346736, + "epoch": 0.653767900461695, "grad_norm": 0.0, - "learning_rate": 1.134051366946612e-05, - "loss": 0.8742, + "learning_rate": 5.655536329084368e-06, + "loss": 0.9878, "step": 16709 }, { - "epoch": 0.47351866020572986, + "epoch": 0.6538070271539244, "grad_norm": 0.0, - "learning_rate": 1.1339604158169606e-05, - "loss": 0.8593, + "learning_rate": 5.654394971816239e-06, + "loss": 0.9247, "step": 16710 }, { - "epoch": 0.4735469976479923, + "epoch": 0.6538461538461539, "grad_norm": 0.0, - "learning_rate": 1.133869463558911e-05, - "loss": 0.9094, + "learning_rate": 5.653253684333091e-06, + "loss": 0.9169, "step": 16711 }, { - "epoch": 0.47357533509025473, + "epoch": 0.6538852805383832, "grad_norm": 0.0, - "learning_rate": 1.1337785101732286e-05, - "loss": 0.8496, + "learning_rate": 5.652112466653237e-06, + "loss": 0.9543, "step": 16712 }, { - "epoch": 0.47360367253251723, + "epoch": 0.6539244072306127, "grad_norm": 0.0, - "learning_rate": 1.1336875556606806e-05, - "loss": 0.8837, + "learning_rate": 5.650971318795013e-06, + "loss": 1.0327, "step": 16713 }, { - "epoch": 0.47363200997477967, + "epoch": 0.6539635339228421, "grad_norm": 0.0, - "learning_rate": 1.1335966000220325e-05, - "loss": 0.9159, + "learning_rate": 5.649830240776733e-06, + "loss": 0.9651, "step": 16714 }, { - "epoch": 0.47366034741704216, + "epoch": 0.6540026606150716, "grad_norm": 0.0, - "learning_rate": 1.133505643258051e-05, - "loss": 0.9415, + "learning_rate": 5.648689232616735e-06, + "loss": 1.0222, "step": 16715 }, { - "epoch": 0.4736886848593046, + "epoch": 0.654041787307301, "grad_norm": 0.0, - "learning_rate": 1.1334146853695017e-05, - "loss": 0.9005, + "learning_rate": 5.647548294333332e-06, + "loss": 1.0136, "step": 16716 }, { - "epoch": 0.47371702230156704, + "epoch": 0.6540809139995305, "grad_norm": 0.0, - "learning_rate": 1.1333237263571506e-05, - "loss": 0.9219, + "learning_rate": 5.646407425944853e-06, + "loss": 0.9893, "step": 16717 }, { - "epoch": 0.47374535974382953, + "epoch": 0.6541200406917599, "grad_norm": 0.0, - "learning_rate": 1.1332327662217646e-05, - "loss": 0.8555, + "learning_rate": 5.645266627469612e-06, + "loss": 1.0136, "step": 16718 }, { - "epoch": 0.47377369718609197, + "epoch": 0.6541591673839894, "grad_norm": 0.0, - "learning_rate": 1.1331418049641091e-05, - "loss": 0.8794, + "learning_rate": 5.644125898925932e-06, + "loss": 0.9485, "step": 16719 }, { - "epoch": 0.47380203462835446, + "epoch": 0.6541982940762188, "grad_norm": 0.0, - "learning_rate": 1.1330508425849514e-05, - "loss": 0.9659, + "learning_rate": 5.642985240332131e-06, + "loss": 1.1038, "step": 16720 }, { - "epoch": 0.4738303720706169, + "epoch": 0.6542374207684483, "grad_norm": 0.0, - "learning_rate": 1.1329598790850565e-05, - "loss": 0.8981, + "learning_rate": 5.6418446517065316e-06, + "loss": 0.9553, "step": 16721 }, { - "epoch": 0.4738587095128794, + "epoch": 0.6542765474606776, "grad_norm": 0.0, - "learning_rate": 1.1328689144651915e-05, - "loss": 0.9253, + "learning_rate": 5.640704133067443e-06, + "loss": 0.9686, "step": 16722 }, { - "epoch": 0.47388704695514183, + "epoch": 0.6543156741529071, "grad_norm": 0.0, - "learning_rate": 1.1327779487261222e-05, - "loss": 0.8588, + "learning_rate": 5.639563684433182e-06, + "loss": 1.088, "step": 16723 }, { - "epoch": 0.47391538439740427, + "epoch": 0.6543548008451365, "grad_norm": 0.0, - "learning_rate": 1.132686981868615e-05, - "loss": 0.917, + "learning_rate": 5.638423305822066e-06, + "loss": 1.0007, "step": 16724 }, { - "epoch": 0.47394372183966677, + "epoch": 0.654393927537366, "grad_norm": 0.0, - "learning_rate": 1.132596013893436e-05, - "loss": 0.8829, + "learning_rate": 5.637282997252409e-06, + "loss": 0.9171, "step": 16725 }, { - "epoch": 0.4739720592819292, + "epoch": 0.6544330542295954, "grad_norm": 0.0, - "learning_rate": 1.1325050448013513e-05, - "loss": 0.905, + "learning_rate": 5.636142758742518e-06, + "loss": 1.0522, "step": 16726 }, { - "epoch": 0.4740003967241917, + "epoch": 0.6544721809218249, "grad_norm": 0.0, - "learning_rate": 1.1324140745931278e-05, - "loss": 0.8941, + "learning_rate": 5.635002590310708e-06, + "loss": 0.9946, "step": 16727 }, { - "epoch": 0.47402873416645414, + "epoch": 0.6545113076140543, "grad_norm": 0.0, - "learning_rate": 1.1323231032695313e-05, - "loss": 0.8805, + "learning_rate": 5.633862491975284e-06, + "loss": 0.99, "step": 16728 }, { - "epoch": 0.4740570716087166, + "epoch": 0.6545504343062838, "grad_norm": 0.0, - "learning_rate": 1.1322321308313278e-05, - "loss": 0.8768, + "learning_rate": 5.6327224637545574e-06, + "loss": 0.9673, "step": 16729 }, { - "epoch": 0.47408540905097907, + "epoch": 0.6545895609985132, "grad_norm": 0.0, - "learning_rate": 1.1321411572792844e-05, - "loss": 0.8113, + "learning_rate": 5.631582505666835e-06, + "loss": 1.113, "step": 16730 }, { - "epoch": 0.4741137464932415, + "epoch": 0.6546286876907427, "grad_norm": 0.0, - "learning_rate": 1.1320501826141668e-05, - "loss": 0.9681, + "learning_rate": 5.630442617730427e-06, + "loss": 1.0164, "step": 16731 }, { - "epoch": 0.474142083935504, + "epoch": 0.6546678143829721, "grad_norm": 0.0, - "learning_rate": 1.1319592068367413e-05, - "loss": 0.8815, + "learning_rate": 5.629302799963633e-06, + "loss": 1.0515, "step": 16732 }, { - "epoch": 0.47417042137776644, + "epoch": 0.6547069410752016, "grad_norm": 0.0, - "learning_rate": 1.1318682299477746e-05, - "loss": 0.8567, + "learning_rate": 5.628163052384759e-06, + "loss": 0.9334, "step": 16733 }, { - "epoch": 0.47419875882002893, + "epoch": 0.6547460677674309, "grad_norm": 0.0, - "learning_rate": 1.1317772519480328e-05, - "loss": 0.8333, + "learning_rate": 5.62702337501211e-06, + "loss": 0.9127, "step": 16734 }, { - "epoch": 0.47422709626229137, + "epoch": 0.6547851944596604, "grad_norm": 0.0, - "learning_rate": 1.1316862728382825e-05, - "loss": 0.8664, + "learning_rate": 5.6258837678639845e-06, + "loss": 0.8934, "step": 16735 }, { - "epoch": 0.4742554337045538, + "epoch": 0.6548243211518898, "grad_norm": 0.0, - "learning_rate": 1.1315952926192898e-05, - "loss": 0.9599, + "learning_rate": 5.624744230958683e-06, + "loss": 0.9432, "step": 16736 }, { - "epoch": 0.4742837711468163, + "epoch": 0.6548634478441192, "grad_norm": 0.0, - "learning_rate": 1.1315043112918206e-05, - "loss": 0.8762, + "learning_rate": 5.623604764314507e-06, + "loss": 0.9915, "step": 16737 }, { - "epoch": 0.47431210858907874, + "epoch": 0.6549025745363487, "grad_norm": 0.0, - "learning_rate": 1.131413328856642e-05, - "loss": 0.8603, + "learning_rate": 5.622465367949759e-06, + "loss": 0.8807, "step": 16738 }, { - "epoch": 0.47434044603134123, + "epoch": 0.6549417012285781, "grad_norm": 0.0, - "learning_rate": 1.1313223453145202e-05, - "loss": 0.9632, + "learning_rate": 5.621326041882727e-06, + "loss": 1.033, "step": 16739 }, { - "epoch": 0.4743687834736037, + "epoch": 0.6549808279208076, "grad_norm": 0.0, - "learning_rate": 1.1312313606662216e-05, - "loss": 0.929, + "learning_rate": 5.620186786131713e-06, + "loss": 0.994, "step": 16740 }, { - "epoch": 0.4743971209158661, + "epoch": 0.655019954613037, "grad_norm": 0.0, - "learning_rate": 1.1311403749125123e-05, - "loss": 0.7994, + "learning_rate": 5.619047600715011e-06, + "loss": 0.9011, "step": 16741 }, { - "epoch": 0.4744254583581286, + "epoch": 0.6550590813052665, "grad_norm": 0.0, - "learning_rate": 1.1310493880541588e-05, - "loss": 0.8481, + "learning_rate": 5.617908485650918e-06, + "loss": 0.994, "step": 16742 }, { - "epoch": 0.47445379580039104, + "epoch": 0.6550982079974959, "grad_norm": 0.0, - "learning_rate": 1.130958400091928e-05, - "loss": 0.9703, + "learning_rate": 5.616769440957719e-06, + "loss": 0.9603, "step": 16743 }, { - "epoch": 0.47448213324265354, + "epoch": 0.6551373346897253, "grad_norm": 0.0, - "learning_rate": 1.1308674110265861e-05, - "loss": 0.8848, + "learning_rate": 5.6156304666537166e-06, + "loss": 0.8468, "step": 16744 }, { - "epoch": 0.474510470684916, + "epoch": 0.6551764613819547, "grad_norm": 0.0, - "learning_rate": 1.1307764208588989e-05, - "loss": 0.9352, + "learning_rate": 5.614491562757185e-06, + "loss": 1.0632, "step": 16745 }, { - "epoch": 0.47453880812717847, + "epoch": 0.6552155880741842, "grad_norm": 0.0, - "learning_rate": 1.1306854295896335e-05, - "loss": 0.9797, + "learning_rate": 5.613352729286432e-06, + "loss": 0.9782, "step": 16746 }, { - "epoch": 0.4745671455694409, + "epoch": 0.6552547147664136, "grad_norm": 0.0, - "learning_rate": 1.1305944372195564e-05, - "loss": 0.8999, + "learning_rate": 5.6122139662597345e-06, + "loss": 1.1292, "step": 16747 }, { - "epoch": 0.47459548301170335, + "epoch": 0.6552938414586431, "grad_norm": 0.0, - "learning_rate": 1.1305034437494337e-05, - "loss": 0.9849, + "learning_rate": 5.611075273695385e-06, + "loss": 1.1208, "step": 16748 }, { - "epoch": 0.47462382045396584, + "epoch": 0.6553329681508725, "grad_norm": 0.0, - "learning_rate": 1.130412449180032e-05, - "loss": 1.0, + "learning_rate": 5.609936651611662e-06, + "loss": 0.9826, "step": 16749 }, { - "epoch": 0.4746521578962283, + "epoch": 0.655372094843102, "grad_norm": 0.0, - "learning_rate": 1.1303214535121181e-05, - "loss": 0.8895, + "learning_rate": 5.608798100026858e-06, + "loss": 0.9543, "step": 16750 }, { - "epoch": 0.47468049533849077, + "epoch": 0.6554112215353314, "grad_norm": 0.0, - "learning_rate": 1.130230456746458e-05, - "loss": 0.9152, + "learning_rate": 5.607659618959253e-06, + "loss": 0.9625, "step": 16751 }, { - "epoch": 0.4747088327807532, + "epoch": 0.6554503482275609, "grad_norm": 0.0, - "learning_rate": 1.130139458883818e-05, - "loss": 0.8912, + "learning_rate": 5.6065212084271344e-06, + "loss": 0.9623, "step": 16752 }, { - "epoch": 0.47473717022301565, + "epoch": 0.6554894749197903, "grad_norm": 0.0, - "learning_rate": 1.1300484599249656e-05, - "loss": 0.8191, + "learning_rate": 5.6053828684487765e-06, + "loss": 1.0076, "step": 16753 }, { - "epoch": 0.47476550766527814, + "epoch": 0.6555286016120198, "grad_norm": 0.0, - "learning_rate": 1.1299574598706663e-05, - "loss": 0.9166, + "learning_rate": 5.604244599042462e-06, + "loss": 1.011, "step": 16754 }, { - "epoch": 0.4747938451075406, + "epoch": 0.6555677283042491, "grad_norm": 0.0, - "learning_rate": 1.1298664587216877e-05, - "loss": 1.0305, + "learning_rate": 5.603106400226472e-06, + "loss": 1.0068, "step": 16755 }, { - "epoch": 0.4748221825498031, + "epoch": 0.6556068549964786, "grad_norm": 0.0, - "learning_rate": 1.1297754564787952e-05, - "loss": 0.8947, + "learning_rate": 5.601968272019087e-06, + "loss": 1.0608, "step": 16756 }, { - "epoch": 0.4748505199920655, + "epoch": 0.655645981688708, "grad_norm": 0.0, - "learning_rate": 1.129684453142756e-05, - "loss": 0.9106, + "learning_rate": 5.600830214438577e-06, + "loss": 0.9782, "step": 16757 }, { - "epoch": 0.47487885743432795, + "epoch": 0.6556851083809375, "grad_norm": 0.0, - "learning_rate": 1.1295934487143364e-05, - "loss": 0.9711, + "learning_rate": 5.599692227503224e-06, + "loss": 0.9361, "step": 16758 }, { - "epoch": 0.47490719487659044, + "epoch": 0.6557242350731669, "grad_norm": 0.0, - "learning_rate": 1.1295024431943029e-05, - "loss": 0.9331, + "learning_rate": 5.598554311231293e-06, + "loss": 1.0001, "step": 16759 }, { - "epoch": 0.4749355323188529, + "epoch": 0.6557633617653964, "grad_norm": 0.0, - "learning_rate": 1.1294114365834225e-05, - "loss": 0.8518, + "learning_rate": 5.5974164656410725e-06, + "loss": 0.9736, "step": 16760 }, { - "epoch": 0.4749638697611154, + "epoch": 0.6558024884576258, "grad_norm": 0.0, - "learning_rate": 1.1293204288824615e-05, - "loss": 0.85, + "learning_rate": 5.596278690750822e-06, + "loss": 1.0179, "step": 16761 }, { - "epoch": 0.4749922072033778, + "epoch": 0.6558416151498553, "grad_norm": 0.0, - "learning_rate": 1.1292294200921862e-05, - "loss": 0.8539, + "learning_rate": 5.595140986578823e-06, + "loss": 1.0057, "step": 16762 }, { - "epoch": 0.4750205446456403, + "epoch": 0.6558807418420847, "grad_norm": 0.0, - "learning_rate": 1.1291384102133638e-05, - "loss": 0.8931, + "learning_rate": 5.5940033531433314e-06, + "loss": 0.8877, "step": 16763 }, { - "epoch": 0.47504888208790275, + "epoch": 0.655919868534314, "grad_norm": 0.0, - "learning_rate": 1.1290473992467607e-05, - "loss": 0.9389, + "learning_rate": 5.5928657904626325e-06, + "loss": 1.1679, "step": 16764 }, { - "epoch": 0.4750772195301652, + "epoch": 0.6559589952265436, "grad_norm": 0.0, - "learning_rate": 1.128956387193143e-05, - "loss": 0.8317, + "learning_rate": 5.591728298554984e-06, + "loss": 0.9917, "step": 16765 }, { - "epoch": 0.4751055569724277, + "epoch": 0.6559981219187729, "grad_norm": 0.0, - "learning_rate": 1.1288653740532782e-05, - "loss": 0.8922, + "learning_rate": 5.590590877438657e-06, + "loss": 0.8846, "step": 16766 }, { - "epoch": 0.4751338944146901, + "epoch": 0.6560372486110024, "grad_norm": 0.0, - "learning_rate": 1.1287743598279323e-05, - "loss": 1.0338, + "learning_rate": 5.589453527131912e-06, + "loss": 0.9992, "step": 16767 }, { - "epoch": 0.4751622318569526, + "epoch": 0.6560763753032318, "grad_norm": 0.0, - "learning_rate": 1.1286833445178722e-05, - "loss": 0.8804, + "learning_rate": 5.588316247653017e-06, + "loss": 0.9904, "step": 16768 }, { - "epoch": 0.47519056929921505, + "epoch": 0.6561155019954613, "grad_norm": 0.0, - "learning_rate": 1.1285923281238646e-05, - "loss": 0.8246, + "learning_rate": 5.587179039020235e-06, + "loss": 1.1024, "step": 16769 }, { - "epoch": 0.4752189067414775, + "epoch": 0.6561546286876907, "grad_norm": 0.0, - "learning_rate": 1.128501310646676e-05, - "loss": 0.8601, + "learning_rate": 5.586041901251832e-06, + "loss": 1.0864, "step": 16770 }, { - "epoch": 0.47524724418374, + "epoch": 0.6561937553799202, "grad_norm": 0.0, - "learning_rate": 1.128410292087073e-05, - "loss": 1.0277, + "learning_rate": 5.584904834366061e-06, + "loss": 0.9757, "step": 16771 }, { - "epoch": 0.4752755816260024, + "epoch": 0.6562328820721496, "grad_norm": 0.0, - "learning_rate": 1.1283192724458225e-05, - "loss": 0.9296, + "learning_rate": 5.583767838381186e-06, + "loss": 1.1052, "step": 16772 }, { - "epoch": 0.4753039190682649, + "epoch": 0.6562720087643791, "grad_norm": 0.0, - "learning_rate": 1.1282282517236913e-05, - "loss": 0.8132, + "learning_rate": 5.582630913315465e-06, + "loss": 0.9493, "step": 16773 }, { - "epoch": 0.47533225651052735, + "epoch": 0.6563111354566085, "grad_norm": 0.0, - "learning_rate": 1.1281372299214457e-05, - "loss": 0.8506, + "learning_rate": 5.581494059187162e-06, + "loss": 1.0811, "step": 16774 }, { - "epoch": 0.47536059395278984, + "epoch": 0.656350262148838, "grad_norm": 0.0, - "learning_rate": 1.1280462070398529e-05, - "loss": 0.8385, + "learning_rate": 5.580357276014522e-06, + "loss": 1.0577, "step": 16775 }, { - "epoch": 0.4753889313950523, + "epoch": 0.6563893888410673, "grad_norm": 0.0, - "learning_rate": 1.1279551830796792e-05, - "loss": 0.828, + "learning_rate": 5.5792205638158105e-06, + "loss": 0.9847, "step": 16776 }, { - "epoch": 0.4754172688373147, + "epoch": 0.6564285155332968, "grad_norm": 0.0, - "learning_rate": 1.127864158041691e-05, - "loss": 0.9435, + "learning_rate": 5.578083922609273e-06, + "loss": 0.9894, "step": 16777 }, { - "epoch": 0.4754456062795772, + "epoch": 0.6564676422255262, "grad_norm": 0.0, - "learning_rate": 1.1277731319266562e-05, - "loss": 0.8958, + "learning_rate": 5.576947352413167e-06, + "loss": 1.0532, "step": 16778 }, { - "epoch": 0.47547394372183965, + "epoch": 0.6565067689177557, "grad_norm": 0.0, - "learning_rate": 1.1276821047353403e-05, - "loss": 0.904, + "learning_rate": 5.575810853245744e-06, + "loss": 0.9259, "step": 16779 }, { - "epoch": 0.47550228116410215, + "epoch": 0.6565458956099851, "grad_norm": 0.0, - "learning_rate": 1.1275910764685111e-05, - "loss": 0.8677, + "learning_rate": 5.574674425125259e-06, + "loss": 0.9747, "step": 16780 }, { - "epoch": 0.4755306186063646, + "epoch": 0.6565850223022146, "grad_norm": 0.0, - "learning_rate": 1.1275000471269348e-05, - "loss": 1.0218, + "learning_rate": 5.573538068069954e-06, + "loss": 0.9744, "step": 16781 }, { - "epoch": 0.475558956048627, + "epoch": 0.656624148994444, "grad_norm": 0.0, - "learning_rate": 1.1274090167113783e-05, - "loss": 0.9226, + "learning_rate": 5.572401782098081e-06, + "loss": 1.1124, "step": 16782 }, { - "epoch": 0.4755872934908895, + "epoch": 0.6566632756866735, "grad_norm": 0.0, - "learning_rate": 1.127317985222608e-05, - "loss": 0.7647, + "learning_rate": 5.5712655672278905e-06, + "loss": 1.0727, "step": 16783 }, { - "epoch": 0.47561563093315196, + "epoch": 0.6567024023789029, "grad_norm": 0.0, - "learning_rate": 1.1272269526613913e-05, - "loss": 0.8717, + "learning_rate": 5.5701294234776215e-06, + "loss": 0.9709, "step": 16784 }, { - "epoch": 0.47564396837541445, + "epoch": 0.6567415290711324, "grad_norm": 0.0, - "learning_rate": 1.1271359190284947e-05, - "loss": 0.854, + "learning_rate": 5.568993350865522e-06, + "loss": 0.9796, "step": 16785 }, { - "epoch": 0.4756723058176769, + "epoch": 0.6567806557633618, "grad_norm": 0.0, - "learning_rate": 1.127044884324685e-05, - "loss": 0.9551, + "learning_rate": 5.5678573494098384e-06, + "loss": 0.9283, "step": 16786 }, { - "epoch": 0.4757006432599394, + "epoch": 0.6568197824555913, "grad_norm": 0.0, - "learning_rate": 1.126953848550729e-05, - "loss": 0.9221, + "learning_rate": 5.566721419128815e-06, + "loss": 1.023, "step": 16787 }, { - "epoch": 0.4757289807022018, + "epoch": 0.6568589091478206, "grad_norm": 0.0, - "learning_rate": 1.1268628117073939e-05, - "loss": 0.9178, + "learning_rate": 5.565585560040688e-06, + "loss": 1.0407, "step": 16788 }, { - "epoch": 0.47575731814446426, + "epoch": 0.6568980358400501, "grad_norm": 0.0, - "learning_rate": 1.126771773795446e-05, - "loss": 1.0032, + "learning_rate": 5.564449772163704e-06, + "loss": 0.953, "step": 16789 }, { - "epoch": 0.47578565558672675, + "epoch": 0.6569371625322795, "grad_norm": 0.0, - "learning_rate": 1.1266807348156521e-05, - "loss": 0.9948, + "learning_rate": 5.563314055516089e-06, + "loss": 0.9584, "step": 16790 }, { - "epoch": 0.4758139930289892, + "epoch": 0.656976289224509, "grad_norm": 0.0, - "learning_rate": 1.1265896947687796e-05, - "loss": 0.9521, + "learning_rate": 5.5621784101161e-06, + "loss": 1.0316, "step": 16791 }, { - "epoch": 0.4758423304712517, + "epoch": 0.6570154159167384, "grad_norm": 0.0, - "learning_rate": 1.1264986536555951e-05, - "loss": 0.9194, + "learning_rate": 5.5610428359819605e-06, + "loss": 0.9921, "step": 16792 }, { - "epoch": 0.4758706679135141, + "epoch": 0.6570545426089678, "grad_norm": 0.0, - "learning_rate": 1.1264076114768657e-05, - "loss": 0.9477, + "learning_rate": 5.559907333131915e-06, + "loss": 0.9875, "step": 16793 }, { - "epoch": 0.47589900535577656, + "epoch": 0.6570936693011973, "grad_norm": 0.0, - "learning_rate": 1.1263165682333577e-05, - "loss": 0.9617, + "learning_rate": 5.5587719015841856e-06, + "loss": 1.1145, "step": 16794 }, { - "epoch": 0.47592734279803905, + "epoch": 0.6571327959934267, "grad_norm": 0.0, - "learning_rate": 1.1262255239258385e-05, - "loss": 0.9532, + "learning_rate": 5.557636541357023e-06, + "loss": 0.9958, "step": 16795 }, { - "epoch": 0.4759556802403015, + "epoch": 0.6571719226856562, "grad_norm": 0.0, - "learning_rate": 1.1261344785550748e-05, - "loss": 0.8064, + "learning_rate": 5.556501252468647e-06, + "loss": 0.882, "step": 16796 }, { - "epoch": 0.475984017682564, + "epoch": 0.6572110493778855, "grad_norm": 0.0, - "learning_rate": 1.1260434321218334e-05, - "loss": 0.8564, + "learning_rate": 5.555366034937295e-06, + "loss": 1.0179, "step": 16797 }, { - "epoch": 0.4760123551248264, + "epoch": 0.657250176070115, "grad_norm": 0.0, - "learning_rate": 1.1259523846268816e-05, - "loss": 1.0193, + "learning_rate": 5.5542308887811934e-06, + "loss": 1.0649, "step": 16798 }, { - "epoch": 0.4760406925670889, + "epoch": 0.6572893027623444, "grad_norm": 0.0, - "learning_rate": 1.125861336070986e-05, - "loss": 0.9343, + "learning_rate": 5.553095814018572e-06, + "loss": 1.059, "step": 16799 }, { - "epoch": 0.47606903000935136, + "epoch": 0.6573284294545739, "grad_norm": 0.0, - "learning_rate": 1.1257702864549134e-05, - "loss": 0.892, + "learning_rate": 5.551960810667658e-06, + "loss": 1.0562, "step": 16800 }, { - "epoch": 0.4760973674516138, + "epoch": 0.6573675561468033, "grad_norm": 0.0, - "learning_rate": 1.1256792357794313e-05, - "loss": 0.8446, + "learning_rate": 5.550825878746686e-06, + "loss": 1.1271, "step": 16801 }, { - "epoch": 0.4761257048938763, + "epoch": 0.6574066828390328, "grad_norm": 0.0, - "learning_rate": 1.1255881840453065e-05, - "loss": 0.9618, + "learning_rate": 5.549691018273871e-06, + "loss": 0.9961, "step": 16802 }, { - "epoch": 0.4761540423361387, + "epoch": 0.6574458095312622, "grad_norm": 0.0, - "learning_rate": 1.1254971312533052e-05, - "loss": 1.0222, + "learning_rate": 5.548556229267441e-06, + "loss": 0.976, "step": 16803 }, { - "epoch": 0.4761823797784012, + "epoch": 0.6574849362234917, "grad_norm": 0.0, - "learning_rate": 1.1254060774041953e-05, - "loss": 0.9083, + "learning_rate": 5.547421511745619e-06, + "loss": 1.0322, "step": 16804 }, { - "epoch": 0.47621071722066366, + "epoch": 0.6575240629157211, "grad_norm": 0.0, - "learning_rate": 1.1253150224987435e-05, - "loss": 0.8802, + "learning_rate": 5.546286865726633e-06, + "loss": 1.0228, "step": 16805 }, { - "epoch": 0.4762390546629261, + "epoch": 0.6575631896079506, "grad_norm": 0.0, - "learning_rate": 1.1252239665377167e-05, - "loss": 0.9542, + "learning_rate": 5.545152291228696e-06, + "loss": 0.9663, "step": 16806 }, { - "epoch": 0.4762673921051886, + "epoch": 0.65760231630018, "grad_norm": 0.0, - "learning_rate": 1.1251329095218819e-05, - "loss": 0.9464, + "learning_rate": 5.5440177882700345e-06, + "loss": 0.9773, "step": 16807 }, { - "epoch": 0.47629572954745103, + "epoch": 0.6576414429924095, "grad_norm": 0.0, - "learning_rate": 1.1250418514520061e-05, - "loss": 0.766, + "learning_rate": 5.542883356868856e-06, + "loss": 1.0372, "step": 16808 }, { - "epoch": 0.4763240669897135, + "epoch": 0.6576805696846388, "grad_norm": 0.0, - "learning_rate": 1.1249507923288563e-05, - "loss": 0.877, + "learning_rate": 5.541748997043392e-06, + "loss": 0.9148, "step": 16809 }, { - "epoch": 0.47635240443197596, + "epoch": 0.6577196963768683, "grad_norm": 0.0, - "learning_rate": 1.1248597321531995e-05, - "loss": 0.8974, + "learning_rate": 5.540614708811849e-06, + "loss": 1.0558, "step": 16810 }, { - "epoch": 0.47638074187423846, + "epoch": 0.6577588230690977, "grad_norm": 0.0, - "learning_rate": 1.124768670925803e-05, - "loss": 0.8366, + "learning_rate": 5.53948049219245e-06, + "loss": 1.0858, "step": 16811 }, { - "epoch": 0.4764090793165009, + "epoch": 0.6577979497613272, "grad_norm": 0.0, - "learning_rate": 1.1246776086474335e-05, - "loss": 0.8705, + "learning_rate": 5.5383463472033985e-06, + "loss": 0.9424, "step": 16812 }, { - "epoch": 0.47643741675876333, + "epoch": 0.6578370764535566, "grad_norm": 0.0, - "learning_rate": 1.1245865453188584e-05, - "loss": 0.977, + "learning_rate": 5.537212273862922e-06, + "loss": 0.9368, "step": 16813 }, { - "epoch": 0.4764657542010258, + "epoch": 0.6578762031457861, "grad_norm": 0.0, - "learning_rate": 1.1244954809408446e-05, - "loss": 0.8973, + "learning_rate": 5.536078272189218e-06, + "loss": 0.8394, "step": 16814 }, { - "epoch": 0.47649409164328826, + "epoch": 0.6579153298380155, "grad_norm": 0.0, - "learning_rate": 1.124404415514159e-05, - "loss": 0.8725, + "learning_rate": 5.534944342200508e-06, + "loss": 0.9951, "step": 16815 }, { - "epoch": 0.47652242908555076, + "epoch": 0.657954456530245, "grad_norm": 0.0, - "learning_rate": 1.1243133490395687e-05, - "loss": 0.8711, + "learning_rate": 5.533810483914994e-06, + "loss": 1.0064, "step": 16816 }, { - "epoch": 0.4765507665278132, + "epoch": 0.6579935832224744, "grad_norm": 0.0, - "learning_rate": 1.1242222815178409e-05, - "loss": 0.9965, + "learning_rate": 5.532676697350887e-06, + "loss": 0.9243, "step": 16817 }, { - "epoch": 0.47657910397007563, + "epoch": 0.6580327099147039, "grad_norm": 0.0, - "learning_rate": 1.124131212949743e-05, - "loss": 0.9111, + "learning_rate": 5.5315429825263935e-06, + "loss": 0.8653, "step": 16818 }, { - "epoch": 0.47660744141233813, + "epoch": 0.6580718366069332, "grad_norm": 0.0, - "learning_rate": 1.1240401433360417e-05, - "loss": 0.9383, + "learning_rate": 5.530409339459724e-06, + "loss": 1.1233, "step": 16819 }, { - "epoch": 0.47663577885460057, + "epoch": 0.6581109632991627, "grad_norm": 0.0, - "learning_rate": 1.123949072677504e-05, - "loss": 0.9226, + "learning_rate": 5.529275768169077e-06, + "loss": 0.9736, "step": 16820 }, { - "epoch": 0.47666411629686306, + "epoch": 0.6581500899913921, "grad_norm": 0.0, - "learning_rate": 1.1238580009748975e-05, - "loss": 0.8997, + "learning_rate": 5.528142268672659e-06, + "loss": 1.0402, "step": 16821 }, { - "epoch": 0.4766924537391255, + "epoch": 0.6581892166836215, "grad_norm": 0.0, - "learning_rate": 1.1237669282289889e-05, - "loss": 0.9179, + "learning_rate": 5.527008840988676e-06, + "loss": 0.8987, "step": 16822 }, { - "epoch": 0.476720791181388, + "epoch": 0.658228343375851, "grad_norm": 0.0, - "learning_rate": 1.1236758544405454e-05, - "loss": 0.8842, + "learning_rate": 5.525875485135321e-06, + "loss": 0.9654, "step": 16823 }, { - "epoch": 0.47674912862365043, + "epoch": 0.6582674700680804, "grad_norm": 0.0, - "learning_rate": 1.1235847796103345e-05, - "loss": 0.8867, + "learning_rate": 5.524742201130801e-06, + "loss": 1.0745, "step": 16824 }, { - "epoch": 0.47677746606591287, + "epoch": 0.6583065967603099, "grad_norm": 0.0, - "learning_rate": 1.1234937037391227e-05, - "loss": 0.8471, + "learning_rate": 5.523608988993317e-06, + "loss": 1.0413, "step": 16825 }, { - "epoch": 0.47680580350817536, + "epoch": 0.6583457234525393, "grad_norm": 0.0, - "learning_rate": 1.123402626827678e-05, - "loss": 0.938, + "learning_rate": 5.52247584874106e-06, + "loss": 0.8851, "step": 16826 }, { - "epoch": 0.4768341409504378, + "epoch": 0.6583848501447688, "grad_norm": 0.0, - "learning_rate": 1.1233115488767672e-05, - "loss": 0.9326, + "learning_rate": 5.521342780392229e-06, + "loss": 1.0575, "step": 16827 }, { - "epoch": 0.4768624783927003, + "epoch": 0.6584239768369982, "grad_norm": 0.0, - "learning_rate": 1.1232204698871572e-05, - "loss": 0.9019, + "learning_rate": 5.520209783965026e-06, + "loss": 1.1201, "step": 16828 }, { - "epoch": 0.47689081583496273, + "epoch": 0.6584631035292277, "grad_norm": 0.0, - "learning_rate": 1.1231293898596154e-05, - "loss": 0.9343, + "learning_rate": 5.519076859477635e-06, + "loss": 0.98, "step": 16829 }, { - "epoch": 0.47691915327722517, + "epoch": 0.658502230221457, "grad_norm": 0.0, - "learning_rate": 1.123038308794909e-05, - "loss": 0.8741, + "learning_rate": 5.517944006948257e-06, + "loss": 1.0574, "step": 16830 }, { - "epoch": 0.47694749071948767, + "epoch": 0.6585413569136865, "grad_norm": 0.0, - "learning_rate": 1.1229472266938052e-05, - "loss": 0.9242, + "learning_rate": 5.51681122639508e-06, + "loss": 1.1426, "step": 16831 }, { - "epoch": 0.4769758281617501, + "epoch": 0.6585804836059159, "grad_norm": 0.0, - "learning_rate": 1.1228561435570718e-05, - "loss": 1.0056, + "learning_rate": 5.515678517836301e-06, + "loss": 0.9503, "step": 16832 }, { - "epoch": 0.4770041656040126, + "epoch": 0.6586196102981454, "grad_norm": 0.0, - "learning_rate": 1.122765059385475e-05, - "loss": 0.9603, + "learning_rate": 5.514545881290103e-06, + "loss": 1.1164, "step": 16833 }, { - "epoch": 0.47703250304627504, + "epoch": 0.6586587369903748, "grad_norm": 0.0, - "learning_rate": 1.1226739741797825e-05, - "loss": 0.8421, + "learning_rate": 5.51341331677468e-06, + "loss": 0.9835, "step": 16834 }, { - "epoch": 0.47706084048853753, + "epoch": 0.6586978636826043, "grad_norm": 0.0, - "learning_rate": 1.1225828879407617e-05, - "loss": 0.9133, + "learning_rate": 5.51228082430821e-06, + "loss": 0.9919, "step": 16835 }, { - "epoch": 0.47708917793079997, + "epoch": 0.6587369903748337, "grad_norm": 0.0, - "learning_rate": 1.1224918006691793e-05, - "loss": 0.8817, + "learning_rate": 5.511148403908895e-06, + "loss": 1.0128, "step": 16836 }, { - "epoch": 0.4771175153730624, + "epoch": 0.6587761170670632, "grad_norm": 0.0, - "learning_rate": 1.1224007123658034e-05, - "loss": 0.8252, + "learning_rate": 5.510016055594907e-06, + "loss": 0.9721, "step": 16837 }, { - "epoch": 0.4771458528153249, + "epoch": 0.6588152437592926, "grad_norm": 0.0, - "learning_rate": 1.1223096230314008e-05, - "loss": 0.8705, + "learning_rate": 5.50888377938444e-06, + "loss": 1.0777, "step": 16838 }, { - "epoch": 0.47717419025758734, + "epoch": 0.6588543704515221, "grad_norm": 0.0, - "learning_rate": 1.122218532666739e-05, - "loss": 0.9104, + "learning_rate": 5.507751575295662e-06, + "loss": 1.0324, "step": 16839 }, { - "epoch": 0.47720252769984983, + "epoch": 0.6588934971437514, "grad_norm": 0.0, - "learning_rate": 1.122127441272585e-05, - "loss": 0.8453, + "learning_rate": 5.506619443346775e-06, + "loss": 0.9766, "step": 16840 }, { - "epoch": 0.47723086514211227, + "epoch": 0.658932623835981, "grad_norm": 0.0, - "learning_rate": 1.1220363488497059e-05, - "loss": 0.9261, + "learning_rate": 5.505487383555943e-06, + "loss": 0.8975, "step": 16841 }, { - "epoch": 0.4772592025843747, + "epoch": 0.6589717505282103, "grad_norm": 0.0, - "learning_rate": 1.1219452553988696e-05, - "loss": 0.8713, + "learning_rate": 5.504355395941356e-06, + "loss": 1.0982, "step": 16842 }, { - "epoch": 0.4772875400266372, + "epoch": 0.6590108772204398, "grad_norm": 0.0, - "learning_rate": 1.121854160920843e-05, - "loss": 0.8921, + "learning_rate": 5.503223480521185e-06, + "loss": 0.9926, "step": 16843 }, { - "epoch": 0.47731587746889964, + "epoch": 0.6590500039126692, "grad_norm": 0.0, - "learning_rate": 1.1217630654163938e-05, - "loss": 0.8433, + "learning_rate": 5.502091637313609e-06, + "loss": 0.9983, "step": 16844 }, { - "epoch": 0.47734421491116213, + "epoch": 0.6590891306048987, "grad_norm": 0.0, - "learning_rate": 1.1216719688862888e-05, - "loss": 0.803, + "learning_rate": 5.500959866336806e-06, + "loss": 1.0264, "step": 16845 }, { - "epoch": 0.4773725523534246, + "epoch": 0.6591282572971281, "grad_norm": 0.0, - "learning_rate": 1.121580871331296e-05, - "loss": 0.8538, + "learning_rate": 5.499828167608954e-06, + "loss": 1.0754, "step": 16846 }, { - "epoch": 0.47740088979568707, + "epoch": 0.6591673839893576, "grad_norm": 0.0, - "learning_rate": 1.1214897727521821e-05, - "loss": 0.8432, + "learning_rate": 5.4986965411482185e-06, + "loss": 0.9775, "step": 16847 }, { - "epoch": 0.4774292272379495, + "epoch": 0.659206510681587, "grad_norm": 0.0, - "learning_rate": 1.1213986731497146e-05, - "loss": 0.9226, + "learning_rate": 5.497564986972775e-06, + "loss": 0.9908, "step": 16848 }, { - "epoch": 0.47745756468021194, + "epoch": 0.6592456373738164, "grad_norm": 0.0, - "learning_rate": 1.1213075725246612e-05, - "loss": 0.9594, + "learning_rate": 5.496433505100798e-06, + "loss": 0.9592, "step": 16849 }, { - "epoch": 0.47748590212247444, + "epoch": 0.6592847640660459, "grad_norm": 0.0, - "learning_rate": 1.1212164708777889e-05, - "loss": 0.8029, + "learning_rate": 5.495302095550457e-06, + "loss": 0.934, "step": 16850 }, { - "epoch": 0.4775142395647369, + "epoch": 0.6593238907582752, "grad_norm": 0.0, - "learning_rate": 1.1211253682098653e-05, - "loss": 0.9169, + "learning_rate": 5.494170758339918e-06, + "loss": 0.9087, "step": 16851 }, { - "epoch": 0.47754257700699937, + "epoch": 0.6593630174505047, "grad_norm": 0.0, - "learning_rate": 1.1210342645216578e-05, - "loss": 1.0145, + "learning_rate": 5.493039493487349e-06, + "loss": 0.9769, "step": 16852 }, { - "epoch": 0.4775709144492618, + "epoch": 0.6594021441427341, "grad_norm": 0.0, - "learning_rate": 1.120943159813934e-05, - "loss": 0.9758, + "learning_rate": 5.4919083010109185e-06, + "loss": 1.1624, "step": 16853 }, { - "epoch": 0.47759925189152425, + "epoch": 0.6594412708349636, "grad_norm": 0.0, - "learning_rate": 1.1208520540874607e-05, - "loss": 0.9624, + "learning_rate": 5.490777180928797e-06, + "loss": 0.9649, "step": 16854 }, { - "epoch": 0.47762758933378674, + "epoch": 0.659480397527193, "grad_norm": 0.0, - "learning_rate": 1.1207609473430059e-05, - "loss": 0.9109, + "learning_rate": 5.489646133259138e-06, + "loss": 0.9716, "step": 16855 }, { - "epoch": 0.4776559267760492, + "epoch": 0.6595195242194225, "grad_norm": 0.0, - "learning_rate": 1.1206698395813365e-05, - "loss": 0.849, + "learning_rate": 5.488515158020115e-06, + "loss": 1.0275, "step": 16856 }, { - "epoch": 0.47768426421831167, + "epoch": 0.6595586509116519, "grad_norm": 0.0, - "learning_rate": 1.1205787308032205e-05, - "loss": 0.9086, + "learning_rate": 5.4873842552298775e-06, + "loss": 0.9297, "step": 16857 }, { - "epoch": 0.4777126016605741, + "epoch": 0.6595977776038814, "grad_norm": 0.0, - "learning_rate": 1.1204876210094248e-05, - "loss": 0.9687, + "learning_rate": 5.486253424906602e-06, + "loss": 0.8918, "step": 16858 }, { - "epoch": 0.4777409391028366, + "epoch": 0.6596369042961108, "grad_norm": 0.0, - "learning_rate": 1.1203965102007176e-05, - "loss": 0.8574, + "learning_rate": 5.485122667068436e-06, + "loss": 0.9314, "step": 16859 }, { - "epoch": 0.47776927654509904, + "epoch": 0.6596760309883403, "grad_norm": 0.0, - "learning_rate": 1.1203053983778655e-05, - "loss": 0.9717, + "learning_rate": 5.4839919817335474e-06, + "loss": 1.0134, "step": 16860 }, { - "epoch": 0.4777976139873615, + "epoch": 0.6597151576805697, "grad_norm": 0.0, - "learning_rate": 1.1202142855416365e-05, - "loss": 0.9863, + "learning_rate": 5.482861368920084e-06, + "loss": 1.0223, "step": 16861 }, { - "epoch": 0.477825951429624, + "epoch": 0.6597542843727991, "grad_norm": 0.0, - "learning_rate": 1.1201231716927979e-05, - "loss": 0.8576, + "learning_rate": 5.4817308286462065e-06, + "loss": 0.8779, "step": 16862 }, { - "epoch": 0.4778542888718864, + "epoch": 0.6597934110650285, "grad_norm": 0.0, - "learning_rate": 1.120032056832117e-05, - "loss": 0.8726, + "learning_rate": 5.48060036093007e-06, + "loss": 0.9417, "step": 16863 }, { - "epoch": 0.4778826263141489, + "epoch": 0.659832537757258, "grad_norm": 0.0, - "learning_rate": 1.1199409409603618e-05, - "loss": 0.9093, + "learning_rate": 5.4794699657898345e-06, + "loss": 0.9641, "step": 16864 }, { - "epoch": 0.47791096375641134, + "epoch": 0.6598716644494874, "grad_norm": 0.0, - "learning_rate": 1.1198498240782996e-05, - "loss": 0.9316, + "learning_rate": 5.478339643243642e-06, + "loss": 0.8398, "step": 16865 }, { - "epoch": 0.4779393011986738, + "epoch": 0.6599107911417169, "grad_norm": 0.0, - "learning_rate": 1.1197587061866975e-05, - "loss": 0.8649, + "learning_rate": 5.4772093933096485e-06, + "loss": 0.9801, "step": 16866 }, { - "epoch": 0.4779676386409363, + "epoch": 0.6599499178339463, "grad_norm": 0.0, - "learning_rate": 1.1196675872863235e-05, - "loss": 0.8887, + "learning_rate": 5.476079216006007e-06, + "loss": 1.0281, "step": 16867 }, { - "epoch": 0.4779959760831987, + "epoch": 0.6599890445261758, "grad_norm": 0.0, - "learning_rate": 1.119576467377945e-05, - "loss": 0.8893, + "learning_rate": 5.474949111350862e-06, + "loss": 0.9756, "step": 16868 }, { - "epoch": 0.4780243135254612, + "epoch": 0.6600281712184052, "grad_norm": 0.0, - "learning_rate": 1.1194853464623294e-05, - "loss": 0.9022, + "learning_rate": 5.473819079362366e-06, + "loss": 1.026, "step": 16869 }, { - "epoch": 0.47805265096772365, + "epoch": 0.6600672979106347, "grad_norm": 0.0, - "learning_rate": 1.1193942245402443e-05, - "loss": 0.8229, + "learning_rate": 5.472689120058661e-06, + "loss": 0.9906, "step": 16870 }, { - "epoch": 0.47808098840998614, + "epoch": 0.6601064246028641, "grad_norm": 0.0, - "learning_rate": 1.1193031016124576e-05, - "loss": 0.9461, + "learning_rate": 5.471559233457902e-06, + "loss": 0.931, "step": 16871 }, { - "epoch": 0.4781093258522486, + "epoch": 0.6601455512950936, "grad_norm": 0.0, - "learning_rate": 1.1192119776797366e-05, - "loss": 0.9043, + "learning_rate": 5.470429419578221e-06, + "loss": 0.9519, "step": 16872 }, { - "epoch": 0.478137663294511, + "epoch": 0.6601846779873229, "grad_norm": 0.0, - "learning_rate": 1.1191208527428488e-05, - "loss": 0.9422, + "learning_rate": 5.469299678437774e-06, + "loss": 1.0812, "step": 16873 }, { - "epoch": 0.4781660007367735, + "epoch": 0.6602238046795524, "grad_norm": 0.0, - "learning_rate": 1.1190297268025614e-05, - "loss": 0.9201, + "learning_rate": 5.468170010054691e-06, + "loss": 0.9411, "step": 16874 }, { - "epoch": 0.47819433817903595, + "epoch": 0.6602629313717818, "grad_norm": 0.0, - "learning_rate": 1.1189385998596429e-05, - "loss": 0.9061, + "learning_rate": 5.46704041444712e-06, + "loss": 1.0525, "step": 16875 }, { - "epoch": 0.47822267562129844, + "epoch": 0.6603020580640113, "grad_norm": 0.0, - "learning_rate": 1.1188474719148601e-05, - "loss": 0.9852, + "learning_rate": 5.465910891633199e-06, + "loss": 1.1063, "step": 16876 }, { - "epoch": 0.4782510130635609, + "epoch": 0.6603411847562407, "grad_norm": 0.0, - "learning_rate": 1.1187563429689809e-05, - "loss": 1.0067, + "learning_rate": 5.464781441631073e-06, + "loss": 0.9021, "step": 16877 }, { - "epoch": 0.4782793505058233, + "epoch": 0.6603803114484701, "grad_norm": 0.0, - "learning_rate": 1.1186652130227734e-05, - "loss": 0.8394, + "learning_rate": 5.463652064458869e-06, + "loss": 0.8469, "step": 16878 }, { - "epoch": 0.4783076879480858, + "epoch": 0.6604194381406996, "grad_norm": 0.0, - "learning_rate": 1.1185740820770042e-05, - "loss": 0.9331, + "learning_rate": 5.4625227601347276e-06, + "loss": 0.9561, "step": 16879 }, { - "epoch": 0.47833602539034825, + "epoch": 0.660458564832929, "grad_norm": 0.0, - "learning_rate": 1.1184829501324416e-05, - "loss": 0.9074, + "learning_rate": 5.461393528676786e-06, + "loss": 0.956, "step": 16880 }, { - "epoch": 0.47836436283261075, + "epoch": 0.6604976915251585, "grad_norm": 0.0, - "learning_rate": 1.118391817189853e-05, - "loss": 0.9812, + "learning_rate": 5.460264370103181e-06, + "loss": 1.0688, "step": 16881 }, { - "epoch": 0.4783927002748732, + "epoch": 0.6605368182173879, "grad_norm": 0.0, - "learning_rate": 1.1183006832500065e-05, - "loss": 0.8822, + "learning_rate": 5.459135284432038e-06, + "loss": 0.9637, "step": 16882 }, { - "epoch": 0.4784210377171357, + "epoch": 0.6605759449096174, "grad_norm": 0.0, - "learning_rate": 1.1182095483136692e-05, - "loss": 0.9846, + "learning_rate": 5.458006271681495e-06, + "loss": 0.8733, "step": 16883 }, { - "epoch": 0.4784493751593981, + "epoch": 0.6606150716018467, "grad_norm": 0.0, - "learning_rate": 1.1181184123816092e-05, - "loss": 0.9463, + "learning_rate": 5.456877331869674e-06, + "loss": 0.9594, "step": 16884 }, { - "epoch": 0.47847771260166055, + "epoch": 0.6606541982940762, "grad_norm": 0.0, - "learning_rate": 1.1180272754545939e-05, - "loss": 0.8222, + "learning_rate": 5.455748465014716e-06, + "loss": 0.9806, "step": 16885 }, { - "epoch": 0.47850605004392305, + "epoch": 0.6606933249863056, "grad_norm": 0.0, - "learning_rate": 1.1179361375333907e-05, - "loss": 0.8639, + "learning_rate": 5.454619671134741e-06, + "loss": 0.9095, "step": 16886 }, { - "epoch": 0.4785343874861855, + "epoch": 0.6607324516785351, "grad_norm": 0.0, - "learning_rate": 1.1178449986187679e-05, - "loss": 0.9442, + "learning_rate": 5.453490950247882e-06, + "loss": 1.0411, "step": 16887 }, { - "epoch": 0.478562724928448, + "epoch": 0.6607715783707645, "grad_norm": 0.0, - "learning_rate": 1.1177538587114926e-05, - "loss": 0.8234, + "learning_rate": 5.452362302372253e-06, + "loss": 0.9865, "step": 16888 }, { - "epoch": 0.4785910623707104, + "epoch": 0.660810705062994, "grad_norm": 0.0, - "learning_rate": 1.1176627178123332e-05, - "loss": 0.8231, + "learning_rate": 5.451233727525995e-06, + "loss": 0.9323, "step": 16889 }, { - "epoch": 0.47861939981297286, + "epoch": 0.6608498317552234, "grad_norm": 0.0, - "learning_rate": 1.117571575922057e-05, - "loss": 0.9078, + "learning_rate": 5.450105225727219e-06, + "loss": 0.9334, "step": 16890 }, { - "epoch": 0.47864773725523535, + "epoch": 0.6608889584474529, "grad_norm": 0.0, - "learning_rate": 1.1174804330414315e-05, - "loss": 0.9211, + "learning_rate": 5.448976796994057e-06, + "loss": 1.0837, "step": 16891 }, { - "epoch": 0.4786760746974978, + "epoch": 0.6609280851396823, "grad_norm": 0.0, - "learning_rate": 1.1173892891712251e-05, - "loss": 0.9332, + "learning_rate": 5.447848441344618e-06, + "loss": 0.9916, "step": 16892 }, { - "epoch": 0.4787044121397603, + "epoch": 0.6609672118319118, "grad_norm": 0.0, - "learning_rate": 1.1172981443122048e-05, - "loss": 0.8226, + "learning_rate": 5.446720158797032e-06, + "loss": 1.0785, "step": 16893 }, { - "epoch": 0.4787327495820227, + "epoch": 0.6610063385241411, "grad_norm": 0.0, - "learning_rate": 1.1172069984651388e-05, - "loss": 0.9937, + "learning_rate": 5.4455919493694134e-06, + "loss": 0.9973, "step": 16894 }, { - "epoch": 0.4787610870242852, + "epoch": 0.6610454652163706, "grad_norm": 0.0, - "learning_rate": 1.1171158516307944e-05, - "loss": 0.9545, + "learning_rate": 5.444463813079885e-06, + "loss": 0.9869, "step": 16895 }, { - "epoch": 0.47878942446654765, + "epoch": 0.6610845919086, "grad_norm": 0.0, - "learning_rate": 1.1170247038099402e-05, - "loss": 0.8156, + "learning_rate": 5.443335749946556e-06, + "loss": 0.9391, "step": 16896 }, { - "epoch": 0.4788177619088101, + "epoch": 0.6611237186008295, "grad_norm": 0.0, - "learning_rate": 1.1169335550033434e-05, - "loss": 0.9434, + "learning_rate": 5.442207759987545e-06, + "loss": 1.0074, "step": 16897 }, { - "epoch": 0.4788460993510726, + "epoch": 0.6611628452930589, "grad_norm": 0.0, - "learning_rate": 1.1168424052117717e-05, - "loss": 0.8502, + "learning_rate": 5.441079843220965e-06, + "loss": 0.9902, "step": 16898 }, { - "epoch": 0.478874436793335, + "epoch": 0.6612019719852884, "grad_norm": 0.0, - "learning_rate": 1.1167512544359929e-05, - "loss": 0.8835, + "learning_rate": 5.439951999664934e-06, + "loss": 0.9266, "step": 16899 }, { - "epoch": 0.4789027742355975, + "epoch": 0.6612410986775178, "grad_norm": 0.0, - "learning_rate": 1.1166601026767749e-05, - "loss": 0.9721, + "learning_rate": 5.438824229337558e-06, + "loss": 1.0476, "step": 16900 }, { - "epoch": 0.47893111167785996, + "epoch": 0.6612802253697473, "grad_norm": 0.0, - "learning_rate": 1.1165689499348857e-05, - "loss": 0.9033, + "learning_rate": 5.437696532256951e-06, + "loss": 0.8206, "step": 16901 }, { - "epoch": 0.4789594491201224, + "epoch": 0.6613193520619767, "grad_norm": 0.0, - "learning_rate": 1.1164777962110929e-05, - "loss": 0.7971, + "learning_rate": 5.4365689084412144e-06, + "loss": 0.7554, "step": 16902 }, { - "epoch": 0.4789877865623849, + "epoch": 0.6613584787542062, "grad_norm": 0.0, - "learning_rate": 1.1163866415061643e-05, - "loss": 0.8769, + "learning_rate": 5.4354413579084704e-06, + "loss": 1.0834, "step": 16903 }, { - "epoch": 0.4790161240046473, + "epoch": 0.6613976054464356, "grad_norm": 0.0, - "learning_rate": 1.1162954858208682e-05, - "loss": 0.8651, + "learning_rate": 5.434313880676813e-06, + "loss": 0.9996, "step": 16904 }, { - "epoch": 0.4790444614469098, + "epoch": 0.661436732138665, "grad_norm": 0.0, - "learning_rate": 1.1162043291559716e-05, - "loss": 0.9728, + "learning_rate": 5.43318647676436e-06, + "loss": 1.0643, "step": 16905 }, { - "epoch": 0.47907279888917226, + "epoch": 0.6614758588308944, "grad_norm": 0.0, - "learning_rate": 1.1161131715122432e-05, - "loss": 0.8882, + "learning_rate": 5.432059146189205e-06, + "loss": 0.9694, "step": 16906 }, { - "epoch": 0.47910113633143475, + "epoch": 0.6615149855231238, "grad_norm": 0.0, - "learning_rate": 1.1160220128904498e-05, - "loss": 0.8475, + "learning_rate": 5.430931888969455e-06, + "loss": 1.0842, "step": 16907 }, { - "epoch": 0.4791294737736972, + "epoch": 0.6615541122153533, "grad_norm": 0.0, - "learning_rate": 1.1159308532913601e-05, - "loss": 0.8837, + "learning_rate": 5.429804705123215e-06, + "loss": 0.9955, "step": 16908 }, { - "epoch": 0.47915781121595963, + "epoch": 0.6615932389075827, "grad_norm": 0.0, - "learning_rate": 1.115839692715742e-05, - "loss": 0.8534, + "learning_rate": 5.428677594668588e-06, + "loss": 0.9458, "step": 16909 }, { - "epoch": 0.4791861486582221, + "epoch": 0.6616323655998122, "grad_norm": 0.0, - "learning_rate": 1.1157485311643632e-05, - "loss": 0.8678, + "learning_rate": 5.427550557623667e-06, + "loss": 0.8956, "step": 16910 }, { - "epoch": 0.47921448610048456, + "epoch": 0.6616714922920416, "grad_norm": 0.0, - "learning_rate": 1.1156573686379915e-05, - "loss": 0.9646, + "learning_rate": 5.4264235940065536e-06, + "loss": 1.0416, "step": 16911 }, { - "epoch": 0.47924282354274705, + "epoch": 0.6617106189842711, "grad_norm": 0.0, - "learning_rate": 1.1155662051373946e-05, - "loss": 0.9642, + "learning_rate": 5.42529670383535e-06, + "loss": 0.9307, "step": 16912 }, { - "epoch": 0.4792711609850095, + "epoch": 0.6617497456765005, "grad_norm": 0.0, - "learning_rate": 1.115475040663341e-05, - "loss": 0.769, + "learning_rate": 5.424169887128145e-06, + "loss": 0.9997, "step": 16913 }, { - "epoch": 0.47929949842727193, + "epoch": 0.66178887236873, "grad_norm": 0.0, - "learning_rate": 1.115383875216598e-05, - "loss": 0.9793, + "learning_rate": 5.4230431439030395e-06, + "loss": 1.0213, "step": 16914 }, { - "epoch": 0.4793278358695344, + "epoch": 0.6618279990609593, "grad_norm": 0.0, - "learning_rate": 1.1152927087979337e-05, - "loss": 0.9928, + "learning_rate": 5.421916474178124e-06, + "loss": 0.9577, "step": 16915 }, { - "epoch": 0.47935617331179686, + "epoch": 0.6618671257531888, "grad_norm": 0.0, - "learning_rate": 1.115201541408116e-05, - "loss": 0.9148, + "learning_rate": 5.420789877971497e-06, + "loss": 0.9749, "step": 16916 }, { - "epoch": 0.47938451075405936, + "epoch": 0.6619062524454182, "grad_norm": 0.0, - "learning_rate": 1.1151103730479134e-05, - "loss": 0.9547, + "learning_rate": 5.419663355301245e-06, + "loss": 0.9327, "step": 16917 }, { - "epoch": 0.4794128481963218, + "epoch": 0.6619453791376477, "grad_norm": 0.0, - "learning_rate": 1.1150192037180932e-05, - "loss": 0.9595, + "learning_rate": 5.418536906185462e-06, + "loss": 1.0056, "step": 16918 }, { - "epoch": 0.4794411856385843, + "epoch": 0.6619845058298771, "grad_norm": 0.0, - "learning_rate": 1.1149280334194238e-05, - "loss": 0.8333, + "learning_rate": 5.417410530642229e-06, + "loss": 0.9967, "step": 16919 }, { - "epoch": 0.4794695230808467, + "epoch": 0.6620236325221066, "grad_norm": 0.0, - "learning_rate": 1.1148368621526721e-05, - "loss": 0.9108, + "learning_rate": 5.416284228689647e-06, + "loss": 0.9566, "step": 16920 }, { - "epoch": 0.47949786052310917, + "epoch": 0.662062759214336, "grad_norm": 0.0, - "learning_rate": 1.1147456899186073e-05, - "loss": 0.955, + "learning_rate": 5.415158000345792e-06, + "loss": 0.9918, "step": 16921 }, { - "epoch": 0.47952619796537166, + "epoch": 0.6621018859065655, "grad_norm": 0.0, - "learning_rate": 1.1146545167179972e-05, - "loss": 1.0167, + "learning_rate": 5.41403184562876e-06, + "loss": 1.0207, "step": 16922 }, { - "epoch": 0.4795545354076341, + "epoch": 0.6621410125987949, "grad_norm": 0.0, - "learning_rate": 1.1145633425516094e-05, - "loss": 0.8796, + "learning_rate": 5.412905764556626e-06, + "loss": 1.0393, "step": 16923 }, { - "epoch": 0.4795828728498966, + "epoch": 0.6621801392910244, "grad_norm": 0.0, - "learning_rate": 1.1144721674202116e-05, - "loss": 0.9585, + "learning_rate": 5.411779757147476e-06, + "loss": 0.9254, "step": 16924 }, { - "epoch": 0.47961121029215903, + "epoch": 0.6622192659832538, "grad_norm": 0.0, - "learning_rate": 1.1143809913245727e-05, - "loss": 0.8071, + "learning_rate": 5.4106538234193936e-06, + "loss": 0.9221, "step": 16925 }, { - "epoch": 0.47963954773442147, + "epoch": 0.6622583926754833, "grad_norm": 0.0, - "learning_rate": 1.1142898142654603e-05, - "loss": 0.922, + "learning_rate": 5.4095279633904665e-06, + "loss": 0.9333, "step": 16926 }, { - "epoch": 0.47966788517668396, + "epoch": 0.6622975193677126, "grad_norm": 0.0, - "learning_rate": 1.1141986362436419e-05, - "loss": 0.9818, + "learning_rate": 5.408402177078762e-06, + "loss": 0.9054, "step": 16927 }, { - "epoch": 0.4796962226189464, + "epoch": 0.6623366460599421, "grad_norm": 0.0, - "learning_rate": 1.1141074572598863e-05, - "loss": 0.8816, + "learning_rate": 5.407276464502366e-06, + "loss": 0.9657, "step": 16928 }, { - "epoch": 0.4797245600612089, + "epoch": 0.6623757727521715, "grad_norm": 0.0, - "learning_rate": 1.1140162773149612e-05, - "loss": 1.0475, + "learning_rate": 5.406150825679355e-06, + "loss": 0.9657, "step": 16929 }, { - "epoch": 0.47975289750347133, + "epoch": 0.662414899444401, "grad_norm": 0.0, - "learning_rate": 1.1139250964096346e-05, - "loss": 0.8739, + "learning_rate": 5.405025260627809e-06, + "loss": 1.1079, "step": 16930 }, { - "epoch": 0.4797812349457338, + "epoch": 0.6624540261366304, "grad_norm": 0.0, - "learning_rate": 1.1138339145446746e-05, - "loss": 0.8537, + "learning_rate": 5.403899769365795e-06, + "loss": 0.9389, "step": 16931 }, { - "epoch": 0.47980957238799626, + "epoch": 0.6624931528288599, "grad_norm": 0.0, - "learning_rate": 1.1137427317208494e-05, - "loss": 0.9504, + "learning_rate": 5.402774351911395e-06, + "loss": 1.1172, "step": 16932 }, { - "epoch": 0.4798379098302587, + "epoch": 0.6625322795210893, "grad_norm": 0.0, - "learning_rate": 1.1136515479389267e-05, - "loss": 0.8184, + "learning_rate": 5.401649008282673e-06, + "loss": 1.0992, "step": 16933 }, { - "epoch": 0.4798662472725212, + "epoch": 0.6625714062133188, "grad_norm": 0.0, - "learning_rate": 1.1135603631996748e-05, - "loss": 0.8716, + "learning_rate": 5.400523738497711e-06, + "loss": 0.9998, "step": 16934 }, { - "epoch": 0.47989458471478363, + "epoch": 0.6626105329055482, "grad_norm": 0.0, - "learning_rate": 1.113469177503862e-05, - "loss": 0.9319, + "learning_rate": 5.399398542574571e-06, + "loss": 1.068, "step": 16935 }, { - "epoch": 0.47992292215704613, + "epoch": 0.6626496595977776, "grad_norm": 0.0, - "learning_rate": 1.1133779908522561e-05, - "loss": 0.7664, + "learning_rate": 5.39827342053133e-06, + "loss": 0.9137, "step": 16936 }, { - "epoch": 0.47995125959930857, + "epoch": 0.662688786290007, "grad_norm": 0.0, - "learning_rate": 1.1132868032456252e-05, - "loss": 0.9008, + "learning_rate": 5.397148372386043e-06, + "loss": 1.0495, "step": 16937 }, { - "epoch": 0.479979597041571, + "epoch": 0.6627279129822364, "grad_norm": 0.0, - "learning_rate": 1.1131956146847379e-05, - "loss": 0.8236, + "learning_rate": 5.396023398156794e-06, + "loss": 0.9342, "step": 16938 }, { - "epoch": 0.4800079344838335, + "epoch": 0.6627670396744659, "grad_norm": 0.0, - "learning_rate": 1.1131044251703615e-05, - "loss": 0.9138, + "learning_rate": 5.3948984978616355e-06, + "loss": 1.0022, "step": 16939 }, { - "epoch": 0.48003627192609594, + "epoch": 0.6628061663666953, "grad_norm": 0.0, - "learning_rate": 1.1130132347032646e-05, - "loss": 0.8618, + "learning_rate": 5.3937736715186415e-06, + "loss": 1.032, "step": 16940 }, { - "epoch": 0.48006460936835843, + "epoch": 0.6628452930589248, "grad_norm": 0.0, - "learning_rate": 1.1129220432842149e-05, - "loss": 0.9576, + "learning_rate": 5.392648919145867e-06, + "loss": 0.8624, "step": 16941 }, { - "epoch": 0.48009294681062087, + "epoch": 0.6628844197511542, "grad_norm": 0.0, - "learning_rate": 1.1128308509139814e-05, - "loss": 0.8092, + "learning_rate": 5.391524240761376e-06, + "loss": 0.8524, "step": 16942 }, { - "epoch": 0.48012128425288336, + "epoch": 0.6629235464433837, "grad_norm": 0.0, - "learning_rate": 1.1127396575933315e-05, - "loss": 1.048, + "learning_rate": 5.390399636383231e-06, + "loss": 1.0098, "step": 16943 }, { - "epoch": 0.4801496216951458, + "epoch": 0.6629626731356131, "grad_norm": 0.0, - "learning_rate": 1.112648463323034e-05, - "loss": 0.9037, + "learning_rate": 5.3892751060294966e-06, + "loss": 1.0099, "step": 16944 }, { - "epoch": 0.48017795913740824, + "epoch": 0.6630017998278426, "grad_norm": 0.0, - "learning_rate": 1.1125572681038561e-05, - "loss": 0.8025, + "learning_rate": 5.3881506497182225e-06, + "loss": 1.0596, "step": 16945 }, { - "epoch": 0.48020629657967073, + "epoch": 0.663040926520072, "grad_norm": 0.0, - "learning_rate": 1.1124660719365669e-05, - "loss": 0.8888, + "learning_rate": 5.387026267467469e-06, + "loss": 0.918, "step": 16946 }, { - "epoch": 0.48023463402193317, + "epoch": 0.6630800532123015, "grad_norm": 0.0, - "learning_rate": 1.112374874821934e-05, - "loss": 0.7786, + "learning_rate": 5.385901959295293e-06, + "loss": 1.0607, "step": 16947 }, { - "epoch": 0.48026297146419566, + "epoch": 0.6631191799045308, "grad_norm": 0.0, - "learning_rate": 1.1122836767607259e-05, - "loss": 1.0224, + "learning_rate": 5.384777725219754e-06, + "loss": 1.0435, "step": 16948 }, { - "epoch": 0.4802913089064581, + "epoch": 0.6631583065967603, "grad_norm": 0.0, - "learning_rate": 1.1121924777537108e-05, - "loss": 0.8797, + "learning_rate": 5.3836535652589e-06, + "loss": 1.0495, "step": 16949 }, { - "epoch": 0.48031964634872054, + "epoch": 0.6631974332889897, "grad_norm": 0.0, - "learning_rate": 1.1121012778016567e-05, - "loss": 0.8808, + "learning_rate": 5.382529479430786e-06, + "loss": 1.0121, "step": 16950 }, { - "epoch": 0.48034798379098304, + "epoch": 0.6632365599812192, "grad_norm": 0.0, - "learning_rate": 1.112010076905332e-05, - "loss": 0.9432, + "learning_rate": 5.38140546775346e-06, + "loss": 0.9835, "step": 16951 }, { - "epoch": 0.4803763212332455, + "epoch": 0.6632756866734486, "grad_norm": 0.0, - "learning_rate": 1.1119188750655047e-05, - "loss": 0.9901, + "learning_rate": 5.380281530244974e-06, + "loss": 0.9971, "step": 16952 }, { - "epoch": 0.48040465867550797, + "epoch": 0.6633148133656781, "grad_norm": 0.0, - "learning_rate": 1.111827672282943e-05, - "loss": 0.9982, + "learning_rate": 5.379157666923378e-06, + "loss": 1.031, "step": 16953 }, { - "epoch": 0.4804329961177704, + "epoch": 0.6633539400579075, "grad_norm": 0.0, - "learning_rate": 1.1117364685584154e-05, - "loss": 0.9893, + "learning_rate": 5.378033877806724e-06, + "loss": 1.0156, "step": 16954 }, { - "epoch": 0.48046133356003284, + "epoch": 0.663393066750137, "grad_norm": 0.0, - "learning_rate": 1.1116452638926903e-05, - "loss": 0.8273, + "learning_rate": 5.37691016291305e-06, + "loss": 1.0329, "step": 16955 }, { - "epoch": 0.48048967100229534, + "epoch": 0.6634321934423664, "grad_norm": 0.0, - "learning_rate": 1.1115540582865357e-05, - "loss": 0.8722, + "learning_rate": 5.375786522260407e-06, + "loss": 0.9719, "step": 16956 }, { - "epoch": 0.4805180084445578, + "epoch": 0.6634713201345959, "grad_norm": 0.0, - "learning_rate": 1.1114628517407193e-05, - "loss": 0.9395, + "learning_rate": 5.374662955866841e-06, + "loss": 0.9469, "step": 16957 }, { - "epoch": 0.48054634588682027, + "epoch": 0.6635104468268253, "grad_norm": 0.0, - "learning_rate": 1.1113716442560102e-05, - "loss": 0.803, + "learning_rate": 5.373539463750388e-06, + "loss": 0.9842, "step": 16958 }, { - "epoch": 0.4805746833290827, + "epoch": 0.6635495735190547, "grad_norm": 0.0, - "learning_rate": 1.1112804358331766e-05, - "loss": 0.9579, + "learning_rate": 5.372416045929093e-06, + "loss": 1.1381, "step": 16959 }, { - "epoch": 0.4806030207713452, + "epoch": 0.6635887002112841, "grad_norm": 0.0, - "learning_rate": 1.1111892264729862e-05, - "loss": 0.8616, + "learning_rate": 5.371292702420998e-06, + "loss": 1.0221, "step": 16960 }, { - "epoch": 0.48063135821360764, + "epoch": 0.6636278269035136, "grad_norm": 0.0, - "learning_rate": 1.1110980161762078e-05, - "loss": 0.9796, + "learning_rate": 5.370169433244148e-06, + "loss": 0.9287, "step": 16961 }, { - "epoch": 0.4806596956558701, + "epoch": 0.663666953595743, "grad_norm": 0.0, - "learning_rate": 1.1110068049436098e-05, - "loss": 0.9569, + "learning_rate": 5.369046238416569e-06, + "loss": 1.0178, "step": 16962 }, { - "epoch": 0.48068803309813257, + "epoch": 0.6637060802879724, "grad_norm": 0.0, - "learning_rate": 1.11091559277596e-05, - "loss": 0.8795, + "learning_rate": 5.367923117956309e-06, + "loss": 0.994, "step": 16963 }, { - "epoch": 0.480716370540395, + "epoch": 0.6637452069802019, "grad_norm": 0.0, - "learning_rate": 1.1108243796740272e-05, - "loss": 0.9362, + "learning_rate": 5.366800071881393e-06, + "loss": 1.0533, "step": 16964 }, { - "epoch": 0.4807447079826575, + "epoch": 0.6637843336724313, "grad_norm": 0.0, - "learning_rate": 1.1107331656385793e-05, - "loss": 0.8832, + "learning_rate": 5.365677100209868e-06, + "loss": 1.0187, "step": 16965 }, { - "epoch": 0.48077304542491994, + "epoch": 0.6638234603646608, "grad_norm": 0.0, - "learning_rate": 1.110641950670385e-05, - "loss": 0.9135, + "learning_rate": 5.3645542029597594e-06, + "loss": 1.0618, "step": 16966 }, { - "epoch": 0.4808013828671824, + "epoch": 0.6638625870568902, "grad_norm": 0.0, - "learning_rate": 1.110550734770212e-05, - "loss": 0.9148, + "learning_rate": 5.363431380149104e-06, + "loss": 0.9091, "step": 16967 }, { - "epoch": 0.4808297203094449, + "epoch": 0.6639017137491197, "grad_norm": 0.0, - "learning_rate": 1.1104595179388295e-05, - "loss": 0.854, + "learning_rate": 5.362308631795925e-06, + "loss": 0.9224, "step": 16968 }, { - "epoch": 0.4808580577517073, + "epoch": 0.663940840441349, "grad_norm": 0.0, - "learning_rate": 1.1103683001770055e-05, - "loss": 0.8696, + "learning_rate": 5.361185957918264e-06, + "loss": 0.9641, "step": 16969 }, { - "epoch": 0.4808863951939698, + "epoch": 0.6639799671335785, "grad_norm": 0.0, - "learning_rate": 1.1102770814855081e-05, - "loss": 0.9901, + "learning_rate": 5.360063358534141e-06, + "loss": 1.0712, "step": 16970 }, { - "epoch": 0.48091473263623224, + "epoch": 0.6640190938258079, "grad_norm": 0.0, - "learning_rate": 1.1101858618651062e-05, - "loss": 0.9749, + "learning_rate": 5.35894083366159e-06, + "loss": 1.0526, "step": 16971 }, { - "epoch": 0.48094307007849474, + "epoch": 0.6640582205180374, "grad_norm": 0.0, - "learning_rate": 1.1100946413165677e-05, - "loss": 0.8766, + "learning_rate": 5.357818383318629e-06, + "loss": 1.0618, "step": 16972 }, { - "epoch": 0.4809714075207572, + "epoch": 0.6640973472102668, "grad_norm": 0.0, - "learning_rate": 1.110003419840661e-05, - "loss": 0.9934, + "learning_rate": 5.356696007523288e-06, + "loss": 1.0168, "step": 16973 }, { - "epoch": 0.4809997449630196, + "epoch": 0.6641364739024963, "grad_norm": 0.0, - "learning_rate": 1.1099121974381546e-05, - "loss": 0.9608, + "learning_rate": 5.355573706293591e-06, + "loss": 0.9692, "step": 16974 }, { - "epoch": 0.4810280824052821, + "epoch": 0.6641756005947257, "grad_norm": 0.0, - "learning_rate": 1.1098209741098173e-05, - "loss": 0.8389, + "learning_rate": 5.354451479647564e-06, + "loss": 1.0828, "step": 16975 }, { - "epoch": 0.48105641984754455, + "epoch": 0.6642147272869552, "grad_norm": 0.0, - "learning_rate": 1.109729749856417e-05, - "loss": 0.942, + "learning_rate": 5.353329327603222e-06, + "loss": 1.1445, "step": 16976 }, { - "epoch": 0.48108475728980704, + "epoch": 0.6642538539791846, "grad_norm": 0.0, - "learning_rate": 1.1096385246787225e-05, - "loss": 0.9251, + "learning_rate": 5.3522072501785884e-06, + "loss": 0.8965, "step": 16977 }, { - "epoch": 0.4811130947320695, + "epoch": 0.6642929806714141, "grad_norm": 0.0, - "learning_rate": 1.1095472985775015e-05, - "loss": 0.8705, + "learning_rate": 5.351085247391681e-06, + "loss": 0.9734, "step": 16978 }, { - "epoch": 0.4811414321743319, + "epoch": 0.6643321073636435, "grad_norm": 0.0, - "learning_rate": 1.1094560715535232e-05, - "loss": 0.92, + "learning_rate": 5.3499633192605245e-06, + "loss": 1.0382, "step": 16979 }, { - "epoch": 0.4811697696165944, + "epoch": 0.664371234055873, "grad_norm": 0.0, - "learning_rate": 1.1093648436075558e-05, - "loss": 0.9152, + "learning_rate": 5.348841465803125e-06, + "loss": 0.9322, "step": 16980 }, { - "epoch": 0.48119810705885685, + "epoch": 0.6644103607481023, "grad_norm": 0.0, - "learning_rate": 1.1092736147403675e-05, - "loss": 0.9532, + "learning_rate": 5.34771968703751e-06, + "loss": 1.0129, "step": 16981 }, { - "epoch": 0.48122644450111934, + "epoch": 0.6644494874403318, "grad_norm": 0.0, - "learning_rate": 1.109182384952727e-05, - "loss": 1.0055, + "learning_rate": 5.346597982981676e-06, + "loss": 1.0432, "step": 16982 }, { - "epoch": 0.4812547819433818, + "epoch": 0.6644886141325612, "grad_norm": 0.0, - "learning_rate": 1.109091154245403e-05, - "loss": 0.8491, + "learning_rate": 5.345476353653656e-06, + "loss": 1.0993, "step": 16983 }, { - "epoch": 0.4812831193856443, + "epoch": 0.6645277408247907, "grad_norm": 0.0, - "learning_rate": 1.1089999226191637e-05, - "loss": 0.9502, + "learning_rate": 5.344354799071451e-06, + "loss": 0.9779, "step": 16984 }, { - "epoch": 0.4813114568279067, + "epoch": 0.6645668675170201, "grad_norm": 0.0, - "learning_rate": 1.1089086900747774e-05, - "loss": 0.9189, + "learning_rate": 5.343233319253076e-06, + "loss": 1.0582, "step": 16985 }, { - "epoch": 0.48133979427016915, + "epoch": 0.6646059942092496, "grad_norm": 0.0, - "learning_rate": 1.1088174566130126e-05, - "loss": 0.834, + "learning_rate": 5.342111914216532e-06, + "loss": 1.0335, "step": 16986 }, { - "epoch": 0.48136813171243165, + "epoch": 0.664645120901479, "grad_norm": 0.0, - "learning_rate": 1.108726222234638e-05, - "loss": 0.92, + "learning_rate": 5.340990583979841e-06, + "loss": 0.9436, "step": 16987 }, { - "epoch": 0.4813964691546941, + "epoch": 0.6646842475937085, "grad_norm": 0.0, - "learning_rate": 1.1086349869404222e-05, - "loss": 0.8868, + "learning_rate": 5.339869328561e-06, + "loss": 1.0912, "step": 16988 }, { - "epoch": 0.4814248065969566, + "epoch": 0.6647233742859379, "grad_norm": 0.0, - "learning_rate": 1.108543750731134e-05, - "loss": 0.9213, + "learning_rate": 5.338748147978022e-06, + "loss": 1.1313, "step": 16989 }, { - "epoch": 0.481453144039219, + "epoch": 0.6647625009781674, "grad_norm": 0.0, - "learning_rate": 1.1084525136075408e-05, - "loss": 0.9911, + "learning_rate": 5.337627042248904e-06, + "loss": 1.1251, "step": 16990 }, { - "epoch": 0.48148148148148145, + "epoch": 0.6648016276703967, "grad_norm": 0.0, - "learning_rate": 1.1083612755704121e-05, - "loss": 0.9192, + "learning_rate": 5.336506011391653e-06, + "loss": 1.023, "step": 16991 }, { - "epoch": 0.48150981892374395, + "epoch": 0.6648407543626261, "grad_norm": 0.0, - "learning_rate": 1.1082700366205157e-05, - "loss": 0.9919, + "learning_rate": 5.3353850554242715e-06, + "loss": 1.0032, "step": 16992 }, { - "epoch": 0.4815381563660064, + "epoch": 0.6648798810548556, "grad_norm": 0.0, - "learning_rate": 1.108178796758621e-05, - "loss": 0.8903, + "learning_rate": 5.334264174364766e-06, + "loss": 1.0211, "step": 16993 }, { - "epoch": 0.4815664938082689, + "epoch": 0.664919007747085, "grad_norm": 0.0, - "learning_rate": 1.1080875559854962e-05, - "loss": 0.8582, + "learning_rate": 5.333143368231126e-06, + "loss": 0.9822, "step": 16994 }, { - "epoch": 0.4815948312505313, + "epoch": 0.6649581344393145, "grad_norm": 0.0, - "learning_rate": 1.1079963143019097e-05, - "loss": 0.9459, + "learning_rate": 5.332022637041356e-06, + "loss": 0.9796, "step": 16995 }, { - "epoch": 0.4816231686927938, + "epoch": 0.6649972611315439, "grad_norm": 0.0, - "learning_rate": 1.1079050717086301e-05, - "loss": 0.8543, + "learning_rate": 5.330901980813459e-06, + "loss": 1.1103, "step": 16996 }, { - "epoch": 0.48165150613505625, + "epoch": 0.6650363878237734, "grad_norm": 0.0, - "learning_rate": 1.107813828206426e-05, - "loss": 0.9128, + "learning_rate": 5.329781399565419e-06, + "loss": 1.0421, "step": 16997 }, { - "epoch": 0.4816798435773187, + "epoch": 0.6650755145160028, "grad_norm": 0.0, - "learning_rate": 1.1077225837960658e-05, - "loss": 0.8656, + "learning_rate": 5.328660893315238e-06, + "loss": 0.9851, "step": 16998 }, { - "epoch": 0.4817081810195812, + "epoch": 0.6651146412082323, "grad_norm": 0.0, - "learning_rate": 1.1076313384783183e-05, - "loss": 0.85, + "learning_rate": 5.327540462080916e-06, + "loss": 1.0786, "step": 16999 }, { - "epoch": 0.4817365184618436, + "epoch": 0.6651537679004617, "grad_norm": 0.0, - "learning_rate": 1.1075400922539525e-05, - "loss": 0.8867, + "learning_rate": 5.326420105880432e-06, + "loss": 0.9821, "step": 17000 }, { - "epoch": 0.4817648559041061, + "epoch": 0.6651928945926912, "grad_norm": 0.0, - "learning_rate": 1.1074488451237364e-05, - "loss": 0.8963, + "learning_rate": 5.325299824731786e-06, + "loss": 0.9854, "step": 17001 }, { - "epoch": 0.48179319334636855, + "epoch": 0.6652320212849205, "grad_norm": 0.0, - "learning_rate": 1.1073575970884387e-05, - "loss": 0.8833, + "learning_rate": 5.324179618652971e-06, + "loss": 1.0477, "step": 17002 }, { - "epoch": 0.481821530788631, + "epoch": 0.66527114797715, "grad_norm": 0.0, - "learning_rate": 1.107266348148828e-05, - "loss": 0.8476, + "learning_rate": 5.323059487661969e-06, + "loss": 1.0381, "step": 17003 }, { - "epoch": 0.4818498682308935, + "epoch": 0.6653102746693794, "grad_norm": 0.0, - "learning_rate": 1.1071750983056733e-05, - "loss": 0.9612, + "learning_rate": 5.32193943177677e-06, + "loss": 0.9709, "step": 17004 }, { - "epoch": 0.4818782056731559, + "epoch": 0.6653494013616089, "grad_norm": 0.0, - "learning_rate": 1.1070838475597426e-05, - "loss": 0.8075, + "learning_rate": 5.320819451015363e-06, + "loss": 0.9228, "step": 17005 }, { - "epoch": 0.4819065431154184, + "epoch": 0.6653885280538383, "grad_norm": 0.0, - "learning_rate": 1.1069925959118048e-05, - "loss": 0.9245, + "learning_rate": 5.319699545395736e-06, + "loss": 1.2228, "step": 17006 }, { - "epoch": 0.48193488055768086, + "epoch": 0.6654276547460678, "grad_norm": 0.0, - "learning_rate": 1.106901343362629e-05, - "loss": 0.9567, + "learning_rate": 5.318579714935866e-06, + "loss": 0.9157, "step": 17007 }, { - "epoch": 0.48196321799994335, + "epoch": 0.6654667814382972, "grad_norm": 0.0, - "learning_rate": 1.1068100899129832e-05, - "loss": 0.8331, + "learning_rate": 5.317459959653741e-06, + "loss": 0.9619, "step": 17008 }, { - "epoch": 0.4819915554422058, + "epoch": 0.6655059081305267, "grad_norm": 0.0, - "learning_rate": 1.1067188355636366e-05, - "loss": 0.9213, + "learning_rate": 5.316340279567335e-06, + "loss": 0.9512, "step": 17009 }, { - "epoch": 0.4820198928844682, + "epoch": 0.6655450348227561, "grad_norm": 0.0, - "learning_rate": 1.1066275803153577e-05, - "loss": 0.9191, + "learning_rate": 5.315220674694643e-06, + "loss": 1.0379, "step": 17010 }, { - "epoch": 0.4820482303267307, + "epoch": 0.6655841615149856, "grad_norm": 0.0, - "learning_rate": 1.1065363241689148e-05, - "loss": 0.8839, + "learning_rate": 5.314101145053634e-06, + "loss": 0.968, "step": 17011 }, { - "epoch": 0.48207656776899316, + "epoch": 0.665623288207215, "grad_norm": 0.0, - "learning_rate": 1.1064450671250768e-05, - "loss": 0.8823, + "learning_rate": 5.312981690662293e-06, + "loss": 1.0365, "step": 17012 }, { - "epoch": 0.48210490521125565, + "epoch": 0.6656624148994444, "grad_norm": 0.0, - "learning_rate": 1.106353809184613e-05, - "loss": 0.8765, + "learning_rate": 5.311862311538583e-06, + "loss": 1.0745, "step": 17013 }, { - "epoch": 0.4821332426535181, + "epoch": 0.6657015415916738, "grad_norm": 0.0, - "learning_rate": 1.1062625503482911e-05, - "loss": 0.8416, + "learning_rate": 5.3107430077004984e-06, + "loss": 1.0863, "step": 17014 }, { - "epoch": 0.48216158009578053, + "epoch": 0.6657406682839033, "grad_norm": 0.0, - "learning_rate": 1.1061712906168802e-05, - "loss": 0.9931, + "learning_rate": 5.3096237791660014e-06, + "loss": 1.0423, "step": 17015 }, { - "epoch": 0.482189917538043, + "epoch": 0.6657797949761327, "grad_norm": 0.0, - "learning_rate": 1.1060800299911496e-05, - "loss": 0.9323, + "learning_rate": 5.308504625953072e-06, + "loss": 1.1509, "step": 17016 }, { - "epoch": 0.48221825498030546, + "epoch": 0.6658189216683622, "grad_norm": 0.0, - "learning_rate": 1.1059887684718673e-05, - "loss": 0.915, + "learning_rate": 5.3073855480796735e-06, + "loss": 1.0609, "step": 17017 }, { - "epoch": 0.48224659242256795, + "epoch": 0.6658580483605916, "grad_norm": 0.0, - "learning_rate": 1.1058975060598021e-05, - "loss": 0.9143, + "learning_rate": 5.30626654556379e-06, + "loss": 0.9778, "step": 17018 }, { - "epoch": 0.4822749298648304, + "epoch": 0.6658971750528211, "grad_norm": 0.0, - "learning_rate": 1.105806242755723e-05, - "loss": 0.9718, + "learning_rate": 5.30514761842338e-06, + "loss": 0.9803, "step": 17019 }, { - "epoch": 0.4823032673070929, + "epoch": 0.6659363017450505, "grad_norm": 0.0, - "learning_rate": 1.1057149785603984e-05, - "loss": 0.927, + "learning_rate": 5.30402876667642e-06, + "loss": 0.9534, "step": 17020 }, { - "epoch": 0.4823316047493553, + "epoch": 0.6659754284372799, "grad_norm": 0.0, - "learning_rate": 1.1056237134745975e-05, - "loss": 0.8604, + "learning_rate": 5.3029099903408695e-06, + "loss": 1.0474, "step": 17021 }, { - "epoch": 0.48235994219161776, + "epoch": 0.6660145551295094, "grad_norm": 0.0, - "learning_rate": 1.1055324474990889e-05, - "loss": 0.9124, + "learning_rate": 5.301791289434699e-06, + "loss": 1.0052, "step": 17022 }, { - "epoch": 0.48238827963388026, + "epoch": 0.6660536818217387, "grad_norm": 0.0, - "learning_rate": 1.1054411806346413e-05, - "loss": 0.8513, + "learning_rate": 5.300672663975875e-06, + "loss": 1.0564, "step": 17023 }, { - "epoch": 0.4824166170761427, + "epoch": 0.6660928085139682, "grad_norm": 0.0, - "learning_rate": 1.1053499128820232e-05, - "loss": 1.0166, + "learning_rate": 5.299554113982362e-06, + "loss": 0.8806, "step": 17024 }, { - "epoch": 0.4824449545184052, + "epoch": 0.6661319352061976, "grad_norm": 0.0, - "learning_rate": 1.1052586442420036e-05, - "loss": 0.9463, + "learning_rate": 5.298435639472115e-06, + "loss": 0.944, "step": 17025 }, { - "epoch": 0.4824732919606676, + "epoch": 0.6661710618984271, "grad_norm": 0.0, - "learning_rate": 1.1051673747153518e-05, - "loss": 0.9861, + "learning_rate": 5.297317240463102e-06, + "loss": 1.1126, "step": 17026 }, { - "epoch": 0.48250162940293007, + "epoch": 0.6662101885906565, "grad_norm": 0.0, - "learning_rate": 1.105076104302836e-05, - "loss": 0.9412, + "learning_rate": 5.29619891697328e-06, + "loss": 0.8902, "step": 17027 }, { - "epoch": 0.48252996684519256, + "epoch": 0.666249315282886, "grad_norm": 0.0, - "learning_rate": 1.1049848330052251e-05, - "loss": 0.8716, + "learning_rate": 5.295080669020614e-06, + "loss": 0.9801, "step": 17028 }, { - "epoch": 0.482558304287455, + "epoch": 0.6662884419751154, "grad_norm": 0.0, - "learning_rate": 1.104893560823288e-05, - "loss": 0.8699, + "learning_rate": 5.293962496623051e-06, + "loss": 1.0602, "step": 17029 }, { - "epoch": 0.4825866417297175, + "epoch": 0.6663275686673449, "grad_norm": 0.0, - "learning_rate": 1.1048022877577935e-05, - "loss": 0.8851, + "learning_rate": 5.292844399798559e-06, + "loss": 0.9694, "step": 17030 }, { - "epoch": 0.48261497917197993, + "epoch": 0.6663666953595743, "grad_norm": 0.0, - "learning_rate": 1.1047110138095102e-05, - "loss": 0.8945, + "learning_rate": 5.29172637856508e-06, + "loss": 0.9359, "step": 17031 }, { - "epoch": 0.4826433166142424, + "epoch": 0.6664058220518038, "grad_norm": 0.0, - "learning_rate": 1.1046197389792073e-05, - "loss": 0.8693, + "learning_rate": 5.2906084329405825e-06, + "loss": 0.9954, "step": 17032 }, { - "epoch": 0.48267165405650486, + "epoch": 0.6664449487440331, "grad_norm": 0.0, - "learning_rate": 1.1045284632676535e-05, - "loss": 0.88, + "learning_rate": 5.28949056294301e-06, + "loss": 0.9866, "step": 17033 }, { - "epoch": 0.4826999914987673, + "epoch": 0.6664840754362626, "grad_norm": 0.0, - "learning_rate": 1.1044371866756178e-05, - "loss": 0.8672, + "learning_rate": 5.288372768590317e-06, + "loss": 1.0031, "step": 17034 }, { - "epoch": 0.4827283289410298, + "epoch": 0.666523202128492, "grad_norm": 0.0, - "learning_rate": 1.1043459092038688e-05, - "loss": 0.896, + "learning_rate": 5.287255049900453e-06, + "loss": 1.0628, "step": 17035 }, { - "epoch": 0.48275666638329223, + "epoch": 0.6665623288207215, "grad_norm": 0.0, - "learning_rate": 1.1042546308531752e-05, - "loss": 0.9279, + "learning_rate": 5.2861374068913654e-06, + "loss": 0.9069, "step": 17036 }, { - "epoch": 0.4827850038255547, + "epoch": 0.6666014555129509, "grad_norm": 0.0, - "learning_rate": 1.1041633516243064e-05, - "loss": 0.9488, + "learning_rate": 5.285019839581005e-06, + "loss": 1.0399, "step": 17037 }, { - "epoch": 0.48281334126781716, + "epoch": 0.6666405822051804, "grad_norm": 0.0, - "learning_rate": 1.104072071518031e-05, - "loss": 0.8884, + "learning_rate": 5.283902347987322e-06, + "loss": 0.9985, "step": 17038 }, { - "epoch": 0.4828416787100796, + "epoch": 0.6666797088974098, "grad_norm": 0.0, - "learning_rate": 1.1039807905351179e-05, - "loss": 0.9042, + "learning_rate": 5.282784932128253e-06, + "loss": 1.0369, "step": 17039 }, { - "epoch": 0.4828700161523421, + "epoch": 0.6667188355896393, "grad_norm": 0.0, - "learning_rate": 1.1038895086763357e-05, - "loss": 0.8295, + "learning_rate": 5.2816675920217475e-06, + "loss": 1.0034, "step": 17040 }, { - "epoch": 0.48289835359460453, + "epoch": 0.6667579622818687, "grad_norm": 0.0, - "learning_rate": 1.103798225942454e-05, - "loss": 0.8958, + "learning_rate": 5.280550327685752e-06, + "loss": 1.106, "step": 17041 }, { - "epoch": 0.48292669103686703, + "epoch": 0.6667970889740982, "grad_norm": 0.0, - "learning_rate": 1.103706942334241e-05, - "loss": 0.933, + "learning_rate": 5.2794331391382e-06, + "loss": 0.9933, "step": 17042 }, { - "epoch": 0.48295502847912947, + "epoch": 0.6668362156663276, "grad_norm": 0.0, - "learning_rate": 1.103615657852466e-05, - "loss": 0.9305, + "learning_rate": 5.278316026397037e-06, + "loss": 0.9716, "step": 17043 }, { - "epoch": 0.48298336592139196, + "epoch": 0.666875342358557, "grad_norm": 0.0, - "learning_rate": 1.1035243724978979e-05, - "loss": 0.9081, + "learning_rate": 5.277198989480202e-06, + "loss": 0.9704, "step": 17044 }, { - "epoch": 0.4830117033636544, + "epoch": 0.6669144690507864, "grad_norm": 0.0, - "learning_rate": 1.1034330862713054e-05, - "loss": 0.8728, + "learning_rate": 5.276082028405638e-06, + "loss": 1.0125, "step": 17045 }, { - "epoch": 0.48304004080591684, + "epoch": 0.6669535957430159, "grad_norm": 0.0, - "learning_rate": 1.1033417991734579e-05, - "loss": 0.8599, + "learning_rate": 5.274965143191272e-06, + "loss": 0.978, "step": 17046 }, { - "epoch": 0.48306837824817933, + "epoch": 0.6669927224352453, "grad_norm": 0.0, - "learning_rate": 1.103250511205124e-05, - "loss": 0.8243, + "learning_rate": 5.273848333855045e-06, + "loss": 1.0488, "step": 17047 }, { - "epoch": 0.48309671569044177, + "epoch": 0.6670318491274748, "grad_norm": 0.0, - "learning_rate": 1.1031592223670726e-05, - "loss": 0.7565, + "learning_rate": 5.272731600414895e-06, + "loss": 1.1107, "step": 17048 }, { - "epoch": 0.48312505313270426, + "epoch": 0.6670709758197042, "grad_norm": 0.0, - "learning_rate": 1.1030679326600726e-05, - "loss": 0.9557, + "learning_rate": 5.27161494288875e-06, + "loss": 0.9793, "step": 17049 }, { - "epoch": 0.4831533905749667, + "epoch": 0.6671101025119336, "grad_norm": 0.0, - "learning_rate": 1.1029766420848932e-05, - "loss": 0.9023, + "learning_rate": 5.270498361294542e-06, + "loss": 0.9348, "step": 17050 }, { - "epoch": 0.48318172801722914, + "epoch": 0.6671492292041631, "grad_norm": 0.0, - "learning_rate": 1.1028853506423034e-05, - "loss": 0.8899, + "learning_rate": 5.269381855650209e-06, + "loss": 0.9592, "step": 17051 }, { - "epoch": 0.48321006545949163, + "epoch": 0.6671883558963925, "grad_norm": 0.0, - "learning_rate": 1.1027940583330721e-05, - "loss": 0.9708, + "learning_rate": 5.268265425973672e-06, + "loss": 1.0699, "step": 17052 }, { - "epoch": 0.48323840290175407, + "epoch": 0.667227482588622, "grad_norm": 0.0, - "learning_rate": 1.1027027651579683e-05, - "loss": 0.9134, + "learning_rate": 5.2671490722828625e-06, + "loss": 0.9158, "step": 17053 }, { - "epoch": 0.48326674034401657, + "epoch": 0.6672666092808514, "grad_norm": 0.0, - "learning_rate": 1.1026114711177608e-05, - "loss": 0.871, + "learning_rate": 5.266032794595708e-06, + "loss": 1.0557, "step": 17054 }, { - "epoch": 0.483295077786279, + "epoch": 0.6673057359730808, "grad_norm": 0.0, - "learning_rate": 1.1025201762132192e-05, - "loss": 0.8831, + "learning_rate": 5.264916592930139e-06, + "loss": 0.8138, "step": 17055 }, { - "epoch": 0.4833234152285415, + "epoch": 0.6673448626653102, "grad_norm": 0.0, - "learning_rate": 1.1024288804451118e-05, - "loss": 0.8978, + "learning_rate": 5.263800467304072e-06, + "loss": 1.0598, "step": 17056 }, { - "epoch": 0.48335175267080394, + "epoch": 0.6673839893575397, "grad_norm": 0.0, - "learning_rate": 1.1023375838142076e-05, - "loss": 0.7812, + "learning_rate": 5.26268441773544e-06, + "loss": 1.1088, "step": 17057 }, { - "epoch": 0.4833800901130664, + "epoch": 0.6674231160497691, "grad_norm": 0.0, - "learning_rate": 1.1022462863212762e-05, - "loss": 0.9445, + "learning_rate": 5.261568444242151e-06, + "loss": 1.0367, "step": 17058 }, { - "epoch": 0.48340842755532887, + "epoch": 0.6674622427419986, "grad_norm": 0.0, - "learning_rate": 1.1021549879670865e-05, - "loss": 1.0401, + "learning_rate": 5.260452546842143e-06, + "loss": 0.9557, "step": 17059 }, { - "epoch": 0.4834367649975913, + "epoch": 0.667501369434228, "grad_norm": 0.0, - "learning_rate": 1.1020636887524072e-05, - "loss": 0.8415, + "learning_rate": 5.259336725553323e-06, + "loss": 0.9458, "step": 17060 }, { - "epoch": 0.4834651024398538, + "epoch": 0.6675404961264575, "grad_norm": 0.0, - "learning_rate": 1.1019723886780075e-05, - "loss": 0.9918, + "learning_rate": 5.2582209803936215e-06, + "loss": 0.9951, "step": 17061 }, { - "epoch": 0.48349343988211624, + "epoch": 0.6675796228186869, "grad_norm": 0.0, - "learning_rate": 1.1018810877446569e-05, - "loss": 0.9498, + "learning_rate": 5.257105311380939e-06, + "loss": 1.0856, "step": 17062 }, { - "epoch": 0.4835217773243787, + "epoch": 0.6676187495109164, "grad_norm": 0.0, - "learning_rate": 1.1017897859531237e-05, - "loss": 0.8809, + "learning_rate": 5.255989718533212e-06, + "loss": 0.929, "step": 17063 }, { - "epoch": 0.48355011476664117, + "epoch": 0.6676578762031458, "grad_norm": 0.0, - "learning_rate": 1.1016984833041773e-05, - "loss": 0.8951, + "learning_rate": 5.254874201868341e-06, + "loss": 0.9899, "step": 17064 }, { - "epoch": 0.4835784522089036, + "epoch": 0.6676970028953753, "grad_norm": 0.0, - "learning_rate": 1.1016071797985867e-05, - "loss": 1.0067, + "learning_rate": 5.253758761404246e-06, + "loss": 1.0019, "step": 17065 }, { - "epoch": 0.4836067896511661, + "epoch": 0.6677361295876046, "grad_norm": 0.0, - "learning_rate": 1.1015158754371217e-05, - "loss": 0.8506, + "learning_rate": 5.2526433971588366e-06, + "loss": 0.9188, "step": 17066 }, { - "epoch": 0.48363512709342854, + "epoch": 0.6677752562798341, "grad_norm": 0.0, - "learning_rate": 1.1014245702205504e-05, - "loss": 0.8776, + "learning_rate": 5.251528109150024e-06, + "loss": 0.9628, "step": 17067 }, { - "epoch": 0.48366346453569103, + "epoch": 0.6678143829720635, "grad_norm": 0.0, - "learning_rate": 1.1013332641496424e-05, - "loss": 0.8821, + "learning_rate": 5.2504128973957205e-06, + "loss": 1.0101, "step": 17068 }, { - "epoch": 0.4836918019779535, + "epoch": 0.667853509664293, "grad_norm": 0.0, - "learning_rate": 1.1012419572251665e-05, - "loss": 0.8882, + "learning_rate": 5.249297761913839e-06, + "loss": 1.0546, "step": 17069 }, { - "epoch": 0.4837201394202159, + "epoch": 0.6678926363565224, "grad_norm": 0.0, - "learning_rate": 1.1011506494478921e-05, - "loss": 1.0267, + "learning_rate": 5.248182702722278e-06, + "loss": 0.9956, "step": 17070 }, { - "epoch": 0.4837484768624784, + "epoch": 0.6679317630487519, "grad_norm": 0.0, - "learning_rate": 1.101059340818588e-05, - "loss": 0.8591, + "learning_rate": 5.247067719838948e-06, + "loss": 0.9948, "step": 17071 }, { - "epoch": 0.48377681430474084, + "epoch": 0.6679708897409813, "grad_norm": 0.0, - "learning_rate": 1.100968031338024e-05, - "loss": 0.9346, + "learning_rate": 5.245952813281754e-06, + "loss": 1.0493, "step": 17072 }, { - "epoch": 0.48380515174700334, + "epoch": 0.6680100164332108, "grad_norm": 0.0, - "learning_rate": 1.1008767210069684e-05, - "loss": 0.9146, + "learning_rate": 5.244837983068605e-06, + "loss": 1.1084, "step": 17073 }, { - "epoch": 0.4838334891892658, + "epoch": 0.6680491431254402, "grad_norm": 0.0, - "learning_rate": 1.1007854098261908e-05, - "loss": 0.9325, + "learning_rate": 5.243723229217397e-06, + "loss": 0.9956, "step": 17074 }, { - "epoch": 0.4838618266315282, + "epoch": 0.6680882698176697, "grad_norm": 0.0, - "learning_rate": 1.1006940977964604e-05, - "loss": 0.9158, + "learning_rate": 5.2426085517460325e-06, + "loss": 0.9088, "step": 17075 }, { - "epoch": 0.4838901640737907, + "epoch": 0.668127396509899, "grad_norm": 0.0, - "learning_rate": 1.1006027849185463e-05, - "loss": 0.871, + "learning_rate": 5.241493950672414e-06, + "loss": 0.9762, "step": 17076 }, { - "epoch": 0.48391850151605315, + "epoch": 0.6681665232021284, "grad_norm": 0.0, - "learning_rate": 1.1005114711932172e-05, - "loss": 0.872, + "learning_rate": 5.240379426014444e-06, + "loss": 0.9938, "step": 17077 }, { - "epoch": 0.48394683895831564, + "epoch": 0.6682056498943579, "grad_norm": 0.0, - "learning_rate": 1.1004201566212426e-05, - "loss": 0.9887, + "learning_rate": 5.239264977790011e-06, + "loss": 1.0294, "step": 17078 }, { - "epoch": 0.4839751764005781, + "epoch": 0.6682447765865873, "grad_norm": 0.0, - "learning_rate": 1.1003288412033923e-05, - "loss": 0.8835, + "learning_rate": 5.238150606017021e-06, + "loss": 0.9572, "step": 17079 }, { - "epoch": 0.48400351384284057, + "epoch": 0.6682839032788168, "grad_norm": 0.0, - "learning_rate": 1.1002375249404347e-05, - "loss": 0.8484, + "learning_rate": 5.237036310713359e-06, + "loss": 1.0173, "step": 17080 }, { - "epoch": 0.484031851285103, + "epoch": 0.6683230299710462, "grad_norm": 0.0, - "learning_rate": 1.1001462078331394e-05, - "loss": 1.024, + "learning_rate": 5.235922091896932e-06, + "loss": 0.9526, "step": 17081 }, { - "epoch": 0.48406018872736545, + "epoch": 0.6683621566632757, "grad_norm": 0.0, - "learning_rate": 1.1000548898822748e-05, - "loss": 0.8932, + "learning_rate": 5.234807949585623e-06, + "loss": 0.989, "step": 17082 }, { - "epoch": 0.48408852616962794, + "epoch": 0.6684012833555051, "grad_norm": 0.0, - "learning_rate": 1.0999635710886112e-05, - "loss": 0.7994, + "learning_rate": 5.233693883797332e-06, + "loss": 0.801, "step": 17083 }, { - "epoch": 0.4841168636118904, + "epoch": 0.6684404100477346, "grad_norm": 0.0, - "learning_rate": 1.099872251452917e-05, - "loss": 0.924, + "learning_rate": 5.232579894549939e-06, + "loss": 0.9828, "step": 17084 }, { - "epoch": 0.4841452010541529, + "epoch": 0.668479536739964, "grad_norm": 0.0, - "learning_rate": 1.099780930975962e-05, - "loss": 1.0228, + "learning_rate": 5.231465981861341e-06, + "loss": 1.0392, "step": 17085 }, { - "epoch": 0.4841735384964153, + "epoch": 0.6685186634321935, "grad_norm": 0.0, - "learning_rate": 1.0996896096585148e-05, - "loss": 0.8782, + "learning_rate": 5.230352145749422e-06, + "loss": 0.9039, "step": 17086 }, { - "epoch": 0.48420187593867775, + "epoch": 0.6685577901244228, "grad_norm": 0.0, - "learning_rate": 1.0995982875013453e-05, - "loss": 0.805, + "learning_rate": 5.229238386232076e-06, + "loss": 0.9203, "step": 17087 }, { - "epoch": 0.48423021338094024, + "epoch": 0.6685969168166523, "grad_norm": 0.0, - "learning_rate": 1.0995069645052226e-05, - "loss": 0.9151, + "learning_rate": 5.2281247033271795e-06, + "loss": 0.9955, "step": 17088 }, { - "epoch": 0.4842585508232027, + "epoch": 0.6686360435088817, "grad_norm": 0.0, - "learning_rate": 1.0994156406709155e-05, - "loss": 0.7835, + "learning_rate": 5.227011097052621e-06, + "loss": 1.1134, "step": 17089 }, { - "epoch": 0.4842868882654652, + "epoch": 0.6686751702011112, "grad_norm": 0.0, - "learning_rate": 1.0993243159991936e-05, - "loss": 0.8628, + "learning_rate": 5.225897567426287e-06, + "loss": 1.0223, "step": 17090 }, { - "epoch": 0.4843152257077276, + "epoch": 0.6687142968933406, "grad_norm": 0.0, - "learning_rate": 1.0992329904908261e-05, - "loss": 0.8588, + "learning_rate": 5.224784114466051e-06, + "loss": 0.9455, "step": 17091 }, { - "epoch": 0.4843435631499901, + "epoch": 0.6687534235855701, "grad_norm": 0.0, - "learning_rate": 1.0991416641465823e-05, - "loss": 0.9285, + "learning_rate": 5.2236707381898e-06, + "loss": 1.0191, "step": 17092 }, { - "epoch": 0.48437190059225255, + "epoch": 0.6687925502777995, "grad_norm": 0.0, - "learning_rate": 1.0990503369672316e-05, - "loss": 0.9109, + "learning_rate": 5.222557438615411e-06, + "loss": 1.118, "step": 17093 }, { - "epoch": 0.484400238034515, + "epoch": 0.668831676970029, "grad_norm": 0.0, - "learning_rate": 1.0989590089535426e-05, - "loss": 0.8701, + "learning_rate": 5.221444215760767e-06, + "loss": 0.9279, "step": 17094 }, { - "epoch": 0.4844285754767775, + "epoch": 0.6688708036622584, "grad_norm": 0.0, - "learning_rate": 1.0988676801062858e-05, - "loss": 0.8307, + "learning_rate": 5.220331069643737e-06, + "loss": 0.9428, "step": 17095 }, { - "epoch": 0.4844569129190399, + "epoch": 0.6689099303544879, "grad_norm": 0.0, - "learning_rate": 1.0987763504262297e-05, - "loss": 0.8877, + "learning_rate": 5.219218000282204e-06, + "loss": 0.9537, "step": 17096 }, { - "epoch": 0.4844852503613024, + "epoch": 0.6689490570467173, "grad_norm": 0.0, - "learning_rate": 1.0986850199141432e-05, - "loss": 0.8337, + "learning_rate": 5.218105007694037e-06, + "loss": 1.0008, "step": 17097 }, { - "epoch": 0.48451358780356485, + "epoch": 0.6689881837389468, "grad_norm": 0.0, - "learning_rate": 1.0985936885707965e-05, - "loss": 0.9905, + "learning_rate": 5.216992091897111e-06, + "loss": 0.9477, "step": 17098 }, { - "epoch": 0.4845419252458273, + "epoch": 0.6690273104311761, "grad_norm": 0.0, - "learning_rate": 1.0985023563969585e-05, - "loss": 0.8301, + "learning_rate": 5.215879252909298e-06, + "loss": 1.0322, "step": 17099 }, { - "epoch": 0.4845702626880898, + "epoch": 0.6690664371234056, "grad_norm": 0.0, - "learning_rate": 1.0984110233933987e-05, - "loss": 0.9077, + "learning_rate": 5.214766490748473e-06, + "loss": 1.0855, "step": 17100 }, { - "epoch": 0.4845986001303522, + "epoch": 0.669105563815635, "grad_norm": 0.0, - "learning_rate": 1.0983196895608863e-05, - "loss": 0.7973, + "learning_rate": 5.213653805432499e-06, + "loss": 1.1318, "step": 17101 }, { - "epoch": 0.4846269375726147, + "epoch": 0.6691446905078645, "grad_norm": 0.0, - "learning_rate": 1.0982283549001904e-05, - "loss": 0.8005, + "learning_rate": 5.212541196979248e-06, + "loss": 1.0648, "step": 17102 }, { - "epoch": 0.48465527501487715, + "epoch": 0.6691838172000939, "grad_norm": 0.0, - "learning_rate": 1.0981370194120808e-05, - "loss": 0.8211, + "learning_rate": 5.211428665406584e-06, + "loss": 1.0786, "step": 17103 }, { - "epoch": 0.48468361245713965, + "epoch": 0.6692229438923234, "grad_norm": 0.0, - "learning_rate": 1.0980456830973266e-05, - "loss": 0.8464, + "learning_rate": 5.21031621073238e-06, + "loss": 0.9303, "step": 17104 }, { - "epoch": 0.4847119498994021, + "epoch": 0.6692620705845528, "grad_norm": 0.0, - "learning_rate": 1.0979543459566973e-05, - "loss": 0.8622, + "learning_rate": 5.209203832974492e-06, + "loss": 0.8839, "step": 17105 }, { - "epoch": 0.4847402873416645, + "epoch": 0.6693011972767822, "grad_norm": 0.0, - "learning_rate": 1.097863007990962e-05, - "loss": 0.9204, + "learning_rate": 5.208091532150792e-06, + "loss": 0.978, "step": 17106 }, { - "epoch": 0.484768624783927, + "epoch": 0.6693403239690117, "grad_norm": 0.0, - "learning_rate": 1.0977716692008901e-05, - "loss": 0.9435, + "learning_rate": 5.20697930827913e-06, + "loss": 0.9801, "step": 17107 }, { - "epoch": 0.48479696222618945, + "epoch": 0.669379450661241, "grad_norm": 0.0, - "learning_rate": 1.0976803295872513e-05, - "loss": 0.9124, + "learning_rate": 5.2058671613773805e-06, + "loss": 1.0612, "step": 17108 }, { - "epoch": 0.48482529966845195, + "epoch": 0.6694185773534705, "grad_norm": 0.0, - "learning_rate": 1.097588989150815e-05, - "loss": 0.8875, + "learning_rate": 5.2047550914633935e-06, + "loss": 0.889, "step": 17109 }, { - "epoch": 0.4848536371107144, + "epoch": 0.6694577040456999, "grad_norm": 0.0, - "learning_rate": 1.0974976478923503e-05, - "loss": 0.7572, + "learning_rate": 5.203643098555036e-06, + "loss": 1.06, "step": 17110 }, { - "epoch": 0.4848819745529768, + "epoch": 0.6694968307379294, "grad_norm": 0.0, - "learning_rate": 1.0974063058126263e-05, - "loss": 0.8675, + "learning_rate": 5.202531182670151e-06, + "loss": 0.9833, "step": 17111 }, { - "epoch": 0.4849103119952393, + "epoch": 0.6695359574301588, "grad_norm": 0.0, - "learning_rate": 1.0973149629124134e-05, - "loss": 0.8345, + "learning_rate": 5.201419343826611e-06, + "loss": 0.9195, "step": 17112 }, { - "epoch": 0.48493864943750176, + "epoch": 0.6695750841223883, "grad_norm": 0.0, - "learning_rate": 1.0972236191924801e-05, - "loss": 0.9056, + "learning_rate": 5.200307582042261e-06, + "loss": 1.0934, "step": 17113 }, { - "epoch": 0.48496698687976425, + "epoch": 0.6696142108146177, "grad_norm": 0.0, - "learning_rate": 1.0971322746535964e-05, - "loss": 0.8331, + "learning_rate": 5.19919589733496e-06, + "loss": 0.9652, "step": 17114 }, { - "epoch": 0.4849953243220267, + "epoch": 0.6696533375068472, "grad_norm": 0.0, - "learning_rate": 1.0970409292965312e-05, - "loss": 0.8942, + "learning_rate": 5.198084289722553e-06, + "loss": 0.9375, "step": 17115 }, { - "epoch": 0.4850236617642892, + "epoch": 0.6696924641990766, "grad_norm": 0.0, - "learning_rate": 1.0969495831220543e-05, - "loss": 0.7928, + "learning_rate": 5.196972759222895e-06, + "loss": 0.9547, "step": 17116 }, { - "epoch": 0.4850519992065516, + "epoch": 0.6697315908913061, "grad_norm": 0.0, - "learning_rate": 1.096858236130935e-05, - "loss": 0.899, + "learning_rate": 5.195861305853837e-06, + "loss": 0.9738, "step": 17117 }, { - "epoch": 0.48508033664881406, + "epoch": 0.6697707175835355, "grad_norm": 0.0, - "learning_rate": 1.096766888323943e-05, - "loss": 0.9343, + "learning_rate": 5.19474992963323e-06, + "loss": 0.9445, "step": 17118 }, { - "epoch": 0.48510867409107655, + "epoch": 0.669809844275765, "grad_norm": 0.0, - "learning_rate": 1.0966755397018474e-05, - "loss": 0.8342, + "learning_rate": 5.193638630578914e-06, + "loss": 0.9451, "step": 17119 }, { - "epoch": 0.485137011533339, + "epoch": 0.6698489709679943, "grad_norm": 0.0, - "learning_rate": 1.096584190265418e-05, - "loss": 0.8777, + "learning_rate": 5.192527408708739e-06, + "loss": 0.9258, "step": 17120 }, { - "epoch": 0.4851653489756015, + "epoch": 0.6698880976602238, "grad_norm": 0.0, - "learning_rate": 1.096492840015424e-05, - "loss": 0.839, + "learning_rate": 5.19141626404055e-06, + "loss": 0.9677, "step": 17121 }, { - "epoch": 0.4851936864178639, + "epoch": 0.6699272243524532, "grad_norm": 0.0, - "learning_rate": 1.096401488952635e-05, - "loss": 0.9076, + "learning_rate": 5.190305196592193e-06, + "loss": 1.087, "step": 17122 }, { - "epoch": 0.48522202386012636, + "epoch": 0.6699663510446827, "grad_norm": 0.0, - "learning_rate": 1.0963101370778201e-05, - "loss": 0.9161, + "learning_rate": 5.189194206381505e-06, + "loss": 1.0578, "step": 17123 }, { - "epoch": 0.48525036130238886, + "epoch": 0.6700054777369121, "grad_norm": 0.0, - "learning_rate": 1.0962187843917498e-05, - "loss": 0.8232, + "learning_rate": 5.188083293426332e-06, + "loss": 0.9321, "step": 17124 }, { - "epoch": 0.4852786987446513, + "epoch": 0.6700446044291416, "grad_norm": 0.0, - "learning_rate": 1.0961274308951925e-05, - "loss": 0.912, + "learning_rate": 5.1869724577445055e-06, + "loss": 0.9436, "step": 17125 }, { - "epoch": 0.4853070361869138, + "epoch": 0.670083731121371, "grad_norm": 0.0, - "learning_rate": 1.0960360765889185e-05, - "loss": 0.9015, + "learning_rate": 5.185861699353877e-06, + "loss": 1.0803, "step": 17126 }, { - "epoch": 0.4853353736291762, + "epoch": 0.6701228578136005, "grad_norm": 0.0, - "learning_rate": 1.0959447214736966e-05, - "loss": 0.8277, + "learning_rate": 5.1847510182722735e-06, + "loss": 0.9577, "step": 17127 }, { - "epoch": 0.4853637110714387, + "epoch": 0.6701619845058299, "grad_norm": 0.0, - "learning_rate": 1.0958533655502969e-05, - "loss": 0.8148, + "learning_rate": 5.183640414517539e-06, + "loss": 1.0214, "step": 17128 }, { - "epoch": 0.48539204851370116, + "epoch": 0.6702011111980594, "grad_norm": 0.0, - "learning_rate": 1.0957620088194884e-05, - "loss": 0.9662, + "learning_rate": 5.1825298881075e-06, + "loss": 1.0494, "step": 17129 }, { - "epoch": 0.4854203859559636, + "epoch": 0.6702402378902887, "grad_norm": 0.0, - "learning_rate": 1.0956706512820414e-05, - "loss": 0.8591, + "learning_rate": 5.1814194390599945e-06, + "loss": 0.9739, "step": 17130 }, { - "epoch": 0.4854487233982261, + "epoch": 0.6702793645825182, "grad_norm": 0.0, - "learning_rate": 1.0955792929387248e-05, - "loss": 0.8459, + "learning_rate": 5.180309067392855e-06, + "loss": 0.9594, "step": 17131 }, { - "epoch": 0.48547706084048853, + "epoch": 0.6703184912747476, "grad_norm": 0.0, - "learning_rate": 1.0954879337903081e-05, - "loss": 0.7921, + "learning_rate": 5.179198773123917e-06, + "loss": 1.0222, "step": 17132 }, { - "epoch": 0.485505398282751, + "epoch": 0.6703576179669771, "grad_norm": 0.0, - "learning_rate": 1.0953965738375616e-05, - "loss": 0.9918, + "learning_rate": 5.1780885562710014e-06, + "loss": 0.8806, "step": 17133 }, { - "epoch": 0.48553373572501346, + "epoch": 0.6703967446592065, "grad_norm": 0.0, - "learning_rate": 1.095305213081254e-05, - "loss": 0.8931, + "learning_rate": 5.176978416851941e-06, + "loss": 0.9261, "step": 17134 }, { - "epoch": 0.4855620731672759, + "epoch": 0.6704358713514359, "grad_norm": 0.0, - "learning_rate": 1.0952138515221551e-05, - "loss": 0.914, + "learning_rate": 5.1758683548845676e-06, + "loss": 0.9793, "step": 17135 }, { - "epoch": 0.4855904106095384, + "epoch": 0.6704749980436654, "grad_norm": 0.0, - "learning_rate": 1.0951224891610347e-05, - "loss": 0.9089, + "learning_rate": 5.174758370386699e-06, + "loss": 1.0513, "step": 17136 }, { - "epoch": 0.48561874805180083, + "epoch": 0.6705141247358948, "grad_norm": 0.0, - "learning_rate": 1.0950311259986622e-05, - "loss": 0.7898, + "learning_rate": 5.1736484633761665e-06, + "loss": 0.9818, "step": 17137 }, { - "epoch": 0.4856470854940633, + "epoch": 0.6705532514281243, "grad_norm": 0.0, - "learning_rate": 1.0949397620358073e-05, - "loss": 1.0138, + "learning_rate": 5.1725386338707896e-06, + "loss": 1.089, "step": 17138 }, { - "epoch": 0.48567542293632576, + "epoch": 0.6705923781203537, "grad_norm": 0.0, - "learning_rate": 1.0948483972732395e-05, - "loss": 0.862, + "learning_rate": 5.171428881888398e-06, + "loss": 0.9698, "step": 17139 }, { - "epoch": 0.48570376037858826, + "epoch": 0.6706315048125832, "grad_norm": 0.0, - "learning_rate": 1.0947570317117282e-05, - "loss": 0.8166, + "learning_rate": 5.170319207446806e-06, + "loss": 1.023, "step": 17140 }, { - "epoch": 0.4857320978208507, + "epoch": 0.6706706315048125, "grad_norm": 0.0, - "learning_rate": 1.0946656653520435e-05, - "loss": 0.8476, + "learning_rate": 5.169209610563837e-06, + "loss": 1.0582, "step": 17141 }, { - "epoch": 0.48576043526311313, + "epoch": 0.670709758197042, "grad_norm": 0.0, - "learning_rate": 1.0945742981949547e-05, - "loss": 0.9205, + "learning_rate": 5.168100091257301e-06, + "loss": 1.0525, "step": 17142 }, { - "epoch": 0.4857887727053756, + "epoch": 0.6707488848892714, "grad_norm": 0.0, - "learning_rate": 1.0944829302412314e-05, - "loss": 0.8153, + "learning_rate": 5.16699064954503e-06, + "loss": 0.8802, "step": 17143 }, { - "epoch": 0.48581711014763806, + "epoch": 0.6707880115815009, "grad_norm": 0.0, - "learning_rate": 1.0943915614916434e-05, - "loss": 0.8602, + "learning_rate": 5.165881285444832e-06, + "loss": 0.9309, "step": 17144 }, { - "epoch": 0.48584544758990056, + "epoch": 0.6708271382737303, "grad_norm": 0.0, - "learning_rate": 1.0943001919469602e-05, - "loss": 0.8707, + "learning_rate": 5.164771998974524e-06, + "loss": 1.0562, "step": 17145 }, { - "epoch": 0.485873785032163, + "epoch": 0.6708662649659598, "grad_norm": 0.0, - "learning_rate": 1.0942088216079516e-05, - "loss": 0.9164, + "learning_rate": 5.1636627901519155e-06, + "loss": 0.9543, "step": 17146 }, { - "epoch": 0.48590212247442544, + "epoch": 0.6709053916581892, "grad_norm": 0.0, - "learning_rate": 1.094117450475387e-05, - "loss": 0.9078, + "learning_rate": 5.162553658994823e-06, + "loss": 1.0815, "step": 17147 }, { - "epoch": 0.48593045991668793, + "epoch": 0.6709445183504187, "grad_norm": 0.0, - "learning_rate": 1.094026078550036e-05, - "loss": 0.9729, + "learning_rate": 5.1614446055210576e-06, + "loss": 1.0469, "step": 17148 }, { - "epoch": 0.48595879735895037, + "epoch": 0.6709836450426481, "grad_norm": 0.0, - "learning_rate": 1.0939347058326684e-05, - "loss": 0.8496, + "learning_rate": 5.160335629748432e-06, + "loss": 1.0641, "step": 17149 }, { - "epoch": 0.48598713480121286, + "epoch": 0.6710227717348776, "grad_norm": 0.0, - "learning_rate": 1.0938433323240543e-05, - "loss": 0.8913, + "learning_rate": 5.159226731694748e-06, + "loss": 0.9774, "step": 17150 }, { - "epoch": 0.4860154722434753, + "epoch": 0.671061898427107, "grad_norm": 0.0, - "learning_rate": 1.0937519580249628e-05, - "loss": 0.972, + "learning_rate": 5.158117911377816e-06, + "loss": 0.9049, "step": 17151 }, { - "epoch": 0.48604380968573774, + "epoch": 0.6711010251193364, "grad_norm": 0.0, - "learning_rate": 1.0936605829361633e-05, - "loss": 0.9052, + "learning_rate": 5.157009168815443e-06, + "loss": 1.1097, "step": 17152 }, { - "epoch": 0.48607214712800023, + "epoch": 0.6711401518115658, "grad_norm": 0.0, - "learning_rate": 1.0935692070584264e-05, - "loss": 0.9059, + "learning_rate": 5.155900504025439e-06, + "loss": 1.0092, "step": 17153 }, { - "epoch": 0.48610048457026267, + "epoch": 0.6711792785037953, "grad_norm": 0.0, - "learning_rate": 1.0934778303925214e-05, - "loss": 0.9714, + "learning_rate": 5.154791917025599e-06, + "loss": 1.1304, "step": 17154 }, { - "epoch": 0.48612882201252516, + "epoch": 0.6712184051960247, "grad_norm": 0.0, - "learning_rate": 1.0933864529392175e-05, - "loss": 0.8874, + "learning_rate": 5.153683407833734e-06, + "loss": 0.968, "step": 17155 }, { - "epoch": 0.4861571594547876, + "epoch": 0.6712575318882542, "grad_norm": 0.0, - "learning_rate": 1.093295074699285e-05, - "loss": 0.8711, + "learning_rate": 5.152574976467633e-06, + "loss": 1.0192, "step": 17156 }, { - "epoch": 0.4861854968970501, + "epoch": 0.6712966585804836, "grad_norm": 0.0, - "learning_rate": 1.0932036956734935e-05, - "loss": 0.8694, + "learning_rate": 5.151466622945109e-06, + "loss": 1.0996, "step": 17157 }, { - "epoch": 0.48621383433931253, + "epoch": 0.6713357852727131, "grad_norm": 0.0, - "learning_rate": 1.0931123158626127e-05, - "loss": 0.9703, + "learning_rate": 5.150358347283954e-06, + "loss": 0.9154, "step": 17158 }, { - "epoch": 0.48624217178157497, + "epoch": 0.6713749119649425, "grad_norm": 0.0, - "learning_rate": 1.0930209352674123e-05, - "loss": 0.8217, + "learning_rate": 5.14925014950197e-06, + "loss": 0.9612, "step": 17159 }, { - "epoch": 0.48627050922383747, + "epoch": 0.671414038657172, "grad_norm": 0.0, - "learning_rate": 1.0929295538886622e-05, - "loss": 1.0165, + "learning_rate": 5.148142029616943e-06, + "loss": 0.9294, "step": 17160 }, { - "epoch": 0.4862988466660999, + "epoch": 0.6714531653494014, "grad_norm": 0.0, - "learning_rate": 1.0928381717271315e-05, - "loss": 0.9115, + "learning_rate": 5.1470339876466814e-06, + "loss": 0.9841, "step": 17161 }, { - "epoch": 0.4863271841083624, + "epoch": 0.6714922920416307, "grad_norm": 0.0, - "learning_rate": 1.0927467887835905e-05, - "loss": 0.9281, + "learning_rate": 5.14592602360897e-06, + "loss": 0.9886, "step": 17162 }, { - "epoch": 0.48635552155062484, + "epoch": 0.6715314187338602, "grad_norm": 0.0, - "learning_rate": 1.0926554050588091e-05, - "loss": 0.7939, + "learning_rate": 5.144818137521609e-06, + "loss": 1.0241, "step": 17163 }, { - "epoch": 0.4863838589928873, + "epoch": 0.6715705454260896, "grad_norm": 0.0, - "learning_rate": 1.0925640205535569e-05, - "loss": 0.8935, + "learning_rate": 5.1437103294023805e-06, + "loss": 0.9903, "step": 17164 }, { - "epoch": 0.48641219643514977, + "epoch": 0.6716096721183191, "grad_norm": 0.0, - "learning_rate": 1.092472635268603e-05, - "loss": 0.874, + "learning_rate": 5.142602599269077e-06, + "loss": 1.042, "step": 17165 }, { - "epoch": 0.4864405338774122, + "epoch": 0.6716487988105485, "grad_norm": 0.0, - "learning_rate": 1.0923812492047183e-05, - "loss": 0.9374, + "learning_rate": 5.141494947139491e-06, + "loss": 1.0724, "step": 17166 }, { - "epoch": 0.4864688713196747, + "epoch": 0.671687925502778, "grad_norm": 0.0, - "learning_rate": 1.0922898623626721e-05, - "loss": 1.0066, + "learning_rate": 5.140387373031413e-06, + "loss": 1.0657, "step": 17167 }, { - "epoch": 0.48649720876193714, + "epoch": 0.6717270521950074, "grad_norm": 0.0, - "learning_rate": 1.0921984747432336e-05, - "loss": 0.9307, + "learning_rate": 5.139279876962618e-06, + "loss": 1.0537, "step": 17168 }, { - "epoch": 0.48652554620419963, + "epoch": 0.6717661788872369, "grad_norm": 0.0, - "learning_rate": 1.0921070863471732e-05, - "loss": 0.8609, + "learning_rate": 5.138172458950899e-06, + "loss": 0.9236, "step": 17169 }, { - "epoch": 0.48655388364646207, + "epoch": 0.6718053055794663, "grad_norm": 0.0, - "learning_rate": 1.0920156971752612e-05, - "loss": 0.8792, + "learning_rate": 5.1370651190140376e-06, + "loss": 1.1008, "step": 17170 }, { - "epoch": 0.4865822210887245, + "epoch": 0.6718444322716958, "grad_norm": 0.0, - "learning_rate": 1.0919243072282664e-05, - "loss": 0.9511, + "learning_rate": 5.1359578571698195e-06, + "loss": 1.0146, "step": 17171 }, { - "epoch": 0.486610558530987, + "epoch": 0.6718835589639252, "grad_norm": 0.0, - "learning_rate": 1.091832916506959e-05, - "loss": 0.8783, + "learning_rate": 5.13485067343602e-06, + "loss": 1.0555, "step": 17172 }, { - "epoch": 0.48663889597324944, + "epoch": 0.6719226856561547, "grad_norm": 0.0, - "learning_rate": 1.0917415250121088e-05, - "loss": 0.8267, + "learning_rate": 5.133743567830427e-06, + "loss": 0.9804, "step": 17173 }, { - "epoch": 0.48666723341551194, + "epoch": 0.671961812348384, "grad_norm": 0.0, - "learning_rate": 1.0916501327444859e-05, - "loss": 0.8969, + "learning_rate": 5.1326365403708105e-06, + "loss": 1.0236, "step": 17174 }, { - "epoch": 0.4866955708577744, + "epoch": 0.6720009390406135, "grad_norm": 0.0, - "learning_rate": 1.0915587397048595e-05, - "loss": 0.9033, + "learning_rate": 5.131529591074952e-06, + "loss": 0.8941, "step": 17175 }, { - "epoch": 0.4867239083000368, + "epoch": 0.6720400657328429, "grad_norm": 0.0, - "learning_rate": 1.0914673458940002e-05, - "loss": 0.8961, + "learning_rate": 5.1304227199606285e-06, + "loss": 0.8839, "step": 17176 }, { - "epoch": 0.4867522457422993, + "epoch": 0.6720791924250724, "grad_norm": 0.0, - "learning_rate": 1.0913759513126774e-05, - "loss": 0.9069, + "learning_rate": 5.129315927045616e-06, + "loss": 1.0324, "step": 17177 }, { - "epoch": 0.48678058318456174, + "epoch": 0.6721183191173018, "grad_norm": 0.0, - "learning_rate": 1.091284555961661e-05, - "loss": 0.9375, + "learning_rate": 5.1282092123476836e-06, + "loss": 0.9493, "step": 17178 }, { - "epoch": 0.48680892062682424, + "epoch": 0.6721574458095313, "grad_norm": 0.0, - "learning_rate": 1.091193159841721e-05, - "loss": 0.8987, + "learning_rate": 5.1271025758846075e-06, + "loss": 1.1289, "step": 17179 }, { - "epoch": 0.4868372580690867, + "epoch": 0.6721965725017607, "grad_norm": 0.0, - "learning_rate": 1.0911017629536272e-05, - "loss": 1.0339, + "learning_rate": 5.1259960176741594e-06, + "loss": 1.0277, "step": 17180 }, { - "epoch": 0.48686559551134917, + "epoch": 0.6722356991939902, "grad_norm": 0.0, - "learning_rate": 1.091010365298149e-05, - "loss": 0.8736, + "learning_rate": 5.124889537734106e-06, + "loss": 0.8719, "step": 17181 }, { - "epoch": 0.4868939329536116, + "epoch": 0.6722748258862196, "grad_norm": 0.0, - "learning_rate": 1.090918966876057e-05, - "loss": 0.862, + "learning_rate": 5.123783136082218e-06, + "loss": 0.9311, "step": 17182 }, { - "epoch": 0.48692227039587405, + "epoch": 0.6723139525784491, "grad_norm": 0.0, - "learning_rate": 1.0908275676881206e-05, - "loss": 0.8217, + "learning_rate": 5.122676812736262e-06, + "loss": 0.9842, "step": 17183 }, { - "epoch": 0.48695060783813654, + "epoch": 0.6723530792706784, "grad_norm": 0.0, - "learning_rate": 1.09073616773511e-05, - "loss": 0.9183, + "learning_rate": 5.121570567714007e-06, + "loss": 0.8784, "step": 17184 }, { - "epoch": 0.486978945280399, + "epoch": 0.6723922059629079, "grad_norm": 0.0, - "learning_rate": 1.0906447670177948e-05, - "loss": 0.865, + "learning_rate": 5.120464401033214e-06, + "loss": 0.8672, "step": 17185 }, { - "epoch": 0.48700728272266147, + "epoch": 0.6724313326551373, "grad_norm": 0.0, - "learning_rate": 1.0905533655369455e-05, - "loss": 0.8385, + "learning_rate": 5.119358312711651e-06, + "loss": 1.0499, "step": 17186 }, { - "epoch": 0.4870356201649239, + "epoch": 0.6724704593473668, "grad_norm": 0.0, - "learning_rate": 1.0904619632933312e-05, - "loss": 0.8477, + "learning_rate": 5.11825230276707e-06, + "loss": 0.9994, "step": 17187 }, { - "epoch": 0.48706395760718635, + "epoch": 0.6725095860395962, "grad_norm": 0.0, - "learning_rate": 1.0903705602877224e-05, - "loss": 0.927, + "learning_rate": 5.117146371217249e-06, + "loss": 0.9375, "step": 17188 }, { - "epoch": 0.48709229504944884, + "epoch": 0.6725487127318257, "grad_norm": 0.0, - "learning_rate": 1.0902791565208887e-05, - "loss": 0.8229, + "learning_rate": 5.116040518079933e-06, + "loss": 0.9609, "step": 17189 }, { - "epoch": 0.4871206324917113, + "epoch": 0.6725878394240551, "grad_norm": 0.0, - "learning_rate": 1.0901877519936001e-05, - "loss": 0.8285, + "learning_rate": 5.114934743372891e-06, + "loss": 0.9839, "step": 17190 }, { - "epoch": 0.4871489699339738, + "epoch": 0.6726269661162845, "grad_norm": 0.0, - "learning_rate": 1.0900963467066268e-05, - "loss": 0.8823, + "learning_rate": 5.113829047113868e-06, + "loss": 0.9602, "step": 17191 }, { - "epoch": 0.4871773073762362, + "epoch": 0.672666092808514, "grad_norm": 0.0, - "learning_rate": 1.0900049406607383e-05, - "loss": 0.8602, + "learning_rate": 5.112723429320636e-06, + "loss": 0.9334, "step": 17192 }, { - "epoch": 0.4872056448184987, + "epoch": 0.6727052195007434, "grad_norm": 0.0, - "learning_rate": 1.089913533856705e-05, - "loss": 0.8345, + "learning_rate": 5.111617890010937e-06, + "loss": 1.0814, "step": 17193 }, { - "epoch": 0.48723398226076114, + "epoch": 0.6727443461929729, "grad_norm": 0.0, - "learning_rate": 1.0898221262952962e-05, - "loss": 0.8185, + "learning_rate": 5.110512429202533e-06, + "loss": 1.0148, "step": 17194 }, { - "epoch": 0.4872623197030236, + "epoch": 0.6727834728852022, "grad_norm": 0.0, - "learning_rate": 1.0897307179772825e-05, - "loss": 0.8325, + "learning_rate": 5.109407046913169e-06, + "loss": 0.8288, "step": 17195 }, { - "epoch": 0.4872906571452861, + "epoch": 0.6728225995774317, "grad_norm": 0.0, - "learning_rate": 1.0896393089034336e-05, - "loss": 0.934, + "learning_rate": 5.1083017431606e-06, + "loss": 0.8893, "step": 17196 }, { - "epoch": 0.4873189945875485, + "epoch": 0.6728617262696611, "grad_norm": 0.0, - "learning_rate": 1.0895478990745196e-05, - "loss": 0.8675, + "learning_rate": 5.107196517962575e-06, + "loss": 0.9503, "step": 17197 }, { - "epoch": 0.487347332029811, + "epoch": 0.6729008529618906, "grad_norm": 0.0, - "learning_rate": 1.0894564884913103e-05, - "loss": 0.9071, + "learning_rate": 5.106091371336847e-06, + "loss": 0.9263, "step": 17198 }, { - "epoch": 0.48737566947207345, + "epoch": 0.67293997965412, "grad_norm": 0.0, - "learning_rate": 1.089365077154576e-05, - "loss": 0.8148, + "learning_rate": 5.1049863033011535e-06, + "loss": 1.0327, "step": 17199 }, { - "epoch": 0.4874040069143359, + "epoch": 0.6729791063463495, "grad_norm": 0.0, - "learning_rate": 1.0892736650650864e-05, - "loss": 0.8227, + "learning_rate": 5.103881313873249e-06, + "loss": 1.0403, "step": 17200 }, { - "epoch": 0.4874323443565984, + "epoch": 0.6730182330385789, "grad_norm": 0.0, - "learning_rate": 1.0891822522236114e-05, - "loss": 0.9607, + "learning_rate": 5.1027764030708735e-06, + "loss": 0.9505, "step": 17201 }, { - "epoch": 0.4874606817988608, + "epoch": 0.6730573597308084, "grad_norm": 0.0, - "learning_rate": 1.089090838630921e-05, - "loss": 0.9419, + "learning_rate": 5.101671570911777e-06, + "loss": 1.0015, "step": 17202 }, { - "epoch": 0.4874890192411233, + "epoch": 0.6730964864230378, "grad_norm": 0.0, - "learning_rate": 1.0889994242877857e-05, - "loss": 0.8525, + "learning_rate": 5.100566817413693e-06, + "loss": 0.9539, "step": 17203 }, { - "epoch": 0.48751735668338575, + "epoch": 0.6731356131152673, "grad_norm": 0.0, - "learning_rate": 1.088908009194975e-05, - "loss": 0.9319, + "learning_rate": 5.099462142594372e-06, + "loss": 1.0757, "step": 17204 }, { - "epoch": 0.48754569412564824, + "epoch": 0.6731747398074966, "grad_norm": 0.0, - "learning_rate": 1.0888165933532595e-05, - "loss": 0.8471, + "learning_rate": 5.0983575464715374e-06, + "loss": 0.9679, "step": 17205 }, { - "epoch": 0.4875740315679107, + "epoch": 0.6732138664997261, "grad_norm": 0.0, - "learning_rate": 1.0887251767634084e-05, - "loss": 0.9656, + "learning_rate": 5.097253029062947e-06, + "loss": 0.9521, "step": 17206 }, { - "epoch": 0.4876023690101731, + "epoch": 0.6732529931919555, "grad_norm": 0.0, - "learning_rate": 1.0886337594261926e-05, - "loss": 0.9896, + "learning_rate": 5.096148590386327e-06, + "loss": 0.9352, "step": 17207 }, { - "epoch": 0.4876307064524356, + "epoch": 0.673292119884185, "grad_norm": 0.0, - "learning_rate": 1.0885423413423812e-05, - "loss": 0.7697, + "learning_rate": 5.0950442304594206e-06, + "loss": 1.0094, "step": 17208 }, { - "epoch": 0.48765904389469805, + "epoch": 0.6733312465764144, "grad_norm": 0.0, - "learning_rate": 1.0884509225127453e-05, - "loss": 0.8517, + "learning_rate": 5.0939399492999475e-06, + "loss": 0.9763, "step": 17209 }, { - "epoch": 0.48768738133696055, + "epoch": 0.6733703732686439, "grad_norm": 0.0, - "learning_rate": 1.088359502938054e-05, - "loss": 0.9379, + "learning_rate": 5.092835746925659e-06, + "loss": 0.9831, "step": 17210 }, { - "epoch": 0.487715718779223, + "epoch": 0.6734094999608733, "grad_norm": 0.0, - "learning_rate": 1.0882680826190782e-05, - "loss": 0.9187, + "learning_rate": 5.0917316233542765e-06, + "loss": 0.9588, "step": 17211 }, { - "epoch": 0.4877440562214854, + "epoch": 0.6734486266531028, "grad_norm": 0.0, - "learning_rate": 1.0881766615565877e-05, - "loss": 0.8586, + "learning_rate": 5.090627578603537e-06, + "loss": 0.9282, "step": 17212 }, { - "epoch": 0.4877723936637479, + "epoch": 0.6734877533453322, "grad_norm": 0.0, - "learning_rate": 1.0880852397513519e-05, - "loss": 0.9133, + "learning_rate": 5.089523612691165e-06, + "loss": 1.0269, "step": 17213 }, { - "epoch": 0.48780073110601035, + "epoch": 0.6735268800375617, "grad_norm": 0.0, - "learning_rate": 1.0879938172041415e-05, - "loss": 0.9019, + "learning_rate": 5.088419725634887e-06, + "loss": 0.8886, "step": 17214 }, { - "epoch": 0.48782906854827285, + "epoch": 0.6735660067297911, "grad_norm": 0.0, - "learning_rate": 1.0879023939157267e-05, - "loss": 0.8405, + "learning_rate": 5.087315917452438e-06, + "loss": 1.0657, "step": 17215 }, { - "epoch": 0.4878574059905353, + "epoch": 0.6736051334220206, "grad_norm": 0.0, - "learning_rate": 1.0878109698868773e-05, - "loss": 0.9463, + "learning_rate": 5.0862121881615405e-06, + "loss": 0.992, "step": 17216 }, { - "epoch": 0.4878857434327978, + "epoch": 0.6736442601142499, "grad_norm": 0.0, - "learning_rate": 1.0877195451183637e-05, - "loss": 0.8755, + "learning_rate": 5.085108537779915e-06, + "loss": 0.9787, "step": 17217 }, { - "epoch": 0.4879140808750602, + "epoch": 0.6736833868064794, "grad_norm": 0.0, - "learning_rate": 1.0876281196109556e-05, - "loss": 0.8657, + "learning_rate": 5.0840049663252864e-06, + "loss": 0.9115, "step": 17218 }, { - "epoch": 0.48794241831732266, + "epoch": 0.6737225134987088, "grad_norm": 0.0, - "learning_rate": 1.0875366933654232e-05, - "loss": 0.9398, + "learning_rate": 5.0829014738153825e-06, + "loss": 1.0674, "step": 17219 }, { - "epoch": 0.48797075575958515, + "epoch": 0.6737616401909382, "grad_norm": 0.0, - "learning_rate": 1.0874452663825368e-05, - "loss": 0.7991, + "learning_rate": 5.081798060267915e-06, + "loss": 1.0278, "step": 17220 }, { - "epoch": 0.4879990932018476, + "epoch": 0.6738007668831677, "grad_norm": 0.0, - "learning_rate": 1.0873538386630666e-05, - "loss": 0.9463, + "learning_rate": 5.080694725700609e-06, + "loss": 1.1265, "step": 17221 }, { - "epoch": 0.4880274306441101, + "epoch": 0.6738398935753971, "grad_norm": 0.0, - "learning_rate": 1.0872624102077827e-05, - "loss": 0.8557, + "learning_rate": 5.0795914701311845e-06, + "loss": 1.0272, "step": 17222 }, { - "epoch": 0.4880557680863725, + "epoch": 0.6738790202676266, "grad_norm": 0.0, - "learning_rate": 1.0871709810174547e-05, - "loss": 0.8133, + "learning_rate": 5.0784882935773524e-06, + "loss": 1.0168, "step": 17223 }, { - "epoch": 0.48808410552863496, + "epoch": 0.673918146959856, "grad_norm": 0.0, - "learning_rate": 1.0870795510928536e-05, - "loss": 0.9152, + "learning_rate": 5.07738519605683e-06, + "loss": 0.9355, "step": 17224 }, { - "epoch": 0.48811244297089745, + "epoch": 0.6739572736520855, "grad_norm": 0.0, - "learning_rate": 1.0869881204347488e-05, - "loss": 0.7902, + "learning_rate": 5.076282177587339e-06, + "loss": 0.8138, "step": 17225 }, { - "epoch": 0.4881407804131599, + "epoch": 0.6739964003443148, "grad_norm": 0.0, - "learning_rate": 1.0868966890439107e-05, - "loss": 0.9581, + "learning_rate": 5.075179238186581e-06, + "loss": 0.9095, "step": 17226 }, { - "epoch": 0.4881691178554224, + "epoch": 0.6740355270365443, "grad_norm": 0.0, - "learning_rate": 1.0868052569211096e-05, - "loss": 0.8513, + "learning_rate": 5.074076377872272e-06, + "loss": 0.9336, "step": 17227 }, { - "epoch": 0.4881974552976848, + "epoch": 0.6740746537287737, "grad_norm": 0.0, - "learning_rate": 1.0867138240671156e-05, - "loss": 0.9382, + "learning_rate": 5.0729735966621256e-06, + "loss": 1.0013, "step": 17228 }, { - "epoch": 0.4882257927399473, + "epoch": 0.6741137804210032, "grad_norm": 0.0, - "learning_rate": 1.0866223904826992e-05, - "loss": 0.9033, + "learning_rate": 5.071870894573854e-06, + "loss": 1.1159, "step": 17229 }, { - "epoch": 0.48825413018220976, + "epoch": 0.6741529071132326, "grad_norm": 0.0, - "learning_rate": 1.08653095616863e-05, - "loss": 0.8906, + "learning_rate": 5.070768271625155e-06, + "loss": 0.8942, "step": 17230 }, { - "epoch": 0.4882824676244722, + "epoch": 0.6741920338054621, "grad_norm": 0.0, - "learning_rate": 1.0864395211256782e-05, - "loss": 0.8342, + "learning_rate": 5.069665727833746e-06, + "loss": 1.1595, "step": 17231 }, { - "epoch": 0.4883108050667347, + "epoch": 0.6742311604976915, "grad_norm": 0.0, - "learning_rate": 1.0863480853546142e-05, - "loss": 0.947, + "learning_rate": 5.0685632632173185e-06, + "loss": 1.0081, "step": 17232 }, { - "epoch": 0.4883391425089971, + "epoch": 0.674270287189921, "grad_norm": 0.0, - "learning_rate": 1.0862566488562082e-05, - "loss": 0.8779, + "learning_rate": 5.067460877793593e-06, + "loss": 0.9665, "step": 17233 }, { - "epoch": 0.4883674799512596, + "epoch": 0.6743094138821504, "grad_norm": 0.0, - "learning_rate": 1.086165211631231e-05, - "loss": 0.8342, + "learning_rate": 5.0663585715802625e-06, + "loss": 1.029, "step": 17234 }, { - "epoch": 0.48839581739352206, + "epoch": 0.6743485405743799, "grad_norm": 0.0, - "learning_rate": 1.0860737736804517e-05, - "loss": 0.9265, + "learning_rate": 5.065256344595034e-06, + "loss": 1.0852, "step": 17235 }, { - "epoch": 0.4884241548357845, + "epoch": 0.6743876672666093, "grad_norm": 0.0, - "learning_rate": 1.085982335004641e-05, - "loss": 0.9363, + "learning_rate": 5.064154196855597e-06, + "loss": 1.063, "step": 17236 }, { - "epoch": 0.488452492278047, + "epoch": 0.6744267939588388, "grad_norm": 0.0, - "learning_rate": 1.0858908956045695e-05, - "loss": 0.9109, + "learning_rate": 5.063052128379667e-06, + "loss": 1.0233, "step": 17237 }, { - "epoch": 0.48848082972030943, + "epoch": 0.6744659206510681, "grad_norm": 0.0, - "learning_rate": 1.0857994554810069e-05, - "loss": 0.8954, + "learning_rate": 5.06195013918493e-06, + "loss": 0.9742, "step": 17238 }, { - "epoch": 0.4885091671625719, + "epoch": 0.6745050473432976, "grad_norm": 0.0, - "learning_rate": 1.0857080146347236e-05, - "loss": 0.9345, + "learning_rate": 5.06084822928909e-06, + "loss": 1.0118, "step": 17239 }, { - "epoch": 0.48853750460483436, + "epoch": 0.674544174035527, "grad_norm": 0.0, - "learning_rate": 1.0856165730664898e-05, - "loss": 0.9095, + "learning_rate": 5.059746398709834e-06, + "loss": 1.1313, "step": 17240 }, { - "epoch": 0.48856584204709685, + "epoch": 0.6745833007277565, "grad_norm": 0.0, - "learning_rate": 1.085525130777076e-05, - "loss": 0.8784, + "learning_rate": 5.058644647464861e-06, + "loss": 0.8322, "step": 17241 }, { - "epoch": 0.4885941794893593, + "epoch": 0.6746224274199859, "grad_norm": 0.0, - "learning_rate": 1.0854336877672525e-05, - "loss": 0.8925, + "learning_rate": 5.057542975571862e-06, + "loss": 0.9708, "step": 17242 }, { - "epoch": 0.48862251693162173, + "epoch": 0.6746615541122154, "grad_norm": 0.0, - "learning_rate": 1.0853422440377888e-05, - "loss": 0.9379, + "learning_rate": 5.056441383048534e-06, + "loss": 1.0629, "step": 17243 }, { - "epoch": 0.4886508543738842, + "epoch": 0.6747006808044448, "grad_norm": 0.0, - "learning_rate": 1.0852507995894558e-05, - "loss": 0.9288, + "learning_rate": 5.055339869912559e-06, + "loss": 1.0075, "step": 17244 }, { - "epoch": 0.48867919181614666, + "epoch": 0.6747398074966743, "grad_norm": 0.0, - "learning_rate": 1.085159354423024e-05, - "loss": 0.8362, + "learning_rate": 5.054238436181629e-06, + "loss": 0.952, "step": 17245 }, { - "epoch": 0.48870752925840916, + "epoch": 0.6747789341889037, "grad_norm": 0.0, - "learning_rate": 1.085067908539263e-05, - "loss": 0.8746, + "learning_rate": 5.0531370818734325e-06, + "loss": 0.9767, "step": 17246 }, { - "epoch": 0.4887358667006716, + "epoch": 0.6748180608811332, "grad_norm": 0.0, - "learning_rate": 1.0849764619389436e-05, - "loss": 0.9458, + "learning_rate": 5.05203580700566e-06, + "loss": 0.9413, "step": 17247 }, { - "epoch": 0.48876420414293403, + "epoch": 0.6748571875733625, "grad_norm": 0.0, - "learning_rate": 1.0848850146228356e-05, - "loss": 0.9335, + "learning_rate": 5.050934611595988e-06, + "loss": 0.9496, "step": 17248 }, { - "epoch": 0.4887925415851965, + "epoch": 0.6748963142655919, "grad_norm": 0.0, - "learning_rate": 1.08479356659171e-05, - "loss": 0.8819, + "learning_rate": 5.049833495662106e-06, + "loss": 1.0189, "step": 17249 }, { - "epoch": 0.48882087902745897, + "epoch": 0.6749354409578214, "grad_norm": 0.0, - "learning_rate": 1.0847021178463366e-05, - "loss": 0.8785, + "learning_rate": 5.048732459221693e-06, + "loss": 0.9406, "step": 17250 }, { - "epoch": 0.48884921646972146, + "epoch": 0.6749745676500508, "grad_norm": 0.0, - "learning_rate": 1.0846106683874858e-05, - "loss": 0.8412, + "learning_rate": 5.047631502292438e-06, + "loss": 0.9973, "step": 17251 }, { - "epoch": 0.4888775539119839, + "epoch": 0.6750136943422803, "grad_norm": 0.0, - "learning_rate": 1.0845192182159276e-05, - "loss": 0.8041, + "learning_rate": 5.0465306248920096e-06, + "loss": 1.0463, "step": 17252 }, { - "epoch": 0.4889058913542464, + "epoch": 0.6750528210345097, "grad_norm": 0.0, - "learning_rate": 1.0844277673324328e-05, - "loss": 0.8458, + "learning_rate": 5.045429827038099e-06, + "loss": 0.9252, "step": 17253 }, { - "epoch": 0.48893422879650883, + "epoch": 0.6750919477267392, "grad_norm": 0.0, - "learning_rate": 1.0843363157377718e-05, - "loss": 0.9847, + "learning_rate": 5.0443291087483674e-06, + "loss": 1.097, "step": 17254 }, { - "epoch": 0.48896256623877127, + "epoch": 0.6751310744189686, "grad_norm": 0.0, - "learning_rate": 1.0842448634327146e-05, - "loss": 0.8435, + "learning_rate": 5.043228470040509e-06, + "loss": 0.9572, "step": 17255 }, { - "epoch": 0.48899090368103376, + "epoch": 0.6751702011111981, "grad_norm": 0.0, - "learning_rate": 1.0841534104180313e-05, - "loss": 0.8276, + "learning_rate": 5.042127910932185e-06, + "loss": 0.9374, "step": 17256 }, { - "epoch": 0.4890192411232962, + "epoch": 0.6752093278034275, "grad_norm": 0.0, - "learning_rate": 1.084061956694493e-05, - "loss": 0.9108, + "learning_rate": 5.041027431441079e-06, + "loss": 0.972, "step": 17257 }, { - "epoch": 0.4890475785655587, + "epoch": 0.675248454495657, "grad_norm": 0.0, - "learning_rate": 1.0839705022628698e-05, - "loss": 0.9379, + "learning_rate": 5.039927031584854e-06, + "loss": 0.9799, "step": 17258 }, { - "epoch": 0.48907591600782113, + "epoch": 0.6752875811878863, "grad_norm": 0.0, - "learning_rate": 1.0838790471239314e-05, - "loss": 0.8899, + "learning_rate": 5.038826711381186e-06, + "loss": 0.9425, "step": 17259 }, { - "epoch": 0.48910425345008357, + "epoch": 0.6753267078801158, "grad_norm": 0.0, - "learning_rate": 1.0837875912784486e-05, - "loss": 0.8185, + "learning_rate": 5.037726470847746e-06, + "loss": 1.0659, "step": 17260 }, { - "epoch": 0.48913259089234606, + "epoch": 0.6753658345723452, "grad_norm": 0.0, - "learning_rate": 1.0836961347271919e-05, - "loss": 0.8708, + "learning_rate": 5.0366263100022016e-06, + "loss": 1.0225, "step": 17261 }, { - "epoch": 0.4891609283346085, + "epoch": 0.6754049612645747, "grad_norm": 0.0, - "learning_rate": 1.0836046774709319e-05, - "loss": 0.8667, + "learning_rate": 5.035526228862218e-06, + "loss": 1.0841, "step": 17262 }, { - "epoch": 0.489189265776871, + "epoch": 0.6754440879568041, "grad_norm": 0.0, - "learning_rate": 1.0835132195104385e-05, - "loss": 0.9152, + "learning_rate": 5.0344262274454605e-06, + "loss": 0.9083, "step": 17263 }, { - "epoch": 0.48921760321913343, + "epoch": 0.6754832146490336, "grad_norm": 0.0, - "learning_rate": 1.0834217608464819e-05, - "loss": 0.8601, + "learning_rate": 5.0333263057696e-06, + "loss": 0.9651, "step": 17264 }, { - "epoch": 0.48924594066139593, + "epoch": 0.675522341341263, "grad_norm": 0.0, - "learning_rate": 1.0833303014798333e-05, - "loss": 0.8833, + "learning_rate": 5.032226463852292e-06, + "loss": 0.9941, "step": 17265 }, { - "epoch": 0.48927427810365837, + "epoch": 0.6755614680334925, "grad_norm": 0.0, - "learning_rate": 1.0832388414112623e-05, - "loss": 0.9159, + "learning_rate": 5.031126701711202e-06, + "loss": 0.8534, "step": 17266 }, { - "epoch": 0.4893026155459208, + "epoch": 0.6756005947257219, "grad_norm": 0.0, - "learning_rate": 1.0831473806415397e-05, - "loss": 0.8924, + "learning_rate": 5.03002701936399e-06, + "loss": 1.1068, "step": 17267 }, { - "epoch": 0.4893309529881833, + "epoch": 0.6756397214179514, "grad_norm": 0.0, - "learning_rate": 1.0830559191714358e-05, - "loss": 0.855, + "learning_rate": 5.028927416828321e-06, + "loss": 0.9538, "step": 17268 }, { - "epoch": 0.48935929043044574, + "epoch": 0.6756788481101808, "grad_norm": 0.0, - "learning_rate": 1.0829644570017213e-05, - "loss": 0.9471, + "learning_rate": 5.027827894121844e-06, + "loss": 0.8513, "step": 17269 }, { - "epoch": 0.48938762787270823, + "epoch": 0.6757179748024102, "grad_norm": 0.0, - "learning_rate": 1.0828729941331664e-05, - "loss": 0.9232, + "learning_rate": 5.026728451262225e-06, + "loss": 1.0269, "step": 17270 }, { - "epoch": 0.48941596531497067, + "epoch": 0.6757571014946396, "grad_norm": 0.0, - "learning_rate": 1.0827815305665413e-05, - "loss": 0.8768, + "learning_rate": 5.025629088267111e-06, + "loss": 0.947, "step": 17271 }, { - "epoch": 0.4894443027572331, + "epoch": 0.6757962281868691, "grad_norm": 0.0, - "learning_rate": 1.0826900663026166e-05, - "loss": 0.836, + "learning_rate": 5.024529805154161e-06, + "loss": 1.1027, "step": 17272 }, { - "epoch": 0.4894726401994956, + "epoch": 0.6758353548790985, "grad_norm": 0.0, - "learning_rate": 1.0825986013421626e-05, - "loss": 0.8697, + "learning_rate": 5.023430601941029e-06, + "loss": 0.8766, "step": 17273 }, { - "epoch": 0.48950097764175804, + "epoch": 0.675874481571328, "grad_norm": 0.0, - "learning_rate": 1.0825071356859502e-05, - "loss": 0.9321, + "learning_rate": 5.022331478645366e-06, + "loss": 0.9109, "step": 17274 }, { - "epoch": 0.48952931508402053, + "epoch": 0.6759136082635574, "grad_norm": 0.0, - "learning_rate": 1.0824156693347496e-05, - "loss": 0.8748, + "learning_rate": 5.021232435284821e-06, + "loss": 0.9417, "step": 17275 }, { - "epoch": 0.48955765252628297, + "epoch": 0.6759527349557868, "grad_norm": 0.0, - "learning_rate": 1.082324202289331e-05, - "loss": 0.9196, + "learning_rate": 5.020133471877044e-06, + "loss": 1.1399, "step": 17276 }, { - "epoch": 0.48958598996854547, + "epoch": 0.6759918616480163, "grad_norm": 0.0, - "learning_rate": 1.0822327345504651e-05, - "loss": 0.8873, + "learning_rate": 5.0190345884396815e-06, + "loss": 1.0128, "step": 17277 }, { - "epoch": 0.4896143274108079, + "epoch": 0.6760309883402457, "grad_norm": 0.0, - "learning_rate": 1.0821412661189225e-05, - "loss": 0.9608, + "learning_rate": 5.017935784990387e-06, + "loss": 0.9207, "step": 17278 }, { - "epoch": 0.48964266485307034, + "epoch": 0.6760701150324752, "grad_norm": 0.0, - "learning_rate": 1.0820497969954734e-05, - "loss": 0.9069, + "learning_rate": 5.0168370615467975e-06, + "loss": 1.0559, "step": 17279 }, { - "epoch": 0.48967100229533284, + "epoch": 0.6761092417247045, "grad_norm": 0.0, - "learning_rate": 1.0819583271808884e-05, - "loss": 0.8686, + "learning_rate": 5.015738418126565e-06, + "loss": 1.1141, "step": 17280 }, { - "epoch": 0.4896993397375953, + "epoch": 0.676148368416934, "grad_norm": 0.0, - "learning_rate": 1.0818668566759379e-05, - "loss": 0.8681, + "learning_rate": 5.014639854747319e-06, + "loss": 0.7852, "step": 17281 }, { - "epoch": 0.48972767717985777, + "epoch": 0.6761874951091634, "grad_norm": 0.0, - "learning_rate": 1.0817753854813926e-05, - "loss": 0.8337, + "learning_rate": 5.013541371426718e-06, + "loss": 1.0289, "step": 17282 }, { - "epoch": 0.4897560146221202, + "epoch": 0.6762266218013929, "grad_norm": 0.0, - "learning_rate": 1.0816839135980228e-05, - "loss": 0.8809, + "learning_rate": 5.0124429681823896e-06, + "loss": 0.909, "step": 17283 }, { - "epoch": 0.48978435206438264, + "epoch": 0.6762657484936223, "grad_norm": 0.0, - "learning_rate": 1.0815924410265992e-05, - "loss": 0.8994, + "learning_rate": 5.011344645031982e-06, + "loss": 1.0799, "step": 17284 }, { - "epoch": 0.48981268950664514, + "epoch": 0.6763048751858518, "grad_norm": 0.0, - "learning_rate": 1.0815009677678918e-05, - "loss": 0.8471, + "learning_rate": 5.010246401993121e-06, + "loss": 0.9809, "step": 17285 }, { - "epoch": 0.4898410269489076, + "epoch": 0.6763440018780812, "grad_norm": 0.0, - "learning_rate": 1.0814094938226716e-05, - "loss": 0.9181, + "learning_rate": 5.009148239083457e-06, + "loss": 0.8695, "step": 17286 }, { - "epoch": 0.48986936439117007, + "epoch": 0.6763831285703107, "grad_norm": 0.0, - "learning_rate": 1.0813180191917092e-05, - "loss": 1.0093, + "learning_rate": 5.008050156320615e-06, + "loss": 0.9719, "step": 17287 }, { - "epoch": 0.4898977018334325, + "epoch": 0.6764222552625401, "grad_norm": 0.0, - "learning_rate": 1.081226543875775e-05, - "loss": 0.8801, + "learning_rate": 5.006952153722236e-06, + "loss": 1.0092, "step": 17288 }, { - "epoch": 0.489926039275695, + "epoch": 0.6764613819547696, "grad_norm": 0.0, - "learning_rate": 1.0811350678756392e-05, - "loss": 0.9593, + "learning_rate": 5.005854231305945e-06, + "loss": 1.0005, "step": 17289 }, { - "epoch": 0.48995437671795744, + "epoch": 0.676500508646999, "grad_norm": 0.0, - "learning_rate": 1.081043591192073e-05, - "loss": 0.9033, + "learning_rate": 5.004756389089378e-06, + "loss": 0.9931, "step": 17290 }, { - "epoch": 0.4899827141602199, + "epoch": 0.6765396353392285, "grad_norm": 0.0, - "learning_rate": 1.080952113825846e-05, - "loss": 0.8568, + "learning_rate": 5.0036586270901624e-06, + "loss": 1.0562, "step": 17291 }, { - "epoch": 0.4900110516024824, + "epoch": 0.6765787620314578, "grad_norm": 0.0, - "learning_rate": 1.0808606357777296e-05, - "loss": 0.9063, + "learning_rate": 5.0025609453259335e-06, + "loss": 0.9028, "step": 17292 }, { - "epoch": 0.4900393890447448, + "epoch": 0.6766178887236873, "grad_norm": 0.0, - "learning_rate": 1.0807691570484937e-05, - "loss": 0.9449, + "learning_rate": 5.001463343814309e-06, + "loss": 0.8625, "step": 17293 }, { - "epoch": 0.4900677264870073, + "epoch": 0.6766570154159167, "grad_norm": 0.0, - "learning_rate": 1.0806776776389096e-05, - "loss": 0.733, + "learning_rate": 5.000365822572919e-06, + "loss": 0.9004, "step": 17294 }, { - "epoch": 0.49009606392926974, + "epoch": 0.6766961421081462, "grad_norm": 0.0, - "learning_rate": 1.0805861975497473e-05, - "loss": 0.9868, + "learning_rate": 4.999268381619391e-06, + "loss": 0.9569, "step": 17295 }, { - "epoch": 0.4901244013715322, + "epoch": 0.6767352688003756, "grad_norm": 0.0, - "learning_rate": 1.0804947167817778e-05, - "loss": 0.8842, + "learning_rate": 4.99817102097135e-06, + "loss": 1.0316, "step": 17296 }, { - "epoch": 0.4901527388137947, + "epoch": 0.6767743954926051, "grad_norm": 0.0, - "learning_rate": 1.080403235335771e-05, - "loss": 0.7911, + "learning_rate": 4.99707374064641e-06, + "loss": 1.0629, "step": 17297 }, { - "epoch": 0.4901810762560571, + "epoch": 0.6768135221848345, "grad_norm": 0.0, - "learning_rate": 1.0803117532124983e-05, - "loss": 0.9756, + "learning_rate": 4.9959765406622e-06, + "loss": 0.9746, "step": 17298 }, { - "epoch": 0.4902094136983196, + "epoch": 0.676852648877064, "grad_norm": 0.0, - "learning_rate": 1.0802202704127293e-05, - "loss": 0.8657, + "learning_rate": 4.994879421036329e-06, + "loss": 0.9936, "step": 17299 }, { - "epoch": 0.49023775114058205, + "epoch": 0.6768917755692934, "grad_norm": 0.0, - "learning_rate": 1.0801287869372356e-05, - "loss": 0.8402, + "learning_rate": 4.993782381786432e-06, + "loss": 1.0691, "step": 17300 }, { - "epoch": 0.49026608858284454, + "epoch": 0.6769309022615229, "grad_norm": 0.0, - "learning_rate": 1.0800373027867874e-05, - "loss": 0.8612, + "learning_rate": 4.992685422930111e-06, + "loss": 0.986, "step": 17301 }, { - "epoch": 0.490294426025107, + "epoch": 0.6769700289537522, "grad_norm": 0.0, - "learning_rate": 1.0799458179621552e-05, - "loss": 0.8791, + "learning_rate": 4.991588544484993e-06, + "loss": 1.009, "step": 17302 }, { - "epoch": 0.4903227634673694, + "epoch": 0.6770091556459817, "grad_norm": 0.0, - "learning_rate": 1.0798543324641096e-05, - "loss": 0.9011, + "learning_rate": 4.990491746468682e-06, + "loss": 0.8822, "step": 17303 }, { - "epoch": 0.4903511009096319, + "epoch": 0.6770482823382111, "grad_norm": 0.0, - "learning_rate": 1.0797628462934214e-05, - "loss": 0.7843, + "learning_rate": 4.989395028898797e-06, + "loss": 1.0365, "step": 17304 }, { - "epoch": 0.49037943835189435, + "epoch": 0.6770874090304405, "grad_norm": 0.0, - "learning_rate": 1.079671359450861e-05, - "loss": 0.8553, + "learning_rate": 4.98829839179295e-06, + "loss": 0.9078, "step": 17305 }, { - "epoch": 0.49040777579415684, + "epoch": 0.67712653572267, "grad_norm": 0.0, - "learning_rate": 1.079579871937199e-05, - "loss": 0.8313, + "learning_rate": 4.987201835168752e-06, + "loss": 1.1174, "step": 17306 }, { - "epoch": 0.4904361132364193, + "epoch": 0.6771656624148994, "grad_norm": 0.0, - "learning_rate": 1.0794883837532066e-05, - "loss": 0.9081, + "learning_rate": 4.98610535904381e-06, + "loss": 0.9492, "step": 17307 }, { - "epoch": 0.4904644506786817, + "epoch": 0.6772047891071289, "grad_norm": 0.0, - "learning_rate": 1.0793968948996538e-05, - "loss": 0.8114, + "learning_rate": 4.9850089634357325e-06, + "loss": 1.0425, "step": 17308 }, { - "epoch": 0.4904927881209442, + "epoch": 0.6772439157993583, "grad_norm": 0.0, - "learning_rate": 1.0793054053773118e-05, - "loss": 0.9049, + "learning_rate": 4.98391264836213e-06, + "loss": 0.8959, "step": 17309 }, { - "epoch": 0.49052112556320665, + "epoch": 0.6772830424915878, "grad_norm": 0.0, - "learning_rate": 1.0792139151869505e-05, - "loss": 0.7191, + "learning_rate": 4.982816413840601e-06, + "loss": 1.0288, "step": 17310 }, { - "epoch": 0.49054946300546914, + "epoch": 0.6773221691838172, "grad_norm": 0.0, - "learning_rate": 1.0791224243293412e-05, - "loss": 0.766, + "learning_rate": 4.9817202598887536e-06, + "loss": 1.0103, "step": 17311 }, { - "epoch": 0.4905778004477316, + "epoch": 0.6773612958760467, "grad_norm": 0.0, - "learning_rate": 1.0790309328052539e-05, - "loss": 0.9142, + "learning_rate": 4.980624186524191e-06, + "loss": 1.0039, "step": 17312 }, { - "epoch": 0.4906061378899941, + "epoch": 0.677400422568276, "grad_norm": 0.0, - "learning_rate": 1.0789394406154603e-05, - "loss": 0.8659, + "learning_rate": 4.979528193764518e-06, + "loss": 1.1285, "step": 17313 }, { - "epoch": 0.4906344753322565, + "epoch": 0.6774395492605055, "grad_norm": 0.0, - "learning_rate": 1.07884794776073e-05, - "loss": 1.0161, + "learning_rate": 4.978432281627328e-06, + "loss": 0.9448, "step": 17314 }, { - "epoch": 0.49066281277451895, + "epoch": 0.6774786759527349, "grad_norm": 0.0, - "learning_rate": 1.0787564542418346e-05, - "loss": 0.9418, + "learning_rate": 4.977336450130227e-06, + "loss": 0.9773, "step": 17315 }, { - "epoch": 0.49069115021678145, + "epoch": 0.6775178026449644, "grad_norm": 0.0, - "learning_rate": 1.0786649600595442e-05, - "loss": 0.8539, + "learning_rate": 4.976240699290799e-06, + "loss": 0.784, "step": 17316 }, { - "epoch": 0.4907194876590439, + "epoch": 0.6775569293371938, "grad_norm": 0.0, - "learning_rate": 1.0785734652146296e-05, - "loss": 0.8953, + "learning_rate": 4.97514502912666e-06, + "loss": 0.9745, "step": 17317 }, { - "epoch": 0.4907478251013064, + "epoch": 0.6775960560294233, "grad_norm": 0.0, - "learning_rate": 1.0784819697078614e-05, - "loss": 0.8865, + "learning_rate": 4.974049439655392e-06, + "loss": 1.0034, "step": 17318 }, { - "epoch": 0.4907761625435688, + "epoch": 0.6776351827216527, "grad_norm": 0.0, - "learning_rate": 1.0783904735400103e-05, - "loss": 0.9213, + "learning_rate": 4.972953930894595e-06, + "loss": 0.9635, "step": 17319 }, { - "epoch": 0.49080449998583126, + "epoch": 0.6776743094138822, "grad_norm": 0.0, - "learning_rate": 1.0782989767118475e-05, - "loss": 0.7867, + "learning_rate": 4.9718585028618546e-06, + "loss": 1.0857, "step": 17320 }, { - "epoch": 0.49083283742809375, + "epoch": 0.6777134361061116, "grad_norm": 0.0, - "learning_rate": 1.0782074792241432e-05, - "loss": 1.0515, + "learning_rate": 4.970763155574766e-06, + "loss": 0.9483, "step": 17321 }, { - "epoch": 0.4908611748703562, + "epoch": 0.6777525627983411, "grad_norm": 0.0, - "learning_rate": 1.0781159810776682e-05, - "loss": 0.8986, + "learning_rate": 4.96966788905092e-06, + "loss": 1.0316, "step": 17322 }, { - "epoch": 0.4908895123126187, + "epoch": 0.6777916894905704, "grad_norm": 0.0, - "learning_rate": 1.0780244822731935e-05, - "loss": 0.9046, + "learning_rate": 4.9685727033079066e-06, + "loss": 0.946, "step": 17323 }, { - "epoch": 0.4909178497548811, + "epoch": 0.6778308161828, "grad_norm": 0.0, - "learning_rate": 1.0779329828114895e-05, - "loss": 0.7873, + "learning_rate": 4.967477598363308e-06, + "loss": 1.109, "step": 17324 }, { - "epoch": 0.4909461871971436, + "epoch": 0.6778699428750293, "grad_norm": 0.0, - "learning_rate": 1.077841482693327e-05, - "loss": 0.8823, + "learning_rate": 4.966382574234714e-06, + "loss": 0.7799, "step": 17325 }, { - "epoch": 0.49097452463940605, + "epoch": 0.6779090695672588, "grad_norm": 0.0, - "learning_rate": 1.0777499819194766e-05, - "loss": 0.9085, + "learning_rate": 4.965287630939707e-06, + "loss": 1.0033, "step": 17326 }, { - "epoch": 0.4910028620816685, + "epoch": 0.6779481962594882, "grad_norm": 0.0, - "learning_rate": 1.0776584804907096e-05, - "loss": 0.9437, + "learning_rate": 4.964192768495876e-06, + "loss": 1.015, "step": 17327 }, { - "epoch": 0.491031199523931, + "epoch": 0.6779873229517177, "grad_norm": 0.0, - "learning_rate": 1.0775669784077961e-05, - "loss": 0.9459, + "learning_rate": 4.963097986920795e-06, + "loss": 0.9371, "step": 17328 }, { - "epoch": 0.4910595369661934, + "epoch": 0.6780264496439471, "grad_norm": 0.0, - "learning_rate": 1.0774754756715074e-05, - "loss": 0.9209, + "learning_rate": 4.9620032862320535e-06, + "loss": 1.0962, "step": 17329 }, { - "epoch": 0.4910878744084559, + "epoch": 0.6780655763361766, "grad_norm": 0.0, - "learning_rate": 1.0773839722826137e-05, - "loss": 0.8882, + "learning_rate": 4.960908666447217e-06, + "loss": 1.0708, "step": 17330 }, { - "epoch": 0.49111621185071835, + "epoch": 0.678104703028406, "grad_norm": 0.0, - "learning_rate": 1.0772924682418862e-05, - "loss": 0.8673, + "learning_rate": 4.9598141275838814e-06, + "loss": 0.9467, "step": 17331 }, { - "epoch": 0.4911445492929808, + "epoch": 0.6781438297206355, "grad_norm": 0.0, - "learning_rate": 1.0772009635500952e-05, - "loss": 0.9475, + "learning_rate": 4.958719669659612e-06, + "loss": 0.9818, "step": 17332 }, { - "epoch": 0.4911728867352433, + "epoch": 0.6781829564128649, "grad_norm": 0.0, - "learning_rate": 1.0771094582080124e-05, - "loss": 0.9059, + "learning_rate": 4.957625292691991e-06, + "loss": 1.0594, "step": 17333 }, { - "epoch": 0.4912012241775057, + "epoch": 0.6782220831050942, "grad_norm": 0.0, - "learning_rate": 1.0770179522164079e-05, - "loss": 0.9017, + "learning_rate": 4.956530996698581e-06, + "loss": 0.9853, "step": 17334 }, { - "epoch": 0.4912295616197682, + "epoch": 0.6782612097973237, "grad_norm": 0.0, - "learning_rate": 1.0769264455760521e-05, - "loss": 0.8299, + "learning_rate": 4.955436781696972e-06, + "loss": 0.8957, "step": 17335 }, { - "epoch": 0.49125789906203066, + "epoch": 0.6783003364895531, "grad_norm": 0.0, - "learning_rate": 1.0768349382877168e-05, - "loss": 0.9379, + "learning_rate": 4.954342647704723e-06, + "loss": 0.9689, "step": 17336 }, { - "epoch": 0.49128623650429315, + "epoch": 0.6783394631817826, "grad_norm": 0.0, - "learning_rate": 1.076743430352172e-05, - "loss": 0.8283, + "learning_rate": 4.953248594739412e-06, + "loss": 1.02, "step": 17337 }, { - "epoch": 0.4913145739465556, + "epoch": 0.678378589874012, "grad_norm": 0.0, - "learning_rate": 1.076651921770189e-05, - "loss": 0.9069, + "learning_rate": 4.952154622818601e-06, + "loss": 0.8723, "step": 17338 }, { - "epoch": 0.491342911388818, + "epoch": 0.6784177165662415, "grad_norm": 0.0, - "learning_rate": 1.0765604125425381e-05, - "loss": 0.9766, + "learning_rate": 4.951060731959861e-06, + "loss": 1.0371, "step": 17339 }, { - "epoch": 0.4913712488310805, + "epoch": 0.6784568432584709, "grad_norm": 0.0, - "learning_rate": 1.0764689026699909e-05, - "loss": 0.8276, + "learning_rate": 4.94996692218076e-06, + "loss": 1.008, "step": 17340 }, { - "epoch": 0.49139958627334296, + "epoch": 0.6784959699507004, "grad_norm": 0.0, - "learning_rate": 1.0763773921533174e-05, - "loss": 0.7896, + "learning_rate": 4.948873193498866e-06, + "loss": 1.0056, "step": 17341 }, { - "epoch": 0.49142792371560545, + "epoch": 0.6785350966429298, "grad_norm": 0.0, - "learning_rate": 1.076285880993289e-05, - "loss": 0.8667, + "learning_rate": 4.947779545931734e-06, + "loss": 0.9006, "step": 17342 }, { - "epoch": 0.4914562611578679, + "epoch": 0.6785742233351593, "grad_norm": 0.0, - "learning_rate": 1.0761943691906758e-05, - "loss": 0.8793, + "learning_rate": 4.946685979496933e-06, + "loss": 1.0063, "step": 17343 }, { - "epoch": 0.49148459860013033, + "epoch": 0.6786133500273887, "grad_norm": 0.0, - "learning_rate": 1.0761028567462492e-05, - "loss": 0.8285, + "learning_rate": 4.9455924942120215e-06, + "loss": 0.9276, "step": 17344 }, { - "epoch": 0.4915129360423928, + "epoch": 0.6786524767196181, "grad_norm": 0.0, - "learning_rate": 1.0760113436607804e-05, - "loss": 0.8767, + "learning_rate": 4.944499090094567e-06, + "loss": 1.006, "step": 17345 }, { - "epoch": 0.49154127348465526, + "epoch": 0.6786916034118475, "grad_norm": 0.0, - "learning_rate": 1.0759198299350398e-05, - "loss": 0.9229, + "learning_rate": 4.943405767162116e-06, + "loss": 1.0035, "step": 17346 }, { - "epoch": 0.49156961092691775, + "epoch": 0.678730730104077, "grad_norm": 0.0, - "learning_rate": 1.075828315569798e-05, - "loss": 0.9792, + "learning_rate": 4.942312525432238e-06, + "loss": 0.8788, "step": 17347 }, { - "epoch": 0.4915979483691802, + "epoch": 0.6787698567963064, "grad_norm": 0.0, - "learning_rate": 1.0757368005658264e-05, - "loss": 0.9055, + "learning_rate": 4.941219364922478e-06, + "loss": 0.9974, "step": 17348 }, { - "epoch": 0.49162628581144263, + "epoch": 0.6788089834885359, "grad_norm": 0.0, - "learning_rate": 1.0756452849238955e-05, - "loss": 0.8993, + "learning_rate": 4.940126285650396e-06, + "loss": 1.0456, "step": 17349 }, { - "epoch": 0.4916546232537051, + "epoch": 0.6788481101807653, "grad_norm": 0.0, - "learning_rate": 1.075553768644776e-05, - "loss": 0.8857, + "learning_rate": 4.9390332876335466e-06, + "loss": 1.0011, "step": 17350 }, { - "epoch": 0.49168296069596756, + "epoch": 0.6788872368729948, "grad_norm": 0.0, - "learning_rate": 1.0754622517292393e-05, - "loss": 0.9618, + "learning_rate": 4.937940370889483e-06, + "loss": 0.9885, "step": 17351 }, { - "epoch": 0.49171129813823006, + "epoch": 0.6789263635652242, "grad_norm": 0.0, - "learning_rate": 1.0753707341780562e-05, - "loss": 0.8988, + "learning_rate": 4.936847535435753e-06, + "loss": 1.0711, "step": 17352 }, { - "epoch": 0.4917396355804925, + "epoch": 0.6789654902574537, "grad_norm": 0.0, - "learning_rate": 1.075279215991997e-05, - "loss": 0.7666, + "learning_rate": 4.935754781289904e-06, + "loss": 1.0162, "step": 17353 }, { - "epoch": 0.491767973022755, + "epoch": 0.6790046169496831, "grad_norm": 0.0, - "learning_rate": 1.0751876971718336e-05, - "loss": 1.0208, + "learning_rate": 4.934662108469489e-06, + "loss": 0.9864, "step": 17354 }, { - "epoch": 0.4917963104650174, + "epoch": 0.6790437436419126, "grad_norm": 0.0, - "learning_rate": 1.0750961777183357e-05, - "loss": 0.8627, + "learning_rate": 4.933569516992057e-06, + "loss": 0.944, "step": 17355 }, { - "epoch": 0.49182464790727987, + "epoch": 0.6790828703341419, "grad_norm": 0.0, - "learning_rate": 1.0750046576322752e-05, - "loss": 0.8737, + "learning_rate": 4.9324770068751456e-06, + "loss": 1.0633, "step": 17356 }, { - "epoch": 0.49185298534954236, + "epoch": 0.6791219970263714, "grad_norm": 0.0, - "learning_rate": 1.0749131369144224e-05, - "loss": 0.8668, + "learning_rate": 4.931384578136303e-06, + "loss": 0.9858, "step": 17357 }, { - "epoch": 0.4918813227918048, + "epoch": 0.6791611237186008, "grad_norm": 0.0, - "learning_rate": 1.0748216155655484e-05, - "loss": 0.9446, + "learning_rate": 4.930292230793078e-06, + "loss": 0.9083, "step": 17358 }, { - "epoch": 0.4919096602340673, + "epoch": 0.6792002504108303, "grad_norm": 0.0, - "learning_rate": 1.0747300935864245e-05, - "loss": 0.8693, + "learning_rate": 4.9291999648630025e-06, + "loss": 0.9829, "step": 17359 }, { - "epoch": 0.49193799767632973, + "epoch": 0.6792393771030597, "grad_norm": 0.0, - "learning_rate": 1.0746385709778209e-05, - "loss": 0.989, + "learning_rate": 4.928107780363622e-06, + "loss": 1.1408, "step": 17360 }, { - "epoch": 0.49196633511859217, + "epoch": 0.6792785037952892, "grad_norm": 0.0, - "learning_rate": 1.0745470477405091e-05, - "loss": 0.8272, + "learning_rate": 4.927015677312474e-06, + "loss": 0.9582, "step": 17361 }, { - "epoch": 0.49199467256085466, + "epoch": 0.6793176304875186, "grad_norm": 0.0, - "learning_rate": 1.0744555238752598e-05, - "loss": 0.9955, + "learning_rate": 4.925923655727103e-06, + "loss": 0.9872, "step": 17362 }, { - "epoch": 0.4920230100031171, + "epoch": 0.679356757179748, "grad_norm": 0.0, - "learning_rate": 1.074363999382844e-05, - "loss": 0.8625, + "learning_rate": 4.924831715625035e-06, + "loss": 1.0173, "step": 17363 }, { - "epoch": 0.4920513474453796, + "epoch": 0.6793958838719775, "grad_norm": 0.0, - "learning_rate": 1.0742724742640323e-05, - "loss": 0.9642, + "learning_rate": 4.9237398570238135e-06, + "loss": 0.9489, "step": 17364 }, { - "epoch": 0.49207968488764203, + "epoch": 0.6794350105642069, "grad_norm": 0.0, - "learning_rate": 1.0741809485195962e-05, - "loss": 0.8623, + "learning_rate": 4.922648079940962e-06, + "loss": 0.9285, "step": 17365 }, { - "epoch": 0.4921080223299045, + "epoch": 0.6794741372564364, "grad_norm": 0.0, - "learning_rate": 1.0740894221503067e-05, - "loss": 0.9371, + "learning_rate": 4.921556384394028e-06, + "loss": 0.9895, "step": 17366 }, { - "epoch": 0.49213635977216696, + "epoch": 0.6795132639486657, "grad_norm": 0.0, - "learning_rate": 1.0739978951569343e-05, - "loss": 0.9463, + "learning_rate": 4.92046477040053e-06, + "loss": 0.8533, "step": 17367 }, { - "epoch": 0.4921646972144294, + "epoch": 0.6795523906408952, "grad_norm": 0.0, - "learning_rate": 1.0739063675402499e-05, - "loss": 0.9527, + "learning_rate": 4.919373237978007e-06, + "loss": 0.9772, "step": 17368 }, { - "epoch": 0.4921930346566919, + "epoch": 0.6795915173331246, "grad_norm": 0.0, - "learning_rate": 1.0738148393010251e-05, - "loss": 0.8923, + "learning_rate": 4.9182817871439804e-06, + "loss": 0.9536, "step": 17369 }, { - "epoch": 0.49222137209895434, + "epoch": 0.6796306440253541, "grad_norm": 0.0, - "learning_rate": 1.07372331044003e-05, - "loss": 0.8899, + "learning_rate": 4.917190417915979e-06, + "loss": 1.0517, "step": 17370 }, { - "epoch": 0.49224970954121683, + "epoch": 0.6796697707175835, "grad_norm": 0.0, - "learning_rate": 1.0736317809580365e-05, - "loss": 0.8317, + "learning_rate": 4.916099130311531e-06, + "loss": 0.9103, "step": 17371 }, { - "epoch": 0.49227804698347927, + "epoch": 0.679708897409813, "grad_norm": 0.0, - "learning_rate": 1.073540250855815e-05, - "loss": 0.8843, + "learning_rate": 4.915007924348165e-06, + "loss": 0.9294, "step": 17372 }, { - "epoch": 0.4923063844257417, + "epoch": 0.6797480241020424, "grad_norm": 0.0, - "learning_rate": 1.0734487201341368e-05, - "loss": 0.9796, + "learning_rate": 4.913916800043396e-06, + "loss": 0.9619, "step": 17373 }, { - "epoch": 0.4923347218680042, + "epoch": 0.6797871507942719, "grad_norm": 0.0, - "learning_rate": 1.0733571887937726e-05, - "loss": 0.8784, + "learning_rate": 4.91282575741475e-06, + "loss": 0.8942, "step": 17374 }, { - "epoch": 0.49236305931026664, + "epoch": 0.6798262774865013, "grad_norm": 0.0, - "learning_rate": 1.0732656568354938e-05, - "loss": 0.856, + "learning_rate": 4.911734796479747e-06, + "loss": 1.0087, "step": 17375 }, { - "epoch": 0.49239139675252913, + "epoch": 0.6798654041787308, "grad_norm": 0.0, - "learning_rate": 1.0731741242600709e-05, - "loss": 0.9035, + "learning_rate": 4.910643917255911e-06, + "loss": 0.9489, "step": 17376 }, { - "epoch": 0.49241973419479157, + "epoch": 0.6799045308709601, "grad_norm": 0.0, - "learning_rate": 1.0730825910682751e-05, - "loss": 0.8554, + "learning_rate": 4.909553119760753e-06, + "loss": 1.1925, "step": 17377 }, { - "epoch": 0.49244807163705406, + "epoch": 0.6799436575631896, "grad_norm": 0.0, - "learning_rate": 1.0729910572608776e-05, - "loss": 0.8431, + "learning_rate": 4.908462404011797e-06, + "loss": 0.9384, "step": 17378 }, { - "epoch": 0.4924764090793165, + "epoch": 0.679982784255419, "grad_norm": 0.0, - "learning_rate": 1.0728995228386496e-05, - "loss": 0.8955, + "learning_rate": 4.9073717700265465e-06, + "loss": 0.9736, "step": 17379 }, { - "epoch": 0.49250474652157894, + "epoch": 0.6800219109476485, "grad_norm": 0.0, - "learning_rate": 1.0728079878023617e-05, - "loss": 0.8156, + "learning_rate": 4.906281217822532e-06, + "loss": 0.9443, "step": 17380 }, { - "epoch": 0.49253308396384143, + "epoch": 0.6800610376398779, "grad_norm": 0.0, - "learning_rate": 1.0727164521527848e-05, - "loss": 0.8939, + "learning_rate": 4.905190747417256e-06, + "loss": 1.0862, "step": 17381 }, { - "epoch": 0.49256142140610387, + "epoch": 0.6801001643321074, "grad_norm": 0.0, - "learning_rate": 1.0726249158906908e-05, - "loss": 0.9092, + "learning_rate": 4.904100358828234e-06, + "loss": 1.0446, "step": 17382 }, { - "epoch": 0.49258975884836637, + "epoch": 0.6801392910243368, "grad_norm": 0.0, - "learning_rate": 1.0725333790168496e-05, - "loss": 0.9064, + "learning_rate": 4.9030100520729684e-06, + "loss": 0.9433, "step": 17383 }, { - "epoch": 0.4926180962906288, + "epoch": 0.6801784177165663, "grad_norm": 0.0, - "learning_rate": 1.0724418415320328e-05, - "loss": 0.8605, + "learning_rate": 4.901919827168982e-06, + "loss": 0.9173, "step": 17384 }, { - "epoch": 0.49264643373289124, + "epoch": 0.6802175444087957, "grad_norm": 0.0, - "learning_rate": 1.0723503034370117e-05, - "loss": 0.949, + "learning_rate": 4.90082968413377e-06, + "loss": 0.9151, "step": 17385 }, { - "epoch": 0.49267477117515374, + "epoch": 0.6802566711010252, "grad_norm": 0.0, - "learning_rate": 1.0722587647325573e-05, - "loss": 0.9611, + "learning_rate": 4.899739622984848e-06, + "loss": 1.0362, "step": 17386 }, { - "epoch": 0.4927031086174162, + "epoch": 0.6802957977932546, "grad_norm": 0.0, - "learning_rate": 1.0721672254194404e-05, - "loss": 0.8323, + "learning_rate": 4.898649643739714e-06, + "loss": 1.0514, "step": 17387 }, { - "epoch": 0.49273144605967867, + "epoch": 0.680334924485484, "grad_norm": 0.0, - "learning_rate": 1.0720756854984322e-05, - "loss": 0.7914, + "learning_rate": 4.897559746415873e-06, + "loss": 1.0725, "step": 17388 }, { - "epoch": 0.4927597835019411, + "epoch": 0.6803740511777134, "grad_norm": 0.0, - "learning_rate": 1.0719841449703035e-05, - "loss": 0.8883, + "learning_rate": 4.896469931030829e-06, + "loss": 1.097, "step": 17389 }, { - "epoch": 0.4927881209442036, + "epoch": 0.6804131778699428, "grad_norm": 0.0, - "learning_rate": 1.0718926038358256e-05, - "loss": 0.8657, + "learning_rate": 4.895380197602088e-06, + "loss": 1.0325, "step": 17390 }, { - "epoch": 0.49281645838646604, + "epoch": 0.6804523045621723, "grad_norm": 0.0, - "learning_rate": 1.0718010620957697e-05, - "loss": 0.8958, + "learning_rate": 4.894290546147139e-06, + "loss": 1.0509, "step": 17391 }, { - "epoch": 0.4928447958287285, + "epoch": 0.6804914312544017, "grad_norm": 0.0, - "learning_rate": 1.0717095197509068e-05, - "loss": 0.91, + "learning_rate": 4.893200976683486e-06, + "loss": 0.9972, "step": 17392 }, { - "epoch": 0.49287313327099097, + "epoch": 0.6805305579466312, "grad_norm": 0.0, - "learning_rate": 1.071617976802008e-05, - "loss": 0.9572, + "learning_rate": 4.892111489228628e-06, + "loss": 0.9764, "step": 17393 }, { - "epoch": 0.4929014707132534, + "epoch": 0.6805696846388606, "grad_norm": 0.0, - "learning_rate": 1.0715264332498445e-05, - "loss": 0.9041, + "learning_rate": 4.891022083800061e-06, + "loss": 0.9979, "step": 17394 }, { - "epoch": 0.4929298081555159, + "epoch": 0.6806088113310901, "grad_norm": 0.0, - "learning_rate": 1.0714348890951871e-05, - "loss": 0.8568, + "learning_rate": 4.889932760415275e-06, + "loss": 1.0298, "step": 17395 }, { - "epoch": 0.49295814559777834, + "epoch": 0.6806479380233195, "grad_norm": 0.0, - "learning_rate": 1.071343344338807e-05, - "loss": 0.9041, + "learning_rate": 4.888843519091768e-06, + "loss": 1.0397, "step": 17396 }, { - "epoch": 0.4929864830400408, + "epoch": 0.680687064715549, "grad_norm": 0.0, - "learning_rate": 1.0712517989814754e-05, - "loss": 0.8568, + "learning_rate": 4.887754359847026e-06, + "loss": 1.0646, "step": 17397 }, { - "epoch": 0.4930148204823033, + "epoch": 0.6807261914077783, "grad_norm": 0.0, - "learning_rate": 1.071160253023964e-05, - "loss": 0.9727, + "learning_rate": 4.886665282698544e-06, + "loss": 0.9233, "step": 17398 }, { - "epoch": 0.4930431579245657, + "epoch": 0.6807653181000078, "grad_norm": 0.0, - "learning_rate": 1.071068706467043e-05, - "loss": 0.8282, + "learning_rate": 4.885576287663809e-06, + "loss": 0.8954, "step": 17399 }, { - "epoch": 0.4930714953668282, + "epoch": 0.6808044447922372, "grad_norm": 0.0, - "learning_rate": 1.070977159311484e-05, - "loss": 0.9776, + "learning_rate": 4.884487374760314e-06, + "loss": 1.0213, "step": 17400 }, { - "epoch": 0.49309983280909064, + "epoch": 0.6808435714844667, "grad_norm": 0.0, - "learning_rate": 1.0708856115580578e-05, - "loss": 0.966, + "learning_rate": 4.883398544005539e-06, + "loss": 1.0755, "step": 17401 }, { - "epoch": 0.49312817025135314, + "epoch": 0.6808826981766961, "grad_norm": 0.0, - "learning_rate": 1.070794063207536e-05, - "loss": 0.8448, + "learning_rate": 4.8823097954169705e-06, + "loss": 0.9655, "step": 17402 }, { - "epoch": 0.4931565076936156, + "epoch": 0.6809218248689256, "grad_norm": 0.0, - "learning_rate": 1.0707025142606893e-05, - "loss": 0.8852, + "learning_rate": 4.881221129012098e-06, + "loss": 1.0612, "step": 17403 }, { - "epoch": 0.493184845135878, + "epoch": 0.680960951561155, "grad_norm": 0.0, - "learning_rate": 1.0706109647182891e-05, - "loss": 0.9413, + "learning_rate": 4.880132544808397e-06, + "loss": 1.0278, "step": 17404 }, { - "epoch": 0.4932131825781405, + "epoch": 0.6810000782533845, "grad_norm": 0.0, - "learning_rate": 1.0705194145811066e-05, - "loss": 0.8838, + "learning_rate": 4.879044042823351e-06, + "loss": 0.9555, "step": 17405 }, { - "epoch": 0.49324152002040295, + "epoch": 0.6810392049456139, "grad_norm": 0.0, - "learning_rate": 1.0704278638499128e-05, - "loss": 0.8077, + "learning_rate": 4.877955623074441e-06, + "loss": 0.9614, "step": 17406 }, { - "epoch": 0.49326985746266544, + "epoch": 0.6810783316378434, "grad_norm": 0.0, - "learning_rate": 1.0703363125254792e-05, - "loss": 0.8053, + "learning_rate": 4.876867285579149e-06, + "loss": 1.0917, "step": 17407 }, { - "epoch": 0.4932981949049279, + "epoch": 0.6811174583300728, "grad_norm": 0.0, - "learning_rate": 1.0702447606085767e-05, - "loss": 0.7859, + "learning_rate": 4.875779030354946e-06, + "loss": 1.0595, "step": 17408 }, { - "epoch": 0.4933265323471903, + "epoch": 0.6811565850223023, "grad_norm": 0.0, - "learning_rate": 1.0701532080999762e-05, - "loss": 0.8795, + "learning_rate": 4.874690857419313e-06, + "loss": 0.946, "step": 17409 }, { - "epoch": 0.4933548697894528, + "epoch": 0.6811957117145316, "grad_norm": 0.0, - "learning_rate": 1.0700616550004492e-05, - "loss": 0.8985, + "learning_rate": 4.873602766789715e-06, + "loss": 1.0928, "step": 17410 }, { - "epoch": 0.49338320723171525, + "epoch": 0.6812348384067611, "grad_norm": 0.0, - "learning_rate": 1.069970101310767e-05, - "loss": 0.9544, + "learning_rate": 4.872514758483642e-06, + "loss": 1.0463, "step": 17411 }, { - "epoch": 0.49341154467397774, + "epoch": 0.6812739650989905, "grad_norm": 0.0, - "learning_rate": 1.0698785470317008e-05, - "loss": 0.8958, + "learning_rate": 4.871426832518552e-06, + "loss": 0.8758, "step": 17412 }, { - "epoch": 0.4934398821162402, + "epoch": 0.68131309179122, "grad_norm": 0.0, - "learning_rate": 1.0697869921640216e-05, - "loss": 0.9889, + "learning_rate": 4.870338988911924e-06, + "loss": 0.9112, "step": 17413 }, { - "epoch": 0.4934682195585027, + "epoch": 0.6813522184834494, "grad_norm": 0.0, - "learning_rate": 1.0696954367085004e-05, - "loss": 0.7845, + "learning_rate": 4.869251227681221e-06, + "loss": 0.9284, "step": 17414 }, { - "epoch": 0.4934965570007651, + "epoch": 0.6813913451756789, "grad_norm": 0.0, - "learning_rate": 1.0696038806659087e-05, - "loss": 0.8928, + "learning_rate": 4.868163548843914e-06, + "loss": 1.0844, "step": 17415 }, { - "epoch": 0.49352489444302755, + "epoch": 0.6814304718679083, "grad_norm": 0.0, - "learning_rate": 1.0695123240370178e-05, - "loss": 0.8627, + "learning_rate": 4.867075952417469e-06, + "loss": 1.0234, "step": 17416 }, { - "epoch": 0.49355323188529004, + "epoch": 0.6814695985601378, "grad_norm": 0.0, - "learning_rate": 1.0694207668225989e-05, - "loss": 1.0263, + "learning_rate": 4.865988438419357e-06, + "loss": 0.9128, "step": 17417 }, { - "epoch": 0.4935815693275525, + "epoch": 0.6815087252523672, "grad_norm": 0.0, - "learning_rate": 1.0693292090234228e-05, - "loss": 0.952, + "learning_rate": 4.864901006867033e-06, + "loss": 0.851, "step": 17418 }, { - "epoch": 0.493609906769815, + "epoch": 0.6815478519445965, "grad_norm": 0.0, - "learning_rate": 1.0692376506402614e-05, - "loss": 0.9047, + "learning_rate": 4.863813657777965e-06, + "loss": 1.1227, "step": 17419 }, { - "epoch": 0.4936382442120774, + "epoch": 0.681586978636826, "grad_norm": 0.0, - "learning_rate": 1.0691460916738854e-05, - "loss": 0.898, + "learning_rate": 4.862726391169613e-06, + "loss": 0.9624, "step": 17420 }, { - "epoch": 0.49366658165433985, + "epoch": 0.6816261053290554, "grad_norm": 0.0, - "learning_rate": 1.069054532125066e-05, - "loss": 0.8164, + "learning_rate": 4.861639207059442e-06, + "loss": 0.9456, "step": 17421 }, { - "epoch": 0.49369491909660235, + "epoch": 0.6816652320212849, "grad_norm": 0.0, - "learning_rate": 1.0689629719945746e-05, - "loss": 0.8918, + "learning_rate": 4.8605521054649016e-06, + "loss": 1.0344, "step": 17422 }, { - "epoch": 0.4937232565388648, + "epoch": 0.6817043587135143, "grad_norm": 0.0, - "learning_rate": 1.0688714112831826e-05, - "loss": 0.8499, + "learning_rate": 4.859465086403457e-06, + "loss": 0.9344, "step": 17423 }, { - "epoch": 0.4937515939811273, + "epoch": 0.6817434854057438, "grad_norm": 0.0, - "learning_rate": 1.0687798499916613e-05, - "loss": 0.8918, + "learning_rate": 4.858378149892559e-06, + "loss": 0.9814, "step": 17424 }, { - "epoch": 0.4937799314233897, + "epoch": 0.6817826120979732, "grad_norm": 0.0, - "learning_rate": 1.0686882881207818e-05, - "loss": 0.8966, + "learning_rate": 4.85729129594967e-06, + "loss": 0.9363, "step": 17425 }, { - "epoch": 0.4938082688656522, + "epoch": 0.6818217387902027, "grad_norm": 0.0, - "learning_rate": 1.068596725671315e-05, - "loss": 0.9552, + "learning_rate": 4.856204524592234e-06, + "loss": 0.9448, "step": 17426 }, { - "epoch": 0.49383660630791465, + "epoch": 0.6818608654824321, "grad_norm": 0.0, - "learning_rate": 1.0685051626440328e-05, - "loss": 0.7915, + "learning_rate": 4.855117835837713e-06, + "loss": 1.1708, "step": 17427 }, { - "epoch": 0.4938649437501771, + "epoch": 0.6818999921746616, "grad_norm": 0.0, - "learning_rate": 1.0684135990397062e-05, - "loss": 0.953, + "learning_rate": 4.854031229703544e-06, + "loss": 0.945, "step": 17428 }, { - "epoch": 0.4938932811924396, + "epoch": 0.681939118866891, "grad_norm": 0.0, - "learning_rate": 1.068322034859106e-05, - "loss": 0.8438, + "learning_rate": 4.8529447062071935e-06, + "loss": 0.885, "step": 17429 }, { - "epoch": 0.493921618634702, + "epoch": 0.6819782455591205, "grad_norm": 0.0, - "learning_rate": 1.0682304701030044e-05, - "loss": 0.9071, + "learning_rate": 4.851858265366098e-06, + "loss": 0.867, "step": 17430 }, { - "epoch": 0.4939499560769645, + "epoch": 0.6820173722513498, "grad_norm": 0.0, - "learning_rate": 1.0681389047721722e-05, - "loss": 0.8487, + "learning_rate": 4.85077190719771e-06, + "loss": 1.1138, "step": 17431 }, { - "epoch": 0.49397829351922695, + "epoch": 0.6820564989435793, "grad_norm": 0.0, - "learning_rate": 1.0680473388673807e-05, - "loss": 0.9224, + "learning_rate": 4.849685631719465e-06, + "loss": 0.9921, "step": 17432 }, { - "epoch": 0.4940066309614894, + "epoch": 0.6820956256358087, "grad_norm": 0.0, - "learning_rate": 1.0679557723894009e-05, - "loss": 0.9199, + "learning_rate": 4.848599438948825e-06, + "loss": 0.8253, "step": 17433 }, { - "epoch": 0.4940349684037519, + "epoch": 0.6821347523280382, "grad_norm": 0.0, - "learning_rate": 1.0678642053390045e-05, - "loss": 0.8513, + "learning_rate": 4.847513328903217e-06, + "loss": 1.0114, "step": 17434 }, { - "epoch": 0.4940633058460143, + "epoch": 0.6821738790202676, "grad_norm": 0.0, - "learning_rate": 1.0677726377169628e-05, - "loss": 0.8946, + "learning_rate": 4.846427301600093e-06, + "loss": 0.9142, "step": 17435 }, { - "epoch": 0.4940916432882768, + "epoch": 0.6822130057124971, "grad_norm": 0.0, - "learning_rate": 1.0676810695240469e-05, - "loss": 0.9144, + "learning_rate": 4.845341357056885e-06, + "loss": 1.0898, "step": 17436 }, { - "epoch": 0.49411998073053925, + "epoch": 0.6822521324047265, "grad_norm": 0.0, - "learning_rate": 1.0675895007610285e-05, - "loss": 0.8893, + "learning_rate": 4.844255495291036e-06, + "loss": 1.0331, "step": 17437 }, { - "epoch": 0.49414831817280175, + "epoch": 0.682291259096956, "grad_norm": 0.0, - "learning_rate": 1.0674979314286782e-05, - "loss": 0.8888, + "learning_rate": 4.843169716319983e-06, + "loss": 0.9996, "step": 17438 }, { - "epoch": 0.4941766556150642, + "epoch": 0.6823303857891854, "grad_norm": 0.0, - "learning_rate": 1.0674063615277681e-05, - "loss": 0.881, + "learning_rate": 4.8420840201611665e-06, + "loss": 0.9609, "step": 17439 }, { - "epoch": 0.4942049930573266, + "epoch": 0.6823695124814149, "grad_norm": 0.0, - "learning_rate": 1.0673147910590691e-05, - "loss": 0.8971, + "learning_rate": 4.840998406832013e-06, + "loss": 1.0073, "step": 17440 }, { - "epoch": 0.4942333304995891, + "epoch": 0.6824086391736442, "grad_norm": 0.0, - "learning_rate": 1.0672232200233525e-05, - "loss": 0.9067, + "learning_rate": 4.839912876349961e-06, + "loss": 0.9587, "step": 17441 }, { - "epoch": 0.49426166794185156, + "epoch": 0.6824477658658737, "grad_norm": 0.0, - "learning_rate": 1.0671316484213899e-05, - "loss": 0.9216, + "learning_rate": 4.838827428732446e-06, + "loss": 0.9335, "step": 17442 }, { - "epoch": 0.49429000538411405, + "epoch": 0.6824868925581031, "grad_norm": 0.0, - "learning_rate": 1.0670400762539524e-05, - "loss": 0.9093, + "learning_rate": 4.837742063996891e-06, + "loss": 1.0422, "step": 17443 }, { - "epoch": 0.4943183428263765, + "epoch": 0.6825260192503326, "grad_norm": 0.0, - "learning_rate": 1.0669485035218114e-05, - "loss": 0.8804, + "learning_rate": 4.83665678216073e-06, + "loss": 0.973, "step": 17444 }, { - "epoch": 0.4943466802686389, + "epoch": 0.682565145942562, "grad_norm": 0.0, - "learning_rate": 1.0668569302257385e-05, - "loss": 0.9759, + "learning_rate": 4.835571583241395e-06, + "loss": 0.916, "step": 17445 }, { - "epoch": 0.4943750177109014, + "epoch": 0.6826042726347915, "grad_norm": 0.0, - "learning_rate": 1.0667653563665049e-05, - "loss": 0.8889, + "learning_rate": 4.834486467256306e-06, + "loss": 0.9285, "step": 17446 }, { - "epoch": 0.49440335515316386, + "epoch": 0.6826433993270209, "grad_norm": 0.0, - "learning_rate": 1.0666737819448816e-05, - "loss": 0.9833, + "learning_rate": 4.83340143422289e-06, + "loss": 0.9804, "step": 17447 }, { - "epoch": 0.49443169259542635, + "epoch": 0.6826825260192503, "grad_norm": 0.0, - "learning_rate": 1.0665822069616404e-05, - "loss": 0.8474, + "learning_rate": 4.832316484158577e-06, + "loss": 0.8599, "step": 17448 }, { - "epoch": 0.4944600300376888, + "epoch": 0.6827216527114798, "grad_norm": 0.0, - "learning_rate": 1.0664906314175525e-05, - "loss": 0.8934, + "learning_rate": 4.831231617080783e-06, + "loss": 1.0212, "step": 17449 }, { - "epoch": 0.4944883674799513, + "epoch": 0.6827607794037092, "grad_norm": 0.0, - "learning_rate": 1.0663990553133896e-05, - "loss": 0.9157, + "learning_rate": 4.830146833006931e-06, + "loss": 0.9833, "step": 17450 }, { - "epoch": 0.4945167049222137, + "epoch": 0.6827999060959387, "grad_norm": 0.0, - "learning_rate": 1.0663074786499223e-05, - "loss": 0.9613, + "learning_rate": 4.829062131954444e-06, + "loss": 0.8878, "step": 17451 }, { - "epoch": 0.49454504236447616, + "epoch": 0.682839032788168, "grad_norm": 0.0, - "learning_rate": 1.066215901427923e-05, - "loss": 0.8685, + "learning_rate": 4.827977513940742e-06, + "loss": 1.0077, "step": 17452 }, { - "epoch": 0.49457337980673866, + "epoch": 0.6828781594803975, "grad_norm": 0.0, - "learning_rate": 1.0661243236481624e-05, - "loss": 1.0286, + "learning_rate": 4.826892978983238e-06, + "loss": 1.0898, "step": 17453 }, { - "epoch": 0.4946017172490011, + "epoch": 0.6829172861726269, "grad_norm": 0.0, - "learning_rate": 1.0660327453114118e-05, - "loss": 0.8534, + "learning_rate": 4.8258085270993525e-06, + "loss": 0.9717, "step": 17454 }, { - "epoch": 0.4946300546912636, + "epoch": 0.6829564128648564, "grad_norm": 0.0, - "learning_rate": 1.065941166418443e-05, - "loss": 0.8701, + "learning_rate": 4.824724158306492e-06, + "loss": 1.1072, "step": 17455 }, { - "epoch": 0.494658392133526, + "epoch": 0.6829955395570858, "grad_norm": 0.0, - "learning_rate": 1.0658495869700273e-05, - "loss": 0.8958, + "learning_rate": 4.823639872622084e-06, + "loss": 1.0188, "step": 17456 }, { - "epoch": 0.49468672957578846, + "epoch": 0.6830346662493153, "grad_norm": 0.0, - "learning_rate": 1.0657580069669363e-05, - "loss": 0.8297, + "learning_rate": 4.82255567006353e-06, + "loss": 0.8348, "step": 17457 }, { - "epoch": 0.49471506701805096, + "epoch": 0.6830737929415447, "grad_norm": 0.0, - "learning_rate": 1.065666426409941e-05, - "loss": 0.9035, + "learning_rate": 4.821471550648247e-06, + "loss": 1.0245, "step": 17458 }, { - "epoch": 0.4947434044603134, + "epoch": 0.6831129196337742, "grad_norm": 0.0, - "learning_rate": 1.065574845299813e-05, - "loss": 0.8858, + "learning_rate": 4.8203875143936355e-06, + "loss": 0.9897, "step": 17459 }, { - "epoch": 0.4947717419025759, + "epoch": 0.6831520463260036, "grad_norm": 0.0, - "learning_rate": 1.0654832636373239e-05, - "loss": 0.9017, + "learning_rate": 4.819303561317117e-06, + "loss": 0.9659, "step": 17460 }, { - "epoch": 0.49480007934483833, + "epoch": 0.6831911730182331, "grad_norm": 0.0, - "learning_rate": 1.0653916814232445e-05, - "loss": 0.9495, + "learning_rate": 4.818219691436087e-06, + "loss": 0.9798, "step": 17461 }, { - "epoch": 0.4948284167871008, + "epoch": 0.6832302997104625, "grad_norm": 0.0, - "learning_rate": 1.0653000986583471e-05, - "loss": 0.9503, + "learning_rate": 4.81713590476796e-06, + "loss": 0.9647, "step": 17462 }, { - "epoch": 0.49485675422936326, + "epoch": 0.683269426402692, "grad_norm": 0.0, - "learning_rate": 1.0652085153434025e-05, - "loss": 0.892, + "learning_rate": 4.816052201330133e-06, + "loss": 0.9877, "step": 17463 }, { - "epoch": 0.4948850916716257, + "epoch": 0.6833085530949213, "grad_norm": 0.0, - "learning_rate": 1.0651169314791825e-05, - "loss": 0.9841, + "learning_rate": 4.81496858114001e-06, + "loss": 0.978, "step": 17464 }, { - "epoch": 0.4949134291138882, + "epoch": 0.6833476797871508, "grad_norm": 0.0, - "learning_rate": 1.0650253470664584e-05, - "loss": 0.9776, + "learning_rate": 4.813885044214996e-06, + "loss": 0.8413, "step": 17465 }, { - "epoch": 0.49494176655615063, + "epoch": 0.6833868064793802, "grad_norm": 0.0, - "learning_rate": 1.0649337621060018e-05, - "loss": 0.8262, + "learning_rate": 4.8128015905724926e-06, + "loss": 0.9865, "step": 17466 }, { - "epoch": 0.4949701039984131, + "epoch": 0.6834259331716097, "grad_norm": 0.0, - "learning_rate": 1.0648421765985837e-05, - "loss": 0.7948, + "learning_rate": 4.811718220229892e-06, + "loss": 1.0406, "step": 17467 }, { - "epoch": 0.49499844144067556, + "epoch": 0.6834650598638391, "grad_norm": 0.0, - "learning_rate": 1.0647505905449758e-05, - "loss": 0.8946, + "learning_rate": 4.8106349332045954e-06, + "loss": 0.8698, "step": 17468 }, { - "epoch": 0.495026778882938, + "epoch": 0.6835041865560686, "grad_norm": 0.0, - "learning_rate": 1.0646590039459499e-05, - "loss": 0.8517, + "learning_rate": 4.809551729513999e-06, + "loss": 0.9468, "step": 17469 }, { - "epoch": 0.4950551163252005, + "epoch": 0.683543313248298, "grad_norm": 0.0, - "learning_rate": 1.0645674168022772e-05, - "loss": 0.9438, + "learning_rate": 4.808468609175502e-06, + "loss": 0.9928, "step": 17470 }, { - "epoch": 0.49508345376746293, + "epoch": 0.6835824399405275, "grad_norm": 0.0, - "learning_rate": 1.0644758291147293e-05, - "loss": 0.9037, + "learning_rate": 4.807385572206491e-06, + "loss": 1.1273, "step": 17471 }, { - "epoch": 0.4951117912097254, + "epoch": 0.6836215666327569, "grad_norm": 0.0, - "learning_rate": 1.0643842408840772e-05, - "loss": 0.9014, + "learning_rate": 4.806302618624363e-06, + "loss": 0.9662, "step": 17472 }, { - "epoch": 0.49514012865198787, + "epoch": 0.6836606933249864, "grad_norm": 0.0, - "learning_rate": 1.064292652111093e-05, - "loss": 0.9068, + "learning_rate": 4.8052197484465e-06, + "loss": 1.0329, "step": 17473 }, { - "epoch": 0.49516846609425036, + "epoch": 0.6836998200172157, "grad_norm": 0.0, - "learning_rate": 1.0642010627965475e-05, - "loss": 0.7574, + "learning_rate": 4.8041369616903065e-06, + "loss": 1.0574, "step": 17474 }, { - "epoch": 0.4951968035365128, + "epoch": 0.6837389467094452, "grad_norm": 0.0, - "learning_rate": 1.0641094729412132e-05, - "loss": 0.9088, + "learning_rate": 4.803054258373158e-06, + "loss": 1.0226, "step": 17475 }, { - "epoch": 0.49522514097877524, + "epoch": 0.6837780734016746, "grad_norm": 0.0, - "learning_rate": 1.0640178825458605e-05, - "loss": 0.8445, + "learning_rate": 4.8019716385124505e-06, + "loss": 1.0336, "step": 17476 }, { - "epoch": 0.49525347842103773, + "epoch": 0.683817200093904, "grad_norm": 0.0, - "learning_rate": 1.0639262916112615e-05, - "loss": 0.9414, + "learning_rate": 4.800889102125558e-06, + "loss": 1.1031, "step": 17477 }, { - "epoch": 0.49528181586330017, + "epoch": 0.6838563267861335, "grad_norm": 0.0, - "learning_rate": 1.063834700138188e-05, - "loss": 0.8362, + "learning_rate": 4.799806649229878e-06, + "loss": 0.996, "step": 17478 }, { - "epoch": 0.49531015330556266, + "epoch": 0.6838954534783629, "grad_norm": 0.0, - "learning_rate": 1.0637431081274108e-05, - "loss": 0.8492, + "learning_rate": 4.798724279842783e-06, + "loss": 0.8871, "step": 17479 }, { - "epoch": 0.4953384907478251, + "epoch": 0.6839345801705924, "grad_norm": 0.0, - "learning_rate": 1.0636515155797018e-05, - "loss": 0.8403, + "learning_rate": 4.7976419939816635e-06, + "loss": 1.0629, "step": 17480 }, { - "epoch": 0.49536682819008754, + "epoch": 0.6839737068628218, "grad_norm": 0.0, - "learning_rate": 1.0635599224958321e-05, - "loss": 0.9441, + "learning_rate": 4.7965597916638895e-06, + "loss": 0.9837, "step": 17481 }, { - "epoch": 0.49539516563235003, + "epoch": 0.6840128335550513, "grad_norm": 0.0, - "learning_rate": 1.0634683288765741e-05, - "loss": 0.8686, + "learning_rate": 4.795477672906845e-06, + "loss": 0.9592, "step": 17482 }, { - "epoch": 0.49542350307461247, + "epoch": 0.6840519602472807, "grad_norm": 0.0, - "learning_rate": 1.0633767347226987e-05, - "loss": 0.9173, + "learning_rate": 4.794395637727909e-06, + "loss": 1.0703, "step": 17483 }, { - "epoch": 0.49545184051687496, + "epoch": 0.6840910869395102, "grad_norm": 0.0, - "learning_rate": 1.063285140034977e-05, - "loss": 0.9017, + "learning_rate": 4.793313686144458e-06, + "loss": 1.0504, "step": 17484 }, { - "epoch": 0.4954801779591374, + "epoch": 0.6841302136317395, "grad_norm": 0.0, - "learning_rate": 1.0631935448141817e-05, - "loss": 0.8445, + "learning_rate": 4.7922318181738625e-06, + "loss": 1.0138, "step": 17485 }, { - "epoch": 0.4955085154013999, + "epoch": 0.684169340323969, "grad_norm": 0.0, - "learning_rate": 1.0631019490610837e-05, - "loss": 0.9721, + "learning_rate": 4.7911500338334985e-06, + "loss": 0.9863, "step": 17486 }, { - "epoch": 0.49553685284366233, + "epoch": 0.6842084670161984, "grad_norm": 0.0, - "learning_rate": 1.0630103527764542e-05, - "loss": 0.9216, + "learning_rate": 4.790068333140741e-06, + "loss": 0.9929, "step": 17487 }, { - "epoch": 0.4955651902859248, + "epoch": 0.6842475937084279, "grad_norm": 0.0, - "learning_rate": 1.0629187559610649e-05, - "loss": 0.8376, + "learning_rate": 4.7889867161129534e-06, + "loss": 0.9487, "step": 17488 }, { - "epoch": 0.49559352772818727, + "epoch": 0.6842867204006573, "grad_norm": 0.0, - "learning_rate": 1.062827158615688e-05, - "loss": 0.9373, + "learning_rate": 4.787905182767511e-06, + "loss": 0.8453, "step": 17489 }, { - "epoch": 0.4956218651704497, + "epoch": 0.6843258470928868, "grad_norm": 0.0, - "learning_rate": 1.0627355607410948e-05, - "loss": 0.8044, + "learning_rate": 4.786823733121778e-06, + "loss": 1.0587, "step": 17490 }, { - "epoch": 0.4956502026127122, + "epoch": 0.6843649737851162, "grad_norm": 0.0, - "learning_rate": 1.0626439623380562e-05, - "loss": 0.8684, + "learning_rate": 4.785742367193128e-06, + "loss": 1.0907, "step": 17491 }, { - "epoch": 0.49567854005497464, + "epoch": 0.6844041004773457, "grad_norm": 0.0, - "learning_rate": 1.0625523634073445e-05, - "loss": 0.8942, + "learning_rate": 4.784661084998919e-06, + "loss": 0.9386, "step": 17492 }, { - "epoch": 0.4957068774972371, + "epoch": 0.6844432271695751, "grad_norm": 0.0, - "learning_rate": 1.062460763949731e-05, - "loss": 0.8695, + "learning_rate": 4.7835798865565205e-06, + "loss": 0.993, "step": 17493 }, { - "epoch": 0.49573521493949957, + "epoch": 0.6844823538618046, "grad_norm": 0.0, - "learning_rate": 1.062369163965987e-05, - "loss": 0.8266, + "learning_rate": 4.78249877188329e-06, + "loss": 0.9911, "step": 17494 }, { - "epoch": 0.495763552381762, + "epoch": 0.684521480554034, "grad_norm": 0.0, - "learning_rate": 1.0622775634568847e-05, - "loss": 0.9054, + "learning_rate": 4.7814177409965885e-06, + "loss": 0.9529, "step": 17495 }, { - "epoch": 0.4957918898240245, + "epoch": 0.6845606072462634, "grad_norm": 0.0, - "learning_rate": 1.0621859624231952e-05, - "loss": 0.8392, + "learning_rate": 4.780336793913781e-06, + "loss": 1.039, "step": 17496 }, { - "epoch": 0.49582022726628694, + "epoch": 0.6845997339384928, "grad_norm": 0.0, - "learning_rate": 1.0620943608656901e-05, - "loss": 0.88, + "learning_rate": 4.779255930652228e-06, + "loss": 1.028, "step": 17497 }, { - "epoch": 0.49584856470854943, + "epoch": 0.6846388606307223, "grad_norm": 0.0, - "learning_rate": 1.0620027587851417e-05, - "loss": 0.9736, + "learning_rate": 4.778175151229279e-06, + "loss": 1.0323, "step": 17498 }, { - "epoch": 0.49587690215081187, + "epoch": 0.6846779873229517, "grad_norm": 0.0, - "learning_rate": 1.0619111561823208e-05, - "loss": 0.9864, + "learning_rate": 4.777094455662292e-06, + "loss": 0.9438, "step": 17499 }, { - "epoch": 0.4959052395930743, + "epoch": 0.6847171140151812, "grad_norm": 0.0, - "learning_rate": 1.0618195530579989e-05, - "loss": 0.9095, + "learning_rate": 4.776013843968625e-06, + "loss": 0.9469, "step": 17500 }, { - "epoch": 0.4959335770353368, + "epoch": 0.6847562407074106, "grad_norm": 0.0, - "learning_rate": 1.061727949412948e-05, - "loss": 0.9478, + "learning_rate": 4.774933316165633e-06, + "loss": 0.9137, "step": 17501 }, { - "epoch": 0.49596191447759924, + "epoch": 0.6847953673996401, "grad_norm": 0.0, - "learning_rate": 1.0616363452479399e-05, - "loss": 0.8228, + "learning_rate": 4.773852872270661e-06, + "loss": 0.9817, "step": 17502 }, { - "epoch": 0.49599025191986174, + "epoch": 0.6848344940918695, "grad_norm": 0.0, - "learning_rate": 1.061544740563746e-05, - "loss": 0.9563, + "learning_rate": 4.772772512301066e-06, + "loss": 0.9268, "step": 17503 }, { - "epoch": 0.4960185893621242, + "epoch": 0.6848736207840989, "grad_norm": 0.0, - "learning_rate": 1.061453135361138e-05, - "loss": 0.927, + "learning_rate": 4.771692236274188e-06, + "loss": 1.0035, "step": 17504 }, { - "epoch": 0.4960469268043866, + "epoch": 0.6849127474763284, "grad_norm": 0.0, - "learning_rate": 1.061361529640887e-05, - "loss": 0.9205, + "learning_rate": 4.770612044207389e-06, + "loss": 0.9716, "step": 17505 }, { - "epoch": 0.4960752642466491, + "epoch": 0.6849518741685577, "grad_norm": 0.0, - "learning_rate": 1.0612699234037653e-05, - "loss": 0.926, + "learning_rate": 4.769531936118002e-06, + "loss": 1.0846, "step": 17506 }, { - "epoch": 0.49610360168891154, + "epoch": 0.6849910008607872, "grad_norm": 0.0, - "learning_rate": 1.061178316650544e-05, - "loss": 0.9459, + "learning_rate": 4.768451912023384e-06, + "loss": 0.9604, "step": 17507 }, { - "epoch": 0.49613193913117404, + "epoch": 0.6850301275530166, "grad_norm": 0.0, - "learning_rate": 1.0610867093819954e-05, - "loss": 0.8579, + "learning_rate": 4.767371971940864e-06, + "loss": 0.932, "step": 17508 }, { - "epoch": 0.4961602765734365, + "epoch": 0.6850692542452461, "grad_norm": 0.0, - "learning_rate": 1.0609951015988907e-05, - "loss": 0.8993, + "learning_rate": 4.766292115887801e-06, + "loss": 1.0408, "step": 17509 }, { - "epoch": 0.49618861401569897, + "epoch": 0.6851083809374755, "grad_norm": 0.0, - "learning_rate": 1.0609034933020015e-05, - "loss": 0.8738, + "learning_rate": 4.765212343881524e-06, + "loss": 1.0794, "step": 17510 }, { - "epoch": 0.4962169514579614, + "epoch": 0.685147507629705, "grad_norm": 0.0, - "learning_rate": 1.0608118844920996e-05, - "loss": 0.8857, + "learning_rate": 4.764132655939383e-06, + "loss": 0.937, "step": 17511 }, { - "epoch": 0.49624528890022385, + "epoch": 0.6851866343219344, "grad_norm": 0.0, - "learning_rate": 1.0607202751699568e-05, - "loss": 0.9263, + "learning_rate": 4.763053052078705e-06, + "loss": 0.9238, "step": 17512 }, { - "epoch": 0.49627362634248634, + "epoch": 0.6852257610141639, "grad_norm": 0.0, - "learning_rate": 1.0606286653363442e-05, - "loss": 0.8979, + "learning_rate": 4.761973532316834e-06, + "loss": 0.9038, "step": 17513 }, { - "epoch": 0.4963019637847488, + "epoch": 0.6852648877063933, "grad_norm": 0.0, - "learning_rate": 1.060537054992034e-05, - "loss": 0.7812, + "learning_rate": 4.7608940966711036e-06, + "loss": 0.954, "step": 17514 }, { - "epoch": 0.4963303012270113, + "epoch": 0.6853040143986228, "grad_norm": 0.0, - "learning_rate": 1.0604454441377978e-05, - "loss": 0.926, + "learning_rate": 4.759814745158853e-06, + "loss": 1.0707, "step": 17515 }, { - "epoch": 0.4963586386692737, + "epoch": 0.6853431410908521, "grad_norm": 0.0, - "learning_rate": 1.0603538327744071e-05, - "loss": 0.8643, + "learning_rate": 4.758735477797407e-06, + "loss": 0.944, "step": 17516 }, { - "epoch": 0.49638697611153615, + "epoch": 0.6853822677830816, "grad_norm": 0.0, - "learning_rate": 1.0602622209026336e-05, - "loss": 0.8835, + "learning_rate": 4.7576562946041025e-06, + "loss": 1.0922, "step": 17517 }, { - "epoch": 0.49641531355379864, + "epoch": 0.685421394475311, "grad_norm": 0.0, - "learning_rate": 1.0601706085232492e-05, - "loss": 0.9256, + "learning_rate": 4.756577195596268e-06, + "loss": 0.9562, "step": 17518 }, { - "epoch": 0.4964436509960611, + "epoch": 0.6854605211675405, "grad_norm": 0.0, - "learning_rate": 1.0600789956370254e-05, - "loss": 0.8222, + "learning_rate": 4.755498180791238e-06, + "loss": 0.9648, "step": 17519 }, { - "epoch": 0.4964719884383236, + "epoch": 0.6854996478597699, "grad_norm": 0.0, - "learning_rate": 1.0599873822447338e-05, - "loss": 0.7586, + "learning_rate": 4.754419250206331e-06, + "loss": 0.9978, "step": 17520 }, { - "epoch": 0.496500325880586, + "epoch": 0.6855387745519994, "grad_norm": 0.0, - "learning_rate": 1.059895768347146e-05, - "loss": 0.8725, + "learning_rate": 4.753340403858883e-06, + "loss": 0.9251, "step": 17521 }, { - "epoch": 0.4965286633228485, + "epoch": 0.6855779012442288, "grad_norm": 0.0, - "learning_rate": 1.0598041539450344e-05, - "loss": 0.8594, + "learning_rate": 4.7522616417662034e-06, + "loss": 1.0155, "step": 17522 }, { - "epoch": 0.49655700076511095, + "epoch": 0.6856170279364583, "grad_norm": 0.0, - "learning_rate": 1.0597125390391697e-05, - "loss": 0.8992, + "learning_rate": 4.7511829639456365e-06, + "loss": 1.0818, "step": 17523 }, { - "epoch": 0.4965853382073734, + "epoch": 0.6856561546286877, "grad_norm": 0.0, - "learning_rate": 1.0596209236303246e-05, - "loss": 0.9024, + "learning_rate": 4.750104370414489e-06, + "loss": 0.9892, "step": 17524 }, { - "epoch": 0.4966136756496359, + "epoch": 0.6856952813209172, "grad_norm": 0.0, - "learning_rate": 1.0595293077192699e-05, - "loss": 0.8262, + "learning_rate": 4.74902586119009e-06, + "loss": 0.9, "step": 17525 }, { - "epoch": 0.4966420130918983, + "epoch": 0.6857344080131466, "grad_norm": 0.0, - "learning_rate": 1.059437691306778e-05, - "loss": 0.8913, + "learning_rate": 4.747947436289753e-06, + "loss": 0.8787, "step": 17526 }, { - "epoch": 0.4966703505341608, + "epoch": 0.685773534705376, "grad_norm": 0.0, - "learning_rate": 1.0593460743936202e-05, - "loss": 0.908, + "learning_rate": 4.7468690957307985e-06, + "loss": 0.9619, "step": 17527 }, { - "epoch": 0.49669868797642325, + "epoch": 0.6858126613976054, "grad_norm": 0.0, - "learning_rate": 1.0592544569805685e-05, - "loss": 0.9221, + "learning_rate": 4.745790839530544e-06, + "loss": 1.0579, "step": 17528 }, { - "epoch": 0.4967270254186857, + "epoch": 0.6858517880898349, "grad_norm": 0.0, - "learning_rate": 1.0591628390683945e-05, - "loss": 0.8964, + "learning_rate": 4.7447126677063086e-06, + "loss": 1.0799, "step": 17529 }, { - "epoch": 0.4967553628609482, + "epoch": 0.6858909147820643, "grad_norm": 0.0, - "learning_rate": 1.0590712206578698e-05, - "loss": 0.8664, + "learning_rate": 4.743634580275398e-06, + "loss": 1.0433, "step": 17530 }, { - "epoch": 0.4967837003032106, + "epoch": 0.6859300414742938, "grad_norm": 0.0, - "learning_rate": 1.0589796017497665e-05, - "loss": 0.9329, + "learning_rate": 4.742556577255129e-06, + "loss": 1.0614, "step": 17531 }, { - "epoch": 0.4968120377454731, + "epoch": 0.6859691681665232, "grad_norm": 0.0, - "learning_rate": 1.0588879823448559e-05, - "loss": 0.8345, + "learning_rate": 4.741478658662819e-06, + "loss": 0.9561, "step": 17532 }, { - "epoch": 0.49684037518773555, + "epoch": 0.6860082948587526, "grad_norm": 0.0, - "learning_rate": 1.0587963624439099e-05, - "loss": 0.8843, + "learning_rate": 4.740400824515768e-06, + "loss": 1.0113, "step": 17533 }, { - "epoch": 0.49686871262999804, + "epoch": 0.6860474215509821, "grad_norm": 0.0, - "learning_rate": 1.0587047420477003e-05, - "loss": 0.8587, + "learning_rate": 4.739323074831289e-06, + "loss": 0.9178, "step": 17534 }, { - "epoch": 0.4968970500722605, + "epoch": 0.6860865482432115, "grad_norm": 0.0, - "learning_rate": 1.0586131211569992e-05, - "loss": 0.9409, + "learning_rate": 4.73824540962669e-06, + "loss": 0.8689, "step": 17535 }, { - "epoch": 0.4969253875145229, + "epoch": 0.686125674935441, "grad_norm": 0.0, - "learning_rate": 1.0585214997725778e-05, - "loss": 0.8979, + "learning_rate": 4.737167828919279e-06, + "loss": 0.912, "step": 17536 }, { - "epoch": 0.4969537249567854, + "epoch": 0.6861648016276704, "grad_norm": 0.0, - "learning_rate": 1.0584298778952082e-05, - "loss": 0.9922, + "learning_rate": 4.736090332726354e-06, + "loss": 1.0617, "step": 17537 }, { - "epoch": 0.49698206239904785, + "epoch": 0.6862039283198998, "grad_norm": 0.0, - "learning_rate": 1.0583382555256618e-05, - "loss": 0.8415, + "learning_rate": 4.735012921065228e-06, + "loss": 1.008, "step": 17538 }, { - "epoch": 0.49701039984131035, + "epoch": 0.6862430550121292, "grad_norm": 0.0, - "learning_rate": 1.058246632664711e-05, - "loss": 0.8996, + "learning_rate": 4.733935593953187e-06, + "loss": 1.0506, "step": 17539 }, { - "epoch": 0.4970387372835728, + "epoch": 0.6862821817043587, "grad_norm": 0.0, - "learning_rate": 1.0581550093131266e-05, - "loss": 0.8514, + "learning_rate": 4.732858351407551e-06, + "loss": 0.8296, "step": 17540 }, { - "epoch": 0.4970670747258352, + "epoch": 0.6863213083965881, "grad_norm": 0.0, - "learning_rate": 1.0580633854716814e-05, - "loss": 0.834, + "learning_rate": 4.7317811934456046e-06, + "loss": 0.9833, "step": 17541 }, { - "epoch": 0.4970954121680977, + "epoch": 0.6863604350888176, "grad_norm": 0.0, - "learning_rate": 1.0579717611411464e-05, - "loss": 0.8847, + "learning_rate": 4.730704120084656e-06, + "loss": 1.013, "step": 17542 }, { - "epoch": 0.49712374961036015, + "epoch": 0.686399561781047, "grad_norm": 0.0, - "learning_rate": 1.0578801363222941e-05, - "loss": 1.0428, + "learning_rate": 4.729627131341992e-06, + "loss": 0.8833, "step": 17543 }, { - "epoch": 0.49715208705262265, + "epoch": 0.6864386884732765, "grad_norm": 0.0, - "learning_rate": 1.0577885110158959e-05, - "loss": 0.8853, + "learning_rate": 4.728550227234912e-06, + "loss": 1.0055, "step": 17544 }, { - "epoch": 0.4971804244948851, + "epoch": 0.6864778151655059, "grad_norm": 0.0, - "learning_rate": 1.0576968852227236e-05, - "loss": 0.9236, + "learning_rate": 4.72747340778071e-06, + "loss": 0.9509, "step": 17545 }, { - "epoch": 0.4972087619371475, + "epoch": 0.6865169418577354, "grad_norm": 0.0, - "learning_rate": 1.0576052589435485e-05, - "loss": 0.9125, + "learning_rate": 4.7263966729966825e-06, + "loss": 1.05, "step": 17546 }, { - "epoch": 0.49723709937941, + "epoch": 0.6865560685499648, "grad_norm": 0.0, - "learning_rate": 1.0575136321791433e-05, - "loss": 0.9624, + "learning_rate": 4.725320022900112e-06, + "loss": 1.1117, "step": 17547 }, { - "epoch": 0.49726543682167246, + "epoch": 0.6865951952421943, "grad_norm": 0.0, - "learning_rate": 1.0574220049302795e-05, - "loss": 0.9387, + "learning_rate": 4.724243457508292e-06, + "loss": 0.9399, "step": 17548 }, { - "epoch": 0.49729377426393495, + "epoch": 0.6866343219344236, "grad_norm": 0.0, - "learning_rate": 1.057330377197729e-05, - "loss": 0.9585, + "learning_rate": 4.723166976838511e-06, + "loss": 0.8983, "step": 17549 }, { - "epoch": 0.4973221117061974, + "epoch": 0.6866734486266531, "grad_norm": 0.0, - "learning_rate": 1.0572387489822628e-05, - "loss": 0.9359, + "learning_rate": 4.722090580908061e-06, + "loss": 0.9237, "step": 17550 }, { - "epoch": 0.4973504491484599, + "epoch": 0.6867125753188825, "grad_norm": 0.0, - "learning_rate": 1.057147120284654e-05, - "loss": 0.9298, + "learning_rate": 4.721014269734218e-06, + "loss": 0.8389, "step": 17551 }, { - "epoch": 0.4973787865907223, + "epoch": 0.686751702011112, "grad_norm": 0.0, - "learning_rate": 1.0570554911056736e-05, - "loss": 1.0155, + "learning_rate": 4.719938043334276e-06, + "loss": 1.0909, "step": 17552 }, { - "epoch": 0.49740712403298476, + "epoch": 0.6867908287033414, "grad_norm": 0.0, - "learning_rate": 1.0569638614460936e-05, - "loss": 0.8203, + "learning_rate": 4.718861901725504e-06, + "loss": 0.9568, "step": 17553 }, { - "epoch": 0.49743546147524725, + "epoch": 0.6868299553955709, "grad_norm": 0.0, - "learning_rate": 1.0568722313066856e-05, - "loss": 0.894, + "learning_rate": 4.717785844925199e-06, + "loss": 0.9342, "step": 17554 }, { - "epoch": 0.4974637989175097, + "epoch": 0.6868690820878003, "grad_norm": 0.0, - "learning_rate": 1.056780600688222e-05, - "loss": 0.9687, + "learning_rate": 4.716709872950632e-06, + "loss": 0.963, "step": 17555 }, { - "epoch": 0.4974921363597722, + "epoch": 0.6869082087800298, "grad_norm": 0.0, - "learning_rate": 1.0566889695914741e-05, - "loss": 0.8007, + "learning_rate": 4.715633985819087e-06, + "loss": 0.9662, "step": 17556 }, { - "epoch": 0.4975204738020346, + "epoch": 0.6869473354722592, "grad_norm": 0.0, - "learning_rate": 1.0565973380172144e-05, - "loss": 0.9145, + "learning_rate": 4.7145581835478314e-06, + "loss": 0.9325, "step": 17557 }, { - "epoch": 0.49754881124429706, + "epoch": 0.6869864621644887, "grad_norm": 0.0, - "learning_rate": 1.0565057059662137e-05, - "loss": 0.9307, + "learning_rate": 4.713482466154155e-06, + "loss": 0.9532, "step": 17558 }, { - "epoch": 0.49757714868655956, + "epoch": 0.687025588856718, "grad_norm": 0.0, - "learning_rate": 1.0564140734392445e-05, - "loss": 0.9316, + "learning_rate": 4.7124068336553245e-06, + "loss": 1.0476, "step": 17559 }, { - "epoch": 0.497605486128822, + "epoch": 0.6870647155489475, "grad_norm": 0.0, - "learning_rate": 1.056322440437079e-05, - "loss": 0.8163, + "learning_rate": 4.711331286068616e-06, + "loss": 1.0747, "step": 17560 }, { - "epoch": 0.4976338235710845, + "epoch": 0.6871038422411769, "grad_norm": 0.0, - "learning_rate": 1.0562308069604886e-05, - "loss": 0.9517, + "learning_rate": 4.710255823411297e-06, + "loss": 1.0572, "step": 17561 }, { - "epoch": 0.4976621610133469, + "epoch": 0.6871429689334063, "grad_norm": 0.0, - "learning_rate": 1.056139173010245e-05, - "loss": 0.8337, + "learning_rate": 4.709180445700641e-06, + "loss": 0.889, "step": 17562 }, { - "epoch": 0.4976904984556094, + "epoch": 0.6871820956256358, "grad_norm": 0.0, - "learning_rate": 1.0560475385871202e-05, - "loss": 0.8225, + "learning_rate": 4.7081051529539166e-06, + "loss": 1.0018, "step": 17563 }, { - "epoch": 0.49771883589787186, + "epoch": 0.6872212223178652, "grad_norm": 0.0, - "learning_rate": 1.0559559036918867e-05, - "loss": 0.9908, + "learning_rate": 4.707029945188398e-06, + "loss": 0.7917, "step": 17564 }, { - "epoch": 0.4977471733401343, + "epoch": 0.6872603490100947, "grad_norm": 0.0, - "learning_rate": 1.0558642683253153e-05, - "loss": 0.898, + "learning_rate": 4.705954822421341e-06, + "loss": 1.1052, "step": 17565 }, { - "epoch": 0.4977755107823968, + "epoch": 0.6872994757023241, "grad_norm": 0.0, - "learning_rate": 1.0557726324881787e-05, - "loss": 0.8099, + "learning_rate": 4.704879784670015e-06, + "loss": 0.9526, "step": 17566 }, { - "epoch": 0.49780384822465923, + "epoch": 0.6873386023945536, "grad_norm": 0.0, - "learning_rate": 1.0556809961812484e-05, - "loss": 0.9101, + "learning_rate": 4.703804831951685e-06, + "loss": 1.0457, "step": 17567 }, { - "epoch": 0.4978321856669217, + "epoch": 0.687377729086783, "grad_norm": 0.0, - "learning_rate": 1.0555893594052965e-05, - "loss": 0.8746, + "learning_rate": 4.702729964283617e-06, + "loss": 1.0403, "step": 17568 }, { - "epoch": 0.49786052310918416, + "epoch": 0.6874168557790125, "grad_norm": 0.0, - "learning_rate": 1.055497722161095e-05, - "loss": 0.8614, + "learning_rate": 4.701655181683064e-06, + "loss": 0.8743, "step": 17569 }, { - "epoch": 0.4978888605514466, + "epoch": 0.6874559824712418, "grad_norm": 0.0, - "learning_rate": 1.0554060844494152e-05, - "loss": 0.9292, + "learning_rate": 4.700580484167293e-06, + "loss": 0.9422, "step": 17570 }, { - "epoch": 0.4979171979937091, + "epoch": 0.6874951091634713, "grad_norm": 0.0, - "learning_rate": 1.0553144462710293e-05, - "loss": 0.8956, + "learning_rate": 4.6995058717535555e-06, + "loss": 1.0045, "step": 17571 }, { - "epoch": 0.49794553543597153, + "epoch": 0.6875342358557007, "grad_norm": 0.0, - "learning_rate": 1.0552228076267094e-05, - "loss": 0.7231, + "learning_rate": 4.698431344459112e-06, + "loss": 0.859, "step": 17572 }, { - "epoch": 0.497973872878234, + "epoch": 0.6875733625479302, "grad_norm": 0.0, - "learning_rate": 1.0551311685172275e-05, - "loss": 0.8213, + "learning_rate": 4.6973569023012175e-06, + "loss": 1.0966, "step": 17573 }, { - "epoch": 0.49800221032049646, + "epoch": 0.6876124892401596, "grad_norm": 0.0, - "learning_rate": 1.0550395289433553e-05, - "loss": 0.7874, + "learning_rate": 4.696282545297131e-06, + "loss": 1.0472, "step": 17574 }, { - "epoch": 0.49803054776275896, + "epoch": 0.6876516159323891, "grad_norm": 0.0, - "learning_rate": 1.0549478889058644e-05, - "loss": 0.8441, + "learning_rate": 4.695208273464097e-06, + "loss": 1.0089, "step": 17575 }, { - "epoch": 0.4980588852050214, + "epoch": 0.6876907426246185, "grad_norm": 0.0, - "learning_rate": 1.0548562484055274e-05, - "loss": 0.9942, + "learning_rate": 4.6941340868193696e-06, + "loss": 0.8284, "step": 17576 }, { - "epoch": 0.49808722264728383, + "epoch": 0.687729869316848, "grad_norm": 0.0, - "learning_rate": 1.0547646074431155e-05, - "loss": 0.8937, + "learning_rate": 4.693059985380205e-06, + "loss": 1.0446, "step": 17577 }, { - "epoch": 0.4981155600895463, + "epoch": 0.6877689960090774, "grad_norm": 0.0, - "learning_rate": 1.0546729660194011e-05, - "loss": 0.8848, + "learning_rate": 4.691985969163844e-06, + "loss": 0.9649, "step": 17578 }, { - "epoch": 0.49814389753180877, + "epoch": 0.6878081227013069, "grad_norm": 0.0, - "learning_rate": 1.054581324135156e-05, - "loss": 0.8526, + "learning_rate": 4.690912038187535e-06, + "loss": 1.0157, "step": 17579 }, { - "epoch": 0.49817223497407126, + "epoch": 0.6878472493935363, "grad_norm": 0.0, - "learning_rate": 1.0544896817911521e-05, - "loss": 0.8616, + "learning_rate": 4.689838192468528e-06, + "loss": 0.9546, "step": 17580 }, { - "epoch": 0.4982005724163337, + "epoch": 0.6878863760857657, "grad_norm": 0.0, - "learning_rate": 1.0543980389881613e-05, - "loss": 0.9243, + "learning_rate": 4.688764432024068e-06, + "loss": 1.0516, "step": 17581 }, { - "epoch": 0.49822890985859614, + "epoch": 0.6879255027779951, "grad_norm": 0.0, - "learning_rate": 1.0543063957269558e-05, - "loss": 0.8728, + "learning_rate": 4.687690756871393e-06, + "loss": 0.9724, "step": 17582 }, { - "epoch": 0.49825724730085863, + "epoch": 0.6879646294702246, "grad_norm": 0.0, - "learning_rate": 1.0542147520083077e-05, - "loss": 0.8525, + "learning_rate": 4.686617167027751e-06, + "loss": 0.9028, "step": 17583 }, { - "epoch": 0.49828558474312107, + "epoch": 0.688003756162454, "grad_norm": 0.0, - "learning_rate": 1.0541231078329881e-05, - "loss": 0.9297, + "learning_rate": 4.685543662510371e-06, + "loss": 0.9283, "step": 17584 }, { - "epoch": 0.49831392218538356, + "epoch": 0.6880428828546835, "grad_norm": 0.0, - "learning_rate": 1.0540314632017694e-05, - "loss": 0.8715, + "learning_rate": 4.68447024333651e-06, + "loss": 0.9571, "step": 17585 }, { - "epoch": 0.498342259627646, + "epoch": 0.6880820095469129, "grad_norm": 0.0, - "learning_rate": 1.0539398181154239e-05, - "loss": 0.8381, + "learning_rate": 4.68339690952339e-06, + "loss": 0.9247, "step": 17586 }, { - "epoch": 0.4983705970699085, + "epoch": 0.6881211362391424, "grad_norm": 0.0, - "learning_rate": 1.0538481725747232e-05, - "loss": 0.944, + "learning_rate": 4.682323661088259e-06, + "loss": 0.943, "step": 17587 }, { - "epoch": 0.49839893451217093, + "epoch": 0.6881602629313718, "grad_norm": 0.0, - "learning_rate": 1.0537565265804392e-05, - "loss": 0.9611, + "learning_rate": 4.681250498048342e-06, + "loss": 1.0915, "step": 17588 }, { - "epoch": 0.49842727195443337, + "epoch": 0.6881993896236012, "grad_norm": 0.0, - "learning_rate": 1.0536648801333443e-05, - "loss": 0.8567, + "learning_rate": 4.6801774204208775e-06, + "loss": 0.9071, "step": 17589 }, { - "epoch": 0.49845560939669586, + "epoch": 0.6882385163158307, "grad_norm": 0.0, - "learning_rate": 1.0535732332342102e-05, - "loss": 0.9265, + "learning_rate": 4.679104428223098e-06, + "loss": 0.9737, "step": 17590 }, { - "epoch": 0.4984839468389583, + "epoch": 0.68827764300806, "grad_norm": 0.0, - "learning_rate": 1.0534815858838085e-05, - "loss": 0.94, + "learning_rate": 4.678031521472237e-06, + "loss": 1.006, "step": 17591 }, { - "epoch": 0.4985122842812208, + "epoch": 0.6883167697002895, "grad_norm": 0.0, - "learning_rate": 1.0533899380829116e-05, - "loss": 0.8855, + "learning_rate": 4.676958700185518e-06, + "loss": 1.1429, "step": 17592 }, { - "epoch": 0.49854062172348323, + "epoch": 0.6883558963925189, "grad_norm": 0.0, - "learning_rate": 1.0532982898322916e-05, - "loss": 0.76, + "learning_rate": 4.675885964380171e-06, + "loss": 0.9624, "step": 17593 }, { - "epoch": 0.4985689591657457, + "epoch": 0.6883950230847484, "grad_norm": 0.0, - "learning_rate": 1.0532066411327204e-05, - "loss": 0.8924, + "learning_rate": 4.6748133140734245e-06, + "loss": 0.9996, "step": 17594 }, { - "epoch": 0.49859729660800817, + "epoch": 0.6884341497769778, "grad_norm": 0.0, - "learning_rate": 1.0531149919849699e-05, - "loss": 0.8408, + "learning_rate": 4.673740749282507e-06, + "loss": 1.0997, "step": 17595 }, { - "epoch": 0.4986256340502706, + "epoch": 0.6884732764692073, "grad_norm": 0.0, - "learning_rate": 1.0530233423898118e-05, - "loss": 0.8632, + "learning_rate": 4.672668270024635e-06, + "loss": 0.7862, "step": 17596 }, { - "epoch": 0.4986539714925331, + "epoch": 0.6885124031614367, "grad_norm": 0.0, - "learning_rate": 1.0529316923480186e-05, - "loss": 0.8543, + "learning_rate": 4.671595876317035e-06, + "loss": 1.0001, "step": 17597 }, { - "epoch": 0.49868230893479554, + "epoch": 0.6885515298536662, "grad_norm": 0.0, - "learning_rate": 1.0528400418603622e-05, - "loss": 0.8075, + "learning_rate": 4.670523568176929e-06, + "loss": 0.9453, "step": 17598 }, { - "epoch": 0.49871064637705803, + "epoch": 0.6885906565458956, "grad_norm": 0.0, - "learning_rate": 1.0527483909276144e-05, - "loss": 0.9152, + "learning_rate": 4.669451345621541e-06, + "loss": 0.8579, "step": 17599 }, { - "epoch": 0.49873898381932047, + "epoch": 0.6886297832381251, "grad_norm": 0.0, - "learning_rate": 1.0526567395505472e-05, - "loss": 1.0014, + "learning_rate": 4.668379208668079e-06, + "loss": 0.9736, "step": 17600 }, { - "epoch": 0.4987673212615829, + "epoch": 0.6886689099303545, "grad_norm": 0.0, - "learning_rate": 1.0525650877299326e-05, - "loss": 0.8192, + "learning_rate": 4.667307157333771e-06, + "loss": 1.073, "step": 17601 }, { - "epoch": 0.4987956587038454, + "epoch": 0.688708036622584, "grad_norm": 0.0, - "learning_rate": 1.0524734354665433e-05, - "loss": 0.8209, + "learning_rate": 4.666235191635819e-06, + "loss": 0.9716, "step": 17602 }, { - "epoch": 0.49882399614610784, + "epoch": 0.6887471633148133, "grad_norm": 0.0, - "learning_rate": 1.0523817827611504e-05, - "loss": 0.7732, + "learning_rate": 4.665163311591455e-06, + "loss": 1.1151, "step": 17603 }, { - "epoch": 0.49885233358837033, + "epoch": 0.6887862900070428, "grad_norm": 0.0, - "learning_rate": 1.0522901296145263e-05, - "loss": 0.9764, + "learning_rate": 4.664091517217879e-06, + "loss": 0.9746, "step": 17604 }, { - "epoch": 0.49888067103063277, + "epoch": 0.6888254166992722, "grad_norm": 0.0, - "learning_rate": 1.0521984760274429e-05, - "loss": 0.9394, + "learning_rate": 4.663019808532311e-06, + "loss": 1.1367, "step": 17605 }, { - "epoch": 0.4989090084728952, + "epoch": 0.6888645433915017, "grad_norm": 0.0, - "learning_rate": 1.0521068220006727e-05, - "loss": 0.9473, + "learning_rate": 4.66194818555195e-06, + "loss": 0.99, "step": 17606 }, { - "epoch": 0.4989373459151577, + "epoch": 0.6889036700837311, "grad_norm": 0.0, - "learning_rate": 1.0520151675349873e-05, - "loss": 1.0008, + "learning_rate": 4.660876648294019e-06, + "loss": 0.9175, "step": 17607 }, { - "epoch": 0.49896568335742014, + "epoch": 0.6889427967759606, "grad_norm": 0.0, - "learning_rate": 1.0519235126311584e-05, - "loss": 0.8458, + "learning_rate": 4.659805196775715e-06, + "loss": 0.944, "step": 17608 }, { - "epoch": 0.49899402079968264, + "epoch": 0.68898192346819, "grad_norm": 0.0, - "learning_rate": 1.051831857289959e-05, - "loss": 0.9403, + "learning_rate": 4.6587338310142526e-06, + "loss": 1.0013, "step": 17609 }, { - "epoch": 0.4990223582419451, + "epoch": 0.6890210501604195, "grad_norm": 0.0, - "learning_rate": 1.0517402015121606e-05, - "loss": 0.9394, + "learning_rate": 4.657662551026827e-06, + "loss": 1.0311, "step": 17610 }, { - "epoch": 0.49905069568420757, + "epoch": 0.6890601768526489, "grad_norm": 0.0, - "learning_rate": 1.0516485452985349e-05, - "loss": 0.8737, + "learning_rate": 4.656591356830648e-06, + "loss": 0.9919, "step": 17611 }, { - "epoch": 0.49907903312647, + "epoch": 0.6890993035448784, "grad_norm": 0.0, - "learning_rate": 1.0515568886498546e-05, - "loss": 0.9682, + "learning_rate": 4.655520248442914e-06, + "loss": 1.0227, "step": 17612 }, { - "epoch": 0.49910737056873244, + "epoch": 0.6891384302371077, "grad_norm": 0.0, - "learning_rate": 1.0514652315668911e-05, - "loss": 0.8547, + "learning_rate": 4.654449225880833e-06, + "loss": 0.9324, "step": 17613 }, { - "epoch": 0.49913570801099494, + "epoch": 0.6891775569293372, "grad_norm": 0.0, - "learning_rate": 1.0513735740504175e-05, - "loss": 0.8403, + "learning_rate": 4.653378289161595e-06, + "loss": 0.9649, "step": 17614 }, { - "epoch": 0.4991640454532574, + "epoch": 0.6892166836215666, "grad_norm": 0.0, - "learning_rate": 1.0512819161012046e-05, - "loss": 0.9709, + "learning_rate": 4.652307438302402e-06, + "loss": 0.8479, "step": 17615 }, { - "epoch": 0.49919238289551987, + "epoch": 0.6892558103137961, "grad_norm": 0.0, - "learning_rate": 1.0511902577200255e-05, - "loss": 0.889, + "learning_rate": 4.651236673320454e-06, + "loss": 0.9116, "step": 17616 }, { - "epoch": 0.4992207203377823, + "epoch": 0.6892949370060255, "grad_norm": 0.0, - "learning_rate": 1.0510985989076517e-05, - "loss": 0.8824, + "learning_rate": 4.650165994232939e-06, + "loss": 0.9972, "step": 17617 }, { - "epoch": 0.49924905778004475, + "epoch": 0.6893340636982549, "grad_norm": 0.0, - "learning_rate": 1.0510069396648553e-05, - "loss": 0.8894, + "learning_rate": 4.649095401057055e-06, + "loss": 0.9313, "step": 17618 }, { - "epoch": 0.49927739522230724, + "epoch": 0.6893731903904844, "grad_norm": 0.0, - "learning_rate": 1.0509152799924085e-05, - "loss": 0.9288, + "learning_rate": 4.648024893809997e-06, + "loss": 1.0385, "step": 17619 }, { - "epoch": 0.4993057326645697, + "epoch": 0.6894123170827138, "grad_norm": 0.0, - "learning_rate": 1.0508236198910836e-05, - "loss": 0.9609, + "learning_rate": 4.64695447250895e-06, + "loss": 1.0576, "step": 17620 }, { - "epoch": 0.4993340701068322, + "epoch": 0.6894514437749433, "grad_norm": 0.0, - "learning_rate": 1.0507319593616523e-05, - "loss": 0.8182, + "learning_rate": 4.645884137171105e-06, + "loss": 1.0985, "step": 17621 }, { - "epoch": 0.4993624075490946, + "epoch": 0.6894905704671727, "grad_norm": 0.0, - "learning_rate": 1.0506402984048872e-05, - "loss": 0.9024, + "learning_rate": 4.6448138878136565e-06, + "loss": 0.9176, "step": 17622 }, { - "epoch": 0.4993907449913571, + "epoch": 0.6895296971594022, "grad_norm": 0.0, - "learning_rate": 1.0505486370215597e-05, - "loss": 0.8246, + "learning_rate": 4.643743724453782e-06, + "loss": 1.1108, "step": 17623 }, { - "epoch": 0.49941908243361954, + "epoch": 0.6895688238516315, "grad_norm": 0.0, - "learning_rate": 1.0504569752124423e-05, - "loss": 0.9013, + "learning_rate": 4.642673647108672e-06, + "loss": 0.8901, "step": 17624 }, { - "epoch": 0.499447419875882, + "epoch": 0.689607950543861, "grad_norm": 0.0, - "learning_rate": 1.050365312978307e-05, - "loss": 0.9284, + "learning_rate": 4.6416036557955105e-06, + "loss": 1.0763, "step": 17625 }, { - "epoch": 0.4994757573181445, + "epoch": 0.6896470772360904, "grad_norm": 0.0, - "learning_rate": 1.0502736503199262e-05, - "loss": 0.8247, + "learning_rate": 4.640533750531483e-06, + "loss": 0.9658, "step": 17626 }, { - "epoch": 0.4995040947604069, + "epoch": 0.6896862039283199, "grad_norm": 0.0, - "learning_rate": 1.0501819872380717e-05, - "loss": 0.7249, + "learning_rate": 4.639463931333763e-06, + "loss": 0.922, "step": 17627 }, { - "epoch": 0.4995324322026694, + "epoch": 0.6897253306205493, "grad_norm": 0.0, - "learning_rate": 1.0500903237335157e-05, - "loss": 0.7759, + "learning_rate": 4.638394198219537e-06, + "loss": 1.037, "step": 17628 }, { - "epoch": 0.49956076964493185, + "epoch": 0.6897644573127788, "grad_norm": 0.0, - "learning_rate": 1.0499986598070302e-05, - "loss": 0.9722, + "learning_rate": 4.637324551205981e-06, + "loss": 0.9898, "step": 17629 }, { - "epoch": 0.4995891070871943, + "epoch": 0.6898035840050082, "grad_norm": 0.0, - "learning_rate": 1.0499069954593874e-05, - "loss": 0.7375, + "learning_rate": 4.636254990310276e-06, + "loss": 0.9324, "step": 17630 }, { - "epoch": 0.4996174445294568, + "epoch": 0.6898427106972377, "grad_norm": 0.0, - "learning_rate": 1.0498153306913595e-05, - "loss": 0.8823, + "learning_rate": 4.635185515549592e-06, + "loss": 0.8893, "step": 17631 }, { - "epoch": 0.4996457819717192, + "epoch": 0.6898818373894671, "grad_norm": 0.0, - "learning_rate": 1.0497236655037187e-05, - "loss": 0.931, + "learning_rate": 4.634116126941108e-06, + "loss": 0.9832, "step": 17632 }, { - "epoch": 0.4996741194139817, + "epoch": 0.6899209640816966, "grad_norm": 0.0, - "learning_rate": 1.0496319998972366e-05, - "loss": 0.9139, + "learning_rate": 4.63304682450199e-06, + "loss": 1.121, "step": 17633 }, { - "epoch": 0.49970245685624415, + "epoch": 0.689960090773926, "grad_norm": 0.0, - "learning_rate": 1.0495403338726862e-05, - "loss": 0.8797, + "learning_rate": 4.6319776082494215e-06, + "loss": 0.8967, "step": 17634 }, { - "epoch": 0.49973079429850664, + "epoch": 0.6899992174661554, "grad_norm": 0.0, - "learning_rate": 1.049448667430839e-05, - "loss": 0.9456, + "learning_rate": 4.630908478200562e-06, + "loss": 1.0472, "step": 17635 }, { - "epoch": 0.4997591317407691, + "epoch": 0.6900383441583848, "grad_norm": 0.0, - "learning_rate": 1.0493570005724676e-05, - "loss": 0.8721, + "learning_rate": 4.6298394343725884e-06, + "loss": 1.0116, "step": 17636 }, { - "epoch": 0.4997874691830315, + "epoch": 0.6900774708506143, "grad_norm": 0.0, - "learning_rate": 1.0492653332983434e-05, - "loss": 0.8693, + "learning_rate": 4.62877047678266e-06, + "loss": 1.0002, "step": 17637 }, { - "epoch": 0.499815806625294, + "epoch": 0.6901165975428437, "grad_norm": 0.0, - "learning_rate": 1.049173665609239e-05, - "loss": 1.0205, + "learning_rate": 4.627701605447949e-06, + "loss": 0.9916, "step": 17638 }, { - "epoch": 0.49984414406755645, + "epoch": 0.6901557242350732, "grad_norm": 0.0, - "learning_rate": 1.0490819975059268e-05, - "loss": 0.9463, + "learning_rate": 4.626632820385617e-06, + "loss": 0.8826, "step": 17639 }, { - "epoch": 0.49987248150981894, + "epoch": 0.6901948509273026, "grad_norm": 0.0, - "learning_rate": 1.0489903289891787e-05, - "loss": 0.8509, + "learning_rate": 4.6255641216128326e-06, + "loss": 1.1205, "step": 17640 }, { - "epoch": 0.4999008189520814, + "epoch": 0.6902339776195321, "grad_norm": 0.0, - "learning_rate": 1.0488986600597669e-05, - "loss": 0.9659, + "learning_rate": 4.624495509146749e-06, + "loss": 1.0158, "step": 17641 }, { - "epoch": 0.4999291563943438, + "epoch": 0.6902731043117615, "grad_norm": 0.0, - "learning_rate": 1.0488069907184632e-05, - "loss": 0.8364, + "learning_rate": 4.623426983004533e-06, + "loss": 0.9791, "step": 17642 }, { - "epoch": 0.4999574938366063, + "epoch": 0.690312231003991, "grad_norm": 0.0, - "learning_rate": 1.0487153209660405e-05, - "loss": 0.7855, + "learning_rate": 4.622358543203342e-06, + "loss": 1.1974, "step": 17643 }, { - "epoch": 0.49998583127886875, + "epoch": 0.6903513576962204, "grad_norm": 0.0, - "learning_rate": 1.0486236508032703e-05, - "loss": 0.8903, + "learning_rate": 4.621290189760339e-06, + "loss": 1.0608, "step": 17644 }, { - "epoch": 0.5000141687211312, + "epoch": 0.6903904843884499, "grad_norm": 0.0, - "learning_rate": 1.048531980230925e-05, - "loss": 0.9685, + "learning_rate": 4.6202219226926704e-06, + "loss": 0.9715, "step": 17645 }, { - "epoch": 0.5000425061633937, + "epoch": 0.6904296110806792, "grad_norm": 0.0, - "learning_rate": 1.048440309249777e-05, - "loss": 0.8864, + "learning_rate": 4.619153742017501e-06, + "loss": 0.9304, "step": 17646 }, { - "epoch": 0.5000708436056561, + "epoch": 0.6904687377729086, "grad_norm": 0.0, - "learning_rate": 1.0483486378605983e-05, - "loss": 0.9616, + "learning_rate": 4.618085647751973e-06, + "loss": 0.9673, "step": 17647 }, { - "epoch": 0.5000991810479186, + "epoch": 0.6905078644651381, "grad_norm": 0.0, - "learning_rate": 1.0482569660641611e-05, - "loss": 0.865, + "learning_rate": 4.617017639913252e-06, + "loss": 0.9927, "step": 17648 }, { - "epoch": 0.5001275184901811, + "epoch": 0.6905469911573675, "grad_norm": 0.0, - "learning_rate": 1.0481652938612374e-05, - "loss": 0.8841, + "learning_rate": 4.6159497185184785e-06, + "loss": 0.8848, "step": 17649 }, { - "epoch": 0.5001558559324435, + "epoch": 0.690586117849597, "grad_norm": 0.0, - "learning_rate": 1.0480736212525996e-05, - "loss": 0.8916, + "learning_rate": 4.614881883584811e-06, + "loss": 1.0562, "step": 17650 }, { - "epoch": 0.500184193374706, + "epoch": 0.6906252445418264, "grad_norm": 0.0, - "learning_rate": 1.0479819482390194e-05, - "loss": 0.9031, + "learning_rate": 4.613814135129384e-06, + "loss": 0.971, "step": 17651 }, { - "epoch": 0.5002125308169685, + "epoch": 0.6906643712340559, "grad_norm": 0.0, - "learning_rate": 1.0478902748212701e-05, - "loss": 0.9257, + "learning_rate": 4.61274647316936e-06, + "loss": 0.9237, "step": 17652 }, { - "epoch": 0.500240868259231, + "epoch": 0.6907034979262853, "grad_norm": 0.0, - "learning_rate": 1.0477986010001232e-05, - "loss": 0.9207, + "learning_rate": 4.611678897721874e-06, + "loss": 0.9343, "step": 17653 }, { - "epoch": 0.5002692057014934, + "epoch": 0.6907426246185148, "grad_norm": 0.0, - "learning_rate": 1.0477069267763505e-05, - "loss": 0.8812, + "learning_rate": 4.610611408804077e-06, + "loss": 1.0528, "step": 17654 }, { - "epoch": 0.5002975431437559, + "epoch": 0.6907817513107442, "grad_norm": 0.0, - "learning_rate": 1.0476152521507247e-05, - "loss": 0.9237, + "learning_rate": 4.6095440064331035e-06, + "loss": 1.0288, "step": 17655 }, { - "epoch": 0.5003258805860183, + "epoch": 0.6908208780029736, "grad_norm": 0.0, - "learning_rate": 1.0475235771240185e-05, - "loss": 0.9858, + "learning_rate": 4.608476690626098e-06, + "loss": 0.9436, "step": 17656 }, { - "epoch": 0.5003542180282807, + "epoch": 0.690860004695203, "grad_norm": 0.0, - "learning_rate": 1.047431901697003e-05, - "loss": 0.9157, + "learning_rate": 4.6074094614002015e-06, + "loss": 1.0511, "step": 17657 }, { - "epoch": 0.5003825554705432, + "epoch": 0.6908991313874325, "grad_norm": 0.0, - "learning_rate": 1.0473402258704509e-05, - "loss": 0.8332, + "learning_rate": 4.606342318772556e-06, + "loss": 1.1205, "step": 17658 }, { - "epoch": 0.5004108929128057, + "epoch": 0.6909382580796619, "grad_norm": 0.0, - "learning_rate": 1.0472485496451347e-05, - "loss": 0.8394, + "learning_rate": 4.605275262760289e-06, + "loss": 1.0218, "step": 17659 }, { - "epoch": 0.5004392303550681, + "epoch": 0.6909773847718914, "grad_norm": 0.0, - "learning_rate": 1.0471568730218267e-05, - "loss": 0.8863, + "learning_rate": 4.6042082933805425e-06, + "loss": 1.0069, "step": 17660 }, { - "epoch": 0.5004675677973306, + "epoch": 0.6910165114641208, "grad_norm": 0.0, - "learning_rate": 1.0470651960012987e-05, - "loss": 0.8541, + "learning_rate": 4.603141410650449e-06, + "loss": 0.9379, "step": 17661 }, { - "epoch": 0.5004959052395931, + "epoch": 0.6910556381563503, "grad_norm": 0.0, - "learning_rate": 1.0469735185843228e-05, - "loss": 0.7814, + "learning_rate": 4.6020746145871454e-06, + "loss": 0.942, "step": 17662 }, { - "epoch": 0.5005242426818556, + "epoch": 0.6910947648485797, "grad_norm": 0.0, - "learning_rate": 1.0468818407716719e-05, - "loss": 0.9026, + "learning_rate": 4.601007905207756e-06, + "loss": 0.9584, "step": 17663 }, { - "epoch": 0.500552580124118, + "epoch": 0.6911338915408092, "grad_norm": 0.0, - "learning_rate": 1.0467901625641174e-05, - "loss": 1.0093, + "learning_rate": 4.5999412825294145e-06, + "loss": 0.942, "step": 17664 }, { - "epoch": 0.5005809175663805, + "epoch": 0.6911730182330386, "grad_norm": 0.0, - "learning_rate": 1.0466984839624324e-05, - "loss": 0.8927, + "learning_rate": 4.5988747465692525e-06, + "loss": 1.0162, "step": 17665 }, { - "epoch": 0.500609255008643, + "epoch": 0.6912121449252681, "grad_norm": 0.0, - "learning_rate": 1.0466068049673883e-05, - "loss": 0.8161, + "learning_rate": 4.59780829734439e-06, + "loss": 0.961, "step": 17666 }, { - "epoch": 0.5006375924509053, + "epoch": 0.6912512716174974, "grad_norm": 0.0, - "learning_rate": 1.0465151255797582e-05, - "loss": 0.9286, + "learning_rate": 4.596741934871959e-06, + "loss": 1.0605, "step": 17667 }, { - "epoch": 0.5006659298931678, + "epoch": 0.6912903983097269, "grad_norm": 0.0, - "learning_rate": 1.0464234458003139e-05, - "loss": 0.9893, + "learning_rate": 4.5956756591690845e-06, + "loss": 0.9688, "step": 17668 }, { - "epoch": 0.5006942673354303, + "epoch": 0.6913295250019563, "grad_norm": 0.0, - "learning_rate": 1.0463317656298273e-05, - "loss": 0.7915, + "learning_rate": 4.594609470252882e-06, + "loss": 1.0639, "step": 17669 }, { - "epoch": 0.5007226047776928, + "epoch": 0.6913686516941858, "grad_norm": 0.0, - "learning_rate": 1.0462400850690715e-05, - "loss": 0.9532, + "learning_rate": 4.5935433681404795e-06, + "loss": 0.9646, "step": 17670 }, { - "epoch": 0.5007509422199552, + "epoch": 0.6914077783864152, "grad_norm": 0.0, - "learning_rate": 1.0461484041188179e-05, - "loss": 0.8687, + "learning_rate": 4.5924773528489986e-06, + "loss": 0.8969, "step": 17671 }, { - "epoch": 0.5007792796622177, + "epoch": 0.6914469050786447, "grad_norm": 0.0, - "learning_rate": 1.0460567227798392e-05, - "loss": 0.8647, + "learning_rate": 4.591411424395552e-06, + "loss": 0.999, "step": 17672 }, { - "epoch": 0.5008076171044802, + "epoch": 0.6914860317708741, "grad_norm": 0.0, - "learning_rate": 1.045965041052908e-05, - "loss": 0.8394, + "learning_rate": 4.590345582797261e-06, + "loss": 1.068, "step": 17673 }, { - "epoch": 0.5008359545467426, + "epoch": 0.6915251584631036, "grad_norm": 0.0, - "learning_rate": 1.045873358938796e-05, - "loss": 0.8847, + "learning_rate": 4.589279828071242e-06, + "loss": 1.0153, "step": 17674 }, { - "epoch": 0.5008642919890051, + "epoch": 0.691564285155333, "grad_norm": 0.0, - "learning_rate": 1.0457816764382756e-05, - "loss": 0.9183, + "learning_rate": 4.588214160234611e-06, + "loss": 0.8823, "step": 17675 }, { - "epoch": 0.5008926294312676, + "epoch": 0.6916034118475624, "grad_norm": 0.0, - "learning_rate": 1.0456899935521187e-05, - "loss": 0.8942, + "learning_rate": 4.587148579304477e-06, + "loss": 0.9717, "step": 17676 }, { - "epoch": 0.50092096687353, + "epoch": 0.6916425385397919, "grad_norm": 0.0, - "learning_rate": 1.0455983102810987e-05, - "loss": 0.9431, + "learning_rate": 4.586083085297957e-06, + "loss": 1.037, "step": 17677 }, { - "epoch": 0.5009493043157924, + "epoch": 0.6916816652320212, "grad_norm": 0.0, - "learning_rate": 1.045506626625987e-05, - "loss": 0.8863, + "learning_rate": 4.585017678232151e-06, + "loss": 0.9737, "step": 17678 }, { - "epoch": 0.5009776417580549, + "epoch": 0.6917207919242507, "grad_norm": 0.0, - "learning_rate": 1.045414942587556e-05, - "loss": 0.8729, + "learning_rate": 4.583952358124183e-06, + "loss": 1.0368, "step": 17679 }, { - "epoch": 0.5010059792003174, + "epoch": 0.6917599186164801, "grad_norm": 0.0, - "learning_rate": 1.0453232581665783e-05, - "loss": 0.7876, + "learning_rate": 4.58288712499115e-06, + "loss": 1.0675, "step": 17680 }, { - "epoch": 0.5010343166425798, + "epoch": 0.6917990453087096, "grad_norm": 0.0, - "learning_rate": 1.0452315733638257e-05, - "loss": 0.8498, + "learning_rate": 4.581821978850166e-06, + "loss": 1.0942, "step": 17681 }, { - "epoch": 0.5010626540848423, + "epoch": 0.691838172000939, "grad_norm": 0.0, - "learning_rate": 1.0451398881800708e-05, - "loss": 0.9907, + "learning_rate": 4.580756919718323e-06, + "loss": 1.0628, "step": 17682 }, { - "epoch": 0.5010909915271048, + "epoch": 0.6918772986931685, "grad_norm": 0.0, - "learning_rate": 1.0450482026160855e-05, - "loss": 0.9177, + "learning_rate": 4.579691947612742e-06, + "loss": 0.9594, "step": 17683 }, { - "epoch": 0.5011193289693672, + "epoch": 0.6919164253853979, "grad_norm": 0.0, - "learning_rate": 1.044956516672643e-05, - "loss": 0.9574, + "learning_rate": 4.578627062550513e-06, + "loss": 0.9486, "step": 17684 }, { - "epoch": 0.5011476664116297, + "epoch": 0.6919555520776274, "grad_norm": 0.0, - "learning_rate": 1.044864830350515e-05, - "loss": 0.8944, + "learning_rate": 4.577562264548741e-06, + "loss": 0.9657, "step": 17685 }, { - "epoch": 0.5011760038538922, + "epoch": 0.6919946787698568, "grad_norm": 0.0, - "learning_rate": 1.044773143650474e-05, - "loss": 0.9388, + "learning_rate": 4.576497553624523e-06, + "loss": 0.8609, "step": 17686 }, { - "epoch": 0.5012043412961547, + "epoch": 0.6920338054620863, "grad_norm": 0.0, - "learning_rate": 1.0446814565732919e-05, - "loss": 0.8774, + "learning_rate": 4.575432929794959e-06, + "loss": 0.9489, "step": 17687 }, { - "epoch": 0.501232678738417, + "epoch": 0.6920729321543156, "grad_norm": 0.0, - "learning_rate": 1.0445897691197412e-05, - "loss": 0.9685, + "learning_rate": 4.5743683930771425e-06, + "loss": 0.9364, "step": 17688 }, { - "epoch": 0.5012610161806795, + "epoch": 0.6921120588465451, "grad_norm": 0.0, - "learning_rate": 1.0444980812905945e-05, - "loss": 0.7891, + "learning_rate": 4.573303943488175e-06, + "loss": 0.9239, "step": 17689 }, { - "epoch": 0.501289353622942, + "epoch": 0.6921511855387745, "grad_norm": 0.0, - "learning_rate": 1.044406393086624e-05, - "loss": 0.9582, + "learning_rate": 4.572239581045144e-06, + "loss": 1.0925, "step": 17690 }, { - "epoch": 0.5013176910652044, + "epoch": 0.692190312231004, "grad_norm": 0.0, - "learning_rate": 1.0443147045086017e-05, - "loss": 0.8952, + "learning_rate": 4.571175305765143e-06, + "loss": 0.9766, "step": 17691 }, { - "epoch": 0.5013460285074669, + "epoch": 0.6922294389232334, "grad_norm": 0.0, - "learning_rate": 1.0442230155573005e-05, - "loss": 0.9482, + "learning_rate": 4.570111117665263e-06, + "loss": 0.9683, "step": 17692 }, { - "epoch": 0.5013743659497294, + "epoch": 0.6922685656154629, "grad_norm": 0.0, - "learning_rate": 1.0441313262334925e-05, - "loss": 0.9994, + "learning_rate": 4.5690470167626e-06, + "loss": 1.0397, "step": 17693 }, { - "epoch": 0.5014027033919919, + "epoch": 0.6923076923076923, "grad_norm": 0.0, - "learning_rate": 1.0440396365379496e-05, - "loss": 0.9001, + "learning_rate": 4.567983003074231e-06, + "loss": 0.9018, "step": 17694 }, { - "epoch": 0.5014310408342543, + "epoch": 0.6923468189999218, "grad_norm": 0.0, - "learning_rate": 1.0439479464714447e-05, - "loss": 0.8545, + "learning_rate": 4.566919076617254e-06, + "loss": 0.9023, "step": 17695 }, { - "epoch": 0.5014593782765168, + "epoch": 0.6923859456921512, "grad_norm": 0.0, - "learning_rate": 1.0438562560347499e-05, - "loss": 0.8483, + "learning_rate": 4.56585523740874e-06, + "loss": 0.9747, "step": 17696 }, { - "epoch": 0.5014877157187793, + "epoch": 0.6924250723843807, "grad_norm": 0.0, - "learning_rate": 1.0437645652286374e-05, - "loss": 0.8907, + "learning_rate": 4.564791485465788e-06, + "loss": 0.991, "step": 17697 }, { - "epoch": 0.5015160531610416, + "epoch": 0.69246419907661, "grad_norm": 0.0, - "learning_rate": 1.04367287405388e-05, - "loss": 1.0026, + "learning_rate": 4.563727820805471e-06, + "loss": 1.0188, "step": 17698 }, { - "epoch": 0.5015443906033041, + "epoch": 0.6925033257688396, "grad_norm": 0.0, - "learning_rate": 1.0435811825112496e-05, - "loss": 0.917, + "learning_rate": 4.562664243444877e-06, + "loss": 0.9876, "step": 17699 }, { - "epoch": 0.5015727280455666, + "epoch": 0.6925424524610689, "grad_norm": 0.0, - "learning_rate": 1.0434894906015188e-05, - "loss": 0.8313, + "learning_rate": 4.561600753401075e-06, + "loss": 1.0435, "step": 17700 }, { - "epoch": 0.5016010654878291, + "epoch": 0.6925815791532984, "grad_norm": 0.0, - "learning_rate": 1.0433977983254598e-05, - "loss": 0.9057, + "learning_rate": 4.560537350691158e-06, + "loss": 0.9844, "step": 17701 }, { - "epoch": 0.5016294029300915, + "epoch": 0.6926207058455278, "grad_norm": 0.0, - "learning_rate": 1.0433061056838449e-05, - "loss": 0.8657, + "learning_rate": 4.5594740353321895e-06, + "loss": 0.9664, "step": 17702 }, { - "epoch": 0.501657740372354, + "epoch": 0.6926598325377572, "grad_norm": 0.0, - "learning_rate": 1.0432144126774469e-05, - "loss": 0.9168, + "learning_rate": 4.558410807341256e-06, + "loss": 1.039, "step": 17703 }, { - "epoch": 0.5016860778146165, + "epoch": 0.6926989592299867, "grad_norm": 0.0, - "learning_rate": 1.0431227193070374e-05, - "loss": 0.9673, + "learning_rate": 4.557347666735422e-06, + "loss": 0.8457, "step": 17704 }, { - "epoch": 0.5017144152568789, + "epoch": 0.6927380859222161, "grad_norm": 0.0, - "learning_rate": 1.0430310255733895e-05, - "loss": 0.9047, + "learning_rate": 4.556284613531764e-06, + "loss": 1.0482, "step": 17705 }, { - "epoch": 0.5017427526991414, + "epoch": 0.6927772126144456, "grad_norm": 0.0, - "learning_rate": 1.0429393314772756e-05, - "loss": 0.9508, + "learning_rate": 4.555221647747354e-06, + "loss": 1.0587, "step": 17706 }, { - "epoch": 0.5017710901414039, + "epoch": 0.692816339306675, "grad_norm": 0.0, - "learning_rate": 1.0428476370194675e-05, - "loss": 0.9252, + "learning_rate": 4.554158769399266e-06, + "loss": 0.962, "step": 17707 }, { - "epoch": 0.5017994275836662, + "epoch": 0.6928554659989045, "grad_norm": 0.0, - "learning_rate": 1.0427559422007375e-05, - "loss": 0.8915, + "learning_rate": 4.553095978504561e-06, + "loss": 0.9009, "step": 17708 }, { - "epoch": 0.5018277650259287, + "epoch": 0.6928945926911338, "grad_norm": 0.0, - "learning_rate": 1.0426642470218587e-05, - "loss": 0.9696, + "learning_rate": 4.5520332750803075e-06, + "loss": 1.121, "step": 17709 }, { - "epoch": 0.5018561024681912, + "epoch": 0.6929337193833633, "grad_norm": 0.0, - "learning_rate": 1.042572551483603e-05, - "loss": 0.9515, + "learning_rate": 4.550970659143578e-06, + "loss": 0.97, "step": 17710 }, { - "epoch": 0.5018844399104537, + "epoch": 0.6929728460755927, "grad_norm": 0.0, - "learning_rate": 1.0424808555867429e-05, - "loss": 0.9288, + "learning_rate": 4.549908130711427e-06, + "loss": 0.9557, "step": 17711 }, { - "epoch": 0.5019127773527161, + "epoch": 0.6930119727678222, "grad_norm": 0.0, - "learning_rate": 1.0423891593320507e-05, - "loss": 0.901, + "learning_rate": 4.548845689800923e-06, + "loss": 1.0282, "step": 17712 }, { - "epoch": 0.5019411147949786, + "epoch": 0.6930510994600516, "grad_norm": 0.0, - "learning_rate": 1.042297462720299e-05, - "loss": 0.92, + "learning_rate": 4.547783336429124e-06, + "loss": 0.9454, "step": 17713 }, { - "epoch": 0.5019694522372411, + "epoch": 0.6930902261522811, "grad_norm": 0.0, - "learning_rate": 1.0422057657522602e-05, - "loss": 0.8894, + "learning_rate": 4.546721070613099e-06, + "loss": 0.9593, "step": 17714 }, { - "epoch": 0.5019977896795035, + "epoch": 0.6931293528445105, "grad_norm": 0.0, - "learning_rate": 1.0421140684287063e-05, - "loss": 0.764, + "learning_rate": 4.545658892369897e-06, + "loss": 0.9842, "step": 17715 }, { - "epoch": 0.502026127121766, + "epoch": 0.69316847953674, "grad_norm": 0.0, - "learning_rate": 1.04202237075041e-05, - "loss": 0.9155, + "learning_rate": 4.54459680171658e-06, + "loss": 0.9404, "step": 17716 }, { - "epoch": 0.5020544645640285, + "epoch": 0.6932076062289694, "grad_norm": 0.0, - "learning_rate": 1.0419306727181438e-05, - "loss": 0.791, + "learning_rate": 4.5435347986702e-06, + "loss": 1.0261, "step": 17717 }, { - "epoch": 0.502082802006291, + "epoch": 0.6932467329211989, "grad_norm": 0.0, - "learning_rate": 1.04183897433268e-05, - "loss": 0.8645, + "learning_rate": 4.542472883247814e-06, + "loss": 1.0318, "step": 17718 }, { - "epoch": 0.5021111394485533, + "epoch": 0.6932858596134283, "grad_norm": 0.0, - "learning_rate": 1.0417472755947908e-05, - "loss": 0.8194, + "learning_rate": 4.541411055466474e-06, + "loss": 0.8776, "step": 17719 }, { - "epoch": 0.5021394768908158, + "epoch": 0.6933249863056578, "grad_norm": 0.0, - "learning_rate": 1.0416555765052487e-05, - "loss": 0.8582, + "learning_rate": 4.540349315343236e-06, + "loss": 1.1007, "step": 17720 }, { - "epoch": 0.5021678143330783, + "epoch": 0.6933641129978871, "grad_norm": 0.0, - "learning_rate": 1.0415638770648266e-05, - "loss": 0.9344, + "learning_rate": 4.539287662895143e-06, + "loss": 1.052, "step": 17721 }, { - "epoch": 0.5021961517753407, + "epoch": 0.6934032396901166, "grad_norm": 0.0, - "learning_rate": 1.0414721772742962e-05, - "loss": 0.888, + "learning_rate": 4.538226098139249e-06, + "loss": 1.0153, "step": 17722 }, { - "epoch": 0.5022244892176032, + "epoch": 0.693442366382346, "grad_norm": 0.0, - "learning_rate": 1.0413804771344305e-05, - "loss": 0.8125, + "learning_rate": 4.537164621092599e-06, + "loss": 0.9335, "step": 17723 }, { - "epoch": 0.5022528266598657, + "epoch": 0.6934814930745755, "grad_norm": 0.0, - "learning_rate": 1.0412887766460017e-05, - "loss": 0.9528, + "learning_rate": 4.536103231772243e-06, + "loss": 0.9633, "step": 17724 }, { - "epoch": 0.5022811641021282, + "epoch": 0.6935206197668049, "grad_norm": 0.0, - "learning_rate": 1.0411970758097818e-05, - "loss": 0.8768, + "learning_rate": 4.535041930195218e-06, + "loss": 0.8797, "step": 17725 }, { - "epoch": 0.5023095015443906, + "epoch": 0.6935597464590344, "grad_norm": 0.0, - "learning_rate": 1.041105374626544e-05, - "loss": 0.873, + "learning_rate": 4.533980716378577e-06, + "loss": 0.891, "step": 17726 }, { - "epoch": 0.5023378389866531, + "epoch": 0.6935988731512638, "grad_norm": 0.0, - "learning_rate": 1.0410136730970603e-05, - "loss": 0.8928, + "learning_rate": 4.532919590339349e-06, + "loss": 1.0516, "step": 17727 }, { - "epoch": 0.5023661764289156, + "epoch": 0.6936379998434933, "grad_norm": 0.0, - "learning_rate": 1.040921971222103e-05, - "loss": 0.7908, + "learning_rate": 4.531858552094589e-06, + "loss": 1.035, "step": 17728 }, { - "epoch": 0.502394513871178, + "epoch": 0.6936771265357227, "grad_norm": 0.0, - "learning_rate": 1.0408302690024447e-05, - "loss": 1.0005, + "learning_rate": 4.530797601661324e-06, + "loss": 0.8813, "step": 17729 }, { - "epoch": 0.5024228513134404, + "epoch": 0.6937162532279522, "grad_norm": 0.0, - "learning_rate": 1.040738566438858e-05, - "loss": 0.9338, + "learning_rate": 4.529736739056601e-06, + "loss": 0.9378, "step": 17730 }, { - "epoch": 0.5024511887557029, + "epoch": 0.6937553799201815, "grad_norm": 0.0, - "learning_rate": 1.0406468635321157e-05, - "loss": 0.828, + "learning_rate": 4.528675964297443e-06, + "loss": 0.9032, "step": 17731 }, { - "epoch": 0.5024795261979653, + "epoch": 0.6937945066124109, "grad_norm": 0.0, - "learning_rate": 1.0405551602829893e-05, - "loss": 0.8519, + "learning_rate": 4.527615277400901e-06, + "loss": 1.0789, "step": 17732 }, { - "epoch": 0.5025078636402278, + "epoch": 0.6938336333046404, "grad_norm": 0.0, - "learning_rate": 1.0404634566922516e-05, - "loss": 0.9304, + "learning_rate": 4.526554678383997e-06, + "loss": 1.0857, "step": 17733 }, { - "epoch": 0.5025362010824903, + "epoch": 0.6938727599968698, "grad_norm": 0.0, - "learning_rate": 1.0403717527606757e-05, - "loss": 0.9146, + "learning_rate": 4.52549416726377e-06, + "loss": 1.0765, "step": 17734 }, { - "epoch": 0.5025645385247528, + "epoch": 0.6939118866890993, "grad_norm": 0.0, - "learning_rate": 1.040280048489033e-05, - "loss": 0.8405, + "learning_rate": 4.5244337440572415e-06, + "loss": 0.9962, "step": 17735 }, { - "epoch": 0.5025928759670152, + "epoch": 0.6939510133813287, "grad_norm": 0.0, - "learning_rate": 1.0401883438780966e-05, - "loss": 0.9566, + "learning_rate": 4.523373408781447e-06, + "loss": 0.9348, "step": 17736 }, { - "epoch": 0.5026212134092777, + "epoch": 0.6939901400735582, "grad_norm": 0.0, - "learning_rate": 1.040096638928639e-05, - "loss": 0.936, + "learning_rate": 4.522313161453413e-06, + "loss": 1.0516, "step": 17737 }, { - "epoch": 0.5026495508515402, + "epoch": 0.6940292667657876, "grad_norm": 0.0, - "learning_rate": 1.0400049336414323e-05, - "loss": 0.9631, + "learning_rate": 4.521253002090169e-06, + "loss": 1.0154, "step": 17738 }, { - "epoch": 0.5026778882938026, + "epoch": 0.6940683934580171, "grad_norm": 0.0, - "learning_rate": 1.0399132280172494e-05, - "loss": 0.849, + "learning_rate": 4.5201929307087336e-06, + "loss": 1.0444, "step": 17739 }, { - "epoch": 0.502706225736065, + "epoch": 0.6941075201502465, "grad_norm": 0.0, - "learning_rate": 1.0398215220568629e-05, - "loss": 0.9843, + "learning_rate": 4.519132947326132e-06, + "loss": 1.0602, "step": 17740 }, { - "epoch": 0.5027345631783275, + "epoch": 0.694146646842476, "grad_norm": 0.0, - "learning_rate": 1.0397298157610442e-05, - "loss": 0.9782, + "learning_rate": 4.518073051959388e-06, + "loss": 0.8544, "step": 17741 }, { - "epoch": 0.50276290062059, + "epoch": 0.6941857735347053, "grad_norm": 0.0, - "learning_rate": 1.0396381091305666e-05, - "loss": 0.8057, + "learning_rate": 4.517013244625526e-06, + "loss": 1.0012, "step": 17742 }, { - "epoch": 0.5027912380628524, + "epoch": 0.6942249002269348, "grad_norm": 0.0, - "learning_rate": 1.0395464021662031e-05, - "loss": 0.789, + "learning_rate": 4.515953525341555e-06, + "loss": 1.015, "step": 17743 }, { - "epoch": 0.5028195755051149, + "epoch": 0.6942640269191642, "grad_norm": 0.0, - "learning_rate": 1.0394546948687253e-05, - "loss": 0.9099, + "learning_rate": 4.514893894124504e-06, + "loss": 1.1168, "step": 17744 }, { - "epoch": 0.5028479129473774, + "epoch": 0.6943031536113937, "grad_norm": 0.0, - "learning_rate": 1.0393629872389057e-05, - "loss": 0.8968, + "learning_rate": 4.513834350991376e-06, + "loss": 1.1058, "step": 17745 }, { - "epoch": 0.5028762503896398, + "epoch": 0.6943422803036231, "grad_norm": 0.0, - "learning_rate": 1.0392712792775172e-05, - "loss": 0.8969, + "learning_rate": 4.512774895959201e-06, + "loss": 0.9448, "step": 17746 }, { - "epoch": 0.5029045878319023, + "epoch": 0.6943814069958526, "grad_norm": 0.0, - "learning_rate": 1.0391795709853323e-05, - "loss": 0.8545, + "learning_rate": 4.511715529044982e-06, + "loss": 0.9517, "step": 17747 }, { - "epoch": 0.5029329252741648, + "epoch": 0.694420533688082, "grad_norm": 0.0, - "learning_rate": 1.039087862363123e-05, - "loss": 0.9463, + "learning_rate": 4.510656250265738e-06, + "loss": 0.9703, "step": 17748 }, { - "epoch": 0.5029612627164272, + "epoch": 0.6944596603803115, "grad_norm": 0.0, - "learning_rate": 1.0389961534116622e-05, - "loss": 0.9225, + "learning_rate": 4.509597059638472e-06, + "loss": 1.0297, "step": 17749 }, { - "epoch": 0.5029896001586897, + "epoch": 0.6944987870725409, "grad_norm": 0.0, - "learning_rate": 1.0389044441317224e-05, - "loss": 0.9385, + "learning_rate": 4.5085379571801966e-06, + "loss": 1.0371, "step": 17750 }, { - "epoch": 0.5030179376009521, + "epoch": 0.6945379137647704, "grad_norm": 0.0, - "learning_rate": 1.0388127345240762e-05, - "loss": 0.9216, + "learning_rate": 4.5074789429079224e-06, + "loss": 0.9686, "step": 17751 }, { - "epoch": 0.5030462750432146, + "epoch": 0.6945770404569998, "grad_norm": 0.0, - "learning_rate": 1.0387210245894959e-05, - "loss": 0.882, + "learning_rate": 4.506420016838656e-06, + "loss": 0.9663, "step": 17752 }, { - "epoch": 0.503074612485477, + "epoch": 0.6946161671492292, "grad_norm": 0.0, - "learning_rate": 1.038629314328754e-05, - "loss": 0.875, + "learning_rate": 4.505361178989397e-06, + "loss": 0.9431, "step": 17753 }, { - "epoch": 0.5031029499277395, + "epoch": 0.6946552938414586, "grad_norm": 0.0, - "learning_rate": 1.0385376037426227e-05, - "loss": 0.9159, + "learning_rate": 4.504302429377152e-06, + "loss": 0.9797, "step": 17754 }, { - "epoch": 0.503131287370002, + "epoch": 0.6946944205336881, "grad_norm": 0.0, - "learning_rate": 1.038445892831875e-05, - "loss": 0.915, + "learning_rate": 4.503243768018928e-06, + "loss": 1.0994, "step": 17755 }, { - "epoch": 0.5031596248122644, + "epoch": 0.6947335472259175, "grad_norm": 0.0, - "learning_rate": 1.0383541815972835e-05, - "loss": 0.7697, + "learning_rate": 4.502185194931718e-06, + "loss": 0.9623, "step": 17756 }, { - "epoch": 0.5031879622545269, + "epoch": 0.694772673918147, "grad_norm": 0.0, - "learning_rate": 1.0382624700396204e-05, - "loss": 0.9447, + "learning_rate": 4.501126710132523e-06, + "loss": 1.017, "step": 17757 }, { - "epoch": 0.5032162996967894, + "epoch": 0.6948118006103764, "grad_norm": 0.0, - "learning_rate": 1.0381707581596581e-05, - "loss": 0.8691, + "learning_rate": 4.500068313638343e-06, + "loss": 0.919, "step": 17758 }, { - "epoch": 0.5032446371390519, + "epoch": 0.6948509273026059, "grad_norm": 0.0, - "learning_rate": 1.0380790459581695e-05, - "loss": 0.8911, + "learning_rate": 4.4990100054661775e-06, + "loss": 0.8878, "step": 17759 }, { - "epoch": 0.5032729745813143, + "epoch": 0.6948900539948353, "grad_norm": 0.0, - "learning_rate": 1.037987333435927e-05, - "loss": 0.9435, + "learning_rate": 4.4979517856330155e-06, + "loss": 0.933, "step": 17760 }, { - "epoch": 0.5033013120235768, + "epoch": 0.6949291806870647, "grad_norm": 0.0, - "learning_rate": 1.037895620593703e-05, - "loss": 0.9362, + "learning_rate": 4.496893654155858e-06, + "loss": 1.0247, "step": 17761 }, { - "epoch": 0.5033296494658392, + "epoch": 0.6949683073792942, "grad_norm": 0.0, - "learning_rate": 1.0378039074322699e-05, - "loss": 0.8417, + "learning_rate": 4.495835611051687e-06, + "loss": 0.9715, "step": 17762 }, { - "epoch": 0.5033579869081016, + "epoch": 0.6950074340715235, "grad_norm": 0.0, - "learning_rate": 1.0377121939524009e-05, - "loss": 0.9706, + "learning_rate": 4.4947776563374986e-06, + "loss": 0.9961, "step": 17763 }, { - "epoch": 0.5033863243503641, + "epoch": 0.695046560763753, "grad_norm": 0.0, - "learning_rate": 1.0376204801548677e-05, - "loss": 0.8903, + "learning_rate": 4.493719790030284e-06, + "loss": 0.9927, "step": 17764 }, { - "epoch": 0.5034146617926266, + "epoch": 0.6950856874559824, "grad_norm": 0.0, - "learning_rate": 1.0375287660404436e-05, - "loss": 0.9099, + "learning_rate": 4.492662012147033e-06, + "loss": 0.9359, "step": 17765 }, { - "epoch": 0.5034429992348891, + "epoch": 0.6951248141482119, "grad_norm": 0.0, - "learning_rate": 1.0374370516099004e-05, - "loss": 0.977, + "learning_rate": 4.491604322704726e-06, + "loss": 0.9691, "step": 17766 }, { - "epoch": 0.5034713366771515, + "epoch": 0.6951639408404413, "grad_norm": 0.0, - "learning_rate": 1.0373453368640112e-05, - "loss": 0.9143, + "learning_rate": 4.49054672172035e-06, + "loss": 0.9188, "step": 17767 }, { - "epoch": 0.503499674119414, + "epoch": 0.6952030675326708, "grad_norm": 0.0, - "learning_rate": 1.0372536218035482e-05, - "loss": 0.8715, + "learning_rate": 4.489489209210891e-06, + "loss": 1.0026, "step": 17768 }, { - "epoch": 0.5035280115616765, + "epoch": 0.6952421942249002, "grad_norm": 0.0, - "learning_rate": 1.0371619064292844e-05, - "loss": 0.7415, + "learning_rate": 4.488431785193333e-06, + "loss": 1.0206, "step": 17769 }, { - "epoch": 0.5035563490039389, + "epoch": 0.6952813209171297, "grad_norm": 0.0, - "learning_rate": 1.0370701907419918e-05, - "loss": 0.8991, + "learning_rate": 4.48737444968465e-06, + "loss": 1.007, "step": 17770 }, { - "epoch": 0.5035846864462014, + "epoch": 0.6953204476093591, "grad_norm": 0.0, - "learning_rate": 1.0369784747424434e-05, - "loss": 0.8906, + "learning_rate": 4.486317202701828e-06, + "loss": 1.0169, "step": 17771 }, { - "epoch": 0.5036130238884639, + "epoch": 0.6953595743015886, "grad_norm": 0.0, - "learning_rate": 1.0368867584314115e-05, - "loss": 0.8243, + "learning_rate": 4.485260044261841e-06, + "loss": 1.0123, "step": 17772 }, { - "epoch": 0.5036413613307262, + "epoch": 0.695398700993818, "grad_norm": 0.0, - "learning_rate": 1.0367950418096689e-05, - "loss": 0.8502, + "learning_rate": 4.484202974381671e-06, + "loss": 0.9824, "step": 17773 }, { - "epoch": 0.5036696987729887, + "epoch": 0.6954378276860474, "grad_norm": 0.0, - "learning_rate": 1.0367033248779876e-05, - "loss": 0.9366, + "learning_rate": 4.483145993078286e-06, + "loss": 1.0795, "step": 17774 }, { - "epoch": 0.5036980362152512, + "epoch": 0.6954769543782768, "grad_norm": 0.0, - "learning_rate": 1.0366116076371407e-05, - "loss": 0.9466, + "learning_rate": 4.482089100368667e-06, + "loss": 0.9649, "step": 17775 }, { - "epoch": 0.5037263736575137, + "epoch": 0.6955160810705063, "grad_norm": 0.0, - "learning_rate": 1.0365198900879008e-05, - "loss": 0.8259, + "learning_rate": 4.481032296269775e-06, + "loss": 1.0375, "step": 17776 }, { - "epoch": 0.5037547110997761, + "epoch": 0.6955552077627357, "grad_norm": 0.0, - "learning_rate": 1.0364281722310403e-05, - "loss": 0.8657, + "learning_rate": 4.4799755807985965e-06, + "loss": 1.0986, "step": 17777 }, { - "epoch": 0.5037830485420386, + "epoch": 0.6955943344549652, "grad_norm": 0.0, - "learning_rate": 1.0363364540673315e-05, - "loss": 1.0197, + "learning_rate": 4.47891895397209e-06, + "loss": 0.9067, "step": 17778 }, { - "epoch": 0.5038113859843011, + "epoch": 0.6956334611471946, "grad_norm": 0.0, - "learning_rate": 1.0362447355975475e-05, - "loss": 0.8303, + "learning_rate": 4.4778624158072305e-06, + "loss": 0.921, "step": 17779 }, { - "epoch": 0.5038397234265635, + "epoch": 0.6956725878394241, "grad_norm": 0.0, - "learning_rate": 1.0361530168224605e-05, - "loss": 0.9433, + "learning_rate": 4.476805966320973e-06, + "loss": 0.9038, "step": 17780 }, { - "epoch": 0.503868060868826, + "epoch": 0.6957117145316535, "grad_norm": 0.0, - "learning_rate": 1.0360612977428435e-05, - "loss": 0.9787, + "learning_rate": 4.475749605530297e-06, + "loss": 0.9462, "step": 17781 }, { - "epoch": 0.5038963983110885, + "epoch": 0.695750841223883, "grad_norm": 0.0, - "learning_rate": 1.0359695783594687e-05, - "loss": 0.9453, + "learning_rate": 4.474693333452158e-06, + "loss": 0.9303, "step": 17782 }, { - "epoch": 0.503924735753351, + "epoch": 0.6957899679161124, "grad_norm": 0.0, - "learning_rate": 1.0358778586731084e-05, - "loss": 0.7892, + "learning_rate": 4.473637150103524e-06, + "loss": 0.9416, "step": 17783 }, { - "epoch": 0.5039530731956133, + "epoch": 0.6958290946083419, "grad_norm": 0.0, - "learning_rate": 1.035786138684536e-05, - "loss": 0.903, + "learning_rate": 4.472581055501348e-06, + "loss": 0.9781, "step": 17784 }, { - "epoch": 0.5039814106378758, + "epoch": 0.6958682213005712, "grad_norm": 0.0, - "learning_rate": 1.0356944183945237e-05, - "loss": 0.8894, + "learning_rate": 4.471525049662595e-06, + "loss": 1.0491, "step": 17785 }, { - "epoch": 0.5040097480801383, + "epoch": 0.6959073479928007, "grad_norm": 0.0, - "learning_rate": 1.0356026978038437e-05, - "loss": 0.9012, + "learning_rate": 4.470469132604222e-06, + "loss": 1.0387, "step": 17786 }, { - "epoch": 0.5040380855224007, + "epoch": 0.6959464746850301, "grad_norm": 0.0, - "learning_rate": 1.0355109769132689e-05, - "loss": 0.8456, + "learning_rate": 4.469413304343189e-06, + "loss": 0.9341, "step": 17787 }, { - "epoch": 0.5040664229646632, + "epoch": 0.6959856013772596, "grad_norm": 0.0, - "learning_rate": 1.0354192557235725e-05, - "loss": 0.8547, + "learning_rate": 4.468357564896445e-06, + "loss": 1.0791, "step": 17788 }, { - "epoch": 0.5040947604069257, + "epoch": 0.696024728069489, "grad_norm": 0.0, - "learning_rate": 1.0353275342355262e-05, - "loss": 0.8202, + "learning_rate": 4.467301914280946e-06, + "loss": 0.9518, "step": 17789 }, { - "epoch": 0.5041230978491882, + "epoch": 0.6960638547617184, "grad_norm": 0.0, - "learning_rate": 1.0352358124499031e-05, - "loss": 0.9204, + "learning_rate": 4.466246352513646e-06, + "loss": 1.0596, "step": 17790 }, { - "epoch": 0.5041514352914506, + "epoch": 0.6961029814539479, "grad_norm": 0.0, - "learning_rate": 1.0351440903674757e-05, - "loss": 0.9117, + "learning_rate": 4.4651908796115e-06, + "loss": 0.9791, "step": 17791 }, { - "epoch": 0.5041797727337131, + "epoch": 0.6961421081461773, "grad_norm": 0.0, - "learning_rate": 1.0350523679890163e-05, - "loss": 0.858, + "learning_rate": 4.464135495591447e-06, + "loss": 1.018, "step": 17792 }, { - "epoch": 0.5042081101759756, + "epoch": 0.6961812348384068, "grad_norm": 0.0, - "learning_rate": 1.0349606453152979e-05, - "loss": 0.841, + "learning_rate": 4.463080200470446e-06, + "loss": 1.0024, "step": 17793 }, { - "epoch": 0.5042364476182379, + "epoch": 0.6962203615306362, "grad_norm": 0.0, - "learning_rate": 1.0348689223470932e-05, - "loss": 0.9999, + "learning_rate": 4.462024994265436e-06, + "loss": 0.864, "step": 17794 }, { - "epoch": 0.5042647850605004, + "epoch": 0.6962594882228657, "grad_norm": 0.0, - "learning_rate": 1.0347771990851742e-05, - "loss": 0.8665, + "learning_rate": 4.460969876993364e-06, + "loss": 0.9681, "step": 17795 }, { - "epoch": 0.5042931225027629, + "epoch": 0.696298614915095, "grad_norm": 0.0, - "learning_rate": 1.0346854755303143e-05, - "loss": 0.81, + "learning_rate": 4.459914848671175e-06, + "loss": 0.9318, "step": 17796 }, { - "epoch": 0.5043214599450253, + "epoch": 0.6963377416073245, "grad_norm": 0.0, - "learning_rate": 1.0345937516832858e-05, - "loss": 0.9191, + "learning_rate": 4.458859909315816e-06, + "loss": 0.8452, "step": 17797 }, { - "epoch": 0.5043497973872878, + "epoch": 0.6963768682995539, "grad_norm": 0.0, - "learning_rate": 1.0345020275448612e-05, - "loss": 0.883, + "learning_rate": 4.457805058944219e-06, + "loss": 0.9474, "step": 17798 }, { - "epoch": 0.5043781348295503, + "epoch": 0.6964159949917834, "grad_norm": 0.0, - "learning_rate": 1.034410303115813e-05, - "loss": 0.8449, + "learning_rate": 4.456750297573329e-06, + "loss": 0.9735, "step": 17799 }, { - "epoch": 0.5044064722718128, + "epoch": 0.6964551216840128, "grad_norm": 0.0, - "learning_rate": 1.034318578396914e-05, - "loss": 0.8154, + "learning_rate": 4.4556956252200855e-06, + "loss": 0.9586, "step": 17800 }, { - "epoch": 0.5044348097140752, + "epoch": 0.6964942483762423, "grad_norm": 0.0, - "learning_rate": 1.0342268533889373e-05, - "loss": 0.7963, + "learning_rate": 4.45464104190142e-06, + "loss": 1.0375, "step": 17801 }, { - "epoch": 0.5044631471563377, + "epoch": 0.6965333750684717, "grad_norm": 0.0, - "learning_rate": 1.034135128092655e-05, - "loss": 0.9598, + "learning_rate": 4.45358654763427e-06, + "loss": 0.9544, "step": 17802 }, { - "epoch": 0.5044914845986002, + "epoch": 0.6965725017607012, "grad_norm": 0.0, - "learning_rate": 1.0340434025088396e-05, - "loss": 0.9213, + "learning_rate": 4.452532142435571e-06, + "loss": 1.0681, "step": 17803 }, { - "epoch": 0.5045198220408625, + "epoch": 0.6966116284529306, "grad_norm": 0.0, - "learning_rate": 1.0339516766382643e-05, - "loss": 0.8914, + "learning_rate": 4.4514778263222565e-06, + "loss": 1.1183, "step": 17804 }, { - "epoch": 0.504548159483125, + "epoch": 0.6966507551451601, "grad_norm": 0.0, - "learning_rate": 1.033859950481701e-05, - "loss": 0.8809, + "learning_rate": 4.450423599311254e-06, + "loss": 0.9187, "step": 17805 }, { - "epoch": 0.5045764969253875, + "epoch": 0.6966898818373894, "grad_norm": 0.0, - "learning_rate": 1.033768224039923e-05, - "loss": 0.8162, + "learning_rate": 4.449369461419496e-06, + "loss": 1.1138, "step": 17806 }, { - "epoch": 0.50460483436765, + "epoch": 0.6967290085296189, "grad_norm": 0.0, - "learning_rate": 1.0336764973137026e-05, - "loss": 0.8537, + "learning_rate": 4.4483154126639026e-06, + "loss": 1.0461, "step": 17807 }, { - "epoch": 0.5046331718099124, + "epoch": 0.6967681352218483, "grad_norm": 0.0, - "learning_rate": 1.0335847703038126e-05, - "loss": 0.8112, + "learning_rate": 4.4472614530614146e-06, + "loss": 0.9915, "step": 17808 }, { - "epoch": 0.5046615092521749, + "epoch": 0.6968072619140778, "grad_norm": 0.0, - "learning_rate": 1.0334930430110258e-05, - "loss": 0.8972, + "learning_rate": 4.446207582628945e-06, + "loss": 1.0322, "step": 17809 }, { - "epoch": 0.5046898466944374, + "epoch": 0.6968463886063072, "grad_norm": 0.0, - "learning_rate": 1.0334013154361147e-05, - "loss": 0.7693, + "learning_rate": 4.445153801383427e-06, + "loss": 0.9747, "step": 17810 }, { - "epoch": 0.5047181841366998, + "epoch": 0.6968855152985367, "grad_norm": 0.0, - "learning_rate": 1.0333095875798517e-05, - "loss": 0.8933, + "learning_rate": 4.444100109341774e-06, + "loss": 1.0335, "step": 17811 }, { - "epoch": 0.5047465215789623, + "epoch": 0.6969246419907661, "grad_norm": 0.0, - "learning_rate": 1.0332178594430096e-05, - "loss": 0.8294, + "learning_rate": 4.443046506520913e-06, + "loss": 1.0203, "step": 17812 }, { - "epoch": 0.5047748590212248, + "epoch": 0.6969637686829956, "grad_norm": 0.0, - "learning_rate": 1.0331261310263612e-05, - "loss": 0.8811, + "learning_rate": 4.44199299293776e-06, + "loss": 0.8036, "step": 17813 }, { - "epoch": 0.5048031964634873, + "epoch": 0.697002895375225, "grad_norm": 0.0, - "learning_rate": 1.0330344023306791e-05, - "loss": 0.8836, + "learning_rate": 4.440939568609239e-06, + "loss": 1.072, "step": 17814 }, { - "epoch": 0.5048315339057496, + "epoch": 0.6970420220674545, "grad_norm": 0.0, - "learning_rate": 1.032942673356736e-05, - "loss": 0.9303, + "learning_rate": 4.4398862335522595e-06, + "loss": 0.9491, "step": 17815 }, { - "epoch": 0.5048598713480121, + "epoch": 0.6970811487596839, "grad_norm": 0.0, - "learning_rate": 1.0328509441053045e-05, - "loss": 0.8615, + "learning_rate": 4.43883298778374e-06, + "loss": 0.9436, "step": 17816 }, { - "epoch": 0.5048882087902746, + "epoch": 0.6971202754519132, "grad_norm": 0.0, - "learning_rate": 1.0327592145771574e-05, - "loss": 0.8198, + "learning_rate": 4.437779831320595e-06, + "loss": 1.0251, "step": 17817 }, { - "epoch": 0.504916546232537, + "epoch": 0.6971594021441427, "grad_norm": 0.0, - "learning_rate": 1.0326674847730673e-05, - "loss": 1.0238, + "learning_rate": 4.436726764179737e-06, + "loss": 1.0067, "step": 17818 }, { - "epoch": 0.5049448836747995, + "epoch": 0.6971985288363721, "grad_norm": 0.0, - "learning_rate": 1.0325757546938067e-05, - "loss": 0.9965, + "learning_rate": 4.435673786378074e-06, + "loss": 0.9438, "step": 17819 }, { - "epoch": 0.504973221117062, + "epoch": 0.6972376555286016, "grad_norm": 0.0, - "learning_rate": 1.0324840243401481e-05, - "loss": 0.8842, + "learning_rate": 4.434620897932521e-06, + "loss": 0.8228, "step": 17820 }, { - "epoch": 0.5050015585593244, + "epoch": 0.697276782220831, "grad_norm": 0.0, - "learning_rate": 1.032392293712865e-05, - "loss": 0.7868, + "learning_rate": 4.433568098859976e-06, + "loss": 0.9586, "step": 17821 }, { - "epoch": 0.5050298960015869, + "epoch": 0.6973159089130605, "grad_norm": 0.0, - "learning_rate": 1.0323005628127297e-05, - "loss": 0.9366, + "learning_rate": 4.432515389177359e-06, + "loss": 0.955, "step": 17822 }, { - "epoch": 0.5050582334438494, + "epoch": 0.6973550356052899, "grad_norm": 0.0, - "learning_rate": 1.0322088316405145e-05, - "loss": 0.9129, + "learning_rate": 4.431462768901564e-06, + "loss": 0.9594, "step": 17823 }, { - "epoch": 0.5050865708861119, + "epoch": 0.6973941622975194, "grad_norm": 0.0, - "learning_rate": 1.0321171001969924e-05, - "loss": 0.8851, + "learning_rate": 4.430410238049504e-06, + "loss": 0.9919, "step": 17824 }, { - "epoch": 0.5051149083283742, + "epoch": 0.6974332889897488, "grad_norm": 0.0, - "learning_rate": 1.032025368482936e-05, - "loss": 0.8791, + "learning_rate": 4.429357796638068e-06, + "loss": 1.0968, "step": 17825 }, { - "epoch": 0.5051432457706367, + "epoch": 0.6974724156819783, "grad_norm": 0.0, - "learning_rate": 1.0319336364991179e-05, - "loss": 0.7894, + "learning_rate": 4.428305444684173e-06, + "loss": 0.8869, "step": 17826 }, { - "epoch": 0.5051715832128992, + "epoch": 0.6975115423742076, "grad_norm": 0.0, - "learning_rate": 1.031841904246311e-05, - "loss": 0.8422, + "learning_rate": 4.427253182204708e-06, + "loss": 1.0568, "step": 17827 }, { - "epoch": 0.5051999206551616, + "epoch": 0.6975506690664371, "grad_norm": 0.0, - "learning_rate": 1.0317501717252878e-05, - "loss": 0.9395, + "learning_rate": 4.426201009216576e-06, + "loss": 1.0655, "step": 17828 }, { - "epoch": 0.5052282580974241, + "epoch": 0.6975897957586665, "grad_norm": 0.0, - "learning_rate": 1.0316584389368213e-05, - "loss": 0.9423, + "learning_rate": 4.425148925736665e-06, + "loss": 0.8702, "step": 17829 }, { - "epoch": 0.5052565955396866, + "epoch": 0.697628922450896, "grad_norm": 0.0, - "learning_rate": 1.0315667058816843e-05, - "loss": 0.9623, + "learning_rate": 4.424096931781885e-06, + "loss": 0.9469, "step": 17830 }, { - "epoch": 0.5052849329819491, + "epoch": 0.6976680491431254, "grad_norm": 0.0, - "learning_rate": 1.031474972560649e-05, - "loss": 0.8587, + "learning_rate": 4.4230450273691164e-06, + "loss": 0.9321, "step": 17831 }, { - "epoch": 0.5053132704242115, + "epoch": 0.6977071758353549, "grad_norm": 0.0, - "learning_rate": 1.031383238974488e-05, - "loss": 0.921, + "learning_rate": 4.42199321251526e-06, + "loss": 0.8261, "step": 17832 }, { - "epoch": 0.505341607866474, + "epoch": 0.6977463025275843, "grad_norm": 0.0, - "learning_rate": 1.0312915051239746e-05, - "loss": 0.7395, + "learning_rate": 4.4209414872371994e-06, + "loss": 1.0892, "step": 17833 }, { - "epoch": 0.5053699453087365, + "epoch": 0.6977854292198138, "grad_norm": 0.0, - "learning_rate": 1.0311997710098812e-05, - "loss": 0.935, + "learning_rate": 4.419889851551827e-06, + "loss": 1.1695, "step": 17834 }, { - "epoch": 0.5053982827509989, + "epoch": 0.6978245559120432, "grad_norm": 0.0, - "learning_rate": 1.0311080366329804e-05, - "loss": 0.8522, + "learning_rate": 4.418838305476033e-06, + "loss": 0.9302, "step": 17835 }, { - "epoch": 0.5054266201932613, + "epoch": 0.6978636826042727, "grad_norm": 0.0, - "learning_rate": 1.0310163019940454e-05, - "loss": 0.9518, + "learning_rate": 4.417786849026704e-06, + "loss": 1.0555, "step": 17836 }, { - "epoch": 0.5054549576355238, + "epoch": 0.6979028092965021, "grad_norm": 0.0, - "learning_rate": 1.0309245670938481e-05, - "loss": 0.8311, + "learning_rate": 4.416735482220721e-06, + "loss": 0.978, "step": 17837 }, { - "epoch": 0.5054832950777863, + "epoch": 0.6979419359887316, "grad_norm": 0.0, - "learning_rate": 1.0308328319331622e-05, - "loss": 0.851, + "learning_rate": 4.415684205074969e-06, + "loss": 0.9658, "step": 17838 }, { - "epoch": 0.5055116325200487, + "epoch": 0.6979810626809609, "grad_norm": 0.0, - "learning_rate": 1.0307410965127595e-05, - "loss": 0.8972, + "learning_rate": 4.414633017606336e-06, + "loss": 0.9323, "step": 17839 }, { - "epoch": 0.5055399699623112, + "epoch": 0.6980201893731904, "grad_norm": 0.0, - "learning_rate": 1.0306493608334134e-05, - "loss": 0.8472, + "learning_rate": 4.413581919831693e-06, + "loss": 1.0424, "step": 17840 }, { - "epoch": 0.5055683074045737, + "epoch": 0.6980593160654198, "grad_norm": 0.0, - "learning_rate": 1.030557624895896e-05, - "loss": 1.002, + "learning_rate": 4.412530911767925e-06, + "loss": 1.0422, "step": 17841 }, { - "epoch": 0.5055966448468361, + "epoch": 0.6980984427576493, "grad_norm": 0.0, - "learning_rate": 1.0304658887009806e-05, - "loss": 0.9758, + "learning_rate": 4.411479993431912e-06, + "loss": 1.0198, "step": 17842 }, { - "epoch": 0.5056249822890986, + "epoch": 0.6981375694498787, "grad_norm": 0.0, - "learning_rate": 1.03037415224944e-05, - "loss": 0.9703, + "learning_rate": 4.410429164840524e-06, + "loss": 1.039, "step": 17843 }, { - "epoch": 0.5056533197313611, + "epoch": 0.6981766961421082, "grad_norm": 0.0, - "learning_rate": 1.0302824155420464e-05, - "loss": 0.9148, + "learning_rate": 4.4093784260106395e-06, + "loss": 0.9913, "step": 17844 }, { - "epoch": 0.5056816571736235, + "epoch": 0.6982158228343376, "grad_norm": 0.0, - "learning_rate": 1.0301906785795726e-05, - "loss": 0.9307, + "learning_rate": 4.408327776959136e-06, + "loss": 1.0504, "step": 17845 }, { - "epoch": 0.505709994615886, + "epoch": 0.698254949526567, "grad_norm": 0.0, - "learning_rate": 1.0300989413627913e-05, - "loss": 0.8223, + "learning_rate": 4.407277217702878e-06, + "loss": 0.9932, "step": 17846 }, { - "epoch": 0.5057383320581484, + "epoch": 0.6982940762187965, "grad_norm": 0.0, - "learning_rate": 1.030007203892476e-05, - "loss": 0.8716, + "learning_rate": 4.406226748258739e-06, + "loss": 1.0947, "step": 17847 }, { - "epoch": 0.5057666695004109, + "epoch": 0.6983332029110259, "grad_norm": 0.0, - "learning_rate": 1.0299154661693987e-05, - "loss": 0.8661, + "learning_rate": 4.40517636864359e-06, + "loss": 0.9802, "step": 17848 }, { - "epoch": 0.5057950069426733, + "epoch": 0.6983723296032553, "grad_norm": 0.0, - "learning_rate": 1.0298237281943321e-05, - "loss": 0.9044, + "learning_rate": 4.404126078874301e-06, + "loss": 1.0093, "step": 17849 }, { - "epoch": 0.5058233443849358, + "epoch": 0.6984114562954847, "grad_norm": 0.0, - "learning_rate": 1.0297319899680493e-05, - "loss": 0.9401, + "learning_rate": 4.403075878967732e-06, + "loss": 0.9715, "step": 17850 }, { - "epoch": 0.5058516818271983, + "epoch": 0.6984505829877142, "grad_norm": 0.0, - "learning_rate": 1.029640251491323e-05, - "loss": 0.9014, + "learning_rate": 4.4020257689407544e-06, + "loss": 1.0069, "step": 17851 }, { - "epoch": 0.5058800192694607, + "epoch": 0.6984897096799436, "grad_norm": 0.0, - "learning_rate": 1.0295485127649258e-05, - "loss": 0.8339, + "learning_rate": 4.400975748810221e-06, + "loss": 0.9601, "step": 17852 }, { - "epoch": 0.5059083567117232, + "epoch": 0.6985288363721731, "grad_norm": 0.0, - "learning_rate": 1.0294567737896304e-05, - "loss": 0.8295, + "learning_rate": 4.399925818593008e-06, + "loss": 0.9135, "step": 17853 }, { - "epoch": 0.5059366941539857, + "epoch": 0.6985679630644025, "grad_norm": 0.0, - "learning_rate": 1.0293650345662099e-05, - "loss": 0.8585, + "learning_rate": 4.398875978305966e-06, + "loss": 1.0018, "step": 17854 }, { - "epoch": 0.5059650315962482, + "epoch": 0.698607089756632, "grad_norm": 0.0, - "learning_rate": 1.0292732950954366e-05, - "loss": 0.8625, + "learning_rate": 4.3978262279659615e-06, + "loss": 1.0178, "step": 17855 }, { - "epoch": 0.5059933690385106, + "epoch": 0.6986462164488614, "grad_norm": 0.0, - "learning_rate": 1.0291815553780835e-05, - "loss": 0.7936, + "learning_rate": 4.396776567589839e-06, + "loss": 0.9644, "step": 17856 }, { - "epoch": 0.506021706480773, + "epoch": 0.6986853431410909, "grad_norm": 0.0, - "learning_rate": 1.0290898154149234e-05, - "loss": 1.0003, + "learning_rate": 4.395726997194472e-06, + "loss": 0.9265, "step": 17857 }, { - "epoch": 0.5060500439230355, + "epoch": 0.6987244698333203, "grad_norm": 0.0, - "learning_rate": 1.028998075206729e-05, - "loss": 0.8441, + "learning_rate": 4.394677516796703e-06, + "loss": 0.9846, "step": 17858 }, { - "epoch": 0.5060783813652979, + "epoch": 0.6987635965255498, "grad_norm": 0.0, - "learning_rate": 1.0289063347542727e-05, - "loss": 0.8832, + "learning_rate": 4.393628126413391e-06, + "loss": 1.0301, "step": 17859 }, { - "epoch": 0.5061067188075604, + "epoch": 0.6988027232177791, "grad_norm": 0.0, - "learning_rate": 1.0288145940583281e-05, - "loss": 0.9265, + "learning_rate": 4.3925788260613855e-06, + "loss": 1.0864, "step": 17860 }, { - "epoch": 0.5061350562498229, + "epoch": 0.6988418499100086, "grad_norm": 0.0, - "learning_rate": 1.028722853119667e-05, - "loss": 0.8903, + "learning_rate": 4.391529615757536e-06, + "loss": 1.0005, "step": 17861 }, { - "epoch": 0.5061633936920854, + "epoch": 0.698880976602238, "grad_norm": 0.0, - "learning_rate": 1.028631111939063e-05, - "loss": 0.8542, + "learning_rate": 4.390480495518693e-06, + "loss": 0.9784, "step": 17862 }, { - "epoch": 0.5061917311343478, + "epoch": 0.6989201032944675, "grad_norm": 0.0, - "learning_rate": 1.0285393705172886e-05, - "loss": 0.7998, + "learning_rate": 4.389431465361708e-06, + "loss": 1.1376, "step": 17863 }, { - "epoch": 0.5062200685766103, + "epoch": 0.6989592299866969, "grad_norm": 0.0, - "learning_rate": 1.0284476288551164e-05, - "loss": 0.9292, + "learning_rate": 4.388382525303419e-06, + "loss": 1.0724, "step": 17864 }, { - "epoch": 0.5062484060188728, + "epoch": 0.6989983566789264, "grad_norm": 0.0, - "learning_rate": 1.028355886953319e-05, - "loss": 0.8856, + "learning_rate": 4.3873336753606766e-06, + "loss": 1.014, "step": 17865 }, { - "epoch": 0.5062767434611352, + "epoch": 0.6990374833711558, "grad_norm": 0.0, - "learning_rate": 1.0282641448126693e-05, - "loss": 0.9038, + "learning_rate": 4.386284915550321e-06, + "loss": 1.0294, "step": 17866 }, { - "epoch": 0.5063050809033977, + "epoch": 0.6990766100633853, "grad_norm": 0.0, - "learning_rate": 1.0281724024339406e-05, - "loss": 0.8581, + "learning_rate": 4.385236245889198e-06, + "loss": 0.9302, "step": 17867 }, { - "epoch": 0.5063334183456601, + "epoch": 0.6991157367556147, "grad_norm": 0.0, - "learning_rate": 1.0280806598179055e-05, - "loss": 0.9698, + "learning_rate": 4.384187666394143e-06, + "loss": 1.0784, "step": 17868 }, { - "epoch": 0.5063617557879225, + "epoch": 0.6991548634478442, "grad_norm": 0.0, - "learning_rate": 1.027988916965336e-05, - "loss": 0.8634, + "learning_rate": 4.383139177082001e-06, + "loss": 1.0223, "step": 17869 }, { - "epoch": 0.506390093230185, + "epoch": 0.6991939901400736, "grad_norm": 0.0, - "learning_rate": 1.0278971738770058e-05, - "loss": 0.8256, + "learning_rate": 4.382090777969598e-06, + "loss": 0.9648, "step": 17870 }, { - "epoch": 0.5064184306724475, + "epoch": 0.699233116832303, "grad_norm": 0.0, - "learning_rate": 1.0278054305536873e-05, - "loss": 0.943, + "learning_rate": 4.381042469073784e-06, + "loss": 0.9265, "step": 17871 }, { - "epoch": 0.50644676811471, + "epoch": 0.6992722435245324, "grad_norm": 0.0, - "learning_rate": 1.0277136869961533e-05, - "loss": 0.9189, + "learning_rate": 4.379994250411383e-06, + "loss": 0.9861, "step": 17872 }, { - "epoch": 0.5064751055569724, + "epoch": 0.6993113702167619, "grad_norm": 0.0, - "learning_rate": 1.0276219432051766e-05, - "loss": 0.9432, + "learning_rate": 4.378946121999237e-06, + "loss": 1.0038, "step": 17873 }, { - "epoch": 0.5065034429992349, + "epoch": 0.6993504969089913, "grad_norm": 0.0, - "learning_rate": 1.0275301991815299e-05, - "loss": 0.8633, + "learning_rate": 4.377898083854165e-06, + "loss": 0.9367, "step": 17874 }, { - "epoch": 0.5065317804414974, + "epoch": 0.6993896236012207, "grad_norm": 0.0, - "learning_rate": 1.0274384549259864e-05, - "loss": 0.8912, + "learning_rate": 4.376850135993012e-06, + "loss": 1.0059, "step": 17875 }, { - "epoch": 0.5065601178837598, + "epoch": 0.6994287502934502, "grad_norm": 0.0, - "learning_rate": 1.0273467104393184e-05, - "loss": 1.0094, + "learning_rate": 4.375802278432596e-06, + "loss": 0.9842, "step": 17876 }, { - "epoch": 0.5065884553260223, + "epoch": 0.6994678769856796, "grad_norm": 0.0, - "learning_rate": 1.0272549657222992e-05, - "loss": 0.8863, + "learning_rate": 4.374754511189751e-06, + "loss": 1.0171, "step": 17877 }, { - "epoch": 0.5066167927682848, + "epoch": 0.6995070036779091, "grad_norm": 0.0, - "learning_rate": 1.0271632207757008e-05, - "loss": 0.8187, + "learning_rate": 4.373706834281297e-06, + "loss": 0.9319, "step": 17878 }, { - "epoch": 0.5066451302105472, + "epoch": 0.6995461303701385, "grad_norm": 0.0, - "learning_rate": 1.0270714756002967e-05, - "loss": 0.9251, + "learning_rate": 4.3726592477240604e-06, + "loss": 1.0004, "step": 17879 }, { - "epoch": 0.5066734676528096, + "epoch": 0.699585257062368, "grad_norm": 0.0, - "learning_rate": 1.0269797301968595e-05, - "loss": 1.0038, + "learning_rate": 4.3716117515348655e-06, + "loss": 1.0546, "step": 17880 }, { - "epoch": 0.5067018050950721, + "epoch": 0.6996243837545973, "grad_norm": 0.0, - "learning_rate": 1.0268879845661623e-05, - "loss": 0.9575, + "learning_rate": 4.370564345730537e-06, + "loss": 1.0114, "step": 17881 }, { - "epoch": 0.5067301425373346, + "epoch": 0.6996635104468268, "grad_norm": 0.0, - "learning_rate": 1.0267962387089771e-05, - "loss": 0.8879, + "learning_rate": 4.369517030327887e-06, + "loss": 0.9634, "step": 17882 }, { - "epoch": 0.506758479979597, + "epoch": 0.6997026371390562, "grad_norm": 0.0, - "learning_rate": 1.026704492626078e-05, - "loss": 0.8548, + "learning_rate": 4.368469805343737e-06, + "loss": 0.8962, "step": 17883 }, { - "epoch": 0.5067868174218595, + "epoch": 0.6997417638312857, "grad_norm": 0.0, - "learning_rate": 1.0266127463182365e-05, - "loss": 0.8962, + "learning_rate": 4.367422670794909e-06, + "loss": 0.8832, "step": 17884 }, { - "epoch": 0.506815154864122, + "epoch": 0.6997808905235151, "grad_norm": 0.0, - "learning_rate": 1.0265209997862259e-05, - "loss": 0.858, + "learning_rate": 4.366375626698212e-06, + "loss": 0.8955, "step": 17885 }, { - "epoch": 0.5068434923063845, + "epoch": 0.6998200172157446, "grad_norm": 0.0, - "learning_rate": 1.0264292530308191e-05, - "loss": 0.9949, + "learning_rate": 4.3653286730704635e-06, + "loss": 0.9553, "step": 17886 }, { - "epoch": 0.5068718297486469, + "epoch": 0.699859143907974, "grad_norm": 0.0, - "learning_rate": 1.026337506052789e-05, - "loss": 0.9882, + "learning_rate": 4.3642818099284754e-06, + "loss": 0.8927, "step": 17887 }, { - "epoch": 0.5069001671909094, + "epoch": 0.6998982706002035, "grad_norm": 0.0, - "learning_rate": 1.0262457588529084e-05, - "loss": 0.8486, + "learning_rate": 4.363235037289064e-06, + "loss": 0.8826, "step": 17888 }, { - "epoch": 0.5069285046331719, + "epoch": 0.6999373972924329, "grad_norm": 0.0, - "learning_rate": 1.02615401143195e-05, - "loss": 0.8839, + "learning_rate": 4.36218835516903e-06, + "loss": 1.0127, "step": 17889 }, { - "epoch": 0.5069568420754342, + "epoch": 0.6999765239846624, "grad_norm": 0.0, - "learning_rate": 1.0260622637906865e-05, - "loss": 0.9158, + "learning_rate": 4.36114176358519e-06, + "loss": 1.0042, "step": 17890 }, { - "epoch": 0.5069851795176967, + "epoch": 0.7000156506768918, "grad_norm": 0.0, - "learning_rate": 1.025970515929891e-05, - "loss": 0.8481, + "learning_rate": 4.360095262554345e-06, + "loss": 0.947, "step": 17891 }, { - "epoch": 0.5070135169599592, + "epoch": 0.7000547773691213, "grad_norm": 0.0, - "learning_rate": 1.0258787678503359e-05, - "loss": 0.8278, + "learning_rate": 4.359048852093301e-06, + "loss": 0.9903, "step": 17892 }, { - "epoch": 0.5070418544022216, + "epoch": 0.7000939040613506, "grad_norm": 0.0, - "learning_rate": 1.0257870195527947e-05, - "loss": 0.9225, + "learning_rate": 4.358002532218865e-06, + "loss": 0.8652, "step": 17893 }, { - "epoch": 0.5070701918444841, + "epoch": 0.7001330307535801, "grad_norm": 0.0, - "learning_rate": 1.0256952710380399e-05, - "loss": 0.9419, + "learning_rate": 4.356956302947843e-06, + "loss": 0.948, "step": 17894 }, { - "epoch": 0.5070985292867466, + "epoch": 0.7001721574458095, "grad_norm": 0.0, - "learning_rate": 1.025603522306844e-05, - "loss": 0.8582, + "learning_rate": 4.355910164297026e-06, + "loss": 0.9238, "step": 17895 }, { - "epoch": 0.5071268667290091, + "epoch": 0.700211284138039, "grad_norm": 0.0, - "learning_rate": 1.0255117733599804e-05, - "loss": 0.8888, + "learning_rate": 4.354864116283221e-06, + "loss": 0.8741, "step": 17896 }, { - "epoch": 0.5071552041712715, + "epoch": 0.7002504108302684, "grad_norm": 0.0, - "learning_rate": 1.0254200241982213e-05, - "loss": 0.8175, + "learning_rate": 4.353818158923223e-06, + "loss": 0.9977, "step": 17897 }, { - "epoch": 0.507183541613534, + "epoch": 0.7002895375224979, "grad_norm": 0.0, - "learning_rate": 1.0253282748223403e-05, - "loss": 0.9607, + "learning_rate": 4.352772292233835e-06, + "loss": 1.0563, "step": 17898 }, { - "epoch": 0.5072118790557965, + "epoch": 0.7003286642147273, "grad_norm": 0.0, - "learning_rate": 1.0252365252331094e-05, - "loss": 0.9596, + "learning_rate": 4.351726516231843e-06, + "loss": 1.0786, "step": 17899 }, { - "epoch": 0.5072402164980588, + "epoch": 0.7003677909069568, "grad_norm": 0.0, - "learning_rate": 1.0251447754313023e-05, - "loss": 0.869, + "learning_rate": 4.350680830934051e-06, + "loss": 0.9177, "step": 17900 }, { - "epoch": 0.5072685539403213, + "epoch": 0.7004069175991862, "grad_norm": 0.0, - "learning_rate": 1.0250530254176913e-05, - "loss": 0.7791, + "learning_rate": 4.349635236357237e-06, + "loss": 1.0614, "step": 17901 }, { - "epoch": 0.5072968913825838, + "epoch": 0.7004460442914155, "grad_norm": 0.0, - "learning_rate": 1.0249612751930494e-05, - "loss": 0.9197, + "learning_rate": 4.3485897325182094e-06, + "loss": 1.0735, "step": 17902 }, { - "epoch": 0.5073252288248463, + "epoch": 0.700485170983645, "grad_norm": 0.0, - "learning_rate": 1.024869524758149e-05, - "loss": 0.9207, + "learning_rate": 4.347544319433744e-06, + "loss": 1.0468, "step": 17903 }, { - "epoch": 0.5073535662671087, + "epoch": 0.7005242976758744, "grad_norm": 0.0, - "learning_rate": 1.0247777741137636e-05, - "loss": 0.899, + "learning_rate": 4.346498997120638e-06, + "loss": 1.0164, "step": 17904 }, { - "epoch": 0.5073819037093712, + "epoch": 0.7005634243681039, "grad_norm": 0.0, - "learning_rate": 1.0246860232606661e-05, - "loss": 0.9053, + "learning_rate": 4.345453765595667e-06, + "loss": 1.0321, "step": 17905 }, { - "epoch": 0.5074102411516337, + "epoch": 0.7006025510603333, "grad_norm": 0.0, - "learning_rate": 1.024594272199629e-05, - "loss": 0.8548, + "learning_rate": 4.34440862487563e-06, + "loss": 0.9418, "step": 17906 }, { - "epoch": 0.5074385785938961, + "epoch": 0.7006416777525628, "grad_norm": 0.0, - "learning_rate": 1.0245025209314248e-05, - "loss": 0.8448, + "learning_rate": 4.3433635749773e-06, + "loss": 0.9595, "step": 17907 }, { - "epoch": 0.5074669160361586, + "epoch": 0.7006808044447922, "grad_norm": 0.0, - "learning_rate": 1.024410769456827e-05, - "loss": 0.8701, + "learning_rate": 4.3423186159174665e-06, + "loss": 0.9859, "step": 17908 }, { - "epoch": 0.5074952534784211, + "epoch": 0.7007199311370217, "grad_norm": 0.0, - "learning_rate": 1.0243190177766084e-05, - "loss": 0.938, + "learning_rate": 4.341273747712903e-06, + "loss": 1.0625, "step": 17909 }, { - "epoch": 0.5075235909206836, + "epoch": 0.7007590578292511, "grad_norm": 0.0, - "learning_rate": 1.0242272658915414e-05, - "loss": 0.9667, + "learning_rate": 4.3402289703803926e-06, + "loss": 1.0732, "step": 17910 }, { - "epoch": 0.5075519283629459, + "epoch": 0.7007981845214806, "grad_norm": 0.0, - "learning_rate": 1.0241355138023992e-05, - "loss": 0.8535, + "learning_rate": 4.3391842839367114e-06, + "loss": 0.9876, "step": 17911 }, { - "epoch": 0.5075802658052084, + "epoch": 0.70083731121371, "grad_norm": 0.0, - "learning_rate": 1.0240437615099548e-05, - "loss": 0.8582, + "learning_rate": 4.3381396883986425e-06, + "loss": 1.0709, "step": 17912 }, { - "epoch": 0.5076086032474709, + "epoch": 0.7008764379059395, "grad_norm": 0.0, - "learning_rate": 1.0239520090149809e-05, - "loss": 0.8517, + "learning_rate": 4.337095183782951e-06, + "loss": 0.9297, "step": 17913 }, { - "epoch": 0.5076369406897333, + "epoch": 0.7009155645981688, "grad_norm": 0.0, - "learning_rate": 1.02386025631825e-05, - "loss": 0.8465, + "learning_rate": 4.336050770106415e-06, + "loss": 0.9092, "step": 17914 }, { - "epoch": 0.5076652781319958, + "epoch": 0.7009546912903983, "grad_norm": 0.0, - "learning_rate": 1.0237685034205353e-05, - "loss": 0.8617, + "learning_rate": 4.335006447385807e-06, + "loss": 0.9011, "step": 17915 }, { - "epoch": 0.5076936155742583, + "epoch": 0.7009938179826277, "grad_norm": 0.0, - "learning_rate": 1.0236767503226102e-05, - "loss": 0.7888, + "learning_rate": 4.333962215637899e-06, + "loss": 0.9959, "step": 17916 }, { - "epoch": 0.5077219530165207, + "epoch": 0.7010329446748572, "grad_norm": 0.0, - "learning_rate": 1.0235849970252465e-05, - "loss": 0.9597, + "learning_rate": 4.3329180748794554e-06, + "loss": 0.9249, "step": 17917 }, { - "epoch": 0.5077502904587832, + "epoch": 0.7010720713670866, "grad_norm": 0.0, - "learning_rate": 1.0234932435292178e-05, - "loss": 0.8772, + "learning_rate": 4.33187402512725e-06, + "loss": 0.9535, "step": 17918 }, { - "epoch": 0.5077786279010457, + "epoch": 0.7011111980593161, "grad_norm": 0.0, - "learning_rate": 1.0234014898352966e-05, - "loss": 0.8953, + "learning_rate": 4.330830066398037e-06, + "loss": 0.8351, "step": 17919 }, { - "epoch": 0.5078069653433082, + "epoch": 0.7011503247515455, "grad_norm": 0.0, - "learning_rate": 1.0233097359442563e-05, - "loss": 0.7822, + "learning_rate": 4.329786198708598e-06, + "loss": 0.8639, "step": 17920 }, { - "epoch": 0.5078353027855705, + "epoch": 0.701189451443775, "grad_norm": 0.0, - "learning_rate": 1.0232179818568692e-05, - "loss": 0.8555, + "learning_rate": 4.328742422075682e-06, + "loss": 0.95, "step": 17921 }, { - "epoch": 0.507863640227833, + "epoch": 0.7012285781360044, "grad_norm": 0.0, - "learning_rate": 1.0231262275739086e-05, - "loss": 0.793, + "learning_rate": 4.3276987365160605e-06, + "loss": 1.108, "step": 17922 }, { - "epoch": 0.5078919776700955, + "epoch": 0.7012677048282339, "grad_norm": 0.0, - "learning_rate": 1.0230344730961471e-05, - "loss": 0.9363, + "learning_rate": 4.3266551420464866e-06, + "loss": 0.8299, "step": 17923 }, { - "epoch": 0.5079203151123579, + "epoch": 0.7013068315204632, "grad_norm": 0.0, - "learning_rate": 1.0229427184243574e-05, - "loss": 0.9038, + "learning_rate": 4.325611638683721e-06, + "loss": 0.9787, "step": 17924 }, { - "epoch": 0.5079486525546204, + "epoch": 0.7013459582126927, "grad_norm": 0.0, - "learning_rate": 1.022850963559313e-05, - "loss": 0.8408, + "learning_rate": 4.3245682264445235e-06, + "loss": 0.9816, "step": 17925 }, { - "epoch": 0.5079769899968829, + "epoch": 0.7013850849049221, "grad_norm": 0.0, - "learning_rate": 1.0227592085017866e-05, - "loss": 0.8674, + "learning_rate": 4.323524905345651e-06, + "loss": 0.9967, "step": 17926 }, { - "epoch": 0.5080053274391454, + "epoch": 0.7014242115971516, "grad_norm": 0.0, - "learning_rate": 1.022667453252551e-05, - "loss": 0.9083, + "learning_rate": 4.322481675403852e-06, + "loss": 0.9134, "step": 17927 }, { - "epoch": 0.5080336648814078, + "epoch": 0.701463338289381, "grad_norm": 0.0, - "learning_rate": 1.0225756978123784e-05, - "loss": 0.8829, + "learning_rate": 4.321438536635884e-06, + "loss": 0.9845, "step": 17928 }, { - "epoch": 0.5080620023236703, + "epoch": 0.7015024649816105, "grad_norm": 0.0, - "learning_rate": 1.0224839421820426e-05, - "loss": 0.8706, + "learning_rate": 4.3203954890585e-06, + "loss": 0.9509, "step": 17929 }, { - "epoch": 0.5080903397659328, + "epoch": 0.7015415916738399, "grad_norm": 0.0, - "learning_rate": 1.0223921863623163e-05, - "loss": 0.8433, + "learning_rate": 4.319352532688444e-06, + "loss": 0.8827, "step": 17930 }, { - "epoch": 0.5081186772081951, + "epoch": 0.7015807183660693, "grad_norm": 0.0, - "learning_rate": 1.0223004303539723e-05, - "loss": 0.9635, + "learning_rate": 4.318309667542467e-06, + "loss": 1.1018, "step": 17931 }, { - "epoch": 0.5081470146504576, + "epoch": 0.7016198450582988, "grad_norm": 0.0, - "learning_rate": 1.0222086741577835e-05, - "loss": 0.9319, + "learning_rate": 4.31726689363732e-06, + "loss": 0.9536, "step": 17932 }, { - "epoch": 0.5081753520927201, + "epoch": 0.7016589717505282, "grad_norm": 0.0, - "learning_rate": 1.0221169177745227e-05, - "loss": 0.8025, + "learning_rate": 4.316224210989747e-06, + "loss": 1.1302, "step": 17933 }, { - "epoch": 0.5082036895349826, + "epoch": 0.7016980984427577, "grad_norm": 0.0, - "learning_rate": 1.022025161204963e-05, - "loss": 0.7961, + "learning_rate": 4.3151816196164885e-06, + "loss": 1.0023, "step": 17934 }, { - "epoch": 0.508232026977245, + "epoch": 0.701737225134987, "grad_norm": 0.0, - "learning_rate": 1.0219334044498773e-05, - "loss": 0.965, + "learning_rate": 4.314139119534289e-06, + "loss": 0.97, "step": 17935 }, { - "epoch": 0.5082603644195075, + "epoch": 0.7017763518272165, "grad_norm": 0.0, - "learning_rate": 1.0218416475100381e-05, - "loss": 0.8527, + "learning_rate": 4.313096710759894e-06, + "loss": 0.9526, "step": 17936 }, { - "epoch": 0.50828870186177, + "epoch": 0.7018154785194459, "grad_norm": 0.0, - "learning_rate": 1.0217498903862186e-05, - "loss": 0.9686, + "learning_rate": 4.312054393310037e-06, + "loss": 1.067, "step": 17937 }, { - "epoch": 0.5083170393040324, + "epoch": 0.7018546052116754, "grad_norm": 0.0, - "learning_rate": 1.0216581330791919e-05, - "loss": 0.9298, + "learning_rate": 4.31101216720146e-06, + "loss": 0.9659, "step": 17938 }, { - "epoch": 0.5083453767462949, + "epoch": 0.7018937319039048, "grad_norm": 0.0, - "learning_rate": 1.0215663755897306e-05, - "loss": 0.8533, + "learning_rate": 4.3099700324509e-06, + "loss": 0.9947, "step": 17939 }, { - "epoch": 0.5083737141885574, + "epoch": 0.7019328585961343, "grad_norm": 0.0, - "learning_rate": 1.0214746179186078e-05, - "loss": 0.927, + "learning_rate": 4.308927989075089e-06, + "loss": 0.9957, "step": 17940 }, { - "epoch": 0.5084020516308198, + "epoch": 0.7019719852883637, "grad_norm": 0.0, - "learning_rate": 1.0213828600665961e-05, - "loss": 0.9993, + "learning_rate": 4.307886037090763e-06, + "loss": 1.0792, "step": 17941 }, { - "epoch": 0.5084303890730822, + "epoch": 0.7020111119805932, "grad_norm": 0.0, - "learning_rate": 1.021291102034469e-05, - "loss": 0.9678, + "learning_rate": 4.306844176514654e-06, + "loss": 0.9854, "step": 17942 }, { - "epoch": 0.5084587265153447, + "epoch": 0.7020502386728226, "grad_norm": 0.0, - "learning_rate": 1.0211993438229985e-05, - "loss": 0.8888, + "learning_rate": 4.3058024073634986e-06, + "loss": 0.9922, "step": 17943 }, { - "epoch": 0.5084870639576072, + "epoch": 0.7020893653650521, "grad_norm": 0.0, - "learning_rate": 1.0211075854329583e-05, - "loss": 0.7555, + "learning_rate": 4.304760729654016e-06, + "loss": 0.9861, "step": 17944 }, { - "epoch": 0.5085154013998696, + "epoch": 0.7021284920572815, "grad_norm": 0.0, - "learning_rate": 1.0210158268651212e-05, - "loss": 0.8597, + "learning_rate": 4.303719143402942e-06, + "loss": 1.0334, "step": 17945 }, { - "epoch": 0.5085437388421321, + "epoch": 0.702167618749511, "grad_norm": 0.0, - "learning_rate": 1.0209240681202602e-05, - "loss": 0.847, + "learning_rate": 4.302677648626998e-06, + "loss": 0.9794, "step": 17946 }, { - "epoch": 0.5085720762843946, + "epoch": 0.7022067454417403, "grad_norm": 0.0, - "learning_rate": 1.0208323091991476e-05, - "loss": 0.8726, + "learning_rate": 4.301636245342918e-06, + "loss": 1.1451, "step": 17947 }, { - "epoch": 0.508600413726657, + "epoch": 0.7022458721339698, "grad_norm": 0.0, - "learning_rate": 1.0207405501025567e-05, - "loss": 0.9504, + "learning_rate": 4.300594933567414e-06, + "loss": 0.9485, "step": 17948 }, { - "epoch": 0.5086287511689195, + "epoch": 0.7022849988261992, "grad_norm": 0.0, - "learning_rate": 1.0206487908312607e-05, - "loss": 0.8227, + "learning_rate": 4.299553713317217e-06, + "loss": 0.8948, "step": 17949 }, { - "epoch": 0.508657088611182, + "epoch": 0.7023241255184287, "grad_norm": 0.0, - "learning_rate": 1.020557031386032e-05, - "loss": 0.9017, + "learning_rate": 4.298512584609038e-06, + "loss": 0.9529, "step": 17950 }, { - "epoch": 0.5086854260534445, + "epoch": 0.7023632522106581, "grad_norm": 0.0, - "learning_rate": 1.020465271767644e-05, - "loss": 0.8371, + "learning_rate": 4.2974715474596096e-06, + "loss": 1.0055, "step": 17951 }, { - "epoch": 0.5087137634957068, + "epoch": 0.7024023789028876, "grad_norm": 0.0, - "learning_rate": 1.0203735119768696e-05, - "loss": 1.0167, + "learning_rate": 4.296430601885639e-06, + "loss": 1.0191, "step": 17952 }, { - "epoch": 0.5087421009379693, + "epoch": 0.702441505595117, "grad_norm": 0.0, - "learning_rate": 1.0202817520144811e-05, - "loss": 0.864, + "learning_rate": 4.295389747903848e-06, + "loss": 0.9626, "step": 17953 }, { - "epoch": 0.5087704383802318, + "epoch": 0.7024806322873465, "grad_norm": 0.0, - "learning_rate": 1.0201899918812522e-05, - "loss": 0.9359, + "learning_rate": 4.294348985530945e-06, + "loss": 1.0316, "step": 17954 }, { - "epoch": 0.5087987758224942, + "epoch": 0.7025197589795759, "grad_norm": 0.0, - "learning_rate": 1.0200982315779555e-05, - "loss": 0.9448, + "learning_rate": 4.293308314783653e-06, + "loss": 0.9794, "step": 17955 }, { - "epoch": 0.5088271132647567, + "epoch": 0.7025588856718054, "grad_norm": 0.0, - "learning_rate": 1.0200064711053636e-05, - "loss": 0.8578, + "learning_rate": 4.292267735678676e-06, + "loss": 0.9858, "step": 17956 }, { - "epoch": 0.5088554507070192, + "epoch": 0.7025980123640347, "grad_norm": 0.0, - "learning_rate": 1.01991471046425e-05, - "loss": 0.9001, + "learning_rate": 4.29122724823273e-06, + "loss": 1.0021, "step": 17957 }, { - "epoch": 0.5088837881492816, + "epoch": 0.7026371390562642, "grad_norm": 0.0, - "learning_rate": 1.0198229496553873e-05, - "loss": 0.7878, + "learning_rate": 4.290186852462517e-06, + "loss": 0.9933, "step": 17958 }, { - "epoch": 0.5089121255915441, + "epoch": 0.7026762657484936, "grad_norm": 0.0, - "learning_rate": 1.0197311886795487e-05, - "loss": 0.8696, + "learning_rate": 4.289146548384749e-06, + "loss": 0.9727, "step": 17959 }, { - "epoch": 0.5089404630338066, + "epoch": 0.702715392440723, "grad_norm": 0.0, - "learning_rate": 1.019639427537507e-05, - "loss": 0.839, + "learning_rate": 4.28810633601613e-06, + "loss": 0.8653, "step": 17960 }, { - "epoch": 0.5089688004760691, + "epoch": 0.7027545191329525, "grad_norm": 0.0, - "learning_rate": 1.0195476662300347e-05, - "loss": 0.7472, + "learning_rate": 4.287066215373371e-06, + "loss": 0.9264, "step": 17961 }, { - "epoch": 0.5089971379183315, + "epoch": 0.7027936458251819, "grad_norm": 0.0, - "learning_rate": 1.0194559047579057e-05, - "loss": 0.8123, + "learning_rate": 4.286026186473165e-06, + "loss": 1.0172, "step": 17962 }, { - "epoch": 0.509025475360594, + "epoch": 0.7028327725174114, "grad_norm": 0.0, - "learning_rate": 1.019364143121892e-05, - "loss": 0.8587, + "learning_rate": 4.28498624933222e-06, + "loss": 0.9286, "step": 17963 }, { - "epoch": 0.5090538128028564, + "epoch": 0.7028718992096408, "grad_norm": 0.0, - "learning_rate": 1.0192723813227672e-05, - "loss": 0.8369, + "learning_rate": 4.283946403967233e-06, + "loss": 0.9596, "step": 17964 }, { - "epoch": 0.5090821502451188, + "epoch": 0.7029110259018703, "grad_norm": 0.0, - "learning_rate": 1.0191806193613037e-05, - "loss": 0.879, + "learning_rate": 4.282906650394909e-06, + "loss": 1.0725, "step": 17965 }, { - "epoch": 0.5091104876873813, + "epoch": 0.7029501525940997, "grad_norm": 0.0, - "learning_rate": 1.019088857238275e-05, - "loss": 0.8386, + "learning_rate": 4.281866988631936e-06, + "loss": 0.9903, "step": 17966 }, { - "epoch": 0.5091388251296438, + "epoch": 0.7029892792863291, "grad_norm": 0.0, - "learning_rate": 1.0189970949544536e-05, - "loss": 0.8991, + "learning_rate": 4.2808274186950175e-06, + "loss": 1.0463, "step": 17967 }, { - "epoch": 0.5091671625719063, + "epoch": 0.7030284059785585, "grad_norm": 0.0, - "learning_rate": 1.0189053325106126e-05, - "loss": 0.9056, + "learning_rate": 4.279787940600837e-06, + "loss": 1.0504, "step": 17968 }, { - "epoch": 0.5091955000141687, + "epoch": 0.703067532670788, "grad_norm": 0.0, - "learning_rate": 1.018813569907525e-05, - "loss": 0.9281, + "learning_rate": 4.278748554366102e-06, + "loss": 0.9948, "step": 17969 }, { - "epoch": 0.5092238374564312, + "epoch": 0.7031066593630174, "grad_norm": 0.0, - "learning_rate": 1.0187218071459635e-05, - "loss": 0.8496, + "learning_rate": 4.277709260007492e-06, + "loss": 1.0031, "step": 17970 }, { - "epoch": 0.5092521748986937, + "epoch": 0.7031457860552469, "grad_norm": 0.0, - "learning_rate": 1.0186300442267016e-05, - "loss": 0.9219, + "learning_rate": 4.276670057541704e-06, + "loss": 0.9922, "step": 17971 }, { - "epoch": 0.5092805123409561, + "epoch": 0.7031849127474763, "grad_norm": 0.0, - "learning_rate": 1.018538281150512e-05, - "loss": 0.8541, + "learning_rate": 4.275630946985421e-06, + "loss": 1.0348, "step": 17972 }, { - "epoch": 0.5093088497832186, + "epoch": 0.7032240394397058, "grad_norm": 0.0, - "learning_rate": 1.0184465179181671e-05, - "loss": 0.9536, + "learning_rate": 4.27459192835533e-06, + "loss": 1.0282, "step": 17973 }, { - "epoch": 0.509337187225481, + "epoch": 0.7032631661319352, "grad_norm": 0.0, - "learning_rate": 1.0183547545304406e-05, - "loss": 0.7999, + "learning_rate": 4.273553001668119e-06, + "loss": 0.9847, "step": 17974 }, { - "epoch": 0.5093655246677435, + "epoch": 0.7033022928241647, "grad_norm": 0.0, - "learning_rate": 1.0182629909881055e-05, - "loss": 0.8251, + "learning_rate": 4.272514166940476e-06, + "loss": 0.9547, "step": 17975 }, { - "epoch": 0.5093938621100059, + "epoch": 0.7033414195163941, "grad_norm": 0.0, - "learning_rate": 1.0181712272919339e-05, - "loss": 0.8568, + "learning_rate": 4.271475424189074e-06, + "loss": 0.9733, "step": 17976 }, { - "epoch": 0.5094221995522684, + "epoch": 0.7033805462086236, "grad_norm": 0.0, - "learning_rate": 1.0180794634426996e-05, - "loss": 0.8191, + "learning_rate": 4.270436773430599e-06, + "loss": 1.0204, "step": 17977 }, { - "epoch": 0.5094505369945309, + "epoch": 0.7034196729008529, "grad_norm": 0.0, - "learning_rate": 1.017987699441175e-05, - "loss": 1.0005, + "learning_rate": 4.269398214681733e-06, + "loss": 0.9166, "step": 17978 }, { - "epoch": 0.5094788744367933, + "epoch": 0.7034587995930824, "grad_norm": 0.0, - "learning_rate": 1.0178959352881337e-05, - "loss": 0.8946, + "learning_rate": 4.2683597479591465e-06, + "loss": 1.0349, "step": 17979 }, { - "epoch": 0.5095072118790558, + "epoch": 0.7034979262853118, "grad_norm": 0.0, - "learning_rate": 1.0178041709843483e-05, - "loss": 0.878, + "learning_rate": 4.26732137327952e-06, + "loss": 0.9668, "step": 17980 }, { - "epoch": 0.5095355493213183, + "epoch": 0.7035370529775413, "grad_norm": 0.0, - "learning_rate": 1.0177124065305917e-05, - "loss": 0.9453, + "learning_rate": 4.266283090659531e-06, + "loss": 0.9969, "step": 17981 }, { - "epoch": 0.5095638867635807, + "epoch": 0.7035761796697707, "grad_norm": 0.0, - "learning_rate": 1.0176206419276366e-05, - "loss": 0.9418, + "learning_rate": 4.265244900115852e-06, + "loss": 0.9522, "step": 17982 }, { - "epoch": 0.5095922242058432, + "epoch": 0.7036153063620002, "grad_norm": 0.0, - "learning_rate": 1.0175288771762563e-05, - "loss": 0.7868, + "learning_rate": 4.264206801665153e-06, + "loss": 0.9472, "step": 17983 }, { - "epoch": 0.5096205616481057, + "epoch": 0.7036544330542296, "grad_norm": 0.0, - "learning_rate": 1.0174371122772241e-05, - "loss": 0.8374, + "learning_rate": 4.263168795324107e-06, + "loss": 0.8736, "step": 17984 }, { - "epoch": 0.5096488990903681, + "epoch": 0.7036935597464591, "grad_norm": 0.0, - "learning_rate": 1.0173453472313127e-05, - "loss": 0.8993, + "learning_rate": 4.262130881109379e-06, + "loss": 0.9787, "step": 17985 }, { - "epoch": 0.5096772365326305, + "epoch": 0.7037326864386885, "grad_norm": 0.0, - "learning_rate": 1.0172535820392947e-05, - "loss": 0.9237, + "learning_rate": 4.261093059037638e-06, + "loss": 1.1045, "step": 17986 }, { - "epoch": 0.509705573974893, + "epoch": 0.703771813130918, "grad_norm": 0.0, - "learning_rate": 1.0171618167019434e-05, - "loss": 0.8881, + "learning_rate": 4.260055329125551e-06, + "loss": 1.0428, "step": 17987 }, { - "epoch": 0.5097339114171555, + "epoch": 0.7038109398231474, "grad_norm": 0.0, - "learning_rate": 1.0170700512200317e-05, - "loss": 0.8301, + "learning_rate": 4.259017691389788e-06, + "loss": 0.8899, "step": 17988 }, { - "epoch": 0.5097622488594179, + "epoch": 0.7038500665153767, "grad_norm": 0.0, - "learning_rate": 1.0169782855943327e-05, - "loss": 0.8948, + "learning_rate": 4.257980145847002e-06, + "loss": 0.9287, "step": 17989 }, { - "epoch": 0.5097905863016804, + "epoch": 0.7038891932076062, "grad_norm": 0.0, - "learning_rate": 1.0168865198256192e-05, - "loss": 0.88, + "learning_rate": 4.25694269251386e-06, + "loss": 1.0298, "step": 17990 }, { - "epoch": 0.5098189237439429, + "epoch": 0.7039283198998356, "grad_norm": 0.0, - "learning_rate": 1.0167947539146645e-05, - "loss": 0.9597, + "learning_rate": 4.255905331407022e-06, + "loss": 0.9244, "step": 17991 }, { - "epoch": 0.5098472611862054, + "epoch": 0.7039674465920651, "grad_norm": 0.0, - "learning_rate": 1.0167029878622415e-05, - "loss": 0.8583, + "learning_rate": 4.254868062543151e-06, + "loss": 1.0661, "step": 17992 }, { - "epoch": 0.5098755986284678, + "epoch": 0.7040065732842945, "grad_norm": 0.0, - "learning_rate": 1.0166112216691227e-05, - "loss": 0.9014, + "learning_rate": 4.253830885938895e-06, + "loss": 0.9025, "step": 17993 }, { - "epoch": 0.5099039360707303, + "epoch": 0.704045699976524, "grad_norm": 0.0, - "learning_rate": 1.0165194553360813e-05, - "loss": 0.9488, + "learning_rate": 4.252793801610919e-06, + "loss": 1.0492, "step": 17994 }, { - "epoch": 0.5099322735129928, + "epoch": 0.7040848266687534, "grad_norm": 0.0, - "learning_rate": 1.0164276888638907e-05, - "loss": 0.9004, + "learning_rate": 4.2517568095758655e-06, + "loss": 0.973, "step": 17995 }, { - "epoch": 0.5099606109552551, + "epoch": 0.7041239533609829, "grad_norm": 0.0, - "learning_rate": 1.0163359222533234e-05, - "loss": 0.8749, + "learning_rate": 4.250719909850402e-06, + "loss": 0.8536, "step": 17996 }, { - "epoch": 0.5099889483975176, + "epoch": 0.7041630800532123, "grad_norm": 0.0, - "learning_rate": 1.0162441555051525e-05, - "loss": 0.8239, + "learning_rate": 4.249683102451169e-06, + "loss": 1.0826, "step": 17997 }, { - "epoch": 0.5100172858397801, + "epoch": 0.7042022067454418, "grad_norm": 0.0, - "learning_rate": 1.0161523886201511e-05, - "loss": 0.8788, + "learning_rate": 4.248646387394823e-06, + "loss": 0.9633, "step": 17998 }, { - "epoch": 0.5100456232820426, + "epoch": 0.7042413334376711, "grad_norm": 0.0, - "learning_rate": 1.0160606215990922e-05, - "loss": 0.8703, + "learning_rate": 4.247609764698002e-06, + "loss": 1.0343, "step": 17999 }, { - "epoch": 0.510073960724305, + "epoch": 0.7042804601299006, "grad_norm": 0.0, - "learning_rate": 1.0159688544427488e-05, - "loss": 1.0295, + "learning_rate": 4.246573234377368e-06, + "loss": 0.9246, "step": 18000 }, { - "epoch": 0.5101022981665675, + "epoch": 0.70431958682213, "grad_norm": 0.0, - "learning_rate": 1.015877087151894e-05, - "loss": 0.8595, + "learning_rate": 4.245536796449555e-06, + "loss": 0.9641, "step": 18001 }, { - "epoch": 0.51013063560883, + "epoch": 0.7043587135143595, "grad_norm": 0.0, - "learning_rate": 1.0157853197273e-05, - "loss": 0.9788, + "learning_rate": 4.244500450931212e-06, + "loss": 0.9974, "step": 18002 }, { - "epoch": 0.5101589730510924, + "epoch": 0.7043978402065889, "grad_norm": 0.0, - "learning_rate": 1.0156935521697406e-05, - "loss": 0.9384, + "learning_rate": 4.243464197838975e-06, + "loss": 1.0002, "step": 18003 }, { - "epoch": 0.5101873104933549, + "epoch": 0.7044369668988184, "grad_norm": 0.0, - "learning_rate": 1.0156017844799888e-05, - "loss": 0.8642, + "learning_rate": 4.242428037189494e-06, + "loss": 0.9872, "step": 18004 }, { - "epoch": 0.5102156479356174, + "epoch": 0.7044760935910478, "grad_norm": 0.0, - "learning_rate": 1.0155100166588174e-05, - "loss": 0.9345, + "learning_rate": 4.241391968999402e-06, + "loss": 1.0434, "step": 18005 }, { - "epoch": 0.5102439853778797, + "epoch": 0.7045152202832773, "grad_norm": 0.0, - "learning_rate": 1.0154182487069992e-05, - "loss": 0.9883, + "learning_rate": 4.240355993285343e-06, + "loss": 0.9237, "step": 18006 }, { - "epoch": 0.5102723228201422, + "epoch": 0.7045543469755067, "grad_norm": 0.0, - "learning_rate": 1.0153264806253074e-05, - "loss": 0.9024, + "learning_rate": 4.239320110063946e-06, + "loss": 0.9786, "step": 18007 }, { - "epoch": 0.5103006602624047, + "epoch": 0.7045934736677362, "grad_norm": 0.0, - "learning_rate": 1.0152347124145148e-05, - "loss": 0.8722, + "learning_rate": 4.238284319351848e-06, + "loss": 0.9504, "step": 18008 }, { - "epoch": 0.5103289977046672, + "epoch": 0.7046326003599656, "grad_norm": 0.0, - "learning_rate": 1.0151429440753948e-05, - "loss": 0.7685, + "learning_rate": 4.237248621165686e-06, + "loss": 0.988, "step": 18009 }, { - "epoch": 0.5103573351469296, + "epoch": 0.704671727052195, "grad_norm": 0.0, - "learning_rate": 1.0150511756087203e-05, - "loss": 0.8512, + "learning_rate": 4.236213015522093e-06, + "loss": 0.9252, "step": 18010 }, { - "epoch": 0.5103856725891921, + "epoch": 0.7047108537444244, "grad_norm": 0.0, - "learning_rate": 1.0149594070152638e-05, - "loss": 0.8309, + "learning_rate": 4.235177502437692e-06, + "loss": 0.9101, "step": 18011 }, { - "epoch": 0.5104140100314546, + "epoch": 0.7047499804366539, "grad_norm": 0.0, - "learning_rate": 1.0148676382957987e-05, - "loss": 0.9488, + "learning_rate": 4.234142081929117e-06, + "loss": 1.0196, "step": 18012 }, { - "epoch": 0.510442347473717, + "epoch": 0.7047891071288833, "grad_norm": 0.0, - "learning_rate": 1.0147758694510984e-05, - "loss": 0.8073, + "learning_rate": 4.233106754012996e-06, + "loss": 1.0114, "step": 18013 }, { - "epoch": 0.5104706849159795, + "epoch": 0.7048282338211128, "grad_norm": 0.0, - "learning_rate": 1.0146841004819348e-05, - "loss": 0.8964, + "learning_rate": 4.232071518705957e-06, + "loss": 0.8524, "step": 18014 }, { - "epoch": 0.510499022358242, + "epoch": 0.7048673605133422, "grad_norm": 0.0, - "learning_rate": 1.0145923313890817e-05, - "loss": 0.865, + "learning_rate": 4.231036376024618e-06, + "loss": 1.0204, "step": 18015 }, { - "epoch": 0.5105273598005045, + "epoch": 0.7049064872055716, "grad_norm": 0.0, - "learning_rate": 1.0145005621733124e-05, - "loss": 0.864, + "learning_rate": 4.23000132598561e-06, + "loss": 1.0135, "step": 18016 }, { - "epoch": 0.5105556972427668, + "epoch": 0.7049456138978011, "grad_norm": 0.0, - "learning_rate": 1.0144087928353991e-05, - "loss": 0.8887, + "learning_rate": 4.228966368605547e-06, + "loss": 0.988, "step": 18017 }, { - "epoch": 0.5105840346850293, + "epoch": 0.7049847405900305, "grad_norm": 0.0, - "learning_rate": 1.0143170233761157e-05, - "loss": 0.8965, + "learning_rate": 4.227931503901052e-06, + "loss": 0.9845, "step": 18018 }, { - "epoch": 0.5106123721272918, + "epoch": 0.70502386728226, "grad_norm": 0.0, - "learning_rate": 1.014225253796234e-05, - "loss": 0.8498, + "learning_rate": 4.2268967318887445e-06, + "loss": 0.9181, "step": 18019 }, { - "epoch": 0.5106407095695542, + "epoch": 0.7050629939744893, "grad_norm": 0.0, - "learning_rate": 1.0141334840965283e-05, - "loss": 0.8577, + "learning_rate": 4.225862052585244e-06, + "loss": 0.9318, "step": 18020 }, { - "epoch": 0.5106690470118167, + "epoch": 0.7051021206667188, "grad_norm": 0.0, - "learning_rate": 1.0140417142777705e-05, - "loss": 0.8054, + "learning_rate": 4.224827466007162e-06, + "loss": 0.9428, "step": 18021 }, { - "epoch": 0.5106973844540792, + "epoch": 0.7051412473589482, "grad_norm": 0.0, - "learning_rate": 1.0139499443407346e-05, - "loss": 0.8221, + "learning_rate": 4.223792972171114e-06, + "loss": 1.056, "step": 18022 }, { - "epoch": 0.5107257218963417, + "epoch": 0.7051803740511777, "grad_norm": 0.0, - "learning_rate": 1.0138581742861926e-05, - "loss": 0.7501, + "learning_rate": 4.222758571093715e-06, + "loss": 0.9969, "step": 18023 }, { - "epoch": 0.5107540593386041, + "epoch": 0.7052195007434071, "grad_norm": 0.0, - "learning_rate": 1.0137664041149187e-05, - "loss": 0.8893, + "learning_rate": 4.221724262791571e-06, + "loss": 1.0497, "step": 18024 }, { - "epoch": 0.5107823967808666, + "epoch": 0.7052586274356366, "grad_norm": 0.0, - "learning_rate": 1.0136746338276848e-05, - "loss": 0.7771, + "learning_rate": 4.220690047281295e-06, + "loss": 0.9635, "step": 18025 }, { - "epoch": 0.5108107342231291, + "epoch": 0.705297754127866, "grad_norm": 0.0, - "learning_rate": 1.0135828634252647e-05, - "loss": 0.813, + "learning_rate": 4.2196559245794944e-06, + "loss": 1.0123, "step": 18026 }, { - "epoch": 0.5108390716653914, + "epoch": 0.7053368808200955, "grad_norm": 0.0, - "learning_rate": 1.0134910929084308e-05, - "loss": 0.832, + "learning_rate": 4.2186218947027804e-06, + "loss": 0.9728, "step": 18027 }, { - "epoch": 0.5108674091076539, + "epoch": 0.7053760075123249, "grad_norm": 0.0, - "learning_rate": 1.0133993222779563e-05, - "loss": 0.8201, + "learning_rate": 4.217587957667751e-06, + "loss": 0.8787, "step": 18028 }, { - "epoch": 0.5108957465499164, + "epoch": 0.7054151342045544, "grad_norm": 0.0, - "learning_rate": 1.0133075515346147e-05, - "loss": 0.9072, + "learning_rate": 4.216554113491017e-06, + "loss": 1.133, "step": 18029 }, { - "epoch": 0.5109240839921788, + "epoch": 0.7054542608967838, "grad_norm": 0.0, - "learning_rate": 1.0132157806791788e-05, - "loss": 0.9546, + "learning_rate": 4.215520362189169e-06, + "loss": 1.0487, "step": 18030 }, { - "epoch": 0.5109524214344413, + "epoch": 0.7054933875890133, "grad_norm": 0.0, - "learning_rate": 1.0131240097124208e-05, - "loss": 0.873, + "learning_rate": 4.214486703778823e-06, + "loss": 1.219, "step": 18031 }, { - "epoch": 0.5109807588767038, + "epoch": 0.7055325142812426, "grad_norm": 0.0, - "learning_rate": 1.013032238635115e-05, - "loss": 0.8575, + "learning_rate": 4.213453138276568e-06, + "loss": 1.0229, "step": 18032 }, { - "epoch": 0.5110090963189663, + "epoch": 0.7055716409734721, "grad_norm": 0.0, - "learning_rate": 1.0129404674480336e-05, - "loss": 0.8208, + "learning_rate": 4.2124196656990065e-06, + "loss": 0.8991, "step": 18033 }, { - "epoch": 0.5110374337612287, + "epoch": 0.7056107676657015, "grad_norm": 0.0, - "learning_rate": 1.0128486961519497e-05, - "loss": 0.8733, + "learning_rate": 4.211386286062731e-06, + "loss": 1.0374, "step": 18034 }, { - "epoch": 0.5110657712034912, + "epoch": 0.705649894357931, "grad_norm": 0.0, - "learning_rate": 1.0127569247476367e-05, - "loss": 0.854, + "learning_rate": 4.2103529993843385e-06, + "loss": 0.957, "step": 18035 }, { - "epoch": 0.5110941086457537, + "epoch": 0.7056890210501604, "grad_norm": 0.0, - "learning_rate": 1.0126651532358671e-05, - "loss": 0.901, + "learning_rate": 4.209319805680421e-06, + "loss": 0.9893, "step": 18036 }, { - "epoch": 0.511122446088016, + "epoch": 0.7057281477423899, "grad_norm": 0.0, - "learning_rate": 1.0125733816174145e-05, - "loss": 0.9855, + "learning_rate": 4.208286704967574e-06, + "loss": 0.9283, "step": 18037 }, { - "epoch": 0.5111507835302785, + "epoch": 0.7057672744346193, "grad_norm": 0.0, - "learning_rate": 1.0124816098930516e-05, - "loss": 0.9095, + "learning_rate": 4.207253697262383e-06, + "loss": 1.0201, "step": 18038 }, { - "epoch": 0.511179120972541, + "epoch": 0.7058064011268488, "grad_norm": 0.0, - "learning_rate": 1.0123898380635515e-05, - "loss": 0.8727, + "learning_rate": 4.206220782581438e-06, + "loss": 1.0847, "step": 18039 }, { - "epoch": 0.5112074584148035, + "epoch": 0.7058455278190782, "grad_norm": 0.0, - "learning_rate": 1.012298066129687e-05, - "loss": 0.9129, + "learning_rate": 4.205187960941328e-06, + "loss": 1.0129, "step": 18040 }, { - "epoch": 0.5112357958570659, + "epoch": 0.7058846545113077, "grad_norm": 0.0, - "learning_rate": 1.0122062940922313e-05, - "loss": 0.9153, + "learning_rate": 4.204155232358642e-06, + "loss": 0.9684, "step": 18041 }, { - "epoch": 0.5112641332993284, + "epoch": 0.705923781203537, "grad_norm": 0.0, - "learning_rate": 1.0121145219519574e-05, - "loss": 0.8696, + "learning_rate": 4.203122596849956e-06, + "loss": 0.9455, "step": 18042 }, { - "epoch": 0.5112924707415909, + "epoch": 0.7059629078957665, "grad_norm": 0.0, - "learning_rate": 1.0120227497096388e-05, - "loss": 0.9143, + "learning_rate": 4.202090054431861e-06, + "loss": 0.9885, "step": 18043 }, { - "epoch": 0.5113208081838533, + "epoch": 0.7060020345879959, "grad_norm": 0.0, - "learning_rate": 1.0119309773660478e-05, - "loss": 0.8661, + "learning_rate": 4.201057605120927e-06, + "loss": 0.9767, "step": 18044 }, { - "epoch": 0.5113491456261158, + "epoch": 0.7060411612802253, "grad_norm": 0.0, - "learning_rate": 1.0118392049219578e-05, - "loss": 0.889, + "learning_rate": 4.200025248933749e-06, + "loss": 0.9684, "step": 18045 }, { - "epoch": 0.5113774830683783, + "epoch": 0.7060802879724548, "grad_norm": 0.0, - "learning_rate": 1.011747432378142e-05, - "loss": 0.7472, + "learning_rate": 4.198992985886894e-06, + "loss": 0.9617, "step": 18046 }, { - "epoch": 0.5114058205106408, + "epoch": 0.7061194146646842, "grad_norm": 0.0, - "learning_rate": 1.0116556597353728e-05, - "loss": 0.7935, + "learning_rate": 4.197960815996945e-06, + "loss": 1.0364, "step": 18047 }, { - "epoch": 0.5114341579529031, + "epoch": 0.7061585413569137, "grad_norm": 0.0, - "learning_rate": 1.0115638869944237e-05, - "loss": 0.9749, + "learning_rate": 4.19692873928047e-06, + "loss": 0.8227, "step": 18048 }, { - "epoch": 0.5114624953951656, + "epoch": 0.7061976680491431, "grad_norm": 0.0, - "learning_rate": 1.011472114156068e-05, - "loss": 0.9299, + "learning_rate": 4.195896755754054e-06, + "loss": 1.0728, "step": 18049 }, { - "epoch": 0.5114908328374281, + "epoch": 0.7062367947413726, "grad_norm": 0.0, - "learning_rate": 1.0113803412210784e-05, - "loss": 0.9904, + "learning_rate": 4.19486486543426e-06, + "loss": 0.937, "step": 18050 }, { - "epoch": 0.5115191702796905, + "epoch": 0.706275921433602, "grad_norm": 0.0, - "learning_rate": 1.0112885681902278e-05, - "loss": 0.8586, + "learning_rate": 4.193833068337663e-06, + "loss": 1.0535, "step": 18051 }, { - "epoch": 0.511547507721953, + "epoch": 0.7063150481258315, "grad_norm": 0.0, - "learning_rate": 1.0111967950642892e-05, - "loss": 0.9023, + "learning_rate": 4.192801364480828e-06, + "loss": 1.1069, "step": 18052 }, { - "epoch": 0.5115758451642155, + "epoch": 0.7063541748180608, "grad_norm": 0.0, - "learning_rate": 1.0111050218440362e-05, - "loss": 0.847, + "learning_rate": 4.191769753880332e-06, + "loss": 0.9036, "step": 18053 }, { - "epoch": 0.5116041826064779, + "epoch": 0.7063933015102903, "grad_norm": 0.0, - "learning_rate": 1.011013248530241e-05, - "loss": 0.8313, + "learning_rate": 4.19073823655273e-06, + "loss": 0.9716, "step": 18054 }, { - "epoch": 0.5116325200487404, + "epoch": 0.7064324282025197, "grad_norm": 0.0, - "learning_rate": 1.0109214751236778e-05, - "loss": 0.8872, + "learning_rate": 4.189706812514599e-06, + "loss": 0.9026, "step": 18055 }, { - "epoch": 0.5116608574910029, + "epoch": 0.7064715548947492, "grad_norm": 0.0, - "learning_rate": 1.0108297016251182e-05, - "loss": 0.8273, + "learning_rate": 4.1886754817824904e-06, + "loss": 0.9493, "step": 18056 }, { - "epoch": 0.5116891949332654, + "epoch": 0.7065106815869786, "grad_norm": 0.0, - "learning_rate": 1.0107379280353367e-05, - "loss": 0.9539, + "learning_rate": 4.187644244372973e-06, + "loss": 1.035, "step": 18057 }, { - "epoch": 0.5117175323755278, + "epoch": 0.7065498082792081, "grad_norm": 0.0, - "learning_rate": 1.0106461543551053e-05, - "loss": 0.8697, + "learning_rate": 4.186613100302605e-06, + "loss": 1.0014, "step": 18058 }, { - "epoch": 0.5117458698177902, + "epoch": 0.7065889349714375, "grad_norm": 0.0, - "learning_rate": 1.0105543805851977e-05, - "loss": 0.9539, + "learning_rate": 4.18558204958795e-06, + "loss": 0.9461, "step": 18059 }, { - "epoch": 0.5117742072600527, + "epoch": 0.706628061663667, "grad_norm": 0.0, - "learning_rate": 1.0104626067263861e-05, - "loss": 0.8414, + "learning_rate": 4.184551092245557e-06, + "loss": 0.9932, "step": 18060 }, { - "epoch": 0.5118025447023151, + "epoch": 0.7066671883558964, "grad_norm": 0.0, - "learning_rate": 1.0103708327794444e-05, - "loss": 0.9496, + "learning_rate": 4.183520228291987e-06, + "loss": 0.8869, "step": 18061 }, { - "epoch": 0.5118308821445776, + "epoch": 0.7067063150481259, "grad_norm": 0.0, - "learning_rate": 1.0102790587451452e-05, - "loss": 0.8845, + "learning_rate": 4.182489457743797e-06, + "loss": 1.0099, "step": 18062 }, { - "epoch": 0.5118592195868401, + "epoch": 0.7067454417403553, "grad_norm": 0.0, - "learning_rate": 1.010187284624262e-05, - "loss": 0.9211, + "learning_rate": 4.1814587806175324e-06, + "loss": 1.0339, "step": 18063 }, { - "epoch": 0.5118875570291026, + "epoch": 0.7067845684325847, "grad_norm": 0.0, - "learning_rate": 1.0100955104175671e-05, - "loss": 0.87, + "learning_rate": 4.180428196929749e-06, + "loss": 0.9286, "step": 18064 }, { - "epoch": 0.511915894471365, + "epoch": 0.7068236951248141, "grad_norm": 0.0, - "learning_rate": 1.0100037361258342e-05, - "loss": 0.8745, + "learning_rate": 4.179397706697001e-06, + "loss": 0.9482, "step": 18065 }, { - "epoch": 0.5119442319136275, + "epoch": 0.7068628218170436, "grad_norm": 0.0, - "learning_rate": 1.009911961749836e-05, - "loss": 0.9782, + "learning_rate": 4.178367309935828e-06, + "loss": 0.9906, "step": 18066 }, { - "epoch": 0.51197256935589, + "epoch": 0.706901948509273, "grad_norm": 0.0, - "learning_rate": 1.0098201872903457e-05, - "loss": 0.9751, + "learning_rate": 4.177337006662781e-06, + "loss": 1.1184, "step": 18067 }, { - "epoch": 0.5120009067981524, + "epoch": 0.7069410752015025, "grad_norm": 0.0, - "learning_rate": 1.0097284127481364e-05, - "loss": 0.8565, + "learning_rate": 4.176306796894409e-06, + "loss": 0.9459, "step": 18068 }, { - "epoch": 0.5120292442404148, + "epoch": 0.7069802018937319, "grad_norm": 0.0, - "learning_rate": 1.0096366381239808e-05, - "loss": 0.9119, + "learning_rate": 4.175276680647249e-06, + "loss": 0.931, "step": 18069 }, { - "epoch": 0.5120575816826773, + "epoch": 0.7070193285859614, "grad_norm": 0.0, - "learning_rate": 1.0095448634186527e-05, - "loss": 0.9106, + "learning_rate": 4.174246657937846e-06, + "loss": 0.9462, "step": 18070 }, { - "epoch": 0.5120859191249398, + "epoch": 0.7070584552781908, "grad_norm": 0.0, - "learning_rate": 1.0094530886329244e-05, - "loss": 0.8644, + "learning_rate": 4.173216728782743e-06, + "loss": 0.9336, "step": 18071 }, { - "epoch": 0.5121142565672022, + "epoch": 0.7070975819704203, "grad_norm": 0.0, - "learning_rate": 1.0093613137675693e-05, - "loss": 0.9119, + "learning_rate": 4.1721868931984796e-06, + "loss": 0.9961, "step": 18072 }, { - "epoch": 0.5121425940094647, + "epoch": 0.7071367086626497, "grad_norm": 0.0, - "learning_rate": 1.0092695388233601e-05, - "loss": 0.905, + "learning_rate": 4.1711571512015905e-06, + "loss": 0.9143, "step": 18073 }, { - "epoch": 0.5121709314517272, + "epoch": 0.707175835354879, "grad_norm": 0.0, - "learning_rate": 1.0091777638010702e-05, - "loss": 0.7699, + "learning_rate": 4.170127502808617e-06, + "loss": 1.1172, "step": 18074 }, { - "epoch": 0.5121992688939896, + "epoch": 0.7072149620471085, "grad_norm": 0.0, - "learning_rate": 1.0090859887014728e-05, - "loss": 0.8577, + "learning_rate": 4.169097948036081e-06, + "loss": 0.9434, "step": 18075 }, { - "epoch": 0.5122276063362521, + "epoch": 0.7072540887393379, "grad_norm": 0.0, - "learning_rate": 1.0089942135253407e-05, - "loss": 0.9736, + "learning_rate": 4.168068486900535e-06, + "loss": 0.9561, "step": 18076 }, { - "epoch": 0.5122559437785146, + "epoch": 0.7072932154315674, "grad_norm": 0.0, - "learning_rate": 1.008902438273447e-05, - "loss": 0.7982, + "learning_rate": 4.167039119418496e-06, + "loss": 1.0924, "step": 18077 }, { - "epoch": 0.512284281220777, + "epoch": 0.7073323421237968, "grad_norm": 0.0, - "learning_rate": 1.0088106629465646e-05, - "loss": 0.9227, + "learning_rate": 4.166009845606505e-06, + "loss": 0.9388, "step": 18078 }, { - "epoch": 0.5123126186630395, + "epoch": 0.7073714688160263, "grad_norm": 0.0, - "learning_rate": 1.008718887545467e-05, - "loss": 0.904, + "learning_rate": 4.164980665481078e-06, + "loss": 1.086, "step": 18079 }, { - "epoch": 0.512340956105302, + "epoch": 0.7074105955082557, "grad_norm": 0.0, - "learning_rate": 1.0086271120709265e-05, - "loss": 0.8544, + "learning_rate": 4.163951579058756e-06, + "loss": 0.8568, "step": 18080 }, { - "epoch": 0.5123692935475644, + "epoch": 0.7074497222004852, "grad_norm": 0.0, - "learning_rate": 1.008535336523717e-05, - "loss": 0.7829, + "learning_rate": 4.162922586356055e-06, + "loss": 1.0301, "step": 18081 }, { - "epoch": 0.5123976309898268, + "epoch": 0.7074888488927146, "grad_norm": 0.0, - "learning_rate": 1.008443560904611e-05, - "loss": 0.886, + "learning_rate": 4.161893687389508e-06, + "loss": 1.0449, "step": 18082 }, { - "epoch": 0.5124259684320893, + "epoch": 0.7075279755849441, "grad_norm": 0.0, - "learning_rate": 1.0083517852143821e-05, - "loss": 0.9298, + "learning_rate": 4.160864882175628e-06, + "loss": 0.9528, "step": 18083 }, { - "epoch": 0.5124543058743518, + "epoch": 0.7075671022771735, "grad_norm": 0.0, - "learning_rate": 1.0082600094538029e-05, - "loss": 0.8994, + "learning_rate": 4.159836170730942e-06, + "loss": 0.9149, "step": 18084 }, { - "epoch": 0.5124826433166142, + "epoch": 0.707606228969403, "grad_norm": 0.0, - "learning_rate": 1.0081682336236462e-05, - "loss": 0.8997, + "learning_rate": 4.158807553071969e-06, + "loss": 0.9377, "step": 18085 }, { - "epoch": 0.5125109807588767, + "epoch": 0.7076453556616323, "grad_norm": 0.0, - "learning_rate": 1.0080764577246858e-05, - "loss": 0.8173, + "learning_rate": 4.15777902921523e-06, + "loss": 1.0653, "step": 18086 }, { - "epoch": 0.5125393182011392, + "epoch": 0.7076844823538618, "grad_norm": 0.0, - "learning_rate": 1.0079846817576942e-05, - "loss": 0.8225, + "learning_rate": 4.156750599177235e-06, + "loss": 0.8922, "step": 18087 }, { - "epoch": 0.5125676556434017, + "epoch": 0.7077236090460912, "grad_norm": 0.0, - "learning_rate": 1.0078929057234449e-05, - "loss": 0.8066, + "learning_rate": 4.155722262974504e-06, + "loss": 0.9196, "step": 18088 }, { - "epoch": 0.5125959930856641, + "epoch": 0.7077627357383207, "grad_norm": 0.0, - "learning_rate": 1.0078011296227104e-05, - "loss": 0.8401, + "learning_rate": 4.154694020623551e-06, + "loss": 0.9974, "step": 18089 }, { - "epoch": 0.5126243305279266, + "epoch": 0.7078018624305501, "grad_norm": 0.0, - "learning_rate": 1.0077093534562643e-05, - "loss": 0.7832, + "learning_rate": 4.153665872140891e-06, + "loss": 1.0197, "step": 18090 }, { - "epoch": 0.512652667970189, + "epoch": 0.7078409891227796, "grad_norm": 0.0, - "learning_rate": 1.0076175772248795e-05, - "loss": 0.9754, + "learning_rate": 4.152637817543026e-06, + "loss": 1.0049, "step": 18091 }, { - "epoch": 0.5126810054124514, + "epoch": 0.707880115815009, "grad_norm": 0.0, - "learning_rate": 1.007525800929329e-05, - "loss": 0.8542, + "learning_rate": 4.151609856846476e-06, + "loss": 1.0793, "step": 18092 }, { - "epoch": 0.5127093428547139, + "epoch": 0.7079192425072385, "grad_norm": 0.0, - "learning_rate": 1.0074340245703857e-05, - "loss": 0.8258, + "learning_rate": 4.1505819900677345e-06, + "loss": 0.893, "step": 18093 }, { - "epoch": 0.5127376802969764, + "epoch": 0.7079583691994679, "grad_norm": 0.0, - "learning_rate": 1.007342248148823e-05, - "loss": 0.817, + "learning_rate": 4.149554217223325e-06, + "loss": 1.0457, "step": 18094 }, { - "epoch": 0.5127660177392389, + "epoch": 0.7079974958916974, "grad_norm": 0.0, - "learning_rate": 1.0072504716654138e-05, - "loss": 0.9776, + "learning_rate": 4.1485265383297394e-06, + "loss": 1.0854, "step": 18095 }, { - "epoch": 0.5127943551815013, + "epoch": 0.7080366225839267, "grad_norm": 0.0, - "learning_rate": 1.0071586951209311e-05, - "loss": 0.9186, + "learning_rate": 4.14749895340349e-06, + "loss": 0.8219, "step": 18096 }, { - "epoch": 0.5128226926237638, + "epoch": 0.7080757492761562, "grad_norm": 0.0, - "learning_rate": 1.0070669185161484e-05, - "loss": 0.9933, + "learning_rate": 4.146471462461065e-06, + "loss": 1.0166, "step": 18097 }, { - "epoch": 0.5128510300660263, + "epoch": 0.7081148759683856, "grad_norm": 0.0, - "learning_rate": 1.0069751418518379e-05, - "loss": 0.8997, + "learning_rate": 4.145444065518981e-06, + "loss": 0.8531, "step": 18098 }, { - "epoch": 0.5128793675082887, + "epoch": 0.7081540026606151, "grad_norm": 0.0, - "learning_rate": 1.0068833651287736e-05, - "loss": 0.8897, + "learning_rate": 4.144416762593726e-06, + "loss": 0.9632, "step": 18099 }, { - "epoch": 0.5129077049505512, + "epoch": 0.7081931293528445, "grad_norm": 0.0, - "learning_rate": 1.0067915883477277e-05, - "loss": 0.858, + "learning_rate": 4.143389553701803e-06, + "loss": 0.9012, "step": 18100 }, { - "epoch": 0.5129360423928137, + "epoch": 0.708232256045074, "grad_norm": 0.0, - "learning_rate": 1.0066998115094742e-05, - "loss": 0.8948, + "learning_rate": 4.142362438859703e-06, + "loss": 1.0148, "step": 18101 }, { - "epoch": 0.512964379835076, + "epoch": 0.7082713827373034, "grad_norm": 0.0, - "learning_rate": 1.0066080346147853e-05, - "loss": 0.8674, + "learning_rate": 4.1413354180839215e-06, + "loss": 1.0027, "step": 18102 }, { - "epoch": 0.5129927172773385, + "epoch": 0.7083105094295328, "grad_norm": 0.0, - "learning_rate": 1.0065162576644348e-05, - "loss": 0.7743, + "learning_rate": 4.140308491390952e-06, + "loss": 0.831, "step": 18103 }, { - "epoch": 0.513021054719601, + "epoch": 0.7083496361217623, "grad_norm": 0.0, - "learning_rate": 1.0064244806591953e-05, - "loss": 0.8368, + "learning_rate": 4.139281658797288e-06, + "loss": 0.8423, "step": 18104 }, { - "epoch": 0.5130493921618635, + "epoch": 0.7083887628139917, "grad_norm": 0.0, - "learning_rate": 1.0063327035998402e-05, - "loss": 0.9705, + "learning_rate": 4.138254920319414e-06, + "loss": 0.8784, "step": 18105 }, { - "epoch": 0.5130777296041259, + "epoch": 0.7084278895062212, "grad_norm": 0.0, - "learning_rate": 1.0062409264871423e-05, - "loss": 0.8908, + "learning_rate": 4.137228275973821e-06, + "loss": 0.9766, "step": 18106 }, { - "epoch": 0.5131060670463884, + "epoch": 0.7084670161984505, "grad_norm": 0.0, - "learning_rate": 1.0061491493218744e-05, - "loss": 0.8545, + "learning_rate": 4.136201725776999e-06, + "loss": 1.0381, "step": 18107 }, { - "epoch": 0.5131344044886509, + "epoch": 0.70850614289068, "grad_norm": 0.0, - "learning_rate": 1.0060573721048104e-05, - "loss": 0.8967, + "learning_rate": 4.135175269745426e-06, + "loss": 1.0651, "step": 18108 }, { - "epoch": 0.5131627419309133, + "epoch": 0.7085452695829094, "grad_norm": 0.0, - "learning_rate": 1.0059655948367229e-05, - "loss": 0.8768, + "learning_rate": 4.134148907895589e-06, + "loss": 0.9859, "step": 18109 }, { - "epoch": 0.5131910793731758, + "epoch": 0.7085843962751389, "grad_norm": 0.0, - "learning_rate": 1.0058738175183847e-05, - "loss": 0.8513, + "learning_rate": 4.1331226402439695e-06, + "loss": 0.9989, "step": 18110 }, { - "epoch": 0.5132194168154383, + "epoch": 0.7086235229673683, "grad_norm": 0.0, - "learning_rate": 1.005782040150569e-05, - "loss": 0.8461, + "learning_rate": 4.132096466807053e-06, + "loss": 0.908, "step": 18111 }, { - "epoch": 0.5132477542577008, + "epoch": 0.7086626496595978, "grad_norm": 0.0, - "learning_rate": 1.0056902627340498e-05, - "loss": 0.8433, + "learning_rate": 4.131070387601312e-06, + "loss": 1.0018, "step": 18112 }, { - "epoch": 0.5132760916999631, + "epoch": 0.7087017763518272, "grad_norm": 0.0, - "learning_rate": 1.0055984852695985e-05, - "loss": 0.9633, + "learning_rate": 4.130044402643228e-06, + "loss": 0.8837, "step": 18113 }, { - "epoch": 0.5133044291422256, + "epoch": 0.7087409030440567, "grad_norm": 0.0, - "learning_rate": 1.0055067077579894e-05, - "loss": 0.9332, + "learning_rate": 4.129018511949272e-06, + "loss": 0.9664, "step": 18114 }, { - "epoch": 0.5133327665844881, + "epoch": 0.7087800297362861, "grad_norm": 0.0, - "learning_rate": 1.0054149301999953e-05, - "loss": 0.8247, + "learning_rate": 4.127992715535922e-06, + "loss": 0.9742, "step": 18115 }, { - "epoch": 0.5133611040267505, + "epoch": 0.7088191564285156, "grad_norm": 0.0, - "learning_rate": 1.0053231525963894e-05, - "loss": 0.9639, + "learning_rate": 4.126967013419652e-06, + "loss": 0.9275, "step": 18116 }, { - "epoch": 0.513389441469013, + "epoch": 0.708858283120745, "grad_norm": 0.0, - "learning_rate": 1.0052313749479445e-05, - "loss": 0.9351, + "learning_rate": 4.1259414056169355e-06, + "loss": 0.985, "step": 18117 }, { - "epoch": 0.5134177789112755, + "epoch": 0.7088974098129744, "grad_norm": 0.0, - "learning_rate": 1.0051395972554336e-05, - "loss": 0.8497, + "learning_rate": 4.124915892144236e-06, + "loss": 1.0756, "step": 18118 }, { - "epoch": 0.513446116353538, + "epoch": 0.7089365365052038, "grad_norm": 0.0, - "learning_rate": 1.0050478195196303e-05, - "loss": 0.9246, + "learning_rate": 4.123890473018025e-06, + "loss": 0.9905, "step": 18119 }, { - "epoch": 0.5134744537958004, + "epoch": 0.7089756631974333, "grad_norm": 0.0, - "learning_rate": 1.0049560417413071e-05, - "loss": 0.9816, + "learning_rate": 4.12286514825477e-06, + "loss": 0.9389, "step": 18120 }, { - "epoch": 0.5135027912380629, + "epoch": 0.7090147898896627, "grad_norm": 0.0, - "learning_rate": 1.0048642639212374e-05, - "loss": 0.8587, + "learning_rate": 4.12183991787094e-06, + "loss": 1.0247, "step": 18121 }, { - "epoch": 0.5135311286803254, + "epoch": 0.7090539165818922, "grad_norm": 0.0, - "learning_rate": 1.0047724860601943e-05, - "loss": 0.8106, + "learning_rate": 4.120814781882991e-06, + "loss": 0.9402, "step": 18122 }, { - "epoch": 0.5135594661225877, + "epoch": 0.7090930432741216, "grad_norm": 0.0, - "learning_rate": 1.0046807081589504e-05, - "loss": 0.8521, + "learning_rate": 4.119789740307394e-06, + "loss": 0.9771, "step": 18123 }, { - "epoch": 0.5135878035648502, + "epoch": 0.7091321699663511, "grad_norm": 0.0, - "learning_rate": 1.0045889302182797e-05, - "loss": 0.843, + "learning_rate": 4.118764793160598e-06, + "loss": 1.0654, "step": 18124 }, { - "epoch": 0.5136161410071127, + "epoch": 0.7091712966585805, "grad_norm": 0.0, - "learning_rate": 1.0044971522389542e-05, - "loss": 0.9044, + "learning_rate": 4.117739940459077e-06, + "loss": 0.8769, "step": 18125 }, { - "epoch": 0.5136444784493751, + "epoch": 0.70921042335081, "grad_norm": 0.0, - "learning_rate": 1.004405374221748e-05, - "loss": 0.8743, + "learning_rate": 4.1167151822192775e-06, + "loss": 0.9146, "step": 18126 }, { - "epoch": 0.5136728158916376, + "epoch": 0.7092495500430394, "grad_norm": 0.0, - "learning_rate": 1.0043135961674333e-05, - "loss": 0.8897, + "learning_rate": 4.1156905184576646e-06, + "loss": 0.979, "step": 18127 }, { - "epoch": 0.5137011533339001, + "epoch": 0.7092886767352689, "grad_norm": 0.0, - "learning_rate": 1.0042218180767838e-05, - "loss": 0.9537, + "learning_rate": 4.1146659491906805e-06, + "loss": 0.9673, "step": 18128 }, { - "epoch": 0.5137294907761626, + "epoch": 0.7093278034274982, "grad_norm": 0.0, - "learning_rate": 1.0041300399505724e-05, - "loss": 0.7663, + "learning_rate": 4.113641474434794e-06, + "loss": 1.0009, "step": 18129 }, { - "epoch": 0.513757828218425, + "epoch": 0.7093669301197276, "grad_norm": 0.0, - "learning_rate": 1.004038261789572e-05, - "loss": 0.8899, + "learning_rate": 4.112617094206445e-06, + "loss": 0.9176, "step": 18130 }, { - "epoch": 0.5137861656606875, + "epoch": 0.7094060568119571, "grad_norm": 0.0, - "learning_rate": 1.0039464835945558e-05, - "loss": 0.8359, + "learning_rate": 4.111592808522093e-06, + "loss": 0.8408, "step": 18131 }, { - "epoch": 0.51381450310295, + "epoch": 0.7094451835041865, "grad_norm": 0.0, - "learning_rate": 1.0038547053662968e-05, - "loss": 0.983, + "learning_rate": 4.110568617398178e-06, + "loss": 0.9987, "step": 18132 }, { - "epoch": 0.5138428405452123, + "epoch": 0.709484310196416, "grad_norm": 0.0, - "learning_rate": 1.0037629271055684e-05, - "loss": 0.9904, + "learning_rate": 4.109544520851151e-06, + "loss": 0.9797, "step": 18133 }, { - "epoch": 0.5138711779874748, + "epoch": 0.7095234368886454, "grad_norm": 0.0, - "learning_rate": 1.0036711488131437e-05, - "loss": 0.9146, + "learning_rate": 4.1085205188974575e-06, + "loss": 1.0494, "step": 18134 }, { - "epoch": 0.5138995154297373, + "epoch": 0.7095625635808749, "grad_norm": 0.0, - "learning_rate": 1.003579370489795e-05, - "loss": 0.9805, + "learning_rate": 4.107496611553547e-06, + "loss": 1.1243, "step": 18135 }, { - "epoch": 0.5139278528719998, + "epoch": 0.7096016902731043, "grad_norm": 0.0, - "learning_rate": 1.0034875921362963e-05, - "loss": 0.844, + "learning_rate": 4.106472798835852e-06, + "loss": 0.8484, "step": 18136 }, { - "epoch": 0.5139561903142622, + "epoch": 0.7096408169653338, "grad_norm": 0.0, - "learning_rate": 1.0033958137534203e-05, - "loss": 0.9297, + "learning_rate": 4.105449080760819e-06, + "loss": 0.8976, "step": 18137 }, { - "epoch": 0.5139845277565247, + "epoch": 0.7096799436575632, "grad_norm": 0.0, - "learning_rate": 1.00330403534194e-05, - "loss": 0.8808, + "learning_rate": 4.1044254573448885e-06, + "loss": 0.8911, "step": 18138 }, { - "epoch": 0.5140128651987872, + "epoch": 0.7097190703497926, "grad_norm": 0.0, - "learning_rate": 1.0032122569026284e-05, - "loss": 0.9495, + "learning_rate": 4.1034019286045e-06, + "loss": 1.0576, "step": 18139 }, { - "epoch": 0.5140412026410496, + "epoch": 0.709758197042022, "grad_norm": 0.0, - "learning_rate": 1.0031204784362591e-05, - "loss": 0.8755, + "learning_rate": 4.102378494556085e-06, + "loss": 1.0852, "step": 18140 }, { - "epoch": 0.5140695400833121, + "epoch": 0.7097973237342515, "grad_norm": 0.0, - "learning_rate": 1.0030286999436048e-05, - "loss": 0.8356, + "learning_rate": 4.101355155216084e-06, + "loss": 0.8773, "step": 18141 }, { - "epoch": 0.5140978775255746, + "epoch": 0.7098364504264809, "grad_norm": 0.0, - "learning_rate": 1.0029369214254385e-05, - "loss": 0.8809, + "learning_rate": 4.100331910600922e-06, + "loss": 0.9505, "step": 18142 }, { - "epoch": 0.514126214967837, + "epoch": 0.7098755771187104, "grad_norm": 0.0, - "learning_rate": 1.0028451428825334e-05, - "loss": 0.8265, + "learning_rate": 4.099308760727043e-06, + "loss": 0.9867, "step": 18143 }, { - "epoch": 0.5141545524100994, + "epoch": 0.7099147038109398, "grad_norm": 0.0, - "learning_rate": 1.0027533643156629e-05, - "loss": 0.9271, + "learning_rate": 4.098285705610867e-06, + "loss": 0.962, "step": 18144 }, { - "epoch": 0.5141828898523619, + "epoch": 0.7099538305031693, "grad_norm": 0.0, - "learning_rate": 1.0026615857255994e-05, - "loss": 0.8598, + "learning_rate": 4.097262745268833e-06, + "loss": 0.992, "step": 18145 }, { - "epoch": 0.5142112272946244, + "epoch": 0.7099929571953987, "grad_norm": 0.0, - "learning_rate": 1.0025698071131166e-05, - "loss": 0.9153, + "learning_rate": 4.0962398797173575e-06, + "loss": 0.9155, "step": 18146 }, { - "epoch": 0.5142395647368868, + "epoch": 0.7100320838876282, "grad_norm": 0.0, - "learning_rate": 1.0024780284789875e-05, - "loss": 0.8324, + "learning_rate": 4.095217108972872e-06, + "loss": 0.8997, "step": 18147 }, { - "epoch": 0.5142679021791493, + "epoch": 0.7100712105798576, "grad_norm": 0.0, - "learning_rate": 1.0023862498239847e-05, - "loss": 0.8934, + "learning_rate": 4.0941944330518004e-06, + "loss": 0.9416, "step": 18148 }, { - "epoch": 0.5142962396214118, + "epoch": 0.710110337272087, "grad_norm": 0.0, - "learning_rate": 1.0022944711488818e-05, - "loss": 0.9496, + "learning_rate": 4.09317185197057e-06, + "loss": 0.9871, "step": 18149 }, { - "epoch": 0.5143245770636742, + "epoch": 0.7101494639643164, "grad_norm": 0.0, - "learning_rate": 1.0022026924544517e-05, - "loss": 0.8146, + "learning_rate": 4.092149365745594e-06, + "loss": 1.0251, "step": 18150 }, { - "epoch": 0.5143529145059367, + "epoch": 0.7101885906565459, "grad_norm": 0.0, - "learning_rate": 1.0021109137414674e-05, - "loss": 0.8492, + "learning_rate": 4.091126974393297e-06, + "loss": 1.011, "step": 18151 }, { - "epoch": 0.5143812519481992, + "epoch": 0.7102277173487753, "grad_norm": 0.0, - "learning_rate": 1.002019135010702e-05, - "loss": 0.9747, + "learning_rate": 4.090104677930099e-06, + "loss": 1.0233, "step": 18152 }, { - "epoch": 0.5144095893904617, + "epoch": 0.7102668440410048, "grad_norm": 0.0, - "learning_rate": 1.001927356262929e-05, - "loss": 0.9348, + "learning_rate": 4.0890824763724115e-06, + "loss": 0.9592, "step": 18153 }, { - "epoch": 0.514437926832724, + "epoch": 0.7103059707332342, "grad_norm": 0.0, - "learning_rate": 1.0018355774989213e-05, - "loss": 0.8617, + "learning_rate": 4.088060369736653e-06, + "loss": 0.8635, "step": 18154 }, { - "epoch": 0.5144662642749865, + "epoch": 0.7103450974254637, "grad_norm": 0.0, - "learning_rate": 1.0017437987194516e-05, - "loss": 0.9512, + "learning_rate": 4.087038358039236e-06, + "loss": 0.9082, "step": 18155 }, { - "epoch": 0.514494601717249, + "epoch": 0.7103842241176931, "grad_norm": 0.0, - "learning_rate": 1.001652019925293e-05, - "loss": 0.8912, + "learning_rate": 4.086016441296578e-06, + "loss": 1.0714, "step": 18156 }, { - "epoch": 0.5145229391595114, + "epoch": 0.7104233508099226, "grad_norm": 0.0, - "learning_rate": 1.0015602411172191e-05, - "loss": 0.9771, + "learning_rate": 4.08499461952508e-06, + "loss": 1.0318, "step": 18157 }, { - "epoch": 0.5145512766017739, + "epoch": 0.710462477502152, "grad_norm": 0.0, - "learning_rate": 1.0014684622960027e-05, - "loss": 1.0342, + "learning_rate": 4.083972892741161e-06, + "loss": 1.0147, "step": 18158 }, { - "epoch": 0.5145796140440364, + "epoch": 0.7105016041943814, "grad_norm": 0.0, - "learning_rate": 1.0013766834624168e-05, - "loss": 0.9473, + "learning_rate": 4.082951260961222e-06, + "loss": 0.9522, "step": 18159 }, { - "epoch": 0.5146079514862989, + "epoch": 0.7105407308866108, "grad_norm": 0.0, - "learning_rate": 1.0012849046172346e-05, - "loss": 0.9445, + "learning_rate": 4.08192972420167e-06, + "loss": 0.9405, "step": 18160 }, { - "epoch": 0.5146362889285613, + "epoch": 0.7105798575788402, "grad_norm": 0.0, - "learning_rate": 1.0011931257612292e-05, - "loss": 0.9778, + "learning_rate": 4.080908282478911e-06, + "loss": 1.0097, "step": 18161 }, { - "epoch": 0.5146646263708238, + "epoch": 0.7106189842710697, "grad_norm": 0.0, - "learning_rate": 1.0011013468951738e-05, - "loss": 0.7979, + "learning_rate": 4.079886935809352e-06, + "loss": 1.0476, "step": 18162 }, { - "epoch": 0.5146929638130863, + "epoch": 0.7106581109632991, "grad_norm": 0.0, - "learning_rate": 1.0010095680198413e-05, - "loss": 0.8791, + "learning_rate": 4.078865684209385e-06, + "loss": 0.9708, "step": 18163 }, { - "epoch": 0.5147213012553487, + "epoch": 0.7106972376555286, "grad_norm": 0.0, - "learning_rate": 1.0009177891360048e-05, - "loss": 0.8644, + "learning_rate": 4.077844527695418e-06, + "loss": 1.0969, "step": 18164 }, { - "epoch": 0.5147496386976111, + "epoch": 0.710736364347758, "grad_norm": 0.0, - "learning_rate": 1.0008260102444369e-05, - "loss": 0.8386, + "learning_rate": 4.076823466283846e-06, + "loss": 1.005, "step": 18165 }, { - "epoch": 0.5147779761398736, + "epoch": 0.7107754910399875, "grad_norm": 0.0, - "learning_rate": 1.000734231345912e-05, - "loss": 0.8946, + "learning_rate": 4.075802499991071e-06, + "loss": 1.0276, "step": 18166 }, { - "epoch": 0.514806313582136, + "epoch": 0.7108146177322169, "grad_norm": 0.0, - "learning_rate": 1.000642452441202e-05, - "loss": 0.8781, + "learning_rate": 4.07478162883348e-06, + "loss": 1.0826, "step": 18167 }, { - "epoch": 0.5148346510243985, + "epoch": 0.7108537444244464, "grad_norm": 0.0, - "learning_rate": 1.0005506735310803e-05, - "loss": 0.9538, + "learning_rate": 4.073760852827472e-06, + "loss": 0.938, "step": 18168 }, { - "epoch": 0.514862988466661, + "epoch": 0.7108928711166758, "grad_norm": 0.0, - "learning_rate": 1.0004588946163203e-05, - "loss": 0.851, + "learning_rate": 4.072740171989438e-06, + "loss": 1.0042, "step": 18169 }, { - "epoch": 0.5148913259089235, + "epoch": 0.7109319978089053, "grad_norm": 0.0, - "learning_rate": 1.0003671156976948e-05, - "loss": 0.882, + "learning_rate": 4.071719586335774e-06, + "loss": 1.0156, "step": 18170 }, { - "epoch": 0.5149196633511859, + "epoch": 0.7109711245011346, "grad_norm": 0.0, - "learning_rate": 1.000275336775977e-05, - "loss": 0.9022, + "learning_rate": 4.07069909588286e-06, + "loss": 1.0038, "step": 18171 }, { - "epoch": 0.5149480007934484, + "epoch": 0.7110102511933641, "grad_norm": 0.0, - "learning_rate": 1.0001835578519397e-05, - "loss": 0.8133, + "learning_rate": 4.069678700647094e-06, + "loss": 1.0375, "step": 18172 }, { - "epoch": 0.5149763382357109, + "epoch": 0.7110493778855935, "grad_norm": 0.0, - "learning_rate": 1.0000917789263565e-05, - "loss": 0.7326, + "learning_rate": 4.068658400644848e-06, + "loss": 0.8284, "step": 18173 }, { - "epoch": 0.5150046756779733, + "epoch": 0.711088504577823, "grad_norm": 0.0, - "learning_rate": 1e-05, - "loss": 0.8854, + "learning_rate": 4.067638195892525e-06, + "loss": 0.9897, "step": 18174 }, { - "epoch": 0.5150330131202357, + "epoch": 0.7111276312700524, "grad_norm": 0.0, - "learning_rate": 9.999082210736437e-06, - "loss": 0.8115, + "learning_rate": 4.066618086406494e-06, + "loss": 1.0085, "step": 18175 }, { - "epoch": 0.5150613505624982, + "epoch": 0.7111667579622819, "grad_norm": 0.0, - "learning_rate": 9.998164421480606e-06, - "loss": 0.9234, + "learning_rate": 4.065598072203145e-06, + "loss": 0.8585, "step": 18176 }, { - "epoch": 0.5150896880047607, + "epoch": 0.7112058846545113, "grad_norm": 0.0, - "learning_rate": 9.997246632240234e-06, - "loss": 0.866, + "learning_rate": 4.064578153298848e-06, + "loss": 0.9146, "step": 18177 }, { - "epoch": 0.5151180254470231, + "epoch": 0.7112450113467408, "grad_norm": 0.0, - "learning_rate": 9.996328843023055e-06, - "loss": 0.7746, + "learning_rate": 4.063558329709996e-06, + "loss": 1.0061, "step": 18178 }, { - "epoch": 0.5151463628892856, + "epoch": 0.7112841380389702, "grad_norm": 0.0, - "learning_rate": 9.995411053836798e-06, - "loss": 0.8696, + "learning_rate": 4.062538601452954e-06, + "loss": 0.8987, "step": 18179 }, { - "epoch": 0.5151747003315481, + "epoch": 0.7113232647311997, "grad_norm": 0.0, - "learning_rate": 9.994493264689197e-06, - "loss": 0.8898, + "learning_rate": 4.061518968544106e-06, + "loss": 0.9269, "step": 18180 }, { - "epoch": 0.5152030377738105, + "epoch": 0.711362391423429, "grad_norm": 0.0, - "learning_rate": 9.993575475587984e-06, - "loss": 0.9419, + "learning_rate": 4.060499430999818e-06, + "loss": 0.8856, "step": 18181 }, { - "epoch": 0.515231375216073, + "epoch": 0.7114015181156585, "grad_norm": 0.0, - "learning_rate": 9.992657686540884e-06, - "loss": 0.9687, + "learning_rate": 4.059479988836467e-06, + "loss": 1.0102, "step": 18182 }, { - "epoch": 0.5152597126583355, + "epoch": 0.7114406448078879, "grad_norm": 0.0, - "learning_rate": 9.99173989755563e-06, - "loss": 0.8654, + "learning_rate": 4.0584606420704235e-06, + "loss": 1.0403, "step": 18183 }, { - "epoch": 0.515288050100598, + "epoch": 0.7114797715001174, "grad_norm": 0.0, - "learning_rate": 9.990822108639957e-06, - "loss": 0.9589, + "learning_rate": 4.05744139071806e-06, + "loss": 0.9266, "step": 18184 }, { - "epoch": 0.5153163875428604, + "epoch": 0.7115188981923468, "grad_norm": 0.0, - "learning_rate": 9.98990431980159e-06, - "loss": 0.8036, + "learning_rate": 4.056422234795738e-06, + "loss": 1.0587, "step": 18185 }, { - "epoch": 0.5153447249851228, + "epoch": 0.7115580248845763, "grad_norm": 0.0, - "learning_rate": 9.988986531048267e-06, - "loss": 0.9002, + "learning_rate": 4.055403174319828e-06, + "loss": 0.9473, "step": 18186 }, { - "epoch": 0.5153730624273853, + "epoch": 0.7115971515768057, "grad_norm": 0.0, - "learning_rate": 9.988068742387711e-06, - "loss": 0.8289, + "learning_rate": 4.054384209306692e-06, + "loss": 0.9389, "step": 18187 }, { - "epoch": 0.5154013998696477, + "epoch": 0.7116362782690351, "grad_norm": 0.0, - "learning_rate": 9.987150953827656e-06, - "loss": 0.9337, + "learning_rate": 4.0533653397727005e-06, + "loss": 0.9775, "step": 18188 }, { - "epoch": 0.5154297373119102, + "epoch": 0.7116754049612646, "grad_norm": 0.0, - "learning_rate": 9.986233165375837e-06, - "loss": 0.8277, + "learning_rate": 4.052346565734207e-06, + "loss": 0.9898, "step": 18189 }, { - "epoch": 0.5154580747541727, + "epoch": 0.711714531653494, "grad_norm": 0.0, - "learning_rate": 9.985315377039978e-06, - "loss": 0.7892, + "learning_rate": 4.051327887207577e-06, + "loss": 1.0475, "step": 18190 }, { - "epoch": 0.5154864121964351, + "epoch": 0.7117536583457235, "grad_norm": 0.0, - "learning_rate": 9.984397588827812e-06, - "loss": 0.9836, + "learning_rate": 4.0503093042091645e-06, + "loss": 0.9811, "step": 18191 }, { - "epoch": 0.5155147496386976, + "epoch": 0.7117927850379528, "grad_norm": 0.0, - "learning_rate": 9.983479800747072e-06, - "loss": 0.8972, + "learning_rate": 4.049290816755328e-06, + "loss": 0.9558, "step": 18192 }, { - "epoch": 0.5155430870809601, + "epoch": 0.7118319117301823, "grad_norm": 0.0, - "learning_rate": 9.982562012805487e-06, - "loss": 0.9414, + "learning_rate": 4.048272424862425e-06, + "loss": 0.861, "step": 18193 }, { - "epoch": 0.5155714245232226, + "epoch": 0.7118710384224117, "grad_norm": 0.0, - "learning_rate": 9.981644225010794e-06, - "loss": 0.9136, + "learning_rate": 4.047254128546813e-06, + "loss": 1.0027, "step": 18194 }, { - "epoch": 0.515599761965485, + "epoch": 0.7119101651146412, "grad_norm": 0.0, - "learning_rate": 9.980726437370713e-06, - "loss": 0.8264, + "learning_rate": 4.046235927824836e-06, + "loss": 1.1299, "step": 18195 }, { - "epoch": 0.5156280994077475, + "epoch": 0.7119492918068706, "grad_norm": 0.0, - "learning_rate": 9.979808649892979e-06, - "loss": 0.7958, + "learning_rate": 4.0452178227128505e-06, + "loss": 0.6706, "step": 18196 }, { - "epoch": 0.51565643685001, + "epoch": 0.7119884184991001, "grad_norm": 0.0, - "learning_rate": 9.978890862585329e-06, - "loss": 0.9588, + "learning_rate": 4.044199813227208e-06, + "loss": 0.9004, "step": 18197 }, { - "epoch": 0.5156847742922723, + "epoch": 0.7120275451913295, "grad_norm": 0.0, - "learning_rate": 9.977973075455485e-06, - "loss": 0.8654, + "learning_rate": 4.04318189938425e-06, + "loss": 1.0101, "step": 18198 }, { - "epoch": 0.5157131117345348, + "epoch": 0.712066671883559, "grad_norm": 0.0, - "learning_rate": 9.977055288511182e-06, - "loss": 0.9258, + "learning_rate": 4.042164081200326e-06, + "loss": 1.0159, "step": 18199 }, { - "epoch": 0.5157414491767973, + "epoch": 0.7121057985757884, "grad_norm": 0.0, - "learning_rate": 9.976137501760157e-06, - "loss": 0.9177, + "learning_rate": 4.041146358691782e-06, + "loss": 1.0167, "step": 18200 }, { - "epoch": 0.5157697866190598, + "epoch": 0.7121449252680179, "grad_norm": 0.0, - "learning_rate": 9.975219715210129e-06, - "loss": 0.9212, + "learning_rate": 4.040128731874964e-06, + "loss": 0.9599, "step": 18201 }, { - "epoch": 0.5157981240613222, + "epoch": 0.7121840519602473, "grad_norm": 0.0, - "learning_rate": 9.974301928868839e-06, - "loss": 0.8393, + "learning_rate": 4.039111200766207e-06, + "loss": 0.9498, "step": 18202 }, { - "epoch": 0.5158264615035847, + "epoch": 0.7122231786524768, "grad_norm": 0.0, - "learning_rate": 9.97338414274401e-06, - "loss": 0.7884, + "learning_rate": 4.038093765381857e-06, + "loss": 0.9728, "step": 18203 }, { - "epoch": 0.5158547989458472, + "epoch": 0.7122623053447061, "grad_norm": 0.0, - "learning_rate": 9.972466356843375e-06, - "loss": 0.8597, + "learning_rate": 4.037076425738245e-06, + "loss": 0.9727, "step": 18204 }, { - "epoch": 0.5158831363881096, + "epoch": 0.7123014320369356, "grad_norm": 0.0, - "learning_rate": 9.971548571174668e-06, - "loss": 0.9337, + "learning_rate": 4.036059181851721e-06, + "loss": 0.9384, "step": 18205 }, { - "epoch": 0.5159114738303721, + "epoch": 0.712340558729165, "grad_norm": 0.0, - "learning_rate": 9.970630785745617e-06, - "loss": 0.7753, + "learning_rate": 4.0350420337386075e-06, + "loss": 1.0279, "step": 18206 }, { - "epoch": 0.5159398112726346, + "epoch": 0.7123796854213945, "grad_norm": 0.0, - "learning_rate": 9.969713000563957e-06, - "loss": 0.8838, + "learning_rate": 4.03402498141525e-06, + "loss": 0.8969, "step": 18207 }, { - "epoch": 0.515968148714897, + "epoch": 0.7124188121136239, "grad_norm": 0.0, - "learning_rate": 9.968795215637412e-06, - "loss": 0.7622, + "learning_rate": 4.033008024897971e-06, + "loss": 1.0467, "step": 18208 }, { - "epoch": 0.5159964861571594, + "epoch": 0.7124579388058534, "grad_norm": 0.0, - "learning_rate": 9.967877430973716e-06, - "loss": 0.9655, + "learning_rate": 4.031991164203106e-06, + "loss": 1.011, "step": 18209 }, { - "epoch": 0.5160248235994219, + "epoch": 0.7124970654980828, "grad_norm": 0.0, - "learning_rate": 9.966959646580604e-06, - "loss": 0.8595, + "learning_rate": 4.030974399346985e-06, + "loss": 0.9373, "step": 18210 }, { - "epoch": 0.5160531610416844, + "epoch": 0.7125361921903123, "grad_norm": 0.0, - "learning_rate": 9.966041862465799e-06, - "loss": 0.7882, + "learning_rate": 4.029957730345939e-06, + "loss": 0.9365, "step": 18211 }, { - "epoch": 0.5160814984839468, + "epoch": 0.7125753188825417, "grad_norm": 0.0, - "learning_rate": 9.965124078637037e-06, - "loss": 0.9123, + "learning_rate": 4.028941157216287e-06, + "loss": 0.9915, "step": 18212 }, { - "epoch": 0.5161098359262093, + "epoch": 0.7126144455747712, "grad_norm": 0.0, - "learning_rate": 9.964206295102052e-06, - "loss": 0.9324, + "learning_rate": 4.027924679974358e-06, + "loss": 1.1229, "step": 18213 }, { - "epoch": 0.5161381733684718, + "epoch": 0.7126535722670005, "grad_norm": 0.0, - "learning_rate": 9.963288511868567e-06, - "loss": 0.9663, + "learning_rate": 4.026908298636476e-06, + "loss": 0.9879, "step": 18214 }, { - "epoch": 0.5161665108107342, + "epoch": 0.71269269895923, "grad_norm": 0.0, - "learning_rate": 9.96237072894432e-06, - "loss": 0.8676, + "learning_rate": 4.025892013218965e-06, + "loss": 0.9587, "step": 18215 }, { - "epoch": 0.5161948482529967, + "epoch": 0.7127318256514594, "grad_norm": 0.0, - "learning_rate": 9.961452946337035e-06, - "loss": 0.8961, + "learning_rate": 4.0248758237381395e-06, + "loss": 1.0146, "step": 18216 }, { - "epoch": 0.5162231856952592, + "epoch": 0.7127709523436888, "grad_norm": 0.0, - "learning_rate": 9.960535164054444e-06, - "loss": 0.895, + "learning_rate": 4.023859730210323e-06, + "loss": 1.0357, "step": 18217 }, { - "epoch": 0.5162515231375217, + "epoch": 0.7128100790359183, "grad_norm": 0.0, - "learning_rate": 9.959617382104284e-06, - "loss": 0.9228, + "learning_rate": 4.0228437326518245e-06, + "loss": 0.9405, "step": 18218 }, { - "epoch": 0.516279860579784, + "epoch": 0.7128492057281477, "grad_norm": 0.0, - "learning_rate": 9.95869960049428e-06, - "loss": 0.9246, + "learning_rate": 4.021827831078972e-06, + "loss": 0.9444, "step": 18219 }, { - "epoch": 0.5163081980220465, + "epoch": 0.7128883324203772, "grad_norm": 0.0, - "learning_rate": 9.957781819232163e-06, - "loss": 0.9453, + "learning_rate": 4.020812025508072e-06, + "loss": 0.8803, "step": 18220 }, { - "epoch": 0.516336535464309, + "epoch": 0.7129274591126066, "grad_norm": 0.0, - "learning_rate": 9.95686403832567e-06, - "loss": 0.9387, + "learning_rate": 4.019796315955441e-06, + "loss": 0.9599, "step": 18221 }, { - "epoch": 0.5163648729065714, + "epoch": 0.7129665858048361, "grad_norm": 0.0, - "learning_rate": 9.955946257782524e-06, - "loss": 0.9027, + "learning_rate": 4.018780702437381e-06, + "loss": 1.0294, "step": 18222 }, { - "epoch": 0.5163932103488339, + "epoch": 0.7130057124970655, "grad_norm": 0.0, - "learning_rate": 9.95502847761046e-06, - "loss": 0.8136, + "learning_rate": 4.017765184970213e-06, + "loss": 0.8691, "step": 18223 }, { - "epoch": 0.5164215477910964, + "epoch": 0.713044839189295, "grad_norm": 0.0, - "learning_rate": 9.954110697817207e-06, - "loss": 0.8748, + "learning_rate": 4.016749763570238e-06, + "loss": 0.9666, "step": 18224 }, { - "epoch": 0.5164498852333589, + "epoch": 0.7130839658815243, "grad_norm": 0.0, - "learning_rate": 9.953192918410496e-06, - "loss": 0.82, + "learning_rate": 4.015734438253768e-06, + "loss": 1.0296, "step": 18225 }, { - "epoch": 0.5164782226756213, + "epoch": 0.7131230925737538, "grad_norm": 0.0, - "learning_rate": 9.952275139398062e-06, - "loss": 0.805, + "learning_rate": 4.014719209037097e-06, + "loss": 0.9847, "step": 18226 }, { - "epoch": 0.5165065601178838, + "epoch": 0.7131622192659832, "grad_norm": 0.0, - "learning_rate": 9.95135736078763e-06, - "loss": 0.86, + "learning_rate": 4.013704075936543e-06, + "loss": 0.8811, "step": 18227 }, { - "epoch": 0.5165348975601463, + "epoch": 0.7132013459582127, "grad_norm": 0.0, - "learning_rate": 9.95043958258693e-06, - "loss": 0.9467, + "learning_rate": 4.012689038968396e-06, + "loss": 0.9435, "step": 18228 }, { - "epoch": 0.5165632350024086, + "epoch": 0.7132404726504421, "grad_norm": 0.0, - "learning_rate": 9.949521804803699e-06, - "loss": 1.0147, + "learning_rate": 4.011674098148964e-06, + "loss": 0.9901, "step": 18229 }, { - "epoch": 0.5165915724446711, + "epoch": 0.7132795993426716, "grad_norm": 0.0, - "learning_rate": 9.948604027445666e-06, - "loss": 0.8821, + "learning_rate": 4.010659253494538e-06, + "loss": 1.1058, "step": 18230 }, { - "epoch": 0.5166199098869336, + "epoch": 0.713318726034901, "grad_norm": 0.0, - "learning_rate": 9.94768625052056e-06, - "loss": 0.9642, + "learning_rate": 4.009644505021422e-06, + "loss": 1.0956, "step": 18231 }, { - "epoch": 0.5166482473291961, + "epoch": 0.7133578527271305, "grad_norm": 0.0, - "learning_rate": 9.946768474036107e-06, - "loss": 0.8297, + "learning_rate": 4.008629852745907e-06, + "loss": 0.9651, "step": 18232 }, { - "epoch": 0.5166765847714585, + "epoch": 0.7133969794193599, "grad_norm": 0.0, - "learning_rate": 9.945850698000047e-06, - "loss": 0.8809, + "learning_rate": 4.007615296684293e-06, + "loss": 1.0903, "step": 18233 }, { - "epoch": 0.516704922213721, + "epoch": 0.7134361061115894, "grad_norm": 0.0, - "learning_rate": 9.944932922420109e-06, - "loss": 0.9025, + "learning_rate": 4.006600836852864e-06, + "loss": 1.0658, "step": 18234 }, { - "epoch": 0.5167332596559835, + "epoch": 0.7134752328038187, "grad_norm": 0.0, - "learning_rate": 9.944015147304018e-06, - "loss": 0.9091, + "learning_rate": 4.005586473267916e-06, + "loss": 1.0586, "step": 18235 }, { - "epoch": 0.5167615970982459, + "epoch": 0.7135143594960482, "grad_norm": 0.0, - "learning_rate": 9.943097372659509e-06, - "loss": 0.9406, + "learning_rate": 4.0045722059457415e-06, + "loss": 0.9864, "step": 18236 }, { - "epoch": 0.5167899345405084, + "epoch": 0.7135534861882776, "grad_norm": 0.0, - "learning_rate": 9.94217959849431e-06, - "loss": 1.0366, + "learning_rate": 4.003558034902621e-06, + "loss": 0.9756, "step": 18237 }, { - "epoch": 0.5168182719827709, + "epoch": 0.7135926128805071, "grad_norm": 0.0, - "learning_rate": 9.941261824816155e-06, - "loss": 0.7775, + "learning_rate": 4.002543960154844e-06, + "loss": 0.966, "step": 18238 }, { - "epoch": 0.5168466094250332, + "epoch": 0.7136317395727365, "grad_norm": 0.0, - "learning_rate": 9.940344051632778e-06, - "loss": 1.0202, + "learning_rate": 4.001529981718701e-06, + "loss": 0.9601, "step": 18239 }, { - "epoch": 0.5168749468672957, + "epoch": 0.713670866264966, "grad_norm": 0.0, - "learning_rate": 9.9394262789519e-06, - "loss": 0.8237, + "learning_rate": 4.000516099610465e-06, + "loss": 0.876, "step": 18240 }, { - "epoch": 0.5169032843095582, + "epoch": 0.7137099929571954, "grad_norm": 0.0, - "learning_rate": 9.938508506781256e-06, - "loss": 0.9061, + "learning_rate": 3.999502313846423e-06, + "loss": 0.9805, "step": 18241 }, { - "epoch": 0.5169316217518207, + "epoch": 0.7137491196494249, "grad_norm": 0.0, - "learning_rate": 9.93759073512858e-06, - "loss": 0.8837, + "learning_rate": 3.998488624442854e-06, + "loss": 1.0519, "step": 18242 }, { - "epoch": 0.5169599591940831, + "epoch": 0.7137882463416543, "grad_norm": 0.0, - "learning_rate": 9.9366729640016e-06, - "loss": 0.8822, + "learning_rate": 3.997475031416042e-06, + "loss": 1.0391, "step": 18243 }, { - "epoch": 0.5169882966363456, + "epoch": 0.7138273730338837, "grad_norm": 0.0, - "learning_rate": 9.935755193408052e-06, - "loss": 1.0098, + "learning_rate": 3.9964615347822555e-06, + "loss": 0.9659, "step": 18244 }, { - "epoch": 0.5170166340786081, + "epoch": 0.7138664997261132, "grad_norm": 0.0, - "learning_rate": 9.934837423355654e-06, - "loss": 0.8322, + "learning_rate": 3.995448134557775e-06, + "loss": 0.8938, "step": 18245 }, { - "epoch": 0.5170449715208705, + "epoch": 0.7139056264183425, "grad_norm": 0.0, - "learning_rate": 9.933919653852147e-06, - "loss": 0.9472, + "learning_rate": 3.994434830758875e-06, + "loss": 0.9697, "step": 18246 }, { - "epoch": 0.517073308963133, + "epoch": 0.713944753110572, "grad_norm": 0.0, - "learning_rate": 9.933001884905263e-06, - "loss": 1.028, + "learning_rate": 3.993421623401824e-06, + "loss": 1.0589, "step": 18247 }, { - "epoch": 0.5171016464053955, + "epoch": 0.7139838798028014, "grad_norm": 0.0, - "learning_rate": 9.932084116522725e-06, - "loss": 0.9038, + "learning_rate": 3.992408512502894e-06, + "loss": 1.0269, "step": 18248 }, { - "epoch": 0.517129983847658, + "epoch": 0.7140230064950309, "grad_norm": 0.0, - "learning_rate": 9.931166348712268e-06, - "loss": 0.8991, + "learning_rate": 3.9913954980783565e-06, + "loss": 1.0587, "step": 18249 }, { - "epoch": 0.5171583212899203, + "epoch": 0.7140621331872603, "grad_norm": 0.0, - "learning_rate": 9.930248581481625e-06, - "loss": 0.868, + "learning_rate": 3.990382580144481e-06, + "loss": 1.0524, "step": 18250 }, { - "epoch": 0.5171866587321828, + "epoch": 0.7141012598794898, "grad_norm": 0.0, - "learning_rate": 9.92933081483852e-06, - "loss": 0.9747, + "learning_rate": 3.989369758717528e-06, + "loss": 0.9616, "step": 18251 }, { - "epoch": 0.5172149961744453, + "epoch": 0.7141403865717192, "grad_norm": 0.0, - "learning_rate": 9.928413048790694e-06, - "loss": 0.8388, + "learning_rate": 3.988357033813767e-06, + "loss": 0.9368, "step": 18252 }, { - "epoch": 0.5172433336167077, + "epoch": 0.7141795132639487, "grad_norm": 0.0, - "learning_rate": 9.927495283345866e-06, - "loss": 0.982, + "learning_rate": 3.987344405449453e-06, + "loss": 0.9445, "step": 18253 }, { - "epoch": 0.5172716710589702, + "epoch": 0.7142186399561781, "grad_norm": 0.0, - "learning_rate": 9.926577518511772e-06, - "loss": 0.8915, + "learning_rate": 3.986331873640861e-06, + "loss": 0.9977, "step": 18254 }, { - "epoch": 0.5173000085012327, + "epoch": 0.7142577666484076, "grad_norm": 0.0, - "learning_rate": 9.925659754296145e-06, - "loss": 0.9141, + "learning_rate": 3.985319438404238e-06, + "loss": 0.9155, "step": 18255 }, { - "epoch": 0.5173283459434952, + "epoch": 0.714296893340637, "grad_norm": 0.0, - "learning_rate": 9.924741990706712e-06, - "loss": 0.9376, + "learning_rate": 3.984307099755853e-06, + "loss": 1.0781, "step": 18256 }, { - "epoch": 0.5173566833857576, + "epoch": 0.7143360200328664, "grad_norm": 0.0, - "learning_rate": 9.923824227751205e-06, - "loss": 0.7575, + "learning_rate": 3.983294857711955e-06, + "loss": 1.0675, "step": 18257 }, { - "epoch": 0.5173850208280201, + "epoch": 0.7143751467250958, "grad_norm": 0.0, - "learning_rate": 9.92290646543736e-06, - "loss": 0.8606, + "learning_rate": 3.982282712288802e-06, + "loss": 0.8663, "step": 18258 }, { - "epoch": 0.5174133582702826, + "epoch": 0.7144142734173253, "grad_norm": 0.0, - "learning_rate": 9.921988703772897e-06, - "loss": 0.8009, + "learning_rate": 3.981270663502647e-06, + "loss": 1.0665, "step": 18259 }, { - "epoch": 0.517441695712545, + "epoch": 0.7144534001095547, "grad_norm": 0.0, - "learning_rate": 9.921070942765556e-06, - "loss": 0.9523, + "learning_rate": 3.980258711369747e-06, + "loss": 1.082, "step": 18260 }, { - "epoch": 0.5174700331548074, + "epoch": 0.7144925268017842, "grad_norm": 0.0, - "learning_rate": 9.920153182423062e-06, - "loss": 0.8791, + "learning_rate": 3.979246855906346e-06, + "loss": 1.0965, "step": 18261 }, { - "epoch": 0.5174983705970699, + "epoch": 0.7145316534940136, "grad_norm": 0.0, - "learning_rate": 9.919235422753143e-06, - "loss": 0.887, + "learning_rate": 3.9782350971286965e-06, + "loss": 0.968, "step": 18262 }, { - "epoch": 0.5175267080393323, + "epoch": 0.7145707801862431, "grad_norm": 0.0, - "learning_rate": 9.91831766376354e-06, - "loss": 1.0197, + "learning_rate": 3.977223435053045e-06, + "loss": 0.9506, "step": 18263 }, { - "epoch": 0.5175550454815948, + "epoch": 0.7146099068784725, "grad_norm": 0.0, - "learning_rate": 9.917399905461974e-06, - "loss": 0.9069, + "learning_rate": 3.976211869695641e-06, + "loss": 1.035, "step": 18264 }, { - "epoch": 0.5175833829238573, + "epoch": 0.714649033570702, "grad_norm": 0.0, - "learning_rate": 9.916482147856184e-06, - "loss": 0.8711, + "learning_rate": 3.975200401072723e-06, + "loss": 0.9878, "step": 18265 }, { - "epoch": 0.5176117203661198, + "epoch": 0.7146881602629314, "grad_norm": 0.0, - "learning_rate": 9.915564390953891e-06, - "loss": 0.9355, + "learning_rate": 3.974189029200542e-06, + "loss": 0.9865, "step": 18266 }, { - "epoch": 0.5176400578083822, + "epoch": 0.7147272869551609, "grad_norm": 0.0, - "learning_rate": 9.91464663476283e-06, - "loss": 0.9176, + "learning_rate": 3.973177754095325e-06, + "loss": 1.0046, "step": 18267 }, { - "epoch": 0.5176683952506447, + "epoch": 0.7147664136473902, "grad_norm": 0.0, - "learning_rate": 9.913728879290736e-06, - "loss": 0.854, + "learning_rate": 3.9721665757733295e-06, + "loss": 0.9634, "step": 18268 }, { - "epoch": 0.5176967326929072, + "epoch": 0.7148055403396197, "grad_norm": 0.0, - "learning_rate": 9.912811124545334e-06, - "loss": 0.8706, + "learning_rate": 3.971155494250783e-06, + "loss": 0.9866, "step": 18269 }, { - "epoch": 0.5177250701351696, + "epoch": 0.7148446670318491, "grad_norm": 0.0, - "learning_rate": 9.911893370534354e-06, - "loss": 0.9911, + "learning_rate": 3.970144509543927e-06, + "loss": 0.9703, "step": 18270 }, { - "epoch": 0.517753407577432, + "epoch": 0.7148837937240786, "grad_norm": 0.0, - "learning_rate": 9.910975617265535e-06, - "loss": 0.9021, + "learning_rate": 3.969133621668987e-06, + "loss": 1.0846, "step": 18271 }, { - "epoch": 0.5177817450196945, + "epoch": 0.714922920416308, "grad_norm": 0.0, - "learning_rate": 9.910057864746596e-06, - "loss": 0.8292, + "learning_rate": 3.968122830642211e-06, + "loss": 1.0243, "step": 18272 }, { - "epoch": 0.517810082461957, + "epoch": 0.7149620471085374, "grad_norm": 0.0, - "learning_rate": 9.909140112985277e-06, - "loss": 0.9001, + "learning_rate": 3.967112136479822e-06, + "loss": 1.0187, "step": 18273 }, { - "epoch": 0.5178384199042194, + "epoch": 0.7150011738007669, "grad_norm": 0.0, - "learning_rate": 9.908222361989301e-06, - "loss": 0.9565, + "learning_rate": 3.966101539198055e-06, + "loss": 1.0763, "step": 18274 }, { - "epoch": 0.5178667573464819, + "epoch": 0.7150403004929963, "grad_norm": 0.0, - "learning_rate": 9.907304611766402e-06, - "loss": 0.9058, + "learning_rate": 3.965091038813132e-06, + "loss": 0.9347, "step": 18275 }, { - "epoch": 0.5178950947887444, + "epoch": 0.7150794271852258, "grad_norm": 0.0, - "learning_rate": 9.906386862324313e-06, - "loss": 0.8941, + "learning_rate": 3.9640806353412866e-06, + "loss": 0.9823, "step": 18276 }, { - "epoch": 0.5179234322310068, + "epoch": 0.7151185538774552, "grad_norm": 0.0, - "learning_rate": 9.90546911367076e-06, - "loss": 0.9723, + "learning_rate": 3.963070328798741e-06, + "loss": 1.0406, "step": 18277 }, { - "epoch": 0.5179517696732693, + "epoch": 0.7151576805696847, "grad_norm": 0.0, - "learning_rate": 9.904551365813475e-06, - "loss": 0.9258, + "learning_rate": 3.962060119201726e-06, + "loss": 0.9409, "step": 18278 }, { - "epoch": 0.5179801071155318, + "epoch": 0.715196807261914, "grad_norm": 0.0, - "learning_rate": 9.903633618760195e-06, - "loss": 0.9315, + "learning_rate": 3.961050006566455e-06, + "loss": 0.9358, "step": 18279 }, { - "epoch": 0.5180084445577943, + "epoch": 0.7152359339541435, "grad_norm": 0.0, - "learning_rate": 9.90271587251864e-06, - "loss": 1.0048, + "learning_rate": 3.960039990909155e-06, + "loss": 1.0783, "step": 18280 }, { - "epoch": 0.5180367820000567, + "epoch": 0.7152750606463729, "grad_norm": 0.0, - "learning_rate": 9.901798127096545e-06, - "loss": 0.8736, + "learning_rate": 3.959030072246043e-06, + "loss": 1.0445, "step": 18281 }, { - "epoch": 0.5180651194423191, + "epoch": 0.7153141873386024, "grad_norm": 0.0, - "learning_rate": 9.900880382501641e-06, - "loss": 0.8913, + "learning_rate": 3.958020250593342e-06, + "loss": 0.9789, "step": 18282 }, { - "epoch": 0.5180934568845816, + "epoch": 0.7153533140308318, "grad_norm": 0.0, - "learning_rate": 9.89996263874166e-06, - "loss": 0.8516, + "learning_rate": 3.957010525967262e-06, + "loss": 1.0402, "step": 18283 }, { - "epoch": 0.518121794326844, + "epoch": 0.7153924407230613, "grad_norm": 0.0, - "learning_rate": 9.899044895824332e-06, - "loss": 0.9956, + "learning_rate": 3.956000898384019e-06, + "loss": 1.0362, "step": 18284 }, { - "epoch": 0.5181501317691065, + "epoch": 0.7154315674152907, "grad_norm": 0.0, - "learning_rate": 9.898127153757385e-06, - "loss": 0.8291, + "learning_rate": 3.954991367859833e-06, + "loss": 0.9147, "step": 18285 }, { - "epoch": 0.518178469211369, + "epoch": 0.7154706941075202, "grad_norm": 0.0, - "learning_rate": 9.89720941254855e-06, - "loss": 0.8829, + "learning_rate": 3.953981934410907e-06, + "loss": 0.9615, "step": 18286 }, { - "epoch": 0.5182068066536314, + "epoch": 0.7155098207997496, "grad_norm": 0.0, - "learning_rate": 9.89629167220556e-06, - "loss": 0.8583, + "learning_rate": 3.952972598053455e-06, + "loss": 1.0566, "step": 18287 }, { - "epoch": 0.5182351440958939, + "epoch": 0.7155489474919791, "grad_norm": 0.0, - "learning_rate": 9.89537393273614e-06, - "loss": 0.8708, + "learning_rate": 3.95196335880369e-06, + "loss": 1.1979, "step": 18288 }, { - "epoch": 0.5182634815381564, + "epoch": 0.7155880741842084, "grad_norm": 0.0, - "learning_rate": 9.894456194148028e-06, - "loss": 0.8198, + "learning_rate": 3.95095421667781e-06, + "loss": 0.8674, "step": 18289 }, { - "epoch": 0.5182918189804189, + "epoch": 0.7156272008764379, "grad_norm": 0.0, - "learning_rate": 9.893538456448949e-06, - "loss": 0.8026, + "learning_rate": 3.949945171692026e-06, + "loss": 1.018, "step": 18290 }, { - "epoch": 0.5183201564226813, + "epoch": 0.7156663275686673, "grad_norm": 0.0, - "learning_rate": 9.892620719646635e-06, - "loss": 0.8286, + "learning_rate": 3.948936223862545e-06, + "loss": 0.9605, "step": 18291 }, { - "epoch": 0.5183484938649437, + "epoch": 0.7157054542608968, "grad_norm": 0.0, - "learning_rate": 9.89170298374882e-06, - "loss": 0.8854, + "learning_rate": 3.947927373205562e-06, + "loss": 0.943, "step": 18292 }, { - "epoch": 0.5183768313072062, + "epoch": 0.7157445809531262, "grad_norm": 0.0, - "learning_rate": 9.890785248763227e-06, - "loss": 0.8265, + "learning_rate": 3.946918619737282e-06, + "loss": 0.9566, "step": 18293 }, { - "epoch": 0.5184051687494686, + "epoch": 0.7157837076453557, "grad_norm": 0.0, - "learning_rate": 9.889867514697591e-06, - "loss": 0.8863, + "learning_rate": 3.945909963473904e-06, + "loss": 0.9575, "step": 18294 }, { - "epoch": 0.5184335061917311, + "epoch": 0.7158228343375851, "grad_norm": 0.0, - "learning_rate": 9.888949781559642e-06, - "loss": 0.8223, + "learning_rate": 3.944901404431629e-06, + "loss": 1.0728, "step": 18295 }, { - "epoch": 0.5184618436339936, + "epoch": 0.7158619610298146, "grad_norm": 0.0, - "learning_rate": 9.888032049357108e-06, - "loss": 0.865, + "learning_rate": 3.943892942626647e-06, + "loss": 0.9734, "step": 18296 }, { - "epoch": 0.5184901810762561, + "epoch": 0.715901087722044, "grad_norm": 0.0, - "learning_rate": 9.887114318097728e-06, - "loss": 0.8753, + "learning_rate": 3.942884578075158e-06, + "loss": 0.9158, "step": 18297 }, { - "epoch": 0.5185185185185185, + "epoch": 0.7159402144142735, "grad_norm": 0.0, - "learning_rate": 9.886196587789221e-06, - "loss": 0.9015, + "learning_rate": 3.9418763107933465e-06, + "loss": 0.8702, "step": 18298 }, { - "epoch": 0.518546855960781, + "epoch": 0.7159793411065029, "grad_norm": 0.0, - "learning_rate": 9.885278858439321e-06, - "loss": 0.91, + "learning_rate": 3.940868140797418e-06, + "loss": 0.8654, "step": 18299 }, { - "epoch": 0.5185751934030435, + "epoch": 0.7160184677987323, "grad_norm": 0.0, - "learning_rate": 9.884361130055766e-06, - "loss": 0.8926, + "learning_rate": 3.93986006810355e-06, + "loss": 0.9667, "step": 18300 }, { - "epoch": 0.5186035308453059, + "epoch": 0.7160575944909617, "grad_norm": 0.0, - "learning_rate": 9.883443402646275e-06, - "loss": 0.871, + "learning_rate": 3.938852092727939e-06, + "loss": 0.9978, "step": 18301 }, { - "epoch": 0.5186318682875684, + "epoch": 0.7160967211831911, "grad_norm": 0.0, - "learning_rate": 9.882525676218586e-06, - "loss": 0.9958, + "learning_rate": 3.937844214686763e-06, + "loss": 0.9402, "step": 18302 }, { - "epoch": 0.5186602057298308, + "epoch": 0.7161358478754206, "grad_norm": 0.0, - "learning_rate": 9.881607950780424e-06, - "loss": 0.9312, + "learning_rate": 3.936836433996217e-06, + "loss": 0.9684, "step": 18303 }, { - "epoch": 0.5186885431720933, + "epoch": 0.71617497456765, "grad_norm": 0.0, - "learning_rate": 9.880690226339524e-06, - "loss": 0.8245, + "learning_rate": 3.935828750672478e-06, + "loss": 0.9176, "step": 18304 }, { - "epoch": 0.5187168806143557, + "epoch": 0.7162141012598795, "grad_norm": 0.0, - "learning_rate": 9.879772502903617e-06, - "loss": 0.9714, + "learning_rate": 3.934821164731735e-06, + "loss": 0.9908, "step": 18305 }, { - "epoch": 0.5187452180566182, + "epoch": 0.7162532279521089, "grad_norm": 0.0, - "learning_rate": 9.878854780480427e-06, + "learning_rate": 3.933813676190159e-06, "loss": 0.9051, "step": 18306 }, { - "epoch": 0.5187735554988807, + "epoch": 0.7162923546443384, "grad_norm": 0.0, - "learning_rate": 9.877937059077689e-06, - "loss": 0.9661, + "learning_rate": 3.9328062850639346e-06, + "loss": 1.0685, "step": 18307 }, { - "epoch": 0.5188018929411431, + "epoch": 0.7163314813365678, "grad_norm": 0.0, - "learning_rate": 9.877019338703133e-06, - "loss": 0.8738, + "learning_rate": 3.931798991369239e-06, + "loss": 1.0338, "step": 18308 }, { - "epoch": 0.5188302303834056, + "epoch": 0.7163706080287973, "grad_norm": 0.0, - "learning_rate": 9.876101619364487e-06, - "loss": 0.8627, + "learning_rate": 3.930791795122251e-06, + "loss": 0.9224, "step": 18309 }, { - "epoch": 0.5188585678256681, + "epoch": 0.7164097347210266, "grad_norm": 0.0, - "learning_rate": 9.875183901069489e-06, - "loss": 0.752, + "learning_rate": 3.929784696339137e-06, + "loss": 0.8753, "step": 18310 }, { - "epoch": 0.5188869052679305, + "epoch": 0.7164488614132561, "grad_norm": 0.0, - "learning_rate": 9.874266183825858e-06, - "loss": 0.9116, + "learning_rate": 3.928777695036076e-06, + "loss": 1.0135, "step": 18311 }, { - "epoch": 0.518915242710193, + "epoch": 0.7164879881054855, "grad_norm": 0.0, - "learning_rate": 9.873348467641329e-06, - "loss": 0.8876, + "learning_rate": 3.927770791229236e-06, + "loss": 0.8522, "step": 18312 }, { - "epoch": 0.5189435801524555, + "epoch": 0.716527114797715, "grad_norm": 0.0, - "learning_rate": 9.872430752523638e-06, - "loss": 0.8712, + "learning_rate": 3.926763984934792e-06, + "loss": 0.9381, "step": 18313 }, { - "epoch": 0.518971917594718, + "epoch": 0.7165662414899444, "grad_norm": 0.0, - "learning_rate": 9.871513038480506e-06, - "loss": 0.9102, + "learning_rate": 3.925757276168907e-06, + "loss": 1.0504, "step": 18314 }, { - "epoch": 0.5190002550369803, + "epoch": 0.7166053681821739, "grad_norm": 0.0, - "learning_rate": 9.870595325519669e-06, - "loss": 1.058, + "learning_rate": 3.924750664947749e-06, + "loss": 1.0437, "step": 18315 }, { - "epoch": 0.5190285924792428, + "epoch": 0.7166444948744033, "grad_norm": 0.0, - "learning_rate": 9.869677613648853e-06, - "loss": 0.8329, + "learning_rate": 3.923744151287477e-06, + "loss": 0.8938, "step": 18316 }, { - "epoch": 0.5190569299215053, + "epoch": 0.7166836215666328, "grad_norm": 0.0, - "learning_rate": 9.868759902875792e-06, - "loss": 0.9184, + "learning_rate": 3.922737735204267e-06, + "loss": 0.9723, "step": 18317 }, { - "epoch": 0.5190852673637677, + "epoch": 0.7167227482588622, "grad_norm": 0.0, - "learning_rate": 9.86784219320822e-06, - "loss": 0.8085, + "learning_rate": 3.92173141671427e-06, + "loss": 0.9841, "step": 18318 }, { - "epoch": 0.5191136048060302, + "epoch": 0.7167618749510917, "grad_norm": 0.0, - "learning_rate": 9.866924484653856e-06, - "loss": 0.811, + "learning_rate": 3.9207251958336545e-06, + "loss": 1.0024, "step": 18319 }, { - "epoch": 0.5191419422482927, + "epoch": 0.7168010016433211, "grad_norm": 0.0, - "learning_rate": 9.866006777220437e-06, - "loss": 0.9082, + "learning_rate": 3.919719072578567e-06, + "loss": 1.0305, "step": 18320 }, { - "epoch": 0.5191702796905552, + "epoch": 0.7168401283355506, "grad_norm": 0.0, - "learning_rate": 9.865089070915695e-06, - "loss": 0.8639, + "learning_rate": 3.918713046965179e-06, + "loss": 0.966, "step": 18321 }, { - "epoch": 0.5191986171328176, + "epoch": 0.7168792550277799, "grad_norm": 0.0, - "learning_rate": 9.864171365747356e-06, - "loss": 0.8989, + "learning_rate": 3.917707119009636e-06, + "loss": 0.9731, "step": 18322 }, { - "epoch": 0.5192269545750801, + "epoch": 0.7169183817200094, "grad_norm": 0.0, - "learning_rate": 9.863253661723157e-06, - "loss": 0.8261, + "learning_rate": 3.916701288728097e-06, + "loss": 0.9948, "step": 18323 }, { - "epoch": 0.5192552920173426, + "epoch": 0.7169575084122388, "grad_norm": 0.0, - "learning_rate": 9.862335958850816e-06, - "loss": 0.8494, + "learning_rate": 3.915695556136712e-06, + "loss": 1.009, "step": 18324 }, { - "epoch": 0.5192836294596049, + "epoch": 0.7169966351044683, "grad_norm": 0.0, - "learning_rate": 9.861418257138074e-06, - "loss": 0.8574, + "learning_rate": 3.91468992125163e-06, + "loss": 0.9039, "step": 18325 }, { - "epoch": 0.5193119669018674, + "epoch": 0.7170357617966977, "grad_norm": 0.0, - "learning_rate": 9.860500556592661e-06, - "loss": 0.8841, + "learning_rate": 3.913684384089002e-06, + "loss": 1.0886, "step": 18326 }, { - "epoch": 0.5193403043441299, + "epoch": 0.7170748884889272, "grad_norm": 0.0, - "learning_rate": 9.859582857222297e-06, - "loss": 0.9839, + "learning_rate": 3.9126789446649815e-06, + "loss": 0.9548, "step": 18327 }, { - "epoch": 0.5193686417863924, + "epoch": 0.7171140151811566, "grad_norm": 0.0, - "learning_rate": 9.85866515903472e-06, - "loss": 0.9112, + "learning_rate": 3.911673602995705e-06, + "loss": 1.1428, "step": 18328 }, { - "epoch": 0.5193969792286548, + "epoch": 0.717153141873386, "grad_norm": 0.0, - "learning_rate": 9.857747462037663e-06, - "loss": 0.8825, + "learning_rate": 3.910668359097321e-06, + "loss": 1.091, "step": 18329 }, { - "epoch": 0.5194253166709173, + "epoch": 0.7171922685656155, "grad_norm": 0.0, - "learning_rate": 9.856829766238846e-06, - "loss": 0.9276, + "learning_rate": 3.909663212985978e-06, + "loss": 0.9631, "step": 18330 }, { - "epoch": 0.5194536541131798, + "epoch": 0.7172313952578449, "grad_norm": 0.0, - "learning_rate": 9.855912071646012e-06, - "loss": 0.9572, + "learning_rate": 3.908658164677807e-06, + "loss": 0.9646, "step": 18331 }, { - "epoch": 0.5194819915554422, + "epoch": 0.7172705219500743, "grad_norm": 0.0, - "learning_rate": 9.85499437826688e-06, - "loss": 0.8811, + "learning_rate": 3.907653214188953e-06, + "loss": 1.0776, "step": 18332 }, { - "epoch": 0.5195103289977047, + "epoch": 0.7173096486423037, "grad_norm": 0.0, - "learning_rate": 9.854076686109183e-06, - "loss": 0.8164, + "learning_rate": 3.906648361535559e-06, + "loss": 0.8913, "step": 18333 }, { - "epoch": 0.5195386664399672, + "epoch": 0.7173487753345332, "grad_norm": 0.0, - "learning_rate": 9.853158995180656e-06, - "loss": 0.8561, + "learning_rate": 3.905643606733752e-06, + "loss": 0.8903, "step": 18334 }, { - "epoch": 0.5195670038822295, + "epoch": 0.7173879020267626, "grad_norm": 0.0, - "learning_rate": 9.852241305489021e-06, - "loss": 0.9391, + "learning_rate": 3.904638949799673e-06, + "loss": 1.0627, "step": 18335 }, { - "epoch": 0.519595341324492, + "epoch": 0.7174270287189921, "grad_norm": 0.0, - "learning_rate": 9.851323617042012e-06, - "loss": 0.9166, + "learning_rate": 3.903634390749458e-06, + "loss": 0.8922, "step": 18336 }, { - "epoch": 0.5196236787667545, + "epoch": 0.7174661554112215, "grad_norm": 0.0, - "learning_rate": 9.850405929847367e-06, - "loss": 0.8912, + "learning_rate": 3.902629929599231e-06, + "loss": 0.8147, "step": 18337 }, { - "epoch": 0.519652016209017, + "epoch": 0.717505282103451, "grad_norm": 0.0, - "learning_rate": 9.849488243912802e-06, - "loss": 0.9764, + "learning_rate": 3.901625566365128e-06, + "loss": 0.9372, "step": 18338 }, { - "epoch": 0.5196803536512794, + "epoch": 0.7175444087956804, "grad_norm": 0.0, - "learning_rate": 9.848570559246055e-06, - "loss": 0.9753, + "learning_rate": 3.900621301063276e-06, + "loss": 0.9669, "step": 18339 }, { - "epoch": 0.5197086910935419, + "epoch": 0.7175835354879099, "grad_norm": 0.0, - "learning_rate": 9.847652875854855e-06, - "loss": 0.8827, + "learning_rate": 3.899617133709807e-06, + "loss": 0.9479, "step": 18340 }, { - "epoch": 0.5197370285358044, + "epoch": 0.7176226621801393, "grad_norm": 0.0, - "learning_rate": 9.846735193746929e-06, - "loss": 0.8931, + "learning_rate": 3.8986130643208385e-06, + "loss": 0.9155, "step": 18341 }, { - "epoch": 0.5197653659780668, + "epoch": 0.7176617888723688, "grad_norm": 0.0, - "learning_rate": 9.845817512930012e-06, - "loss": 0.8974, + "learning_rate": 3.8976090929125e-06, + "loss": 0.9429, "step": 18342 }, { - "epoch": 0.5197937034203293, + "epoch": 0.7177009155645981, "grad_norm": 0.0, - "learning_rate": 9.84489983341183e-06, - "loss": 0.9174, + "learning_rate": 3.89660521950091e-06, + "loss": 1.0202, "step": 18343 }, { - "epoch": 0.5198220408625918, + "epoch": 0.7177400422568276, "grad_norm": 0.0, - "learning_rate": 9.843982155200117e-06, - "loss": 0.9254, + "learning_rate": 3.895601444102198e-06, + "loss": 0.9831, "step": 18344 }, { - "epoch": 0.5198503783048543, + "epoch": 0.717779168949057, "grad_norm": 0.0, - "learning_rate": 9.843064478302596e-06, - "loss": 0.9695, + "learning_rate": 3.894597766732474e-06, + "loss": 1.1303, "step": 18345 }, { - "epoch": 0.5198787157471166, + "epoch": 0.7178182956412865, "grad_norm": 0.0, - "learning_rate": 9.842146802727001e-06, - "loss": 0.8582, + "learning_rate": 3.893594187407863e-06, + "loss": 1.1356, "step": 18346 }, { - "epoch": 0.5199070531893791, + "epoch": 0.7178574223335159, "grad_norm": 0.0, - "learning_rate": 9.841229128481065e-06, - "loss": 0.8543, + "learning_rate": 3.89259070614447e-06, + "loss": 1.094, "step": 18347 }, { - "epoch": 0.5199353906316416, + "epoch": 0.7178965490257454, "grad_norm": 0.0, - "learning_rate": 9.840311455572515e-06, - "loss": 0.9283, + "learning_rate": 3.891587322958424e-06, + "loss": 1.0718, "step": 18348 }, { - "epoch": 0.519963728073904, + "epoch": 0.7179356757179748, "grad_norm": 0.0, - "learning_rate": 9.839393784009078e-06, - "loss": 0.8756, + "learning_rate": 3.890584037865829e-06, + "loss": 1.0462, "step": 18349 }, { - "epoch": 0.5199920655161665, + "epoch": 0.7179748024102043, "grad_norm": 0.0, - "learning_rate": 9.838476113798492e-06, - "loss": 0.7839, + "learning_rate": 3.889580850882801e-06, + "loss": 0.9301, "step": 18350 }, { - "epoch": 0.520020402958429, + "epoch": 0.7180139291024337, "grad_norm": 0.0, - "learning_rate": 9.837558444948478e-06, - "loss": 0.7851, + "learning_rate": 3.888577762025441e-06, + "loss": 0.9997, "step": 18351 }, { - "epoch": 0.5200487404006914, + "epoch": 0.7180530557946632, "grad_norm": 0.0, - "learning_rate": 9.836640777466771e-06, - "loss": 0.8572, + "learning_rate": 3.887574771309871e-06, + "loss": 0.8361, "step": 18352 }, { - "epoch": 0.5200770778429539, + "epoch": 0.7180921824868925, "grad_norm": 0.0, - "learning_rate": 9.835723111361096e-06, - "loss": 0.8802, + "learning_rate": 3.886571878752188e-06, + "loss": 0.9494, "step": 18353 }, { - "epoch": 0.5201054152852164, + "epoch": 0.718131309179122, "grad_norm": 0.0, - "learning_rate": 9.834805446639187e-06, - "loss": 0.8477, + "learning_rate": 3.885569084368503e-06, + "loss": 1.0156, "step": 18354 }, { - "epoch": 0.5201337527274789, + "epoch": 0.7181704358713514, "grad_norm": 0.0, - "learning_rate": 9.833887783308778e-06, - "loss": 0.9015, + "learning_rate": 3.884566388174914e-06, + "loss": 1.0449, "step": 18355 }, { - "epoch": 0.5201620901697412, + "epoch": 0.7182095625635809, "grad_norm": 0.0, - "learning_rate": 9.83297012137759e-06, - "loss": 0.8438, + "learning_rate": 3.883563790187526e-06, + "loss": 0.9318, "step": 18356 }, { - "epoch": 0.5201904276120037, + "epoch": 0.7182486892558103, "grad_norm": 0.0, - "learning_rate": 9.832052460853356e-06, - "loss": 0.9023, + "learning_rate": 3.882561290422437e-06, + "loss": 0.8817, "step": 18357 }, { - "epoch": 0.5202187650542662, + "epoch": 0.7182878159480397, "grad_norm": 0.0, - "learning_rate": 9.83113480174381e-06, - "loss": 0.8376, + "learning_rate": 3.881558888895754e-06, + "loss": 1.1002, "step": 18358 }, { - "epoch": 0.5202471024965286, + "epoch": 0.7183269426402692, "grad_norm": 0.0, - "learning_rate": 9.830217144056675e-06, - "loss": 0.8712, + "learning_rate": 3.880556585623564e-06, + "loss": 0.995, "step": 18359 }, { - "epoch": 0.5202754399387911, + "epoch": 0.7183660693324986, "grad_norm": 0.0, - "learning_rate": 9.829299487799686e-06, - "loss": 0.7423, + "learning_rate": 3.879554380621967e-06, + "loss": 0.9389, "step": 18360 }, { - "epoch": 0.5203037773810536, + "epoch": 0.7184051960247281, "grad_norm": 0.0, - "learning_rate": 9.82838183298057e-06, - "loss": 0.7809, + "learning_rate": 3.878552273907057e-06, + "loss": 0.9782, "step": 18361 }, { - "epoch": 0.5203321148233161, + "epoch": 0.7184443227169575, "grad_norm": 0.0, - "learning_rate": 9.827464179607055e-06, - "loss": 0.8202, + "learning_rate": 3.87755026549493e-06, + "loss": 0.9914, "step": 18362 }, { - "epoch": 0.5203604522655785, + "epoch": 0.718483449409187, "grad_norm": 0.0, - "learning_rate": 9.826546527686878e-06, - "loss": 0.9244, + "learning_rate": 3.876548355401672e-06, + "loss": 1.0349, "step": 18363 }, { - "epoch": 0.520388789707841, + "epoch": 0.7185225761014163, "grad_norm": 0.0, - "learning_rate": 9.825628877227762e-06, - "loss": 0.8063, + "learning_rate": 3.875546543643377e-06, + "loss": 1.0716, "step": 18364 }, { - "epoch": 0.5204171271501035, + "epoch": 0.7185617027936458, "grad_norm": 0.0, - "learning_rate": 9.824711228237437e-06, - "loss": 0.9116, + "learning_rate": 3.874544830236123e-06, + "loss": 0.9666, "step": 18365 }, { - "epoch": 0.5204454645923658, + "epoch": 0.7186008294858752, "grad_norm": 0.0, - "learning_rate": 9.823793580723637e-06, - "loss": 0.8627, + "learning_rate": 3.87354321519601e-06, + "loss": 1.0109, "step": 18366 }, { - "epoch": 0.5204738020346283, + "epoch": 0.7186399561781047, "grad_norm": 0.0, - "learning_rate": 9.822875934694086e-06, - "loss": 0.8401, + "learning_rate": 3.872541698539113e-06, + "loss": 1.0488, "step": 18367 }, { - "epoch": 0.5205021394768908, + "epoch": 0.7186790828703341, "grad_norm": 0.0, - "learning_rate": 9.821958290156522e-06, - "loss": 0.8698, + "learning_rate": 3.871540280281521e-06, + "loss": 0.9397, "step": 18368 }, { - "epoch": 0.5205304769191533, + "epoch": 0.7187182095625636, "grad_norm": 0.0, - "learning_rate": 9.821040647118666e-06, - "loss": 0.8873, + "learning_rate": 3.870538960439311e-06, + "loss": 1.1004, "step": 18369 }, { - "epoch": 0.5205588143614157, + "epoch": 0.718757336254793, "grad_norm": 0.0, - "learning_rate": 9.82012300558825e-06, - "loss": 0.7708, + "learning_rate": 3.869537739028563e-06, + "loss": 0.9772, "step": 18370 }, { - "epoch": 0.5205871518036782, + "epoch": 0.7187964629470225, "grad_norm": 0.0, - "learning_rate": 9.819205365573009e-06, - "loss": 0.995, + "learning_rate": 3.868536616065358e-06, + "loss": 1.1122, "step": 18371 }, { - "epoch": 0.5206154892459407, + "epoch": 0.7188355896392519, "grad_norm": 0.0, - "learning_rate": 9.818287727080663e-06, - "loss": 0.8961, + "learning_rate": 3.8675355915657755e-06, + "loss": 0.9313, "step": 18372 }, { - "epoch": 0.5206438266882031, + "epoch": 0.7188747163314814, "grad_norm": 0.0, - "learning_rate": 9.81737009011895e-06, - "loss": 0.9628, + "learning_rate": 3.866534665545882e-06, + "loss": 1.0689, "step": 18373 }, { - "epoch": 0.5206721641304656, + "epoch": 0.7189138430237108, "grad_norm": 0.0, - "learning_rate": 9.816452454695596e-06, - "loss": 0.9417, + "learning_rate": 3.8655338380217575e-06, + "loss": 0.9174, "step": 18374 }, { - "epoch": 0.5207005015727281, + "epoch": 0.7189529697159402, "grad_norm": 0.0, - "learning_rate": 9.815534820818329e-06, - "loss": 0.8824, + "learning_rate": 3.8645331090094755e-06, + "loss": 0.9597, "step": 18375 }, { - "epoch": 0.5207288390149905, + "epoch": 0.7189920964081696, "grad_norm": 0.0, - "learning_rate": 9.814617188494886e-06, - "loss": 0.9837, + "learning_rate": 3.8635324785251e-06, + "loss": 0.984, "step": 18376 }, { - "epoch": 0.520757176457253, + "epoch": 0.7190312231003991, "grad_norm": 0.0, - "learning_rate": 9.813699557732986e-06, - "loss": 0.8647, + "learning_rate": 3.862531946584705e-06, + "loss": 0.9665, "step": 18377 }, { - "epoch": 0.5207855138995154, + "epoch": 0.7190703497926285, "grad_norm": 0.0, - "learning_rate": 9.812781928540365e-06, - "loss": 0.8599, + "learning_rate": 3.861531513204354e-06, + "loss": 0.9275, "step": 18378 }, { - "epoch": 0.5208138513417779, + "epoch": 0.719109476484858, "grad_norm": 0.0, - "learning_rate": 9.811864300924753e-06, - "loss": 0.9096, + "learning_rate": 3.86053117840012e-06, + "loss": 0.9817, "step": 18379 }, { - "epoch": 0.5208421887840403, + "epoch": 0.7191486031770874, "grad_norm": 0.0, - "learning_rate": 9.810946674893876e-06, - "loss": 0.9073, + "learning_rate": 3.859530942188058e-06, + "loss": 1.0412, "step": 18380 }, { - "epoch": 0.5208705262263028, + "epoch": 0.7191877298693169, "grad_norm": 0.0, - "learning_rate": 9.81002905045547e-06, - "loss": 0.8666, + "learning_rate": 3.8585308045842375e-06, + "loss": 0.9162, "step": 18381 }, { - "epoch": 0.5208988636685653, + "epoch": 0.7192268565615463, "grad_norm": 0.0, - "learning_rate": 9.809111427617254e-06, - "loss": 0.887, + "learning_rate": 3.857530765604713e-06, + "loss": 0.9742, "step": 18382 }, { - "epoch": 0.5209272011108277, + "epoch": 0.7192659832537758, "grad_norm": 0.0, - "learning_rate": 9.808193806386965e-06, - "loss": 0.9882, + "learning_rate": 3.856530825265549e-06, + "loss": 0.9547, "step": 18383 }, { - "epoch": 0.5209555385530902, + "epoch": 0.7193051099460052, "grad_norm": 0.0, - "learning_rate": 9.807276186772335e-06, - "loss": 0.8747, + "learning_rate": 3.855530983582801e-06, + "loss": 0.9725, "step": 18384 }, { - "epoch": 0.5209838759953527, + "epoch": 0.7193442366382347, "grad_norm": 0.0, - "learning_rate": 9.806358568781084e-06, - "loss": 0.7921, + "learning_rate": 3.854531240572529e-06, + "loss": 0.9813, "step": 18385 }, { - "epoch": 0.5210122134376152, + "epoch": 0.719383363330464, "grad_norm": 0.0, - "learning_rate": 9.805440952420946e-06, - "loss": 0.7456, + "learning_rate": 3.853531596250781e-06, + "loss": 0.9455, "step": 18386 }, { - "epoch": 0.5210405508798776, + "epoch": 0.7194224900226934, "grad_norm": 0.0, - "learning_rate": 9.804523337699654e-06, - "loss": 1.0082, + "learning_rate": 3.852532050633615e-06, + "loss": 0.9655, "step": 18387 }, { - "epoch": 0.52106888832214, + "epoch": 0.7194616167149229, "grad_norm": 0.0, - "learning_rate": 9.803605724624932e-06, - "loss": 0.9795, + "learning_rate": 3.8515326037370805e-06, + "loss": 0.9689, "step": 18388 }, { - "epoch": 0.5210972257644025, + "epoch": 0.7195007434071523, "grad_norm": 0.0, - "learning_rate": 9.802688113204518e-06, - "loss": 0.8102, + "learning_rate": 3.85053325557723e-06, + "loss": 0.9897, "step": 18389 }, { - "epoch": 0.5211255632066649, + "epoch": 0.7195398700993818, "grad_norm": 0.0, - "learning_rate": 9.801770503446129e-06, - "loss": 0.8647, + "learning_rate": 3.849534006170108e-06, + "loss": 0.9897, "step": 18390 }, { - "epoch": 0.5211539006489274, + "epoch": 0.7195789967916112, "grad_norm": 0.0, - "learning_rate": 9.800852895357502e-06, - "loss": 0.8478, + "learning_rate": 3.848534855531766e-06, + "loss": 0.9573, "step": 18391 }, { - "epoch": 0.5211822380911899, + "epoch": 0.7196181234838407, "grad_norm": 0.0, - "learning_rate": 9.799935288946365e-06, - "loss": 0.811, + "learning_rate": 3.847535803678238e-06, + "loss": 1.1685, "step": 18392 }, { - "epoch": 0.5212105755334524, + "epoch": 0.7196572501760701, "grad_norm": 0.0, - "learning_rate": 9.799017684220449e-06, - "loss": 0.822, + "learning_rate": 3.846536850625583e-06, + "loss": 1.0887, "step": 18393 }, { - "epoch": 0.5212389129757148, + "epoch": 0.7196963768682996, "grad_norm": 0.0, - "learning_rate": 9.79810008118748e-06, - "loss": 1.0204, + "learning_rate": 3.845537996389832e-06, + "loss": 1.091, "step": 18394 }, { - "epoch": 0.5212672504179773, + "epoch": 0.719735503560529, "grad_norm": 0.0, - "learning_rate": 9.797182479855192e-06, - "loss": 0.8682, + "learning_rate": 3.844539240987033e-06, + "loss": 1.0518, "step": 18395 }, { - "epoch": 0.5212955878602398, + "epoch": 0.7197746302527585, "grad_norm": 0.0, - "learning_rate": 9.796264880231307e-06, - "loss": 0.9262, + "learning_rate": 3.843540584433213e-06, + "loss": 1.0155, "step": 18396 }, { - "epoch": 0.5213239253025022, + "epoch": 0.7198137569449878, "grad_norm": 0.0, - "learning_rate": 9.795347282323563e-06, - "loss": 0.7912, + "learning_rate": 3.842542026744423e-06, + "loss": 1.1337, "step": 18397 }, { - "epoch": 0.5213522627447646, + "epoch": 0.7198528836372173, "grad_norm": 0.0, - "learning_rate": 9.794429686139683e-06, - "loss": 1.007, + "learning_rate": 3.841543567936691e-06, + "loss": 1.0536, "step": 18398 }, { - "epoch": 0.5213806001870271, + "epoch": 0.7198920103294467, "grad_norm": 0.0, - "learning_rate": 9.793512091687396e-06, - "loss": 0.8981, + "learning_rate": 3.840545208026054e-06, + "loss": 1.0052, "step": 18399 }, { - "epoch": 0.5214089376292895, + "epoch": 0.7199311370216762, "grad_norm": 0.0, - "learning_rate": 9.792594498974436e-06, - "loss": 0.9372, + "learning_rate": 3.839546947028536e-06, + "loss": 0.9474, "step": 18400 }, { - "epoch": 0.521437275071552, + "epoch": 0.7199702637139056, "grad_norm": 0.0, - "learning_rate": 9.791676908008526e-06, - "loss": 0.9301, + "learning_rate": 3.838548784960182e-06, + "loss": 1.0231, "step": 18401 }, { - "epoch": 0.5214656125138145, + "epoch": 0.7200093904061351, "grad_norm": 0.0, - "learning_rate": 9.790759318797405e-06, - "loss": 0.9211, + "learning_rate": 3.83755072183701e-06, + "loss": 1.0147, "step": 18402 }, { - "epoch": 0.521493949956077, + "epoch": 0.7200485170983645, "grad_norm": 0.0, - "learning_rate": 9.78984173134879e-06, - "loss": 0.7759, + "learning_rate": 3.836552757675055e-06, + "loss": 0.9179, "step": 18403 }, { - "epoch": 0.5215222873983394, + "epoch": 0.720087643790594, "grad_norm": 0.0, - "learning_rate": 9.788924145670418e-06, - "loss": 0.8738, + "learning_rate": 3.835554892490335e-06, + "loss": 0.9844, "step": 18404 }, { - "epoch": 0.5215506248406019, + "epoch": 0.7201267704828234, "grad_norm": 0.0, - "learning_rate": 9.788006561770018e-06, - "loss": 0.9268, + "learning_rate": 3.83455712629888e-06, + "loss": 0.9676, "step": 18405 }, { - "epoch": 0.5215789622828644, + "epoch": 0.7201658971750529, "grad_norm": 0.0, - "learning_rate": 9.787088979655314e-06, - "loss": 0.934, + "learning_rate": 3.8335594591167114e-06, + "loss": 1.0134, "step": 18406 }, { - "epoch": 0.5216072997251268, + "epoch": 0.7202050238672822, "grad_norm": 0.0, - "learning_rate": 9.786171399334039e-06, - "loss": 0.8706, + "learning_rate": 3.832561890959855e-06, + "loss": 0.9716, "step": 18407 }, { - "epoch": 0.5216356371673893, + "epoch": 0.7202441505595117, "grad_norm": 0.0, - "learning_rate": 9.785253820813927e-06, - "loss": 0.7573, + "learning_rate": 3.831564421844323e-06, + "loss": 1.0852, "step": 18408 }, { - "epoch": 0.5216639746096517, + "epoch": 0.7202832772517411, "grad_norm": 0.0, - "learning_rate": 9.784336244102697e-06, - "loss": 0.9558, + "learning_rate": 3.830567051786136e-06, + "loss": 1.0068, "step": 18409 }, { - "epoch": 0.5216923120519142, + "epoch": 0.7203224039439706, "grad_norm": 0.0, - "learning_rate": 9.783418669208086e-06, - "loss": 0.9018, + "learning_rate": 3.8295697808013124e-06, + "loss": 0.976, "step": 18410 }, { - "epoch": 0.5217206494941766, + "epoch": 0.7203615306362, "grad_norm": 0.0, - "learning_rate": 9.782501096137817e-06, - "loss": 0.9489, + "learning_rate": 3.8285726089058685e-06, + "loss": 0.9736, "step": 18411 }, { - "epoch": 0.5217489869364391, + "epoch": 0.7204006573284295, "grad_norm": 0.0, - "learning_rate": 9.781583524899622e-06, - "loss": 0.8942, + "learning_rate": 3.827575536115813e-06, + "loss": 0.8959, "step": 18412 }, { - "epoch": 0.5217773243787016, + "epoch": 0.7204397840206589, "grad_norm": 0.0, - "learning_rate": 9.780665955501232e-06, - "loss": 0.9099, + "learning_rate": 3.826578562447162e-06, + "loss": 0.8378, "step": 18413 }, { - "epoch": 0.521805661820964, + "epoch": 0.7204789107128884, "grad_norm": 0.0, - "learning_rate": 9.779748387950372e-06, - "loss": 0.9468, + "learning_rate": 3.825581687915922e-06, + "loss": 1.022, "step": 18414 }, { - "epoch": 0.5218339992632265, + "epoch": 0.7205180374051178, "grad_norm": 0.0, - "learning_rate": 9.778830822254773e-06, - "loss": 0.9881, + "learning_rate": 3.824584912538101e-06, + "loss": 1.1023, "step": 18415 }, { - "epoch": 0.521862336705489, + "epoch": 0.7205571640973472, "grad_norm": 0.0, - "learning_rate": 9.777913258422168e-06, - "loss": 0.8478, + "learning_rate": 3.8235882363297095e-06, + "loss": 0.9895, "step": 18416 }, { - "epoch": 0.5218906741477515, + "epoch": 0.7205962907895767, "grad_norm": 0.0, - "learning_rate": 9.776995696460279e-06, - "loss": 0.9325, + "learning_rate": 3.822591659306754e-06, + "loss": 0.9514, "step": 18417 }, { - "epoch": 0.5219190115900139, + "epoch": 0.720635417481806, "grad_norm": 0.0, - "learning_rate": 9.77607813637684e-06, - "loss": 0.8265, + "learning_rate": 3.821595181485232e-06, + "loss": 0.8389, "step": 18418 }, { - "epoch": 0.5219473490322764, + "epoch": 0.7206745441740355, "grad_norm": 0.0, - "learning_rate": 9.775160578179575e-06, - "loss": 0.9169, + "learning_rate": 3.820598802881151e-06, + "loss": 0.9642, "step": 18419 }, { - "epoch": 0.5219756864745388, + "epoch": 0.7207136708662649, "grad_norm": 0.0, - "learning_rate": 9.774243021876216e-06, - "loss": 0.8239, + "learning_rate": 3.8196025235105124e-06, + "loss": 0.9893, "step": 18420 }, { - "epoch": 0.5220040239168012, + "epoch": 0.7207527975584944, "grad_norm": 0.0, - "learning_rate": 9.773325467474497e-06, - "loss": 0.8537, + "learning_rate": 3.81860634338931e-06, + "loss": 1.0309, "step": 18421 }, { - "epoch": 0.5220323613590637, + "epoch": 0.7207919242507238, "grad_norm": 0.0, - "learning_rate": 9.772407914982136e-06, - "loss": 0.9013, + "learning_rate": 3.817610262533543e-06, + "loss": 1.0522, "step": 18422 }, { - "epoch": 0.5220606988013262, + "epoch": 0.7208310509429533, "grad_norm": 0.0, - "learning_rate": 9.77149036440687e-06, - "loss": 0.928, + "learning_rate": 3.816614280959209e-06, + "loss": 0.9081, "step": 18423 }, { - "epoch": 0.5220890362435886, + "epoch": 0.7208701776351827, "grad_norm": 0.0, - "learning_rate": 9.770572815756428e-06, - "loss": 0.9141, + "learning_rate": 3.815618398682305e-06, + "loss": 1.0337, "step": 18424 }, { - "epoch": 0.5221173736858511, + "epoch": 0.7209093043274122, "grad_norm": 0.0, - "learning_rate": 9.76965526903853e-06, - "loss": 0.9977, + "learning_rate": 3.814622615718816e-06, + "loss": 0.9205, "step": 18425 }, { - "epoch": 0.5221457111281136, + "epoch": 0.7209484310196416, "grad_norm": 0.0, - "learning_rate": 9.768737724260919e-06, - "loss": 0.9297, + "learning_rate": 3.8136269320847406e-06, + "loss": 1.0372, "step": 18426 }, { - "epoch": 0.5221740485703761, + "epoch": 0.7209875577118711, "grad_norm": 0.0, - "learning_rate": 9.76782018143131e-06, - "loss": 0.9052, + "learning_rate": 3.8126313477960577e-06, + "loss": 0.8879, "step": 18427 }, { - "epoch": 0.5222023860126385, + "epoch": 0.7210266844041004, "grad_norm": 0.0, - "learning_rate": 9.766902640557438e-06, - "loss": 0.8534, + "learning_rate": 3.81163586286877e-06, + "loss": 1.026, "step": 18428 }, { - "epoch": 0.522230723454901, + "epoch": 0.72106581109633, "grad_norm": 0.0, - "learning_rate": 9.765985101647037e-06, - "loss": 0.9196, + "learning_rate": 3.810640477318851e-06, + "loss": 0.9324, "step": 18429 }, { - "epoch": 0.5222590608971635, + "epoch": 0.7211049377885593, "grad_norm": 0.0, - "learning_rate": 9.765067564707825e-06, - "loss": 0.8659, + "learning_rate": 3.8096451911622945e-06, + "loss": 1.057, "step": 18430 }, { - "epoch": 0.5222873983394258, + "epoch": 0.7211440644807888, "grad_norm": 0.0, - "learning_rate": 9.764150029747538e-06, - "loss": 0.9008, + "learning_rate": 3.8086500044150753e-06, + "loss": 0.981, "step": 18431 }, { - "epoch": 0.5223157357816883, + "epoch": 0.7211831911730182, "grad_norm": 0.0, - "learning_rate": 9.763232496773903e-06, - "loss": 0.8922, + "learning_rate": 3.80765491709318e-06, + "loss": 1.0098, "step": 18432 }, { - "epoch": 0.5223440732239508, + "epoch": 0.7212223178652477, "grad_norm": 0.0, - "learning_rate": 9.762314965794645e-06, - "loss": 0.8545, + "learning_rate": 3.806659929212586e-06, + "loss": 0.9573, "step": 18433 }, { - "epoch": 0.5223724106662133, + "epoch": 0.7212614445574771, "grad_norm": 0.0, - "learning_rate": 9.761397436817504e-06, - "loss": 0.8367, + "learning_rate": 3.805665040789277e-06, + "loss": 0.9037, "step": 18434 }, { - "epoch": 0.5224007481084757, + "epoch": 0.7213005712497066, "grad_norm": 0.0, - "learning_rate": 9.760479909850196e-06, - "loss": 0.8948, + "learning_rate": 3.804670251839222e-06, + "loss": 0.9536, "step": 18435 }, { - "epoch": 0.5224290855507382, + "epoch": 0.721339697941936, "grad_norm": 0.0, - "learning_rate": 9.759562384900453e-06, - "loss": 1.0221, + "learning_rate": 3.8036755623783994e-06, + "loss": 1.0135, "step": 18436 }, { - "epoch": 0.5224574229930007, + "epoch": 0.7213788246341655, "grad_norm": 0.0, - "learning_rate": 9.758644861976012e-06, - "loss": 0.8836, + "learning_rate": 3.802680972422783e-06, + "loss": 0.9108, "step": 18437 }, { - "epoch": 0.5224857604352631, + "epoch": 0.7214179513263949, "grad_norm": 0.0, - "learning_rate": 9.757727341084588e-06, - "loss": 0.9335, + "learning_rate": 3.801686481988348e-06, + "loss": 0.9357, "step": 18438 }, { - "epoch": 0.5225140978775256, + "epoch": 0.7214570780186244, "grad_norm": 0.0, - "learning_rate": 9.75680982223392e-06, - "loss": 0.8962, + "learning_rate": 3.8006920910910583e-06, + "loss": 1.1292, "step": 18439 }, { - "epoch": 0.5225424353197881, + "epoch": 0.7214962047108537, "grad_norm": 0.0, - "learning_rate": 9.755892305431733e-06, - "loss": 0.9178, + "learning_rate": 3.7996977997468874e-06, + "loss": 0.9808, "step": 18440 }, { - "epoch": 0.5225707727620506, + "epoch": 0.7215353314030832, "grad_norm": 0.0, - "learning_rate": 9.754974790685754e-06, - "loss": 0.8474, + "learning_rate": 3.798703607971795e-06, + "loss": 0.9613, "step": 18441 }, { - "epoch": 0.5225991102043129, + "epoch": 0.7215744580953126, "grad_norm": 0.0, - "learning_rate": 9.754057278003717e-06, - "loss": 0.8879, + "learning_rate": 3.797709515781758e-06, + "loss": 1.0553, "step": 18442 }, { - "epoch": 0.5226274476465754, + "epoch": 0.721613584787542, "grad_norm": 0.0, - "learning_rate": 9.753139767393342e-06, - "loss": 0.9783, + "learning_rate": 3.796715523192731e-06, + "loss": 1.0316, "step": 18443 }, { - "epoch": 0.5226557850888379, + "epoch": 0.7216527114797715, "grad_norm": 0.0, - "learning_rate": 9.752222258862364e-06, - "loss": 0.8522, + "learning_rate": 3.7957216302206833e-06, + "loss": 1.0647, "step": 18444 }, { - "epoch": 0.5226841225311003, + "epoch": 0.7216918381720009, "grad_norm": 0.0, - "learning_rate": 9.751304752418512e-06, - "loss": 0.8349, + "learning_rate": 3.7947278368815644e-06, + "loss": 1.0072, "step": 18445 }, { - "epoch": 0.5227124599733628, + "epoch": 0.7217309648642304, "grad_norm": 0.0, - "learning_rate": 9.75038724806951e-06, - "loss": 0.8217, + "learning_rate": 3.7937341431913486e-06, + "loss": 1.0516, "step": 18446 }, { - "epoch": 0.5227407974156253, + "epoch": 0.7217700915564598, "grad_norm": 0.0, - "learning_rate": 9.749469745823092e-06, - "loss": 0.7465, + "learning_rate": 3.7927405491659818e-06, + "loss": 0.8924, "step": 18447 }, { - "epoch": 0.5227691348578877, + "epoch": 0.7218092182486893, "grad_norm": 0.0, - "learning_rate": 9.74855224568698e-06, - "loss": 0.8414, + "learning_rate": 3.791747054821426e-06, + "loss": 0.9987, "step": 18448 }, { - "epoch": 0.5227974723001502, + "epoch": 0.7218483449409187, "grad_norm": 0.0, - "learning_rate": 9.747634747668906e-06, - "loss": 0.8835, + "learning_rate": 3.7907536601736306e-06, + "loss": 0.9779, "step": 18449 }, { - "epoch": 0.5228258097424127, + "epoch": 0.7218874716331481, "grad_norm": 0.0, - "learning_rate": 9.746717251776602e-06, - "loss": 0.9307, + "learning_rate": 3.7897603652385505e-06, + "loss": 0.9615, "step": 18450 }, { - "epoch": 0.5228541471846752, + "epoch": 0.7219265983253775, "grad_norm": 0.0, - "learning_rate": 9.74579975801779e-06, - "loss": 0.8292, + "learning_rate": 3.7887671700321383e-06, + "loss": 0.845, "step": 18451 }, { - "epoch": 0.5228824846269375, + "epoch": 0.721965725017607, "grad_norm": 0.0, - "learning_rate": 9.7448822664002e-06, - "loss": 0.9458, + "learning_rate": 3.787774074570344e-06, + "loss": 1.0815, "step": 18452 }, { - "epoch": 0.5229108220692, + "epoch": 0.7220048517098364, "grad_norm": 0.0, - "learning_rate": 9.743964776931562e-06, - "loss": 0.856, + "learning_rate": 3.7867810788691105e-06, + "loss": 1.0637, "step": 18453 }, { - "epoch": 0.5229391595114625, + "epoch": 0.7220439784020659, "grad_norm": 0.0, - "learning_rate": 9.743047289619604e-06, - "loss": 0.8695, + "learning_rate": 3.7857881829443887e-06, + "loss": 1.0721, "step": 18454 }, { - "epoch": 0.5229674969537249, + "epoch": 0.7220831050942953, "grad_norm": 0.0, - "learning_rate": 9.742129804472056e-06, - "loss": 0.9664, + "learning_rate": 3.78479538681212e-06, + "loss": 0.8058, "step": 18455 }, { - "epoch": 0.5229958343959874, + "epoch": 0.7221222317865248, "grad_norm": 0.0, - "learning_rate": 9.741212321496643e-06, - "loss": 0.8926, + "learning_rate": 3.7838026904882543e-06, + "loss": 0.8935, "step": 18456 }, { - "epoch": 0.5230241718382499, + "epoch": 0.7221613584787542, "grad_norm": 0.0, - "learning_rate": 9.740294840701094e-06, - "loss": 0.8427, + "learning_rate": 3.7828100939887235e-06, + "loss": 0.9547, "step": 18457 }, { - "epoch": 0.5230525092805124, + "epoch": 0.7222004851709837, "grad_norm": 0.0, - "learning_rate": 9.739377362093138e-06, - "loss": 0.882, + "learning_rate": 3.7818175973294722e-06, + "loss": 0.9296, "step": 18458 }, { - "epoch": 0.5230808467227748, + "epoch": 0.7222396118632131, "grad_norm": 0.0, - "learning_rate": 9.738459885680502e-06, - "loss": 0.7879, + "learning_rate": 3.7808252005264422e-06, + "loss": 0.9218, "step": 18459 }, { - "epoch": 0.5231091841650373, + "epoch": 0.7222787385554426, "grad_norm": 0.0, - "learning_rate": 9.73754241147092e-06, - "loss": 0.8672, + "learning_rate": 3.7798329035955627e-06, + "loss": 1.0516, "step": 18460 }, { - "epoch": 0.5231375216072998, + "epoch": 0.7223178652476719, "grad_norm": 0.0, - "learning_rate": 9.736624939472112e-06, - "loss": 1.0504, + "learning_rate": 3.778840706552773e-06, + "loss": 1.0002, "step": 18461 }, { - "epoch": 0.5231658590495621, + "epoch": 0.7223569919399014, "grad_norm": 0.0, - "learning_rate": 9.73570746969181e-06, - "loss": 0.9199, + "learning_rate": 3.7778486094140086e-06, + "loss": 0.9609, "step": 18462 }, { - "epoch": 0.5231941964918246, + "epoch": 0.7223961186321308, "grad_norm": 0.0, - "learning_rate": 9.734790002137743e-06, - "loss": 0.7955, + "learning_rate": 3.7768566121951966e-06, + "loss": 1.0159, "step": 18463 }, { - "epoch": 0.5232225339340871, + "epoch": 0.7224352453243603, "grad_norm": 0.0, - "learning_rate": 9.733872536817637e-06, - "loss": 1.0709, + "learning_rate": 3.7758647149122683e-06, + "loss": 0.9827, "step": 18464 }, { - "epoch": 0.5232508713763496, + "epoch": 0.7224743720165897, "grad_norm": 0.0, - "learning_rate": 9.732955073739222e-06, - "loss": 0.9605, + "learning_rate": 3.7748729175811573e-06, + "loss": 0.9669, "step": 18465 }, { - "epoch": 0.523279208818612, + "epoch": 0.7225134987088192, "grad_norm": 0.0, - "learning_rate": 9.73203761291023e-06, - "loss": 0.8827, + "learning_rate": 3.7738812202177832e-06, + "loss": 0.9246, "step": 18466 }, { - "epoch": 0.5233075462608745, + "epoch": 0.7225526254010486, "grad_norm": 0.0, - "learning_rate": 9.73112015433838e-06, - "loss": 0.8711, + "learning_rate": 3.772889622838074e-06, + "loss": 1.0892, "step": 18467 }, { - "epoch": 0.523335883703137, + "epoch": 0.7225917520932781, "grad_norm": 0.0, - "learning_rate": 9.73020269803141e-06, - "loss": 0.8239, + "learning_rate": 3.7718981254579557e-06, + "loss": 0.9399, "step": 18468 }, { - "epoch": 0.5233642211453994, + "epoch": 0.7226308787855075, "grad_norm": 0.0, - "learning_rate": 9.729285243997037e-06, - "loss": 0.8457, + "learning_rate": 3.770906728093352e-06, + "loss": 1.0352, "step": 18469 }, { - "epoch": 0.5233925585876619, + "epoch": 0.722670005477737, "grad_norm": 0.0, - "learning_rate": 9.728367792242993e-06, - "loss": 0.9075, + "learning_rate": 3.769915430760178e-06, + "loss": 0.8765, "step": 18470 }, { - "epoch": 0.5234208960299244, + "epoch": 0.7227091321699664, "grad_norm": 0.0, - "learning_rate": 9.727450342777015e-06, - "loss": 0.816, + "learning_rate": 3.768924233474358e-06, + "loss": 1.033, "step": 18471 }, { - "epoch": 0.5234492334721867, + "epoch": 0.7227482588621957, "grad_norm": 0.0, - "learning_rate": 9.726532895606819e-06, - "loss": 0.9017, + "learning_rate": 3.767933136251801e-06, + "loss": 0.9126, "step": 18472 }, { - "epoch": 0.5234775709144492, + "epoch": 0.7227873855544252, "grad_norm": 0.0, - "learning_rate": 9.725615450740138e-06, - "loss": 0.797, + "learning_rate": 3.766942139108435e-06, + "loss": 0.9108, "step": 18473 }, { - "epoch": 0.5235059083567117, + "epoch": 0.7228265122466546, "grad_norm": 0.0, - "learning_rate": 9.724698008184705e-06, - "loss": 0.878, + "learning_rate": 3.765951242060164e-06, + "loss": 0.9582, "step": 18474 }, { - "epoch": 0.5235342457989742, + "epoch": 0.7228656389388841, "grad_norm": 0.0, - "learning_rate": 9.723780567948236e-06, - "loss": 0.9417, + "learning_rate": 3.7649604451229082e-06, + "loss": 0.9972, "step": 18475 }, { - "epoch": 0.5235625832412366, + "epoch": 0.7229047656311135, "grad_norm": 0.0, - "learning_rate": 9.722863130038472e-06, - "loss": 0.9236, + "learning_rate": 3.763969748312568e-06, + "loss": 0.9075, "step": 18476 }, { - "epoch": 0.5235909206834991, + "epoch": 0.722943892323343, "grad_norm": 0.0, - "learning_rate": 9.721945694463129e-06, - "loss": 0.7834, + "learning_rate": 3.7629791516450652e-06, + "loss": 0.9544, "step": 18477 }, { - "epoch": 0.5236192581257616, + "epoch": 0.7229830190155724, "grad_norm": 0.0, - "learning_rate": 9.721028261229944e-06, - "loss": 0.8209, + "learning_rate": 3.7619886551362992e-06, + "loss": 0.9131, "step": 18478 }, { - "epoch": 0.523647595568024, + "epoch": 0.7230221457078019, "grad_norm": 0.0, - "learning_rate": 9.720110830346643e-06, - "loss": 0.9326, + "learning_rate": 3.7609982588021833e-06, + "loss": 0.9773, "step": 18479 }, { - "epoch": 0.5236759330102865, + "epoch": 0.7230612724000313, "grad_norm": 0.0, - "learning_rate": 9.71919340182095e-06, - "loss": 0.9695, + "learning_rate": 3.760007962658613e-06, + "loss": 1.0682, "step": 18480 }, { - "epoch": 0.523704270452549, + "epoch": 0.7231003990922608, "grad_norm": 0.0, - "learning_rate": 9.718275975660594e-06, - "loss": 0.8856, + "learning_rate": 3.7590177667214957e-06, + "loss": 1.0021, "step": 18481 }, { - "epoch": 0.5237326078948115, + "epoch": 0.7231395257844901, "grad_norm": 0.0, - "learning_rate": 9.71735855187331e-06, - "loss": 0.8824, + "learning_rate": 3.758027671006732e-06, + "loss": 0.9065, "step": 18482 }, { - "epoch": 0.5237609453370738, + "epoch": 0.7231786524767196, "grad_norm": 0.0, - "learning_rate": 9.716441130466814e-06, - "loss": 0.8435, + "learning_rate": 3.7570376755302263e-06, + "loss": 1.0294, "step": 18483 }, { - "epoch": 0.5237892827793363, + "epoch": 0.723217779168949, "grad_norm": 0.0, - "learning_rate": 9.715523711448842e-06, - "loss": 0.9704, + "learning_rate": 3.7560477803078687e-06, + "loss": 0.9899, "step": 18484 }, { - "epoch": 0.5238176202215988, + "epoch": 0.7232569058611785, "grad_norm": 0.0, - "learning_rate": 9.714606294827119e-06, - "loss": 0.8329, + "learning_rate": 3.75505798535556e-06, + "loss": 0.9153, "step": 18485 }, { - "epoch": 0.5238459576638612, + "epoch": 0.7232960325534079, "grad_norm": 0.0, - "learning_rate": 9.71368888060937e-06, - "loss": 0.8616, + "learning_rate": 3.7540682906891957e-06, + "loss": 1.1453, "step": 18486 }, { - "epoch": 0.5238742951061237, + "epoch": 0.7233351592456374, "grad_norm": 0.0, - "learning_rate": 9.712771468803333e-06, - "loss": 0.8823, + "learning_rate": 3.75307869632467e-06, + "loss": 0.9128, "step": 18487 }, { - "epoch": 0.5239026325483862, + "epoch": 0.7233742859378668, "grad_norm": 0.0, - "learning_rate": 9.711854059416722e-06, - "loss": 0.9285, + "learning_rate": 3.75208920227787e-06, + "loss": 0.9955, "step": 18488 }, { - "epoch": 0.5239309699906487, + "epoch": 0.7234134126300963, "grad_norm": 0.0, - "learning_rate": 9.710936652457276e-06, - "loss": 0.9011, + "learning_rate": 3.751099808564692e-06, + "loss": 1.0104, "step": 18489 }, { - "epoch": 0.5239593074329111, + "epoch": 0.7234525393223257, "grad_norm": 0.0, - "learning_rate": 9.710019247932714e-06, - "loss": 0.8225, + "learning_rate": 3.7501105152010132e-06, + "loss": 1.2011, "step": 18490 }, { - "epoch": 0.5239876448751736, + "epoch": 0.7234916660145552, "grad_norm": 0.0, - "learning_rate": 9.709101845850768e-06, - "loss": 0.9403, + "learning_rate": 3.7491213222027347e-06, + "loss": 0.9826, "step": 18491 }, { - "epoch": 0.5240159823174361, + "epoch": 0.7235307927067846, "grad_norm": 0.0, - "learning_rate": 9.708184446219168e-06, - "loss": 0.9052, + "learning_rate": 3.7481322295857327e-06, + "loss": 0.9057, "step": 18492 }, { - "epoch": 0.5240443197596985, + "epoch": 0.723569919399014, "grad_norm": 0.0, - "learning_rate": 9.707267049045636e-06, - "loss": 0.7624, + "learning_rate": 3.7471432373658955e-06, + "loss": 0.9673, "step": 18493 }, { - "epoch": 0.524072657201961, + "epoch": 0.7236090460912434, "grad_norm": 0.0, - "learning_rate": 9.706349654337903e-06, - "loss": 0.8781, + "learning_rate": 3.7461543455590952e-06, + "loss": 0.9674, "step": 18494 }, { - "epoch": 0.5241009946442234, + "epoch": 0.7236481727834729, "grad_norm": 0.0, - "learning_rate": 9.7054322621037e-06, - "loss": 0.8156, + "learning_rate": 3.745165554181228e-06, + "loss": 1.1053, "step": 18495 }, { - "epoch": 0.5241293320864858, + "epoch": 0.7236872994757023, "grad_norm": 0.0, - "learning_rate": 9.704514872350745e-06, - "loss": 0.8683, + "learning_rate": 3.74417686324816e-06, + "loss": 0.8886, "step": 18496 }, { - "epoch": 0.5241576695287483, + "epoch": 0.7237264261679318, "grad_norm": 0.0, - "learning_rate": 9.703597485086775e-06, - "loss": 0.8691, + "learning_rate": 3.743188272775776e-06, + "loss": 0.9847, "step": 18497 }, { - "epoch": 0.5241860069710108, + "epoch": 0.7237655528601612, "grad_norm": 0.0, - "learning_rate": 9.702680100319509e-06, - "loss": 0.9482, + "learning_rate": 3.742199782779945e-06, + "loss": 0.9484, "step": 18498 }, { - "epoch": 0.5242143444132733, + "epoch": 0.7238046795523907, "grad_norm": 0.0, - "learning_rate": 9.70176271805668e-06, - "loss": 0.9097, + "learning_rate": 3.7412113932765436e-06, + "loss": 0.7785, "step": 18499 }, { - "epoch": 0.5242426818555357, + "epoch": 0.7238438062446201, "grad_norm": 0.0, - "learning_rate": 9.700845338306018e-06, - "loss": 0.9216, + "learning_rate": 3.740223104281445e-06, + "loss": 0.9158, "step": 18500 }, { - "epoch": 0.5242710192977982, + "epoch": 0.7238829329368495, "grad_norm": 0.0, - "learning_rate": 9.699927961075245e-06, - "loss": 0.9424, + "learning_rate": 3.7392349158105223e-06, + "loss": 0.9964, "step": 18501 }, { - "epoch": 0.5242993567400607, + "epoch": 0.723922059629079, "grad_norm": 0.0, - "learning_rate": 9.699010586372087e-06, - "loss": 0.7969, + "learning_rate": 3.7382468278796393e-06, + "loss": 0.993, "step": 18502 }, { - "epoch": 0.5243276941823231, + "epoch": 0.7239611863213083, "grad_norm": 0.0, - "learning_rate": 9.698093214204279e-06, - "loss": 0.8524, + "learning_rate": 3.737258840504665e-06, + "loss": 0.9277, "step": 18503 }, { - "epoch": 0.5243560316245856, + "epoch": 0.7240003130135378, "grad_norm": 0.0, - "learning_rate": 9.69717584457954e-06, - "loss": 0.81, + "learning_rate": 3.7362709537014696e-06, + "loss": 0.9345, "step": 18504 }, { - "epoch": 0.524384369066848, + "epoch": 0.7240394397057672, "grad_norm": 0.0, - "learning_rate": 9.696258477505606e-06, - "loss": 0.9054, + "learning_rate": 3.7352831674859103e-06, + "loss": 0.8997, "step": 18505 }, { - "epoch": 0.5244127065091105, + "epoch": 0.7240785663979967, "grad_norm": 0.0, - "learning_rate": 9.695341112990196e-06, - "loss": 0.8307, + "learning_rate": 3.734295481873853e-06, + "loss": 1.1153, "step": 18506 }, { - "epoch": 0.5244410439513729, + "epoch": 0.7241176930902261, "grad_norm": 0.0, - "learning_rate": 9.69442375104104e-06, - "loss": 0.9188, + "learning_rate": 3.733307896881162e-06, + "loss": 0.9953, "step": 18507 }, { - "epoch": 0.5244693813936354, + "epoch": 0.7241568197824556, "grad_norm": 0.0, - "learning_rate": 9.693506391665873e-06, - "loss": 0.8836, + "learning_rate": 3.732320412523691e-06, + "loss": 1.0735, "step": 18508 }, { - "epoch": 0.5244977188358979, + "epoch": 0.724195946474685, "grad_norm": 0.0, - "learning_rate": 9.692589034872408e-06, - "loss": 0.907, + "learning_rate": 3.7313330288173e-06, + "loss": 0.8842, "step": 18509 }, { - "epoch": 0.5245260562781603, + "epoch": 0.7242350731669145, "grad_norm": 0.0, - "learning_rate": 9.691671680668381e-06, - "loss": 0.8822, + "learning_rate": 3.7303457457778493e-06, + "loss": 0.9628, "step": 18510 }, { - "epoch": 0.5245543937204228, + "epoch": 0.7242741998591439, "grad_norm": 0.0, - "learning_rate": 9.69075432906152e-06, - "loss": 0.8203, + "learning_rate": 3.729358563421186e-06, + "loss": 0.9781, "step": 18511 }, { - "epoch": 0.5245827311626853, + "epoch": 0.7243133265513734, "grad_norm": 0.0, - "learning_rate": 9.68983698005955e-06, - "loss": 0.9308, + "learning_rate": 3.7283714817631665e-06, + "loss": 0.9541, "step": 18512 }, { - "epoch": 0.5246110686049478, + "epoch": 0.7243524532436028, "grad_norm": 0.0, - "learning_rate": 9.6889196336702e-06, - "loss": 0.9039, + "learning_rate": 3.727384500819642e-06, + "loss": 0.8925, "step": 18513 }, { - "epoch": 0.5246394060472102, + "epoch": 0.7243915799358323, "grad_norm": 0.0, - "learning_rate": 9.688002289901192e-06, - "loss": 0.892, + "learning_rate": 3.7263976206064657e-06, + "loss": 0.8785, "step": 18514 }, { - "epoch": 0.5246677434894726, + "epoch": 0.7244307066280616, "grad_norm": 0.0, - "learning_rate": 9.687084948760256e-06, - "loss": 0.9963, + "learning_rate": 3.7254108411394794e-06, + "loss": 0.9082, "step": 18515 }, { - "epoch": 0.5246960809317351, + "epoch": 0.7244698333202911, "grad_norm": 0.0, - "learning_rate": 9.686167610255123e-06, - "loss": 0.8452, + "learning_rate": 3.724424162434532e-06, + "loss": 0.9497, "step": 18516 }, { - "epoch": 0.5247244183739975, + "epoch": 0.7245089600125205, "grad_norm": 0.0, - "learning_rate": 9.685250274393515e-06, - "loss": 0.9101, + "learning_rate": 3.7234375845074686e-06, + "loss": 0.8982, "step": 18517 }, { - "epoch": 0.52475275581626, + "epoch": 0.72454808670475, "grad_norm": 0.0, - "learning_rate": 9.684332941183164e-06, - "loss": 0.8788, + "learning_rate": 3.7224511073741376e-06, + "loss": 1.1094, "step": 18518 }, { - "epoch": 0.5247810932585225, + "epoch": 0.7245872133969794, "grad_norm": 0.0, - "learning_rate": 9.683415610631788e-06, - "loss": 0.8767, + "learning_rate": 3.7214647310503704e-06, + "loss": 0.8817, "step": 18519 }, { - "epoch": 0.5248094307007849, + "epoch": 0.7246263400892089, "grad_norm": 0.0, - "learning_rate": 9.682498282747122e-06, - "loss": 0.8575, + "learning_rate": 3.720478455552017e-06, + "loss": 0.9825, "step": 18520 }, { - "epoch": 0.5248377681430474, + "epoch": 0.7246654667814383, "grad_norm": 0.0, - "learning_rate": 9.681580957536895e-06, - "loss": 0.865, + "learning_rate": 3.719492280894903e-06, + "loss": 0.9795, "step": 18521 }, { - "epoch": 0.5248661055853099, + "epoch": 0.7247045934736678, "grad_norm": 0.0, - "learning_rate": 9.680663635008824e-06, - "loss": 0.9682, + "learning_rate": 3.7185062070948806e-06, + "loss": 1.1268, "step": 18522 }, { - "epoch": 0.5248944430275724, + "epoch": 0.7247437201658972, "grad_norm": 0.0, - "learning_rate": 9.679746315170643e-06, - "loss": 0.8391, + "learning_rate": 3.717520234167773e-06, + "loss": 0.91, "step": 18523 }, { - "epoch": 0.5249227804698348, + "epoch": 0.7247828468581267, "grad_norm": 0.0, - "learning_rate": 9.678828998030081e-06, - "loss": 0.8602, + "learning_rate": 3.7165343621294227e-06, + "loss": 1.0094, "step": 18524 }, { - "epoch": 0.5249511179120973, + "epoch": 0.724821973550356, "grad_norm": 0.0, - "learning_rate": 9.677911683594858e-06, - "loss": 0.8969, + "learning_rate": 3.715548590995649e-06, + "loss": 0.9585, "step": 18525 }, { - "epoch": 0.5249794553543597, + "epoch": 0.7248611002425855, "grad_norm": 0.0, - "learning_rate": 9.676994371872708e-06, - "loss": 0.8588, + "learning_rate": 3.7145629207822973e-06, + "loss": 1.0496, "step": 18526 }, { - "epoch": 0.5250077927966221, + "epoch": 0.7249002269348149, "grad_norm": 0.0, - "learning_rate": 9.676077062871352e-06, - "loss": 0.8522, + "learning_rate": 3.7135773515051866e-06, + "loss": 0.9199, "step": 18527 }, { - "epoch": 0.5250361302388846, + "epoch": 0.7249393536270444, "grad_norm": 0.0, - "learning_rate": 9.675159756598519e-06, - "loss": 0.7846, + "learning_rate": 3.712591883180149e-06, + "loss": 0.9583, "step": 18528 }, { - "epoch": 0.5250644676811471, + "epoch": 0.7249784803192738, "grad_norm": 0.0, - "learning_rate": 9.674242453061938e-06, - "loss": 0.8871, + "learning_rate": 3.711606515823003e-06, + "loss": 1.0324, "step": 18529 }, { - "epoch": 0.5250928051234096, + "epoch": 0.7250176070115032, "grad_norm": 0.0, - "learning_rate": 9.67332515226933e-06, - "loss": 0.8539, + "learning_rate": 3.7106212494495776e-06, + "loss": 0.94, "step": 18530 }, { - "epoch": 0.525121142565672, + "epoch": 0.7250567337037327, "grad_norm": 0.0, - "learning_rate": 9.672407854228428e-06, - "loss": 0.9196, + "learning_rate": 3.709636084075693e-06, + "loss": 1.0884, "step": 18531 }, { - "epoch": 0.5251494800079345, + "epoch": 0.7250958603959621, "grad_norm": 0.0, - "learning_rate": 9.671490558946957e-06, - "loss": 0.9252, + "learning_rate": 3.7086510197171744e-06, + "loss": 0.9059, "step": 18532 }, { - "epoch": 0.525177817450197, + "epoch": 0.7251349870881916, "grad_norm": 0.0, - "learning_rate": 9.670573266432643e-06, - "loss": 0.9104, + "learning_rate": 3.7076660563898336e-06, + "loss": 1.0137, "step": 18533 }, { - "epoch": 0.5252061548924594, + "epoch": 0.725174113780421, "grad_norm": 0.0, - "learning_rate": 9.669655976693214e-06, - "loss": 0.8929, + "learning_rate": 3.7066811941094915e-06, + "loss": 1.0008, "step": 18534 }, { - "epoch": 0.5252344923347219, + "epoch": 0.7252132404726505, "grad_norm": 0.0, - "learning_rate": 9.668738689736393e-06, - "loss": 0.9232, + "learning_rate": 3.705696432891963e-06, + "loss": 1.062, "step": 18535 }, { - "epoch": 0.5252628297769844, + "epoch": 0.7252523671648798, "grad_norm": 0.0, - "learning_rate": 9.667821405569907e-06, - "loss": 0.8862, + "learning_rate": 3.704711772753066e-06, + "loss": 1.0058, "step": 18536 }, { - "epoch": 0.5252911672192468, + "epoch": 0.7252914938571093, "grad_norm": 0.0, - "learning_rate": 9.666904124201488e-06, - "loss": 0.9656, + "learning_rate": 3.7037272137086067e-06, + "loss": 0.9149, "step": 18537 }, { - "epoch": 0.5253195046615092, + "epoch": 0.7253306205493387, "grad_norm": 0.0, - "learning_rate": 9.665986845638857e-06, - "loss": 0.9294, + "learning_rate": 3.702742755774401e-06, + "loss": 0.9362, "step": 18538 }, { - "epoch": 0.5253478421037717, + "epoch": 0.7253697472415682, "grad_norm": 0.0, - "learning_rate": 9.665069569889742e-06, - "loss": 0.8985, + "learning_rate": 3.70175839896625e-06, + "loss": 1.0685, "step": 18539 }, { - "epoch": 0.5253761795460342, + "epoch": 0.7254088739337976, "grad_norm": 0.0, - "learning_rate": 9.664152296961876e-06, - "loss": 0.8814, + "learning_rate": 3.7007741432999734e-06, + "loss": 0.9842, "step": 18540 }, { - "epoch": 0.5254045169882966, + "epoch": 0.7254480006260271, "grad_norm": 0.0, - "learning_rate": 9.663235026862976e-06, - "loss": 0.7881, + "learning_rate": 3.699789988791367e-06, + "loss": 0.8668, "step": 18541 }, { - "epoch": 0.5254328544305591, + "epoch": 0.7254871273182565, "grad_norm": 0.0, - "learning_rate": 9.662317759600774e-06, - "loss": 0.8811, + "learning_rate": 3.698805935456242e-06, + "loss": 1.0886, "step": 18542 }, { - "epoch": 0.5254611918728216, + "epoch": 0.725526254010486, "grad_norm": 0.0, - "learning_rate": 9.661400495182991e-06, - "loss": 0.894, + "learning_rate": 3.6978219833103946e-06, + "loss": 1.0965, "step": 18543 }, { - "epoch": 0.525489529315084, + "epoch": 0.7255653807027154, "grad_norm": 0.0, - "learning_rate": 9.660483233617359e-06, - "loss": 0.8412, + "learning_rate": 3.69683813236963e-06, + "loss": 0.9256, "step": 18544 }, { - "epoch": 0.5255178667573465, + "epoch": 0.7256045073949449, "grad_norm": 0.0, - "learning_rate": 9.659565974911608e-06, - "loss": 0.9306, + "learning_rate": 3.6958543826497462e-06, + "loss": 0.9311, "step": 18545 }, { - "epoch": 0.525546204199609, + "epoch": 0.7256436340871742, "grad_norm": 0.0, - "learning_rate": 9.658648719073454e-06, - "loss": 0.8509, + "learning_rate": 3.694870734166545e-06, + "loss": 0.8863, "step": 18546 }, { - "epoch": 0.5255745416418715, + "epoch": 0.7256827607794037, "grad_norm": 0.0, - "learning_rate": 9.657731466110632e-06, - "loss": 0.9077, + "learning_rate": 3.693887186935814e-06, + "loss": 0.9347, "step": 18547 }, { - "epoch": 0.5256028790841338, + "epoch": 0.7257218874716331, "grad_norm": 0.0, - "learning_rate": 9.656814216030861e-06, - "loss": 0.7979, + "learning_rate": 3.6929037409733546e-06, + "loss": 0.9744, "step": 18548 }, { - "epoch": 0.5256312165263963, + "epoch": 0.7257610141638626, "grad_norm": 0.0, - "learning_rate": 9.655896968841873e-06, - "loss": 0.8521, + "learning_rate": 3.691920396294957e-06, + "loss": 1.0496, "step": 18549 }, { - "epoch": 0.5256595539686588, + "epoch": 0.725800140856092, "grad_norm": 0.0, - "learning_rate": 9.654979724551393e-06, - "loss": 0.9602, + "learning_rate": 3.6909371529164174e-06, + "loss": 0.9503, "step": 18550 }, { - "epoch": 0.5256878914109212, + "epoch": 0.7258392675483215, "grad_norm": 0.0, - "learning_rate": 9.654062483167145e-06, - "loss": 0.9834, + "learning_rate": 3.6899540108535183e-06, + "loss": 1.0172, "step": 18551 }, { - "epoch": 0.5257162288531837, + "epoch": 0.7258783942405509, "grad_norm": 0.0, - "learning_rate": 9.653145244696857e-06, - "loss": 0.8793, + "learning_rate": 3.68897097012205e-06, + "loss": 1.1028, "step": 18552 }, { - "epoch": 0.5257445662954462, + "epoch": 0.7259175209327804, "grad_norm": 0.0, - "learning_rate": 9.65222800914826e-06, - "loss": 0.8373, + "learning_rate": 3.6879880307378035e-06, + "loss": 1.106, "step": 18553 }, { - "epoch": 0.5257729037377087, + "epoch": 0.7259566476250098, "grad_norm": 0.0, - "learning_rate": 9.651310776529072e-06, - "loss": 0.7558, + "learning_rate": 3.6870051927165562e-06, + "loss": 0.9484, "step": 18554 }, { - "epoch": 0.5258012411799711, + "epoch": 0.7259957743172393, "grad_norm": 0.0, - "learning_rate": 9.650393546847023e-06, - "loss": 0.9371, + "learning_rate": 3.6860224560740956e-06, + "loss": 1.0945, "step": 18555 }, { - "epoch": 0.5258295786222336, + "epoch": 0.7260349010094687, "grad_norm": 0.0, - "learning_rate": 9.649476320109838e-06, - "loss": 0.8538, + "learning_rate": 3.685039820826205e-06, + "loss": 1.0674, "step": 18556 }, { - "epoch": 0.5258579160644961, + "epoch": 0.726074027701698, "grad_norm": 0.0, - "learning_rate": 9.648559096325245e-06, - "loss": 0.8924, + "learning_rate": 3.684057286988658e-06, + "loss": 1.0325, "step": 18557 }, { - "epoch": 0.5258862535067584, + "epoch": 0.7261131543939275, "grad_norm": 0.0, - "learning_rate": 9.647641875500972e-06, - "loss": 0.9312, + "learning_rate": 3.6830748545772377e-06, + "loss": 0.9135, "step": 18558 }, { - "epoch": 0.5259145909490209, + "epoch": 0.7261522810861569, "grad_norm": 0.0, - "learning_rate": 9.64672465764474e-06, - "loss": 0.924, + "learning_rate": 3.6820925236077232e-06, + "loss": 0.9507, "step": 18559 }, { - "epoch": 0.5259429283912834, + "epoch": 0.7261914077783864, "grad_norm": 0.0, - "learning_rate": 9.645807442764277e-06, - "loss": 0.8282, + "learning_rate": 3.681110294095882e-06, + "loss": 1.0515, "step": 18560 }, { - "epoch": 0.5259712658335458, + "epoch": 0.7262305344706158, "grad_norm": 0.0, - "learning_rate": 9.644890230867313e-06, - "loss": 0.8418, + "learning_rate": 3.6801281660574915e-06, + "loss": 0.973, "step": 18561 }, { - "epoch": 0.5259996032758083, + "epoch": 0.7262696611628453, "grad_norm": 0.0, - "learning_rate": 9.643973021961566e-06, - "loss": 0.8681, + "learning_rate": 3.6791461395083238e-06, + "loss": 0.9799, "step": 18562 }, { - "epoch": 0.5260279407180708, + "epoch": 0.7263087878550747, "grad_norm": 0.0, - "learning_rate": 9.643055816054768e-06, - "loss": 0.8835, + "learning_rate": 3.678164214464152e-06, + "loss": 0.9163, "step": 18563 }, { - "epoch": 0.5260562781603333, + "epoch": 0.7263479145473042, "grad_norm": 0.0, - "learning_rate": 9.642138613154643e-06, - "loss": 0.9112, + "learning_rate": 3.6771823909407377e-06, + "loss": 1.0049, "step": 18564 }, { - "epoch": 0.5260846156025957, + "epoch": 0.7263870412395336, "grad_norm": 0.0, - "learning_rate": 9.641221413268916e-06, - "loss": 0.8484, + "learning_rate": 3.6762006689538543e-06, + "loss": 1.019, "step": 18565 }, { - "epoch": 0.5261129530448582, + "epoch": 0.7264261679317631, "grad_norm": 0.0, - "learning_rate": 9.640304216405318e-06, - "loss": 0.8394, + "learning_rate": 3.6752190485192575e-06, + "loss": 1.0329, "step": 18566 }, { - "epoch": 0.5261412904871207, + "epoch": 0.7264652946239925, "grad_norm": 0.0, - "learning_rate": 9.639387022571569e-06, - "loss": 0.9259, + "learning_rate": 3.6742375296527244e-06, + "loss": 0.92, "step": 18567 }, { - "epoch": 0.526169627929383, + "epoch": 0.726504421316222, "grad_norm": 0.0, - "learning_rate": 9.638469831775395e-06, - "loss": 0.8472, + "learning_rate": 3.673256112370006e-06, + "loss": 1.0453, "step": 18568 }, { - "epoch": 0.5261979653716455, + "epoch": 0.7265435480084513, "grad_norm": 0.0, - "learning_rate": 9.637552644024526e-06, - "loss": 0.8904, + "learning_rate": 3.67227479668687e-06, + "loss": 0.8965, "step": 18569 }, { - "epoch": 0.526226302813908, + "epoch": 0.7265826747006808, "grad_norm": 0.0, - "learning_rate": 9.636635459326685e-06, - "loss": 0.7771, + "learning_rate": 3.6712935826190656e-06, + "loss": 1.0354, "step": 18570 }, { - "epoch": 0.5262546402561705, + "epoch": 0.7266218013929102, "grad_norm": 0.0, - "learning_rate": 9.635718277689602e-06, - "loss": 0.8835, + "learning_rate": 3.6703124701823623e-06, + "loss": 0.9711, "step": 18571 }, { - "epoch": 0.5262829776984329, + "epoch": 0.7266609280851397, "grad_norm": 0.0, - "learning_rate": 9.634801099120996e-06, - "loss": 0.9076, + "learning_rate": 3.6693314593925054e-06, + "loss": 0.8955, "step": 18572 }, { - "epoch": 0.5263113151406954, + "epoch": 0.7267000547773691, "grad_norm": 0.0, - "learning_rate": 9.633883923628595e-06, - "loss": 0.9077, + "learning_rate": 3.6683505502652563e-06, + "loss": 1.0054, "step": 18573 }, { - "epoch": 0.5263396525829579, + "epoch": 0.7267391814695986, "grad_norm": 0.0, - "learning_rate": 9.632966751220127e-06, - "loss": 0.9386, + "learning_rate": 3.6673697428163568e-06, + "loss": 0.9917, "step": 18574 }, { - "epoch": 0.5263679900252203, + "epoch": 0.726778308161828, "grad_norm": 0.0, - "learning_rate": 9.632049581903315e-06, - "loss": 0.8947, + "learning_rate": 3.6663890370615705e-06, + "loss": 1.0978, "step": 18575 }, { - "epoch": 0.5263963274674828, + "epoch": 0.7268174348540575, "grad_norm": 0.0, - "learning_rate": 9.63113241568589e-06, - "loss": 0.8261, + "learning_rate": 3.6654084330166362e-06, + "loss": 1.0928, "step": 18576 }, { - "epoch": 0.5264246649097453, + "epoch": 0.7268565615462869, "grad_norm": 0.0, - "learning_rate": 9.63021525257557e-06, - "loss": 0.8234, + "learning_rate": 3.6644279306973083e-06, + "loss": 1.0273, "step": 18577 }, { - "epoch": 0.5264530023520078, + "epoch": 0.7268956882385164, "grad_norm": 0.0, - "learning_rate": 9.629298092580083e-06, - "loss": 0.8191, + "learning_rate": 3.6634475301193264e-06, + "loss": 1.075, "step": 18578 }, { - "epoch": 0.5264813397942701, + "epoch": 0.7269348149307457, "grad_norm": 0.0, - "learning_rate": 9.62838093570716e-06, - "loss": 0.8199, + "learning_rate": 3.6624672312984367e-06, + "loss": 1.0994, "step": 18579 }, { - "epoch": 0.5265096772365326, + "epoch": 0.7269739416229752, "grad_norm": 0.0, - "learning_rate": 9.627463781964521e-06, - "loss": 0.9205, + "learning_rate": 3.6614870342503806e-06, + "loss": 0.9298, "step": 18580 }, { - "epoch": 0.5265380146787951, + "epoch": 0.7270130683152046, "grad_norm": 0.0, - "learning_rate": 9.62654663135989e-06, - "loss": 0.8981, + "learning_rate": 3.6605069389909044e-06, + "loss": 1.1204, "step": 18581 }, { - "epoch": 0.5265663521210575, + "epoch": 0.7270521950074341, "grad_norm": 0.0, - "learning_rate": 9.625629483901e-06, - "loss": 0.8886, + "learning_rate": 3.6595269455357395e-06, + "loss": 0.9583, "step": 18582 }, { - "epoch": 0.52659468956332, + "epoch": 0.7270913216996635, "grad_norm": 0.0, - "learning_rate": 9.624712339595567e-06, - "loss": 0.8848, + "learning_rate": 3.6585470539006274e-06, + "loss": 1.0483, "step": 18583 }, { - "epoch": 0.5266230270055825, + "epoch": 0.727130448391893, "grad_norm": 0.0, - "learning_rate": 9.623795198451326e-06, - "loss": 1.0312, + "learning_rate": 3.6575672641013028e-06, + "loss": 0.9148, "step": 18584 }, { - "epoch": 0.5266513644478449, + "epoch": 0.7271695750841224, "grad_norm": 0.0, - "learning_rate": 9.622878060475995e-06, - "loss": 0.9328, + "learning_rate": 3.6565875761535032e-06, + "loss": 0.9312, "step": 18585 }, { - "epoch": 0.5266797018901074, + "epoch": 0.7272087017763518, "grad_norm": 0.0, - "learning_rate": 9.621960925677301e-06, - "loss": 0.8988, + "learning_rate": 3.6556079900729555e-06, + "loss": 0.9705, "step": 18586 }, { - "epoch": 0.5267080393323699, + "epoch": 0.7272478284685813, "grad_norm": 0.0, - "learning_rate": 9.621043794062974e-06, - "loss": 0.9519, + "learning_rate": 3.654628505875397e-06, + "loss": 1.0225, "step": 18587 }, { - "epoch": 0.5267363767746324, + "epoch": 0.7272869551608107, "grad_norm": 0.0, - "learning_rate": 9.620126665640732e-06, - "loss": 0.7772, + "learning_rate": 3.653649123576547e-06, + "loss": 0.8918, "step": 18588 }, { - "epoch": 0.5267647142168947, + "epoch": 0.7273260818530402, "grad_norm": 0.0, - "learning_rate": 9.619209540418307e-06, - "loss": 0.9358, + "learning_rate": 3.6526698431921458e-06, + "loss": 1.0784, "step": 18589 }, { - "epoch": 0.5267930516591572, + "epoch": 0.7273652085452695, "grad_norm": 0.0, - "learning_rate": 9.618292418403422e-06, - "loss": 0.9441, + "learning_rate": 3.6516906647379103e-06, + "loss": 0.8969, "step": 18590 }, { - "epoch": 0.5268213891014197, + "epoch": 0.727404335237499, "grad_norm": 0.0, - "learning_rate": 9.617375299603799e-06, - "loss": 0.9311, + "learning_rate": 3.650711588229572e-06, + "loss": 1.0334, "step": 18591 }, { - "epoch": 0.5268497265436821, + "epoch": 0.7274434619297284, "grad_norm": 0.0, - "learning_rate": 9.616458184027171e-06, - "loss": 0.8064, + "learning_rate": 3.649732613682845e-06, + "loss": 1.0466, "step": 18592 }, { - "epoch": 0.5268780639859446, + "epoch": 0.7274825886219579, "grad_norm": 0.0, - "learning_rate": 9.615541071681253e-06, - "loss": 1.013, + "learning_rate": 3.6487537411134545e-06, + "loss": 0.8871, "step": 18593 }, { - "epoch": 0.5269064014282071, + "epoch": 0.7275217153141873, "grad_norm": 0.0, - "learning_rate": 9.614623962573776e-06, - "loss": 0.9071, + "learning_rate": 3.6477749705371215e-06, + "loss": 0.9821, "step": 18594 }, { - "epoch": 0.5269347388704696, + "epoch": 0.7275608420064168, "grad_norm": 0.0, - "learning_rate": 9.613706856712466e-06, - "loss": 1.0063, + "learning_rate": 3.646796301969565e-06, + "loss": 1.0359, "step": 18595 }, { - "epoch": 0.526963076312732, + "epoch": 0.7275999686986462, "grad_norm": 0.0, - "learning_rate": 9.612789754105044e-06, - "loss": 0.8814, + "learning_rate": 3.6458177354264967e-06, + "loss": 0.9586, "step": 18596 }, { - "epoch": 0.5269914137549945, + "epoch": 0.7276390953908757, "grad_norm": 0.0, - "learning_rate": 9.611872654759242e-06, - "loss": 0.8171, + "learning_rate": 3.6448392709236324e-06, + "loss": 1.0088, "step": 18597 }, { - "epoch": 0.527019751197257, + "epoch": 0.7276782220831051, "grad_norm": 0.0, - "learning_rate": 9.610955558682778e-06, - "loss": 0.8444, + "learning_rate": 3.6438609084766894e-06, + "loss": 1.0083, "step": 18598 }, { - "epoch": 0.5270480886395194, + "epoch": 0.7277173487753346, "grad_norm": 0.0, - "learning_rate": 9.610038465883378e-06, - "loss": 0.8535, + "learning_rate": 3.6428826481013725e-06, + "loss": 0.9538, "step": 18599 }, { - "epoch": 0.5270764260817818, + "epoch": 0.727756475467564, "grad_norm": 0.0, - "learning_rate": 9.609121376368772e-06, - "loss": 0.9162, + "learning_rate": 3.641904489813395e-06, + "loss": 1.0193, "step": 18600 }, { - "epoch": 0.5271047635240443, + "epoch": 0.7277956021597934, "grad_norm": 0.0, - "learning_rate": 9.60820429014668e-06, - "loss": 0.8909, + "learning_rate": 3.6409264336284635e-06, + "loss": 1.011, "step": 18601 }, { - "epoch": 0.5271331009663068, + "epoch": 0.7278347288520228, "grad_norm": 0.0, - "learning_rate": 9.607287207224828e-06, - "loss": 0.9493, + "learning_rate": 3.6399484795622874e-06, + "loss": 0.8738, "step": 18602 }, { - "epoch": 0.5271614384085692, + "epoch": 0.7278738555442523, "grad_norm": 0.0, - "learning_rate": 9.606370127610946e-06, - "loss": 0.9156, + "learning_rate": 3.638970627630567e-06, + "loss": 0.9989, "step": 18603 }, { - "epoch": 0.5271897758508317, + "epoch": 0.7279129822364817, "grad_norm": 0.0, - "learning_rate": 9.605453051312749e-06, - "loss": 0.8934, + "learning_rate": 3.6379928778490117e-06, + "loss": 0.9689, "step": 18604 }, { - "epoch": 0.5272181132930942, + "epoch": 0.7279521089287112, "grad_norm": 0.0, - "learning_rate": 9.604535978337974e-06, - "loss": 0.9213, + "learning_rate": 3.637015230233314e-06, + "loss": 0.9969, "step": 18605 }, { - "epoch": 0.5272464507353566, + "epoch": 0.7279912356209406, "grad_norm": 0.0, - "learning_rate": 9.603618908694335e-06, - "loss": 0.8987, + "learning_rate": 3.6360376847991785e-06, + "loss": 0.9385, "step": 18606 }, { - "epoch": 0.5272747881776191, + "epoch": 0.7280303623131701, "grad_norm": 0.0, - "learning_rate": 9.60270184238956e-06, - "loss": 0.8683, + "learning_rate": 3.635060241562304e-06, + "loss": 1.1137, "step": 18607 }, { - "epoch": 0.5273031256198816, + "epoch": 0.7280694890053995, "grad_norm": 0.0, - "learning_rate": 9.601784779431376e-06, - "loss": 0.8224, + "learning_rate": 3.6340829005383893e-06, + "loss": 0.9856, "step": 18608 }, { - "epoch": 0.527331463062144, + "epoch": 0.728108615697629, "grad_norm": 0.0, - "learning_rate": 9.600867719827507e-06, - "loss": 0.8638, + "learning_rate": 3.6331056617431224e-06, + "loss": 0.8272, "step": 18609 }, { - "epoch": 0.5273598005044065, + "epoch": 0.7281477423898584, "grad_norm": 0.0, - "learning_rate": 9.599950663585677e-06, - "loss": 0.8582, + "learning_rate": 3.632128525192201e-06, + "loss": 1.0792, "step": 18610 }, { - "epoch": 0.527388137946669, + "epoch": 0.7281868690820879, "grad_norm": 0.0, - "learning_rate": 9.599033610713614e-06, - "loss": 0.9348, + "learning_rate": 3.6311514909013155e-06, + "loss": 0.9043, "step": 18611 }, { - "epoch": 0.5274164753889314, + "epoch": 0.7282259957743172, "grad_norm": 0.0, - "learning_rate": 9.598116561219036e-06, - "loss": 0.9323, + "learning_rate": 3.63017455888616e-06, + "loss": 1.1376, "step": 18612 }, { - "epoch": 0.5274448128311938, + "epoch": 0.7282651224665467, "grad_norm": 0.0, - "learning_rate": 9.597199515109674e-06, - "loss": 0.8093, + "learning_rate": 3.629197729162417e-06, + "loss": 1.0641, "step": 18613 }, { - "epoch": 0.5274731502734563, + "epoch": 0.7283042491587761, "grad_norm": 0.0, - "learning_rate": 9.596282472393248e-06, - "loss": 0.8371, + "learning_rate": 3.6282210017457775e-06, + "loss": 1.0013, "step": 18614 }, { - "epoch": 0.5275014877157188, + "epoch": 0.7283433758510055, "grad_norm": 0.0, - "learning_rate": 9.595365433077484e-06, - "loss": 0.8332, + "learning_rate": 3.6272443766519183e-06, + "loss": 1.0188, "step": 18615 }, { - "epoch": 0.5275298251579812, + "epoch": 0.728382502543235, "grad_norm": 0.0, - "learning_rate": 9.594448397170112e-06, - "loss": 0.9137, + "learning_rate": 3.6262678538965357e-06, + "loss": 0.9207, "step": 18616 }, { - "epoch": 0.5275581626002437, + "epoch": 0.7284216292354644, "grad_norm": 0.0, - "learning_rate": 9.593531364678848e-06, - "loss": 0.8383, + "learning_rate": 3.6252914334953017e-06, + "loss": 1.0642, "step": 18617 }, { - "epoch": 0.5275865000425062, + "epoch": 0.7284607559276939, "grad_norm": 0.0, - "learning_rate": 9.592614335611418e-06, - "loss": 0.8146, + "learning_rate": 3.624315115463901e-06, + "loss": 0.9276, "step": 18618 }, { - "epoch": 0.5276148374847687, + "epoch": 0.7284998826199233, "grad_norm": 0.0, - "learning_rate": 9.591697309975556e-06, - "loss": 0.7436, + "learning_rate": 3.6233388998180054e-06, + "loss": 0.927, "step": 18619 }, { - "epoch": 0.5276431749270311, + "epoch": 0.7285390093121528, "grad_norm": 0.0, - "learning_rate": 9.590780287778973e-06, - "loss": 0.9753, + "learning_rate": 3.6223627865733025e-06, + "loss": 0.9797, "step": 18620 }, { - "epoch": 0.5276715123692935, + "epoch": 0.7285781360043821, "grad_norm": 0.0, - "learning_rate": 9.589863269029402e-06, - "loss": 0.818, + "learning_rate": 3.6213867757454578e-06, + "loss": 0.9789, "step": 18621 }, { - "epoch": 0.527699849811556, + "epoch": 0.7286172626966116, "grad_norm": 0.0, - "learning_rate": 9.588946253734564e-06, - "loss": 0.9741, + "learning_rate": 3.6204108673501526e-06, + "loss": 1.0113, "step": 18622 }, { - "epoch": 0.5277281872538184, + "epoch": 0.728656389388841, "grad_norm": 0.0, - "learning_rate": 9.588029241902182e-06, - "loss": 0.8474, + "learning_rate": 3.6194350614030514e-06, + "loss": 1.0945, "step": 18623 }, { - "epoch": 0.5277565246960809, + "epoch": 0.7286955160810705, "grad_norm": 0.0, - "learning_rate": 9.587112233539988e-06, - "loss": 0.9324, + "learning_rate": 3.618459357919828e-06, + "loss": 0.9873, "step": 18624 }, { - "epoch": 0.5277848621383434, + "epoch": 0.7287346427732999, "grad_norm": 0.0, - "learning_rate": 9.586195228655698e-06, - "loss": 0.9247, + "learning_rate": 3.6174837569161513e-06, + "loss": 1.0565, "step": 18625 }, { - "epoch": 0.5278131995806059, + "epoch": 0.7287737694655294, "grad_norm": 0.0, - "learning_rate": 9.585278227257042e-06, - "loss": 0.9302, + "learning_rate": 3.6165082584076906e-06, + "loss": 0.976, "step": 18626 }, { - "epoch": 0.5278415370228683, + "epoch": 0.7288128961577588, "grad_norm": 0.0, - "learning_rate": 9.584361229351735e-06, - "loss": 0.7748, + "learning_rate": 3.6155328624101036e-06, + "loss": 0.9516, "step": 18627 }, { - "epoch": 0.5278698744651308, + "epoch": 0.7288520228499883, "grad_norm": 0.0, - "learning_rate": 9.583444234947513e-06, - "loss": 0.8642, + "learning_rate": 3.614557568939061e-06, + "loss": 1.0144, "step": 18628 }, { - "epoch": 0.5278982119073933, + "epoch": 0.7288911495422177, "grad_norm": 0.0, - "learning_rate": 9.582527244052095e-06, - "loss": 0.9861, + "learning_rate": 3.613582378010221e-06, + "loss": 0.9719, "step": 18629 }, { - "epoch": 0.5279265493496557, + "epoch": 0.7289302762344472, "grad_norm": 0.0, - "learning_rate": 9.581610256673205e-06, - "loss": 0.858, + "learning_rate": 3.612607289639248e-06, + "loss": 1.0866, "step": 18630 }, { - "epoch": 0.5279548867919182, + "epoch": 0.7289694029266766, "grad_norm": 0.0, - "learning_rate": 9.580693272818564e-06, - "loss": 0.8362, + "learning_rate": 3.611632303841797e-06, + "loss": 0.9716, "step": 18631 }, { - "epoch": 0.5279832242341806, + "epoch": 0.729008529618906, "grad_norm": 0.0, - "learning_rate": 9.579776292495903e-06, - "loss": 0.9164, + "learning_rate": 3.6106574206335244e-06, + "loss": 1.0997, "step": 18632 }, { - "epoch": 0.528011561676443, + "epoch": 0.7290476563111354, "grad_norm": 0.0, - "learning_rate": 9.578859315712939e-06, - "loss": 0.8608, + "learning_rate": 3.6096826400300875e-06, + "loss": 0.9918, "step": 18633 }, { - "epoch": 0.5280398991187055, + "epoch": 0.7290867830033649, "grad_norm": 0.0, - "learning_rate": 9.577942342477403e-06, - "loss": 0.9725, + "learning_rate": 3.6087079620471443e-06, + "loss": 1.0207, "step": 18634 }, { - "epoch": 0.528068236560968, + "epoch": 0.7291259096955943, "grad_norm": 0.0, - "learning_rate": 9.577025372797012e-06, - "loss": 0.7521, + "learning_rate": 3.6077333867003382e-06, + "loss": 1.0304, "step": 18635 }, { - "epoch": 0.5280965740032305, + "epoch": 0.7291650363878238, "grad_norm": 0.0, - "learning_rate": 9.576108406679493e-06, - "loss": 0.9554, + "learning_rate": 3.606758914005327e-06, + "loss": 0.9539, "step": 18636 }, { - "epoch": 0.5281249114454929, + "epoch": 0.7292041630800532, "grad_norm": 0.0, - "learning_rate": 9.575191444132574e-06, - "loss": 0.8547, + "learning_rate": 3.605784543977754e-06, + "loss": 0.8312, "step": 18637 }, { - "epoch": 0.5281532488877554, + "epoch": 0.7292432897722827, "grad_norm": 0.0, - "learning_rate": 9.574274485163972e-06, - "loss": 0.6726, + "learning_rate": 3.6048102766332683e-06, + "loss": 0.8896, "step": 18638 }, { - "epoch": 0.5281815863300179, + "epoch": 0.7292824164645121, "grad_norm": 0.0, - "learning_rate": 9.573357529781413e-06, - "loss": 0.9159, + "learning_rate": 3.6038361119875154e-06, + "loss": 0.9284, "step": 18639 }, { - "epoch": 0.5282099237722803, + "epoch": 0.7293215431567416, "grad_norm": 0.0, - "learning_rate": 9.572440577992626e-06, - "loss": 0.8517, + "learning_rate": 3.602862050056144e-06, + "loss": 0.9865, "step": 18640 }, { - "epoch": 0.5282382612145428, + "epoch": 0.729360669848971, "grad_norm": 0.0, - "learning_rate": 9.571523629805328e-06, - "loss": 0.9135, + "learning_rate": 3.6018880908547884e-06, + "loss": 1.1137, "step": 18641 }, { - "epoch": 0.5282665986568053, + "epoch": 0.7293997965412004, "grad_norm": 0.0, - "learning_rate": 9.57060668522725e-06, - "loss": 0.9599, + "learning_rate": 3.6009142343990934e-06, + "loss": 0.9693, "step": 18642 }, { - "epoch": 0.5282949360990677, + "epoch": 0.7294389232334298, "grad_norm": 0.0, - "learning_rate": 9.569689744266107e-06, - "loss": 0.8801, + "learning_rate": 3.5999404807047e-06, + "loss": 1.0441, "step": 18643 }, { - "epoch": 0.5283232735413301, + "epoch": 0.7294780499256592, "grad_norm": 0.0, - "learning_rate": 9.568772806929626e-06, - "loss": 0.8496, + "learning_rate": 3.5989668297872392e-06, + "loss": 1.0304, "step": 18644 }, { - "epoch": 0.5283516109835926, + "epoch": 0.7295171766178887, "grad_norm": 0.0, - "learning_rate": 9.567855873225536e-06, - "loss": 0.9051, + "learning_rate": 3.59799328166235e-06, + "loss": 0.8556, "step": 18645 }, { - "epoch": 0.5283799484258551, + "epoch": 0.7295563033101181, "grad_norm": 0.0, - "learning_rate": 9.566938943161555e-06, - "loss": 0.7904, + "learning_rate": 3.5970198363456665e-06, + "loss": 1.0248, "step": 18646 }, { - "epoch": 0.5284082858681175, + "epoch": 0.7295954300023476, "grad_norm": 0.0, - "learning_rate": 9.566022016745406e-06, - "loss": 0.8706, + "learning_rate": 3.596046493852825e-06, + "loss": 0.9718, "step": 18647 }, { - "epoch": 0.52843662331038, + "epoch": 0.729634556694577, "grad_norm": 0.0, - "learning_rate": 9.565105093984815e-06, - "loss": 0.8602, + "learning_rate": 3.5950732541994494e-06, + "loss": 0.9668, "step": 18648 }, { - "epoch": 0.5284649607526425, + "epoch": 0.7296736833868065, "grad_norm": 0.0, - "learning_rate": 9.564188174887505e-06, - "loss": 1.0027, + "learning_rate": 3.5941001174011738e-06, + "loss": 0.9795, "step": 18649 }, { - "epoch": 0.528493298194905, + "epoch": 0.7297128100790359, "grad_norm": 0.0, - "learning_rate": 9.563271259461206e-06, - "loss": 0.9341, + "learning_rate": 3.5931270834736164e-06, + "loss": 0.9237, "step": 18650 }, { - "epoch": 0.5285216356371674, + "epoch": 0.7297519367712654, "grad_norm": 0.0, - "learning_rate": 9.562354347713628e-06, - "loss": 0.8687, + "learning_rate": 3.5921541524324165e-06, + "loss": 0.9897, "step": 18651 }, { - "epoch": 0.5285499730794299, + "epoch": 0.7297910634634948, "grad_norm": 0.0, - "learning_rate": 9.561437439652503e-06, - "loss": 0.8931, + "learning_rate": 3.591181324293189e-06, + "loss": 1.0971, "step": 18652 }, { - "epoch": 0.5285783105216924, + "epoch": 0.7298301901557243, "grad_norm": 0.0, - "learning_rate": 9.560520535285555e-06, - "loss": 0.8589, + "learning_rate": 3.590208599071562e-06, + "loss": 1.0456, "step": 18653 }, { - "epoch": 0.5286066479639547, + "epoch": 0.7298693168479536, "grad_norm": 0.0, - "learning_rate": 9.559603634620505e-06, - "loss": 0.9202, + "learning_rate": 3.589235976783149e-06, + "loss": 0.8835, "step": 18654 }, { - "epoch": 0.5286349854062172, + "epoch": 0.7299084435401831, "grad_norm": 0.0, - "learning_rate": 9.55868673766508e-06, - "loss": 0.829, + "learning_rate": 3.5882634574435737e-06, + "loss": 1.0286, "step": 18655 }, { - "epoch": 0.5286633228484797, + "epoch": 0.7299475702324125, "grad_norm": 0.0, - "learning_rate": 9.557769844426997e-06, - "loss": 0.8473, + "learning_rate": 3.5872910410684525e-06, + "loss": 1.01, "step": 18656 }, { - "epoch": 0.5286916602907421, + "epoch": 0.729986696924642, "grad_norm": 0.0, - "learning_rate": 9.556852954913983e-06, - "loss": 0.8293, + "learning_rate": 3.5863187276734045e-06, + "loss": 0.8284, "step": 18657 }, { - "epoch": 0.5287199977330046, + "epoch": 0.7300258236168714, "grad_norm": 0.0, - "learning_rate": 9.555936069133765e-06, - "loss": 1.0084, + "learning_rate": 3.5853465172740387e-06, + "loss": 0.9449, "step": 18658 }, { - "epoch": 0.5287483351752671, + "epoch": 0.7300649503091009, "grad_norm": 0.0, - "learning_rate": 9.555019187094058e-06, - "loss": 0.8669, + "learning_rate": 3.584374409885969e-06, + "loss": 0.9694, "step": 18659 }, { - "epoch": 0.5287766726175296, + "epoch": 0.7301040770013303, "grad_norm": 0.0, - "learning_rate": 9.55410230880259e-06, - "loss": 0.8041, + "learning_rate": 3.5834024055248072e-06, + "loss": 0.9373, "step": 18660 }, { - "epoch": 0.528805010059792, + "epoch": 0.7301432036935598, "grad_norm": 0.0, - "learning_rate": 9.553185434267084e-06, - "loss": 0.8428, + "learning_rate": 3.5824305042061656e-06, + "loss": 0.9794, "step": 18661 }, { - "epoch": 0.5288333475020545, + "epoch": 0.7301823303857892, "grad_norm": 0.0, - "learning_rate": 9.552268563495264e-06, - "loss": 0.8628, + "learning_rate": 3.5814587059456453e-06, + "loss": 1.0778, "step": 18662 }, { - "epoch": 0.528861684944317, + "epoch": 0.7302214570780187, "grad_norm": 0.0, - "learning_rate": 9.551351696494854e-06, - "loss": 0.9615, + "learning_rate": 3.5804870107588585e-06, + "loss": 0.9858, "step": 18663 }, { - "epoch": 0.5288900223865793, + "epoch": 0.730260583770248, "grad_norm": 0.0, - "learning_rate": 9.550434833273572e-06, - "loss": 0.8487, + "learning_rate": 3.579515418661399e-06, + "loss": 1.0226, "step": 18664 }, { - "epoch": 0.5289183598288418, + "epoch": 0.7302997104624775, "grad_norm": 0.0, - "learning_rate": 9.549517973839143e-06, - "loss": 0.8026, + "learning_rate": 3.578543929668884e-06, + "loss": 1.0191, "step": 18665 }, { - "epoch": 0.5289466972711043, + "epoch": 0.7303388371547069, "grad_norm": 0.0, - "learning_rate": 9.548601118199297e-06, - "loss": 0.8409, + "learning_rate": 3.5775725437969033e-06, + "loss": 1.0194, "step": 18666 }, { - "epoch": 0.5289750347133668, + "epoch": 0.7303779638469364, "grad_norm": 0.0, - "learning_rate": 9.547684266361746e-06, - "loss": 0.9625, + "learning_rate": 3.5766012610610635e-06, + "loss": 0.9759, "step": 18667 }, { - "epoch": 0.5290033721556292, + "epoch": 0.7304170905391658, "grad_norm": 0.0, - "learning_rate": 9.546767418334219e-06, - "loss": 0.9515, + "learning_rate": 3.575630081476952e-06, + "loss": 1.0704, "step": 18668 }, { - "epoch": 0.5290317095978917, + "epoch": 0.7304562172313953, "grad_norm": 0.0, - "learning_rate": 9.545850574124444e-06, - "loss": 0.9243, + "learning_rate": 3.574659005060177e-06, + "loss": 1.0022, "step": 18669 }, { - "epoch": 0.5290600470401542, + "epoch": 0.7304953439236247, "grad_norm": 0.0, - "learning_rate": 9.544933733740133e-06, - "loss": 0.9027, + "learning_rate": 3.5736880318263243e-06, + "loss": 0.8512, "step": 18670 }, { - "epoch": 0.5290883844824166, + "epoch": 0.7305344706158541, "grad_norm": 0.0, - "learning_rate": 9.544016897189018e-06, - "loss": 0.8997, + "learning_rate": 3.572717161790993e-06, + "loss": 0.9576, "step": 18671 }, { - "epoch": 0.5291167219246791, + "epoch": 0.7305735973080836, "grad_norm": 0.0, - "learning_rate": 9.543100064478815e-06, - "loss": 0.9024, + "learning_rate": 3.5717463949697663e-06, + "loss": 0.9981, "step": 18672 }, { - "epoch": 0.5291450593669416, + "epoch": 0.730612724000313, "grad_norm": 0.0, - "learning_rate": 9.54218323561725e-06, - "loss": 0.821, + "learning_rate": 3.5707757313782366e-06, + "loss": 0.8874, "step": 18673 }, { - "epoch": 0.529173396809204, + "epoch": 0.7306518506925425, "grad_norm": 0.0, - "learning_rate": 9.541266410612045e-06, - "loss": 0.8692, + "learning_rate": 3.5698051710319936e-06, + "loss": 1.0547, "step": 18674 }, { - "epoch": 0.5292017342514664, + "epoch": 0.7306909773847718, "grad_norm": 0.0, - "learning_rate": 9.540349589470923e-06, - "loss": 0.9198, + "learning_rate": 3.568834713946625e-06, + "loss": 1.1039, "step": 18675 }, { - "epoch": 0.5292300716937289, + "epoch": 0.7307301040770013, "grad_norm": 0.0, - "learning_rate": 9.539432772201606e-06, - "loss": 0.823, + "learning_rate": 3.567864360137708e-06, + "loss": 0.9441, "step": 18676 }, { - "epoch": 0.5292584091359914, + "epoch": 0.7307692307692307, "grad_norm": 0.0, - "learning_rate": 9.538515958811824e-06, - "loss": 0.7651, + "learning_rate": 3.566894109620831e-06, + "loss": 0.9496, "step": 18677 }, { - "epoch": 0.5292867465782538, + "epoch": 0.7308083574614602, "grad_norm": 0.0, - "learning_rate": 9.537599149309288e-06, - "loss": 0.8811, + "learning_rate": 3.565923962411573e-06, + "loss": 1.0225, "step": 18678 }, { - "epoch": 0.5293150840205163, + "epoch": 0.7308474841536896, "grad_norm": 0.0, - "learning_rate": 9.53668234370173e-06, - "loss": 0.9056, + "learning_rate": 3.5649539185255167e-06, + "loss": 0.9185, "step": 18679 }, { - "epoch": 0.5293434214627788, + "epoch": 0.7308866108459191, "grad_norm": 0.0, - "learning_rate": 9.535765541996864e-06, - "loss": 0.8998, + "learning_rate": 3.5639839779782336e-06, + "loss": 0.9585, "step": 18680 }, { - "epoch": 0.5293717589050412, + "epoch": 0.7309257375381485, "grad_norm": 0.0, - "learning_rate": 9.53484874420242e-06, - "loss": 0.8866, + "learning_rate": 3.5630141407853068e-06, + "loss": 0.9841, "step": 18681 }, { - "epoch": 0.5294000963473037, + "epoch": 0.730964864230378, "grad_norm": 0.0, - "learning_rate": 9.533931950326118e-06, - "loss": 0.8107, + "learning_rate": 3.562044406962303e-06, + "loss": 1.0084, "step": 18682 }, { - "epoch": 0.5294284337895662, + "epoch": 0.7310039909226074, "grad_norm": 0.0, - "learning_rate": 9.53301516037568e-06, - "loss": 0.8589, + "learning_rate": 3.561074776524799e-06, + "loss": 0.8344, "step": 18683 }, { - "epoch": 0.5294567712318287, + "epoch": 0.7310431176148369, "grad_norm": 0.0, - "learning_rate": 9.532098374358828e-06, - "loss": 0.867, + "learning_rate": 3.560105249488366e-06, + "loss": 0.8331, "step": 18684 }, { - "epoch": 0.529485108674091, + "epoch": 0.7310822443070663, "grad_norm": 0.0, - "learning_rate": 9.531181592283285e-06, - "loss": 0.9006, + "learning_rate": 3.559135825868576e-06, + "loss": 0.943, "step": 18685 }, { - "epoch": 0.5295134461163535, + "epoch": 0.7311213709992957, "grad_norm": 0.0, - "learning_rate": 9.530264814156772e-06, - "loss": 0.9967, + "learning_rate": 3.5581665056809912e-06, + "loss": 0.9916, "step": 18686 }, { - "epoch": 0.529541783558616, + "epoch": 0.7311604976915251, "grad_norm": 0.0, - "learning_rate": 9.52934803998702e-06, - "loss": 0.8327, + "learning_rate": 3.557197288941179e-06, + "loss": 0.9001, "step": 18687 }, { - "epoch": 0.5295701210008784, + "epoch": 0.7311996243837546, "grad_norm": 0.0, - "learning_rate": 9.528431269781737e-06, - "loss": 0.8651, + "learning_rate": 3.556228175664709e-06, + "loss": 0.9999, "step": 18688 }, { - "epoch": 0.5295984584431409, + "epoch": 0.731238751075984, "grad_norm": 0.0, - "learning_rate": 9.52751450354865e-06, - "loss": 0.8224, + "learning_rate": 3.5552591658671365e-06, + "loss": 1.0369, "step": 18689 }, { - "epoch": 0.5296267958854034, + "epoch": 0.7312778777682135, "grad_norm": 0.0, - "learning_rate": 9.526597741295493e-06, - "loss": 0.7591, + "learning_rate": 3.5542902595640273e-06, + "loss": 0.9671, "step": 18690 }, { - "epoch": 0.5296551333276659, + "epoch": 0.7313170044604429, "grad_norm": 0.0, - "learning_rate": 9.525680983029973e-06, - "loss": 0.8896, + "learning_rate": 3.5533214567709383e-06, + "loss": 0.8964, "step": 18691 }, { - "epoch": 0.5296834707699283, + "epoch": 0.7313561311526724, "grad_norm": 0.0, - "learning_rate": 9.524764228759822e-06, - "loss": 0.853, + "learning_rate": 3.552352757503432e-06, + "loss": 1.0366, "step": 18692 }, { - "epoch": 0.5297118082121908, + "epoch": 0.7313952578449018, "grad_norm": 0.0, - "learning_rate": 9.523847478492754e-06, - "loss": 0.8806, + "learning_rate": 3.5513841617770583e-06, + "loss": 0.9774, "step": 18693 }, { - "epoch": 0.5297401456544533, + "epoch": 0.7314343845371313, "grad_norm": 0.0, - "learning_rate": 9.522930732236497e-06, - "loss": 0.7728, + "learning_rate": 3.5504156696073767e-06, + "loss": 0.9022, "step": 18694 }, { - "epoch": 0.5297684830967156, + "epoch": 0.7314735112293607, "grad_norm": 0.0, - "learning_rate": 9.522013989998773e-06, - "loss": 0.9777, + "learning_rate": 3.5494472810099325e-06, + "loss": 0.8968, "step": 18695 }, { - "epoch": 0.5297968205389781, + "epoch": 0.7315126379215902, "grad_norm": 0.0, - "learning_rate": 9.521097251787302e-06, - "loss": 0.9363, + "learning_rate": 3.5484789960002876e-06, + "loss": 1.0055, "step": 18696 }, { - "epoch": 0.5298251579812406, + "epoch": 0.7315517646138195, "grad_norm": 0.0, - "learning_rate": 9.520180517609806e-06, - "loss": 0.8676, + "learning_rate": 3.547510814593982e-06, + "loss": 1.0938, "step": 18697 }, { - "epoch": 0.5298534954235031, + "epoch": 0.731590891306049, "grad_norm": 0.0, - "learning_rate": 9.519263787474008e-06, - "loss": 0.7851, + "learning_rate": 3.5465427368065717e-06, + "loss": 0.9459, "step": 18698 }, { - "epoch": 0.5298818328657655, + "epoch": 0.7316300179982784, "grad_norm": 0.0, - "learning_rate": 9.518347061387629e-06, - "loss": 0.8578, + "learning_rate": 3.5455747626535907e-06, + "loss": 1.0866, "step": 18699 }, { - "epoch": 0.529910170308028, + "epoch": 0.7316691446905078, "grad_norm": 0.0, - "learning_rate": 9.517430339358395e-06, - "loss": 0.8782, + "learning_rate": 3.5446068921505994e-06, + "loss": 0.9555, "step": 18700 }, { - "epoch": 0.5299385077502905, + "epoch": 0.7317082713827373, "grad_norm": 0.0, - "learning_rate": 9.51651362139402e-06, - "loss": 0.8526, + "learning_rate": 3.5436391253131275e-06, + "loss": 0.9703, "step": 18701 }, { - "epoch": 0.5299668451925529, + "epoch": 0.7317473980749667, "grad_norm": 0.0, - "learning_rate": 9.515596907502231e-06, - "loss": 0.8157, + "learning_rate": 3.542671462156725e-06, + "loss": 1.0636, "step": 18702 }, { - "epoch": 0.5299951826348154, + "epoch": 0.7317865247671962, "grad_norm": 0.0, - "learning_rate": 9.514680197690753e-06, - "loss": 0.8619, + "learning_rate": 3.5417039026969246e-06, + "loss": 1.0349, "step": 18703 }, { - "epoch": 0.5300235200770779, + "epoch": 0.7318256514594256, "grad_norm": 0.0, - "learning_rate": 9.5137634919673e-06, - "loss": 1.0096, + "learning_rate": 3.5407364469492657e-06, + "loss": 0.9801, "step": 18704 }, { - "epoch": 0.5300518575193403, + "epoch": 0.7318647781516551, "grad_norm": 0.0, - "learning_rate": 9.512846790339598e-06, - "loss": 0.9353, + "learning_rate": 3.539769094929286e-06, + "loss": 0.8907, "step": 18705 }, { - "epoch": 0.5300801949616027, + "epoch": 0.7319039048438845, "grad_norm": 0.0, - "learning_rate": 9.51193009281537e-06, - "loss": 0.8702, + "learning_rate": 3.5388018466525233e-06, + "loss": 1.1174, "step": 18706 }, { - "epoch": 0.5301085324038652, + "epoch": 0.731943031536114, "grad_norm": 0.0, - "learning_rate": 9.511013399402333e-06, - "loss": 0.8893, + "learning_rate": 3.5378347021345026e-06, + "loss": 0.8555, "step": 18707 }, { - "epoch": 0.5301368698461277, + "epoch": 0.7319821582283433, "grad_norm": 0.0, - "learning_rate": 9.510096710108218e-06, - "loss": 0.7776, + "learning_rate": 3.5368676613907595e-06, + "loss": 0.9969, "step": 18708 }, { - "epoch": 0.5301652072883901, + "epoch": 0.7320212849205728, "grad_norm": 0.0, - "learning_rate": 9.509180024940735e-06, - "loss": 0.9663, + "learning_rate": 3.5359007244368225e-06, + "loss": 1.0062, "step": 18709 }, { - "epoch": 0.5301935447306526, + "epoch": 0.7320604116128022, "grad_norm": 0.0, - "learning_rate": 9.508263343907612e-06, - "loss": 0.9742, + "learning_rate": 3.5349338912882238e-06, + "loss": 0.946, "step": 18710 }, { - "epoch": 0.5302218821729151, + "epoch": 0.7320995383050317, "grad_norm": 0.0, - "learning_rate": 9.50734666701657e-06, - "loss": 0.8568, + "learning_rate": 3.533967161960481e-06, + "loss": 0.993, "step": 18711 }, { - "epoch": 0.5302502196151775, + "epoch": 0.7321386649972611, "grad_norm": 0.0, - "learning_rate": 9.506429994275328e-06, - "loss": 0.8598, + "learning_rate": 3.5330005364691276e-06, + "loss": 0.9599, "step": 18712 }, { - "epoch": 0.53027855705744, + "epoch": 0.7321777916894906, "grad_norm": 0.0, - "learning_rate": 9.505513325691615e-06, - "loss": 0.9174, + "learning_rate": 3.532034014829675e-06, + "loss": 1.0629, "step": 18713 }, { - "epoch": 0.5303068944997025, + "epoch": 0.73221691838172, "grad_norm": 0.0, - "learning_rate": 9.504596661273141e-06, - "loss": 0.9138, + "learning_rate": 3.5310675970576593e-06, + "loss": 0.8217, "step": 18714 }, { - "epoch": 0.530335231941965, + "epoch": 0.7322560450739495, "grad_norm": 0.0, - "learning_rate": 9.503680001027633e-06, - "loss": 0.853, + "learning_rate": 3.5301012831685866e-06, + "loss": 1.0504, "step": 18715 }, { - "epoch": 0.5303635693842274, + "epoch": 0.7322951717661789, "grad_norm": 0.0, - "learning_rate": 9.502763344962818e-06, - "loss": 0.9001, + "learning_rate": 3.5291350731779848e-06, + "loss": 0.8068, "step": 18716 }, { - "epoch": 0.5303919068264898, + "epoch": 0.7323342984584084, "grad_norm": 0.0, - "learning_rate": 9.501846693086408e-06, - "loss": 0.8167, + "learning_rate": 3.5281689671013574e-06, + "loss": 1.0668, "step": 18717 }, { - "epoch": 0.5304202442687523, + "epoch": 0.7323734251506377, "grad_norm": 0.0, - "learning_rate": 9.500930045406129e-06, - "loss": 0.9376, + "learning_rate": 3.527202964954235e-06, + "loss": 0.9335, "step": 18718 }, { - "epoch": 0.5304485817110147, + "epoch": 0.7324125518428672, "grad_norm": 0.0, - "learning_rate": 9.500013401929701e-06, - "loss": 0.944, + "learning_rate": 3.526237066752117e-06, + "loss": 1.012, "step": 18719 }, { - "epoch": 0.5304769191532772, + "epoch": 0.7324516785350966, "grad_norm": 0.0, - "learning_rate": 9.499096762664845e-06, - "loss": 0.9143, + "learning_rate": 3.5252712725105245e-06, + "loss": 0.9618, "step": 18720 }, { - "epoch": 0.5305052565955397, + "epoch": 0.7324908052273261, "grad_norm": 0.0, - "learning_rate": 9.498180127619288e-06, - "loss": 0.8075, + "learning_rate": 3.5243055822449577e-06, + "loss": 1.0019, "step": 18721 }, { - "epoch": 0.5305335940378022, + "epoch": 0.7325299319195555, "grad_norm": 0.0, - "learning_rate": 9.497263496800741e-06, - "loss": 0.836, + "learning_rate": 3.523339995970929e-06, + "loss": 1.0292, "step": 18722 }, { - "epoch": 0.5305619314800646, + "epoch": 0.732569058611785, "grad_norm": 0.0, - "learning_rate": 9.496346870216932e-06, - "loss": 0.9312, + "learning_rate": 3.5223745137039446e-06, + "loss": 0.9477, "step": 18723 }, { - "epoch": 0.5305902689223271, + "epoch": 0.7326081853040144, "grad_norm": 0.0, - "learning_rate": 9.49543024787558e-06, - "loss": 0.9044, + "learning_rate": 3.5214091354595125e-06, + "loss": 0.9987, "step": 18724 }, { - "epoch": 0.5306186063645896, + "epoch": 0.7326473119962439, "grad_norm": 0.0, - "learning_rate": 9.494513629784405e-06, - "loss": 0.7126, + "learning_rate": 3.5204438612531264e-06, + "loss": 0.9572, "step": 18725 }, { - "epoch": 0.530646943806852, + "epoch": 0.7326864386884733, "grad_norm": 0.0, - "learning_rate": 9.49359701595113e-06, - "loss": 0.9209, + "learning_rate": 3.5194786911002944e-06, + "loss": 0.9754, "step": 18726 }, { - "epoch": 0.5306752812491144, + "epoch": 0.7327255653807028, "grad_norm": 0.0, - "learning_rate": 9.49268040638348e-06, - "loss": 0.8435, + "learning_rate": 3.5185136250165163e-06, + "loss": 1.0526, "step": 18727 }, { - "epoch": 0.5307036186913769, + "epoch": 0.7327646920729322, "grad_norm": 0.0, - "learning_rate": 9.491763801089167e-06, - "loss": 0.844, + "learning_rate": 3.517548663017285e-06, + "loss": 0.9366, "step": 18728 }, { - "epoch": 0.5307319561336393, + "epoch": 0.7328038187651615, "grad_norm": 0.0, - "learning_rate": 9.490847200075919e-06, - "loss": 0.8872, + "learning_rate": 3.5165838051180988e-06, + "loss": 0.9742, "step": 18729 }, { - "epoch": 0.5307602935759018, + "epoch": 0.732842945457391, "grad_norm": 0.0, - "learning_rate": 9.48993060335145e-06, - "loss": 0.843, + "learning_rate": 3.5156190513344556e-06, + "loss": 0.985, "step": 18730 }, { - "epoch": 0.5307886310181643, + "epoch": 0.7328820721496204, "grad_norm": 0.0, - "learning_rate": 9.489014010923486e-06, - "loss": 0.8723, + "learning_rate": 3.5146544016818417e-06, + "loss": 0.8951, "step": 18731 }, { - "epoch": 0.5308169684604268, + "epoch": 0.7329211988418499, "grad_norm": 0.0, - "learning_rate": 9.48809742279975e-06, - "loss": 0.9957, + "learning_rate": 3.5136898561757517e-06, + "loss": 0.9468, "step": 18732 }, { - "epoch": 0.5308453059026892, + "epoch": 0.7329603255340793, "grad_norm": 0.0, - "learning_rate": 9.487180838987955e-06, - "loss": 0.973, + "learning_rate": 3.512725414831678e-06, + "loss": 0.9051, "step": 18733 }, { - "epoch": 0.5308736433449517, + "epoch": 0.7329994522263088, "grad_norm": 0.0, - "learning_rate": 9.486264259495827e-06, - "loss": 0.8672, + "learning_rate": 3.5117610776651023e-06, + "loss": 0.8555, "step": 18734 }, { - "epoch": 0.5309019807872142, + "epoch": 0.7330385789185382, "grad_norm": 0.0, - "learning_rate": 9.48534768433109e-06, - "loss": 0.8797, + "learning_rate": 3.510796844691513e-06, + "loss": 1.0214, "step": 18735 }, { - "epoch": 0.5309303182294766, + "epoch": 0.7330777056107677, "grad_norm": 0.0, - "learning_rate": 9.484431113501458e-06, - "loss": 0.8512, + "learning_rate": 3.5098327159263957e-06, + "loss": 0.8996, "step": 18736 }, { - "epoch": 0.530958655671739, + "epoch": 0.7331168323029971, "grad_norm": 0.0, - "learning_rate": 9.483514547014653e-06, - "loss": 0.9558, + "learning_rate": 3.5088686913852353e-06, + "loss": 0.8393, "step": 18737 }, { - "epoch": 0.5309869931140015, + "epoch": 0.7331559589952266, "grad_norm": 0.0, - "learning_rate": 9.482597984878398e-06, - "loss": 0.8631, + "learning_rate": 3.5079047710835055e-06, + "loss": 0.9333, "step": 18738 }, { - "epoch": 0.531015330556264, + "epoch": 0.733195085687456, "grad_norm": 0.0, - "learning_rate": 9.481681427100411e-06, - "loss": 0.8006, + "learning_rate": 3.506940955036695e-06, + "loss": 0.9893, "step": 18739 }, { - "epoch": 0.5310436679985264, + "epoch": 0.7332342123796854, "grad_norm": 0.0, - "learning_rate": 9.480764873688417e-06, - "loss": 0.8104, + "learning_rate": 3.505977243260269e-06, + "loss": 0.9117, "step": 18740 }, { - "epoch": 0.5310720054407889, + "epoch": 0.7332733390719148, "grad_norm": 0.0, - "learning_rate": 9.47984832465013e-06, - "loss": 0.9218, + "learning_rate": 3.5050136357697174e-06, + "loss": 1.0518, "step": 18741 }, { - "epoch": 0.5311003428830514, + "epoch": 0.7333124657641443, "grad_norm": 0.0, - "learning_rate": 9.47893177999328e-06, - "loss": 0.9103, + "learning_rate": 3.5040501325805055e-06, + "loss": 0.8963, "step": 18742 }, { - "epoch": 0.5311286803253138, + "epoch": 0.7333515924563737, "grad_norm": 0.0, - "learning_rate": 9.478015239725573e-06, - "loss": 0.7729, + "learning_rate": 3.503086733708111e-06, + "loss": 0.9951, "step": 18743 }, { - "epoch": 0.5311570177675763, + "epoch": 0.7333907191486032, "grad_norm": 0.0, - "learning_rate": 9.47709870385474e-06, - "loss": 0.9715, + "learning_rate": 3.502123439167997e-06, + "loss": 0.9442, "step": 18744 }, { - "epoch": 0.5311853552098388, + "epoch": 0.7334298458408326, "grad_norm": 0.0, - "learning_rate": 9.476182172388501e-06, - "loss": 0.9179, + "learning_rate": 3.5011602489756437e-06, + "loss": 0.9785, "step": 18745 }, { - "epoch": 0.5312136926521012, + "epoch": 0.7334689725330621, "grad_norm": 0.0, - "learning_rate": 9.47526564533457e-06, - "loss": 0.9172, + "learning_rate": 3.5001971631465117e-06, + "loss": 1.0082, "step": 18746 }, { - "epoch": 0.5312420300943637, + "epoch": 0.7335080992252915, "grad_norm": 0.0, - "learning_rate": 9.474349122700672e-06, - "loss": 0.8864, + "learning_rate": 3.4992341816960693e-06, + "loss": 0.993, "step": 18747 }, { - "epoch": 0.5312703675366262, + "epoch": 0.733547225917521, "grad_norm": 0.0, - "learning_rate": 9.473432604494532e-06, - "loss": 0.8438, + "learning_rate": 3.4982713046397755e-06, + "loss": 0.9693, "step": 18748 }, { - "epoch": 0.5312987049788886, + "epoch": 0.7335863526097504, "grad_norm": 0.0, - "learning_rate": 9.47251609072386e-06, - "loss": 0.9788, + "learning_rate": 3.4973085319931034e-06, + "loss": 0.9939, "step": 18749 }, { - "epoch": 0.531327042421151, + "epoch": 0.7336254793019799, "grad_norm": 0.0, - "learning_rate": 9.471599581396383e-06, - "loss": 0.9257, + "learning_rate": 3.496345863771504e-06, + "loss": 0.8938, "step": 18750 }, { - "epoch": 0.5313553798634135, + "epoch": 0.7336646059942092, "grad_norm": 0.0, - "learning_rate": 9.470683076519815e-06, - "loss": 0.8349, + "learning_rate": 3.4953832999904446e-06, + "loss": 0.97, "step": 18751 }, { - "epoch": 0.531383717305676, + "epoch": 0.7337037326864387, "grad_norm": 0.0, - "learning_rate": 9.469766576101882e-06, - "loss": 0.9345, + "learning_rate": 3.494420840665376e-06, + "loss": 1.0813, "step": 18752 }, { - "epoch": 0.5314120547479384, + "epoch": 0.7337428593786681, "grad_norm": 0.0, - "learning_rate": 9.468850080150306e-06, - "loss": 0.8999, + "learning_rate": 3.493458485811756e-06, + "loss": 0.9327, "step": 18753 }, { - "epoch": 0.5314403921902009, + "epoch": 0.7337819860708976, "grad_norm": 0.0, - "learning_rate": 9.467933588672799e-06, - "loss": 0.868, + "learning_rate": 3.4924962354450388e-06, + "loss": 0.982, "step": 18754 }, { - "epoch": 0.5314687296324634, + "epoch": 0.733821112763127, "grad_norm": 0.0, - "learning_rate": 9.467017101677084e-06, - "loss": 0.9183, + "learning_rate": 3.4915340895806816e-06, + "loss": 0.9325, "step": 18755 }, { - "epoch": 0.5314970670747259, + "epoch": 0.7338602394553564, "grad_norm": 0.0, - "learning_rate": 9.466100619170887e-06, - "loss": 0.9424, + "learning_rate": 3.4905720482341287e-06, + "loss": 0.9654, "step": 18756 }, { - "epoch": 0.5315254045169883, + "epoch": 0.7338993661475859, "grad_norm": 0.0, - "learning_rate": 9.465184141161918e-06, - "loss": 0.9097, + "learning_rate": 3.4896101114208313e-06, + "loss": 0.8959, "step": 18757 }, { - "epoch": 0.5315537419592508, + "epoch": 0.7339384928398153, "grad_norm": 0.0, - "learning_rate": 9.464267667657903e-06, - "loss": 0.8818, + "learning_rate": 3.488648279156237e-06, + "loss": 1.0574, "step": 18758 }, { - "epoch": 0.5315820794015133, + "epoch": 0.7339776195320448, "grad_norm": 0.0, - "learning_rate": 9.46335119866656e-06, - "loss": 0.8978, + "learning_rate": 3.4876865514557966e-06, + "loss": 0.9558, "step": 18759 }, { - "epoch": 0.5316104168437756, + "epoch": 0.7340167462242742, "grad_norm": 0.0, - "learning_rate": 9.462434734195608e-06, - "loss": 0.8596, + "learning_rate": 3.486724928334946e-06, + "loss": 0.8591, "step": 18760 }, { - "epoch": 0.5316387542860381, + "epoch": 0.7340558729165036, "grad_norm": 0.0, - "learning_rate": 9.461518274252772e-06, - "loss": 0.8996, + "learning_rate": 3.4857634098091353e-06, + "loss": 1.0481, "step": 18761 }, { - "epoch": 0.5316670917283006, + "epoch": 0.734094999608733, "grad_norm": 0.0, - "learning_rate": 9.460601818845764e-06, - "loss": 0.8487, + "learning_rate": 3.484801995893794e-06, + "loss": 0.938, "step": 18762 }, { - "epoch": 0.5316954291705631, + "epoch": 0.7341341263009625, "grad_norm": 0.0, - "learning_rate": 9.459685367982308e-06, - "loss": 0.8834, + "learning_rate": 3.483840686604375e-06, + "loss": 0.9955, "step": 18763 }, { - "epoch": 0.5317237666128255, + "epoch": 0.7341732529931919, "grad_norm": 0.0, - "learning_rate": 9.458768921670124e-06, - "loss": 0.9652, + "learning_rate": 3.482879481956307e-06, + "loss": 0.9977, "step": 18764 }, { - "epoch": 0.531752104055088, + "epoch": 0.7342123796854214, "grad_norm": 0.0, - "learning_rate": 9.457852479916927e-06, - "loss": 0.9358, + "learning_rate": 3.4819183819650303e-06, + "loss": 0.9591, "step": 18765 }, { - "epoch": 0.5317804414973505, + "epoch": 0.7342515063776508, "grad_norm": 0.0, - "learning_rate": 9.456936042730445e-06, - "loss": 0.8842, + "learning_rate": 3.4809573866459744e-06, + "loss": 0.9593, "step": 18766 }, { - "epoch": 0.5318087789396129, + "epoch": 0.7342906330698803, "grad_norm": 0.0, - "learning_rate": 9.456019610118388e-06, - "loss": 0.9457, + "learning_rate": 3.4799964960145738e-06, + "loss": 0.9998, "step": 18767 }, { - "epoch": 0.5318371163818754, + "epoch": 0.7343297597621097, "grad_norm": 0.0, - "learning_rate": 9.45510318208848e-06, - "loss": 0.8254, + "learning_rate": 3.4790357100862604e-06, + "loss": 0.9445, "step": 18768 }, { - "epoch": 0.5318654538241379, + "epoch": 0.7343688864543392, "grad_norm": 0.0, - "learning_rate": 9.454186758648444e-06, - "loss": 0.9063, + "learning_rate": 3.478075028876464e-06, + "loss": 1.0154, "step": 18769 }, { - "epoch": 0.5318937912664002, + "epoch": 0.7344080131465686, "grad_norm": 0.0, - "learning_rate": 9.453270339805992e-06, - "loss": 0.951, + "learning_rate": 3.4771144524006072e-06, + "loss": 0.8857, "step": 18770 }, { - "epoch": 0.5319221287086627, + "epoch": 0.7344471398387981, "grad_norm": 0.0, - "learning_rate": 9.452353925568849e-06, - "loss": 0.9572, + "learning_rate": 3.4761539806741194e-06, + "loss": 1.0593, "step": 18771 }, { - "epoch": 0.5319504661509252, + "epoch": 0.7344862665310274, "grad_norm": 0.0, - "learning_rate": 9.451437515944731e-06, - "loss": 0.8939, + "learning_rate": 3.4751936137124265e-06, + "loss": 0.8766, "step": 18772 }, { - "epoch": 0.5319788035931877, + "epoch": 0.7345253932232569, "grad_norm": 0.0, - "learning_rate": 9.450521110941356e-06, - "loss": 0.7906, + "learning_rate": 3.4742333515309457e-06, + "loss": 1.0251, "step": 18773 }, { - "epoch": 0.5320071410354501, + "epoch": 0.7345645199154863, "grad_norm": 0.0, - "learning_rate": 9.449604710566452e-06, - "loss": 0.8386, + "learning_rate": 3.473273194145099e-06, + "loss": 0.9702, "step": 18774 }, { - "epoch": 0.5320354784777126, + "epoch": 0.7346036466077158, "grad_norm": 0.0, - "learning_rate": 9.448688314827727e-06, - "loss": 0.8027, + "learning_rate": 3.472313141570307e-06, + "loss": 0.9907, "step": 18775 }, { - "epoch": 0.5320638159199751, + "epoch": 0.7346427732999452, "grad_norm": 0.0, - "learning_rate": 9.447771923732906e-06, - "loss": 0.8354, + "learning_rate": 3.471353193821989e-06, + "loss": 1.0269, "step": 18776 }, { - "epoch": 0.5320921533622375, + "epoch": 0.7346818999921747, "grad_norm": 0.0, - "learning_rate": 9.446855537289708e-06, - "loss": 0.869, + "learning_rate": 3.470393350915555e-06, + "loss": 0.9456, "step": 18777 }, { - "epoch": 0.5321204908045, + "epoch": 0.7347210266844041, "grad_norm": 0.0, - "learning_rate": 9.44593915550585e-06, - "loss": 0.8876, + "learning_rate": 3.469433612866425e-06, + "loss": 1.0518, "step": 18778 }, { - "epoch": 0.5321488282467625, + "epoch": 0.7347601533766336, "grad_norm": 0.0, - "learning_rate": 9.445022778389057e-06, - "loss": 0.7986, + "learning_rate": 3.4684739796900045e-06, + "loss": 1.0717, "step": 18779 }, { - "epoch": 0.532177165689025, + "epoch": 0.734799280068863, "grad_norm": 0.0, - "learning_rate": 9.444106405947038e-06, - "loss": 0.9072, + "learning_rate": 3.4675144514017078e-06, + "loss": 0.8055, "step": 18780 }, { - "epoch": 0.5322055031312873, + "epoch": 0.7348384067610925, "grad_norm": 0.0, - "learning_rate": 9.443190038187517e-06, - "loss": 1.0038, + "learning_rate": 3.4665550280169435e-06, + "loss": 1.0445, "step": 18781 }, { - "epoch": 0.5322338405735498, + "epoch": 0.7348775334533219, "grad_norm": 0.0, - "learning_rate": 9.442273675118218e-06, - "loss": 0.8272, + "learning_rate": 3.4655957095511206e-06, + "loss": 0.8891, "step": 18782 }, { - "epoch": 0.5322621780158123, + "epoch": 0.7349166601455513, "grad_norm": 0.0, - "learning_rate": 9.441357316746849e-06, - "loss": 0.9501, + "learning_rate": 3.464636496019641e-06, + "loss": 0.9402, "step": 18783 }, { - "epoch": 0.5322905154580747, + "epoch": 0.7349557868377807, "grad_norm": 0.0, - "learning_rate": 9.440440963081136e-06, - "loss": 0.9523, + "learning_rate": 3.463677387437908e-06, + "loss": 1.0401, "step": 18784 }, { - "epoch": 0.5323188529003372, + "epoch": 0.7349949135300101, "grad_norm": 0.0, - "learning_rate": 9.4395246141288e-06, - "loss": 0.8608, + "learning_rate": 3.4627183838213274e-06, + "loss": 0.9835, "step": 18785 }, { - "epoch": 0.5323471903425997, + "epoch": 0.7350340402222396, "grad_norm": 0.0, - "learning_rate": 9.438608269897552e-06, - "loss": 0.9751, + "learning_rate": 3.461759485185301e-06, + "loss": 1.0381, "step": 18786 }, { - "epoch": 0.5323755277848622, + "epoch": 0.735073166914469, "grad_norm": 0.0, - "learning_rate": 9.437691930395121e-06, - "loss": 1.0103, + "learning_rate": 3.46080069154522e-06, + "loss": 1.0397, "step": 18787 }, { - "epoch": 0.5324038652271246, + "epoch": 0.7351122936066985, "grad_norm": 0.0, - "learning_rate": 9.436775595629214e-06, - "loss": 0.8579, + "learning_rate": 3.4598420029164905e-06, + "loss": 0.9728, "step": 18788 }, { - "epoch": 0.5324322026693871, + "epoch": 0.7351514202989279, "grad_norm": 0.0, - "learning_rate": 9.435859265607555e-06, - "loss": 0.8704, + "learning_rate": 3.458883419314495e-06, + "loss": 0.9579, "step": 18789 }, { - "epoch": 0.5324605401116496, + "epoch": 0.7351905469911574, "grad_norm": 0.0, - "learning_rate": 9.434942940337867e-06, - "loss": 0.9011, + "learning_rate": 3.4579249407546435e-06, + "loss": 1.0269, "step": 18790 }, { - "epoch": 0.5324888775539119, + "epoch": 0.7352296736833868, "grad_norm": 0.0, - "learning_rate": 9.434026619827861e-06, - "loss": 0.9499, + "learning_rate": 3.4569665672523155e-06, + "loss": 1.0291, "step": 18791 }, { - "epoch": 0.5325172149961744, + "epoch": 0.7352688003756163, "grad_norm": 0.0, - "learning_rate": 9.433110304085259e-06, - "loss": 0.8759, + "learning_rate": 3.4560082988229093e-06, + "loss": 0.9481, "step": 18792 }, { - "epoch": 0.5325455524384369, + "epoch": 0.7353079270678456, "grad_norm": 0.0, - "learning_rate": 9.432193993117784e-06, - "loss": 1.0055, + "learning_rate": 3.4550501354818023e-06, + "loss": 0.9423, "step": 18793 }, { - "epoch": 0.5325738898806993, + "epoch": 0.7353470537600751, "grad_norm": 0.0, - "learning_rate": 9.431277686933145e-06, - "loss": 0.8213, + "learning_rate": 3.4540920772443966e-06, + "loss": 0.9521, "step": 18794 }, { - "epoch": 0.5326022273229618, + "epoch": 0.7353861804523045, "grad_norm": 0.0, - "learning_rate": 9.430361385539069e-06, - "loss": 0.9024, + "learning_rate": 3.4531341241260653e-06, + "loss": 1.087, "step": 18795 }, { - "epoch": 0.5326305647652243, + "epoch": 0.735425307144534, "grad_norm": 0.0, - "learning_rate": 9.429445088943267e-06, - "loss": 0.9345, + "learning_rate": 3.4521762761421996e-06, + "loss": 0.9325, "step": 18796 }, { - "epoch": 0.5326589022074868, + "epoch": 0.7354644338367634, "grad_norm": 0.0, - "learning_rate": 9.428528797153462e-06, - "loss": 0.8009, + "learning_rate": 3.451218533308176e-06, + "loss": 0.9924, "step": 18797 }, { - "epoch": 0.5326872396497492, + "epoch": 0.7355035605289929, "grad_norm": 0.0, - "learning_rate": 9.427612510177374e-06, - "loss": 0.9678, + "learning_rate": 3.4502608956393756e-06, + "loss": 1.012, "step": 18798 }, { - "epoch": 0.5327155770920117, + "epoch": 0.7355426872212223, "grad_norm": 0.0, - "learning_rate": 9.426696228022714e-06, - "loss": 0.9184, + "learning_rate": 3.4493033631511786e-06, + "loss": 0.9591, "step": 18799 }, { - "epoch": 0.5327439145342742, + "epoch": 0.7355818139134518, "grad_norm": 0.0, - "learning_rate": 9.42577995069721e-06, - "loss": 0.8546, + "learning_rate": 3.4483459358589634e-06, + "loss": 0.8812, "step": 18800 }, { - "epoch": 0.5327722519765365, + "epoch": 0.7356209406056812, "grad_norm": 0.0, - "learning_rate": 9.424863678208568e-06, - "loss": 0.839, + "learning_rate": 3.4473886137781e-06, + "loss": 0.8331, "step": 18801 }, { - "epoch": 0.532800589418799, + "epoch": 0.7356600672979107, "grad_norm": 0.0, - "learning_rate": 9.423947410564516e-06, - "loss": 0.9247, + "learning_rate": 3.446431396923965e-06, + "loss": 0.9817, "step": 18802 }, { - "epoch": 0.5328289268610615, + "epoch": 0.73569919399014, "grad_norm": 0.0, - "learning_rate": 9.42303114777277e-06, - "loss": 0.9522, + "learning_rate": 3.4454742853119293e-06, + "loss": 1.1218, "step": 18803 }, { - "epoch": 0.532857264303324, + "epoch": 0.7357383206823696, "grad_norm": 0.0, - "learning_rate": 9.422114889841045e-06, - "loss": 0.8212, + "learning_rate": 3.4445172789573666e-06, + "loss": 1.1722, "step": 18804 }, { - "epoch": 0.5328856017455864, + "epoch": 0.7357774473745989, "grad_norm": 0.0, - "learning_rate": 9.421198636777059e-06, - "loss": 0.8353, + "learning_rate": 3.4435603778756386e-06, + "loss": 1.0225, "step": 18805 }, { - "epoch": 0.5329139391878489, + "epoch": 0.7358165740668284, "grad_norm": 0.0, - "learning_rate": 9.420282388588539e-06, - "loss": 0.922, + "learning_rate": 3.4426035820821156e-06, + "loss": 0.9876, "step": 18806 }, { - "epoch": 0.5329422766301114, + "epoch": 0.7358557007590578, "grad_norm": 0.0, - "learning_rate": 9.419366145283188e-06, - "loss": 0.891, + "learning_rate": 3.4416468915921617e-06, + "loss": 0.9468, "step": 18807 }, { - "epoch": 0.5329706140723738, + "epoch": 0.7358948274512873, "grad_norm": 0.0, - "learning_rate": 9.418449906868736e-06, - "loss": 0.905, + "learning_rate": 3.440690306421144e-06, + "loss": 1.0018, "step": 18808 }, { - "epoch": 0.5329989515146363, + "epoch": 0.7359339541435167, "grad_norm": 0.0, - "learning_rate": 9.417533673352895e-06, - "loss": 0.9189, + "learning_rate": 3.4397338265844184e-06, + "loss": 0.9079, "step": 18809 }, { - "epoch": 0.5330272889568988, + "epoch": 0.7359730808357462, "grad_norm": 0.0, - "learning_rate": 9.416617444743382e-06, - "loss": 0.9219, + "learning_rate": 3.4387774520973495e-06, + "loss": 0.8796, "step": 18810 }, { - "epoch": 0.5330556263991613, + "epoch": 0.7360122075279756, "grad_norm": 0.0, - "learning_rate": 9.415701221047923e-06, - "loss": 0.8972, + "learning_rate": 3.4378211829752893e-06, + "loss": 1.0471, "step": 18811 }, { - "epoch": 0.5330839638414236, + "epoch": 0.7360513342202051, "grad_norm": 0.0, - "learning_rate": 9.414785002274225e-06, - "loss": 0.9362, + "learning_rate": 3.4368650192335985e-06, + "loss": 0.9664, "step": 18812 }, { - "epoch": 0.5331123012836861, + "epoch": 0.7360904609124345, "grad_norm": 0.0, - "learning_rate": 9.41386878843001e-06, - "loss": 0.9163, + "learning_rate": 3.4359089608876316e-06, + "loss": 1.0112, "step": 18813 }, { - "epoch": 0.5331406387259486, + "epoch": 0.7361295876046638, "grad_norm": 0.0, - "learning_rate": 9.412952579523e-06, - "loss": 0.8846, + "learning_rate": 3.434953007952745e-06, + "loss": 0.9705, "step": 18814 }, { - "epoch": 0.533168976168211, + "epoch": 0.7361687142968933, "grad_norm": 0.0, - "learning_rate": 9.412036375560903e-06, - "loss": 0.9919, + "learning_rate": 3.4339971604442823e-06, + "loss": 1.0536, "step": 18815 }, { - "epoch": 0.5331973136104735, + "epoch": 0.7362078409891227, "grad_norm": 0.0, - "learning_rate": 9.411120176551445e-06, - "loss": 0.8109, + "learning_rate": 3.4330414183775985e-06, + "loss": 0.9344, "step": 18816 }, { - "epoch": 0.533225651052736, + "epoch": 0.7362469676813522, "grad_norm": 0.0, - "learning_rate": 9.410203982502338e-06, - "loss": 0.8682, + "learning_rate": 3.4320857817680443e-06, + "loss": 0.9611, "step": 18817 }, { - "epoch": 0.5332539884949984, + "epoch": 0.7362860943735816, "grad_norm": 0.0, - "learning_rate": 9.409287793421302e-06, - "loss": 0.9149, + "learning_rate": 3.4311302506309573e-06, + "loss": 1.0045, "step": 18818 }, { - "epoch": 0.5332823259372609, + "epoch": 0.7363252210658111, "grad_norm": 0.0, - "learning_rate": 9.40837160931606e-06, - "loss": 0.8965, + "learning_rate": 3.430174824981689e-06, + "loss": 0.9837, "step": 18819 }, { - "epoch": 0.5333106633795234, + "epoch": 0.7363643477580405, "grad_norm": 0.0, - "learning_rate": 9.407455430194318e-06, - "loss": 0.8468, + "learning_rate": 3.4292195048355804e-06, + "loss": 0.985, "step": 18820 }, { - "epoch": 0.5333390008217859, + "epoch": 0.73640347445027, "grad_norm": 0.0, - "learning_rate": 9.4065392560638e-06, - "loss": 1.0217, + "learning_rate": 3.4282642902079755e-06, + "loss": 0.9666, "step": 18821 }, { - "epoch": 0.5333673382640483, + "epoch": 0.7364426011424994, "grad_norm": 0.0, - "learning_rate": 9.405623086932223e-06, - "loss": 0.8091, + "learning_rate": 3.427309181114208e-06, + "loss": 0.9754, "step": 18822 }, { - "epoch": 0.5333956757063107, + "epoch": 0.7364817278347289, "grad_norm": 0.0, - "learning_rate": 9.404706922807301e-06, - "loss": 0.8665, + "learning_rate": 3.4263541775696195e-06, + "loss": 1.0406, "step": 18823 }, { - "epoch": 0.5334240131485732, + "epoch": 0.7365208545269583, "grad_norm": 0.0, - "learning_rate": 9.40379076369676e-06, - "loss": 0.8207, + "learning_rate": 3.4253992795895454e-06, + "loss": 1.0499, "step": 18824 }, { - "epoch": 0.5334523505908356, + "epoch": 0.7365599812191878, "grad_norm": 0.0, - "learning_rate": 9.402874609608304e-06, - "loss": 0.9673, + "learning_rate": 3.4244444871893236e-06, + "loss": 0.8846, "step": 18825 }, { - "epoch": 0.5334806880330981, + "epoch": 0.7365991079114171, "grad_norm": 0.0, - "learning_rate": 9.401958460549658e-06, - "loss": 0.9257, + "learning_rate": 3.423489800384281e-06, + "loss": 0.9861, "step": 18826 }, { - "epoch": 0.5335090254753606, + "epoch": 0.7366382346036466, "grad_norm": 0.0, - "learning_rate": 9.401042316528542e-06, - "loss": 0.9156, + "learning_rate": 3.422535219189753e-06, + "loss": 0.8671, "step": 18827 }, { - "epoch": 0.5335373629176231, + "epoch": 0.736677361295876, "grad_norm": 0.0, - "learning_rate": 9.400126177552666e-06, - "loss": 0.8289, + "learning_rate": 3.421580743621066e-06, + "loss": 0.9047, "step": 18828 }, { - "epoch": 0.5335657003598855, + "epoch": 0.7367164879881055, "grad_norm": 0.0, - "learning_rate": 9.399210043629751e-06, - "loss": 0.8977, + "learning_rate": 3.4206263736935486e-06, + "loss": 0.8688, "step": 18829 }, { - "epoch": 0.533594037802148, + "epoch": 0.7367556146803349, "grad_norm": 0.0, - "learning_rate": 9.398293914767511e-06, - "loss": 0.9302, + "learning_rate": 3.419672109422527e-06, + "loss": 0.9736, "step": 18830 }, { - "epoch": 0.5336223752444105, + "epoch": 0.7367947413725644, "grad_norm": 0.0, - "learning_rate": 9.397377790973664e-06, - "loss": 0.8858, + "learning_rate": 3.418717950823328e-06, + "loss": 0.9256, "step": 18831 }, { - "epoch": 0.5336507126866729, + "epoch": 0.7368338680647938, "grad_norm": 0.0, - "learning_rate": 9.396461672255934e-06, - "loss": 0.9523, + "learning_rate": 3.4177638979112706e-06, + "loss": 1.13, "step": 18832 }, { - "epoch": 0.5336790501289354, + "epoch": 0.7368729947570233, "grad_norm": 0.0, - "learning_rate": 9.395545558622025e-06, - "loss": 0.9158, + "learning_rate": 3.416809950701675e-06, + "loss": 0.9699, "step": 18833 }, { - "epoch": 0.5337073875711978, + "epoch": 0.7369121214492527, "grad_norm": 0.0, - "learning_rate": 9.394629450079661e-06, - "loss": 0.886, + "learning_rate": 3.415856109209864e-06, + "loss": 0.9832, "step": 18834 }, { - "epoch": 0.5337357250134603, + "epoch": 0.7369512481414822, "grad_norm": 0.0, - "learning_rate": 9.39371334663656e-06, - "loss": 0.8664, + "learning_rate": 3.4149023734511553e-06, + "loss": 0.9079, "step": 18835 }, { - "epoch": 0.5337640624557227, + "epoch": 0.7369903748337115, "grad_norm": 0.0, - "learning_rate": 9.392797248300435e-06, - "loss": 0.8401, + "learning_rate": 3.4139487434408615e-06, + "loss": 1.0406, "step": 18836 }, { - "epoch": 0.5337923998979852, + "epoch": 0.737029501525941, "grad_norm": 0.0, - "learning_rate": 9.391881155079007e-06, - "loss": 0.8554, + "learning_rate": 3.4129952191942995e-06, + "loss": 1.1104, "step": 18837 }, { - "epoch": 0.5338207373402477, + "epoch": 0.7370686282181704, "grad_norm": 0.0, - "learning_rate": 9.390965066979987e-06, - "loss": 0.8497, + "learning_rate": 3.412041800726775e-06, + "loss": 1.0585, "step": 18838 }, { - "epoch": 0.5338490747825101, + "epoch": 0.7371077549103999, "grad_norm": 0.0, - "learning_rate": 9.390048984011095e-06, - "loss": 0.8703, + "learning_rate": 3.41108848805361e-06, + "loss": 0.9292, "step": 18839 }, { - "epoch": 0.5338774122247726, + "epoch": 0.7371468816026293, "grad_norm": 0.0, - "learning_rate": 9.38913290618005e-06, - "loss": 0.7805, + "learning_rate": 3.4101352811901044e-06, + "loss": 0.9647, "step": 18840 }, { - "epoch": 0.5339057496670351, + "epoch": 0.7371860082948588, "grad_norm": 0.0, - "learning_rate": 9.388216833494562e-06, - "loss": 0.8673, + "learning_rate": 3.4091821801515724e-06, + "loss": 0.8662, "step": 18841 }, { - "epoch": 0.5339340871092975, + "epoch": 0.7372251349870882, "grad_norm": 0.0, - "learning_rate": 9.38730076596235e-06, - "loss": 1.0151, + "learning_rate": 3.408229184953308e-06, + "loss": 0.8578, "step": 18842 }, { - "epoch": 0.53396242455156, + "epoch": 0.7372642616793176, "grad_norm": 0.0, - "learning_rate": 9.386384703591133e-06, - "loss": 0.8026, + "learning_rate": 3.40727629561063e-06, + "loss": 0.9692, "step": 18843 }, { - "epoch": 0.5339907619938224, + "epoch": 0.7373033883715471, "grad_norm": 0.0, - "learning_rate": 9.385468646388624e-06, - "loss": 1.0521, + "learning_rate": 3.4063235121388305e-06, + "loss": 0.9517, "step": 18844 }, { - "epoch": 0.5340190994360849, + "epoch": 0.7373425150637765, "grad_norm": 0.0, - "learning_rate": 9.384552594362545e-06, - "loss": 0.9384, + "learning_rate": 3.4053708345532166e-06, + "loss": 0.9917, "step": 18845 }, { - "epoch": 0.5340474368783473, + "epoch": 0.737381641756006, "grad_norm": 0.0, - "learning_rate": 9.383636547520604e-06, - "loss": 0.8319, + "learning_rate": 3.4044182628690803e-06, + "loss": 0.8956, "step": 18846 }, { - "epoch": 0.5340757743206098, + "epoch": 0.7374207684482353, "grad_norm": 0.0, - "learning_rate": 9.382720505870521e-06, - "loss": 0.7997, + "learning_rate": 3.4034657971017215e-06, + "loss": 0.9979, "step": 18847 }, { - "epoch": 0.5341041117628723, + "epoch": 0.7374598951404648, "grad_norm": 0.0, - "learning_rate": 9.381804469420015e-06, - "loss": 0.7847, + "learning_rate": 3.402513437266436e-06, + "loss": 1.0038, "step": 18848 }, { - "epoch": 0.5341324492051347, + "epoch": 0.7374990218326942, "grad_norm": 0.0, - "learning_rate": 9.380888438176797e-06, - "loss": 0.8305, + "learning_rate": 3.4015611833785213e-06, + "loss": 0.9485, "step": 18849 }, { - "epoch": 0.5341607866473972, + "epoch": 0.7375381485249237, "grad_norm": 0.0, - "learning_rate": 9.379972412148585e-06, - "loss": 0.823, + "learning_rate": 3.4006090354532617e-06, + "loss": 1.0115, "step": 18850 }, { - "epoch": 0.5341891240896597, + "epoch": 0.7375772752171531, "grad_norm": 0.0, - "learning_rate": 9.3790563913431e-06, - "loss": 0.8796, + "learning_rate": 3.399656993505952e-06, + "loss": 1.1462, "step": 18851 }, { - "epoch": 0.5342174615319222, + "epoch": 0.7376164019093826, "grad_norm": 0.0, - "learning_rate": 9.37814037576805e-06, - "loss": 0.8347, + "learning_rate": 3.398705057551881e-06, + "loss": 1.0567, "step": 18852 }, { - "epoch": 0.5342457989741846, + "epoch": 0.737655528601612, "grad_norm": 0.0, - "learning_rate": 9.377224365431158e-06, - "loss": 0.8756, + "learning_rate": 3.3977532276063373e-06, + "loss": 0.9447, "step": 18853 }, { - "epoch": 0.534274136416447, + "epoch": 0.7376946552938415, "grad_norm": 0.0, - "learning_rate": 9.376308360340132e-06, - "loss": 0.8455, + "learning_rate": 3.3968015036846003e-06, + "loss": 0.9048, "step": 18854 }, { - "epoch": 0.5343024738587095, + "epoch": 0.7377337819860709, "grad_norm": 0.0, - "learning_rate": 9.375392360502694e-06, - "loss": 0.9026, + "learning_rate": 3.395849885801961e-06, + "loss": 1.0624, "step": 18855 }, { - "epoch": 0.5343308113009719, + "epoch": 0.7377729086783004, "grad_norm": 0.0, - "learning_rate": 9.374476365926559e-06, - "loss": 0.8993, + "learning_rate": 3.3948983739736896e-06, + "loss": 1.0082, "step": 18856 }, { - "epoch": 0.5343591487432344, + "epoch": 0.7378120353705298, "grad_norm": 0.0, - "learning_rate": 9.37356037661944e-06, - "loss": 0.7322, + "learning_rate": 3.3939469682150807e-06, + "loss": 0.9921, "step": 18857 }, { - "epoch": 0.5343874861854969, + "epoch": 0.7378511620627592, "grad_norm": 0.0, - "learning_rate": 9.372644392589059e-06, - "loss": 0.9596, + "learning_rate": 3.392995668541402e-06, + "loss": 0.8195, "step": 18858 }, { - "epoch": 0.5344158236277594, + "epoch": 0.7378902887549886, "grad_norm": 0.0, - "learning_rate": 9.371728413843122e-06, - "loss": 0.8976, + "learning_rate": 3.3920444749679372e-06, + "loss": 0.8629, "step": 18859 }, { - "epoch": 0.5344441610700218, + "epoch": 0.7379294154472181, "grad_norm": 0.0, - "learning_rate": 9.370812440389351e-06, - "loss": 0.9409, + "learning_rate": 3.3910933875099548e-06, + "loss": 0.9846, "step": 18860 }, { - "epoch": 0.5344724985122843, + "epoch": 0.7379685421394475, "grad_norm": 0.0, - "learning_rate": 9.369896472235463e-06, - "loss": 0.9226, + "learning_rate": 3.3901424061827315e-06, + "loss": 1.0361, "step": 18861 }, { - "epoch": 0.5345008359545468, + "epoch": 0.738007668831677, "grad_norm": 0.0, - "learning_rate": 9.368980509389167e-06, - "loss": 0.9453, + "learning_rate": 3.3891915310015378e-06, + "loss": 1.0124, "step": 18862 }, { - "epoch": 0.5345291733968092, + "epoch": 0.7380467955239064, "grad_norm": 0.0, - "learning_rate": 9.368064551858183e-06, - "loss": 0.8316, + "learning_rate": 3.388240761981648e-06, + "loss": 1.0079, "step": 18863 }, { - "epoch": 0.5345575108390717, + "epoch": 0.7380859222161359, "grad_norm": 0.0, - "learning_rate": 9.367148599650231e-06, - "loss": 0.941, + "learning_rate": 3.387290099138324e-06, + "loss": 0.9478, "step": 18864 }, { - "epoch": 0.5345858482813342, + "epoch": 0.7381250489083653, "grad_norm": 0.0, - "learning_rate": 9.366232652773016e-06, - "loss": 0.8458, + "learning_rate": 3.386339542486834e-06, + "loss": 1.0186, "step": 18865 }, { - "epoch": 0.5346141857235965, + "epoch": 0.7381641756005948, "grad_norm": 0.0, - "learning_rate": 9.365316711234264e-06, - "loss": 0.8564, + "learning_rate": 3.385389092042447e-06, + "loss": 0.9893, "step": 18866 }, { - "epoch": 0.534642523165859, + "epoch": 0.7382033022928242, "grad_norm": 0.0, - "learning_rate": 9.36440077504168e-06, - "loss": 0.9056, + "learning_rate": 3.384438747820419e-06, + "loss": 0.9154, "step": 18867 }, { - "epoch": 0.5346708606081215, + "epoch": 0.7382424289850537, "grad_norm": 0.0, - "learning_rate": 9.363484844202985e-06, - "loss": 0.8738, + "learning_rate": 3.3834885098360148e-06, + "loss": 0.8889, "step": 18868 }, { - "epoch": 0.534699198050384, + "epoch": 0.738281555677283, "grad_norm": 0.0, - "learning_rate": 9.362568918725895e-06, - "loss": 0.9518, + "learning_rate": 3.382538378104495e-06, + "loss": 0.9699, "step": 18869 }, { - "epoch": 0.5347275354926464, + "epoch": 0.7383206823695124, "grad_norm": 0.0, - "learning_rate": 9.361652998618124e-06, - "loss": 0.9193, + "learning_rate": 3.3815883526411197e-06, + "loss": 0.8961, "step": 18870 }, { - "epoch": 0.5347558729349089, + "epoch": 0.7383598090617419, "grad_norm": 0.0, - "learning_rate": 9.360737083887385e-06, - "loss": 0.9607, + "learning_rate": 3.3806384334611386e-06, + "loss": 0.8919, "step": 18871 }, { - "epoch": 0.5347842103771714, + "epoch": 0.7383989357539713, "grad_norm": 0.0, - "learning_rate": 9.359821174541399e-06, - "loss": 0.8807, + "learning_rate": 3.379688620579813e-06, + "loss": 0.9593, "step": 18872 }, { - "epoch": 0.5348125478194338, + "epoch": 0.7384380624462008, "grad_norm": 0.0, - "learning_rate": 9.358905270587873e-06, - "loss": 0.7605, + "learning_rate": 3.378738914012386e-06, + "loss": 0.9482, "step": 18873 }, { - "epoch": 0.5348408852616963, + "epoch": 0.7384771891384302, "grad_norm": 0.0, - "learning_rate": 9.357989372034527e-06, - "loss": 0.7926, + "learning_rate": 3.3777893137741214e-06, + "loss": 1.0522, "step": 18874 }, { - "epoch": 0.5348692227039588, + "epoch": 0.7385163158306597, "grad_norm": 0.0, - "learning_rate": 9.357073478889074e-06, - "loss": 0.8672, + "learning_rate": 3.37683981988026e-06, + "loss": 0.8546, "step": 18875 }, { - "epoch": 0.5348975601462213, + "epoch": 0.7385554425228891, "grad_norm": 0.0, - "learning_rate": 9.35615759115923e-06, - "loss": 0.9236, + "learning_rate": 3.375890432346054e-06, + "loss": 0.9054, "step": 18876 }, { - "epoch": 0.5349258975884836, + "epoch": 0.7385945692151186, "grad_norm": 0.0, - "learning_rate": 9.355241708852712e-06, - "loss": 0.8078, + "learning_rate": 3.3749411511867436e-06, + "loss": 1.1046, "step": 18877 }, { - "epoch": 0.5349542350307461, + "epoch": 0.738633695907348, "grad_norm": 0.0, - "learning_rate": 9.35432583197723e-06, - "loss": 0.8643, + "learning_rate": 3.373991976417578e-06, + "loss": 1.0106, "step": 18878 }, { - "epoch": 0.5349825724730086, + "epoch": 0.7386728225995774, "grad_norm": 0.0, - "learning_rate": 9.353409960540506e-06, - "loss": 0.8883, + "learning_rate": 3.3730429080537975e-06, + "loss": 0.9277, "step": 18879 }, { - "epoch": 0.535010909915271, + "epoch": 0.7387119492918068, "grad_norm": 0.0, - "learning_rate": 9.352494094550243e-06, - "loss": 0.8774, + "learning_rate": 3.372093946110647e-06, + "loss": 0.8902, "step": 18880 }, { - "epoch": 0.5350392473575335, + "epoch": 0.7387510759840363, "grad_norm": 0.0, - "learning_rate": 9.351578234014167e-06, - "loss": 0.8742, + "learning_rate": 3.3711450906033603e-06, + "loss": 0.9475, "step": 18881 }, { - "epoch": 0.535067584799796, + "epoch": 0.7387902026762657, "grad_norm": 0.0, - "learning_rate": 9.350662378939987e-06, - "loss": 1.0509, + "learning_rate": 3.370196341547176e-06, + "loss": 1.0147, "step": 18882 }, { - "epoch": 0.5350959222420585, + "epoch": 0.7388293293684952, "grad_norm": 0.0, - "learning_rate": 9.349746529335418e-06, - "loss": 0.8609, + "learning_rate": 3.3692476989573318e-06, + "loss": 1.0436, "step": 18883 }, { - "epoch": 0.5351242596843209, + "epoch": 0.7388684560607246, "grad_norm": 0.0, - "learning_rate": 9.348830685208175e-06, - "loss": 0.9595, + "learning_rate": 3.3682991628490634e-06, + "loss": 0.8632, "step": 18884 }, { - "epoch": 0.5351525971265834, + "epoch": 0.7389075827529541, "grad_norm": 0.0, - "learning_rate": 9.347914846565979e-06, - "loss": 0.8946, + "learning_rate": 3.3673507332375966e-06, + "loss": 1.0179, "step": 18885 }, { - "epoch": 0.5351809345688459, + "epoch": 0.7389467094451835, "grad_norm": 0.0, - "learning_rate": 9.346999013416532e-06, - "loss": 0.9355, + "learning_rate": 3.36640241013817e-06, + "loss": 0.9202, "step": 18886 }, { - "epoch": 0.5352092720111082, + "epoch": 0.738985836137413, "grad_norm": 0.0, - "learning_rate": 9.346083185767556e-06, - "loss": 0.9176, + "learning_rate": 3.3654541935660014e-06, + "loss": 1.0038, "step": 18887 }, { - "epoch": 0.5352376094533707, + "epoch": 0.7390249628296424, "grad_norm": 0.0, - "learning_rate": 9.345167363626765e-06, - "loss": 0.7569, + "learning_rate": 3.364506083536332e-06, + "loss": 1.0113, "step": 18888 }, { - "epoch": 0.5352659468956332, + "epoch": 0.7390640895218719, "grad_norm": 0.0, - "learning_rate": 9.344251547001872e-06, - "loss": 0.8103, + "learning_rate": 3.3635580800643765e-06, + "loss": 0.969, "step": 18889 }, { - "epoch": 0.5352942843378956, + "epoch": 0.7391032162141012, "grad_norm": 0.0, - "learning_rate": 9.343335735900593e-06, - "loss": 0.8398, + "learning_rate": 3.362610183165366e-06, + "loss": 1.0219, "step": 18890 }, { - "epoch": 0.5353226217801581, + "epoch": 0.7391423429063307, "grad_norm": 0.0, - "learning_rate": 9.342419930330639e-06, - "loss": 0.9517, + "learning_rate": 3.3616623928545113e-06, + "loss": 1.0519, "step": 18891 }, { - "epoch": 0.5353509592224206, + "epoch": 0.7391814695985601, "grad_norm": 0.0, - "learning_rate": 9.341504130299727e-06, - "loss": 0.8085, + "learning_rate": 3.360714709147047e-06, + "loss": 1.0187, "step": 18892 }, { - "epoch": 0.5353792966646831, + "epoch": 0.7392205962907896, "grad_norm": 0.0, - "learning_rate": 9.340588335815574e-06, - "loss": 1.0306, + "learning_rate": 3.3597671320581825e-06, + "loss": 0.9434, "step": 18893 }, { - "epoch": 0.5354076341069455, + "epoch": 0.739259722983019, "grad_norm": 0.0, - "learning_rate": 9.339672546885885e-06, - "loss": 0.8288, + "learning_rate": 3.35881966160314e-06, + "loss": 0.9146, "step": 18894 }, { - "epoch": 0.535435971549208, + "epoch": 0.7392988496752485, "grad_norm": 0.0, - "learning_rate": 9.338756763518381e-06, - "loss": 0.8979, + "learning_rate": 3.3578722977971277e-06, + "loss": 0.9749, "step": 18895 }, { - "epoch": 0.5354643089914705, + "epoch": 0.7393379763674779, "grad_norm": 0.0, - "learning_rate": 9.337840985720774e-06, - "loss": 0.8392, + "learning_rate": 3.3569250406553644e-06, + "loss": 1.0222, "step": 18896 }, { - "epoch": 0.5354926464337328, + "epoch": 0.7393771030597074, "grad_norm": 0.0, - "learning_rate": 9.336925213500777e-06, - "loss": 0.8848, + "learning_rate": 3.3559778901930606e-06, + "loss": 0.9898, "step": 18897 }, { - "epoch": 0.5355209838759953, + "epoch": 0.7394162297519368, "grad_norm": 0.0, - "learning_rate": 9.33600944686611e-06, - "loss": 0.7927, + "learning_rate": 3.35503084642543e-06, + "loss": 0.9659, "step": 18898 }, { - "epoch": 0.5355493213182578, + "epoch": 0.7394553564441662, "grad_norm": 0.0, - "learning_rate": 9.335093685824479e-06, - "loss": 0.6856, + "learning_rate": 3.3540839093676735e-06, + "loss": 1.0325, "step": 18899 }, { - "epoch": 0.5355776587605203, + "epoch": 0.7394944831363957, "grad_norm": 0.0, - "learning_rate": 9.334177930383598e-06, - "loss": 0.826, + "learning_rate": 3.3531370790350016e-06, + "loss": 0.972, "step": 18900 }, { - "epoch": 0.5356059962027827, + "epoch": 0.739533609828625, "grad_norm": 0.0, - "learning_rate": 9.333262180551187e-06, - "loss": 0.8183, + "learning_rate": 3.3521903554426193e-06, + "loss": 0.899, "step": 18901 }, { - "epoch": 0.5356343336450452, + "epoch": 0.7395727365208545, "grad_norm": 0.0, - "learning_rate": 9.332346436334953e-06, - "loss": 0.888, + "learning_rate": 3.351243738605734e-06, + "loss": 0.9464, "step": 18902 }, { - "epoch": 0.5356626710873077, + "epoch": 0.7396118632130839, "grad_norm": 0.0, - "learning_rate": 9.33143069774262e-06, - "loss": 0.9545, + "learning_rate": 3.3502972285395384e-06, + "loss": 0.9033, "step": 18903 }, { - "epoch": 0.5356910085295701, + "epoch": 0.7396509899053134, "grad_norm": 0.0, - "learning_rate": 9.330514964781889e-06, - "loss": 0.9823, + "learning_rate": 3.34935082525924e-06, + "loss": 1.003, "step": 18904 }, { - "epoch": 0.5357193459718326, + "epoch": 0.7396901165975428, "grad_norm": 0.0, - "learning_rate": 9.329599237460478e-06, - "loss": 0.8704, + "learning_rate": 3.3484045287800317e-06, + "loss": 0.8514, "step": 18905 }, { - "epoch": 0.5357476834140951, + "epoch": 0.7397292432897723, "grad_norm": 0.0, - "learning_rate": 9.328683515786103e-06, - "loss": 0.8075, + "learning_rate": 3.3474583391171102e-06, + "loss": 0.8765, "step": 18906 }, { - "epoch": 0.5357760208563576, + "epoch": 0.7397683699820017, "grad_norm": 0.0, - "learning_rate": 9.327767799766478e-06, - "loss": 0.9082, + "learning_rate": 3.346512256285672e-06, + "loss": 0.9861, "step": 18907 }, { - "epoch": 0.5358043582986199, + "epoch": 0.7398074966742312, "grad_norm": 0.0, - "learning_rate": 9.326852089409314e-06, - "loss": 0.9343, + "learning_rate": 3.345566280300914e-06, + "loss": 1.0817, "step": 18908 }, { - "epoch": 0.5358326957408824, + "epoch": 0.7398466233664606, "grad_norm": 0.0, - "learning_rate": 9.325936384722322e-06, - "loss": 0.8754, + "learning_rate": 3.344620411178019e-06, + "loss": 0.8673, "step": 18909 }, { - "epoch": 0.5358610331831449, + "epoch": 0.7398857500586901, "grad_norm": 0.0, - "learning_rate": 9.325020685713218e-06, - "loss": 0.8655, + "learning_rate": 3.3436746489321803e-06, + "loss": 0.9675, "step": 18910 }, { - "epoch": 0.5358893706254073, + "epoch": 0.7399248767509194, "grad_norm": 0.0, - "learning_rate": 9.32410499238972e-06, - "loss": 0.8411, + "learning_rate": 3.342728993578589e-06, + "loss": 0.9616, "step": 18911 }, { - "epoch": 0.5359177080676698, + "epoch": 0.7399640034431489, "grad_norm": 0.0, - "learning_rate": 9.323189304759533e-06, - "loss": 0.7901, + "learning_rate": 3.341783445132425e-06, + "loss": 0.9259, "step": 18912 }, { - "epoch": 0.5359460455099323, + "epoch": 0.7400031301353783, "grad_norm": 0.0, - "learning_rate": 9.322273622830373e-06, - "loss": 0.9223, + "learning_rate": 3.3408380036088762e-06, + "loss": 1.0424, "step": 18913 }, { - "epoch": 0.5359743829521947, + "epoch": 0.7400422568276078, "grad_norm": 0.0, - "learning_rate": 9.321357946609957e-06, - "loss": 0.9384, + "learning_rate": 3.339892669023125e-06, + "loss": 0.9199, "step": 18914 }, { - "epoch": 0.5360027203944572, + "epoch": 0.7400813835198372, "grad_norm": 0.0, - "learning_rate": 9.320442276105993e-06, - "loss": 0.9139, + "learning_rate": 3.3389474413903542e-06, + "loss": 1.0975, "step": 18915 }, { - "epoch": 0.5360310578367197, + "epoch": 0.7401205102120667, "grad_norm": 0.0, - "learning_rate": 9.3195266113262e-06, - "loss": 0.8751, + "learning_rate": 3.3380023207257374e-06, + "loss": 0.9833, "step": 18916 }, { - "epoch": 0.5360593952789822, + "epoch": 0.7401596369042961, "grad_norm": 0.0, - "learning_rate": 9.318610952278282e-06, - "loss": 0.8549, + "learning_rate": 3.337057307044459e-06, + "loss": 1.0715, "step": 18917 }, { - "epoch": 0.5360877327212445, + "epoch": 0.7401987635965256, "grad_norm": 0.0, - "learning_rate": 9.317695298969957e-06, - "loss": 0.8246, + "learning_rate": 3.336112400361685e-06, + "loss": 0.9101, "step": 18918 }, { - "epoch": 0.536116070163507, + "epoch": 0.740237890288755, "grad_norm": 0.0, - "learning_rate": 9.316779651408941e-06, - "loss": 0.8351, + "learning_rate": 3.3351676006926015e-06, + "loss": 0.9985, "step": 18919 }, { - "epoch": 0.5361444076057695, + "epoch": 0.7402770169809845, "grad_norm": 0.0, - "learning_rate": 9.315864009602941e-06, - "loss": 0.9448, + "learning_rate": 3.3342229080523715e-06, + "loss": 0.9647, "step": 18920 }, { - "epoch": 0.5361727450480319, + "epoch": 0.7403161436732139, "grad_norm": 0.0, - "learning_rate": 9.314948373559672e-06, - "loss": 0.9927, + "learning_rate": 3.333278322456173e-06, + "loss": 1.0005, "step": 18921 }, { - "epoch": 0.5362010824902944, + "epoch": 0.7403552703654434, "grad_norm": 0.0, - "learning_rate": 9.314032743286853e-06, - "loss": 0.9252, + "learning_rate": 3.332333843919163e-06, + "loss": 0.8254, "step": 18922 }, { - "epoch": 0.5362294199325569, + "epoch": 0.7403943970576727, "grad_norm": 0.0, - "learning_rate": 9.313117118792185e-06, - "loss": 0.9878, + "learning_rate": 3.3313894724565244e-06, + "loss": 1.0638, "step": 18923 }, { - "epoch": 0.5362577573748194, + "epoch": 0.7404335237499022, "grad_norm": 0.0, - "learning_rate": 9.312201500083392e-06, - "loss": 0.9608, + "learning_rate": 3.3304452080834103e-06, + "loss": 0.8342, "step": 18924 }, { - "epoch": 0.5362860948170818, + "epoch": 0.7404726504421316, "grad_norm": 0.0, - "learning_rate": 9.311285887168176e-06, - "loss": 0.9279, + "learning_rate": 3.3295010508149916e-06, + "loss": 0.9176, "step": 18925 }, { - "epoch": 0.5363144322593443, + "epoch": 0.7405117771343611, "grad_norm": 0.0, - "learning_rate": 9.310370280054255e-06, - "loss": 0.9929, + "learning_rate": 3.3285570006664257e-06, + "loss": 1.0267, "step": 18926 }, { - "epoch": 0.5363427697016068, + "epoch": 0.7405509038265905, "grad_norm": 0.0, - "learning_rate": 9.309454678749343e-06, - "loss": 0.8512, + "learning_rate": 3.327613057652873e-06, + "loss": 0.9637, "step": 18927 }, { - "epoch": 0.5363711071438692, + "epoch": 0.7405900305188199, "grad_norm": 0.0, - "learning_rate": 9.30853908326115e-06, - "loss": 0.9464, + "learning_rate": 3.3266692217894947e-06, + "loss": 0.9835, "step": 18928 }, { - "epoch": 0.5363994445861316, + "epoch": 0.7406291572110494, "grad_norm": 0.0, - "learning_rate": 9.307623493597388e-06, - "loss": 0.9221, + "learning_rate": 3.3257254930914497e-06, + "loss": 1.0547, "step": 18929 }, { - "epoch": 0.5364277820283941, + "epoch": 0.7406682839032788, "grad_norm": 0.0, - "learning_rate": 9.306707909765775e-06, - "loss": 0.786, + "learning_rate": 3.3247818715738867e-06, + "loss": 0.9882, "step": 18930 }, { - "epoch": 0.5364561194706566, + "epoch": 0.7407074105955083, "grad_norm": 0.0, - "learning_rate": 9.305792331774015e-06, - "loss": 0.8129, + "learning_rate": 3.3238383572519618e-06, + "loss": 0.881, "step": 18931 }, { - "epoch": 0.536484456912919, + "epoch": 0.7407465372877376, "grad_norm": 0.0, - "learning_rate": 9.304876759629824e-06, - "loss": 0.7249, + "learning_rate": 3.322894950140827e-06, + "loss": 1.1089, "step": 18932 }, { - "epoch": 0.5365127943551815, + "epoch": 0.7407856639799671, "grad_norm": 0.0, - "learning_rate": 9.303961193340914e-06, - "loss": 0.8327, + "learning_rate": 3.321951650255637e-06, + "loss": 1.0186, "step": 18933 }, { - "epoch": 0.536541131797444, + "epoch": 0.7408247906721965, "grad_norm": 0.0, - "learning_rate": 9.303045632914997e-06, - "loss": 0.7038, + "learning_rate": 3.321008457611531e-06, + "loss": 0.8639, "step": 18934 }, { - "epoch": 0.5365694692397064, + "epoch": 0.740863917364426, "grad_norm": 0.0, - "learning_rate": 9.30213007835979e-06, - "loss": 0.8872, + "learning_rate": 3.3200653722236632e-06, + "loss": 1.0161, "step": 18935 }, { - "epoch": 0.5365978066819689, + "epoch": 0.7409030440566554, "grad_norm": 0.0, - "learning_rate": 9.301214529682995e-06, - "loss": 0.832, + "learning_rate": 3.3191223941071694e-06, + "loss": 1.0874, "step": 18936 }, { - "epoch": 0.5366261441242314, + "epoch": 0.7409421707488849, "grad_norm": 0.0, - "learning_rate": 9.300298986892335e-06, - "loss": 0.7767, + "learning_rate": 3.318179523277204e-06, + "loss": 1.0339, "step": 18937 }, { - "epoch": 0.5366544815664938, + "epoch": 0.7409812974411143, "grad_norm": 0.0, - "learning_rate": 9.299383449995511e-06, - "loss": 0.8048, + "learning_rate": 3.3172367597488998e-06, + "loss": 1.0283, "step": 18938 }, { - "epoch": 0.5366828190087563, + "epoch": 0.7410204241333438, "grad_norm": 0.0, - "learning_rate": 9.29846791900024e-06, - "loss": 0.829, + "learning_rate": 3.316294103537402e-06, + "loss": 1.0537, "step": 18939 }, { - "epoch": 0.5367111564510187, + "epoch": 0.7410595508255732, "grad_norm": 0.0, - "learning_rate": 9.297552393914238e-06, - "loss": 0.9865, + "learning_rate": 3.3153515546578395e-06, + "loss": 1.0783, "step": 18940 }, { - "epoch": 0.5367394938932812, + "epoch": 0.7410986775178027, "grad_norm": 0.0, - "learning_rate": 9.296636874745211e-06, - "loss": 0.8326, + "learning_rate": 3.314409113125362e-06, + "loss": 0.9134, "step": 18941 }, { - "epoch": 0.5367678313355436, + "epoch": 0.7411378042100321, "grad_norm": 0.0, - "learning_rate": 9.29572136150087e-06, - "loss": 0.8929, + "learning_rate": 3.3134667789550924e-06, + "loss": 0.9322, "step": 18942 }, { - "epoch": 0.5367961687778061, + "epoch": 0.7411769309022616, "grad_norm": 0.0, - "learning_rate": 9.294805854188937e-06, - "loss": 0.874, + "learning_rate": 3.312524552162172e-06, + "loss": 0.9538, "step": 18943 }, { - "epoch": 0.5368245062200686, + "epoch": 0.7412160575944909, "grad_norm": 0.0, - "learning_rate": 9.29389035281711e-06, - "loss": 1.0827, + "learning_rate": 3.311582432761723e-06, + "loss": 1.0594, "step": 18944 }, { - "epoch": 0.536852843662331, + "epoch": 0.7412551842867204, "grad_norm": 0.0, - "learning_rate": 9.292974857393112e-06, - "loss": 0.8052, + "learning_rate": 3.310640420768879e-06, + "loss": 1.0134, "step": 18945 }, { - "epoch": 0.5368811811045935, + "epoch": 0.7412943109789498, "grad_norm": 0.0, - "learning_rate": 9.292059367924644e-06, - "loss": 0.9217, + "learning_rate": 3.309698516198768e-06, + "loss": 1.0114, "step": 18946 }, { - "epoch": 0.536909518546856, + "epoch": 0.7413334376711793, "grad_norm": 0.0, - "learning_rate": 9.291143884419424e-06, - "loss": 0.9712, + "learning_rate": 3.3087567190665194e-06, + "loss": 1.005, "step": 18947 }, { - "epoch": 0.5369378559891185, + "epoch": 0.7413725643634087, "grad_norm": 0.0, - "learning_rate": 9.290228406885166e-06, - "loss": 0.8729, + "learning_rate": 3.307815029387249e-06, + "loss": 0.9362, "step": 18948 }, { - "epoch": 0.5369661934313809, + "epoch": 0.7414116910556382, "grad_norm": 0.0, - "learning_rate": 9.289312935329573e-06, - "loss": 0.7329, + "learning_rate": 3.3068734471760846e-06, + "loss": 0.929, "step": 18949 }, { - "epoch": 0.5369945308736433, + "epoch": 0.7414508177478676, "grad_norm": 0.0, - "learning_rate": 9.288397469760362e-06, - "loss": 0.857, + "learning_rate": 3.3059319724481477e-06, + "loss": 0.9905, "step": 18950 }, { - "epoch": 0.5370228683159058, + "epoch": 0.7414899444400971, "grad_norm": 0.0, - "learning_rate": 9.287482010185247e-06, - "loss": 0.9359, + "learning_rate": 3.3049906052185534e-06, + "loss": 0.9779, "step": 18951 }, { - "epoch": 0.5370512057581682, + "epoch": 0.7415290711323265, "grad_norm": 0.0, - "learning_rate": 9.286566556611932e-06, - "loss": 0.8574, + "learning_rate": 3.304049345502419e-06, + "loss": 0.9558, "step": 18952 }, { - "epoch": 0.5370795432004307, + "epoch": 0.741568197824556, "grad_norm": 0.0, - "learning_rate": 9.285651109048134e-06, - "loss": 0.8648, + "learning_rate": 3.3031081933148656e-06, + "loss": 0.8943, "step": 18953 }, { - "epoch": 0.5371078806426932, + "epoch": 0.7416073245167853, "grad_norm": 0.0, - "learning_rate": 9.284735667501558e-06, - "loss": 0.8226, + "learning_rate": 3.3021671486709993e-06, + "loss": 0.9553, "step": 18954 }, { - "epoch": 0.5371362180849556, + "epoch": 0.7416464512090148, "grad_norm": 0.0, - "learning_rate": 9.28382023197992e-06, - "loss": 0.8129, + "learning_rate": 3.301226211585936e-06, + "loss": 0.9238, "step": 18955 }, { - "epoch": 0.5371645555272181, + "epoch": 0.7416855779012442, "grad_norm": 0.0, - "learning_rate": 9.282904802490937e-06, - "loss": 0.939, + "learning_rate": 3.3002853820747893e-06, + "loss": 0.9101, "step": 18956 }, { - "epoch": 0.5371928929694806, + "epoch": 0.7417247045934736, "grad_norm": 0.0, - "learning_rate": 9.281989379042306e-06, - "loss": 0.868, + "learning_rate": 3.2993446601526613e-06, + "loss": 1.1271, "step": 18957 }, { - "epoch": 0.5372212304117431, + "epoch": 0.7417638312857031, "grad_norm": 0.0, - "learning_rate": 9.281073961641746e-06, - "loss": 0.8597, + "learning_rate": 3.298404045834661e-06, + "loss": 1.1483, "step": 18958 }, { - "epoch": 0.5372495678540055, + "epoch": 0.7418029579779325, "grad_norm": 0.0, - "learning_rate": 9.28015855029697e-06, - "loss": 0.8313, + "learning_rate": 3.2974635391358943e-06, + "loss": 0.9487, "step": 18959 }, { - "epoch": 0.537277905296268, + "epoch": 0.741842084670162, "grad_norm": 0.0, - "learning_rate": 9.279243145015681e-06, - "loss": 0.8235, + "learning_rate": 3.296523140071467e-06, + "loss": 1.0549, "step": 18960 }, { - "epoch": 0.5373062427385304, + "epoch": 0.7418812113623914, "grad_norm": 0.0, - "learning_rate": 9.278327745805603e-06, - "loss": 0.8104, + "learning_rate": 3.2955828486564754e-06, + "loss": 1.007, "step": 18961 }, { - "epoch": 0.5373345801807928, + "epoch": 0.7419203380546209, "grad_norm": 0.0, - "learning_rate": 9.277412352674429e-06, - "loss": 0.8651, + "learning_rate": 3.2946426649060258e-06, + "loss": 0.9064, "step": 18962 }, { - "epoch": 0.5373629176230553, + "epoch": 0.7419594647468503, "grad_norm": 0.0, - "learning_rate": 9.276496965629883e-06, - "loss": 0.8617, + "learning_rate": 3.293702588835206e-06, + "loss": 0.8696, "step": 18963 }, { - "epoch": 0.5373912550653178, + "epoch": 0.7419985914390798, "grad_norm": 0.0, - "learning_rate": 9.275581584679675e-06, - "loss": 0.9639, + "learning_rate": 3.2927626204591257e-06, + "loss": 1.0065, "step": 18964 }, { - "epoch": 0.5374195925075803, + "epoch": 0.7420377181313091, "grad_norm": 0.0, - "learning_rate": 9.274666209831508e-06, - "loss": 0.9209, + "learning_rate": 3.2918227597928686e-06, + "loss": 0.9329, "step": 18965 }, { - "epoch": 0.5374479299498427, + "epoch": 0.7420768448235386, "grad_norm": 0.0, - "learning_rate": 9.273750841093099e-06, - "loss": 0.9622, + "learning_rate": 3.2908830068515376e-06, + "loss": 1.0673, "step": 18966 }, { - "epoch": 0.5374762673921052, + "epoch": 0.742115971515768, "grad_norm": 0.0, - "learning_rate": 9.272835478472154e-06, - "loss": 0.9135, + "learning_rate": 3.289943361650211e-06, + "loss": 0.9774, "step": 18967 }, { - "epoch": 0.5375046048343677, + "epoch": 0.7421550982079975, "grad_norm": 0.0, - "learning_rate": 9.271920121976385e-06, - "loss": 0.9636, + "learning_rate": 3.289003824203991e-06, + "loss": 1.0288, "step": 18968 }, { - "epoch": 0.5375329422766301, + "epoch": 0.7421942249002269, "grad_norm": 0.0, - "learning_rate": 9.271004771613509e-06, - "loss": 0.9062, + "learning_rate": 3.2880643945279587e-06, + "loss": 0.9373, "step": 18969 }, { - "epoch": 0.5375612797188926, + "epoch": 0.7422333515924564, "grad_norm": 0.0, - "learning_rate": 9.270089427391225e-06, - "loss": 0.8646, + "learning_rate": 3.2871250726372052e-06, + "loss": 0.9156, "step": 18970 }, { - "epoch": 0.537589617161155, + "epoch": 0.7422724782846858, "grad_norm": 0.0, - "learning_rate": 9.26917408931725e-06, - "loss": 0.9151, + "learning_rate": 3.2861858585468065e-06, + "loss": 1.1036, "step": 18971 }, { - "epoch": 0.5376179546034175, + "epoch": 0.7423116049769153, "grad_norm": 0.0, - "learning_rate": 9.268258757399295e-06, - "loss": 0.9042, + "learning_rate": 3.285246752271851e-06, + "loss": 0.9856, "step": 18972 }, { - "epoch": 0.5376462920456799, + "epoch": 0.7423507316691447, "grad_norm": 0.0, - "learning_rate": 9.267343431645065e-06, - "loss": 0.8196, + "learning_rate": 3.2843077538274183e-06, + "loss": 1.0218, "step": 18973 }, { - "epoch": 0.5376746294879424, + "epoch": 0.7423898583613742, "grad_norm": 0.0, - "learning_rate": 9.266428112062277e-06, - "loss": 0.9769, + "learning_rate": 3.283368863228591e-06, + "loss": 0.97, "step": 18974 }, { - "epoch": 0.5377029669302049, + "epoch": 0.7424289850536036, "grad_norm": 0.0, - "learning_rate": 9.265512798658636e-06, - "loss": 0.7658, + "learning_rate": 3.282430080490441e-06, + "loss": 0.8696, "step": 18975 }, { - "epoch": 0.5377313043724673, + "epoch": 0.742468111745833, "grad_norm": 0.0, - "learning_rate": 9.264597491441851e-06, - "loss": 0.8623, + "learning_rate": 3.2814914056280456e-06, + "loss": 0.9296, "step": 18976 }, { - "epoch": 0.5377596418147298, + "epoch": 0.7425072384380624, "grad_norm": 0.0, - "learning_rate": 9.263682190419639e-06, - "loss": 0.8351, + "learning_rate": 3.2805528386564787e-06, + "loss": 0.9498, "step": 18977 }, { - "epoch": 0.5377879792569923, + "epoch": 0.7425463651302919, "grad_norm": 0.0, - "learning_rate": 9.262766895599701e-06, - "loss": 0.9636, + "learning_rate": 3.2796143795908174e-06, + "loss": 0.8711, "step": 18978 }, { - "epoch": 0.5378163166992547, + "epoch": 0.7425854918225213, "grad_norm": 0.0, - "learning_rate": 9.261851606989754e-06, - "loss": 0.9698, + "learning_rate": 3.278676028446125e-06, + "loss": 0.9356, "step": 18979 }, { - "epoch": 0.5378446541415172, + "epoch": 0.7426246185147508, "grad_norm": 0.0, - "learning_rate": 9.260936324597504e-06, - "loss": 0.9361, + "learning_rate": 3.2777377852374734e-06, + "loss": 1.1028, "step": 18980 }, { - "epoch": 0.5378729915837797, + "epoch": 0.7426637452069802, "grad_norm": 0.0, - "learning_rate": 9.26002104843066e-06, - "loss": 0.8792, + "learning_rate": 3.2767996499799295e-06, + "loss": 0.9283, "step": 18981 }, { - "epoch": 0.5379013290260422, + "epoch": 0.7427028718992097, "grad_norm": 0.0, - "learning_rate": 9.259105778496938e-06, - "loss": 0.8918, + "learning_rate": 3.275861622688561e-06, + "loss": 0.9345, "step": 18982 }, { - "epoch": 0.5379296664683045, + "epoch": 0.7427419985914391, "grad_norm": 0.0, - "learning_rate": 9.25819051480404e-06, - "loss": 0.9802, + "learning_rate": 3.2749237033784267e-06, + "loss": 0.9978, "step": 18983 }, { - "epoch": 0.537958003910567, + "epoch": 0.7427811252836685, "grad_norm": 0.0, - "learning_rate": 9.257275257359679e-06, - "loss": 0.9602, + "learning_rate": 3.273985892064593e-06, + "loss": 0.9863, "step": 18984 }, { - "epoch": 0.5379863413528295, + "epoch": 0.742820251975898, "grad_norm": 0.0, - "learning_rate": 9.256360006171564e-06, - "loss": 0.993, + "learning_rate": 3.2730481887621125e-06, + "loss": 0.9213, "step": 18985 }, { - "epoch": 0.5380146787950919, + "epoch": 0.7428593786681273, "grad_norm": 0.0, - "learning_rate": 9.255444761247403e-06, - "loss": 0.903, + "learning_rate": 3.2721105934860544e-06, + "loss": 0.9531, "step": 18986 }, { - "epoch": 0.5380430162373544, + "epoch": 0.7428985053603568, "grad_norm": 0.0, - "learning_rate": 9.254529522594909e-06, - "loss": 0.9982, + "learning_rate": 3.271173106251466e-06, + "loss": 1.0407, "step": 18987 }, { - "epoch": 0.5380713536796169, + "epoch": 0.7429376320525862, "grad_norm": 0.0, - "learning_rate": 9.253614290221794e-06, - "loss": 0.8685, + "learning_rate": 3.27023572707341e-06, + "loss": 0.9649, "step": 18988 }, { - "epoch": 0.5380996911218794, + "epoch": 0.7429767587448157, "grad_norm": 0.0, - "learning_rate": 9.252699064135759e-06, - "loss": 0.9049, + "learning_rate": 3.2692984559669315e-06, + "loss": 0.8475, "step": 18989 }, { - "epoch": 0.5381280285641418, + "epoch": 0.7430158854370451, "grad_norm": 0.0, - "learning_rate": 9.251783844344519e-06, - "loss": 0.9201, + "learning_rate": 3.2683612929470854e-06, + "loss": 1.0342, "step": 18990 }, { - "epoch": 0.5381563660064043, + "epoch": 0.7430550121292746, "grad_norm": 0.0, - "learning_rate": 9.250868630855779e-06, - "loss": 0.864, + "learning_rate": 3.2674242380289222e-06, + "loss": 0.9798, "step": 18991 }, { - "epoch": 0.5381847034486668, + "epoch": 0.743094138821504, "grad_norm": 0.0, - "learning_rate": 9.249953423677252e-06, - "loss": 0.9604, + "learning_rate": 3.2664872912274924e-06, + "loss": 0.9876, "step": 18992 }, { - "epoch": 0.5382130408909291, + "epoch": 0.7431332655137335, "grad_norm": 0.0, - "learning_rate": 9.249038222816645e-06, - "loss": 0.8834, + "learning_rate": 3.2655504525578364e-06, + "loss": 1.0153, "step": 18993 }, { - "epoch": 0.5382413783331916, + "epoch": 0.7431723922059629, "grad_norm": 0.0, - "learning_rate": 9.248123028281668e-06, - "loss": 0.8906, + "learning_rate": 3.2646137220350016e-06, + "loss": 0.9573, "step": 18994 }, { - "epoch": 0.5382697157754541, + "epoch": 0.7432115188981924, "grad_norm": 0.0, - "learning_rate": 9.247207840080034e-06, - "loss": 0.7995, + "learning_rate": 3.263677099674033e-06, + "loss": 0.9934, "step": 18995 }, { - "epoch": 0.5382980532177166, + "epoch": 0.7432506455904218, "grad_norm": 0.0, - "learning_rate": 9.246292658219442e-06, - "loss": 0.9371, + "learning_rate": 3.2627405854899665e-06, + "loss": 0.9579, "step": 18996 }, { - "epoch": 0.538326390659979, + "epoch": 0.7432897722826513, "grad_norm": 0.0, - "learning_rate": 9.245377482707609e-06, - "loss": 0.9091, + "learning_rate": 3.2618041794978426e-06, + "loss": 1.0198, "step": 18997 }, { - "epoch": 0.5383547281022415, + "epoch": 0.7433288989748806, "grad_norm": 0.0, - "learning_rate": 9.244462313552241e-06, - "loss": 0.9282, + "learning_rate": 3.2608678817127014e-06, + "loss": 1.0514, "step": 18998 }, { - "epoch": 0.538383065544504, + "epoch": 0.7433680256671101, "grad_norm": 0.0, - "learning_rate": 9.243547150761047e-06, - "loss": 0.9237, + "learning_rate": 3.2599316921495806e-06, + "loss": 0.8931, "step": 18999 }, { - "epoch": 0.5384114029867664, + "epoch": 0.7434071523593395, "grad_norm": 0.0, - "learning_rate": 9.242631994341738e-06, - "loss": 0.8516, + "learning_rate": 3.258995610823508e-06, + "loss": 1.0615, "step": 19000 }, { - "epoch": 0.5384397404290289, + "epoch": 0.743446279051569, "grad_norm": 0.0, - "learning_rate": 9.241716844302024e-06, - "loss": 0.8842, + "learning_rate": 3.258059637749521e-06, + "loss": 0.9759, "step": 19001 }, { - "epoch": 0.5384680778712914, + "epoch": 0.7434854057437984, "grad_norm": 0.0, - "learning_rate": 9.240801700649605e-06, - "loss": 0.916, + "learning_rate": 3.2571237729426464e-06, + "loss": 1.0961, "step": 19002 }, { - "epoch": 0.5384964153135537, + "epoch": 0.7435245324360279, "grad_norm": 0.0, - "learning_rate": 9.2398865633922e-06, - "loss": 0.8512, + "learning_rate": 3.2561880164179138e-06, + "loss": 0.783, "step": 19003 }, { - "epoch": 0.5385247527558162, + "epoch": 0.7435636591282573, "grad_norm": 0.0, - "learning_rate": 9.23897143253751e-06, - "loss": 0.8365, + "learning_rate": 3.2552523681903516e-06, + "loss": 1.0278, "step": 19004 }, { - "epoch": 0.5385530901980787, + "epoch": 0.7436027858204868, "grad_norm": 0.0, - "learning_rate": 9.238056308093244e-06, - "loss": 0.824, + "learning_rate": 3.254316828274987e-06, + "loss": 1.0528, "step": 19005 }, { - "epoch": 0.5385814276403412, + "epoch": 0.7436419125127162, "grad_norm": 0.0, - "learning_rate": 9.237141190067116e-06, - "loss": 1.0206, + "learning_rate": 3.253381396686839e-06, + "loss": 0.9618, "step": 19006 }, { - "epoch": 0.5386097650826036, + "epoch": 0.7436810392049457, "grad_norm": 0.0, - "learning_rate": 9.23622607846683e-06, - "loss": 0.8007, + "learning_rate": 3.252446073440931e-06, + "loss": 1.0196, "step": 19007 }, { - "epoch": 0.5386381025248661, + "epoch": 0.743720165897175, "grad_norm": 0.0, - "learning_rate": 9.235310973300093e-06, - "loss": 0.7979, + "learning_rate": 3.251510858552285e-06, + "loss": 0.9888, "step": 19008 }, { - "epoch": 0.5386664399671286, + "epoch": 0.7437592925894045, "grad_norm": 0.0, - "learning_rate": 9.23439587457462e-06, - "loss": 0.9063, + "learning_rate": 3.2505757520359205e-06, + "loss": 1.0426, "step": 19009 }, { - "epoch": 0.538694777409391, + "epoch": 0.7437984192816339, "grad_norm": 0.0, - "learning_rate": 9.233480782298111e-06, - "loss": 0.9087, + "learning_rate": 3.2496407539068497e-06, + "loss": 0.9695, "step": 19010 }, { - "epoch": 0.5387231148516535, + "epoch": 0.7438375459738634, "grad_norm": 0.0, - "learning_rate": 9.232565696478282e-06, - "loss": 1.0254, + "learning_rate": 3.2487058641800928e-06, + "loss": 1.0029, "step": 19011 }, { - "epoch": 0.538751452293916, + "epoch": 0.7438766726660928, "grad_norm": 0.0, - "learning_rate": 9.231650617122833e-06, - "loss": 1.0083, + "learning_rate": 3.247771082870652e-06, + "loss": 1.0152, "step": 19012 }, { - "epoch": 0.5387797897361785, + "epoch": 0.7439157993583222, "grad_norm": 0.0, - "learning_rate": 9.230735544239477e-06, - "loss": 0.7862, + "learning_rate": 3.2468364099935546e-06, + "loss": 0.8634, "step": 19013 }, { - "epoch": 0.5388081271784408, + "epoch": 0.7439549260505517, "grad_norm": 0.0, - "learning_rate": 9.229820477835926e-06, - "loss": 0.8765, + "learning_rate": 3.245901845563798e-06, + "loss": 0.9739, "step": 19014 }, { - "epoch": 0.5388364646207033, + "epoch": 0.7439940527427811, "grad_norm": 0.0, - "learning_rate": 9.228905417919879e-06, - "loss": 0.8517, + "learning_rate": 3.2449673895963985e-06, + "loss": 0.9877, "step": 19015 }, { - "epoch": 0.5388648020629658, + "epoch": 0.7440331794350106, "grad_norm": 0.0, - "learning_rate": 9.227990364499046e-06, - "loss": 0.811, + "learning_rate": 3.2440330421063513e-06, + "loss": 0.8421, "step": 19016 }, { - "epoch": 0.5388931395052282, + "epoch": 0.74407230612724, "grad_norm": 0.0, - "learning_rate": 9.227075317581141e-06, - "loss": 0.9535, + "learning_rate": 3.2430988031086742e-06, + "loss": 1.0471, "step": 19017 }, { - "epoch": 0.5389214769474907, + "epoch": 0.7441114328194695, "grad_norm": 0.0, - "learning_rate": 9.226160277173867e-06, - "loss": 0.9043, + "learning_rate": 3.242164672618361e-06, + "loss": 1.0289, "step": 19018 }, { - "epoch": 0.5389498143897532, + "epoch": 0.7441505595116988, "grad_norm": 0.0, - "learning_rate": 9.225245243284931e-06, - "loss": 0.9072, + "learning_rate": 3.2412306506504175e-06, + "loss": 0.9554, "step": 19019 }, { - "epoch": 0.5389781518320157, + "epoch": 0.7441896862039283, "grad_norm": 0.0, - "learning_rate": 9.224330215922042e-06, - "loss": 0.8941, + "learning_rate": 3.240296737219837e-06, + "loss": 0.9474, "step": 19020 }, { - "epoch": 0.5390064892742781, + "epoch": 0.7442288128961577, "grad_norm": 0.0, - "learning_rate": 9.223415195092906e-06, - "loss": 0.8128, + "learning_rate": 3.2393629323416207e-06, + "loss": 1.0664, "step": 19021 }, { - "epoch": 0.5390348267165406, + "epoch": 0.7442679395883872, "grad_norm": 0.0, - "learning_rate": 9.222500180805237e-06, - "loss": 0.9013, + "learning_rate": 3.2384292360307646e-06, + "loss": 0.8626, "step": 19022 }, { - "epoch": 0.5390631641588031, + "epoch": 0.7443070662806166, "grad_norm": 0.0, - "learning_rate": 9.221585173066735e-06, - "loss": 0.8691, + "learning_rate": 3.2374956483022656e-06, + "loss": 1.1008, "step": 19023 }, { - "epoch": 0.5390915016010654, + "epoch": 0.7443461929728461, "grad_norm": 0.0, - "learning_rate": 9.22067017188511e-06, - "loss": 1.0891, + "learning_rate": 3.236562169171109e-06, + "loss": 0.8844, "step": 19024 }, { - "epoch": 0.5391198390433279, + "epoch": 0.7443853196650755, "grad_norm": 0.0, - "learning_rate": 9.219755177268068e-06, - "loss": 0.8344, + "learning_rate": 3.2356287986522895e-06, + "loss": 0.9296, "step": 19025 }, { - "epoch": 0.5391481764855904, + "epoch": 0.744424446357305, "grad_norm": 0.0, - "learning_rate": 9.218840189223318e-06, - "loss": 0.9383, + "learning_rate": 3.2346955367607944e-06, + "loss": 1.0822, "step": 19026 }, { - "epoch": 0.5391765139278528, + "epoch": 0.7444635730495344, "grad_norm": 0.0, - "learning_rate": 9.217925207758571e-06, - "loss": 0.95, + "learning_rate": 3.233762383511615e-06, + "loss": 1.1017, "step": 19027 }, { - "epoch": 0.5392048513701153, + "epoch": 0.7445026997417639, "grad_norm": 0.0, - "learning_rate": 9.217010232881527e-06, - "loss": 0.9455, + "learning_rate": 3.2328293389197297e-06, + "loss": 1.0403, "step": 19028 }, { - "epoch": 0.5392331888123778, + "epoch": 0.7445418264339932, "grad_norm": 0.0, - "learning_rate": 9.216095264599895e-06, - "loss": 0.9061, + "learning_rate": 3.2318964030001297e-06, + "loss": 1.0927, "step": 19029 }, { - "epoch": 0.5392615262546403, + "epoch": 0.7445809531262227, "grad_norm": 0.0, - "learning_rate": 9.215180302921387e-06, - "loss": 0.8618, + "learning_rate": 3.2309635757677847e-06, + "loss": 0.8625, "step": 19030 }, { - "epoch": 0.5392898636969027, + "epoch": 0.7446200798184521, "grad_norm": 0.0, - "learning_rate": 9.214265347853706e-06, - "loss": 0.7029, + "learning_rate": 3.2300308572376906e-06, + "loss": 1.1059, "step": 19031 }, { - "epoch": 0.5393182011391652, + "epoch": 0.7446592065106816, "grad_norm": 0.0, - "learning_rate": 9.213350399404563e-06, - "loss": 0.9572, + "learning_rate": 3.2290982474248135e-06, + "loss": 0.9617, "step": 19032 }, { - "epoch": 0.5393465385814277, + "epoch": 0.744698333202911, "grad_norm": 0.0, - "learning_rate": 9.212435457581656e-06, - "loss": 0.8169, + "learning_rate": 3.2281657463441375e-06, + "loss": 0.9014, "step": 19033 }, { - "epoch": 0.53937487602369, + "epoch": 0.7447374598951405, "grad_norm": 0.0, - "learning_rate": 9.2115205223927e-06, - "loss": 0.8231, + "learning_rate": 3.2272333540106305e-06, + "loss": 1.0237, "step": 19034 }, { - "epoch": 0.5394032134659525, + "epoch": 0.7447765865873699, "grad_norm": 0.0, - "learning_rate": 9.210605593845402e-06, - "loss": 0.8549, + "learning_rate": 3.2263010704392694e-06, + "loss": 1.0741, "step": 19035 }, { - "epoch": 0.539431550908215, + "epoch": 0.7448157132795994, "grad_norm": 0.0, - "learning_rate": 9.209690671947463e-06, - "loss": 0.827, + "learning_rate": 3.2253688956450258e-06, + "loss": 0.9355, "step": 19036 }, { - "epoch": 0.5394598883504775, + "epoch": 0.7448548399718288, "grad_norm": 0.0, - "learning_rate": 9.208775756706591e-06, - "loss": 0.8807, + "learning_rate": 3.2244368296428706e-06, + "loss": 1.0427, "step": 19037 }, { - "epoch": 0.5394882257927399, + "epoch": 0.7448939666640583, "grad_norm": 0.0, - "learning_rate": 9.207860848130498e-06, - "loss": 0.9135, + "learning_rate": 3.223504872447768e-06, + "loss": 0.9198, "step": 19038 }, { - "epoch": 0.5395165632350024, + "epoch": 0.7449330933562877, "grad_norm": 0.0, - "learning_rate": 9.206945946226885e-06, - "loss": 0.8884, + "learning_rate": 3.2225730240746844e-06, + "loss": 0.9232, "step": 19039 }, { - "epoch": 0.5395449006772649, + "epoch": 0.7449722200485172, "grad_norm": 0.0, - "learning_rate": 9.206031051003465e-06, - "loss": 0.8585, + "learning_rate": 3.2216412845385893e-06, + "loss": 1.072, "step": 19040 }, { - "epoch": 0.5395732381195273, + "epoch": 0.7450113467407465, "grad_norm": 0.0, - "learning_rate": 9.205116162467938e-06, - "loss": 0.8905, + "learning_rate": 3.220709653854438e-06, + "loss": 1.0016, "step": 19041 }, { - "epoch": 0.5396015755617898, + "epoch": 0.7450504734329759, "grad_norm": 0.0, - "learning_rate": 9.204201280628011e-06, - "loss": 0.9799, + "learning_rate": 3.2197781320371944e-06, + "loss": 0.8226, "step": 19042 }, { - "epoch": 0.5396299130040523, + "epoch": 0.7450896001252054, "grad_norm": 0.0, - "learning_rate": 9.203286405491393e-06, - "loss": 0.8916, + "learning_rate": 3.218846719101818e-06, + "loss": 0.9529, "step": 19043 }, { - "epoch": 0.5396582504463148, + "epoch": 0.7451287268174348, "grad_norm": 0.0, - "learning_rate": 9.202371537065788e-06, - "loss": 0.9645, + "learning_rate": 3.21791541506327e-06, + "loss": 0.9685, "step": 19044 }, { - "epoch": 0.5396865878885772, + "epoch": 0.7451678535096643, "grad_norm": 0.0, - "learning_rate": 9.201456675358904e-06, - "loss": 0.6554, + "learning_rate": 3.2169842199364977e-06, + "loss": 1.0076, "step": 19045 }, { - "epoch": 0.5397149253308396, + "epoch": 0.7452069802018937, "grad_norm": 0.0, - "learning_rate": 9.200541820378452e-06, - "loss": 0.9208, + "learning_rate": 3.216053133736463e-06, + "loss": 0.9897, "step": 19046 }, { - "epoch": 0.5397432627731021, + "epoch": 0.7452461068941232, "grad_norm": 0.0, - "learning_rate": 9.199626972132128e-06, - "loss": 0.8903, + "learning_rate": 3.2151221564781076e-06, + "loss": 0.9802, "step": 19047 }, { - "epoch": 0.5397716002153645, + "epoch": 0.7452852335863526, "grad_norm": 0.0, - "learning_rate": 9.19871213062765e-06, - "loss": 0.8723, + "learning_rate": 3.214191288176396e-06, + "loss": 0.9216, "step": 19048 }, { - "epoch": 0.539799937657627, + "epoch": 0.7453243602785821, "grad_norm": 0.0, - "learning_rate": 9.197797295872709e-06, - "loss": 0.7637, + "learning_rate": 3.213260528846265e-06, + "loss": 1.0199, "step": 19049 }, { - "epoch": 0.5398282750998895, + "epoch": 0.7453634869708115, "grad_norm": 0.0, - "learning_rate": 9.19688246787502e-06, - "loss": 0.968, + "learning_rate": 3.212329878502669e-06, + "loss": 1.0023, "step": 19050 }, { - "epoch": 0.5398566125421519, + "epoch": 0.745402613663041, "grad_norm": 0.0, - "learning_rate": 9.195967646642294e-06, - "loss": 0.8031, + "learning_rate": 3.2113993371605457e-06, + "loss": 0.959, "step": 19051 }, { - "epoch": 0.5398849499844144, + "epoch": 0.7454417403552703, "grad_norm": 0.0, - "learning_rate": 9.195052832182225e-06, - "loss": 0.9057, + "learning_rate": 3.2104689048348436e-06, + "loss": 0.9857, "step": 19052 }, { - "epoch": 0.5399132874266769, + "epoch": 0.7454808670474998, "grad_norm": 0.0, - "learning_rate": 9.19413802450253e-06, - "loss": 0.7683, + "learning_rate": 3.2095385815405023e-06, + "loss": 0.921, "step": 19053 }, { - "epoch": 0.5399416248689394, + "epoch": 0.7455199937397292, "grad_norm": 0.0, - "learning_rate": 9.193223223610906e-06, - "loss": 0.9136, + "learning_rate": 3.208608367292466e-06, + "loss": 1.0154, "step": 19054 }, { - "epoch": 0.5399699623112018, + "epoch": 0.7455591204319587, "grad_norm": 0.0, - "learning_rate": 9.192308429515063e-06, - "loss": 0.8498, + "learning_rate": 3.207678262105667e-06, + "loss": 0.9734, "step": 19055 }, { - "epoch": 0.5399982997534643, + "epoch": 0.7455982471241881, "grad_norm": 0.0, - "learning_rate": 9.191393642222707e-06, - "loss": 0.9279, + "learning_rate": 3.206748265995042e-06, + "loss": 1.2098, "step": 19056 }, { - "epoch": 0.5400266371957267, + "epoch": 0.7456373738164176, "grad_norm": 0.0, - "learning_rate": 9.190478861741542e-06, - "loss": 0.8813, + "learning_rate": 3.2058183789755294e-06, + "loss": 1.032, "step": 19057 }, { - "epoch": 0.5400549746379891, + "epoch": 0.745676500508647, "grad_norm": 0.0, - "learning_rate": 9.189564088079272e-06, - "loss": 0.8192, + "learning_rate": 3.2048886010620617e-06, + "loss": 1.0316, "step": 19058 }, { - "epoch": 0.5400833120802516, + "epoch": 0.7457156272008765, "grad_norm": 0.0, - "learning_rate": 9.18864932124361e-06, - "loss": 0.936, + "learning_rate": 3.2039589322695664e-06, + "loss": 0.9305, "step": 19059 }, { - "epoch": 0.5401116495225141, + "epoch": 0.7457547538931059, "grad_norm": 0.0, - "learning_rate": 9.187734561242252e-06, - "loss": 0.876, + "learning_rate": 3.203029372612977e-06, + "loss": 0.9903, "step": 19060 }, { - "epoch": 0.5401399869647766, + "epoch": 0.7457938805853354, "grad_norm": 0.0, - "learning_rate": 9.186819808082912e-06, - "loss": 0.8635, + "learning_rate": 3.2020999221072125e-06, + "loss": 1.0311, "step": 19061 }, { - "epoch": 0.540168324407039, + "epoch": 0.7458330072775647, "grad_norm": 0.0, - "learning_rate": 9.185905061773286e-06, - "loss": 0.8436, + "learning_rate": 3.201170580767211e-06, + "loss": 0.9421, "step": 19062 }, { - "epoch": 0.5401966618493015, + "epoch": 0.7458721339697942, "grad_norm": 0.0, - "learning_rate": 9.184990322321083e-06, - "loss": 0.7198, + "learning_rate": 3.200241348607889e-06, + "loss": 1.0968, "step": 19063 }, { - "epoch": 0.540224999291564, + "epoch": 0.7459112606620236, "grad_norm": 0.0, - "learning_rate": 9.184075589734013e-06, - "loss": 0.8389, + "learning_rate": 3.1993122256441713e-06, + "loss": 0.9389, "step": 19064 }, { - "epoch": 0.5402533367338264, + "epoch": 0.7459503873542531, "grad_norm": 0.0, - "learning_rate": 9.183160864019774e-06, - "loss": 0.8549, + "learning_rate": 3.198383211890973e-06, + "loss": 1.0271, "step": 19065 }, { - "epoch": 0.5402816741760889, + "epoch": 0.7459895140464825, "grad_norm": 0.0, - "learning_rate": 9.182246145186076e-06, - "loss": 0.833, + "learning_rate": 3.1974543073632224e-06, + "loss": 0.9622, "step": 19066 }, { - "epoch": 0.5403100116183513, + "epoch": 0.746028640738712, "grad_norm": 0.0, - "learning_rate": 9.181331433240625e-06, - "loss": 0.8657, + "learning_rate": 3.1965255120758285e-06, + "loss": 0.9439, "step": 19067 }, { - "epoch": 0.5403383490606138, + "epoch": 0.7460677674309414, "grad_norm": 0.0, - "learning_rate": 9.18041672819112e-06, - "loss": 0.8402, + "learning_rate": 3.195596826043714e-06, + "loss": 1.015, "step": 19068 }, { - "epoch": 0.5403666865028762, + "epoch": 0.7461068941231708, "grad_norm": 0.0, - "learning_rate": 9.17950203004527e-06, - "loss": 0.9174, + "learning_rate": 3.194668249281785e-06, + "loss": 1.0767, "step": 19069 }, { - "epoch": 0.5403950239451387, + "epoch": 0.7461460208154003, "grad_norm": 0.0, - "learning_rate": 9.178587338810778e-06, - "loss": 0.9341, + "learning_rate": 3.1937397818049555e-06, + "loss": 0.9103, "step": 19070 }, { - "epoch": 0.5404233613874012, + "epoch": 0.7461851475076297, "grad_norm": 0.0, - "learning_rate": 9.17767265449535e-06, - "loss": 0.8161, + "learning_rate": 3.192811423628136e-06, + "loss": 0.9658, "step": 19071 }, { - "epoch": 0.5404516988296636, + "epoch": 0.7462242741998591, "grad_norm": 0.0, - "learning_rate": 9.176757977106693e-06, - "loss": 0.8596, + "learning_rate": 3.191883174766239e-06, + "loss": 0.9928, "step": 19072 }, { - "epoch": 0.5404800362719261, + "epoch": 0.7462634008920885, "grad_norm": 0.0, - "learning_rate": 9.175843306652507e-06, - "loss": 0.856, + "learning_rate": 3.190955035234163e-06, + "loss": 0.9749, "step": 19073 }, { - "epoch": 0.5405083737141886, + "epoch": 0.746302527584318, "grad_norm": 0.0, - "learning_rate": 9.1749286431405e-06, - "loss": 0.9231, + "learning_rate": 3.1900270050468184e-06, + "loss": 0.8244, "step": 19074 }, { - "epoch": 0.540536711156451, + "epoch": 0.7463416542765474, "grad_norm": 0.0, - "learning_rate": 9.174013986578377e-06, - "loss": 0.9208, + "learning_rate": 3.189099084219106e-06, + "loss": 0.9479, "step": 19075 }, { - "epoch": 0.5405650485987135, + "epoch": 0.7463807809687769, "grad_norm": 0.0, - "learning_rate": 9.173099336973838e-06, - "loss": 0.8538, + "learning_rate": 3.188171272765931e-06, + "loss": 1.0121, "step": 19076 }, { - "epoch": 0.540593386040976, + "epoch": 0.7464199076610063, "grad_norm": 0.0, - "learning_rate": 9.172184694334592e-06, - "loss": 0.7841, + "learning_rate": 3.1872435707021865e-06, + "loss": 0.9846, "step": 19077 }, { - "epoch": 0.5406217234832384, + "epoch": 0.7464590343532358, "grad_norm": 0.0, - "learning_rate": 9.17127005866834e-06, - "loss": 0.8911, + "learning_rate": 3.1863159780427765e-06, + "loss": 0.933, "step": 19078 }, { - "epoch": 0.5406500609255008, + "epoch": 0.7464981610454652, "grad_norm": 0.0, - "learning_rate": 9.170355429982787e-06, - "loss": 0.8568, + "learning_rate": 3.1853884948025904e-06, + "loss": 0.9752, "step": 19079 }, { - "epoch": 0.5406783983677633, + "epoch": 0.7465372877376947, "grad_norm": 0.0, - "learning_rate": 9.169440808285644e-06, - "loss": 0.7318, + "learning_rate": 3.1844611209965272e-06, + "loss": 0.9578, "step": 19080 }, { - "epoch": 0.5407067358100258, + "epoch": 0.7465764144299241, "grad_norm": 0.0, - "learning_rate": 9.168526193584604e-06, - "loss": 0.9446, + "learning_rate": 3.183533856639477e-06, + "loss": 0.8609, "step": 19081 }, { - "epoch": 0.5407350732522882, + "epoch": 0.7466155411221536, "grad_norm": 0.0, - "learning_rate": 9.16761158588738e-06, - "loss": 0.9069, + "learning_rate": 3.1826067017463346e-06, + "loss": 0.7818, "step": 19082 }, { - "epoch": 0.5407634106945507, + "epoch": 0.7466546678143829, "grad_norm": 0.0, - "learning_rate": 9.16669698520167e-06, - "loss": 0.8156, + "learning_rate": 3.181679656331983e-06, + "loss": 1.0143, "step": 19083 }, { - "epoch": 0.5407917481368132, + "epoch": 0.7466937945066124, "grad_norm": 0.0, - "learning_rate": 9.165782391535181e-06, - "loss": 0.8522, + "learning_rate": 3.180752720411312e-06, + "loss": 1.0683, "step": 19084 }, { - "epoch": 0.5408200855790757, + "epoch": 0.7467329211988418, "grad_norm": 0.0, - "learning_rate": 9.16486780489562e-06, - "loss": 0.7717, + "learning_rate": 3.179825893999211e-06, + "loss": 0.9647, "step": 19085 }, { - "epoch": 0.5408484230213381, + "epoch": 0.7467720478910713, "grad_norm": 0.0, - "learning_rate": 9.163953225290685e-06, - "loss": 0.7999, + "learning_rate": 3.178899177110556e-06, + "loss": 0.8977, "step": 19086 }, { - "epoch": 0.5408767604636006, + "epoch": 0.7468111745833007, "grad_norm": 0.0, - "learning_rate": 9.163038652728081e-06, - "loss": 0.9002, + "learning_rate": 3.177972569760234e-06, + "loss": 1.1148, "step": 19087 }, { - "epoch": 0.540905097905863, + "epoch": 0.7468503012755302, "grad_norm": 0.0, - "learning_rate": 9.162124087215519e-06, - "loss": 0.8923, + "learning_rate": 3.1770460719631237e-06, + "loss": 1.0381, "step": 19088 }, { - "epoch": 0.5409334353481254, + "epoch": 0.7468894279677596, "grad_norm": 0.0, - "learning_rate": 9.161209528760691e-06, - "loss": 0.9847, + "learning_rate": 3.1761196837341056e-06, + "loss": 1.1831, "step": 19089 }, { - "epoch": 0.5409617727903879, + "epoch": 0.7469285546599891, "grad_norm": 0.0, - "learning_rate": 9.160294977371309e-06, - "loss": 0.9586, + "learning_rate": 3.1751934050880527e-06, + "loss": 1.0189, "step": 19090 }, { - "epoch": 0.5409901102326504, + "epoch": 0.7469676813522185, "grad_norm": 0.0, - "learning_rate": 9.159380433055074e-06, - "loss": 0.9078, + "learning_rate": 3.1742672360398453e-06, + "loss": 0.9229, "step": 19091 }, { - "epoch": 0.5410184476749129, + "epoch": 0.747006808044448, "grad_norm": 0.0, - "learning_rate": 9.158465895819687e-06, - "loss": 0.8215, + "learning_rate": 3.1733411766043466e-06, + "loss": 0.9916, "step": 19092 }, { - "epoch": 0.5410467851171753, + "epoch": 0.7470459347366774, "grad_norm": 0.0, - "learning_rate": 9.157551365672859e-06, - "loss": 0.8571, + "learning_rate": 3.1724152267964404e-06, + "loss": 1.0059, "step": 19093 }, { - "epoch": 0.5410751225594378, + "epoch": 0.7470850614289068, "grad_norm": 0.0, - "learning_rate": 9.156636842622286e-06, - "loss": 0.8121, + "learning_rate": 3.171489386630986e-06, + "loss": 0.9398, "step": 19094 }, { - "epoch": 0.5411034600017003, + "epoch": 0.7471241881211362, "grad_norm": 0.0, - "learning_rate": 9.155722326675672e-06, - "loss": 0.813, + "learning_rate": 3.1705636561228605e-06, + "loss": 0.958, "step": 19095 }, { - "epoch": 0.5411317974439627, + "epoch": 0.7471633148133657, "grad_norm": 0.0, - "learning_rate": 9.154807817840726e-06, - "loss": 0.8088, + "learning_rate": 3.1696380352869173e-06, + "loss": 0.9233, "step": 19096 }, { - "epoch": 0.5411601348862252, + "epoch": 0.7472024415055951, "grad_norm": 0.0, - "learning_rate": 9.153893316125145e-06, - "loss": 1.0493, + "learning_rate": 3.1687125241380346e-06, + "loss": 1.0165, "step": 19097 }, { - "epoch": 0.5411884723284877, + "epoch": 0.7472415681978245, "grad_norm": 0.0, - "learning_rate": 9.15297882153664e-06, - "loss": 0.8172, + "learning_rate": 3.1677871226910663e-06, + "loss": 0.9383, "step": 19098 }, { - "epoch": 0.54121680977075, + "epoch": 0.747280694890054, "grad_norm": 0.0, - "learning_rate": 9.152064334082905e-06, - "loss": 0.9935, + "learning_rate": 3.166861830960878e-06, + "loss": 0.9818, "step": 19099 }, { - "epoch": 0.5412451472130125, + "epoch": 0.7473198215822834, "grad_norm": 0.0, - "learning_rate": 9.151149853771645e-06, - "loss": 0.8231, + "learning_rate": 3.1659366489623235e-06, + "loss": 0.9516, "step": 19100 }, { - "epoch": 0.541273484655275, + "epoch": 0.7473589482745129, "grad_norm": 0.0, - "learning_rate": 9.15023538061057e-06, - "loss": 0.9749, + "learning_rate": 3.165011576710262e-06, + "loss": 0.9758, "step": 19101 }, { - "epoch": 0.5413018220975375, + "epoch": 0.7473980749667423, "grad_norm": 0.0, - "learning_rate": 9.149320914607374e-06, - "loss": 0.8945, + "learning_rate": 3.1640866142195504e-06, + "loss": 0.9615, "step": 19102 }, { - "epoch": 0.5413301595397999, + "epoch": 0.7474372016589718, "grad_norm": 0.0, - "learning_rate": 9.148406455769762e-06, - "loss": 1.0204, + "learning_rate": 3.1631617615050457e-06, + "loss": 1.0562, "step": 19103 }, { - "epoch": 0.5413584969820624, + "epoch": 0.7474763283512011, "grad_norm": 0.0, - "learning_rate": 9.147492004105443e-06, - "loss": 0.8569, + "learning_rate": 3.162237018581592e-06, + "loss": 0.9221, "step": 19104 }, { - "epoch": 0.5413868344243249, + "epoch": 0.7475154550434306, "grad_norm": 0.0, - "learning_rate": 9.146577559622114e-06, - "loss": 0.816, + "learning_rate": 3.161312385464045e-06, + "loss": 0.8838, "step": 19105 }, { - "epoch": 0.5414151718665873, + "epoch": 0.74755458173566, "grad_norm": 0.0, - "learning_rate": 9.145663122327482e-06, - "loss": 0.9294, + "learning_rate": 3.16038786216725e-06, + "loss": 1.054, "step": 19106 }, { - "epoch": 0.5414435093088498, + "epoch": 0.7475937084278895, "grad_norm": 0.0, - "learning_rate": 9.144748692229242e-06, - "loss": 0.8549, + "learning_rate": 3.1594634487060595e-06, + "loss": 0.9604, "step": 19107 }, { - "epoch": 0.5414718467511123, + "epoch": 0.7476328351201189, "grad_norm": 0.0, - "learning_rate": 9.143834269335102e-06, - "loss": 0.8561, + "learning_rate": 3.158539145095312e-06, + "loss": 0.9795, "step": 19108 }, { - "epoch": 0.5415001841933748, + "epoch": 0.7476719618123484, "grad_norm": 0.0, - "learning_rate": 9.142919853652766e-06, - "loss": 0.9355, + "learning_rate": 3.1576149513498544e-06, + "loss": 1.0668, "step": 19109 }, { - "epoch": 0.5415285216356371, + "epoch": 0.7477110885045778, "grad_norm": 0.0, - "learning_rate": 9.142005445189933e-06, - "loss": 0.9566, + "learning_rate": 3.156690867484521e-06, + "loss": 0.9633, "step": 19110 }, { - "epoch": 0.5415568590778996, + "epoch": 0.7477502151968073, "grad_norm": 0.0, - "learning_rate": 9.14109104395431e-06, - "loss": 1.0236, + "learning_rate": 3.155766893514164e-06, + "loss": 0.8427, "step": 19111 }, { - "epoch": 0.5415851965201621, + "epoch": 0.7477893418890367, "grad_norm": 0.0, - "learning_rate": 9.140176649953592e-06, - "loss": 1.0427, + "learning_rate": 3.1548430294536115e-06, + "loss": 0.9483, "step": 19112 }, { - "epoch": 0.5416135339624245, + "epoch": 0.7478284685812662, "grad_norm": 0.0, - "learning_rate": 9.139262263195485e-06, - "loss": 0.8355, + "learning_rate": 3.153919275317705e-06, + "loss": 0.8711, "step": 19113 }, { - "epoch": 0.541641871404687, + "epoch": 0.7478675952734956, "grad_norm": 0.0, - "learning_rate": 9.138347883687695e-06, - "loss": 0.7882, + "learning_rate": 3.1529956311212693e-06, + "loss": 0.934, "step": 19114 }, { - "epoch": 0.5416702088469495, + "epoch": 0.747906721965725, "grad_norm": 0.0, - "learning_rate": 9.13743351143792e-06, - "loss": 0.8131, + "learning_rate": 3.1520720968791506e-06, + "loss": 0.9302, "step": 19115 }, { - "epoch": 0.541698546289212, + "epoch": 0.7479458486579544, "grad_norm": 0.0, - "learning_rate": 9.13651914645386e-06, - "loss": 0.9188, + "learning_rate": 3.1511486726061712e-06, + "loss": 1.0442, "step": 19116 }, { - "epoch": 0.5417268837314744, + "epoch": 0.7479849753501839, "grad_norm": 0.0, - "learning_rate": 9.135604788743222e-06, - "loss": 0.8885, + "learning_rate": 3.1502253583171637e-06, + "loss": 1.0195, "step": 19117 }, { - "epoch": 0.5417552211737369, + "epoch": 0.7480241020424133, "grad_norm": 0.0, - "learning_rate": 9.134690438313704e-06, - "loss": 1.0302, + "learning_rate": 3.149302154026952e-06, + "loss": 0.8342, "step": 19118 }, { - "epoch": 0.5417835586159994, + "epoch": 0.7480632287346428, "grad_norm": 0.0, - "learning_rate": 9.133776095173015e-06, - "loss": 0.9708, + "learning_rate": 3.1483790597503617e-06, + "loss": 0.8613, "step": 19119 }, { - "epoch": 0.5418118960582617, + "epoch": 0.7481023554268722, "grad_norm": 0.0, - "learning_rate": 9.132861759328845e-06, - "loss": 0.91, + "learning_rate": 3.147456075502219e-06, + "loss": 0.9549, "step": 19120 }, { - "epoch": 0.5418402335005242, + "epoch": 0.7481414821191017, "grad_norm": 0.0, - "learning_rate": 9.131947430788905e-06, - "loss": 0.8287, + "learning_rate": 3.1465332012973483e-06, + "loss": 1.0103, "step": 19121 }, { - "epoch": 0.5418685709427867, + "epoch": 0.7481806088113311, "grad_norm": 0.0, - "learning_rate": 9.131033109560896e-06, - "loss": 0.8252, + "learning_rate": 3.1456104371505636e-06, + "loss": 0.8828, "step": 19122 }, { - "epoch": 0.5418969083850491, + "epoch": 0.7482197355035606, "grad_norm": 0.0, - "learning_rate": 9.130118795652515e-06, - "loss": 0.8691, + "learning_rate": 3.1446877830766853e-06, + "loss": 1.0948, "step": 19123 }, { - "epoch": 0.5419252458273116, + "epoch": 0.74825886219579, "grad_norm": 0.0, - "learning_rate": 9.129204489071465e-06, - "loss": 0.8625, + "learning_rate": 3.1437652390905348e-06, + "loss": 1.1025, "step": 19124 }, { - "epoch": 0.5419535832695741, + "epoch": 0.7482979888880195, "grad_norm": 0.0, - "learning_rate": 9.128290189825457e-06, - "loss": 0.9363, + "learning_rate": 3.1428428052069195e-06, + "loss": 0.9836, "step": 19125 }, { - "epoch": 0.5419819207118366, + "epoch": 0.7483371155802488, "grad_norm": 0.0, - "learning_rate": 9.127375897922177e-06, - "loss": 0.6759, + "learning_rate": 3.1419204814406566e-06, + "loss": 0.9781, "step": 19126 }, { - "epoch": 0.542010258154099, + "epoch": 0.7483762422724782, "grad_norm": 0.0, - "learning_rate": 9.126461613369337e-06, - "loss": 0.8742, + "learning_rate": 3.140998267806561e-06, + "loss": 0.9705, "step": 19127 }, { - "epoch": 0.5420385955963615, + "epoch": 0.7484153689647077, "grad_norm": 0.0, - "learning_rate": 9.125547336174634e-06, - "loss": 0.8589, + "learning_rate": 3.1400761643194345e-06, + "loss": 0.9651, "step": 19128 }, { - "epoch": 0.542066933038624, + "epoch": 0.7484544956569371, "grad_norm": 0.0, - "learning_rate": 9.12463306634577e-06, - "loss": 0.8816, + "learning_rate": 3.139154170994089e-06, + "loss": 0.8396, "step": 19129 }, { - "epoch": 0.5420952704808863, + "epoch": 0.7484936223491666, "grad_norm": 0.0, - "learning_rate": 9.123718803890449e-06, - "loss": 0.8548, + "learning_rate": 3.1382322878453298e-06, + "loss": 0.9593, "step": 19130 }, { - "epoch": 0.5421236079231488, + "epoch": 0.748532749041396, "grad_norm": 0.0, - "learning_rate": 9.122804548816366e-06, - "loss": 0.9081, + "learning_rate": 3.1373105148879656e-06, + "loss": 1.0623, "step": 19131 }, { - "epoch": 0.5421519453654113, + "epoch": 0.7485718757336255, "grad_norm": 0.0, - "learning_rate": 9.121890301131227e-06, - "loss": 0.9503, + "learning_rate": 3.1363888521367924e-06, + "loss": 0.9819, "step": 19132 }, { - "epoch": 0.5421802828076738, + "epoch": 0.7486110024258549, "grad_norm": 0.0, - "learning_rate": 9.120976060842736e-06, - "loss": 0.9996, + "learning_rate": 3.1354672996066128e-06, + "loss": 0.8193, "step": 19133 }, { - "epoch": 0.5422086202499362, + "epoch": 0.7486501291180844, "grad_norm": 0.0, - "learning_rate": 9.120061827958586e-06, - "loss": 0.8256, + "learning_rate": 3.13454585731223e-06, + "loss": 1.0282, "step": 19134 }, { - "epoch": 0.5422369576921987, + "epoch": 0.7486892558103138, "grad_norm": 0.0, - "learning_rate": 9.119147602486484e-06, - "loss": 0.8341, + "learning_rate": 3.1336245252684348e-06, + "loss": 0.8399, "step": 19135 }, { - "epoch": 0.5422652951344612, + "epoch": 0.7487283825025433, "grad_norm": 0.0, - "learning_rate": 9.118233384434128e-06, - "loss": 0.8374, + "learning_rate": 3.1327033034900254e-06, + "loss": 0.9715, "step": 19136 }, { - "epoch": 0.5422936325767236, + "epoch": 0.7487675091947726, "grad_norm": 0.0, - "learning_rate": 9.117319173809218e-06, - "loss": 0.8731, + "learning_rate": 3.1317821919917957e-06, + "loss": 1.0543, "step": 19137 }, { - "epoch": 0.5423219700189861, + "epoch": 0.7488066358870021, "grad_norm": 0.0, - "learning_rate": 9.116404970619461e-06, - "loss": 0.8202, + "learning_rate": 3.130861190788541e-06, + "loss": 0.8804, "step": 19138 }, { - "epoch": 0.5423503074612486, + "epoch": 0.7488457625792315, "grad_norm": 0.0, - "learning_rate": 9.115490774872549e-06, - "loss": 0.7892, + "learning_rate": 3.129940299895046e-06, + "loss": 0.8422, "step": 19139 }, { - "epoch": 0.542378644903511, + "epoch": 0.748884889271461, "grad_norm": 0.0, - "learning_rate": 9.11457658657619e-06, - "loss": 0.8861, + "learning_rate": 3.129019519326102e-06, + "loss": 0.9573, "step": 19140 }, { - "epoch": 0.5424069823457734, + "epoch": 0.7489240159636904, "grad_norm": 0.0, - "learning_rate": 9.113662405738077e-06, - "loss": 0.8704, + "learning_rate": 3.1280988490964903e-06, + "loss": 1.0297, "step": 19141 }, { - "epoch": 0.5424353197880359, + "epoch": 0.7489631426559199, "grad_norm": 0.0, - "learning_rate": 9.112748232365916e-06, - "loss": 0.836, + "learning_rate": 3.1271782892210055e-06, + "loss": 0.9605, "step": 19142 }, { - "epoch": 0.5424636572302984, + "epoch": 0.7490022693481493, "grad_norm": 0.0, - "learning_rate": 9.11183406646741e-06, - "loss": 0.8021, + "learning_rate": 3.1262578397144216e-06, + "loss": 0.8989, "step": 19143 }, { - "epoch": 0.5424919946725608, + "epoch": 0.7490413960403788, "grad_norm": 0.0, - "learning_rate": 9.110919908050251e-06, - "loss": 0.9319, + "learning_rate": 3.1253375005915276e-06, + "loss": 0.9411, "step": 19144 }, { - "epoch": 0.5425203321148233, + "epoch": 0.7490805227326082, "grad_norm": 0.0, - "learning_rate": 9.110005757122144e-06, - "loss": 0.9536, + "learning_rate": 3.124417271867093e-06, + "loss": 0.9799, "step": 19145 }, { - "epoch": 0.5425486695570858, + "epoch": 0.7491196494248377, "grad_norm": 0.0, - "learning_rate": 9.109091613690794e-06, - "loss": 0.8345, + "learning_rate": 3.123497153555907e-06, + "loss": 0.8805, "step": 19146 }, { - "epoch": 0.5425770069993482, + "epoch": 0.749158776117067, "grad_norm": 0.0, - "learning_rate": 9.108177477763891e-06, - "loss": 0.8147, + "learning_rate": 3.1225771456727373e-06, + "loss": 0.9521, "step": 19147 }, { - "epoch": 0.5426053444416107, + "epoch": 0.7491979028092965, "grad_norm": 0.0, - "learning_rate": 9.107263349349143e-06, - "loss": 1.0311, + "learning_rate": 3.1216572482323628e-06, + "loss": 0.9382, "step": 19148 }, { - "epoch": 0.5426336818838732, + "epoch": 0.7492370295015259, "grad_norm": 0.0, - "learning_rate": 9.106349228454242e-06, - "loss": 0.97, + "learning_rate": 3.120737461249551e-06, + "loss": 1.0078, "step": 19149 }, { - "epoch": 0.5426620193261357, + "epoch": 0.7492761561937554, "grad_norm": 0.0, - "learning_rate": 9.105435115086898e-06, - "loss": 0.7299, + "learning_rate": 3.1198177847390764e-06, + "loss": 0.8674, "step": 19150 }, { - "epoch": 0.542690356768398, + "epoch": 0.7493152828859848, "grad_norm": 0.0, - "learning_rate": 9.104521009254807e-06, - "loss": 0.808, + "learning_rate": 3.1188982187157056e-06, + "loss": 0.9789, "step": 19151 }, { - "epoch": 0.5427186942106605, + "epoch": 0.7493544095782143, "grad_norm": 0.0, - "learning_rate": 9.103606910965666e-06, - "loss": 0.951, + "learning_rate": 3.1179787631942117e-06, + "loss": 0.9693, "step": 19152 }, { - "epoch": 0.542747031652923, + "epoch": 0.7493935362704437, "grad_norm": 0.0, - "learning_rate": 9.102692820227177e-06, - "loss": 0.9226, + "learning_rate": 3.117059418189351e-06, + "loss": 1.0367, "step": 19153 }, { - "epoch": 0.5427753690951854, + "epoch": 0.7494326629626732, "grad_norm": 0.0, - "learning_rate": 9.10177873704704e-06, - "loss": 0.8763, + "learning_rate": 3.116140183715891e-06, + "loss": 1.1336, "step": 19154 }, { - "epoch": 0.5428037065374479, + "epoch": 0.7494717896549026, "grad_norm": 0.0, - "learning_rate": 9.100864661432952e-06, - "loss": 0.9031, + "learning_rate": 3.1152210597885933e-06, + "loss": 1.1317, "step": 19155 }, { - "epoch": 0.5428320439797104, + "epoch": 0.749510916347132, "grad_norm": 0.0, - "learning_rate": 9.099950593392622e-06, - "loss": 0.8284, + "learning_rate": 3.114302046422223e-06, + "loss": 0.9568, "step": 19156 }, { - "epoch": 0.5428603814219729, + "epoch": 0.7495500430393615, "grad_norm": 0.0, - "learning_rate": 9.099036532933736e-06, - "loss": 0.8513, + "learning_rate": 3.1133831436315288e-06, + "loss": 0.9494, "step": 19157 }, { - "epoch": 0.5428887188642353, + "epoch": 0.7495891697315908, "grad_norm": 0.0, - "learning_rate": 9.098122480064e-06, - "loss": 0.7908, + "learning_rate": 3.112464351431276e-06, + "loss": 1.0977, "step": 19158 }, { - "epoch": 0.5429170563064978, + "epoch": 0.7496282964238203, "grad_norm": 0.0, - "learning_rate": 9.097208434791118e-06, - "loss": 0.8872, + "learning_rate": 3.111545669836209e-06, + "loss": 0.8621, "step": 19159 }, { - "epoch": 0.5429453937487603, + "epoch": 0.7496674231160497, "grad_norm": 0.0, - "learning_rate": 9.09629439712278e-06, - "loss": 0.8247, + "learning_rate": 3.110627098861092e-06, + "loss": 0.8895, "step": 19160 }, { - "epoch": 0.5429737311910227, + "epoch": 0.7497065498082792, "grad_norm": 0.0, - "learning_rate": 9.095380367066691e-06, - "loss": 0.9287, + "learning_rate": 3.109708638520669e-06, + "loss": 0.8825, "step": 19161 }, { - "epoch": 0.5430020686332852, + "epoch": 0.7497456765005086, "grad_norm": 0.0, - "learning_rate": 9.094466344630548e-06, - "loss": 0.9552, + "learning_rate": 3.108790288829694e-06, + "loss": 0.9941, "step": 19162 }, { - "epoch": 0.5430304060755476, + "epoch": 0.7497848031927381, "grad_norm": 0.0, - "learning_rate": 9.09355232982205e-06, - "loss": 0.8787, + "learning_rate": 3.107872049802908e-06, + "loss": 0.9959, "step": 19163 }, { - "epoch": 0.54305874351781, + "epoch": 0.7498239298849675, "grad_norm": 0.0, - "learning_rate": 9.092638322648904e-06, - "loss": 0.8409, + "learning_rate": 3.1069539214550614e-06, + "loss": 1.0531, "step": 19164 }, { - "epoch": 0.5430870809600725, + "epoch": 0.749863056577197, "grad_norm": 0.0, - "learning_rate": 9.091724323118797e-06, - "loss": 0.8908, + "learning_rate": 3.106035903800897e-06, + "loss": 0.8471, "step": 19165 }, { - "epoch": 0.543115418402335, + "epoch": 0.7499021832694264, "grad_norm": 0.0, - "learning_rate": 9.090810331239432e-06, - "loss": 0.9077, + "learning_rate": 3.1051179968551604e-06, + "loss": 0.997, "step": 19166 }, { - "epoch": 0.5431437558445975, + "epoch": 0.7499413099616559, "grad_norm": 0.0, - "learning_rate": 9.089896347018512e-06, - "loss": 0.9323, + "learning_rate": 3.104200200632587e-06, + "loss": 1.0435, "step": 19167 }, { - "epoch": 0.5431720932868599, + "epoch": 0.7499804366538853, "grad_norm": 0.0, - "learning_rate": 9.088982370463732e-06, - "loss": 0.8208, + "learning_rate": 3.1032825151479163e-06, + "loss": 0.926, "step": 19168 }, { - "epoch": 0.5432004307291224, + "epoch": 0.7500195633461147, "grad_norm": 0.0, - "learning_rate": 9.088068401582795e-06, - "loss": 0.8611, + "learning_rate": 3.102364940415886e-06, + "loss": 1.0262, "step": 19169 }, { - "epoch": 0.5432287681713849, + "epoch": 0.7500586900383441, "grad_norm": 0.0, - "learning_rate": 9.087154440383394e-06, - "loss": 0.8391, + "learning_rate": 3.1014474764512347e-06, + "loss": 0.95, "step": 19170 }, { - "epoch": 0.5432571056136473, + "epoch": 0.7500978167305736, "grad_norm": 0.0, - "learning_rate": 9.086240486873227e-06, - "loss": 0.7651, + "learning_rate": 3.10053012326869e-06, + "loss": 0.9264, "step": 19171 }, { - "epoch": 0.5432854430559098, + "epoch": 0.750136943422803, "grad_norm": 0.0, - "learning_rate": 9.085326541060002e-06, - "loss": 0.8974, + "learning_rate": 3.099612880882986e-06, + "loss": 0.8175, "step": 19172 }, { - "epoch": 0.5433137804981722, + "epoch": 0.7501760701150325, "grad_norm": 0.0, - "learning_rate": 9.084412602951406e-06, - "loss": 0.8091, + "learning_rate": 3.0986957493088555e-06, + "loss": 0.9965, "step": 19173 }, { - "epoch": 0.5433421179404347, + "epoch": 0.7502151968072619, "grad_norm": 0.0, - "learning_rate": 9.083498672555144e-06, - "loss": 0.8664, + "learning_rate": 3.0977787285610206e-06, + "loss": 1.0378, "step": 19174 }, { - "epoch": 0.5433704553826971, + "epoch": 0.7502543234994914, "grad_norm": 0.0, - "learning_rate": 9.082584749878915e-06, - "loss": 0.8225, + "learning_rate": 3.09686181865421e-06, + "loss": 0.9682, "step": 19175 }, { - "epoch": 0.5433987928249596, + "epoch": 0.7502934501917208, "grad_norm": 0.0, - "learning_rate": 9.081670834930413e-06, - "loss": 0.7696, + "learning_rate": 3.0959450196031516e-06, + "loss": 1.0002, "step": 19176 }, { - "epoch": 0.5434271302672221, + "epoch": 0.7503325768839503, "grad_norm": 0.0, - "learning_rate": 9.080756927717343e-06, - "loss": 0.89, + "learning_rate": 3.0950283314225627e-06, + "loss": 1.018, "step": 19177 }, { - "epoch": 0.5434554677094845, + "epoch": 0.7503717035761797, "grad_norm": 0.0, - "learning_rate": 9.079843028247393e-06, - "loss": 0.8595, + "learning_rate": 3.094111754127165e-06, + "loss": 0.8954, "step": 19178 }, { - "epoch": 0.543483805151747, + "epoch": 0.7504108302684092, "grad_norm": 0.0, - "learning_rate": 9.078929136528267e-06, - "loss": 0.9078, + "learning_rate": 3.093195287731683e-06, + "loss": 0.8867, "step": 19179 }, { - "epoch": 0.5435121425940095, + "epoch": 0.7504499569606385, "grad_norm": 0.0, - "learning_rate": 9.078015252567667e-06, - "loss": 0.9083, + "learning_rate": 3.092278932250826e-06, + "loss": 1.032, "step": 19180 }, { - "epoch": 0.543540480036272, + "epoch": 0.750489083652868, "grad_norm": 0.0, - "learning_rate": 9.077101376373282e-06, - "loss": 0.9372, + "learning_rate": 3.0913626876993142e-06, + "loss": 0.8618, "step": 19181 }, { - "epoch": 0.5435688174785344, + "epoch": 0.7505282103450974, "grad_norm": 0.0, - "learning_rate": 9.076187507952816e-06, - "loss": 0.8673, + "learning_rate": 3.0904465540918605e-06, + "loss": 0.9134, "step": 19182 }, { - "epoch": 0.5435971549207969, + "epoch": 0.7505673370373268, "grad_norm": 0.0, - "learning_rate": 9.075273647313971e-06, - "loss": 0.8641, + "learning_rate": 3.08953053144318e-06, + "loss": 0.9814, "step": 19183 }, { - "epoch": 0.5436254923630593, + "epoch": 0.7506064637295563, "grad_norm": 0.0, - "learning_rate": 9.074359794464436e-06, - "loss": 0.9071, + "learning_rate": 3.0886146197679766e-06, + "loss": 1.0089, "step": 19184 }, { - "epoch": 0.5436538298053217, + "epoch": 0.7506455904217857, "grad_norm": 0.0, - "learning_rate": 9.073445949411914e-06, - "loss": 0.9497, + "learning_rate": 3.0876988190809655e-06, + "loss": 1.0519, "step": 19185 }, { - "epoch": 0.5436821672475842, + "epoch": 0.7506847171140152, "grad_norm": 0.0, - "learning_rate": 9.072532112164097e-06, - "loss": 0.785, + "learning_rate": 3.086783129396843e-06, + "loss": 0.9715, "step": 19186 }, { - "epoch": 0.5437105046898467, + "epoch": 0.7507238438062446, "grad_norm": 0.0, - "learning_rate": 9.071618282728689e-06, - "loss": 0.8655, + "learning_rate": 3.0858675507303273e-06, + "loss": 0.9246, "step": 19187 }, { - "epoch": 0.5437388421321091, + "epoch": 0.7507629704984741, "grad_norm": 0.0, - "learning_rate": 9.070704461113385e-06, - "loss": 0.8475, + "learning_rate": 3.084952083096111e-06, + "loss": 0.9426, "step": 19188 }, { - "epoch": 0.5437671795743716, + "epoch": 0.7508020971907035, "grad_norm": 0.0, - "learning_rate": 9.069790647325879e-06, - "loss": 0.8906, + "learning_rate": 3.0840367265089034e-06, + "loss": 0.8679, "step": 19189 }, { - "epoch": 0.5437955170166341, + "epoch": 0.750841223882933, "grad_norm": 0.0, - "learning_rate": 9.068876841373878e-06, - "loss": 0.829, + "learning_rate": 3.083121480983393e-06, + "loss": 0.9717, "step": 19190 }, { - "epoch": 0.5438238544588966, + "epoch": 0.7508803505751623, "grad_norm": 0.0, - "learning_rate": 9.067963043265068e-06, - "loss": 0.8631, + "learning_rate": 3.0822063465342913e-06, + "loss": 0.9784, "step": 19191 }, { - "epoch": 0.543852191901159, + "epoch": 0.7509194772673918, "grad_norm": 0.0, - "learning_rate": 9.067049253007151e-06, - "loss": 0.9027, + "learning_rate": 3.0812913231762832e-06, + "loss": 0.882, "step": 19192 }, { - "epoch": 0.5438805293434215, + "epoch": 0.7509586039596212, "grad_norm": 0.0, - "learning_rate": 9.066135470607828e-06, - "loss": 0.8652, + "learning_rate": 3.0803764109240697e-06, + "loss": 0.9489, "step": 19193 }, { - "epoch": 0.543908866785684, + "epoch": 0.7509977306518507, "grad_norm": 0.0, - "learning_rate": 9.06522169607479e-06, - "loss": 0.8374, + "learning_rate": 3.0794616097923378e-06, + "loss": 1.0029, "step": 19194 }, { - "epoch": 0.5439372042279463, + "epoch": 0.7510368573440801, "grad_norm": 0.0, - "learning_rate": 9.064307929415737e-06, - "loss": 0.8768, + "learning_rate": 3.0785469197957806e-06, + "loss": 0.966, "step": 19195 }, { - "epoch": 0.5439655416702088, + "epoch": 0.7510759840363096, "grad_norm": 0.0, - "learning_rate": 9.063394170638369e-06, - "loss": 0.7771, + "learning_rate": 3.077632340949086e-06, + "loss": 0.9782, "step": 19196 }, { - "epoch": 0.5439938791124713, + "epoch": 0.751115110728539, "grad_norm": 0.0, - "learning_rate": 9.062480419750377e-06, - "loss": 0.8914, + "learning_rate": 3.0767178732669454e-06, + "loss": 0.9728, "step": 19197 }, { - "epoch": 0.5440222165547338, + "epoch": 0.7511542374207685, "grad_norm": 0.0, - "learning_rate": 9.061566676759464e-06, - "loss": 0.8618, + "learning_rate": 3.0758035167640376e-06, + "loss": 0.9111, "step": 19198 }, { - "epoch": 0.5440505539969962, + "epoch": 0.7511933641129979, "grad_norm": 0.0, - "learning_rate": 9.060652941673317e-06, - "loss": 0.9419, + "learning_rate": 3.0748892714550483e-06, + "loss": 0.8748, "step": 19199 }, { - "epoch": 0.5440788914392587, + "epoch": 0.7512324908052274, "grad_norm": 0.0, - "learning_rate": 9.059739214499643e-06, - "loss": 0.9036, + "learning_rate": 3.07397513735466e-06, + "loss": 1.0236, "step": 19200 }, { - "epoch": 0.5441072288815212, + "epoch": 0.7512716174974567, "grad_norm": 0.0, - "learning_rate": 9.058825495246134e-06, - "loss": 0.8318, + "learning_rate": 3.0730611144775547e-06, + "loss": 0.946, "step": 19201 }, { - "epoch": 0.5441355663237836, + "epoch": 0.7513107441896862, "grad_norm": 0.0, - "learning_rate": 9.057911783920487e-06, - "loss": 0.8492, + "learning_rate": 3.072147202838406e-06, + "loss": 0.9306, "step": 19202 }, { - "epoch": 0.5441639037660461, + "epoch": 0.7513498708819156, "grad_norm": 0.0, - "learning_rate": 9.056998080530398e-06, - "loss": 0.8617, + "learning_rate": 3.071233402451891e-06, + "loss": 1.0031, "step": 19203 }, { - "epoch": 0.5441922412083086, + "epoch": 0.7513889975741451, "grad_norm": 0.0, - "learning_rate": 9.056084385083569e-06, - "loss": 0.857, + "learning_rate": 3.0703197133326856e-06, + "loss": 1.0521, "step": 19204 }, { - "epoch": 0.544220578650571, + "epoch": 0.7514281242663745, "grad_norm": 0.0, - "learning_rate": 9.055170697587688e-06, - "loss": 0.9603, + "learning_rate": 3.069406135495466e-06, + "loss": 0.9047, "step": 19205 }, { - "epoch": 0.5442489160928334, + "epoch": 0.751467250958604, "grad_norm": 0.0, - "learning_rate": 9.054257018050456e-06, - "loss": 0.9676, + "learning_rate": 3.0684926689548954e-06, + "loss": 1.0215, "step": 19206 }, { - "epoch": 0.5442772535350959, + "epoch": 0.7515063776508334, "grad_norm": 0.0, - "learning_rate": 9.053343346479567e-06, - "loss": 0.8506, + "learning_rate": 3.0675793137256505e-06, + "loss": 1.0833, "step": 19207 }, { - "epoch": 0.5443055909773584, + "epoch": 0.7515455043430629, "grad_norm": 0.0, - "learning_rate": 9.052429682882717e-06, - "loss": 0.7857, + "learning_rate": 3.0666660698223884e-06, + "loss": 0.8901, "step": 19208 }, { - "epoch": 0.5443339284196208, + "epoch": 0.7515846310352923, "grad_norm": 0.0, - "learning_rate": 9.05151602726761e-06, - "loss": 0.8793, + "learning_rate": 3.065752937259788e-06, + "loss": 0.7934, "step": 19209 }, { - "epoch": 0.5443622658618833, + "epoch": 0.7516237577275218, "grad_norm": 0.0, - "learning_rate": 9.05060237964193e-06, - "loss": 0.9413, + "learning_rate": 3.064839916052503e-06, + "loss": 1.0383, "step": 19210 }, { - "epoch": 0.5443906033041458, + "epoch": 0.7516628844197512, "grad_norm": 0.0, - "learning_rate": 9.04968874001338e-06, - "loss": 0.9991, + "learning_rate": 3.0639270062152014e-06, + "loss": 0.9895, "step": 19211 }, { - "epoch": 0.5444189407464082, + "epoch": 0.7517020111119805, "grad_norm": 0.0, - "learning_rate": 9.048775108389658e-06, - "loss": 0.7775, + "learning_rate": 3.063014207762538e-06, + "loss": 0.9227, "step": 19212 }, { - "epoch": 0.5444472781886707, + "epoch": 0.75174113780421, "grad_norm": 0.0, - "learning_rate": 9.047861484778454e-06, - "loss": 0.8744, + "learning_rate": 3.0621015207091744e-06, + "loss": 1.1237, "step": 19213 }, { - "epoch": 0.5444756156309332, + "epoch": 0.7517802644964394, "grad_norm": 0.0, - "learning_rate": 9.046947869187465e-06, - "loss": 0.8415, + "learning_rate": 3.0611889450697663e-06, + "loss": 0.9544, "step": 19214 }, { - "epoch": 0.5445039530731957, + "epoch": 0.7518193911886689, "grad_norm": 0.0, - "learning_rate": 9.046034261624389e-06, - "loss": 0.9655, + "learning_rate": 3.0602764808589714e-06, + "loss": 0.9868, "step": 19215 }, { - "epoch": 0.544532290515458, + "epoch": 0.7518585178808983, "grad_norm": 0.0, - "learning_rate": 9.045120662096917e-06, - "loss": 0.8693, + "learning_rate": 3.059364128091438e-06, + "loss": 1.0438, "step": 19216 }, { - "epoch": 0.5445606279577205, + "epoch": 0.7518976445731278, "grad_norm": 0.0, - "learning_rate": 9.044207070612756e-06, - "loss": 0.9432, + "learning_rate": 3.0584518867818192e-06, + "loss": 0.9286, "step": 19217 }, { - "epoch": 0.544588965399983, + "epoch": 0.7519367712653572, "grad_norm": 0.0, - "learning_rate": 9.043293487179588e-06, - "loss": 0.9944, + "learning_rate": 3.057539756944767e-06, + "loss": 0.866, "step": 19218 }, { - "epoch": 0.5446173028422454, + "epoch": 0.7519758979575867, "grad_norm": 0.0, - "learning_rate": 9.042379911805117e-06, - "loss": 0.8711, + "learning_rate": 3.056627738594926e-06, + "loss": 1.022, "step": 19219 }, { - "epoch": 0.5446456402845079, + "epoch": 0.7520150246498161, "grad_norm": 0.0, - "learning_rate": 9.041466344497033e-06, - "loss": 0.7771, + "learning_rate": 3.0557158317469414e-06, + "loss": 0.8974, "step": 19220 }, { - "epoch": 0.5446739777267704, + "epoch": 0.7520541513420456, "grad_norm": 0.0, - "learning_rate": 9.040552785263036e-06, - "loss": 0.8731, + "learning_rate": 3.0548040364154597e-06, + "loss": 0.8705, "step": 19221 }, { - "epoch": 0.5447023151690329, + "epoch": 0.752093278034275, "grad_norm": 0.0, - "learning_rate": 9.03963923411082e-06, - "loss": 0.995, + "learning_rate": 3.053892352615124e-06, + "loss": 1.0671, "step": 19222 }, { - "epoch": 0.5447306526112953, + "epoch": 0.7521324047265044, "grad_norm": 0.0, - "learning_rate": 9.038725691048076e-06, - "loss": 0.9212, + "learning_rate": 3.0529807803605717e-06, + "loss": 0.9503, "step": 19223 }, { - "epoch": 0.5447589900535578, + "epoch": 0.7521715314187338, "grad_norm": 0.0, - "learning_rate": 9.037812156082503e-06, - "loss": 0.8711, + "learning_rate": 3.0520693196664453e-06, + "loss": 0.9985, "step": 19224 }, { - "epoch": 0.5447873274958203, + "epoch": 0.7522106581109633, "grad_norm": 0.0, - "learning_rate": 9.0368986292218e-06, - "loss": 0.8529, + "learning_rate": 3.051157970547376e-06, + "loss": 0.9208, "step": 19225 }, { - "epoch": 0.5448156649380826, + "epoch": 0.7522497848031927, "grad_norm": 0.0, - "learning_rate": 9.035985110473654e-06, - "loss": 0.804, + "learning_rate": 3.050246733018003e-06, + "loss": 0.9568, "step": 19226 }, { - "epoch": 0.5448440023803451, + "epoch": 0.7522889114954222, "grad_norm": 0.0, - "learning_rate": 9.035071599845763e-06, - "loss": 0.7707, + "learning_rate": 3.049335607092959e-06, + "loss": 1.0029, "step": 19227 }, { - "epoch": 0.5448723398226076, + "epoch": 0.7523280381876516, "grad_norm": 0.0, - "learning_rate": 9.034158097345823e-06, - "loss": 0.8937, + "learning_rate": 3.048424592786878e-06, + "loss": 1.0163, "step": 19228 }, { - "epoch": 0.5449006772648701, + "epoch": 0.7523671648798811, "grad_norm": 0.0, - "learning_rate": 9.033244602981527e-06, - "loss": 0.9774, + "learning_rate": 3.047513690114384e-06, + "loss": 0.9321, "step": 19229 }, { - "epoch": 0.5449290147071325, + "epoch": 0.7524062915721105, "grad_norm": 0.0, - "learning_rate": 9.032331116760574e-06, - "loss": 0.9165, + "learning_rate": 3.0466028990901084e-06, + "loss": 1.0782, "step": 19230 }, { - "epoch": 0.544957352149395, + "epoch": 0.75244541826434, "grad_norm": 0.0, - "learning_rate": 9.031417638690653e-06, - "loss": 0.8997, + "learning_rate": 3.0456922197286776e-06, + "loss": 0.9141, "step": 19231 }, { - "epoch": 0.5449856895916575, + "epoch": 0.7524845449565694, "grad_norm": 0.0, - "learning_rate": 9.030504168779458e-06, - "loss": 0.7305, + "learning_rate": 3.0447816520447182e-06, + "loss": 1.045, "step": 19232 }, { - "epoch": 0.5450140270339199, + "epoch": 0.7525236716487989, "grad_norm": 0.0, - "learning_rate": 9.029590707034691e-06, - "loss": 0.9009, + "learning_rate": 3.0438711960528476e-06, + "loss": 0.9791, "step": 19233 }, { - "epoch": 0.5450423644761824, + "epoch": 0.7525627983410282, "grad_norm": 0.0, - "learning_rate": 9.02867725346404e-06, - "loss": 0.7335, + "learning_rate": 3.0429608517676913e-06, + "loss": 1.0258, "step": 19234 }, { - "epoch": 0.5450707019184449, + "epoch": 0.7526019250332577, "grad_norm": 0.0, - "learning_rate": 9.027763808075202e-06, - "loss": 0.8549, + "learning_rate": 3.0420506192038603e-06, + "loss": 0.8466, "step": 19235 }, { - "epoch": 0.5450990393607072, + "epoch": 0.7526410517254871, "grad_norm": 0.0, - "learning_rate": 9.026850370875871e-06, - "loss": 0.9325, + "learning_rate": 3.041140498375984e-06, + "loss": 0.8956, "step": 19236 }, { - "epoch": 0.5451273768029697, + "epoch": 0.7526801784177166, "grad_norm": 0.0, - "learning_rate": 9.025936941873737e-06, - "loss": 0.8956, + "learning_rate": 3.0402304892986677e-06, + "loss": 1.0878, "step": 19237 }, { - "epoch": 0.5451557142452322, + "epoch": 0.752719305109946, "grad_norm": 0.0, - "learning_rate": 9.025023521076504e-06, - "loss": 0.831, + "learning_rate": 3.039320591986532e-06, + "loss": 1.0345, "step": 19238 }, { - "epoch": 0.5451840516874947, + "epoch": 0.7527584318021755, "grad_norm": 0.0, - "learning_rate": 9.024110108491855e-06, - "loss": 0.9569, + "learning_rate": 3.0384108064541795e-06, + "loss": 0.9561, "step": 19239 }, { - "epoch": 0.5452123891297571, + "epoch": 0.7527975584944049, "grad_norm": 0.0, - "learning_rate": 9.023196704127489e-06, - "loss": 0.7976, + "learning_rate": 3.037501132716232e-06, + "loss": 0.8005, "step": 19240 }, { - "epoch": 0.5452407265720196, + "epoch": 0.7528366851866343, "grad_norm": 0.0, - "learning_rate": 9.022283307991102e-06, - "loss": 0.9286, + "learning_rate": 3.0365915707872883e-06, + "loss": 0.9897, "step": 19241 }, { - "epoch": 0.5452690640142821, + "epoch": 0.7528758118788638, "grad_norm": 0.0, - "learning_rate": 9.021369920090384e-06, - "loss": 0.8755, + "learning_rate": 3.035682120681962e-06, + "loss": 1.0961, "step": 19242 }, { - "epoch": 0.5452974014565445, + "epoch": 0.7529149385710932, "grad_norm": 0.0, - "learning_rate": 9.020456540433033e-06, - "loss": 0.839, + "learning_rate": 3.0347727824148508e-06, + "loss": 0.9488, "step": 19243 }, { - "epoch": 0.545325738898807, + "epoch": 0.7529540652633226, "grad_norm": 0.0, - "learning_rate": 9.019543169026739e-06, - "loss": 0.9516, + "learning_rate": 3.0338635560005614e-06, + "loss": 0.9302, "step": 19244 }, { - "epoch": 0.5453540763410695, + "epoch": 0.752993191955552, "grad_norm": 0.0, - "learning_rate": 9.018629805879194e-06, - "loss": 0.8652, + "learning_rate": 3.0329544414536927e-06, + "loss": 1.1264, "step": 19245 }, { - "epoch": 0.545382413783332, + "epoch": 0.7530323186477815, "grad_norm": 0.0, - "learning_rate": 9.017716450998099e-06, - "loss": 0.8698, + "learning_rate": 3.0320454387888496e-06, + "loss": 0.933, "step": 19246 }, { - "epoch": 0.5454107512255943, + "epoch": 0.7530714453400109, "grad_norm": 0.0, - "learning_rate": 9.01680310439114e-06, - "loss": 0.8302, + "learning_rate": 3.0311365480206224e-06, + "loss": 0.9673, "step": 19247 }, { - "epoch": 0.5454390886678568, + "epoch": 0.7531105720322404, "grad_norm": 0.0, - "learning_rate": 9.015889766066018e-06, - "loss": 0.888, + "learning_rate": 3.0302277691636096e-06, + "loss": 0.9583, "step": 19248 }, { - "epoch": 0.5454674261101193, + "epoch": 0.7531496987244698, "grad_norm": 0.0, - "learning_rate": 9.014976436030417e-06, - "loss": 0.9019, + "learning_rate": 3.0293191022324055e-06, + "loss": 0.9955, "step": 19249 }, { - "epoch": 0.5454957635523817, + "epoch": 0.7531888254166993, "grad_norm": 0.0, - "learning_rate": 9.014063114292037e-06, - "loss": 0.7733, + "learning_rate": 3.0284105472416046e-06, + "loss": 0.9915, "step": 19250 }, { - "epoch": 0.5455241009946442, + "epoch": 0.7532279521089287, "grad_norm": 0.0, - "learning_rate": 9.01314980085857e-06, - "loss": 0.8416, + "learning_rate": 3.0275021042057907e-06, + "loss": 1.0174, "step": 19251 }, { - "epoch": 0.5455524384369067, + "epoch": 0.7532670788011582, "grad_norm": 0.0, - "learning_rate": 9.012236495737708e-06, - "loss": 0.8511, + "learning_rate": 3.0265937731395602e-06, + "loss": 1.0452, "step": 19252 }, { - "epoch": 0.5455807758791692, + "epoch": 0.7533062054933876, "grad_norm": 0.0, - "learning_rate": 9.011323198937144e-06, - "loss": 0.7916, + "learning_rate": 3.0256855540574894e-06, + "loss": 1.0318, "step": 19253 }, { - "epoch": 0.5456091133214316, + "epoch": 0.7533453321856171, "grad_norm": 0.0, - "learning_rate": 9.010409910464575e-06, - "loss": 1.0084, + "learning_rate": 3.0247774469741742e-06, + "loss": 1.0574, "step": 19254 }, { - "epoch": 0.5456374507636941, + "epoch": 0.7533844588778464, "grad_norm": 0.0, - "learning_rate": 9.009496630327687e-06, - "loss": 0.8701, + "learning_rate": 3.023869451904191e-06, + "loss": 1.0516, "step": 19255 }, { - "epoch": 0.5456657882059566, + "epoch": 0.7534235855700759, "grad_norm": 0.0, - "learning_rate": 9.008583358534182e-06, - "loss": 0.9141, + "learning_rate": 3.022961568862125e-06, + "loss": 0.9652, "step": 19256 }, { - "epoch": 0.545694125648219, + "epoch": 0.7534627122623053, "grad_norm": 0.0, - "learning_rate": 9.007670095091744e-06, - "loss": 0.8467, + "learning_rate": 3.022053797862551e-06, + "loss": 0.9313, "step": 19257 }, { - "epoch": 0.5457224630904814, + "epoch": 0.7535018389545348, "grad_norm": 0.0, - "learning_rate": 9.006756840008065e-06, - "loss": 0.7868, + "learning_rate": 3.0211461389200493e-06, + "loss": 1.0154, "step": 19258 }, { - "epoch": 0.5457508005327439, + "epoch": 0.7535409656467642, "grad_norm": 0.0, - "learning_rate": 9.005843593290849e-06, - "loss": 0.9759, + "learning_rate": 3.020238592049195e-06, + "loss": 0.8466, "step": 19259 }, { - "epoch": 0.5457791379750063, + "epoch": 0.7535800923389937, "grad_norm": 0.0, - "learning_rate": 9.004930354947777e-06, - "loss": 0.9286, + "learning_rate": 3.0193311572645655e-06, + "loss": 0.9539, "step": 19260 }, { - "epoch": 0.5458074754172688, + "epoch": 0.7536192190312231, "grad_norm": 0.0, - "learning_rate": 9.004017124986547e-06, - "loss": 0.7766, + "learning_rate": 3.0184238345807284e-06, + "loss": 1.037, "step": 19261 }, { - "epoch": 0.5458358128595313, + "epoch": 0.7536583457234526, "grad_norm": 0.0, - "learning_rate": 9.003103903414855e-06, - "loss": 0.8367, + "learning_rate": 3.0175166240122554e-06, + "loss": 1.0652, "step": 19262 }, { - "epoch": 0.5458641503017938, + "epoch": 0.753697472415682, "grad_norm": 0.0, - "learning_rate": 9.002190690240384e-06, - "loss": 1.0292, + "learning_rate": 3.0166095255737193e-06, + "loss": 0.9344, "step": 19263 }, { - "epoch": 0.5458924877440562, + "epoch": 0.7537365991079115, "grad_norm": 0.0, - "learning_rate": 9.001277485470834e-06, - "loss": 0.8143, + "learning_rate": 3.0157025392796803e-06, + "loss": 0.9596, "step": 19264 }, { - "epoch": 0.5459208251863187, + "epoch": 0.7537757258001409, "grad_norm": 0.0, - "learning_rate": 9.000364289113893e-06, - "loss": 0.942, + "learning_rate": 3.0147956651447064e-06, + "loss": 1.0678, "step": 19265 }, { - "epoch": 0.5459491626285812, + "epoch": 0.7538148524923703, "grad_norm": 0.0, - "learning_rate": 8.999451101177252e-06, - "loss": 0.7691, + "learning_rate": 3.0138889031833616e-06, + "loss": 0.9864, "step": 19266 }, { - "epoch": 0.5459775000708436, + "epoch": 0.7538539791845997, "grad_norm": 0.0, - "learning_rate": 8.998537921668613e-06, - "loss": 0.89, + "learning_rate": 3.0129822534102093e-06, + "loss": 0.9162, "step": 19267 }, { - "epoch": 0.546005837513106, + "epoch": 0.7538931058768292, "grad_norm": 0.0, - "learning_rate": 8.997624750595657e-06, - "loss": 0.844, + "learning_rate": 3.0120757158398052e-06, + "loss": 0.9147, "step": 19268 }, { - "epoch": 0.5460341749553685, + "epoch": 0.7539322325690586, "grad_norm": 0.0, - "learning_rate": 8.996711587966079e-06, - "loss": 0.9474, + "learning_rate": 3.011169290486711e-06, + "loss": 1.0322, "step": 19269 }, { - "epoch": 0.546062512397631, + "epoch": 0.753971359261288, "grad_norm": 0.0, - "learning_rate": 8.995798433787576e-06, - "loss": 0.9096, + "learning_rate": 3.0102629773654736e-06, + "loss": 1.0757, "step": 19270 }, { - "epoch": 0.5460908498398934, + "epoch": 0.7540104859535175, "grad_norm": 0.0, - "learning_rate": 8.994885288067831e-06, - "loss": 0.7394, + "learning_rate": 3.0093567764906606e-06, + "loss": 0.9182, "step": 19271 }, { - "epoch": 0.5461191872821559, + "epoch": 0.7540496126457469, "grad_norm": 0.0, - "learning_rate": 8.993972150814542e-06, - "loss": 0.9055, + "learning_rate": 3.008450687876815e-06, + "loss": 0.9888, "step": 19272 }, { - "epoch": 0.5461475247244184, + "epoch": 0.7540887393379764, "grad_norm": 0.0, - "learning_rate": 8.9930590220354e-06, - "loss": 0.9434, + "learning_rate": 3.007544711538495e-06, + "loss": 0.9976, "step": 19273 }, { - "epoch": 0.5461758621666808, + "epoch": 0.7541278660302058, "grad_norm": 0.0, - "learning_rate": 8.992145901738092e-06, - "loss": 0.8766, + "learning_rate": 3.0066388474902395e-06, + "loss": 0.9684, "step": 19274 }, { - "epoch": 0.5462041996089433, + "epoch": 0.7541669927224353, "grad_norm": 0.0, - "learning_rate": 8.99123278993032e-06, - "loss": 0.7427, + "learning_rate": 3.0057330957466025e-06, + "loss": 0.9685, "step": 19275 }, { - "epoch": 0.5462325370512058, + "epoch": 0.7542061194146646, "grad_norm": 0.0, - "learning_rate": 8.990319686619764e-06, - "loss": 0.8456, + "learning_rate": 3.0048274563221267e-06, + "loss": 1.0632, "step": 19276 }, { - "epoch": 0.5462608744934683, + "epoch": 0.7542452461068941, "grad_norm": 0.0, - "learning_rate": 8.989406591814123e-06, - "loss": 0.8722, + "learning_rate": 3.0039219292313603e-06, + "loss": 1.1827, "step": 19277 }, { - "epoch": 0.5462892119357307, + "epoch": 0.7542843727991235, "grad_norm": 0.0, - "learning_rate": 8.988493505521082e-06, - "loss": 0.9291, + "learning_rate": 3.003016514488838e-06, + "loss": 0.9562, "step": 19278 }, { - "epoch": 0.5463175493779932, + "epoch": 0.754323499491353, "grad_norm": 0.0, - "learning_rate": 8.987580427748335e-06, - "loss": 0.9786, + "learning_rate": 3.002111212109102e-06, + "loss": 0.9767, "step": 19279 }, { - "epoch": 0.5463458868202556, + "epoch": 0.7543626261835824, "grad_norm": 0.0, - "learning_rate": 8.98666735850358e-06, - "loss": 0.8663, + "learning_rate": 3.001206022106693e-06, + "loss": 0.8708, "step": 19280 }, { - "epoch": 0.546374224262518, + "epoch": 0.7544017528758119, "grad_norm": 0.0, - "learning_rate": 8.9857542977945e-06, - "loss": 0.8923, + "learning_rate": 3.000300944496146e-06, + "loss": 0.9407, "step": 19281 }, { - "epoch": 0.5464025617047805, + "epoch": 0.7544408795680413, "grad_norm": 0.0, - "learning_rate": 8.984841245628785e-06, - "loss": 0.8028, + "learning_rate": 2.9993959792919934e-06, + "loss": 1.1397, "step": 19282 }, { - "epoch": 0.546430899147043, + "epoch": 0.7544800062602708, "grad_norm": 0.0, - "learning_rate": 8.983928202014135e-06, - "loss": 0.9121, + "learning_rate": 2.998491126508771e-06, + "loss": 0.9809, "step": 19283 }, { - "epoch": 0.5464592365893054, + "epoch": 0.7545191329525002, "grad_norm": 0.0, - "learning_rate": 8.983015166958228e-06, - "loss": 0.7885, + "learning_rate": 2.997586386161002e-06, + "loss": 0.9257, "step": 19284 }, { - "epoch": 0.5464875740315679, + "epoch": 0.7545582596447297, "grad_norm": 0.0, - "learning_rate": 8.982102140468766e-06, - "loss": 0.9195, + "learning_rate": 2.996681758263228e-06, + "loss": 0.9943, "step": 19285 }, { - "epoch": 0.5465159114738304, + "epoch": 0.754597386336959, "grad_norm": 0.0, - "learning_rate": 8.981189122553436e-06, - "loss": 0.8057, + "learning_rate": 2.9957772428299657e-06, + "loss": 0.9941, "step": 19286 }, { - "epoch": 0.5465442489160929, + "epoch": 0.7546365130291885, "grad_norm": 0.0, - "learning_rate": 8.980276113219925e-06, - "loss": 0.9158, + "learning_rate": 2.9948728398757475e-06, + "loss": 1.0374, "step": 19287 }, { - "epoch": 0.5465725863583553, + "epoch": 0.7546756397214179, "grad_norm": 0.0, - "learning_rate": 8.979363112475933e-06, - "loss": 0.8679, + "learning_rate": 2.993968549415087e-06, + "loss": 1.1213, "step": 19288 }, { - "epoch": 0.5466009238006178, + "epoch": 0.7547147664136474, "grad_norm": 0.0, - "learning_rate": 8.978450120329138e-06, - "loss": 0.8982, + "learning_rate": 2.9930643714625183e-06, + "loss": 1.0229, "step": 19289 }, { - "epoch": 0.5466292612428802, + "epoch": 0.7547538931058768, "grad_norm": 0.0, - "learning_rate": 8.977537136787238e-06, - "loss": 0.8008, + "learning_rate": 2.9921603060325533e-06, + "loss": 0.8525, "step": 19290 }, { - "epoch": 0.5466575986851426, + "epoch": 0.7547930197981063, "grad_norm": 0.0, - "learning_rate": 8.976624161857925e-06, - "loss": 0.9946, + "learning_rate": 2.9912563531397156e-06, + "loss": 0.9898, "step": 19291 }, { - "epoch": 0.5466859361274051, + "epoch": 0.7548321464903357, "grad_norm": 0.0, - "learning_rate": 8.975711195548885e-06, - "loss": 0.8662, + "learning_rate": 2.9903525127985144e-06, + "loss": 0.9361, "step": 19292 }, { - "epoch": 0.5467142735696676, + "epoch": 0.7548712731825652, "grad_norm": 0.0, - "learning_rate": 8.974798237867814e-06, - "loss": 0.9701, + "learning_rate": 2.9894487850234687e-06, + "loss": 0.9385, "step": 19293 }, { - "epoch": 0.5467426110119301, + "epoch": 0.7549103998747946, "grad_norm": 0.0, - "learning_rate": 8.973885288822393e-06, - "loss": 0.7863, + "learning_rate": 2.988545169829091e-06, + "loss": 1.0152, "step": 19294 }, { - "epoch": 0.5467709484541925, + "epoch": 0.7549495265670241, "grad_norm": 0.0, - "learning_rate": 8.972972348420318e-06, - "loss": 0.9109, + "learning_rate": 2.9876416672298945e-06, + "loss": 0.9604, "step": 19295 }, { - "epoch": 0.546799285896455, + "epoch": 0.7549886532592535, "grad_norm": 0.0, - "learning_rate": 8.972059416669282e-06, - "loss": 0.9281, + "learning_rate": 2.986738277240384e-06, + "loss": 1.0637, "step": 19296 }, { - "epoch": 0.5468276233387175, + "epoch": 0.7550277799514828, "grad_norm": 0.0, - "learning_rate": 8.97114649357697e-06, - "loss": 1.0071, + "learning_rate": 2.985834999875068e-06, + "loss": 0.9299, "step": 19297 }, { - "epoch": 0.5468559607809799, + "epoch": 0.7550669066437123, "grad_norm": 0.0, - "learning_rate": 8.97023357915107e-06, - "loss": 0.8331, + "learning_rate": 2.9849318351484522e-06, + "loss": 0.9101, "step": 19298 }, { - "epoch": 0.5468842982232424, + "epoch": 0.7551060333359417, "grad_norm": 0.0, - "learning_rate": 8.969320673399276e-06, - "loss": 0.9302, + "learning_rate": 2.9840287830750446e-06, + "loss": 1.0255, "step": 19299 }, { - "epoch": 0.5469126356655049, + "epoch": 0.7551451600281712, "grad_norm": 0.0, - "learning_rate": 8.968407776329277e-06, - "loss": 0.8939, + "learning_rate": 2.98312584366934e-06, + "loss": 1.0064, "step": 19300 }, { - "epoch": 0.5469409731077673, + "epoch": 0.7551842867204006, "grad_norm": 0.0, - "learning_rate": 8.967494887948766e-06, - "loss": 0.9319, + "learning_rate": 2.9822230169458445e-06, + "loss": 1.0172, "step": 19301 }, { - "epoch": 0.5469693105500297, + "epoch": 0.7552234134126301, "grad_norm": 0.0, - "learning_rate": 8.966582008265424e-06, - "loss": 0.9264, + "learning_rate": 2.9813203029190505e-06, + "loss": 0.9058, "step": 19302 }, { - "epoch": 0.5469976479922922, + "epoch": 0.7552625401048595, "grad_norm": 0.0, - "learning_rate": 8.965669137286946e-06, - "loss": 0.9873, + "learning_rate": 2.9804177016034576e-06, + "loss": 1.0889, "step": 19303 }, { - "epoch": 0.5470259854345547, + "epoch": 0.755301666797089, "grad_norm": 0.0, - "learning_rate": 8.964756275021024e-06, - "loss": 0.886, + "learning_rate": 2.9795152130135606e-06, + "loss": 1.0385, "step": 19304 }, { - "epoch": 0.5470543228768171, + "epoch": 0.7553407934893184, "grad_norm": 0.0, - "learning_rate": 8.96384342147534e-06, - "loss": 0.8367, + "learning_rate": 2.9786128371638543e-06, + "loss": 0.8628, "step": 19305 }, { - "epoch": 0.5470826603190796, + "epoch": 0.7553799201815479, "grad_norm": 0.0, - "learning_rate": 8.962930576657593e-06, - "loss": 0.8797, + "learning_rate": 2.977710574068826e-06, + "loss": 0.9891, "step": 19306 }, { - "epoch": 0.5471109977613421, + "epoch": 0.7554190468737773, "grad_norm": 0.0, - "learning_rate": 8.962017740575464e-06, - "loss": 0.9678, + "learning_rate": 2.976808423742965e-06, + "loss": 1.1055, "step": 19307 }, { - "epoch": 0.5471393352036045, + "epoch": 0.7554581735660068, "grad_norm": 0.0, - "learning_rate": 8.961104913236643e-06, - "loss": 0.8652, + "learning_rate": 2.9759063862007644e-06, + "loss": 0.9832, "step": 19308 }, { - "epoch": 0.547167672645867, + "epoch": 0.7554973002582361, "grad_norm": 0.0, - "learning_rate": 8.960192094648828e-06, - "loss": 0.9259, + "learning_rate": 2.975004461456702e-06, + "loss": 0.9842, "step": 19309 }, { - "epoch": 0.5471960100881295, + "epoch": 0.7555364269504656, "grad_norm": 0.0, - "learning_rate": 8.959279284819694e-06, - "loss": 0.8037, + "learning_rate": 2.9741026495252657e-06, + "loss": 0.8399, "step": 19310 }, { - "epoch": 0.547224347530392, + "epoch": 0.755575553642695, "grad_norm": 0.0, - "learning_rate": 8.958366483756938e-06, - "loss": 0.8631, + "learning_rate": 2.973200950420936e-06, + "loss": 1.0709, "step": 19311 }, { - "epoch": 0.5472526849726543, + "epoch": 0.7556146803349245, "grad_norm": 0.0, - "learning_rate": 8.95745369146825e-06, - "loss": 0.9383, + "learning_rate": 2.9722993641581975e-06, + "loss": 1.1309, "step": 19312 }, { - "epoch": 0.5472810224149168, + "epoch": 0.7556538070271539, "grad_norm": 0.0, - "learning_rate": 8.956540907961315e-06, - "loss": 0.7943, + "learning_rate": 2.9713978907515217e-06, + "loss": 1.1648, "step": 19313 }, { - "epoch": 0.5473093598571793, + "epoch": 0.7556929337193834, "grad_norm": 0.0, - "learning_rate": 8.955628133243828e-06, - "loss": 0.836, + "learning_rate": 2.970496530215391e-06, + "loss": 0.9144, "step": 19314 }, { - "epoch": 0.5473376972994417, + "epoch": 0.7557320604116128, "grad_norm": 0.0, - "learning_rate": 8.954715367323468e-06, - "loss": 0.929, + "learning_rate": 2.9695952825642725e-06, + "loss": 0.9254, "step": 19315 }, { - "epoch": 0.5473660347417042, + "epoch": 0.7557711871038423, "grad_norm": 0.0, - "learning_rate": 8.953802610207928e-06, - "loss": 0.9034, + "learning_rate": 2.9686941478126494e-06, + "loss": 1.0266, "step": 19316 }, { - "epoch": 0.5473943721839667, + "epoch": 0.7558103137960717, "grad_norm": 0.0, - "learning_rate": 8.9528898619049e-06, - "loss": 0.9087, + "learning_rate": 2.9677931259749846e-06, + "loss": 0.999, "step": 19317 }, { - "epoch": 0.5474227096262292, + "epoch": 0.7558494404883012, "grad_norm": 0.0, - "learning_rate": 8.951977122422067e-06, - "loss": 0.7883, + "learning_rate": 2.9668922170657543e-06, + "loss": 0.9934, "step": 19318 }, { - "epoch": 0.5474510470684916, + "epoch": 0.7558885671805305, "grad_norm": 0.0, - "learning_rate": 8.95106439176712e-06, - "loss": 0.88, + "learning_rate": 2.9659914210994156e-06, + "loss": 1.0214, "step": 19319 }, { - "epoch": 0.5474793845107541, + "epoch": 0.75592769387276, "grad_norm": 0.0, - "learning_rate": 8.950151669947754e-06, - "loss": 0.8939, + "learning_rate": 2.965090738090446e-06, + "loss": 1.0076, "step": 19320 }, { - "epoch": 0.5475077219530166, + "epoch": 0.7559668205649894, "grad_norm": 0.0, - "learning_rate": 8.949238956971642e-06, - "loss": 0.9134, + "learning_rate": 2.9641901680533015e-06, + "loss": 1.02, "step": 19321 }, { - "epoch": 0.5475360593952789, + "epoch": 0.7560059472572189, "grad_norm": 0.0, - "learning_rate": 8.948326252846487e-06, - "loss": 0.9183, + "learning_rate": 2.9632897110024493e-06, + "loss": 0.9674, "step": 19322 }, { - "epoch": 0.5475643968375414, + "epoch": 0.7560450739494483, "grad_norm": 0.0, - "learning_rate": 8.947413557579965e-06, - "loss": 0.8048, + "learning_rate": 2.962389366952344e-06, + "loss": 1.0697, "step": 19323 }, { - "epoch": 0.5475927342798039, + "epoch": 0.7560842006416778, "grad_norm": 0.0, - "learning_rate": 8.946500871179771e-06, - "loss": 1.0196, + "learning_rate": 2.961489135917447e-06, + "loss": 0.9569, "step": 19324 }, { - "epoch": 0.5476210717220664, + "epoch": 0.7561233273339072, "grad_norm": 0.0, - "learning_rate": 8.945588193653592e-06, - "loss": 0.795, + "learning_rate": 2.960589017912214e-06, + "loss": 1.0197, "step": 19325 }, { - "epoch": 0.5476494091643288, + "epoch": 0.7561624540261366, "grad_norm": 0.0, - "learning_rate": 8.944675525009114e-06, - "loss": 0.7587, + "learning_rate": 2.9596890129511047e-06, + "loss": 0.8645, "step": 19326 }, { - "epoch": 0.5476777466065913, + "epoch": 0.7562015807183661, "grad_norm": 0.0, - "learning_rate": 8.943762865254025e-06, - "loss": 0.8146, + "learning_rate": 2.9587891210485644e-06, + "loss": 1.0314, "step": 19327 }, { - "epoch": 0.5477060840488538, + "epoch": 0.7562407074105955, "grad_norm": 0.0, - "learning_rate": 8.94285021439602e-06, - "loss": 0.8178, + "learning_rate": 2.9578893422190467e-06, + "loss": 0.9968, "step": 19328 }, { - "epoch": 0.5477344214911162, + "epoch": 0.756279834102825, "grad_norm": 0.0, - "learning_rate": 8.941937572442773e-06, - "loss": 0.8121, + "learning_rate": 2.9569896764770024e-06, + "loss": 1.017, "step": 19329 }, { - "epoch": 0.5477627589333787, + "epoch": 0.7563189607950543, "grad_norm": 0.0, - "learning_rate": 8.941024939401984e-06, - "loss": 0.744, + "learning_rate": 2.9560901238368823e-06, + "loss": 0.9893, "step": 19330 }, { - "epoch": 0.5477910963756412, + "epoch": 0.7563580874872838, "grad_norm": 0.0, - "learning_rate": 8.94011231528133e-06, - "loss": 0.8879, + "learning_rate": 2.955190684313124e-06, + "loss": 1.0185, "step": 19331 }, { - "epoch": 0.5478194338179035, + "epoch": 0.7563972141795132, "grad_norm": 0.0, - "learning_rate": 8.939199700088506e-06, - "loss": 0.8185, + "learning_rate": 2.9542913579201803e-06, + "loss": 0.8972, "step": 19332 }, { - "epoch": 0.547847771260166, + "epoch": 0.7564363408717427, "grad_norm": 0.0, - "learning_rate": 8.9382870938312e-06, - "loss": 1.0032, + "learning_rate": 2.9533921446724813e-06, + "loss": 1.0919, "step": 19333 }, { - "epoch": 0.5478761087024285, + "epoch": 0.7564754675639721, "grad_norm": 0.0, - "learning_rate": 8.937374496517092e-06, - "loss": 0.9122, + "learning_rate": 2.9524930445844814e-06, + "loss": 0.9686, "step": 19334 }, { - "epoch": 0.547904446144691, + "epoch": 0.7565145942562016, "grad_norm": 0.0, - "learning_rate": 8.936461908153877e-06, - "loss": 0.9014, + "learning_rate": 2.951594057670608e-06, + "loss": 1.0355, "step": 19335 }, { - "epoch": 0.5479327835869534, + "epoch": 0.756553720948431, "grad_norm": 0.0, - "learning_rate": 8.935549328749235e-06, - "loss": 0.8545, + "learning_rate": 2.9506951839453057e-06, + "loss": 0.9854, "step": 19336 }, { - "epoch": 0.5479611210292159, + "epoch": 0.7565928476406605, "grad_norm": 0.0, - "learning_rate": 8.934636758310855e-06, - "loss": 0.8046, + "learning_rate": 2.9497964234229993e-06, + "loss": 0.8319, "step": 19337 }, { - "epoch": 0.5479894584714784, + "epoch": 0.7566319743328899, "grad_norm": 0.0, - "learning_rate": 8.933724196846428e-06, - "loss": 0.9376, + "learning_rate": 2.9488977761181347e-06, + "loss": 0.8911, "step": 19338 }, { - "epoch": 0.5480177959137408, + "epoch": 0.7566711010251194, "grad_norm": 0.0, - "learning_rate": 8.932811644363635e-06, - "loss": 0.8183, + "learning_rate": 2.947999242045132e-06, + "loss": 1.0679, "step": 19339 }, { - "epoch": 0.5480461333560033, + "epoch": 0.7567102277173487, "grad_norm": 0.0, - "learning_rate": 8.931899100870168e-06, - "loss": 0.8099, + "learning_rate": 2.9471008212184295e-06, + "loss": 1.0613, "step": 19340 }, { - "epoch": 0.5480744707982658, + "epoch": 0.7567493544095782, "grad_norm": 0.0, - "learning_rate": 8.930986566373715e-06, - "loss": 0.9519, + "learning_rate": 2.9462025136524453e-06, + "loss": 1.0529, "step": 19341 }, { - "epoch": 0.5481028082405283, + "epoch": 0.7567884811018076, "grad_norm": 0.0, - "learning_rate": 8.930074040881954e-06, - "loss": 0.8645, + "learning_rate": 2.9453043193616103e-06, + "loss": 1.0095, "step": 19342 }, { - "epoch": 0.5481311456827906, + "epoch": 0.7568276077940371, "grad_norm": 0.0, - "learning_rate": 8.92916152440258e-06, - "loss": 0.8475, + "learning_rate": 2.944406238360349e-06, + "loss": 1.0226, "step": 19343 }, { - "epoch": 0.5481594831250531, + "epoch": 0.7568667344862665, "grad_norm": 0.0, - "learning_rate": 8.92824901694327e-06, - "loss": 0.9481, + "learning_rate": 2.9435082706630836e-06, + "loss": 0.9529, "step": 19344 }, { - "epoch": 0.5481878205673156, + "epoch": 0.756905861178496, "grad_norm": 0.0, - "learning_rate": 8.92733651851172e-06, - "loss": 0.9341, + "learning_rate": 2.9426104162842317e-06, + "loss": 0.9747, "step": 19345 }, { - "epoch": 0.548216158009578, + "epoch": 0.7569449878707254, "grad_norm": 0.0, - "learning_rate": 8.926424029115618e-06, - "loss": 0.9872, + "learning_rate": 2.941712675238212e-06, + "loss": 0.9931, "step": 19346 }, { - "epoch": 0.5482444954518405, + "epoch": 0.7569841145629549, "grad_norm": 0.0, - "learning_rate": 8.925511548762639e-06, - "loss": 0.954, + "learning_rate": 2.940815047539446e-06, + "loss": 0.8206, "step": 19347 }, { - "epoch": 0.548272832894103, + "epoch": 0.7570232412551843, "grad_norm": 0.0, - "learning_rate": 8.924599077460477e-06, - "loss": 0.9052, + "learning_rate": 2.939917533202341e-06, + "loss": 0.9673, "step": 19348 }, { - "epoch": 0.5483011703363654, + "epoch": 0.7570623679474138, "grad_norm": 0.0, - "learning_rate": 8.923686615216818e-06, - "loss": 0.9464, + "learning_rate": 2.9390201322413137e-06, + "loss": 0.9628, "step": 19349 }, { - "epoch": 0.5483295077786279, + "epoch": 0.7571014946396432, "grad_norm": 0.0, - "learning_rate": 8.922774162039343e-06, - "loss": 1.018, + "learning_rate": 2.9381228446707787e-06, + "loss": 0.9402, "step": 19350 }, { - "epoch": 0.5483578452208904, + "epoch": 0.7571406213318727, "grad_norm": 0.0, - "learning_rate": 8.921861717935744e-06, - "loss": 0.807, + "learning_rate": 2.9372256705051384e-06, + "loss": 0.908, "step": 19351 }, { - "epoch": 0.5483861826631529, + "epoch": 0.757179748024102, "grad_norm": 0.0, - "learning_rate": 8.920949282913702e-06, - "loss": 0.8615, + "learning_rate": 2.936328609758804e-06, + "loss": 1.0758, "step": 19352 }, { - "epoch": 0.5484145201054152, + "epoch": 0.7572188747163315, "grad_norm": 0.0, - "learning_rate": 8.920036856980905e-06, - "loss": 0.9031, + "learning_rate": 2.9354316624461832e-06, + "loss": 1.0237, "step": 19353 }, { - "epoch": 0.5484428575476777, + "epoch": 0.7572580014085609, "grad_norm": 0.0, - "learning_rate": 8.919124440145041e-06, - "loss": 0.8201, + "learning_rate": 2.9345348285816755e-06, + "loss": 0.9605, "step": 19354 }, { - "epoch": 0.5484711949899402, + "epoch": 0.7572971281007903, "grad_norm": 0.0, - "learning_rate": 8.918212032413791e-06, - "loss": 0.9056, + "learning_rate": 2.933638108179684e-06, + "loss": 0.946, "step": 19355 }, { - "epoch": 0.5484995324322026, + "epoch": 0.7573362547930198, "grad_norm": 0.0, - "learning_rate": 8.917299633794843e-06, - "loss": 0.9132, + "learning_rate": 2.93274150125461e-06, + "loss": 0.9592, "step": 19356 }, { - "epoch": 0.5485278698744651, + "epoch": 0.7573753814852492, "grad_norm": 0.0, - "learning_rate": 8.916387244295884e-06, - "loss": 0.8869, + "learning_rate": 2.931845007820855e-06, + "loss": 1.1512, "step": 19357 }, { - "epoch": 0.5485562073167276, + "epoch": 0.7574145081774787, "grad_norm": 0.0, - "learning_rate": 8.915474863924594e-06, - "loss": 0.9493, + "learning_rate": 2.930948627892809e-06, + "loss": 0.9173, "step": 19358 }, { - "epoch": 0.5485845447589901, + "epoch": 0.7574536348697081, "grad_norm": 0.0, - "learning_rate": 8.914562492688667e-06, - "loss": 0.9036, + "learning_rate": 2.9300523614848743e-06, + "loss": 1.029, "step": 19359 }, { - "epoch": 0.5486128822012525, + "epoch": 0.7574927615619376, "grad_norm": 0.0, - "learning_rate": 8.91365013059578e-06, - "loss": 0.7536, + "learning_rate": 2.9291562086114322e-06, + "loss": 0.917, "step": 19360 }, { - "epoch": 0.548641219643515, + "epoch": 0.757531888254167, "grad_norm": 0.0, - "learning_rate": 8.91273777765362e-06, - "loss": 0.9763, + "learning_rate": 2.9282601692868873e-06, + "loss": 0.8929, "step": 19361 }, { - "epoch": 0.5486695570857775, + "epoch": 0.7575710149463964, "grad_norm": 0.0, - "learning_rate": 8.911825433869876e-06, - "loss": 0.8269, + "learning_rate": 2.927364243525619e-06, + "loss": 1.0453, "step": 19362 }, { - "epoch": 0.5486978945280399, + "epoch": 0.7576101416386258, "grad_norm": 0.0, - "learning_rate": 8.91091309925223e-06, - "loss": 0.889, + "learning_rate": 2.9264684313420224e-06, + "loss": 1.0216, "step": 19363 }, { - "epoch": 0.5487262319703023, + "epoch": 0.7576492683308553, "grad_norm": 0.0, - "learning_rate": 8.91000077380837e-06, - "loss": 0.9431, + "learning_rate": 2.9255727327504735e-06, + "loss": 1.0316, "step": 19364 }, { - "epoch": 0.5487545694125648, + "epoch": 0.7576883950230847, "grad_norm": 0.0, - "learning_rate": 8.909088457545973e-06, - "loss": 0.7585, + "learning_rate": 2.9246771477653666e-06, + "loss": 0.9733, "step": 19365 }, { - "epoch": 0.5487829068548273, + "epoch": 0.7577275217153142, "grad_norm": 0.0, - "learning_rate": 8.90817615047273e-06, - "loss": 0.9089, + "learning_rate": 2.9237816764010763e-06, + "loss": 0.9006, "step": 19366 }, { - "epoch": 0.5488112442970897, + "epoch": 0.7577666484075436, "grad_norm": 0.0, - "learning_rate": 8.90726385259633e-06, - "loss": 0.989, + "learning_rate": 2.922886318671989e-06, + "loss": 0.9083, "step": 19367 }, { - "epoch": 0.5488395817393522, + "epoch": 0.7578057750997731, "grad_norm": 0.0, - "learning_rate": 8.906351563924446e-06, - "loss": 0.937, + "learning_rate": 2.9219910745924764e-06, + "loss": 0.9261, "step": 19368 }, { - "epoch": 0.5488679191816147, + "epoch": 0.7578449017920025, "grad_norm": 0.0, - "learning_rate": 8.90543928446477e-06, - "loss": 0.9919, + "learning_rate": 2.921095944176916e-06, + "loss": 1.1008, "step": 19369 }, { - "epoch": 0.5488962566238771, + "epoch": 0.757884028484232, "grad_norm": 0.0, - "learning_rate": 8.904527014224988e-06, - "loss": 0.9078, + "learning_rate": 2.920200927439686e-06, + "loss": 1.0097, "step": 19370 }, { - "epoch": 0.5489245940661396, + "epoch": 0.7579231551764614, "grad_norm": 0.0, - "learning_rate": 8.90361475321278e-06, - "loss": 0.9741, + "learning_rate": 2.91930602439516e-06, + "loss": 1.0206, "step": 19371 }, { - "epoch": 0.5489529315084021, + "epoch": 0.7579622818686909, "grad_norm": 0.0, - "learning_rate": 8.902702501435835e-06, - "loss": 0.862, + "learning_rate": 2.918411235057704e-06, + "loss": 0.9542, "step": 19372 }, { - "epoch": 0.5489812689506645, + "epoch": 0.7580014085609202, "grad_norm": 0.0, - "learning_rate": 8.90179025890183e-06, - "loss": 0.8238, + "learning_rate": 2.91751655944169e-06, + "loss": 1.0056, "step": 19373 }, { - "epoch": 0.549009606392927, + "epoch": 0.7580405352531497, "grad_norm": 0.0, - "learning_rate": 8.900878025618453e-06, - "loss": 0.8581, + "learning_rate": 2.9166219975614852e-06, + "loss": 0.9725, "step": 19374 }, { - "epoch": 0.5490379438351894, + "epoch": 0.7580796619453791, "grad_norm": 0.0, - "learning_rate": 8.899965801593393e-06, - "loss": 0.8033, + "learning_rate": 2.9157275494314576e-06, + "loss": 0.9466, "step": 19375 }, { - "epoch": 0.5490662812774519, + "epoch": 0.7581187886376086, "grad_norm": 0.0, - "learning_rate": 8.899053586834325e-06, - "loss": 0.8597, + "learning_rate": 2.914833215065965e-06, + "loss": 0.9422, "step": 19376 }, { - "epoch": 0.5490946187197143, + "epoch": 0.758157915329838, "grad_norm": 0.0, - "learning_rate": 8.89814138134894e-06, - "loss": 0.7073, + "learning_rate": 2.913938994479374e-06, + "loss": 0.8899, "step": 19377 }, { - "epoch": 0.5491229561619768, + "epoch": 0.7581970420220675, "grad_norm": 0.0, - "learning_rate": 8.897229185144922e-06, - "loss": 0.8136, + "learning_rate": 2.9130448876860427e-06, + "loss": 1.0073, "step": 19378 }, { - "epoch": 0.5491512936042393, + "epoch": 0.7582361687142969, "grad_norm": 0.0, - "learning_rate": 8.896316998229946e-06, - "loss": 0.9101, + "learning_rate": 2.9121508947003343e-06, + "loss": 0.9684, "step": 19379 }, { - "epoch": 0.5491796310465017, + "epoch": 0.7582752954065264, "grad_norm": 0.0, - "learning_rate": 8.89540482061171e-06, - "loss": 0.8006, + "learning_rate": 2.911257015536596e-06, + "loss": 0.8301, "step": 19380 }, { - "epoch": 0.5492079684887642, + "epoch": 0.7583144220987558, "grad_norm": 0.0, - "learning_rate": 8.894492652297883e-06, - "loss": 0.8757, + "learning_rate": 2.9103632502091917e-06, + "loss": 1.1815, "step": 19381 }, { - "epoch": 0.5492363059310267, + "epoch": 0.7583535487909852, "grad_norm": 0.0, - "learning_rate": 8.893580493296155e-06, - "loss": 0.8835, + "learning_rate": 2.9094695987324627e-06, + "loss": 1.0489, "step": 19382 }, { - "epoch": 0.5492646433732892, + "epoch": 0.7583926754832147, "grad_norm": 0.0, - "learning_rate": 8.89266834361421e-06, - "loss": 0.8169, + "learning_rate": 2.9085760611207736e-06, + "loss": 0.9843, "step": 19383 }, { - "epoch": 0.5492929808155516, + "epoch": 0.758431802175444, "grad_norm": 0.0, - "learning_rate": 8.89175620325973e-06, - "loss": 0.9503, + "learning_rate": 2.9076826373884647e-06, + "loss": 0.9286, "step": 19384 }, { - "epoch": 0.549321318257814, + "epoch": 0.7584709288676735, "grad_norm": 0.0, - "learning_rate": 8.8908440722404e-06, - "loss": 0.8201, + "learning_rate": 2.9067893275498872e-06, + "loss": 0.9798, "step": 19385 }, { - "epoch": 0.5493496557000765, + "epoch": 0.7585100555599029, "grad_norm": 0.0, - "learning_rate": 8.889931950563907e-06, - "loss": 0.8948, + "learning_rate": 2.9058961316193823e-06, + "loss": 0.9629, "step": 19386 }, { - "epoch": 0.5493779931423389, + "epoch": 0.7585491822521324, "grad_norm": 0.0, - "learning_rate": 8.889019838237922e-06, - "loss": 0.8447, + "learning_rate": 2.905003049611297e-06, + "loss": 0.9696, "step": 19387 }, { - "epoch": 0.5494063305846014, + "epoch": 0.7585883089443618, "grad_norm": 0.0, - "learning_rate": 8.88810773527014e-06, - "loss": 0.8027, + "learning_rate": 2.9041100815399715e-06, + "loss": 0.9404, "step": 19388 }, { - "epoch": 0.5494346680268639, + "epoch": 0.7586274356365913, "grad_norm": 0.0, - "learning_rate": 8.887195641668235e-06, - "loss": 0.8199, + "learning_rate": 2.903217227419749e-06, + "loss": 1.0674, "step": 19389 }, { - "epoch": 0.5494630054691264, + "epoch": 0.7586665623288207, "grad_norm": 0.0, - "learning_rate": 8.886283557439898e-06, - "loss": 0.7821, + "learning_rate": 2.9023244872649626e-06, + "loss": 0.9165, "step": 19390 }, { - "epoch": 0.5494913429113888, + "epoch": 0.7587056890210502, "grad_norm": 0.0, - "learning_rate": 8.885371482592809e-06, - "loss": 0.8614, + "learning_rate": 2.9014318610899504e-06, + "loss": 0.9888, "step": 19391 }, { - "epoch": 0.5495196803536513, + "epoch": 0.7587448157132796, "grad_norm": 0.0, - "learning_rate": 8.884459417134648e-06, - "loss": 0.813, + "learning_rate": 2.9005393489090506e-06, + "loss": 0.9481, "step": 19392 }, { - "epoch": 0.5495480177959138, + "epoch": 0.7587839424055091, "grad_norm": 0.0, - "learning_rate": 8.883547361073102e-06, - "loss": 0.9132, + "learning_rate": 2.89964695073659e-06, + "loss": 1.0099, "step": 19393 }, { - "epoch": 0.5495763552381762, + "epoch": 0.7588230690977384, "grad_norm": 0.0, - "learning_rate": 8.882635314415848e-06, - "loss": 0.87, + "learning_rate": 2.8987546665869025e-06, + "loss": 0.876, "step": 19394 }, { - "epoch": 0.5496046926804387, + "epoch": 0.7588621957899679, "grad_norm": 0.0, - "learning_rate": 8.881723277170573e-06, - "loss": 0.9271, + "learning_rate": 2.897862496474316e-06, + "loss": 1.0276, "step": 19395 }, { - "epoch": 0.5496330301227011, + "epoch": 0.7589013224821973, "grad_norm": 0.0, - "learning_rate": 8.880811249344958e-06, - "loss": 0.8798, + "learning_rate": 2.8969704404131626e-06, + "loss": 0.973, "step": 19396 }, { - "epoch": 0.5496613675649635, + "epoch": 0.7589404491744268, "grad_norm": 0.0, - "learning_rate": 8.879899230946684e-06, - "loss": 0.9301, + "learning_rate": 2.8960784984177594e-06, + "loss": 0.9266, "step": 19397 }, { - "epoch": 0.549689705007226, + "epoch": 0.7589795758666562, "grad_norm": 0.0, - "learning_rate": 8.878987221983434e-06, - "loss": 0.9029, + "learning_rate": 2.8951866705024366e-06, + "loss": 1.0271, "step": 19398 }, { - "epoch": 0.5497180424494885, + "epoch": 0.7590187025588857, "grad_norm": 0.0, - "learning_rate": 8.878075222462896e-06, - "loss": 0.9179, + "learning_rate": 2.8942949566815103e-06, + "loss": 1.0266, "step": 19399 }, { - "epoch": 0.549746379891751, + "epoch": 0.7590578292511151, "grad_norm": 0.0, - "learning_rate": 8.877163232392743e-06, - "loss": 0.9056, + "learning_rate": 2.893403356969303e-06, + "loss": 1.0404, "step": 19400 }, { - "epoch": 0.5497747173340134, + "epoch": 0.7590969559433446, "grad_norm": 0.0, - "learning_rate": 8.876251251780663e-06, - "loss": 0.83, + "learning_rate": 2.8925118713801325e-06, + "loss": 0.9104, "step": 19401 }, { - "epoch": 0.5498030547762759, + "epoch": 0.759136082635574, "grad_norm": 0.0, - "learning_rate": 8.875339280634334e-06, - "loss": 0.8427, + "learning_rate": 2.8916204999283184e-06, + "loss": 0.9929, "step": 19402 }, { - "epoch": 0.5498313922185384, + "epoch": 0.7591752093278035, "grad_norm": 0.0, - "learning_rate": 8.874427318961439e-06, - "loss": 0.8856, + "learning_rate": 2.8907292426281686e-06, + "loss": 0.8789, "step": 19403 }, { - "epoch": 0.5498597296608008, + "epoch": 0.7592143360200329, "grad_norm": 0.0, - "learning_rate": 8.873515366769666e-06, - "loss": 0.8343, + "learning_rate": 2.889838099493999e-06, + "loss": 0.919, "step": 19404 }, { - "epoch": 0.5498880671030633, + "epoch": 0.7592534627122624, "grad_norm": 0.0, - "learning_rate": 8.872603424066686e-06, - "loss": 0.8279, + "learning_rate": 2.88894707054012e-06, + "loss": 0.8449, "step": 19405 }, { - "epoch": 0.5499164045453258, + "epoch": 0.7592925894044917, "grad_norm": 0.0, - "learning_rate": 8.871691490860188e-06, - "loss": 0.9107, + "learning_rate": 2.888056155780844e-06, + "loss": 0.9841, "step": 19406 }, { - "epoch": 0.5499447419875882, + "epoch": 0.7593317160967212, "grad_norm": 0.0, - "learning_rate": 8.870779567157853e-06, - "loss": 0.8032, + "learning_rate": 2.8871653552304703e-06, + "loss": 0.9784, "step": 19407 }, { - "epoch": 0.5499730794298506, + "epoch": 0.7593708427889506, "grad_norm": 0.0, - "learning_rate": 8.86986765296736e-06, - "loss": 0.8917, + "learning_rate": 2.8862746689033117e-06, + "loss": 1.0973, "step": 19408 }, { - "epoch": 0.5500014168721131, + "epoch": 0.7594099694811801, "grad_norm": 0.0, - "learning_rate": 8.868955748296391e-06, - "loss": 0.7643, + "learning_rate": 2.8853840968136614e-06, + "loss": 0.8254, "step": 19409 }, { - "epoch": 0.5500297543143756, + "epoch": 0.7594490961734095, "grad_norm": 0.0, - "learning_rate": 8.868043853152626e-06, - "loss": 0.7545, + "learning_rate": 2.8844936389758337e-06, + "loss": 1.0434, "step": 19410 }, { - "epoch": 0.550058091756638, + "epoch": 0.7594882228656389, "grad_norm": 0.0, - "learning_rate": 8.867131967543748e-06, - "loss": 0.8237, + "learning_rate": 2.8836032954041194e-06, + "loss": 0.8993, "step": 19411 }, { - "epoch": 0.5500864291989005, + "epoch": 0.7595273495578684, "grad_norm": 0.0, - "learning_rate": 8.866220091477444e-06, - "loss": 0.8523, + "learning_rate": 2.882713066112821e-06, + "loss": 1.0246, "step": 19412 }, { - "epoch": 0.550114766641163, + "epoch": 0.7595664762500978, "grad_norm": 0.0, - "learning_rate": 8.865308224961381e-06, - "loss": 0.9022, + "learning_rate": 2.8818229511162265e-06, + "loss": 0.9277, "step": 19413 }, { - "epoch": 0.5501431040834255, + "epoch": 0.7596056029423273, "grad_norm": 0.0, - "learning_rate": 8.864396368003252e-06, - "loss": 0.863, + "learning_rate": 2.880932950428642e-06, + "loss": 0.9418, "step": 19414 }, { - "epoch": 0.5501714415256879, + "epoch": 0.7596447296345566, "grad_norm": 0.0, - "learning_rate": 8.863484520610736e-06, - "loss": 0.9167, + "learning_rate": 2.8800430640643507e-06, + "loss": 0.9599, "step": 19415 }, { - "epoch": 0.5501997789679504, + "epoch": 0.7596838563267861, "grad_norm": 0.0, - "learning_rate": 8.862572682791508e-06, - "loss": 0.8568, + "learning_rate": 2.8791532920376496e-06, + "loss": 0.935, "step": 19416 }, { - "epoch": 0.5502281164102129, + "epoch": 0.7597229830190155, "grad_norm": 0.0, - "learning_rate": 8.861660854553257e-06, - "loss": 0.8479, + "learning_rate": 2.8782636343628203e-06, + "loss": 1.034, "step": 19417 }, { - "epoch": 0.5502564538524752, + "epoch": 0.759762109711245, "grad_norm": 0.0, - "learning_rate": 8.860749035903657e-06, - "loss": 0.933, + "learning_rate": 2.8773740910541524e-06, + "loss": 0.987, "step": 19418 }, { - "epoch": 0.5502847912947377, + "epoch": 0.7598012364034744, "grad_norm": 0.0, - "learning_rate": 8.85983722685039e-06, - "loss": 0.9304, + "learning_rate": 2.8764846621259313e-06, + "loss": 0.919, "step": 19419 }, { - "epoch": 0.5503131287370002, + "epoch": 0.7598403630957039, "grad_norm": 0.0, - "learning_rate": 8.858925427401142e-06, - "loss": 0.7782, + "learning_rate": 2.8755953475924447e-06, + "loss": 0.8838, "step": 19420 }, { - "epoch": 0.5503414661792626, + "epoch": 0.7598794897879333, "grad_norm": 0.0, - "learning_rate": 8.858013637563583e-06, - "loss": 0.8673, + "learning_rate": 2.874706147467965e-06, + "loss": 1.1432, "step": 19421 }, { - "epoch": 0.5503698036215251, + "epoch": 0.7599186164801628, "grad_norm": 0.0, - "learning_rate": 8.857101857345402e-06, - "loss": 0.8755, + "learning_rate": 2.873817061766776e-06, + "loss": 1.0025, "step": 19422 }, { - "epoch": 0.5503981410637876, + "epoch": 0.7599577431723922, "grad_norm": 0.0, - "learning_rate": 8.856190086754274e-06, - "loss": 0.8714, + "learning_rate": 2.8729280905031563e-06, + "loss": 0.9006, "step": 19423 }, { - "epoch": 0.5504264785060501, + "epoch": 0.7599968698646217, "grad_norm": 0.0, - "learning_rate": 8.855278325797884e-06, - "loss": 0.7117, + "learning_rate": 2.872039233691384e-06, + "loss": 0.9237, "step": 19424 }, { - "epoch": 0.5504548159483125, + "epoch": 0.7600359965568511, "grad_norm": 0.0, - "learning_rate": 8.854366574483913e-06, - "loss": 0.8992, + "learning_rate": 2.8711504913457256e-06, + "loss": 0.9897, "step": 19425 }, { - "epoch": 0.550483153390575, + "epoch": 0.7600751232490806, "grad_norm": 0.0, - "learning_rate": 8.853454832820033e-06, - "loss": 0.8633, + "learning_rate": 2.8702618634804613e-06, + "loss": 1.0299, "step": 19426 }, { - "epoch": 0.5505114908328375, + "epoch": 0.7601142499413099, "grad_norm": 0.0, - "learning_rate": 8.852543100813927e-06, - "loss": 0.852, + "learning_rate": 2.869373350109851e-06, + "loss": 0.9894, "step": 19427 }, { - "epoch": 0.5505398282750998, + "epoch": 0.7601533766335394, "grad_norm": 0.0, - "learning_rate": 8.85163137847328e-06, - "loss": 0.7926, + "learning_rate": 2.868484951248175e-06, + "loss": 0.9211, "step": 19428 }, { - "epoch": 0.5505681657173623, + "epoch": 0.7601925033257688, "grad_norm": 0.0, - "learning_rate": 8.850719665805768e-06, - "loss": 0.8167, + "learning_rate": 2.867596666909692e-06, + "loss": 0.9143, "step": 19429 }, { - "epoch": 0.5505965031596248, + "epoch": 0.7602316300179983, "grad_norm": 0.0, - "learning_rate": 8.849807962819072e-06, - "loss": 0.8621, + "learning_rate": 2.8667084971086724e-06, + "loss": 0.9202, "step": 19430 }, { - "epoch": 0.5506248406018873, + "epoch": 0.7602707567102277, "grad_norm": 0.0, - "learning_rate": 8.84889626952087e-06, - "loss": 0.8723, + "learning_rate": 2.8658204418593726e-06, + "loss": 0.9375, "step": 19431 }, { - "epoch": 0.5506531780441497, + "epoch": 0.7603098834024572, "grad_norm": 0.0, - "learning_rate": 8.847984585918838e-06, - "loss": 0.8399, + "learning_rate": 2.8649325011760566e-06, + "loss": 0.8796, "step": 19432 }, { - "epoch": 0.5506815154864122, + "epoch": 0.7603490100946866, "grad_norm": 0.0, - "learning_rate": 8.847072912020668e-06, - "loss": 0.8977, + "learning_rate": 2.8640446750729846e-06, + "loss": 0.9223, "step": 19433 }, { - "epoch": 0.5507098529286747, + "epoch": 0.7603881367869161, "grad_norm": 0.0, - "learning_rate": 8.846161247834024e-06, - "loss": 0.8823, + "learning_rate": 2.863156963564415e-06, + "loss": 0.9337, "step": 19434 }, { - "epoch": 0.5507381903709371, + "epoch": 0.7604272634791455, "grad_norm": 0.0, - "learning_rate": 8.845249593366594e-06, - "loss": 0.9057, + "learning_rate": 2.8622693666645996e-06, + "loss": 1.0615, "step": 19435 }, { - "epoch": 0.5507665278131996, + "epoch": 0.760466390171375, "grad_norm": 0.0, - "learning_rate": 8.844337948626056e-06, - "loss": 0.8437, + "learning_rate": 2.861381884387794e-06, + "loss": 1.033, "step": 19436 }, { - "epoch": 0.5507948652554621, + "epoch": 0.7605055168636043, "grad_norm": 0.0, - "learning_rate": 8.843426313620087e-06, - "loss": 0.834, + "learning_rate": 2.8604945167482532e-06, + "loss": 0.9799, "step": 19437 }, { - "epoch": 0.5508232026977246, + "epoch": 0.7605446435558338, "grad_norm": 0.0, - "learning_rate": 8.842514688356373e-06, - "loss": 0.9301, + "learning_rate": 2.8596072637602213e-06, + "loss": 0.9118, "step": 19438 }, { - "epoch": 0.5508515401399869, + "epoch": 0.7605837702480632, "grad_norm": 0.0, - "learning_rate": 8.841603072842582e-06, - "loss": 0.9161, + "learning_rate": 2.858720125437948e-06, + "loss": 1.0078, "step": 19439 }, { - "epoch": 0.5508798775822494, + "epoch": 0.7606228969402926, "grad_norm": 0.0, - "learning_rate": 8.840691467086399e-06, - "loss": 0.8249, + "learning_rate": 2.857833101795683e-06, + "loss": 1.0138, "step": 19440 }, { - "epoch": 0.5509082150245119, + "epoch": 0.7606620236325221, "grad_norm": 0.0, - "learning_rate": 8.839779871095504e-06, - "loss": 0.9073, + "learning_rate": 2.8569461928476703e-06, + "loss": 1.0001, "step": 19441 }, { - "epoch": 0.5509365524667743, + "epoch": 0.7607011503247515, "grad_norm": 0.0, - "learning_rate": 8.838868284877573e-06, - "loss": 0.8109, + "learning_rate": 2.8560593986081484e-06, + "loss": 0.9146, "step": 19442 }, { - "epoch": 0.5509648899090368, + "epoch": 0.760740277016981, "grad_norm": 0.0, - "learning_rate": 8.83795670844029e-06, - "loss": 0.8071, + "learning_rate": 2.85517271909136e-06, + "loss": 0.9715, "step": 19443 }, { - "epoch": 0.5509932273512993, + "epoch": 0.7607794037092104, "grad_norm": 0.0, - "learning_rate": 8.837045141791323e-06, - "loss": 0.7335, + "learning_rate": 2.8542861543115462e-06, + "loss": 0.9286, "step": 19444 }, { - "epoch": 0.5510215647935617, + "epoch": 0.7608185304014399, "grad_norm": 0.0, - "learning_rate": 8.836133584938358e-06, - "loss": 0.8837, + "learning_rate": 2.8533997042829444e-06, + "loss": 1.0238, "step": 19445 }, { - "epoch": 0.5510499022358242, + "epoch": 0.7608576570936693, "grad_norm": 0.0, - "learning_rate": 8.835222037889074e-06, - "loss": 0.8871, + "learning_rate": 2.8525133690197857e-06, + "loss": 0.9342, "step": 19446 }, { - "epoch": 0.5510782396780867, + "epoch": 0.7608967837858988, "grad_norm": 0.0, - "learning_rate": 8.834310500651146e-06, - "loss": 0.8642, + "learning_rate": 2.851627148536309e-06, + "loss": 0.96, "step": 19447 }, { - "epoch": 0.5511065771203492, + "epoch": 0.7609359104781281, "grad_norm": 0.0, - "learning_rate": 8.833398973232253e-06, - "loss": 0.8283, + "learning_rate": 2.8507410428467395e-06, + "loss": 0.9323, "step": 19448 }, { - "epoch": 0.5511349145626115, + "epoch": 0.7609750371703576, "grad_norm": 0.0, - "learning_rate": 8.832487455640074e-06, - "loss": 0.9105, + "learning_rate": 2.849855051965311e-06, + "loss": 0.9437, "step": 19449 }, { - "epoch": 0.551163252004874, + "epoch": 0.761014163862587, "grad_norm": 0.0, - "learning_rate": 8.831575947882288e-06, - "loss": 0.9509, + "learning_rate": 2.848969175906251e-06, + "loss": 0.979, "step": 19450 }, { - "epoch": 0.5511915894471365, + "epoch": 0.7610532905548165, "grad_norm": 0.0, - "learning_rate": 8.830664449966573e-06, - "loss": 0.8066, + "learning_rate": 2.8480834146837877e-06, + "loss": 1.032, "step": 19451 }, { - "epoch": 0.5512199268893989, + "epoch": 0.7610924172470459, "grad_norm": 0.0, - "learning_rate": 8.829752961900602e-06, - "loss": 0.898, + "learning_rate": 2.84719776831214e-06, + "loss": 0.9919, "step": 19452 }, { - "epoch": 0.5512482643316614, + "epoch": 0.7611315439392754, "grad_norm": 0.0, - "learning_rate": 8.828841483692057e-06, - "loss": 0.8813, + "learning_rate": 2.846312236805533e-06, + "loss": 0.8361, "step": 19453 }, { - "epoch": 0.5512766017739239, + "epoch": 0.7611706706315048, "grad_norm": 0.0, - "learning_rate": 8.827930015348616e-06, - "loss": 0.8101, + "learning_rate": 2.8454268201781876e-06, + "loss": 1.0078, "step": 19454 }, { - "epoch": 0.5513049392161864, + "epoch": 0.7612097973237343, "grad_norm": 0.0, - "learning_rate": 8.827018556877955e-06, - "loss": 0.9052, + "learning_rate": 2.8445415184443248e-06, + "loss": 1.0372, "step": 19455 }, { - "epoch": 0.5513332766584488, + "epoch": 0.7612489240159637, "grad_norm": 0.0, - "learning_rate": 8.82610710828775e-06, - "loss": 0.9174, + "learning_rate": 2.8436563316181567e-06, + "loss": 1.0199, "step": 19456 }, { - "epoch": 0.5513616141007113, + "epoch": 0.7612880507081932, "grad_norm": 0.0, - "learning_rate": 8.825195669585687e-06, - "loss": 0.9059, + "learning_rate": 2.8427712597139032e-06, + "loss": 0.9778, "step": 19457 }, { - "epoch": 0.5513899515429738, + "epoch": 0.7613271774004226, "grad_norm": 0.0, - "learning_rate": 8.824284240779433e-06, - "loss": 0.8469, + "learning_rate": 2.841886302745769e-06, + "loss": 1.0591, "step": 19458 }, { - "epoch": 0.5514182889852361, + "epoch": 0.761366304092652, "grad_norm": 0.0, - "learning_rate": 8.823372821876673e-06, - "loss": 0.8791, + "learning_rate": 2.8410014607279767e-06, + "loss": 0.9507, "step": 19459 }, { - "epoch": 0.5514466264274986, + "epoch": 0.7614054307848814, "grad_norm": 0.0, - "learning_rate": 8.822461412885076e-06, - "loss": 0.9056, + "learning_rate": 2.840116733674727e-06, + "loss": 0.9754, "step": 19460 }, { - "epoch": 0.5514749638697611, + "epoch": 0.7614445574771109, "grad_norm": 0.0, - "learning_rate": 8.821550013812324e-06, - "loss": 0.8079, + "learning_rate": 2.839232121600234e-06, + "loss": 1.0062, "step": 19461 }, { - "epoch": 0.5515033013120236, + "epoch": 0.7614836841693403, "grad_norm": 0.0, - "learning_rate": 8.820638624666096e-06, - "loss": 0.9089, + "learning_rate": 2.8383476245186946e-06, + "loss": 0.8908, "step": 19462 }, { - "epoch": 0.551531638754286, + "epoch": 0.7615228108615698, "grad_norm": 0.0, - "learning_rate": 8.819727245454065e-06, - "loss": 0.9305, + "learning_rate": 2.8374632424443236e-06, + "loss": 1.0203, "step": 19463 }, { - "epoch": 0.5515599761965485, + "epoch": 0.7615619375537992, "grad_norm": 0.0, - "learning_rate": 8.81881587618391e-06, - "loss": 0.8433, + "learning_rate": 2.8365789753913154e-06, + "loss": 1.0141, "step": 19464 }, { - "epoch": 0.551588313638811, + "epoch": 0.7616010642460287, "grad_norm": 0.0, - "learning_rate": 8.817904516863311e-06, - "loss": 0.7939, + "learning_rate": 2.8356948233738746e-06, + "loss": 0.9947, "step": 19465 }, { - "epoch": 0.5516166510810734, + "epoch": 0.7616401909382581, "grad_norm": 0.0, - "learning_rate": 8.816993167499938e-06, - "loss": 0.8615, + "learning_rate": 2.834810786406196e-06, + "loss": 0.8395, "step": 19466 }, { - "epoch": 0.5516449885233359, + "epoch": 0.7616793176304876, "grad_norm": 0.0, - "learning_rate": 8.816081828101471e-06, - "loss": 0.9121, + "learning_rate": 2.8339268645024766e-06, + "loss": 1.0071, "step": 19467 }, { - "epoch": 0.5516733259655984, + "epoch": 0.761718444322717, "grad_norm": 0.0, - "learning_rate": 8.815170498675585e-06, - "loss": 0.8794, + "learning_rate": 2.833043057676913e-06, + "loss": 0.9079, "step": 19468 }, { - "epoch": 0.5517016634078608, + "epoch": 0.7617575710149463, "grad_norm": 0.0, - "learning_rate": 8.81425917922996e-06, - "loss": 0.8944, + "learning_rate": 2.8321593659436998e-06, + "loss": 0.9971, "step": 19469 }, { - "epoch": 0.5517300008501232, + "epoch": 0.7617966977071758, "grad_norm": 0.0, - "learning_rate": 8.813347869772273e-06, - "loss": 0.9254, + "learning_rate": 2.8312757893170216e-06, + "loss": 0.9043, "step": 19470 }, { - "epoch": 0.5517583382923857, + "epoch": 0.7618358243994052, "grad_norm": 0.0, - "learning_rate": 8.812436570310193e-06, - "loss": 0.9409, + "learning_rate": 2.8303923278110724e-06, + "loss": 0.8658, "step": 19471 }, { - "epoch": 0.5517866757346482, + "epoch": 0.7618749510916347, "grad_norm": 0.0, - "learning_rate": 8.811525280851402e-06, - "loss": 0.8817, + "learning_rate": 2.829508981440038e-06, + "loss": 0.9991, "step": 19472 }, { - "epoch": 0.5518150131769106, + "epoch": 0.7619140777838641, "grad_norm": 0.0, - "learning_rate": 8.810614001403574e-06, - "loss": 0.9383, + "learning_rate": 2.828625750218107e-06, + "loss": 1.0527, "step": 19473 }, { - "epoch": 0.5518433506191731, + "epoch": 0.7619532044760936, "grad_norm": 0.0, - "learning_rate": 8.809702731974387e-06, - "loss": 0.8636, + "learning_rate": 2.8277426341594572e-06, + "loss": 1.1654, "step": 19474 }, { - "epoch": 0.5518716880614356, + "epoch": 0.761992331168323, "grad_norm": 0.0, - "learning_rate": 8.80879147257152e-06, - "loss": 0.8929, + "learning_rate": 2.826859633278277e-06, + "loss": 0.9797, "step": 19475 }, { - "epoch": 0.551900025503698, + "epoch": 0.7620314578605525, "grad_norm": 0.0, - "learning_rate": 8.807880223202639e-06, - "loss": 0.9059, + "learning_rate": 2.8259767475887355e-06, + "loss": 0.9324, "step": 19476 }, { - "epoch": 0.5519283629459605, + "epoch": 0.7620705845527819, "grad_norm": 0.0, - "learning_rate": 8.806968983875424e-06, - "loss": 0.9068, + "learning_rate": 2.8250939771050257e-06, + "loss": 0.9127, "step": 19477 }, { - "epoch": 0.551956700388223, + "epoch": 0.7621097112450114, "grad_norm": 0.0, - "learning_rate": 8.806057754597559e-06, - "loss": 0.8542, + "learning_rate": 2.8242113218413115e-06, + "loss": 0.934, "step": 19478 }, { - "epoch": 0.5519850378304855, + "epoch": 0.7621488379372408, "grad_norm": 0.0, - "learning_rate": 8.805146535376709e-06, - "loss": 0.8235, + "learning_rate": 2.823328781811775e-06, + "loss": 0.9915, "step": 19479 }, { - "epoch": 0.5520133752727479, + "epoch": 0.7621879646294702, "grad_norm": 0.0, - "learning_rate": 8.804235326220554e-06, - "loss": 0.7358, + "learning_rate": 2.8224463570305828e-06, + "loss": 1.0238, "step": 19480 }, { - "epoch": 0.5520417127150103, + "epoch": 0.7622270913216996, "grad_norm": 0.0, - "learning_rate": 8.803324127136767e-06, - "loss": 0.8953, + "learning_rate": 2.8215640475119077e-06, + "loss": 0.9824, "step": 19481 }, { - "epoch": 0.5520700501572728, + "epoch": 0.7622662180139291, "grad_norm": 0.0, - "learning_rate": 8.802412938133026e-06, - "loss": 0.9469, + "learning_rate": 2.8206818532699186e-06, + "loss": 1.0065, "step": 19482 }, { - "epoch": 0.5520983875995352, + "epoch": 0.7623053447061585, "grad_norm": 0.0, - "learning_rate": 8.801501759217011e-06, - "loss": 0.8665, + "learning_rate": 2.8197997743187867e-06, + "loss": 0.9266, "step": 19483 }, { - "epoch": 0.5521267250417977, + "epoch": 0.762344471398388, "grad_norm": 0.0, - "learning_rate": 8.800590590396385e-06, - "loss": 0.8988, + "learning_rate": 2.818917810672669e-06, + "loss": 1.028, "step": 19484 }, { - "epoch": 0.5521550624840602, + "epoch": 0.7623835980906174, "grad_norm": 0.0, - "learning_rate": 8.799679431678831e-06, - "loss": 0.8799, + "learning_rate": 2.8180359623457345e-06, + "loss": 0.947, "step": 19485 }, { - "epoch": 0.5521833999263227, + "epoch": 0.7624227247828469, "grad_norm": 0.0, - "learning_rate": 8.798768283072025e-06, - "loss": 0.8587, + "learning_rate": 2.817154229352145e-06, + "loss": 1.0063, "step": 19486 }, { - "epoch": 0.5522117373685851, + "epoch": 0.7624618514750763, "grad_norm": 0.0, - "learning_rate": 8.797857144583637e-06, - "loss": 0.9222, + "learning_rate": 2.816272611706055e-06, + "loss": 1.0551, "step": 19487 }, { - "epoch": 0.5522400748108476, + "epoch": 0.7625009781673058, "grad_norm": 0.0, - "learning_rate": 8.79694601622135e-06, - "loss": 0.8822, + "learning_rate": 2.8153911094216246e-06, + "loss": 0.996, "step": 19488 }, { - "epoch": 0.5522684122531101, + "epoch": 0.7625401048595352, "grad_norm": 0.0, - "learning_rate": 8.79603489799283e-06, - "loss": 0.7423, + "learning_rate": 2.8145097225130104e-06, + "loss": 1.0496, "step": 19489 }, { - "epoch": 0.5522967496953725, + "epoch": 0.7625792315517647, "grad_norm": 0.0, - "learning_rate": 8.795123789905753e-06, - "loss": 0.869, + "learning_rate": 2.813628450994369e-06, + "loss": 0.8943, "step": 19490 }, { - "epoch": 0.552325087137635, + "epoch": 0.762618358243994, "grad_norm": 0.0, - "learning_rate": 8.7942126919678e-06, - "loss": 0.8435, + "learning_rate": 2.8127472948798474e-06, + "loss": 0.9879, "step": 19491 }, { - "epoch": 0.5523534245798974, + "epoch": 0.7626574849362235, "grad_norm": 0.0, - "learning_rate": 8.793301604186638e-06, - "loss": 0.7852, + "learning_rate": 2.8118662541836006e-06, + "loss": 0.9638, "step": 19492 }, { - "epoch": 0.5523817620221598, + "epoch": 0.7626966116284529, "grad_norm": 0.0, - "learning_rate": 8.792390526569944e-06, - "loss": 0.8863, + "learning_rate": 2.8109853289197685e-06, + "loss": 1.0597, "step": 19493 }, { - "epoch": 0.5524100994644223, + "epoch": 0.7627357383206824, "grad_norm": 0.0, - "learning_rate": 8.791479459125396e-06, - "loss": 0.8724, + "learning_rate": 2.81010451910251e-06, + "loss": 1.0113, "step": 19494 }, { - "epoch": 0.5524384369066848, + "epoch": 0.7627748650129118, "grad_norm": 0.0, - "learning_rate": 8.790568401860663e-06, - "loss": 0.8489, + "learning_rate": 2.80922382474596e-06, + "loss": 0.9549, "step": 19495 }, { - "epoch": 0.5524667743489473, + "epoch": 0.7628139917051412, "grad_norm": 0.0, - "learning_rate": 8.789657354783425e-06, - "loss": 0.9114, + "learning_rate": 2.808343245864268e-06, + "loss": 1.0129, "step": 19496 }, { - "epoch": 0.5524951117912097, + "epoch": 0.7628531183973707, "grad_norm": 0.0, - "learning_rate": 8.788746317901349e-06, - "loss": 0.8965, + "learning_rate": 2.8074627824715683e-06, + "loss": 0.8345, "step": 19497 }, { - "epoch": 0.5525234492334722, + "epoch": 0.7628922450896001, "grad_norm": 0.0, - "learning_rate": 8.787835291222113e-06, - "loss": 0.8963, + "learning_rate": 2.8065824345820048e-06, + "loss": 0.8701, "step": 19498 }, { - "epoch": 0.5525517866757347, + "epoch": 0.7629313717818296, "grad_norm": 0.0, - "learning_rate": 8.786924274753391e-06, - "loss": 0.9195, + "learning_rate": 2.8057022022097125e-06, + "loss": 0.8839, "step": 19499 }, { - "epoch": 0.5525801241179971, + "epoch": 0.762970498474059, "grad_norm": 0.0, - "learning_rate": 8.786013268502855e-06, - "loss": 0.8991, + "learning_rate": 2.8048220853688314e-06, + "loss": 0.888, "step": 19500 }, { - "epoch": 0.5526084615602596, + "epoch": 0.7630096251662885, "grad_norm": 0.0, - "learning_rate": 8.785102272478185e-06, - "loss": 0.942, + "learning_rate": 2.8039420840734887e-06, + "loss": 1.0005, "step": 19501 }, { - "epoch": 0.552636799002522, + "epoch": 0.7630487518585178, "grad_norm": 0.0, - "learning_rate": 8.784191286687044e-06, - "loss": 0.778, + "learning_rate": 2.803062198337818e-06, + "loss": 0.8009, "step": 19502 }, { - "epoch": 0.5526651364447845, + "epoch": 0.7630878785507473, "grad_norm": 0.0, - "learning_rate": 8.783280311137114e-06, - "loss": 0.87, + "learning_rate": 2.8021824281759514e-06, + "loss": 0.984, "step": 19503 }, { - "epoch": 0.5526934738870469, + "epoch": 0.7631270052429767, "grad_norm": 0.0, - "learning_rate": 8.782369345836067e-06, - "loss": 0.9515, + "learning_rate": 2.801302773602018e-06, + "loss": 1.0792, "step": 19504 }, { - "epoch": 0.5527218113293094, + "epoch": 0.7631661319352062, "grad_norm": 0.0, - "learning_rate": 8.781458390791573e-06, - "loss": 0.846, + "learning_rate": 2.8004232346301384e-06, + "loss": 0.9613, "step": 19505 }, { - "epoch": 0.5527501487715719, + "epoch": 0.7632052586274356, "grad_norm": 0.0, - "learning_rate": 8.780547446011306e-06, - "loss": 0.843, + "learning_rate": 2.799543811274443e-06, + "loss": 0.9949, "step": 19506 }, { - "epoch": 0.5527784862138343, + "epoch": 0.7632443853196651, "grad_norm": 0.0, - "learning_rate": 8.779636511502944e-06, - "loss": 0.8853, + "learning_rate": 2.798664503549047e-06, + "loss": 0.9873, "step": 19507 }, { - "epoch": 0.5528068236560968, + "epoch": 0.7632835120118945, "grad_norm": 0.0, - "learning_rate": 8.778725587274152e-06, - "loss": 0.7456, + "learning_rate": 2.7977853114680796e-06, + "loss": 1.0979, "step": 19508 }, { - "epoch": 0.5528351610983593, + "epoch": 0.763322638704124, "grad_norm": 0.0, - "learning_rate": 8.777814673332615e-06, - "loss": 0.91, + "learning_rate": 2.7969062350456522e-06, + "loss": 0.9472, "step": 19509 }, { - "epoch": 0.5528634985406218, + "epoch": 0.7633617653963534, "grad_norm": 0.0, - "learning_rate": 8.776903769685994e-06, - "loss": 0.9152, + "learning_rate": 2.796027274295888e-06, + "loss": 1.0439, "step": 19510 }, { - "epoch": 0.5528918359828842, + "epoch": 0.7634008920885829, "grad_norm": 0.0, - "learning_rate": 8.775992876341966e-06, - "loss": 0.837, + "learning_rate": 2.7951484292328925e-06, + "loss": 0.9558, "step": 19511 }, { - "epoch": 0.5529201734251467, + "epoch": 0.7634400187808122, "grad_norm": 0.0, - "learning_rate": 8.775081993308208e-06, - "loss": 0.8644, + "learning_rate": 2.7942696998707918e-06, + "loss": 0.8894, "step": 19512 }, { - "epoch": 0.5529485108674091, + "epoch": 0.7634791454730417, "grad_norm": 0.0, - "learning_rate": 8.774171120592386e-06, - "loss": 0.9011, + "learning_rate": 2.793391086223687e-06, + "loss": 1.0409, "step": 19513 }, { - "epoch": 0.5529768483096715, + "epoch": 0.7635182721652711, "grad_norm": 0.0, - "learning_rate": 8.773260258202177e-06, - "loss": 0.8887, + "learning_rate": 2.7925125883056936e-06, + "loss": 1.0362, "step": 19514 }, { - "epoch": 0.553005185751934, + "epoch": 0.7635573988575006, "grad_norm": 0.0, - "learning_rate": 8.772349406145256e-06, - "loss": 0.9498, + "learning_rate": 2.791634206130913e-06, + "loss": 0.965, "step": 19515 }, { - "epoch": 0.5530335231941965, + "epoch": 0.76359652554973, "grad_norm": 0.0, - "learning_rate": 8.771438564429286e-06, - "loss": 0.8567, + "learning_rate": 2.7907559397134554e-06, + "loss": 1.0666, "step": 19516 }, { - "epoch": 0.5530618606364589, + "epoch": 0.7636356522419595, "grad_norm": 0.0, - "learning_rate": 8.770527733061951e-06, - "loss": 0.7411, + "learning_rate": 2.7898777890674246e-06, + "loss": 1.0122, "step": 19517 }, { - "epoch": 0.5530901980787214, + "epoch": 0.7636747789341889, "grad_norm": 0.0, - "learning_rate": 8.769616912050914e-06, - "loss": 0.8858, + "learning_rate": 2.7889997542069234e-06, + "loss": 0.9614, "step": 19518 }, { - "epoch": 0.5531185355209839, + "epoch": 0.7637139056264184, "grad_norm": 0.0, - "learning_rate": 8.76870610140385e-06, - "loss": 0.8687, + "learning_rate": 2.7881218351460473e-06, + "loss": 0.9126, "step": 19519 }, { - "epoch": 0.5531468729632464, + "epoch": 0.7637530323186478, "grad_norm": 0.0, - "learning_rate": 8.767795301128433e-06, - "loss": 0.8537, + "learning_rate": 2.787244031898898e-06, + "loss": 0.9706, "step": 19520 }, { - "epoch": 0.5531752104055088, + "epoch": 0.7637921590108773, "grad_norm": 0.0, - "learning_rate": 8.766884511232333e-06, - "loss": 0.8283, + "learning_rate": 2.7863663444795706e-06, + "loss": 1.0919, "step": 19521 }, { - "epoch": 0.5532035478477713, + "epoch": 0.7638312857031067, "grad_norm": 0.0, - "learning_rate": 8.765973731723221e-06, - "loss": 0.9852, + "learning_rate": 2.7854887729021652e-06, + "loss": 1.1076, "step": 19522 }, { - "epoch": 0.5532318852900338, + "epoch": 0.7638704123953362, "grad_norm": 0.0, - "learning_rate": 8.765062962608775e-06, - "loss": 0.8156, + "learning_rate": 2.7846113171807656e-06, + "loss": 0.9488, "step": 19523 }, { - "epoch": 0.5532602227322961, + "epoch": 0.7639095390875655, "grad_norm": 0.0, - "learning_rate": 8.764152203896658e-06, - "loss": 0.9386, + "learning_rate": 2.7837339773294704e-06, + "loss": 0.9853, "step": 19524 }, { - "epoch": 0.5532885601745586, + "epoch": 0.7639486657797949, "grad_norm": 0.0, - "learning_rate": 8.763241455594548e-06, - "loss": 0.82, + "learning_rate": 2.782856753362361e-06, + "loss": 1.1, "step": 19525 }, { - "epoch": 0.5533168976168211, + "epoch": 0.7639877924720244, "grad_norm": 0.0, - "learning_rate": 8.762330717710113e-06, - "loss": 0.9936, + "learning_rate": 2.7819796452935286e-06, + "loss": 0.9706, "step": 19526 }, { - "epoch": 0.5533452350590836, + "epoch": 0.7640269191642538, "grad_norm": 0.0, - "learning_rate": 8.761419990251027e-06, - "loss": 0.8754, + "learning_rate": 2.78110265313706e-06, + "loss": 0.8768, "step": 19527 }, { - "epoch": 0.553373572501346, + "epoch": 0.7640660458564833, "grad_norm": 0.0, - "learning_rate": 8.760509273224963e-06, - "loss": 0.8449, + "learning_rate": 2.7802257769070384e-06, + "loss": 1.0143, "step": 19528 }, { - "epoch": 0.5534019099436085, + "epoch": 0.7641051725487127, "grad_norm": 0.0, - "learning_rate": 8.759598566639586e-06, - "loss": 0.8247, + "learning_rate": 2.779349016617542e-06, + "loss": 1.0145, "step": 19529 }, { - "epoch": 0.553430247385871, + "epoch": 0.7641442992409422, "grad_norm": 0.0, - "learning_rate": 8.758687870502576e-06, - "loss": 0.7886, + "learning_rate": 2.7784723722826522e-06, + "loss": 0.9642, "step": 19530 }, { - "epoch": 0.5534585848281334, + "epoch": 0.7641834259331716, "grad_norm": 0.0, - "learning_rate": 8.757777184821593e-06, - "loss": 0.7742, + "learning_rate": 2.7775958439164496e-06, + "loss": 0.8903, "step": 19531 }, { - "epoch": 0.5534869222703959, + "epoch": 0.7642225526254011, "grad_norm": 0.0, - "learning_rate": 8.756866509604314e-06, - "loss": 0.9561, + "learning_rate": 2.7767194315330047e-06, + "loss": 0.9504, "step": 19532 }, { - "epoch": 0.5535152597126584, + "epoch": 0.7642616793176304, "grad_norm": 0.0, - "learning_rate": 8.755955844858415e-06, - "loss": 0.8453, + "learning_rate": 2.7758431351463944e-06, + "loss": 0.8998, "step": 19533 }, { - "epoch": 0.5535435971549209, + "epoch": 0.76430080600986, "grad_norm": 0.0, - "learning_rate": 8.755045190591557e-06, - "loss": 0.8501, + "learning_rate": 2.7749669547706914e-06, + "loss": 1.039, "step": 19534 }, { - "epoch": 0.5535719345971832, + "epoch": 0.7643399327020893, "grad_norm": 0.0, - "learning_rate": 8.754134546811416e-06, - "loss": 0.8121, + "learning_rate": 2.7740908904199683e-06, + "loss": 0.9895, "step": 19535 }, { - "epoch": 0.5536002720394457, + "epoch": 0.7643790593943188, "grad_norm": 0.0, - "learning_rate": 8.753223913525668e-06, - "loss": 0.8893, + "learning_rate": 2.773214942108288e-06, + "loss": 1.1109, "step": 19536 }, { - "epoch": 0.5536286094817082, + "epoch": 0.7644181860865482, "grad_norm": 0.0, - "learning_rate": 8.752313290741972e-06, - "loss": 0.787, + "learning_rate": 2.772339109849723e-06, + "loss": 0.9072, "step": 19537 }, { - "epoch": 0.5536569469239706, + "epoch": 0.7644573127787777, "grad_norm": 0.0, - "learning_rate": 8.751402678468008e-06, - "loss": 0.8563, + "learning_rate": 2.771463393658329e-06, + "loss": 0.9974, "step": 19538 }, { - "epoch": 0.5536852843662331, + "epoch": 0.7644964394710071, "grad_norm": 0.0, - "learning_rate": 8.750492076711439e-06, - "loss": 0.8661, + "learning_rate": 2.770587793548182e-06, + "loss": 0.9331, "step": 19539 }, { - "epoch": 0.5537136218084956, + "epoch": 0.7645355661632366, "grad_norm": 0.0, - "learning_rate": 8.74958148547994e-06, - "loss": 1.0074, + "learning_rate": 2.769712309533332e-06, + "loss": 0.9418, "step": 19540 }, { - "epoch": 0.553741959250758, + "epoch": 0.764574692855466, "grad_norm": 0.0, - "learning_rate": 8.748670904781186e-06, - "loss": 0.8843, + "learning_rate": 2.768836941627846e-06, + "loss": 0.8229, "step": 19541 }, { - "epoch": 0.5537702966930205, + "epoch": 0.7646138195476955, "grad_norm": 0.0, - "learning_rate": 8.747760334622838e-06, - "loss": 0.8325, + "learning_rate": 2.767961689845774e-06, + "loss": 1.0129, "step": 19542 }, { - "epoch": 0.553798634135283, + "epoch": 0.7646529462399249, "grad_norm": 0.0, - "learning_rate": 8.746849775012566e-06, - "loss": 0.9036, + "learning_rate": 2.767086554201175e-06, + "loss": 1.0043, "step": 19543 }, { - "epoch": 0.5538269715775455, + "epoch": 0.7646920729321544, "grad_norm": 0.0, - "learning_rate": 8.745939225958052e-06, - "loss": 0.8479, + "learning_rate": 2.766211534708102e-06, + "loss": 0.8429, "step": 19544 }, { - "epoch": 0.5538553090198078, + "epoch": 0.7647311996243837, "grad_norm": 0.0, - "learning_rate": 8.74502868746695e-06, - "loss": 0.9237, + "learning_rate": 2.7653366313806117e-06, + "loss": 1.1288, "step": 19545 }, { - "epoch": 0.5538836464620703, + "epoch": 0.7647703263166132, "grad_norm": 0.0, - "learning_rate": 8.744118159546942e-06, - "loss": 0.9677, + "learning_rate": 2.764461844232745e-06, + "loss": 0.926, "step": 19546 }, { - "epoch": 0.5539119839043328, + "epoch": 0.7648094530088426, "grad_norm": 0.0, - "learning_rate": 8.743207642205688e-06, - "loss": 0.805, + "learning_rate": 2.7635871732785557e-06, + "loss": 0.89, "step": 19547 }, { - "epoch": 0.5539403213465952, + "epoch": 0.7648485797010721, "grad_norm": 0.0, - "learning_rate": 8.742297135450866e-06, - "loss": 0.9515, + "learning_rate": 2.7627126185320884e-06, + "loss": 0.9813, "step": 19548 }, { - "epoch": 0.5539686587888577, + "epoch": 0.7648877063933015, "grad_norm": 0.0, - "learning_rate": 8.741386639290145e-06, - "loss": 0.7907, + "learning_rate": 2.76183818000739e-06, + "loss": 1.02, "step": 19549 }, { - "epoch": 0.5539969962311202, + "epoch": 0.764926833085531, "grad_norm": 0.0, - "learning_rate": 8.740476153731187e-06, - "loss": 0.8274, + "learning_rate": 2.7609638577184982e-06, + "loss": 0.8663, "step": 19550 }, { - "epoch": 0.5540253336733827, + "epoch": 0.7649659597777604, "grad_norm": 0.0, - "learning_rate": 8.739565678781668e-06, - "loss": 0.8535, + "learning_rate": 2.7600896516794563e-06, + "loss": 0.9328, "step": 19551 }, { - "epoch": 0.5540536711156451, + "epoch": 0.7650050864699899, "grad_norm": 0.0, - "learning_rate": 8.738655214449256e-06, - "loss": 0.8779, + "learning_rate": 2.7592155619043015e-06, + "loss": 0.9861, "step": 19552 }, { - "epoch": 0.5540820085579076, + "epoch": 0.7650442131622193, "grad_norm": 0.0, - "learning_rate": 8.737744760741616e-06, - "loss": 0.9266, + "learning_rate": 2.758341588407075e-06, + "loss": 1.01, "step": 19553 }, { - "epoch": 0.5541103460001701, + "epoch": 0.7650833398544487, "grad_norm": 0.0, - "learning_rate": 8.736834317666428e-06, - "loss": 0.8167, + "learning_rate": 2.757467731201805e-06, + "loss": 0.9829, "step": 19554 }, { - "epoch": 0.5541386834424324, + "epoch": 0.7651224665466781, "grad_norm": 0.0, - "learning_rate": 8.735923885231348e-06, - "loss": 0.8107, + "learning_rate": 2.7565939903025305e-06, + "loss": 0.8591, "step": 19555 }, { - "epoch": 0.5541670208846949, + "epoch": 0.7651615932389075, "grad_norm": 0.0, - "learning_rate": 8.735013463444049e-06, - "loss": 0.9451, + "learning_rate": 2.7557203657232757e-06, + "loss": 0.9805, "step": 19556 }, { - "epoch": 0.5541953583269574, + "epoch": 0.765200719931137, "grad_norm": 0.0, - "learning_rate": 8.734103052312207e-06, - "loss": 0.8593, + "learning_rate": 2.7548468574780784e-06, + "loss": 0.88, "step": 19557 }, { - "epoch": 0.5542236957692198, + "epoch": 0.7652398466233664, "grad_norm": 0.0, - "learning_rate": 8.73319265184348e-06, - "loss": 0.8815, + "learning_rate": 2.7539734655809604e-06, + "loss": 0.8987, "step": 19558 }, { - "epoch": 0.5542520332114823, + "epoch": 0.7652789733155959, "grad_norm": 0.0, - "learning_rate": 8.732282262045546e-06, - "loss": 0.8808, + "learning_rate": 2.75310019004595e-06, + "loss": 0.9941, "step": 19559 }, { - "epoch": 0.5542803706537448, + "epoch": 0.7653181000078253, "grad_norm": 0.0, - "learning_rate": 8.731371882926065e-06, - "loss": 0.8198, + "learning_rate": 2.7522270308870647e-06, + "loss": 0.9576, "step": 19560 }, { - "epoch": 0.5543087080960073, + "epoch": 0.7653572267000548, "grad_norm": 0.0, - "learning_rate": 8.73046151449271e-06, - "loss": 0.9712, + "learning_rate": 2.7513539881183373e-06, + "loss": 1.0488, "step": 19561 }, { - "epoch": 0.5543370455382697, + "epoch": 0.7653963533922842, "grad_norm": 0.0, - "learning_rate": 8.729551156753155e-06, - "loss": 0.8787, + "learning_rate": 2.7504810617537793e-06, + "loss": 0.8735, "step": 19562 }, { - "epoch": 0.5543653829805322, + "epoch": 0.7654354800845137, "grad_norm": 0.0, - "learning_rate": 8.728640809715057e-06, - "loss": 0.8008, + "learning_rate": 2.749608251807413e-06, + "loss": 0.9283, "step": 19563 }, { - "epoch": 0.5543937204227947, + "epoch": 0.7654746067767431, "grad_norm": 0.0, - "learning_rate": 8.727730473386089e-06, - "loss": 0.9333, + "learning_rate": 2.7487355582932505e-06, + "loss": 1.0076, "step": 19564 }, { - "epoch": 0.554422057865057, + "epoch": 0.7655137334689726, "grad_norm": 0.0, - "learning_rate": 8.726820147773923e-06, - "loss": 0.9018, + "learning_rate": 2.747862981225309e-06, + "loss": 0.9595, "step": 19565 }, { - "epoch": 0.5544503953073195, + "epoch": 0.7655528601612019, "grad_norm": 0.0, - "learning_rate": 8.72590983288622e-06, - "loss": 0.8288, + "learning_rate": 2.7469905206176006e-06, + "loss": 1.0342, "step": 19566 }, { - "epoch": 0.554478732749582, + "epoch": 0.7655919868534314, "grad_norm": 0.0, - "learning_rate": 8.724999528730657e-06, - "loss": 0.9313, + "learning_rate": 2.7461181764841383e-06, + "loss": 1.056, "step": 19567 }, { - "epoch": 0.5545070701918445, + "epoch": 0.7656311135456608, "grad_norm": 0.0, - "learning_rate": 8.72408923531489e-06, - "loss": 0.9135, + "learning_rate": 2.7452459488389262e-06, + "loss": 0.9467, "step": 19568 }, { - "epoch": 0.5545354076341069, + "epoch": 0.7656702402378903, "grad_norm": 0.0, - "learning_rate": 8.723178952646597e-06, - "loss": 0.9488, + "learning_rate": 2.744373837695973e-06, + "loss": 0.9302, "step": 19569 }, { - "epoch": 0.5545637450763694, + "epoch": 0.7657093669301197, "grad_norm": 0.0, - "learning_rate": 8.722268680733443e-06, - "loss": 0.9076, + "learning_rate": 2.743501843069286e-06, + "loss": 0.9192, "step": 19570 }, { - "epoch": 0.5545920825186319, + "epoch": 0.7657484936223492, "grad_norm": 0.0, - "learning_rate": 8.72135841958309e-06, - "loss": 0.9283, + "learning_rate": 2.742629964972865e-06, + "loss": 0.9818, "step": 19571 }, { - "epoch": 0.5546204199608943, + "epoch": 0.7657876203145786, "grad_norm": 0.0, - "learning_rate": 8.720448169203213e-06, - "loss": 0.8778, + "learning_rate": 2.7417582034207122e-06, + "loss": 1.0778, "step": 19572 }, { - "epoch": 0.5546487574031568, + "epoch": 0.7658267470068081, "grad_norm": 0.0, - "learning_rate": 8.719537929601476e-06, - "loss": 0.8752, + "learning_rate": 2.7408865584268305e-06, + "loss": 1.0345, "step": 19573 }, { - "epoch": 0.5546770948454193, + "epoch": 0.7658658736990375, "grad_norm": 0.0, - "learning_rate": 8.718627700785545e-06, - "loss": 0.9283, + "learning_rate": 2.740015030005212e-06, + "loss": 0.977, "step": 19574 }, { - "epoch": 0.5547054322876818, + "epoch": 0.765905000391267, "grad_norm": 0.0, - "learning_rate": 8.717717482763092e-06, - "loss": 0.8569, + "learning_rate": 2.739143618169855e-06, + "loss": 1.0043, "step": 19575 }, { - "epoch": 0.5547337697299441, + "epoch": 0.7659441270834964, "grad_norm": 0.0, - "learning_rate": 8.716807275541778e-06, - "loss": 0.8287, + "learning_rate": 2.738272322934756e-06, + "loss": 1.0146, "step": 19576 }, { - "epoch": 0.5547621071722066, + "epoch": 0.7659832537757258, "grad_norm": 0.0, - "learning_rate": 8.715897079129272e-06, - "loss": 0.8944, + "learning_rate": 2.7374011443139004e-06, + "loss": 1.0191, "step": 19577 }, { - "epoch": 0.5547904446144691, + "epoch": 0.7660223804679552, "grad_norm": 0.0, - "learning_rate": 8.714986893533244e-06, - "loss": 0.8905, + "learning_rate": 2.7365300823212826e-06, + "loss": 0.8629, "step": 19578 }, { - "epoch": 0.5548187820567315, + "epoch": 0.7660615071601847, "grad_norm": 0.0, - "learning_rate": 8.714076718761357e-06, - "loss": 0.8239, + "learning_rate": 2.7356591369708894e-06, + "loss": 0.9287, "step": 19579 }, { - "epoch": 0.554847119498994, + "epoch": 0.7661006338524141, "grad_norm": 0.0, - "learning_rate": 8.713166554821277e-06, - "loss": 0.8618, + "learning_rate": 2.7347883082767113e-06, + "loss": 1.0492, "step": 19580 }, { - "epoch": 0.5548754569412565, + "epoch": 0.7661397605446436, "grad_norm": 0.0, - "learning_rate": 8.71225640172068e-06, - "loss": 0.9694, + "learning_rate": 2.7339175962527263e-06, + "loss": 1.0728, "step": 19581 }, { - "epoch": 0.5549037943835189, + "epoch": 0.766178887236873, "grad_norm": 0.0, - "learning_rate": 8.71134625946722e-06, - "loss": 0.8875, + "learning_rate": 2.7330470009129217e-06, + "loss": 0.9308, "step": 19582 }, { - "epoch": 0.5549321318257814, + "epoch": 0.7662180139291024, "grad_norm": 0.0, - "learning_rate": 8.710436128068572e-06, - "loss": 0.7554, + "learning_rate": 2.7321765222712717e-06, + "loss": 0.9365, "step": 19583 }, { - "epoch": 0.5549604692680439, + "epoch": 0.7662571406213319, "grad_norm": 0.0, - "learning_rate": 8.709526007532396e-06, - "loss": 0.8511, + "learning_rate": 2.7313061603417646e-06, + "loss": 0.9921, "step": 19584 }, { - "epoch": 0.5549888067103064, + "epoch": 0.7662962673135613, "grad_norm": 0.0, - "learning_rate": 8.708615897866363e-06, - "loss": 0.8599, + "learning_rate": 2.7304359151383697e-06, + "loss": 0.9699, "step": 19585 }, { - "epoch": 0.5550171441525688, + "epoch": 0.7663353940057908, "grad_norm": 0.0, - "learning_rate": 8.70770579907814e-06, - "loss": 0.8938, + "learning_rate": 2.729565786675068e-06, + "loss": 1.0844, "step": 19586 }, { - "epoch": 0.5550454815948312, + "epoch": 0.7663745206980201, "grad_norm": 0.0, - "learning_rate": 8.706795711175389e-06, - "loss": 0.9626, + "learning_rate": 2.728695774965823e-06, + "loss": 1.0976, "step": 19587 }, { - "epoch": 0.5550738190370937, + "epoch": 0.7664136473902496, "grad_norm": 0.0, - "learning_rate": 8.70588563416578e-06, - "loss": 0.8213, + "learning_rate": 2.7278258800246184e-06, + "loss": 0.9509, "step": 19588 }, { - "epoch": 0.5551021564793561, + "epoch": 0.766452774082479, "grad_norm": 0.0, - "learning_rate": 8.704975568056975e-06, - "loss": 0.8747, + "learning_rate": 2.7269561018654146e-06, + "loss": 1.0366, "step": 19589 }, { - "epoch": 0.5551304939216186, + "epoch": 0.7664919007747085, "grad_norm": 0.0, - "learning_rate": 8.70406551285664e-06, - "loss": 0.7842, + "learning_rate": 2.726086440502186e-06, + "loss": 1.0735, "step": 19590 }, { - "epoch": 0.5551588313638811, + "epoch": 0.7665310274669379, "grad_norm": 0.0, - "learning_rate": 8.703155468572443e-06, - "loss": 0.9324, + "learning_rate": 2.72521689594889e-06, + "loss": 0.8486, "step": 19591 }, { - "epoch": 0.5551871688061436, + "epoch": 0.7665701541591674, "grad_norm": 0.0, - "learning_rate": 8.702245435212051e-06, - "loss": 0.8395, + "learning_rate": 2.724347468219496e-06, + "loss": 1.1319, "step": 19592 }, { - "epoch": 0.555215506248406, + "epoch": 0.7666092808513968, "grad_norm": 0.0, - "learning_rate": 8.701335412783124e-06, - "loss": 0.9778, + "learning_rate": 2.7234781573279645e-06, + "loss": 0.9738, "step": 19593 }, { - "epoch": 0.5552438436906685, + "epoch": 0.7666484075436263, "grad_norm": 0.0, - "learning_rate": 8.700425401293338e-06, - "loss": 0.8368, + "learning_rate": 2.722608963288258e-06, + "loss": 0.9683, "step": 19594 }, { - "epoch": 0.555272181132931, + "epoch": 0.7666875342358557, "grad_norm": 0.0, - "learning_rate": 8.699515400750345e-06, - "loss": 0.8688, + "learning_rate": 2.7217398861143306e-06, + "loss": 0.9966, "step": 19595 }, { - "epoch": 0.5553005185751934, + "epoch": 0.7667266609280852, "grad_norm": 0.0, - "learning_rate": 8.69860541116182e-06, - "loss": 0.9688, + "learning_rate": 2.720870925820139e-06, + "loss": 0.9123, "step": 19596 }, { - "epoch": 0.5553288560174559, + "epoch": 0.7667657876203146, "grad_norm": 0.0, - "learning_rate": 8.697695432535424e-06, - "loss": 0.9474, + "learning_rate": 2.7200020824196404e-06, + "loss": 0.9327, "step": 19597 }, { - "epoch": 0.5553571934597183, + "epoch": 0.766804914312544, "grad_norm": 0.0, - "learning_rate": 8.69678546487882e-06, - "loss": 0.8326, + "learning_rate": 2.7191333559267895e-06, + "loss": 0.8216, "step": 19598 }, { - "epoch": 0.5553855309019808, + "epoch": 0.7668440410047734, "grad_norm": 0.0, - "learning_rate": 8.695875508199683e-06, - "loss": 0.8218, + "learning_rate": 2.718264746355529e-06, + "loss": 0.9723, "step": 19599 }, { - "epoch": 0.5554138683442432, + "epoch": 0.7668831676970029, "grad_norm": 0.0, - "learning_rate": 8.694965562505664e-06, - "loss": 0.9697, + "learning_rate": 2.717396253719816e-06, + "loss": 0.8924, "step": 19600 }, { - "epoch": 0.5554422057865057, + "epoch": 0.7669222943892323, "grad_norm": 0.0, - "learning_rate": 8.694055627804438e-06, - "loss": 0.892, + "learning_rate": 2.716527878033588e-06, + "loss": 0.8304, "step": 19601 }, { - "epoch": 0.5554705432287682, + "epoch": 0.7669614210814618, "grad_norm": 0.0, - "learning_rate": 8.693145704103669e-06, - "loss": 0.7434, + "learning_rate": 2.715659619310801e-06, + "loss": 1.0037, "step": 19602 }, { - "epoch": 0.5554988806710306, + "epoch": 0.7670005477736912, "grad_norm": 0.0, - "learning_rate": 8.692235791411013e-06, - "loss": 0.9844, + "learning_rate": 2.7147914775653896e-06, + "loss": 1.0439, "step": 19603 }, { - "epoch": 0.5555272181132931, + "epoch": 0.7670396744659207, "grad_norm": 0.0, - "learning_rate": 8.691325889734144e-06, - "loss": 0.8747, + "learning_rate": 2.713923452811301e-06, + "loss": 1.0696, "step": 19604 }, { - "epoch": 0.5555555555555556, + "epoch": 0.7670788011581501, "grad_norm": 0.0, - "learning_rate": 8.690415999080721e-06, - "loss": 0.8754, + "learning_rate": 2.713055545062465e-06, + "loss": 0.9185, "step": 19605 }, { - "epoch": 0.555583892997818, + "epoch": 0.7671179278503796, "grad_norm": 0.0, - "learning_rate": 8.68950611945841e-06, - "loss": 0.8607, + "learning_rate": 2.7121877543328334e-06, + "loss": 0.9703, "step": 19606 }, { - "epoch": 0.5556122304400805, + "epoch": 0.767157054542609, "grad_norm": 0.0, - "learning_rate": 8.688596250874882e-06, - "loss": 0.8925, + "learning_rate": 2.7113200806363316e-06, + "loss": 0.8329, "step": 19607 }, { - "epoch": 0.555640567882343, + "epoch": 0.7671961812348385, "grad_norm": 0.0, - "learning_rate": 8.687686393337789e-06, - "loss": 0.9747, + "learning_rate": 2.710452523986897e-06, + "loss": 1.005, "step": 19608 }, { - "epoch": 0.5556689053246054, + "epoch": 0.7672353079270678, "grad_norm": 0.0, - "learning_rate": 8.6867765468548e-06, - "loss": 0.7631, + "learning_rate": 2.7095850843984595e-06, + "loss": 0.9115, "step": 19609 }, { - "epoch": 0.5556972427668678, + "epoch": 0.7672744346192972, "grad_norm": 0.0, - "learning_rate": 8.685866711433582e-06, - "loss": 0.8122, + "learning_rate": 2.708717761884949e-06, + "loss": 1.004, "step": 19610 }, { - "epoch": 0.5557255802091303, + "epoch": 0.7673135613115267, "grad_norm": 0.0, - "learning_rate": 8.684956887081795e-06, - "loss": 0.7976, + "learning_rate": 2.7078505564602965e-06, + "loss": 0.8384, "step": 19611 }, { - "epoch": 0.5557539176513928, + "epoch": 0.7673526880037561, "grad_norm": 0.0, - "learning_rate": 8.684047073807109e-06, - "loss": 0.7998, + "learning_rate": 2.706983468138428e-06, + "loss": 0.8257, "step": 19612 }, { - "epoch": 0.5557822550936552, + "epoch": 0.7673918146959856, "grad_norm": 0.0, - "learning_rate": 8.683137271617179e-06, - "loss": 0.9053, + "learning_rate": 2.7061164969332634e-06, + "loss": 0.9026, "step": 19613 }, { - "epoch": 0.5558105925359177, + "epoch": 0.767430941388215, "grad_norm": 0.0, - "learning_rate": 8.682227480519672e-06, - "loss": 0.8429, + "learning_rate": 2.705249642858728e-06, + "loss": 0.9347, "step": 19614 }, { - "epoch": 0.5558389299781802, + "epoch": 0.7674700680804445, "grad_norm": 0.0, - "learning_rate": 8.681317700522257e-06, - "loss": 0.8831, + "learning_rate": 2.7043829059287462e-06, + "loss": 1.058, "step": 19615 }, { - "epoch": 0.5558672674204427, + "epoch": 0.7675091947726739, "grad_norm": 0.0, - "learning_rate": 8.680407931632589e-06, - "loss": 0.8401, + "learning_rate": 2.7035162861572297e-06, + "loss": 1.016, "step": 19616 }, { - "epoch": 0.5558956048627051, + "epoch": 0.7675483214649034, "grad_norm": 0.0, - "learning_rate": 8.679498173858335e-06, - "loss": 0.8808, + "learning_rate": 2.7026497835580978e-06, + "loss": 1.0554, "step": 19617 }, { - "epoch": 0.5559239423049676, + "epoch": 0.7675874481571328, "grad_norm": 0.0, - "learning_rate": 8.67858842720716e-06, - "loss": 0.8077, + "learning_rate": 2.701783398145268e-06, + "loss": 0.9076, "step": 19618 }, { - "epoch": 0.55595227974723, + "epoch": 0.7676265748493623, "grad_norm": 0.0, - "learning_rate": 8.677678691686722e-06, - "loss": 0.8915, + "learning_rate": 2.700917129932653e-06, + "loss": 0.9541, "step": 19619 }, { - "epoch": 0.5559806171894924, + "epoch": 0.7676657015415916, "grad_norm": 0.0, - "learning_rate": 8.676768967304692e-06, - "loss": 0.8818, + "learning_rate": 2.70005097893416e-06, + "loss": 0.9742, "step": 19620 }, { - "epoch": 0.5560089546317549, + "epoch": 0.7677048282338211, "grad_norm": 0.0, - "learning_rate": 8.675859254068726e-06, - "loss": 0.7964, + "learning_rate": 2.699184945163704e-06, + "loss": 0.8453, "step": 19621 }, { - "epoch": 0.5560372920740174, + "epoch": 0.7677439549260505, "grad_norm": 0.0, - "learning_rate": 8.674949551986487e-06, - "loss": 0.9084, + "learning_rate": 2.698319028635188e-06, + "loss": 0.9752, "step": 19622 }, { - "epoch": 0.5560656295162799, + "epoch": 0.76778308161828, "grad_norm": 0.0, - "learning_rate": 8.674039861065644e-06, - "loss": 0.8459, + "learning_rate": 2.6974532293625166e-06, + "loss": 1.0572, "step": 19623 }, { - "epoch": 0.5560939669585423, + "epoch": 0.7678222083105094, "grad_norm": 0.0, - "learning_rate": 8.673130181313852e-06, - "loss": 0.8803, + "learning_rate": 2.6965875473595972e-06, + "loss": 0.9114, "step": 19624 }, { - "epoch": 0.5561223044008048, + "epoch": 0.7678613350027389, "grad_norm": 0.0, - "learning_rate": 8.672220512738783e-06, - "loss": 0.8605, + "learning_rate": 2.6957219826403325e-06, + "loss": 0.9241, "step": 19625 }, { - "epoch": 0.5561506418430673, + "epoch": 0.7679004616949683, "grad_norm": 0.0, - "learning_rate": 8.671310855348089e-06, - "loss": 0.8434, + "learning_rate": 2.694856535218616e-06, + "loss": 0.9211, "step": 19626 }, { - "epoch": 0.5561789792853297, + "epoch": 0.7679395883871978, "grad_norm": 0.0, - "learning_rate": 8.670401209149435e-06, - "loss": 0.8798, + "learning_rate": 2.6939912051083517e-06, + "loss": 0.9649, "step": 19627 }, { - "epoch": 0.5562073167275922, + "epoch": 0.7679787150794272, "grad_norm": 0.0, - "learning_rate": 8.669491574150493e-06, - "loss": 0.8827, + "learning_rate": 2.6931259923234323e-06, + "loss": 1.1406, "step": 19628 }, { - "epoch": 0.5562356541698547, + "epoch": 0.7680178417716567, "grad_norm": 0.0, - "learning_rate": 8.66858195035891e-06, - "loss": 0.8832, + "learning_rate": 2.692260896877756e-06, + "loss": 0.9493, "step": 19629 }, { - "epoch": 0.556263991612117, + "epoch": 0.768056968463886, "grad_norm": 0.0, - "learning_rate": 8.667672337782359e-06, - "loss": 0.8323, + "learning_rate": 2.6913959187852114e-06, + "loss": 1.0145, "step": 19630 }, { - "epoch": 0.5562923290543795, + "epoch": 0.7680960951561155, "grad_norm": 0.0, - "learning_rate": 8.666762736428497e-06, - "loss": 0.8152, + "learning_rate": 2.6905310580596922e-06, + "loss": 0.8625, "step": 19631 }, { - "epoch": 0.556320666496642, + "epoch": 0.7681352218483449, "grad_norm": 0.0, - "learning_rate": 8.665853146304988e-06, - "loss": 0.8896, + "learning_rate": 2.689666314715079e-06, + "loss": 0.9879, "step": 19632 }, { - "epoch": 0.5563490039389045, + "epoch": 0.7681743485405744, "grad_norm": 0.0, - "learning_rate": 8.664943567419497e-06, - "loss": 0.8505, + "learning_rate": 2.6888016887652703e-06, + "loss": 1.0212, "step": 19633 }, { - "epoch": 0.5563773413811669, + "epoch": 0.7682134752328038, "grad_norm": 0.0, - "learning_rate": 8.664033999779677e-06, - "loss": 0.9211, + "learning_rate": 2.6879371802241418e-06, + "loss": 0.9408, "step": 19634 }, { - "epoch": 0.5564056788234294, + "epoch": 0.7682526019250333, "grad_norm": 0.0, - "learning_rate": 8.663124443393195e-06, - "loss": 0.9059, + "learning_rate": 2.6870727891055826e-06, + "loss": 1.0409, "step": 19635 }, { - "epoch": 0.5564340162656919, + "epoch": 0.7682917286172627, "grad_norm": 0.0, - "learning_rate": 8.662214898267715e-06, - "loss": 0.9011, + "learning_rate": 2.686208515423465e-06, + "loss": 1.084, "step": 19636 }, { - "epoch": 0.5564623537079543, + "epoch": 0.7683308553094922, "grad_norm": 0.0, - "learning_rate": 8.661305364410894e-06, - "loss": 0.9471, + "learning_rate": 2.6853443591916806e-06, + "loss": 1.0522, "step": 19637 }, { - "epoch": 0.5564906911502168, + "epoch": 0.7683699820017216, "grad_norm": 0.0, - "learning_rate": 8.660395841830395e-06, - "loss": 0.8143, + "learning_rate": 2.6844803204240968e-06, + "loss": 0.9413, "step": 19638 }, { - "epoch": 0.5565190285924793, + "epoch": 0.768409108693951, "grad_norm": 0.0, - "learning_rate": 8.659486330533883e-06, - "loss": 0.8693, + "learning_rate": 2.6836163991345943e-06, + "loss": 1.0426, "step": 19639 }, { - "epoch": 0.5565473660347418, + "epoch": 0.7684482353861805, "grad_norm": 0.0, - "learning_rate": 8.658576830529011e-06, - "loss": 0.8837, + "learning_rate": 2.6827525953370425e-06, + "loss": 0.9835, "step": 19640 }, { - "epoch": 0.5565757034770041, + "epoch": 0.7684873620784098, "grad_norm": 0.0, - "learning_rate": 8.657667341823449e-06, - "loss": 0.9102, + "learning_rate": 2.681888909045315e-06, + "loss": 0.9664, "step": 19641 }, { - "epoch": 0.5566040409192666, + "epoch": 0.7685264887706393, "grad_norm": 0.0, - "learning_rate": 8.656757864424848e-06, - "loss": 0.802, + "learning_rate": 2.6810253402732798e-06, + "loss": 1.0508, "step": 19642 }, { - "epoch": 0.5566323783615291, + "epoch": 0.7685656154628687, "grad_norm": 0.0, - "learning_rate": 8.655848398340876e-06, - "loss": 0.8761, + "learning_rate": 2.6801618890348113e-06, + "loss": 1.004, "step": 19643 }, { - "epoch": 0.5566607158037915, + "epoch": 0.7686047421550982, "grad_norm": 0.0, - "learning_rate": 8.654938943579194e-06, - "loss": 0.8623, + "learning_rate": 2.679298555343767e-06, + "loss": 0.9996, "step": 19644 }, { - "epoch": 0.556689053246054, + "epoch": 0.7686438688473276, "grad_norm": 0.0, - "learning_rate": 8.654029500147458e-06, - "loss": 0.8741, + "learning_rate": 2.678435339214015e-06, + "loss": 0.8669, "step": 19645 }, { - "epoch": 0.5567173906883165, + "epoch": 0.7686829955395571, "grad_norm": 0.0, - "learning_rate": 8.653120068053336e-06, - "loss": 0.7093, + "learning_rate": 2.677572240659416e-06, + "loss": 0.9438, "step": 19646 }, { - "epoch": 0.556745728130579, + "epoch": 0.7687221222317865, "grad_norm": 0.0, - "learning_rate": 8.65221064730448e-06, - "loss": 0.8995, + "learning_rate": 2.6767092596938347e-06, + "loss": 0.9391, "step": 19647 }, { - "epoch": 0.5567740655728414, + "epoch": 0.768761248924016, "grad_norm": 0.0, - "learning_rate": 8.651301237908552e-06, - "loss": 0.8705, + "learning_rate": 2.675846396331123e-06, + "loss": 0.9861, "step": 19648 }, { - "epoch": 0.5568024030151039, + "epoch": 0.7688003756162454, "grad_norm": 0.0, - "learning_rate": 8.65039183987322e-06, - "loss": 0.9645, + "learning_rate": 2.6749836505851443e-06, + "loss": 1.1159, "step": 19649 }, { - "epoch": 0.5568307404573664, + "epoch": 0.7688395023084749, "grad_norm": 0.0, - "learning_rate": 8.649482453206134e-06, - "loss": 0.8955, + "learning_rate": 2.6741210224697435e-06, + "loss": 0.8968, "step": 19650 }, { - "epoch": 0.5568590778996287, + "epoch": 0.7688786290007043, "grad_norm": 0.0, - "learning_rate": 8.64857307791496e-06, - "loss": 0.8614, + "learning_rate": 2.6732585119987842e-06, + "loss": 0.9168, "step": 19651 }, { - "epoch": 0.5568874153418912, + "epoch": 0.7689177556929337, "grad_norm": 0.0, - "learning_rate": 8.64766371400736e-06, - "loss": 0.9167, + "learning_rate": 2.6723961191861093e-06, + "loss": 0.989, "step": 19652 }, { - "epoch": 0.5569157527841537, + "epoch": 0.7689568823851631, "grad_norm": 0.0, - "learning_rate": 8.646754361490988e-06, - "loss": 0.8075, + "learning_rate": 2.671533844045574e-06, + "loss": 0.9546, "step": 19653 }, { - "epoch": 0.5569440902264161, + "epoch": 0.7689960090773926, "grad_norm": 0.0, - "learning_rate": 8.645845020373508e-06, - "loss": 0.8748, + "learning_rate": 2.67067168659102e-06, + "loss": 0.9824, "step": 19654 }, { - "epoch": 0.5569724276686786, + "epoch": 0.769035135769622, "grad_norm": 0.0, - "learning_rate": 8.644935690662578e-06, - "loss": 0.8225, + "learning_rate": 2.6698096468362933e-06, + "loss": 0.9691, "step": 19655 }, { - "epoch": 0.5570007651109411, + "epoch": 0.7690742624618515, "grad_norm": 0.0, - "learning_rate": 8.644026372365855e-06, - "loss": 0.8781, + "learning_rate": 2.668947724795239e-06, + "loss": 0.9968, "step": 19656 }, { - "epoch": 0.5570291025532036, + "epoch": 0.7691133891540809, "grad_norm": 0.0, - "learning_rate": 8.643117065491005e-06, - "loss": 0.8263, + "learning_rate": 2.668085920481701e-06, + "loss": 1.0043, "step": 19657 }, { - "epoch": 0.557057439995466, + "epoch": 0.7691525158463104, "grad_norm": 0.0, - "learning_rate": 8.64220777004568e-06, - "loss": 0.855, + "learning_rate": 2.6672242339095124e-06, + "loss": 1.0062, "step": 19658 }, { - "epoch": 0.5570857774377285, + "epoch": 0.7691916425385398, "grad_norm": 0.0, - "learning_rate": 8.641298486037543e-06, - "loss": 0.8974, + "learning_rate": 2.6663626650925146e-06, + "loss": 0.8494, "step": 19659 }, { - "epoch": 0.557114114879991, + "epoch": 0.7692307692307693, "grad_norm": 0.0, - "learning_rate": 8.640389213474259e-06, - "loss": 0.9587, + "learning_rate": 2.6655012140445447e-06, + "loss": 0.8299, "step": 19660 }, { - "epoch": 0.5571424523222533, + "epoch": 0.7692698959229987, "grad_norm": 0.0, - "learning_rate": 8.639479952363478e-06, - "loss": 0.8653, + "learning_rate": 2.6646398807794326e-06, + "loss": 0.9403, "step": 19661 }, { - "epoch": 0.5571707897645158, + "epoch": 0.7693090226152282, "grad_norm": 0.0, - "learning_rate": 8.638570702712863e-06, - "loss": 0.7566, + "learning_rate": 2.663778665311012e-06, + "loss": 0.9544, "step": 19662 }, { - "epoch": 0.5571991272067783, + "epoch": 0.7693481493074575, "grad_norm": 0.0, - "learning_rate": 8.637661464530072e-06, - "loss": 0.8612, + "learning_rate": 2.662917567653114e-06, + "loss": 0.9084, "step": 19663 }, { - "epoch": 0.5572274646490408, + "epoch": 0.769387275999687, "grad_norm": 0.0, - "learning_rate": 8.636752237822762e-06, - "loss": 0.8454, + "learning_rate": 2.662056587819568e-06, + "loss": 0.897, "step": 19664 }, { - "epoch": 0.5572558020913032, + "epoch": 0.7694264026919164, "grad_norm": 0.0, - "learning_rate": 8.6358430225986e-06, - "loss": 0.8791, + "learning_rate": 2.661195725824195e-06, + "loss": 1.239, "step": 19665 }, { - "epoch": 0.5572841395335657, + "epoch": 0.7694655293841459, "grad_norm": 0.0, - "learning_rate": 8.634933818865235e-06, - "loss": 0.9322, + "learning_rate": 2.6603349816808268e-06, + "loss": 0.9412, "step": 19666 }, { - "epoch": 0.5573124769758282, + "epoch": 0.7695046560763753, "grad_norm": 0.0, - "learning_rate": 8.634024626630329e-06, - "loss": 0.8297, + "learning_rate": 2.6594743554032753e-06, + "loss": 0.8664, "step": 19667 }, { - "epoch": 0.5573408144180906, + "epoch": 0.7695437827686047, "grad_norm": 0.0, - "learning_rate": 8.633115445901545e-06, - "loss": 0.8479, + "learning_rate": 2.6586138470053725e-06, + "loss": 0.9401, "step": 19668 }, { - "epoch": 0.5573691518603531, + "epoch": 0.7695829094608342, "grad_norm": 0.0, - "learning_rate": 8.632206276686533e-06, - "loss": 0.8947, + "learning_rate": 2.65775345650093e-06, + "loss": 1.0392, "step": 19669 }, { - "epoch": 0.5573974893026156, + "epoch": 0.7696220361530636, "grad_norm": 0.0, - "learning_rate": 8.631297118992957e-06, - "loss": 0.8743, + "learning_rate": 2.656893183903769e-06, + "loss": 0.9837, "step": 19670 }, { - "epoch": 0.5574258267448781, + "epoch": 0.7696611628452931, "grad_norm": 0.0, - "learning_rate": 8.630387972828472e-06, - "loss": 0.9894, + "learning_rate": 2.6560330292277e-06, + "loss": 1.0089, "step": 19671 }, { - "epoch": 0.5574541641871404, + "epoch": 0.7697002895375225, "grad_norm": 0.0, - "learning_rate": 8.629478838200737e-06, - "loss": 0.7568, + "learning_rate": 2.6551729924865377e-06, + "loss": 0.9439, "step": 19672 }, { - "epoch": 0.5574825016294029, + "epoch": 0.769739416229752, "grad_norm": 0.0, - "learning_rate": 8.628569715117416e-06, - "loss": 0.9399, + "learning_rate": 2.6543130736940936e-06, + "loss": 1.0641, "step": 19673 }, { - "epoch": 0.5575108390716654, + "epoch": 0.7697785429219813, "grad_norm": 0.0, - "learning_rate": 8.627660603586157e-06, - "loss": 0.8499, + "learning_rate": 2.6534532728641794e-06, + "loss": 1.0223, "step": 19674 }, { - "epoch": 0.5575391765139278, + "epoch": 0.7698176696142108, "grad_norm": 0.0, - "learning_rate": 8.626751503614624e-06, - "loss": 0.8653, + "learning_rate": 2.652593590010597e-06, + "loss": 0.9327, "step": 19675 }, { - "epoch": 0.5575675139561903, + "epoch": 0.7698567963064402, "grad_norm": 0.0, - "learning_rate": 8.625842415210471e-06, - "loss": 0.9237, + "learning_rate": 2.6517340251471546e-06, + "loss": 0.7505, "step": 19676 }, { - "epoch": 0.5575958513984528, + "epoch": 0.7698959229986697, "grad_norm": 0.0, - "learning_rate": 8.624933338381358e-06, - "loss": 0.8422, + "learning_rate": 2.6508745782876564e-06, + "loss": 0.9682, "step": 19677 }, { - "epoch": 0.5576241888407152, + "epoch": 0.7699350496908991, "grad_norm": 0.0, - "learning_rate": 8.624024273134947e-06, - "loss": 0.8455, + "learning_rate": 2.6500152494459063e-06, + "loss": 1.0177, "step": 19678 }, { - "epoch": 0.5576525262829777, + "epoch": 0.7699741763831286, "grad_norm": 0.0, - "learning_rate": 8.623115219478884e-06, - "loss": 0.9776, + "learning_rate": 2.6491560386356986e-06, + "loss": 1.0736, "step": 19679 }, { - "epoch": 0.5576808637252402, + "epoch": 0.770013303075358, "grad_norm": 0.0, - "learning_rate": 8.622206177420836e-06, - "loss": 0.7797, + "learning_rate": 2.6482969458708362e-06, + "loss": 0.938, "step": 19680 }, { - "epoch": 0.5577092011675027, + "epoch": 0.7700524297675875, "grad_norm": 0.0, - "learning_rate": 8.62129714696846e-06, - "loss": 0.7962, + "learning_rate": 2.6474379711651067e-06, + "loss": 1.0063, "step": 19681 }, { - "epoch": 0.557737538609765, + "epoch": 0.7700915564598169, "grad_norm": 0.0, - "learning_rate": 8.620388128129404e-06, - "loss": 0.8054, + "learning_rate": 2.646579114532316e-06, + "loss": 0.9745, "step": 19682 }, { - "epoch": 0.5577658760520275, + "epoch": 0.7701306831520464, "grad_norm": 0.0, - "learning_rate": 8.619479120911334e-06, - "loss": 0.8758, + "learning_rate": 2.6457203759862473e-06, + "loss": 0.8619, "step": 19683 }, { - "epoch": 0.55779421349429, + "epoch": 0.7701698098442757, "grad_norm": 0.0, - "learning_rate": 8.618570125321903e-06, - "loss": 0.9296, + "learning_rate": 2.6448617555406973e-06, + "loss": 0.9875, "step": 19684 }, { - "epoch": 0.5578225509365524, + "epoch": 0.7702089365365052, "grad_norm": 0.0, - "learning_rate": 8.617661141368768e-06, - "loss": 0.8507, + "learning_rate": 2.6440032532094453e-06, + "loss": 0.9684, "step": 19685 }, { - "epoch": 0.5578508883788149, + "epoch": 0.7702480632287346, "grad_norm": 0.0, - "learning_rate": 8.616752169059591e-06, - "loss": 0.7974, + "learning_rate": 2.643144869006289e-06, + "loss": 0.89, "step": 19686 }, { - "epoch": 0.5578792258210774, + "epoch": 0.7702871899209641, "grad_norm": 0.0, - "learning_rate": 8.615843208402019e-06, - "loss": 0.8764, + "learning_rate": 2.6422866029450046e-06, + "loss": 1.0536, "step": 19687 }, { - "epoch": 0.5579075632633399, + "epoch": 0.7703263166131935, "grad_norm": 0.0, - "learning_rate": 8.614934259403716e-06, - "loss": 0.9954, + "learning_rate": 2.641428455039381e-06, + "loss": 1.0067, "step": 19688 }, { - "epoch": 0.5579359007056023, + "epoch": 0.770365443305423, "grad_norm": 0.0, - "learning_rate": 8.614025322072338e-06, - "loss": 0.8922, + "learning_rate": 2.6405704253031916e-06, + "loss": 1.0216, "step": 19689 }, { - "epoch": 0.5579642381478648, + "epoch": 0.7704045699976524, "grad_norm": 0.0, - "learning_rate": 8.613116396415534e-06, - "loss": 0.9474, + "learning_rate": 2.63971251375022e-06, + "loss": 0.8478, "step": 19690 }, { - "epoch": 0.5579925755901273, + "epoch": 0.7704436966898819, "grad_norm": 0.0, - "learning_rate": 8.612207482440972e-06, - "loss": 0.8733, + "learning_rate": 2.638854720394243e-06, + "loss": 0.8535, "step": 19691 }, { - "epoch": 0.5580209130323897, + "epoch": 0.7704828233821113, "grad_norm": 0.0, - "learning_rate": 8.611298580156297e-06, - "loss": 0.7581, + "learning_rate": 2.6379970452490368e-06, + "loss": 0.9893, "step": 19692 }, { - "epoch": 0.5580492504746521, + "epoch": 0.7705219500743408, "grad_norm": 0.0, - "learning_rate": 8.610389689569171e-06, - "loss": 0.7532, + "learning_rate": 2.6371394883283708e-06, + "loss": 0.997, "step": 19693 }, { - "epoch": 0.5580775879169146, + "epoch": 0.7705610767665702, "grad_norm": 0.0, - "learning_rate": 8.60948081068725e-06, - "loss": 0.866, + "learning_rate": 2.6362820496460185e-06, + "loss": 0.9998, "step": 19694 }, { - "epoch": 0.5581059253591771, + "epoch": 0.7706002034587996, "grad_norm": 0.0, - "learning_rate": 8.608571943518187e-06, - "loss": 0.7892, + "learning_rate": 2.6354247292157486e-06, + "loss": 0.9492, "step": 19695 }, { - "epoch": 0.5581342628014395, + "epoch": 0.770639330151029, "grad_norm": 0.0, - "learning_rate": 8.607663088069639e-06, - "loss": 0.8737, + "learning_rate": 2.6345675270513325e-06, + "loss": 1.0852, "step": 19696 }, { - "epoch": 0.558162600243702, + "epoch": 0.7706784568432584, "grad_norm": 0.0, - "learning_rate": 8.606754244349264e-06, - "loss": 0.91, + "learning_rate": 2.6337104431665294e-06, + "loss": 1.0305, "step": 19697 }, { - "epoch": 0.5581909376859645, + "epoch": 0.7707175835354879, "grad_norm": 0.0, - "learning_rate": 8.605845412364712e-06, - "loss": 0.7344, + "learning_rate": 2.6328534775751103e-06, + "loss": 1.0686, "step": 19698 }, { - "epoch": 0.5582192751282269, + "epoch": 0.7707567102277173, "grad_norm": 0.0, - "learning_rate": 8.604936592123647e-06, - "loss": 0.8845, + "learning_rate": 2.6319966302908286e-06, + "loss": 1.0465, "step": 19699 }, { - "epoch": 0.5582476125704894, + "epoch": 0.7707958369199468, "grad_norm": 0.0, - "learning_rate": 8.604027783633713e-06, - "loss": 0.8664, + "learning_rate": 2.6311399013274484e-06, + "loss": 0.9121, "step": 19700 }, { - "epoch": 0.5582759500127519, + "epoch": 0.7708349636121762, "grad_norm": 0.0, - "learning_rate": 8.603118986902574e-06, - "loss": 0.8678, + "learning_rate": 2.6302832906987287e-06, + "loss": 0.8772, "step": 19701 }, { - "epoch": 0.5583042874550143, + "epoch": 0.7708740903044057, "grad_norm": 0.0, - "learning_rate": 8.602210201937884e-06, - "loss": 0.9076, + "learning_rate": 2.6294267984184264e-06, + "loss": 1.071, "step": 19702 }, { - "epoch": 0.5583326248972768, + "epoch": 0.7709132169966351, "grad_norm": 0.0, - "learning_rate": 8.601301428747293e-06, - "loss": 0.862, + "learning_rate": 2.6285704245002907e-06, + "loss": 1.0387, "step": 19703 }, { - "epoch": 0.5583609623395392, + "epoch": 0.7709523436888646, "grad_norm": 0.0, - "learning_rate": 8.600392667338465e-06, - "loss": 0.8092, + "learning_rate": 2.6277141689580777e-06, + "loss": 0.8909, "step": 19704 }, { - "epoch": 0.5583892997818017, + "epoch": 0.770991470381094, "grad_norm": 0.0, - "learning_rate": 8.599483917719044e-06, - "loss": 0.853, + "learning_rate": 2.6268580318055403e-06, + "loss": 0.8456, "step": 19705 }, { - "epoch": 0.5584176372240641, + "epoch": 0.7710305970733234, "grad_norm": 0.0, - "learning_rate": 8.59857517989669e-06, - "loss": 0.9447, + "learning_rate": 2.6260020130564212e-06, + "loss": 1.0046, "step": 19706 }, { - "epoch": 0.5584459746663266, + "epoch": 0.7710697237655528, "grad_norm": 0.0, - "learning_rate": 8.597666453879062e-06, - "loss": 0.9642, + "learning_rate": 2.625146112724468e-06, + "loss": 1.0429, "step": 19707 }, { - "epoch": 0.5584743121085891, + "epoch": 0.7711088504577823, "grad_norm": 0.0, - "learning_rate": 8.596757739673806e-06, - "loss": 0.8963, + "learning_rate": 2.624290330823429e-06, + "loss": 0.8206, "step": 19708 }, { - "epoch": 0.5585026495508515, + "epoch": 0.7711479771500117, "grad_norm": 0.0, - "learning_rate": 8.595849037288581e-06, - "loss": 0.8751, + "learning_rate": 2.6234346673670463e-06, + "loss": 0.9653, "step": 19709 }, { - "epoch": 0.558530986993114, + "epoch": 0.7711871038422412, "grad_norm": 0.0, - "learning_rate": 8.594940346731047e-06, - "loss": 0.9431, + "learning_rate": 2.6225791223690577e-06, + "loss": 1.0697, "step": 19710 }, { - "epoch": 0.5585593244353765, + "epoch": 0.7712262305344706, "grad_norm": 0.0, - "learning_rate": 8.594031668008845e-06, - "loss": 0.9643, + "learning_rate": 2.6217236958432034e-06, + "loss": 0.9412, "step": 19711 }, { - "epoch": 0.558587661877639, + "epoch": 0.7712653572267001, "grad_norm": 0.0, - "learning_rate": 8.593123001129642e-06, - "loss": 1.0517, + "learning_rate": 2.6208683878032214e-06, + "loss": 0.8821, "step": 19712 }, { - "epoch": 0.5586159993199014, + "epoch": 0.7713044839189295, "grad_norm": 0.0, - "learning_rate": 8.592214346101083e-06, - "loss": 0.7392, + "learning_rate": 2.6200131982628497e-06, + "loss": 1.0864, "step": 19713 }, { - "epoch": 0.5586443367621639, + "epoch": 0.771343610611159, "grad_norm": 0.0, - "learning_rate": 8.591305702930824e-06, - "loss": 0.9266, + "learning_rate": 2.6191581272358145e-06, + "loss": 0.8793, "step": 19714 }, { - "epoch": 0.5586726742044263, + "epoch": 0.7713827373033884, "grad_norm": 0.0, - "learning_rate": 8.590397071626522e-06, - "loss": 0.8799, + "learning_rate": 2.6183031747358546e-06, + "loss": 1.0358, "step": 19715 }, { - "epoch": 0.5587010116466887, + "epoch": 0.7714218639956179, "grad_norm": 0.0, - "learning_rate": 8.589488452195829e-06, - "loss": 0.88, + "learning_rate": 2.6174483407766938e-06, + "loss": 0.8956, "step": 19716 }, { - "epoch": 0.5587293490889512, + "epoch": 0.7714609906878472, "grad_norm": 0.0, - "learning_rate": 8.588579844646397e-06, - "loss": 0.9504, + "learning_rate": 2.61659362537206e-06, + "loss": 1.1234, "step": 19717 }, { - "epoch": 0.5587576865312137, + "epoch": 0.7715001173800767, "grad_norm": 0.0, - "learning_rate": 8.587671248985885e-06, - "loss": 0.8549, + "learning_rate": 2.615739028535682e-06, + "loss": 0.9909, "step": 19718 }, { - "epoch": 0.5587860239734762, + "epoch": 0.7715392440723061, "grad_norm": 0.0, - "learning_rate": 8.58676266522194e-06, - "loss": 0.8775, + "learning_rate": 2.6148845502812846e-06, + "loss": 0.9932, "step": 19719 }, { - "epoch": 0.5588143614157386, + "epoch": 0.7715783707645356, "grad_norm": 0.0, - "learning_rate": 8.585854093362219e-06, - "loss": 0.9792, + "learning_rate": 2.614030190622584e-06, + "loss": 1.0574, "step": 19720 }, { - "epoch": 0.5588426988580011, + "epoch": 0.771617497456765, "grad_norm": 0.0, - "learning_rate": 8.58494553341437e-06, - "loss": 0.8903, + "learning_rate": 2.6131759495733046e-06, + "loss": 0.9092, "step": 19721 }, { - "epoch": 0.5588710363002636, + "epoch": 0.7716566241489945, "grad_norm": 0.0, - "learning_rate": 8.584036985386053e-06, - "loss": 0.7887, + "learning_rate": 2.612321827147162e-06, + "loss": 1.0658, "step": 19722 }, { - "epoch": 0.558899373742526, + "epoch": 0.7716957508412239, "grad_norm": 0.0, - "learning_rate": 8.583128449284921e-06, - "loss": 0.9289, + "learning_rate": 2.611467823357877e-06, + "loss": 0.9182, "step": 19723 }, { - "epoch": 0.5589277111847885, + "epoch": 0.7717348775334533, "grad_norm": 0.0, - "learning_rate": 8.58221992511862e-06, - "loss": 0.8874, + "learning_rate": 2.6106139382191575e-06, + "loss": 1.0053, "step": 19724 }, { - "epoch": 0.558956048627051, + "epoch": 0.7717740042256828, "grad_norm": 0.0, - "learning_rate": 8.581311412894811e-06, - "loss": 0.8046, + "learning_rate": 2.6097601717447186e-06, + "loss": 0.8955, "step": 19725 }, { - "epoch": 0.5589843860693133, + "epoch": 0.7718131309179121, "grad_norm": 0.0, - "learning_rate": 8.58040291262114e-06, - "loss": 0.9169, + "learning_rate": 2.6089065239482714e-06, + "loss": 1.0805, "step": 19726 }, { - "epoch": 0.5590127235115758, + "epoch": 0.7718522576101416, "grad_norm": 0.0, - "learning_rate": 8.579494424305261e-06, - "loss": 0.795, + "learning_rate": 2.6080529948435262e-06, + "loss": 0.9439, "step": 19727 }, { - "epoch": 0.5590410609538383, + "epoch": 0.771891384302371, "grad_norm": 0.0, - "learning_rate": 8.578585947954831e-06, - "loss": 0.9054, + "learning_rate": 2.6071995844441845e-06, + "loss": 1.0444, "step": 19728 }, { - "epoch": 0.5590693983961008, + "epoch": 0.7719305109946005, "grad_norm": 0.0, - "learning_rate": 8.577677483577498e-06, - "loss": 0.8096, + "learning_rate": 2.606346292763957e-06, + "loss": 1.0351, "step": 19729 }, { - "epoch": 0.5590977358383632, + "epoch": 0.7719696376868299, "grad_norm": 0.0, - "learning_rate": 8.576769031180913e-06, - "loss": 0.8237, + "learning_rate": 2.605493119816537e-06, + "loss": 1.0465, "step": 19730 }, { - "epoch": 0.5591260732806257, + "epoch": 0.7720087643790594, "grad_norm": 0.0, - "learning_rate": 8.575860590772737e-06, - "loss": 0.855, + "learning_rate": 2.604640065615638e-06, + "loss": 0.9454, "step": 19731 }, { - "epoch": 0.5591544107228882, + "epoch": 0.7720478910712888, "grad_norm": 0.0, - "learning_rate": 8.574952162360612e-06, - "loss": 0.9134, + "learning_rate": 2.6037871301749484e-06, + "loss": 0.9823, "step": 19732 }, { - "epoch": 0.5591827481651506, + "epoch": 0.7720870177635183, "grad_norm": 0.0, - "learning_rate": 8.574043745952196e-06, - "loss": 0.9199, + "learning_rate": 2.602934313508174e-06, + "loss": 1.0998, "step": 19733 }, { - "epoch": 0.5592110856074131, + "epoch": 0.7721261444557477, "grad_norm": 0.0, - "learning_rate": 8.573135341555138e-06, - "loss": 0.8288, + "learning_rate": 2.6020816156289986e-06, + "loss": 0.9898, "step": 19734 }, { - "epoch": 0.5592394230496756, + "epoch": 0.7721652711479772, "grad_norm": 0.0, - "learning_rate": 8.57222694917709e-06, - "loss": 0.9725, + "learning_rate": 2.6012290365511297e-06, + "loss": 0.8615, "step": 19735 }, { - "epoch": 0.559267760491938, + "epoch": 0.7722043978402066, "grad_norm": 0.0, - "learning_rate": 8.571318568825709e-06, - "loss": 0.9464, + "learning_rate": 2.6003765762882473e-06, + "loss": 0.9733, "step": 19736 }, { - "epoch": 0.5592960979342004, + "epoch": 0.772243524532436, "grad_norm": 0.0, - "learning_rate": 8.570410200508637e-06, - "loss": 0.8083, + "learning_rate": 2.599524234854047e-06, + "loss": 1.0372, "step": 19737 }, { - "epoch": 0.5593244353764629, + "epoch": 0.7722826512246654, "grad_norm": 0.0, - "learning_rate": 8.569501844233531e-06, - "loss": 0.8778, + "learning_rate": 2.598672012262212e-06, + "loss": 0.8918, "step": 19738 }, { - "epoch": 0.5593527728187254, + "epoch": 0.7723217779168949, "grad_norm": 0.0, - "learning_rate": 8.568593500008047e-06, - "loss": 0.9805, + "learning_rate": 2.59781990852643e-06, + "loss": 0.9553, "step": 19739 }, { - "epoch": 0.5593811102609878, + "epoch": 0.7723609046091243, "grad_norm": 0.0, - "learning_rate": 8.567685167839827e-06, - "loss": 0.9217, + "learning_rate": 2.596967923660385e-06, + "loss": 0.9085, "step": 19740 }, { - "epoch": 0.5594094477032503, + "epoch": 0.7724000313013538, "grad_norm": 0.0, - "learning_rate": 8.566776847736528e-06, - "loss": 0.9224, + "learning_rate": 2.596116057677761e-06, + "loss": 0.8432, "step": 19741 }, { - "epoch": 0.5594377851455128, + "epoch": 0.7724391579935832, "grad_norm": 0.0, - "learning_rate": 8.5658685397058e-06, - "loss": 0.9254, + "learning_rate": 2.595264310592234e-06, + "loss": 0.9997, "step": 19742 }, { - "epoch": 0.5594661225877752, + "epoch": 0.7724782846858127, "grad_norm": 0.0, - "learning_rate": 8.564960243755292e-06, - "loss": 0.8292, + "learning_rate": 2.594412682417482e-06, + "loss": 0.8987, "step": 19743 }, { - "epoch": 0.5594944600300377, + "epoch": 0.7725174113780421, "grad_norm": 0.0, - "learning_rate": 8.564051959892662e-06, - "loss": 0.9016, + "learning_rate": 2.593561173167186e-06, + "loss": 0.9339, "step": 19744 }, { - "epoch": 0.5595227974723002, + "epoch": 0.7725565380702716, "grad_norm": 0.0, - "learning_rate": 8.56314368812555e-06, - "loss": 0.8398, + "learning_rate": 2.592709782855014e-06, + "loss": 0.8, "step": 19745 }, { - "epoch": 0.5595511349145627, + "epoch": 0.772595664762501, "grad_norm": 0.0, - "learning_rate": 8.562235428461614e-06, - "loss": 0.931, + "learning_rate": 2.5918585114946415e-06, + "loss": 0.9162, "step": 19746 }, { - "epoch": 0.559579472356825, + "epoch": 0.7726347914547305, "grad_norm": 0.0, - "learning_rate": 8.561327180908503e-06, - "loss": 0.9113, + "learning_rate": 2.591007359099741e-06, + "loss": 0.9158, "step": 19747 }, { - "epoch": 0.5596078097990875, + "epoch": 0.7726739181469598, "grad_norm": 0.0, - "learning_rate": 8.560418945473866e-06, - "loss": 0.9031, + "learning_rate": 2.5901563256839745e-06, + "loss": 0.9678, "step": 19748 }, { - "epoch": 0.55963614724135, + "epoch": 0.7727130448391893, "grad_norm": 0.0, - "learning_rate": 8.55951072216536e-06, - "loss": 0.829, + "learning_rate": 2.589305411261014e-06, + "loss": 0.912, "step": 19749 }, { - "epoch": 0.5596644846836124, + "epoch": 0.7727521715314187, "grad_norm": 0.0, - "learning_rate": 8.558602510990625e-06, - "loss": 0.8037, + "learning_rate": 2.588454615844521e-06, + "loss": 0.9366, "step": 19750 }, { - "epoch": 0.5596928221258749, + "epoch": 0.7727912982236482, "grad_norm": 0.0, - "learning_rate": 8.557694311957316e-06, - "loss": 0.8379, + "learning_rate": 2.5876039394481634e-06, + "loss": 1.0416, "step": 19751 }, { - "epoch": 0.5597211595681374, + "epoch": 0.7728304249158776, "grad_norm": 0.0, - "learning_rate": 8.556786125073089e-06, - "loss": 0.8511, + "learning_rate": 2.586753382085595e-06, + "loss": 1.0642, "step": 19752 }, { - "epoch": 0.5597494970103999, + "epoch": 0.772869551608107, "grad_norm": 0.0, - "learning_rate": 8.555877950345584e-06, - "loss": 0.9447, + "learning_rate": 2.5859029437704775e-06, + "loss": 0.9503, "step": 19753 }, { - "epoch": 0.5597778344526623, + "epoch": 0.7729086783003365, "grad_norm": 0.0, - "learning_rate": 8.554969787782456e-06, - "loss": 1.0066, + "learning_rate": 2.585052624516472e-06, + "loss": 0.9641, "step": 19754 }, { - "epoch": 0.5598061718949248, + "epoch": 0.7729478049925659, "grad_norm": 0.0, - "learning_rate": 8.554061637391353e-06, - "loss": 0.8525, + "learning_rate": 2.5842024243372268e-06, + "loss": 0.9081, "step": 19755 }, { - "epoch": 0.5598345093371873, + "epoch": 0.7729869316847954, "grad_norm": 0.0, - "learning_rate": 8.553153499179926e-06, - "loss": 0.8653, + "learning_rate": 2.5833523432463982e-06, + "loss": 0.9245, "step": 19756 }, { - "epoch": 0.5598628467794496, + "epoch": 0.7730260583770248, "grad_norm": 0.0, - "learning_rate": 8.552245373155827e-06, - "loss": 0.8253, + "learning_rate": 2.5825023812576377e-06, + "loss": 1.0266, "step": 19757 }, { - "epoch": 0.5598911842217121, + "epoch": 0.7730651850692543, "grad_norm": 0.0, - "learning_rate": 8.5513372593267e-06, - "loss": 0.9061, + "learning_rate": 2.5816525383845968e-06, + "loss": 1.0833, "step": 19758 }, { - "epoch": 0.5599195216639746, + "epoch": 0.7731043117614836, "grad_norm": 0.0, - "learning_rate": 8.550429157700196e-06, - "loss": 0.882, + "learning_rate": 2.5808028146409182e-06, + "loss": 1.0352, "step": 19759 }, { - "epoch": 0.5599478591062371, + "epoch": 0.7731434384537131, "grad_norm": 0.0, - "learning_rate": 8.549521068283968e-06, - "loss": 1.0286, + "learning_rate": 2.579953210040251e-06, + "loss": 0.9842, "step": 19760 }, { - "epoch": 0.5599761965484995, + "epoch": 0.7731825651459425, "grad_norm": 0.0, - "learning_rate": 8.548612991085661e-06, - "loss": 0.8782, + "learning_rate": 2.5791037245962324e-06, + "loss": 0.9734, "step": 19761 }, { - "epoch": 0.560004533990762, + "epoch": 0.773221691838172, "grad_norm": 0.0, - "learning_rate": 8.547704926112931e-06, - "loss": 0.9083, + "learning_rate": 2.578254358322515e-06, + "loss": 1.1058, "step": 19762 }, { - "epoch": 0.5600328714330245, + "epoch": 0.7732608185304014, "grad_norm": 0.0, - "learning_rate": 8.546796873373415e-06, - "loss": 0.8174, + "learning_rate": 2.5774051112327305e-06, + "loss": 0.9519, "step": 19763 }, { - "epoch": 0.5600612088752869, + "epoch": 0.7732999452226309, "grad_norm": 0.0, - "learning_rate": 8.54588883287477e-06, - "loss": 0.9403, + "learning_rate": 2.5765559833405205e-06, + "loss": 0.9885, "step": 19764 }, { - "epoch": 0.5600895463175494, + "epoch": 0.7733390719148603, "grad_norm": 0.0, - "learning_rate": 8.54498080462465e-06, - "loss": 0.8325, + "learning_rate": 2.5757069746595175e-06, + "loss": 1.0694, "step": 19765 }, { - "epoch": 0.5601178837598119, + "epoch": 0.7733781986070898, "grad_norm": 0.0, - "learning_rate": 8.544072788630688e-06, - "loss": 0.8894, + "learning_rate": 2.5748580852033565e-06, + "loss": 1.0513, "step": 19766 }, { - "epoch": 0.5601462212020742, + "epoch": 0.7734173252993192, "grad_norm": 0.0, - "learning_rate": 8.543164784900544e-06, - "loss": 0.899, + "learning_rate": 2.5740093149856706e-06, + "loss": 0.869, "step": 19767 }, { - "epoch": 0.5601745586443367, + "epoch": 0.7734564519915487, "grad_norm": 0.0, - "learning_rate": 8.542256793441866e-06, - "loss": 0.9634, + "learning_rate": 2.5731606640200923e-06, + "loss": 0.958, "step": 19768 }, { - "epoch": 0.5602028960865992, + "epoch": 0.773495578683778, "grad_norm": 0.0, - "learning_rate": 8.541348814262298e-06, - "loss": 0.8012, + "learning_rate": 2.572312132320246e-06, + "loss": 0.8479, "step": 19769 }, { - "epoch": 0.5602312335288617, + "epoch": 0.7735347053760075, "grad_norm": 0.0, - "learning_rate": 8.540440847369495e-06, - "loss": 0.8893, + "learning_rate": 2.5714637198997583e-06, + "loss": 0.9901, "step": 19770 }, { - "epoch": 0.5602595709711241, + "epoch": 0.7735738320682369, "grad_norm": 0.0, - "learning_rate": 8.539532892771098e-06, - "loss": 0.8979, + "learning_rate": 2.570615426772255e-06, + "loss": 0.9521, "step": 19771 }, { - "epoch": 0.5602879084133866, + "epoch": 0.7736129587604664, "grad_norm": 0.0, - "learning_rate": 8.538624950474756e-06, - "loss": 0.8155, + "learning_rate": 2.5697672529513605e-06, + "loss": 0.9025, "step": 19772 }, { - "epoch": 0.5603162458556491, + "epoch": 0.7736520854526958, "grad_norm": 0.0, - "learning_rate": 8.53771702048812e-06, - "loss": 0.7782, + "learning_rate": 2.568919198450691e-06, + "loss": 0.994, "step": 19773 }, { - "epoch": 0.5603445832979115, + "epoch": 0.7736912121449253, "grad_norm": 0.0, - "learning_rate": 8.536809102818836e-06, - "loss": 0.9873, + "learning_rate": 2.5680712632838713e-06, + "loss": 0.998, "step": 19774 }, { - "epoch": 0.560372920740174, + "epoch": 0.7737303388371547, "grad_norm": 0.0, - "learning_rate": 8.535901197474553e-06, - "loss": 0.9285, + "learning_rate": 2.5672234474645076e-06, + "loss": 0.9254, "step": 19775 }, { - "epoch": 0.5604012581824365, + "epoch": 0.7737694655293842, "grad_norm": 0.0, - "learning_rate": 8.53499330446292e-06, - "loss": 0.896, + "learning_rate": 2.566375751006227e-06, + "loss": 0.8698, "step": 19776 }, { - "epoch": 0.560429595624699, + "epoch": 0.7738085922216136, "grad_norm": 0.0, - "learning_rate": 8.534085423791579e-06, - "loss": 0.8851, + "learning_rate": 2.5655281739226356e-06, + "loss": 0.9299, "step": 19777 }, { - "epoch": 0.5604579330669613, + "epoch": 0.7738477189138431, "grad_norm": 0.0, - "learning_rate": 8.533177555468185e-06, - "loss": 0.7649, + "learning_rate": 2.564680716227348e-06, + "loss": 0.9112, "step": 19778 }, { - "epoch": 0.5604862705092238, + "epoch": 0.7738868456060725, "grad_norm": 0.0, - "learning_rate": 8.532269699500377e-06, - "loss": 0.9567, + "learning_rate": 2.563833377933964e-06, + "loss": 0.9756, "step": 19779 }, { - "epoch": 0.5605146079514863, + "epoch": 0.773925972298302, "grad_norm": 0.0, - "learning_rate": 8.531361855895806e-06, - "loss": 0.846, + "learning_rate": 2.5629861590561055e-06, + "loss": 1.0142, "step": 19780 }, { - "epoch": 0.5605429453937487, + "epoch": 0.7739650989905313, "grad_norm": 0.0, - "learning_rate": 8.530454024662123e-06, - "loss": 0.8729, + "learning_rate": 2.5621390596073657e-06, + "loss": 1.0188, "step": 19781 }, { - "epoch": 0.5605712828360112, + "epoch": 0.7740042256827607, "grad_norm": 0.0, - "learning_rate": 8.529546205806967e-06, - "loss": 0.9756, + "learning_rate": 2.5612920796013575e-06, + "loss": 1.0431, "step": 19782 }, { - "epoch": 0.5605996202782737, + "epoch": 0.7740433523749902, "grad_norm": 0.0, - "learning_rate": 8.528638399337997e-06, - "loss": 0.8797, + "learning_rate": 2.5604452190516693e-06, + "loss": 0.882, "step": 19783 }, { - "epoch": 0.5606279577205362, + "epoch": 0.7740824790672196, "grad_norm": 0.0, - "learning_rate": 8.527730605262846e-06, - "loss": 0.7649, + "learning_rate": 2.559598477971915e-06, + "loss": 0.9645, "step": 19784 }, { - "epoch": 0.5606562951627986, + "epoch": 0.7741216057594491, "grad_norm": 0.0, - "learning_rate": 8.526822823589165e-06, - "loss": 0.9621, + "learning_rate": 2.5587518563756843e-06, + "loss": 0.9366, "step": 19785 }, { - "epoch": 0.5606846326050611, + "epoch": 0.7741607324516785, "grad_norm": 0.0, - "learning_rate": 8.525915054324607e-06, - "loss": 0.793, + "learning_rate": 2.557905354276575e-06, + "loss": 0.9741, "step": 19786 }, { - "epoch": 0.5607129700473236, + "epoch": 0.774199859143908, "grad_norm": 0.0, - "learning_rate": 8.52500729747681e-06, - "loss": 0.8161, + "learning_rate": 2.5570589716881787e-06, + "loss": 0.9329, "step": 19787 }, { - "epoch": 0.560741307489586, + "epoch": 0.7742389858361374, "grad_norm": 0.0, - "learning_rate": 8.524099553053425e-06, - "loss": 0.8471, + "learning_rate": 2.5562127086240893e-06, + "loss": 0.8906, "step": 19788 }, { - "epoch": 0.5607696449318484, + "epoch": 0.7742781125283669, "grad_norm": 0.0, - "learning_rate": 8.523191821062103e-06, - "loss": 0.8021, + "learning_rate": 2.5553665650978953e-06, + "loss": 1.0296, "step": 19789 }, { - "epoch": 0.5607979823741109, + "epoch": 0.7743172392205963, "grad_norm": 0.0, - "learning_rate": 8.52228410151048e-06, - "loss": 0.7796, + "learning_rate": 2.554520541123189e-06, + "loss": 1.0166, "step": 19790 }, { - "epoch": 0.5608263198163733, + "epoch": 0.7743563659128258, "grad_norm": 0.0, - "learning_rate": 8.52137639440621e-06, - "loss": 0.8185, + "learning_rate": 2.55367463671355e-06, + "loss": 0.9698, "step": 19791 }, { - "epoch": 0.5608546572586358, + "epoch": 0.7743954926050551, "grad_norm": 0.0, - "learning_rate": 8.520468699756932e-06, - "loss": 0.8503, + "learning_rate": 2.5528288518825652e-06, + "loss": 1.0091, "step": 19792 }, { - "epoch": 0.5608829947008983, + "epoch": 0.7744346192972846, "grad_norm": 0.0, - "learning_rate": 8.519561017570295e-06, - "loss": 0.7972, + "learning_rate": 2.5519831866438205e-06, + "loss": 0.8959, "step": 19793 }, { - "epoch": 0.5609113321431608, + "epoch": 0.774473745989514, "grad_norm": 0.0, - "learning_rate": 8.518653347853948e-06, - "loss": 0.7953, + "learning_rate": 2.55113764101089e-06, + "loss": 0.9815, "step": 19794 }, { - "epoch": 0.5609396695854232, + "epoch": 0.7745128726817435, "grad_norm": 0.0, - "learning_rate": 8.517745690615531e-06, - "loss": 0.9381, + "learning_rate": 2.5502922149973553e-06, + "loss": 0.8812, "step": 19795 }, { - "epoch": 0.5609680070276857, + "epoch": 0.7745519993739729, "grad_norm": 0.0, - "learning_rate": 8.516838045862694e-06, - "loss": 0.8841, + "learning_rate": 2.549446908616795e-06, + "loss": 1.0417, "step": 19796 }, { - "epoch": 0.5609963444699482, + "epoch": 0.7745911260662024, "grad_norm": 0.0, - "learning_rate": 8.515930413603084e-06, - "loss": 0.8351, + "learning_rate": 2.5486017218827784e-06, + "loss": 0.934, "step": 19797 }, { - "epoch": 0.5610246819122106, + "epoch": 0.7746302527584318, "grad_norm": 0.0, - "learning_rate": 8.51502279384434e-06, - "loss": 0.7927, + "learning_rate": 2.547756654808882e-06, + "loss": 0.9546, "step": 19798 }, { - "epoch": 0.561053019354473, + "epoch": 0.7746693794506613, "grad_norm": 0.0, - "learning_rate": 8.51411518659411e-06, - "loss": 0.87, + "learning_rate": 2.546911707408677e-06, + "loss": 1.0166, "step": 19799 }, { - "epoch": 0.5610813567967355, + "epoch": 0.7747085061428907, "grad_norm": 0.0, - "learning_rate": 8.51320759186004e-06, - "loss": 0.9357, + "learning_rate": 2.546066879695729e-06, + "loss": 0.9543, "step": 19800 }, { - "epoch": 0.561109694238998, + "epoch": 0.7747476328351202, "grad_norm": 0.0, - "learning_rate": 8.512300009649774e-06, - "loss": 0.7653, + "learning_rate": 2.545222171683606e-06, + "loss": 0.9337, "step": 19801 }, { - "epoch": 0.5611380316812604, + "epoch": 0.7747867595273495, "grad_norm": 0.0, - "learning_rate": 8.511392439970962e-06, - "loss": 0.9143, + "learning_rate": 2.544377583385873e-06, + "loss": 0.9643, "step": 19802 }, { - "epoch": 0.5611663691235229, + "epoch": 0.774825886219579, "grad_norm": 0.0, - "learning_rate": 8.510484882831238e-06, - "loss": 0.8906, + "learning_rate": 2.543533114816098e-06, + "loss": 0.9101, "step": 19803 }, { - "epoch": 0.5611947065657854, + "epoch": 0.7748650129118084, "grad_norm": 0.0, - "learning_rate": 8.509577338238255e-06, - "loss": 0.8681, + "learning_rate": 2.542688765987833e-06, + "loss": 0.964, "step": 19804 }, { - "epoch": 0.5612230440080478, + "epoch": 0.7749041396040379, "grad_norm": 0.0, - "learning_rate": 8.508669806199658e-06, - "loss": 0.8747, + "learning_rate": 2.5418445369146462e-06, + "loss": 0.969, "step": 19805 }, { - "epoch": 0.5612513814503103, + "epoch": 0.7749432662962673, "grad_norm": 0.0, - "learning_rate": 8.507762286723088e-06, - "loss": 0.9234, + "learning_rate": 2.5410004276100842e-06, + "loss": 0.9481, "step": 19806 }, { - "epoch": 0.5612797188925728, + "epoch": 0.7749823929884968, "grad_norm": 0.0, - "learning_rate": 8.506854779816191e-06, - "loss": 0.9687, + "learning_rate": 2.540156438087714e-06, + "loss": 1.0579, "step": 19807 }, { - "epoch": 0.5613080563348353, + "epoch": 0.7750215196807262, "grad_norm": 0.0, - "learning_rate": 8.505947285486608e-06, - "loss": 0.8878, + "learning_rate": 2.539312568361082e-06, + "loss": 1.019, "step": 19808 }, { - "epoch": 0.5613363937770977, + "epoch": 0.7750606463729556, "grad_norm": 0.0, - "learning_rate": 8.505039803741985e-06, - "loss": 0.8037, + "learning_rate": 2.5384688184437433e-06, + "loss": 0.9686, "step": 19809 }, { - "epoch": 0.5613647312193601, + "epoch": 0.7750997730651851, "grad_norm": 0.0, - "learning_rate": 8.504132334589972e-06, - "loss": 0.8472, + "learning_rate": 2.537625188349241e-06, + "loss": 0.9828, "step": 19810 }, { - "epoch": 0.5613930686616226, + "epoch": 0.7751388997574145, "grad_norm": 0.0, - "learning_rate": 8.503224878038203e-06, - "loss": 0.7028, + "learning_rate": 2.5367816780911312e-06, + "loss": 0.9565, "step": 19811 }, { - "epoch": 0.561421406103885, + "epoch": 0.775178026449644, "grad_norm": 0.0, - "learning_rate": 8.502317434094331e-06, - "loss": 0.8926, + "learning_rate": 2.535938287682954e-06, + "loss": 0.8945, "step": 19812 }, { - "epoch": 0.5614497435461475, + "epoch": 0.7752171531418733, "grad_norm": 0.0, - "learning_rate": 8.501410002765991e-06, - "loss": 0.864, + "learning_rate": 2.5350950171382583e-06, + "loss": 1.0399, "step": 19813 }, { - "epoch": 0.56147808098841, + "epoch": 0.7752562798341028, "grad_norm": 0.0, - "learning_rate": 8.500502584060832e-06, - "loss": 0.8973, + "learning_rate": 2.5342518664705786e-06, + "loss": 1.0017, "step": 19814 }, { - "epoch": 0.5615064184306724, + "epoch": 0.7752954065263322, "grad_norm": 0.0, - "learning_rate": 8.4995951779865e-06, - "loss": 0.8075, + "learning_rate": 2.5334088356934592e-06, + "loss": 1.0029, "step": 19815 }, { - "epoch": 0.5615347558729349, + "epoch": 0.7753345332185617, "grad_norm": 0.0, - "learning_rate": 8.498687784550632e-06, - "loss": 0.8341, + "learning_rate": 2.532565924820438e-06, + "loss": 1.006, "step": 19816 }, { - "epoch": 0.5615630933151974, + "epoch": 0.7753736599107911, "grad_norm": 0.0, - "learning_rate": 8.497780403760872e-06, - "loss": 0.8583, + "learning_rate": 2.5317231338650538e-06, + "loss": 1.0504, "step": 19817 }, { - "epoch": 0.5615914307574599, + "epoch": 0.7754127866030206, "grad_norm": 0.0, - "learning_rate": 8.49687303562487e-06, - "loss": 0.83, + "learning_rate": 2.5308804628408346e-06, + "loss": 1.1013, "step": 19818 }, { - "epoch": 0.5616197681997223, + "epoch": 0.77545191329525, "grad_norm": 0.0, - "learning_rate": 8.49596568015026e-06, - "loss": 0.8059, + "learning_rate": 2.530037911761315e-06, + "loss": 0.9132, "step": 19819 }, { - "epoch": 0.5616481056419848, + "epoch": 0.7754910399874795, "grad_norm": 0.0, - "learning_rate": 8.495058337344698e-06, - "loss": 0.9564, + "learning_rate": 2.529195480640028e-06, + "loss": 0.9003, "step": 19820 }, { - "epoch": 0.5616764430842472, + "epoch": 0.7755301666797089, "grad_norm": 0.0, - "learning_rate": 8.494151007215811e-06, - "loss": 0.8661, + "learning_rate": 2.5283531694905016e-06, + "loss": 1.0274, "step": 19821 }, { - "epoch": 0.5617047805265096, + "epoch": 0.7755692933719384, "grad_norm": 0.0, - "learning_rate": 8.49324368977125e-06, - "loss": 0.9678, + "learning_rate": 2.5275109783262586e-06, + "loss": 1.011, "step": 19822 }, { - "epoch": 0.5617331179687721, + "epoch": 0.7756084200641677, "grad_norm": 0.0, - "learning_rate": 8.49233638501866e-06, - "loss": 0.837, + "learning_rate": 2.5266689071608285e-06, + "loss": 1.062, "step": 19823 }, { - "epoch": 0.5617614554110346, + "epoch": 0.7756475467563972, "grad_norm": 0.0, - "learning_rate": 8.491429092965677e-06, - "loss": 0.8876, + "learning_rate": 2.525826956007724e-06, + "loss": 0.8643, "step": 19824 }, { - "epoch": 0.5617897928532971, + "epoch": 0.7756866734486266, "grad_norm": 0.0, - "learning_rate": 8.490521813619947e-06, - "loss": 0.9327, + "learning_rate": 2.5249851248804804e-06, + "loss": 0.9611, "step": 19825 }, { - "epoch": 0.5618181302955595, + "epoch": 0.7757258001408561, "grad_norm": 0.0, - "learning_rate": 8.489614546989116e-06, - "loss": 0.8748, + "learning_rate": 2.524143413792606e-06, + "loss": 1.0054, "step": 19826 }, { - "epoch": 0.561846467737822, + "epoch": 0.7757649268330855, "grad_norm": 0.0, - "learning_rate": 8.48870729308082e-06, - "loss": 0.9498, + "learning_rate": 2.523301822757623e-06, + "loss": 0.9096, "step": 19827 }, { - "epoch": 0.5618748051800845, + "epoch": 0.775804053525315, "grad_norm": 0.0, - "learning_rate": 8.487800051902706e-06, - "loss": 0.8609, + "learning_rate": 2.5224603517890377e-06, + "loss": 0.9222, "step": 19828 }, { - "epoch": 0.5619031426223469, + "epoch": 0.7758431802175444, "grad_norm": 0.0, - "learning_rate": 8.48689282346241e-06, - "loss": 0.8569, + "learning_rate": 2.521619000900376e-06, + "loss": 0.9294, "step": 19829 }, { - "epoch": 0.5619314800646094, + "epoch": 0.7758823069097739, "grad_norm": 0.0, - "learning_rate": 8.485985607767578e-06, - "loss": 0.8546, + "learning_rate": 2.5207777701051385e-06, + "loss": 1.0168, "step": 19830 }, { - "epoch": 0.5619598175068719, + "epoch": 0.7759214336020033, "grad_norm": 0.0, - "learning_rate": 8.485078404825854e-06, - "loss": 0.8463, + "learning_rate": 2.5199366594168417e-06, + "loss": 1.1129, "step": 19831 }, { - "epoch": 0.5619881549491343, + "epoch": 0.7759605602942328, "grad_norm": 0.0, - "learning_rate": 8.484171214644876e-06, - "loss": 0.821, + "learning_rate": 2.5190956688489855e-06, + "loss": 1.0177, "step": 19832 }, { - "epoch": 0.5620164923913967, + "epoch": 0.7759996869864622, "grad_norm": 0.0, - "learning_rate": 8.483264037232284e-06, - "loss": 0.8277, + "learning_rate": 2.5182547984150794e-06, + "loss": 0.9419, "step": 19833 }, { - "epoch": 0.5620448298336592, + "epoch": 0.7760388136786917, "grad_norm": 0.0, - "learning_rate": 8.482356872595729e-06, - "loss": 0.8242, + "learning_rate": 2.5174140481286257e-06, + "loss": 1.0178, "step": 19834 }, { - "epoch": 0.5620731672759217, + "epoch": 0.776077940370921, "grad_norm": 0.0, - "learning_rate": 8.48144972074284e-06, - "loss": 0.9982, + "learning_rate": 2.5165734180031286e-06, + "loss": 1.054, "step": 19835 }, { - "epoch": 0.5621015047181841, + "epoch": 0.7761170670631505, "grad_norm": 0.0, - "learning_rate": 8.480542581681268e-06, - "loss": 0.9182, + "learning_rate": 2.515732908052083e-06, + "loss": 1.1055, "step": 19836 }, { - "epoch": 0.5621298421604466, + "epoch": 0.7761561937553799, "grad_norm": 0.0, - "learning_rate": 8.479635455418647e-06, - "loss": 0.819, + "learning_rate": 2.514892518288988e-06, + "loss": 1.0009, "step": 19837 }, { - "epoch": 0.5621581796027091, + "epoch": 0.7761953204476093, "grad_norm": 0.0, - "learning_rate": 8.478728341962619e-06, - "loss": 0.8687, + "learning_rate": 2.514052248727343e-06, + "loss": 0.8876, "step": 19838 }, { - "epoch": 0.5621865170449715, + "epoch": 0.7762344471398388, "grad_norm": 0.0, - "learning_rate": 8.477821241320831e-06, - "loss": 0.8372, + "learning_rate": 2.5132120993806366e-06, + "loss": 0.9797, "step": 19839 }, { - "epoch": 0.562214854487234, + "epoch": 0.7762735738320682, "grad_norm": 0.0, - "learning_rate": 8.476914153500917e-06, - "loss": 0.8998, + "learning_rate": 2.5123720702623612e-06, + "loss": 1.0221, "step": 19840 }, { - "epoch": 0.5622431919294965, + "epoch": 0.7763127005242977, "grad_norm": 0.0, - "learning_rate": 8.476007078510526e-06, - "loss": 0.9085, + "learning_rate": 2.511532161386008e-06, + "loss": 0.9426, "step": 19841 }, { - "epoch": 0.562271529371759, + "epoch": 0.7763518272165271, "grad_norm": 0.0, - "learning_rate": 8.475100016357288e-06, - "loss": 0.8696, + "learning_rate": 2.510692372765068e-06, + "loss": 1.0336, "step": 19842 }, { - "epoch": 0.5622998668140213, + "epoch": 0.7763909539087566, "grad_norm": 0.0, - "learning_rate": 8.47419296704885e-06, - "loss": 0.8713, + "learning_rate": 2.5098527044130207e-06, + "loss": 1.047, "step": 19843 }, { - "epoch": 0.5623282042562838, + "epoch": 0.776430080600986, "grad_norm": 0.0, - "learning_rate": 8.473285930592852e-06, - "loss": 0.9647, + "learning_rate": 2.509013156343356e-06, + "loss": 1.04, "step": 19844 }, { - "epoch": 0.5623565416985463, + "epoch": 0.7764692072932154, "grad_norm": 0.0, - "learning_rate": 8.472378906996932e-06, - "loss": 0.7881, + "learning_rate": 2.508173728569551e-06, + "loss": 0.9055, "step": 19845 }, { - "epoch": 0.5623848791408087, + "epoch": 0.7765083339854448, "grad_norm": 0.0, - "learning_rate": 8.471471896268732e-06, - "loss": 0.9216, + "learning_rate": 2.5073344211050875e-06, + "loss": 1.0221, "step": 19846 }, { - "epoch": 0.5624132165830712, + "epoch": 0.7765474606776743, "grad_norm": 0.0, - "learning_rate": 8.470564898415897e-06, - "loss": 0.8582, + "learning_rate": 2.506495233963444e-06, + "loss": 1.0629, "step": 19847 }, { - "epoch": 0.5624415540253337, + "epoch": 0.7765865873699037, "grad_norm": 0.0, - "learning_rate": 8.469657913446055e-06, - "loss": 0.8893, + "learning_rate": 2.5056561671581003e-06, + "loss": 1.0078, "step": 19848 }, { - "epoch": 0.5624698914675962, + "epoch": 0.7766257140621332, "grad_norm": 0.0, - "learning_rate": 8.468750941366858e-06, - "loss": 0.6564, + "learning_rate": 2.5048172207025257e-06, + "loss": 1.0537, "step": 19849 }, { - "epoch": 0.5624982289098586, + "epoch": 0.7766648407543626, "grad_norm": 0.0, - "learning_rate": 8.467843982185937e-06, - "loss": 0.9567, + "learning_rate": 2.5039783946101935e-06, + "loss": 0.9175, "step": 19850 }, { - "epoch": 0.5625265663521211, + "epoch": 0.7767039674465921, "grad_norm": 0.0, - "learning_rate": 8.466937035910934e-06, - "loss": 0.9444, + "learning_rate": 2.503139688894576e-06, + "loss": 0.9621, "step": 19851 }, { - "epoch": 0.5625549037943836, + "epoch": 0.7767430941388215, "grad_norm": 0.0, - "learning_rate": 8.466030102549493e-06, - "loss": 0.6908, + "learning_rate": 2.5023011035691435e-06, + "loss": 0.9601, "step": 19852 }, { - "epoch": 0.5625832412366459, + "epoch": 0.776782220831051, "grad_norm": 0.0, - "learning_rate": 8.465123182109247e-06, - "loss": 0.9272, + "learning_rate": 2.501462638647357e-06, + "loss": 0.8281, "step": 19853 }, { - "epoch": 0.5626115786789084, + "epoch": 0.7768213475232804, "grad_norm": 0.0, - "learning_rate": 8.464216274597839e-06, - "loss": 0.9267, + "learning_rate": 2.5006242941426874e-06, + "loss": 0.9291, "step": 19854 }, { - "epoch": 0.5626399161211709, + "epoch": 0.7768604742155099, "grad_norm": 0.0, - "learning_rate": 8.463309380022911e-06, - "loss": 0.9084, + "learning_rate": 2.4997860700685883e-06, + "loss": 0.8788, "step": 19855 }, { - "epoch": 0.5626682535634334, + "epoch": 0.7768996009077392, "grad_norm": 0.0, - "learning_rate": 8.462402498392095e-06, - "loss": 0.7834, + "learning_rate": 2.498947966438533e-06, + "loss": 0.9539, "step": 19856 }, { - "epoch": 0.5626965910056958, + "epoch": 0.7769387275999687, "grad_norm": 0.0, - "learning_rate": 8.461495629713036e-06, - "loss": 0.8237, + "learning_rate": 2.4981099832659706e-06, + "loss": 0.9387, "step": 19857 }, { - "epoch": 0.5627249284479583, + "epoch": 0.7769778542921981, "grad_norm": 0.0, - "learning_rate": 8.460588773993368e-06, - "loss": 0.9051, + "learning_rate": 2.497272120564365e-06, + "loss": 0.898, "step": 19858 }, { - "epoch": 0.5627532658902208, + "epoch": 0.7770169809844276, "grad_norm": 0.0, - "learning_rate": 8.459681931240734e-06, - "loss": 0.8462, + "learning_rate": 2.496434378347161e-06, + "loss": 1.0617, "step": 19859 }, { - "epoch": 0.5627816033324832, + "epoch": 0.777056107676657, "grad_norm": 0.0, - "learning_rate": 8.458775101462773e-06, - "loss": 0.787, + "learning_rate": 2.495596756627825e-06, + "loss": 0.9244, "step": 19860 }, { - "epoch": 0.5628099407747457, + "epoch": 0.7770952343688865, "grad_norm": 0.0, - "learning_rate": 8.45786828466712e-06, - "loss": 0.8765, + "learning_rate": 2.4947592554197988e-06, + "loss": 1.0395, "step": 19861 }, { - "epoch": 0.5628382782170082, + "epoch": 0.7771343610611159, "grad_norm": 0.0, - "learning_rate": 8.456961480861413e-06, - "loss": 0.8828, + "learning_rate": 2.493921874736537e-06, + "loss": 1.0191, "step": 19862 }, { - "epoch": 0.5628666156592705, + "epoch": 0.7771734877533454, "grad_norm": 0.0, - "learning_rate": 8.456054690053296e-06, - "loss": 0.8455, + "learning_rate": 2.493084614591481e-06, + "loss": 1.0336, "step": 19863 }, { - "epoch": 0.562894953101533, + "epoch": 0.7772126144455748, "grad_norm": 0.0, - "learning_rate": 8.455147912250401e-06, - "loss": 0.9138, + "learning_rate": 2.4922474749980798e-06, + "loss": 0.9453, "step": 19864 }, { - "epoch": 0.5629232905437955, + "epoch": 0.7772517411378043, "grad_norm": 0.0, - "learning_rate": 8.45424114746037e-06, - "loss": 0.8014, + "learning_rate": 2.491410455969776e-06, + "loss": 0.9952, "step": 19865 }, { - "epoch": 0.562951627986058, + "epoch": 0.7772908678300336, "grad_norm": 0.0, - "learning_rate": 8.453334395690839e-06, - "loss": 0.7997, + "learning_rate": 2.490573557520014e-06, + "loss": 1.0132, "step": 19866 }, { - "epoch": 0.5629799654283204, + "epoch": 0.777329994522263, "grad_norm": 0.0, - "learning_rate": 8.452427656949446e-06, - "loss": 0.9088, + "learning_rate": 2.4897367796622283e-06, + "loss": 0.8665, "step": 19867 }, { - "epoch": 0.5630083028705829, + "epoch": 0.7773691212144925, "grad_norm": 0.0, - "learning_rate": 8.451520931243833e-06, - "loss": 0.8637, + "learning_rate": 2.4889001224098596e-06, + "loss": 1.0019, "step": 19868 }, { - "epoch": 0.5630366403128454, + "epoch": 0.7774082479067219, "grad_norm": 0.0, - "learning_rate": 8.450614218581631e-06, - "loss": 0.8893, + "learning_rate": 2.4880635857763424e-06, + "loss": 0.9815, "step": 19869 }, { - "epoch": 0.5630649777551078, + "epoch": 0.7774473745989514, "grad_norm": 0.0, - "learning_rate": 8.449707518970482e-06, - "loss": 0.8123, + "learning_rate": 2.487227169775115e-06, + "loss": 1.0017, "step": 19870 }, { - "epoch": 0.5630933151973703, + "epoch": 0.7774865012911808, "grad_norm": 0.0, - "learning_rate": 8.448800832418022e-06, - "loss": 0.9363, + "learning_rate": 2.486390874419601e-06, + "loss": 0.9318, "step": 19871 }, { - "epoch": 0.5631216526396328, + "epoch": 0.7775256279834103, "grad_norm": 0.0, - "learning_rate": 8.447894158931888e-06, - "loss": 0.8635, + "learning_rate": 2.4855546997232383e-06, + "loss": 0.8961, "step": 19872 }, { - "epoch": 0.5631499900818953, + "epoch": 0.7775647546756397, "grad_norm": 0.0, - "learning_rate": 8.446987498519722e-06, - "loss": 0.857, + "learning_rate": 2.484718645699444e-06, + "loss": 1.137, "step": 19873 }, { - "epoch": 0.5631783275241576, + "epoch": 0.7776038813678692, "grad_norm": 0.0, - "learning_rate": 8.44608085118915e-06, - "loss": 0.8079, + "learning_rate": 2.483882712361658e-06, + "loss": 1.0109, "step": 19874 }, { - "epoch": 0.5632066649664201, + "epoch": 0.7776430080600986, "grad_norm": 0.0, - "learning_rate": 8.445174216947819e-06, - "loss": 0.8874, + "learning_rate": 2.4830468997232947e-06, + "loss": 1.0599, "step": 19875 }, { - "epoch": 0.5632350024086826, + "epoch": 0.7776821347523281, "grad_norm": 0.0, - "learning_rate": 8.444267595803368e-06, - "loss": 0.9382, + "learning_rate": 2.482211207797781e-06, + "loss": 1.0577, "step": 19876 }, { - "epoch": 0.563263339850945, + "epoch": 0.7777212614445574, "grad_norm": 0.0, - "learning_rate": 8.443360987763421e-06, - "loss": 0.7484, + "learning_rate": 2.481375636598532e-06, + "loss": 0.909, "step": 19877 }, { - "epoch": 0.5632916772932075, + "epoch": 0.7777603881367869, "grad_norm": 0.0, - "learning_rate": 8.442454392835627e-06, - "loss": 0.8079, + "learning_rate": 2.48054018613897e-06, + "loss": 0.9942, "step": 19878 }, { - "epoch": 0.56332001473547, + "epoch": 0.7777995148290163, "grad_norm": 0.0, - "learning_rate": 8.441547811027615e-06, - "loss": 0.8747, + "learning_rate": 2.479704856432509e-06, + "loss": 0.9736, "step": 19879 }, { - "epoch": 0.5633483521777325, + "epoch": 0.7778386415212458, "grad_norm": 0.0, - "learning_rate": 8.440641242347025e-06, - "loss": 0.7348, + "learning_rate": 2.4788696474925677e-06, + "loss": 0.9874, "step": 19880 }, { - "epoch": 0.5633766896199949, + "epoch": 0.7778777682134752, "grad_norm": 0.0, - "learning_rate": 8.439734686801498e-06, - "loss": 0.7422, + "learning_rate": 2.4780345593325527e-06, + "loss": 1.1449, "step": 19881 }, { - "epoch": 0.5634050270622574, + "epoch": 0.7779168949057047, "grad_norm": 0.0, - "learning_rate": 8.43882814439866e-06, - "loss": 0.7562, + "learning_rate": 2.4771995919658777e-06, + "loss": 0.9253, "step": 19882 }, { - "epoch": 0.5634333645045199, + "epoch": 0.7779560215979341, "grad_norm": 0.0, - "learning_rate": 8.437921615146152e-06, - "loss": 0.9724, + "learning_rate": 2.4763647454059524e-06, + "loss": 0.9568, "step": 19883 }, { - "epoch": 0.5634617019467822, + "epoch": 0.7779951482901636, "grad_norm": 0.0, - "learning_rate": 8.437015099051613e-06, - "loss": 0.7251, + "learning_rate": 2.475530019666179e-06, + "loss": 0.7978, "step": 19884 }, { - "epoch": 0.5634900393890447, + "epoch": 0.778034274982393, "grad_norm": 0.0, - "learning_rate": 8.436108596122673e-06, - "loss": 0.8575, + "learning_rate": 2.4746954147599655e-06, + "loss": 0.8896, "step": 19885 }, { - "epoch": 0.5635183768313072, + "epoch": 0.7780734016746225, "grad_norm": 0.0, - "learning_rate": 8.435202106366976e-06, - "loss": 0.9316, + "learning_rate": 2.473860930700713e-06, + "loss": 1.0486, "step": 19886 }, { - "epoch": 0.5635467142735696, + "epoch": 0.7781125283668519, "grad_norm": 0.0, - "learning_rate": 8.434295629792149e-06, - "loss": 0.9139, + "learning_rate": 2.4730265675018274e-06, + "loss": 0.9277, "step": 19887 }, { - "epoch": 0.5635750517158321, + "epoch": 0.7781516550590813, "grad_norm": 0.0, - "learning_rate": 8.433389166405829e-06, - "loss": 0.9692, + "learning_rate": 2.4721923251766998e-06, + "loss": 0.9472, "step": 19888 }, { - "epoch": 0.5636033891580946, + "epoch": 0.7781907817513107, "grad_norm": 0.0, - "learning_rate": 8.432482716215663e-06, - "loss": 0.9109, + "learning_rate": 2.471358203738733e-06, + "loss": 0.9944, "step": 19889 }, { - "epoch": 0.5636317266003571, + "epoch": 0.7782299084435402, "grad_norm": 0.0, - "learning_rate": 8.431576279229268e-06, - "loss": 0.7918, + "learning_rate": 2.4705242032013166e-06, + "loss": 0.9009, "step": 19890 }, { - "epoch": 0.5636600640426195, + "epoch": 0.7782690351357696, "grad_norm": 0.0, - "learning_rate": 8.43066985545429e-06, - "loss": 0.913, + "learning_rate": 2.4696903235778467e-06, + "loss": 1.0329, "step": 19891 }, { - "epoch": 0.563688401484882, + "epoch": 0.7783081618279991, "grad_norm": 0.0, - "learning_rate": 8.429763444898364e-06, - "loss": 0.9398, + "learning_rate": 2.4688565648817153e-06, + "loss": 0.8418, "step": 19892 }, { - "epoch": 0.5637167389271445, + "epoch": 0.7783472885202285, "grad_norm": 0.0, - "learning_rate": 8.428857047569124e-06, - "loss": 0.8838, + "learning_rate": 2.4680229271263123e-06, + "loss": 0.9286, "step": 19893 }, { - "epoch": 0.5637450763694069, + "epoch": 0.778386415212458, "grad_norm": 0.0, - "learning_rate": 8.427950663474207e-06, - "loss": 0.9069, + "learning_rate": 2.4671894103250194e-06, + "loss": 1.0271, "step": 19894 }, { - "epoch": 0.5637734138116693, + "epoch": 0.7784255419046874, "grad_norm": 0.0, - "learning_rate": 8.427044292621241e-06, - "loss": 0.9057, + "learning_rate": 2.4663560144912267e-06, + "loss": 0.8675, "step": 19895 }, { - "epoch": 0.5638017512539318, + "epoch": 0.7784646685969168, "grad_norm": 0.0, - "learning_rate": 8.426137935017865e-06, - "loss": 0.8936, + "learning_rate": 2.465522739638315e-06, + "loss": 0.9089, "step": 19896 }, { - "epoch": 0.5638300886961943, + "epoch": 0.7785037952891463, "grad_norm": 0.0, - "learning_rate": 8.425231590671716e-06, - "loss": 0.8669, + "learning_rate": 2.4646895857796715e-06, + "loss": 0.897, "step": 19897 }, { - "epoch": 0.5638584261384567, + "epoch": 0.7785429219813756, "grad_norm": 0.0, - "learning_rate": 8.424325259590425e-06, - "loss": 0.8955, + "learning_rate": 2.4638565529286664e-06, + "loss": 1.0362, "step": 19898 }, { - "epoch": 0.5638867635807192, + "epoch": 0.7785820486736051, "grad_norm": 0.0, - "learning_rate": 8.42341894178163e-06, - "loss": 0.864, + "learning_rate": 2.463023641098683e-06, + "loss": 0.9695, "step": 19899 }, { - "epoch": 0.5639151010229817, + "epoch": 0.7786211753658345, "grad_norm": 0.0, - "learning_rate": 8.422512637252958e-06, - "loss": 0.8604, + "learning_rate": 2.4621908503030946e-06, + "loss": 0.9493, "step": 19900 }, { - "epoch": 0.5639434384652441, + "epoch": 0.778660302058064, "grad_norm": 0.0, - "learning_rate": 8.42160634601205e-06, - "loss": 0.8734, + "learning_rate": 2.461358180555279e-06, + "loss": 1.0781, "step": 19901 }, { - "epoch": 0.5639717759075066, + "epoch": 0.7786994287502934, "grad_norm": 0.0, - "learning_rate": 8.42070006806654e-06, - "loss": 0.8132, + "learning_rate": 2.460525631868602e-06, + "loss": 0.9827, "step": 19902 }, { - "epoch": 0.5640001133497691, + "epoch": 0.7787385554425229, "grad_norm": 0.0, - "learning_rate": 8.419793803424057e-06, - "loss": 0.8892, + "learning_rate": 2.4596932042564382e-06, + "loss": 0.8286, "step": 19903 }, { - "epoch": 0.5640284507920316, + "epoch": 0.7787776821347523, "grad_norm": 0.0, - "learning_rate": 8.418887552092237e-06, - "loss": 0.8866, + "learning_rate": 2.458860897732147e-06, + "loss": 1.0392, "step": 19904 }, { - "epoch": 0.564056788234294, + "epoch": 0.7788168088269818, "grad_norm": 0.0, - "learning_rate": 8.417981314078717e-06, - "loss": 0.8929, + "learning_rate": 2.4580287123091073e-06, + "loss": 0.9418, "step": 19905 }, { - "epoch": 0.5640851256765564, + "epoch": 0.7788559355192112, "grad_norm": 0.0, - "learning_rate": 8.417075089391125e-06, - "loss": 0.9778, + "learning_rate": 2.4571966480006713e-06, + "loss": 0.9621, "step": 19906 }, { - "epoch": 0.5641134631188189, + "epoch": 0.7788950622114407, "grad_norm": 0.0, - "learning_rate": 8.416168878037103e-06, - "loss": 0.7999, + "learning_rate": 2.45636470482021e-06, + "loss": 0.9497, "step": 19907 }, { - "epoch": 0.5641418005610813, + "epoch": 0.77893418890367, "grad_norm": 0.0, - "learning_rate": 8.415262680024272e-06, - "loss": 0.8072, + "learning_rate": 2.455532882781072e-06, + "loss": 1.008, "step": 19908 }, { - "epoch": 0.5641701380033438, + "epoch": 0.7789733155958996, "grad_norm": 0.0, - "learning_rate": 8.414356495360273e-06, - "loss": 1.0126, + "learning_rate": 2.4547011818966283e-06, + "loss": 0.9137, "step": 19909 }, { - "epoch": 0.5641984754456063, + "epoch": 0.7790124422881289, "grad_norm": 0.0, - "learning_rate": 8.41345032405274e-06, - "loss": 0.9385, + "learning_rate": 2.4538696021802257e-06, + "loss": 0.948, "step": 19910 }, { - "epoch": 0.5642268128878687, + "epoch": 0.7790515689803584, "grad_norm": 0.0, - "learning_rate": 8.412544166109304e-06, - "loss": 0.9756, + "learning_rate": 2.4530381436452244e-06, + "loss": 1.0415, "step": 19911 }, { - "epoch": 0.5642551503301312, + "epoch": 0.7790906956725878, "grad_norm": 0.0, - "learning_rate": 8.411638021537596e-06, - "loss": 0.9008, + "learning_rate": 2.4522068063049707e-06, + "loss": 0.943, "step": 19912 }, { - "epoch": 0.5642834877723937, + "epoch": 0.7791298223648173, "grad_norm": 0.0, - "learning_rate": 8.410731890345256e-06, - "loss": 0.8227, + "learning_rate": 2.4513755901728177e-06, + "loss": 0.9881, "step": 19913 }, { - "epoch": 0.5643118252146562, + "epoch": 0.7791689490570467, "grad_norm": 0.0, - "learning_rate": 8.409825772539905e-06, - "loss": 0.9421, + "learning_rate": 2.450544495262115e-06, + "loss": 0.979, "step": 19914 }, { - "epoch": 0.5643401626569186, + "epoch": 0.7792080757492762, "grad_norm": 0.0, - "learning_rate": 8.408919668129186e-06, - "loss": 0.88, + "learning_rate": 2.449713521586209e-06, + "loss": 0.9155, "step": 19915 }, { - "epoch": 0.564368500099181, + "epoch": 0.7792472024415056, "grad_norm": 0.0, - "learning_rate": 8.408013577120729e-06, - "loss": 0.862, + "learning_rate": 2.44888266915844e-06, + "loss": 1.058, "step": 19916 }, { - "epoch": 0.5643968375414435, + "epoch": 0.7792863291337351, "grad_norm": 0.0, - "learning_rate": 8.407107499522158e-06, - "loss": 0.8888, + "learning_rate": 2.448051937992154e-06, + "loss": 0.9389, "step": 19917 }, { - "epoch": 0.5644251749837059, + "epoch": 0.7793254558259645, "grad_norm": 0.0, - "learning_rate": 8.406201435341118e-06, - "loss": 0.9592, + "learning_rate": 2.4472213281006905e-06, + "loss": 0.9, "step": 19918 }, { - "epoch": 0.5644535124259684, + "epoch": 0.779364582518194, "grad_norm": 0.0, - "learning_rate": 8.405295384585232e-06, - "loss": 0.8001, + "learning_rate": 2.446390839497391e-06, + "loss": 1.002, "step": 19919 }, { - "epoch": 0.5644818498682309, + "epoch": 0.7794037092104233, "grad_norm": 0.0, - "learning_rate": 8.404389347262136e-06, - "loss": 0.8431, + "learning_rate": 2.4455604721955872e-06, + "loss": 0.8806, "step": 19920 }, { - "epoch": 0.5645101873104934, + "epoch": 0.7794428359026528, "grad_norm": 0.0, - "learning_rate": 8.403483323379465e-06, - "loss": 0.8653, + "learning_rate": 2.444730226208618e-06, + "loss": 1.0013, "step": 19921 }, { - "epoch": 0.5645385247527558, + "epoch": 0.7794819625948822, "grad_norm": 0.0, - "learning_rate": 8.402577312944842e-06, - "loss": 0.9525, + "learning_rate": 2.443900101549812e-06, + "loss": 1.0303, "step": 19922 }, { - "epoch": 0.5645668621950183, + "epoch": 0.7795210892871116, "grad_norm": 0.0, - "learning_rate": 8.401671315965905e-06, - "loss": 0.8567, + "learning_rate": 2.4430700982325018e-06, + "loss": 0.9787, "step": 19923 }, { - "epoch": 0.5645951996372808, + "epoch": 0.7795602159793411, "grad_norm": 0.0, - "learning_rate": 8.400765332450283e-06, - "loss": 1.0071, + "learning_rate": 2.4422402162700153e-06, + "loss": 0.9775, "step": 19924 }, { - "epoch": 0.5646235370795432, + "epoch": 0.7795993426715705, "grad_norm": 0.0, - "learning_rate": 8.399859362405606e-06, - "loss": 0.9345, + "learning_rate": 2.4414104556756845e-06, + "loss": 0.9624, "step": 19925 }, { - "epoch": 0.5646518745218057, + "epoch": 0.7796384693638, "grad_norm": 0.0, - "learning_rate": 8.398953405839516e-06, - "loss": 0.8534, + "learning_rate": 2.4405808164628275e-06, + "loss": 0.9075, "step": 19926 }, { - "epoch": 0.5646802119640681, + "epoch": 0.7796775960560294, "grad_norm": 0.0, - "learning_rate": 8.39804746275963e-06, - "loss": 0.9607, + "learning_rate": 2.43975129864477e-06, + "loss": 0.9501, "step": 19927 }, { - "epoch": 0.5647085494063306, + "epoch": 0.7797167227482589, "grad_norm": 0.0, - "learning_rate": 8.397141533173586e-06, - "loss": 0.7987, + "learning_rate": 2.4389219022348366e-06, + "loss": 1.0756, "step": 19928 }, { - "epoch": 0.564736886848593, + "epoch": 0.7797558494404883, "grad_norm": 0.0, - "learning_rate": 8.396235617089013e-06, - "loss": 0.9061, + "learning_rate": 2.438092627246339e-06, + "loss": 0.9654, "step": 19929 }, { - "epoch": 0.5647652242908555, + "epoch": 0.7797949761327178, "grad_norm": 0.0, - "learning_rate": 8.395329714513543e-06, - "loss": 0.8855, + "learning_rate": 2.437263473692598e-06, + "loss": 0.9237, "step": 19930 }, { - "epoch": 0.564793561733118, + "epoch": 0.7798341028249471, "grad_norm": 0.0, - "learning_rate": 8.394423825454812e-06, - "loss": 0.8225, + "learning_rate": 2.4364344415869303e-06, + "loss": 0.949, "step": 19931 }, { - "epoch": 0.5648218991753804, + "epoch": 0.7798732295171766, "grad_norm": 0.0, - "learning_rate": 8.393517949920438e-06, - "loss": 0.9412, + "learning_rate": 2.43560553094265e-06, + "loss": 0.9169, "step": 19932 }, { - "epoch": 0.5648502366176429, + "epoch": 0.779912356209406, "grad_norm": 0.0, - "learning_rate": 8.392612087918062e-06, - "loss": 0.8792, + "learning_rate": 2.4347767417730626e-06, + "loss": 0.9534, "step": 19933 }, { - "epoch": 0.5648785740599054, + "epoch": 0.7799514829016355, "grad_norm": 0.0, - "learning_rate": 8.391706239455316e-06, - "loss": 0.7867, + "learning_rate": 2.4339480740914844e-06, + "loss": 0.9251, "step": 19934 }, { - "epoch": 0.5649069115021678, + "epoch": 0.7799906095938649, "grad_norm": 0.0, - "learning_rate": 8.390800404539818e-06, - "loss": 0.8261, + "learning_rate": 2.433119527911214e-06, + "loss": 1.0859, "step": 19935 }, { - "epoch": 0.5649352489444303, + "epoch": 0.7800297362860944, "grad_norm": 0.0, - "learning_rate": 8.38989458317921e-06, - "loss": 0.7963, + "learning_rate": 2.4322911032455676e-06, + "loss": 0.9863, "step": 19936 }, { - "epoch": 0.5649635863866928, + "epoch": 0.7800688629783238, "grad_norm": 0.0, - "learning_rate": 8.388988775381115e-06, - "loss": 0.7863, + "learning_rate": 2.43146280010784e-06, + "loss": 0.9155, "step": 19937 }, { - "epoch": 0.5649919238289552, + "epoch": 0.7801079896705533, "grad_norm": 0.0, - "learning_rate": 8.388082981153167e-06, - "loss": 0.8316, + "learning_rate": 2.4306346185113395e-06, + "loss": 0.9193, "step": 19938 }, { - "epoch": 0.5650202612712176, + "epoch": 0.7801471163627827, "grad_norm": 0.0, - "learning_rate": 8.387177200502996e-06, - "loss": 0.7887, + "learning_rate": 2.4298065584693586e-06, + "loss": 0.8473, "step": 19939 }, { - "epoch": 0.5650485987134801, + "epoch": 0.7801862430550122, "grad_norm": 0.0, - "learning_rate": 8.386271433438228e-06, - "loss": 0.8955, + "learning_rate": 2.428978619995198e-06, + "loss": 1.0774, "step": 19940 }, { - "epoch": 0.5650769361557426, + "epoch": 0.7802253697472415, "grad_norm": 0.0, - "learning_rate": 8.385365679966495e-06, - "loss": 0.822, + "learning_rate": 2.428150803102155e-06, + "loss": 1.0028, "step": 19941 }, { - "epoch": 0.565105273598005, + "epoch": 0.780264496439471, "grad_norm": 0.0, - "learning_rate": 8.38445994009543e-06, - "loss": 0.8955, + "learning_rate": 2.4273231078035244e-06, + "loss": 0.913, "step": 19942 }, { - "epoch": 0.5651336110402675, + "epoch": 0.7803036231317004, "grad_norm": 0.0, - "learning_rate": 8.383554213832654e-06, - "loss": 0.8125, + "learning_rate": 2.426495534112592e-06, + "loss": 0.9368, "step": 19943 }, { - "epoch": 0.56516194848253, + "epoch": 0.7803427498239299, "grad_norm": 0.0, - "learning_rate": 8.382648501185806e-06, - "loss": 0.8572, + "learning_rate": 2.4256680820426515e-06, + "loss": 0.9845, "step": 19944 }, { - "epoch": 0.5651902859247925, + "epoch": 0.7803818765161593, "grad_norm": 0.0, - "learning_rate": 8.381742802162506e-06, - "loss": 0.871, + "learning_rate": 2.4248407516069903e-06, + "loss": 1.0455, "step": 19945 }, { - "epoch": 0.5652186233670549, + "epoch": 0.7804210032083888, "grad_norm": 0.0, - "learning_rate": 8.380837116770389e-06, - "loss": 0.8725, + "learning_rate": 2.424013542818898e-06, + "loss": 0.8371, "step": 19946 }, { - "epoch": 0.5652469608093174, + "epoch": 0.7804601299006182, "grad_norm": 0.0, - "learning_rate": 8.379931445017086e-06, - "loss": 0.7688, + "learning_rate": 2.423186455691652e-06, + "loss": 1.0406, "step": 19947 }, { - "epoch": 0.5652752982515798, + "epoch": 0.7804992565928477, "grad_norm": 0.0, - "learning_rate": 8.379025786910217e-06, - "loss": 0.8757, + "learning_rate": 2.4223594902385394e-06, + "loss": 0.8084, "step": 19948 }, { - "epoch": 0.5653036356938422, + "epoch": 0.7805383832850771, "grad_norm": 0.0, - "learning_rate": 8.378120142457415e-06, - "loss": 0.9586, + "learning_rate": 2.421532646472833e-06, + "loss": 0.9335, "step": 19949 }, { - "epoch": 0.5653319731361047, + "epoch": 0.7805775099773066, "grad_norm": 0.0, - "learning_rate": 8.377214511666313e-06, - "loss": 0.9087, + "learning_rate": 2.4207059244078212e-06, + "loss": 0.9597, "step": 19950 }, { - "epoch": 0.5653603105783672, + "epoch": 0.780616636669536, "grad_norm": 0.0, - "learning_rate": 8.376308894544533e-06, - "loss": 0.9109, + "learning_rate": 2.419879324056773e-06, + "loss": 0.9035, "step": 19951 }, { - "epoch": 0.5653886480206296, + "epoch": 0.7806557633617653, "grad_norm": 0.0, - "learning_rate": 8.37540329109971e-06, - "loss": 0.7876, + "learning_rate": 2.4190528454329666e-06, + "loss": 0.9483, "step": 19952 }, { - "epoch": 0.5654169854628921, + "epoch": 0.7806948900539948, "grad_norm": 0.0, - "learning_rate": 8.374497701339466e-06, - "loss": 0.8636, + "learning_rate": 2.4182264885496663e-06, + "loss": 0.9621, "step": 19953 }, { - "epoch": 0.5654453229051546, + "epoch": 0.7807340167462242, "grad_norm": 0.0, - "learning_rate": 8.37359212527143e-06, - "loss": 0.8623, + "learning_rate": 2.4174002534201536e-06, + "loss": 0.8837, "step": 19954 }, { - "epoch": 0.5654736603474171, + "epoch": 0.7807731434384537, "grad_norm": 0.0, - "learning_rate": 8.372686562903233e-06, - "loss": 0.8955, + "learning_rate": 2.4165741400576894e-06, + "loss": 1.0178, "step": 19955 }, { - "epoch": 0.5655019977896795, + "epoch": 0.7808122701306831, "grad_norm": 0.0, - "learning_rate": 8.3717810142425e-06, - "loss": 0.7799, + "learning_rate": 2.415748148475543e-06, + "loss": 0.939, "step": 19956 }, { - "epoch": 0.565530335231942, + "epoch": 0.7808513968229126, "grad_norm": 0.0, - "learning_rate": 8.370875479296864e-06, - "loss": 1.0375, + "learning_rate": 2.414922278686973e-06, + "loss": 1.0837, "step": 19957 }, { - "epoch": 0.5655586726742045, + "epoch": 0.780890523515142, "grad_norm": 0.0, - "learning_rate": 8.369969958073945e-06, - "loss": 0.882, + "learning_rate": 2.4140965307052534e-06, + "loss": 0.8988, "step": 19958 }, { - "epoch": 0.5655870101164668, + "epoch": 0.7809296502073715, "grad_norm": 0.0, - "learning_rate": 8.369064450581374e-06, - "loss": 0.9233, + "learning_rate": 2.413270904543633e-06, + "loss": 1.0498, "step": 19959 }, { - "epoch": 0.5656153475587293, + "epoch": 0.7809687768996009, "grad_norm": 0.0, - "learning_rate": 8.368158956826783e-06, - "loss": 1.0023, + "learning_rate": 2.4124454002153796e-06, + "loss": 0.8948, "step": 19960 }, { - "epoch": 0.5656436850009918, + "epoch": 0.7810079035918304, "grad_norm": 0.0, - "learning_rate": 8.36725347681779e-06, - "loss": 0.819, + "learning_rate": 2.411620017733741e-06, + "loss": 0.9858, "step": 19961 }, { - "epoch": 0.5656720224432543, + "epoch": 0.7810470302840598, "grad_norm": 0.0, - "learning_rate": 8.36634801056203e-06, - "loss": 0.9952, + "learning_rate": 2.410794757111977e-06, + "loss": 0.9482, "step": 19962 }, { - "epoch": 0.5657003598855167, + "epoch": 0.7810861569762892, "grad_norm": 0.0, - "learning_rate": 8.365442558067127e-06, - "loss": 1.0118, + "learning_rate": 2.40996961836334e-06, + "loss": 1.0986, "step": 19963 }, { - "epoch": 0.5657286973277792, + "epoch": 0.7811252836685186, "grad_norm": 0.0, - "learning_rate": 8.364537119340705e-06, - "loss": 0.7759, + "learning_rate": 2.409144601501082e-06, + "loss": 0.9857, "step": 19964 }, { - "epoch": 0.5657570347700417, + "epoch": 0.7811644103607481, "grad_norm": 0.0, - "learning_rate": 8.363631694390402e-06, - "loss": 0.8291, + "learning_rate": 2.408319706538448e-06, + "loss": 0.8947, "step": 19965 }, { - "epoch": 0.5657853722123041, + "epoch": 0.7812035370529775, "grad_norm": 0.0, - "learning_rate": 8.36272628322383e-06, - "loss": 0.914, + "learning_rate": 2.4074949334886855e-06, + "loss": 0.969, "step": 19966 }, { - "epoch": 0.5658137096545666, + "epoch": 0.781242663745207, "grad_norm": 0.0, - "learning_rate": 8.361820885848623e-06, - "loss": 0.8696, + "learning_rate": 2.4066702823650434e-06, + "loss": 0.9842, "step": 19967 }, { - "epoch": 0.5658420470968291, + "epoch": 0.7812817904374364, "grad_norm": 0.0, - "learning_rate": 8.36091550227241e-06, - "loss": 0.8431, + "learning_rate": 2.4058457531807587e-06, + "loss": 1.0115, "step": 19968 }, { - "epoch": 0.5658703845390916, + "epoch": 0.7813209171296659, "grad_norm": 0.0, - "learning_rate": 8.360010132502811e-06, - "loss": 0.8836, + "learning_rate": 2.4050213459490755e-06, + "loss": 0.9583, "step": 19969 }, { - "epoch": 0.5658987219813539, + "epoch": 0.7813600438218953, "grad_norm": 0.0, - "learning_rate": 8.359104776547458e-06, - "loss": 0.851, + "learning_rate": 2.4041970606832355e-06, + "loss": 0.9827, "step": 19970 }, { - "epoch": 0.5659270594236164, + "epoch": 0.7813991705141248, "grad_norm": 0.0, - "learning_rate": 8.358199434413977e-06, - "loss": 0.7944, + "learning_rate": 2.40337289739647e-06, + "loss": 0.9172, "step": 19971 }, { - "epoch": 0.5659553968658789, + "epoch": 0.7814382972063542, "grad_norm": 0.0, - "learning_rate": 8.357294106109988e-06, - "loss": 0.9524, + "learning_rate": 2.402548856102016e-06, + "loss": 0.8872, "step": 19972 }, { - "epoch": 0.5659837343081413, + "epoch": 0.7814774238985837, "grad_norm": 0.0, - "learning_rate": 8.356388791643126e-06, - "loss": 0.9264, + "learning_rate": 2.40172493681311e-06, + "loss": 0.9127, "step": 19973 }, { - "epoch": 0.5660120717504038, + "epoch": 0.781516550590813, "grad_norm": 0.0, - "learning_rate": 8.355483491021007e-06, - "loss": 0.9222, + "learning_rate": 2.4009011395429784e-06, + "loss": 1.0936, "step": 19974 }, { - "epoch": 0.5660404091926663, + "epoch": 0.7815556772830425, "grad_norm": 0.0, - "learning_rate": 8.35457820425126e-06, - "loss": 0.9421, + "learning_rate": 2.400077464304852e-06, + "loss": 0.9017, "step": 19975 }, { - "epoch": 0.5660687466349287, + "epoch": 0.7815948039752719, "grad_norm": 0.0, - "learning_rate": 8.353672931341514e-06, - "loss": 0.8901, + "learning_rate": 2.3992539111119585e-06, + "loss": 0.9255, "step": 19976 }, { - "epoch": 0.5660970840771912, + "epoch": 0.7816339306675014, "grad_norm": 0.0, - "learning_rate": 8.35276767229939e-06, - "loss": 0.8833, + "learning_rate": 2.398430479977525e-06, + "loss": 0.9583, "step": 19977 }, { - "epoch": 0.5661254215194537, + "epoch": 0.7816730573597308, "grad_norm": 0.0, - "learning_rate": 8.351862427132516e-06, - "loss": 0.7935, + "learning_rate": 2.39760717091477e-06, + "loss": 0.8884, "step": 19978 }, { - "epoch": 0.5661537589617162, + "epoch": 0.7817121840519603, "grad_norm": 0.0, - "learning_rate": 8.350957195848521e-06, - "loss": 0.8047, + "learning_rate": 2.39678398393692e-06, + "loss": 0.8484, "step": 19979 }, { - "epoch": 0.5661820964039785, + "epoch": 0.7817513107441897, "grad_norm": 0.0, - "learning_rate": 8.350051978455023e-06, - "loss": 0.7519, + "learning_rate": 2.3959609190571864e-06, + "loss": 0.9744, "step": 19980 }, { - "epoch": 0.566210433846241, + "epoch": 0.7817904374364191, "grad_norm": 0.0, - "learning_rate": 8.349146774959651e-06, - "loss": 0.8166, + "learning_rate": 2.3951379762887983e-06, + "loss": 1.0137, "step": 19981 }, { - "epoch": 0.5662387712885035, + "epoch": 0.7818295641286486, "grad_norm": 0.0, - "learning_rate": 8.348241585370026e-06, - "loss": 0.9521, + "learning_rate": 2.394315155644962e-06, + "loss": 0.936, "step": 19982 }, { - "epoch": 0.5662671087307659, + "epoch": 0.781868690820878, "grad_norm": 0.0, - "learning_rate": 8.347336409693776e-06, - "loss": 0.9518, + "learning_rate": 2.393492457138896e-06, + "loss": 1.0257, "step": 19983 }, { - "epoch": 0.5662954461730284, + "epoch": 0.7819078175131075, "grad_norm": 0.0, - "learning_rate": 8.34643124793853e-06, - "loss": 0.7933, + "learning_rate": 2.3926698807838045e-06, + "loss": 0.9797, "step": 19984 }, { - "epoch": 0.5663237836152909, + "epoch": 0.7819469442053368, "grad_norm": 0.0, - "learning_rate": 8.345526100111903e-06, - "loss": 1.0188, + "learning_rate": 2.3918474265929082e-06, + "loss": 1.0435, "step": 19985 }, { - "epoch": 0.5663521210575534, + "epoch": 0.7819860708975663, "grad_norm": 0.0, - "learning_rate": 8.344620966221528e-06, - "loss": 0.8581, + "learning_rate": 2.3910250945794055e-06, + "loss": 1.0895, "step": 19986 }, { - "epoch": 0.5663804584998158, + "epoch": 0.7820251975897957, "grad_norm": 0.0, - "learning_rate": 8.34371584627502e-06, - "loss": 0.7828, + "learning_rate": 2.3902028847565074e-06, + "loss": 1.0884, "step": 19987 }, { - "epoch": 0.5664087959420783, + "epoch": 0.7820643242820252, "grad_norm": 0.0, - "learning_rate": 8.34281074028001e-06, - "loss": 0.8157, + "learning_rate": 2.389380797137413e-06, + "loss": 0.9995, "step": 19988 }, { - "epoch": 0.5664371333843408, + "epoch": 0.7821034509742546, "grad_norm": 0.0, - "learning_rate": 8.341905648244122e-06, - "loss": 0.9095, + "learning_rate": 2.388558831735327e-06, + "loss": 1.0356, "step": 19989 }, { - "epoch": 0.5664654708266031, + "epoch": 0.7821425776664841, "grad_norm": 0.0, - "learning_rate": 8.341000570174977e-06, - "loss": 0.9547, + "learning_rate": 2.387736988563447e-06, + "loss": 1.0725, "step": 19990 }, { - "epoch": 0.5664938082688656, + "epoch": 0.7821817043587135, "grad_norm": 0.0, - "learning_rate": 8.340095506080198e-06, - "loss": 0.8902, + "learning_rate": 2.386915267634976e-06, + "loss": 0.975, "step": 19991 }, { - "epoch": 0.5665221457111281, + "epoch": 0.782220831050943, "grad_norm": 0.0, - "learning_rate": 8.339190455967418e-06, - "loss": 0.8424, + "learning_rate": 2.3860936689631042e-06, + "loss": 0.9686, "step": 19992 }, { - "epoch": 0.5665504831533906, + "epoch": 0.7822599577431724, "grad_norm": 0.0, - "learning_rate": 8.338285419844249e-06, - "loss": 0.9204, + "learning_rate": 2.3852721925610257e-06, + "loss": 1.0195, "step": 19993 }, { - "epoch": 0.566578820595653, + "epoch": 0.7822990844354019, "grad_norm": 0.0, - "learning_rate": 8.33738039771832e-06, - "loss": 0.8438, + "learning_rate": 2.384450838441935e-06, + "loss": 1.0097, "step": 19994 }, { - "epoch": 0.5666071580379155, + "epoch": 0.7823382111276312, "grad_norm": 0.0, - "learning_rate": 8.336475389597252e-06, - "loss": 0.8146, + "learning_rate": 2.3836296066190235e-06, + "loss": 0.9867, "step": 19995 }, { - "epoch": 0.566635495480178, + "epoch": 0.7823773378198607, "grad_norm": 0.0, - "learning_rate": 8.335570395488668e-06, - "loss": 0.8368, + "learning_rate": 2.382808497105473e-06, + "loss": 0.9867, "step": 19996 }, { - "epoch": 0.5666638329224404, + "epoch": 0.7824164645120901, "grad_norm": 0.0, - "learning_rate": 8.3346654154002e-06, - "loss": 0.8961, + "learning_rate": 2.3819875099144772e-06, + "loss": 0.9549, "step": 19997 }, { - "epoch": 0.5666921703647029, + "epoch": 0.7824555912043196, "grad_norm": 0.0, - "learning_rate": 8.333760449339456e-06, - "loss": 0.9328, + "learning_rate": 2.3811666450592108e-06, + "loss": 0.968, "step": 19998 }, { - "epoch": 0.5667205078069654, + "epoch": 0.782494717896549, "grad_norm": 0.0, - "learning_rate": 8.332855497314068e-06, - "loss": 0.8289, + "learning_rate": 2.3803459025528654e-06, + "loss": 0.9388, "step": 19999 }, { - "epoch": 0.5667488452492278, + "epoch": 0.7825338445887785, "grad_norm": 0.0, - "learning_rate": 8.33195055933166e-06, - "loss": 0.8723, + "learning_rate": 2.3795252824086145e-06, + "loss": 0.9673, "step": 20000 }, { - "epoch": 0.5667771826914902, + "epoch": 0.7825729712810079, "grad_norm": 0.0, - "learning_rate": 8.33104563539985e-06, - "loss": 0.942, + "learning_rate": 2.3787047846396418e-06, + "loss": 0.9264, "step": 20001 }, { - "epoch": 0.5668055201337527, + "epoch": 0.7826120979732374, "grad_norm": 0.0, - "learning_rate": 8.330140725526264e-06, - "loss": 0.8542, + "learning_rate": 2.377884409259115e-06, + "loss": 0.9872, "step": 20002 }, { - "epoch": 0.5668338575760152, + "epoch": 0.7826512246654668, "grad_norm": 0.0, - "learning_rate": 8.329235829718519e-06, - "loss": 0.9136, + "learning_rate": 2.377064156280219e-06, + "loss": 0.9103, "step": 20003 }, { - "epoch": 0.5668621950182776, + "epoch": 0.7826903513576963, "grad_norm": 0.0, - "learning_rate": 8.328330947984243e-06, - "loss": 0.9463, + "learning_rate": 2.376244025716117e-06, + "loss": 0.8831, "step": 20004 }, { - "epoch": 0.5668905324605401, + "epoch": 0.7827294780499257, "grad_norm": 0.0, - "learning_rate": 8.327426080331058e-06, - "loss": 0.8587, + "learning_rate": 2.375424017579986e-06, + "loss": 1.0229, "step": 20005 }, { - "epoch": 0.5669188699028026, + "epoch": 0.7827686047421551, "grad_norm": 0.0, - "learning_rate": 8.326521226766583e-06, - "loss": 0.8349, + "learning_rate": 2.3746041318849876e-06, + "loss": 0.9976, "step": 20006 }, { - "epoch": 0.566947207345065, + "epoch": 0.7828077314343845, "grad_norm": 0.0, - "learning_rate": 8.32561638729844e-06, - "loss": 0.8399, + "learning_rate": 2.373784368644293e-06, + "loss": 0.98, "step": 20007 }, { - "epoch": 0.5669755447873275, + "epoch": 0.782846858126614, "grad_norm": 0.0, - "learning_rate": 8.324711561934253e-06, - "loss": 0.8997, + "learning_rate": 2.372964727871064e-06, + "loss": 0.9834, "step": 20008 }, { - "epoch": 0.56700388222959, + "epoch": 0.7828859848188434, "grad_norm": 0.0, - "learning_rate": 8.323806750681641e-06, - "loss": 0.8822, + "learning_rate": 2.372145209578468e-06, + "loss": 0.8915, "step": 20009 }, { - "epoch": 0.5670322196718525, + "epoch": 0.7829251115110728, "grad_norm": 0.0, - "learning_rate": 8.322901953548232e-06, - "loss": 0.9327, + "learning_rate": 2.371325813779659e-06, + "loss": 0.8263, "step": 20010 }, { - "epoch": 0.5670605571141148, + "epoch": 0.7829642382033023, "grad_norm": 0.0, - "learning_rate": 8.321997170541638e-06, - "loss": 0.8558, + "learning_rate": 2.3705065404877982e-06, + "loss": 0.9303, "step": 20011 }, { - "epoch": 0.5670888945563773, + "epoch": 0.7830033648955317, "grad_norm": 0.0, - "learning_rate": 8.321092401669484e-06, - "loss": 0.8271, + "learning_rate": 2.3696873897160442e-06, + "loss": 0.9215, "step": 20012 }, { - "epoch": 0.5671172319986398, + "epoch": 0.7830424915877612, "grad_norm": 0.0, - "learning_rate": 8.320187646939397e-06, - "loss": 0.9402, + "learning_rate": 2.3688683614775464e-06, + "loss": 0.9632, "step": 20013 }, { - "epoch": 0.5671455694409022, + "epoch": 0.7830816182799906, "grad_norm": 0.0, - "learning_rate": 8.31928290635899e-06, - "loss": 0.9096, + "learning_rate": 2.3680494557854604e-06, + "loss": 1.012, "step": 20014 }, { - "epoch": 0.5671739068831647, + "epoch": 0.7831207449722201, "grad_norm": 0.0, - "learning_rate": 8.318378179935888e-06, - "loss": 0.8872, + "learning_rate": 2.367230672652938e-06, + "loss": 0.9075, "step": 20015 }, { - "epoch": 0.5672022443254272, + "epoch": 0.7831598716644494, "grad_norm": 0.0, - "learning_rate": 8.317473467677711e-06, - "loss": 0.8403, + "learning_rate": 2.366412012093129e-06, + "loss": 0.8337, "step": 20016 }, { - "epoch": 0.5672305817676897, + "epoch": 0.7831989983566789, "grad_norm": 0.0, - "learning_rate": 8.316568769592078e-06, - "loss": 0.8235, + "learning_rate": 2.3655934741191734e-06, + "loss": 0.9622, "step": 20017 }, { - "epoch": 0.5672589192099521, + "epoch": 0.7832381250489083, "grad_norm": 0.0, - "learning_rate": 8.315664085686614e-06, - "loss": 0.8157, + "learning_rate": 2.364775058744224e-06, + "loss": 1.063, "step": 20018 }, { - "epoch": 0.5672872566522146, + "epoch": 0.7832772517411378, "grad_norm": 0.0, - "learning_rate": 8.314759415968936e-06, - "loss": 0.8582, + "learning_rate": 2.3639567659814176e-06, + "loss": 0.939, "step": 20019 }, { - "epoch": 0.5673155940944771, + "epoch": 0.7833163784333672, "grad_norm": 0.0, - "learning_rate": 8.313854760446664e-06, - "loss": 0.8611, + "learning_rate": 2.3631385958438958e-06, + "loss": 0.8827, "step": 20020 }, { - "epoch": 0.5673439315367395, + "epoch": 0.7833555051255967, "grad_norm": 0.0, - "learning_rate": 8.312950119127422e-06, - "loss": 0.8653, + "learning_rate": 2.3623205483448e-06, + "loss": 0.9563, "step": 20021 }, { - "epoch": 0.567372268979002, + "epoch": 0.7833946318178261, "grad_norm": 0.0, - "learning_rate": 8.312045492018822e-06, - "loss": 0.8925, + "learning_rate": 2.3615026234972673e-06, + "loss": 1.028, "step": 20022 }, { - "epoch": 0.5674006064212644, + "epoch": 0.7834337585100556, "grad_norm": 0.0, - "learning_rate": 8.311140879128498e-06, - "loss": 0.8199, + "learning_rate": 2.3606848213144284e-06, + "loss": 1.0767, "step": 20023 }, { - "epoch": 0.5674289438635268, + "epoch": 0.783472885202285, "grad_norm": 0.0, - "learning_rate": 8.310236280464056e-06, - "loss": 0.8774, + "learning_rate": 2.359867141809419e-06, + "loss": 0.9525, "step": 20024 }, { - "epoch": 0.5674572813057893, + "epoch": 0.7835120118945145, "grad_norm": 0.0, - "learning_rate": 8.309331696033122e-06, - "loss": 0.925, + "learning_rate": 2.359049584995369e-06, + "loss": 0.9976, "step": 20025 }, { - "epoch": 0.5674856187480518, + "epoch": 0.7835511385867439, "grad_norm": 0.0, - "learning_rate": 8.308427125843318e-06, - "loss": 0.8288, + "learning_rate": 2.3582321508854112e-06, + "loss": 1.0307, "step": 20026 }, { - "epoch": 0.5675139561903143, + "epoch": 0.7835902652789734, "grad_norm": 0.0, - "learning_rate": 8.307522569902256e-06, - "loss": 0.9013, + "learning_rate": 2.3574148394926668e-06, + "loss": 1.0161, "step": 20027 }, { - "epoch": 0.5675422936325767, + "epoch": 0.7836293919712027, "grad_norm": 0.0, - "learning_rate": 8.306618028217561e-06, - "loss": 0.9302, + "learning_rate": 2.3565976508302657e-06, + "loss": 0.9762, "step": 20028 }, { - "epoch": 0.5675706310748392, + "epoch": 0.7836685186634322, "grad_norm": 0.0, - "learning_rate": 8.305713500796852e-06, - "loss": 0.8981, + "learning_rate": 2.3557805849113234e-06, + "loss": 0.9552, "step": 20029 }, { - "epoch": 0.5675989685171017, + "epoch": 0.7837076453556616, "grad_norm": 0.0, - "learning_rate": 8.304808987647747e-06, - "loss": 0.8474, + "learning_rate": 2.354963641748972e-06, + "loss": 0.9203, "step": 20030 }, { - "epoch": 0.5676273059593641, + "epoch": 0.7837467720478911, "grad_norm": 0.0, - "learning_rate": 8.303904488777868e-06, - "loss": 0.9597, + "learning_rate": 2.3541468213563213e-06, + "loss": 1.1042, "step": 20031 }, { - "epoch": 0.5676556434016266, + "epoch": 0.7837858987401205, "grad_norm": 0.0, - "learning_rate": 8.303000004194829e-06, - "loss": 0.8696, + "learning_rate": 2.3533301237464945e-06, + "loss": 1.0231, "step": 20032 }, { - "epoch": 0.567683980843889, + "epoch": 0.78382502543235, "grad_norm": 0.0, - "learning_rate": 8.302095533906248e-06, - "loss": 0.7109, + "learning_rate": 2.352513548932599e-06, + "loss": 1.1117, "step": 20033 }, { - "epoch": 0.5677123182861515, + "epoch": 0.7838641521245794, "grad_norm": 0.0, - "learning_rate": 8.301191077919753e-06, - "loss": 0.8972, + "learning_rate": 2.3516970969277585e-06, + "loss": 0.994, "step": 20034 }, { - "epoch": 0.5677406557284139, + "epoch": 0.7839032788168089, "grad_norm": 0.0, - "learning_rate": 8.300286636242951e-06, - "loss": 0.9033, + "learning_rate": 2.350880767745076e-06, + "loss": 1.0526, "step": 20035 }, { - "epoch": 0.5677689931706764, + "epoch": 0.7839424055090383, "grad_norm": 0.0, - "learning_rate": 8.299382208883473e-06, - "loss": 0.8852, + "learning_rate": 2.350064561397667e-06, + "loss": 1.0861, "step": 20036 }, { - "epoch": 0.5677973306129389, + "epoch": 0.7839815322012677, "grad_norm": 0.0, - "learning_rate": 8.298477795848922e-06, - "loss": 0.8148, + "learning_rate": 2.349248477898631e-06, + "loss": 0.972, "step": 20037 }, { - "epoch": 0.5678256680552013, + "epoch": 0.7840206588934971, "grad_norm": 0.0, - "learning_rate": 8.297573397146928e-06, - "loss": 0.8777, + "learning_rate": 2.3484325172610788e-06, + "loss": 0.9581, "step": 20038 }, { - "epoch": 0.5678540054974638, + "epoch": 0.7840597855857265, "grad_norm": 0.0, - "learning_rate": 8.296669012785105e-06, - "loss": 0.9162, + "learning_rate": 2.3476166794981124e-06, + "loss": 1.1013, "step": 20039 }, { - "epoch": 0.5678823429397263, + "epoch": 0.784098912277956, "grad_norm": 0.0, - "learning_rate": 8.295764642771072e-06, - "loss": 0.8539, + "learning_rate": 2.346800964622835e-06, + "loss": 0.906, "step": 20040 }, { - "epoch": 0.5679106803819888, + "epoch": 0.7841380389701854, "grad_norm": 0.0, - "learning_rate": 8.294860287112444e-06, - "loss": 0.9138, + "learning_rate": 2.345985372648342e-06, + "loss": 1.0363, "step": 20041 }, { - "epoch": 0.5679390178242512, + "epoch": 0.7841771656624149, "grad_norm": 0.0, - "learning_rate": 8.293955945816846e-06, - "loss": 0.8994, + "learning_rate": 2.345169903587733e-06, + "loss": 0.9018, "step": 20042 }, { - "epoch": 0.5679673552665137, + "epoch": 0.7842162923546443, "grad_norm": 0.0, - "learning_rate": 8.293051618891885e-06, - "loss": 0.8705, + "learning_rate": 2.3443545574541026e-06, + "loss": 0.9091, "step": 20043 }, { - "epoch": 0.5679956927087761, + "epoch": 0.7842554190468738, "grad_norm": 0.0, - "learning_rate": 8.292147306345191e-06, - "loss": 0.8798, + "learning_rate": 2.3435393342605484e-06, + "loss": 0.7903, "step": 20044 }, { - "epoch": 0.5680240301510385, + "epoch": 0.7842945457391032, "grad_norm": 0.0, - "learning_rate": 8.291243008184368e-06, - "loss": 0.8749, + "learning_rate": 2.342724234020155e-06, + "loss": 0.9038, "step": 20045 }, { - "epoch": 0.568052367593301, + "epoch": 0.7843336724313327, "grad_norm": 0.0, - "learning_rate": 8.29033872441704e-06, - "loss": 0.7817, + "learning_rate": 2.341909256746019e-06, + "loss": 1.1422, "step": 20046 }, { - "epoch": 0.5680807050355635, + "epoch": 0.7843727991235621, "grad_norm": 0.0, - "learning_rate": 8.289434455050826e-06, - "loss": 0.9671, + "learning_rate": 2.3410944024512184e-06, + "loss": 0.9023, "step": 20047 }, { - "epoch": 0.5681090424778259, + "epoch": 0.7844119258157916, "grad_norm": 0.0, - "learning_rate": 8.28853020009334e-06, - "loss": 0.9596, + "learning_rate": 2.3402796711488494e-06, + "loss": 0.8974, "step": 20048 }, { - "epoch": 0.5681373799200884, + "epoch": 0.7844510525080209, "grad_norm": 0.0, - "learning_rate": 8.2876259595522e-06, - "loss": 0.9246, + "learning_rate": 2.3394650628519876e-06, + "loss": 1.0092, "step": 20049 }, { - "epoch": 0.5681657173623509, + "epoch": 0.7844901792002504, "grad_norm": 0.0, - "learning_rate": 8.286721733435023e-06, - "loss": 0.8526, + "learning_rate": 2.3386505775737224e-06, + "loss": 1.0239, "step": 20050 }, { - "epoch": 0.5681940548046134, + "epoch": 0.7845293058924798, "grad_norm": 0.0, - "learning_rate": 8.285817521749423e-06, - "loss": 0.8784, + "learning_rate": 2.3378362153271238e-06, + "loss": 0.7791, "step": 20051 }, { - "epoch": 0.5682223922468758, + "epoch": 0.7845684325847093, "grad_norm": 0.0, - "learning_rate": 8.284913324503021e-06, - "loss": 0.9891, + "learning_rate": 2.3370219761252757e-06, + "loss": 1.0132, "step": 20052 }, { - "epoch": 0.5682507296891383, + "epoch": 0.7846075592769387, "grad_norm": 0.0, - "learning_rate": 8.28400914170343e-06, - "loss": 0.7858, + "learning_rate": 2.336207859981252e-06, + "loss": 0.897, "step": 20053 }, { - "epoch": 0.5682790671314008, + "epoch": 0.7846466859691682, "grad_norm": 0.0, - "learning_rate": 8.283104973358265e-06, - "loss": 0.8528, + "learning_rate": 2.33539386690813e-06, + "loss": 0.9306, "step": 20054 }, { - "epoch": 0.5683074045736631, + "epoch": 0.7846858126613976, "grad_norm": 0.0, - "learning_rate": 8.28220081947515e-06, - "loss": 0.8151, + "learning_rate": 2.334579996918975e-06, + "loss": 0.9937, "step": 20055 }, { - "epoch": 0.5683357420159256, + "epoch": 0.7847249393536271, "grad_norm": 0.0, - "learning_rate": 8.28129668006169e-06, - "loss": 0.889, + "learning_rate": 2.33376625002686e-06, + "loss": 0.8123, "step": 20056 }, { - "epoch": 0.5683640794581881, + "epoch": 0.7847640660458565, "grad_norm": 0.0, - "learning_rate": 8.280392555125506e-06, - "loss": 0.8216, + "learning_rate": 2.332952626244852e-06, + "loss": 1.0593, "step": 20057 }, { - "epoch": 0.5683924169004506, + "epoch": 0.784803192738086, "grad_norm": 0.0, - "learning_rate": 8.279488444674221e-06, - "loss": 0.861, + "learning_rate": 2.3321391255860215e-06, + "loss": 0.8983, "step": 20058 }, { - "epoch": 0.568420754342713, + "epoch": 0.7848423194303153, "grad_norm": 0.0, - "learning_rate": 8.278584348715436e-06, - "loss": 0.8285, + "learning_rate": 2.3313257480634244e-06, + "loss": 1.0579, "step": 20059 }, { - "epoch": 0.5684490917849755, + "epoch": 0.7848814461225448, "grad_norm": 0.0, - "learning_rate": 8.27768026725678e-06, - "loss": 0.8779, + "learning_rate": 2.3305124936901267e-06, + "loss": 1.0285, "step": 20060 }, { - "epoch": 0.568477429227238, + "epoch": 0.7849205728147742, "grad_norm": 0.0, - "learning_rate": 8.276776200305858e-06, - "loss": 0.8524, + "learning_rate": 2.329699362479192e-06, + "loss": 0.9706, "step": 20061 }, { - "epoch": 0.5685057666695004, + "epoch": 0.7849596995070037, "grad_norm": 0.0, - "learning_rate": 8.27587214787029e-06, - "loss": 0.8834, + "learning_rate": 2.32888635444367e-06, + "loss": 0.8694, "step": 20062 }, { - "epoch": 0.5685341041117629, + "epoch": 0.7849988261992331, "grad_norm": 0.0, - "learning_rate": 8.274968109957696e-06, - "loss": 0.9357, + "learning_rate": 2.328073469596621e-06, + "loss": 0.9092, "step": 20063 }, { - "epoch": 0.5685624415540254, + "epoch": 0.7850379528914626, "grad_norm": 0.0, - "learning_rate": 8.274064086575682e-06, - "loss": 0.9681, + "learning_rate": 2.327260707951101e-06, + "loss": 0.917, "step": 20064 }, { - "epoch": 0.5685907789962878, + "epoch": 0.785077079583692, "grad_norm": 0.0, - "learning_rate": 8.27316007773187e-06, - "loss": 0.6922, + "learning_rate": 2.3264480695201575e-06, + "loss": 0.9643, "step": 20065 }, { - "epoch": 0.5686191164385502, + "epoch": 0.7851162062759214, "grad_norm": 0.0, - "learning_rate": 8.272256083433869e-06, - "loss": 0.8337, + "learning_rate": 2.325635554316842e-06, + "loss": 0.8557, "step": 20066 }, { - "epoch": 0.5686474538808127, + "epoch": 0.7851553329681509, "grad_norm": 0.0, - "learning_rate": 8.271352103689296e-06, - "loss": 0.9109, + "learning_rate": 2.324823162354206e-06, + "loss": 0.9922, "step": 20067 }, { - "epoch": 0.5686757913230752, + "epoch": 0.7851944596603803, "grad_norm": 0.0, - "learning_rate": 8.270448138505771e-06, - "loss": 0.999, + "learning_rate": 2.3240108936452897e-06, + "loss": 1.0401, "step": 20068 }, { - "epoch": 0.5687041287653376, + "epoch": 0.7852335863526098, "grad_norm": 0.0, - "learning_rate": 8.2695441878909e-06, - "loss": 0.8709, + "learning_rate": 2.3231987482031403e-06, + "loss": 0.9979, "step": 20069 }, { - "epoch": 0.5687324662076001, + "epoch": 0.7852727130448391, "grad_norm": 0.0, - "learning_rate": 8.2686402518523e-06, - "loss": 0.786, + "learning_rate": 2.3223867260407985e-06, + "loss": 0.8742, "step": 20070 }, { - "epoch": 0.5687608036498626, + "epoch": 0.7853118397370686, "grad_norm": 0.0, - "learning_rate": 8.26773633039759e-06, - "loss": 0.8656, + "learning_rate": 2.321574827171309e-06, + "loss": 0.7975, "step": 20071 }, { - "epoch": 0.568789141092125, + "epoch": 0.785350966429298, "grad_norm": 0.0, - "learning_rate": 8.266832423534376e-06, - "loss": 0.882, + "learning_rate": 2.3207630516077027e-06, + "loss": 0.9783, "step": 20072 }, { - "epoch": 0.5688174785343875, + "epoch": 0.7853900931215275, "grad_norm": 0.0, - "learning_rate": 8.265928531270279e-06, - "loss": 0.9589, + "learning_rate": 2.3199513993630195e-06, + "loss": 0.976, "step": 20073 }, { - "epoch": 0.56884581597665, + "epoch": 0.7854292198137569, "grad_norm": 0.0, - "learning_rate": 8.265024653612908e-06, - "loss": 0.9117, + "learning_rate": 2.3191398704502933e-06, + "loss": 0.8432, "step": 20074 }, { - "epoch": 0.5688741534189125, + "epoch": 0.7854683465059864, "grad_norm": 0.0, - "learning_rate": 8.264120790569877e-06, - "loss": 0.8674, + "learning_rate": 2.3183284648825577e-06, + "loss": 1.0163, "step": 20075 }, { - "epoch": 0.5689024908611748, + "epoch": 0.7855074731982158, "grad_norm": 0.0, - "learning_rate": 8.263216942148806e-06, - "loss": 0.9855, + "learning_rate": 2.3175171826728403e-06, + "loss": 1.0674, "step": 20076 }, { - "epoch": 0.5689308283034373, + "epoch": 0.7855465998904453, "grad_norm": 0.0, - "learning_rate": 8.2623131083573e-06, - "loss": 0.9244, + "learning_rate": 2.316706023834172e-06, + "loss": 1.0981, "step": 20077 }, { - "epoch": 0.5689591657456998, + "epoch": 0.7855857265826747, "grad_norm": 0.0, - "learning_rate": 8.261409289202976e-06, - "loss": 0.8968, + "learning_rate": 2.315894988379572e-06, + "loss": 1.0002, "step": 20078 }, { - "epoch": 0.5689875031879622, + "epoch": 0.7856248532749042, "grad_norm": 0.0, - "learning_rate": 8.260505484693449e-06, - "loss": 0.8937, + "learning_rate": 2.315084076322075e-06, + "loss": 0.977, "step": 20079 }, { - "epoch": 0.5690158406302247, + "epoch": 0.7856639799671336, "grad_norm": 0.0, - "learning_rate": 8.259601694836328e-06, - "loss": 0.8347, + "learning_rate": 2.3142732876746954e-06, + "loss": 0.9621, "step": 20080 }, { - "epoch": 0.5690441780724872, + "epoch": 0.785703106659363, "grad_norm": 0.0, - "learning_rate": 8.258697919639234e-06, - "loss": 0.7694, + "learning_rate": 2.3134626224504586e-06, + "loss": 1.0034, "step": 20081 }, { - "epoch": 0.5690725155147497, + "epoch": 0.7857422333515924, "grad_norm": 0.0, - "learning_rate": 8.257794159109768e-06, - "loss": 0.8257, + "learning_rate": 2.3126520806623752e-06, + "loss": 0.928, "step": 20082 }, { - "epoch": 0.5691008529570121, + "epoch": 0.7857813600438219, "grad_norm": 0.0, - "learning_rate": 8.256890413255549e-06, - "loss": 0.8455, + "learning_rate": 2.311841662323472e-06, + "loss": 0.8886, "step": 20083 }, { - "epoch": 0.5691291903992746, + "epoch": 0.7858204867360513, "grad_norm": 0.0, - "learning_rate": 8.255986682084194e-06, - "loss": 0.8802, + "learning_rate": 2.3110313674467556e-06, + "loss": 0.9438, "step": 20084 }, { - "epoch": 0.5691575278415371, + "epoch": 0.7858596134282808, "grad_norm": 0.0, - "learning_rate": 8.255082965603307e-06, - "loss": 0.7696, + "learning_rate": 2.3102211960452427e-06, + "loss": 0.9785, "step": 20085 }, { - "epoch": 0.5691858652837994, + "epoch": 0.7858987401205102, "grad_norm": 0.0, - "learning_rate": 8.254179263820503e-06, - "loss": 0.8628, + "learning_rate": 2.3094111481319394e-06, + "loss": 0.9788, "step": 20086 }, { - "epoch": 0.5692142027260619, + "epoch": 0.7859378668127397, "grad_norm": 0.0, - "learning_rate": 8.253275576743397e-06, - "loss": 0.8012, + "learning_rate": 2.3086012237198553e-06, + "loss": 1.0364, "step": 20087 }, { - "epoch": 0.5692425401683244, + "epoch": 0.7859769935049691, "grad_norm": 0.0, - "learning_rate": 8.252371904379598e-06, - "loss": 0.8811, + "learning_rate": 2.307791422821998e-06, + "loss": 0.915, "step": 20088 }, { - "epoch": 0.5692708776105869, + "epoch": 0.7860161201971986, "grad_norm": 0.0, - "learning_rate": 8.251468246736725e-06, - "loss": 0.8235, + "learning_rate": 2.3069817454513744e-06, + "loss": 0.8765, "step": 20089 }, { - "epoch": 0.5692992150528493, + "epoch": 0.786055246889428, "grad_norm": 0.0, - "learning_rate": 8.250564603822378e-06, - "loss": 0.8578, + "learning_rate": 2.3061721916209824e-06, + "loss": 1.0214, "step": 20090 }, { - "epoch": 0.5693275524951118, + "epoch": 0.7860943735816575, "grad_norm": 0.0, - "learning_rate": 8.249660975644176e-06, - "loss": 0.8073, + "learning_rate": 2.3053627613438224e-06, + "loss": 0.984, "step": 20091 }, { - "epoch": 0.5693558899373743, + "epoch": 0.7861335002738868, "grad_norm": 0.0, - "learning_rate": 8.248757362209732e-06, - "loss": 0.8676, + "learning_rate": 2.3045534546328952e-06, + "loss": 0.9636, "step": 20092 }, { - "epoch": 0.5693842273796367, + "epoch": 0.7861726269661163, "grad_norm": 0.0, - "learning_rate": 8.247853763526652e-06, - "loss": 0.7514, + "learning_rate": 2.3037442715011994e-06, + "loss": 0.973, "step": 20093 }, { - "epoch": 0.5694125648218992, + "epoch": 0.7862117536583457, "grad_norm": 0.0, - "learning_rate": 8.246950179602554e-06, - "loss": 0.8861, + "learning_rate": 2.302935211961724e-06, + "loss": 0.9434, "step": 20094 }, { - "epoch": 0.5694409022641617, + "epoch": 0.7862508803505751, "grad_norm": 0.0, - "learning_rate": 8.246046610445044e-06, - "loss": 0.9029, + "learning_rate": 2.3021262760274667e-06, + "loss": 1.0226, "step": 20095 }, { - "epoch": 0.569469239706424, + "epoch": 0.7862900070428046, "grad_norm": 0.0, - "learning_rate": 8.245143056061732e-06, - "loss": 0.8008, + "learning_rate": 2.30131746371141e-06, + "loss": 0.8509, "step": 20096 }, { - "epoch": 0.5694975771486865, + "epoch": 0.786329133735034, "grad_norm": 0.0, - "learning_rate": 8.244239516460238e-06, - "loss": 0.818, + "learning_rate": 2.3005087750265532e-06, + "loss": 1.0206, "step": 20097 }, { - "epoch": 0.569525914590949, + "epoch": 0.7863682604272635, "grad_norm": 0.0, - "learning_rate": 8.243335991648163e-06, - "loss": 0.7404, + "learning_rate": 2.2997002099858756e-06, + "loss": 1.0413, "step": 20098 }, { - "epoch": 0.5695542520332115, + "epoch": 0.7864073871194929, "grad_norm": 0.0, - "learning_rate": 8.242432481633119e-06, - "loss": 0.8756, + "learning_rate": 2.298891768602365e-06, + "loss": 0.9036, "step": 20099 }, { - "epoch": 0.5695825894754739, + "epoch": 0.7864465138117224, "grad_norm": 0.0, - "learning_rate": 8.241528986422722e-06, - "loss": 0.9243, + "learning_rate": 2.2980834508890004e-06, + "loss": 0.9626, "step": 20100 }, { - "epoch": 0.5696109269177364, + "epoch": 0.7864856405039518, "grad_norm": 0.0, - "learning_rate": 8.240625506024576e-06, - "loss": 1.0107, + "learning_rate": 2.2972752568587652e-06, + "loss": 0.9235, "step": 20101 }, { - "epoch": 0.5696392643599989, + "epoch": 0.7865247671961813, "grad_norm": 0.0, - "learning_rate": 8.239722040446301e-06, - "loss": 0.7875, + "learning_rate": 2.2964671865246367e-06, + "loss": 0.8895, "step": 20102 }, { - "epoch": 0.5696676018022613, + "epoch": 0.7865638938884106, "grad_norm": 0.0, - "learning_rate": 8.238818589695496e-06, - "loss": 0.7709, + "learning_rate": 2.2956592398995947e-06, + "loss": 0.9901, "step": 20103 }, { - "epoch": 0.5696959392445238, + "epoch": 0.7866030205806401, "grad_norm": 0.0, - "learning_rate": 8.237915153779774e-06, - "loss": 0.8543, + "learning_rate": 2.2948514169966086e-06, + "loss": 1.0293, "step": 20104 }, { - "epoch": 0.5697242766867863, + "epoch": 0.7866421472728695, "grad_norm": 0.0, - "learning_rate": 8.237011732706751e-06, - "loss": 0.837, + "learning_rate": 2.2940437178286532e-06, + "loss": 0.9081, "step": 20105 }, { - "epoch": 0.5697526141290488, + "epoch": 0.786681273965099, "grad_norm": 0.0, - "learning_rate": 8.23610832648403e-06, - "loss": 0.9477, + "learning_rate": 2.2932361424087035e-06, + "loss": 0.9415, "step": 20106 }, { - "epoch": 0.5697809515713111, + "epoch": 0.7867204006573284, "grad_norm": 0.0, - "learning_rate": 8.235204935119223e-06, - "loss": 0.8574, + "learning_rate": 2.2924286907497207e-06, + "loss": 0.9471, "step": 20107 }, { - "epoch": 0.5698092890135736, + "epoch": 0.7867595273495579, "grad_norm": 0.0, - "learning_rate": 8.234301558619948e-06, - "loss": 0.7752, + "learning_rate": 2.2916213628646754e-06, + "loss": 0.9, "step": 20108 }, { - "epoch": 0.5698376264558361, + "epoch": 0.7867986540417873, "grad_norm": 0.0, - "learning_rate": 8.2333981969938e-06, - "loss": 0.8258, + "learning_rate": 2.290814158766531e-06, + "loss": 1.0547, "step": 20109 }, { - "epoch": 0.5698659638980985, + "epoch": 0.7868377807340168, "grad_norm": 0.0, - "learning_rate": 8.232494850248398e-06, - "loss": 0.9484, + "learning_rate": 2.2900070784682548e-06, + "loss": 0.9367, "step": 20110 }, { - "epoch": 0.569894301340361, + "epoch": 0.7868769074262462, "grad_norm": 0.0, - "learning_rate": 8.231591518391345e-06, - "loss": 0.9123, + "learning_rate": 2.289200121982801e-06, + "loss": 1.0704, "step": 20111 }, { - "epoch": 0.5699226387826235, + "epoch": 0.7869160341184757, "grad_norm": 0.0, - "learning_rate": 8.230688201430253e-06, - "loss": 0.8665, + "learning_rate": 2.288393289323133e-06, + "loss": 1.008, "step": 20112 }, { - "epoch": 0.569950976224886, + "epoch": 0.786955160810705, "grad_norm": 0.0, - "learning_rate": 8.229784899372734e-06, - "loss": 0.7996, + "learning_rate": 2.2875865805022037e-06, + "loss": 1.0015, "step": 20113 }, { - "epoch": 0.5699793136671484, + "epoch": 0.7869942875029345, "grad_norm": 0.0, - "learning_rate": 8.228881612226391e-06, - "loss": 0.95, + "learning_rate": 2.2867799955329695e-06, + "loss": 1.0483, "step": 20114 }, { - "epoch": 0.5700076511094109, + "epoch": 0.7870334141951639, "grad_norm": 0.0, - "learning_rate": 8.227978339998838e-06, - "loss": 0.9644, + "learning_rate": 2.285973534428383e-06, + "loss": 0.9852, "step": 20115 }, { - "epoch": 0.5700359885516734, + "epoch": 0.7870725408873934, "grad_norm": 0.0, - "learning_rate": 8.227075082697685e-06, - "loss": 0.8384, + "learning_rate": 2.285167197201398e-06, + "loss": 0.9857, "step": 20116 }, { - "epoch": 0.5700643259939357, + "epoch": 0.7871116675796228, "grad_norm": 0.0, - "learning_rate": 8.226171840330531e-06, - "loss": 0.802, + "learning_rate": 2.2843609838649584e-06, + "loss": 1.148, "step": 20117 }, { - "epoch": 0.5700926634361982, + "epoch": 0.7871507942718523, "grad_norm": 0.0, - "learning_rate": 8.225268612904994e-06, - "loss": 0.7895, + "learning_rate": 2.2835548944320127e-06, + "loss": 0.852, "step": 20118 }, { - "epoch": 0.5701210008784607, + "epoch": 0.7871899209640817, "grad_norm": 0.0, - "learning_rate": 8.224365400428676e-06, - "loss": 0.8523, + "learning_rate": 2.2827489289155056e-06, + "loss": 0.9014, "step": 20119 }, { - "epoch": 0.5701493383207231, + "epoch": 0.7872290476563112, "grad_norm": 0.0, - "learning_rate": 8.223462202909187e-06, - "loss": 0.8878, + "learning_rate": 2.2819430873283843e-06, + "loss": 0.9106, "step": 20120 }, { - "epoch": 0.5701776757629856, + "epoch": 0.7872681743485406, "grad_norm": 0.0, - "learning_rate": 8.22255902035414e-06, - "loss": 0.9877, + "learning_rate": 2.2811373696835827e-06, + "loss": 0.9592, "step": 20121 }, { - "epoch": 0.5702060132052481, + "epoch": 0.78730730104077, "grad_norm": 0.0, - "learning_rate": 8.221655852771134e-06, - "loss": 0.8687, + "learning_rate": 2.280331775994046e-06, + "loss": 0.9735, "step": 20122 }, { - "epoch": 0.5702343506475106, + "epoch": 0.7873464277329995, "grad_norm": 0.0, - "learning_rate": 8.220752700167786e-06, - "loss": 0.8983, + "learning_rate": 2.2795263062727013e-06, + "loss": 0.9249, "step": 20123 }, { - "epoch": 0.570262688089773, + "epoch": 0.7873855544252288, "grad_norm": 0.0, - "learning_rate": 8.219849562551695e-06, - "loss": 0.8181, + "learning_rate": 2.2787209605324954e-06, + "loss": 0.9327, "step": 20124 }, { - "epoch": 0.5702910255320355, + "epoch": 0.7874246811174583, "grad_norm": 0.0, - "learning_rate": 8.21894643993047e-06, - "loss": 0.8033, + "learning_rate": 2.2779157387863527e-06, + "loss": 0.9731, "step": 20125 }, { - "epoch": 0.570319362974298, + "epoch": 0.7874638078096877, "grad_norm": 0.0, - "learning_rate": 8.218043332311724e-06, - "loss": 0.882, + "learning_rate": 2.27711064104721e-06, + "loss": 0.8684, "step": 20126 }, { - "epoch": 0.5703477004165604, + "epoch": 0.7875029345019172, "grad_norm": 0.0, - "learning_rate": 8.217140239703059e-06, - "loss": 0.7478, + "learning_rate": 2.2763056673279873e-06, + "loss": 1.0411, "step": 20127 }, { - "epoch": 0.5703760378588228, + "epoch": 0.7875420611941466, "grad_norm": 0.0, - "learning_rate": 8.216237162112085e-06, - "loss": 0.8309, + "learning_rate": 2.2755008176416236e-06, + "loss": 0.9843, "step": 20128 }, { - "epoch": 0.5704043753010853, + "epoch": 0.7875811878863761, "grad_norm": 0.0, - "learning_rate": 8.215334099546411e-06, - "loss": 0.8864, + "learning_rate": 2.274696092001034e-06, + "loss": 1.0374, "step": 20129 }, { - "epoch": 0.5704327127433478, + "epoch": 0.7876203145786055, "grad_norm": 0.0, - "learning_rate": 8.214431052013636e-06, - "loss": 0.8551, + "learning_rate": 2.2738914904191477e-06, + "loss": 0.8611, "step": 20130 }, { - "epoch": 0.5704610501856102, + "epoch": 0.787659441270835, "grad_norm": 0.0, - "learning_rate": 8.213528019521372e-06, - "loss": 0.8267, + "learning_rate": 2.2730870129088767e-06, + "loss": 1.0607, "step": 20131 }, { - "epoch": 0.5704893876278727, + "epoch": 0.7876985679630644, "grad_norm": 0.0, - "learning_rate": 8.212625002077223e-06, - "loss": 0.8381, + "learning_rate": 2.2722826594831515e-06, + "loss": 1.0618, "step": 20132 }, { - "epoch": 0.5705177250701352, + "epoch": 0.7877376946552939, "grad_norm": 0.0, - "learning_rate": 8.2117219996888e-06, - "loss": 0.7677, + "learning_rate": 2.271478430154881e-06, + "loss": 0.9294, "step": 20133 }, { - "epoch": 0.5705460625123976, + "epoch": 0.7877768213475232, "grad_norm": 0.0, - "learning_rate": 8.21081901236371e-06, - "loss": 0.8886, + "learning_rate": 2.2706743249369855e-06, + "loss": 0.9501, "step": 20134 }, { - "epoch": 0.5705743999546601, + "epoch": 0.7878159480397527, "grad_norm": 0.0, - "learning_rate": 8.20991604010955e-06, - "loss": 0.8383, + "learning_rate": 2.269870343842372e-06, + "loss": 1.0004, "step": 20135 }, { - "epoch": 0.5706027373969226, + "epoch": 0.7878550747319821, "grad_norm": 0.0, - "learning_rate": 8.209013082933932e-06, - "loss": 0.8495, + "learning_rate": 2.269066486883954e-06, + "loss": 0.8585, "step": 20136 }, { - "epoch": 0.570631074839185, + "epoch": 0.7878942014242116, "grad_norm": 0.0, - "learning_rate": 8.208110140844467e-06, - "loss": 0.8083, + "learning_rate": 2.2682627540746415e-06, + "loss": 0.8376, "step": 20137 }, { - "epoch": 0.5706594122814475, + "epoch": 0.787933328116441, "grad_norm": 0.0, - "learning_rate": 8.20720721384875e-06, - "loss": 0.8174, + "learning_rate": 2.267459145427343e-06, + "loss": 0.8859, "step": 20138 }, { - "epoch": 0.57068774972371, + "epoch": 0.7879724548086705, "grad_norm": 0.0, - "learning_rate": 8.206304301954397e-06, - "loss": 0.7749, + "learning_rate": 2.2666556609549597e-06, + "loss": 1.0775, "step": 20139 }, { - "epoch": 0.5707160871659724, + "epoch": 0.7880115815008999, "grad_norm": 0.0, - "learning_rate": 8.205401405169007e-06, - "loss": 0.8742, + "learning_rate": 2.2658523006703948e-06, + "loss": 0.8753, "step": 20140 }, { - "epoch": 0.5707444246082348, + "epoch": 0.7880507081931294, "grad_norm": 0.0, - "learning_rate": 8.204498523500185e-06, - "loss": 0.9115, + "learning_rate": 2.265049064586551e-06, + "loss": 0.8994, "step": 20141 }, { - "epoch": 0.5707727620504973, + "epoch": 0.7880898348853588, "grad_norm": 0.0, - "learning_rate": 8.203595656955546e-06, - "loss": 0.9522, + "learning_rate": 2.2642459527163287e-06, + "loss": 1.0865, "step": 20142 }, { - "epoch": 0.5708010994927598, + "epoch": 0.7881289615775883, "grad_norm": 0.0, - "learning_rate": 8.20269280554268e-06, - "loss": 0.9793, + "learning_rate": 2.263442965072621e-06, + "loss": 0.9994, "step": 20143 }, { - "epoch": 0.5708294369350222, + "epoch": 0.7881680882698177, "grad_norm": 0.0, - "learning_rate": 8.201789969269202e-06, - "loss": 0.9137, + "learning_rate": 2.2626401016683275e-06, + "loss": 1.0322, "step": 20144 }, { - "epoch": 0.5708577743772847, + "epoch": 0.7882072149620472, "grad_norm": 0.0, - "learning_rate": 8.200887148142716e-06, - "loss": 0.9247, + "learning_rate": 2.2618373625163347e-06, + "loss": 0.8134, "step": 20145 }, { - "epoch": 0.5708861118195472, + "epoch": 0.7882463416542765, "grad_norm": 0.0, - "learning_rate": 8.199984342170823e-06, - "loss": 0.8739, + "learning_rate": 2.261034747629539e-06, + "loss": 0.8657, "step": 20146 }, { - "epoch": 0.5709144492618097, + "epoch": 0.788285468346506, "grad_norm": 0.0, - "learning_rate": 8.199081551361137e-06, - "loss": 0.7416, + "learning_rate": 2.260232257020827e-06, + "loss": 0.9724, "step": 20147 }, { - "epoch": 0.5709427867040721, + "epoch": 0.7883245950387354, "grad_norm": 0.0, - "learning_rate": 8.198178775721249e-06, - "loss": 0.8636, + "learning_rate": 2.25942989070309e-06, + "loss": 1.0123, "step": 20148 }, { - "epoch": 0.5709711241463346, + "epoch": 0.7883637217309649, "grad_norm": 0.0, - "learning_rate": 8.197276015258773e-06, - "loss": 0.79, + "learning_rate": 2.2586276486892055e-06, + "loss": 0.9657, "step": 20149 }, { - "epoch": 0.570999461588597, + "epoch": 0.7884028484231943, "grad_norm": 0.0, - "learning_rate": 8.196373269981311e-06, - "loss": 0.8669, + "learning_rate": 2.2578255309920605e-06, + "loss": 0.9675, "step": 20150 }, { - "epoch": 0.5710277990308594, + "epoch": 0.7884419751154237, "grad_norm": 0.0, - "learning_rate": 8.195470539896464e-06, - "loss": 0.8178, + "learning_rate": 2.2570235376245397e-06, + "loss": 1.0171, "step": 20151 }, { - "epoch": 0.5710561364731219, + "epoch": 0.7884811018076532, "grad_norm": 0.0, - "learning_rate": 8.194567825011843e-06, - "loss": 0.832, + "learning_rate": 2.2562216685995154e-06, + "loss": 0.9557, "step": 20152 }, { - "epoch": 0.5710844739153844, + "epoch": 0.7885202284998826, "grad_norm": 0.0, - "learning_rate": 8.193665125335043e-06, - "loss": 0.9144, + "learning_rate": 2.255419923929867e-06, + "loss": 1.0297, "step": 20153 }, { - "epoch": 0.5711128113576469, + "epoch": 0.7885593551921121, "grad_norm": 0.0, - "learning_rate": 8.192762440873675e-06, - "loss": 0.8036, + "learning_rate": 2.254618303628471e-06, + "loss": 0.9458, "step": 20154 }, { - "epoch": 0.5711411487999093, + "epoch": 0.7885984818843415, "grad_norm": 0.0, - "learning_rate": 8.191859771635343e-06, - "loss": 0.9486, + "learning_rate": 2.2538168077082013e-06, + "loss": 1.0275, "step": 20155 }, { - "epoch": 0.5711694862421718, + "epoch": 0.788637608576571, "grad_norm": 0.0, - "learning_rate": 8.190957117627642e-06, - "loss": 0.9444, + "learning_rate": 2.2530154361819257e-06, + "loss": 0.9971, "step": 20156 }, { - "epoch": 0.5711978236844343, + "epoch": 0.7886767352688003, "grad_norm": 0.0, - "learning_rate": 8.190054478858183e-06, - "loss": 0.9266, + "learning_rate": 2.252214189062516e-06, + "loss": 0.8773, "step": 20157 }, { - "epoch": 0.5712261611266967, + "epoch": 0.7887158619610298, "grad_norm": 0.0, - "learning_rate": 8.189151855334569e-06, - "loss": 0.8922, + "learning_rate": 2.2514130663628333e-06, + "loss": 1.043, "step": 20158 }, { - "epoch": 0.5712544985689592, + "epoch": 0.7887549886532592, "grad_norm": 0.0, - "learning_rate": 8.188249247064398e-06, - "loss": 0.7789, + "learning_rate": 2.2506120680957522e-06, + "loss": 0.9444, "step": 20159 }, { - "epoch": 0.5712828360112217, + "epoch": 0.7887941153454887, "grad_norm": 0.0, - "learning_rate": 8.187346654055282e-06, - "loss": 0.9788, + "learning_rate": 2.2498111942741284e-06, + "loss": 0.9217, "step": 20160 }, { - "epoch": 0.571311173453484, + "epoch": 0.7888332420377181, "grad_norm": 0.0, - "learning_rate": 8.186444076314813e-06, - "loss": 0.8893, + "learning_rate": 2.2490104449108287e-06, + "loss": 0.9107, "step": 20161 }, { - "epoch": 0.5713395108957465, + "epoch": 0.7888723687299476, "grad_norm": 0.0, - "learning_rate": 8.1855415138506e-06, - "loss": 0.8319, + "learning_rate": 2.2482098200187054e-06, + "loss": 0.9727, "step": 20162 }, { - "epoch": 0.571367848338009, + "epoch": 0.788911495422177, "grad_norm": 0.0, - "learning_rate": 8.184638966670246e-06, - "loss": 0.8682, + "learning_rate": 2.2474093196106183e-06, + "loss": 0.9089, "step": 20163 }, { - "epoch": 0.5713961857802715, + "epoch": 0.7889506221144065, "grad_norm": 0.0, - "learning_rate": 8.183736434781349e-06, - "loss": 0.8297, + "learning_rate": 2.2466089436994232e-06, + "loss": 0.9526, "step": 20164 }, { - "epoch": 0.5714245232225339, + "epoch": 0.7889897488066359, "grad_norm": 0.0, - "learning_rate": 8.182833918191515e-06, - "loss": 0.8545, + "learning_rate": 2.245808692297976e-06, + "loss": 0.8217, "step": 20165 }, { - "epoch": 0.5714528606647964, + "epoch": 0.7890288754988654, "grad_norm": 0.0, - "learning_rate": 8.181931416908351e-06, - "loss": 0.8609, + "learning_rate": 2.245008565419121e-06, + "loss": 1.0745, "step": 20166 }, { - "epoch": 0.5714811981070589, + "epoch": 0.7890680021910947, "grad_norm": 0.0, - "learning_rate": 8.181028930939448e-06, - "loss": 0.9221, + "learning_rate": 2.2442085630757128e-06, + "loss": 1.0242, "step": 20167 }, { - "epoch": 0.5715095355493213, + "epoch": 0.7891071288833242, "grad_norm": 0.0, - "learning_rate": 8.18012646029242e-06, - "loss": 0.9209, + "learning_rate": 2.243408685280595e-06, + "loss": 1.0576, "step": 20168 }, { - "epoch": 0.5715378729915838, + "epoch": 0.7891462555755536, "grad_norm": 0.0, - "learning_rate": 8.179224004974857e-06, - "loss": 0.9679, + "learning_rate": 2.242608932046617e-06, + "loss": 0.9861, "step": 20169 }, { - "epoch": 0.5715662104338463, + "epoch": 0.7891853822677831, "grad_norm": 0.0, - "learning_rate": 8.178321564994368e-06, - "loss": 0.86, + "learning_rate": 2.241809303386616e-06, + "loss": 0.9436, "step": 20170 }, { - "epoch": 0.5715945478761087, + "epoch": 0.7892245089600125, "grad_norm": 0.0, - "learning_rate": 8.177419140358553e-06, - "loss": 0.8364, + "learning_rate": 2.2410097993134393e-06, + "loss": 0.804, "step": 20171 }, { - "epoch": 0.5716228853183711, + "epoch": 0.789263635652242, "grad_norm": 0.0, - "learning_rate": 8.176516731075012e-06, - "loss": 0.9361, + "learning_rate": 2.240210419839918e-06, + "loss": 1.0034, "step": 20172 }, { - "epoch": 0.5716512227606336, + "epoch": 0.7893027623444714, "grad_norm": 0.0, - "learning_rate": 8.175614337151348e-06, - "loss": 0.8323, + "learning_rate": 2.2394111649788986e-06, + "loss": 1.0085, "step": 20173 }, { - "epoch": 0.5716795602028961, + "epoch": 0.7893418890367009, "grad_norm": 0.0, - "learning_rate": 8.174711958595165e-06, - "loss": 1.0352, + "learning_rate": 2.238612034743208e-06, + "loss": 0.9435, "step": 20174 }, { - "epoch": 0.5717078976451585, + "epoch": 0.7893810157289303, "grad_norm": 0.0, - "learning_rate": 8.173809595414057e-06, - "loss": 0.8885, + "learning_rate": 2.2378130291456856e-06, + "loss": 1.0454, "step": 20175 }, { - "epoch": 0.571736235087421, + "epoch": 0.7894201424211598, "grad_norm": 0.0, - "learning_rate": 8.172907247615632e-06, - "loss": 0.9755, + "learning_rate": 2.237014148199155e-06, + "loss": 0.919, "step": 20176 }, { - "epoch": 0.5717645725296835, + "epoch": 0.7894592691133892, "grad_norm": 0.0, - "learning_rate": 8.172004915207485e-06, - "loss": 0.7521, + "learning_rate": 2.236215391916453e-06, + "loss": 1.0288, "step": 20177 }, { - "epoch": 0.571792909971946, + "epoch": 0.7894983958056186, "grad_norm": 0.0, - "learning_rate": 8.17110259819722e-06, - "loss": 0.8837, + "learning_rate": 2.2354167603104016e-06, + "loss": 1.0086, "step": 20178 }, { - "epoch": 0.5718212474142084, + "epoch": 0.789537522497848, "grad_norm": 0.0, - "learning_rate": 8.17020029659244e-06, - "loss": 0.8986, + "learning_rate": 2.234618253393831e-06, + "loss": 0.9257, "step": 20179 }, { - "epoch": 0.5718495848564709, + "epoch": 0.7895766491900774, "grad_norm": 0.0, - "learning_rate": 8.169298010400739e-06, - "loss": 0.8721, + "learning_rate": 2.2338198711795543e-06, + "loss": 0.9652, "step": 20180 }, { - "epoch": 0.5718779222987334, + "epoch": 0.7896157758823069, "grad_norm": 0.0, - "learning_rate": 8.168395739629726e-06, - "loss": 0.9695, + "learning_rate": 2.2330216136804038e-06, + "loss": 1.06, "step": 20181 }, { - "epoch": 0.5719062597409957, + "epoch": 0.7896549025745363, "grad_norm": 0.0, - "learning_rate": 8.16749348428699e-06, - "loss": 0.8073, + "learning_rate": 2.2322234809091925e-06, + "loss": 0.9791, "step": 20182 }, { - "epoch": 0.5719345971832582, + "epoch": 0.7896940292667658, "grad_norm": 0.0, - "learning_rate": 8.166591244380138e-06, - "loss": 0.9124, + "learning_rate": 2.23142547287874e-06, + "loss": 1.0022, "step": 20183 }, { - "epoch": 0.5719629346255207, + "epoch": 0.7897331559589952, "grad_norm": 0.0, - "learning_rate": 8.165689019916769e-06, - "loss": 0.8987, + "learning_rate": 2.2306275896018583e-06, + "loss": 0.8983, "step": 20184 }, { - "epoch": 0.5719912720677831, + "epoch": 0.7897722826512247, "grad_norm": 0.0, - "learning_rate": 8.164786810904482e-06, - "loss": 0.8755, + "learning_rate": 2.2298298310913603e-06, + "loss": 0.9224, "step": 20185 }, { - "epoch": 0.5720196095100456, + "epoch": 0.7898114093434541, "grad_norm": 0.0, - "learning_rate": 8.163884617350876e-06, - "loss": 0.8799, + "learning_rate": 2.2290321973600593e-06, + "loss": 0.9023, "step": 20186 }, { - "epoch": 0.5720479469523081, + "epoch": 0.7898505360356836, "grad_norm": 0.0, - "learning_rate": 8.162982439263558e-06, - "loss": 0.7938, + "learning_rate": 2.228234688420767e-06, + "loss": 0.9476, "step": 20187 }, { - "epoch": 0.5720762843945706, + "epoch": 0.789889662727913, "grad_norm": 0.0, - "learning_rate": 8.162080276650115e-06, - "loss": 0.8867, + "learning_rate": 2.227437304286284e-06, + "loss": 0.9895, "step": 20188 }, { - "epoch": 0.572104621836833, + "epoch": 0.7899287894201424, "grad_norm": 0.0, - "learning_rate": 8.161178129518155e-06, - "loss": 0.7921, + "learning_rate": 2.226640044969418e-06, + "loss": 0.9139, "step": 20189 }, { - "epoch": 0.5721329592790955, + "epoch": 0.7899679161123718, "grad_norm": 0.0, - "learning_rate": 8.160275997875272e-06, - "loss": 0.91, + "learning_rate": 2.2258429104829747e-06, + "loss": 0.9902, "step": 20190 }, { - "epoch": 0.572161296721358, + "epoch": 0.7900070428046013, "grad_norm": 0.0, - "learning_rate": 8.159373881729068e-06, - "loss": 0.827, + "learning_rate": 2.2250459008397505e-06, + "loss": 1.0789, "step": 20191 }, { - "epoch": 0.5721896341636203, + "epoch": 0.7900461694968307, "grad_norm": 0.0, - "learning_rate": 8.158471781087145e-06, - "loss": 1.0001, + "learning_rate": 2.2242490160525467e-06, + "loss": 0.9473, "step": 20192 }, { - "epoch": 0.5722179716058828, + "epoch": 0.7900852961890602, "grad_norm": 0.0, - "learning_rate": 8.157569695957094e-06, - "loss": 0.8127, + "learning_rate": 2.2234522561341632e-06, + "loss": 0.8991, "step": 20193 }, { - "epoch": 0.5722463090481453, + "epoch": 0.7901244228812896, "grad_norm": 0.0, - "learning_rate": 8.156667626346518e-06, - "loss": 0.7936, + "learning_rate": 2.2226556210973882e-06, + "loss": 1.0328, "step": 20194 }, { - "epoch": 0.5722746464904078, + "epoch": 0.7901635495735191, "grad_norm": 0.0, - "learning_rate": 8.15576557226302e-06, - "loss": 1.0005, + "learning_rate": 2.221859110955019e-06, + "loss": 0.8693, "step": 20195 }, { - "epoch": 0.5723029839326702, + "epoch": 0.7902026762657485, "grad_norm": 0.0, - "learning_rate": 8.154863533714189e-06, - "loss": 0.9027, + "learning_rate": 2.2210627257198493e-06, + "loss": 0.9375, "step": 20196 }, { - "epoch": 0.5723313213749327, + "epoch": 0.790241802957978, "grad_norm": 0.0, - "learning_rate": 8.153961510707628e-06, - "loss": 0.8671, + "learning_rate": 2.22026646540466e-06, + "loss": 0.9202, "step": 20197 }, { - "epoch": 0.5723596588171952, + "epoch": 0.7902809296502074, "grad_norm": 0.0, - "learning_rate": 8.153059503250934e-06, - "loss": 0.9321, + "learning_rate": 2.219470330022244e-06, + "loss": 0.9979, "step": 20198 }, { - "epoch": 0.5723879962594576, + "epoch": 0.7903200563424368, "grad_norm": 0.0, - "learning_rate": 8.152157511351704e-06, - "loss": 0.9756, + "learning_rate": 2.2186743195853855e-06, + "loss": 0.9371, "step": 20199 }, { - "epoch": 0.5724163337017201, + "epoch": 0.7903591830346662, "grad_norm": 0.0, - "learning_rate": 8.151255535017544e-06, - "loss": 0.9598, + "learning_rate": 2.2178784341068683e-06, + "loss": 1.0999, "step": 20200 }, { - "epoch": 0.5724446711439826, + "epoch": 0.7903983097268957, "grad_norm": 0.0, - "learning_rate": 8.15035357425604e-06, - "loss": 0.8513, + "learning_rate": 2.21708267359947e-06, + "loss": 0.976, "step": 20201 }, { - "epoch": 0.5724730085862451, + "epoch": 0.7904374364191251, "grad_norm": 0.0, - "learning_rate": 8.149451629074793e-06, - "loss": 0.8624, + "learning_rate": 2.216287038075974e-06, + "loss": 0.9705, "step": 20202 }, { - "epoch": 0.5725013460285074, + "epoch": 0.7904765631113546, "grad_norm": 0.0, - "learning_rate": 8.148549699481406e-06, - "loss": 0.8591, + "learning_rate": 2.2154915275491493e-06, + "loss": 1.0195, "step": 20203 }, { - "epoch": 0.5725296834707699, + "epoch": 0.790515689803584, "grad_norm": 0.0, - "learning_rate": 8.147647785483471e-06, - "loss": 0.9716, + "learning_rate": 2.2146961420317815e-06, + "loss": 1.0897, "step": 20204 }, { - "epoch": 0.5725580209130324, + "epoch": 0.7905548164958135, "grad_norm": 0.0, - "learning_rate": 8.146745887088589e-06, - "loss": 0.9685, + "learning_rate": 2.2139008815366346e-06, + "loss": 0.8438, "step": 20205 }, { - "epoch": 0.5725863583552948, + "epoch": 0.7905939431880429, "grad_norm": 0.0, - "learning_rate": 8.145844004304352e-06, - "loss": 0.9526, + "learning_rate": 2.213105746076487e-06, + "loss": 1.0326, "step": 20206 }, { - "epoch": 0.5726146957975573, + "epoch": 0.7906330698802724, "grad_norm": 0.0, - "learning_rate": 8.144942137138358e-06, - "loss": 0.7905, + "learning_rate": 2.212310735664097e-06, + "loss": 1.0074, "step": 20207 }, { - "epoch": 0.5726430332398198, + "epoch": 0.7906721965725018, "grad_norm": 0.0, - "learning_rate": 8.14404028559821e-06, - "loss": 0.9262, + "learning_rate": 2.2115158503122447e-06, + "loss": 0.8892, "step": 20208 }, { - "epoch": 0.5726713706820822, + "epoch": 0.7907113232647311, "grad_norm": 0.0, - "learning_rate": 8.143138449691495e-06, - "loss": 0.9029, + "learning_rate": 2.210721090033685e-06, + "loss": 0.912, "step": 20209 }, { - "epoch": 0.5726997081243447, + "epoch": 0.7907504499569606, "grad_norm": 0.0, - "learning_rate": 8.142236629425817e-06, - "loss": 0.8929, + "learning_rate": 2.209926454841187e-06, + "loss": 1.017, "step": 20210 }, { - "epoch": 0.5727280455666072, + "epoch": 0.79078957664919, "grad_norm": 0.0, - "learning_rate": 8.141334824808769e-06, - "loss": 0.8019, + "learning_rate": 2.2091319447475057e-06, + "loss": 0.8323, "step": 20211 }, { - "epoch": 0.5727563830088697, + "epoch": 0.7908287033414195, "grad_norm": 0.0, - "learning_rate": 8.140433035847947e-06, - "loss": 0.8914, + "learning_rate": 2.208337559765403e-06, + "loss": 0.9676, "step": 20212 }, { - "epoch": 0.572784720451132, + "epoch": 0.7908678300336489, "grad_norm": 0.0, - "learning_rate": 8.139531262550952e-06, - "loss": 0.8634, + "learning_rate": 2.2075432999076353e-06, + "loss": 0.9471, "step": 20213 }, { - "epoch": 0.5728130578933945, + "epoch": 0.7909069567258784, "grad_norm": 0.0, - "learning_rate": 8.138629504925372e-06, - "loss": 0.9673, + "learning_rate": 2.206749165186961e-06, + "loss": 0.9734, "step": 20214 }, { - "epoch": 0.572841395335657, + "epoch": 0.7909460834181078, "grad_norm": 0.0, - "learning_rate": 8.137727762978807e-06, - "loss": 0.8146, + "learning_rate": 2.2059551556161265e-06, + "loss": 1.0102, "step": 20215 }, { - "epoch": 0.5728697327779194, + "epoch": 0.7909852101103373, "grad_norm": 0.0, - "learning_rate": 8.136826036718854e-06, - "loss": 0.9119, + "learning_rate": 2.205161271207885e-06, + "loss": 0.9564, "step": 20216 }, { - "epoch": 0.5728980702201819, + "epoch": 0.7910243368025667, "grad_norm": 0.0, - "learning_rate": 8.135924326153106e-06, - "loss": 0.9136, + "learning_rate": 2.2043675119749874e-06, + "loss": 0.993, "step": 20217 }, { - "epoch": 0.5729264076624444, + "epoch": 0.7910634634947962, "grad_norm": 0.0, - "learning_rate": 8.135022631289164e-06, - "loss": 0.8894, + "learning_rate": 2.2035738779301806e-06, + "loss": 1.0099, "step": 20218 }, { - "epoch": 0.5729547451047069, + "epoch": 0.7911025901870256, "grad_norm": 0.0, - "learning_rate": 8.134120952134613e-06, - "loss": 0.8891, + "learning_rate": 2.202780369086206e-06, + "loss": 0.8793, "step": 20219 }, { - "epoch": 0.5729830825469693, + "epoch": 0.791141716879255, "grad_norm": 0.0, - "learning_rate": 8.133219288697056e-06, - "loss": 0.9371, + "learning_rate": 2.201986985455811e-06, + "loss": 1.0259, "step": 20220 }, { - "epoch": 0.5730114199892318, + "epoch": 0.7911808435714844, "grad_norm": 0.0, - "learning_rate": 8.132317640984088e-06, - "loss": 0.9378, + "learning_rate": 2.201193727051727e-06, + "loss": 0.9049, "step": 20221 }, { - "epoch": 0.5730397574314943, + "epoch": 0.7912199702637139, "grad_norm": 0.0, - "learning_rate": 8.131416009003301e-06, - "loss": 0.8827, + "learning_rate": 2.2004005938867055e-06, + "loss": 0.8919, "step": 20222 }, { - "epoch": 0.5730680948737567, + "epoch": 0.7912590969559433, "grad_norm": 0.0, - "learning_rate": 8.130514392762289e-06, - "loss": 0.8846, + "learning_rate": 2.1996075859734746e-06, + "loss": 0.94, "step": 20223 }, { - "epoch": 0.5730964323160191, + "epoch": 0.7912982236481728, "grad_norm": 0.0, - "learning_rate": 8.12961279226865e-06, - "loss": 0.9551, + "learning_rate": 2.198814703324774e-06, + "loss": 0.9367, "step": 20224 }, { - "epoch": 0.5731247697582816, + "epoch": 0.7913373503404022, "grad_norm": 0.0, - "learning_rate": 8.128711207529976e-06, - "loss": 0.8836, + "learning_rate": 2.1980219459533282e-06, + "loss": 0.9332, "step": 20225 }, { - "epoch": 0.5731531072005441, + "epoch": 0.7913764770326317, "grad_norm": 0.0, - "learning_rate": 8.127809638553868e-06, - "loss": 0.911, + "learning_rate": 2.197229313871878e-06, + "loss": 1.0041, "step": 20226 }, { - "epoch": 0.5731814446428065, + "epoch": 0.7914156037248611, "grad_norm": 0.0, - "learning_rate": 8.126908085347907e-06, - "loss": 0.9218, + "learning_rate": 2.196436807093146e-06, + "loss": 0.9692, "step": 20227 }, { - "epoch": 0.573209782085069, + "epoch": 0.7914547304170906, "grad_norm": 0.0, - "learning_rate": 8.126006547919697e-06, - "loss": 0.9387, + "learning_rate": 2.1956444256298638e-06, + "loss": 0.988, "step": 20228 }, { - "epoch": 0.5732381195273315, + "epoch": 0.79149385710932, "grad_norm": 0.0, - "learning_rate": 8.125105026276832e-06, - "loss": 0.9098, + "learning_rate": 2.194852169494749e-06, + "loss": 1.0904, "step": 20229 }, { - "epoch": 0.5732664569695939, + "epoch": 0.7915329838015495, "grad_norm": 0.0, - "learning_rate": 8.1242035204269e-06, - "loss": 0.9479, + "learning_rate": 2.1940600387005284e-06, + "loss": 0.9311, "step": 20230 }, { - "epoch": 0.5732947944118564, + "epoch": 0.7915721104937788, "grad_norm": 0.0, - "learning_rate": 8.1233020303775e-06, - "loss": 0.8884, + "learning_rate": 2.193268033259921e-06, + "loss": 0.9083, "step": 20231 }, { - "epoch": 0.5733231318541189, + "epoch": 0.7916112371860083, "grad_norm": 0.0, - "learning_rate": 8.122400556136226e-06, - "loss": 0.8202, + "learning_rate": 2.192476153185651e-06, + "loss": 1.0446, "step": 20232 }, { - "epoch": 0.5733514692963813, + "epoch": 0.7916503638782377, "grad_norm": 0.0, - "learning_rate": 8.121499097710667e-06, - "loss": 0.8336, + "learning_rate": 2.1916843984904266e-06, + "loss": 1.0251, "step": 20233 }, { - "epoch": 0.5733798067386437, + "epoch": 0.7916894905704672, "grad_norm": 0.0, - "learning_rate": 8.120597655108422e-06, - "loss": 0.9592, + "learning_rate": 2.1908927691869673e-06, + "loss": 0.8955, "step": 20234 }, { - "epoch": 0.5734081441809062, + "epoch": 0.7917286172626966, "grad_norm": 0.0, - "learning_rate": 8.119696228337077e-06, - "loss": 0.7797, + "learning_rate": 2.1901012652879872e-06, + "loss": 0.9214, "step": 20235 }, { - "epoch": 0.5734364816231687, + "epoch": 0.791767743954926, "grad_norm": 0.0, - "learning_rate": 8.118794817404229e-06, - "loss": 0.8797, + "learning_rate": 2.189309886806191e-06, + "loss": 0.9086, "step": 20236 }, { - "epoch": 0.5734648190654311, + "epoch": 0.7918068706471555, "grad_norm": 0.0, - "learning_rate": 8.117893422317473e-06, - "loss": 0.8319, + "learning_rate": 2.1885186337542918e-06, + "loss": 0.9449, "step": 20237 }, { - "epoch": 0.5734931565076936, + "epoch": 0.7918459973393849, "grad_norm": 0.0, - "learning_rate": 8.116992043084397e-06, - "loss": 0.9018, + "learning_rate": 2.187727506144994e-06, + "loss": 1.0573, "step": 20238 }, { - "epoch": 0.5735214939499561, + "epoch": 0.7918851240316144, "grad_norm": 0.0, - "learning_rate": 8.116090679712601e-06, - "loss": 0.9724, + "learning_rate": 2.1869365039910075e-06, + "loss": 0.9336, "step": 20239 }, { - "epoch": 0.5735498313922185, + "epoch": 0.7919242507238438, "grad_norm": 0.0, - "learning_rate": 8.115189332209667e-06, - "loss": 0.8044, + "learning_rate": 2.1861456273050265e-06, + "loss": 0.9555, "step": 20240 }, { - "epoch": 0.573578168834481, + "epoch": 0.7919633774160733, "grad_norm": 0.0, - "learning_rate": 8.114288000583194e-06, - "loss": 0.9366, + "learning_rate": 2.1853548760997577e-06, + "loss": 1.0214, "step": 20241 }, { - "epoch": 0.5736065062767435, + "epoch": 0.7920025041083026, "grad_norm": 0.0, - "learning_rate": 8.113386684840777e-06, - "loss": 0.8863, + "learning_rate": 2.184564250387895e-06, + "loss": 0.9408, "step": 20242 }, { - "epoch": 0.573634843719006, + "epoch": 0.7920416308005321, "grad_norm": 0.0, - "learning_rate": 8.112485384990001e-06, - "loss": 0.8732, + "learning_rate": 2.1837737501821375e-06, + "loss": 0.8964, "step": 20243 }, { - "epoch": 0.5736631811612684, + "epoch": 0.7920807574927615, "grad_norm": 0.0, - "learning_rate": 8.111584101038462e-06, - "loss": 0.8932, + "learning_rate": 2.182983375495179e-06, + "loss": 0.9443, "step": 20244 }, { - "epoch": 0.5736915186035308, + "epoch": 0.792119884184991, "grad_norm": 0.0, - "learning_rate": 8.110682832993757e-06, - "loss": 0.8979, + "learning_rate": 2.1821931263397156e-06, + "loss": 0.9177, "step": 20245 }, { - "epoch": 0.5737198560457933, + "epoch": 0.7921590108772204, "grad_norm": 0.0, - "learning_rate": 8.109781580863465e-06, - "loss": 0.8813, + "learning_rate": 2.1814030027284306e-06, + "loss": 1.0178, "step": 20246 }, { - "epoch": 0.5737481934880557, + "epoch": 0.7921981375694499, "grad_norm": 0.0, - "learning_rate": 8.10888034465519e-06, - "loss": 0.8199, + "learning_rate": 2.1806130046740172e-06, + "loss": 0.963, "step": 20247 }, { - "epoch": 0.5737765309303182, + "epoch": 0.7922372642616793, "grad_norm": 0.0, - "learning_rate": 8.107979124376516e-06, - "loss": 0.8289, + "learning_rate": 2.1798231321891593e-06, + "loss": 1.0021, "step": 20248 }, { - "epoch": 0.5738048683725807, + "epoch": 0.7922763909539088, "grad_norm": 0.0, - "learning_rate": 8.107077920035032e-06, - "loss": 0.9484, + "learning_rate": 2.1790333852865463e-06, + "loss": 0.9681, "step": 20249 }, { - "epoch": 0.5738332058148432, + "epoch": 0.7923155176461382, "grad_norm": 0.0, - "learning_rate": 8.106176731638338e-06, - "loss": 0.765, + "learning_rate": 2.1782437639788535e-06, + "loss": 0.9628, "step": 20250 }, { - "epoch": 0.5738615432571056, + "epoch": 0.7923546443383677, "grad_norm": 0.0, - "learning_rate": 8.10527555919402e-06, - "loss": 0.9308, + "learning_rate": 2.177454268278768e-06, + "loss": 0.9085, "step": 20251 }, { - "epoch": 0.5738898806993681, + "epoch": 0.792393771030597, "grad_norm": 0.0, - "learning_rate": 8.104374402709669e-06, - "loss": 0.8516, + "learning_rate": 2.176664898198959e-06, + "loss": 0.8659, "step": 20252 }, { - "epoch": 0.5739182181416306, + "epoch": 0.7924328977228265, "grad_norm": 0.0, - "learning_rate": 8.10347326219288e-06, - "loss": 0.9194, + "learning_rate": 2.1758756537521143e-06, + "loss": 1.0593, "step": 20253 }, { - "epoch": 0.573946555583893, + "epoch": 0.7924720244150559, "grad_norm": 0.0, - "learning_rate": 8.102572137651234e-06, - "loss": 0.8835, + "learning_rate": 2.1750865349508997e-06, + "loss": 1.0988, "step": 20254 }, { - "epoch": 0.5739748930261555, + "epoch": 0.7925111511072854, "grad_norm": 0.0, - "learning_rate": 8.101671029092332e-06, - "loss": 0.865, + "learning_rate": 2.1742975418079927e-06, + "loss": 0.9023, "step": 20255 }, { - "epoch": 0.574003230468418, + "epoch": 0.7925502777995148, "grad_norm": 0.0, - "learning_rate": 8.100769936523758e-06, - "loss": 0.8484, + "learning_rate": 2.1735086743360556e-06, + "loss": 0.9441, "step": 20256 }, { - "epoch": 0.5740315679106803, + "epoch": 0.7925894044917443, "grad_norm": 0.0, - "learning_rate": 8.099868859953101e-06, - "loss": 0.9257, + "learning_rate": 2.1727199325477676e-06, + "loss": 1.1052, "step": 20257 }, { - "epoch": 0.5740599053529428, + "epoch": 0.7926285311839737, "grad_norm": 0.0, - "learning_rate": 8.098967799387962e-06, - "loss": 0.8112, + "learning_rate": 2.1719313164557863e-06, + "loss": 1.093, "step": 20258 }, { - "epoch": 0.5740882427952053, + "epoch": 0.7926676578762032, "grad_norm": 0.0, - "learning_rate": 8.098066754835916e-06, - "loss": 0.8759, + "learning_rate": 2.171142826072783e-06, + "loss": 0.9159, "step": 20259 }, { - "epoch": 0.5741165802374678, + "epoch": 0.7927067845684326, "grad_norm": 0.0, - "learning_rate": 8.09716572630456e-06, - "loss": 0.7632, + "learning_rate": 2.1703544614114114e-06, + "loss": 1.0022, "step": 20260 }, { - "epoch": 0.5741449176797302, + "epoch": 0.7927459112606621, "grad_norm": 0.0, - "learning_rate": 8.096264713801489e-06, - "loss": 0.8262, + "learning_rate": 2.1695662224843373e-06, + "loss": 0.9252, "step": 20261 }, { - "epoch": 0.5741732551219927, + "epoch": 0.7927850379528915, "grad_norm": 0.0, - "learning_rate": 8.095363717334284e-06, - "loss": 0.8532, + "learning_rate": 2.168778109304217e-06, + "loss": 0.9748, "step": 20262 }, { - "epoch": 0.5742015925642552, + "epoch": 0.792824164645121, "grad_norm": 0.0, - "learning_rate": 8.09446273691054e-06, - "loss": 0.8043, + "learning_rate": 2.1679901218837098e-06, + "loss": 0.9554, "step": 20263 }, { - "epoch": 0.5742299300065176, + "epoch": 0.7928632913373503, "grad_norm": 0.0, - "learning_rate": 8.093561772537841e-06, - "loss": 0.8122, + "learning_rate": 2.167202260235465e-06, + "loss": 0.9832, "step": 20264 }, { - "epoch": 0.5742582674487801, + "epoch": 0.7929024180295797, "grad_norm": 0.0, - "learning_rate": 8.09266082422378e-06, - "loss": 0.8788, + "learning_rate": 2.1664145243721358e-06, + "loss": 1.0503, "step": 20265 }, { - "epoch": 0.5742866048910426, + "epoch": 0.7929415447218092, "grad_norm": 0.0, - "learning_rate": 8.091759891975948e-06, - "loss": 0.9344, + "learning_rate": 2.1656269143063736e-06, + "loss": 0.9193, "step": 20266 }, { - "epoch": 0.574314942333305, + "epoch": 0.7929806714140386, "grad_norm": 0.0, - "learning_rate": 8.090858975801927e-06, - "loss": 0.9901, + "learning_rate": 2.1648394300508293e-06, + "loss": 0.9581, "step": 20267 }, { - "epoch": 0.5743432797755674, + "epoch": 0.7930197981062681, "grad_norm": 0.0, - "learning_rate": 8.089958075709311e-06, - "loss": 0.7279, + "learning_rate": 2.1640520716181435e-06, + "loss": 1.0865, "step": 20268 }, { - "epoch": 0.5743716172178299, + "epoch": 0.7930589247984975, "grad_norm": 0.0, - "learning_rate": 8.089057191705686e-06, - "loss": 0.8247, + "learning_rate": 2.163264839020964e-06, + "loss": 0.9545, "step": 20269 }, { - "epoch": 0.5743999546600924, + "epoch": 0.793098051490727, "grad_norm": 0.0, - "learning_rate": 8.088156323798644e-06, - "loss": 0.7894, + "learning_rate": 2.162477732271926e-06, + "loss": 0.9865, "step": 20270 }, { - "epoch": 0.5744282921023548, + "epoch": 0.7931371781829564, "grad_norm": 0.0, - "learning_rate": 8.087255471995774e-06, - "loss": 0.8704, + "learning_rate": 2.1616907513836805e-06, + "loss": 1.0531, "step": 20271 }, { - "epoch": 0.5744566295446173, + "epoch": 0.7931763048751859, "grad_norm": 0.0, - "learning_rate": 8.086354636304657e-06, - "loss": 0.9273, + "learning_rate": 2.1609038963688567e-06, + "loss": 0.876, "step": 20272 }, { - "epoch": 0.5744849669868798, + "epoch": 0.7932154315674153, "grad_norm": 0.0, - "learning_rate": 8.085453816732885e-06, - "loss": 0.8724, + "learning_rate": 2.1601171672400966e-06, + "loss": 0.999, "step": 20273 }, { - "epoch": 0.5745133044291423, + "epoch": 0.7932545582596447, "grad_norm": 0.0, - "learning_rate": 8.084553013288048e-06, - "loss": 0.8083, + "learning_rate": 2.159330564010028e-06, + "loss": 1.0619, "step": 20274 }, { - "epoch": 0.5745416418714047, + "epoch": 0.7932936849518741, "grad_norm": 0.0, - "learning_rate": 8.083652225977734e-06, - "loss": 0.7606, + "learning_rate": 2.1585440866912854e-06, + "loss": 0.9849, "step": 20275 }, { - "epoch": 0.5745699793136672, + "epoch": 0.7933328116441036, "grad_norm": 0.0, - "learning_rate": 8.082751454809529e-06, - "loss": 0.8664, + "learning_rate": 2.1577577352964984e-06, + "loss": 0.9201, "step": 20276 }, { - "epoch": 0.5745983167559297, + "epoch": 0.793371938336333, "grad_norm": 0.0, - "learning_rate": 8.081850699791017e-06, - "loss": 0.9473, + "learning_rate": 2.156971509838298e-06, + "loss": 1.111, "step": 20277 }, { - "epoch": 0.574626654198192, + "epoch": 0.7934110650285625, "grad_norm": 0.0, - "learning_rate": 8.08094996092979e-06, - "loss": 0.852, + "learning_rate": 2.1561854103293057e-06, + "loss": 0.8114, "step": 20278 }, { - "epoch": 0.5746549916404545, + "epoch": 0.7934501917207919, "grad_norm": 0.0, - "learning_rate": 8.080049238233439e-06, - "loss": 0.7577, + "learning_rate": 2.155399436782146e-06, + "loss": 1.012, "step": 20279 }, { - "epoch": 0.574683329082717, + "epoch": 0.7934893184130214, "grad_norm": 0.0, - "learning_rate": 8.07914853170954e-06, - "loss": 0.8882, + "learning_rate": 2.1546135892094443e-06, + "loss": 0.9398, "step": 20280 }, { - "epoch": 0.5747116665249794, + "epoch": 0.7935284451052508, "grad_norm": 0.0, - "learning_rate": 8.078247841365686e-06, - "loss": 0.9027, + "learning_rate": 2.1538278676238156e-06, + "loss": 0.9135, "step": 20281 }, { - "epoch": 0.5747400039672419, + "epoch": 0.7935675717974803, "grad_norm": 0.0, - "learning_rate": 8.077347167209467e-06, - "loss": 0.8337, + "learning_rate": 2.1530422720378785e-06, + "loss": 0.9955, "step": 20282 }, { - "epoch": 0.5747683414095044, + "epoch": 0.7936066984897097, "grad_norm": 0.0, - "learning_rate": 8.076446509248466e-06, - "loss": 0.8577, + "learning_rate": 2.1522568024642498e-06, + "loss": 0.9342, "step": 20283 }, { - "epoch": 0.5747966788517669, + "epoch": 0.7936458251819392, "grad_norm": 0.0, - "learning_rate": 8.075545867490272e-06, - "loss": 0.8611, + "learning_rate": 2.151471458915546e-06, + "loss": 0.8957, "step": 20284 }, { - "epoch": 0.5748250162940293, + "epoch": 0.7936849518741685, "grad_norm": 0.0, - "learning_rate": 8.074645241942466e-06, - "loss": 0.9415, + "learning_rate": 2.150686241404374e-06, + "loss": 0.9617, "step": 20285 }, { - "epoch": 0.5748533537362918, + "epoch": 0.793724078566398, "grad_norm": 0.0, - "learning_rate": 8.07374463261264e-06, - "loss": 0.8835, + "learning_rate": 2.1499011499433463e-06, + "loss": 1.0559, "step": 20286 }, { - "epoch": 0.5748816911785543, + "epoch": 0.7937632052586274, "grad_norm": 0.0, - "learning_rate": 8.07284403950838e-06, - "loss": 0.9471, + "learning_rate": 2.149116184545068e-06, + "loss": 0.8569, "step": 20287 }, { - "epoch": 0.5749100286208166, + "epoch": 0.7938023319508569, "grad_norm": 0.0, - "learning_rate": 8.071943462637267e-06, - "loss": 0.9094, + "learning_rate": 2.1483313452221453e-06, + "loss": 1.0273, "step": 20288 }, { - "epoch": 0.5749383660630791, + "epoch": 0.7938414586430863, "grad_norm": 0.0, - "learning_rate": 8.071042902006896e-06, - "loss": 0.9158, + "learning_rate": 2.147546631987183e-06, + "loss": 0.9747, "step": 20289 }, { - "epoch": 0.5749667035053416, + "epoch": 0.7938805853353158, "grad_norm": 0.0, - "learning_rate": 8.070142357624841e-06, - "loss": 0.8896, + "learning_rate": 2.146762044852785e-06, + "loss": 1.0283, "step": 20290 }, { - "epoch": 0.5749950409476041, + "epoch": 0.7939197120275452, "grad_norm": 0.0, - "learning_rate": 8.069241829498694e-06, - "loss": 0.8636, + "learning_rate": 2.1459775838315445e-06, + "loss": 0.9602, "step": 20291 }, { - "epoch": 0.5750233783898665, + "epoch": 0.7939588387197747, "grad_norm": 0.0, - "learning_rate": 8.068341317636045e-06, - "loss": 0.8152, + "learning_rate": 2.1451932489360628e-06, + "loss": 1.0381, "step": 20292 }, { - "epoch": 0.575051715832129, + "epoch": 0.7939979654120041, "grad_norm": 0.0, - "learning_rate": 8.06744082204447e-06, - "loss": 0.7594, + "learning_rate": 2.144409040178934e-06, + "loss": 1.0007, "step": 20293 }, { - "epoch": 0.5750800532743915, + "epoch": 0.7940370921042335, "grad_norm": 0.0, - "learning_rate": 8.066540342731558e-06, - "loss": 0.9757, + "learning_rate": 2.1436249575727564e-06, + "loss": 0.9828, "step": 20294 }, { - "epoch": 0.5751083907166539, + "epoch": 0.794076218796463, "grad_norm": 0.0, - "learning_rate": 8.065639879704896e-06, - "loss": 0.8637, + "learning_rate": 2.1428410011301136e-06, + "loss": 0.9409, "step": 20295 }, { - "epoch": 0.5751367281589164, + "epoch": 0.7941153454886923, "grad_norm": 0.0, - "learning_rate": 8.064739432972068e-06, - "loss": 0.8349, + "learning_rate": 2.1420571708635997e-06, + "loss": 0.9639, "step": 20296 }, { - "epoch": 0.5751650656011789, + "epoch": 0.7941544721809218, "grad_norm": 0.0, - "learning_rate": 8.06383900254066e-06, - "loss": 0.881, + "learning_rate": 2.1412734667858003e-06, + "loss": 0.9252, "step": 20297 }, { - "epoch": 0.5751934030434414, + "epoch": 0.7941935988731512, "grad_norm": 0.0, - "learning_rate": 8.062938588418251e-06, - "loss": 0.7747, + "learning_rate": 2.140489888909305e-06, + "loss": 1.018, "step": 20298 }, { - "epoch": 0.5752217404857037, + "epoch": 0.7942327255653807, "grad_norm": 0.0, - "learning_rate": 8.062038190612431e-06, - "loss": 0.8571, + "learning_rate": 2.1397064372466903e-06, + "loss": 0.8202, "step": 20299 }, { - "epoch": 0.5752500779279662, + "epoch": 0.7942718522576101, "grad_norm": 0.0, - "learning_rate": 8.061137809130785e-06, - "loss": 0.9153, + "learning_rate": 2.1389231118105437e-06, + "loss": 0.8221, "step": 20300 }, { - "epoch": 0.5752784153702287, + "epoch": 0.7943109789498396, "grad_norm": 0.0, - "learning_rate": 8.060237443980892e-06, - "loss": 0.8571, + "learning_rate": 2.138139912613436e-06, + "loss": 0.9843, "step": 20301 }, { - "epoch": 0.5753067528124911, + "epoch": 0.794350105642069, "grad_norm": 0.0, - "learning_rate": 8.05933709517034e-06, - "loss": 0.9071, + "learning_rate": 2.1373568396679557e-06, + "loss": 1.0272, "step": 20302 }, { - "epoch": 0.5753350902547536, + "epoch": 0.7943892323342985, "grad_norm": 0.0, - "learning_rate": 8.058436762706718e-06, - "loss": 0.8623, + "learning_rate": 2.1365738929866686e-06, + "loss": 0.9056, "step": 20303 }, { - "epoch": 0.5753634276970161, + "epoch": 0.7944283590265279, "grad_norm": 0.0, - "learning_rate": 8.057536446597598e-06, - "loss": 0.7812, + "learning_rate": 2.135791072582154e-06, + "loss": 0.9008, "step": 20304 }, { - "epoch": 0.5753917651392785, + "epoch": 0.7944674857187574, "grad_norm": 0.0, - "learning_rate": 8.056636146850575e-06, - "loss": 0.7919, + "learning_rate": 2.135008378466975e-06, + "loss": 1.1369, "step": 20305 }, { - "epoch": 0.575420102581541, + "epoch": 0.7945066124109867, "grad_norm": 0.0, - "learning_rate": 8.055735863473222e-06, - "loss": 0.796, + "learning_rate": 2.134225810653713e-06, + "loss": 0.9998, "step": 20306 }, { - "epoch": 0.5754484400238035, + "epoch": 0.7945457391032162, "grad_norm": 0.0, - "learning_rate": 8.05483559647313e-06, - "loss": 0.9329, + "learning_rate": 2.133443369154924e-06, + "loss": 0.8565, "step": 20307 }, { - "epoch": 0.575476777466066, + "epoch": 0.7945848657954456, "grad_norm": 0.0, - "learning_rate": 8.053935345857879e-06, - "loss": 0.9212, + "learning_rate": 2.1326610539831795e-06, + "loss": 0.9523, "step": 20308 }, { - "epoch": 0.5755051149083283, + "epoch": 0.7946239924876751, "grad_norm": 0.0, - "learning_rate": 8.053035111635054e-06, - "loss": 0.8068, + "learning_rate": 2.131878865151038e-06, + "loss": 0.934, "step": 20309 }, { - "epoch": 0.5755334523505908, + "epoch": 0.7946631191799045, "grad_norm": 0.0, - "learning_rate": 8.052134893812236e-06, - "loss": 0.8036, + "learning_rate": 2.131096802671062e-06, + "loss": 1.1244, "step": 20310 }, { - "epoch": 0.5755617897928533, + "epoch": 0.794702245872134, "grad_norm": 0.0, - "learning_rate": 8.051234692397013e-06, - "loss": 0.8473, + "learning_rate": 2.1303148665558125e-06, + "loss": 0.9354, "step": 20311 }, { - "epoch": 0.5755901272351157, + "epoch": 0.7947413725643634, "grad_norm": 0.0, - "learning_rate": 8.05033450739696e-06, - "loss": 0.8158, + "learning_rate": 2.1295330568178465e-06, + "loss": 0.8744, "step": 20312 }, { - "epoch": 0.5756184646773782, + "epoch": 0.7947804992565929, "grad_norm": 0.0, - "learning_rate": 8.049434338819666e-06, - "loss": 0.8215, + "learning_rate": 2.1287513734697153e-06, + "loss": 0.8979, "step": 20313 }, { - "epoch": 0.5756468021196407, + "epoch": 0.7948196259488223, "grad_norm": 0.0, - "learning_rate": 8.048534186672708e-06, - "loss": 0.879, + "learning_rate": 2.1279698165239737e-06, + "loss": 0.9705, "step": 20314 }, { - "epoch": 0.5756751395619032, + "epoch": 0.7948587526410518, "grad_norm": 0.0, - "learning_rate": 8.04763405096367e-06, - "loss": 0.8699, + "learning_rate": 2.127188385993172e-06, + "loss": 0.9398, "step": 20315 }, { - "epoch": 0.5757034770041656, + "epoch": 0.7948978793332812, "grad_norm": 0.0, - "learning_rate": 8.046733931700142e-06, - "loss": 0.935, + "learning_rate": 2.126407081889863e-06, + "loss": 0.9392, "step": 20316 }, { - "epoch": 0.5757318144464281, + "epoch": 0.7949370060255107, "grad_norm": 0.0, - "learning_rate": 8.045833828889695e-06, - "loss": 0.9318, + "learning_rate": 2.125625904226587e-06, + "loss": 1.0306, "step": 20317 }, { - "epoch": 0.5757601518886906, + "epoch": 0.79497613271774, "grad_norm": 0.0, - "learning_rate": 8.044933742539919e-06, - "loss": 0.8642, + "learning_rate": 2.124844853015895e-06, + "loss": 0.9484, "step": 20318 }, { - "epoch": 0.575788489330953, + "epoch": 0.7950152594099695, "grad_norm": 0.0, - "learning_rate": 8.044033672658387e-06, - "loss": 0.7888, + "learning_rate": 2.1240639282703235e-06, + "loss": 0.9783, "step": 20319 }, { - "epoch": 0.5758168267732154, + "epoch": 0.7950543861021989, "grad_norm": 0.0, - "learning_rate": 8.043133619252687e-06, - "loss": 0.8723, + "learning_rate": 2.123283130002416e-06, + "loss": 0.9885, "step": 20320 }, { - "epoch": 0.5758451642154779, + "epoch": 0.7950935127944284, "grad_norm": 0.0, - "learning_rate": 8.0422335823304e-06, - "loss": 0.8257, + "learning_rate": 2.1225024582247113e-06, + "loss": 0.9558, "step": 20321 }, { - "epoch": 0.5758735016577404, + "epoch": 0.7951326394866578, "grad_norm": 0.0, - "learning_rate": 8.041333561899105e-06, - "loss": 0.9614, + "learning_rate": 2.121721912949749e-06, + "loss": 0.9481, "step": 20322 }, { - "epoch": 0.5759018391000028, + "epoch": 0.7951717661788872, "grad_norm": 0.0, - "learning_rate": 8.040433557966385e-06, - "loss": 0.867, + "learning_rate": 2.1209414941900584e-06, + "loss": 1.0604, "step": 20323 }, { - "epoch": 0.5759301765422653, + "epoch": 0.7952108928711167, "grad_norm": 0.0, - "learning_rate": 8.039533570539826e-06, - "loss": 0.9544, + "learning_rate": 2.120161201958174e-06, + "loss": 0.8739, "step": 20324 }, { - "epoch": 0.5759585139845278, + "epoch": 0.7952500195633461, "grad_norm": 0.0, - "learning_rate": 8.038633599626998e-06, - "loss": 0.9271, + "learning_rate": 2.11938103626663e-06, + "loss": 0.9713, "step": 20325 }, { - "epoch": 0.5759868514267902, + "epoch": 0.7952891462555756, "grad_norm": 0.0, - "learning_rate": 8.03773364523549e-06, - "loss": 0.92, + "learning_rate": 2.1186009971279486e-06, + "loss": 1.0341, "step": 20326 }, { - "epoch": 0.5760151888690527, + "epoch": 0.795328272947805, "grad_norm": 0.0, - "learning_rate": 8.036833707372879e-06, - "loss": 0.9173, + "learning_rate": 2.117821084554659e-06, + "loss": 0.9984, "step": 20327 }, { - "epoch": 0.5760435263113152, + "epoch": 0.7953673996400344, "grad_norm": 0.0, - "learning_rate": 8.035933786046745e-06, - "loss": 0.9386, + "learning_rate": 2.117041298559286e-06, + "loss": 1.0506, "step": 20328 }, { - "epoch": 0.5760718637535776, + "epoch": 0.7954065263322638, "grad_norm": 0.0, - "learning_rate": 8.035033881264676e-06, - "loss": 0.8041, + "learning_rate": 2.1162616391543546e-06, + "loss": 1.063, "step": 20329 }, { - "epoch": 0.57610020119584, + "epoch": 0.7954456530244933, "grad_norm": 0.0, - "learning_rate": 8.034133993034241e-06, - "loss": 0.7388, + "learning_rate": 2.115482106352379e-06, + "loss": 1.0202, "step": 20330 }, { - "epoch": 0.5761285386381025, + "epoch": 0.7954847797167227, "grad_norm": 0.0, - "learning_rate": 8.033234121363026e-06, - "loss": 0.8945, + "learning_rate": 2.1147027001658816e-06, + "loss": 1.0241, "step": 20331 }, { - "epoch": 0.576156876080365, + "epoch": 0.7955239064089522, "grad_norm": 0.0, - "learning_rate": 8.032334266258614e-06, - "loss": 0.8384, + "learning_rate": 2.1139234206073777e-06, + "loss": 0.9057, "step": 20332 }, { - "epoch": 0.5761852135226274, + "epoch": 0.7955630331011816, "grad_norm": 0.0, - "learning_rate": 8.031434427728576e-06, - "loss": 0.8487, + "learning_rate": 2.1131442676893843e-06, + "loss": 0.9177, "step": 20333 }, { - "epoch": 0.5762135509648899, + "epoch": 0.7956021597934111, "grad_norm": 0.0, - "learning_rate": 8.0305346057805e-06, - "loss": 0.9329, + "learning_rate": 2.1123652414244087e-06, + "loss": 0.9949, "step": 20334 }, { - "epoch": 0.5762418884071524, + "epoch": 0.7956412864856405, "grad_norm": 0.0, - "learning_rate": 8.02963480042196e-06, - "loss": 0.8503, + "learning_rate": 2.111586341824967e-06, + "loss": 0.9984, "step": 20335 }, { - "epoch": 0.5762702258494148, + "epoch": 0.79568041317787, "grad_norm": 0.0, - "learning_rate": 8.028735011660537e-06, - "loss": 0.9162, + "learning_rate": 2.110807568903561e-06, + "loss": 1.0781, "step": 20336 }, { - "epoch": 0.5762985632916773, + "epoch": 0.7957195398700994, "grad_norm": 0.0, - "learning_rate": 8.027835239503818e-06, - "loss": 0.8868, + "learning_rate": 2.110028922672699e-06, + "loss": 0.9178, "step": 20337 }, { - "epoch": 0.5763269007339398, + "epoch": 0.7957586665623289, "grad_norm": 0.0, - "learning_rate": 8.026935483959368e-06, - "loss": 0.8362, + "learning_rate": 2.1092504031448867e-06, + "loss": 0.9364, "step": 20338 }, { - "epoch": 0.5763552381762023, + "epoch": 0.7957977932545582, "grad_norm": 0.0, - "learning_rate": 8.026035745034774e-06, - "loss": 0.8619, + "learning_rate": 2.1084720103326274e-06, + "loss": 0.8946, "step": 20339 }, { - "epoch": 0.5763835756184646, + "epoch": 0.7958369199467877, "grad_norm": 0.0, - "learning_rate": 8.025136022737618e-06, - "loss": 0.8491, + "learning_rate": 2.1076937442484156e-06, + "loss": 0.9146, "step": 20340 }, { - "epoch": 0.5764119130607271, + "epoch": 0.7958760466390171, "grad_norm": 0.0, - "learning_rate": 8.02423631707547e-06, - "loss": 0.8434, + "learning_rate": 2.1069156049047535e-06, + "loss": 0.9917, "step": 20341 }, { - "epoch": 0.5764402505029896, + "epoch": 0.7959151733312466, "grad_norm": 0.0, - "learning_rate": 8.023336628055918e-06, - "loss": 0.8241, + "learning_rate": 2.106137592314137e-06, + "loss": 1.045, "step": 20342 }, { - "epoch": 0.576468587945252, + "epoch": 0.795954300023476, "grad_norm": 0.0, - "learning_rate": 8.022436955686532e-06, - "loss": 0.7849, + "learning_rate": 2.1053597064890607e-06, + "loss": 0.9178, "step": 20343 }, { - "epoch": 0.5764969253875145, + "epoch": 0.7959934267157055, "grad_norm": 0.0, - "learning_rate": 8.021537299974893e-06, - "loss": 0.9095, + "learning_rate": 2.1045819474420127e-06, + "loss": 0.9898, "step": 20344 }, { - "epoch": 0.576525262829777, + "epoch": 0.7960325534079349, "grad_norm": 0.0, - "learning_rate": 8.020637660928586e-06, - "loss": 0.8626, + "learning_rate": 2.103804315185488e-06, + "loss": 1.0753, "step": 20345 }, { - "epoch": 0.5765536002720394, + "epoch": 0.7960716801001644, "grad_norm": 0.0, - "learning_rate": 8.019738038555176e-06, - "loss": 0.8993, + "learning_rate": 2.103026809731965e-06, + "loss": 0.9769, "step": 20346 }, { - "epoch": 0.5765819377143019, + "epoch": 0.7961108067923938, "grad_norm": 0.0, - "learning_rate": 8.018838432862251e-06, - "loss": 0.8257, + "learning_rate": 2.102249431093942e-06, + "loss": 0.9406, "step": 20347 }, { - "epoch": 0.5766102751565644, + "epoch": 0.7961499334846233, "grad_norm": 0.0, - "learning_rate": 8.017938843857384e-06, - "loss": 0.9274, + "learning_rate": 2.101472179283894e-06, + "loss": 0.9031, "step": 20348 }, { - "epoch": 0.5766386125988269, + "epoch": 0.7961890601768526, "grad_norm": 0.0, - "learning_rate": 8.017039271548154e-06, - "loss": 0.8603, + "learning_rate": 2.100695054314309e-06, + "loss": 0.9261, "step": 20349 }, { - "epoch": 0.5766669500410893, + "epoch": 0.796228186869082, "grad_norm": 0.0, - "learning_rate": 8.016139715942143e-06, - "loss": 0.898, + "learning_rate": 2.099918056197657e-06, + "loss": 0.9523, "step": 20350 }, { - "epoch": 0.5766952874833517, + "epoch": 0.7962673135613115, "grad_norm": 0.0, - "learning_rate": 8.01524017704692e-06, - "loss": 0.8076, + "learning_rate": 2.099141184946427e-06, + "loss": 0.9167, "step": 20351 }, { - "epoch": 0.5767236249256142, + "epoch": 0.7963064402535409, "grad_norm": 0.0, - "learning_rate": 8.014340654870065e-06, - "loss": 0.8777, + "learning_rate": 2.0983644405730863e-06, + "loss": 0.8737, "step": 20352 }, { - "epoch": 0.5767519623678766, + "epoch": 0.7963455669457704, "grad_norm": 0.0, - "learning_rate": 8.013441149419159e-06, - "loss": 0.9185, + "learning_rate": 2.0975878230901146e-06, + "loss": 1.0768, "step": 20353 }, { - "epoch": 0.5767802998101391, + "epoch": 0.7963846936379998, "grad_norm": 0.0, - "learning_rate": 8.012541660701774e-06, - "loss": 0.7645, + "learning_rate": 2.096811332509975e-06, + "loss": 0.8334, "step": 20354 }, { - "epoch": 0.5768086372524016, + "epoch": 0.7964238203302293, "grad_norm": 0.0, - "learning_rate": 8.011642188725491e-06, - "loss": 0.9405, + "learning_rate": 2.0960349688451463e-06, + "loss": 0.9935, "step": 20355 }, { - "epoch": 0.5768369746946641, + "epoch": 0.7964629470224587, "grad_norm": 0.0, - "learning_rate": 8.010742733497882e-06, - "loss": 0.9107, + "learning_rate": 2.0952587321080898e-06, + "loss": 0.7226, "step": 20356 }, { - "epoch": 0.5768653121369265, + "epoch": 0.7965020737146882, "grad_norm": 0.0, - "learning_rate": 8.009843295026524e-06, - "loss": 0.9668, + "learning_rate": 2.0944826223112756e-06, + "loss": 0.8934, "step": 20357 }, { - "epoch": 0.576893649579189, + "epoch": 0.7965412004069176, "grad_norm": 0.0, - "learning_rate": 8.008943873319e-06, - "loss": 0.8784, + "learning_rate": 2.0937066394671624e-06, + "loss": 0.8988, "step": 20358 }, { - "epoch": 0.5769219870214515, + "epoch": 0.7965803270991471, "grad_norm": 0.0, - "learning_rate": 8.008044468382878e-06, - "loss": 0.924, + "learning_rate": 2.0929307835882117e-06, + "loss": 1.0647, "step": 20359 }, { - "epoch": 0.5769503244637139, + "epoch": 0.7966194537913764, "grad_norm": 0.0, - "learning_rate": 8.007145080225736e-06, - "loss": 0.8262, + "learning_rate": 2.0921550546868864e-06, + "loss": 0.9381, "step": 20360 }, { - "epoch": 0.5769786619059764, + "epoch": 0.7966585804836059, "grad_norm": 0.0, - "learning_rate": 8.006245708855152e-06, - "loss": 0.8811, + "learning_rate": 2.0913794527756425e-06, + "loss": 0.8512, "step": 20361 }, { - "epoch": 0.5770069993482388, + "epoch": 0.7966977071758353, "grad_norm": 0.0, - "learning_rate": 8.0053463542787e-06, - "loss": 0.9001, + "learning_rate": 2.090603977866934e-06, + "loss": 0.9143, "step": 20362 }, { - "epoch": 0.5770353367905013, + "epoch": 0.7967368338680648, "grad_norm": 0.0, - "learning_rate": 8.004447016503962e-06, - "loss": 0.9251, + "learning_rate": 2.0898286299732128e-06, + "loss": 1.0061, "step": 20363 }, { - "epoch": 0.5770636742327637, + "epoch": 0.7967759605602942, "grad_norm": 0.0, - "learning_rate": 8.0035476955385e-06, - "loss": 0.8856, + "learning_rate": 2.0890534091069327e-06, + "loss": 1.0153, "step": 20364 }, { - "epoch": 0.5770920116750262, + "epoch": 0.7968150872525237, "grad_norm": 0.0, - "learning_rate": 8.0026483913899e-06, - "loss": 0.8508, + "learning_rate": 2.0882783152805443e-06, + "loss": 0.9474, "step": 20365 }, { - "epoch": 0.5771203491172887, + "epoch": 0.7968542139447531, "grad_norm": 0.0, - "learning_rate": 8.001749104065735e-06, - "loss": 0.937, + "learning_rate": 2.08750334850649e-06, + "loss": 1.0307, "step": 20366 }, { - "epoch": 0.5771486865595511, + "epoch": 0.7968933406369826, "grad_norm": 0.0, - "learning_rate": 8.000849833573579e-06, - "loss": 0.8925, + "learning_rate": 2.086728508797219e-06, + "loss": 1.0217, "step": 20367 }, { - "epoch": 0.5771770240018136, + "epoch": 0.796932467329212, "grad_norm": 0.0, - "learning_rate": 7.999950579921005e-06, - "loss": 0.8812, + "learning_rate": 2.08595379616517e-06, + "loss": 0.8613, "step": 20368 }, { - "epoch": 0.5772053614440761, + "epoch": 0.7969715940214415, "grad_norm": 0.0, - "learning_rate": 7.999051343115595e-06, - "loss": 0.7614, + "learning_rate": 2.085179210622786e-06, + "loss": 0.8284, "step": 20369 }, { - "epoch": 0.5772336988863385, + "epoch": 0.7970107207136709, "grad_norm": 0.0, - "learning_rate": 7.998152123164916e-06, - "loss": 0.8902, + "learning_rate": 2.084404752182506e-06, + "loss": 0.8776, "step": 20370 }, { - "epoch": 0.577262036328601, + "epoch": 0.7970498474059003, "grad_norm": 0.0, - "learning_rate": 7.997252920076543e-06, - "loss": 0.8851, + "learning_rate": 2.0836304208567705e-06, + "loss": 0.9187, "step": 20371 }, { - "epoch": 0.5772903737708635, + "epoch": 0.7970889740981297, "grad_norm": 0.0, - "learning_rate": 7.996353733858055e-06, - "loss": 0.9841, + "learning_rate": 2.082856216658007e-06, + "loss": 0.8889, "step": 20372 }, { - "epoch": 0.577318711213126, + "epoch": 0.7971281007903592, "grad_norm": 0.0, - "learning_rate": 7.995454564517023e-06, - "loss": 0.9434, + "learning_rate": 2.082082139598651e-06, + "loss": 0.9518, "step": 20373 }, { - "epoch": 0.5773470486553883, + "epoch": 0.7971672274825886, "grad_norm": 0.0, - "learning_rate": 7.994555412061022e-06, - "loss": 0.7696, + "learning_rate": 2.081308189691138e-06, + "loss": 0.9565, "step": 20374 }, { - "epoch": 0.5773753860976508, + "epoch": 0.7972063541748181, "grad_norm": 0.0, - "learning_rate": 7.993656276497623e-06, - "loss": 0.8453, + "learning_rate": 2.0805343669478906e-06, + "loss": 0.9376, "step": 20375 }, { - "epoch": 0.5774037235399133, + "epoch": 0.7972454808670475, "grad_norm": 0.0, - "learning_rate": 7.992757157834408e-06, - "loss": 0.9798, + "learning_rate": 2.079760671381337e-06, + "loss": 0.9644, "step": 20376 }, { - "epoch": 0.5774320609821757, + "epoch": 0.797284607559277, "grad_norm": 0.0, - "learning_rate": 7.991858056078938e-06, - "loss": 0.8141, + "learning_rate": 2.078987103003902e-06, + "loss": 1.0052, "step": 20377 }, { - "epoch": 0.5774603984244382, + "epoch": 0.7973237342515064, "grad_norm": 0.0, - "learning_rate": 7.990958971238796e-06, - "loss": 0.9291, + "learning_rate": 2.0782136618280126e-06, + "loss": 1.0759, "step": 20378 }, { - "epoch": 0.5774887358667007, + "epoch": 0.7973628609437358, "grad_norm": 0.0, - "learning_rate": 7.990059903321554e-06, - "loss": 0.8394, + "learning_rate": 2.0774403478660807e-06, + "loss": 0.8779, "step": 20379 }, { - "epoch": 0.5775170733089632, + "epoch": 0.7974019876359653, "grad_norm": 0.0, - "learning_rate": 7.98916085233478e-06, - "loss": 0.7889, + "learning_rate": 2.0766671611305334e-06, + "loss": 0.9916, "step": 20380 }, { - "epoch": 0.5775454107512256, + "epoch": 0.7974411143281946, "grad_norm": 0.0, - "learning_rate": 7.988261818286051e-06, - "loss": 0.8298, + "learning_rate": 2.0758941016337776e-06, + "loss": 0.9451, "step": 20381 }, { - "epoch": 0.5775737481934881, + "epoch": 0.7974802410204241, "grad_norm": 0.0, - "learning_rate": 7.987362801182946e-06, - "loss": 0.8378, + "learning_rate": 2.075121169388238e-06, + "loss": 0.9922, "step": 20382 }, { - "epoch": 0.5776020856357506, + "epoch": 0.7975193677126535, "grad_norm": 0.0, - "learning_rate": 7.986463801033027e-06, - "loss": 1.008, + "learning_rate": 2.074348364406319e-06, + "loss": 1.0403, "step": 20383 }, { - "epoch": 0.5776304230780129, + "epoch": 0.797558494404883, "grad_norm": 0.0, - "learning_rate": 7.985564817843872e-06, - "loss": 0.8472, + "learning_rate": 2.0735756867004366e-06, + "loss": 0.954, "step": 20384 }, { - "epoch": 0.5776587605202754, + "epoch": 0.7975976210971124, "grad_norm": 0.0, - "learning_rate": 7.984665851623052e-06, - "loss": 1.0144, + "learning_rate": 2.0728031362829935e-06, + "loss": 1.0109, "step": 20385 }, { - "epoch": 0.5776870979625379, + "epoch": 0.7976367477893419, "grad_norm": 0.0, - "learning_rate": 7.983766902378138e-06, - "loss": 0.916, + "learning_rate": 2.0720307131663998e-06, + "loss": 0.9724, "step": 20386 }, { - "epoch": 0.5777154354048004, + "epoch": 0.7976758744815713, "grad_norm": 0.0, - "learning_rate": 7.98286797011671e-06, - "loss": 0.7313, + "learning_rate": 2.071258417363058e-06, + "loss": 0.9876, "step": 20387 }, { - "epoch": 0.5777437728470628, + "epoch": 0.7977150011738008, "grad_norm": 0.0, - "learning_rate": 7.981969054846328e-06, - "loss": 0.7873, + "learning_rate": 2.0704862488853726e-06, + "loss": 0.8546, "step": 20388 }, { - "epoch": 0.5777721102893253, + "epoch": 0.7977541278660302, "grad_norm": 0.0, - "learning_rate": 7.981070156574572e-06, - "loss": 0.8828, + "learning_rate": 2.06971420774574e-06, + "loss": 0.9724, "step": 20389 }, { - "epoch": 0.5778004477315878, + "epoch": 0.7977932545582597, "grad_norm": 0.0, - "learning_rate": 7.980171275309014e-06, - "loss": 0.9628, + "learning_rate": 2.0689422939565596e-06, + "loss": 0.9773, "step": 20390 }, { - "epoch": 0.5778287851738502, + "epoch": 0.797832381250489, "grad_norm": 0.0, - "learning_rate": 7.979272411057222e-06, - "loss": 0.9089, + "learning_rate": 2.0681705075302284e-06, + "loss": 0.9568, "step": 20391 }, { - "epoch": 0.5778571226161127, + "epoch": 0.7978715079427185, "grad_norm": 0.0, - "learning_rate": 7.978373563826769e-06, - "loss": 0.834, + "learning_rate": 2.067398848479142e-06, + "loss": 0.9669, "step": 20392 }, { - "epoch": 0.5778854600583752, + "epoch": 0.7979106346349479, "grad_norm": 0.0, - "learning_rate": 7.977474733625224e-06, - "loss": 0.8855, + "learning_rate": 2.066627316815687e-06, + "loss": 1.0229, "step": 20393 }, { - "epoch": 0.5779137975006375, + "epoch": 0.7979497613271774, "grad_norm": 0.0, - "learning_rate": 7.97657592046016e-06, - "loss": 0.7095, + "learning_rate": 2.0658559125522593e-06, + "loss": 0.8971, "step": 20394 }, { - "epoch": 0.5779421349429, + "epoch": 0.7979888880194068, "grad_norm": 0.0, - "learning_rate": 7.975677124339154e-06, - "loss": 0.9634, + "learning_rate": 2.0650846357012376e-06, + "loss": 1.0058, "step": 20395 }, { - "epoch": 0.5779704723851625, + "epoch": 0.7980280147116363, "grad_norm": 0.0, - "learning_rate": 7.974778345269767e-06, - "loss": 0.8892, + "learning_rate": 2.064313486275019e-06, + "loss": 0.9088, "step": 20396 }, { - "epoch": 0.577998809827425, + "epoch": 0.7980671414038657, "grad_norm": 0.0, - "learning_rate": 7.973879583259573e-06, - "loss": 0.9116, + "learning_rate": 2.0635424642859805e-06, + "loss": 1.0359, "step": 20397 }, { - "epoch": 0.5780271472696874, + "epoch": 0.7981062680960952, "grad_norm": 0.0, - "learning_rate": 7.972980838316146e-06, - "loss": 0.9527, + "learning_rate": 2.0627715697465067e-06, + "loss": 0.9725, "step": 20398 }, { - "epoch": 0.5780554847119499, + "epoch": 0.7981453947883246, "grad_norm": 0.0, - "learning_rate": 7.972082110447052e-06, - "loss": 0.8921, + "learning_rate": 2.062000802668971e-06, + "loss": 0.9302, "step": 20399 }, { - "epoch": 0.5780838221542124, + "epoch": 0.7981845214805541, "grad_norm": 0.0, - "learning_rate": 7.971183399659868e-06, - "loss": 0.9074, + "learning_rate": 2.0612301630657595e-06, + "loss": 0.9228, "step": 20400 }, { - "epoch": 0.5781121595964748, + "epoch": 0.7982236481727835, "grad_norm": 0.0, - "learning_rate": 7.970284705962156e-06, - "loss": 0.8551, + "learning_rate": 2.0604596509492427e-06, + "loss": 0.915, "step": 20401 }, { - "epoch": 0.5781404970387373, + "epoch": 0.798262774865013, "grad_norm": 0.0, - "learning_rate": 7.96938602936149e-06, - "loss": 0.8471, + "learning_rate": 2.059689266331797e-06, + "loss": 0.9032, "step": 20402 }, { - "epoch": 0.5781688344809998, + "epoch": 0.7983019015572423, "grad_norm": 0.0, - "learning_rate": 7.96848736986544e-06, - "loss": 0.9019, + "learning_rate": 2.0589190092257895e-06, + "loss": 0.9322, "step": 20403 }, { - "epoch": 0.5781971719232623, + "epoch": 0.7983410282494718, "grad_norm": 0.0, - "learning_rate": 7.967588727481574e-06, - "loss": 0.845, + "learning_rate": 2.058148879643591e-06, + "loss": 0.9854, "step": 20404 }, { - "epoch": 0.5782255093655246, + "epoch": 0.7983801549417012, "grad_norm": 0.0, - "learning_rate": 7.966690102217467e-06, - "loss": 0.8604, + "learning_rate": 2.057378877597571e-06, + "loss": 1.1246, "step": 20405 }, { - "epoch": 0.5782538468077871, + "epoch": 0.7984192816339307, "grad_norm": 0.0, - "learning_rate": 7.965791494080679e-06, - "loss": 0.7824, + "learning_rate": 2.0566090031000952e-06, + "loss": 1.0334, "step": 20406 }, { - "epoch": 0.5782821842500496, + "epoch": 0.7984584083261601, "grad_norm": 0.0, - "learning_rate": 7.964892903078785e-06, - "loss": 0.9293, + "learning_rate": 2.055839256163523e-06, + "loss": 1.0113, "step": 20407 }, { - "epoch": 0.578310521692312, + "epoch": 0.7984975350183895, "grad_norm": 0.0, - "learning_rate": 7.963994329219359e-06, - "loss": 0.9559, + "learning_rate": 2.055069636800218e-06, + "loss": 1.0284, "step": 20408 }, { - "epoch": 0.5783388591345745, + "epoch": 0.798536661710619, "grad_norm": 0.0, - "learning_rate": 7.96309577250996e-06, - "loss": 0.8872, + "learning_rate": 2.0543001450225387e-06, + "loss": 0.9619, "step": 20409 }, { - "epoch": 0.578367196576837, + "epoch": 0.7985757884028484, "grad_norm": 0.0, - "learning_rate": 7.962197232958162e-06, - "loss": 0.9031, + "learning_rate": 2.0535307808428462e-06, + "loss": 0.8301, "step": 20410 }, { - "epoch": 0.5783955340190995, + "epoch": 0.7986149150950779, "grad_norm": 0.0, - "learning_rate": 7.961298710571536e-06, - "loss": 1.0499, + "learning_rate": 2.052761544273488e-06, + "loss": 0.881, "step": 20411 }, { - "epoch": 0.5784238714613619, + "epoch": 0.7986540417873073, "grad_norm": 0.0, - "learning_rate": 7.960400205357645e-06, - "loss": 0.9348, + "learning_rate": 2.0519924353268215e-06, + "loss": 0.9002, "step": 20412 }, { - "epoch": 0.5784522089036244, + "epoch": 0.7986931684795368, "grad_norm": 0.0, - "learning_rate": 7.959501717324065e-06, - "loss": 0.877, + "learning_rate": 2.0512234540151997e-06, + "loss": 0.9178, "step": 20413 }, { - "epoch": 0.5784805463458869, + "epoch": 0.7987322951717661, "grad_norm": 0.0, - "learning_rate": 7.958603246478355e-06, - "loss": 0.8056, + "learning_rate": 2.050454600350966e-06, + "loss": 1.0464, "step": 20414 }, { - "epoch": 0.5785088837881492, + "epoch": 0.7987714218639956, "grad_norm": 0.0, - "learning_rate": 7.957704792828088e-06, - "loss": 0.8304, + "learning_rate": 2.0496858743464698e-06, + "loss": 1.0372, "step": 20415 }, { - "epoch": 0.5785372212304117, + "epoch": 0.798810548556225, "grad_norm": 0.0, - "learning_rate": 7.956806356380837e-06, - "loss": 0.8227, + "learning_rate": 2.0489172760140594e-06, + "loss": 1.0294, "step": 20416 }, { - "epoch": 0.5785655586726742, + "epoch": 0.7988496752484545, "grad_norm": 0.0, - "learning_rate": 7.95590793714416e-06, - "loss": 0.9271, + "learning_rate": 2.0481488053660714e-06, + "loss": 0.8934, "step": 20417 }, { - "epoch": 0.5785938961149366, + "epoch": 0.7988888019406839, "grad_norm": 0.0, - "learning_rate": 7.95500953512563e-06, - "loss": 0.8767, + "learning_rate": 2.0473804624148498e-06, + "loss": 0.9546, "step": 20418 }, { - "epoch": 0.5786222335571991, + "epoch": 0.7989279286329134, "grad_norm": 0.0, - "learning_rate": 7.954111150332814e-06, - "loss": 0.9249, + "learning_rate": 2.0466122471727347e-06, + "loss": 1.0162, "step": 20419 }, { - "epoch": 0.5786505709994616, + "epoch": 0.7989670553251428, "grad_norm": 0.0, - "learning_rate": 7.95321278277328e-06, - "loss": 0.8179, + "learning_rate": 2.0458441596520584e-06, + "loss": 0.9834, "step": 20420 }, { - "epoch": 0.5786789084417241, + "epoch": 0.7990061820173723, "grad_norm": 0.0, - "learning_rate": 7.952314432454599e-06, - "loss": 0.8259, + "learning_rate": 2.045076199865158e-06, + "loss": 0.9583, "step": 20421 }, { - "epoch": 0.5787072458839865, + "epoch": 0.7990453087096017, "grad_norm": 0.0, - "learning_rate": 7.951416099384328e-06, - "loss": 0.8139, + "learning_rate": 2.044308367824366e-06, + "loss": 1.0076, "step": 20422 }, { - "epoch": 0.578735583326249, + "epoch": 0.7990844354018312, "grad_norm": 0.0, - "learning_rate": 7.950517783570041e-06, - "loss": 0.8398, + "learning_rate": 2.043540663542014e-06, + "loss": 0.8592, "step": 20423 }, { - "epoch": 0.5787639207685115, + "epoch": 0.7991235620940605, "grad_norm": 0.0, - "learning_rate": 7.949619485019307e-06, - "loss": 0.8241, + "learning_rate": 2.0427730870304276e-06, + "loss": 0.9253, "step": 20424 }, { - "epoch": 0.5787922582107738, + "epoch": 0.79916268878629, "grad_norm": 0.0, - "learning_rate": 7.948721203739686e-06, - "loss": 0.8957, + "learning_rate": 2.042005638301937e-06, + "loss": 0.943, "step": 20425 }, { - "epoch": 0.5788205956530363, + "epoch": 0.7992018154785194, "grad_norm": 0.0, - "learning_rate": 7.947822939738747e-06, - "loss": 0.8297, + "learning_rate": 2.041238317368858e-06, + "loss": 0.9189, "step": 20426 }, { - "epoch": 0.5788489330952988, + "epoch": 0.7992409421707489, "grad_norm": 0.0, - "learning_rate": 7.946924693024062e-06, - "loss": 0.8487, + "learning_rate": 2.0404711242435237e-06, + "loss": 0.9824, "step": 20427 }, { - "epoch": 0.5788772705375613, + "epoch": 0.7992800688629783, "grad_norm": 0.0, - "learning_rate": 7.94602646360319e-06, - "loss": 0.8642, + "learning_rate": 2.0397040589382476e-06, + "loss": 0.8706, "step": 20428 }, { - "epoch": 0.5789056079798237, + "epoch": 0.7993191955552078, "grad_norm": 0.0, - "learning_rate": 7.945128251483704e-06, - "loss": 0.8878, + "learning_rate": 2.038937121465352e-06, + "loss": 1.0295, "step": 20429 }, { - "epoch": 0.5789339454220862, + "epoch": 0.7993583222474372, "grad_norm": 0.0, - "learning_rate": 7.944230056673162e-06, - "loss": 0.8599, + "learning_rate": 2.0381703118371445e-06, + "loss": 0.9624, "step": 20430 }, { - "epoch": 0.5789622828643487, + "epoch": 0.7993974489396667, "grad_norm": 0.0, - "learning_rate": 7.94333187917913e-06, - "loss": 0.895, + "learning_rate": 2.0374036300659504e-06, + "loss": 0.8995, "step": 20431 }, { - "epoch": 0.5789906203066111, + "epoch": 0.7994365756318961, "grad_norm": 0.0, - "learning_rate": 7.942433719009183e-06, - "loss": 0.7976, + "learning_rate": 2.036637076164074e-06, + "loss": 0.9481, "step": 20432 }, { - "epoch": 0.5790189577488736, + "epoch": 0.7994757023241256, "grad_norm": 0.0, - "learning_rate": 7.941535576170878e-06, - "loss": 0.9123, + "learning_rate": 2.0358706501438308e-06, + "loss": 1.0222, "step": 20433 }, { - "epoch": 0.5790472951911361, + "epoch": 0.799514829016355, "grad_norm": 0.0, - "learning_rate": 7.940637450671787e-06, - "loss": 0.8282, + "learning_rate": 2.0351043520175216e-06, + "loss": 0.9873, "step": 20434 }, { - "epoch": 0.5790756326333986, + "epoch": 0.7995539557085845, "grad_norm": 0.0, - "learning_rate": 7.939739342519468e-06, - "loss": 0.889, + "learning_rate": 2.0343381817974574e-06, + "loss": 0.935, "step": 20435 }, { - "epoch": 0.579103970075661, + "epoch": 0.7995930824008138, "grad_norm": 0.0, - "learning_rate": 7.938841251721488e-06, - "loss": 0.9103, + "learning_rate": 2.0335721394959396e-06, + "loss": 1.0057, "step": 20436 }, { - "epoch": 0.5791323075179234, + "epoch": 0.7996322090930432, "grad_norm": 0.0, - "learning_rate": 7.937943178285416e-06, - "loss": 0.8748, + "learning_rate": 2.0328062251252735e-06, + "loss": 0.9866, "step": 20437 }, { - "epoch": 0.5791606449601859, + "epoch": 0.7996713357852727, "grad_norm": 0.0, - "learning_rate": 7.937045122218813e-06, - "loss": 0.7664, + "learning_rate": 2.032040438697752e-06, + "loss": 0.8881, "step": 20438 }, { - "epoch": 0.5791889824024483, + "epoch": 0.7997104624775021, "grad_norm": 0.0, - "learning_rate": 7.936147083529245e-06, - "loss": 0.9009, + "learning_rate": 2.0312747802256783e-06, + "loss": 1.0736, "step": 20439 }, { - "epoch": 0.5792173198447108, + "epoch": 0.7997495891697316, "grad_norm": 0.0, - "learning_rate": 7.935249062224281e-06, - "loss": 0.9462, + "learning_rate": 2.0305092497213454e-06, + "loss": 1.0073, "step": 20440 }, { - "epoch": 0.5792456572869733, + "epoch": 0.799788715861961, "grad_norm": 0.0, - "learning_rate": 7.934351058311475e-06, - "loss": 0.8209, + "learning_rate": 2.029743847197051e-06, + "loss": 0.9534, "step": 20441 }, { - "epoch": 0.5792739947292357, + "epoch": 0.7998278425541905, "grad_norm": 0.0, - "learning_rate": 7.933453071798403e-06, - "loss": 0.8958, + "learning_rate": 2.0289785726650803e-06, + "loss": 0.8349, "step": 20442 }, { - "epoch": 0.5793023321714982, + "epoch": 0.7998669692464199, "grad_norm": 0.0, - "learning_rate": 7.932555102692619e-06, - "loss": 0.8041, + "learning_rate": 2.0282134261377273e-06, + "loss": 1.0621, "step": 20443 }, { - "epoch": 0.5793306696137607, + "epoch": 0.7999060959386494, "grad_norm": 0.0, - "learning_rate": 7.93165715100169e-06, - "loss": 0.997, + "learning_rate": 2.0274484076272726e-06, + "loss": 0.8835, "step": 20444 }, { - "epoch": 0.5793590070560232, + "epoch": 0.7999452226308787, "grad_norm": 0.0, - "learning_rate": 7.930759216733183e-06, - "loss": 0.9239, + "learning_rate": 2.026683517146012e-06, + "loss": 1.1021, "step": 20445 }, { - "epoch": 0.5793873444982856, + "epoch": 0.7999843493231082, "grad_norm": 0.0, - "learning_rate": 7.929861299894658e-06, - "loss": 0.8626, + "learning_rate": 2.0259187547062197e-06, + "loss": 0.9484, "step": 20446 }, { - "epoch": 0.579415681940548, + "epoch": 0.8000234760153376, "grad_norm": 0.0, - "learning_rate": 7.92896340049368e-06, - "loss": 0.8432, + "learning_rate": 2.025154120320184e-06, + "loss": 0.9421, "step": 20447 }, { - "epoch": 0.5794440193828105, + "epoch": 0.8000626027075671, "grad_norm": 0.0, - "learning_rate": 7.928065518537816e-06, - "loss": 0.8095, + "learning_rate": 2.024389614000174e-06, + "loss": 1.0435, "step": 20448 }, { - "epoch": 0.5794723568250729, + "epoch": 0.8001017293997965, "grad_norm": 0.0, - "learning_rate": 7.927167654034622e-06, - "loss": 0.8943, + "learning_rate": 2.0236252357584775e-06, + "loss": 0.892, "step": 20449 }, { - "epoch": 0.5795006942673354, + "epoch": 0.800140856092026, "grad_norm": 0.0, - "learning_rate": 7.926269806991666e-06, - "loss": 0.9465, + "learning_rate": 2.0228609856073633e-06, + "loss": 0.8545, "step": 20450 }, { - "epoch": 0.5795290317095979, + "epoch": 0.8001799827842554, "grad_norm": 0.0, - "learning_rate": 7.925371977416508e-06, - "loss": 0.9107, + "learning_rate": 2.0220968635591076e-06, + "loss": 1.0038, "step": 20451 }, { - "epoch": 0.5795573691518604, + "epoch": 0.8002191094764849, "grad_norm": 0.0, - "learning_rate": 7.924474165316712e-06, - "loss": 0.8152, + "learning_rate": 2.021332869625977e-06, + "loss": 0.9, "step": 20452 }, { - "epoch": 0.5795857065941228, + "epoch": 0.8002582361687143, "grad_norm": 0.0, - "learning_rate": 7.923576370699845e-06, - "loss": 0.8665, + "learning_rate": 2.020569003820242e-06, + "loss": 0.8598, "step": 20453 }, { - "epoch": 0.5796140440363853, + "epoch": 0.8002973628609438, "grad_norm": 0.0, - "learning_rate": 7.922678593573462e-06, - "loss": 0.8246, + "learning_rate": 2.019805266154171e-06, + "loss": 0.9473, "step": 20454 }, { - "epoch": 0.5796423814786478, + "epoch": 0.8003364895531732, "grad_norm": 0.0, - "learning_rate": 7.921780833945127e-06, - "loss": 0.8022, + "learning_rate": 2.0190416566400295e-06, + "loss": 1.0199, "step": 20455 }, { - "epoch": 0.5796707189209102, + "epoch": 0.8003756162454027, "grad_norm": 0.0, - "learning_rate": 7.92088309182241e-06, - "loss": 0.9211, + "learning_rate": 2.018278175290076e-06, + "loss": 1.0414, "step": 20456 }, { - "epoch": 0.5796990563631726, + "epoch": 0.800414742937632, "grad_norm": 0.0, - "learning_rate": 7.919985367212861e-06, - "loss": 0.797, + "learning_rate": 2.017514822116574e-06, + "loss": 1.0261, "step": 20457 }, { - "epoch": 0.5797273938054351, + "epoch": 0.8004538696298615, "grad_norm": 0.0, - "learning_rate": 7.91908766012405e-06, - "loss": 0.8889, + "learning_rate": 2.016751597131783e-06, + "loss": 0.8376, "step": 20458 }, { - "epoch": 0.5797557312476976, + "epoch": 0.8004929963220909, "grad_norm": 0.0, - "learning_rate": 7.918189970563534e-06, - "loss": 0.8575, + "learning_rate": 2.015988500347956e-06, + "loss": 0.964, "step": 20459 }, { - "epoch": 0.57978406868996, + "epoch": 0.8005321230143204, "grad_norm": 0.0, - "learning_rate": 7.917292298538877e-06, - "loss": 0.9062, + "learning_rate": 2.0152255317773486e-06, + "loss": 0.8993, "step": 20460 }, { - "epoch": 0.5798124061322225, + "epoch": 0.8005712497065498, "grad_norm": 0.0, - "learning_rate": 7.916394644057645e-06, - "loss": 0.8676, + "learning_rate": 2.014462691432216e-06, + "loss": 1.084, "step": 20461 }, { - "epoch": 0.579840743574485, + "epoch": 0.8006103763987793, "grad_norm": 0.0, - "learning_rate": 7.91549700712739e-06, - "loss": 0.8608, + "learning_rate": 2.013699979324805e-06, + "loss": 0.9799, "step": 20462 }, { - "epoch": 0.5798690810167474, + "epoch": 0.8006495030910087, "grad_norm": 0.0, - "learning_rate": 7.91459938775568e-06, - "loss": 0.7848, + "learning_rate": 2.0129373954673636e-06, + "loss": 0.9716, "step": 20463 }, { - "epoch": 0.5798974184590099, + "epoch": 0.8006886297832381, "grad_norm": 0.0, - "learning_rate": 7.913701785950072e-06, - "loss": 0.8786, + "learning_rate": 2.012174939872142e-06, + "loss": 0.8594, "step": 20464 }, { - "epoch": 0.5799257559012724, + "epoch": 0.8007277564754676, "grad_norm": 0.0, - "learning_rate": 7.912804201718129e-06, - "loss": 0.888, + "learning_rate": 2.011412612551379e-06, + "loss": 0.9013, "step": 20465 }, { - "epoch": 0.5799540933435348, + "epoch": 0.800766883167697, "grad_norm": 0.0, - "learning_rate": 7.911906635067414e-06, - "loss": 0.8607, + "learning_rate": 2.0106504135173187e-06, + "loss": 0.9752, "step": 20466 }, { - "epoch": 0.5799824307857973, + "epoch": 0.8008060098599264, "grad_norm": 0.0, - "learning_rate": 7.911009086005481e-06, - "loss": 0.8974, + "learning_rate": 2.0098883427822026e-06, + "loss": 0.8542, "step": 20467 }, { - "epoch": 0.5800107682280597, + "epoch": 0.8008451365521558, "grad_norm": 0.0, - "learning_rate": 7.910111554539895e-06, - "loss": 0.8812, + "learning_rate": 2.00912640035827e-06, + "loss": 0.9366, "step": 20468 }, { - "epoch": 0.5800391056703222, + "epoch": 0.8008842632443853, "grad_norm": 0.0, - "learning_rate": 7.90921404067822e-06, - "loss": 0.9213, + "learning_rate": 2.0083645862577515e-06, + "loss": 0.9405, "step": 20469 }, { - "epoch": 0.5800674431125846, + "epoch": 0.8009233899366147, "grad_norm": 0.0, - "learning_rate": 7.908316544428007e-06, - "loss": 0.8246, + "learning_rate": 2.0076029004928834e-06, + "loss": 1.0169, "step": 20470 }, { - "epoch": 0.5800957805548471, + "epoch": 0.8009625166288442, "grad_norm": 0.0, - "learning_rate": 7.907419065796822e-06, - "loss": 0.7867, + "learning_rate": 2.006841343075898e-06, + "loss": 0.9876, "step": 20471 }, { - "epoch": 0.5801241179971096, + "epoch": 0.8010016433210736, "grad_norm": 0.0, - "learning_rate": 7.906521604792221e-06, - "loss": 0.8316, + "learning_rate": 2.006079914019027e-06, + "loss": 0.9888, "step": 20472 }, { - "epoch": 0.580152455439372, + "epoch": 0.8010407700133031, "grad_norm": 0.0, - "learning_rate": 7.905624161421767e-06, - "loss": 0.8327, + "learning_rate": 2.0053186133344926e-06, + "loss": 1.0033, "step": 20473 }, { - "epoch": 0.5801807928816345, + "epoch": 0.8010798967055325, "grad_norm": 0.0, - "learning_rate": 7.904726735693021e-06, - "loss": 0.791, + "learning_rate": 2.004557441034527e-06, + "loss": 1.0313, "step": 20474 }, { - "epoch": 0.580209130323897, + "epoch": 0.801119023397762, "grad_norm": 0.0, - "learning_rate": 7.903829327613536e-06, - "loss": 0.9965, + "learning_rate": 2.0037963971313445e-06, + "loss": 0.9788, "step": 20475 }, { - "epoch": 0.5802374677661595, + "epoch": 0.8011581500899914, "grad_norm": 0.0, - "learning_rate": 7.902931937190877e-06, - "loss": 0.8877, + "learning_rate": 2.0030354816371767e-06, + "loss": 1.0532, "step": 20476 }, { - "epoch": 0.5802658052084219, + "epoch": 0.8011972767822209, "grad_norm": 0.0, - "learning_rate": 7.902034564432601e-06, - "loss": 0.7837, + "learning_rate": 2.002274694564236e-06, + "loss": 0.9368, "step": 20477 }, { - "epoch": 0.5802941426506844, + "epoch": 0.8012364034744502, "grad_norm": 0.0, - "learning_rate": 7.901137209346266e-06, - "loss": 0.8055, + "learning_rate": 2.0015140359247453e-06, + "loss": 0.8505, "step": 20478 }, { - "epoch": 0.5803224800929468, + "epoch": 0.8012755301666797, "grad_norm": 0.0, - "learning_rate": 7.900239871939435e-06, - "loss": 0.9064, + "learning_rate": 2.00075350573091e-06, + "loss": 0.9281, "step": 20479 }, { - "epoch": 0.5803508175352092, + "epoch": 0.8013146568589091, "grad_norm": 0.0, - "learning_rate": 7.89934255221966e-06, - "loss": 0.941, + "learning_rate": 1.9999931039949562e-06, + "loss": 0.9965, "step": 20480 }, { - "epoch": 0.5803791549774717, + "epoch": 0.8013537835511386, "grad_norm": 0.0, - "learning_rate": 7.8984452501945e-06, - "loss": 0.9108, + "learning_rate": 1.9992328307290854e-06, + "loss": 0.9865, "step": 20481 }, { - "epoch": 0.5804074924197342, + "epoch": 0.801392910243368, "grad_norm": 0.0, - "learning_rate": 7.897547965871521e-06, - "loss": 0.8509, + "learning_rate": 1.9984726859455127e-06, + "loss": 1.1104, "step": 20482 }, { - "epoch": 0.5804358298619967, + "epoch": 0.8014320369355975, "grad_norm": 0.0, - "learning_rate": 7.896650699258277e-06, - "loss": 0.9956, + "learning_rate": 1.9977126696564387e-06, + "loss": 0.946, "step": 20483 }, { - "epoch": 0.5804641673042591, + "epoch": 0.8014711636278269, "grad_norm": 0.0, - "learning_rate": 7.89575345036232e-06, - "loss": 0.9476, + "learning_rate": 1.996952781874073e-06, + "loss": 0.857, "step": 20484 }, { - "epoch": 0.5804925047465216, + "epoch": 0.8015102903200564, "grad_norm": 0.0, - "learning_rate": 7.894856219191218e-06, - "loss": 0.936, + "learning_rate": 1.9961930226106162e-06, + "loss": 0.8485, "step": 20485 }, { - "epoch": 0.5805208421887841, + "epoch": 0.8015494170122858, "grad_norm": 0.0, - "learning_rate": 7.89395900575252e-06, - "loss": 0.8944, + "learning_rate": 1.9954333918782733e-06, + "loss": 0.9907, "step": 20486 }, { - "epoch": 0.5805491796310465, + "epoch": 0.8015885437045153, "grad_norm": 0.0, - "learning_rate": 7.893061810053792e-06, - "loss": 0.8306, + "learning_rate": 1.994673889689237e-06, + "loss": 1.0327, "step": 20487 }, { - "epoch": 0.580577517073309, + "epoch": 0.8016276703967447, "grad_norm": 0.0, - "learning_rate": 7.89216463210258e-06, - "loss": 0.8639, + "learning_rate": 1.993914516055707e-06, + "loss": 0.9359, "step": 20488 }, { - "epoch": 0.5806058545155715, + "epoch": 0.8016667970889741, "grad_norm": 0.0, - "learning_rate": 7.891267471906453e-06, - "loss": 0.978, + "learning_rate": 1.9931552709898783e-06, + "loss": 0.9154, "step": 20489 }, { - "epoch": 0.5806341919578338, + "epoch": 0.8017059237812035, "grad_norm": 0.0, - "learning_rate": 7.890370329472963e-06, - "loss": 0.9354, + "learning_rate": 1.992396154503945e-06, + "loss": 0.9957, "step": 20490 }, { - "epoch": 0.5806625294000963, + "epoch": 0.801745050473433, "grad_norm": 0.0, - "learning_rate": 7.889473204809664e-06, - "loss": 0.9135, + "learning_rate": 1.991637166610092e-06, + "loss": 1.0771, "step": 20491 }, { - "epoch": 0.5806908668423588, + "epoch": 0.8017841771656624, "grad_norm": 0.0, - "learning_rate": 7.88857609792412e-06, - "loss": 0.8829, + "learning_rate": 1.990878307320514e-06, + "loss": 0.9572, "step": 20492 }, { - "epoch": 0.5807192042846213, + "epoch": 0.8018233038578918, "grad_norm": 0.0, - "learning_rate": 7.887679008823881e-06, - "loss": 0.8361, + "learning_rate": 1.9901195766473903e-06, + "loss": 0.9557, "step": 20493 }, { - "epoch": 0.5807475417268837, + "epoch": 0.8018624305501213, "grad_norm": 0.0, - "learning_rate": 7.886781937516505e-06, - "loss": 0.8053, + "learning_rate": 1.989360974602913e-06, + "loss": 0.85, "step": 20494 }, { - "epoch": 0.5807758791691462, + "epoch": 0.8019015572423507, "grad_norm": 0.0, - "learning_rate": 7.885884884009552e-06, - "loss": 0.8817, + "learning_rate": 1.988602501199258e-06, + "loss": 1.0695, "step": 20495 }, { - "epoch": 0.5808042166114087, + "epoch": 0.8019406839345802, "grad_norm": 0.0, - "learning_rate": 7.884987848310574e-06, - "loss": 0.9282, + "learning_rate": 1.987844156448612e-06, + "loss": 0.9218, "step": 20496 }, { - "epoch": 0.5808325540536711, + "epoch": 0.8019798106268096, "grad_norm": 0.0, - "learning_rate": 7.88409083042713e-06, - "loss": 0.9096, + "learning_rate": 1.987085940363145e-06, + "loss": 0.9183, "step": 20497 }, { - "epoch": 0.5808608914959336, + "epoch": 0.8020189373190391, "grad_norm": 0.0, - "learning_rate": 7.883193830366775e-06, - "loss": 0.8303, + "learning_rate": 1.986327852955038e-06, + "loss": 0.8896, "step": 20498 }, { - "epoch": 0.5808892289381961, + "epoch": 0.8020580640112684, "grad_norm": 0.0, - "learning_rate": 7.882296848137063e-06, - "loss": 0.9326, + "learning_rate": 1.985569894236463e-06, + "loss": 1.0536, "step": 20499 }, { - "epoch": 0.5809175663804585, + "epoch": 0.8020971907034979, "grad_norm": 0.0, - "learning_rate": 7.881399883745555e-06, - "loss": 0.853, + "learning_rate": 1.984812064219597e-06, + "loss": 1.0024, "step": 20500 }, { - "epoch": 0.5809459038227209, + "epoch": 0.8021363173957273, "grad_norm": 0.0, - "learning_rate": 7.880502937199798e-06, - "loss": 0.7246, + "learning_rate": 1.984054362916602e-06, + "loss": 0.9221, "step": 20501 }, { - "epoch": 0.5809742412649834, + "epoch": 0.8021754440879568, "grad_norm": 0.0, - "learning_rate": 7.879606008507351e-06, - "loss": 0.9113, + "learning_rate": 1.9832967903396493e-06, + "loss": 1.0348, "step": 20502 }, { - "epoch": 0.5810025787072459, + "epoch": 0.8022145707801862, "grad_norm": 0.0, - "learning_rate": 7.878709097675775e-06, - "loss": 0.9129, + "learning_rate": 1.9825393465009068e-06, + "loss": 0.9664, "step": 20503 }, { - "epoch": 0.5810309161495083, + "epoch": 0.8022536974724157, "grad_norm": 0.0, - "learning_rate": 7.877812204712614e-06, - "loss": 0.9511, + "learning_rate": 1.9817820314125346e-06, + "loss": 0.9221, "step": 20504 }, { - "epoch": 0.5810592535917708, + "epoch": 0.8022928241646451, "grad_norm": 0.0, - "learning_rate": 7.876915329625431e-06, - "loss": 0.783, + "learning_rate": 1.9810248450866955e-06, + "loss": 0.9367, "step": 20505 }, { - "epoch": 0.5810875910340333, + "epoch": 0.8023319508568746, "grad_norm": 0.0, - "learning_rate": 7.876018472421782e-06, - "loss": 0.8132, + "learning_rate": 1.980267787535548e-06, + "loss": 1.0228, "step": 20506 }, { - "epoch": 0.5811159284762958, + "epoch": 0.802371077549104, "grad_norm": 0.0, - "learning_rate": 7.875121633109214e-06, - "loss": 0.8586, + "learning_rate": 1.979510858771254e-06, + "loss": 0.9757, "step": 20507 }, { - "epoch": 0.5811442659185582, + "epoch": 0.8024102042413335, "grad_norm": 0.0, - "learning_rate": 7.874224811695287e-06, - "loss": 0.8546, + "learning_rate": 1.9787540588059616e-06, + "loss": 0.8896, "step": 20508 }, { - "epoch": 0.5811726033608207, + "epoch": 0.8024493309335629, "grad_norm": 0.0, - "learning_rate": 7.873328008187554e-06, - "loss": 0.8845, + "learning_rate": 1.977997387651832e-06, + "loss": 0.8568, "step": 20509 }, { - "epoch": 0.5812009408030832, + "epoch": 0.8024884576257924, "grad_norm": 0.0, - "learning_rate": 7.872431222593568e-06, - "loss": 0.9106, + "learning_rate": 1.977240845321009e-06, + "loss": 0.9477, "step": 20510 }, { - "epoch": 0.5812292782453455, + "epoch": 0.8025275843180217, "grad_norm": 0.0, - "learning_rate": 7.871534454920886e-06, - "loss": 0.9613, + "learning_rate": 1.976484431825645e-06, + "loss": 0.9849, "step": 20511 }, { - "epoch": 0.581257615687608, + "epoch": 0.8025667110102512, "grad_norm": 0.0, - "learning_rate": 7.870637705177058e-06, - "loss": 0.8496, + "learning_rate": 1.975728147177887e-06, + "loss": 0.9218, "step": 20512 }, { - "epoch": 0.5812859531298705, + "epoch": 0.8026058377024806, "grad_norm": 0.0, - "learning_rate": 7.869740973369639e-06, - "loss": 0.8271, + "learning_rate": 1.9749719913898824e-06, + "loss": 0.9543, "step": 20513 }, { - "epoch": 0.5813142905721329, + "epoch": 0.8026449643947101, "grad_norm": 0.0, - "learning_rate": 7.868844259506186e-06, - "loss": 0.8737, + "learning_rate": 1.9742159644737692e-06, + "loss": 0.8916, "step": 20514 }, { - "epoch": 0.5813426280143954, + "epoch": 0.8026840910869395, "grad_norm": 0.0, - "learning_rate": 7.867947563594246e-06, - "loss": 0.8714, + "learning_rate": 1.9734600664416904e-06, + "loss": 1.1271, "step": 20515 }, { - "epoch": 0.5813709654566579, + "epoch": 0.802723217779169, "grad_norm": 0.0, - "learning_rate": 7.867050885641376e-06, - "loss": 0.9069, + "learning_rate": 1.9727042973057852e-06, + "loss": 0.8914, "step": 20516 }, { - "epoch": 0.5813993028989204, + "epoch": 0.8027623444713984, "grad_norm": 0.0, - "learning_rate": 7.866154225655127e-06, - "loss": 0.74, + "learning_rate": 1.9719486570781933e-06, + "loss": 0.9905, "step": 20517 }, { - "epoch": 0.5814276403411828, + "epoch": 0.8028014711636279, "grad_norm": 0.0, - "learning_rate": 7.865257583643053e-06, - "loss": 0.7842, + "learning_rate": 1.971193145771043e-06, + "loss": 0.9965, "step": 20518 }, { - "epoch": 0.5814559777834453, + "epoch": 0.8028405978558573, "grad_norm": 0.0, - "learning_rate": 7.864360959612714e-06, - "loss": 0.7869, + "learning_rate": 1.9704377633964734e-06, + "loss": 0.9364, "step": 20519 }, { - "epoch": 0.5814843152257078, + "epoch": 0.8028797245480868, "grad_norm": 0.0, - "learning_rate": 7.863464353571649e-06, - "loss": 0.8112, + "learning_rate": 1.969682509966606e-06, + "loss": 1.0312, "step": 20520 }, { - "epoch": 0.5815126526679701, + "epoch": 0.8029188512403161, "grad_norm": 0.0, - "learning_rate": 7.862567765527418e-06, - "loss": 0.7986, + "learning_rate": 1.968927385493581e-06, + "loss": 0.9502, "step": 20521 }, { - "epoch": 0.5815409901102326, + "epoch": 0.8029579779325455, "grad_norm": 0.0, - "learning_rate": 7.861671195487573e-06, - "loss": 0.8907, + "learning_rate": 1.9681723899895142e-06, + "loss": 0.9964, "step": 20522 }, { - "epoch": 0.5815693275524951, + "epoch": 0.802997104624775, "grad_norm": 0.0, - "learning_rate": 7.860774643459664e-06, - "loss": 0.9337, + "learning_rate": 1.9674175234665395e-06, + "loss": 0.9772, "step": 20523 }, { - "epoch": 0.5815976649947576, + "epoch": 0.8030362313170044, "grad_norm": 0.0, - "learning_rate": 7.85987810945125e-06, - "loss": 0.8746, + "learning_rate": 1.966662785936767e-06, + "loss": 1.0243, "step": 20524 }, { - "epoch": 0.58162600243702, + "epoch": 0.8030753580092339, "grad_norm": 0.0, - "learning_rate": 7.858981593469872e-06, - "loss": 0.7758, + "learning_rate": 1.965908177412329e-06, + "loss": 0.8868, "step": 20525 }, { - "epoch": 0.5816543398792825, + "epoch": 0.8031144847014633, "grad_norm": 0.0, - "learning_rate": 7.858085095523088e-06, - "loss": 0.8881, + "learning_rate": 1.9651536979053367e-06, + "loss": 1.0096, "step": 20526 }, { - "epoch": 0.581682677321545, + "epoch": 0.8031536113936928, "grad_norm": 0.0, - "learning_rate": 7.857188615618452e-06, - "loss": 0.9058, + "learning_rate": 1.96439934742791e-06, + "loss": 0.9193, "step": 20527 }, { - "epoch": 0.5817110147638074, + "epoch": 0.8031927380859222, "grad_norm": 0.0, - "learning_rate": 7.856292153763508e-06, - "loss": 0.7858, + "learning_rate": 1.9636451259921553e-06, + "loss": 0.9793, "step": 20528 }, { - "epoch": 0.5817393522060699, + "epoch": 0.8032318647781517, "grad_norm": 0.0, - "learning_rate": 7.855395709965814e-06, - "loss": 0.8549, + "learning_rate": 1.9628910336101948e-06, + "loss": 0.9982, "step": 20529 }, { - "epoch": 0.5817676896483324, + "epoch": 0.8032709914703811, "grad_norm": 0.0, - "learning_rate": 7.854499284232915e-06, - "loss": 0.8862, + "learning_rate": 1.962137070294131e-06, + "loss": 0.9047, "step": 20530 }, { - "epoch": 0.5817960270905949, + "epoch": 0.8033101181626106, "grad_norm": 0.0, - "learning_rate": 7.853602876572367e-06, - "loss": 0.8077, + "learning_rate": 1.961383236056077e-06, + "loss": 0.9482, "step": 20531 }, { - "epoch": 0.5818243645328572, + "epoch": 0.8033492448548399, "grad_norm": 0.0, - "learning_rate": 7.852706486991722e-06, - "loss": 0.8447, + "learning_rate": 1.9606295309081312e-06, + "loss": 0.9471, "step": 20532 }, { - "epoch": 0.5818527019751197, + "epoch": 0.8033883715470694, "grad_norm": 0.0, - "learning_rate": 7.851810115498523e-06, - "loss": 0.9259, + "learning_rate": 1.9598759548624027e-06, + "loss": 1.0527, "step": 20533 }, { - "epoch": 0.5818810394173822, + "epoch": 0.8034274982392988, "grad_norm": 0.0, - "learning_rate": 7.850913762100325e-06, - "loss": 0.9838, + "learning_rate": 1.9591225079309905e-06, + "loss": 1.0269, "step": 20534 }, { - "epoch": 0.5819093768596446, + "epoch": 0.8034666249315283, "grad_norm": 0.0, - "learning_rate": 7.850017426804682e-06, - "loss": 0.944, + "learning_rate": 1.9583691901259983e-06, + "loss": 0.9694, "step": 20535 }, { - "epoch": 0.5819377143019071, + "epoch": 0.8035057516237577, "grad_norm": 0.0, - "learning_rate": 7.849121109619138e-06, - "loss": 0.7425, + "learning_rate": 1.957616001459517e-06, + "loss": 0.9451, "step": 20536 }, { - "epoch": 0.5819660517441696, + "epoch": 0.8035448783159872, "grad_norm": 0.0, - "learning_rate": 7.84822481055125e-06, - "loss": 0.8608, + "learning_rate": 1.956862941943646e-06, + "loss": 0.9979, "step": 20537 }, { - "epoch": 0.581994389186432, + "epoch": 0.8035840050082166, "grad_norm": 0.0, - "learning_rate": 7.847328529608558e-06, - "loss": 0.9205, + "learning_rate": 1.956110011590476e-06, + "loss": 0.928, "step": 20538 }, { - "epoch": 0.5820227266286945, + "epoch": 0.8036231317004461, "grad_norm": 0.0, - "learning_rate": 7.846432266798618e-06, - "loss": 0.7801, + "learning_rate": 1.9553572104121043e-06, + "loss": 0.9554, "step": 20539 }, { - "epoch": 0.582051064070957, + "epoch": 0.8036622583926755, "grad_norm": 0.0, - "learning_rate": 7.845536022128983e-06, - "loss": 0.9493, + "learning_rate": 1.954604538420611e-06, + "loss": 0.9655, "step": 20540 }, { - "epoch": 0.5820794015132195, + "epoch": 0.803701385084905, "grad_norm": 0.0, - "learning_rate": 7.844639795607195e-06, - "loss": 0.7402, + "learning_rate": 1.953851995628091e-06, + "loss": 0.9825, "step": 20541 }, { - "epoch": 0.5821077389554818, + "epoch": 0.8037405117771343, "grad_norm": 0.0, - "learning_rate": 7.843743587240804e-06, - "loss": 0.8894, + "learning_rate": 1.9530995820466223e-06, + "loss": 0.846, "step": 20542 }, { - "epoch": 0.5821360763977443, + "epoch": 0.8037796384693638, "grad_norm": 0.0, - "learning_rate": 7.842847397037366e-06, - "loss": 0.7059, + "learning_rate": 1.952347297688291e-06, + "loss": 0.9173, "step": 20543 }, { - "epoch": 0.5821644138400068, + "epoch": 0.8038187651615932, "grad_norm": 0.0, - "learning_rate": 7.84195122500442e-06, - "loss": 0.8869, + "learning_rate": 1.951595142565178e-06, + "loss": 0.9149, "step": 20544 }, { - "epoch": 0.5821927512822692, + "epoch": 0.8038578918538227, "grad_norm": 0.0, - "learning_rate": 7.841055071149526e-06, - "loss": 0.8615, + "learning_rate": 1.9508431166893647e-06, + "loss": 0.9327, "step": 20545 }, { - "epoch": 0.5822210887245317, + "epoch": 0.8038970185460521, "grad_norm": 0.0, - "learning_rate": 7.840158935480224e-06, - "loss": 0.9582, + "learning_rate": 1.9500912200729216e-06, + "loss": 0.9206, "step": 20546 }, { - "epoch": 0.5822494261667942, + "epoch": 0.8039361452382816, "grad_norm": 0.0, - "learning_rate": 7.839262818004065e-06, - "loss": 0.8779, + "learning_rate": 1.9493394527279262e-06, + "loss": 0.8708, "step": 20547 }, { - "epoch": 0.5822777636090567, + "epoch": 0.803975271930511, "grad_norm": 0.0, - "learning_rate": 7.838366718728599e-06, - "loss": 0.8409, + "learning_rate": 1.948587814666455e-06, + "loss": 1.0051, "step": 20548 }, { - "epoch": 0.5823061010513191, + "epoch": 0.8040143986227404, "grad_norm": 0.0, - "learning_rate": 7.83747063766137e-06, - "loss": 0.8456, + "learning_rate": 1.947836305900571e-06, + "loss": 1.0302, "step": 20549 }, { - "epoch": 0.5823344384935816, + "epoch": 0.8040535253149699, "grad_norm": 0.0, - "learning_rate": 7.836574574809935e-06, - "loss": 0.8928, + "learning_rate": 1.9470849264423466e-06, + "loss": 1.0187, "step": 20550 }, { - "epoch": 0.5823627759358441, + "epoch": 0.8040926520071993, "grad_norm": 0.0, - "learning_rate": 7.83567853018183e-06, - "loss": 0.9651, + "learning_rate": 1.9463336763038465e-06, + "loss": 0.9735, "step": 20551 }, { - "epoch": 0.5823911133781065, + "epoch": 0.8041317786994288, "grad_norm": 0.0, - "learning_rate": 7.83478250378461e-06, - "loss": 0.8662, + "learning_rate": 1.9455825554971384e-06, + "loss": 0.8341, "step": 20552 }, { - "epoch": 0.582419450820369, + "epoch": 0.8041709053916581, "grad_norm": 0.0, - "learning_rate": 7.833886495625825e-06, - "loss": 0.8968, + "learning_rate": 1.9448315640342796e-06, + "loss": 0.9366, "step": 20553 }, { - "epoch": 0.5824477882626314, + "epoch": 0.8042100320838876, "grad_norm": 0.0, - "learning_rate": 7.832990505713012e-06, - "loss": 0.8134, + "learning_rate": 1.9440807019273346e-06, + "loss": 0.9772, "step": 20554 }, { - "epoch": 0.5824761257048938, + "epoch": 0.804249158776117, "grad_norm": 0.0, - "learning_rate": 7.832094534053725e-06, - "loss": 0.8573, + "learning_rate": 1.9433299691883546e-06, + "loss": 0.8969, "step": 20555 }, { - "epoch": 0.5825044631471563, + "epoch": 0.8042882854683465, "grad_norm": 0.0, - "learning_rate": 7.831198580655515e-06, - "loss": 0.8659, + "learning_rate": 1.9425793658294035e-06, + "loss": 0.8783, "step": 20556 }, { - "epoch": 0.5825328005894188, + "epoch": 0.8043274121605759, "grad_norm": 0.0, - "learning_rate": 7.83030264552592e-06, - "loss": 0.7722, + "learning_rate": 1.9418288918625295e-06, + "loss": 1.1025, "step": 20557 }, { - "epoch": 0.5825611380316813, + "epoch": 0.8043665388528054, "grad_norm": 0.0, - "learning_rate": 7.829406728672498e-06, - "loss": 0.9091, + "learning_rate": 1.9410785472997884e-06, + "loss": 1.097, "step": 20558 }, { - "epoch": 0.5825894754739437, + "epoch": 0.8044056655450348, "grad_norm": 0.0, - "learning_rate": 7.828510830102785e-06, - "loss": 0.8528, + "learning_rate": 1.940328332153225e-06, + "loss": 0.897, "step": 20559 }, { - "epoch": 0.5826178129162062, + "epoch": 0.8044447922372643, "grad_norm": 0.0, - "learning_rate": 7.82761494982433e-06, - "loss": 0.7479, + "learning_rate": 1.939578246434889e-06, + "loss": 0.9965, "step": 20560 }, { - "epoch": 0.5826461503584687, + "epoch": 0.8044839189294937, "grad_norm": 0.0, - "learning_rate": 7.826719087844684e-06, - "loss": 0.8, + "learning_rate": 1.9388282901568268e-06, + "loss": 1.0581, "step": 20561 }, { - "epoch": 0.5826744878007311, + "epoch": 0.8045230456217232, "grad_norm": 0.0, - "learning_rate": 7.82582324417139e-06, - "loss": 0.8876, + "learning_rate": 1.9380784633310823e-06, + "loss": 0.8005, "step": 20562 }, { - "epoch": 0.5827028252429935, + "epoch": 0.8045621723139526, "grad_norm": 0.0, - "learning_rate": 7.82492741881199e-06, - "loss": 0.8642, + "learning_rate": 1.9373287659696936e-06, + "loss": 0.9939, "step": 20563 }, { - "epoch": 0.582731162685256, + "epoch": 0.804601299006182, "grad_norm": 0.0, - "learning_rate": 7.824031611774042e-06, - "loss": 0.8565, + "learning_rate": 1.936579198084703e-06, + "loss": 0.889, "step": 20564 }, { - "epoch": 0.5827595001275185, + "epoch": 0.8046404256984114, "grad_norm": 0.0, - "learning_rate": 7.823135823065076e-06, - "loss": 0.9332, + "learning_rate": 1.9358297596881446e-06, + "loss": 0.8496, "step": 20565 }, { - "epoch": 0.5827878375697809, + "epoch": 0.8046795523906409, "grad_norm": 0.0, - "learning_rate": 7.822240052692653e-06, - "loss": 0.8412, + "learning_rate": 1.9350804507920583e-06, + "loss": 0.8732, "step": 20566 }, { - "epoch": 0.5828161750120434, + "epoch": 0.8047186790828703, "grad_norm": 0.0, - "learning_rate": 7.821344300664304e-06, - "loss": 0.8719, + "learning_rate": 1.9343312714084718e-06, + "loss": 0.8934, "step": 20567 }, { - "epoch": 0.5828445124543059, + "epoch": 0.8047578057750998, "grad_norm": 0.0, - "learning_rate": 7.820448566987582e-06, - "loss": 0.9824, + "learning_rate": 1.9335822215494213e-06, + "loss": 0.904, "step": 20568 }, { - "epoch": 0.5828728498965683, + "epoch": 0.8047969324673292, "grad_norm": 0.0, - "learning_rate": 7.819552851670033e-06, - "loss": 0.8185, + "learning_rate": 1.9328333012269264e-06, + "loss": 0.8716, "step": 20569 }, { - "epoch": 0.5829011873388308, + "epoch": 0.8048360591595587, "grad_norm": 0.0, - "learning_rate": 7.818657154719198e-06, - "loss": 0.9125, + "learning_rate": 1.9320845104530263e-06, + "loss": 1.0652, "step": 20570 }, { - "epoch": 0.5829295247810933, + "epoch": 0.8048751858517881, "grad_norm": 0.0, - "learning_rate": 7.817761476142629e-06, - "loss": 0.8997, + "learning_rate": 1.931335849239736e-06, + "loss": 0.9248, "step": 20571 }, { - "epoch": 0.5829578622233558, + "epoch": 0.8049143125440176, "grad_norm": 0.0, - "learning_rate": 7.81686581594786e-06, - "loss": 0.8698, + "learning_rate": 1.930587317599084e-06, + "loss": 1.0361, "step": 20572 }, { - "epoch": 0.5829861996656182, + "epoch": 0.804953439236247, "grad_norm": 0.0, - "learning_rate": 7.815970174142441e-06, - "loss": 0.8897, + "learning_rate": 1.929838915543083e-06, + "loss": 0.96, "step": 20573 }, { - "epoch": 0.5830145371078806, + "epoch": 0.8049925659284765, "grad_norm": 0.0, - "learning_rate": 7.815074550733919e-06, - "loss": 0.9209, + "learning_rate": 1.929090643083761e-06, + "loss": 0.9161, "step": 20574 }, { - "epoch": 0.5830428745501431, + "epoch": 0.8050316926207058, "grad_norm": 0.0, - "learning_rate": 7.814178945729833e-06, - "loss": 0.9683, + "learning_rate": 1.928342500233128e-06, + "loss": 1.0261, "step": 20575 }, { - "epoch": 0.5830712119924055, + "epoch": 0.8050708193129353, "grad_norm": 0.0, - "learning_rate": 7.813283359137728e-06, - "loss": 0.8767, + "learning_rate": 1.9275944870032026e-06, + "loss": 0.9463, "step": 20576 }, { - "epoch": 0.583099549434668, + "epoch": 0.8051099460051647, "grad_norm": 0.0, - "learning_rate": 7.812387790965156e-06, - "loss": 0.8509, + "learning_rate": 1.926846603405992e-06, + "loss": 1.0688, "step": 20577 }, { - "epoch": 0.5831278868769305, + "epoch": 0.8051490726973941, "grad_norm": 0.0, - "learning_rate": 7.811492241219648e-06, - "loss": 0.9914, + "learning_rate": 1.9260988494535082e-06, + "loss": 1.0914, "step": 20578 }, { - "epoch": 0.5831562243191929, + "epoch": 0.8051881993896236, "grad_norm": 0.0, - "learning_rate": 7.810596709908759e-06, - "loss": 0.7637, + "learning_rate": 1.92535122515776e-06, + "loss": 0.8031, "step": 20579 }, { - "epoch": 0.5831845617614554, + "epoch": 0.805227326081853, "grad_norm": 0.0, - "learning_rate": 7.809701197040021e-06, - "loss": 0.9434, + "learning_rate": 1.9246037305307563e-06, + "loss": 0.9137, "step": 20580 }, { - "epoch": 0.5832128992037179, + "epoch": 0.8052664527740825, "grad_norm": 0.0, - "learning_rate": 7.808805702620985e-06, - "loss": 0.8976, + "learning_rate": 1.9238563655844946e-06, + "loss": 0.9082, "step": 20581 }, { - "epoch": 0.5832412366459804, + "epoch": 0.8053055794663119, "grad_norm": 0.0, - "learning_rate": 7.807910226659194e-06, - "loss": 0.8109, + "learning_rate": 1.923109130330979e-06, + "loss": 0.8723, "step": 20582 }, { - "epoch": 0.5832695740882428, + "epoch": 0.8053447061585414, "grad_norm": 0.0, - "learning_rate": 7.807014769162186e-06, - "loss": 0.9412, + "learning_rate": 1.9223620247822107e-06, + "loss": 0.9073, "step": 20583 }, { - "epoch": 0.5832979115305053, + "epoch": 0.8053838328507708, "grad_norm": 0.0, - "learning_rate": 7.806119330137507e-06, - "loss": 0.7774, + "learning_rate": 1.9216150489501883e-06, + "loss": 0.9576, "step": 20584 }, { - "epoch": 0.5833262489727677, + "epoch": 0.8054229595430002, "grad_norm": 0.0, - "learning_rate": 7.805223909592706e-06, - "loss": 0.771, + "learning_rate": 1.920868202846904e-06, + "loss": 1.1031, "step": 20585 }, { - "epoch": 0.5833545864150301, + "epoch": 0.8054620862352296, "grad_norm": 0.0, - "learning_rate": 7.804328507535312e-06, - "loss": 0.8575, + "learning_rate": 1.920121486484352e-06, + "loss": 1.007, "step": 20586 }, { - "epoch": 0.5833829238572926, + "epoch": 0.8055012129274591, "grad_norm": 0.0, - "learning_rate": 7.803433123972878e-06, - "loss": 0.8557, + "learning_rate": 1.9193748998745267e-06, + "loss": 0.937, "step": 20587 }, { - "epoch": 0.5834112612995551, + "epoch": 0.8055403396196885, "grad_norm": 0.0, - "learning_rate": 7.80253775891294e-06, - "loss": 0.8726, + "learning_rate": 1.918628443029412e-06, + "loss": 0.969, "step": 20588 }, { - "epoch": 0.5834395987418176, + "epoch": 0.805579466311918, "grad_norm": 0.0, - "learning_rate": 7.801642412363042e-06, - "loss": 0.8792, + "learning_rate": 1.917882115960998e-06, + "loss": 0.8871, "step": 20589 }, { - "epoch": 0.58346793618408, + "epoch": 0.8056185930041474, "grad_norm": 0.0, - "learning_rate": 7.80074708433073e-06, - "loss": 0.7443, + "learning_rate": 1.917135918681273e-06, + "loss": 0.9571, "step": 20590 }, { - "epoch": 0.5834962736263425, + "epoch": 0.8056577196963769, "grad_norm": 0.0, - "learning_rate": 7.79985177482354e-06, - "loss": 0.8292, + "learning_rate": 1.916389851202214e-06, + "loss": 0.938, "step": 20591 }, { - "epoch": 0.583524611068605, + "epoch": 0.8056968463886063, "grad_norm": 0.0, - "learning_rate": 7.798956483849013e-06, - "loss": 0.788, + "learning_rate": 1.915643913535805e-06, + "loss": 1.0076, "step": 20592 }, { - "epoch": 0.5835529485108674, + "epoch": 0.8057359730808358, "grad_norm": 0.0, - "learning_rate": 7.798061211414696e-06, - "loss": 0.9008, + "learning_rate": 1.9148981056940265e-06, + "loss": 1.0061, "step": 20593 }, { - "epoch": 0.5835812859531299, + "epoch": 0.8057750997730652, "grad_norm": 0.0, - "learning_rate": 7.797165957528127e-06, - "loss": 0.9216, + "learning_rate": 1.9141524276888514e-06, + "loss": 1.0704, "step": 20594 }, { - "epoch": 0.5836096233953924, + "epoch": 0.8058142264652947, "grad_norm": 0.0, - "learning_rate": 7.796270722196848e-06, - "loss": 0.985, + "learning_rate": 1.9134068795322546e-06, + "loss": 0.9281, "step": 20595 }, { - "epoch": 0.5836379608376548, + "epoch": 0.805853353157524, "grad_norm": 0.0, - "learning_rate": 7.795375505428398e-06, - "loss": 0.7849, + "learning_rate": 1.9126614612362114e-06, + "loss": 0.9377, "step": 20596 }, { - "epoch": 0.5836662982799172, + "epoch": 0.8058924798497535, "grad_norm": 0.0, - "learning_rate": 7.794480307230317e-06, - "loss": 0.7839, + "learning_rate": 1.9119161728126935e-06, + "loss": 0.9803, "step": 20597 }, { - "epoch": 0.5836946357221797, + "epoch": 0.8059316065419829, "grad_norm": 0.0, - "learning_rate": 7.79358512761015e-06, - "loss": 0.9059, + "learning_rate": 1.911171014273665e-06, + "loss": 1.0045, "step": 20598 }, { - "epoch": 0.5837229731644422, + "epoch": 0.8059707332342124, "grad_norm": 0.0, - "learning_rate": 7.792689966575433e-06, - "loss": 0.9353, + "learning_rate": 1.910425985631096e-06, + "loss": 1.0026, "step": 20599 }, { - "epoch": 0.5837513106067046, + "epoch": 0.8060098599264418, "grad_norm": 0.0, - "learning_rate": 7.791794824133709e-06, - "loss": 0.8885, + "learning_rate": 1.909681086896944e-06, + "loss": 0.9692, "step": 20600 }, { - "epoch": 0.5837796480489671, + "epoch": 0.8060489866186713, "grad_norm": 0.0, - "learning_rate": 7.790899700292516e-06, - "loss": 0.8879, + "learning_rate": 1.9089363180831798e-06, + "loss": 0.9593, "step": 20601 }, { - "epoch": 0.5838079854912296, + "epoch": 0.8060881133109007, "grad_norm": 0.0, - "learning_rate": 7.790004595059395e-06, - "loss": 0.9424, + "learning_rate": 1.9081916792017584e-06, + "loss": 0.9698, "step": 20602 }, { - "epoch": 0.583836322933492, + "epoch": 0.8061272400031302, "grad_norm": 0.0, - "learning_rate": 7.78910950844189e-06, - "loss": 0.8981, + "learning_rate": 1.9074471702646423e-06, + "loss": 0.9724, "step": 20603 }, { - "epoch": 0.5838646603757545, + "epoch": 0.8061663666953596, "grad_norm": 0.0, - "learning_rate": 7.788214440447532e-06, - "loss": 0.9129, + "learning_rate": 1.9067027912837776e-06, + "loss": 0.8496, "step": 20604 }, { - "epoch": 0.583892997818017, + "epoch": 0.8062054933875891, "grad_norm": 0.0, - "learning_rate": 7.787319391083864e-06, - "loss": 0.9407, + "learning_rate": 1.9059585422711302e-06, + "loss": 0.9364, "step": 20605 }, { - "epoch": 0.5839213352602795, + "epoch": 0.8062446200798185, "grad_norm": 0.0, - "learning_rate": 7.78642436035843e-06, - "loss": 0.985, + "learning_rate": 1.9052144232386438e-06, + "loss": 0.9882, "step": 20606 }, { - "epoch": 0.5839496727025418, + "epoch": 0.8062837467720478, "grad_norm": 0.0, - "learning_rate": 7.785529348278765e-06, - "loss": 0.873, + "learning_rate": 1.9044704341982733e-06, + "loss": 1.0148, "step": 20607 }, { - "epoch": 0.5839780101448043, + "epoch": 0.8063228734642773, "grad_norm": 0.0, - "learning_rate": 7.784634354852411e-06, - "loss": 0.877, + "learning_rate": 1.9037265751619606e-06, + "loss": 0.9106, "step": 20608 }, { - "epoch": 0.5840063475870668, + "epoch": 0.8063620001565067, "grad_norm": 0.0, - "learning_rate": 7.7837393800869e-06, - "loss": 0.835, + "learning_rate": 1.9029828461416532e-06, + "loss": 0.9482, "step": 20609 }, { - "epoch": 0.5840346850293292, + "epoch": 0.8064011268487362, "grad_norm": 0.0, - "learning_rate": 7.782844423989777e-06, - "loss": 0.8256, + "learning_rate": 1.9022392471492956e-06, + "loss": 0.9154, "step": 20610 }, { - "epoch": 0.5840630224715917, + "epoch": 0.8064402535409656, "grad_norm": 0.0, - "learning_rate": 7.781949486568581e-06, - "loss": 0.8307, + "learning_rate": 1.9014957781968313e-06, + "loss": 0.954, "step": 20611 }, { - "epoch": 0.5840913599138542, + "epoch": 0.8064793802331951, "grad_norm": 0.0, - "learning_rate": 7.781054567830845e-06, - "loss": 0.9453, + "learning_rate": 1.9007524392961941e-06, + "loss": 0.9612, "step": 20612 }, { - "epoch": 0.5841196973561167, + "epoch": 0.8065185069254245, "grad_norm": 0.0, - "learning_rate": 7.78015966778411e-06, - "loss": 0.8368, + "learning_rate": 1.9000092304593242e-06, + "loss": 1.076, "step": 20613 }, { - "epoch": 0.5841480347983791, + "epoch": 0.806557633617654, "grad_norm": 0.0, - "learning_rate": 7.779264786435916e-06, - "loss": 0.8247, + "learning_rate": 1.8992661516981558e-06, + "loss": 0.9681, "step": 20614 }, { - "epoch": 0.5841763722406416, + "epoch": 0.8065967603098834, "grad_norm": 0.0, - "learning_rate": 7.778369923793799e-06, - "loss": 0.757, + "learning_rate": 1.8985232030246248e-06, + "loss": 0.9197, "step": 20615 }, { - "epoch": 0.584204709682904, + "epoch": 0.8066358870021129, "grad_norm": 0.0, - "learning_rate": 7.777475079865298e-06, - "loss": 0.9014, + "learning_rate": 1.897780384450657e-06, + "loss": 0.8984, "step": 20616 }, { - "epoch": 0.5842330471251664, + "epoch": 0.8066750136943422, "grad_norm": 0.0, - "learning_rate": 7.776580254657948e-06, - "loss": 0.7402, + "learning_rate": 1.8970376959881864e-06, + "loss": 0.8744, "step": 20617 }, { - "epoch": 0.5842613845674289, + "epoch": 0.8067141403865717, "grad_norm": 0.0, - "learning_rate": 7.775685448179288e-06, - "loss": 0.7643, + "learning_rate": 1.896295137649131e-06, + "loss": 0.9377, "step": 20618 }, { - "epoch": 0.5842897220096914, + "epoch": 0.8067532670788011, "grad_norm": 0.0, - "learning_rate": 7.774790660436857e-06, - "loss": 0.8171, + "learning_rate": 1.8955527094454262e-06, + "loss": 0.8611, "step": 20619 }, { - "epoch": 0.5843180594519539, + "epoch": 0.8067923937710306, "grad_norm": 0.0, - "learning_rate": 7.773895891438189e-06, - "loss": 0.8989, + "learning_rate": 1.8948104113889876e-06, + "loss": 0.8814, "step": 20620 }, { - "epoch": 0.5843463968942163, + "epoch": 0.80683152046326, "grad_norm": 0.0, - "learning_rate": 7.773001141190822e-06, - "loss": 0.7724, + "learning_rate": 1.8940682434917402e-06, + "loss": 0.8741, "step": 20621 }, { - "epoch": 0.5843747343364788, + "epoch": 0.8068706471554895, "grad_norm": 0.0, - "learning_rate": 7.772106409702297e-06, - "loss": 0.8551, + "learning_rate": 1.8933262057655933e-06, + "loss": 0.8809, "step": 20622 }, { - "epoch": 0.5844030717787413, + "epoch": 0.8069097738477189, "grad_norm": 0.0, - "learning_rate": 7.771211696980145e-06, - "loss": 0.8902, + "learning_rate": 1.8925842982224752e-06, + "loss": 0.9486, "step": 20623 }, { - "epoch": 0.5844314092210037, + "epoch": 0.8069489005399484, "grad_norm": 0.0, - "learning_rate": 7.770317003031908e-06, - "loss": 0.9498, + "learning_rate": 1.8918425208742919e-06, + "loss": 0.8744, "step": 20624 }, { - "epoch": 0.5844597466632662, + "epoch": 0.8069880272321778, "grad_norm": 0.0, - "learning_rate": 7.769422327865113e-06, - "loss": 0.8647, + "learning_rate": 1.8911008737329595e-06, + "loss": 0.9062, "step": 20625 }, { - "epoch": 0.5844880841055287, + "epoch": 0.8070271539244073, "grad_norm": 0.0, - "learning_rate": 7.768527671487304e-06, - "loss": 0.8533, + "learning_rate": 1.890359356810384e-06, + "loss": 0.872, "step": 20626 }, { - "epoch": 0.584516421547791, + "epoch": 0.8070662806166367, "grad_norm": 0.0, - "learning_rate": 7.767633033906016e-06, - "loss": 0.7842, + "learning_rate": 1.8896179701184748e-06, + "loss": 0.8861, "step": 20627 }, { - "epoch": 0.5845447589900535, + "epoch": 0.8071054073088662, "grad_norm": 0.0, - "learning_rate": 7.766738415128781e-06, - "loss": 0.8055, + "learning_rate": 1.8888767136691378e-06, + "loss": 0.8708, "step": 20628 }, { - "epoch": 0.584573096432316, + "epoch": 0.8071445340010955, "grad_norm": 0.0, - "learning_rate": 7.765843815163143e-06, - "loss": 0.8388, + "learning_rate": 1.8881355874742802e-06, + "loss": 0.9338, "step": 20629 }, { - "epoch": 0.5846014338745785, + "epoch": 0.807183660693325, "grad_norm": 0.0, - "learning_rate": 7.76494923401663e-06, - "loss": 0.7443, + "learning_rate": 1.887394591545798e-06, + "loss": 1.019, "step": 20630 }, { - "epoch": 0.5846297713168409, + "epoch": 0.8072227873855544, "grad_norm": 0.0, - "learning_rate": 7.764054671696776e-06, - "loss": 0.7879, + "learning_rate": 1.886653725895592e-06, + "loss": 0.9605, "step": 20631 }, { - "epoch": 0.5846581087591034, + "epoch": 0.8072619140777839, "grad_norm": 0.0, - "learning_rate": 7.763160128211123e-06, - "loss": 0.8762, + "learning_rate": 1.8859129905355645e-06, + "loss": 0.9266, "step": 20632 }, { - "epoch": 0.5846864462013659, + "epoch": 0.8073010407700133, "grad_norm": 0.0, - "learning_rate": 7.762265603567202e-06, - "loss": 0.7718, + "learning_rate": 1.8851723854776028e-06, + "loss": 0.961, "step": 20633 }, { - "epoch": 0.5847147836436283, + "epoch": 0.8073401674622428, "grad_norm": 0.0, - "learning_rate": 7.761371097772548e-06, - "loss": 0.8451, + "learning_rate": 1.8844319107336051e-06, + "loss": 0.833, "step": 20634 }, { - "epoch": 0.5847431210858908, + "epoch": 0.8073792941544722, "grad_norm": 0.0, - "learning_rate": 7.7604766108347e-06, - "loss": 0.8581, + "learning_rate": 1.8836915663154643e-06, + "loss": 0.9471, "step": 20635 }, { - "epoch": 0.5847714585281533, + "epoch": 0.8074184208467016, "grad_norm": 0.0, - "learning_rate": 7.759582142761186e-06, - "loss": 0.8817, + "learning_rate": 1.8829513522350628e-06, + "loss": 1.0576, "step": 20636 }, { - "epoch": 0.5847997959704158, + "epoch": 0.8074575475389311, "grad_norm": 0.0, - "learning_rate": 7.758687693559547e-06, - "loss": 0.9878, + "learning_rate": 1.8822112685042927e-06, + "loss": 1.0312, "step": 20637 }, { - "epoch": 0.5848281334126781, + "epoch": 0.8074966742311604, "grad_norm": 0.0, - "learning_rate": 7.75779326323731e-06, - "loss": 0.7733, + "learning_rate": 1.8814713151350373e-06, + "loss": 1.0395, "step": 20638 }, { - "epoch": 0.5848564708549406, + "epoch": 0.80753580092339, "grad_norm": 0.0, - "learning_rate": 7.756898851802014e-06, - "loss": 0.8047, + "learning_rate": 1.8807314921391816e-06, + "loss": 0.9293, "step": 20639 }, { - "epoch": 0.5848848082972031, + "epoch": 0.8075749276156193, "grad_norm": 0.0, - "learning_rate": 7.756004459261192e-06, - "loss": 0.7798, + "learning_rate": 1.879991799528601e-06, + "loss": 0.944, "step": 20640 }, { - "epoch": 0.5849131457394655, + "epoch": 0.8076140543078488, "grad_norm": 0.0, - "learning_rate": 7.755110085622377e-06, - "loss": 0.918, + "learning_rate": 1.8792522373151778e-06, + "loss": 0.8898, "step": 20641 }, { - "epoch": 0.584941483181728, + "epoch": 0.8076531810000782, "grad_norm": 0.0, - "learning_rate": 7.754215730893103e-06, - "loss": 0.8379, + "learning_rate": 1.8785128055107904e-06, + "loss": 1.0295, "step": 20642 }, { - "epoch": 0.5849698206239905, + "epoch": 0.8076923076923077, "grad_norm": 0.0, - "learning_rate": 7.753321395080907e-06, - "loss": 0.9251, + "learning_rate": 1.8777735041273083e-06, + "loss": 0.9845, "step": 20643 }, { - "epoch": 0.584998158066253, + "epoch": 0.8077314343845371, "grad_norm": 0.0, - "learning_rate": 7.752427078193316e-06, - "loss": 0.8275, + "learning_rate": 1.877034333176606e-06, + "loss": 0.907, "step": 20644 }, { - "epoch": 0.5850264955085154, + "epoch": 0.8077705610767666, "grad_norm": 0.0, - "learning_rate": 7.75153278023787e-06, - "loss": 0.8497, + "learning_rate": 1.8762952926705536e-06, + "loss": 0.9017, "step": 20645 }, { - "epoch": 0.5850548329507779, + "epoch": 0.807809687768996, "grad_norm": 0.0, - "learning_rate": 7.750638501222093e-06, - "loss": 0.7917, + "learning_rate": 1.875556382621021e-06, + "loss": 0.9193, "step": 20646 }, { - "epoch": 0.5850831703930404, + "epoch": 0.8078488144612255, "grad_norm": 0.0, - "learning_rate": 7.749744241153524e-06, - "loss": 1.0211, + "learning_rate": 1.87481760303987e-06, + "loss": 0.8822, "step": 20647 }, { - "epoch": 0.5851115078353027, + "epoch": 0.8078879411534549, "grad_norm": 0.0, - "learning_rate": 7.748850000039702e-06, - "loss": 0.8651, + "learning_rate": 1.8740789539389703e-06, + "loss": 0.9897, "step": 20648 }, { - "epoch": 0.5851398452775652, + "epoch": 0.8079270678456844, "grad_norm": 0.0, - "learning_rate": 7.747955777888145e-06, - "loss": 0.8203, + "learning_rate": 1.8733404353301742e-06, + "loss": 0.8921, "step": 20649 }, { - "epoch": 0.5851681827198277, + "epoch": 0.8079661945379137, "grad_norm": 0.0, - "learning_rate": 7.747061574706394e-06, - "loss": 0.9174, + "learning_rate": 1.8726020472253537e-06, + "loss": 1.0068, "step": 20650 }, { - "epoch": 0.5851965201620901, + "epoch": 0.8080053212301432, "grad_norm": 0.0, - "learning_rate": 7.746167390501984e-06, - "loss": 0.9349, + "learning_rate": 1.8718637896363567e-06, + "loss": 1.0214, "step": 20651 }, { - "epoch": 0.5852248576043526, + "epoch": 0.8080444479223726, "grad_norm": 0.0, - "learning_rate": 7.745273225282439e-06, - "loss": 0.7849, + "learning_rate": 1.871125662575045e-06, + "loss": 0.9712, "step": 20652 }, { - "epoch": 0.5852531950466151, + "epoch": 0.8080835746146021, "grad_norm": 0.0, - "learning_rate": 7.744379079055297e-06, - "loss": 0.9429, + "learning_rate": 1.8703876660532638e-06, + "loss": 0.8577, "step": 20653 }, { - "epoch": 0.5852815324888776, + "epoch": 0.8081227013068315, "grad_norm": 0.0, - "learning_rate": 7.743484951828085e-06, - "loss": 0.8461, + "learning_rate": 1.8696498000828744e-06, + "loss": 0.9987, "step": 20654 }, { - "epoch": 0.58530986993114, + "epoch": 0.808161827999061, "grad_norm": 0.0, - "learning_rate": 7.742590843608337e-06, - "loss": 0.8692, + "learning_rate": 1.8689120646757196e-06, + "loss": 0.9833, "step": 20655 }, { - "epoch": 0.5853382073734025, + "epoch": 0.8082009546912904, "grad_norm": 0.0, - "learning_rate": 7.74169675440359e-06, - "loss": 0.9087, + "learning_rate": 1.8681744598436503e-06, + "loss": 0.9413, "step": 20656 }, { - "epoch": 0.585366544815665, + "epoch": 0.8082400813835199, "grad_norm": 0.0, - "learning_rate": 7.740802684221364e-06, - "loss": 0.8465, + "learning_rate": 1.8674369855985064e-06, + "loss": 0.9481, "step": 20657 }, { - "epoch": 0.5853948822579274, + "epoch": 0.8082792080757493, "grad_norm": 0.0, - "learning_rate": 7.7399086330692e-06, - "loss": 0.9457, + "learning_rate": 1.8666996419521344e-06, + "loss": 1.0059, "step": 20658 }, { - "epoch": 0.5854232197001898, + "epoch": 0.8083183347679788, "grad_norm": 0.0, - "learning_rate": 7.739014600954623e-06, - "loss": 0.9176, + "learning_rate": 1.8659624289163748e-06, + "loss": 1.0256, "step": 20659 }, { - "epoch": 0.5854515571424523, + "epoch": 0.8083574614602081, "grad_norm": 0.0, - "learning_rate": 7.738120587885163e-06, - "loss": 0.8501, + "learning_rate": 1.865225346503069e-06, + "loss": 0.9748, "step": 20660 }, { - "epoch": 0.5854798945847148, + "epoch": 0.8083965881524376, "grad_norm": 0.0, - "learning_rate": 7.737226593868359e-06, - "loss": 0.8775, + "learning_rate": 1.8644883947240467e-06, + "loss": 0.894, "step": 20661 }, { - "epoch": 0.5855082320269772, + "epoch": 0.808435714844667, "grad_norm": 0.0, - "learning_rate": 7.736332618911731e-06, - "loss": 0.8067, + "learning_rate": 1.863751573591147e-06, + "loss": 0.994, "step": 20662 }, { - "epoch": 0.5855365694692397, + "epoch": 0.8084748415368964, "grad_norm": 0.0, - "learning_rate": 7.735438663022815e-06, - "loss": 0.8111, + "learning_rate": 1.863014883116202e-06, + "loss": 0.9423, "step": 20663 }, { - "epoch": 0.5855649069115022, + "epoch": 0.8085139682291259, "grad_norm": 0.0, - "learning_rate": 7.734544726209143e-06, - "loss": 0.7663, + "learning_rate": 1.8622783233110453e-06, + "loss": 0.9662, "step": 20664 }, { - "epoch": 0.5855932443537646, + "epoch": 0.8085530949213553, "grad_norm": 0.0, - "learning_rate": 7.733650808478239e-06, - "loss": 0.7805, + "learning_rate": 1.8615418941874973e-06, + "loss": 0.8328, "step": 20665 }, { - "epoch": 0.5856215817960271, + "epoch": 0.8085922216135848, "grad_norm": 0.0, - "learning_rate": 7.732756909837636e-06, - "loss": 0.7344, + "learning_rate": 1.8608055957573922e-06, + "loss": 1.0209, "step": 20666 }, { - "epoch": 0.5856499192382896, + "epoch": 0.8086313483058142, "grad_norm": 0.0, - "learning_rate": 7.731863030294864e-06, - "loss": 0.8135, + "learning_rate": 1.860069428032545e-06, + "loss": 1.0742, "step": 20667 }, { - "epoch": 0.5856782566805521, + "epoch": 0.8086704749980437, "grad_norm": 0.0, - "learning_rate": 7.73096916985745e-06, - "loss": 0.9237, + "learning_rate": 1.8593333910247868e-06, + "loss": 0.8893, "step": 20668 }, { - "epoch": 0.5857065941228145, + "epoch": 0.8087096016902731, "grad_norm": 0.0, - "learning_rate": 7.73007532853293e-06, - "loss": 0.859, + "learning_rate": 1.858597484745932e-06, + "loss": 1.0006, "step": 20669 }, { - "epoch": 0.5857349315650769, + "epoch": 0.8087487283825026, "grad_norm": 0.0, - "learning_rate": 7.729181506328825e-06, - "loss": 0.8864, + "learning_rate": 1.8578617092078021e-06, + "loss": 0.9265, "step": 20670 }, { - "epoch": 0.5857632690073394, + "epoch": 0.8087878550747319, "grad_norm": 0.0, - "learning_rate": 7.728287703252667e-06, - "loss": 0.8261, + "learning_rate": 1.8571260644222056e-06, + "loss": 0.8759, "step": 20671 }, { - "epoch": 0.5857916064496018, + "epoch": 0.8088269817669614, "grad_norm": 0.0, - "learning_rate": 7.727393919311986e-06, - "loss": 0.8724, + "learning_rate": 1.856390550400966e-06, + "loss": 0.8967, "step": 20672 }, { - "epoch": 0.5858199438918643, + "epoch": 0.8088661084591908, "grad_norm": 0.0, - "learning_rate": 7.726500154514308e-06, - "loss": 0.8543, + "learning_rate": 1.8556551671558864e-06, + "loss": 1.0552, "step": 20673 }, { - "epoch": 0.5858482813341268, + "epoch": 0.8089052351514203, "grad_norm": 0.0, - "learning_rate": 7.725606408867168e-06, - "loss": 0.853, + "learning_rate": 1.8549199146987827e-06, + "loss": 1.0547, "step": 20674 }, { - "epoch": 0.5858766187763892, + "epoch": 0.8089443618436497, "grad_norm": 0.0, - "learning_rate": 7.724712682378088e-06, - "loss": 0.7986, + "learning_rate": 1.8541847930414559e-06, + "loss": 0.9833, "step": 20675 }, { - "epoch": 0.5859049562186517, + "epoch": 0.8089834885358792, "grad_norm": 0.0, - "learning_rate": 7.723818975054596e-06, - "loss": 0.9624, + "learning_rate": 1.8534498021957147e-06, + "loss": 0.9418, "step": 20676 }, { - "epoch": 0.5859332936609142, + "epoch": 0.8090226152281086, "grad_norm": 0.0, - "learning_rate": 7.722925286904225e-06, - "loss": 0.8868, + "learning_rate": 1.8527149421733604e-06, + "loss": 0.8256, "step": 20677 }, { - "epoch": 0.5859616311031767, + "epoch": 0.8090617419203381, "grad_norm": 0.0, - "learning_rate": 7.722031617934497e-06, - "loss": 0.7766, + "learning_rate": 1.8519802129861986e-06, + "loss": 0.822, "step": 20678 }, { - "epoch": 0.585989968545439, + "epoch": 0.8091008686125675, "grad_norm": 0.0, - "learning_rate": 7.721137968152944e-06, - "loss": 0.8373, + "learning_rate": 1.851245614646021e-06, + "loss": 0.9153, "step": 20679 }, { - "epoch": 0.5860183059877015, + "epoch": 0.809139995304797, "grad_norm": 0.0, - "learning_rate": 7.720244337567092e-06, - "loss": 0.8215, + "learning_rate": 1.8505111471646287e-06, + "loss": 0.9016, "step": 20680 }, { - "epoch": 0.586046643429964, + "epoch": 0.8091791219970264, "grad_norm": 0.0, - "learning_rate": 7.719350726184467e-06, - "loss": 0.8599, + "learning_rate": 1.8497768105538183e-06, + "loss": 0.886, "step": 20681 }, { - "epoch": 0.5860749808722264, + "epoch": 0.8092182486892558, "grad_norm": 0.0, - "learning_rate": 7.718457134012601e-06, - "loss": 0.9114, + "learning_rate": 1.8490426048253762e-06, + "loss": 1.007, "step": 20682 }, { - "epoch": 0.5861033183144889, + "epoch": 0.8092573753814852, "grad_norm": 0.0, - "learning_rate": 7.717563561059016e-06, - "loss": 0.9081, + "learning_rate": 1.8483085299910964e-06, + "loss": 1.0854, "step": 20683 }, { - "epoch": 0.5861316557567514, + "epoch": 0.8092965020737147, "grad_norm": 0.0, - "learning_rate": 7.716670007331238e-06, - "loss": 0.9418, + "learning_rate": 1.8475745860627692e-06, + "loss": 0.9385, "step": 20684 }, { - "epoch": 0.5861599931990139, + "epoch": 0.8093356287659441, "grad_norm": 0.0, - "learning_rate": 7.715776472836801e-06, - "loss": 0.8418, + "learning_rate": 1.8468407730521764e-06, + "loss": 0.9676, "step": 20685 }, { - "epoch": 0.5861883306412763, + "epoch": 0.8093747554581736, "grad_norm": 0.0, - "learning_rate": 7.714882957583222e-06, - "loss": 0.9027, + "learning_rate": 1.8461070909711043e-06, + "loss": 0.996, "step": 20686 }, { - "epoch": 0.5862166680835388, + "epoch": 0.809413882150403, "grad_norm": 0.0, - "learning_rate": 7.713989461578039e-06, - "loss": 0.8623, + "learning_rate": 1.8453735398313376e-06, + "loss": 0.9348, "step": 20687 }, { - "epoch": 0.5862450055258013, + "epoch": 0.8094530088426325, "grad_norm": 0.0, - "learning_rate": 7.713095984828767e-06, - "loss": 0.9047, + "learning_rate": 1.8446401196446506e-06, + "loss": 0.9148, "step": 20688 }, { - "epoch": 0.5862733429680637, + "epoch": 0.8094921355348619, "grad_norm": 0.0, - "learning_rate": 7.712202527342937e-06, - "loss": 0.827, + "learning_rate": 1.8439068304228247e-06, + "loss": 0.9936, "step": 20689 }, { - "epoch": 0.5863016804103262, + "epoch": 0.8095312622270914, "grad_norm": 0.0, - "learning_rate": 7.711309089128078e-06, - "loss": 0.9229, + "learning_rate": 1.8431736721776338e-06, + "loss": 1.0194, "step": 20690 }, { - "epoch": 0.5863300178525886, + "epoch": 0.8095703889193208, "grad_norm": 0.0, - "learning_rate": 7.710415670191708e-06, - "loss": 0.9014, + "learning_rate": 1.8424406449208565e-06, + "loss": 0.9287, "step": 20691 }, { - "epoch": 0.5863583552948511, + "epoch": 0.8096095156115501, "grad_norm": 0.0, - "learning_rate": 7.70952227054136e-06, - "loss": 0.7594, + "learning_rate": 1.8417077486642564e-06, + "loss": 0.9799, "step": 20692 }, { - "epoch": 0.5863866927371135, + "epoch": 0.8096486423037796, "grad_norm": 0.0, - "learning_rate": 7.708628890184556e-06, - "loss": 0.8437, + "learning_rate": 1.840974983419611e-06, + "loss": 0.8877, "step": 20693 }, { - "epoch": 0.586415030179376, + "epoch": 0.809687768996009, "grad_norm": 0.0, - "learning_rate": 7.707735529128819e-06, - "loss": 0.9212, + "learning_rate": 1.8402423491986777e-06, + "loss": 0.8288, "step": 20694 }, { - "epoch": 0.5864433676216385, + "epoch": 0.8097268956882385, "grad_norm": 0.0, - "learning_rate": 7.706842187381683e-06, - "loss": 0.7452, + "learning_rate": 1.8395098460132322e-06, + "loss": 0.8979, "step": 20695 }, { - "epoch": 0.5864717050639009, + "epoch": 0.8097660223804679, "grad_norm": 0.0, - "learning_rate": 7.70594886495066e-06, - "loss": 0.7848, + "learning_rate": 1.8387774738750308e-06, + "loss": 0.8945, "step": 20696 }, { - "epoch": 0.5865000425061634, + "epoch": 0.8098051490726974, "grad_norm": 0.0, - "learning_rate": 7.705055561843285e-06, - "loss": 0.8233, + "learning_rate": 1.8380452327958397e-06, + "loss": 1.0755, "step": 20697 }, { - "epoch": 0.5865283799484259, + "epoch": 0.8098442757649268, "grad_norm": 0.0, - "learning_rate": 7.704162278067077e-06, - "loss": 0.9022, + "learning_rate": 1.8373131227874085e-06, + "loss": 0.8657, "step": 20698 }, { - "epoch": 0.5865567173906883, + "epoch": 0.8098834024571563, "grad_norm": 0.0, - "learning_rate": 7.703269013629565e-06, - "loss": 0.9388, + "learning_rate": 1.8365811438615066e-06, + "loss": 0.8562, "step": 20699 }, { - "epoch": 0.5865850548329508, + "epoch": 0.8099225291493857, "grad_norm": 0.0, - "learning_rate": 7.702375768538268e-06, - "loss": 0.7928, + "learning_rate": 1.835849296029879e-06, + "loss": 0.9305, "step": 20700 }, { - "epoch": 0.5866133922752133, + "epoch": 0.8099616558416152, "grad_norm": 0.0, - "learning_rate": 7.701482542800718e-06, - "loss": 0.7987, + "learning_rate": 1.835117579304283e-06, + "loss": 1.066, "step": 20701 }, { - "epoch": 0.5866417297174757, + "epoch": 0.8100007825338446, "grad_norm": 0.0, - "learning_rate": 7.700589336424431e-06, - "loss": 0.8966, + "learning_rate": 1.8343859936964636e-06, + "loss": 0.8152, "step": 20702 }, { - "epoch": 0.5866700671597381, + "epoch": 0.810039909226074, "grad_norm": 0.0, - "learning_rate": 7.699696149416935e-06, - "loss": 0.9799, + "learning_rate": 1.8336545392181782e-06, + "loss": 0.8571, "step": 20703 }, { - "epoch": 0.5866984046020006, + "epoch": 0.8100790359183034, "grad_norm": 0.0, - "learning_rate": 7.698802981785752e-06, - "loss": 0.86, + "learning_rate": 1.8329232158811649e-06, + "loss": 0.9614, "step": 20704 }, { - "epoch": 0.5867267420442631, + "epoch": 0.8101181626105329, "grad_norm": 0.0, - "learning_rate": 7.697909833538405e-06, - "loss": 0.814, + "learning_rate": 1.8321920236971735e-06, + "loss": 1.0732, "step": 20705 }, { - "epoch": 0.5867550794865255, + "epoch": 0.8101572893027623, "grad_norm": 0.0, - "learning_rate": 7.697016704682421e-06, - "loss": 0.7786, + "learning_rate": 1.83146096267794e-06, + "loss": 0.998, "step": 20706 }, { - "epoch": 0.586783416928788, + "epoch": 0.8101964159949918, "grad_norm": 0.0, - "learning_rate": 7.696123595225316e-06, - "loss": 0.8893, + "learning_rate": 1.8307300328352084e-06, + "loss": 0.8547, "step": 20707 }, { - "epoch": 0.5868117543710505, + "epoch": 0.8102355426872212, "grad_norm": 0.0, - "learning_rate": 7.69523050517462e-06, - "loss": 0.7841, + "learning_rate": 1.829999234180716e-06, + "loss": 1.0951, "step": 20708 }, { - "epoch": 0.586840091813313, + "epoch": 0.8102746693794507, "grad_norm": 0.0, - "learning_rate": 7.694337434537856e-06, - "loss": 0.9286, + "learning_rate": 1.829268566726201e-06, + "loss": 1.0251, "step": 20709 }, { - "epoch": 0.5868684292555754, + "epoch": 0.8103137960716801, "grad_norm": 0.0, - "learning_rate": 7.69344438332254e-06, - "loss": 1.0008, + "learning_rate": 1.8285380304833912e-06, + "loss": 0.9222, "step": 20710 }, { - "epoch": 0.5868967666978379, + "epoch": 0.8103529227639096, "grad_norm": 0.0, - "learning_rate": 7.692551351536202e-06, - "loss": 0.7912, + "learning_rate": 1.827807625464022e-06, + "loss": 0.9371, "step": 20711 }, { - "epoch": 0.5869251041401004, + "epoch": 0.810392049456139, "grad_norm": 0.0, - "learning_rate": 7.691658339186356e-06, - "loss": 0.8822, + "learning_rate": 1.8270773516798212e-06, + "loss": 0.8691, "step": 20712 }, { - "epoch": 0.5869534415823627, + "epoch": 0.8104311761483685, "grad_norm": 0.0, - "learning_rate": 7.690765346280532e-06, - "loss": 0.9097, + "learning_rate": 1.8263472091425195e-06, + "loss": 1.1442, "step": 20713 }, { - "epoch": 0.5869817790246252, + "epoch": 0.8104703028405978, "grad_norm": 0.0, - "learning_rate": 7.68987237282625e-06, - "loss": 0.8411, + "learning_rate": 1.825617197863837e-06, + "loss": 1.0154, "step": 20714 }, { - "epoch": 0.5870101164668877, + "epoch": 0.8105094295328273, "grad_norm": 0.0, - "learning_rate": 7.68897941883103e-06, - "loss": 0.804, + "learning_rate": 1.8248873178555026e-06, + "loss": 0.845, "step": 20715 }, { - "epoch": 0.5870384539091502, + "epoch": 0.8105485562250567, "grad_norm": 0.0, - "learning_rate": 7.688086484302394e-06, - "loss": 0.8506, + "learning_rate": 1.824157569129228e-06, + "loss": 0.8394, "step": 20716 }, { - "epoch": 0.5870667913514126, + "epoch": 0.8105876829172862, "grad_norm": 0.0, - "learning_rate": 7.687193569247863e-06, - "loss": 0.8515, + "learning_rate": 1.8234279516967435e-06, + "loss": 0.8416, "step": 20717 }, { - "epoch": 0.5870951287936751, + "epoch": 0.8106268096095156, "grad_norm": 0.0, - "learning_rate": 7.686300673674959e-06, - "loss": 0.8681, + "learning_rate": 1.8226984655697567e-06, + "loss": 1.0082, "step": 20718 }, { - "epoch": 0.5871234662359376, + "epoch": 0.8106659363017451, "grad_norm": 0.0, - "learning_rate": 7.685407797591207e-06, - "loss": 0.9163, + "learning_rate": 1.8219691107599891e-06, + "loss": 0.8365, "step": 20719 }, { - "epoch": 0.5871518036782, + "epoch": 0.8107050629939745, "grad_norm": 0.0, - "learning_rate": 7.684514941004121e-06, - "loss": 0.773, + "learning_rate": 1.821239887279147e-06, + "loss": 0.9191, "step": 20720 }, { - "epoch": 0.5871801411204625, + "epoch": 0.8107441896862039, "grad_norm": 0.0, - "learning_rate": 7.683622103921228e-06, - "loss": 0.9025, + "learning_rate": 1.8205107951389444e-06, + "loss": 0.9815, "step": 20721 }, { - "epoch": 0.587208478562725, + "epoch": 0.8107833163784334, "grad_norm": 0.0, - "learning_rate": 7.682729286350048e-06, - "loss": 0.8958, + "learning_rate": 1.8197818343510887e-06, + "loss": 0.8579, "step": 20722 }, { - "epoch": 0.5872368160049873, + "epoch": 0.8108224430706628, "grad_norm": 0.0, - "learning_rate": 7.681836488298096e-06, - "loss": 0.9268, + "learning_rate": 1.8190530049272892e-06, + "loss": 0.8876, "step": 20723 }, { - "epoch": 0.5872651534472498, + "epoch": 0.8108615697628923, "grad_norm": 0.0, - "learning_rate": 7.680943709772899e-06, - "loss": 0.8335, + "learning_rate": 1.8183243068792444e-06, + "loss": 0.8007, "step": 20724 }, { - "epoch": 0.5872934908895123, + "epoch": 0.8109006964551216, "grad_norm": 0.0, - "learning_rate": 7.68005095078197e-06, - "loss": 0.837, + "learning_rate": 1.8175957402186584e-06, + "loss": 1.0057, "step": 20725 }, { - "epoch": 0.5873218283317748, + "epoch": 0.8109398231473511, "grad_norm": 0.0, - "learning_rate": 7.679158211332834e-06, - "loss": 0.8301, + "learning_rate": 1.816867304957235e-06, + "loss": 0.8761, "step": 20726 }, { - "epoch": 0.5873501657740372, + "epoch": 0.8109789498395805, "grad_norm": 0.0, - "learning_rate": 7.678265491433015e-06, - "loss": 0.8705, + "learning_rate": 1.816139001106665e-06, + "loss": 0.9241, "step": 20727 }, { - "epoch": 0.5873785032162997, + "epoch": 0.81101807653181, "grad_norm": 0.0, - "learning_rate": 7.677372791090025e-06, - "loss": 0.998, + "learning_rate": 1.8154108286786486e-06, + "loss": 1.0102, "step": 20728 }, { - "epoch": 0.5874068406585622, + "epoch": 0.8110572032240394, "grad_norm": 0.0, - "learning_rate": 7.676480110311385e-06, - "loss": 0.874, + "learning_rate": 1.8146827876848783e-06, + "loss": 1.0247, "step": 20729 }, { - "epoch": 0.5874351781008246, + "epoch": 0.8110963299162689, "grad_norm": 0.0, - "learning_rate": 7.675587449104618e-06, - "loss": 0.8596, + "learning_rate": 1.8139548781370486e-06, + "loss": 0.9356, "step": 20730 }, { - "epoch": 0.5874635155430871, + "epoch": 0.8111354566084983, "grad_norm": 0.0, - "learning_rate": 7.674694807477239e-06, - "loss": 0.7406, + "learning_rate": 1.8132271000468427e-06, + "loss": 0.9321, "step": 20731 }, { - "epoch": 0.5874918529853496, + "epoch": 0.8111745833007278, "grad_norm": 0.0, - "learning_rate": 7.673802185436774e-06, - "loss": 0.7995, + "learning_rate": 1.8124994534259532e-06, + "loss": 0.936, "step": 20732 }, { - "epoch": 0.587520190427612, + "epoch": 0.8112137099929572, "grad_norm": 0.0, - "learning_rate": 7.672909582990731e-06, - "loss": 0.8641, + "learning_rate": 1.811771938286061e-06, + "loss": 0.9797, "step": 20733 }, { - "epoch": 0.5875485278698744, + "epoch": 0.8112528366851867, "grad_norm": 0.0, - "learning_rate": 7.672017000146637e-06, - "loss": 0.9331, + "learning_rate": 1.811044554638851e-06, + "loss": 0.9173, "step": 20734 }, { - "epoch": 0.5875768653121369, + "epoch": 0.811291963377416, "grad_norm": 0.0, - "learning_rate": 7.671124436912012e-06, - "loss": 0.8638, + "learning_rate": 1.8103173024960042e-06, + "loss": 0.9493, "step": 20735 }, { - "epoch": 0.5876052027543994, + "epoch": 0.8113310900696455, "grad_norm": 0.0, - "learning_rate": 7.670231893294365e-06, - "loss": 0.8568, + "learning_rate": 1.8095901818692018e-06, + "loss": 0.8553, "step": 20736 }, { - "epoch": 0.5876335401966618, + "epoch": 0.8113702167618749, "grad_norm": 0.0, - "learning_rate": 7.669339369301221e-06, - "loss": 0.8915, + "learning_rate": 1.808863192770116e-06, + "loss": 0.9473, "step": 20737 }, { - "epoch": 0.5876618776389243, + "epoch": 0.8114093434541044, "grad_norm": 0.0, - "learning_rate": 7.6684468649401e-06, - "loss": 0.7796, + "learning_rate": 1.808136335210422e-06, + "loss": 0.9292, "step": 20738 }, { - "epoch": 0.5876902150811868, + "epoch": 0.8114484701463338, "grad_norm": 0.0, - "learning_rate": 7.667554380218513e-06, - "loss": 0.8768, + "learning_rate": 1.8074096092017944e-06, + "loss": 0.9238, "step": 20739 }, { - "epoch": 0.5877185525234492, + "epoch": 0.8114875968385633, "grad_norm": 0.0, - "learning_rate": 7.666661915143985e-06, - "loss": 0.7978, + "learning_rate": 1.8066830147559045e-06, + "loss": 0.9837, "step": 20740 }, { - "epoch": 0.5877468899657117, + "epoch": 0.8115267235307927, "grad_norm": 0.0, - "learning_rate": 7.665769469724029e-06, - "loss": 0.8543, + "learning_rate": 1.805956551884417e-06, + "loss": 1.0396, "step": 20741 }, { - "epoch": 0.5877752274079742, + "epoch": 0.8115658502230222, "grad_norm": 0.0, - "learning_rate": 7.664877043966162e-06, - "loss": 0.8633, + "learning_rate": 1.8052302205990014e-06, + "loss": 1.031, "step": 20742 }, { - "epoch": 0.5878035648502367, + "epoch": 0.8116049769152516, "grad_norm": 0.0, - "learning_rate": 7.663984637877903e-06, - "loss": 0.8413, + "learning_rate": 1.804504020911314e-06, + "loss": 0.924, "step": 20743 }, { - "epoch": 0.587831902292499, + "epoch": 0.8116441036074811, "grad_norm": 0.0, - "learning_rate": 7.66309225146677e-06, - "loss": 0.9718, + "learning_rate": 1.803777952833029e-06, + "loss": 0.8172, "step": 20744 }, { - "epoch": 0.5878602397347615, + "epoch": 0.8116832302997105, "grad_norm": 0.0, - "learning_rate": 7.66219988474028e-06, - "loss": 0.9424, + "learning_rate": 1.8030520163757959e-06, + "loss": 0.9144, "step": 20745 }, { - "epoch": 0.587888577177024, + "epoch": 0.81172235699194, "grad_norm": 0.0, - "learning_rate": 7.661307537705947e-06, - "loss": 0.8442, + "learning_rate": 1.8023262115512795e-06, + "loss": 0.8607, "step": 20746 }, { - "epoch": 0.5879169146192864, + "epoch": 0.8117614836841693, "grad_norm": 0.0, - "learning_rate": 7.660415210371288e-06, - "loss": 0.9278, + "learning_rate": 1.8016005383711265e-06, + "loss": 0.9296, "step": 20747 }, { - "epoch": 0.5879452520615489, + "epoch": 0.8118006103763988, "grad_norm": 0.0, - "learning_rate": 7.659522902743824e-06, - "loss": 0.8986, + "learning_rate": 1.8008749968470007e-06, + "loss": 0.9718, "step": 20748 }, { - "epoch": 0.5879735895038114, + "epoch": 0.8118397370686282, "grad_norm": 0.0, - "learning_rate": 7.658630614831066e-06, - "loss": 0.8194, + "learning_rate": 1.800149586990545e-06, + "loss": 0.9479, "step": 20749 }, { - "epoch": 0.5880019269460739, + "epoch": 0.8118788637608576, "grad_norm": 0.0, - "learning_rate": 7.65773834664053e-06, - "loss": 0.767, + "learning_rate": 1.7994243088134157e-06, + "loss": 0.9783, "step": 20750 }, { - "epoch": 0.5880302643883363, + "epoch": 0.8119179904530871, "grad_norm": 0.0, - "learning_rate": 7.656846098179735e-06, - "loss": 0.8939, + "learning_rate": 1.7986991623272533e-06, + "loss": 1.006, "step": 20751 }, { - "epoch": 0.5880586018305988, + "epoch": 0.8119571171453165, "grad_norm": 0.0, - "learning_rate": 7.655953869456194e-06, - "loss": 0.8337, + "learning_rate": 1.797974147543705e-06, + "loss": 0.9069, "step": 20752 }, { - "epoch": 0.5880869392728613, + "epoch": 0.811996243837546, "grad_norm": 0.0, - "learning_rate": 7.655061660477428e-06, - "loss": 0.9072, + "learning_rate": 1.7972492644744154e-06, + "loss": 0.9942, "step": 20753 }, { - "epoch": 0.5881152767151236, + "epoch": 0.8120353705297754, "grad_norm": 0.0, - "learning_rate": 7.654169471250945e-06, - "loss": 0.8211, + "learning_rate": 1.796524513131025e-06, + "loss": 0.9491, "step": 20754 }, { - "epoch": 0.5881436141573861, + "epoch": 0.8120744972220049, "grad_norm": 0.0, - "learning_rate": 7.653277301784262e-06, - "loss": 0.8682, + "learning_rate": 1.7957998935251697e-06, + "loss": 0.9107, "step": 20755 }, { - "epoch": 0.5881719515996486, + "epoch": 0.8121136239142343, "grad_norm": 0.0, - "learning_rate": 7.652385152084898e-06, - "loss": 0.8692, + "learning_rate": 1.7950754056684882e-06, + "loss": 1.0443, "step": 20756 }, { - "epoch": 0.5882002890419111, + "epoch": 0.8121527506064637, "grad_norm": 0.0, - "learning_rate": 7.651493022160366e-06, - "loss": 0.8358, + "learning_rate": 1.7943510495726135e-06, + "loss": 0.9811, "step": 20757 }, { - "epoch": 0.5882286264841735, + "epoch": 0.8121918772986931, "grad_norm": 0.0, - "learning_rate": 7.650600912018178e-06, - "loss": 0.8384, + "learning_rate": 1.7936268252491817e-06, + "loss": 0.9271, "step": 20758 }, { - "epoch": 0.588256963926436, + "epoch": 0.8122310039909226, "grad_norm": 0.0, - "learning_rate": 7.649708821665856e-06, - "loss": 0.8404, + "learning_rate": 1.792902732709817e-06, + "loss": 0.8727, "step": 20759 }, { - "epoch": 0.5882853013686985, + "epoch": 0.812270130683152, "grad_norm": 0.0, - "learning_rate": 7.648816751110905e-06, - "loss": 0.8606, + "learning_rate": 1.7921787719661499e-06, + "loss": 0.9419, "step": 20760 }, { - "epoch": 0.5883136388109609, + "epoch": 0.8123092573753815, "grad_norm": 0.0, - "learning_rate": 7.647924700360847e-06, - "loss": 0.9376, + "learning_rate": 1.7914549430298078e-06, + "loss": 1.1334, "step": 20761 }, { - "epoch": 0.5883419762532234, + "epoch": 0.8123483840676109, "grad_norm": 0.0, - "learning_rate": 7.64703266942319e-06, - "loss": 0.9575, + "learning_rate": 1.7907312459124148e-06, + "loss": 1.0079, "step": 20762 }, { - "epoch": 0.5883703136954859, + "epoch": 0.8123875107598404, "grad_norm": 0.0, - "learning_rate": 7.64614065830545e-06, - "loss": 0.8435, + "learning_rate": 1.7900076806255894e-06, + "loss": 1.1044, "step": 20763 }, { - "epoch": 0.5883986511377483, + "epoch": 0.8124266374520698, "grad_norm": 0.0, - "learning_rate": 7.645248667015143e-06, - "loss": 0.8471, + "learning_rate": 1.7892842471809558e-06, + "loss": 0.874, "step": 20764 }, { - "epoch": 0.5884269885800107, + "epoch": 0.8124657641442993, "grad_norm": 0.0, - "learning_rate": 7.644356695559779e-06, - "loss": 0.9319, + "learning_rate": 1.7885609455901253e-06, + "loss": 0.9193, "step": 20765 }, { - "epoch": 0.5884553260222732, + "epoch": 0.8125048908365287, "grad_norm": 0.0, - "learning_rate": 7.643464743946873e-06, - "loss": 0.9155, + "learning_rate": 1.787837775864717e-06, + "loss": 1.0515, "step": 20766 }, { - "epoch": 0.5884836634645357, + "epoch": 0.8125440175287582, "grad_norm": 0.0, - "learning_rate": 7.642572812183944e-06, - "loss": 0.8423, + "learning_rate": 1.7871147380163445e-06, + "loss": 0.9369, "step": 20767 }, { - "epoch": 0.5885120009067981, + "epoch": 0.8125831442209875, "grad_norm": 0.0, - "learning_rate": 7.641680900278494e-06, - "loss": 0.8265, + "learning_rate": 1.7863918320566186e-06, + "loss": 1.0055, "step": 20768 }, { - "epoch": 0.5885403383490606, + "epoch": 0.812622270913217, "grad_norm": 0.0, - "learning_rate": 7.640789008238044e-06, - "loss": 0.8973, + "learning_rate": 1.7856690579971458e-06, + "loss": 0.8837, "step": 20769 }, { - "epoch": 0.5885686757913231, + "epoch": 0.8126613976054464, "grad_norm": 0.0, - "learning_rate": 7.639897136070103e-06, - "loss": 0.8578, + "learning_rate": 1.7849464158495355e-06, + "loss": 0.9827, "step": 20770 }, { - "epoch": 0.5885970132335855, + "epoch": 0.8127005242976759, "grad_norm": 0.0, - "learning_rate": 7.639005283782183e-06, - "loss": 0.8364, + "learning_rate": 1.784223905625394e-06, + "loss": 0.9503, "step": 20771 }, { - "epoch": 0.588625350675848, + "epoch": 0.8127396509899053, "grad_norm": 0.0, - "learning_rate": 7.638113451381804e-06, - "loss": 0.8457, + "learning_rate": 1.7835015273363188e-06, + "loss": 0.8777, "step": 20772 }, { - "epoch": 0.5886536881181105, + "epoch": 0.8127787776821348, "grad_norm": 0.0, - "learning_rate": 7.637221638876468e-06, - "loss": 0.9034, + "learning_rate": 1.7827792809939137e-06, + "loss": 0.9473, "step": 20773 }, { - "epoch": 0.588682025560373, + "epoch": 0.8128179043743642, "grad_norm": 0.0, - "learning_rate": 7.636329846273695e-06, - "loss": 0.9323, + "learning_rate": 1.7820571666097764e-06, + "loss": 0.9989, "step": 20774 }, { - "epoch": 0.5887103630026354, + "epoch": 0.8128570310665937, "grad_norm": 0.0, - "learning_rate": 7.63543807358099e-06, - "loss": 0.8608, + "learning_rate": 1.781335184195505e-06, + "loss": 1.0177, "step": 20775 }, { - "epoch": 0.5887387004448978, + "epoch": 0.8128961577588231, "grad_norm": 0.0, - "learning_rate": 7.634546320805869e-06, - "loss": 0.8161, + "learning_rate": 1.7806133337626908e-06, + "loss": 1.0839, "step": 20776 }, { - "epoch": 0.5887670378871603, + "epoch": 0.8129352844510525, "grad_norm": 0.0, - "learning_rate": 7.633654587955844e-06, - "loss": 0.8033, + "learning_rate": 1.7798916153229284e-06, + "loss": 0.9195, "step": 20777 }, { - "epoch": 0.5887953753294227, + "epoch": 0.812974411143282, "grad_norm": 0.0, - "learning_rate": 7.632762875038422e-06, - "loss": 0.9235, + "learning_rate": 1.779170028887801e-06, + "loss": 1.036, "step": 20778 }, { - "epoch": 0.5888237127716852, + "epoch": 0.8130135378355113, "grad_norm": 0.0, - "learning_rate": 7.631871182061117e-06, - "loss": 0.8665, + "learning_rate": 1.7784485744689083e-06, + "loss": 0.9945, "step": 20779 }, { - "epoch": 0.5888520502139477, + "epoch": 0.8130526645277408, "grad_norm": 0.0, - "learning_rate": 7.630979509031446e-06, - "loss": 0.7597, + "learning_rate": 1.7777272520778255e-06, + "loss": 0.9906, "step": 20780 }, { - "epoch": 0.5888803876562102, + "epoch": 0.8130917912199702, "grad_norm": 0.0, - "learning_rate": 7.630087855956911e-06, - "loss": 0.8799, + "learning_rate": 1.777006061726143e-06, + "loss": 1.0416, "step": 20781 }, { - "epoch": 0.5889087250984726, + "epoch": 0.8131309179121997, "grad_norm": 0.0, - "learning_rate": 7.629196222845027e-06, - "loss": 0.9403, + "learning_rate": 1.7762850034254364e-06, + "loss": 0.9013, "step": 20782 }, { - "epoch": 0.5889370625407351, + "epoch": 0.8131700446044291, "grad_norm": 0.0, - "learning_rate": 7.628304609703301e-06, - "loss": 0.7808, + "learning_rate": 1.7755640771872873e-06, + "loss": 0.8221, "step": 20783 }, { - "epoch": 0.5889653999829976, + "epoch": 0.8132091712966586, "grad_norm": 0.0, - "learning_rate": 7.627413016539247e-06, - "loss": 0.884, + "learning_rate": 1.7748432830232743e-06, + "loss": 0.7912, "step": 20784 }, { - "epoch": 0.58899373742526, + "epoch": 0.813248297988888, "grad_norm": 0.0, - "learning_rate": 7.6265214433603775e-06, - "loss": 0.8336, + "learning_rate": 1.7741226209449737e-06, + "loss": 0.9432, "step": 20785 }, { - "epoch": 0.5890220748675224, + "epoch": 0.8132874246811175, "grad_norm": 0.0, - "learning_rate": 7.625629890174196e-06, - "loss": 0.8644, + "learning_rate": 1.7734020909639538e-06, + "loss": 0.9455, "step": 20786 }, { - "epoch": 0.5890504123097849, + "epoch": 0.8133265513733469, "grad_norm": 0.0, - "learning_rate": 7.624738356988215e-06, - "loss": 0.8872, + "learning_rate": 1.7726816930917878e-06, + "loss": 0.9045, "step": 20787 }, { - "epoch": 0.5890787497520473, + "epoch": 0.8133656780655764, "grad_norm": 0.0, - "learning_rate": 7.6238468438099485e-06, - "loss": 0.8052, + "learning_rate": 1.7719614273400432e-06, + "loss": 0.9088, "step": 20788 }, { - "epoch": 0.5891070871943098, + "epoch": 0.8134048047578057, "grad_norm": 0.0, - "learning_rate": 7.622955350646899e-06, - "loss": 0.8727, + "learning_rate": 1.7712412937202917e-06, + "loss": 1.0236, "step": 20789 }, { - "epoch": 0.5891354246365723, + "epoch": 0.8134439314500352, "grad_norm": 0.0, - "learning_rate": 7.622063877506581e-06, - "loss": 0.7951, + "learning_rate": 1.7705212922440907e-06, + "loss": 0.9232, "step": 20790 }, { - "epoch": 0.5891637620788348, + "epoch": 0.8134830581422646, "grad_norm": 0.0, - "learning_rate": 7.621172424396501e-06, - "loss": 0.9012, + "learning_rate": 1.7698014229230087e-06, + "loss": 1.0597, "step": 20791 }, { - "epoch": 0.5891920995210972, + "epoch": 0.8135221848344941, "grad_norm": 0.0, - "learning_rate": 7.620280991324167e-06, - "loss": 0.8158, + "learning_rate": 1.7690816857685978e-06, + "loss": 0.9033, "step": 20792 }, { - "epoch": 0.5892204369633597, + "epoch": 0.8135613115267235, "grad_norm": 0.0, - "learning_rate": 7.619389578297096e-06, - "loss": 0.9153, + "learning_rate": 1.7683620807924251e-06, + "loss": 1.0379, "step": 20793 }, { - "epoch": 0.5892487744056222, + "epoch": 0.813600438218953, "grad_norm": 0.0, - "learning_rate": 7.618498185322786e-06, - "loss": 0.8657, + "learning_rate": 1.7676426080060404e-06, + "loss": 0.9338, "step": 20794 }, { - "epoch": 0.5892771118478846, + "epoch": 0.8136395649111824, "grad_norm": 0.0, - "learning_rate": 7.617606812408749e-06, - "loss": 0.8409, + "learning_rate": 1.7669232674210025e-06, + "loss": 0.9502, "step": 20795 }, { - "epoch": 0.589305449290147, + "epoch": 0.8136786916034119, "grad_norm": 0.0, - "learning_rate": 7.616715459562498e-06, - "loss": 0.9767, + "learning_rate": 1.7662040590488562e-06, + "loss": 0.9753, "step": 20796 }, { - "epoch": 0.5893337867324095, + "epoch": 0.8137178182956413, "grad_norm": 0.0, - "learning_rate": 7.615824126791534e-06, - "loss": 0.7975, + "learning_rate": 1.7654849829011588e-06, + "loss": 0.8825, "step": 20797 }, { - "epoch": 0.589362124174672, + "epoch": 0.8137569449878708, "grad_norm": 0.0, - "learning_rate": 7.614932814103375e-06, - "loss": 0.7439, + "learning_rate": 1.7647660389894517e-06, + "loss": 0.7832, "step": 20798 }, { - "epoch": 0.5893904616169344, + "epoch": 0.8137960716801002, "grad_norm": 0.0, - "learning_rate": 7.614041521505517e-06, - "loss": 0.8284, + "learning_rate": 1.7640472273252861e-06, + "loss": 1.0575, "step": 20799 }, { - "epoch": 0.5894187990591969, + "epoch": 0.8138351983723296, "grad_norm": 0.0, - "learning_rate": 7.613150249005473e-06, - "loss": 0.8529, + "learning_rate": 1.7633285479201988e-06, + "loss": 0.8389, "step": 20800 }, { - "epoch": 0.5894471365014594, + "epoch": 0.813874325064559, "grad_norm": 0.0, - "learning_rate": 7.612258996610756e-06, - "loss": 0.8864, + "learning_rate": 1.762610000785734e-06, + "loss": 1.1072, "step": 20801 }, { - "epoch": 0.5894754739437218, + "epoch": 0.8139134517567885, "grad_norm": 0.0, - "learning_rate": 7.611367764328863e-06, - "loss": 0.8833, + "learning_rate": 1.7618915859334306e-06, + "loss": 0.9499, "step": 20802 }, { - "epoch": 0.5895038113859843, + "epoch": 0.8139525784490179, "grad_norm": 0.0, - "learning_rate": 7.610476552167309e-06, - "loss": 0.9573, + "learning_rate": 1.761173303374827e-06, + "loss": 0.8858, "step": 20803 }, { - "epoch": 0.5895321488282468, + "epoch": 0.8139917051412474, "grad_norm": 0.0, - "learning_rate": 7.609585360133596e-06, - "loss": 0.8763, + "learning_rate": 1.7604551531214554e-06, + "loss": 0.9731, "step": 20804 }, { - "epoch": 0.5895604862705093, + "epoch": 0.8140308318334768, "grad_norm": 0.0, - "learning_rate": 7.608694188235234e-06, - "loss": 0.8092, + "learning_rate": 1.759737135184848e-06, + "loss": 0.9477, "step": 20805 }, { - "epoch": 0.5895888237127717, + "epoch": 0.8140699585257062, "grad_norm": 0.0, - "learning_rate": 7.607803036479731e-06, - "loss": 0.9286, + "learning_rate": 1.7590192495765369e-06, + "loss": 0.9722, "step": 20806 }, { - "epoch": 0.5896171611550342, + "epoch": 0.8141090852179357, "grad_norm": 0.0, - "learning_rate": 7.60691190487459e-06, - "loss": 0.9704, + "learning_rate": 1.7583014963080535e-06, + "loss": 0.8572, "step": 20807 }, { - "epoch": 0.5896454985972966, + "epoch": 0.8141482119101651, "grad_norm": 0.0, - "learning_rate": 7.606020793427316e-06, - "loss": 0.9303, + "learning_rate": 1.757583875390917e-06, + "loss": 1.0586, "step": 20808 }, { - "epoch": 0.589673836039559, + "epoch": 0.8141873386023946, "grad_norm": 0.0, - "learning_rate": 7.605129702145422e-06, - "loss": 0.9067, + "learning_rate": 1.7568663868366586e-06, + "loss": 0.9155, "step": 20809 }, { - "epoch": 0.5897021734818215, + "epoch": 0.814226465294624, "grad_norm": 0.0, - "learning_rate": 7.6042386310364055e-06, - "loss": 0.8183, + "learning_rate": 1.756149030656793e-06, + "loss": 1.0091, "step": 20810 }, { - "epoch": 0.589730510924084, + "epoch": 0.8142655919868534, "grad_norm": 0.0, - "learning_rate": 7.603347580107782e-06, - "loss": 0.8244, + "learning_rate": 1.755431806862845e-06, + "loss": 0.9927, "step": 20811 }, { - "epoch": 0.5897588483663464, + "epoch": 0.8143047186790828, "grad_norm": 0.0, - "learning_rate": 7.6024565493670485e-06, - "loss": 1.0576, + "learning_rate": 1.7547147154663313e-06, + "loss": 0.9006, "step": 20812 }, { - "epoch": 0.5897871858086089, + "epoch": 0.8143438453713123, "grad_norm": 0.0, - "learning_rate": 7.601565538821714e-06, - "loss": 0.8936, + "learning_rate": 1.753997756478769e-06, + "loss": 0.9606, "step": 20813 }, { - "epoch": 0.5898155232508714, + "epoch": 0.8143829720635417, "grad_norm": 0.0, - "learning_rate": 7.6006745484792855e-06, - "loss": 0.7064, + "learning_rate": 1.753280929911667e-06, + "loss": 1.067, "step": 20814 }, { - "epoch": 0.5898438606931339, + "epoch": 0.8144220987557712, "grad_norm": 0.0, - "learning_rate": 7.599783578347264e-06, - "loss": 0.882, + "learning_rate": 1.7525642357765405e-06, + "loss": 0.8013, "step": 20815 }, { - "epoch": 0.5898721981353963, + "epoch": 0.8144612254480006, "grad_norm": 0.0, - "learning_rate": 7.598892628433157e-06, - "loss": 0.8793, + "learning_rate": 1.7518476740849e-06, + "loss": 1.0075, "step": 20816 }, { - "epoch": 0.5899005355776588, + "epoch": 0.8145003521402301, "grad_norm": 0.0, - "learning_rate": 7.598001698744469e-06, - "loss": 0.9536, + "learning_rate": 1.7511312448482488e-06, + "loss": 0.8851, "step": 20817 }, { - "epoch": 0.5899288730199213, + "epoch": 0.8145394788324595, "grad_norm": 0.0, - "learning_rate": 7.597110789288704e-06, - "loss": 0.7734, + "learning_rate": 1.7504149480780918e-06, + "loss": 0.8337, "step": 20818 }, { - "epoch": 0.5899572104621836, + "epoch": 0.814578605524689, "grad_norm": 0.0, - "learning_rate": 7.596219900073372e-06, - "loss": 0.8584, + "learning_rate": 1.7496987837859346e-06, + "loss": 0.8793, "step": 20819 }, { - "epoch": 0.5899855479044461, + "epoch": 0.8146177322169184, "grad_norm": 0.0, - "learning_rate": 7.595329031105967e-06, - "loss": 0.9441, + "learning_rate": 1.748982751983278e-06, + "loss": 0.9991, "step": 20820 }, { - "epoch": 0.5900138853467086, + "epoch": 0.8146568589091479, "grad_norm": 0.0, - "learning_rate": 7.5944381823939985e-06, - "loss": 0.8907, + "learning_rate": 1.7482668526816183e-06, + "loss": 0.8875, "step": 20821 }, { - "epoch": 0.5900422227889711, + "epoch": 0.8146959856013772, "grad_norm": 0.0, - "learning_rate": 7.593547353944972e-06, - "loss": 0.8948, + "learning_rate": 1.747551085892455e-06, + "loss": 1.059, "step": 20822 }, { - "epoch": 0.5900705602312335, + "epoch": 0.8147351122936067, "grad_norm": 0.0, - "learning_rate": 7.592656545766389e-06, - "loss": 0.9529, + "learning_rate": 1.7468354516272746e-06, + "loss": 0.8405, "step": 20823 }, { - "epoch": 0.590098897673496, + "epoch": 0.8147742389858361, "grad_norm": 0.0, - "learning_rate": 7.591765757865753e-06, - "loss": 0.8663, + "learning_rate": 1.74611994989758e-06, + "loss": 0.9073, "step": 20824 }, { - "epoch": 0.5901272351157585, + "epoch": 0.8148133656780656, "grad_norm": 0.0, - "learning_rate": 7.590874990250574e-06, - "loss": 0.8869, + "learning_rate": 1.7454045807148545e-06, + "loss": 0.8709, "step": 20825 }, { - "epoch": 0.5901555725580209, + "epoch": 0.814852492370295, "grad_norm": 0.0, - "learning_rate": 7.5899842429283434e-06, - "loss": 0.9651, + "learning_rate": 1.744689344090591e-06, + "loss": 1.0662, "step": 20826 }, { - "epoch": 0.5901839100002834, + "epoch": 0.8148916190625245, "grad_norm": 0.0, - "learning_rate": 7.589093515906574e-06, - "loss": 0.8936, + "learning_rate": 1.7439742400362658e-06, + "loss": 0.9496, "step": 20827 }, { - "epoch": 0.5902122474425459, + "epoch": 0.8149307457547539, "grad_norm": 0.0, - "learning_rate": 7.588202809192762e-06, - "loss": 0.844, + "learning_rate": 1.7432592685633743e-06, + "loss": 0.9672, "step": 20828 }, { - "epoch": 0.5902405848848084, + "epoch": 0.8149698724469834, "grad_norm": 0.0, - "learning_rate": 7.587312122794414e-06, - "loss": 0.896, + "learning_rate": 1.7425444296833904e-06, + "loss": 1.0024, "step": 20829 }, { - "epoch": 0.5902689223270707, + "epoch": 0.8150089991392128, "grad_norm": 0.0, - "learning_rate": 7.586421456719037e-06, - "loss": 0.8343, + "learning_rate": 1.7418297234077986e-06, + "loss": 1.03, "step": 20830 }, { - "epoch": 0.5902972597693332, + "epoch": 0.8150481258314423, "grad_norm": 0.0, - "learning_rate": 7.585530810974122e-06, - "loss": 0.8658, + "learning_rate": 1.7411151497480704e-06, + "loss": 0.954, "step": 20831 }, { - "epoch": 0.5903255972115957, + "epoch": 0.8150872525236716, "grad_norm": 0.0, - "learning_rate": 7.584640185567184e-06, - "loss": 0.808, + "learning_rate": 1.7404007087156839e-06, + "loss": 1.0722, "step": 20832 }, { - "epoch": 0.5903539346538581, + "epoch": 0.8151263792159011, "grad_norm": 0.0, - "learning_rate": 7.583749580505712e-06, - "loss": 0.8158, + "learning_rate": 1.7396864003221125e-06, + "loss": 1.082, "step": 20833 }, { - "epoch": 0.5903822720961206, + "epoch": 0.8151655059081305, "grad_norm": 0.0, - "learning_rate": 7.582858995797217e-06, - "loss": 0.7782, + "learning_rate": 1.7389722245788287e-06, + "loss": 0.8661, "step": 20834 }, { - "epoch": 0.5904106095383831, + "epoch": 0.8152046326003599, "grad_norm": 0.0, - "learning_rate": 7.5819684314491984e-06, - "loss": 0.8481, + "learning_rate": 1.7382581814972977e-06, + "loss": 0.8965, "step": 20835 }, { - "epoch": 0.5904389469806455, + "epoch": 0.8152437592925894, "grad_norm": 0.0, - "learning_rate": 7.581077887469157e-06, - "loss": 0.8911, + "learning_rate": 1.7375442710889868e-06, + "loss": 0.8822, "step": 20836 }, { - "epoch": 0.590467284422908, + "epoch": 0.8152828859848188, "grad_norm": 0.0, - "learning_rate": 7.580187363864593e-06, - "loss": 0.8871, + "learning_rate": 1.7368304933653624e-06, + "loss": 0.9038, "step": 20837 }, { - "epoch": 0.5904956218651705, + "epoch": 0.8153220126770483, "grad_norm": 0.0, - "learning_rate": 7.579296860643015e-06, - "loss": 0.8884, + "learning_rate": 1.7361168483378877e-06, + "loss": 0.8827, "step": 20838 }, { - "epoch": 0.590523959307433, + "epoch": 0.8153611393692777, "grad_norm": 0.0, - "learning_rate": 7.578406377811914e-06, - "loss": 0.8976, + "learning_rate": 1.735403336018019e-06, + "loss": 0.9473, "step": 20839 }, { - "epoch": 0.5905522967496953, + "epoch": 0.8154002660615072, "grad_norm": 0.0, - "learning_rate": 7.577515915378798e-06, - "loss": 0.8324, + "learning_rate": 1.7346899564172193e-06, + "loss": 0.9034, "step": 20840 }, { - "epoch": 0.5905806341919578, + "epoch": 0.8154393927537366, "grad_norm": 0.0, - "learning_rate": 7.576625473351162e-06, - "loss": 0.9006, + "learning_rate": 1.733976709546936e-06, + "loss": 0.9951, "step": 20841 }, { - "epoch": 0.5906089716342203, + "epoch": 0.815478519445966, "grad_norm": 0.0, - "learning_rate": 7.57573505173651e-06, - "loss": 0.7461, + "learning_rate": 1.733263595418635e-06, + "loss": 0.9727, "step": 20842 }, { - "epoch": 0.5906373090764827, + "epoch": 0.8155176461381954, "grad_norm": 0.0, - "learning_rate": 7.574844650542346e-06, - "loss": 0.873, + "learning_rate": 1.7325506140437587e-06, + "loss": 0.8943, "step": 20843 }, { - "epoch": 0.5906656465187452, + "epoch": 0.8155567728304249, "grad_norm": 0.0, - "learning_rate": 7.5739542697761615e-06, - "loss": 0.8686, + "learning_rate": 1.7318377654337626e-06, + "loss": 1.0869, "step": 20844 }, { - "epoch": 0.5906939839610077, + "epoch": 0.8155958995226543, "grad_norm": 0.0, - "learning_rate": 7.573063909445462e-06, - "loss": 0.8, + "learning_rate": 1.7311250496000853e-06, + "loss": 0.9242, "step": 20845 }, { - "epoch": 0.5907223214032702, + "epoch": 0.8156350262148838, "grad_norm": 0.0, - "learning_rate": 7.5721735695577494e-06, - "loss": 0.851, + "learning_rate": 1.7304124665541843e-06, + "loss": 0.9592, "step": 20846 }, { - "epoch": 0.5907506588455326, + "epoch": 0.8156741529071132, "grad_norm": 0.0, - "learning_rate": 7.5712832501205165e-06, - "loss": 0.8532, + "learning_rate": 1.729700016307495e-06, + "loss": 1.0396, "step": 20847 }, { - "epoch": 0.5907789962877951, + "epoch": 0.8157132795993427, "grad_norm": 0.0, - "learning_rate": 7.57039295114127e-06, - "loss": 0.934, + "learning_rate": 1.7289876988714615e-06, + "loss": 0.984, "step": 20848 }, { - "epoch": 0.5908073337300576, + "epoch": 0.8157524062915721, "grad_norm": 0.0, - "learning_rate": 7.569502672627502e-06, - "loss": 0.8831, + "learning_rate": 1.7282755142575191e-06, + "loss": 0.9816, "step": 20849 }, { - "epoch": 0.5908356711723199, + "epoch": 0.8157915329838016, "grad_norm": 0.0, - "learning_rate": 7.568612414586717e-06, - "loss": 0.885, + "learning_rate": 1.7275634624771066e-06, + "loss": 0.9958, "step": 20850 }, { - "epoch": 0.5908640086145824, + "epoch": 0.815830659676031, "grad_norm": 0.0, - "learning_rate": 7.5677221770264154e-06, - "loss": 0.8648, + "learning_rate": 1.72685154354166e-06, + "loss": 0.9968, "step": 20851 }, { - "epoch": 0.5908923460568449, + "epoch": 0.8158697863682605, "grad_norm": 0.0, - "learning_rate": 7.56683195995409e-06, - "loss": 0.8387, + "learning_rate": 1.7261397574626125e-06, + "loss": 1.0164, "step": 20852 }, { - "epoch": 0.5909206834991074, + "epoch": 0.8159089130604898, "grad_norm": 0.0, - "learning_rate": 7.565941763377244e-06, - "loss": 0.8165, + "learning_rate": 1.7254281042513898e-06, + "loss": 0.9775, "step": 20853 }, { - "epoch": 0.5909490209413698, + "epoch": 0.8159480397527193, "grad_norm": 0.0, - "learning_rate": 7.565051587303373e-06, - "loss": 0.844, + "learning_rate": 1.7247165839194234e-06, + "loss": 1.0152, "step": 20854 }, { - "epoch": 0.5909773583836323, + "epoch": 0.8159871664449487, "grad_norm": 0.0, - "learning_rate": 7.5641614317399755e-06, - "loss": 0.7967, + "learning_rate": 1.7240051964781424e-06, + "loss": 1.1125, "step": 20855 }, { - "epoch": 0.5910056958258948, + "epoch": 0.8160262931371782, "grad_norm": 0.0, - "learning_rate": 7.563271296694555e-06, - "loss": 0.8825, + "learning_rate": 1.7232939419389648e-06, + "loss": 0.903, "step": 20856 }, { - "epoch": 0.5910340332681572, + "epoch": 0.8160654198294076, "grad_norm": 0.0, - "learning_rate": 7.562381182174603e-06, - "loss": 0.9282, + "learning_rate": 1.7225828203133143e-06, + "loss": 1.0418, "step": 20857 }, { - "epoch": 0.5910623707104197, + "epoch": 0.8161045465216371, "grad_norm": 0.0, - "learning_rate": 7.561491088187618e-06, - "loss": 0.816, + "learning_rate": 1.7218718316126149e-06, + "loss": 0.994, "step": 20858 }, { - "epoch": 0.5910907081526822, + "epoch": 0.8161436732138665, "grad_norm": 0.0, - "learning_rate": 7.560601014741103e-06, - "loss": 0.9816, + "learning_rate": 1.7211609758482784e-06, + "loss": 0.9989, "step": 20859 }, { - "epoch": 0.5911190455949445, + "epoch": 0.816182799906096, "grad_norm": 0.0, - "learning_rate": 7.559710961842548e-06, - "loss": 0.7255, + "learning_rate": 1.7204502530317224e-06, + "loss": 0.8632, "step": 20860 }, { - "epoch": 0.591147383037207, + "epoch": 0.8162219265983254, "grad_norm": 0.0, - "learning_rate": 7.558820929499455e-06, - "loss": 0.8162, + "learning_rate": 1.7197396631743634e-06, + "loss": 0.9708, "step": 20861 }, { - "epoch": 0.5911757204794695, + "epoch": 0.8162610532905548, "grad_norm": 0.0, - "learning_rate": 7.5579309177193185e-06, - "loss": 0.9028, + "learning_rate": 1.719029206287607e-06, + "loss": 1.1278, "step": 20862 }, { - "epoch": 0.591204057921732, + "epoch": 0.8163001799827843, "grad_norm": 0.0, - "learning_rate": 7.557040926509637e-06, - "loss": 0.835, + "learning_rate": 1.7183188823828644e-06, + "loss": 0.9946, "step": 20863 }, { - "epoch": 0.5912323953639944, + "epoch": 0.8163393066750136, "grad_norm": 0.0, - "learning_rate": 7.55615095587791e-06, - "loss": 0.8181, + "learning_rate": 1.7176086914715428e-06, + "loss": 0.818, "step": 20864 }, { - "epoch": 0.5912607328062569, + "epoch": 0.8163784333672431, "grad_norm": 0.0, - "learning_rate": 7.555261005831628e-06, - "loss": 0.9179, + "learning_rate": 1.7168986335650506e-06, + "loss": 1.0053, "step": 20865 }, { - "epoch": 0.5912890702485194, + "epoch": 0.8164175600594725, "grad_norm": 0.0, - "learning_rate": 7.55437107637829e-06, - "loss": 0.8472, + "learning_rate": 1.7161887086747842e-06, + "loss": 0.8599, "step": 20866 }, { - "epoch": 0.5913174076907818, + "epoch": 0.816456686751702, "grad_norm": 0.0, - "learning_rate": 7.553481167525394e-06, - "loss": 0.9258, + "learning_rate": 1.7154789168121488e-06, + "loss": 0.8823, "step": 20867 }, { - "epoch": 0.5913457451330443, + "epoch": 0.8164958134439314, "grad_norm": 0.0, - "learning_rate": 7.552591279280434e-06, - "loss": 0.8284, + "learning_rate": 1.7147692579885366e-06, + "loss": 0.9841, "step": 20868 }, { - "epoch": 0.5913740825753068, + "epoch": 0.8165349401361609, "grad_norm": 0.0, - "learning_rate": 7.5517014116509094e-06, - "loss": 0.7781, + "learning_rate": 1.714059732215353e-06, + "loss": 0.9947, "step": 20869 }, { - "epoch": 0.5914024200175693, + "epoch": 0.8165740668283903, "grad_norm": 0.0, - "learning_rate": 7.550811564644309e-06, - "loss": 0.9194, + "learning_rate": 1.7133503395039841e-06, + "loss": 1.0936, "step": 20870 }, { - "epoch": 0.5914307574598316, + "epoch": 0.8166131935206198, "grad_norm": 0.0, - "learning_rate": 7.549921738268132e-06, - "loss": 0.9077, + "learning_rate": 1.7126410798658288e-06, + "loss": 0.9233, "step": 20871 }, { - "epoch": 0.5914590949020941, + "epoch": 0.8166523202128492, "grad_norm": 0.0, - "learning_rate": 7.549031932529879e-06, - "loss": 0.8909, + "learning_rate": 1.711931953312267e-06, + "loss": 0.931, "step": 20872 }, { - "epoch": 0.5914874323443566, + "epoch": 0.8166914469050787, "grad_norm": 0.0, - "learning_rate": 7.5481421474370354e-06, - "loss": 0.7972, + "learning_rate": 1.7112229598546982e-06, + "loss": 0.8926, "step": 20873 }, { - "epoch": 0.591515769786619, + "epoch": 0.816730573597308, "grad_norm": 0.0, - "learning_rate": 7.547252382997101e-06, - "loss": 0.9335, + "learning_rate": 1.710514099504499e-06, + "loss": 1.0573, "step": 20874 }, { - "epoch": 0.5915441072288815, + "epoch": 0.8167697002895375, "grad_norm": 0.0, - "learning_rate": 7.546362639217572e-06, - "loss": 0.9583, + "learning_rate": 1.709805372273059e-06, + "loss": 0.9918, "step": 20875 }, { - "epoch": 0.591572444671144, + "epoch": 0.8168088269817669, "grad_norm": 0.0, - "learning_rate": 7.545472916105941e-06, - "loss": 0.8175, + "learning_rate": 1.7090967781717516e-06, + "loss": 0.7952, "step": 20876 }, { - "epoch": 0.5916007821134065, + "epoch": 0.8168479536739964, "grad_norm": 0.0, - "learning_rate": 7.544583213669707e-06, - "loss": 0.8376, + "learning_rate": 1.7083883172119665e-06, + "loss": 1.0665, "step": 20877 }, { - "epoch": 0.5916291195556689, + "epoch": 0.8168870803662258, "grad_norm": 0.0, - "learning_rate": 7.5436935319163565e-06, - "loss": 0.9164, + "learning_rate": 1.707679989405071e-06, + "loss": 0.797, "step": 20878 }, { - "epoch": 0.5916574569979314, + "epoch": 0.8169262070584553, "grad_norm": 0.0, - "learning_rate": 7.5428038708533856e-06, - "loss": 0.8825, + "learning_rate": 1.7069717947624475e-06, + "loss": 0.924, "step": 20879 }, { - "epoch": 0.5916857944401939, + "epoch": 0.8169653337506847, "grad_norm": 0.0, - "learning_rate": 7.541914230488294e-06, - "loss": 0.8863, + "learning_rate": 1.7062637332954634e-06, + "loss": 1.0128, "step": 20880 }, { - "epoch": 0.5917141318824563, + "epoch": 0.8170044604429142, "grad_norm": 0.0, - "learning_rate": 7.541024610828569e-06, - "loss": 0.7763, + "learning_rate": 1.7055558050154896e-06, + "loss": 0.9484, "step": 20881 }, { - "epoch": 0.5917424693247187, + "epoch": 0.8170435871351436, "grad_norm": 0.0, - "learning_rate": 7.5401350118817106e-06, - "loss": 0.7616, + "learning_rate": 1.7048480099338972e-06, + "loss": 0.9229, "step": 20882 }, { - "epoch": 0.5917708067669812, + "epoch": 0.8170827138273731, "grad_norm": 0.0, - "learning_rate": 7.539245433655204e-06, - "loss": 0.8621, + "learning_rate": 1.7041403480620534e-06, + "loss": 0.9448, "step": 20883 }, { - "epoch": 0.5917991442092436, + "epoch": 0.8171218405196025, "grad_norm": 0.0, - "learning_rate": 7.538355876156549e-06, - "loss": 0.9023, + "learning_rate": 1.7034328194113181e-06, + "loss": 0.957, "step": 20884 }, { - "epoch": 0.5918274816515061, + "epoch": 0.817160967211832, "grad_norm": 0.0, - "learning_rate": 7.537466339393239e-06, - "loss": 0.9203, + "learning_rate": 1.7027254239930547e-06, + "loss": 1.0709, "step": 20885 }, { - "epoch": 0.5918558190937686, + "epoch": 0.8172000939040613, "grad_norm": 0.0, - "learning_rate": 7.536576823372761e-06, - "loss": 0.8472, + "learning_rate": 1.7020181618186248e-06, + "loss": 0.9578, "step": 20886 }, { - "epoch": 0.5918841565360311, + "epoch": 0.8172392205962908, "grad_norm": 0.0, - "learning_rate": 7.53568732810261e-06, - "loss": 0.7974, + "learning_rate": 1.7013110328993875e-06, + "loss": 1.0642, "step": 20887 }, { - "epoch": 0.5919124939782935, + "epoch": 0.8172783472885202, "grad_norm": 0.0, - "learning_rate": 7.534797853590283e-06, - "loss": 0.9174, + "learning_rate": 1.7006040372466937e-06, + "loss": 1.0349, "step": 20888 }, { - "epoch": 0.591940831420556, + "epoch": 0.8173174739807497, "grad_norm": 0.0, - "learning_rate": 7.533908399843266e-06, - "loss": 0.9325, + "learning_rate": 1.6998971748719018e-06, + "loss": 0.9469, "step": 20889 }, { - "epoch": 0.5919691688628185, + "epoch": 0.8173566006729791, "grad_norm": 0.0, - "learning_rate": 7.53301896686906e-06, - "loss": 0.7692, + "learning_rate": 1.699190445786355e-06, + "loss": 1.0009, "step": 20890 }, { - "epoch": 0.5919975063050809, + "epoch": 0.8173957273652085, "grad_norm": 0.0, - "learning_rate": 7.532129554675146e-06, - "loss": 0.7506, + "learning_rate": 1.6984838500014145e-06, + "loss": 0.8962, "step": 20891 }, { - "epoch": 0.5920258437473433, + "epoch": 0.817434854057438, "grad_norm": 0.0, - "learning_rate": 7.531240163269021e-06, - "loss": 0.7999, + "learning_rate": 1.6977773875284176e-06, + "loss": 0.9382, "step": 20892 }, { - "epoch": 0.5920541811896058, + "epoch": 0.8174739807496674, "grad_norm": 0.0, - "learning_rate": 7.5303507926581795e-06, - "loss": 0.8855, + "learning_rate": 1.6970710583787153e-06, + "loss": 0.9671, "step": 20893 }, { - "epoch": 0.5920825186318683, + "epoch": 0.8175131074418969, "grad_norm": 0.0, - "learning_rate": 7.529461442850108e-06, - "loss": 0.8923, + "learning_rate": 1.6963648625636454e-06, + "loss": 1.0682, "step": 20894 }, { - "epoch": 0.5921108560741307, + "epoch": 0.8175522341341263, "grad_norm": 0.0, - "learning_rate": 7.528572113852301e-06, - "loss": 0.8331, + "learning_rate": 1.6956588000945507e-06, + "loss": 0.8904, "step": 20895 }, { - "epoch": 0.5921391935163932, + "epoch": 0.8175913608263558, "grad_norm": 0.0, - "learning_rate": 7.527682805672252e-06, - "loss": 0.8489, + "learning_rate": 1.694952870982769e-06, + "loss": 1.084, "step": 20896 }, { - "epoch": 0.5921675309586557, + "epoch": 0.8176304875185851, "grad_norm": 0.0, - "learning_rate": 7.526793518317446e-06, - "loss": 0.8753, + "learning_rate": 1.69424707523964e-06, + "loss": 0.9406, "step": 20897 }, { - "epoch": 0.5921958684009181, + "epoch": 0.8176696142108146, "grad_norm": 0.0, - "learning_rate": 7.52590425179538e-06, - "loss": 0.8374, + "learning_rate": 1.6935414128764939e-06, + "loss": 0.883, "step": 20898 }, { - "epoch": 0.5922242058431806, + "epoch": 0.817708740903044, "grad_norm": 0.0, - "learning_rate": 7.525015006113537e-06, - "loss": 0.8554, + "learning_rate": 1.6928358839046633e-06, + "loss": 0.9745, "step": 20899 }, { - "epoch": 0.5922525432854431, + "epoch": 0.8177478675952735, "grad_norm": 0.0, - "learning_rate": 7.52412578127941e-06, - "loss": 0.7732, + "learning_rate": 1.6921304883354817e-06, + "loss": 0.9794, "step": 20900 }, { - "epoch": 0.5922808807277056, + "epoch": 0.8177869942875029, "grad_norm": 0.0, - "learning_rate": 7.5232365773004945e-06, - "loss": 0.8831, + "learning_rate": 1.691425226180271e-06, + "loss": 0.9252, "step": 20901 }, { - "epoch": 0.592309218169968, + "epoch": 0.8178261209797324, "grad_norm": 0.0, - "learning_rate": 7.522347394184275e-06, - "loss": 0.9818, + "learning_rate": 1.6907200974503601e-06, + "loss": 0.9217, "step": 20902 }, { - "epoch": 0.5923375556122304, + "epoch": 0.8178652476719618, "grad_norm": 0.0, - "learning_rate": 7.521458231938244e-06, - "loss": 0.936, + "learning_rate": 1.6900151021570732e-06, + "loss": 0.9628, "step": 20903 }, { - "epoch": 0.5923658930544929, + "epoch": 0.8179043743641913, "grad_norm": 0.0, - "learning_rate": 7.520569090569894e-06, - "loss": 0.8288, + "learning_rate": 1.6893102403117322e-06, + "loss": 0.8937, "step": 20904 }, { - "epoch": 0.5923942304967553, + "epoch": 0.8179435010564207, "grad_norm": 0.0, - "learning_rate": 7.5196799700867075e-06, - "loss": 0.8355, + "learning_rate": 1.6886055119256529e-06, + "loss": 0.8115, "step": 20905 }, { - "epoch": 0.5924225679390178, + "epoch": 0.8179826277486502, "grad_norm": 0.0, - "learning_rate": 7.518790870496178e-06, - "loss": 0.9052, + "learning_rate": 1.6879009170101568e-06, + "loss": 1.0262, "step": 20906 }, { - "epoch": 0.5924509053812803, + "epoch": 0.8180217544408795, "grad_norm": 0.0, - "learning_rate": 7.517901791805795e-06, - "loss": 0.7949, + "learning_rate": 1.687196455576554e-06, + "loss": 0.9555, "step": 20907 }, { - "epoch": 0.5924792428235427, + "epoch": 0.818060881133109, "grad_norm": 0.0, - "learning_rate": 7.517012734023046e-06, - "loss": 0.7575, + "learning_rate": 1.68649212763616e-06, + "loss": 0.9118, "step": 20908 }, { - "epoch": 0.5925075802658052, + "epoch": 0.8181000078253384, "grad_norm": 0.0, - "learning_rate": 7.516123697155424e-06, - "loss": 0.8926, + "learning_rate": 1.6857879332002847e-06, + "loss": 0.9729, "step": 20909 }, { - "epoch": 0.5925359177080677, + "epoch": 0.8181391345175679, "grad_norm": 0.0, - "learning_rate": 7.515234681210412e-06, - "loss": 0.7611, + "learning_rate": 1.6850838722802386e-06, + "loss": 1.0209, "step": 20910 }, { - "epoch": 0.5925642551503302, + "epoch": 0.8181782612097973, "grad_norm": 0.0, - "learning_rate": 7.514345686195503e-06, - "loss": 0.8729, + "learning_rate": 1.6843799448873244e-06, + "loss": 1.0143, "step": 20911 }, { - "epoch": 0.5925925925925926, + "epoch": 0.8182173879020268, "grad_norm": 0.0, - "learning_rate": 7.513456712118181e-06, - "loss": 0.9888, + "learning_rate": 1.683676151032848e-06, + "loss": 0.9668, "step": 20912 }, { - "epoch": 0.592620930034855, + "epoch": 0.8182565145942562, "grad_norm": 0.0, - "learning_rate": 7.512567758985936e-06, - "loss": 0.861, + "learning_rate": 1.682972490728112e-06, + "loss": 0.9014, "step": 20913 }, { - "epoch": 0.5926492674771175, + "epoch": 0.8182956412864857, "grad_norm": 0.0, - "learning_rate": 7.511678826806258e-06, - "loss": 0.9471, + "learning_rate": 1.682268963984418e-06, + "loss": 0.903, "step": 20914 }, { - "epoch": 0.5926776049193799, + "epoch": 0.8183347679787151, "grad_norm": 0.0, - "learning_rate": 7.510789915586631e-06, - "loss": 0.8539, + "learning_rate": 1.6815655708130597e-06, + "loss": 0.9576, "step": 20915 }, { - "epoch": 0.5927059423616424, + "epoch": 0.8183738946709446, "grad_norm": 0.0, - "learning_rate": 7.509901025334546e-06, - "loss": 0.8508, + "learning_rate": 1.6808623112253375e-06, + "loss": 1.0367, "step": 20916 }, { - "epoch": 0.5927342798039049, + "epoch": 0.818413021363174, "grad_norm": 0.0, - "learning_rate": 7.5090121560574924e-06, - "loss": 0.8153, + "learning_rate": 1.6801591852325371e-06, + "loss": 0.8832, "step": 20917 }, { - "epoch": 0.5927626172461674, + "epoch": 0.8184521480554035, "grad_norm": 0.0, - "learning_rate": 7.5081233077629515e-06, - "loss": 0.8807, + "learning_rate": 1.6794561928459596e-06, + "loss": 0.857, "step": 20918 }, { - "epoch": 0.5927909546884298, + "epoch": 0.8184912747476328, "grad_norm": 0.0, - "learning_rate": 7.507234480458414e-06, - "loss": 0.8423, + "learning_rate": 1.678753334076887e-06, + "loss": 0.9368, "step": 20919 }, { - "epoch": 0.5928192921306923, + "epoch": 0.8185304014398622, "grad_norm": 0.0, - "learning_rate": 7.5063456741513655e-06, - "loss": 0.8549, + "learning_rate": 1.6780506089366112e-06, + "loss": 0.9473, "step": 20920 }, { - "epoch": 0.5928476295729548, + "epoch": 0.8185695281320917, "grad_norm": 0.0, - "learning_rate": 7.505456888849292e-06, - "loss": 0.8251, + "learning_rate": 1.6773480174364088e-06, + "loss": 0.8779, "step": 20921 }, { - "epoch": 0.5928759670152172, + "epoch": 0.8186086548243211, "grad_norm": 0.0, - "learning_rate": 7.504568124559686e-06, - "loss": 0.7721, + "learning_rate": 1.6766455595875742e-06, + "loss": 0.9878, "step": 20922 }, { - "epoch": 0.5929043044574797, + "epoch": 0.8186477815165506, "grad_norm": 0.0, - "learning_rate": 7.503679381290025e-06, - "loss": 0.7095, + "learning_rate": 1.6759432354013794e-06, + "loss": 0.9257, "step": 20923 }, { - "epoch": 0.5929326418997422, + "epoch": 0.81868690820878, "grad_norm": 0.0, - "learning_rate": 7.5027906590478e-06, - "loss": 0.7625, + "learning_rate": 1.6752410448891088e-06, + "loss": 1.0336, "step": 20924 }, { - "epoch": 0.5929609793420046, + "epoch": 0.8187260349010095, "grad_norm": 0.0, - "learning_rate": 7.501901957840501e-06, - "loss": 0.8251, + "learning_rate": 1.6745389880620322e-06, + "loss": 0.9675, "step": 20925 }, { - "epoch": 0.592989316784267, + "epoch": 0.8187651615932389, "grad_norm": 0.0, - "learning_rate": 7.501013277675605e-06, - "loss": 0.8774, + "learning_rate": 1.6738370649314272e-06, + "loss": 0.9669, "step": 20926 }, { - "epoch": 0.5930176542265295, + "epoch": 0.8188042882854684, "grad_norm": 0.0, - "learning_rate": 7.500124618560605e-06, - "loss": 0.8015, + "learning_rate": 1.673135275508566e-06, + "loss": 0.9352, "step": 20927 }, { - "epoch": 0.593045991668792, + "epoch": 0.8188434149776977, "grad_norm": 0.0, - "learning_rate": 7.49923598050298e-06, - "loss": 0.8044, + "learning_rate": 1.6724336198047208e-06, + "loss": 0.9243, "step": 20928 }, { - "epoch": 0.5930743291110544, + "epoch": 0.8188825416699272, "grad_norm": 0.0, - "learning_rate": 7.49834736351022e-06, - "loss": 0.8356, + "learning_rate": 1.6717320978311535e-06, + "loss": 0.9249, "step": 20929 }, { - "epoch": 0.5931026665533169, + "epoch": 0.8189216683621566, "grad_norm": 0.0, - "learning_rate": 7.4974587675898134e-06, - "loss": 0.7896, + "learning_rate": 1.671030709599133e-06, + "loss": 1.002, "step": 20930 }, { - "epoch": 0.5931310039955794, + "epoch": 0.8189607950543861, "grad_norm": 0.0, - "learning_rate": 7.496570192749235e-06, - "loss": 0.9369, + "learning_rate": 1.6703294551199222e-06, + "loss": 0.9885, "step": 20931 }, { - "epoch": 0.5931593414378418, + "epoch": 0.8189999217466155, "grad_norm": 0.0, - "learning_rate": 7.4956816389959775e-06, - "loss": 0.7812, + "learning_rate": 1.669628334404786e-06, + "loss": 0.9045, "step": 20932 }, { - "epoch": 0.5931876788801043, + "epoch": 0.819039048438845, "grad_norm": 0.0, - "learning_rate": 7.494793106337526e-06, - "loss": 0.9196, + "learning_rate": 1.6689273474649781e-06, + "loss": 0.9768, "step": 20933 }, { - "epoch": 0.5932160163223668, + "epoch": 0.8190781751310744, "grad_norm": 0.0, - "learning_rate": 7.493904594781358e-06, - "loss": 0.8386, + "learning_rate": 1.6682264943117566e-06, + "loss": 0.8022, "step": 20934 }, { - "epoch": 0.5932443537646293, + "epoch": 0.8191173018233039, "grad_norm": 0.0, - "learning_rate": 7.493016104334968e-06, - "loss": 0.8703, + "learning_rate": 1.6675257749563767e-06, + "loss": 0.8122, "step": 20935 }, { - "epoch": 0.5932726912068916, + "epoch": 0.8191564285155333, "grad_norm": 0.0, - "learning_rate": 7.49212763500583e-06, - "loss": 0.7663, + "learning_rate": 1.666825189410095e-06, + "loss": 0.8885, "step": 20936 }, { - "epoch": 0.5933010286491541, + "epoch": 0.8191955552077628, "grad_norm": 0.0, - "learning_rate": 7.491239186801431e-06, - "loss": 0.9019, + "learning_rate": 1.6661247376841573e-06, + "loss": 0.8284, "step": 20937 }, { - "epoch": 0.5933293660914166, + "epoch": 0.8192346818999922, "grad_norm": 0.0, - "learning_rate": 7.490350759729259e-06, - "loss": 0.8859, + "learning_rate": 1.665424419789814e-06, + "loss": 0.8787, "step": 20938 }, { - "epoch": 0.593357703533679, + "epoch": 0.8192738085922217, "grad_norm": 0.0, - "learning_rate": 7.489462353796792e-06, - "loss": 0.8123, + "learning_rate": 1.6647242357383087e-06, + "loss": 0.8653, "step": 20939 }, { - "epoch": 0.5933860409759415, + "epoch": 0.819312935284451, "grad_norm": 0.0, - "learning_rate": 7.488573969011521e-06, - "loss": 0.7866, + "learning_rate": 1.6640241855408877e-06, + "loss": 0.9939, "step": 20940 }, { - "epoch": 0.593414378418204, + "epoch": 0.8193520619766805, "grad_norm": 0.0, - "learning_rate": 7.48768560538092e-06, - "loss": 0.8318, + "learning_rate": 1.6633242692087937e-06, + "loss": 0.8849, "step": 20941 }, { - "epoch": 0.5934427158604665, + "epoch": 0.8193911886689099, "grad_norm": 0.0, - "learning_rate": 7.486797262912475e-06, - "loss": 0.7871, + "learning_rate": 1.662624486753267e-06, + "loss": 1.0396, "step": 20942 }, { - "epoch": 0.5934710533027289, + "epoch": 0.8194303153611394, "grad_norm": 0.0, - "learning_rate": 7.485908941613675e-06, - "loss": 0.9534, + "learning_rate": 1.6619248381855413e-06, + "loss": 1.0279, "step": 20943 }, { - "epoch": 0.5934993907449914, + "epoch": 0.8194694420533688, "grad_norm": 0.0, - "learning_rate": 7.485020641491993e-06, - "loss": 0.7682, + "learning_rate": 1.6612253235168541e-06, + "loss": 1.1006, "step": 20944 }, { - "epoch": 0.5935277281872539, + "epoch": 0.8195085687455983, "grad_norm": 0.0, - "learning_rate": 7.484132362554915e-06, - "loss": 0.7838, + "learning_rate": 1.6605259427584398e-06, + "loss": 0.9481, "step": 20945 }, { - "epoch": 0.5935560656295162, + "epoch": 0.8195476954378277, "grad_norm": 0.0, - "learning_rate": 7.483244104809928e-06, - "loss": 0.8253, + "learning_rate": 1.6598266959215315e-06, + "loss": 0.9583, "step": 20946 }, { - "epoch": 0.5935844030717787, + "epoch": 0.8195868221300572, "grad_norm": 0.0, - "learning_rate": 7.482355868264508e-06, - "loss": 0.7892, + "learning_rate": 1.6591275830173525e-06, + "loss": 0.8861, "step": 20947 }, { - "epoch": 0.5936127405140412, + "epoch": 0.8196259488222866, "grad_norm": 0.0, - "learning_rate": 7.4814676529261435e-06, - "loss": 0.7322, + "learning_rate": 1.6584286040571329e-06, + "loss": 0.9829, "step": 20948 }, { - "epoch": 0.5936410779563036, + "epoch": 0.819665075514516, "grad_norm": 0.0, - "learning_rate": 7.4805794588023086e-06, - "loss": 0.86, + "learning_rate": 1.6577297590521002e-06, + "loss": 0.9974, "step": 20949 }, { - "epoch": 0.5936694153985661, + "epoch": 0.8197042022067454, "grad_norm": 0.0, - "learning_rate": 7.479691285900487e-06, - "loss": 0.7937, + "learning_rate": 1.6570310480134722e-06, + "loss": 0.8996, "step": 20950 }, { - "epoch": 0.5936977528408286, + "epoch": 0.8197433288989748, "grad_norm": 0.0, - "learning_rate": 7.4788031342281644e-06, - "loss": 0.7714, + "learning_rate": 1.6563324709524697e-06, + "loss": 0.9294, "step": 20951 }, { - "epoch": 0.5937260902830911, + "epoch": 0.8197824555912043, "grad_norm": 0.0, - "learning_rate": 7.477915003792817e-06, - "loss": 0.8015, + "learning_rate": 1.655634027880314e-06, + "loss": 0.9466, "step": 20952 }, { - "epoch": 0.5937544277253535, + "epoch": 0.8198215822834337, "grad_norm": 0.0, - "learning_rate": 7.477026894601929e-06, - "loss": 0.7916, + "learning_rate": 1.6549357188082215e-06, + "loss": 1.0024, "step": 20953 }, { - "epoch": 0.593782765167616, + "epoch": 0.8198607089756632, "grad_norm": 0.0, - "learning_rate": 7.476138806662983e-06, - "loss": 0.8138, + "learning_rate": 1.6542375437474023e-06, + "loss": 0.9146, "step": 20954 }, { - "epoch": 0.5938111026098785, + "epoch": 0.8198998356678926, "grad_norm": 0.0, - "learning_rate": 7.475250739983454e-06, - "loss": 0.8341, + "learning_rate": 1.6535395027090717e-06, + "loss": 0.9576, "step": 20955 }, { - "epoch": 0.5938394400521408, + "epoch": 0.8199389623601221, "grad_norm": 0.0, - "learning_rate": 7.4743626945708294e-06, - "loss": 0.8561, + "learning_rate": 1.6528415957044363e-06, + "loss": 0.9042, "step": 20956 }, { - "epoch": 0.5938677774944033, + "epoch": 0.8199780890523515, "grad_norm": 0.0, - "learning_rate": 7.473474670432581e-06, - "loss": 0.878, + "learning_rate": 1.652143822744705e-06, + "loss": 0.8889, "step": 20957 }, { - "epoch": 0.5938961149366658, + "epoch": 0.820017215744581, "grad_norm": 0.0, - "learning_rate": 7.472586667576194e-06, - "loss": 0.8846, + "learning_rate": 1.6514461838410846e-06, + "loss": 1.0105, "step": 20958 }, { - "epoch": 0.5939244523789283, + "epoch": 0.8200563424368104, "grad_norm": 0.0, - "learning_rate": 7.47169868600915e-06, - "loss": 0.96, + "learning_rate": 1.650748679004779e-06, + "loss": 0.9874, "step": 20959 }, { - "epoch": 0.5939527898211907, + "epoch": 0.8200954691290399, "grad_norm": 0.0, - "learning_rate": 7.4708107257389265e-06, - "loss": 0.8298, + "learning_rate": 1.650051308246985e-06, + "loss": 1.1024, "step": 20960 }, { - "epoch": 0.5939811272634532, + "epoch": 0.8201345958212692, "grad_norm": 0.0, - "learning_rate": 7.469922786773e-06, - "loss": 0.876, + "learning_rate": 1.6493540715789036e-06, + "loss": 0.9767, "step": 20961 }, { - "epoch": 0.5940094647057157, + "epoch": 0.8201737225134987, "grad_norm": 0.0, - "learning_rate": 7.469034869118861e-06, - "loss": 0.9106, + "learning_rate": 1.6486569690117315e-06, + "loss": 0.9698, "step": 20962 }, { - "epoch": 0.5940378021479781, + "epoch": 0.8202128492057281, "grad_norm": 0.0, - "learning_rate": 7.468146972783976e-06, - "loss": 0.855, + "learning_rate": 1.6479600005566666e-06, + "loss": 0.9041, "step": 20963 }, { - "epoch": 0.5940661395902406, + "epoch": 0.8202519758979576, "grad_norm": 0.0, - "learning_rate": 7.4672590977758295e-06, - "loss": 0.8038, + "learning_rate": 1.647263166224896e-06, + "loss": 1.0401, "step": 20964 }, { - "epoch": 0.5940944770325031, + "epoch": 0.820291102590187, "grad_norm": 0.0, - "learning_rate": 7.4663712441019e-06, - "loss": 0.8936, + "learning_rate": 1.646566466027615e-06, + "loss": 0.8868, "step": 20965 }, { - "epoch": 0.5941228144747656, + "epoch": 0.8203302292824165, "grad_norm": 0.0, - "learning_rate": 7.465483411769665e-06, - "loss": 0.7889, + "learning_rate": 1.6458698999760036e-06, + "loss": 1.0162, "step": 20966 }, { - "epoch": 0.5941511519170279, + "epoch": 0.8203693559746459, "grad_norm": 0.0, - "learning_rate": 7.4645956007866105e-06, - "loss": 0.9669, + "learning_rate": 1.645173468081258e-06, + "loss": 0.9412, "step": 20967 }, { - "epoch": 0.5941794893592904, + "epoch": 0.8204084826668754, "grad_norm": 0.0, - "learning_rate": 7.4637078111602034e-06, - "loss": 0.9264, + "learning_rate": 1.644477170354556e-06, + "loss": 0.899, "step": 20968 }, { - "epoch": 0.5942078268015529, + "epoch": 0.8204476093591048, "grad_norm": 0.0, - "learning_rate": 7.462820042897932e-06, - "loss": 0.7775, + "learning_rate": 1.6437810068070826e-06, + "loss": 0.9782, "step": 20969 }, { - "epoch": 0.5942361642438153, + "epoch": 0.8204867360513343, "grad_norm": 0.0, - "learning_rate": 7.461932296007264e-06, - "loss": 0.8229, + "learning_rate": 1.6430849774500102e-06, + "loss": 1.0875, "step": 20970 }, { - "epoch": 0.5942645016860778, + "epoch": 0.8205258627435636, "grad_norm": 0.0, - "learning_rate": 7.461044570495684e-06, - "loss": 0.9145, + "learning_rate": 1.6423890822945266e-06, + "loss": 0.9967, "step": 20971 }, { - "epoch": 0.5942928391283403, + "epoch": 0.8205649894357931, "grad_norm": 0.0, - "learning_rate": 7.4601568663706694e-06, - "loss": 0.9325, + "learning_rate": 1.6416933213517983e-06, + "loss": 0.9409, "step": 20972 }, { - "epoch": 0.5943211765706027, + "epoch": 0.8206041161280225, "grad_norm": 0.0, - "learning_rate": 7.459269183639695e-06, - "loss": 0.6732, + "learning_rate": 1.6409976946330042e-06, + "loss": 0.9551, "step": 20973 }, { - "epoch": 0.5943495140128652, + "epoch": 0.820643242820252, "grad_norm": 0.0, - "learning_rate": 7.4583815223102395e-06, - "loss": 0.7182, + "learning_rate": 1.6403022021493109e-06, + "loss": 0.9934, "step": 20974 }, { - "epoch": 0.5943778514551277, + "epoch": 0.8206823695124814, "grad_norm": 0.0, - "learning_rate": 7.457493882389786e-06, - "loss": 0.8335, + "learning_rate": 1.6396068439118895e-06, + "loss": 0.9563, "step": 20975 }, { - "epoch": 0.5944061888973902, + "epoch": 0.8207214962047108, "grad_norm": 0.0, - "learning_rate": 7.456606263885799e-06, - "loss": 0.7996, + "learning_rate": 1.6389116199319054e-06, + "loss": 1.0085, "step": 20976 }, { - "epoch": 0.5944345263396525, + "epoch": 0.8207606228969403, "grad_norm": 0.0, - "learning_rate": 7.455718666805766e-06, - "loss": 0.8472, + "learning_rate": 1.6382165302205255e-06, + "loss": 0.8825, "step": 20977 }, { - "epoch": 0.594462863781915, + "epoch": 0.8207997495891697, "grad_norm": 0.0, - "learning_rate": 7.454831091157156e-06, - "loss": 0.882, + "learning_rate": 1.6375215747889084e-06, + "loss": 0.7986, "step": 20978 }, { - "epoch": 0.5944912012241775, + "epoch": 0.8208388762813992, "grad_norm": 0.0, - "learning_rate": 7.45394353694745e-06, - "loss": 0.8934, + "learning_rate": 1.6368267536482162e-06, + "loss": 0.882, "step": 20979 }, { - "epoch": 0.5945195386664399, + "epoch": 0.8208780029736286, "grad_norm": 0.0, - "learning_rate": 7.453056004184127e-06, - "loss": 0.8558, + "learning_rate": 1.636132066809606e-06, + "loss": 0.971, "step": 20980 }, { - "epoch": 0.5945478761087024, + "epoch": 0.8209171296658581, "grad_norm": 0.0, - "learning_rate": 7.452168492874654e-06, - "loss": 0.8139, + "learning_rate": 1.6354375142842371e-06, + "loss": 0.9017, "step": 20981 }, { - "epoch": 0.5945762135509649, + "epoch": 0.8209562563580874, "grad_norm": 0.0, - "learning_rate": 7.451281003026514e-06, - "loss": 0.8821, + "learning_rate": 1.634743096083259e-06, + "loss": 0.8992, "step": 20982 }, { - "epoch": 0.5946045509932274, + "epoch": 0.8209953830503169, "grad_norm": 0.0, - "learning_rate": 7.450393534647183e-06, - "loss": 0.9099, + "learning_rate": 1.6340488122178255e-06, + "loss": 1.0906, "step": 20983 }, { - "epoch": 0.5946328884354898, + "epoch": 0.8210345097425463, "grad_norm": 0.0, - "learning_rate": 7.44950608774413e-06, - "loss": 0.9648, + "learning_rate": 1.6333546626990804e-06, + "loss": 0.864, "step": 20984 }, { - "epoch": 0.5946612258777523, + "epoch": 0.8210736364347758, "grad_norm": 0.0, - "learning_rate": 7.448618662324836e-06, - "loss": 0.8512, + "learning_rate": 1.63266064753818e-06, + "loss": 1.0001, "step": 20985 }, { - "epoch": 0.5946895633200148, + "epoch": 0.8211127631270052, "grad_norm": 0.0, - "learning_rate": 7.4477312583967735e-06, - "loss": 0.837, + "learning_rate": 1.6319667667462635e-06, + "loss": 0.9969, "step": 20986 }, { - "epoch": 0.5947179007622772, + "epoch": 0.8211518898192347, "grad_norm": 0.0, - "learning_rate": 7.446843875967418e-06, - "loss": 0.8615, + "learning_rate": 1.6312730203344763e-06, + "loss": 0.9949, "step": 20987 }, { - "epoch": 0.5947462382045396, + "epoch": 0.8211910165114641, "grad_norm": 0.0, - "learning_rate": 7.4459565150442484e-06, - "loss": 0.7519, + "learning_rate": 1.6305794083139559e-06, + "loss": 0.9865, "step": 20988 }, { - "epoch": 0.5947745756468021, + "epoch": 0.8212301432036936, "grad_norm": 0.0, - "learning_rate": 7.4450691756347315e-06, - "loss": 0.8659, + "learning_rate": 1.6298859306958425e-06, + "loss": 0.875, "step": 20989 }, { - "epoch": 0.5948029130890646, + "epoch": 0.821269269895923, "grad_norm": 0.0, - "learning_rate": 7.444181857746344e-06, - "loss": 0.9791, + "learning_rate": 1.6291925874912729e-06, + "loss": 1.0393, "step": 20990 }, { - "epoch": 0.594831250531327, + "epoch": 0.8213083965881525, "grad_norm": 0.0, - "learning_rate": 7.443294561386567e-06, - "loss": 0.952, + "learning_rate": 1.6284993787113834e-06, + "loss": 0.9504, "step": 20991 }, { - "epoch": 0.5948595879735895, + "epoch": 0.8213475232803819, "grad_norm": 0.0, - "learning_rate": 7.442407286562865e-06, - "loss": 0.8992, + "learning_rate": 1.627806304367301e-06, + "loss": 0.9915, "step": 20992 }, { - "epoch": 0.594887925415852, + "epoch": 0.8213866499726113, "grad_norm": 0.0, - "learning_rate": 7.441520033282721e-06, - "loss": 0.8627, + "learning_rate": 1.6271133644701586e-06, + "loss": 0.9201, "step": 20993 }, { - "epoch": 0.5949162628581144, + "epoch": 0.8214257766648407, "grad_norm": 0.0, - "learning_rate": 7.4406328015536e-06, - "loss": 0.8622, + "learning_rate": 1.6264205590310867e-06, + "loss": 0.8876, "step": 20994 }, { - "epoch": 0.5949446003003769, + "epoch": 0.8214649033570702, "grad_norm": 0.0, - "learning_rate": 7.439745591382978e-06, - "loss": 0.995, + "learning_rate": 1.6257278880612048e-06, + "loss": 1.0756, "step": 20995 }, { - "epoch": 0.5949729377426394, + "epoch": 0.8215040300492996, "grad_norm": 0.0, - "learning_rate": 7.438858402778336e-06, - "loss": 0.9557, + "learning_rate": 1.62503535157164e-06, + "loss": 1.0015, "step": 20996 }, { - "epoch": 0.5950012751849018, + "epoch": 0.8215431567415291, "grad_norm": 0.0, - "learning_rate": 7.437971235747135e-06, - "loss": 0.9544, + "learning_rate": 1.624342949573513e-06, + "loss": 0.8756, "step": 20997 }, { - "epoch": 0.5950296126271643, + "epoch": 0.8215822834337585, "grad_norm": 0.0, - "learning_rate": 7.437084090296856e-06, - "loss": 0.8012, + "learning_rate": 1.623650682077945e-06, + "loss": 1.0731, "step": 20998 }, { - "epoch": 0.5950579500694267, + "epoch": 0.821621410125988, "grad_norm": 0.0, - "learning_rate": 7.436196966434968e-06, - "loss": 0.7935, + "learning_rate": 1.6229585490960487e-06, + "loss": 0.9005, "step": 20999 }, { - "epoch": 0.5950862875116892, + "epoch": 0.8216605368182174, "grad_norm": 0.0, - "learning_rate": 7.435309864168945e-06, - "loss": 0.9178, + "learning_rate": 1.6222665506389435e-06, + "loss": 0.9199, "step": 21000 }, { - "epoch": 0.5951146249539516, + "epoch": 0.8216996635104469, "grad_norm": 0.0, - "learning_rate": 7.434422783506264e-06, - "loss": 0.9324, + "learning_rate": 1.6215746867177352e-06, + "loss": 1.0027, "step": 21001 }, { - "epoch": 0.5951429623962141, + "epoch": 0.8217387902026763, "grad_norm": 0.0, - "learning_rate": 7.433535724454386e-06, - "loss": 0.9334, + "learning_rate": 1.6208829573435436e-06, + "loss": 1.0567, "step": 21002 }, { - "epoch": 0.5951712998384766, + "epoch": 0.8217779168949058, "grad_norm": 0.0, - "learning_rate": 7.432648687020791e-06, - "loss": 0.8958, + "learning_rate": 1.620191362527469e-06, + "loss": 0.895, "step": 21003 }, { - "epoch": 0.595199637280739, + "epoch": 0.8218170435871351, "grad_norm": 0.0, - "learning_rate": 7.431761671212952e-06, - "loss": 0.8418, + "learning_rate": 1.6194999022806235e-06, + "loss": 0.9302, "step": 21004 }, { - "epoch": 0.5952279747230015, + "epoch": 0.8218561702793645, "grad_norm": 0.0, - "learning_rate": 7.430874677038336e-06, - "loss": 0.9499, + "learning_rate": 1.6188085766141049e-06, + "loss": 0.9497, "step": 21005 }, { - "epoch": 0.595256312165264, + "epoch": 0.821895296971594, "grad_norm": 0.0, - "learning_rate": 7.42998770450442e-06, - "loss": 0.8568, + "learning_rate": 1.6181173855390186e-06, + "loss": 0.8109, "step": 21006 }, { - "epoch": 0.5952846496075265, + "epoch": 0.8219344236638234, "grad_norm": 0.0, - "learning_rate": 7.429100753618668e-06, - "loss": 0.8832, + "learning_rate": 1.6174263290664648e-06, + "loss": 1.0009, "step": 21007 }, { - "epoch": 0.5953129870497889, + "epoch": 0.8219735503560529, "grad_norm": 0.0, - "learning_rate": 7.428213824388556e-06, - "loss": 0.8765, + "learning_rate": 1.6167354072075425e-06, + "loss": 0.9675, "step": 21008 }, { - "epoch": 0.5953413244920513, + "epoch": 0.8220126770482823, "grad_norm": 0.0, - "learning_rate": 7.427326916821557e-06, - "loss": 0.8841, + "learning_rate": 1.6160446199733415e-06, + "loss": 1.0406, "step": 21009 }, { - "epoch": 0.5953696619343138, + "epoch": 0.8220518037405118, "grad_norm": 0.0, - "learning_rate": 7.426440030925135e-06, - "loss": 0.8543, + "learning_rate": 1.6153539673749586e-06, + "loss": 0.8864, "step": 21010 }, { - "epoch": 0.5953979993765762, + "epoch": 0.8220909304327412, "grad_norm": 0.0, - "learning_rate": 7.4255531667067645e-06, - "loss": 0.7916, + "learning_rate": 1.6146634494234848e-06, + "loss": 0.9519, "step": 21011 }, { - "epoch": 0.5954263368188387, + "epoch": 0.8221300571249707, "grad_norm": 0.0, - "learning_rate": 7.424666324173917e-06, - "loss": 0.8011, + "learning_rate": 1.6139730661300113e-06, + "loss": 0.9202, "step": 21012 }, { - "epoch": 0.5954546742611012, + "epoch": 0.8221691838172, "grad_norm": 0.0, - "learning_rate": 7.423779503334061e-06, - "loss": 0.9041, + "learning_rate": 1.6132828175056193e-06, + "loss": 0.9186, "step": 21013 }, { - "epoch": 0.5954830117033637, + "epoch": 0.8222083105094296, "grad_norm": 0.0, - "learning_rate": 7.422892704194669e-06, - "loss": 0.8391, + "learning_rate": 1.6125927035613975e-06, + "loss": 1.1562, "step": 21014 }, { - "epoch": 0.5955113491456261, + "epoch": 0.8222474372016589, "grad_norm": 0.0, - "learning_rate": 7.422005926763205e-06, - "loss": 0.8185, + "learning_rate": 1.6119027243084228e-06, + "loss": 1.0962, "step": 21015 }, { - "epoch": 0.5955396865878886, + "epoch": 0.8222865638938884, "grad_norm": 0.0, - "learning_rate": 7.421119171047144e-06, - "loss": 0.9247, + "learning_rate": 1.6112128797577853e-06, + "loss": 0.9677, "step": 21016 }, { - "epoch": 0.5955680240301511, + "epoch": 0.8223256905861178, "grad_norm": 0.0, - "learning_rate": 7.420232437053954e-06, - "loss": 0.9392, + "learning_rate": 1.6105231699205537e-06, + "loss": 0.8908, "step": 21017 }, { - "epoch": 0.5955963614724135, + "epoch": 0.8223648172783473, "grad_norm": 0.0, - "learning_rate": 7.419345724791103e-06, - "loss": 0.9253, + "learning_rate": 1.6098335948078104e-06, + "loss": 0.8915, "step": 21018 }, { - "epoch": 0.595624698914676, + "epoch": 0.8224039439705767, "grad_norm": 0.0, - "learning_rate": 7.418459034266061e-06, - "loss": 0.8521, + "learning_rate": 1.6091441544306208e-06, + "loss": 0.9755, "step": 21019 }, { - "epoch": 0.5956530363569384, + "epoch": 0.8224430706628062, "grad_norm": 0.0, - "learning_rate": 7.4175723654863015e-06, - "loss": 0.8316, + "learning_rate": 1.6084548488000663e-06, + "loss": 0.9786, "step": 21020 }, { - "epoch": 0.5956813737992008, + "epoch": 0.8224821973550356, "grad_norm": 0.0, - "learning_rate": 7.416685718459285e-06, - "loss": 0.8973, + "learning_rate": 1.60776567792721e-06, + "loss": 0.8295, "step": 21021 }, { - "epoch": 0.5957097112414633, + "epoch": 0.8225213240472651, "grad_norm": 0.0, - "learning_rate": 7.4157990931924884e-06, - "loss": 0.8822, + "learning_rate": 1.6070766418231222e-06, + "loss": 0.8835, "step": 21022 }, { - "epoch": 0.5957380486837258, + "epoch": 0.8225604507394945, "grad_norm": 0.0, - "learning_rate": 7.414912489693371e-06, - "loss": 0.7479, + "learning_rate": 1.606387740498865e-06, + "loss": 0.9724, "step": 21023 }, { - "epoch": 0.5957663861259883, + "epoch": 0.822599577431724, "grad_norm": 0.0, - "learning_rate": 7.414025907969404e-06, - "loss": 0.7661, + "learning_rate": 1.6056989739655027e-06, + "loss": 0.8725, "step": 21024 }, { - "epoch": 0.5957947235682507, + "epoch": 0.8226387041239533, "grad_norm": 0.0, - "learning_rate": 7.4131393480280624e-06, - "loss": 0.9715, + "learning_rate": 1.6050103422340967e-06, + "loss": 0.9216, "step": 21025 }, { - "epoch": 0.5958230610105132, + "epoch": 0.8226778308161828, "grad_norm": 0.0, - "learning_rate": 7.412252809876804e-06, - "loss": 0.8095, + "learning_rate": 1.6043218453157072e-06, + "loss": 0.8402, "step": 21026 }, { - "epoch": 0.5958513984527757, + "epoch": 0.8227169575084122, "grad_norm": 0.0, - "learning_rate": 7.411366293523107e-06, - "loss": 0.8062, + "learning_rate": 1.6036334832213863e-06, + "loss": 0.9219, "step": 21027 }, { - "epoch": 0.5958797358950381, + "epoch": 0.8227560842006417, "grad_norm": 0.0, - "learning_rate": 7.410479798974428e-06, - "loss": 0.8663, + "learning_rate": 1.6029452559621895e-06, + "loss": 0.9616, "step": 21028 }, { - "epoch": 0.5959080733373006, + "epoch": 0.8227952108928711, "grad_norm": 0.0, - "learning_rate": 7.409593326238239e-06, - "loss": 0.8574, + "learning_rate": 1.6022571635491713e-06, + "loss": 0.8076, "step": 21029 }, { - "epoch": 0.595936410779563, + "epoch": 0.8228343375851006, "grad_norm": 0.0, - "learning_rate": 7.408706875322009e-06, - "loss": 0.9246, + "learning_rate": 1.6015692059933808e-06, + "loss": 0.9991, "step": 21030 }, { - "epoch": 0.5959647482218255, + "epoch": 0.82287346427733, "grad_norm": 0.0, - "learning_rate": 7.407820446233203e-06, - "loss": 0.9066, + "learning_rate": 1.6008813833058635e-06, + "loss": 0.9393, "step": 21031 }, { - "epoch": 0.5959930856640879, + "epoch": 0.8229125909695595, "grad_norm": 0.0, - "learning_rate": 7.406934038979286e-06, - "loss": 0.9341, + "learning_rate": 1.6001936954976694e-06, + "loss": 0.888, "step": 21032 }, { - "epoch": 0.5960214231063504, + "epoch": 0.8229517176617889, "grad_norm": 0.0, - "learning_rate": 7.406047653567731e-06, - "loss": 0.8574, + "learning_rate": 1.5995061425798363e-06, + "loss": 0.9139, "step": 21033 }, { - "epoch": 0.5960497605486129, + "epoch": 0.8229908443540183, "grad_norm": 0.0, - "learning_rate": 7.405161290005998e-06, - "loss": 0.9027, + "learning_rate": 1.598818724563408e-06, + "loss": 0.9789, "step": 21034 }, { - "epoch": 0.5960780979908753, + "epoch": 0.8230299710462478, "grad_norm": 0.0, - "learning_rate": 7.404274948301558e-06, - "loss": 0.8296, + "learning_rate": 1.5981314414594229e-06, + "loss": 0.9558, "step": 21035 }, { - "epoch": 0.5961064354331378, + "epoch": 0.8230690977384771, "grad_norm": 0.0, - "learning_rate": 7.4033886284618675e-06, - "loss": 0.889, + "learning_rate": 1.5974442932789224e-06, + "loss": 0.9269, "step": 21036 }, { - "epoch": 0.5961347728754003, + "epoch": 0.8231082244307066, "grad_norm": 0.0, - "learning_rate": 7.402502330494401e-06, - "loss": 0.9236, + "learning_rate": 1.5967572800329345e-06, + "loss": 0.9717, "step": 21037 }, { - "epoch": 0.5961631103176628, + "epoch": 0.823147351122936, "grad_norm": 0.0, - "learning_rate": 7.401616054406624e-06, - "loss": 1.0203, + "learning_rate": 1.5960704017324946e-06, + "loss": 1.0432, "step": 21038 }, { - "epoch": 0.5961914477599252, + "epoch": 0.8231864778151655, "grad_norm": 0.0, - "learning_rate": 7.4007298002059965e-06, - "loss": 0.8587, + "learning_rate": 1.595383658388636e-06, + "loss": 0.9667, "step": 21039 }, { - "epoch": 0.5962197852021877, + "epoch": 0.8232256045073949, "grad_norm": 0.0, - "learning_rate": 7.399843567899988e-06, - "loss": 0.836, + "learning_rate": 1.5946970500123826e-06, + "loss": 0.9299, "step": 21040 }, { - "epoch": 0.5962481226444502, + "epoch": 0.8232647311996244, "grad_norm": 0.0, - "learning_rate": 7.398957357496067e-06, - "loss": 0.8788, + "learning_rate": 1.5940105766147618e-06, + "loss": 1.0065, "step": 21041 }, { - "epoch": 0.5962764600867125, + "epoch": 0.8233038578918538, "grad_norm": 0.0, - "learning_rate": 7.39807116900169e-06, - "loss": 0.9363, + "learning_rate": 1.593324238206797e-06, + "loss": 0.8101, "step": 21042 }, { - "epoch": 0.596304797528975, + "epoch": 0.8233429845840833, "grad_norm": 0.0, - "learning_rate": 7.397185002424328e-06, - "loss": 0.8589, + "learning_rate": 1.592638034799514e-06, + "loss": 1.0509, "step": 21043 }, { - "epoch": 0.5963331349712375, + "epoch": 0.8233821112763127, "grad_norm": 0.0, - "learning_rate": 7.39629885777144e-06, - "loss": 0.855, + "learning_rate": 1.5919519664039263e-06, + "loss": 0.9397, "step": 21044 }, { - "epoch": 0.5963614724134999, + "epoch": 0.8234212379685422, "grad_norm": 0.0, - "learning_rate": 7.395412735050493e-06, - "loss": 0.9249, + "learning_rate": 1.591266033031057e-06, + "loss": 1.0019, "step": 21045 }, { - "epoch": 0.5963898098557624, + "epoch": 0.8234603646607715, "grad_norm": 0.0, - "learning_rate": 7.394526634268958e-06, - "loss": 0.7954, + "learning_rate": 1.590580234691913e-06, + "loss": 1.021, "step": 21046 }, { - "epoch": 0.5964181472980249, + "epoch": 0.823499491353001, "grad_norm": 0.0, - "learning_rate": 7.393640555434287e-06, - "loss": 0.9251, + "learning_rate": 1.5898945713975178e-06, + "loss": 0.9308, "step": 21047 }, { - "epoch": 0.5964464847402874, + "epoch": 0.8235386180452304, "grad_norm": 0.0, - "learning_rate": 7.392754498553952e-06, - "loss": 0.857, + "learning_rate": 1.589209043158876e-06, + "loss": 1.0104, "step": 21048 }, { - "epoch": 0.5964748221825498, + "epoch": 0.8235777447374599, "grad_norm": 0.0, - "learning_rate": 7.391868463635414e-06, - "loss": 0.8441, + "learning_rate": 1.5885236499869995e-06, + "loss": 0.8346, "step": 21049 }, { - "epoch": 0.5965031596248123, + "epoch": 0.8236168714296893, "grad_norm": 0.0, - "learning_rate": 7.390982450686134e-06, - "loss": 0.8852, + "learning_rate": 1.5878383918928874e-06, + "loss": 0.8968, "step": 21050 }, { - "epoch": 0.5965314970670748, + "epoch": 0.8236559981219188, "grad_norm": 0.0, - "learning_rate": 7.390096459713583e-06, - "loss": 0.877, + "learning_rate": 1.5871532688875547e-06, + "loss": 0.9682, "step": 21051 }, { - "epoch": 0.5965598345093371, + "epoch": 0.8236951248141482, "grad_norm": 0.0, - "learning_rate": 7.3892104907252136e-06, - "loss": 0.8984, + "learning_rate": 1.5864682809819965e-06, + "loss": 0.9514, "step": 21052 }, { - "epoch": 0.5965881719515996, + "epoch": 0.8237342515063777, "grad_norm": 0.0, - "learning_rate": 7.388324543728493e-06, - "loss": 0.7563, + "learning_rate": 1.5857834281872175e-06, + "loss": 0.8586, "step": 21053 }, { - "epoch": 0.5966165093938621, + "epoch": 0.8237733781986071, "grad_norm": 0.0, - "learning_rate": 7.387438618730891e-06, - "loss": 0.8265, + "learning_rate": 1.5850987105142113e-06, + "loss": 1.0328, "step": 21054 }, { - "epoch": 0.5966448468361246, + "epoch": 0.8238125048908366, "grad_norm": 0.0, - "learning_rate": 7.386552715739857e-06, - "loss": 0.7781, + "learning_rate": 1.5844141279739745e-06, + "loss": 0.9349, "step": 21055 }, { - "epoch": 0.596673184278387, + "epoch": 0.823851631583066, "grad_norm": 0.0, - "learning_rate": 7.385666834762863e-06, - "loss": 0.8206, + "learning_rate": 1.5837296805775016e-06, + "loss": 1.0341, "step": 21056 }, { - "epoch": 0.5967015217206495, + "epoch": 0.8238907582752955, "grad_norm": 0.0, - "learning_rate": 7.384780975807367e-06, - "loss": 0.9418, + "learning_rate": 1.583045368335786e-06, + "loss": 0.9252, "step": 21057 }, { - "epoch": 0.596729859162912, + "epoch": 0.8239298849675248, "grad_norm": 0.0, - "learning_rate": 7.383895138880833e-06, - "loss": 0.8154, + "learning_rate": 1.5823611912598126e-06, + "loss": 1.0502, "step": 21058 }, { - "epoch": 0.5967581966051744, + "epoch": 0.8239690116597543, "grad_norm": 0.0, - "learning_rate": 7.383009323990723e-06, - "loss": 0.8949, + "learning_rate": 1.581677149360571e-06, + "loss": 0.9237, "step": 21059 }, { - "epoch": 0.5967865340474369, + "epoch": 0.8240081383519837, "grad_norm": 0.0, - "learning_rate": 7.382123531144494e-06, - "loss": 0.9004, + "learning_rate": 1.580993242649045e-06, + "loss": 0.9696, "step": 21060 }, { - "epoch": 0.5968148714896994, + "epoch": 0.8240472650442132, "grad_norm": 0.0, - "learning_rate": 7.381237760349611e-06, - "loss": 0.7742, + "learning_rate": 1.58030947113622e-06, + "loss": 0.8151, "step": 21061 }, { - "epoch": 0.5968432089319619, + "epoch": 0.8240863917364426, "grad_norm": 0.0, - "learning_rate": 7.380352011613537e-06, - "loss": 0.8932, + "learning_rate": 1.5796258348330728e-06, + "loss": 1.022, "step": 21062 }, { - "epoch": 0.5968715463742242, + "epoch": 0.824125518428672, "grad_norm": 0.0, - "learning_rate": 7.379466284943728e-06, - "loss": 0.8681, + "learning_rate": 1.5789423337505849e-06, + "loss": 0.9439, "step": 21063 }, { - "epoch": 0.5968998838164867, + "epoch": 0.8241646451209015, "grad_norm": 0.0, - "learning_rate": 7.378580580347652e-06, - "loss": 0.8073, + "learning_rate": 1.5782589678997274e-06, + "loss": 0.926, "step": 21064 }, { - "epoch": 0.5969282212587492, + "epoch": 0.8242037718131309, "grad_norm": 0.0, - "learning_rate": 7.377694897832761e-06, - "loss": 0.8369, + "learning_rate": 1.5775757372914824e-06, + "loss": 1.0537, "step": 21065 }, { - "epoch": 0.5969565587010116, + "epoch": 0.8242428985053604, "grad_norm": 0.0, - "learning_rate": 7.3768092374065205e-06, - "loss": 0.8833, + "learning_rate": 1.5768926419368135e-06, + "loss": 0.8632, "step": 21066 }, { - "epoch": 0.5969848961432741, + "epoch": 0.8242820251975898, "grad_norm": 0.0, - "learning_rate": 7.375923599076394e-06, - "loss": 0.9163, + "learning_rate": 1.5762096818466976e-06, + "loss": 0.9333, "step": 21067 }, { - "epoch": 0.5970132335855366, + "epoch": 0.8243211518898192, "grad_norm": 0.0, - "learning_rate": 7.375037982849833e-06, - "loss": 0.7639, + "learning_rate": 1.5755268570320936e-06, + "loss": 0.9572, "step": 21068 }, { - "epoch": 0.597041571027799, + "epoch": 0.8243602785820486, "grad_norm": 0.0, - "learning_rate": 7.3741523887343015e-06, - "loss": 0.8048, + "learning_rate": 1.574844167503976e-06, + "loss": 0.962, "step": 21069 }, { - "epoch": 0.5970699084700615, + "epoch": 0.8243994052742781, "grad_norm": 0.0, - "learning_rate": 7.373266816737261e-06, - "loss": 0.9137, + "learning_rate": 1.5741616132733029e-06, + "loss": 0.9774, "step": 21070 }, { - "epoch": 0.597098245912324, + "epoch": 0.8244385319665075, "grad_norm": 0.0, - "learning_rate": 7.372381266866169e-06, - "loss": 0.7991, + "learning_rate": 1.5734791943510375e-06, + "loss": 0.9672, "step": 21071 }, { - "epoch": 0.5971265833545865, + "epoch": 0.824477658658737, "grad_norm": 0.0, - "learning_rate": 7.371495739128488e-06, - "loss": 0.7918, + "learning_rate": 1.5727969107481345e-06, + "loss": 0.9546, "step": 21072 }, { - "epoch": 0.5971549207968488, + "epoch": 0.8245167853509664, "grad_norm": 0.0, - "learning_rate": 7.370610233531671e-06, - "loss": 0.8611, + "learning_rate": 1.5721147624755539e-06, + "loss": 0.8964, "step": 21073 }, { - "epoch": 0.5971832582391113, + "epoch": 0.8245559120431959, "grad_norm": 0.0, - "learning_rate": 7.3697247500831805e-06, - "loss": 0.8279, + "learning_rate": 1.5714327495442483e-06, + "loss": 0.9513, "step": 21074 }, { - "epoch": 0.5972115956813738, + "epoch": 0.8245950387354253, "grad_norm": 0.0, - "learning_rate": 7.368839288790477e-06, - "loss": 0.928, + "learning_rate": 1.5707508719651755e-06, + "loss": 0.9018, "step": 21075 }, { - "epoch": 0.5972399331236362, + "epoch": 0.8246341654276548, "grad_norm": 0.0, - "learning_rate": 7.3679538496610146e-06, - "loss": 0.91, + "learning_rate": 1.5700691297492777e-06, + "loss": 0.9496, "step": 21076 }, { - "epoch": 0.5972682705658987, + "epoch": 0.8246732921198842, "grad_norm": 0.0, - "learning_rate": 7.3670684327022555e-06, - "loss": 0.8716, + "learning_rate": 1.5693875229075062e-06, + "loss": 0.9268, "step": 21077 }, { - "epoch": 0.5972966080081612, + "epoch": 0.8247124188121137, "grad_norm": 0.0, - "learning_rate": 7.366183037921659e-06, - "loss": 0.8531, + "learning_rate": 1.568706051450809e-06, + "loss": 1.0954, "step": 21078 }, { - "epoch": 0.5973249454504237, + "epoch": 0.824751545504343, "grad_norm": 0.0, - "learning_rate": 7.3652976653266785e-06, - "loss": 0.867, + "learning_rate": 1.5680247153901263e-06, + "loss": 0.8949, "step": 21079 }, { - "epoch": 0.5973532828926861, + "epoch": 0.8247906721965725, "grad_norm": 0.0, - "learning_rate": 7.3644123149247784e-06, - "loss": 0.916, + "learning_rate": 1.5673435147364002e-06, + "loss": 0.8943, "step": 21080 }, { - "epoch": 0.5973816203349486, + "epoch": 0.8248297988888019, "grad_norm": 0.0, - "learning_rate": 7.363526986723406e-06, - "loss": 0.8421, + "learning_rate": 1.5666624495005734e-06, + "loss": 1.0442, "step": 21081 }, { - "epoch": 0.5974099577772111, + "epoch": 0.8248689255810314, "grad_norm": 0.0, - "learning_rate": 7.362641680730027e-06, - "loss": 0.8211, + "learning_rate": 1.5659815196935767e-06, + "loss": 0.9955, "step": 21082 }, { - "epoch": 0.5974382952194734, + "epoch": 0.8249080522732608, "grad_norm": 0.0, - "learning_rate": 7.361756396952097e-06, - "loss": 0.861, + "learning_rate": 1.5653007253263498e-06, + "loss": 0.9678, "step": 21083 }, { - "epoch": 0.5974666326617359, + "epoch": 0.8249471789654903, "grad_norm": 0.0, - "learning_rate": 7.360871135397072e-06, - "loss": 0.8562, + "learning_rate": 1.5646200664098254e-06, + "loss": 0.9341, "step": 21084 }, { - "epoch": 0.5974949701039984, + "epoch": 0.8249863056577197, "grad_norm": 0.0, - "learning_rate": 7.359985896072412e-06, - "loss": 0.8366, + "learning_rate": 1.5639395429549297e-06, + "loss": 1.0097, "step": 21085 }, { - "epoch": 0.5975233075462609, + "epoch": 0.8250254323499492, "grad_norm": 0.0, - "learning_rate": 7.359100678985568e-06, - "loss": 0.8056, + "learning_rate": 1.563259154972595e-06, + "loss": 0.9205, "step": 21086 }, { - "epoch": 0.5975516449885233, + "epoch": 0.8250645590421786, "grad_norm": 0.0, - "learning_rate": 7.358215484144e-06, - "loss": 0.8159, + "learning_rate": 1.562578902473746e-06, + "loss": 0.9283, "step": 21087 }, { - "epoch": 0.5975799824307858, + "epoch": 0.8251036857344081, "grad_norm": 0.0, - "learning_rate": 7.357330311555164e-06, - "loss": 0.8441, + "learning_rate": 1.5618987854693102e-06, + "loss": 0.9183, "step": 21088 }, { - "epoch": 0.5976083198730483, + "epoch": 0.8251428124266375, "grad_norm": 0.0, - "learning_rate": 7.356445161226516e-06, - "loss": 0.7843, + "learning_rate": 1.5612188039702037e-06, + "loss": 0.9131, "step": 21089 }, { - "epoch": 0.5976366573153107, + "epoch": 0.8251819391188668, "grad_norm": 0.0, - "learning_rate": 7.355560033165512e-06, - "loss": 0.8783, + "learning_rate": 1.5605389579873508e-06, + "loss": 1.0281, "step": 21090 }, { - "epoch": 0.5976649947575732, + "epoch": 0.8252210658110963, "grad_norm": 0.0, - "learning_rate": 7.354674927379612e-06, - "loss": 0.8549, + "learning_rate": 1.559859247531662e-06, + "loss": 1.013, "step": 21091 }, { - "epoch": 0.5976933321998357, + "epoch": 0.8252601925033257, "grad_norm": 0.0, - "learning_rate": 7.353789843876263e-06, - "loss": 0.9441, + "learning_rate": 1.5591796726140629e-06, + "loss": 0.9798, "step": 21092 }, { - "epoch": 0.597721669642098, + "epoch": 0.8252993191955552, "grad_norm": 0.0, - "learning_rate": 7.352904782662927e-06, - "loss": 0.9156, + "learning_rate": 1.558500233245459e-06, + "loss": 0.9969, "step": 21093 }, { - "epoch": 0.5977500070843605, + "epoch": 0.8253384458877846, "grad_norm": 0.0, - "learning_rate": 7.352019743747055e-06, - "loss": 0.8582, + "learning_rate": 1.5578209294367653e-06, + "loss": 0.8629, "step": 21094 }, { - "epoch": 0.597778344526623, + "epoch": 0.8253775725800141, "grad_norm": 0.0, - "learning_rate": 7.351134727136105e-06, - "loss": 0.7925, + "learning_rate": 1.5571417611988849e-06, + "loss": 0.9156, "step": 21095 }, { - "epoch": 0.5978066819688855, + "epoch": 0.8254166992722435, "grad_norm": 0.0, - "learning_rate": 7.35024973283753e-06, - "loss": 0.798, + "learning_rate": 1.5564627285427325e-06, + "loss": 0.9152, "step": 21096 }, { - "epoch": 0.5978350194111479, + "epoch": 0.825455825964473, "grad_norm": 0.0, - "learning_rate": 7.349364760858785e-06, - "loss": 0.8068, + "learning_rate": 1.5557838314792062e-06, + "loss": 1.0514, "step": 21097 }, { - "epoch": 0.5978633568534104, + "epoch": 0.8254949526567024, "grad_norm": 0.0, - "learning_rate": 7.3484798112073255e-06, - "loss": 0.8895, + "learning_rate": 1.5551050700192127e-06, + "loss": 0.8363, "step": 21098 }, { - "epoch": 0.5978916942956729, + "epoch": 0.8255340793489319, "grad_norm": 0.0, - "learning_rate": 7.347594883890608e-06, - "loss": 0.8729, + "learning_rate": 1.5544264441736467e-06, + "loss": 0.916, "step": 21099 }, { - "epoch": 0.5979200317379353, + "epoch": 0.8255732060411612, "grad_norm": 0.0, - "learning_rate": 7.34670997891608e-06, - "loss": 0.972, + "learning_rate": 1.5537479539534106e-06, + "loss": 0.9258, "step": 21100 }, { - "epoch": 0.5979483691801978, + "epoch": 0.8256123327333907, "grad_norm": 0.0, - "learning_rate": 7.345825096291201e-06, - "loss": 0.8468, + "learning_rate": 1.5530695993693978e-06, + "loss": 1.0317, "step": 21101 }, { - "epoch": 0.5979767066224603, + "epoch": 0.8256514594256201, "grad_norm": 0.0, - "learning_rate": 7.344940236023421e-06, - "loss": 0.8296, + "learning_rate": 1.5523913804325042e-06, + "loss": 1.0369, "step": 21102 }, { - "epoch": 0.5980050440647228, + "epoch": 0.8256905861178496, "grad_norm": 0.0, - "learning_rate": 7.3440553981201956e-06, - "loss": 0.9029, + "learning_rate": 1.5517132971536187e-06, + "loss": 1.0121, "step": 21103 }, { - "epoch": 0.5980333815069852, + "epoch": 0.825729712810079, "grad_norm": 0.0, - "learning_rate": 7.343170582588981e-06, - "loss": 0.8564, + "learning_rate": 1.5510353495436303e-06, + "loss": 0.9017, "step": 21104 }, { - "epoch": 0.5980617189492476, + "epoch": 0.8257688395023085, "grad_norm": 0.0, - "learning_rate": 7.342285789437225e-06, - "loss": 0.8222, + "learning_rate": 1.5503575376134272e-06, + "loss": 1.056, "step": 21105 }, { - "epoch": 0.5980900563915101, + "epoch": 0.8258079661945379, "grad_norm": 0.0, - "learning_rate": 7.3414010186723805e-06, - "loss": 0.8487, + "learning_rate": 1.5496798613738974e-06, + "loss": 0.9428, "step": 21106 }, { - "epoch": 0.5981183938337725, + "epoch": 0.8258470928867674, "grad_norm": 0.0, - "learning_rate": 7.340516270301908e-06, - "loss": 0.8793, + "learning_rate": 1.5490023208359161e-06, + "loss": 0.987, "step": 21107 }, { - "epoch": 0.598146731276035, + "epoch": 0.8258862195789968, "grad_norm": 0.0, - "learning_rate": 7.33963154433325e-06, - "loss": 0.7911, + "learning_rate": 1.5483249160103696e-06, + "loss": 0.9996, "step": 21108 }, { - "epoch": 0.5981750687182975, + "epoch": 0.8259253462712263, "grad_norm": 0.0, - "learning_rate": 7.338746840773866e-06, - "loss": 0.8086, + "learning_rate": 1.5476476469081337e-06, + "loss": 0.9896, "step": 21109 }, { - "epoch": 0.59820340616056, + "epoch": 0.8259644729634557, "grad_norm": 0.0, - "learning_rate": 7.337862159631203e-06, - "loss": 0.8718, + "learning_rate": 1.5469705135400869e-06, + "loss": 0.899, "step": 21110 }, { - "epoch": 0.5982317436028224, + "epoch": 0.8260035996556852, "grad_norm": 0.0, - "learning_rate": 7.336977500912716e-06, - "loss": 0.8755, + "learning_rate": 1.5462935159171e-06, + "loss": 0.9067, "step": 21111 }, { - "epoch": 0.5982600810450849, + "epoch": 0.8260427263479145, "grad_norm": 0.0, - "learning_rate": 7.3360928646258586e-06, - "loss": 0.8441, + "learning_rate": 1.5456166540500473e-06, + "loss": 0.9587, "step": 21112 }, { - "epoch": 0.5982884184873474, + "epoch": 0.826081853040144, "grad_norm": 0.0, - "learning_rate": 7.335208250778078e-06, - "loss": 0.8482, + "learning_rate": 1.5449399279497934e-06, + "loss": 0.8314, "step": 21113 }, { - "epoch": 0.5983167559296098, + "epoch": 0.8261209797323734, "grad_norm": 0.0, - "learning_rate": 7.3343236593768295e-06, - "loss": 0.7911, + "learning_rate": 1.5442633376272142e-06, + "loss": 0.9356, "step": 21114 }, { - "epoch": 0.5983450933718722, + "epoch": 0.8261601064246029, "grad_norm": 0.0, - "learning_rate": 7.333439090429562e-06, - "loss": 0.7921, + "learning_rate": 1.5435868830931678e-06, + "loss": 0.9286, "step": 21115 }, { - "epoch": 0.5983734308141347, + "epoch": 0.8261992331168323, "grad_norm": 0.0, - "learning_rate": 7.332554543943725e-06, - "loss": 0.8876, + "learning_rate": 1.5429105643585218e-06, + "loss": 0.9097, "step": 21116 }, { - "epoch": 0.5984017682563971, + "epoch": 0.8262383598090618, "grad_norm": 0.0, - "learning_rate": 7.331670019926778e-06, - "loss": 0.8469, + "learning_rate": 1.5422343814341323e-06, + "loss": 0.8741, "step": 21117 }, { - "epoch": 0.5984301056986596, + "epoch": 0.8262774865012912, "grad_norm": 0.0, - "learning_rate": 7.3307855183861585e-06, - "loss": 0.8794, + "learning_rate": 1.5415583343308604e-06, + "loss": 0.9799, "step": 21118 }, { - "epoch": 0.5984584431409221, + "epoch": 0.8263166131935206, "grad_norm": 0.0, - "learning_rate": 7.3299010393293255e-06, - "loss": 0.8539, + "learning_rate": 1.5408824230595632e-06, + "loss": 1.1024, "step": 21119 }, { - "epoch": 0.5984867805831846, + "epoch": 0.8263557398857501, "grad_norm": 0.0, - "learning_rate": 7.3290165827637305e-06, - "loss": 0.8337, + "learning_rate": 1.5402066476310963e-06, + "loss": 0.9567, "step": 21120 }, { - "epoch": 0.598515118025447, + "epoch": 0.8263948665779794, "grad_norm": 0.0, - "learning_rate": 7.328132148696818e-06, - "loss": 0.9172, + "learning_rate": 1.5395310080563074e-06, + "loss": 0.9463, "step": 21121 }, { - "epoch": 0.5985434554677095, + "epoch": 0.8264339932702089, "grad_norm": 0.0, - "learning_rate": 7.327247737136042e-06, - "loss": 0.8107, + "learning_rate": 1.5388555043460495e-06, + "loss": 0.8466, "step": 21122 }, { - "epoch": 0.598571792909972, + "epoch": 0.8264731199624383, "grad_norm": 0.0, - "learning_rate": 7.326363348088848e-06, - "loss": 0.9212, + "learning_rate": 1.5381801365111726e-06, + "loss": 0.8597, "step": 21123 }, { - "epoch": 0.5986001303522344, + "epoch": 0.8265122466546678, "grad_norm": 0.0, - "learning_rate": 7.325478981562689e-06, - "loss": 0.8275, + "learning_rate": 1.5375049045625157e-06, + "loss": 1.008, "step": 21124 }, { - "epoch": 0.5986284677944969, + "epoch": 0.8265513733468972, "grad_norm": 0.0, - "learning_rate": 7.324594637565019e-06, - "loss": 0.8805, + "learning_rate": 1.5368298085109269e-06, + "loss": 0.9358, "step": 21125 }, { - "epoch": 0.5986568052367593, + "epoch": 0.8265905000391267, "grad_norm": 0.0, - "learning_rate": 7.323710316103277e-06, - "loss": 0.7664, + "learning_rate": 1.536154848367246e-06, + "loss": 0.9189, "step": 21126 }, { - "epoch": 0.5986851426790218, + "epoch": 0.8266296267313561, "grad_norm": 0.0, - "learning_rate": 7.322826017184915e-06, - "loss": 0.9555, + "learning_rate": 1.5354800241423152e-06, + "loss": 0.877, "step": 21127 }, { - "epoch": 0.5987134801212842, + "epoch": 0.8266687534235856, "grad_norm": 0.0, - "learning_rate": 7.321941740817388e-06, - "loss": 0.9551, + "learning_rate": 1.5348053358469661e-06, + "loss": 0.9633, "step": 21128 }, { - "epoch": 0.5987418175635467, + "epoch": 0.826707880115815, "grad_norm": 0.0, - "learning_rate": 7.321057487008136e-06, - "loss": 0.9134, + "learning_rate": 1.5341307834920383e-06, + "loss": 0.8789, "step": 21129 }, { - "epoch": 0.5987701550058092, + "epoch": 0.8267470068080445, "grad_norm": 0.0, - "learning_rate": 7.320173255764617e-06, - "loss": 0.781, + "learning_rate": 1.5334563670883585e-06, + "loss": 0.9502, "step": 21130 }, { - "epoch": 0.5987984924480716, + "epoch": 0.8267861335002739, "grad_norm": 0.0, - "learning_rate": 7.31928904709427e-06, - "loss": 0.8687, + "learning_rate": 1.5327820866467613e-06, + "loss": 0.9788, "step": 21131 }, { - "epoch": 0.5988268298903341, + "epoch": 0.8268252601925034, "grad_norm": 0.0, - "learning_rate": 7.318404861004547e-06, - "loss": 0.8031, + "learning_rate": 1.5321079421780738e-06, + "loss": 0.984, "step": 21132 }, { - "epoch": 0.5988551673325966, + "epoch": 0.8268643868847327, "grad_norm": 0.0, - "learning_rate": 7.3175206975028985e-06, - "loss": 0.6363, + "learning_rate": 1.531433933693124e-06, + "loss": 0.901, "step": 21133 }, { - "epoch": 0.598883504774859, + "epoch": 0.8269035135769622, "grad_norm": 0.0, - "learning_rate": 7.316636556596766e-06, - "loss": 0.8275, + "learning_rate": 1.5307600612027307e-06, + "loss": 0.996, "step": 21134 }, { - "epoch": 0.5989118422171215, + "epoch": 0.8269426402691916, "grad_norm": 0.0, - "learning_rate": 7.315752438293602e-06, - "loss": 0.8733, + "learning_rate": 1.5300863247177178e-06, + "loss": 0.8651, "step": 21135 }, { - "epoch": 0.598940179659384, + "epoch": 0.8269817669614211, "grad_norm": 0.0, - "learning_rate": 7.31486834260085e-06, - "loss": 0.9866, + "learning_rate": 1.5294127242489065e-06, + "loss": 1.0009, "step": 21136 }, { - "epoch": 0.5989685171016464, + "epoch": 0.8270208936536505, "grad_norm": 0.0, - "learning_rate": 7.31398426952596e-06, - "loss": 0.9101, + "learning_rate": 1.528739259807115e-06, + "loss": 0.9616, "step": 21137 }, { - "epoch": 0.5989968545439088, + "epoch": 0.82706002034588, "grad_norm": 0.0, - "learning_rate": 7.313100219076381e-06, - "loss": 0.8856, + "learning_rate": 1.5280659314031521e-06, + "loss": 0.8873, "step": 21138 }, { - "epoch": 0.5990251919861713, + "epoch": 0.8270991470381094, "grad_norm": 0.0, - "learning_rate": 7.312216191259552e-06, - "loss": 0.8939, + "learning_rate": 1.5273927390478383e-06, + "loss": 1.0071, "step": 21139 }, { - "epoch": 0.5990535294284338, + "epoch": 0.8271382737303389, "grad_norm": 0.0, - "learning_rate": 7.311332186082925e-06, - "loss": 0.8037, + "learning_rate": 1.5267196827519748e-06, + "loss": 0.9687, "step": 21140 }, { - "epoch": 0.5990818668706962, + "epoch": 0.8271774004225683, "grad_norm": 0.0, - "learning_rate": 7.310448203553947e-06, - "loss": 0.8353, + "learning_rate": 1.5260467625263798e-06, + "loss": 0.9907, "step": 21141 }, { - "epoch": 0.5991102043129587, + "epoch": 0.8272165271147978, "grad_norm": 0.0, - "learning_rate": 7.309564243680061e-06, - "loss": 0.9166, + "learning_rate": 1.5253739783818544e-06, + "loss": 1.0063, "step": 21142 }, { - "epoch": 0.5991385417552212, + "epoch": 0.8272556538070271, "grad_norm": 0.0, - "learning_rate": 7.308680306468719e-06, - "loss": 0.9172, + "learning_rate": 1.5247013303292046e-06, + "loss": 0.8983, "step": 21143 }, { - "epoch": 0.5991668791974837, + "epoch": 0.8272947804992566, "grad_norm": 0.0, - "learning_rate": 7.307796391927356e-06, - "loss": 0.8909, + "learning_rate": 1.5240288183792273e-06, + "loss": 1.0508, "step": 21144 }, { - "epoch": 0.5991952166397461, + "epoch": 0.827333907191486, "grad_norm": 0.0, - "learning_rate": 7.306912500063425e-06, - "loss": 1.0015, + "learning_rate": 1.5233564425427305e-06, + "loss": 0.9155, "step": 21145 }, { - "epoch": 0.5992235540820086, + "epoch": 0.8273730338837155, "grad_norm": 0.0, - "learning_rate": 7.306028630884374e-06, - "loss": 0.8562, + "learning_rate": 1.5226842028305056e-06, + "loss": 0.9573, "step": 21146 }, { - "epoch": 0.599251891524271, + "epoch": 0.8274121605759449, "grad_norm": 0.0, - "learning_rate": 7.305144784397641e-06, - "loss": 0.8033, + "learning_rate": 1.5220120992533515e-06, + "loss": 0.8715, "step": 21147 }, { - "epoch": 0.5992802289665334, + "epoch": 0.8274512872681743, "grad_norm": 0.0, - "learning_rate": 7.304260960610674e-06, - "loss": 0.8346, + "learning_rate": 1.5213401318220577e-06, + "loss": 0.9426, "step": 21148 }, { - "epoch": 0.5993085664087959, + "epoch": 0.8274904139604038, "grad_norm": 0.0, - "learning_rate": 7.303377159530919e-06, - "loss": 0.7252, + "learning_rate": 1.5206683005474165e-06, + "loss": 0.941, "step": 21149 }, { - "epoch": 0.5993369038510584, + "epoch": 0.8275295406526332, "grad_norm": 0.0, - "learning_rate": 7.302493381165818e-06, - "loss": 0.8158, + "learning_rate": 1.5199966054402182e-06, + "loss": 0.8342, "step": 21150 }, { - "epoch": 0.5993652412933209, + "epoch": 0.8275686673448627, "grad_norm": 0.0, - "learning_rate": 7.301609625522821e-06, - "loss": 0.8497, + "learning_rate": 1.5193250465112496e-06, + "loss": 1.062, "step": 21151 }, { - "epoch": 0.5993935787355833, + "epoch": 0.8276077940370921, "grad_norm": 0.0, - "learning_rate": 7.300725892609364e-06, - "loss": 0.9134, + "learning_rate": 1.5186536237712923e-06, + "loss": 1.0703, "step": 21152 }, { - "epoch": 0.5994219161778458, + "epoch": 0.8276469207293216, "grad_norm": 0.0, - "learning_rate": 7.299842182432895e-06, - "loss": 0.9518, + "learning_rate": 1.5179823372311298e-06, + "loss": 0.8972, "step": 21153 }, { - "epoch": 0.5994502536201083, + "epoch": 0.8276860474215509, "grad_norm": 0.0, - "learning_rate": 7.29895849500086e-06, - "loss": 0.8347, + "learning_rate": 1.5173111869015423e-06, + "loss": 0.9459, "step": 21154 }, { - "epoch": 0.5994785910623707, + "epoch": 0.8277251741137804, "grad_norm": 0.0, - "learning_rate": 7.298074830320699e-06, - "loss": 0.837, + "learning_rate": 1.51664017279331e-06, + "loss": 1.0001, "step": 21155 }, { - "epoch": 0.5995069285046332, + "epoch": 0.8277643008060098, "grad_norm": 0.0, - "learning_rate": 7.297191188399857e-06, - "loss": 0.8274, + "learning_rate": 1.5159692949172045e-06, + "loss": 0.9187, "step": 21156 }, { - "epoch": 0.5995352659468957, + "epoch": 0.8278034274982393, "grad_norm": 0.0, - "learning_rate": 7.296307569245782e-06, - "loss": 0.9137, + "learning_rate": 1.5152985532840026e-06, + "loss": 1.0369, "step": 21157 }, { - "epoch": 0.599563603389158, + "epoch": 0.8278425541904687, "grad_norm": 0.0, - "learning_rate": 7.295423972865907e-06, - "loss": 0.8833, + "learning_rate": 1.5146279479044702e-06, + "loss": 1.0999, "step": 21158 }, { - "epoch": 0.5995919408314205, + "epoch": 0.8278816808826982, "grad_norm": 0.0, - "learning_rate": 7.294540399267682e-06, - "loss": 0.8522, + "learning_rate": 1.513957478789384e-06, + "loss": 0.8448, "step": 21159 }, { - "epoch": 0.599620278273683, + "epoch": 0.8279208075749276, "grad_norm": 0.0, - "learning_rate": 7.2936568484585475e-06, - "loss": 0.7804, + "learning_rate": 1.5132871459495047e-06, + "loss": 0.9104, "step": 21160 }, { - "epoch": 0.5996486157159455, + "epoch": 0.8279599342671571, "grad_norm": 0.0, - "learning_rate": 7.292773320445947e-06, - "loss": 0.8851, + "learning_rate": 1.5126169493956022e-06, + "loss": 0.9986, "step": 21161 }, { - "epoch": 0.5996769531582079, + "epoch": 0.8279990609593865, "grad_norm": 0.0, - "learning_rate": 7.291889815237323e-06, - "loss": 0.7653, + "learning_rate": 1.5119468891384336e-06, + "loss": 1.0059, "step": 21162 }, { - "epoch": 0.5997052906004704, + "epoch": 0.828038187651616, "grad_norm": 0.0, - "learning_rate": 7.291006332840113e-06, - "loss": 0.7999, + "learning_rate": 1.5112769651887626e-06, + "loss": 1.0123, "step": 21163 }, { - "epoch": 0.5997336280427329, + "epoch": 0.8280773143438453, "grad_norm": 0.0, - "learning_rate": 7.290122873261769e-06, - "loss": 0.8756, + "learning_rate": 1.510607177557346e-06, + "loss": 1.0165, "step": 21164 }, { - "epoch": 0.5997619654849953, + "epoch": 0.8281164410360748, "grad_norm": 0.0, - "learning_rate": 7.289239436509721e-06, - "loss": 0.875, + "learning_rate": 1.509937526254942e-06, + "loss": 0.9377, "step": 21165 }, { - "epoch": 0.5997903029272578, + "epoch": 0.8281555677283042, "grad_norm": 0.0, - "learning_rate": 7.2883560225914165e-06, - "loss": 0.8364, + "learning_rate": 1.509268011292302e-06, + "loss": 0.8664, "step": 21166 }, { - "epoch": 0.5998186403695203, + "epoch": 0.8281946944205337, "grad_norm": 0.0, - "learning_rate": 7.287472631514298e-06, - "loss": 0.8582, + "learning_rate": 1.5085986326801772e-06, + "loss": 1.0032, "step": 21167 }, { - "epoch": 0.5998469778117828, + "epoch": 0.8282338211127631, "grad_norm": 0.0, - "learning_rate": 7.286589263285801e-06, - "loss": 0.7542, + "learning_rate": 1.5079293904293202e-06, + "loss": 0.9639, "step": 21168 }, { - "epoch": 0.5998753152540451, + "epoch": 0.8282729478049926, "grad_norm": 0.0, - "learning_rate": 7.285705917913372e-06, - "loss": 0.8047, + "learning_rate": 1.5072602845504737e-06, + "loss": 0.8248, "step": 21169 }, { - "epoch": 0.5999036526963076, + "epoch": 0.828312074497222, "grad_norm": 0.0, - "learning_rate": 7.284822595404455e-06, - "loss": 0.756, + "learning_rate": 1.5065913150543853e-06, + "loss": 0.9411, "step": 21170 }, { - "epoch": 0.5999319901385701, + "epoch": 0.8283512011894515, "grad_norm": 0.0, - "learning_rate": 7.28393929576648e-06, - "loss": 0.8108, + "learning_rate": 1.5059224819517982e-06, + "loss": 0.9276, "step": 21171 }, { - "epoch": 0.5999603275808325, + "epoch": 0.8283903278816809, "grad_norm": 0.0, - "learning_rate": 7.283056019006895e-06, - "loss": 0.7771, + "learning_rate": 1.5052537852534532e-06, + "loss": 0.9988, "step": 21172 }, { - "epoch": 0.599988665023095, + "epoch": 0.8284294545739104, "grad_norm": 0.0, - "learning_rate": 7.2821727651331355e-06, - "loss": 0.8617, + "learning_rate": 1.5045852249700864e-06, + "loss": 1.0077, "step": 21173 }, { - "epoch": 0.6000170024653575, + "epoch": 0.8284685812661398, "grad_norm": 0.0, - "learning_rate": 7.281289534152644e-06, - "loss": 0.9615, + "learning_rate": 1.5039168011124383e-06, + "loss": 0.917, "step": 21174 }, { - "epoch": 0.60004533990762, + "epoch": 0.8285077079583693, "grad_norm": 0.0, - "learning_rate": 7.280406326072866e-06, - "loss": 0.9448, + "learning_rate": 1.5032485136912345e-06, + "loss": 0.9378, "step": 21175 }, { - "epoch": 0.6000736773498824, + "epoch": 0.8285468346505986, "grad_norm": 0.0, - "learning_rate": 7.27952314090123e-06, - "loss": 0.8074, + "learning_rate": 1.5025803627172187e-06, + "loss": 0.9297, "step": 21176 }, { - "epoch": 0.6001020147921449, + "epoch": 0.828585961342828, "grad_norm": 0.0, - "learning_rate": 7.2786399786451825e-06, - "loss": 0.9363, + "learning_rate": 1.5019123482011111e-06, + "loss": 0.8334, "step": 21177 }, { - "epoch": 0.6001303522344074, + "epoch": 0.8286250880350575, "grad_norm": 0.0, - "learning_rate": 7.2777568393121645e-06, - "loss": 0.8219, + "learning_rate": 1.5012444701536444e-06, + "loss": 0.9745, "step": 21178 }, { - "epoch": 0.6001586896766697, + "epoch": 0.8286642147272869, "grad_norm": 0.0, - "learning_rate": 7.276873722909606e-06, - "loss": 0.8878, + "learning_rate": 1.5005767285855398e-06, + "loss": 1.0865, "step": 21179 }, { - "epoch": 0.6001870271189322, + "epoch": 0.8287033414195164, "grad_norm": 0.0, - "learning_rate": 7.275990629444954e-06, - "loss": 0.8648, + "learning_rate": 1.4999091235075237e-06, + "loss": 0.9194, "step": 21180 }, { - "epoch": 0.6002153645611947, + "epoch": 0.8287424681117458, "grad_norm": 0.0, - "learning_rate": 7.275107558925642e-06, - "loss": 0.8974, + "learning_rate": 1.4992416549303146e-06, + "loss": 1.0057, "step": 21181 }, { - "epoch": 0.6002437020034571, + "epoch": 0.8287815948039753, "grad_norm": 0.0, - "learning_rate": 7.274224511359112e-06, - "loss": 0.8353, + "learning_rate": 1.4985743228646355e-06, + "loss": 1.0544, "step": 21182 }, { - "epoch": 0.6002720394457196, + "epoch": 0.8288207214962047, "grad_norm": 0.0, - "learning_rate": 7.273341486752805e-06, - "loss": 0.7133, + "learning_rate": 1.4979071273211964e-06, + "loss": 0.8769, "step": 21183 }, { - "epoch": 0.6003003768879821, + "epoch": 0.8288598481884342, "grad_norm": 0.0, - "learning_rate": 7.272458485114151e-06, - "loss": 0.8553, + "learning_rate": 1.4972400683107168e-06, + "loss": 1.0358, "step": 21184 }, { - "epoch": 0.6003287143302446, + "epoch": 0.8288989748806636, "grad_norm": 0.0, - "learning_rate": 7.2715755064505926e-06, - "loss": 0.9384, + "learning_rate": 1.4965731458439058e-06, + "loss": 0.9805, "step": 21185 }, { - "epoch": 0.600357051772507, + "epoch": 0.828938101572893, "grad_norm": 0.0, - "learning_rate": 7.270692550769567e-06, - "loss": 0.7985, + "learning_rate": 1.4959063599314784e-06, + "loss": 1.0133, "step": 21186 }, { - "epoch": 0.6003853892147695, + "epoch": 0.8289772282651224, "grad_norm": 0.0, - "learning_rate": 7.269809618078512e-06, - "loss": 0.9115, + "learning_rate": 1.4952397105841355e-06, + "loss": 0.8855, "step": 21187 }, { - "epoch": 0.600413726657032, + "epoch": 0.8290163549573519, "grad_norm": 0.0, - "learning_rate": 7.268926708384867e-06, - "loss": 0.9262, + "learning_rate": 1.4945731978125876e-06, + "loss": 0.9665, "step": 21188 }, { - "epoch": 0.6004420640992943, + "epoch": 0.8290554816495813, "grad_norm": 0.0, - "learning_rate": 7.268043821696062e-06, - "loss": 0.856, + "learning_rate": 1.493906821627532e-06, + "loss": 1.059, "step": 21189 }, { - "epoch": 0.6004704015415568, + "epoch": 0.8290946083418108, "grad_norm": 0.0, - "learning_rate": 7.267160958019539e-06, - "loss": 0.8778, + "learning_rate": 1.4932405820396788e-06, + "loss": 0.9539, "step": 21190 }, { - "epoch": 0.6004987389838193, + "epoch": 0.8291337350340402, "grad_norm": 0.0, - "learning_rate": 7.266278117362737e-06, - "loss": 0.8133, + "learning_rate": 1.4925744790597207e-06, + "loss": 0.8228, "step": 21191 }, { - "epoch": 0.6005270764260818, + "epoch": 0.8291728617262697, "grad_norm": 0.0, - "learning_rate": 7.265395299733088e-06, - "loss": 0.8456, + "learning_rate": 1.491908512698358e-06, + "loss": 0.9244, "step": 21192 }, { - "epoch": 0.6005554138683442, + "epoch": 0.8292119884184991, "grad_norm": 0.0, - "learning_rate": 7.26451250513803e-06, - "loss": 0.7882, + "learning_rate": 1.4912426829662774e-06, + "loss": 0.9526, "step": 21193 }, { - "epoch": 0.6005837513106067, + "epoch": 0.8292511151107286, "grad_norm": 0.0, - "learning_rate": 7.263629733584998e-06, - "loss": 0.7835, + "learning_rate": 1.490576989874184e-06, + "loss": 0.9044, "step": 21194 }, { - "epoch": 0.6006120887528692, + "epoch": 0.829290241802958, "grad_norm": 0.0, - "learning_rate": 7.262746985081428e-06, - "loss": 0.8167, + "learning_rate": 1.489911433432757e-06, + "loss": 1.0135, "step": 21195 }, { - "epoch": 0.6006404261951316, + "epoch": 0.8293293684951875, "grad_norm": 0.0, - "learning_rate": 7.261864259634761e-06, - "loss": 0.8278, + "learning_rate": 1.489246013652692e-06, + "loss": 0.968, "step": 21196 }, { - "epoch": 0.6006687636373941, + "epoch": 0.8293684951874168, "grad_norm": 0.0, - "learning_rate": 7.260981557252425e-06, - "loss": 0.7933, + "learning_rate": 1.4885807305446687e-06, + "loss": 0.9266, "step": 21197 }, { - "epoch": 0.6006971010796566, + "epoch": 0.8294076218796463, "grad_norm": 0.0, - "learning_rate": 7.260098877941857e-06, - "loss": 0.8073, + "learning_rate": 1.4879155841193738e-06, + "loss": 0.9256, "step": 21198 }, { - "epoch": 0.6007254385219191, + "epoch": 0.8294467485718757, "grad_norm": 0.0, - "learning_rate": 7.259216221710496e-06, - "loss": 0.9083, + "learning_rate": 1.4872505743874888e-06, + "loss": 0.8789, "step": 21199 }, { - "epoch": 0.6007537759641814, + "epoch": 0.8294858752641052, "grad_norm": 0.0, - "learning_rate": 7.258333588565771e-06, - "loss": 0.9651, + "learning_rate": 1.486585701359694e-06, + "loss": 0.9001, "step": 21200 }, { - "epoch": 0.6007821134064439, + "epoch": 0.8295250019563346, "grad_norm": 0.0, - "learning_rate": 7.257450978515127e-06, - "loss": 0.8283, + "learning_rate": 1.4859209650466632e-06, + "loss": 0.9476, "step": 21201 }, { - "epoch": 0.6008104508487064, + "epoch": 0.8295641286485641, "grad_norm": 0.0, - "learning_rate": 7.256568391565987e-06, - "loss": 0.8903, + "learning_rate": 1.4852563654590724e-06, + "loss": 1.0095, "step": 21202 }, { - "epoch": 0.6008387882909688, + "epoch": 0.8296032553407935, "grad_norm": 0.0, - "learning_rate": 7.255685827725789e-06, - "loss": 0.8172, + "learning_rate": 1.484591902607595e-06, + "loss": 1.0308, "step": 21203 }, { - "epoch": 0.6008671257332313, + "epoch": 0.8296423820330229, "grad_norm": 0.0, - "learning_rate": 7.254803287001975e-06, - "loss": 0.9525, + "learning_rate": 1.4839275765029038e-06, + "loss": 0.8912, "step": 21204 }, { - "epoch": 0.6008954631754938, + "epoch": 0.8296815087252524, "grad_norm": 0.0, - "learning_rate": 7.253920769401965e-06, - "loss": 0.8161, + "learning_rate": 1.4832633871556623e-06, + "loss": 0.9306, "step": 21205 }, { - "epoch": 0.6009238006177562, + "epoch": 0.8297206354174818, "grad_norm": 0.0, - "learning_rate": 7.253038274933202e-06, - "loss": 0.8331, + "learning_rate": 1.4825993345765399e-06, + "loss": 0.8798, "step": 21206 }, { - "epoch": 0.6009521380600187, + "epoch": 0.8297597621097113, "grad_norm": 0.0, - "learning_rate": 7.25215580360312e-06, - "loss": 0.897, + "learning_rate": 1.481935418776198e-06, + "loss": 0.8582, "step": 21207 }, { - "epoch": 0.6009804755022812, + "epoch": 0.8297988888019406, "grad_norm": 0.0, - "learning_rate": 7.251273355419147e-06, - "loss": 0.8564, + "learning_rate": 1.4812716397652994e-06, + "loss": 0.786, "step": 21208 }, { - "epoch": 0.6010088129445437, + "epoch": 0.8298380154941701, "grad_norm": 0.0, - "learning_rate": 7.250390930388725e-06, - "loss": 0.9273, + "learning_rate": 1.480607997554504e-06, + "loss": 1.0247, "step": 21209 }, { - "epoch": 0.601037150386806, + "epoch": 0.8298771421863995, "grad_norm": 0.0, - "learning_rate": 7.249508528519275e-06, - "loss": 0.962, + "learning_rate": 1.4799444921544703e-06, + "loss": 0.9909, "step": 21210 }, { - "epoch": 0.6010654878290685, + "epoch": 0.829916268878629, "grad_norm": 0.0, - "learning_rate": 7.248626149818237e-06, - "loss": 0.8509, + "learning_rate": 1.479281123575851e-06, + "loss": 0.923, "step": 21211 }, { - "epoch": 0.601093825271331, + "epoch": 0.8299553955708584, "grad_norm": 0.0, - "learning_rate": 7.247743794293044e-06, - "loss": 0.814, + "learning_rate": 1.478617891829298e-06, + "loss": 0.9118, "step": 21212 }, { - "epoch": 0.6011221627135934, + "epoch": 0.8299945222630879, "grad_norm": 0.0, - "learning_rate": 7.2468614619511255e-06, - "loss": 0.9143, + "learning_rate": 1.4779547969254671e-06, + "loss": 0.9758, "step": 21213 }, { - "epoch": 0.6011505001558559, + "epoch": 0.8300336489553173, "grad_norm": 0.0, - "learning_rate": 7.245979152799915e-06, - "loss": 0.9299, + "learning_rate": 1.4772918388750001e-06, + "loss": 0.9247, "step": 21214 }, { - "epoch": 0.6011788375981184, + "epoch": 0.8300727756475468, "grad_norm": 0.0, - "learning_rate": 7.2450968668468506e-06, - "loss": 0.7833, + "learning_rate": 1.4766290176885479e-06, + "loss": 1.0244, "step": 21215 }, { - "epoch": 0.6012071750403809, + "epoch": 0.8301119023397762, "grad_norm": 0.0, - "learning_rate": 7.244214604099351e-06, - "loss": 0.7811, + "learning_rate": 1.4759663333767527e-06, + "loss": 1.0735, "step": 21216 }, { - "epoch": 0.6012355124826433, + "epoch": 0.8301510290320057, "grad_norm": 0.0, - "learning_rate": 7.2433323645648615e-06, - "loss": 0.8219, + "learning_rate": 1.4753037859502595e-06, + "loss": 1.0095, "step": 21217 }, { - "epoch": 0.6012638499249058, + "epoch": 0.830190155724235, "grad_norm": 0.0, - "learning_rate": 7.242450148250804e-06, - "loss": 0.8865, + "learning_rate": 1.474641375419703e-06, + "loss": 0.9986, "step": 21218 }, { - "epoch": 0.6012921873671683, + "epoch": 0.8302292824164645, "grad_norm": 0.0, - "learning_rate": 7.24156795516461e-06, - "loss": 0.7319, + "learning_rate": 1.4739791017957228e-06, + "loss": 0.8646, "step": 21219 }, { - "epoch": 0.6013205248094307, + "epoch": 0.8302684091086939, "grad_norm": 0.0, - "learning_rate": 7.240685785313717e-06, - "loss": 0.8235, + "learning_rate": 1.4733169650889556e-06, + "loss": 0.9604, "step": 21220 }, { - "epoch": 0.6013488622516932, + "epoch": 0.8303075358009234, "grad_norm": 0.0, - "learning_rate": 7.23980363870555e-06, - "loss": 0.8893, + "learning_rate": 1.472654965310034e-06, + "loss": 0.9107, "step": 21221 }, { - "epoch": 0.6013771996939556, + "epoch": 0.8303466624931528, "grad_norm": 0.0, - "learning_rate": 7.238921515347547e-06, - "loss": 0.8844, + "learning_rate": 1.4719931024695877e-06, + "loss": 1.0022, "step": 21222 }, { - "epoch": 0.6014055371362181, + "epoch": 0.8303857891853823, "grad_norm": 0.0, - "learning_rate": 7.238039415247129e-06, - "loss": 0.7287, + "learning_rate": 1.4713313765782477e-06, + "loss": 0.9735, "step": 21223 }, { - "epoch": 0.6014338745784805, + "epoch": 0.8304249158776117, "grad_norm": 0.0, - "learning_rate": 7.237157338411729e-06, - "loss": 0.7193, + "learning_rate": 1.4706697876466336e-06, + "loss": 0.892, "step": 21224 }, { - "epoch": 0.601462212020743, + "epoch": 0.8304640425698412, "grad_norm": 0.0, - "learning_rate": 7.2362752848487814e-06, - "loss": 0.825, + "learning_rate": 1.47000833568538e-06, + "loss": 0.9803, "step": 21225 }, { - "epoch": 0.6014905494630055, + "epoch": 0.8305031692620706, "grad_norm": 0.0, - "learning_rate": 7.235393254565713e-06, - "loss": 0.8336, + "learning_rate": 1.469347020705102e-06, + "loss": 0.9642, "step": 21226 }, { - "epoch": 0.6015188869052679, + "epoch": 0.8305422959543001, "grad_norm": 0.0, - "learning_rate": 7.234511247569953e-06, - "loss": 0.7847, + "learning_rate": 1.468685842716423e-06, + "loss": 0.9933, "step": 21227 }, { - "epoch": 0.6015472243475304, + "epoch": 0.8305814226465295, "grad_norm": 0.0, - "learning_rate": 7.233629263868935e-06, - "loss": 0.8127, + "learning_rate": 1.468024801729957e-06, + "loss": 0.887, "step": 21228 }, { - "epoch": 0.6015755617897929, + "epoch": 0.830620549338759, "grad_norm": 0.0, - "learning_rate": 7.232747303470082e-06, - "loss": 0.8657, + "learning_rate": 1.467363897756321e-06, + "loss": 1.0014, "step": 21229 }, { - "epoch": 0.6016038992320553, + "epoch": 0.8306596760309883, "grad_norm": 0.0, - "learning_rate": 7.231865366380828e-06, - "loss": 0.8196, + "learning_rate": 1.4667031308061287e-06, + "loss": 0.9629, "step": 21230 }, { - "epoch": 0.6016322366743178, + "epoch": 0.8306988027232178, "grad_norm": 0.0, - "learning_rate": 7.230983452608598e-06, - "loss": 0.8936, + "learning_rate": 1.4660425008899947e-06, + "loss": 0.8699, "step": 21231 }, { - "epoch": 0.6016605741165802, + "epoch": 0.8307379294154472, "grad_norm": 0.0, - "learning_rate": 7.230101562160822e-06, - "loss": 0.8956, + "learning_rate": 1.465382008018521e-06, + "loss": 0.9178, "step": 21232 }, { - "epoch": 0.6016889115588427, + "epoch": 0.8307770561076766, "grad_norm": 0.0, - "learning_rate": 7.229219695044931e-06, - "loss": 0.8853, + "learning_rate": 1.4647216522023188e-06, + "loss": 0.9376, "step": 21233 }, { - "epoch": 0.6017172490011051, + "epoch": 0.8308161827999061, "grad_norm": 0.0, - "learning_rate": 7.22833785126835e-06, - "loss": 0.8807, + "learning_rate": 1.4640614334519908e-06, + "loss": 0.97, "step": 21234 }, { - "epoch": 0.6017455864433676, + "epoch": 0.8308553094921355, "grad_norm": 0.0, - "learning_rate": 7.2274560308385065e-06, - "loss": 0.8906, + "learning_rate": 1.463401351778142e-06, + "loss": 0.933, "step": 21235 }, { - "epoch": 0.6017739238856301, + "epoch": 0.830894436184365, "grad_norm": 0.0, - "learning_rate": 7.2265742337628374e-06, - "loss": 0.8487, + "learning_rate": 1.4627414071913693e-06, + "loss": 1.0509, "step": 21236 }, { - "epoch": 0.6018022613278925, + "epoch": 0.8309335628765944, "grad_norm": 0.0, - "learning_rate": 7.225692460048756e-06, - "loss": 0.7817, + "learning_rate": 1.4620815997022742e-06, + "loss": 0.9481, "step": 21237 }, { - "epoch": 0.601830598770155, + "epoch": 0.8309726895688239, "grad_norm": 0.0, - "learning_rate": 7.224810709703703e-06, - "loss": 0.8667, + "learning_rate": 1.461421929321445e-06, + "loss": 0.9471, "step": 21238 }, { - "epoch": 0.6018589362124175, + "epoch": 0.8310118162610532, "grad_norm": 0.0, - "learning_rate": 7.223928982735096e-06, - "loss": 0.8784, + "learning_rate": 1.4607623960594842e-06, + "loss": 1.0839, "step": 21239 }, { - "epoch": 0.60188727365468, + "epoch": 0.8310509429532827, "grad_norm": 0.0, - "learning_rate": 7.2230472791503655e-06, - "loss": 0.9157, + "learning_rate": 1.4601029999269767e-06, + "loss": 0.9331, "step": 21240 }, { - "epoch": 0.6019156110969424, + "epoch": 0.8310900696455121, "grad_norm": 0.0, - "learning_rate": 7.222165598956943e-06, - "loss": 0.8989, + "learning_rate": 1.4594437409345175e-06, + "loss": 0.9573, "step": 21241 }, { - "epoch": 0.6019439485392049, + "epoch": 0.8311291963377416, "grad_norm": 0.0, - "learning_rate": 7.2212839421622485e-06, - "loss": 0.877, + "learning_rate": 1.4587846190926846e-06, + "loss": 0.9508, "step": 21242 }, { - "epoch": 0.6019722859814673, + "epoch": 0.831168323029971, "grad_norm": 0.0, - "learning_rate": 7.220402308773711e-06, - "loss": 0.9024, + "learning_rate": 1.458125634412072e-06, + "loss": 0.9221, "step": 21243 }, { - "epoch": 0.6020006234237297, + "epoch": 0.8312074497222005, "grad_norm": 0.0, - "learning_rate": 7.21952069879876e-06, - "loss": 0.9552, + "learning_rate": 1.4574667869032567e-06, + "loss": 0.8709, "step": 21244 }, { - "epoch": 0.6020289608659922, + "epoch": 0.8312465764144299, "grad_norm": 0.0, - "learning_rate": 7.218639112244815e-06, - "loss": 0.8555, + "learning_rate": 1.4568080765768223e-06, + "loss": 0.9938, "step": 21245 }, { - "epoch": 0.6020572983082547, + "epoch": 0.8312857031066594, "grad_norm": 0.0, - "learning_rate": 7.217757549119308e-06, - "loss": 0.9829, + "learning_rate": 1.4561495034433426e-06, + "loss": 0.9168, "step": 21246 }, { - "epoch": 0.6020856357505172, + "epoch": 0.8313248297988888, "grad_norm": 0.0, - "learning_rate": 7.21687600942966e-06, - "loss": 0.9144, + "learning_rate": 1.455491067513396e-06, + "loss": 0.9072, "step": 21247 }, { - "epoch": 0.6021139731927796, + "epoch": 0.8313639564911183, "grad_norm": 0.0, - "learning_rate": 7.215994493183298e-06, - "loss": 0.8131, + "learning_rate": 1.4548327687975562e-06, + "loss": 0.9457, "step": 21248 }, { - "epoch": 0.6021423106350421, + "epoch": 0.8314030831833477, "grad_norm": 0.0, - "learning_rate": 7.215113000387654e-06, - "loss": 0.7248, + "learning_rate": 1.4541746073063967e-06, + "loss": 0.8553, "step": 21249 }, { - "epoch": 0.6021706480773046, + "epoch": 0.8314422098755772, "grad_norm": 0.0, - "learning_rate": 7.214231531050142e-06, - "loss": 0.7444, + "learning_rate": 1.4535165830504815e-06, + "loss": 1.012, "step": 21250 }, { - "epoch": 0.602198985519567, + "epoch": 0.8314813365678065, "grad_norm": 0.0, - "learning_rate": 7.213350085178195e-06, - "loss": 0.9135, + "learning_rate": 1.4528586960403812e-06, + "loss": 0.84, "step": 21251 }, { - "epoch": 0.6022273229618295, + "epoch": 0.831520463260036, "grad_norm": 0.0, - "learning_rate": 7.212468662779233e-06, - "loss": 0.8024, + "learning_rate": 1.4522009462866604e-06, + "loss": 1.0172, "step": 21252 }, { - "epoch": 0.602255660404092, + "epoch": 0.8315595899522654, "grad_norm": 0.0, - "learning_rate": 7.211587263860682e-06, - "loss": 0.8317, + "learning_rate": 1.451543333799883e-06, + "loss": 0.9656, "step": 21253 }, { - "epoch": 0.6022839978463543, + "epoch": 0.8315987166444949, "grad_norm": 0.0, - "learning_rate": 7.210705888429972e-06, - "loss": 0.8649, + "learning_rate": 1.4508858585906061e-06, + "loss": 1.043, "step": 21254 }, { - "epoch": 0.6023123352886168, + "epoch": 0.8316378433367243, "grad_norm": 0.0, - "learning_rate": 7.2098245364945165e-06, - "loss": 0.7661, + "learning_rate": 1.450228520669391e-06, + "loss": 0.9715, "step": 21255 }, { - "epoch": 0.6023406727308793, + "epoch": 0.8316769700289538, "grad_norm": 0.0, - "learning_rate": 7.208943208061746e-06, - "loss": 0.986, + "learning_rate": 1.449571320046791e-06, + "loss": 0.9251, "step": 21256 }, { - "epoch": 0.6023690101731418, + "epoch": 0.8317160967211832, "grad_norm": 0.0, - "learning_rate": 7.208061903139087e-06, - "loss": 0.7608, + "learning_rate": 1.4489142567333614e-06, + "loss": 0.8959, "step": 21257 }, { - "epoch": 0.6023973476154042, + "epoch": 0.8317552234134127, "grad_norm": 0.0, - "learning_rate": 7.207180621733956e-06, - "loss": 0.9004, + "learning_rate": 1.448257330739653e-06, + "loss": 1.0175, "step": 21258 }, { - "epoch": 0.6024256850576667, + "epoch": 0.8317943501056421, "grad_norm": 0.0, - "learning_rate": 7.2062993638537815e-06, - "loss": 0.845, + "learning_rate": 1.4476005420762196e-06, + "loss": 0.9612, "step": 21259 }, { - "epoch": 0.6024540224999292, + "epoch": 0.8318334767978716, "grad_norm": 0.0, - "learning_rate": 7.205418129505982e-06, - "loss": 0.8632, + "learning_rate": 1.4469438907536014e-06, + "loss": 0.9418, "step": 21260 }, { - "epoch": 0.6024823599421916, + "epoch": 0.831872603490101, "grad_norm": 0.0, - "learning_rate": 7.2045369186979845e-06, - "loss": 0.7163, + "learning_rate": 1.446287376782346e-06, + "loss": 0.9322, "step": 21261 }, { - "epoch": 0.6025106973844541, + "epoch": 0.8319117301823303, "grad_norm": 0.0, - "learning_rate": 7.203655731437214e-06, - "loss": 0.8701, + "learning_rate": 1.4456310001730001e-06, + "loss": 0.9458, "step": 21262 }, { - "epoch": 0.6025390348267166, + "epoch": 0.8319508568745598, "grad_norm": 0.0, - "learning_rate": 7.202774567731086e-06, - "loss": 0.9841, + "learning_rate": 1.4449747609360998e-06, + "loss": 0.8749, "step": 21263 }, { - "epoch": 0.602567372268979, + "epoch": 0.8319899835667892, "grad_norm": 0.0, - "learning_rate": 7.201893427587026e-06, - "loss": 0.8422, + "learning_rate": 1.4443186590821835e-06, + "loss": 1.0759, "step": 21264 }, { - "epoch": 0.6025957097112414, + "epoch": 0.8320291102590187, "grad_norm": 0.0, - "learning_rate": 7.201012311012459e-06, - "loss": 0.8689, + "learning_rate": 1.4436626946217891e-06, + "loss": 0.9332, "step": 21265 }, { - "epoch": 0.6026240471535039, + "epoch": 0.8320682369512481, "grad_norm": 0.0, - "learning_rate": 7.200131218014803e-06, - "loss": 0.8101, + "learning_rate": 1.443006867565452e-06, + "loss": 1.08, "step": 21266 }, { - "epoch": 0.6026523845957664, + "epoch": 0.8321073636434776, "grad_norm": 0.0, - "learning_rate": 7.199250148601485e-06, - "loss": 0.864, + "learning_rate": 1.4423511779236998e-06, + "loss": 1.011, "step": 21267 }, { - "epoch": 0.6026807220380288, + "epoch": 0.832146490335707, "grad_norm": 0.0, - "learning_rate": 7.198369102779919e-06, - "loss": 0.8362, + "learning_rate": 1.441695625707067e-06, + "loss": 0.9406, "step": 21268 }, { - "epoch": 0.6027090594802913, + "epoch": 0.8321856170279365, "grad_norm": 0.0, - "learning_rate": 7.197488080557531e-06, - "loss": 0.8651, + "learning_rate": 1.4410402109260734e-06, + "loss": 0.8778, "step": 21269 }, { - "epoch": 0.6027373969225538, + "epoch": 0.8322247437201659, "grad_norm": 0.0, - "learning_rate": 7.196607081941742e-06, - "loss": 0.8458, + "learning_rate": 1.4403849335912545e-06, + "loss": 0.9647, "step": 21270 }, { - "epoch": 0.6027657343648163, + "epoch": 0.8322638704123954, "grad_norm": 0.0, - "learning_rate": 7.1957261069399745e-06, - "loss": 0.7986, + "learning_rate": 1.439729793713125e-06, + "loss": 0.951, "step": 21271 }, { - "epoch": 0.6027940718070787, + "epoch": 0.8323029971046247, "grad_norm": 0.0, - "learning_rate": 7.1948451555596445e-06, - "loss": 0.7279, + "learning_rate": 1.4390747913022108e-06, + "loss": 0.983, "step": 21272 }, { - "epoch": 0.6028224092493412, + "epoch": 0.8323421237968542, "grad_norm": 0.0, - "learning_rate": 7.193964227808177e-06, - "loss": 0.8889, + "learning_rate": 1.4384199263690223e-06, + "loss": 0.8877, "step": 21273 }, { - "epoch": 0.6028507466916037, + "epoch": 0.8323812504890836, "grad_norm": 0.0, - "learning_rate": 7.193083323692989e-06, - "loss": 0.8197, + "learning_rate": 1.437765198924087e-06, + "loss": 1.065, "step": 21274 }, { - "epoch": 0.602879084133866, + "epoch": 0.8324203771813131, "grad_norm": 0.0, - "learning_rate": 7.192202443221508e-06, - "loss": 0.7793, + "learning_rate": 1.437110608977912e-06, + "loss": 0.9195, "step": 21275 }, { - "epoch": 0.6029074215761285, + "epoch": 0.8324595038735425, "grad_norm": 0.0, - "learning_rate": 7.191321586401143e-06, - "loss": 0.8215, + "learning_rate": 1.4364561565410117e-06, + "loss": 1.1031, "step": 21276 }, { - "epoch": 0.602935759018391, + "epoch": 0.832498630565772, "grad_norm": 0.0, - "learning_rate": 7.1904407532393196e-06, - "loss": 0.9083, + "learning_rate": 1.4358018416238928e-06, + "loss": 0.8553, "step": 21277 }, { - "epoch": 0.6029640964606534, + "epoch": 0.8325377572580014, "grad_norm": 0.0, - "learning_rate": 7.189559943743458e-06, - "loss": 0.8323, + "learning_rate": 1.4351476642370654e-06, + "loss": 0.8354, "step": 21278 }, { - "epoch": 0.6029924339029159, + "epoch": 0.8325768839502309, "grad_norm": 0.0, - "learning_rate": 7.188679157920977e-06, - "loss": 0.9594, + "learning_rate": 1.4344936243910335e-06, + "loss": 0.9024, "step": 21279 }, { - "epoch": 0.6030207713451784, + "epoch": 0.8326160106424603, "grad_norm": 0.0, - "learning_rate": 7.187798395779298e-06, - "loss": 0.8465, + "learning_rate": 1.433839722096303e-06, + "loss": 0.9846, "step": 21280 }, { - "epoch": 0.6030491087874409, + "epoch": 0.8326551373346898, "grad_norm": 0.0, - "learning_rate": 7.186917657325833e-06, - "loss": 0.8116, + "learning_rate": 1.433185957363371e-06, + "loss": 1.0498, "step": 21281 }, { - "epoch": 0.6030774462297033, + "epoch": 0.8326942640269192, "grad_norm": 0.0, - "learning_rate": 7.186036942568004e-06, - "loss": 0.939, + "learning_rate": 1.4325323302027371e-06, + "loss": 1.0326, "step": 21282 }, { - "epoch": 0.6031057836719658, + "epoch": 0.8327333907191486, "grad_norm": 0.0, - "learning_rate": 7.185156251513236e-06, - "loss": 0.8894, + "learning_rate": 1.4318788406248985e-06, + "loss": 1.0126, "step": 21283 }, { - "epoch": 0.6031341211142283, + "epoch": 0.832772517411378, "grad_norm": 0.0, - "learning_rate": 7.1842755841689385e-06, - "loss": 0.857, + "learning_rate": 1.4312254886403521e-06, + "loss": 0.9881, "step": 21284 }, { - "epoch": 0.6031624585564906, + "epoch": 0.8328116441036075, "grad_norm": 0.0, - "learning_rate": 7.183394940542532e-06, - "loss": 0.8823, + "learning_rate": 1.430572274259584e-06, + "loss": 1.0184, "step": 21285 }, { - "epoch": 0.6031907959987531, + "epoch": 0.8328507707958369, "grad_norm": 0.0, - "learning_rate": 7.1825143206414425e-06, - "loss": 0.7734, + "learning_rate": 1.4299191974930904e-06, + "loss": 0.8704, "step": 21286 }, { - "epoch": 0.6032191334410156, + "epoch": 0.8328898974880664, "grad_norm": 0.0, - "learning_rate": 7.181633724473075e-06, - "loss": 0.7884, + "learning_rate": 1.4292662583513495e-06, + "loss": 0.8705, "step": 21287 }, { - "epoch": 0.6032474708832781, + "epoch": 0.8329290241802958, "grad_norm": 0.0, - "learning_rate": 7.180753152044859e-06, - "loss": 0.9077, + "learning_rate": 1.4286134568448585e-06, + "loss": 0.8804, "step": 21288 }, { - "epoch": 0.6032758083255405, + "epoch": 0.8329681508725252, "grad_norm": 0.0, - "learning_rate": 7.1798726033642e-06, - "loss": 0.8425, + "learning_rate": 1.4279607929840922e-06, + "loss": 1.0775, "step": 21289 }, { - "epoch": 0.603304145767803, + "epoch": 0.8330072775647547, "grad_norm": 0.0, - "learning_rate": 7.178992078438522e-06, - "loss": 0.8261, + "learning_rate": 1.427308266779537e-06, + "loss": 0.9059, "step": 21290 }, { - "epoch": 0.6033324832100655, + "epoch": 0.8330464042569841, "grad_norm": 0.0, - "learning_rate": 7.178111577275244e-06, - "loss": 0.8292, + "learning_rate": 1.4266558782416628e-06, + "loss": 0.9082, "step": 21291 }, { - "epoch": 0.6033608206523279, + "epoch": 0.8330855309492136, "grad_norm": 0.0, - "learning_rate": 7.177231099881778e-06, - "loss": 0.9546, + "learning_rate": 1.4260036273809585e-06, + "loss": 0.9477, "step": 21292 }, { - "epoch": 0.6033891580945904, + "epoch": 0.833124657641443, "grad_norm": 0.0, - "learning_rate": 7.176350646265542e-06, - "loss": 0.8924, + "learning_rate": 1.4253515142078888e-06, + "loss": 0.9509, "step": 21293 }, { - "epoch": 0.6034174955368529, + "epoch": 0.8331637843336724, "grad_norm": 0.0, - "learning_rate": 7.1754702164339575e-06, - "loss": 0.8716, + "learning_rate": 1.424699538732931e-06, + "loss": 1.0138, "step": 21294 }, { - "epoch": 0.6034458329791154, + "epoch": 0.8332029110259018, "grad_norm": 0.0, - "learning_rate": 7.174589810394432e-06, - "loss": 0.8254, + "learning_rate": 1.4240477009665521e-06, + "loss": 1.0442, "step": 21295 }, { - "epoch": 0.6034741704213777, + "epoch": 0.8332420377181313, "grad_norm": 0.0, - "learning_rate": 7.17370942815439e-06, - "loss": 0.7878, + "learning_rate": 1.4233960009192204e-06, + "loss": 0.9395, "step": 21296 }, { - "epoch": 0.6035025078636402, + "epoch": 0.8332811644103607, "grad_norm": 0.0, - "learning_rate": 7.172829069721238e-06, - "loss": 0.9081, + "learning_rate": 1.422744438601401e-06, + "loss": 1.0452, "step": 21297 }, { - "epoch": 0.6035308453059027, + "epoch": 0.8333202911025902, "grad_norm": 0.0, - "learning_rate": 7.171948735102396e-06, - "loss": 0.8417, + "learning_rate": 1.4220930140235613e-06, + "loss": 1.0301, "step": 21298 }, { - "epoch": 0.6035591827481651, + "epoch": 0.8333594177948196, "grad_norm": 0.0, - "learning_rate": 7.171068424305286e-06, - "loss": 0.8, + "learning_rate": 1.4214417271961567e-06, + "loss": 0.8855, "step": 21299 }, { - "epoch": 0.6035875201904276, + "epoch": 0.8333985444870491, "grad_norm": 0.0, - "learning_rate": 7.170188137337313e-06, - "loss": 0.8336, + "learning_rate": 1.4207905781296483e-06, + "loss": 0.9364, "step": 21300 }, { - "epoch": 0.6036158576326901, + "epoch": 0.8334376711792785, "grad_norm": 0.0, - "learning_rate": 7.169307874205896e-06, - "loss": 0.7831, + "learning_rate": 1.4201395668344953e-06, + "loss": 1.0471, "step": 21301 }, { - "epoch": 0.6036441950749525, + "epoch": 0.833476797871508, "grad_norm": 0.0, - "learning_rate": 7.168427634918453e-06, - "loss": 0.8453, + "learning_rate": 1.4194886933211471e-06, + "loss": 1.063, "step": 21302 }, { - "epoch": 0.603672532517215, + "epoch": 0.8335159245637374, "grad_norm": 0.0, - "learning_rate": 7.167547419482393e-06, - "loss": 0.8806, + "learning_rate": 1.418837957600059e-06, + "loss": 0.8733, "step": 21303 }, { - "epoch": 0.6037008699594775, + "epoch": 0.8335550512559669, "grad_norm": 0.0, - "learning_rate": 7.1666672279051345e-06, - "loss": 0.7646, + "learning_rate": 1.418187359681682e-06, + "loss": 0.9975, "step": 21304 }, { - "epoch": 0.60372920740174, + "epoch": 0.8335941779481962, "grad_norm": 0.0, - "learning_rate": 7.165787060194087e-06, - "loss": 0.7922, + "learning_rate": 1.4175368995764604e-06, + "loss": 0.9657, "step": 21305 }, { - "epoch": 0.6037575448440023, + "epoch": 0.8336333046404257, "grad_norm": 0.0, - "learning_rate": 7.1649069163566685e-06, - "loss": 0.8542, + "learning_rate": 1.4168865772948414e-06, + "loss": 0.9841, "step": 21306 }, { - "epoch": 0.6037858822862648, + "epoch": 0.8336724313326551, "grad_norm": 0.0, - "learning_rate": 7.1640267964002965e-06, - "loss": 0.8293, + "learning_rate": 1.4162363928472722e-06, + "loss": 0.8973, "step": 21307 }, { - "epoch": 0.6038142197285273, + "epoch": 0.8337115580248846, "grad_norm": 0.0, - "learning_rate": 7.163146700332374e-06, - "loss": 0.8633, + "learning_rate": 1.4155863462441876e-06, + "loss": 0.968, "step": 21308 }, { - "epoch": 0.6038425571707897, + "epoch": 0.833750684717114, "grad_norm": 0.0, - "learning_rate": 7.1622666281603235e-06, - "loss": 0.9361, + "learning_rate": 1.4149364374960294e-06, + "loss": 1.0251, "step": 21309 }, { - "epoch": 0.6038708946130522, + "epoch": 0.8337898114093435, "grad_norm": 0.0, - "learning_rate": 7.161386579891552e-06, - "loss": 0.8785, + "learning_rate": 1.4142866666132337e-06, + "loss": 0.916, "step": 21310 }, { - "epoch": 0.6038992320553147, + "epoch": 0.8338289381015729, "grad_norm": 0.0, - "learning_rate": 7.160506555533476e-06, - "loss": 0.9375, + "learning_rate": 1.413637033606239e-06, + "loss": 1.0287, "step": 21311 }, { - "epoch": 0.6039275694975772, + "epoch": 0.8338680647938024, "grad_norm": 0.0, - "learning_rate": 7.159626555093513e-06, - "loss": 0.7942, + "learning_rate": 1.4129875384854708e-06, + "loss": 0.7816, "step": 21312 }, { - "epoch": 0.6039559069398396, + "epoch": 0.8339071914860318, "grad_norm": 0.0, - "learning_rate": 7.158746578579065e-06, - "loss": 0.7924, + "learning_rate": 1.4123381812613657e-06, + "loss": 1.0399, "step": 21313 }, { - "epoch": 0.6039842443821021, + "epoch": 0.8339463181782613, "grad_norm": 0.0, - "learning_rate": 7.157866625997549e-06, - "loss": 0.8082, + "learning_rate": 1.4116889619443431e-06, + "loss": 0.8451, "step": 21314 }, { - "epoch": 0.6040125818243646, + "epoch": 0.8339854448704906, "grad_norm": 0.0, - "learning_rate": 7.156986697356383e-06, - "loss": 0.8736, + "learning_rate": 1.41103988054484e-06, + "loss": 1.011, "step": 21315 }, { - "epoch": 0.604040919266627, + "epoch": 0.8340245715627201, "grad_norm": 0.0, - "learning_rate": 7.156106792662969e-06, - "loss": 0.8202, + "learning_rate": 1.4103909370732704e-06, + "loss": 0.985, "step": 21316 }, { - "epoch": 0.6040692567088894, + "epoch": 0.8340636982549495, "grad_norm": 0.0, - "learning_rate": 7.155226911924727e-06, - "loss": 0.9435, + "learning_rate": 1.4097421315400617e-06, + "loss": 0.8886, "step": 21317 }, { - "epoch": 0.6040975941511519, + "epoch": 0.8341028249471789, "grad_norm": 0.0, - "learning_rate": 7.154347055149061e-06, - "loss": 0.85, + "learning_rate": 1.4090934639556252e-06, + "loss": 0.97, "step": 21318 }, { - "epoch": 0.6041259315934144, + "epoch": 0.8341419516394084, "grad_norm": 0.0, - "learning_rate": 7.153467222343386e-06, - "loss": 0.9477, + "learning_rate": 1.4084449343303874e-06, + "loss": 0.816, "step": 21319 }, { - "epoch": 0.6041542690356768, + "epoch": 0.8341810783316378, "grad_norm": 0.0, - "learning_rate": 7.1525874135151204e-06, - "loss": 0.8734, + "learning_rate": 1.4077965426747564e-06, + "loss": 0.8493, "step": 21320 }, { - "epoch": 0.6041826064779393, + "epoch": 0.8342202050238673, "grad_norm": 0.0, - "learning_rate": 7.151707628671662e-06, - "loss": 0.8113, + "learning_rate": 1.4071482889991472e-06, + "loss": 0.9699, "step": 21321 }, { - "epoch": 0.6042109439202018, + "epoch": 0.8342593317160967, "grad_norm": 0.0, - "learning_rate": 7.15082786782043e-06, - "loss": 0.8286, + "learning_rate": 1.4065001733139683e-06, + "loss": 0.9069, "step": 21322 }, { - "epoch": 0.6042392813624642, + "epoch": 0.8342984584083262, "grad_norm": 0.0, - "learning_rate": 7.1499481309688336e-06, - "loss": 0.972, + "learning_rate": 1.4058521956296278e-06, + "loss": 0.8895, "step": 21323 }, { - "epoch": 0.6042676188047267, + "epoch": 0.8343375851005556, "grad_norm": 0.0, - "learning_rate": 7.149068418124281e-06, - "loss": 0.821, + "learning_rate": 1.405204355956532e-06, + "loss": 0.8512, "step": 21324 }, { - "epoch": 0.6042959562469892, + "epoch": 0.834376711792785, "grad_norm": 0.0, - "learning_rate": 7.148188729294188e-06, - "loss": 0.9415, + "learning_rate": 1.404556654305086e-06, + "loss": 0.9451, "step": 21325 }, { - "epoch": 0.6043242936892516, + "epoch": 0.8344158384850144, "grad_norm": 0.0, - "learning_rate": 7.1473090644859555e-06, - "loss": 0.9353, + "learning_rate": 1.4039090906856877e-06, + "loss": 0.9846, "step": 21326 }, { - "epoch": 0.604352631131514, + "epoch": 0.8344549651772439, "grad_norm": 0.0, - "learning_rate": 7.146429423706998e-06, - "loss": 0.742, + "learning_rate": 1.4032616651087382e-06, + "loss": 0.9691, "step": 21327 }, { - "epoch": 0.6043809685737765, + "epoch": 0.8344940918694733, "grad_norm": 0.0, - "learning_rate": 7.14554980696473e-06, - "loss": 0.8657, + "learning_rate": 1.4026143775846334e-06, + "loss": 0.8464, "step": 21328 }, { - "epoch": 0.604409306016039, + "epoch": 0.8345332185617028, "grad_norm": 0.0, - "learning_rate": 7.144670214266551e-06, - "loss": 0.8691, + "learning_rate": 1.4019672281237716e-06, + "loss": 0.952, "step": 21329 }, { - "epoch": 0.6044376434583014, + "epoch": 0.8345723452539322, "grad_norm": 0.0, - "learning_rate": 7.143790645619875e-06, - "loss": 0.8381, + "learning_rate": 1.401320216736539e-06, + "loss": 0.9403, "step": 21330 }, { - "epoch": 0.6044659809005639, + "epoch": 0.8346114719461617, "grad_norm": 0.0, - "learning_rate": 7.142911101032114e-06, - "loss": 0.9166, + "learning_rate": 1.4006733434333297e-06, + "loss": 0.9908, "step": 21331 }, { - "epoch": 0.6044943183428264, + "epoch": 0.8346505986383911, "grad_norm": 0.0, - "learning_rate": 7.142031580510671e-06, - "loss": 0.8548, + "learning_rate": 1.4000266082245305e-06, + "loss": 1.0227, "step": 21332 }, { - "epoch": 0.6045226557850888, + "epoch": 0.8346897253306206, "grad_norm": 0.0, - "learning_rate": 7.141152084062962e-06, - "loss": 0.9295, + "learning_rate": 1.3993800111205302e-06, + "loss": 1.0024, "step": 21333 }, { - "epoch": 0.6045509932273513, + "epoch": 0.83472885202285, "grad_norm": 0.0, - "learning_rate": 7.140272611696386e-06, - "loss": 0.782, + "learning_rate": 1.3987335521317068e-06, + "loss": 0.9378, "step": 21334 }, { - "epoch": 0.6045793306696138, + "epoch": 0.8347679787150795, "grad_norm": 0.0, - "learning_rate": 7.139393163418355e-06, - "loss": 0.8457, + "learning_rate": 1.398087231268448e-06, + "loss": 0.828, "step": 21335 }, { - "epoch": 0.6046076681118763, + "epoch": 0.8348071054073088, "grad_norm": 0.0, - "learning_rate": 7.138513739236281e-06, - "loss": 0.8552, + "learning_rate": 1.3974410485411238e-06, + "loss": 0.9512, "step": 21336 }, { - "epoch": 0.6046360055541387, + "epoch": 0.8348462320995383, "grad_norm": 0.0, - "learning_rate": 7.137634339157566e-06, - "loss": 0.7963, + "learning_rate": 1.396795003960122e-06, + "loss": 1.0194, "step": 21337 }, { - "epoch": 0.6046643429964011, + "epoch": 0.8348853587917677, "grad_norm": 0.0, - "learning_rate": 7.136754963189625e-06, - "loss": 0.7577, + "learning_rate": 1.3961490975358095e-06, + "loss": 1.0229, "step": 21338 }, { - "epoch": 0.6046926804386636, + "epoch": 0.8349244854839972, "grad_norm": 0.0, - "learning_rate": 7.1358756113398545e-06, - "loss": 0.7423, + "learning_rate": 1.3955033292785636e-06, + "loss": 1.0191, "step": 21339 }, { - "epoch": 0.604721017880926, + "epoch": 0.8349636121762266, "grad_norm": 0.0, - "learning_rate": 7.134996283615667e-06, - "loss": 0.8485, + "learning_rate": 1.3948576991987495e-06, + "loss": 0.8223, "step": 21340 }, { - "epoch": 0.6047493553231885, + "epoch": 0.8350027388684561, "grad_norm": 0.0, - "learning_rate": 7.134116980024474e-06, - "loss": 0.8356, + "learning_rate": 1.3942122073067388e-06, + "loss": 0.9495, "step": 21341 }, { - "epoch": 0.604777692765451, + "epoch": 0.8350418655606855, "grad_norm": 0.0, - "learning_rate": 7.133237700573676e-06, - "loss": 0.8393, + "learning_rate": 1.3935668536128955e-06, + "loss": 0.9417, "step": 21342 }, { - "epoch": 0.6048060302077134, + "epoch": 0.835080992252915, "grad_norm": 0.0, - "learning_rate": 7.132358445270679e-06, - "loss": 0.8674, + "learning_rate": 1.3929216381275866e-06, + "loss": 0.8976, "step": 21343 }, { - "epoch": 0.6048343676499759, + "epoch": 0.8351201189451444, "grad_norm": 0.0, - "learning_rate": 7.131479214122894e-06, - "loss": 0.923, + "learning_rate": 1.3922765608611687e-06, + "loss": 0.8626, "step": 21344 }, { - "epoch": 0.6048627050922384, + "epoch": 0.8351592456373739, "grad_norm": 0.0, - "learning_rate": 7.130600007137724e-06, - "loss": 0.8161, + "learning_rate": 1.3916316218240034e-06, + "loss": 1.0572, "step": 21345 }, { - "epoch": 0.6048910425345009, + "epoch": 0.8351983723296033, "grad_norm": 0.0, - "learning_rate": 7.129720824322579e-06, - "loss": 0.7991, + "learning_rate": 1.39098682102645e-06, + "loss": 0.9258, "step": 21346 }, { - "epoch": 0.6049193799767633, + "epoch": 0.8352374990218326, "grad_norm": 0.0, - "learning_rate": 7.128841665684856e-06, - "loss": 0.7769, + "learning_rate": 1.3903421584788579e-06, + "loss": 0.9478, "step": 21347 }, { - "epoch": 0.6049477174190258, + "epoch": 0.8352766257140621, "grad_norm": 0.0, - "learning_rate": 7.1279625312319675e-06, - "loss": 0.8516, + "learning_rate": 1.3896976341915814e-06, + "loss": 0.9173, "step": 21348 }, { - "epoch": 0.6049760548612882, + "epoch": 0.8353157524062915, "grad_norm": 0.0, - "learning_rate": 7.127083420971319e-06, - "loss": 0.9318, + "learning_rate": 1.389053248174973e-06, + "loss": 1.0143, "step": 21349 }, { - "epoch": 0.6050043923035506, + "epoch": 0.835354879098521, "grad_norm": 0.0, - "learning_rate": 7.126204334910312e-06, - "loss": 0.8467, + "learning_rate": 1.3884090004393803e-06, + "loss": 0.9366, "step": 21350 }, { - "epoch": 0.6050327297458131, + "epoch": 0.8353940057907504, "grad_norm": 0.0, - "learning_rate": 7.125325273056351e-06, - "loss": 0.9298, + "learning_rate": 1.3877648909951468e-06, + "loss": 0.947, "step": 21351 }, { - "epoch": 0.6050610671880756, + "epoch": 0.8354331324829799, "grad_norm": 0.0, - "learning_rate": 7.124446235416849e-06, - "loss": 0.794, + "learning_rate": 1.3871209198526191e-06, + "loss": 0.9763, "step": 21352 }, { - "epoch": 0.6050894046303381, + "epoch": 0.8354722591752093, "grad_norm": 0.0, - "learning_rate": 7.123567221999199e-06, - "loss": 0.8698, + "learning_rate": 1.3864770870221344e-06, + "loss": 1.0345, "step": 21353 }, { - "epoch": 0.6051177420726005, + "epoch": 0.8355113858674388, "grad_norm": 0.0, - "learning_rate": 7.122688232810815e-06, - "loss": 0.832, + "learning_rate": 1.3858333925140354e-06, + "loss": 1.0275, "step": 21354 }, { - "epoch": 0.605146079514863, + "epoch": 0.8355505125596682, "grad_norm": 0.0, - "learning_rate": 7.121809267859092e-06, - "loss": 0.849, + "learning_rate": 1.3851898363386574e-06, + "loss": 0.9193, "step": 21355 }, { - "epoch": 0.6051744169571255, + "epoch": 0.8355896392518977, "grad_norm": 0.0, - "learning_rate": 7.120930327151439e-06, - "loss": 0.834, + "learning_rate": 1.3845464185063373e-06, + "loss": 0.9969, "step": 21356 }, { - "epoch": 0.6052027543993879, + "epoch": 0.835628765944127, "grad_norm": 0.0, - "learning_rate": 7.1200514106952586e-06, - "loss": 0.8884, + "learning_rate": 1.383903139027404e-06, + "loss": 0.9254, "step": 21357 }, { - "epoch": 0.6052310918416504, + "epoch": 0.8356678926363565, "grad_norm": 0.0, - "learning_rate": 7.1191725184979554e-06, - "loss": 0.8912, + "learning_rate": 1.3832599979121907e-06, + "loss": 1.0135, "step": 21358 }, { - "epoch": 0.6052594292839129, + "epoch": 0.8357070193285859, "grad_norm": 0.0, - "learning_rate": 7.118293650566931e-06, - "loss": 0.9155, + "learning_rate": 1.3826169951710234e-06, + "loss": 0.9654, "step": 21359 }, { - "epoch": 0.6052877667261753, + "epoch": 0.8357461460208154, "grad_norm": 0.0, - "learning_rate": 7.117414806909593e-06, - "loss": 0.9268, + "learning_rate": 1.381974130814231e-06, + "loss": 0.9631, "step": 21360 }, { - "epoch": 0.6053161041684377, + "epoch": 0.8357852727130448, "grad_norm": 0.0, - "learning_rate": 7.1165359875333374e-06, - "loss": 0.8896, + "learning_rate": 1.3813314048521332e-06, + "loss": 0.9417, "step": 21361 }, { - "epoch": 0.6053444416107002, + "epoch": 0.8358243994052743, "grad_norm": 0.0, - "learning_rate": 7.115657192445571e-06, - "loss": 0.7774, + "learning_rate": 1.380688817295056e-06, + "loss": 1.034, "step": 21362 }, { - "epoch": 0.6053727790529627, + "epoch": 0.8358635260975037, "grad_norm": 0.0, - "learning_rate": 7.114778421653693e-06, - "loss": 0.9168, + "learning_rate": 1.3800463681533104e-06, + "loss": 1.0363, "step": 21363 }, { - "epoch": 0.6054011164952251, + "epoch": 0.8359026527897332, "grad_norm": 0.0, - "learning_rate": 7.113899675165108e-06, - "loss": 0.8763, + "learning_rate": 1.3794040574372247e-06, + "loss": 0.9132, "step": 21364 }, { - "epoch": 0.6054294539374876, + "epoch": 0.8359417794819626, "grad_norm": 0.0, - "learning_rate": 7.113020952987222e-06, - "loss": 0.9931, + "learning_rate": 1.3787618851571038e-06, + "loss": 0.9402, "step": 21365 }, { - "epoch": 0.6054577913797501, + "epoch": 0.8359809061741921, "grad_norm": 0.0, - "learning_rate": 7.112142255127427e-06, - "loss": 0.8709, + "learning_rate": 1.3781198513232675e-06, + "loss": 0.8636, "step": 21366 }, { - "epoch": 0.6054861288220125, + "epoch": 0.8360200328664215, "grad_norm": 0.0, - "learning_rate": 7.111263581593137e-06, - "loss": 0.913, + "learning_rate": 1.3774779559460171e-06, + "loss": 0.8662, "step": 21367 }, { - "epoch": 0.605514466264275, + "epoch": 0.836059159558651, "grad_norm": 0.0, - "learning_rate": 7.1103849323917406e-06, - "loss": 0.9552, + "learning_rate": 1.3768361990356705e-06, + "loss": 0.9764, "step": 21368 }, { - "epoch": 0.6055428037065375, + "epoch": 0.8360982862508803, "grad_norm": 0.0, - "learning_rate": 7.109506307530646e-06, - "loss": 0.9327, + "learning_rate": 1.3761945806025279e-06, + "loss": 0.8917, "step": 21369 }, { - "epoch": 0.6055711411488, + "epoch": 0.8361374129431098, "grad_norm": 0.0, - "learning_rate": 7.108627707017255e-06, - "loss": 0.8772, + "learning_rate": 1.3755531006568956e-06, + "loss": 0.9948, "step": 21370 }, { - "epoch": 0.6055994785910623, + "epoch": 0.8361765396353392, "grad_norm": 0.0, - "learning_rate": 7.107749130858963e-06, - "loss": 0.9135, + "learning_rate": 1.374911759209071e-06, + "loss": 0.9473, "step": 21371 }, { - "epoch": 0.6056278160333248, + "epoch": 0.8362156663275687, "grad_norm": 0.0, - "learning_rate": 7.1068705790631766e-06, - "loss": 0.8642, + "learning_rate": 1.3742705562693559e-06, + "loss": 0.9938, "step": 21372 }, { - "epoch": 0.6056561534755873, + "epoch": 0.8362547930197981, "grad_norm": 0.0, - "learning_rate": 7.105992051637296e-06, - "loss": 0.7909, + "learning_rate": 1.3736294918480475e-06, + "loss": 0.8256, "step": 21373 }, { - "epoch": 0.6056844909178497, + "epoch": 0.8362939197120276, "grad_norm": 0.0, - "learning_rate": 7.1051135485887146e-06, - "loss": 0.8729, + "learning_rate": 1.372988565955442e-06, + "loss": 1.0381, "step": 21374 }, { - "epoch": 0.6057128283601122, + "epoch": 0.836333046404257, "grad_norm": 0.0, - "learning_rate": 7.1042350699248394e-06, - "loss": 0.9041, + "learning_rate": 1.372347778601828e-06, + "loss": 1.1286, "step": 21375 }, { - "epoch": 0.6057411658023747, + "epoch": 0.8363721730964864, "grad_norm": 0.0, - "learning_rate": 7.103356615653065e-06, - "loss": 0.743, + "learning_rate": 1.371707129797497e-06, + "loss": 0.8997, "step": 21376 }, { - "epoch": 0.6057695032446372, + "epoch": 0.8364112997887159, "grad_norm": 0.0, - "learning_rate": 7.102478185780794e-06, - "loss": 0.9404, + "learning_rate": 1.3710666195527377e-06, + "loss": 0.8284, "step": 21377 }, { - "epoch": 0.6057978406868996, + "epoch": 0.8364504264809453, "grad_norm": 0.0, - "learning_rate": 7.10159978031543e-06, - "loss": 0.9211, + "learning_rate": 1.3704262478778385e-06, + "loss": 0.8908, "step": 21378 }, { - "epoch": 0.6058261781291621, + "epoch": 0.8364895531731747, "grad_norm": 0.0, - "learning_rate": 7.100721399264363e-06, - "loss": 0.8971, + "learning_rate": 1.3697860147830778e-06, + "loss": 1.0914, "step": 21379 }, { - "epoch": 0.6058545155714246, + "epoch": 0.8365286798654041, "grad_norm": 0.0, - "learning_rate": 7.0998430426349955e-06, - "loss": 0.8593, + "learning_rate": 1.3691459202787417e-06, + "loss": 1.1274, "step": 21380 }, { - "epoch": 0.6058828530136869, + "epoch": 0.8365678065576336, "grad_norm": 0.0, - "learning_rate": 7.0989647104347306e-06, - "loss": 0.8298, + "learning_rate": 1.368505964375102e-06, + "loss": 0.8129, "step": 21381 }, { - "epoch": 0.6059111904559494, + "epoch": 0.836606933249863, "grad_norm": 0.0, - "learning_rate": 7.0980864026709605e-06, - "loss": 0.8155, + "learning_rate": 1.3678661470824461e-06, + "loss": 0.9144, "step": 21382 }, { - "epoch": 0.6059395278982119, + "epoch": 0.8366460599420925, "grad_norm": 0.0, - "learning_rate": 7.097208119351089e-06, - "loss": 0.8816, + "learning_rate": 1.3672264684110404e-06, + "loss": 0.8026, "step": 21383 }, { - "epoch": 0.6059678653404744, + "epoch": 0.8366851866343219, "grad_norm": 0.0, - "learning_rate": 7.096329860482507e-06, - "loss": 0.8392, + "learning_rate": 1.366586928371163e-06, + "loss": 1.0786, "step": 21384 }, { - "epoch": 0.6059962027827368, + "epoch": 0.8367243133265514, "grad_norm": 0.0, - "learning_rate": 7.095451626072618e-06, - "loss": 0.8501, + "learning_rate": 1.3659475269730782e-06, + "loss": 0.9825, "step": 21385 }, { - "epoch": 0.6060245402249993, + "epoch": 0.8367634400187808, "grad_norm": 0.0, - "learning_rate": 7.094573416128823e-06, - "loss": 0.9145, + "learning_rate": 1.3653082642270575e-06, + "loss": 0.8557, "step": 21386 }, { - "epoch": 0.6060528776672618, + "epoch": 0.8368025667110103, "grad_norm": 0.0, - "learning_rate": 7.093695230658511e-06, - "loss": 0.7761, + "learning_rate": 1.3646691401433666e-06, + "loss": 0.95, "step": 21387 }, { - "epoch": 0.6060812151095242, + "epoch": 0.8368416934032397, "grad_norm": 0.0, - "learning_rate": 7.092817069669082e-06, - "loss": 0.8777, + "learning_rate": 1.3640301547322698e-06, + "loss": 0.892, "step": 21388 }, { - "epoch": 0.6061095525517867, + "epoch": 0.8368808200954692, "grad_norm": 0.0, - "learning_rate": 7.0919389331679365e-06, - "loss": 0.8083, + "learning_rate": 1.363391308004025e-06, + "loss": 0.9225, "step": 21389 }, { - "epoch": 0.6061378899940492, + "epoch": 0.8369199467876985, "grad_norm": 0.0, - "learning_rate": 7.091060821162468e-06, - "loss": 0.8801, + "learning_rate": 1.3627525999688952e-06, + "loss": 1.0656, "step": 21390 }, { - "epoch": 0.6061662274363115, + "epoch": 0.836959073479928, "grad_norm": 0.0, - "learning_rate": 7.0901827336600795e-06, - "loss": 0.8805, + "learning_rate": 1.3621140306371362e-06, + "loss": 0.9826, "step": 21391 }, { - "epoch": 0.606194564878574, + "epoch": 0.8369982001721574, "grad_norm": 0.0, - "learning_rate": 7.089304670668158e-06, - "loss": 0.8949, + "learning_rate": 1.361475600019e-06, + "loss": 0.837, "step": 21392 }, { - "epoch": 0.6062229023208365, + "epoch": 0.8370373268643869, "grad_norm": 0.0, - "learning_rate": 7.088426632194103e-06, - "loss": 0.879, + "learning_rate": 1.3608373081247417e-06, + "loss": 0.9626, "step": 21393 }, { - "epoch": 0.606251239763099, + "epoch": 0.8370764535566163, "grad_norm": 0.0, - "learning_rate": 7.087548618245314e-06, - "loss": 0.7933, + "learning_rate": 1.3601991549646098e-06, + "loss": 1.0288, "step": 21394 }, { - "epoch": 0.6062795772053614, + "epoch": 0.8371155802488458, "grad_norm": 0.0, - "learning_rate": 7.086670628829182e-06, - "loss": 0.8806, + "learning_rate": 1.3595611405488563e-06, + "loss": 0.9415, "step": 21395 }, { - "epoch": 0.6063079146476239, + "epoch": 0.8371547069410752, "grad_norm": 0.0, - "learning_rate": 7.0857926639531104e-06, - "loss": 0.8423, + "learning_rate": 1.3589232648877205e-06, + "loss": 0.972, "step": 21396 }, { - "epoch": 0.6063362520898864, + "epoch": 0.8371938336333047, "grad_norm": 0.0, - "learning_rate": 7.084914723624483e-06, - "loss": 0.8282, + "learning_rate": 1.358285527991453e-06, + "loss": 0.896, "step": 21397 }, { - "epoch": 0.6063645895321488, + "epoch": 0.8372329603255341, "grad_norm": 0.0, - "learning_rate": 7.084036807850704e-06, - "loss": 0.7643, + "learning_rate": 1.3576479298702849e-06, + "loss": 0.9951, "step": 21398 }, { - "epoch": 0.6063929269744113, + "epoch": 0.8372720870177636, "grad_norm": 0.0, - "learning_rate": 7.083158916639169e-06, - "loss": 0.9376, + "learning_rate": 1.3570104705344678e-06, + "loss": 0.8919, "step": 21399 }, { - "epoch": 0.6064212644166738, + "epoch": 0.837311213709993, "grad_norm": 0.0, - "learning_rate": 7.082281049997265e-06, - "loss": 0.8425, + "learning_rate": 1.356373149994229e-06, + "loss": 1.0326, "step": 21400 }, { - "epoch": 0.6064496018589363, + "epoch": 0.8373503404022224, "grad_norm": 0.0, - "learning_rate": 7.081403207932391e-06, - "loss": 0.8712, + "learning_rate": 1.3557359682598092e-06, + "loss": 0.8878, "step": 21401 }, { - "epoch": 0.6064779393011986, + "epoch": 0.8373894670944518, "grad_norm": 0.0, - "learning_rate": 7.080525390451945e-06, - "loss": 0.8267, + "learning_rate": 1.355098925341435e-06, + "loss": 0.8502, "step": 21402 }, { - "epoch": 0.6065062767434611, + "epoch": 0.8374285937866812, "grad_norm": 0.0, - "learning_rate": 7.079647597563315e-06, - "loss": 0.7535, + "learning_rate": 1.3544620212493397e-06, + "loss": 0.9245, "step": 21403 }, { - "epoch": 0.6065346141857236, + "epoch": 0.8374677204789107, "grad_norm": 0.0, - "learning_rate": 7.078769829273901e-06, - "loss": 0.7426, + "learning_rate": 1.3538252559937504e-06, + "loss": 0.9426, "step": 21404 }, { - "epoch": 0.606562951627986, + "epoch": 0.8375068471711401, "grad_norm": 0.0, - "learning_rate": 7.0778920855910905e-06, - "loss": 0.8039, + "learning_rate": 1.3531886295848961e-06, + "loss": 0.9615, "step": 21405 }, { - "epoch": 0.6065912890702485, + "epoch": 0.8375459738633696, "grad_norm": 0.0, - "learning_rate": 7.077014366522279e-06, - "loss": 0.8559, + "learning_rate": 1.3525521420329958e-06, + "loss": 0.996, "step": 21406 }, { - "epoch": 0.606619626512511, + "epoch": 0.837585100555599, "grad_norm": 0.0, - "learning_rate": 7.076136672074865e-06, - "loss": 0.8902, + "learning_rate": 1.3519157933482707e-06, + "loss": 0.8733, "step": 21407 }, { - "epoch": 0.6066479639547735, + "epoch": 0.8376242272478285, "grad_norm": 0.0, - "learning_rate": 7.0752590022562325e-06, - "loss": 0.8293, + "learning_rate": 1.351279583540942e-06, + "loss": 0.9953, "step": 21408 }, { - "epoch": 0.6066763013970359, + "epoch": 0.8376633539400579, "grad_norm": 0.0, - "learning_rate": 7.074381357073782e-06, - "loss": 0.9153, + "learning_rate": 1.350643512621228e-06, + "loss": 0.7398, "step": 21409 }, { - "epoch": 0.6067046388392984, + "epoch": 0.8377024806322874, "grad_norm": 0.0, - "learning_rate": 7.0735037365349065e-06, - "loss": 0.8671, + "learning_rate": 1.3500075805993385e-06, + "loss": 1.1234, "step": 21410 }, { - "epoch": 0.6067329762815609, + "epoch": 0.8377416073245167, "grad_norm": 0.0, - "learning_rate": 7.072626140646992e-06, - "loss": 0.7565, + "learning_rate": 1.3493717874854905e-06, + "loss": 0.8871, "step": 21411 }, { - "epoch": 0.6067613137238232, + "epoch": 0.8377807340167462, "grad_norm": 0.0, - "learning_rate": 7.071748569417439e-06, - "loss": 0.8155, + "learning_rate": 1.3487361332898875e-06, + "loss": 1.0182, "step": 21412 }, { - "epoch": 0.6067896511660857, + "epoch": 0.8378198607089756, "grad_norm": 0.0, - "learning_rate": 7.070871022853632e-06, - "loss": 0.8442, + "learning_rate": 1.3481006180227462e-06, + "loss": 0.8879, "step": 21413 }, { - "epoch": 0.6068179886083482, + "epoch": 0.8378589874012051, "grad_norm": 0.0, - "learning_rate": 7.069993500962964e-06, - "loss": 0.8918, + "learning_rate": 1.3474652416942647e-06, + "loss": 0.9882, "step": 21414 }, { - "epoch": 0.6068463260506106, + "epoch": 0.8378981140934345, "grad_norm": 0.0, - "learning_rate": 7.069116003752831e-06, - "loss": 0.8155, + "learning_rate": 1.346830004314652e-06, + "loss": 0.9634, "step": 21415 }, { - "epoch": 0.6068746634928731, + "epoch": 0.837937240785664, "grad_norm": 0.0, - "learning_rate": 7.068238531230622e-06, - "loss": 0.9714, + "learning_rate": 1.3461949058941015e-06, + "loss": 0.9529, "step": 21416 }, { - "epoch": 0.6069030009351356, + "epoch": 0.8379763674778934, "grad_norm": 0.0, - "learning_rate": 7.067361083403732e-06, - "loss": 0.7798, + "learning_rate": 1.3455599464428215e-06, + "loss": 0.928, "step": 21417 }, { - "epoch": 0.6069313383773981, + "epoch": 0.8380154941701229, "grad_norm": 0.0, - "learning_rate": 7.066483660279544e-06, - "loss": 0.8776, + "learning_rate": 1.3449251259710017e-06, + "loss": 1.0122, "step": 21418 }, { - "epoch": 0.6069596758196605, + "epoch": 0.8380546208623523, "grad_norm": 0.0, - "learning_rate": 7.065606261865453e-06, - "loss": 0.8025, + "learning_rate": 1.3442904444888416e-06, + "loss": 0.9988, "step": 21419 }, { - "epoch": 0.606988013261923, + "epoch": 0.8380937475545818, "grad_norm": 0.0, - "learning_rate": 7.064728888168853e-06, - "loss": 0.8838, + "learning_rate": 1.3436559020065288e-06, + "loss": 0.9446, "step": 21420 }, { - "epoch": 0.6070163507041855, + "epoch": 0.8381328742468112, "grad_norm": 0.0, - "learning_rate": 7.063851539197128e-06, - "loss": 0.8818, + "learning_rate": 1.343021498534255e-06, + "loss": 0.9018, "step": 21421 }, { - "epoch": 0.6070446881464479, + "epoch": 0.8381720009390407, "grad_norm": 0.0, - "learning_rate": 7.062974214957674e-06, - "loss": 0.8647, + "learning_rate": 1.3423872340822074e-06, + "loss": 0.7943, "step": 21422 }, { - "epoch": 0.6070730255887103, + "epoch": 0.83821112763127, "grad_norm": 0.0, - "learning_rate": 7.062096915457881e-06, - "loss": 0.8743, + "learning_rate": 1.3417531086605751e-06, + "loss": 0.8997, "step": 21423 }, { - "epoch": 0.6071013630309728, + "epoch": 0.8382502543234995, "grad_norm": 0.0, - "learning_rate": 7.061219640705135e-06, - "loss": 0.8383, + "learning_rate": 1.3411191222795362e-06, + "loss": 0.9214, "step": 21424 }, { - "epoch": 0.6071297004732353, + "epoch": 0.8382893810157289, "grad_norm": 0.0, - "learning_rate": 7.060342390706829e-06, - "loss": 0.9049, + "learning_rate": 1.3404852749492737e-06, + "loss": 0.9912, "step": 21425 }, { - "epoch": 0.6071580379154977, + "epoch": 0.8383285077079584, "grad_norm": 0.0, - "learning_rate": 7.059465165470347e-06, - "loss": 0.9825, + "learning_rate": 1.3398515666799673e-06, + "loss": 0.9656, "step": 21426 }, { - "epoch": 0.6071863753577602, + "epoch": 0.8383676344001878, "grad_norm": 0.0, - "learning_rate": 7.058587965003083e-06, - "loss": 0.8144, + "learning_rate": 1.3392179974817953e-06, + "loss": 0.9534, "step": 21427 }, { - "epoch": 0.6072147128000227, + "epoch": 0.8384067610924173, "grad_norm": 0.0, - "learning_rate": 7.057710789312427e-06, - "loss": 0.8834, + "learning_rate": 1.3385845673649268e-06, + "loss": 1.0002, "step": 21428 }, { - "epoch": 0.6072430502422851, + "epoch": 0.8384458877846467, "grad_norm": 0.0, - "learning_rate": 7.056833638405762e-06, - "loss": 0.8844, + "learning_rate": 1.3379512763395397e-06, + "loss": 0.9452, "step": 21429 }, { - "epoch": 0.6072713876845476, + "epoch": 0.8384850144768762, "grad_norm": 0.0, - "learning_rate": 7.05595651229048e-06, - "loss": 0.8864, + "learning_rate": 1.3373181244157972e-06, + "loss": 0.9312, "step": 21430 }, { - "epoch": 0.6072997251268101, + "epoch": 0.8385241411691056, "grad_norm": 0.0, - "learning_rate": 7.055079410973975e-06, - "loss": 0.925, + "learning_rate": 1.3366851116038726e-06, + "loss": 0.9343, "step": 21431 }, { - "epoch": 0.6073280625690726, + "epoch": 0.838563267861335, "grad_norm": 0.0, - "learning_rate": 7.0542023344636255e-06, - "loss": 0.8743, + "learning_rate": 1.3360522379139285e-06, + "loss": 0.9856, "step": 21432 }, { - "epoch": 0.607356400011335, + "epoch": 0.8386023945535644, "grad_norm": 0.0, - "learning_rate": 7.053325282766826e-06, - "loss": 0.8851, + "learning_rate": 1.3354195033561313e-06, + "loss": 0.9594, "step": 21433 }, { - "epoch": 0.6073847374535974, + "epoch": 0.8386415212457938, "grad_norm": 0.0, - "learning_rate": 7.052448255890958e-06, - "loss": 0.8377, + "learning_rate": 1.3347869079406372e-06, + "loss": 0.9205, "step": 21434 }, { - "epoch": 0.6074130748958599, + "epoch": 0.8386806479380233, "grad_norm": 0.0, - "learning_rate": 7.051571253843415e-06, - "loss": 0.8511, + "learning_rate": 1.3341544516776072e-06, + "loss": 0.9399, "step": 21435 }, { - "epoch": 0.6074414123381223, + "epoch": 0.8387197746302527, "grad_norm": 0.0, - "learning_rate": 7.050694276631584e-06, - "loss": 0.8428, + "learning_rate": 1.3335221345771999e-06, + "loss": 1.0016, "step": 21436 }, { - "epoch": 0.6074697497803848, + "epoch": 0.8387589013224822, "grad_norm": 0.0, - "learning_rate": 7.049817324262848e-06, - "loss": 0.8754, + "learning_rate": 1.3328899566495656e-06, + "loss": 0.8138, "step": 21437 }, { - "epoch": 0.6074980872226473, + "epoch": 0.8387980280147116, "grad_norm": 0.0, - "learning_rate": 7.048940396744596e-06, - "loss": 0.9609, + "learning_rate": 1.3322579179048578e-06, + "loss": 0.9218, "step": 21438 }, { - "epoch": 0.6075264246649097, + "epoch": 0.8388371547069411, "grad_norm": 0.0, - "learning_rate": 7.048063494084218e-06, - "loss": 0.8513, + "learning_rate": 1.331626018353226e-06, + "loss": 0.9634, "step": 21439 }, { - "epoch": 0.6075547621071722, + "epoch": 0.8388762813991705, "grad_norm": 0.0, - "learning_rate": 7.047186616289095e-06, - "loss": 0.886, + "learning_rate": 1.330994258004822e-06, + "loss": 1.0264, "step": 21440 }, { - "epoch": 0.6075830995494347, + "epoch": 0.8389154080914, "grad_norm": 0.0, - "learning_rate": 7.046309763366617e-06, - "loss": 0.9509, + "learning_rate": 1.330362636869783e-06, + "loss": 0.9308, "step": 21441 }, { - "epoch": 0.6076114369916972, + "epoch": 0.8389545347836294, "grad_norm": 0.0, - "learning_rate": 7.0454329353241655e-06, - "loss": 0.8664, + "learning_rate": 1.3297311549582603e-06, + "loss": 0.8918, "step": 21442 }, { - "epoch": 0.6076397744339596, + "epoch": 0.8389936614758589, "grad_norm": 0.0, - "learning_rate": 7.0445561321691304e-06, - "loss": 0.8564, + "learning_rate": 1.3290998122803856e-06, + "loss": 1.0336, "step": 21443 }, { - "epoch": 0.607668111876222, + "epoch": 0.8390327881680882, "grad_norm": 0.0, - "learning_rate": 7.043679353908901e-06, - "loss": 0.9538, + "learning_rate": 1.3284686088463072e-06, + "loss": 0.965, "step": 21444 }, { - "epoch": 0.6076964493184845, + "epoch": 0.8390719148603177, "grad_norm": 0.0, - "learning_rate": 7.042802600550853e-06, - "loss": 0.8476, + "learning_rate": 1.327837544666155e-06, + "loss": 0.9793, "step": 21445 }, { - "epoch": 0.6077247867607469, + "epoch": 0.8391110415525471, "grad_norm": 0.0, - "learning_rate": 7.04192587210238e-06, - "loss": 0.9415, + "learning_rate": 1.3272066197500677e-06, + "loss": 0.8042, "step": 21446 }, { - "epoch": 0.6077531242030094, + "epoch": 0.8391501682447766, "grad_norm": 0.0, - "learning_rate": 7.041049168570862e-06, - "loss": 0.8673, + "learning_rate": 1.3265758341081692e-06, + "loss": 0.885, "step": 21447 }, { - "epoch": 0.6077814616452719, + "epoch": 0.839189294937006, "grad_norm": 0.0, - "learning_rate": 7.040172489963683e-06, - "loss": 0.8533, + "learning_rate": 1.325945187750599e-06, + "loss": 0.9428, "step": 21448 }, { - "epoch": 0.6078097990875344, + "epoch": 0.8392284216292355, "grad_norm": 0.0, - "learning_rate": 7.039295836288238e-06, - "loss": 0.9504, + "learning_rate": 1.3253146806874773e-06, + "loss": 0.989, "step": 21449 }, { - "epoch": 0.6078381365297968, + "epoch": 0.8392675483214649, "grad_norm": 0.0, - "learning_rate": 7.038419207551896e-06, - "loss": 0.8603, + "learning_rate": 1.3246843129289343e-06, + "loss": 1.0248, "step": 21450 }, { - "epoch": 0.6078664739720593, + "epoch": 0.8393066750136944, "grad_norm": 0.0, - "learning_rate": 7.037542603762051e-06, - "loss": 0.8724, + "learning_rate": 1.324054084485089e-06, + "loss": 0.9412, "step": 21451 }, { - "epoch": 0.6078948114143218, + "epoch": 0.8393458017059238, "grad_norm": 0.0, - "learning_rate": 7.0366660249260885e-06, - "loss": 0.8746, + "learning_rate": 1.3234239953660633e-06, + "loss": 1.0883, "step": 21452 }, { - "epoch": 0.6079231488565842, + "epoch": 0.8393849283981533, "grad_norm": 0.0, - "learning_rate": 7.0357894710513845e-06, - "loss": 0.7957, + "learning_rate": 1.3227940455819755e-06, + "loss": 0.9247, "step": 21453 }, { - "epoch": 0.6079514862988467, + "epoch": 0.8394240550903826, "grad_norm": 0.0, - "learning_rate": 7.034912942145329e-06, - "loss": 0.675, + "learning_rate": 1.3221642351429442e-06, + "loss": 0.9602, "step": 21454 }, { - "epoch": 0.6079798237411091, + "epoch": 0.8394631817826121, "grad_norm": 0.0, - "learning_rate": 7.034036438215299e-06, - "loss": 0.8675, + "learning_rate": 1.3215345640590793e-06, + "loss": 0.9654, "step": 21455 }, { - "epoch": 0.6080081611833716, + "epoch": 0.8395023084748415, "grad_norm": 0.0, - "learning_rate": 7.033159959268683e-06, - "loss": 0.8449, + "learning_rate": 1.320905032340495e-06, + "loss": 1.0444, "step": 21456 }, { - "epoch": 0.608036498625634, + "epoch": 0.839541435167071, "grad_norm": 0.0, - "learning_rate": 7.032283505312865e-06, - "loss": 0.8377, + "learning_rate": 1.3202756399972993e-06, + "loss": 0.914, "step": 21457 }, { - "epoch": 0.6080648360678965, + "epoch": 0.8395805618593004, "grad_norm": 0.0, - "learning_rate": 7.0314070763552236e-06, - "loss": 0.8846, + "learning_rate": 1.3196463870396037e-06, + "loss": 1.0861, "step": 21458 }, { - "epoch": 0.608093173510159, + "epoch": 0.8396196885515299, "grad_norm": 0.0, - "learning_rate": 7.0305306724031396e-06, - "loss": 0.9116, + "learning_rate": 1.3190172734775075e-06, + "loss": 1.1248, "step": 21459 }, { - "epoch": 0.6081215109524214, + "epoch": 0.8396588152437593, "grad_norm": 0.0, - "learning_rate": 7.029654293464004e-06, - "loss": 0.7807, + "learning_rate": 1.3183882993211184e-06, + "loss": 0.9155, "step": 21460 }, { - "epoch": 0.6081498483946839, + "epoch": 0.8396979419359887, "grad_norm": 0.0, - "learning_rate": 7.028777939545189e-06, - "loss": 0.8307, + "learning_rate": 1.3177594645805304e-06, + "loss": 0.9843, "step": 21461 }, { - "epoch": 0.6081781858369464, + "epoch": 0.8397370686282182, "grad_norm": 0.0, - "learning_rate": 7.0279016106540846e-06, - "loss": 0.8018, + "learning_rate": 1.3171307692658497e-06, + "loss": 1.0757, "step": 21462 }, { - "epoch": 0.6082065232792088, + "epoch": 0.8397761953204476, "grad_norm": 0.0, - "learning_rate": 7.027025306798065e-06, - "loss": 0.9066, + "learning_rate": 1.3165022133871664e-06, + "loss": 0.9062, "step": 21463 }, { - "epoch": 0.6082348607214713, + "epoch": 0.8398153220126771, "grad_norm": 0.0, - "learning_rate": 7.0261490279845145e-06, - "loss": 0.8571, + "learning_rate": 1.3158737969545788e-06, + "loss": 0.9041, "step": 21464 }, { - "epoch": 0.6082631981637338, + "epoch": 0.8398544487049064, "grad_norm": 0.0, - "learning_rate": 7.025272774220821e-06, - "loss": 0.8177, + "learning_rate": 1.3152455199781723e-06, + "loss": 0.9596, "step": 21465 }, { - "epoch": 0.6082915356059962, + "epoch": 0.8398935753971359, "grad_norm": 0.0, - "learning_rate": 7.024396545514354e-06, - "loss": 0.8282, + "learning_rate": 1.3146173824680442e-06, + "loss": 0.9767, "step": 21466 }, { - "epoch": 0.6083198730482586, + "epoch": 0.8399327020893653, "grad_norm": 0.0, - "learning_rate": 7.0235203418725004e-06, - "loss": 0.8513, + "learning_rate": 1.3139893844342756e-06, + "loss": 0.8831, "step": 21467 }, { - "epoch": 0.6083482104905211, + "epoch": 0.8399718287815948, "grad_norm": 0.0, - "learning_rate": 7.022644163302641e-06, - "loss": 0.8175, + "learning_rate": 1.3133615258869548e-06, + "loss": 0.8931, "step": 21468 }, { - "epoch": 0.6083765479327836, + "epoch": 0.8400109554738242, "grad_norm": 0.0, - "learning_rate": 7.021768009812155e-06, - "loss": 0.7902, + "learning_rate": 1.3127338068361617e-06, + "loss": 0.8997, "step": 21469 }, { - "epoch": 0.608404885375046, + "epoch": 0.8400500821660537, "grad_norm": 0.0, - "learning_rate": 7.020891881408427e-06, - "loss": 0.8102, + "learning_rate": 1.312106227291977e-06, + "loss": 0.8994, "step": 21470 }, { - "epoch": 0.6084332228173085, + "epoch": 0.8400892088582831, "grad_norm": 0.0, - "learning_rate": 7.0200157780988275e-06, - "loss": 0.844, + "learning_rate": 1.3114787872644808e-06, + "loss": 0.9154, "step": 21471 }, { - "epoch": 0.608461560259571, + "epoch": 0.8401283355505126, "grad_norm": 0.0, - "learning_rate": 7.019139699890743e-06, - "loss": 0.8751, + "learning_rate": 1.3108514867637489e-06, + "loss": 0.8081, "step": 21472 }, { - "epoch": 0.6084898977018335, + "epoch": 0.840167462242742, "grad_norm": 0.0, - "learning_rate": 7.018263646791555e-06, - "loss": 0.797, + "learning_rate": 1.3102243257998526e-06, + "loss": 0.8638, "step": 21473 }, { - "epoch": 0.6085182351440959, + "epoch": 0.8402065889349715, "grad_norm": 0.0, - "learning_rate": 7.017387618808634e-06, - "loss": 0.8528, + "learning_rate": 1.3095973043828647e-06, + "loss": 1.0236, "step": 21474 }, { - "epoch": 0.6085465725863584, + "epoch": 0.8402457156272009, "grad_norm": 0.0, - "learning_rate": 7.016511615949371e-06, - "loss": 0.9619, + "learning_rate": 1.308970422522856e-06, + "loss": 0.9713, "step": 21475 }, { - "epoch": 0.6085749100286209, + "epoch": 0.8402848423194303, "grad_norm": 0.0, - "learning_rate": 7.015635638221134e-06, - "loss": 0.946, + "learning_rate": 1.30834368022989e-06, + "loss": 0.9161, "step": 21476 }, { - "epoch": 0.6086032474708832, + "epoch": 0.8403239690116597, "grad_norm": 0.0, - "learning_rate": 7.0147596856313076e-06, - "loss": 0.8119, + "learning_rate": 1.3077170775140336e-06, + "loss": 0.9257, "step": 21477 }, { - "epoch": 0.6086315849131457, + "epoch": 0.8403630957038892, "grad_norm": 0.0, - "learning_rate": 7.013883758187271e-06, - "loss": 0.8677, + "learning_rate": 1.3070906143853513e-06, + "loss": 0.9605, "step": 21478 }, { - "epoch": 0.6086599223554082, + "epoch": 0.8404022223961186, "grad_norm": 0.0, - "learning_rate": 7.013007855896396e-06, - "loss": 0.8141, + "learning_rate": 1.3064642908538983e-06, + "loss": 1.0287, "step": 21479 }, { - "epoch": 0.6086882597976707, + "epoch": 0.8404413490883481, "grad_norm": 0.0, - "learning_rate": 7.012131978766067e-06, - "loss": 0.9112, + "learning_rate": 1.3058381069297343e-06, + "loss": 0.9376, "step": 21480 }, { - "epoch": 0.6087165972399331, + "epoch": 0.8404804757805775, "grad_norm": 0.0, - "learning_rate": 7.01125612680366e-06, - "loss": 0.9142, + "learning_rate": 1.3052120626229192e-06, + "loss": 1.0355, "step": 21481 }, { - "epoch": 0.6087449346821956, + "epoch": 0.840519602472807, "grad_norm": 0.0, - "learning_rate": 7.010380300016553e-06, - "loss": 0.8057, + "learning_rate": 1.3045861579435003e-06, + "loss": 0.9812, "step": 21482 }, { - "epoch": 0.6087732721244581, + "epoch": 0.8405587291650364, "grad_norm": 0.0, - "learning_rate": 7.009504498412125e-06, - "loss": 0.8746, + "learning_rate": 1.3039603929015321e-06, + "loss": 0.8158, "step": 21483 }, { - "epoch": 0.6088016095667205, + "epoch": 0.8405978558572659, "grad_norm": 0.0, - "learning_rate": 7.008628721997747e-06, - "loss": 0.879, + "learning_rate": 1.303334767507064e-06, + "loss": 0.9676, "step": 21484 }, { - "epoch": 0.608829947008983, + "epoch": 0.8406369825494953, "grad_norm": 0.0, - "learning_rate": 7.0077529707808e-06, - "loss": 0.8701, + "learning_rate": 1.3027092817701437e-06, + "loss": 0.834, "step": 21485 }, { - "epoch": 0.6088582844512455, + "epoch": 0.8406761092417248, "grad_norm": 0.0, - "learning_rate": 7.006877244768664e-06, - "loss": 0.8544, + "learning_rate": 1.3020839357008108e-06, + "loss": 0.9901, "step": 21486 }, { - "epoch": 0.6088866218935078, + "epoch": 0.8407152359339541, "grad_norm": 0.0, - "learning_rate": 7.00600154396871e-06, - "loss": 0.9196, + "learning_rate": 1.3014587293091141e-06, + "loss": 0.9773, "step": 21487 }, { - "epoch": 0.6089149593357703, + "epoch": 0.8407543626261836, "grad_norm": 0.0, - "learning_rate": 7.005125868388316e-06, - "loss": 0.834, + "learning_rate": 1.3008336626050854e-06, + "loss": 1.0328, "step": 21488 }, { - "epoch": 0.6089432967780328, + "epoch": 0.840793489318413, "grad_norm": 0.0, - "learning_rate": 7.0042502180348635e-06, - "loss": 0.8634, + "learning_rate": 1.3002087355987726e-06, + "loss": 0.9051, "step": 21489 }, { - "epoch": 0.6089716342202953, + "epoch": 0.8408326160106424, "grad_norm": 0.0, - "learning_rate": 7.00337459291572e-06, - "loss": 0.9182, + "learning_rate": 1.2995839483002027e-06, + "loss": 0.8431, "step": 21490 }, { - "epoch": 0.6089999716625577, + "epoch": 0.8408717427028719, "grad_norm": 0.0, - "learning_rate": 7.002498993038267e-06, - "loss": 0.8267, + "learning_rate": 1.2989593007194157e-06, + "loss": 0.8645, "step": 21491 }, { - "epoch": 0.6090283091048202, + "epoch": 0.8409108693951013, "grad_norm": 0.0, - "learning_rate": 7.001623418409878e-06, - "loss": 0.7981, + "learning_rate": 1.2983347928664348e-06, + "loss": 0.9247, "step": 21492 }, { - "epoch": 0.6090566465470827, + "epoch": 0.8409499960873308, "grad_norm": 0.0, - "learning_rate": 7.000747869037927e-06, - "loss": 0.8086, + "learning_rate": 1.2977104247512973e-06, + "loss": 0.9959, "step": 21493 }, { - "epoch": 0.6090849839893451, + "epoch": 0.8409891227795602, "grad_norm": 0.0, - "learning_rate": 6.999872344929791e-06, - "loss": 0.8396, + "learning_rate": 1.297086196384023e-06, + "loss": 1.0452, "step": 21494 }, { - "epoch": 0.6091133214316076, + "epoch": 0.8410282494717897, "grad_norm": 0.0, - "learning_rate": 6.9989968460928425e-06, - "loss": 0.7802, + "learning_rate": 1.296462107774642e-06, + "loss": 0.9796, "step": 21495 }, { - "epoch": 0.6091416588738701, + "epoch": 0.841067376164019, "grad_norm": 0.0, - "learning_rate": 6.998121372534459e-06, - "loss": 0.8696, + "learning_rate": 1.2958381589331714e-06, + "loss": 0.9915, "step": 21496 }, { - "epoch": 0.6091699963161326, + "epoch": 0.8411065028562486, "grad_norm": 0.0, - "learning_rate": 6.997245924262018e-06, - "loss": 0.825, + "learning_rate": 1.2952143498696324e-06, + "loss": 0.9948, "step": 21497 }, { - "epoch": 0.6091983337583949, + "epoch": 0.8411456295484779, "grad_norm": 0.0, - "learning_rate": 6.996370501282885e-06, - "loss": 0.9277, + "learning_rate": 1.2945906805940434e-06, + "loss": 0.9922, "step": 21498 }, { - "epoch": 0.6092266712006574, + "epoch": 0.8411847562407074, "grad_norm": 0.0, - "learning_rate": 6.995495103604442e-06, - "loss": 0.9522, + "learning_rate": 1.2939671511164221e-06, + "loss": 0.869, "step": 21499 }, { - "epoch": 0.6092550086429199, + "epoch": 0.8412238829329368, "grad_norm": 0.0, - "learning_rate": 6.994619731234056e-06, - "loss": 0.7384, + "learning_rate": 1.293343761446777e-06, + "loss": 0.9442, "step": 21500 }, { - "epoch": 0.6092833460851823, + "epoch": 0.8412630096251663, "grad_norm": 0.0, - "learning_rate": 6.993744384179103e-06, - "loss": 0.9238, + "learning_rate": 1.2927205115951202e-06, + "loss": 1.0633, "step": 21501 }, { - "epoch": 0.6093116835274448, + "epoch": 0.8413021363173957, "grad_norm": 0.0, - "learning_rate": 6.992869062446963e-06, - "loss": 0.836, + "learning_rate": 1.2920974015714617e-06, + "loss": 1.0247, "step": 21502 }, { - "epoch": 0.6093400209697073, + "epoch": 0.8413412630096252, "grad_norm": 0.0, - "learning_rate": 6.991993766045e-06, - "loss": 0.8551, + "learning_rate": 1.2914744313858097e-06, + "loss": 1.0535, "step": 21503 }, { - "epoch": 0.6093683584119698, + "epoch": 0.8413803897018546, "grad_norm": 0.0, - "learning_rate": 6.991118494980591e-06, - "loss": 0.9189, + "learning_rate": 1.290851601048163e-06, + "loss": 0.8448, "step": 21504 }, { - "epoch": 0.6093966958542322, + "epoch": 0.8414195163940841, "grad_norm": 0.0, - "learning_rate": 6.9902432492611065e-06, - "loss": 0.7758, + "learning_rate": 1.2902289105685272e-06, + "loss": 1.0196, "step": 21505 }, { - "epoch": 0.6094250332964947, + "epoch": 0.8414586430863135, "grad_norm": 0.0, - "learning_rate": 6.989368028893921e-06, - "loss": 0.7625, + "learning_rate": 1.289606359956901e-06, + "loss": 1.0338, "step": 21506 }, { - "epoch": 0.6094533707387572, + "epoch": 0.841497769778543, "grad_norm": 0.0, - "learning_rate": 6.988492833886411e-06, - "loss": 0.9173, + "learning_rate": 1.2889839492232836e-06, + "loss": 0.9525, "step": 21507 }, { - "epoch": 0.6094817081810195, + "epoch": 0.8415368964707723, "grad_norm": 0.0, - "learning_rate": 6.987617664245941e-06, - "loss": 0.8006, + "learning_rate": 1.2883616783776676e-06, + "loss": 0.9671, "step": 21508 }, { - "epoch": 0.609510045623282, + "epoch": 0.8415760231630018, "grad_norm": 0.0, - "learning_rate": 6.9867425199798834e-06, - "loss": 0.916, + "learning_rate": 1.2877395474300481e-06, + "loss": 0.9336, "step": 21509 }, { - "epoch": 0.6095383830655445, + "epoch": 0.8416151498552312, "grad_norm": 0.0, - "learning_rate": 6.985867401095618e-06, - "loss": 0.8531, + "learning_rate": 1.2871175563904092e-06, + "loss": 0.9341, "step": 21510 }, { - "epoch": 0.6095667205078069, + "epoch": 0.8416542765474607, "grad_norm": 0.0, - "learning_rate": 6.984992307600508e-06, - "loss": 0.8398, + "learning_rate": 1.2864957052687499e-06, + "loss": 1.0349, "step": 21511 }, { - "epoch": 0.6095950579500694, + "epoch": 0.8416934032396901, "grad_norm": 0.0, - "learning_rate": 6.984117239501928e-06, - "loss": 0.851, + "learning_rate": 1.2858739940750485e-06, + "loss": 1.0294, "step": 21512 }, { - "epoch": 0.6096233953923319, + "epoch": 0.8417325299319196, "grad_norm": 0.0, - "learning_rate": 6.983242196807246e-06, - "loss": 0.9086, + "learning_rate": 1.285252422819293e-06, + "loss": 0.9298, "step": 21513 }, { - "epoch": 0.6096517328345944, + "epoch": 0.841771656624149, "grad_norm": 0.0, - "learning_rate": 6.982367179523836e-06, - "loss": 0.8586, + "learning_rate": 1.2846309915114618e-06, + "loss": 0.9327, "step": 21514 }, { - "epoch": 0.6096800702768568, + "epoch": 0.8418107833163785, "grad_norm": 0.0, - "learning_rate": 6.981492187659071e-06, - "loss": 0.7759, + "learning_rate": 1.284009700161536e-06, + "loss": 0.8663, "step": 21515 }, { - "epoch": 0.6097084077191193, + "epoch": 0.8418499100086079, "grad_norm": 0.0, - "learning_rate": 6.980617221220316e-06, - "loss": 0.9672, + "learning_rate": 1.283388548779493e-06, + "loss": 0.9008, "step": 21516 }, { - "epoch": 0.6097367451613818, + "epoch": 0.8418890367008373, "grad_norm": 0.0, - "learning_rate": 6.979742280214942e-06, - "loss": 0.7968, + "learning_rate": 1.282767537375309e-06, + "loss": 0.8813, "step": 21517 }, { - "epoch": 0.6097650826036441, + "epoch": 0.8419281633930668, "grad_norm": 0.0, - "learning_rate": 6.978867364650322e-06, - "loss": 0.8246, + "learning_rate": 1.282146665958952e-06, + "loss": 0.9274, "step": 21518 }, { - "epoch": 0.6097934200459066, + "epoch": 0.8419672900852961, "grad_norm": 0.0, - "learning_rate": 6.977992474533823e-06, - "loss": 0.91, + "learning_rate": 1.2815259345403975e-06, + "loss": 0.9301, "step": 21519 }, { - "epoch": 0.6098217574881691, + "epoch": 0.8420064167775256, "grad_norm": 0.0, - "learning_rate": 6.977117609872819e-06, - "loss": 0.8582, + "learning_rate": 1.2809053431296127e-06, + "loss": 0.8674, "step": 21520 }, { - "epoch": 0.6098500949304316, + "epoch": 0.842045543469755, "grad_norm": 0.0, - "learning_rate": 6.976242770674673e-06, - "loss": 0.8267, + "learning_rate": 1.2802848917365595e-06, + "loss": 0.952, "step": 21521 }, { - "epoch": 0.609878432372694, + "epoch": 0.8420846701619845, "grad_norm": 0.0, - "learning_rate": 6.9753679569467545e-06, - "loss": 0.8733, + "learning_rate": 1.2796645803712048e-06, + "loss": 1.0419, "step": 21522 }, { - "epoch": 0.6099067698149565, + "epoch": 0.8421237968542139, "grad_norm": 0.0, - "learning_rate": 6.974493168696441e-06, - "loss": 0.9104, + "learning_rate": 1.2790444090435106e-06, + "loss": 0.9639, "step": 21523 }, { - "epoch": 0.609935107257219, + "epoch": 0.8421629235464434, "grad_norm": 0.0, - "learning_rate": 6.973618405931091e-06, - "loss": 0.8302, + "learning_rate": 1.2784243777634375e-06, + "loss": 0.9478, "step": 21524 }, { - "epoch": 0.6099634446994814, + "epoch": 0.8422020502386728, "grad_norm": 0.0, - "learning_rate": 6.972743668658075e-06, - "loss": 0.9308, + "learning_rate": 1.2778044865409377e-06, + "loss": 0.9247, "step": 21525 }, { - "epoch": 0.6099917821417439, + "epoch": 0.8422411769309023, "grad_norm": 0.0, - "learning_rate": 6.971868956884767e-06, - "loss": 0.7569, + "learning_rate": 1.277184735385968e-06, + "loss": 1.0541, "step": 21526 }, { - "epoch": 0.6100201195840064, + "epoch": 0.8422803036231317, "grad_norm": 0.0, - "learning_rate": 6.970994270618529e-06, - "loss": 0.7948, + "learning_rate": 1.2765651243084831e-06, + "loss": 0.9921, "step": 21527 }, { - "epoch": 0.6100484570262689, + "epoch": 0.8423194303153612, "grad_norm": 0.0, - "learning_rate": 6.970119609866736e-06, - "loss": 0.7424, + "learning_rate": 1.27594565331843e-06, + "loss": 0.9818, "step": 21528 }, { - "epoch": 0.6100767944685312, + "epoch": 0.8423585570075905, "grad_norm": 0.0, - "learning_rate": 6.969244974636745e-06, - "loss": 0.8934, + "learning_rate": 1.2753263224257572e-06, + "loss": 0.9242, "step": 21529 }, { - "epoch": 0.6101051319107937, + "epoch": 0.84239768369982, "grad_norm": 0.0, - "learning_rate": 6.9683703649359305e-06, - "loss": 0.8737, + "learning_rate": 1.2747071316404126e-06, + "loss": 0.8429, "step": 21530 }, { - "epoch": 0.6101334693530562, + "epoch": 0.8424368103920494, "grad_norm": 0.0, - "learning_rate": 6.967495780771658e-06, - "loss": 0.8012, + "learning_rate": 1.2740880809723366e-06, + "loss": 0.8281, "step": 21531 }, { - "epoch": 0.6101618067953186, + "epoch": 0.8424759370842789, "grad_norm": 0.0, - "learning_rate": 6.966621222151294e-06, - "loss": 0.7927, + "learning_rate": 1.2734691704314717e-06, + "loss": 0.9346, "step": 21532 }, { - "epoch": 0.6101901442375811, + "epoch": 0.8425150637765083, "grad_norm": 0.0, - "learning_rate": 6.96574668908221e-06, - "loss": 0.8698, + "learning_rate": 1.2728504000277575e-06, + "loss": 0.949, "step": 21533 }, { - "epoch": 0.6102184816798436, + "epoch": 0.8425541904687378, "grad_norm": 0.0, - "learning_rate": 6.964872181571765e-06, - "loss": 0.8212, + "learning_rate": 1.2722317697711318e-06, + "loss": 0.9434, "step": 21534 }, { - "epoch": 0.610246819122106, + "epoch": 0.8425933171609672, "grad_norm": 0.0, - "learning_rate": 6.963997699627327e-06, - "loss": 0.8726, + "learning_rate": 1.2716132796715252e-06, + "loss": 1.0457, "step": 21535 }, { - "epoch": 0.6102751565643685, + "epoch": 0.8426324438531967, "grad_norm": 0.0, - "learning_rate": 6.963123243256269e-06, - "loss": 0.8454, + "learning_rate": 1.2709949297388746e-06, + "loss": 0.8623, "step": 21536 }, { - "epoch": 0.610303494006631, + "epoch": 0.8426715705454261, "grad_norm": 0.0, - "learning_rate": 6.962248812465947e-06, - "loss": 0.7734, + "learning_rate": 1.270376719983103e-06, + "loss": 0.9507, "step": 21537 }, { - "epoch": 0.6103318314488935, + "epoch": 0.8427106972376556, "grad_norm": 0.0, - "learning_rate": 6.961374407263732e-06, - "loss": 0.8141, + "learning_rate": 1.2697586504141458e-06, + "loss": 0.8999, "step": 21538 }, { - "epoch": 0.6103601688911559, + "epoch": 0.842749823929885, "grad_norm": 0.0, - "learning_rate": 6.96050002765699e-06, - "loss": 0.8401, + "learning_rate": 1.2691407210419238e-06, + "loss": 1.021, "step": 21539 }, { - "epoch": 0.6103885063334183, + "epoch": 0.8427889506221145, "grad_norm": 0.0, - "learning_rate": 6.959625673653083e-06, - "loss": 0.9084, + "learning_rate": 1.2685229318763637e-06, + "loss": 1.0633, "step": 21540 }, { - "epoch": 0.6104168437756808, + "epoch": 0.8428280773143438, "grad_norm": 0.0, - "learning_rate": 6.958751345259383e-06, - "loss": 0.8279, + "learning_rate": 1.2679052829273796e-06, + "loss": 1.0002, "step": 21541 }, { - "epoch": 0.6104451812179432, + "epoch": 0.8428672040065733, "grad_norm": 0.0, - "learning_rate": 6.9578770424832444e-06, - "loss": 0.8773, + "learning_rate": 1.2672877742048984e-06, + "loss": 0.8498, "step": 21542 }, { - "epoch": 0.6104735186602057, + "epoch": 0.8429063306988027, "grad_norm": 0.0, - "learning_rate": 6.957002765332037e-06, - "loss": 0.8186, + "learning_rate": 1.2666704057188317e-06, + "loss": 1.0337, "step": 21543 }, { - "epoch": 0.6105018561024682, + "epoch": 0.8429454573910322, "grad_norm": 0.0, - "learning_rate": 6.9561285138131285e-06, - "loss": 0.9145, + "learning_rate": 1.2660531774790964e-06, + "loss": 0.8978, "step": 21544 }, { - "epoch": 0.6105301935447307, + "epoch": 0.8429845840832616, "grad_norm": 0.0, - "learning_rate": 6.955254287933877e-06, - "loss": 0.9124, + "learning_rate": 1.2654360894956008e-06, + "loss": 0.9748, "step": 21545 }, { - "epoch": 0.6105585309869931, + "epoch": 0.843023710775491, "grad_norm": 0.0, - "learning_rate": 6.95438008770165e-06, - "loss": 0.8414, + "learning_rate": 1.2648191417782562e-06, + "loss": 1.0693, "step": 21546 }, { - "epoch": 0.6105868684292556, + "epoch": 0.8430628374677205, "grad_norm": 0.0, - "learning_rate": 6.953505913123814e-06, - "loss": 0.8676, + "learning_rate": 1.2642023343369714e-06, + "loss": 0.9349, "step": 21547 }, { - "epoch": 0.6106152058715181, + "epoch": 0.8431019641599499, "grad_norm": 0.0, - "learning_rate": 6.952631764207724e-06, - "loss": 0.9002, + "learning_rate": 1.2635856671816516e-06, + "loss": 1.0772, "step": 21548 }, { - "epoch": 0.6106435433137805, + "epoch": 0.8431410908521794, "grad_norm": 0.0, - "learning_rate": 6.9517576409607545e-06, - "loss": 0.8399, + "learning_rate": 1.2629691403221977e-06, + "loss": 1.0145, "step": 21549 }, { - "epoch": 0.610671880756043, + "epoch": 0.8431802175444087, "grad_norm": 0.0, - "learning_rate": 6.950883543390257e-06, - "loss": 0.802, + "learning_rate": 1.2623527537685098e-06, + "loss": 1.0427, "step": 21550 }, { - "epoch": 0.6107002181983054, + "epoch": 0.8432193442366382, "grad_norm": 0.0, - "learning_rate": 6.950009471503601e-06, - "loss": 0.8055, + "learning_rate": 1.2617365075304888e-06, + "loss": 0.9173, "step": 21551 }, { - "epoch": 0.6107285556405678, + "epoch": 0.8432584709288676, "grad_norm": 0.0, - "learning_rate": 6.949135425308147e-06, - "loss": 0.876, + "learning_rate": 1.2611204016180324e-06, + "loss": 0.8784, "step": 21552 }, { - "epoch": 0.6107568930828303, + "epoch": 0.8432975976210971, "grad_norm": 0.0, - "learning_rate": 6.948261404811259e-06, - "loss": 0.9002, + "learning_rate": 1.260504436041029e-06, + "loss": 1.0945, "step": 21553 }, { - "epoch": 0.6107852305250928, + "epoch": 0.8433367243133265, "grad_norm": 0.0, - "learning_rate": 6.947387410020296e-06, - "loss": 0.8119, + "learning_rate": 1.2598886108093755e-06, + "loss": 0.9542, "step": 21554 }, { - "epoch": 0.6108135679673553, + "epoch": 0.843375851005556, "grad_norm": 0.0, - "learning_rate": 6.946513440942628e-06, - "loss": 0.8452, + "learning_rate": 1.259272925932954e-06, + "loss": 1.0022, "step": 21555 }, { - "epoch": 0.6108419054096177, + "epoch": 0.8434149776977854, "grad_norm": 0.0, - "learning_rate": 6.945639497585608e-06, - "loss": 0.8885, + "learning_rate": 1.2586573814216619e-06, + "loss": 0.8386, "step": 21556 }, { - "epoch": 0.6108702428518802, + "epoch": 0.8434541043900149, "grad_norm": 0.0, - "learning_rate": 6.944765579956601e-06, - "loss": 0.9052, + "learning_rate": 1.2580419772853758e-06, + "loss": 0.8245, "step": 21557 }, { - "epoch": 0.6108985802941427, + "epoch": 0.8434932310822443, "grad_norm": 0.0, - "learning_rate": 6.943891688062967e-06, - "loss": 0.858, + "learning_rate": 1.2574267135339836e-06, + "loss": 0.9692, "step": 21558 }, { - "epoch": 0.6109269177364051, + "epoch": 0.8435323577744738, "grad_norm": 0.0, - "learning_rate": 6.943017821912068e-06, - "loss": 0.8437, + "learning_rate": 1.2568115901773593e-06, + "loss": 0.9474, "step": 21559 }, { - "epoch": 0.6109552551786676, + "epoch": 0.8435714844667032, "grad_norm": 0.0, - "learning_rate": 6.942143981511269e-06, - "loss": 0.8914, + "learning_rate": 1.2561966072253896e-06, + "loss": 0.9585, "step": 21560 }, { - "epoch": 0.61098359262093, + "epoch": 0.8436106111589327, "grad_norm": 0.0, - "learning_rate": 6.941270166867922e-06, - "loss": 0.8953, + "learning_rate": 1.255581764687943e-06, + "loss": 0.8791, "step": 21561 }, { - "epoch": 0.6110119300631925, + "epoch": 0.843649737851162, "grad_norm": 0.0, - "learning_rate": 6.9403963779893975e-06, - "loss": 0.9315, + "learning_rate": 1.2549670625748988e-06, + "loss": 0.9742, "step": 21562 }, { - "epoch": 0.6110402675054549, + "epoch": 0.8436888645433915, "grad_norm": 0.0, - "learning_rate": 6.939522614883045e-06, - "loss": 0.8272, + "learning_rate": 1.2543525008961222e-06, + "loss": 0.8661, "step": 21563 }, { - "epoch": 0.6110686049477174, + "epoch": 0.8437279912356209, "grad_norm": 0.0, - "learning_rate": 6.938648877556231e-06, - "loss": 0.8292, + "learning_rate": 1.2537380796614863e-06, + "loss": 0.9659, "step": 21564 }, { - "epoch": 0.6110969423899799, + "epoch": 0.8437671179278504, "grad_norm": 0.0, - "learning_rate": 6.937775166016316e-06, - "loss": 0.8121, + "learning_rate": 1.2531237988808565e-06, + "loss": 0.9773, "step": 21565 }, { - "epoch": 0.6111252798322423, + "epoch": 0.8438062446200798, "grad_norm": 0.0, - "learning_rate": 6.9369014802706566e-06, - "loss": 0.778, + "learning_rate": 1.2525096585641006e-06, + "loss": 0.8665, "step": 21566 }, { - "epoch": 0.6111536172745048, + "epoch": 0.8438453713123093, "grad_norm": 0.0, - "learning_rate": 6.936027820326613e-06, - "loss": 0.879, + "learning_rate": 1.2518956587210761e-06, + "loss": 0.9651, "step": 21567 }, { - "epoch": 0.6111819547167673, + "epoch": 0.8438844980045387, "grad_norm": 0.0, - "learning_rate": 6.935154186191549e-06, - "loss": 0.8058, + "learning_rate": 1.2512817993616455e-06, + "loss": 0.9586, "step": 21568 }, { - "epoch": 0.6112102921590298, + "epoch": 0.8439236246967682, "grad_norm": 0.0, - "learning_rate": 6.934280577872814e-06, - "loss": 0.9384, + "learning_rate": 1.2506680804956695e-06, + "loss": 0.8979, "step": 21569 }, { - "epoch": 0.6112386296012922, + "epoch": 0.8439627513889976, "grad_norm": 0.0, - "learning_rate": 6.933406995377776e-06, - "loss": 0.8818, + "learning_rate": 1.250054502132997e-06, + "loss": 0.9964, "step": 21570 }, { - "epoch": 0.6112669670435547, + "epoch": 0.8440018780812271, "grad_norm": 0.0, - "learning_rate": 6.932533438713787e-06, - "loss": 0.8597, + "learning_rate": 1.2494410642834853e-06, + "loss": 0.7969, "step": 21571 }, { - "epoch": 0.6112953044858171, + "epoch": 0.8440410047734564, "grad_norm": 0.0, - "learning_rate": 6.931659907888208e-06, - "loss": 0.9718, + "learning_rate": 1.248827766956986e-06, + "loss": 1.0448, "step": 21572 }, { - "epoch": 0.6113236419280795, + "epoch": 0.844080131465686, "grad_norm": 0.0, - "learning_rate": 6.930786402908401e-06, - "loss": 0.827, + "learning_rate": 1.2482146101633474e-06, + "loss": 0.8784, "step": 21573 }, { - "epoch": 0.611351979370342, + "epoch": 0.8441192581579153, "grad_norm": 0.0, - "learning_rate": 6.929912923781716e-06, - "loss": 0.8101, + "learning_rate": 1.2476015939124142e-06, + "loss": 0.9784, "step": 21574 }, { - "epoch": 0.6113803168126045, + "epoch": 0.8441583848501447, "grad_norm": 0.0, - "learning_rate": 6.929039470515513e-06, - "loss": 0.8827, + "learning_rate": 1.246988718214035e-06, + "loss": 1.0677, "step": 21575 }, { - "epoch": 0.6114086542548669, + "epoch": 0.8441975115423742, "grad_norm": 0.0, - "learning_rate": 6.928166043117157e-06, - "loss": 0.916, + "learning_rate": 1.2463759830780453e-06, + "loss": 1.0263, "step": 21576 }, { - "epoch": 0.6114369916971294, + "epoch": 0.8442366382346036, "grad_norm": 0.0, - "learning_rate": 6.927292641593993e-06, - "loss": 0.9407, + "learning_rate": 1.2457633885142896e-06, + "loss": 0.7995, "step": 21577 }, { - "epoch": 0.6114653291393919, + "epoch": 0.8442757649268331, "grad_norm": 0.0, - "learning_rate": 6.926419265953388e-06, - "loss": 0.9169, + "learning_rate": 1.245150934532603e-06, + "loss": 0.9853, "step": 21578 }, { - "epoch": 0.6114936665816544, + "epoch": 0.8443148916190625, "grad_norm": 0.0, - "learning_rate": 6.925545916202692e-06, - "loss": 0.8964, + "learning_rate": 1.2445386211428256e-06, + "loss": 0.9158, "step": 21579 }, { - "epoch": 0.6115220040239168, + "epoch": 0.844354018311292, "grad_norm": 0.0, - "learning_rate": 6.924672592349264e-06, - "loss": 0.8304, + "learning_rate": 1.243926448354783e-06, + "loss": 0.8724, "step": 21580 }, { - "epoch": 0.6115503414661793, + "epoch": 0.8443931450035214, "grad_norm": 0.0, - "learning_rate": 6.923799294400466e-06, - "loss": 0.7581, + "learning_rate": 1.2433144161783106e-06, + "loss": 1.0495, "step": 21581 }, { - "epoch": 0.6115786789084418, + "epoch": 0.8444322716957509, "grad_norm": 0.0, - "learning_rate": 6.922926022363644e-06, - "loss": 0.8259, + "learning_rate": 1.242702524623236e-06, + "loss": 0.9652, "step": 21582 }, { - "epoch": 0.6116070163507041, + "epoch": 0.8444713983879802, "grad_norm": 0.0, - "learning_rate": 6.92205277624616e-06, - "loss": 0.8955, + "learning_rate": 1.242090773699387e-06, + "loss": 0.9393, "step": 21583 }, { - "epoch": 0.6116353537929666, + "epoch": 0.8445105250802097, "grad_norm": 0.0, - "learning_rate": 6.921179556055369e-06, - "loss": 0.8589, + "learning_rate": 1.2414791634165846e-06, + "loss": 0.8979, "step": 21584 }, { - "epoch": 0.6116636912352291, + "epoch": 0.8445496517724391, "grad_norm": 0.0, - "learning_rate": 6.9203063617986235e-06, - "loss": 0.7011, + "learning_rate": 1.240867693784653e-06, + "loss": 0.8757, "step": 21585 }, { - "epoch": 0.6116920286774916, + "epoch": 0.8445887784646686, "grad_norm": 0.0, - "learning_rate": 6.919433193483287e-06, - "loss": 0.847, + "learning_rate": 1.2402563648134059e-06, + "loss": 0.9905, "step": 21586 }, { - "epoch": 0.611720366119754, + "epoch": 0.844627905156898, "grad_norm": 0.0, - "learning_rate": 6.918560051116703e-06, - "loss": 0.8343, + "learning_rate": 1.2396451765126704e-06, + "loss": 0.9481, "step": 21587 }, { - "epoch": 0.6117487035620165, + "epoch": 0.8446670318491275, "grad_norm": 0.0, - "learning_rate": 6.9176869347062325e-06, - "loss": 0.9184, + "learning_rate": 1.2390341288922535e-06, + "loss": 0.9841, "step": 21588 }, { - "epoch": 0.611777041004279, + "epoch": 0.8447061585413569, "grad_norm": 0.0, - "learning_rate": 6.916813844259234e-06, - "loss": 0.7975, + "learning_rate": 1.2384232219619719e-06, + "loss": 1.0276, "step": 21589 }, { - "epoch": 0.6118053784465414, + "epoch": 0.8447452852335864, "grad_norm": 0.0, - "learning_rate": 6.915940779783052e-06, - "loss": 0.8076, + "learning_rate": 1.2378124557316306e-06, + "loss": 0.7226, "step": 21590 }, { - "epoch": 0.6118337158888039, + "epoch": 0.8447844119258158, "grad_norm": 0.0, - "learning_rate": 6.9150677412850485e-06, - "loss": 0.7723, + "learning_rate": 1.2372018302110466e-06, + "loss": 0.9857, "step": 21591 }, { - "epoch": 0.6118620533310664, + "epoch": 0.8448235386180453, "grad_norm": 0.0, - "learning_rate": 6.914194728772574e-06, - "loss": 0.8491, + "learning_rate": 1.2365913454100175e-06, + "loss": 0.9023, "step": 21592 }, { - "epoch": 0.6118903907733289, + "epoch": 0.8448626653102747, "grad_norm": 0.0, - "learning_rate": 6.913321742252983e-06, - "loss": 0.8971, + "learning_rate": 1.2359810013383534e-06, + "loss": 0.9929, "step": 21593 }, { - "epoch": 0.6119187282155912, + "epoch": 0.8449017920025041, "grad_norm": 0.0, - "learning_rate": 6.912448781733633e-06, - "loss": 0.9283, + "learning_rate": 1.2353707980058494e-06, + "loss": 0.9429, "step": 21594 }, { - "epoch": 0.6119470656578537, + "epoch": 0.8449409186947335, "grad_norm": 0.0, - "learning_rate": 6.9115758472218695e-06, - "loss": 0.8051, + "learning_rate": 1.234760735422309e-06, + "loss": 0.7998, "step": 21595 }, { - "epoch": 0.6119754031001162, + "epoch": 0.844980045386963, "grad_norm": 0.0, - "learning_rate": 6.910702938725049e-06, - "loss": 0.8872, + "learning_rate": 1.2341508135975266e-06, + "loss": 0.896, "step": 21596 }, { - "epoch": 0.6120037405423786, + "epoch": 0.8450191720791924, "grad_norm": 0.0, - "learning_rate": 6.909830056250527e-06, - "loss": 0.8036, + "learning_rate": 1.2335410325413e-06, + "loss": 0.8639, "step": 21597 }, { - "epoch": 0.6120320779846411, + "epoch": 0.8450582987714219, "grad_norm": 0.0, - "learning_rate": 6.9089571998056525e-06, - "loss": 0.9722, + "learning_rate": 1.2329313922634178e-06, + "loss": 0.9272, "step": 21598 }, { - "epoch": 0.6120604154269036, + "epoch": 0.8450974254636513, "grad_norm": 0.0, - "learning_rate": 6.908084369397783e-06, - "loss": 0.879, + "learning_rate": 1.2323218927736713e-06, + "loss": 0.8461, "step": 21599 }, { - "epoch": 0.612088752869166, + "epoch": 0.8451365521558808, "grad_norm": 0.0, - "learning_rate": 6.907211565034262e-06, - "loss": 0.7285, + "learning_rate": 1.2317125340818492e-06, + "loss": 1.0847, "step": 21600 }, { - "epoch": 0.6121170903114285, + "epoch": 0.8451756788481102, "grad_norm": 0.0, - "learning_rate": 6.906338786722448e-06, - "loss": 0.7732, + "learning_rate": 1.231103316197738e-06, + "loss": 1.0121, "step": 21601 }, { - "epoch": 0.612145427753691, + "epoch": 0.8452148055403396, "grad_norm": 0.0, - "learning_rate": 6.905466034469695e-06, - "loss": 0.8997, + "learning_rate": 1.230494239131118e-06, + "loss": 0.9466, "step": 21602 }, { - "epoch": 0.6121737651959535, + "epoch": 0.8452539322325691, "grad_norm": 0.0, - "learning_rate": 6.904593308283345e-06, - "loss": 0.8498, + "learning_rate": 1.2298853028917733e-06, + "loss": 1.0027, "step": 21603 }, { - "epoch": 0.6122021026382158, + "epoch": 0.8452930589247984, "grad_norm": 0.0, - "learning_rate": 6.903720608170757e-06, - "loss": 0.9739, + "learning_rate": 1.2292765074894775e-06, + "loss": 0.9543, "step": 21604 }, { - "epoch": 0.6122304400804783, + "epoch": 0.8453321856170279, "grad_norm": 0.0, - "learning_rate": 6.902847934139281e-06, - "loss": 0.82, + "learning_rate": 1.2286678529340146e-06, + "loss": 1.1312, "step": 21605 }, { - "epoch": 0.6122587775227408, + "epoch": 0.8453713123092573, "grad_norm": 0.0, - "learning_rate": 6.901975286196265e-06, - "loss": 0.9373, + "learning_rate": 1.2280593392351526e-06, + "loss": 0.9736, "step": 21606 }, { - "epoch": 0.6122871149650032, + "epoch": 0.8454104390014868, "grad_norm": 0.0, - "learning_rate": 6.901102664349067e-06, - "loss": 0.8571, + "learning_rate": 1.227450966402668e-06, + "loss": 0.8778, "step": 21607 }, { - "epoch": 0.6123154524072657, + "epoch": 0.8454495656937162, "grad_norm": 0.0, - "learning_rate": 6.900230068605027e-06, - "loss": 0.9172, + "learning_rate": 1.2268427344463263e-06, + "loss": 0.8939, "step": 21608 }, { - "epoch": 0.6123437898495282, + "epoch": 0.8454886923859457, "grad_norm": 0.0, - "learning_rate": 6.8993574989714995e-06, - "loss": 0.8964, + "learning_rate": 1.2262346433758965e-06, + "loss": 0.953, "step": 21609 }, { - "epoch": 0.6123721272917907, + "epoch": 0.8455278190781751, "grad_norm": 0.0, - "learning_rate": 6.898484955455837e-06, - "loss": 0.8369, + "learning_rate": 1.225626693201144e-06, + "loss": 1.0069, "step": 21610 }, { - "epoch": 0.6124004647340531, + "epoch": 0.8455669457704046, "grad_norm": 0.0, - "learning_rate": 6.897612438065388e-06, - "loss": 0.86, + "learning_rate": 1.2250188839318344e-06, + "loss": 0.9406, "step": 21611 }, { - "epoch": 0.6124288021763156, + "epoch": 0.845606072462634, "grad_norm": 0.0, - "learning_rate": 6.896739946807499e-06, - "loss": 0.7956, + "learning_rate": 1.2244112155777243e-06, + "loss": 0.8751, "step": 21612 }, { - "epoch": 0.6124571396185781, + "epoch": 0.8456451991548635, "grad_norm": 0.0, - "learning_rate": 6.895867481689527e-06, - "loss": 0.7894, + "learning_rate": 1.2238036881485726e-06, + "loss": 0.9179, "step": 21613 }, { - "epoch": 0.6124854770608404, + "epoch": 0.8456843258470929, "grad_norm": 0.0, - "learning_rate": 6.8949950427188104e-06, - "loss": 0.8548, + "learning_rate": 1.2231963016541392e-06, + "loss": 0.9344, "step": 21614 }, { - "epoch": 0.6125138145031029, + "epoch": 0.8457234525393224, "grad_norm": 0.0, - "learning_rate": 6.894122629902706e-06, - "loss": 0.9375, + "learning_rate": 1.222589056104173e-06, + "loss": 0.9376, "step": 21615 }, { - "epoch": 0.6125421519453654, + "epoch": 0.8457625792315517, "grad_norm": 0.0, - "learning_rate": 6.893250243248559e-06, - "loss": 0.817, + "learning_rate": 1.2219819515084275e-06, + "loss": 1.0126, "step": 21616 }, { - "epoch": 0.6125704893876279, + "epoch": 0.8458017059237812, "grad_norm": 0.0, - "learning_rate": 6.892377882763719e-06, - "loss": 0.8649, + "learning_rate": 1.2213749878766533e-06, + "loss": 0.8926, "step": 21617 }, { - "epoch": 0.6125988268298903, + "epoch": 0.8458408326160106, "grad_norm": 0.0, - "learning_rate": 6.891505548455538e-06, - "loss": 0.8522, + "learning_rate": 1.2207681652185977e-06, + "loss": 0.9418, "step": 21618 }, { - "epoch": 0.6126271642721528, + "epoch": 0.8458799593082401, "grad_norm": 0.0, - "learning_rate": 6.890633240331355e-06, - "loss": 0.8629, + "learning_rate": 1.2201614835440034e-06, + "loss": 1.0355, "step": 21619 }, { - "epoch": 0.6126555017144153, + "epoch": 0.8459190860004695, "grad_norm": 0.0, - "learning_rate": 6.889760958398528e-06, - "loss": 0.8563, + "learning_rate": 1.2195549428626151e-06, + "loss": 0.8718, "step": 21620 }, { - "epoch": 0.6126838391566777, + "epoch": 0.845958212692699, "grad_norm": 0.0, - "learning_rate": 6.888888702664396e-06, - "loss": 0.8895, + "learning_rate": 1.2189485431841675e-06, + "loss": 1.0058, "step": 21621 }, { - "epoch": 0.6127121765989402, + "epoch": 0.8459973393849284, "grad_norm": 0.0, - "learning_rate": 6.888016473136308e-06, - "loss": 0.813, + "learning_rate": 1.2183422845184078e-06, + "loss": 0.8901, "step": 21622 }, { - "epoch": 0.6127405140412027, + "epoch": 0.8460364660771579, "grad_norm": 0.0, - "learning_rate": 6.887144269821615e-06, - "loss": 0.8081, + "learning_rate": 1.2177361668750641e-06, + "loss": 0.912, "step": 21623 }, { - "epoch": 0.612768851483465, + "epoch": 0.8460755927693873, "grad_norm": 0.0, - "learning_rate": 6.88627209272766e-06, - "loss": 0.8224, + "learning_rate": 1.2171301902638766e-06, + "loss": 0.9866, "step": 21624 }, { - "epoch": 0.6127971889257275, + "epoch": 0.8461147194616168, "grad_norm": 0.0, - "learning_rate": 6.885399941861792e-06, - "loss": 0.9416, + "learning_rate": 1.2165243546945693e-06, + "loss": 1.0374, "step": 21625 }, { - "epoch": 0.61282552636799, + "epoch": 0.8461538461538461, "grad_norm": 0.0, - "learning_rate": 6.8845278172313614e-06, - "loss": 0.8345, + "learning_rate": 1.2159186601768747e-06, + "loss": 0.9292, "step": 21626 }, { - "epoch": 0.6128538638102525, + "epoch": 0.8461929728460756, "grad_norm": 0.0, - "learning_rate": 6.8836557188437045e-06, - "loss": 0.8866, + "learning_rate": 1.2153131067205192e-06, + "loss": 1.0384, "step": 21627 }, { - "epoch": 0.6128822012525149, + "epoch": 0.846232099538305, "grad_norm": 0.0, - "learning_rate": 6.8827836467061745e-06, - "loss": 0.9442, + "learning_rate": 1.2147076943352298e-06, + "loss": 1.0052, "step": 21628 }, { - "epoch": 0.6129105386947774, + "epoch": 0.8462712262305345, "grad_norm": 0.0, - "learning_rate": 6.8819116008261145e-06, - "loss": 0.8575, + "learning_rate": 1.2141024230307242e-06, + "loss": 1.0285, "step": 21629 }, { - "epoch": 0.6129388761370399, + "epoch": 0.8463103529227639, "grad_norm": 0.0, - "learning_rate": 6.881039581210871e-06, - "loss": 0.8428, + "learning_rate": 1.2134972928167232e-06, + "loss": 0.9726, "step": 21630 }, { - "epoch": 0.6129672135793023, + "epoch": 0.8463494796149933, "grad_norm": 0.0, - "learning_rate": 6.880167587867792e-06, - "loss": 0.8397, + "learning_rate": 1.2128923037029471e-06, + "loss": 0.8402, "step": 21631 }, { - "epoch": 0.6129955510215648, + "epoch": 0.8463886063072228, "grad_norm": 0.0, - "learning_rate": 6.879295620804217e-06, - "loss": 0.9321, + "learning_rate": 1.2122874556991115e-06, + "loss": 1.0827, "step": 21632 }, { - "epoch": 0.6130238884638273, + "epoch": 0.8464277329994522, "grad_norm": 0.0, - "learning_rate": 6.878423680027494e-06, - "loss": 0.8571, + "learning_rate": 1.2116827488149252e-06, + "loss": 1.1321, "step": 21633 }, { - "epoch": 0.6130522259060898, + "epoch": 0.8464668596916817, "grad_norm": 0.0, - "learning_rate": 6.877551765544972e-06, - "loss": 0.7731, + "learning_rate": 1.2110781830601036e-06, + "loss": 0.9369, "step": 21634 }, { - "epoch": 0.6130805633483521, + "epoch": 0.8465059863839111, "grad_norm": 0.0, - "learning_rate": 6.876679877363986e-06, - "loss": 0.9044, + "learning_rate": 1.2104737584443492e-06, + "loss": 1.0967, "step": 21635 }, { - "epoch": 0.6131089007906146, + "epoch": 0.8465451130761406, "grad_norm": 0.0, - "learning_rate": 6.8758080154918875e-06, - "loss": 0.7974, + "learning_rate": 1.2098694749773766e-06, + "loss": 1.0433, "step": 21636 }, { - "epoch": 0.6131372382328771, + "epoch": 0.8465842397683699, "grad_norm": 0.0, - "learning_rate": 6.8749361799360155e-06, - "loss": 0.8383, + "learning_rate": 1.2092653326688842e-06, + "loss": 0.928, "step": 21637 }, { - "epoch": 0.6131655756751395, + "epoch": 0.8466233664605994, "grad_norm": 0.0, - "learning_rate": 6.874064370703717e-06, - "loss": 0.7987, + "learning_rate": 1.2086613315285756e-06, + "loss": 1.0097, "step": 21638 }, { - "epoch": 0.613193913117402, + "epoch": 0.8466624931528288, "grad_norm": 0.0, - "learning_rate": 6.87319258780234e-06, - "loss": 0.8224, + "learning_rate": 1.2080574715661474e-06, + "loss": 0.9939, "step": 21639 }, { - "epoch": 0.6132222505596645, + "epoch": 0.8467016198450583, "grad_norm": 0.0, - "learning_rate": 6.872320831239217e-06, - "loss": 0.8151, + "learning_rate": 1.2074537527913022e-06, + "loss": 1.0314, "step": 21640 }, { - "epoch": 0.613250588001927, + "epoch": 0.8467407465372877, "grad_norm": 0.0, - "learning_rate": 6.8714491010216985e-06, - "loss": 0.7989, + "learning_rate": 1.2068501752137308e-06, + "loss": 0.9659, "step": 21641 }, { - "epoch": 0.6132789254441894, + "epoch": 0.8467798732295172, "grad_norm": 0.0, - "learning_rate": 6.870577397157128e-06, - "loss": 0.8678, + "learning_rate": 1.2062467388431287e-06, + "loss": 1.0805, "step": 21642 }, { - "epoch": 0.6133072628864519, + "epoch": 0.8468189999217466, "grad_norm": 0.0, - "learning_rate": 6.869705719652844e-06, - "loss": 0.8613, + "learning_rate": 1.205643443689183e-06, + "loss": 0.9074, "step": 21643 }, { - "epoch": 0.6133356003287144, + "epoch": 0.8468581266139761, "grad_norm": 0.0, - "learning_rate": 6.868834068516195e-06, - "loss": 0.8439, + "learning_rate": 1.2050402897615832e-06, + "loss": 1.0882, "step": 21644 }, { - "epoch": 0.6133639377709768, + "epoch": 0.8468972533062055, "grad_norm": 0.0, - "learning_rate": 6.8679624437545145e-06, - "loss": 0.7928, + "learning_rate": 1.2044372770700162e-06, + "loss": 0.912, "step": 21645 }, { - "epoch": 0.6133922752132392, + "epoch": 0.846936379998435, "grad_norm": 0.0, - "learning_rate": 6.8670908453751505e-06, - "loss": 0.7563, + "learning_rate": 1.203834405624167e-06, + "loss": 1.0241, "step": 21646 }, { - "epoch": 0.6134206126555017, + "epoch": 0.8469755066906643, "grad_norm": 0.0, - "learning_rate": 6.866219273385449e-06, - "loss": 0.9113, + "learning_rate": 1.2032316754337126e-06, + "loss": 1.0231, "step": 21647 }, { - "epoch": 0.6134489500977641, + "epoch": 0.8470146333828938, "grad_norm": 0.0, - "learning_rate": 6.865347727792739e-06, - "loss": 0.7249, + "learning_rate": 1.2026290865083356e-06, + "loss": 0.975, "step": 21648 }, { - "epoch": 0.6134772875400266, + "epoch": 0.8470537600751232, "grad_norm": 0.0, - "learning_rate": 6.8644762086043734e-06, - "loss": 0.8223, + "learning_rate": 1.2020266388577106e-06, + "loss": 0.8468, "step": 21649 }, { - "epoch": 0.6135056249822891, + "epoch": 0.8470928867673527, "grad_norm": 0.0, - "learning_rate": 6.863604715827685e-06, - "loss": 0.8282, + "learning_rate": 1.2014243324915154e-06, + "loss": 0.8462, "step": 21650 }, { - "epoch": 0.6135339624245516, + "epoch": 0.8471320134595821, "grad_norm": 0.0, - "learning_rate": 6.862733249470021e-06, - "loss": 0.9074, + "learning_rate": 1.2008221674194188e-06, + "loss": 0.8982, "step": 21651 }, { - "epoch": 0.613562299866814, + "epoch": 0.8471711401518116, "grad_norm": 0.0, - "learning_rate": 6.861861809538723e-06, - "loss": 0.8266, + "learning_rate": 1.200220143651094e-06, + "loss": 1.022, "step": 21652 }, { - "epoch": 0.6135906373090765, + "epoch": 0.847210266844041, "grad_norm": 0.0, - "learning_rate": 6.860990396041125e-06, - "loss": 0.782, + "learning_rate": 1.1996182611962048e-06, + "loss": 0.9893, "step": 21653 }, { - "epoch": 0.613618974751339, + "epoch": 0.8472493935362705, "grad_norm": 0.0, - "learning_rate": 6.860119008984569e-06, - "loss": 0.9241, + "learning_rate": 1.1990165200644188e-06, + "loss": 0.8889, "step": 21654 }, { - "epoch": 0.6136473121936014, + "epoch": 0.8472885202284999, "grad_norm": 0.0, - "learning_rate": 6.859247648376399e-06, - "loss": 0.7877, + "learning_rate": 1.1984149202653995e-06, + "loss": 0.9989, "step": 21655 }, { - "epoch": 0.6136756496358639, + "epoch": 0.8473276469207294, "grad_norm": 0.0, - "learning_rate": 6.858376314223951e-06, - "loss": 0.8799, + "learning_rate": 1.19781346180881e-06, + "loss": 0.9477, "step": 21656 }, { - "epoch": 0.6137039870781263, + "epoch": 0.8473667736129588, "grad_norm": 0.0, - "learning_rate": 6.857505006534571e-06, - "loss": 0.8606, + "learning_rate": 1.1972121447043038e-06, + "loss": 0.9231, "step": 21657 }, { - "epoch": 0.6137323245203888, + "epoch": 0.8474059003051883, "grad_norm": 0.0, - "learning_rate": 6.856633725315587e-06, - "loss": 0.8658, + "learning_rate": 1.1966109689615402e-06, + "loss": 1.0108, "step": 21658 }, { - "epoch": 0.6137606619626512, + "epoch": 0.8474450269974176, "grad_norm": 0.0, - "learning_rate": 6.855762470574345e-06, - "loss": 0.8737, + "learning_rate": 1.1960099345901743e-06, + "loss": 0.959, "step": 21659 }, { - "epoch": 0.6137889994049137, + "epoch": 0.847484153689647, "grad_norm": 0.0, - "learning_rate": 6.854891242318189e-06, - "loss": 0.9573, + "learning_rate": 1.1954090415998566e-06, + "loss": 1.0513, "step": 21660 }, { - "epoch": 0.6138173368471762, + "epoch": 0.8475232803818765, "grad_norm": 0.0, - "learning_rate": 6.8540200405544455e-06, - "loss": 0.8697, + "learning_rate": 1.1948082900002357e-06, + "loss": 0.9099, "step": 21661 }, { - "epoch": 0.6138456742894386, + "epoch": 0.8475624070741059, "grad_norm": 0.0, - "learning_rate": 6.853148865290461e-06, - "loss": 0.8127, + "learning_rate": 1.1942076798009604e-06, + "loss": 1.0613, "step": 21662 }, { - "epoch": 0.6138740117317011, + "epoch": 0.8476015337663354, "grad_norm": 0.0, - "learning_rate": 6.852277716533573e-06, - "loss": 0.84, + "learning_rate": 1.1936072110116781e-06, + "loss": 1.0781, "step": 21663 }, { - "epoch": 0.6139023491739636, + "epoch": 0.8476406604585648, "grad_norm": 0.0, - "learning_rate": 6.851406594291118e-06, - "loss": 0.7726, + "learning_rate": 1.1930068836420261e-06, + "loss": 1.0333, "step": 21664 }, { - "epoch": 0.6139306866162261, + "epoch": 0.8476797871507943, "grad_norm": 0.0, - "learning_rate": 6.850535498570438e-06, - "loss": 0.7218, + "learning_rate": 1.1924066977016502e-06, + "loss": 0.9189, "step": 21665 }, { - "epoch": 0.6139590240584885, + "epoch": 0.8477189138430237, "grad_norm": 0.0, - "learning_rate": 6.849664429378863e-06, - "loss": 0.9227, + "learning_rate": 1.1918066532001815e-06, + "loss": 1.0473, "step": 21666 }, { - "epoch": 0.613987361500751, + "epoch": 0.8477580405352532, "grad_norm": 0.0, - "learning_rate": 6.848793386723734e-06, - "loss": 0.8729, + "learning_rate": 1.1912067501472656e-06, + "loss": 0.8327, "step": 21667 }, { - "epoch": 0.6140156989430134, + "epoch": 0.8477971672274826, "grad_norm": 0.0, - "learning_rate": 6.84792237061239e-06, - "loss": 0.8579, + "learning_rate": 1.1906069885525285e-06, + "loss": 0.9745, "step": 21668 }, { - "epoch": 0.6140440363852758, + "epoch": 0.847836293919712, "grad_norm": 0.0, - "learning_rate": 6.847051381052165e-06, - "loss": 0.827, + "learning_rate": 1.1900073684256075e-06, + "loss": 0.9565, "step": 21669 }, { - "epoch": 0.6140723738275383, + "epoch": 0.8478754206119414, "grad_norm": 0.0, - "learning_rate": 6.846180418050397e-06, - "loss": 0.8737, + "learning_rate": 1.1894078897761252e-06, + "loss": 0.9208, "step": 21670 }, { - "epoch": 0.6141007112698008, + "epoch": 0.8479145473041709, "grad_norm": 0.0, - "learning_rate": 6.845309481614427e-06, - "loss": 0.7441, + "learning_rate": 1.1888085526137127e-06, + "loss": 0.8953, "step": 21671 }, { - "epoch": 0.6141290487120632, + "epoch": 0.8479536739964003, "grad_norm": 0.0, - "learning_rate": 6.844438571751583e-06, - "loss": 0.8772, + "learning_rate": 1.1882093569479935e-06, + "loss": 0.8307, "step": 21672 }, { - "epoch": 0.6141573861543257, + "epoch": 0.8479928006886298, "grad_norm": 0.0, - "learning_rate": 6.8435676884692085e-06, - "loss": 1.0026, + "learning_rate": 1.1876103027885931e-06, + "loss": 0.9881, "step": 21673 }, { - "epoch": 0.6141857235965882, + "epoch": 0.8480319273808592, "grad_norm": 0.0, - "learning_rate": 6.84269683177463e-06, - "loss": 0.9254, + "learning_rate": 1.1870113901451264e-06, + "loss": 0.8745, "step": 21674 }, { - "epoch": 0.6142140610388507, + "epoch": 0.8480710540730887, "grad_norm": 0.0, - "learning_rate": 6.8418260016751895e-06, - "loss": 0.828, + "learning_rate": 1.186412619027213e-06, + "loss": 0.8979, "step": 21675 }, { - "epoch": 0.6142423984811131, + "epoch": 0.8481101807653181, "grad_norm": 0.0, - "learning_rate": 6.840955198178223e-06, - "loss": 0.8313, + "learning_rate": 1.1858139894444686e-06, + "loss": 1.0461, "step": 21676 }, { - "epoch": 0.6142707359233756, + "epoch": 0.8481493074575476, "grad_norm": 0.0, - "learning_rate": 6.840084421291062e-06, - "loss": 0.8513, + "learning_rate": 1.1852155014065092e-06, + "loss": 0.822, "step": 21677 }, { - "epoch": 0.614299073365638, + "epoch": 0.848188434149777, "grad_norm": 0.0, - "learning_rate": 6.839213671021048e-06, - "loss": 0.8531, + "learning_rate": 1.1846171549229413e-06, + "loss": 0.8773, "step": 21678 }, { - "epoch": 0.6143274108079004, + "epoch": 0.8482275608420065, "grad_norm": 0.0, - "learning_rate": 6.838342947375507e-06, - "loss": 0.7253, + "learning_rate": 1.184018950003376e-06, + "loss": 0.9919, "step": 21679 }, { - "epoch": 0.6143557482501629, + "epoch": 0.8482666875342358, "grad_norm": 0.0, - "learning_rate": 6.837472250361776e-06, - "loss": 0.7784, + "learning_rate": 1.1834208866574182e-06, + "loss": 1.0306, "step": 21680 }, { - "epoch": 0.6143840856924254, + "epoch": 0.8483058142264653, "grad_norm": 0.0, - "learning_rate": 6.836601579987195e-06, - "loss": 0.8776, + "learning_rate": 1.1828229648946764e-06, + "loss": 0.8862, "step": 21681 }, { - "epoch": 0.6144124231346879, + "epoch": 0.8483449409186947, "grad_norm": 0.0, - "learning_rate": 6.83573093625909e-06, - "loss": 0.9547, + "learning_rate": 1.1822251847247468e-06, + "loss": 0.827, "step": 21682 }, { - "epoch": 0.6144407605769503, + "epoch": 0.8483840676109242, "grad_norm": 0.0, - "learning_rate": 6.834860319184797e-06, - "loss": 0.9609, + "learning_rate": 1.1816275461572335e-06, + "loss": 0.8809, "step": 21683 }, { - "epoch": 0.6144690980192128, + "epoch": 0.8484231943031536, "grad_norm": 0.0, - "learning_rate": 6.833989728771657e-06, - "loss": 0.881, + "learning_rate": 1.1810300492017269e-06, + "loss": 1.0005, "step": 21684 }, { - "epoch": 0.6144974354614753, + "epoch": 0.8484623209953831, "grad_norm": 0.0, - "learning_rate": 6.833119165026993e-06, - "loss": 0.8616, + "learning_rate": 1.1804326938678323e-06, + "loss": 1.0257, "step": 21685 }, { - "epoch": 0.6145257729037377, + "epoch": 0.8485014476876125, "grad_norm": 0.0, - "learning_rate": 6.832248627958146e-06, - "loss": 0.7432, + "learning_rate": 1.1798354801651336e-06, + "loss": 0.9791, "step": 21686 }, { - "epoch": 0.6145541103460002, + "epoch": 0.848540574379842, "grad_norm": 0.0, - "learning_rate": 6.831378117572441e-06, - "loss": 0.8206, + "learning_rate": 1.1792384081032282e-06, + "loss": 0.9125, "step": 21687 }, { - "epoch": 0.6145824477882627, + "epoch": 0.8485797010720714, "grad_norm": 0.0, - "learning_rate": 6.8305076338772146e-06, - "loss": 0.8003, + "learning_rate": 1.1786414776916967e-06, + "loss": 0.8569, "step": 21688 }, { - "epoch": 0.6146107852305251, + "epoch": 0.8486188277643008, "grad_norm": 0.0, - "learning_rate": 6.829637176879802e-06, - "loss": 0.7951, + "learning_rate": 1.178044688940132e-06, + "loss": 0.8896, "step": 21689 }, { - "epoch": 0.6146391226727875, + "epoch": 0.8486579544565303, "grad_norm": 0.0, - "learning_rate": 6.828766746587529e-06, - "loss": 0.8389, + "learning_rate": 1.1774480418581147e-06, + "loss": 1.0187, "step": 21690 }, { - "epoch": 0.61466746011505, + "epoch": 0.8486970811487596, "grad_norm": 0.0, - "learning_rate": 6.827896343007734e-06, - "loss": 0.8617, + "learning_rate": 1.176851536455228e-06, + "loss": 1.0159, "step": 21691 }, { - "epoch": 0.6146957975573125, + "epoch": 0.8487362078409891, "grad_norm": 0.0, - "learning_rate": 6.8270259661477475e-06, - "loss": 0.7684, + "learning_rate": 1.1762551727410476e-06, + "loss": 1.0655, "step": 21692 }, { - "epoch": 0.6147241349995749, + "epoch": 0.8487753345332185, "grad_norm": 0.0, - "learning_rate": 6.826155616014897e-06, - "loss": 0.8629, + "learning_rate": 1.1756589507251515e-06, + "loss": 0.9449, "step": 21693 }, { - "epoch": 0.6147524724418374, + "epoch": 0.848814461225448, "grad_norm": 0.0, - "learning_rate": 6.825285292616517e-06, - "loss": 0.8631, + "learning_rate": 1.1750628704171163e-06, + "loss": 0.9482, "step": 21694 }, { - "epoch": 0.6147808098840999, + "epoch": 0.8488535879176774, "grad_norm": 0.0, - "learning_rate": 6.824414995959938e-06, - "loss": 0.91, + "learning_rate": 1.1744669318265146e-06, + "loss": 0.9653, "step": 21695 }, { - "epoch": 0.6148091473263623, + "epoch": 0.8488927146099069, "grad_norm": 0.0, - "learning_rate": 6.823544726052489e-06, - "loss": 0.8512, + "learning_rate": 1.1738711349629128e-06, + "loss": 0.9409, "step": 21696 }, { - "epoch": 0.6148374847686248, + "epoch": 0.8489318413021363, "grad_norm": 0.0, - "learning_rate": 6.822674482901507e-06, - "loss": 0.8941, + "learning_rate": 1.1732754798358815e-06, + "loss": 0.9282, "step": 21697 }, { - "epoch": 0.6148658222108873, + "epoch": 0.8489709679943658, "grad_norm": 0.0, - "learning_rate": 6.821804266514314e-06, - "loss": 0.9989, + "learning_rate": 1.1726799664549871e-06, + "loss": 0.8943, "step": 21698 }, { - "epoch": 0.6148941596531498, + "epoch": 0.8490100946865952, "grad_norm": 0.0, - "learning_rate": 6.820934076898247e-06, - "loss": 0.802, + "learning_rate": 1.1720845948297888e-06, + "loss": 0.9973, "step": 21699 }, { - "epoch": 0.6149224970954121, + "epoch": 0.8490492213788247, "grad_norm": 0.0, - "learning_rate": 6.820063914060628e-06, - "loss": 0.8484, + "learning_rate": 1.17148936496985e-06, + "loss": 0.8541, "step": 21700 }, { - "epoch": 0.6149508345376746, + "epoch": 0.849088348071054, "grad_norm": 0.0, - "learning_rate": 6.819193778008794e-06, - "loss": 0.7723, + "learning_rate": 1.170894276884732e-06, + "loss": 0.7665, "step": 21701 }, { - "epoch": 0.6149791719799371, + "epoch": 0.8491274747632835, "grad_norm": 0.0, - "learning_rate": 6.818323668750073e-06, - "loss": 0.9022, + "learning_rate": 1.170299330583986e-06, + "loss": 1.0473, "step": 21702 }, { - "epoch": 0.6150075094221995, + "epoch": 0.8491666014555129, "grad_norm": 0.0, - "learning_rate": 6.8174535862917905e-06, - "loss": 0.805, + "learning_rate": 1.1697045260771688e-06, + "loss": 1.0124, "step": 21703 }, { - "epoch": 0.615035846864462, + "epoch": 0.8492057281477424, "grad_norm": 0.0, - "learning_rate": 6.816583530641279e-06, - "loss": 0.9368, + "learning_rate": 1.169109863373833e-06, + "loss": 1.0514, "step": 21704 }, { - "epoch": 0.6150641843067245, + "epoch": 0.8492448548399718, "grad_norm": 0.0, - "learning_rate": 6.815713501805869e-06, - "loss": 0.9263, + "learning_rate": 1.168515342483526e-06, + "loss": 1.0363, "step": 21705 }, { - "epoch": 0.615092521748987, + "epoch": 0.8492839815322013, "grad_norm": 0.0, - "learning_rate": 6.8148434997928846e-06, - "loss": 0.7793, + "learning_rate": 1.1679209634157962e-06, + "loss": 0.9389, "step": 21706 }, { - "epoch": 0.6151208591912494, + "epoch": 0.8493231082244307, "grad_norm": 0.0, - "learning_rate": 6.8139735246096575e-06, - "loss": 0.922, + "learning_rate": 1.167326726180188e-06, + "loss": 0.9657, "step": 21707 }, { - "epoch": 0.6151491966335119, + "epoch": 0.8493622349166602, "grad_norm": 0.0, - "learning_rate": 6.813103576263512e-06, - "loss": 0.9753, + "learning_rate": 1.166732630786247e-06, + "loss": 0.9997, "step": 21708 }, { - "epoch": 0.6151775340757744, + "epoch": 0.8494013616088896, "grad_norm": 0.0, - "learning_rate": 6.812233654761779e-06, - "loss": 0.8215, + "learning_rate": 1.166138677243508e-06, + "loss": 0.9526, "step": 21709 }, { - "epoch": 0.6152058715180367, + "epoch": 0.8494404883011191, "grad_norm": 0.0, - "learning_rate": 6.81136376011179e-06, - "loss": 0.9412, + "learning_rate": 1.1655448655615165e-06, + "loss": 0.9326, "step": 21710 }, { - "epoch": 0.6152342089602992, + "epoch": 0.8494796149933485, "grad_norm": 0.0, - "learning_rate": 6.810493892320864e-06, - "loss": 0.9744, + "learning_rate": 1.1649511957497984e-06, + "loss": 0.9146, "step": 21711 }, { - "epoch": 0.6152625464025617, + "epoch": 0.849518741685578, "grad_norm": 0.0, - "learning_rate": 6.809624051396331e-06, - "loss": 0.9399, + "learning_rate": 1.164357667817898e-06, + "loss": 1.0443, "step": 21712 }, { - "epoch": 0.6152908838448242, + "epoch": 0.8495578683778073, "grad_norm": 0.0, - "learning_rate": 6.808754237345525e-06, - "loss": 0.9285, + "learning_rate": 1.1637642817753391e-06, + "loss": 0.9518, "step": 21713 }, { - "epoch": 0.6153192212870866, + "epoch": 0.8495969950700368, "grad_norm": 0.0, - "learning_rate": 6.8078844501757625e-06, - "loss": 0.7243, + "learning_rate": 1.1631710376316562e-06, + "loss": 0.9564, "step": 21714 }, { - "epoch": 0.6153475587293491, + "epoch": 0.8496361217622662, "grad_norm": 0.0, - "learning_rate": 6.807014689894376e-06, - "loss": 0.7041, + "learning_rate": 1.1625779353963695e-06, + "loss": 1.0307, "step": 21715 }, { - "epoch": 0.6153758961716116, + "epoch": 0.8496752484544956, "grad_norm": 0.0, - "learning_rate": 6.806144956508689e-06, - "loss": 0.8393, + "learning_rate": 1.1619849750790113e-06, + "loss": 0.9352, "step": 21716 }, { - "epoch": 0.615404233613874, + "epoch": 0.8497143751467251, "grad_norm": 0.0, - "learning_rate": 6.805275250026029e-06, - "loss": 0.8675, + "learning_rate": 1.1613921566890972e-06, + "loss": 0.9641, "step": 21717 }, { - "epoch": 0.6154325710561365, + "epoch": 0.8497535018389545, "grad_norm": 0.0, - "learning_rate": 6.804405570453727e-06, - "loss": 0.8879, + "learning_rate": 1.160799480236151e-06, + "loss": 0.923, "step": 21718 }, { - "epoch": 0.615460908498399, + "epoch": 0.849792628531184, "grad_norm": 0.0, - "learning_rate": 6.8035359177990976e-06, - "loss": 0.8442, + "learning_rate": 1.1602069457296882e-06, + "loss": 0.9972, "step": 21719 }, { - "epoch": 0.6154892459406613, + "epoch": 0.8498317552234134, "grad_norm": 0.0, - "learning_rate": 6.802666292069473e-06, - "loss": 0.8903, + "learning_rate": 1.1596145531792246e-06, + "loss": 0.96, "step": 21720 }, { - "epoch": 0.6155175833829238, + "epoch": 0.8498708819156429, "grad_norm": 0.0, - "learning_rate": 6.80179669327218e-06, - "loss": 0.8279, + "learning_rate": 1.1590223025942737e-06, + "loss": 0.9021, "step": 21721 }, { - "epoch": 0.6155459208251863, + "epoch": 0.8499100086078722, "grad_norm": 0.0, - "learning_rate": 6.800927121414539e-06, - "loss": 0.9725, + "learning_rate": 1.158430193984348e-06, + "loss": 0.9256, "step": 21722 }, { - "epoch": 0.6155742582674488, + "epoch": 0.8499491353001017, "grad_norm": 0.0, - "learning_rate": 6.80005757650388e-06, - "loss": 0.8375, + "learning_rate": 1.1578382273589516e-06, + "loss": 0.9451, "step": 21723 }, { - "epoch": 0.6156025957097112, + "epoch": 0.8499882619923311, "grad_norm": 0.0, - "learning_rate": 6.799188058547521e-06, - "loss": 0.8615, + "learning_rate": 1.1572464027275942e-06, + "loss": 0.9539, "step": 21724 }, { - "epoch": 0.6156309331519737, + "epoch": 0.8500273886845606, "grad_norm": 0.0, - "learning_rate": 6.79831856755279e-06, - "loss": 0.859, + "learning_rate": 1.1566547200997792e-06, + "loss": 0.8078, "step": 21725 }, { - "epoch": 0.6156592705942362, + "epoch": 0.85006651537679, "grad_norm": 0.0, - "learning_rate": 6.7974491035270115e-06, - "loss": 0.9277, + "learning_rate": 1.1560631794850108e-06, + "loss": 0.8554, "step": 21726 }, { - "epoch": 0.6156876080364986, + "epoch": 0.8501056420690195, "grad_norm": 0.0, - "learning_rate": 6.796579666477507e-06, - "loss": 0.8269, + "learning_rate": 1.1554717808927817e-06, + "loss": 1.0343, "step": 21727 }, { - "epoch": 0.6157159454787611, + "epoch": 0.8501447687612489, "grad_norm": 0.0, - "learning_rate": 6.7957102564116054e-06, - "loss": 0.8189, + "learning_rate": 1.1548805243325966e-06, + "loss": 1.1209, "step": 21728 }, { - "epoch": 0.6157442829210236, + "epoch": 0.8501838954534784, "grad_norm": 0.0, - "learning_rate": 6.794840873336622e-06, - "loss": 0.8158, + "learning_rate": 1.1542894098139423e-06, + "loss": 0.9078, "step": 21729 }, { - "epoch": 0.6157726203632861, + "epoch": 0.8502230221457078, "grad_norm": 0.0, - "learning_rate": 6.793971517259885e-06, - "loss": 0.9286, + "learning_rate": 1.153698437346319e-06, + "loss": 0.9709, "step": 21730 }, { - "epoch": 0.6158009578055484, + "epoch": 0.8502621488379373, "grad_norm": 0.0, - "learning_rate": 6.793102188188719e-06, - "loss": 0.9096, + "learning_rate": 1.1531076069392133e-06, + "loss": 0.9692, "step": 21731 }, { - "epoch": 0.6158292952478109, + "epoch": 0.8503012755301667, "grad_norm": 0.0, - "learning_rate": 6.7922328861304406e-06, - "loss": 0.7622, + "learning_rate": 1.1525169186021146e-06, + "loss": 0.8004, "step": 21732 }, { - "epoch": 0.6158576326900734, + "epoch": 0.8503404022223962, "grad_norm": 0.0, - "learning_rate": 6.791363611092377e-06, - "loss": 0.8066, + "learning_rate": 1.1519263723445028e-06, + "loss": 0.9895, "step": 21733 }, { - "epoch": 0.6158859701323358, + "epoch": 0.8503795289146255, "grad_norm": 0.0, - "learning_rate": 6.79049436308185e-06, - "loss": 0.7819, + "learning_rate": 1.1513359681758717e-06, + "loss": 1.053, "step": 21734 }, { - "epoch": 0.6159143075745983, + "epoch": 0.850418655606855, "grad_norm": 0.0, - "learning_rate": 6.78962514210618e-06, - "loss": 0.8226, + "learning_rate": 1.1507457061056947e-06, + "loss": 0.8235, "step": 21735 }, { - "epoch": 0.6159426450168608, + "epoch": 0.8504577822990844, "grad_norm": 0.0, - "learning_rate": 6.788755948172691e-06, - "loss": 0.9131, + "learning_rate": 1.1501555861434544e-06, + "loss": 1.0267, "step": 21736 }, { - "epoch": 0.6159709824591232, + "epoch": 0.8504969089913139, "grad_norm": 0.0, - "learning_rate": 6.787886781288702e-06, - "loss": 0.821, + "learning_rate": 1.149565608298624e-06, + "loss": 0.907, "step": 21737 }, { - "epoch": 0.6159993199013857, + "epoch": 0.8505360356835433, "grad_norm": 0.0, - "learning_rate": 6.787017641461534e-06, - "loss": 0.9477, + "learning_rate": 1.1489757725806804e-06, + "loss": 0.9955, "step": 21738 }, { - "epoch": 0.6160276573436482, + "epoch": 0.8505751623757728, "grad_norm": 0.0, - "learning_rate": 6.786148528698512e-06, - "loss": 0.8181, + "learning_rate": 1.1483860789990942e-06, + "loss": 0.9028, "step": 21739 }, { - "epoch": 0.6160559947859107, + "epoch": 0.8506142890680022, "grad_norm": 0.0, - "learning_rate": 6.785279443006951e-06, - "loss": 0.762, + "learning_rate": 1.1477965275633386e-06, + "loss": 0.8237, "step": 21740 }, { - "epoch": 0.616084332228173, + "epoch": 0.8506534157602317, "grad_norm": 0.0, - "learning_rate": 6.784410384394176e-06, - "loss": 0.8702, + "learning_rate": 1.147207118282876e-06, + "loss": 0.9582, "step": 21741 }, { - "epoch": 0.6161126696704355, + "epoch": 0.8506925424524611, "grad_norm": 0.0, - "learning_rate": 6.783541352867511e-06, - "loss": 0.925, + "learning_rate": 1.1466178511671732e-06, + "loss": 0.9865, "step": 21742 }, { - "epoch": 0.616141007112698, + "epoch": 0.8507316691446906, "grad_norm": 0.0, - "learning_rate": 6.782672348434267e-06, - "loss": 0.7847, + "learning_rate": 1.1460287262256963e-06, + "loss": 0.9381, "step": 21743 }, { - "epoch": 0.6161693445549604, + "epoch": 0.85077079583692, "grad_norm": 0.0, - "learning_rate": 6.781803371101774e-06, - "loss": 0.8134, + "learning_rate": 1.1454397434679022e-06, + "loss": 1.0397, "step": 21744 }, { - "epoch": 0.6161976819972229, + "epoch": 0.8508099225291493, "grad_norm": 0.0, - "learning_rate": 6.780934420877341e-06, - "loss": 0.6497, + "learning_rate": 1.1448509029032495e-06, + "loss": 0.862, "step": 21745 }, { - "epoch": 0.6162260194394854, + "epoch": 0.8508490492213788, "grad_norm": 0.0, - "learning_rate": 6.780065497768291e-06, - "loss": 0.9847, + "learning_rate": 1.1442622045411955e-06, + "loss": 0.9359, "step": 21746 }, { - "epoch": 0.6162543568817479, + "epoch": 0.8508881759136082, "grad_norm": 0.0, - "learning_rate": 6.7791966017819496e-06, - "loss": 0.8364, + "learning_rate": 1.1436736483911948e-06, + "loss": 1.0336, "step": 21747 }, { - "epoch": 0.6162826943240103, + "epoch": 0.8509273026058377, "grad_norm": 0.0, - "learning_rate": 6.7783277329256285e-06, - "loss": 0.8429, + "learning_rate": 1.1430852344626963e-06, + "loss": 0.8959, "step": 21748 }, { - "epoch": 0.6163110317662728, + "epoch": 0.8509664292980671, "grad_norm": 0.0, - "learning_rate": 6.777458891206649e-06, - "loss": 0.7762, + "learning_rate": 1.142496962765154e-06, + "loss": 0.9725, "step": 21749 }, { - "epoch": 0.6163393692085353, + "epoch": 0.8510055559902966, "grad_norm": 0.0, - "learning_rate": 6.776590076632334e-06, - "loss": 0.8622, + "learning_rate": 1.1419088333080074e-06, + "loss": 1.0051, "step": 21750 }, { - "epoch": 0.6163677066507977, + "epoch": 0.851044682682526, "grad_norm": 0.0, - "learning_rate": 6.775721289209994e-06, - "loss": 0.7729, + "learning_rate": 1.1413208461007063e-06, + "loss": 0.9824, "step": 21751 }, { - "epoch": 0.6163960440930601, + "epoch": 0.8510838093747555, "grad_norm": 0.0, - "learning_rate": 6.774852528946951e-06, - "loss": 0.7718, + "learning_rate": 1.1407330011526907e-06, + "loss": 1.0102, "step": 21752 }, { - "epoch": 0.6164243815353226, + "epoch": 0.8511229360669849, "grad_norm": 0.0, - "learning_rate": 6.773983795850523e-06, - "loss": 0.8417, + "learning_rate": 1.1401452984734051e-06, + "loss": 0.9937, "step": 21753 }, { - "epoch": 0.6164527189775851, + "epoch": 0.8511620627592144, "grad_norm": 0.0, - "learning_rate": 6.7731150899280275e-06, - "loss": 0.8853, + "learning_rate": 1.139557738072281e-06, + "loss": 0.8508, "step": 21754 }, { - "epoch": 0.6164810564198475, + "epoch": 0.8512011894514437, "grad_norm": 0.0, - "learning_rate": 6.772246411186784e-06, - "loss": 0.7951, + "learning_rate": 1.1389703199587577e-06, + "loss": 1.0302, "step": 21755 }, { - "epoch": 0.61650939386211, + "epoch": 0.8512403161436732, "grad_norm": 0.0, - "learning_rate": 6.771377759634105e-06, - "loss": 0.7812, + "learning_rate": 1.1383830441422671e-06, + "loss": 0.9916, "step": 21756 }, { - "epoch": 0.6165377313043725, + "epoch": 0.8512794428359026, "grad_norm": 0.0, - "learning_rate": 6.770509135277315e-06, - "loss": 0.9404, + "learning_rate": 1.1377959106322423e-06, + "loss": 0.914, "step": 21757 }, { - "epoch": 0.6165660687466349, + "epoch": 0.8513185695281321, "grad_norm": 0.0, - "learning_rate": 6.769640538123721e-06, - "loss": 0.9042, + "learning_rate": 1.1372089194381087e-06, + "loss": 1.0332, "step": 21758 }, { - "epoch": 0.6165944061888974, + "epoch": 0.8513576962203615, "grad_norm": 0.0, - "learning_rate": 6.768771968180643e-06, - "loss": 0.8251, + "learning_rate": 1.1366220705692964e-06, + "loss": 0.8952, "step": 21759 }, { - "epoch": 0.6166227436311599, + "epoch": 0.851396822912591, "grad_norm": 0.0, - "learning_rate": 6.767903425455402e-06, - "loss": 0.8502, + "learning_rate": 1.1360353640352228e-06, + "loss": 0.9185, "step": 21760 }, { - "epoch": 0.6166510810734223, + "epoch": 0.8514359496048204, "grad_norm": 0.0, - "learning_rate": 6.7670349099553075e-06, - "loss": 0.7558, + "learning_rate": 1.1354487998453178e-06, + "loss": 0.9831, "step": 21761 }, { - "epoch": 0.6166794185156848, + "epoch": 0.8514750762970499, "grad_norm": 0.0, - "learning_rate": 6.766166421687679e-06, - "loss": 0.8524, + "learning_rate": 1.1348623780089962e-06, + "loss": 0.9396, "step": 21762 }, { - "epoch": 0.6167077559579472, + "epoch": 0.8515142029892793, "grad_norm": 0.0, - "learning_rate": 6.765297960659836e-06, - "loss": 0.7726, + "learning_rate": 1.1342760985356772e-06, + "loss": 0.9346, "step": 21763 }, { - "epoch": 0.6167360934002097, + "epoch": 0.8515533296815088, "grad_norm": 0.0, - "learning_rate": 6.764429526879086e-06, - "loss": 0.8788, + "learning_rate": 1.1336899614347707e-06, + "loss": 0.9463, "step": 21764 }, { - "epoch": 0.6167644308424721, + "epoch": 0.8515924563737381, "grad_norm": 0.0, - "learning_rate": 6.763561120352748e-06, - "loss": 0.7434, + "learning_rate": 1.1331039667156973e-06, + "loss": 0.8795, "step": 21765 }, { - "epoch": 0.6167927682847346, + "epoch": 0.8516315830659676, "grad_norm": 0.0, - "learning_rate": 6.762692741088136e-06, - "loss": 0.7928, + "learning_rate": 1.1325181143878617e-06, + "loss": 0.851, "step": 21766 }, { - "epoch": 0.6168211057269971, + "epoch": 0.851670709758197, "grad_norm": 0.0, - "learning_rate": 6.761824389092564e-06, - "loss": 0.8654, + "learning_rate": 1.1319324044606762e-06, + "loss": 0.8957, "step": 21767 }, { - "epoch": 0.6168494431692595, + "epoch": 0.8517098364504265, "grad_norm": 0.0, - "learning_rate": 6.760956064373352e-06, - "loss": 0.8548, + "learning_rate": 1.131346836943541e-06, + "loss": 0.964, "step": 21768 }, { - "epoch": 0.616877780611522, + "epoch": 0.8517489631426559, "grad_norm": 0.0, - "learning_rate": 6.760087766937806e-06, - "loss": 0.8395, + "learning_rate": 1.1307614118458631e-06, + "loss": 0.9372, "step": 21769 }, { - "epoch": 0.6169061180537845, + "epoch": 0.8517880898348854, "grad_norm": 0.0, - "learning_rate": 6.759219496793245e-06, - "loss": 0.7989, + "learning_rate": 1.130176129177043e-06, + "loss": 1.0019, "step": 21770 }, { - "epoch": 0.616934455496047, + "epoch": 0.8518272165271148, "grad_norm": 0.0, - "learning_rate": 6.758351253946984e-06, - "loss": 0.8536, + "learning_rate": 1.1295909889464817e-06, + "loss": 0.9031, "step": 21771 }, { - "epoch": 0.6169627929383094, + "epoch": 0.8518663432193443, "grad_norm": 0.0, - "learning_rate": 6.757483038406331e-06, - "loss": 0.9484, + "learning_rate": 1.129005991163572e-06, + "loss": 0.9753, "step": 21772 }, { - "epoch": 0.6169911303805719, + "epoch": 0.8519054699115737, "grad_norm": 0.0, - "learning_rate": 6.756614850178603e-06, - "loss": 0.8868, + "learning_rate": 1.1284211358377094e-06, + "loss": 1.0884, "step": 21773 }, { - "epoch": 0.6170194678228343, + "epoch": 0.8519445966038031, "grad_norm": 0.0, - "learning_rate": 6.755746689271112e-06, - "loss": 0.8163, + "learning_rate": 1.1278364229782869e-06, + "loss": 0.9478, "step": 21774 }, { - "epoch": 0.6170478052650967, + "epoch": 0.8519837232960326, "grad_norm": 0.0, - "learning_rate": 6.754878555691171e-06, - "loss": 0.872, + "learning_rate": 1.1272518525946964e-06, + "loss": 0.9091, "step": 21775 }, { - "epoch": 0.6170761427073592, + "epoch": 0.8520228499882619, "grad_norm": 0.0, - "learning_rate": 6.754010449446098e-06, - "loss": 0.8406, + "learning_rate": 1.1266674246963216e-06, + "loss": 0.8909, "step": 21776 }, { - "epoch": 0.6171044801496217, + "epoch": 0.8520619766804914, "grad_norm": 0.0, - "learning_rate": 6.7531423705431945e-06, - "loss": 0.9298, + "learning_rate": 1.1260831392925498e-06, + "loss": 0.95, "step": 21777 }, { - "epoch": 0.6171328175918842, + "epoch": 0.8521011033727208, "grad_norm": 0.0, - "learning_rate": 6.752274318989779e-06, - "loss": 0.7172, + "learning_rate": 1.1254989963927599e-06, + "loss": 0.942, "step": 21778 }, { - "epoch": 0.6171611550341466, + "epoch": 0.8521402300649503, "grad_norm": 0.0, - "learning_rate": 6.7514062947931655e-06, - "loss": 0.9642, + "learning_rate": 1.1249149960063388e-06, + "loss": 0.9366, "step": 21779 }, { - "epoch": 0.6171894924764091, + "epoch": 0.8521793567571797, "grad_norm": 0.0, - "learning_rate": 6.75053829796066e-06, - "loss": 0.8731, + "learning_rate": 1.1243311381426614e-06, + "loss": 0.9575, "step": 21780 }, { - "epoch": 0.6172178299186716, + "epoch": 0.8522184834494092, "grad_norm": 0.0, - "learning_rate": 6.7496703284995824e-06, - "loss": 0.8431, + "learning_rate": 1.1237474228111046e-06, + "loss": 1.0227, "step": 21781 }, { - "epoch": 0.617246167360934, + "epoch": 0.8522576101416386, "grad_norm": 0.0, - "learning_rate": 6.7488023864172345e-06, - "loss": 0.7916, + "learning_rate": 1.1231638500210408e-06, + "loss": 0.9403, "step": 21782 }, { - "epoch": 0.6172745048031965, + "epoch": 0.8522967368338681, "grad_norm": 0.0, - "learning_rate": 6.7479344717209305e-06, - "loss": 0.8325, + "learning_rate": 1.1225804197818413e-06, + "loss": 1.102, "step": 21783 }, { - "epoch": 0.617302842245459, + "epoch": 0.8523358635260975, "grad_norm": 0.0, - "learning_rate": 6.747066584417987e-06, - "loss": 0.8492, + "learning_rate": 1.1219971321028766e-06, + "loss": 1.0047, "step": 21784 }, { - "epoch": 0.6173311796877213, + "epoch": 0.852374990218327, "grad_norm": 0.0, - "learning_rate": 6.746198724515705e-06, - "loss": 0.8571, + "learning_rate": 1.1214139869935147e-06, + "loss": 1.0143, "step": 21785 }, { - "epoch": 0.6173595171299838, + "epoch": 0.8524141169105564, "grad_norm": 0.0, - "learning_rate": 6.745330892021402e-06, - "loss": 0.9452, + "learning_rate": 1.120830984463117e-06, + "loss": 0.997, "step": 21786 }, { - "epoch": 0.6173878545722463, + "epoch": 0.8524532436027858, "grad_norm": 0.0, - "learning_rate": 6.744463086942383e-06, - "loss": 0.8674, + "learning_rate": 1.1202481245210485e-06, + "loss": 0.9682, "step": 21787 }, { - "epoch": 0.6174161920145088, + "epoch": 0.8524923702950152, "grad_norm": 0.0, - "learning_rate": 6.74359530928596e-06, - "loss": 0.9906, + "learning_rate": 1.1196654071766689e-06, + "loss": 0.8909, "step": 21788 }, { - "epoch": 0.6174445294567712, + "epoch": 0.8525314969872447, "grad_norm": 0.0, - "learning_rate": 6.742727559059448e-06, - "loss": 0.8524, + "learning_rate": 1.1190828324393333e-06, + "loss": 0.9376, "step": 21789 }, { - "epoch": 0.6174728668990337, + "epoch": 0.8525706236794741, "grad_norm": 0.0, - "learning_rate": 6.741859836270146e-06, - "loss": 0.8357, + "learning_rate": 1.1185004003183996e-06, + "loss": 0.9384, "step": 21790 }, { - "epoch": 0.6175012043412962, + "epoch": 0.8526097503717036, "grad_norm": 0.0, - "learning_rate": 6.7409921409253685e-06, - "loss": 0.8522, + "learning_rate": 1.1179181108232196e-06, + "loss": 0.959, "step": 21791 }, { - "epoch": 0.6175295417835586, + "epoch": 0.852648877063933, "grad_norm": 0.0, - "learning_rate": 6.740124473032428e-06, - "loss": 0.9457, + "learning_rate": 1.1173359639631476e-06, + "loss": 0.9164, "step": 21792 }, { - "epoch": 0.6175578792258211, + "epoch": 0.8526880037561625, "grad_norm": 0.0, - "learning_rate": 6.739256832598626e-06, - "loss": 0.8098, + "learning_rate": 1.1167539597475273e-06, + "loss": 0.8835, "step": 21793 }, { - "epoch": 0.6175862166680836, + "epoch": 0.8527271304483919, "grad_norm": 0.0, - "learning_rate": 6.73838921963128e-06, - "loss": 0.7956, + "learning_rate": 1.1161720981857082e-06, + "loss": 0.8234, "step": 21794 }, { - "epoch": 0.617614554110346, + "epoch": 0.8527662571406214, "grad_norm": 0.0, - "learning_rate": 6.737521634137687e-06, - "loss": 0.9779, + "learning_rate": 1.1155903792870303e-06, + "loss": 0.8661, "step": 21795 }, { - "epoch": 0.6176428915526084, + "epoch": 0.8528053838328508, "grad_norm": 0.0, - "learning_rate": 6.736654076125162e-06, - "loss": 0.791, + "learning_rate": 1.115008803060842e-06, + "loss": 0.9655, "step": 21796 }, { - "epoch": 0.6176712289948709, + "epoch": 0.8528445105250803, "grad_norm": 0.0, - "learning_rate": 6.735786545601015e-06, - "loss": 0.8185, + "learning_rate": 1.1144273695164764e-06, + "loss": 0.9263, "step": 21797 }, { - "epoch": 0.6176995664371334, + "epoch": 0.8528836372173096, "grad_norm": 0.0, - "learning_rate": 6.734919042572548e-06, - "loss": 0.8718, + "learning_rate": 1.1138460786632743e-06, + "loss": 0.9005, "step": 21798 }, { - "epoch": 0.6177279038793958, + "epoch": 0.8529227639095391, "grad_norm": 0.0, - "learning_rate": 6.734051567047068e-06, - "loss": 0.9011, + "learning_rate": 1.113264930510568e-06, + "loss": 1.0291, "step": 21799 }, { - "epoch": 0.6177562413216583, + "epoch": 0.8529618906017685, "grad_norm": 0.0, - "learning_rate": 6.7331841190318856e-06, - "loss": 0.7667, + "learning_rate": 1.1126839250676913e-06, + "loss": 0.9613, "step": 21800 }, { - "epoch": 0.6177845787639208, + "epoch": 0.853001017293998, "grad_norm": 0.0, - "learning_rate": 6.732316698534307e-06, - "loss": 0.8333, + "learning_rate": 1.1121030623439744e-06, + "loss": 0.9264, "step": 21801 }, { - "epoch": 0.6178129162061833, + "epoch": 0.8530401439862274, "grad_norm": 0.0, - "learning_rate": 6.731449305561641e-06, - "loss": 0.7618, + "learning_rate": 1.1115223423487463e-06, + "loss": 1.0495, "step": 21802 }, { - "epoch": 0.6178412536484457, + "epoch": 0.8530792706784568, "grad_norm": 0.0, - "learning_rate": 6.730581940121188e-06, - "loss": 0.8626, + "learning_rate": 1.11094176509133e-06, + "loss": 0.9063, "step": 21803 }, { - "epoch": 0.6178695910907082, + "epoch": 0.8531183973706863, "grad_norm": 0.0, - "learning_rate": 6.729714602220256e-06, - "loss": 0.8688, + "learning_rate": 1.1103613305810512e-06, + "loss": 0.9938, "step": 21804 }, { - "epoch": 0.6178979285329707, + "epoch": 0.8531575240629157, "grad_norm": 0.0, - "learning_rate": 6.728847291866156e-06, - "loss": 0.8596, + "learning_rate": 1.109781038827229e-06, + "loss": 0.9516, "step": 21805 }, { - "epoch": 0.617926265975233, + "epoch": 0.8531966507551452, "grad_norm": 0.0, - "learning_rate": 6.727980009066186e-06, - "loss": 0.784, + "learning_rate": 1.1092008898391859e-06, + "loss": 1.1035, "step": 21806 }, { - "epoch": 0.6179546034174955, + "epoch": 0.8532357774473746, "grad_norm": 0.0, - "learning_rate": 6.727112753827658e-06, - "loss": 0.8125, + "learning_rate": 1.1086208836262336e-06, + "loss": 0.9639, "step": 21807 }, { - "epoch": 0.617982940859758, + "epoch": 0.853274904139604, "grad_norm": 0.0, - "learning_rate": 6.726245526157877e-06, - "loss": 0.9078, + "learning_rate": 1.1080410201976898e-06, + "loss": 0.895, "step": 21808 }, { - "epoch": 0.6180112783020204, + "epoch": 0.8533140308318334, "grad_norm": 0.0, - "learning_rate": 6.725378326064141e-06, - "loss": 0.869, + "learning_rate": 1.107461299562862e-06, + "loss": 0.897, "step": 21809 }, { - "epoch": 0.6180396157442829, + "epoch": 0.8533531575240629, "grad_norm": 0.0, - "learning_rate": 6.7245111535537654e-06, - "loss": 0.7299, + "learning_rate": 1.1068817217310657e-06, + "loss": 0.9554, "step": 21810 }, { - "epoch": 0.6180679531865454, + "epoch": 0.8533922842162923, "grad_norm": 0.0, - "learning_rate": 6.723644008634043e-06, - "loss": 0.8289, + "learning_rate": 1.1063022867116035e-06, + "loss": 1.0039, "step": 21811 }, { - "epoch": 0.6180962906288079, + "epoch": 0.8534314109085218, "grad_norm": 0.0, - "learning_rate": 6.722776891312284e-06, - "loss": 0.7378, + "learning_rate": 1.1057229945137848e-06, + "loss": 0.9159, "step": 21812 }, { - "epoch": 0.6181246280710703, + "epoch": 0.8534705376007512, "grad_norm": 0.0, - "learning_rate": 6.721909801595794e-06, - "loss": 0.8479, + "learning_rate": 1.1051438451469055e-06, + "loss": 0.7787, "step": 21813 }, { - "epoch": 0.6181529655133328, + "epoch": 0.8535096642929807, "grad_norm": 0.0, - "learning_rate": 6.721042739491874e-06, - "loss": 0.8678, + "learning_rate": 1.1045648386202735e-06, + "loss": 0.9435, "step": 21814 }, { - "epoch": 0.6181813029555953, + "epoch": 0.8535487909852101, "grad_norm": 0.0, - "learning_rate": 6.720175705007832e-06, - "loss": 0.9035, + "learning_rate": 1.1039859749431814e-06, + "loss": 0.9025, "step": 21815 }, { - "epoch": 0.6182096403978576, + "epoch": 0.8535879176774396, "grad_norm": 0.0, - "learning_rate": 6.7193086981509635e-06, - "loss": 0.8481, + "learning_rate": 1.1034072541249297e-06, + "loss": 1.0587, "step": 21816 }, { - "epoch": 0.6182379778401201, + "epoch": 0.853627044369669, "grad_norm": 0.0, - "learning_rate": 6.718441718928577e-06, - "loss": 0.9373, + "learning_rate": 1.1028286761748076e-06, + "loss": 0.9459, "step": 21817 }, { - "epoch": 0.6182663152823826, + "epoch": 0.8536661710618985, "grad_norm": 0.0, - "learning_rate": 6.717574767347977e-06, - "loss": 0.8214, + "learning_rate": 1.1022502411021086e-06, + "loss": 0.9754, "step": 21818 }, { - "epoch": 0.6182946527246451, + "epoch": 0.8537052977541278, "grad_norm": 0.0, - "learning_rate": 6.71670784341646e-06, - "loss": 0.8687, + "learning_rate": 1.101671948916121e-06, + "loss": 0.9797, "step": 21819 }, { - "epoch": 0.6183229901669075, + "epoch": 0.8537444244463573, "grad_norm": 0.0, - "learning_rate": 6.715840947141332e-06, - "loss": 0.9056, + "learning_rate": 1.1010937996261329e-06, + "loss": 0.8976, "step": 21820 }, { - "epoch": 0.61835132760917, + "epoch": 0.8537835511385867, "grad_norm": 0.0, - "learning_rate": 6.714974078529901e-06, - "loss": 0.9038, + "learning_rate": 1.1005157932414257e-06, + "loss": 0.9951, "step": 21821 }, { - "epoch": 0.6183796650514325, + "epoch": 0.8538226778308162, "grad_norm": 0.0, - "learning_rate": 6.7141072375894575e-06, - "loss": 0.8541, + "learning_rate": 1.099937929771283e-06, + "loss": 0.9385, "step": 21822 }, { - "epoch": 0.6184080024936949, + "epoch": 0.8538618045230456, "grad_norm": 0.0, - "learning_rate": 6.713240424327314e-06, - "loss": 0.8835, + "learning_rate": 1.0993602092249855e-06, + "loss": 0.8996, "step": 21823 }, { - "epoch": 0.6184363399359574, + "epoch": 0.8539009312152751, "grad_norm": 0.0, - "learning_rate": 6.712373638750762e-06, - "loss": 0.8074, + "learning_rate": 1.0987826316118123e-06, + "loss": 0.9731, "step": 21824 }, { - "epoch": 0.6184646773782199, + "epoch": 0.8539400579075045, "grad_norm": 0.0, - "learning_rate": 6.711506880867109e-06, - "loss": 0.8269, + "learning_rate": 1.0982051969410334e-06, + "loss": 0.9608, "step": 21825 }, { - "epoch": 0.6184930148204824, + "epoch": 0.853979184599734, "grad_norm": 0.0, - "learning_rate": 6.710640150683656e-06, - "loss": 0.8765, + "learning_rate": 1.0976279052219262e-06, + "loss": 0.9501, "step": 21826 }, { - "epoch": 0.6185213522627447, + "epoch": 0.8540183112919634, "grad_norm": 0.0, - "learning_rate": 6.7097734482077e-06, - "loss": 0.9168, + "learning_rate": 1.0970507564637579e-06, + "loss": 0.8977, "step": 21827 }, { - "epoch": 0.6185496897050072, + "epoch": 0.8540574379841929, "grad_norm": 0.0, - "learning_rate": 6.708906773446544e-06, - "loss": 0.9141, + "learning_rate": 1.0964737506757983e-06, + "loss": 0.9429, "step": 21828 }, { - "epoch": 0.6185780271472697, + "epoch": 0.8540965646764223, "grad_norm": 0.0, - "learning_rate": 6.708040126407493e-06, - "loss": 0.7822, + "learning_rate": 1.0958968878673137e-06, + "loss": 1.0587, "step": 21829 }, { - "epoch": 0.6186063645895321, + "epoch": 0.8541356913686516, "grad_norm": 0.0, - "learning_rate": 6.7071735070978396e-06, - "loss": 0.7991, + "learning_rate": 1.095320168047569e-06, + "loss": 0.865, "step": 21830 }, { - "epoch": 0.6186347020317946, + "epoch": 0.8541748180608811, "grad_norm": 0.0, - "learning_rate": 6.706306915524887e-06, - "loss": 0.8788, + "learning_rate": 1.0947435912258231e-06, + "loss": 0.8919, "step": 21831 }, { - "epoch": 0.6186630394740571, + "epoch": 0.8542139447531105, "grad_norm": 0.0, - "learning_rate": 6.705440351695932e-06, - "loss": 0.888, + "learning_rate": 1.0941671574113355e-06, + "loss": 0.9851, "step": 21832 }, { - "epoch": 0.6186913769163195, + "epoch": 0.85425307144534, "grad_norm": 0.0, - "learning_rate": 6.704573815618277e-06, - "loss": 0.8702, + "learning_rate": 1.0935908666133644e-06, + "loss": 1.0501, "step": 21833 }, { - "epoch": 0.618719714358582, + "epoch": 0.8542921981375694, "grad_norm": 0.0, - "learning_rate": 6.703707307299224e-06, - "loss": 0.7954, + "learning_rate": 1.0930147188411655e-06, + "loss": 0.919, "step": 21834 }, { - "epoch": 0.6187480518008445, + "epoch": 0.8543313248297989, "grad_norm": 0.0, - "learning_rate": 6.702840826746065e-06, - "loss": 0.8258, + "learning_rate": 1.092438714103986e-06, + "loss": 0.9574, "step": 21835 }, { - "epoch": 0.618776389243107, + "epoch": 0.8543704515220283, "grad_norm": 0.0, - "learning_rate": 6.7019743739661025e-06, - "loss": 0.8583, + "learning_rate": 1.0918628524110808e-06, + "loss": 1.0066, "step": 21836 }, { - "epoch": 0.6188047266853693, + "epoch": 0.8544095782142578, "grad_norm": 0.0, - "learning_rate": 6.701107948966635e-06, - "loss": 0.8464, + "learning_rate": 1.0912871337716968e-06, + "loss": 1.0262, "step": 21837 }, { - "epoch": 0.6188330641276318, + "epoch": 0.8544487049064872, "grad_norm": 0.0, - "learning_rate": 6.70024155175496e-06, - "loss": 0.8337, + "learning_rate": 1.0907115581950755e-06, + "loss": 0.9324, "step": 21838 }, { - "epoch": 0.6188614015698943, + "epoch": 0.8544878315987167, "grad_norm": 0.0, - "learning_rate": 6.699375182338379e-06, - "loss": 0.7678, + "learning_rate": 1.090136125690463e-06, + "loss": 1.0026, "step": 21839 }, { - "epoch": 0.6188897390121567, + "epoch": 0.854526958290946, "grad_norm": 0.0, - "learning_rate": 6.698508840724182e-06, - "loss": 0.9094, + "learning_rate": 1.0895608362671005e-06, + "loss": 0.9894, "step": 21840 }, { - "epoch": 0.6189180764544192, + "epoch": 0.8545660849831755, "grad_norm": 0.0, - "learning_rate": 6.697642526919671e-06, - "loss": 1.0568, + "learning_rate": 1.0889856899342267e-06, + "loss": 0.9764, "step": 21841 }, { - "epoch": 0.6189464138966817, + "epoch": 0.8546052116754049, "grad_norm": 0.0, - "learning_rate": 6.696776240932148e-06, - "loss": 0.9548, + "learning_rate": 1.0884106867010746e-06, + "loss": 0.9979, "step": 21842 }, { - "epoch": 0.6189747513389442, + "epoch": 0.8546443383676344, "grad_norm": 0.0, - "learning_rate": 6.6959099827689e-06, - "loss": 0.8733, + "learning_rate": 1.087835826576883e-06, + "loss": 1.0507, "step": 21843 }, { - "epoch": 0.6190030887812066, + "epoch": 0.8546834650598638, "grad_norm": 0.0, - "learning_rate": 6.695043752437234e-06, - "loss": 0.8359, + "learning_rate": 1.0872611095708773e-06, + "loss": 0.9501, "step": 21844 }, { - "epoch": 0.6190314262234691, + "epoch": 0.8547225917520933, "grad_norm": 0.0, - "learning_rate": 6.694177549944436e-06, - "loss": 0.7554, + "learning_rate": 1.0866865356922907e-06, + "loss": 1.0187, "step": 21845 }, { - "epoch": 0.6190597636657316, + "epoch": 0.8547617184443227, "grad_norm": 0.0, - "learning_rate": 6.693311375297811e-06, - "loss": 0.8691, + "learning_rate": 1.0861121049503487e-06, + "loss": 0.8914, "step": 21846 }, { - "epoch": 0.619088101107994, + "epoch": 0.8548008451365522, "grad_norm": 0.0, - "learning_rate": 6.692445228504656e-06, - "loss": 0.7212, + "learning_rate": 1.0855378173542786e-06, + "loss": 0.8592, "step": 21847 }, { - "epoch": 0.6191164385502564, + "epoch": 0.8548399718287816, "grad_norm": 0.0, - "learning_rate": 6.691579109572257e-06, - "loss": 0.7998, + "learning_rate": 1.0849636729132994e-06, + "loss": 0.8931, "step": 21848 }, { - "epoch": 0.6191447759925189, + "epoch": 0.8548790985210111, "grad_norm": 0.0, - "learning_rate": 6.690713018507917e-06, - "loss": 0.9198, + "learning_rate": 1.084389671636632e-06, + "loss": 1.0017, "step": 21849 }, { - "epoch": 0.6191731134347814, + "epoch": 0.8549182252132405, "grad_norm": 0.0, - "learning_rate": 6.6898469553189325e-06, - "loss": 0.6979, + "learning_rate": 1.0838158135334942e-06, + "loss": 0.8854, "step": 21850 }, { - "epoch": 0.6192014508770438, + "epoch": 0.85495735190547, "grad_norm": 0.0, - "learning_rate": 6.688980920012593e-06, - "loss": 0.8806, + "learning_rate": 1.0832420986131044e-06, + "loss": 0.8887, "step": 21851 }, { - "epoch": 0.6192297883193063, + "epoch": 0.8549964785976993, "grad_norm": 0.0, - "learning_rate": 6.688114912596202e-06, - "loss": 0.934, + "learning_rate": 1.0826685268846704e-06, + "loss": 1.04, "step": 21852 }, { - "epoch": 0.6192581257615688, + "epoch": 0.8550356052899288, "grad_norm": 0.0, - "learning_rate": 6.687248933077045e-06, - "loss": 0.9032, + "learning_rate": 1.0820950983574064e-06, + "loss": 0.9308, "step": 21853 }, { - "epoch": 0.6192864632038312, + "epoch": 0.8550747319821582, "grad_norm": 0.0, - "learning_rate": 6.686382981462421e-06, - "loss": 0.89, + "learning_rate": 1.0815218130405203e-06, + "loss": 0.8708, "step": 21854 }, { - "epoch": 0.6193148006460937, + "epoch": 0.8551138586743877, "grad_norm": 0.0, - "learning_rate": 6.685517057759625e-06, - "loss": 0.8848, + "learning_rate": 1.0809486709432204e-06, + "loss": 1.0982, "step": 21855 }, { - "epoch": 0.6193431380883562, + "epoch": 0.8551529853666171, "grad_norm": 0.0, - "learning_rate": 6.684651161975948e-06, - "loss": 0.8676, + "learning_rate": 1.0803756720747072e-06, + "loss": 0.8783, "step": 21856 }, { - "epoch": 0.6193714755306186, + "epoch": 0.8551921120588466, "grad_norm": 0.0, - "learning_rate": 6.683785294118684e-06, - "loss": 0.8298, + "learning_rate": 1.0798028164441854e-06, + "loss": 0.9764, "step": 21857 }, { - "epoch": 0.619399812972881, + "epoch": 0.855231238751076, "grad_norm": 0.0, - "learning_rate": 6.6829194541951315e-06, - "loss": 0.7756, + "learning_rate": 1.0792301040608489e-06, + "loss": 1.0112, "step": 21858 }, { - "epoch": 0.6194281504151435, + "epoch": 0.8552703654433054, "grad_norm": 0.0, - "learning_rate": 6.682053642212576e-06, - "loss": 0.8694, + "learning_rate": 1.0786575349339013e-06, + "loss": 1.067, "step": 21859 }, { - "epoch": 0.619456487857406, + "epoch": 0.8553094921355349, "grad_norm": 0.0, - "learning_rate": 6.681187858178321e-06, - "loss": 0.8441, + "learning_rate": 1.0780851090725342e-06, + "loss": 1.0621, "step": 21860 }, { - "epoch": 0.6194848252996684, + "epoch": 0.8553486188277643, "grad_norm": 0.0, - "learning_rate": 6.680322102099648e-06, - "loss": 0.8742, + "learning_rate": 1.0775128264859413e-06, + "loss": 0.9859, "step": 21861 }, { - "epoch": 0.6195131627419309, + "epoch": 0.8553877455199937, "grad_norm": 0.0, - "learning_rate": 6.679456373983854e-06, - "loss": 0.92, + "learning_rate": 1.0769406871833088e-06, + "loss": 0.9441, "step": 21862 }, { - "epoch": 0.6195415001841934, + "epoch": 0.8554268722122231, "grad_norm": 0.0, - "learning_rate": 6.678590673838234e-06, - "loss": 0.909, + "learning_rate": 1.0763686911738313e-06, + "loss": 0.8719, "step": 21863 }, { - "epoch": 0.6195698376264558, + "epoch": 0.8554659989044526, "grad_norm": 0.0, - "learning_rate": 6.677725001670078e-06, - "loss": 0.9313, + "learning_rate": 1.0757968384666894e-06, + "loss": 0.982, "step": 21864 }, { - "epoch": 0.6195981750687183, + "epoch": 0.855505125596682, "grad_norm": 0.0, - "learning_rate": 6.676859357486676e-06, - "loss": 0.8924, + "learning_rate": 1.075225129071068e-06, + "loss": 1.0718, "step": 21865 }, { - "epoch": 0.6196265125109808, + "epoch": 0.8555442522889115, "grad_norm": 0.0, - "learning_rate": 6.675993741295327e-06, - "loss": 0.9262, + "learning_rate": 1.0746535629961473e-06, + "loss": 0.8931, "step": 21866 }, { - "epoch": 0.6196548499532433, + "epoch": 0.8555833789811409, "grad_norm": 0.0, - "learning_rate": 6.6751281531033116e-06, - "loss": 0.7947, + "learning_rate": 1.0740821402511049e-06, + "loss": 0.9867, "step": 21867 }, { - "epoch": 0.6196831873955057, + "epoch": 0.8556225056733704, "grad_norm": 0.0, - "learning_rate": 6.674262592917933e-06, - "loss": 0.7587, + "learning_rate": 1.0735108608451195e-06, + "loss": 0.8602, "step": 21868 }, { - "epoch": 0.6197115248377681, + "epoch": 0.8556616323655998, "grad_norm": 0.0, - "learning_rate": 6.673397060746469e-06, - "loss": 0.9097, + "learning_rate": 1.0729397247873663e-06, + "loss": 1.0119, "step": 21869 }, { - "epoch": 0.6197398622800306, + "epoch": 0.8557007590578293, "grad_norm": 0.0, - "learning_rate": 6.672531556596218e-06, - "loss": 0.8405, + "learning_rate": 1.0723687320870125e-06, + "loss": 0.8961, "step": 21870 }, { - "epoch": 0.619768199722293, + "epoch": 0.8557398857500587, "grad_norm": 0.0, - "learning_rate": 6.671666080474471e-06, - "loss": 0.929, + "learning_rate": 1.0717978827532293e-06, + "loss": 0.9716, "step": 21871 }, { - "epoch": 0.6197965371645555, + "epoch": 0.8557790124422882, "grad_norm": 0.0, - "learning_rate": 6.670800632388514e-06, - "loss": 0.8633, + "learning_rate": 1.0712271767951853e-06, + "loss": 1.093, "step": 21872 }, { - "epoch": 0.619824874606818, + "epoch": 0.8558181391345175, "grad_norm": 0.0, - "learning_rate": 6.669935212345645e-06, - "loss": 0.8013, + "learning_rate": 1.0706566142220464e-06, + "loss": 0.8768, "step": 21873 }, { - "epoch": 0.6198532120490805, + "epoch": 0.855857265826747, "grad_norm": 0.0, - "learning_rate": 6.6690698203531446e-06, - "loss": 0.8435, + "learning_rate": 1.0700861950429708e-06, + "loss": 0.939, "step": 21874 }, { - "epoch": 0.6198815494913429, + "epoch": 0.8558963925189764, "grad_norm": 0.0, - "learning_rate": 6.668204456418304e-06, - "loss": 0.8921, + "learning_rate": 1.0695159192671234e-06, + "loss": 0.8906, "step": 21875 }, { - "epoch": 0.6199098869336054, + "epoch": 0.8559355192112059, "grad_norm": 0.0, - "learning_rate": 6.6673391205484175e-06, - "loss": 0.8635, + "learning_rate": 1.0689457869036579e-06, + "loss": 0.8797, "step": 21876 }, { - "epoch": 0.6199382243758679, + "epoch": 0.8559746459034353, "grad_norm": 0.0, - "learning_rate": 6.666473812750769e-06, - "loss": 0.9371, + "learning_rate": 1.0683757979617316e-06, + "loss": 0.9884, "step": 21877 }, { - "epoch": 0.6199665618181303, + "epoch": 0.8560137725956648, "grad_norm": 0.0, - "learning_rate": 6.66560853303265e-06, - "loss": 0.864, + "learning_rate": 1.067805952450498e-06, + "loss": 0.9774, "step": 21878 }, { - "epoch": 0.6199948992603928, + "epoch": 0.8560528992878942, "grad_norm": 0.0, - "learning_rate": 6.664743281401351e-06, - "loss": 0.8972, + "learning_rate": 1.06723625037911e-06, + "loss": 0.8683, "step": 21879 }, { - "epoch": 0.6200232367026552, + "epoch": 0.8560920259801237, "grad_norm": 0.0, - "learning_rate": 6.663878057864155e-06, - "loss": 0.8909, + "learning_rate": 1.0666666917567126e-06, + "loss": 0.9991, "step": 21880 }, { - "epoch": 0.6200515741449176, + "epoch": 0.8561311526723531, "grad_norm": 0.0, - "learning_rate": 6.663012862428357e-06, - "loss": 0.9342, + "learning_rate": 1.0660972765924537e-06, + "loss": 0.9039, "step": 21881 }, { - "epoch": 0.6200799115871801, + "epoch": 0.8561702793645826, "grad_norm": 0.0, - "learning_rate": 6.662147695101237e-06, - "loss": 0.8369, + "learning_rate": 1.0655280048954798e-06, + "loss": 1.0233, "step": 21882 }, { - "epoch": 0.6201082490294426, + "epoch": 0.856209406056812, "grad_norm": 0.0, - "learning_rate": 6.661282555890086e-06, - "loss": 0.8967, + "learning_rate": 1.0649588766749297e-06, + "loss": 1.0131, "step": 21883 }, { - "epoch": 0.6201365864717051, + "epoch": 0.8562485327490414, "grad_norm": 0.0, - "learning_rate": 6.660417444802194e-06, - "loss": 0.8528, + "learning_rate": 1.0643898919399431e-06, + "loss": 0.964, "step": 21884 }, { - "epoch": 0.6201649239139675, + "epoch": 0.8562876594412708, "grad_norm": 0.0, - "learning_rate": 6.659552361844844e-06, - "loss": 0.8251, + "learning_rate": 1.063821050699657e-06, + "loss": 0.9952, "step": 21885 }, { - "epoch": 0.62019326135623, + "epoch": 0.8563267861335003, "grad_norm": 0.0, - "learning_rate": 6.658687307025325e-06, - "loss": 0.9893, + "learning_rate": 1.0632523529632099e-06, + "loss": 1.0565, "step": 21886 }, { - "epoch": 0.6202215987984925, + "epoch": 0.8563659128257297, "grad_norm": 0.0, - "learning_rate": 6.657822280350927e-06, - "loss": 0.9036, + "learning_rate": 1.0626837987397299e-06, + "loss": 0.8896, "step": 21887 }, { - "epoch": 0.6202499362407549, + "epoch": 0.8564050395179591, "grad_norm": 0.0, - "learning_rate": 6.65695728182893e-06, - "loss": 0.961, + "learning_rate": 1.0621153880383506e-06, + "loss": 0.8721, "step": 21888 }, { - "epoch": 0.6202782736830174, + "epoch": 0.8564441662101886, "grad_norm": 0.0, - "learning_rate": 6.656092311466624e-06, - "loss": 0.8634, + "learning_rate": 1.061547120868195e-06, + "loss": 1.0494, "step": 21889 }, { - "epoch": 0.6203066111252798, + "epoch": 0.856483292902418, "grad_norm": 0.0, - "learning_rate": 6.6552273692712935e-06, - "loss": 0.8221, + "learning_rate": 1.0609789972383955e-06, + "loss": 1.003, "step": 21890 }, { - "epoch": 0.6203349485675423, + "epoch": 0.8565224195946475, "grad_norm": 0.0, - "learning_rate": 6.654362455250224e-06, - "loss": 0.8793, + "learning_rate": 1.0604110171580706e-06, + "loss": 0.9191, "step": 21891 }, { - "epoch": 0.6203632860098047, + "epoch": 0.8565615462868769, "grad_norm": 0.0, - "learning_rate": 6.653497569410706e-06, - "loss": 0.9656, + "learning_rate": 1.059843180636344e-06, + "loss": 0.8942, "step": 21892 }, { - "epoch": 0.6203916234520672, + "epoch": 0.8566006729791064, "grad_norm": 0.0, - "learning_rate": 6.652632711760017e-06, - "loss": 0.7755, + "learning_rate": 1.059275487682332e-06, + "loss": 0.9575, "step": 21893 }, { - "epoch": 0.6204199608943297, + "epoch": 0.8566397996713357, "grad_norm": 0.0, - "learning_rate": 6.651767882305447e-06, - "loss": 0.7862, + "learning_rate": 1.0587079383051524e-06, + "loss": 1.0388, "step": 21894 }, { - "epoch": 0.6204482983365921, + "epoch": 0.8566789263635652, "grad_norm": 0.0, - "learning_rate": 6.650903081054281e-06, - "loss": 0.81, + "learning_rate": 1.0581405325139194e-06, + "loss": 1.0637, "step": 21895 }, { - "epoch": 0.6204766357788546, + "epoch": 0.8567180530557946, "grad_norm": 0.0, - "learning_rate": 6.6500383080137985e-06, - "loss": 0.8678, + "learning_rate": 1.0575732703177454e-06, + "loss": 0.861, "step": 21896 }, { - "epoch": 0.6205049732211171, + "epoch": 0.8567571797480241, "grad_norm": 0.0, - "learning_rate": 6.64917356319129e-06, - "loss": 0.7718, + "learning_rate": 1.057006151725738e-06, + "loss": 0.9521, "step": 21897 }, { - "epoch": 0.6205333106633796, + "epoch": 0.8567963064402535, "grad_norm": 0.0, - "learning_rate": 6.648308846594035e-06, - "loss": 0.8405, + "learning_rate": 1.0564391767470062e-06, + "loss": 1.0473, "step": 21898 }, { - "epoch": 0.620561648105642, + "epoch": 0.856835433132483, "grad_norm": 0.0, - "learning_rate": 6.647444158229319e-06, - "loss": 0.8725, + "learning_rate": 1.0558723453906538e-06, + "loss": 1.0081, "step": 21899 }, { - "epoch": 0.6205899855479045, + "epoch": 0.8568745598247124, "grad_norm": 0.0, - "learning_rate": 6.64657949810443e-06, - "loss": 0.8677, + "learning_rate": 1.055305657665786e-06, + "loss": 0.8388, "step": 21900 }, { - "epoch": 0.620618322990167, + "epoch": 0.8569136865169419, "grad_norm": 0.0, - "learning_rate": 6.645714866226642e-06, - "loss": 0.9006, + "learning_rate": 1.0547391135814989e-06, + "loss": 0.9166, "step": 21901 }, { - "epoch": 0.6206466604324293, + "epoch": 0.8569528132091713, "grad_norm": 0.0, - "learning_rate": 6.644850262603247e-06, - "loss": 0.7658, + "learning_rate": 1.0541727131468937e-06, + "loss": 1.0122, "step": 21902 }, { - "epoch": 0.6206749978746918, + "epoch": 0.8569919399014008, "grad_norm": 0.0, - "learning_rate": 6.643985687241521e-06, - "loss": 0.8839, + "learning_rate": 1.0536064563710623e-06, + "loss": 0.9895, "step": 21903 }, { - "epoch": 0.6207033353169543, + "epoch": 0.8570310665936302, "grad_norm": 0.0, - "learning_rate": 6.643121140148749e-06, - "loss": 0.8336, + "learning_rate": 1.0530403432631041e-06, + "loss": 0.9507, "step": 21904 }, { - "epoch": 0.6207316727592167, + "epoch": 0.8570701932858596, "grad_norm": 0.0, - "learning_rate": 6.642256621332219e-06, - "loss": 0.734, + "learning_rate": 1.0524743738321052e-06, + "loss": 0.9034, "step": 21905 }, { - "epoch": 0.6207600102014792, + "epoch": 0.857109319978089, "grad_norm": 0.0, - "learning_rate": 6.641392130799205e-06, - "loss": 0.7512, + "learning_rate": 1.0519085480871583e-06, + "loss": 0.9249, "step": 21906 }, { - "epoch": 0.6207883476437417, + "epoch": 0.8571484466703185, "grad_norm": 0.0, - "learning_rate": 6.640527668556993e-06, - "loss": 0.6954, + "learning_rate": 1.0513428660373426e-06, + "loss": 1.0124, "step": 21907 }, { - "epoch": 0.6208166850860042, + "epoch": 0.8571875733625479, "grad_norm": 0.0, - "learning_rate": 6.639663234612865e-06, - "loss": 1.0048, + "learning_rate": 1.050777327691751e-06, + "loss": 1.0209, "step": 21908 }, { - "epoch": 0.6208450225282666, + "epoch": 0.8572267000547774, "grad_norm": 0.0, - "learning_rate": 6.6387988289741e-06, - "loss": 0.8153, + "learning_rate": 1.0502119330594608e-06, + "loss": 0.8888, "step": 21909 }, { - "epoch": 0.6208733599705291, + "epoch": 0.8572658267470068, "grad_norm": 0.0, - "learning_rate": 6.637934451647983e-06, - "loss": 0.8421, + "learning_rate": 1.0496466821495532e-06, + "loss": 0.9318, "step": 21910 }, { - "epoch": 0.6209016974127916, + "epoch": 0.8573049534392363, "grad_norm": 0.0, - "learning_rate": 6.637070102641788e-06, - "loss": 0.8313, + "learning_rate": 1.0490815749711014e-06, + "loss": 1.0639, "step": 21911 }, { - "epoch": 0.6209300348550539, + "epoch": 0.8573440801314657, "grad_norm": 0.0, - "learning_rate": 6.636205781962803e-06, - "loss": 0.8474, + "learning_rate": 1.048516611533187e-06, + "loss": 0.9831, "step": 21912 }, { - "epoch": 0.6209583722973164, + "epoch": 0.8573832068236952, "grad_norm": 0.0, - "learning_rate": 6.635341489618308e-06, - "loss": 0.8788, + "learning_rate": 1.0479517918448767e-06, + "loss": 0.9252, "step": 21913 }, { - "epoch": 0.6209867097395789, + "epoch": 0.8574223335159246, "grad_norm": 0.0, - "learning_rate": 6.6344772256155766e-06, - "loss": 0.8979, + "learning_rate": 1.0473871159152448e-06, + "loss": 0.9902, "step": 21914 }, { - "epoch": 0.6210150471818414, + "epoch": 0.857461460208154, "grad_norm": 0.0, - "learning_rate": 6.633612989961895e-06, - "loss": 0.961, + "learning_rate": 1.0468225837533563e-06, + "loss": 1.0698, "step": 21915 }, { - "epoch": 0.6210433846241038, + "epoch": 0.8575005869003834, "grad_norm": 0.0, - "learning_rate": 6.632748782664542e-06, - "loss": 0.7932, + "learning_rate": 1.0462581953682771e-06, + "loss": 0.9214, "step": 21916 }, { - "epoch": 0.6210717220663663, + "epoch": 0.8575397135926128, "grad_norm": 0.0, - "learning_rate": 6.631884603730796e-06, - "loss": 0.876, + "learning_rate": 1.0456939507690721e-06, + "loss": 0.979, "step": 21917 }, { - "epoch": 0.6211000595086288, + "epoch": 0.8575788402848423, "grad_norm": 0.0, - "learning_rate": 6.631020453167939e-06, - "loss": 0.9641, + "learning_rate": 1.0451298499648043e-06, + "loss": 0.9718, "step": 21918 }, { - "epoch": 0.6211283969508912, + "epoch": 0.8576179669770717, "grad_norm": 0.0, - "learning_rate": 6.630156330983244e-06, - "loss": 0.8362, + "learning_rate": 1.0445658929645275e-06, + "loss": 0.9328, "step": 21919 }, { - "epoch": 0.6211567343931537, + "epoch": 0.8576570936693012, "grad_norm": 0.0, - "learning_rate": 6.629292237183995e-06, - "loss": 0.8698, + "learning_rate": 1.0440020797773009e-06, + "loss": 0.815, "step": 21920 }, { - "epoch": 0.6211850718354162, + "epoch": 0.8576962203615306, "grad_norm": 0.0, - "learning_rate": 6.628428171777473e-06, - "loss": 0.8328, + "learning_rate": 1.0434384104121809e-06, + "loss": 0.962, "step": 21921 }, { - "epoch": 0.6212134092776787, + "epoch": 0.8577353470537601, "grad_norm": 0.0, - "learning_rate": 6.627564134770946e-06, - "loss": 0.901, + "learning_rate": 1.0428748848782145e-06, + "loss": 1.0017, "step": 21922 }, { - "epoch": 0.621241746719941, + "epoch": 0.8577744737459895, "grad_norm": 0.0, - "learning_rate": 6.6267001261717015e-06, - "loss": 0.7656, + "learning_rate": 1.0423115031844534e-06, + "loss": 0.9302, "step": 21923 }, { - "epoch": 0.6212700841622035, + "epoch": 0.857813600438219, "grad_norm": 0.0, - "learning_rate": 6.625836145987015e-06, - "loss": 0.8952, + "learning_rate": 1.041748265339947e-06, + "loss": 0.9759, "step": 21924 }, { - "epoch": 0.621298421604466, + "epoch": 0.8578527271304484, "grad_norm": 0.0, - "learning_rate": 6.624972194224162e-06, - "loss": 0.8269, + "learning_rate": 1.0411851713537358e-06, + "loss": 0.8934, "step": 21925 }, { - "epoch": 0.6213267590467284, + "epoch": 0.8578918538226779, "grad_norm": 0.0, - "learning_rate": 6.624108270890425e-06, - "loss": 0.9047, + "learning_rate": 1.040622221234865e-06, + "loss": 0.9256, "step": 21926 }, { - "epoch": 0.6213550964889909, + "epoch": 0.8579309805149072, "grad_norm": 0.0, - "learning_rate": 6.623244375993074e-06, - "loss": 0.8806, + "learning_rate": 1.040059414992377e-06, + "loss": 0.9192, "step": 21927 }, { - "epoch": 0.6213834339312534, + "epoch": 0.8579701072071367, "grad_norm": 0.0, - "learning_rate": 6.62238050953939e-06, - "loss": 0.8864, + "learning_rate": 1.039496752635305e-06, + "loss": 0.8548, "step": 21928 }, { - "epoch": 0.6214117713735158, + "epoch": 0.8580092338993661, "grad_norm": 0.0, - "learning_rate": 6.62151667153665e-06, - "loss": 0.9359, + "learning_rate": 1.0389342341726872e-06, + "loss": 0.9163, "step": 21929 }, { - "epoch": 0.6214401088157783, + "epoch": 0.8580483605915956, "grad_norm": 0.0, - "learning_rate": 6.620652861992129e-06, - "loss": 0.8543, + "learning_rate": 1.0383718596135561e-06, + "loss": 0.9532, "step": 21930 }, { - "epoch": 0.6214684462580408, + "epoch": 0.858087487283825, "grad_norm": 0.0, - "learning_rate": 6.619789080913106e-06, - "loss": 0.9016, + "learning_rate": 1.037809628966946e-06, + "loss": 1.013, "step": 21931 }, { - "epoch": 0.6214967837003033, + "epoch": 0.8581266139760545, "grad_norm": 0.0, - "learning_rate": 6.618925328306854e-06, - "loss": 0.8724, + "learning_rate": 1.0372475422418816e-06, + "loss": 0.967, "step": 21932 }, { - "epoch": 0.6215251211425656, + "epoch": 0.8581657406682839, "grad_norm": 0.0, - "learning_rate": 6.618061604180645e-06, - "loss": 0.8337, + "learning_rate": 1.0366855994473913e-06, + "loss": 0.9856, "step": 21933 }, { - "epoch": 0.6215534585848281, + "epoch": 0.8582048673605134, "grad_norm": 0.0, - "learning_rate": 6.617197908541767e-06, - "loss": 0.9091, + "learning_rate": 1.0361238005924956e-06, + "loss": 0.9292, "step": 21934 }, { - "epoch": 0.6215817960270906, + "epoch": 0.8582439940527428, "grad_norm": 0.0, - "learning_rate": 6.616334241397482e-06, - "loss": 0.8894, + "learning_rate": 1.035562145686223e-06, + "loss": 1.0175, "step": 21935 }, { - "epoch": 0.621610133469353, + "epoch": 0.8582831207449723, "grad_norm": 0.0, - "learning_rate": 6.61547060275507e-06, - "loss": 0.8134, + "learning_rate": 1.0350006347375874e-06, + "loss": 0.959, "step": 21936 }, { - "epoch": 0.6216384709116155, + "epoch": 0.8583222474372016, "grad_norm": 0.0, - "learning_rate": 6.614606992621807e-06, - "loss": 0.9072, + "learning_rate": 1.0344392677556091e-06, + "loss": 0.9631, "step": 21937 }, { - "epoch": 0.621666808353878, + "epoch": 0.8583613741294311, "grad_norm": 0.0, - "learning_rate": 6.613743411004964e-06, - "loss": 0.8396, + "learning_rate": 1.033878044749299e-06, + "loss": 1.0095, "step": 21938 }, { - "epoch": 0.6216951457961405, + "epoch": 0.8584005008216605, "grad_norm": 0.0, - "learning_rate": 6.612879857911825e-06, - "loss": 0.7241, + "learning_rate": 1.0333169657276754e-06, + "loss": 1.0136, "step": 21939 }, { - "epoch": 0.6217234832384029, + "epoch": 0.85843962751389, "grad_norm": 0.0, - "learning_rate": 6.61201633334965e-06, - "loss": 0.7987, + "learning_rate": 1.032756030699743e-06, + "loss": 1.0349, "step": 21940 }, { - "epoch": 0.6217518206806654, + "epoch": 0.8584787542061194, "grad_norm": 0.0, - "learning_rate": 6.611152837325721e-06, - "loss": 0.8741, + "learning_rate": 1.032195239674515e-06, + "loss": 0.9087, "step": 21941 }, { - "epoch": 0.6217801581229279, + "epoch": 0.8585178808983489, "grad_norm": 0.0, - "learning_rate": 6.610289369847311e-06, - "loss": 0.8516, + "learning_rate": 1.0316345926609927e-06, + "loss": 0.9873, "step": 21942 }, { - "epoch": 0.6218084955651902, + "epoch": 0.8585570075905783, "grad_norm": 0.0, - "learning_rate": 6.60942593092169e-06, - "loss": 0.8792, + "learning_rate": 1.0310740896681803e-06, + "loss": 0.8728, "step": 21943 }, { - "epoch": 0.6218368330074527, + "epoch": 0.8585961342828077, "grad_norm": 0.0, - "learning_rate": 6.608562520556134e-06, - "loss": 0.8286, + "learning_rate": 1.0305137307050782e-06, + "loss": 0.8493, "step": 21944 }, { - "epoch": 0.6218651704497152, + "epoch": 0.8586352609750372, "grad_norm": 0.0, - "learning_rate": 6.6076991387579195e-06, - "loss": 0.8791, + "learning_rate": 1.0299535157806894e-06, + "loss": 0.9718, "step": 21945 }, { - "epoch": 0.6218935078919776, + "epoch": 0.8586743876672666, "grad_norm": 0.0, - "learning_rate": 6.6068357855343115e-06, - "loss": 0.8138, + "learning_rate": 1.0293934449040054e-06, + "loss": 0.9009, "step": 21946 }, { - "epoch": 0.6219218453342401, + "epoch": 0.858713514359496, "grad_norm": 0.0, - "learning_rate": 6.605972460892586e-06, - "loss": 0.8603, + "learning_rate": 1.0288335180840215e-06, + "loss": 0.9443, "step": 21947 }, { - "epoch": 0.6219501827765026, + "epoch": 0.8587526410517254, "grad_norm": 0.0, - "learning_rate": 6.605109164840013e-06, - "loss": 0.8016, + "learning_rate": 1.028273735329729e-06, + "loss": 0.9374, "step": 21948 }, { - "epoch": 0.6219785202187651, + "epoch": 0.8587917677439549, "grad_norm": 0.0, - "learning_rate": 6.6042458973838696e-06, - "loss": 0.889, + "learning_rate": 1.0277140966501209e-06, + "loss": 0.9667, "step": 21949 }, { - "epoch": 0.6220068576610275, + "epoch": 0.8588308944361843, "grad_norm": 0.0, - "learning_rate": 6.603382658531423e-06, - "loss": 0.8313, + "learning_rate": 1.027154602054179e-06, + "loss": 0.8763, "step": 21950 }, { - "epoch": 0.62203519510329, + "epoch": 0.8588700211284138, "grad_norm": 0.0, - "learning_rate": 6.602519448289944e-06, - "loss": 0.8982, + "learning_rate": 1.0265952515508925e-06, + "loss": 1.0254, "step": 21951 }, { - "epoch": 0.6220635325455525, + "epoch": 0.8589091478206432, "grad_norm": 0.0, - "learning_rate": 6.601656266666705e-06, - "loss": 0.7794, + "learning_rate": 1.026036045149239e-06, + "loss": 1.0005, "step": 21952 }, { - "epoch": 0.6220918699878148, + "epoch": 0.8589482745128727, "grad_norm": 0.0, - "learning_rate": 6.600793113668982e-06, - "loss": 0.8891, + "learning_rate": 1.0254769828582046e-06, + "loss": 1.0087, "step": 21953 }, { - "epoch": 0.6221202074300773, + "epoch": 0.8589874012051021, "grad_norm": 0.0, - "learning_rate": 6.599929989304034e-06, - "loss": 0.8249, + "learning_rate": 1.0249180646867629e-06, + "loss": 0.9737, "step": 21954 }, { - "epoch": 0.6221485448723398, + "epoch": 0.8590265278973316, "grad_norm": 0.0, - "learning_rate": 6.5990668935791445e-06, - "loss": 0.8239, + "learning_rate": 1.0243592906438916e-06, + "loss": 1.0085, "step": 21955 }, { - "epoch": 0.6221768823146023, + "epoch": 0.859065654589561, "grad_norm": 0.0, - "learning_rate": 6.598203826501572e-06, - "loss": 0.8475, + "learning_rate": 1.0238006607385597e-06, + "loss": 0.892, "step": 21956 }, { - "epoch": 0.6222052197568647, + "epoch": 0.8591047812817905, "grad_norm": 0.0, - "learning_rate": 6.597340788078594e-06, - "loss": 0.8497, + "learning_rate": 1.0232421749797462e-06, + "loss": 1.1372, "step": 21957 }, { - "epoch": 0.6222335571991272, + "epoch": 0.8591439079740198, "grad_norm": 0.0, - "learning_rate": 6.5964777783174814e-06, - "loss": 0.7928, + "learning_rate": 1.0226838333764111e-06, + "loss": 0.774, "step": 21958 }, { - "epoch": 0.6222618946413897, + "epoch": 0.8591830346662493, "grad_norm": 0.0, - "learning_rate": 6.595614797225497e-06, - "loss": 0.8483, + "learning_rate": 1.0221256359375275e-06, + "loss": 0.9614, "step": 21959 }, { - "epoch": 0.6222902320836521, + "epoch": 0.8592221613584787, "grad_norm": 0.0, - "learning_rate": 6.5947518448099144e-06, - "loss": 0.8172, + "learning_rate": 1.021567582672054e-06, + "loss": 0.8847, "step": 21960 }, { - "epoch": 0.6223185695259146, + "epoch": 0.8592612880507082, "grad_norm": 0.0, - "learning_rate": 6.593888921078e-06, - "loss": 0.9715, + "learning_rate": 1.0210096735889552e-06, + "loss": 0.9809, "step": 21961 }, { - "epoch": 0.6223469069681771, + "epoch": 0.8593004147429376, "grad_norm": 0.0, - "learning_rate": 6.593026026037023e-06, - "loss": 0.8372, + "learning_rate": 1.0204519086971886e-06, + "loss": 0.7972, "step": 21962 }, { - "epoch": 0.6223752444104396, + "epoch": 0.8593395414351671, "grad_norm": 0.0, - "learning_rate": 6.592163159694258e-06, - "loss": 0.8651, + "learning_rate": 1.019894288005715e-06, + "loss": 0.9247, "step": 21963 }, { - "epoch": 0.622403581852702, + "epoch": 0.8593786681273965, "grad_norm": 0.0, - "learning_rate": 6.591300322056964e-06, - "loss": 0.8609, + "learning_rate": 1.0193368115234847e-06, + "loss": 0.923, "step": 21964 }, { - "epoch": 0.6224319192949644, + "epoch": 0.859417794819626, "grad_norm": 0.0, - "learning_rate": 6.590437513132414e-06, - "loss": 0.958, + "learning_rate": 1.0187794792594507e-06, + "loss": 0.8678, "step": 21965 }, { - "epoch": 0.6224602567372269, + "epoch": 0.8594569215118554, "grad_norm": 0.0, - "learning_rate": 6.589574732927878e-06, - "loss": 0.7694, + "learning_rate": 1.018222291222567e-06, + "loss": 0.9234, "step": 21966 }, { - "epoch": 0.6224885941794893, + "epoch": 0.8594960482040849, "grad_norm": 0.0, - "learning_rate": 6.588711981450616e-06, - "loss": 0.8752, + "learning_rate": 1.0176652474217763e-06, + "loss": 0.9639, "step": 21967 }, { - "epoch": 0.6225169316217518, + "epoch": 0.8595351748963143, "grad_norm": 0.0, - "learning_rate": 6.587849258707903e-06, - "loss": 0.9095, + "learning_rate": 1.017108347866027e-06, + "loss": 0.8753, "step": 21968 }, { - "epoch": 0.6225452690640143, + "epoch": 0.8595743015885438, "grad_norm": 0.0, - "learning_rate": 6.5869865647069995e-06, - "loss": 0.8052, + "learning_rate": 1.01655159256426e-06, + "loss": 0.9768, "step": 21969 }, { - "epoch": 0.6225736065062767, + "epoch": 0.8596134282807731, "grad_norm": 0.0, - "learning_rate": 6.586123899455177e-06, - "loss": 0.9008, + "learning_rate": 1.015994981525421e-06, + "loss": 0.9366, "step": 21970 }, { - "epoch": 0.6226019439485392, + "epoch": 0.8596525549730026, "grad_norm": 0.0, - "learning_rate": 6.585261262959703e-06, - "loss": 0.8258, + "learning_rate": 1.0154385147584422e-06, + "loss": 0.9646, "step": 21971 }, { - "epoch": 0.6226302813908017, + "epoch": 0.859691681665232, "grad_norm": 0.0, - "learning_rate": 6.584398655227838e-06, - "loss": 0.7756, + "learning_rate": 1.0148821922722641e-06, + "loss": 0.8519, "step": 21972 }, { - "epoch": 0.6226586188330642, + "epoch": 0.8597308083574614, "grad_norm": 0.0, - "learning_rate": 6.583536076266852e-06, - "loss": 0.9214, + "learning_rate": 1.0143260140758182e-06, + "loss": 0.9779, "step": 21973 }, { - "epoch": 0.6226869562753266, + "epoch": 0.8597699350496909, "grad_norm": 0.0, - "learning_rate": 6.582673526084012e-06, - "loss": 0.887, + "learning_rate": 1.0137699801780365e-06, + "loss": 1.0042, "step": 21974 }, { - "epoch": 0.622715293717589, + "epoch": 0.8598090617419203, "grad_norm": 0.0, - "learning_rate": 6.58181100468658e-06, - "loss": 0.894, + "learning_rate": 1.0132140905878474e-06, + "loss": 0.9199, "step": 21975 }, { - "epoch": 0.6227436311598515, + "epoch": 0.8598481884341498, "grad_norm": 0.0, - "learning_rate": 6.5809485120818265e-06, - "loss": 0.8692, + "learning_rate": 1.0126583453141826e-06, + "loss": 0.9708, "step": 21976 }, { - "epoch": 0.6227719686021139, + "epoch": 0.8598873151263792, "grad_norm": 0.0, - "learning_rate": 6.58008604827701e-06, - "loss": 0.8664, + "learning_rate": 1.0121027443659593e-06, + "loss": 1.0123, "step": 21977 }, { - "epoch": 0.6228003060443764, + "epoch": 0.8599264418186087, "grad_norm": 0.0, - "learning_rate": 6.5792236132793985e-06, - "loss": 0.8606, + "learning_rate": 1.0115472877521048e-06, + "loss": 0.9805, "step": 21978 }, { - "epoch": 0.6228286434866389, + "epoch": 0.859965568510838, "grad_norm": 0.0, - "learning_rate": 6.578361207096261e-06, - "loss": 0.8468, + "learning_rate": 1.0109919754815377e-06, + "loss": 0.9003, "step": 21979 }, { - "epoch": 0.6228569809289014, + "epoch": 0.8600046952030675, "grad_norm": 0.0, - "learning_rate": 6.577498829734853e-06, - "loss": 0.8048, + "learning_rate": 1.0104368075631764e-06, + "loss": 0.9164, "step": 21980 }, { - "epoch": 0.6228853183711638, + "epoch": 0.8600438218952969, "grad_norm": 0.0, - "learning_rate": 6.5766364812024455e-06, - "loss": 0.9307, + "learning_rate": 1.009881784005935e-06, + "loss": 0.8624, "step": 21981 }, { - "epoch": 0.6229136558134263, + "epoch": 0.8600829485875264, "grad_norm": 0.0, - "learning_rate": 6.575774161506298e-06, - "loss": 0.8556, + "learning_rate": 1.009326904818727e-06, + "loss": 0.9414, "step": 21982 }, { - "epoch": 0.6229419932556888, + "epoch": 0.8601220752797558, "grad_norm": 0.0, - "learning_rate": 6.574911870653678e-06, - "loss": 0.8567, + "learning_rate": 1.0087721700104603e-06, + "loss": 0.9524, "step": 21983 }, { - "epoch": 0.6229703306979512, + "epoch": 0.8601612019719853, "grad_norm": 0.0, - "learning_rate": 6.574049608651849e-06, - "loss": 0.8457, + "learning_rate": 1.0082175795900496e-06, + "loss": 0.9135, "step": 21984 }, { - "epoch": 0.6229986681402137, + "epoch": 0.8602003286642147, "grad_norm": 0.0, - "learning_rate": 6.57318737550807e-06, - "loss": 0.8788, + "learning_rate": 1.0076631335663956e-06, + "loss": 0.816, "step": 21985 }, { - "epoch": 0.6230270055824761, + "epoch": 0.8602394553564442, "grad_norm": 0.0, - "learning_rate": 6.572325171229606e-06, - "loss": 0.9436, + "learning_rate": 1.0071088319484057e-06, + "loss": 0.929, "step": 21986 }, { - "epoch": 0.6230553430247386, + "epoch": 0.8602785820486736, "grad_norm": 0.0, - "learning_rate": 6.571462995823721e-06, - "loss": 0.7778, + "learning_rate": 1.006554674744975e-06, + "loss": 0.8645, "step": 21987 }, { - "epoch": 0.623083680467001, + "epoch": 0.8603177087409031, "grad_norm": 0.0, - "learning_rate": 6.570600849297674e-06, - "loss": 0.9355, + "learning_rate": 1.0060006619650108e-06, + "loss": 1.0278, "step": 21988 }, { - "epoch": 0.6231120179092635, + "epoch": 0.8603568354331325, "grad_norm": 0.0, - "learning_rate": 6.569738731658735e-06, - "loss": 0.875, + "learning_rate": 1.005446793617403e-06, + "loss": 0.8366, "step": 21989 }, { - "epoch": 0.623140355351526, + "epoch": 0.860395962125362, "grad_norm": 0.0, - "learning_rate": 6.568876642914155e-06, - "loss": 0.7958, + "learning_rate": 1.0048930697110514e-06, + "loss": 0.9366, "step": 21990 }, { - "epoch": 0.6231686927937884, + "epoch": 0.8604350888175913, "grad_norm": 0.0, - "learning_rate": 6.568014583071201e-06, - "loss": 0.9011, + "learning_rate": 1.0043394902548442e-06, + "loss": 0.9655, "step": 21991 }, { - "epoch": 0.6231970302360509, + "epoch": 0.8604742155098208, "grad_norm": 0.0, - "learning_rate": 6.567152552137139e-06, - "loss": 0.9072, + "learning_rate": 1.0037860552576729e-06, + "loss": 0.9288, "step": 21992 }, { - "epoch": 0.6232253676783134, + "epoch": 0.8605133422020502, "grad_norm": 0.0, - "learning_rate": 6.566290550119223e-06, - "loss": 0.8459, + "learning_rate": 1.0032327647284234e-06, + "loss": 0.9244, "step": 21993 }, { - "epoch": 0.6232537051205758, + "epoch": 0.8605524688942797, "grad_norm": 0.0, - "learning_rate": 6.565428577024716e-06, - "loss": 0.8636, + "learning_rate": 1.0026796186759847e-06, + "loss": 0.9887, "step": 21994 }, { - "epoch": 0.6232820425628383, + "epoch": 0.8605915955865091, "grad_norm": 0.0, - "learning_rate": 6.564566632860883e-06, - "loss": 0.825, + "learning_rate": 1.0021266171092348e-06, + "loss": 0.8981, "step": 21995 }, { - "epoch": 0.6233103800051008, + "epoch": 0.8606307222787386, "grad_norm": 0.0, - "learning_rate": 6.563704717634975e-06, - "loss": 0.8694, + "learning_rate": 1.0015737600370568e-06, + "loss": 0.8734, "step": 21996 }, { - "epoch": 0.6233387174473632, + "epoch": 0.860669848970968, "grad_norm": 0.0, - "learning_rate": 6.562842831354266e-06, - "loss": 0.7512, + "learning_rate": 1.001021047468329e-06, + "loss": 0.9821, "step": 21997 }, { - "epoch": 0.6233670548896256, + "epoch": 0.8607089756631975, "grad_norm": 0.0, - "learning_rate": 6.561980974026003e-06, - "loss": 0.8705, + "learning_rate": 1.000468479411928e-06, + "loss": 0.7531, "step": 21998 }, { - "epoch": 0.6233953923318881, + "epoch": 0.8607481023554269, "grad_norm": 0.0, - "learning_rate": 6.561119145657451e-06, - "loss": 0.8268, + "learning_rate": 9.999160558767251e-07, + "loss": 0.9677, "step": 21999 }, { - "epoch": 0.6234237297741506, + "epoch": 0.8607872290476564, "grad_norm": 0.0, - "learning_rate": 6.5602573462558715e-06, - "loss": 0.8626, + "learning_rate": 9.993637768715935e-07, + "loss": 1.0662, "step": 22000 }, { - "epoch": 0.623452067216413, + "epoch": 0.8608263557398858, "grad_norm": 0.0, - "learning_rate": 6.5593955758285185e-06, - "loss": 0.7225, + "learning_rate": 9.988116424053973e-07, + "loss": 0.9812, "step": 22001 }, { - "epoch": 0.6234804046586755, + "epoch": 0.8608654824321151, "grad_norm": 0.0, - "learning_rate": 6.558533834382655e-06, - "loss": 0.7695, + "learning_rate": 9.982596524870113e-07, + "loss": 0.8317, "step": 22002 }, { - "epoch": 0.623508742100938, + "epoch": 0.8609046091243446, "grad_norm": 0.0, - "learning_rate": 6.5576721219255435e-06, - "loss": 0.7918, + "learning_rate": 9.977078071252944e-07, + "loss": 0.9191, "step": 22003 }, { - "epoch": 0.6235370795432005, + "epoch": 0.860943735816574, "grad_norm": 0.0, - "learning_rate": 6.556810438464434e-06, - "loss": 0.8224, + "learning_rate": 9.971561063291102e-07, + "loss": 0.9229, "step": 22004 }, { - "epoch": 0.6235654169854629, + "epoch": 0.8609828625088035, "grad_norm": 0.0, - "learning_rate": 6.555948784006592e-06, - "loss": 0.812, + "learning_rate": 9.966045501073162e-07, + "loss": 0.9578, "step": 22005 }, { - "epoch": 0.6235937544277254, + "epoch": 0.8610219892010329, "grad_norm": 0.0, - "learning_rate": 6.555087158559268e-06, - "loss": 0.8969, + "learning_rate": 9.96053138468772e-07, + "loss": 0.979, "step": 22006 }, { - "epoch": 0.6236220918699878, + "epoch": 0.8610611158932624, "grad_norm": 0.0, - "learning_rate": 6.554225562129726e-06, - "loss": 0.8224, + "learning_rate": 9.955018714223308e-07, + "loss": 0.9114, "step": 22007 }, { - "epoch": 0.6236504293122502, + "epoch": 0.8611002425854918, "grad_norm": 0.0, - "learning_rate": 6.553363994725221e-06, - "loss": 0.8701, + "learning_rate": 9.949507489768484e-07, + "loss": 1.0496, "step": 22008 }, { - "epoch": 0.6236787667545127, + "epoch": 0.8611393692777213, "grad_norm": 0.0, - "learning_rate": 6.552502456353011e-06, - "loss": 0.8239, + "learning_rate": 9.943997711411712e-07, + "loss": 0.9677, "step": 22009 }, { - "epoch": 0.6237071041967752, + "epoch": 0.8611784959699507, "grad_norm": 0.0, - "learning_rate": 6.551640947020356e-06, - "loss": 0.8777, + "learning_rate": 9.9384893792415e-07, + "loss": 0.9494, "step": 22010 }, { - "epoch": 0.6237354416390377, + "epoch": 0.8612176226621802, "grad_norm": 0.0, - "learning_rate": 6.550779466734507e-06, - "loss": 0.9135, + "learning_rate": 9.932982493346299e-07, + "loss": 0.9868, "step": 22011 }, { - "epoch": 0.6237637790813001, + "epoch": 0.8612567493544095, "grad_norm": 0.0, - "learning_rate": 6.549918015502722e-06, - "loss": 0.7738, + "learning_rate": 9.927477053814528e-07, + "loss": 0.9628, "step": 22012 }, { - "epoch": 0.6237921165235626, + "epoch": 0.861295876046639, "grad_norm": 0.0, - "learning_rate": 6.5490565933322615e-06, - "loss": 0.8528, + "learning_rate": 9.921973060734612e-07, + "loss": 1.0152, "step": 22013 }, { - "epoch": 0.6238204539658251, + "epoch": 0.8613350027388684, "grad_norm": 0.0, - "learning_rate": 6.548195200230376e-06, - "loss": 0.716, + "learning_rate": 9.91647051419492e-07, + "loss": 1.0667, "step": 22014 }, { - "epoch": 0.6238487914080875, + "epoch": 0.8613741294310979, "grad_norm": 0.0, - "learning_rate": 6.547333836204326e-06, - "loss": 0.897, + "learning_rate": 9.910969414283866e-07, + "loss": 0.8821, "step": 22015 }, { - "epoch": 0.62387712885035, + "epoch": 0.8614132561233273, "grad_norm": 0.0, - "learning_rate": 6.546472501261367e-06, - "loss": 0.8115, + "learning_rate": 9.905469761089725e-07, + "loss": 0.9243, "step": 22016 }, { - "epoch": 0.6239054662926125, + "epoch": 0.8614523828155568, "grad_norm": 0.0, - "learning_rate": 6.54561119540875e-06, - "loss": 0.8147, + "learning_rate": 9.899971554700872e-07, + "loss": 0.9278, "step": 22017 }, { - "epoch": 0.6239338037348748, + "epoch": 0.8614915095077862, "grad_norm": 0.0, - "learning_rate": 6.544749918653737e-06, - "loss": 0.9722, + "learning_rate": 9.894474795205555e-07, + "loss": 1.0275, "step": 22018 }, { - "epoch": 0.6239621411771373, + "epoch": 0.8615306362000157, "grad_norm": 0.0, - "learning_rate": 6.543888671003573e-06, - "loss": 0.7839, + "learning_rate": 9.888979482692052e-07, + "loss": 0.8671, "step": 22019 }, { - "epoch": 0.6239904786193998, + "epoch": 0.8615697628922451, "grad_norm": 0.0, - "learning_rate": 6.543027452465518e-06, - "loss": 0.7726, + "learning_rate": 9.883485617248635e-07, + "loss": 0.8803, "step": 22020 }, { - "epoch": 0.6240188160616623, + "epoch": 0.8616088895844746, "grad_norm": 0.0, - "learning_rate": 6.54216626304683e-06, - "loss": 0.816, + "learning_rate": 9.877993198963532e-07, + "loss": 1.0108, "step": 22021 }, { - "epoch": 0.6240471535039247, + "epoch": 0.861648016276704, "grad_norm": 0.0, - "learning_rate": 6.541305102754756e-06, - "loss": 0.7727, + "learning_rate": 9.872502227924907e-07, + "loss": 1.0626, "step": 22022 }, { - "epoch": 0.6240754909461872, + "epoch": 0.8616871429689335, "grad_norm": 0.0, - "learning_rate": 6.540443971596555e-06, - "loss": 0.9452, + "learning_rate": 9.867012704220968e-07, + "loss": 1.1017, "step": 22023 }, { - "epoch": 0.6241038283884497, + "epoch": 0.8617262696611628, "grad_norm": 0.0, - "learning_rate": 6.539582869579482e-06, - "loss": 0.7729, + "learning_rate": 9.861524627939855e-07, + "loss": 1.025, "step": 22024 }, { - "epoch": 0.6241321658307121, + "epoch": 0.8617653963533923, "grad_norm": 0.0, - "learning_rate": 6.538721796710784e-06, - "loss": 0.7805, + "learning_rate": 9.856037999169731e-07, + "loss": 0.9909, "step": 22025 }, { - "epoch": 0.6241605032729746, + "epoch": 0.8618045230456217, "grad_norm": 0.0, - "learning_rate": 6.53786075299772e-06, - "loss": 0.7509, + "learning_rate": 9.85055281799866e-07, + "loss": 0.8378, "step": 22026 }, { - "epoch": 0.6241888407152371, + "epoch": 0.8618436497378512, "grad_norm": 0.0, - "learning_rate": 6.536999738447538e-06, - "loss": 0.8236, + "learning_rate": 9.845069084514746e-07, + "loss": 0.9454, "step": 22027 }, { - "epoch": 0.6242171781574996, + "epoch": 0.8618827764300806, "grad_norm": 0.0, - "learning_rate": 6.5361387530674935e-06, - "loss": 0.9585, + "learning_rate": 9.839586798806044e-07, + "loss": 1.043, "step": 22028 }, { - "epoch": 0.6242455155997619, + "epoch": 0.86192190312231, "grad_norm": 0.0, - "learning_rate": 6.535277796864842e-06, - "loss": 0.7586, + "learning_rate": 9.834105960960627e-07, + "loss": 0.9726, "step": 22029 }, { - "epoch": 0.6242738530420244, + "epoch": 0.8619610298145395, "grad_norm": 0.0, - "learning_rate": 6.534416869846828e-06, - "loss": 0.7489, + "learning_rate": 9.828626571066469e-07, + "loss": 0.9418, "step": 22030 }, { - "epoch": 0.6243021904842869, + "epoch": 0.8620001565067689, "grad_norm": 0.0, - "learning_rate": 6.533555972020709e-06, - "loss": 0.9109, + "learning_rate": 9.823148629211587e-07, + "loss": 0.9489, "step": 22031 }, { - "epoch": 0.6243305279265493, + "epoch": 0.8620392831989984, "grad_norm": 0.0, - "learning_rate": 6.532695103393738e-06, - "loss": 0.7543, + "learning_rate": 9.817672135483914e-07, + "loss": 1.1118, "step": 22032 }, { - "epoch": 0.6243588653688118, + "epoch": 0.8620784098912277, "grad_norm": 0.0, - "learning_rate": 6.5318342639731606e-06, - "loss": 0.9298, + "learning_rate": 9.812197089971453e-07, + "loss": 0.9908, "step": 22033 }, { - "epoch": 0.6243872028110743, + "epoch": 0.8621175365834572, "grad_norm": 0.0, - "learning_rate": 6.530973453766232e-06, - "loss": 0.8513, + "learning_rate": 9.806723492762072e-07, + "loss": 0.9824, "step": 22034 }, { - "epoch": 0.6244155402533368, + "epoch": 0.8621566632756866, "grad_norm": 0.0, - "learning_rate": 6.5301126727802e-06, - "loss": 0.8283, + "learning_rate": 9.801251343943718e-07, + "loss": 0.9858, "step": 22035 }, { - "epoch": 0.6244438776955992, + "epoch": 0.8621957899679161, "grad_norm": 0.0, - "learning_rate": 6.529251921022318e-06, - "loss": 0.8767, + "learning_rate": 9.795780643604203e-07, + "loss": 0.9655, "step": 22036 }, { - "epoch": 0.6244722151378617, + "epoch": 0.8622349166601455, "grad_norm": 0.0, - "learning_rate": 6.528391198499841e-06, - "loss": 0.8189, + "learning_rate": 9.790311391831453e-07, + "loss": 0.9207, "step": 22037 }, { - "epoch": 0.6245005525801242, + "epoch": 0.862274043352375, "grad_norm": 0.0, - "learning_rate": 6.527530505220009e-06, - "loss": 0.9068, + "learning_rate": 9.784843588713255e-07, + "loss": 1.013, "step": 22038 }, { - "epoch": 0.6245288900223865, + "epoch": 0.8623131700446044, "grad_norm": 0.0, - "learning_rate": 6.526669841190078e-06, - "loss": 0.7849, + "learning_rate": 9.779377234337428e-07, + "loss": 1.1321, "step": 22039 }, { - "epoch": 0.624557227464649, + "epoch": 0.8623522967368339, "grad_norm": 0.0, - "learning_rate": 6.5258092064172976e-06, - "loss": 0.8145, + "learning_rate": 9.773912328791735e-07, + "loss": 1.1049, "step": 22040 }, { - "epoch": 0.6245855649069115, + "epoch": 0.8623914234290633, "grad_norm": 0.0, - "learning_rate": 6.524948600908914e-06, - "loss": 0.8975, + "learning_rate": 9.76844887216396e-07, + "loss": 0.9769, "step": 22041 }, { - "epoch": 0.6246139023491739, + "epoch": 0.8624305501212928, "grad_norm": 0.0, - "learning_rate": 6.524088024672184e-06, - "loss": 0.8628, + "learning_rate": 9.762986864541824e-07, + "loss": 0.8754, "step": 22042 }, { - "epoch": 0.6246422397914364, + "epoch": 0.8624696768135222, "grad_norm": 0.0, - "learning_rate": 6.523227477714347e-06, - "loss": 0.7469, + "learning_rate": 9.757526306013055e-07, + "loss": 0.9804, "step": 22043 }, { - "epoch": 0.6246705772336989, + "epoch": 0.8625088035057517, "grad_norm": 0.0, - "learning_rate": 6.522366960042654e-06, - "loss": 0.9883, + "learning_rate": 9.752067196665327e-07, + "loss": 1.0369, "step": 22044 }, { - "epoch": 0.6246989146759614, + "epoch": 0.862547930197981, "grad_norm": 0.0, - "learning_rate": 6.521506471664363e-06, - "loss": 0.8422, + "learning_rate": 9.746609536586305e-07, + "loss": 0.8314, "step": 22045 }, { - "epoch": 0.6247272521182238, + "epoch": 0.8625870568902105, "grad_norm": 0.0, - "learning_rate": 6.520646012586709e-06, - "loss": 0.8078, + "learning_rate": 9.74115332586364e-07, + "loss": 0.8315, "step": 22046 }, { - "epoch": 0.6247555895604863, + "epoch": 0.8626261835824399, "grad_norm": 0.0, - "learning_rate": 6.519785582816947e-06, - "loss": 0.8773, + "learning_rate": 9.735698564584972e-07, + "loss": 0.9585, "step": 22047 }, { - "epoch": 0.6247839270027488, + "epoch": 0.8626653102746694, "grad_norm": 0.0, - "learning_rate": 6.518925182362321e-06, - "loss": 0.8349, + "learning_rate": 9.730245252837867e-07, + "loss": 0.8711, "step": 22048 }, { - "epoch": 0.6248122644450111, + "epoch": 0.8627044369668988, "grad_norm": 0.0, - "learning_rate": 6.518064811230083e-06, - "loss": 0.8234, + "learning_rate": 9.724793390709919e-07, + "loss": 0.9355, "step": 22049 }, { - "epoch": 0.6248406018872736, + "epoch": 0.8627435636591283, "grad_norm": 0.0, - "learning_rate": 6.517204469427481e-06, - "loss": 0.8871, + "learning_rate": 9.71934297828865e-07, + "loss": 0.8593, "step": 22050 }, { - "epoch": 0.6248689393295361, + "epoch": 0.8627826903513577, "grad_norm": 0.0, - "learning_rate": 6.516344156961754e-06, - "loss": 0.7627, + "learning_rate": 9.713894015661608e-07, + "loss": 0.9814, "step": 22051 }, { - "epoch": 0.6248972767717986, + "epoch": 0.8628218170435872, "grad_norm": 0.0, - "learning_rate": 6.515483873840155e-06, - "loss": 0.9404, + "learning_rate": 9.70844650291629e-07, + "loss": 0.9239, "step": 22052 }, { - "epoch": 0.624925614214061, + "epoch": 0.8628609437358166, "grad_norm": 0.0, - "learning_rate": 6.514623620069931e-06, - "loss": 0.8591, + "learning_rate": 9.703000440140199e-07, + "loss": 0.9677, "step": 22053 }, { - "epoch": 0.6249539516563235, + "epoch": 0.8629000704280461, "grad_norm": 0.0, - "learning_rate": 6.513763395658325e-06, - "loss": 0.7568, + "learning_rate": 9.697555827420756e-07, + "loss": 0.9962, "step": 22054 }, { - "epoch": 0.624982289098586, + "epoch": 0.8629391971202754, "grad_norm": 0.0, - "learning_rate": 6.512903200612588e-06, - "loss": 0.8108, + "learning_rate": 9.69211266484541e-07, + "loss": 0.8925, "step": 22055 }, { - "epoch": 0.6250106265408484, + "epoch": 0.8629783238125049, "grad_norm": 0.0, - "learning_rate": 6.512043034939959e-06, - "loss": 0.8673, + "learning_rate": 9.686670952501586e-07, + "loss": 0.904, "step": 22056 }, { - "epoch": 0.6250389639831109, + "epoch": 0.8630174505047343, "grad_norm": 0.0, - "learning_rate": 6.5111828986476855e-06, - "loss": 0.7851, + "learning_rate": 9.681230690476651e-07, + "loss": 1.0366, "step": 22057 }, { - "epoch": 0.6250673014253734, + "epoch": 0.8630565771969637, "grad_norm": 0.0, - "learning_rate": 6.510322791743016e-06, - "loss": 0.859, + "learning_rate": 9.675791878857966e-07, + "loss": 0.876, "step": 22058 }, { - "epoch": 0.6250956388676359, + "epoch": 0.8630957038891932, "grad_norm": 0.0, - "learning_rate": 6.509462714233194e-06, - "loss": 0.9427, + "learning_rate": 9.670354517732883e-07, + "loss": 1.0618, "step": 22059 }, { - "epoch": 0.6251239763098982, + "epoch": 0.8631348305814226, "grad_norm": 0.0, - "learning_rate": 6.508602666125462e-06, - "loss": 0.9786, + "learning_rate": 9.664918607188734e-07, + "loss": 0.8608, "step": 22060 }, { - "epoch": 0.6251523137521607, + "epoch": 0.8631739572736521, "grad_norm": 0.0, - "learning_rate": 6.507742647427068e-06, - "loss": 0.8879, + "learning_rate": 9.65948414731278e-07, + "loss": 0.8843, "step": 22061 }, { - "epoch": 0.6251806511944232, + "epoch": 0.8632130839658815, "grad_norm": 0.0, - "learning_rate": 6.5068826581452525e-06, - "loss": 0.9602, + "learning_rate": 9.654051138192322e-07, + "loss": 0.9148, "step": 22062 }, { - "epoch": 0.6252089886366856, + "epoch": 0.863252210658111, "grad_norm": 0.0, - "learning_rate": 6.506022698287265e-06, - "loss": 0.9711, + "learning_rate": 9.648619579914563e-07, + "loss": 0.9038, "step": 22063 }, { - "epoch": 0.6252373260789481, + "epoch": 0.8632913373503404, "grad_norm": 0.0, - "learning_rate": 6.5051627678603425e-06, - "loss": 0.8242, + "learning_rate": 9.643189472566794e-07, + "loss": 0.9164, "step": 22064 }, { - "epoch": 0.6252656635212106, + "epoch": 0.8633304640425699, "grad_norm": 0.0, - "learning_rate": 6.504302866871732e-06, - "loss": 0.8129, + "learning_rate": 9.637760816236152e-07, + "loss": 0.9595, "step": 22065 }, { - "epoch": 0.625294000963473, + "epoch": 0.8633695907347992, "grad_norm": 0.0, - "learning_rate": 6.503442995328678e-06, - "loss": 0.9211, + "learning_rate": 9.63233361100986e-07, + "loss": 0.8764, "step": 22066 }, { - "epoch": 0.6253223384057355, + "epoch": 0.8634087174270287, "grad_norm": 0.0, - "learning_rate": 6.50258315323842e-06, - "loss": 0.9034, + "learning_rate": 9.626907856975044e-07, + "loss": 0.8095, "step": 22067 }, { - "epoch": 0.625350675847998, + "epoch": 0.8634478441192581, "grad_norm": 0.0, - "learning_rate": 6.501723340608207e-06, - "loss": 0.8569, + "learning_rate": 9.621483554218836e-07, + "loss": 0.9316, "step": 22068 }, { - "epoch": 0.6253790132902605, + "epoch": 0.8634869708114876, "grad_norm": 0.0, - "learning_rate": 6.500863557445274e-06, - "loss": 0.9265, + "learning_rate": 9.616060702828356e-07, + "loss": 0.9364, "step": 22069 }, { - "epoch": 0.6254073507325228, + "epoch": 0.863526097503717, "grad_norm": 0.0, - "learning_rate": 6.5000038037568645e-06, - "loss": 0.8278, + "learning_rate": 9.610639302890701e-07, + "loss": 0.9739, "step": 22070 }, { - "epoch": 0.6254356881747853, + "epoch": 0.8635652241959465, "grad_norm": 0.0, - "learning_rate": 6.499144079550227e-06, - "loss": 0.7957, + "learning_rate": 9.60521935449289e-07, + "loss": 1.0362, "step": 22071 }, { - "epoch": 0.6254640256170478, + "epoch": 0.8636043508881759, "grad_norm": 0.0, - "learning_rate": 6.498284384832596e-06, - "loss": 0.8696, + "learning_rate": 9.599800857721986e-07, + "loss": 0.9645, "step": 22072 }, { - "epoch": 0.6254923630593102, + "epoch": 0.8636434775804054, "grad_norm": 0.0, - "learning_rate": 6.497424719611216e-06, - "loss": 0.861, + "learning_rate": 9.59438381266501e-07, + "loss": 0.9463, "step": 22073 }, { - "epoch": 0.6255207005015727, + "epoch": 0.8636826042726348, "grad_norm": 0.0, - "learning_rate": 6.496565083893333e-06, - "loss": 0.8442, + "learning_rate": 9.588968219408967e-07, + "loss": 1.1007, "step": 22074 }, { - "epoch": 0.6255490379438352, + "epoch": 0.8637217309648643, "grad_norm": 0.0, - "learning_rate": 6.495705477686179e-06, - "loss": 0.8784, + "learning_rate": 9.583554078040769e-07, + "loss": 1.0078, "step": 22075 }, { - "epoch": 0.6255773753860977, + "epoch": 0.8637608576570937, "grad_norm": 0.0, - "learning_rate": 6.494845900997002e-06, - "loss": 0.8158, + "learning_rate": 9.57814138864742e-07, + "loss": 1.0449, "step": 22076 }, { - "epoch": 0.6256057128283601, + "epoch": 0.8637999843493231, "grad_norm": 0.0, - "learning_rate": 6.493986353833035e-06, - "loss": 0.7668, + "learning_rate": 9.57273015131579e-07, + "loss": 0.9936, "step": 22077 }, { - "epoch": 0.6256340502706226, + "epoch": 0.8638391110415525, "grad_norm": 0.0, - "learning_rate": 6.4931268362015245e-06, - "loss": 0.8396, + "learning_rate": 9.567320366132826e-07, + "loss": 0.9292, "step": 22078 }, { - "epoch": 0.6256623877128851, + "epoch": 0.863878237733782, "grad_norm": 0.0, - "learning_rate": 6.492267348109711e-06, - "loss": 0.8741, + "learning_rate": 9.56191203318536e-07, + "loss": 0.8108, "step": 22079 }, { - "epoch": 0.6256907251551475, + "epoch": 0.8639173644260114, "grad_norm": 0.0, - "learning_rate": 6.491407889564829e-06, - "loss": 0.8783, + "learning_rate": 9.556505152560292e-07, + "loss": 1.0208, "step": 22080 }, { - "epoch": 0.62571906259741, + "epoch": 0.8639564911182409, "grad_norm": 0.0, - "learning_rate": 6.490548460574122e-06, - "loss": 0.8329, + "learning_rate": 9.55109972434437e-07, + "loss": 0.9151, "step": 22081 }, { - "epoch": 0.6257474000396724, + "epoch": 0.8639956178104703, "grad_norm": 0.0, - "learning_rate": 6.489689061144832e-06, - "loss": 0.8103, + "learning_rate": 9.545695748624484e-07, + "loss": 0.9759, "step": 22082 }, { - "epoch": 0.6257757374819349, + "epoch": 0.8640347445026998, "grad_norm": 0.0, - "learning_rate": 6.48882969128419e-06, - "loss": 0.8572, + "learning_rate": 9.540293225487363e-07, + "loss": 0.9008, "step": 22083 }, { - "epoch": 0.6258040749241973, + "epoch": 0.8640738711949292, "grad_norm": 0.0, - "learning_rate": 6.4879703509994444e-06, - "loss": 0.7563, + "learning_rate": 9.534892155019803e-07, + "loss": 0.893, "step": 22084 }, { - "epoch": 0.6258324123664598, + "epoch": 0.8641129978871587, "grad_norm": 0.0, - "learning_rate": 6.487111040297825e-06, - "loss": 0.8546, + "learning_rate": 9.529492537308483e-07, + "loss": 0.8859, "step": 22085 }, { - "epoch": 0.6258607498087223, + "epoch": 0.8641521245793881, "grad_norm": 0.0, - "learning_rate": 6.486251759186573e-06, - "loss": 0.8125, + "learning_rate": 9.524094372440174e-07, + "loss": 0.9079, "step": 22086 }, { - "epoch": 0.6258890872509847, + "epoch": 0.8641912512716174, "grad_norm": 0.0, - "learning_rate": 6.485392507672931e-06, - "loss": 0.895, + "learning_rate": 9.518697660501519e-07, + "loss": 0.861, "step": 22087 }, { - "epoch": 0.6259174246932472, + "epoch": 0.8642303779638469, "grad_norm": 0.0, - "learning_rate": 6.4845332857641294e-06, - "loss": 0.8721, + "learning_rate": 9.513302401579217e-07, + "loss": 0.9632, "step": 22088 }, { - "epoch": 0.6259457621355097, + "epoch": 0.8642695046560763, "grad_norm": 0.0, - "learning_rate": 6.483674093467409e-06, - "loss": 0.8167, + "learning_rate": 9.507908595759885e-07, + "loss": 0.8423, "step": 22089 }, { - "epoch": 0.6259740995777721, + "epoch": 0.8643086313483058, "grad_norm": 0.0, - "learning_rate": 6.482814930790014e-06, - "loss": 0.8662, + "learning_rate": 9.502516243130133e-07, + "loss": 0.9836, "step": 22090 }, { - "epoch": 0.6260024370200346, + "epoch": 0.8643477580405352, "grad_norm": 0.0, - "learning_rate": 6.481955797739168e-06, - "loss": 0.8429, + "learning_rate": 9.497125343776581e-07, + "loss": 0.9991, "step": 22091 }, { - "epoch": 0.626030774462297, + "epoch": 0.8643868847327647, "grad_norm": 0.0, - "learning_rate": 6.481096694322118e-06, - "loss": 0.9415, + "learning_rate": 9.491735897785804e-07, + "loss": 0.9615, "step": 22092 }, { - "epoch": 0.6260591119045595, + "epoch": 0.8644260114249941, "grad_norm": 0.0, - "learning_rate": 6.480237620546095e-06, - "loss": 0.7961, + "learning_rate": 9.48634790524432e-07, + "loss": 0.9549, "step": 22093 }, { - "epoch": 0.6260874493468219, + "epoch": 0.8644651381172236, "grad_norm": 0.0, - "learning_rate": 6.4793785764183356e-06, - "loss": 0.9003, + "learning_rate": 9.480961366238662e-07, + "loss": 1.0062, "step": 22094 }, { - "epoch": 0.6261157867890844, + "epoch": 0.864504264809453, "grad_norm": 0.0, - "learning_rate": 6.478519561946085e-06, - "loss": 0.9276, + "learning_rate": 9.47557628085537e-07, + "loss": 0.9515, "step": 22095 }, { - "epoch": 0.6261441242313469, + "epoch": 0.8645433915016825, "grad_norm": 0.0, - "learning_rate": 6.4776605771365666e-06, - "loss": 0.7667, + "learning_rate": 9.470192649180853e-07, + "loss": 1.0318, "step": 22096 }, { - "epoch": 0.6261724616736093, + "epoch": 0.8645825181939119, "grad_norm": 0.0, - "learning_rate": 6.476801621997022e-06, - "loss": 0.8327, + "learning_rate": 9.46481047130161e-07, + "loss": 0.8978, "step": 22097 }, { - "epoch": 0.6262007991158718, + "epoch": 0.8646216448861413, "grad_norm": 0.0, - "learning_rate": 6.475942696534685e-06, - "loss": 0.8425, + "learning_rate": 9.459429747304094e-07, + "loss": 0.8798, "step": 22098 }, { - "epoch": 0.6262291365581343, + "epoch": 0.8646607715783707, "grad_norm": 0.0, - "learning_rate": 6.4750838007567915e-06, - "loss": 0.8559, + "learning_rate": 9.454050477274646e-07, + "loss": 0.9925, "step": 22099 }, { - "epoch": 0.6262574740003968, + "epoch": 0.8646998982706002, "grad_norm": 0.0, - "learning_rate": 6.474224934670579e-06, - "loss": 0.8328, + "learning_rate": 9.448672661299696e-07, + "loss": 0.9278, "step": 22100 }, { - "epoch": 0.6262858114426592, + "epoch": 0.8647390249628296, "grad_norm": 0.0, - "learning_rate": 6.473366098283276e-06, - "loss": 0.9341, + "learning_rate": 9.443296299465609e-07, + "loss": 0.9603, "step": 22101 }, { - "epoch": 0.6263141488849217, + "epoch": 0.8647781516550591, "grad_norm": 0.0, - "learning_rate": 6.472507291602119e-06, - "loss": 0.9023, + "learning_rate": 9.437921391858696e-07, + "loss": 0.9986, "step": 22102 }, { - "epoch": 0.6263424863271841, + "epoch": 0.8648172783472885, "grad_norm": 0.0, - "learning_rate": 6.471648514634348e-06, - "loss": 0.7003, + "learning_rate": 9.432547938565285e-07, + "loss": 1.0144, "step": 22103 }, { - "epoch": 0.6263708237694465, + "epoch": 0.864856405039518, "grad_norm": 0.0, - "learning_rate": 6.470789767387188e-06, - "loss": 0.8238, + "learning_rate": 9.427175939671662e-07, + "loss": 0.8485, "step": 22104 }, { - "epoch": 0.626399161211709, + "epoch": 0.8648955317317474, "grad_norm": 0.0, - "learning_rate": 6.469931049867877e-06, - "loss": 0.9143, + "learning_rate": 9.421805395264127e-07, + "loss": 0.9322, "step": 22105 }, { - "epoch": 0.6264274986539715, + "epoch": 0.8649346584239769, "grad_norm": 0.0, - "learning_rate": 6.469072362083647e-06, - "loss": 0.8943, + "learning_rate": 9.416436305428867e-07, + "loss": 0.94, "step": 22106 }, { - "epoch": 0.626455836096234, + "epoch": 0.8649737851162063, "grad_norm": 0.0, - "learning_rate": 6.468213704041731e-06, - "loss": 0.7717, + "learning_rate": 9.411068670252144e-07, + "loss": 0.9611, "step": 22107 }, { - "epoch": 0.6264841735384964, + "epoch": 0.8650129118084358, "grad_norm": 0.0, - "learning_rate": 6.4673550757493665e-06, - "loss": 0.813, + "learning_rate": 9.405702489820135e-07, + "loss": 0.8777, "step": 22108 }, { - "epoch": 0.6265125109807589, + "epoch": 0.8650520385006651, "grad_norm": 0.0, - "learning_rate": 6.466496477213777e-06, - "loss": 0.8424, + "learning_rate": 9.400337764219036e-07, + "loss": 0.9048, "step": 22109 }, { - "epoch": 0.6265408484230214, + "epoch": 0.8650911651928946, "grad_norm": 0.0, - "learning_rate": 6.4656379084422014e-06, - "loss": 0.8309, + "learning_rate": 9.394974493534981e-07, + "loss": 0.9403, "step": 22110 }, { - "epoch": 0.6265691858652838, + "epoch": 0.865130291885124, "grad_norm": 0.0, - "learning_rate": 6.464779369441871e-06, - "loss": 0.8663, + "learning_rate": 9.38961267785411e-07, + "loss": 0.885, "step": 22111 }, { - "epoch": 0.6265975233075463, + "epoch": 0.8651694185773535, "grad_norm": 0.0, - "learning_rate": 6.463920860220017e-06, - "loss": 0.9136, + "learning_rate": 9.384252317262487e-07, + "loss": 1.0367, "step": 22112 }, { - "epoch": 0.6266258607498087, + "epoch": 0.8652085452695829, "grad_norm": 0.0, - "learning_rate": 6.4630623807838726e-06, - "loss": 0.8402, + "learning_rate": 9.378893411846257e-07, + "loss": 1.0453, "step": 22113 }, { - "epoch": 0.6266541981920711, + "epoch": 0.8652476719618124, "grad_norm": 0.0, - "learning_rate": 6.462203931140662e-06, - "loss": 0.8566, + "learning_rate": 9.373535961691427e-07, + "loss": 0.9581, "step": 22114 }, { - "epoch": 0.6266825356343336, + "epoch": 0.8652867986540418, "grad_norm": 0.0, - "learning_rate": 6.461345511297624e-06, - "loss": 0.8667, + "learning_rate": 9.368179966884062e-07, + "loss": 0.8948, "step": 22115 }, { - "epoch": 0.6267108730765961, + "epoch": 0.8653259253462712, "grad_norm": 0.0, - "learning_rate": 6.46048712126199e-06, - "loss": 0.8545, + "learning_rate": 9.362825427510147e-07, + "loss": 0.8124, "step": 22116 }, { - "epoch": 0.6267392105188586, + "epoch": 0.8653650520385007, "grad_norm": 0.0, - "learning_rate": 6.459628761040983e-06, - "loss": 0.8671, + "learning_rate": 9.357472343655682e-07, + "loss": 0.9052, "step": 22117 }, { - "epoch": 0.626767547961121, + "epoch": 0.8654041787307301, "grad_norm": 0.0, - "learning_rate": 6.458770430641839e-06, - "loss": 0.8145, + "learning_rate": 9.352120715406621e-07, + "loss": 0.9815, "step": 22118 }, { - "epoch": 0.6267958854033835, + "epoch": 0.8654433054229596, "grad_norm": 0.0, - "learning_rate": 6.457912130071786e-06, - "loss": 0.8627, + "learning_rate": 9.346770542848937e-07, + "loss": 1.0095, "step": 22119 }, { - "epoch": 0.626824222845646, + "epoch": 0.8654824321151889, "grad_norm": 0.0, - "learning_rate": 6.457053859338054e-06, - "loss": 0.7608, + "learning_rate": 9.341421826068508e-07, + "loss": 0.9327, "step": 22120 }, { - "epoch": 0.6268525602879084, + "epoch": 0.8655215588074184, "grad_norm": 0.0, - "learning_rate": 6.456195618447877e-06, - "loss": 0.7687, + "learning_rate": 9.336074565151232e-07, + "loss": 0.9462, "step": 22121 }, { - "epoch": 0.6268808977301709, + "epoch": 0.8655606854996478, "grad_norm": 0.0, - "learning_rate": 6.455337407408476e-06, - "loss": 1.0024, + "learning_rate": 9.330728760183006e-07, + "loss": 0.9095, "step": 22122 }, { - "epoch": 0.6269092351724334, + "epoch": 0.8655998121918773, "grad_norm": 0.0, - "learning_rate": 6.454479226227084e-06, - "loss": 0.8331, + "learning_rate": 9.325384411249672e-07, + "loss": 0.9168, "step": 22123 }, { - "epoch": 0.6269375726146958, + "epoch": 0.8656389388841067, "grad_norm": 0.0, - "learning_rate": 6.453621074910933e-06, - "loss": 0.8741, + "learning_rate": 9.320041518437017e-07, + "loss": 0.8715, "step": 22124 }, { - "epoch": 0.6269659100569582, + "epoch": 0.8656780655763362, "grad_norm": 0.0, - "learning_rate": 6.452762953467246e-06, - "loss": 0.9116, + "learning_rate": 9.314700081830896e-07, + "loss": 0.8849, "step": 22125 }, { - "epoch": 0.6269942474992207, + "epoch": 0.8657171922685656, "grad_norm": 0.0, - "learning_rate": 6.451904861903258e-06, - "loss": 0.8795, + "learning_rate": 9.309360101517007e-07, + "loss": 1.0113, "step": 22126 }, { - "epoch": 0.6270225849414832, + "epoch": 0.8657563189607951, "grad_norm": 0.0, - "learning_rate": 6.451046800226189e-06, - "loss": 0.8697, + "learning_rate": 9.304021577581201e-07, + "loss": 0.9761, "step": 22127 }, { - "epoch": 0.6270509223837456, + "epoch": 0.8657954456530245, "grad_norm": 0.0, - "learning_rate": 6.4501887684432706e-06, - "loss": 0.9317, + "learning_rate": 9.298684510109146e-07, + "loss": 0.9654, "step": 22128 }, { - "epoch": 0.6270792598260081, + "epoch": 0.865834572345254, "grad_norm": 0.0, - "learning_rate": 6.449330766561735e-06, - "loss": 0.8857, + "learning_rate": 9.293348899186572e-07, + "loss": 1.0168, "step": 22129 }, { - "epoch": 0.6271075972682706, + "epoch": 0.8658736990374833, "grad_norm": 0.0, - "learning_rate": 6.4484727945888e-06, - "loss": 0.678, + "learning_rate": 9.288014744899121e-07, + "loss": 0.9859, "step": 22130 }, { - "epoch": 0.627135934710533, + "epoch": 0.8659128257297128, "grad_norm": 0.0, - "learning_rate": 6.447614852531697e-06, - "loss": 0.9471, + "learning_rate": 9.282682047332514e-07, + "loss": 1.0022, "step": 22131 }, { - "epoch": 0.6271642721527955, + "epoch": 0.8659519524219422, "grad_norm": 0.0, - "learning_rate": 6.446756940397656e-06, - "loss": 0.9006, + "learning_rate": 9.277350806572338e-07, + "loss": 1.1069, "step": 22132 }, { - "epoch": 0.627192609595058, + "epoch": 0.8659910791141717, "grad_norm": 0.0, - "learning_rate": 6.445899058193899e-06, - "loss": 1.0351, + "learning_rate": 9.272021022704258e-07, + "loss": 0.8915, "step": 22133 }, { - "epoch": 0.6272209470373205, + "epoch": 0.8660302058064011, "grad_norm": 0.0, - "learning_rate": 6.445041205927658e-06, - "loss": 0.8563, + "learning_rate": 9.266692695813806e-07, + "loss": 0.9041, "step": 22134 }, { - "epoch": 0.6272492844795828, + "epoch": 0.8660693324986306, "grad_norm": 0.0, - "learning_rate": 6.444183383606151e-06, - "loss": 0.9044, + "learning_rate": 9.26136582598658e-07, + "loss": 1.0853, "step": 22135 }, { - "epoch": 0.6272776219218453, + "epoch": 0.86610845919086, "grad_norm": 0.0, - "learning_rate": 6.443325591236607e-06, - "loss": 0.7975, + "learning_rate": 9.25604041330811e-07, + "loss": 1.036, "step": 22136 }, { - "epoch": 0.6273059593641078, + "epoch": 0.8661475858830895, "grad_norm": 0.0, - "learning_rate": 6.4424678288262556e-06, - "loss": 0.8807, + "learning_rate": 9.25071645786394e-07, + "loss": 0.9812, "step": 22137 }, { - "epoch": 0.6273342968063702, + "epoch": 0.8661867125753189, "grad_norm": 0.0, - "learning_rate": 6.441610096382316e-06, - "loss": 0.91, + "learning_rate": 9.245393959739535e-07, + "loss": 0.9918, "step": 22138 }, { - "epoch": 0.6273626342486327, + "epoch": 0.8662258392675484, "grad_norm": 0.0, - "learning_rate": 6.4407523939120154e-06, - "loss": 0.8879, + "learning_rate": 9.240072919020371e-07, + "loss": 0.9054, "step": 22139 }, { - "epoch": 0.6273909716908952, + "epoch": 0.8662649659597778, "grad_norm": 0.0, - "learning_rate": 6.439894721422584e-06, - "loss": 0.8718, + "learning_rate": 9.234753335791913e-07, + "loss": 0.9237, "step": 22140 }, { - "epoch": 0.6274193091331577, + "epoch": 0.8663040926520073, "grad_norm": 0.0, - "learning_rate": 6.439037078921235e-06, - "loss": 0.8879, + "learning_rate": 9.229435210139604e-07, + "loss": 0.9712, "step": 22141 }, { - "epoch": 0.6274476465754201, + "epoch": 0.8663432193442366, "grad_norm": 0.0, - "learning_rate": 6.4381794664152065e-06, - "loss": 0.8911, + "learning_rate": 9.224118542148808e-07, + "loss": 0.9529, "step": 22142 }, { - "epoch": 0.6274759840176826, + "epoch": 0.866382346036466, "grad_norm": 0.0, - "learning_rate": 6.437321883911709e-06, - "loss": 0.8178, + "learning_rate": 9.218803331904913e-07, + "loss": 1.0819, "step": 22143 }, { - "epoch": 0.6275043214599451, + "epoch": 0.8664214727286955, "grad_norm": 0.0, - "learning_rate": 6.436464331417973e-06, - "loss": 0.7959, + "learning_rate": 9.213489579493295e-07, + "loss": 0.9435, "step": 22144 }, { - "epoch": 0.6275326589022074, + "epoch": 0.8664605994209249, "grad_norm": 0.0, - "learning_rate": 6.435606808941223e-06, - "loss": 0.898, + "learning_rate": 9.208177284999264e-07, + "loss": 0.9976, "step": 22145 }, { - "epoch": 0.6275609963444699, + "epoch": 0.8664997261131544, "grad_norm": 0.0, - "learning_rate": 6.434749316488678e-06, - "loss": 0.9694, + "learning_rate": 9.20286644850813e-07, + "loss": 0.846, "step": 22146 }, { - "epoch": 0.6275893337867324, + "epoch": 0.8665388528053838, "grad_norm": 0.0, - "learning_rate": 6.433891854067564e-06, - "loss": 0.8854, + "learning_rate": 9.197557070105212e-07, + "loss": 0.9174, "step": 22147 }, { - "epoch": 0.6276176712289949, + "epoch": 0.8665779794976133, "grad_norm": 0.0, - "learning_rate": 6.433034421685107e-06, - "loss": 0.8467, + "learning_rate": 9.19224914987572e-07, + "loss": 0.9652, "step": 22148 }, { - "epoch": 0.6276460086712573, + "epoch": 0.8666171061898427, "grad_norm": 0.0, - "learning_rate": 6.432177019348521e-06, - "loss": 0.7858, + "learning_rate": 9.186942687904921e-07, + "loss": 0.8668, "step": 22149 }, { - "epoch": 0.6276743461135198, + "epoch": 0.8666562328820722, "grad_norm": 0.0, - "learning_rate": 6.4313196470650356e-06, - "loss": 0.8937, + "learning_rate": 9.181637684278044e-07, + "loss": 0.945, "step": 22150 }, { - "epoch": 0.6277026835557823, + "epoch": 0.8666953595743015, "grad_norm": 0.0, - "learning_rate": 6.430462304841868e-06, - "loss": 0.8212, + "learning_rate": 9.176334139080257e-07, + "loss": 0.9377, "step": 22151 }, { - "epoch": 0.6277310209980447, + "epoch": 0.866734486266531, "grad_norm": 0.0, - "learning_rate": 6.429604992686241e-06, - "loss": 0.8391, + "learning_rate": 9.171032052396723e-07, + "loss": 0.937, "step": 22152 }, { - "epoch": 0.6277593584403072, + "epoch": 0.8667736129587604, "grad_norm": 0.0, - "learning_rate": 6.428747710605382e-06, - "loss": 0.9353, + "learning_rate": 9.165731424312596e-07, + "loss": 1.0402, "step": 22153 }, { - "epoch": 0.6277876958825697, + "epoch": 0.8668127396509899, "grad_norm": 0.0, - "learning_rate": 6.4278904586065025e-06, - "loss": 0.9535, + "learning_rate": 9.160432254913032e-07, + "loss": 0.9771, "step": 22154 }, { - "epoch": 0.627816033324832, + "epoch": 0.8668518663432193, "grad_norm": 0.0, - "learning_rate": 6.427033236696833e-06, - "loss": 0.9786, + "learning_rate": 9.155134544283062e-07, + "loss": 0.9601, "step": 22155 }, { - "epoch": 0.6278443707670945, + "epoch": 0.8668909930354488, "grad_norm": 0.0, - "learning_rate": 6.426176044883585e-06, - "loss": 0.7816, + "learning_rate": 9.149838292507829e-07, + "loss": 0.9729, "step": 22156 }, { - "epoch": 0.627872708209357, + "epoch": 0.8669301197276782, "grad_norm": 0.0, - "learning_rate": 6.425318883173983e-06, - "loss": 0.8832, + "learning_rate": 9.144543499672309e-07, + "loss": 0.9054, "step": 22157 }, { - "epoch": 0.6279010456516195, + "epoch": 0.8669692464199077, "grad_norm": 0.0, - "learning_rate": 6.4244617515752505e-06, - "loss": 0.8278, + "learning_rate": 9.139250165861613e-07, + "loss": 0.8503, "step": 22158 }, { - "epoch": 0.6279293830938819, + "epoch": 0.8670083731121371, "grad_norm": 0.0, - "learning_rate": 6.423604650094601e-06, - "loss": 0.8595, + "learning_rate": 9.133958291160683e-07, + "loss": 0.9092, "step": 22159 }, { - "epoch": 0.6279577205361444, + "epoch": 0.8670474998043666, "grad_norm": 0.0, - "learning_rate": 6.422747578739258e-06, - "loss": 0.819, + "learning_rate": 9.12866787565454e-07, + "loss": 0.956, "step": 22160 }, { - "epoch": 0.6279860579784069, + "epoch": 0.867086626496596, "grad_norm": 0.0, - "learning_rate": 6.421890537516444e-06, - "loss": 0.7841, + "learning_rate": 9.123378919428083e-07, + "loss": 0.9586, "step": 22161 }, { - "epoch": 0.6280143954206693, + "epoch": 0.8671257531888255, "grad_norm": 0.0, - "learning_rate": 6.4210335264333716e-06, - "loss": 0.7917, + "learning_rate": 9.118091422566333e-07, + "loss": 1.0112, "step": 22162 }, { - "epoch": 0.6280427328629318, + "epoch": 0.8671648798810548, "grad_norm": 0.0, - "learning_rate": 6.420176545497265e-06, - "loss": 0.8407, + "learning_rate": 9.112805385154122e-07, + "loss": 0.9671, "step": 22163 }, { - "epoch": 0.6280710703051943, + "epoch": 0.8672040065732843, "grad_norm": 0.0, - "learning_rate": 6.419319594715338e-06, - "loss": 0.8229, + "learning_rate": 9.107520807276394e-07, + "loss": 0.8661, "step": 22164 }, { - "epoch": 0.6280994077474568, + "epoch": 0.8672431332655137, "grad_norm": 0.0, - "learning_rate": 6.418462674094812e-06, - "loss": 0.9639, + "learning_rate": 9.102237689017967e-07, + "loss": 0.9669, "step": 22165 }, { - "epoch": 0.6281277451897191, + "epoch": 0.8672822599577432, "grad_norm": 0.0, - "learning_rate": 6.417605783642909e-06, - "loss": 0.7734, + "learning_rate": 9.0969560304637e-07, + "loss": 0.9034, "step": 22166 }, { - "epoch": 0.6281560826319816, + "epoch": 0.8673213866499726, "grad_norm": 0.0, - "learning_rate": 6.4167489233668386e-06, - "loss": 0.8116, + "learning_rate": 9.09167583169841e-07, + "loss": 0.9992, "step": 22167 }, { - "epoch": 0.6281844200742441, + "epoch": 0.8673605133422021, "grad_norm": 0.0, - "learning_rate": 6.415892093273824e-06, - "loss": 0.8796, + "learning_rate": 9.08639709280692e-07, + "loss": 0.8528, "step": 22168 }, { - "epoch": 0.6282127575165065, + "epoch": 0.8673996400344315, "grad_norm": 0.0, - "learning_rate": 6.415035293371081e-06, - "loss": 0.7122, + "learning_rate": 9.08111981387394e-07, + "loss": 0.954, "step": 22169 }, { - "epoch": 0.628241094958769, + "epoch": 0.867438766726661, "grad_norm": 0.0, - "learning_rate": 6.4141785236658285e-06, - "loss": 0.9216, + "learning_rate": 9.075843994984257e-07, + "loss": 0.9264, "step": 22170 }, { - "epoch": 0.6282694324010315, + "epoch": 0.8674778934188904, "grad_norm": 0.0, - "learning_rate": 6.413321784165281e-06, - "loss": 0.7541, + "learning_rate": 9.070569636222582e-07, + "loss": 0.9822, "step": 22171 }, { - "epoch": 0.628297769843294, + "epoch": 0.8675170201111198, "grad_norm": 0.0, - "learning_rate": 6.412465074876653e-06, - "loss": 0.8471, + "learning_rate": 9.065296737673634e-07, + "loss": 1.0601, "step": 22172 }, { - "epoch": 0.6283261072855564, + "epoch": 0.8675561468033492, "grad_norm": 0.0, - "learning_rate": 6.4116083958071654e-06, - "loss": 0.8905, + "learning_rate": 9.060025299422059e-07, + "loss": 0.9975, "step": 22173 }, { - "epoch": 0.6283544447278189, + "epoch": 0.8675952734955786, "grad_norm": 0.0, - "learning_rate": 6.410751746964037e-06, - "loss": 0.9004, + "learning_rate": 9.054755321552533e-07, + "loss": 0.9863, "step": 22174 }, { - "epoch": 0.6283827821700814, + "epoch": 0.8676344001878081, "grad_norm": 0.0, - "learning_rate": 6.409895128354475e-06, - "loss": 0.8577, + "learning_rate": 9.049486804149655e-07, + "loss": 0.9965, "step": 22175 }, { - "epoch": 0.6284111196123437, + "epoch": 0.8676735268800375, "grad_norm": 0.0, - "learning_rate": 6.409038539985699e-06, - "loss": 0.9154, + "learning_rate": 9.044219747298078e-07, + "loss": 1.0634, "step": 22176 }, { - "epoch": 0.6284394570546062, + "epoch": 0.867712653572267, "grad_norm": 0.0, - "learning_rate": 6.408181981864927e-06, - "loss": 0.892, + "learning_rate": 9.038954151082336e-07, + "loss": 0.9501, "step": 22177 }, { - "epoch": 0.6284677944968687, + "epoch": 0.8677517802644964, "grad_norm": 0.0, - "learning_rate": 6.4073254539993705e-06, - "loss": 0.8915, + "learning_rate": 9.033690015587038e-07, + "loss": 0.9586, "step": 22178 }, { - "epoch": 0.6284961319391311, + "epoch": 0.8677909069567259, "grad_norm": 0.0, - "learning_rate": 6.4064689563962505e-06, - "loss": 0.9774, + "learning_rate": 9.028427340896639e-07, + "loss": 0.9726, "step": 22179 }, { - "epoch": 0.6285244693813936, + "epoch": 0.8678300336489553, "grad_norm": 0.0, - "learning_rate": 6.405612489062771e-06, - "loss": 0.8045, + "learning_rate": 9.02316612709575e-07, + "loss": 0.8803, "step": 22180 }, { - "epoch": 0.6285528068236561, + "epoch": 0.8678691603411848, "grad_norm": 0.0, - "learning_rate": 6.404756052006153e-06, - "loss": 0.8452, + "learning_rate": 9.017906374268804e-07, + "loss": 1.0248, "step": 22181 }, { - "epoch": 0.6285811442659186, + "epoch": 0.8679082870334142, "grad_norm": 0.0, - "learning_rate": 6.403899645233612e-06, - "loss": 0.9069, + "learning_rate": 9.012648082500275e-07, + "loss": 0.924, "step": 22182 }, { - "epoch": 0.628609481708181, + "epoch": 0.8679474137256437, "grad_norm": 0.0, - "learning_rate": 6.403043268752358e-06, - "loss": 0.8199, + "learning_rate": 9.007391251874598e-07, + "loss": 0.8113, "step": 22183 }, { - "epoch": 0.6286378191504435, + "epoch": 0.867986540417873, "grad_norm": 0.0, - "learning_rate": 6.402186922569609e-06, - "loss": 0.7831, + "learning_rate": 9.002135882476193e-07, + "loss": 0.9631, "step": 22184 }, { - "epoch": 0.628666156592706, + "epoch": 0.8680256671101025, "grad_norm": 0.0, - "learning_rate": 6.4013306066925725e-06, - "loss": 0.9072, + "learning_rate": 8.99688197438946e-07, + "loss": 0.9355, "step": 22185 }, { - "epoch": 0.6286944940349684, + "epoch": 0.8680647938023319, "grad_norm": 0.0, - "learning_rate": 6.400474321128465e-06, - "loss": 0.864, + "learning_rate": 8.991629527698786e-07, + "loss": 0.9126, "step": 22186 }, { - "epoch": 0.6287228314772308, + "epoch": 0.8681039204945614, "grad_norm": 0.0, - "learning_rate": 6.3996180658845035e-06, - "loss": 0.8589, + "learning_rate": 8.986378542488483e-07, + "loss": 1.0268, "step": 22187 }, { - "epoch": 0.6287511689194933, + "epoch": 0.8681430471867908, "grad_norm": 0.0, - "learning_rate": 6.398761840967891e-06, - "loss": 0.7152, + "learning_rate": 8.981129018842893e-07, + "loss": 0.9198, "step": 22188 }, { - "epoch": 0.6287795063617558, + "epoch": 0.8681821738790203, "grad_norm": 0.0, - "learning_rate": 6.397905646385844e-06, - "loss": 0.8708, + "learning_rate": 8.97588095684635e-07, + "loss": 0.9393, "step": 22189 }, { - "epoch": 0.6288078438040182, + "epoch": 0.8682213005712497, "grad_norm": 0.0, - "learning_rate": 6.397049482145578e-06, - "loss": 0.8517, + "learning_rate": 8.970634356583064e-07, + "loss": 0.9863, "step": 22190 }, { - "epoch": 0.6288361812462807, + "epoch": 0.8682604272634792, "grad_norm": 0.0, - "learning_rate": 6.3961933482543e-06, - "loss": 0.9457, + "learning_rate": 8.965389218137333e-07, + "loss": 1.087, "step": 22191 }, { - "epoch": 0.6288645186885432, + "epoch": 0.8682995539557086, "grad_norm": 0.0, - "learning_rate": 6.395337244719229e-06, - "loss": 0.9185, + "learning_rate": 8.96014554159339e-07, + "loss": 0.9799, "step": 22192 }, { - "epoch": 0.6288928561308056, + "epoch": 0.8683386806479381, "grad_norm": 0.0, - "learning_rate": 6.394481171547566e-06, - "loss": 0.855, + "learning_rate": 8.954903327035414e-07, + "loss": 0.8736, "step": 22193 }, { - "epoch": 0.6289211935730681, + "epoch": 0.8683778073401675, "grad_norm": 0.0, - "learning_rate": 6.393625128746527e-06, - "loss": 0.895, + "learning_rate": 8.949662574547613e-07, + "loss": 0.9642, "step": 22194 }, { - "epoch": 0.6289495310153306, + "epoch": 0.868416934032397, "grad_norm": 0.0, - "learning_rate": 6.392769116323325e-06, - "loss": 0.8512, + "learning_rate": 8.944423284214143e-07, + "loss": 0.8761, "step": 22195 }, { - "epoch": 0.6289778684575931, + "epoch": 0.8684560607246263, "grad_norm": 0.0, - "learning_rate": 6.391913134285166e-06, - "loss": 0.9281, + "learning_rate": 8.939185456119126e-07, + "loss": 1.0389, "step": 22196 }, { - "epoch": 0.6290062058998555, + "epoch": 0.8684951874168558, "grad_norm": 0.0, - "learning_rate": 6.391057182639262e-06, - "loss": 0.8748, + "learning_rate": 8.933949090346683e-07, + "loss": 0.9186, "step": 22197 }, { - "epoch": 0.629034543342118, + "epoch": 0.8685343141090852, "grad_norm": 0.0, - "learning_rate": 6.390201261392828e-06, - "loss": 0.94, + "learning_rate": 8.928714186980913e-07, + "loss": 0.9985, "step": 22198 }, { - "epoch": 0.6290628807843804, + "epoch": 0.8685734408013147, "grad_norm": 0.0, - "learning_rate": 6.389345370553065e-06, - "loss": 0.9703, + "learning_rate": 8.923480746105884e-07, + "loss": 1.0219, "step": 22199 }, { - "epoch": 0.6290912182266428, + "epoch": 0.8686125674935441, "grad_norm": 0.0, - "learning_rate": 6.3884895101271915e-06, - "loss": 0.7815, + "learning_rate": 8.918248767805615e-07, + "loss": 0.9875, "step": 22200 }, { - "epoch": 0.6291195556689053, + "epoch": 0.8686516941857735, "grad_norm": 0.0, - "learning_rate": 6.387633680122407e-06, - "loss": 0.7595, + "learning_rate": 8.91301825216413e-07, + "loss": 1.0052, "step": 22201 }, { - "epoch": 0.6291478931111678, + "epoch": 0.868690820878003, "grad_norm": 0.0, - "learning_rate": 6.386777880545924e-06, - "loss": 0.8605, + "learning_rate": 8.907789199265449e-07, + "loss": 0.8929, "step": 22202 }, { - "epoch": 0.6291762305534302, + "epoch": 0.8687299475702324, "grad_norm": 0.0, - "learning_rate": 6.385922111404958e-06, - "loss": 0.9363, + "learning_rate": 8.902561609193539e-07, + "loss": 0.9529, "step": 22203 }, { - "epoch": 0.6292045679956927, + "epoch": 0.8687690742624619, "grad_norm": 0.0, - "learning_rate": 6.385066372706707e-06, - "loss": 0.8321, + "learning_rate": 8.89733548203231e-07, + "loss": 0.9565, "step": 22204 }, { - "epoch": 0.6292329054379552, + "epoch": 0.8688082009546912, "grad_norm": 0.0, - "learning_rate": 6.3842106644583875e-06, - "loss": 0.882, + "learning_rate": 8.892110817865751e-07, + "loss": 0.91, "step": 22205 }, { - "epoch": 0.6292612428802177, + "epoch": 0.8688473276469207, "grad_norm": 0.0, - "learning_rate": 6.383354986667206e-06, - "loss": 0.7922, + "learning_rate": 8.886887616777673e-07, + "loss": 0.9837, "step": 22206 }, { - "epoch": 0.6292895803224801, + "epoch": 0.8688864543391501, "grad_norm": 0.0, - "learning_rate": 6.382499339340365e-06, - "loss": 0.8642, + "learning_rate": 8.881665878852063e-07, + "loss": 1.0266, "step": 22207 }, { - "epoch": 0.6293179177647426, + "epoch": 0.8689255810313796, "grad_norm": 0.0, - "learning_rate": 6.381643722485078e-06, - "loss": 0.8347, + "learning_rate": 8.87644560417269e-07, + "loss": 0.9656, "step": 22208 }, { - "epoch": 0.629346255207005, + "epoch": 0.868964707723609, "grad_norm": 0.0, - "learning_rate": 6.3807881361085465e-06, - "loss": 0.7362, + "learning_rate": 8.87122679282344e-07, + "loss": 0.9293, "step": 22209 }, { - "epoch": 0.6293745926492674, + "epoch": 0.8690038344158385, "grad_norm": 0.0, - "learning_rate": 6.379932580217981e-06, - "loss": 0.9014, + "learning_rate": 8.866009444888057e-07, + "loss": 0.8358, "step": 22210 }, { - "epoch": 0.6294029300915299, + "epoch": 0.8690429611080679, "grad_norm": 0.0, - "learning_rate": 6.379077054820592e-06, - "loss": 0.8125, + "learning_rate": 8.860793560450409e-07, + "loss": 0.9417, "step": 22211 }, { - "epoch": 0.6294312675337924, + "epoch": 0.8690820878002974, "grad_norm": 0.0, - "learning_rate": 6.378221559923576e-06, - "loss": 0.867, + "learning_rate": 8.855579139594184e-07, + "loss": 1.0053, "step": 22212 }, { - "epoch": 0.6294596049760549, + "epoch": 0.8691212144925268, "grad_norm": 0.0, - "learning_rate": 6.377366095534152e-06, - "loss": 0.7677, + "learning_rate": 8.85036618240318e-07, + "loss": 1.1019, "step": 22213 }, { - "epoch": 0.6294879424183173, + "epoch": 0.8691603411847563, "grad_norm": 0.0, - "learning_rate": 6.376510661659512e-06, - "loss": 0.8319, + "learning_rate": 8.845154688961044e-07, + "loss": 0.9489, "step": 22214 }, { - "epoch": 0.6295162798605798, + "epoch": 0.8691994678769857, "grad_norm": 0.0, - "learning_rate": 6.375655258306869e-06, - "loss": 0.7818, + "learning_rate": 8.839944659351507e-07, + "loss": 1.0828, "step": 22215 }, { - "epoch": 0.6295446173028423, + "epoch": 0.8692385945692152, "grad_norm": 0.0, - "learning_rate": 6.3747998854834295e-06, - "loss": 0.7577, + "learning_rate": 8.834736093658237e-07, + "loss": 0.9106, "step": 22216 }, { - "epoch": 0.6295729547451047, + "epoch": 0.8692777212614445, "grad_norm": 0.0, - "learning_rate": 6.373944543196394e-06, - "loss": 0.9365, + "learning_rate": 8.829528991964875e-07, + "loss": 0.9889, "step": 22217 }, { - "epoch": 0.6296012921873672, + "epoch": 0.869316847953674, "grad_norm": 0.0, - "learning_rate": 6.373089231452972e-06, - "loss": 0.8455, + "learning_rate": 8.824323354355024e-07, + "loss": 0.9498, "step": 22218 }, { - "epoch": 0.6296296296296297, + "epoch": 0.8693559746459034, "grad_norm": 0.0, - "learning_rate": 6.372233950260368e-06, - "loss": 0.92, + "learning_rate": 8.819119180912283e-07, + "loss": 1.0519, "step": 22219 }, { - "epoch": 0.6296579670718921, + "epoch": 0.8693951013381329, "grad_norm": 0.0, - "learning_rate": 6.371378699625781e-06, - "loss": 0.8647, + "learning_rate": 8.81391647172024e-07, + "loss": 1.0462, "step": 22220 }, { - "epoch": 0.6296863045141545, + "epoch": 0.8694342280303623, "grad_norm": 0.0, - "learning_rate": 6.370523479556422e-06, - "loss": 0.8479, + "learning_rate": 8.808715226862452e-07, + "loss": 1.0011, "step": 22221 }, { - "epoch": 0.629714641956417, + "epoch": 0.8694733547225918, "grad_norm": 0.0, - "learning_rate": 6.369668290059489e-06, - "loss": 0.8346, + "learning_rate": 8.803515446422406e-07, + "loss": 0.9581, "step": 22222 }, { - "epoch": 0.6297429793986795, + "epoch": 0.8695124814148212, "grad_norm": 0.0, - "learning_rate": 6.368813131142187e-06, - "loss": 0.8788, + "learning_rate": 8.798317130483647e-07, + "loss": 0.9521, "step": 22223 }, { - "epoch": 0.6297713168409419, + "epoch": 0.8695516081070507, "grad_norm": 0.0, - "learning_rate": 6.367958002811726e-06, - "loss": 0.8802, + "learning_rate": 8.793120279129596e-07, + "loss": 0.8593, "step": 22224 }, { - "epoch": 0.6297996542832044, + "epoch": 0.8695907347992801, "grad_norm": 0.0, - "learning_rate": 6.367102905075299e-06, - "loss": 1.0052, + "learning_rate": 8.787924892443789e-07, + "loss": 1.0127, "step": 22225 }, { - "epoch": 0.6298279917254669, + "epoch": 0.8696298614915096, "grad_norm": 0.0, - "learning_rate": 6.3662478379401125e-06, - "loss": 0.8415, + "learning_rate": 8.782730970509589e-07, + "loss": 0.9729, "step": 22226 }, { - "epoch": 0.6298563291677293, + "epoch": 0.869668988183739, "grad_norm": 0.0, - "learning_rate": 6.365392801413375e-06, - "loss": 0.8046, + "learning_rate": 8.777538513410445e-07, + "loss": 0.8212, "step": 22227 }, { - "epoch": 0.6298846666099918, + "epoch": 0.8697081148759684, "grad_norm": 0.0, - "learning_rate": 6.364537795502278e-06, - "loss": 0.8148, + "learning_rate": 8.772347521229696e-07, + "loss": 0.9355, "step": 22228 }, { - "epoch": 0.6299130040522543, + "epoch": 0.8697472415681978, "grad_norm": 0.0, - "learning_rate": 6.363682820214032e-06, - "loss": 0.8291, + "learning_rate": 8.767157994050746e-07, + "loss": 0.8959, "step": 22229 }, { - "epoch": 0.6299413414945167, + "epoch": 0.8697863682604272, "grad_norm": 0.0, - "learning_rate": 6.362827875555835e-06, - "loss": 0.8428, + "learning_rate": 8.761969931956915e-07, + "loss": 0.8654, "step": 22230 }, { - "epoch": 0.6299696789367791, + "epoch": 0.8698254949526567, "grad_norm": 0.0, - "learning_rate": 6.361972961534888e-06, - "loss": 0.7999, + "learning_rate": 8.756783335031538e-07, + "loss": 0.8414, "step": 22231 }, { - "epoch": 0.6299980163790416, + "epoch": 0.8698646216448861, "grad_norm": 0.0, - "learning_rate": 6.361118078158398e-06, - "loss": 0.7719, + "learning_rate": 8.751598203357858e-07, + "loss": 0.885, "step": 22232 }, { - "epoch": 0.6300263538213041, + "epoch": 0.8699037483371156, "grad_norm": 0.0, - "learning_rate": 6.360263225433559e-06, - "loss": 0.84, + "learning_rate": 8.746414537019177e-07, + "loss": 0.958, "step": 22233 }, { - "epoch": 0.6300546912635665, + "epoch": 0.869942875029345, "grad_norm": 0.0, - "learning_rate": 6.359408403367572e-06, - "loss": 0.8921, + "learning_rate": 8.741232336098749e-07, + "loss": 0.9976, "step": 22234 }, { - "epoch": 0.630083028705829, + "epoch": 0.8699820017215745, "grad_norm": 0.0, - "learning_rate": 6.358553611967644e-06, - "loss": 0.9025, + "learning_rate": 8.736051600679763e-07, + "loss": 1.1742, "step": 22235 }, { - "epoch": 0.6301113661480915, + "epoch": 0.8700211284138039, "grad_norm": 0.0, - "learning_rate": 6.357698851240968e-06, - "loss": 0.836, + "learning_rate": 8.73087233084542e-07, + "loss": 1.0298, "step": 22236 }, { - "epoch": 0.630139703590354, + "epoch": 0.8700602551060334, "grad_norm": 0.0, - "learning_rate": 6.356844121194751e-06, - "loss": 0.8401, + "learning_rate": 8.725694526678907e-07, + "loss": 0.9997, "step": 22237 }, { - "epoch": 0.6301680410326164, + "epoch": 0.8700993817982627, "grad_norm": 0.0, - "learning_rate": 6.355989421836185e-06, - "loss": 0.8639, + "learning_rate": 8.720518188263382e-07, + "loss": 1.0021, "step": 22238 }, { - "epoch": 0.6301963784748789, + "epoch": 0.8701385084904922, "grad_norm": 0.0, - "learning_rate": 6.355134753172474e-06, - "loss": 0.8057, + "learning_rate": 8.715343315681945e-07, + "loss": 0.8769, "step": 22239 }, { - "epoch": 0.6302247159171414, + "epoch": 0.8701776351827216, "grad_norm": 0.0, - "learning_rate": 6.35428011521082e-06, - "loss": 0.9663, + "learning_rate": 8.710169909017718e-07, + "loss": 0.9408, "step": 22240 }, { - "epoch": 0.6302530533594037, + "epoch": 0.8702167618749511, "grad_norm": 0.0, - "learning_rate": 6.353425507958414e-06, - "loss": 0.8499, + "learning_rate": 8.704997968353746e-07, + "loss": 0.9411, "step": 22241 }, { - "epoch": 0.6302813908016662, + "epoch": 0.8702558885671805, "grad_norm": 0.0, - "learning_rate": 6.3525709314224616e-06, - "loss": 0.9131, + "learning_rate": 8.699827493773116e-07, + "loss": 0.8741, "step": 22242 }, { - "epoch": 0.6303097282439287, + "epoch": 0.87029501525941, "grad_norm": 0.0, - "learning_rate": 6.351716385610156e-06, - "loss": 0.8854, + "learning_rate": 8.694658485358853e-07, + "loss": 0.8603, "step": 22243 }, { - "epoch": 0.6303380656861912, + "epoch": 0.8703341419516394, "grad_norm": 0.0, - "learning_rate": 6.3508618705287e-06, - "loss": 0.9048, + "learning_rate": 8.689490943193979e-07, + "loss": 0.9026, "step": 22244 }, { - "epoch": 0.6303664031284536, + "epoch": 0.8703732686438689, "grad_norm": 0.0, - "learning_rate": 6.350007386185291e-06, - "loss": 0.9817, + "learning_rate": 8.684324867361438e-07, + "loss": 0.8864, "step": 22245 }, { - "epoch": 0.6303947405707161, + "epoch": 0.8704123953360983, "grad_norm": 0.0, - "learning_rate": 6.349152932587122e-06, - "loss": 0.8416, + "learning_rate": 8.679160257944219e-07, + "loss": 0.8762, "step": 22246 }, { - "epoch": 0.6304230780129786, + "epoch": 0.8704515220283278, "grad_norm": 0.0, - "learning_rate": 6.348298509741394e-06, - "loss": 0.7815, + "learning_rate": 8.673997115025257e-07, + "loss": 0.9593, "step": 22247 }, { - "epoch": 0.630451415455241, + "epoch": 0.8704906487205571, "grad_norm": 0.0, - "learning_rate": 6.347444117655306e-06, - "loss": 0.8347, + "learning_rate": 8.668835438687484e-07, + "loss": 0.8774, "step": 22248 }, { - "epoch": 0.6304797528975035, + "epoch": 0.8705297754127866, "grad_norm": 0.0, - "learning_rate": 6.34658975633605e-06, - "loss": 0.8439, + "learning_rate": 8.663675229013746e-07, + "loss": 0.8954, "step": 22249 }, { - "epoch": 0.630508090339766, + "epoch": 0.870568902105016, "grad_norm": 0.0, - "learning_rate": 6.345735425790829e-06, - "loss": 0.867, + "learning_rate": 8.658516486086943e-07, + "loss": 0.9866, "step": 22250 }, { - "epoch": 0.6305364277820283, + "epoch": 0.8706080287972455, "grad_norm": 0.0, - "learning_rate": 6.344881126026832e-06, - "loss": 0.84, + "learning_rate": 8.653359209989887e-07, + "loss": 1.0682, "step": 22251 }, { - "epoch": 0.6305647652242908, + "epoch": 0.8706471554894749, "grad_norm": 0.0, - "learning_rate": 6.344026857051257e-06, - "loss": 0.8002, + "learning_rate": 8.648203400805444e-07, + "loss": 0.9969, "step": 22252 }, { - "epoch": 0.6305931026665533, + "epoch": 0.8706862821817044, "grad_norm": 0.0, - "learning_rate": 6.343172618871307e-06, - "loss": 0.8192, + "learning_rate": 8.643049058616371e-07, + "loss": 0.8743, "step": 22253 }, { - "epoch": 0.6306214401088158, + "epoch": 0.8707254088739338, "grad_norm": 0.0, - "learning_rate": 6.3423184114941686e-06, - "loss": 0.8781, + "learning_rate": 8.637896183505467e-07, + "loss": 0.9736, "step": 22254 }, { - "epoch": 0.6306497775510782, + "epoch": 0.8707645355661633, "grad_norm": 0.0, - "learning_rate": 6.341464234927039e-06, - "loss": 0.8406, + "learning_rate": 8.632744775555435e-07, + "loss": 1.0621, "step": 22255 }, { - "epoch": 0.6306781149933407, + "epoch": 0.8708036622583927, "grad_norm": 0.0, - "learning_rate": 6.3406100891771175e-06, - "loss": 0.798, + "learning_rate": 8.627594834849073e-07, + "loss": 1.0595, "step": 22256 }, { - "epoch": 0.6307064524356032, + "epoch": 0.8708427889506221, "grad_norm": 0.0, - "learning_rate": 6.339755974251594e-06, - "loss": 0.8644, + "learning_rate": 8.622446361469017e-07, + "loss": 1.0558, "step": 22257 }, { - "epoch": 0.6307347898778656, + "epoch": 0.8708819156428516, "grad_norm": 0.0, - "learning_rate": 6.3389018901576695e-06, - "loss": 0.7942, + "learning_rate": 8.617299355497988e-07, + "loss": 0.9772, "step": 22258 }, { - "epoch": 0.6307631273201281, + "epoch": 0.8709210423350809, "grad_norm": 0.0, - "learning_rate": 6.338047836902528e-06, - "loss": 0.7166, + "learning_rate": 8.612153817018598e-07, + "loss": 0.8757, "step": 22259 }, { - "epoch": 0.6307914647623906, + "epoch": 0.8709601690273104, "grad_norm": 0.0, - "learning_rate": 6.337193814493371e-06, - "loss": 0.8883, + "learning_rate": 8.607009746113526e-07, + "loss": 0.9692, "step": 22260 }, { - "epoch": 0.6308198022046531, + "epoch": 0.8709992957195398, "grad_norm": 0.0, - "learning_rate": 6.3363398229373915e-06, - "loss": 0.8247, + "learning_rate": 8.601867142865339e-07, + "loss": 0.8951, "step": 22261 }, { - "epoch": 0.6308481396469154, + "epoch": 0.8710384224117693, "grad_norm": 0.0, - "learning_rate": 6.335485862241782e-06, - "loss": 0.833, + "learning_rate": 8.596726007356659e-07, + "loss": 0.9921, "step": 22262 }, { - "epoch": 0.6308764770891779, + "epoch": 0.8710775491039987, "grad_norm": 0.0, - "learning_rate": 6.33463193241374e-06, - "loss": 0.9013, + "learning_rate": 8.59158633967e-07, + "loss": 0.8509, "step": 22263 }, { - "epoch": 0.6309048145314404, + "epoch": 0.8711166757962282, "grad_norm": 0.0, - "learning_rate": 6.3337780334604485e-06, - "loss": 0.8349, + "learning_rate": 8.586448139887927e-07, + "loss": 0.9026, "step": 22264 }, { - "epoch": 0.6309331519737028, + "epoch": 0.8711558024884576, "grad_norm": 0.0, - "learning_rate": 6.332924165389106e-06, - "loss": 0.7822, + "learning_rate": 8.581311408092952e-07, + "loss": 0.9919, "step": 22265 }, { - "epoch": 0.6309614894159653, + "epoch": 0.8711949291806871, "grad_norm": 0.0, - "learning_rate": 6.33207032820691e-06, - "loss": 0.8139, + "learning_rate": 8.576176144367576e-07, + "loss": 0.9106, "step": 22266 }, { - "epoch": 0.6309898268582278, + "epoch": 0.8712340558729165, "grad_norm": 0.0, - "learning_rate": 6.331216521921044e-06, - "loss": 0.8678, + "learning_rate": 8.571042348794234e-07, + "loss": 1.0081, "step": 22267 }, { - "epoch": 0.6310181643004903, + "epoch": 0.871273182565146, "grad_norm": 0.0, - "learning_rate": 6.330362746538703e-06, - "loss": 0.862, + "learning_rate": 8.565910021455393e-07, + "loss": 0.907, "step": 22268 }, { - "epoch": 0.6310465017427527, + "epoch": 0.8713123092573754, "grad_norm": 0.0, - "learning_rate": 6.32950900206708e-06, - "loss": 0.8617, + "learning_rate": 8.560779162433452e-07, + "loss": 0.9922, "step": 22269 }, { - "epoch": 0.6310748391850152, + "epoch": 0.8713514359496048, "grad_norm": 0.0, - "learning_rate": 6.328655288513366e-06, - "loss": 0.8307, + "learning_rate": 8.555649771810837e-07, + "loss": 0.9833, "step": 22270 }, { - "epoch": 0.6311031766272777, + "epoch": 0.8713905626418342, "grad_norm": 0.0, - "learning_rate": 6.327801605884754e-06, - "loss": 0.9498, + "learning_rate": 8.550521849669891e-07, + "loss": 0.9883, "step": 22271 }, { - "epoch": 0.63113151406954, + "epoch": 0.8714296893340637, "grad_norm": 0.0, - "learning_rate": 6.32694795418843e-06, - "loss": 0.9684, + "learning_rate": 8.545395396092981e-07, + "loss": 1.0444, "step": 22272 }, { - "epoch": 0.6311598515118025, + "epoch": 0.8714688160262931, "grad_norm": 0.0, - "learning_rate": 6.326094333431587e-06, - "loss": 0.7509, + "learning_rate": 8.54027041116241e-07, + "loss": 0.9682, "step": 22273 }, { - "epoch": 0.631188188954065, + "epoch": 0.8715079427185226, "grad_norm": 0.0, - "learning_rate": 6.3252407436214165e-06, - "loss": 0.7736, + "learning_rate": 8.535146894960488e-07, + "loss": 0.8552, "step": 22274 }, { - "epoch": 0.6312165263963274, + "epoch": 0.871547069410752, "grad_norm": 0.0, - "learning_rate": 6.324387184765108e-06, - "loss": 0.8365, + "learning_rate": 8.530024847569496e-07, + "loss": 0.9568, "step": 22275 }, { - "epoch": 0.6312448638385899, + "epoch": 0.8715861961029815, "grad_norm": 0.0, - "learning_rate": 6.323533656869849e-06, - "loss": 0.772, + "learning_rate": 8.524904269071698e-07, + "loss": 0.9671, "step": 22276 }, { - "epoch": 0.6312732012808524, + "epoch": 0.8716253227952109, "grad_norm": 0.0, - "learning_rate": 6.322680159942838e-06, - "loss": 0.7411, + "learning_rate": 8.519785159549299e-07, + "loss": 0.9531, "step": 22277 }, { - "epoch": 0.6313015387231149, + "epoch": 0.8716644494874404, "grad_norm": 0.0, - "learning_rate": 6.321826693991251e-06, - "loss": 0.8665, + "learning_rate": 8.514667519084518e-07, + "loss": 1.0434, "step": 22278 }, { - "epoch": 0.6313298761653773, + "epoch": 0.8717035761796698, "grad_norm": 0.0, - "learning_rate": 6.320973259022286e-06, - "loss": 0.8331, + "learning_rate": 8.509551347759559e-07, + "loss": 0.8155, "step": 22279 }, { - "epoch": 0.6313582136076398, + "epoch": 0.8717427028718993, "grad_norm": 0.0, - "learning_rate": 6.32011985504313e-06, - "loss": 0.8866, + "learning_rate": 8.504436645656545e-07, + "loss": 0.7421, "step": 22280 }, { - "epoch": 0.6313865510499023, + "epoch": 0.8717818295641286, "grad_norm": 0.0, - "learning_rate": 6.3192664820609685e-06, - "loss": 0.8569, + "learning_rate": 8.49932341285763e-07, + "loss": 0.8067, "step": 22281 }, { - "epoch": 0.6314148884921646, + "epoch": 0.8718209562563581, "grad_norm": 0.0, - "learning_rate": 6.318413140082995e-06, - "loss": 0.9138, + "learning_rate": 8.494211649444917e-07, + "loss": 1.1249, "step": 22282 }, { - "epoch": 0.6314432259344271, + "epoch": 0.8718600829485875, "grad_norm": 0.0, - "learning_rate": 6.317559829116392e-06, - "loss": 0.8637, + "learning_rate": 8.489101355500529e-07, + "loss": 0.9594, "step": 22283 }, { - "epoch": 0.6314715633766896, + "epoch": 0.871899209640817, "grad_norm": 0.0, - "learning_rate": 6.31670654916835e-06, - "loss": 0.844, + "learning_rate": 8.483992531106477e-07, + "loss": 0.823, "step": 22284 }, { - "epoch": 0.6314999008189521, + "epoch": 0.8719383363330464, "grad_norm": 0.0, - "learning_rate": 6.315853300246061e-06, - "loss": 0.7311, + "learning_rate": 8.478885176344853e-07, + "loss": 1.0241, "step": 22285 }, { - "epoch": 0.6315282382612145, + "epoch": 0.8719774630252758, "grad_norm": 0.0, - "learning_rate": 6.315000082356704e-06, - "loss": 0.8584, + "learning_rate": 8.473779291297612e-07, + "loss": 1.0345, "step": 22286 }, { - "epoch": 0.631556575703477, + "epoch": 0.8720165897175053, "grad_norm": 0.0, - "learning_rate": 6.31414689550747e-06, - "loss": 0.8708, + "learning_rate": 8.468674876046823e-07, + "loss": 0.9122, "step": 22287 }, { - "epoch": 0.6315849131457395, + "epoch": 0.8720557164097347, "grad_norm": 0.0, - "learning_rate": 6.313293739705545e-06, - "loss": 0.8027, + "learning_rate": 8.463571930674397e-07, + "loss": 0.9725, "step": 22288 }, { - "epoch": 0.6316132505880019, + "epoch": 0.8720948431019642, "grad_norm": 0.0, - "learning_rate": 6.3124406149581154e-06, - "loss": 0.8668, + "learning_rate": 8.458470455262335e-07, + "loss": 0.9373, "step": 22289 }, { - "epoch": 0.6316415880302644, + "epoch": 0.8721339697941936, "grad_norm": 0.0, - "learning_rate": 6.311587521272372e-06, - "loss": 0.871, + "learning_rate": 8.453370449892506e-07, + "loss": 0.9876, "step": 22290 }, { - "epoch": 0.6316699254725269, + "epoch": 0.872173096486423, "grad_norm": 0.0, - "learning_rate": 6.3107344586554915e-06, - "loss": 0.8318, + "learning_rate": 8.448271914646822e-07, + "loss": 1.1749, "step": 22291 }, { - "epoch": 0.6316982629147894, + "epoch": 0.8722122231786524, "grad_norm": 0.0, - "learning_rate": 6.309881427114668e-06, - "loss": 0.8255, + "learning_rate": 8.443174849607183e-07, + "loss": 0.9806, "step": 22292 }, { - "epoch": 0.6317266003570517, + "epoch": 0.8722513498708819, "grad_norm": 0.0, - "learning_rate": 6.309028426657082e-06, - "loss": 0.7355, + "learning_rate": 8.438079254855447e-07, + "loss": 1.0372, "step": 22293 }, { - "epoch": 0.6317549377993142, + "epoch": 0.8722904765631113, "grad_norm": 0.0, - "learning_rate": 6.308175457289918e-06, - "loss": 0.8301, + "learning_rate": 8.432985130473403e-07, + "loss": 0.9332, "step": 22294 }, { - "epoch": 0.6317832752415767, + "epoch": 0.8723296032553408, "grad_norm": 0.0, - "learning_rate": 6.307322519020369e-06, - "loss": 0.7811, + "learning_rate": 8.427892476542876e-07, + "loss": 0.9482, "step": 22295 }, { - "epoch": 0.6318116126838391, + "epoch": 0.8723687299475702, "grad_norm": 0.0, - "learning_rate": 6.306469611855609e-06, - "loss": 0.811, + "learning_rate": 8.422801293145655e-07, + "loss": 0.9288, "step": 22296 }, { - "epoch": 0.6318399501261016, + "epoch": 0.8724078566397997, "grad_norm": 0.0, - "learning_rate": 6.305616735802827e-06, - "loss": 0.9092, + "learning_rate": 8.417711580363508e-07, + "loss": 1.0655, "step": 22297 }, { - "epoch": 0.6318682875683641, + "epoch": 0.8724469833320291, "grad_norm": 0.0, - "learning_rate": 6.304763890869211e-06, - "loss": 0.853, + "learning_rate": 8.412623338278125e-07, + "loss": 1.013, "step": 22298 }, { - "epoch": 0.6318966250106265, + "epoch": 0.8724861100242586, "grad_norm": 0.0, - "learning_rate": 6.303911077061937e-06, - "loss": 0.8306, + "learning_rate": 8.407536566971275e-07, + "loss": 0.9258, "step": 22299 }, { - "epoch": 0.631924962452889, + "epoch": 0.872525236716488, "grad_norm": 0.0, - "learning_rate": 6.303058294388195e-06, - "loss": 0.7934, + "learning_rate": 8.402451266524581e-07, + "loss": 0.961, "step": 22300 }, { - "epoch": 0.6319532998951515, + "epoch": 0.8725643634087175, "grad_norm": 0.0, - "learning_rate": 6.302205542855163e-06, - "loss": 0.8802, + "learning_rate": 8.397367437019777e-07, + "loss": 0.9919, "step": 22301 }, { - "epoch": 0.631981637337414, + "epoch": 0.8726034901009468, "grad_norm": 0.0, - "learning_rate": 6.301352822470027e-06, - "loss": 0.7993, + "learning_rate": 8.392285078538453e-07, + "loss": 0.9692, "step": 22302 }, { - "epoch": 0.6320099747796764, + "epoch": 0.8726426167931763, "grad_norm": 0.0, - "learning_rate": 6.300500133239974e-06, - "loss": 0.8666, + "learning_rate": 8.387204191162246e-07, + "loss": 0.9336, "step": 22303 }, { - "epoch": 0.6320383122219388, + "epoch": 0.8726817434854057, "grad_norm": 0.0, - "learning_rate": 6.299647475172178e-06, - "loss": 0.8737, + "learning_rate": 8.38212477497271e-07, + "loss": 0.8862, "step": 22304 }, { - "epoch": 0.6320666496642013, + "epoch": 0.8727208701776352, "grad_norm": 0.0, - "learning_rate": 6.298794848273826e-06, - "loss": 0.7951, + "learning_rate": 8.377046830051494e-07, + "loss": 0.9612, "step": 22305 }, { - "epoch": 0.6320949871064637, + "epoch": 0.8727599968698646, "grad_norm": 0.0, - "learning_rate": 6.297942252552101e-06, - "loss": 0.7897, + "learning_rate": 8.371970356480064e-07, + "loss": 1.0095, "step": 22306 }, { - "epoch": 0.6321233245487262, + "epoch": 0.8727991235620941, "grad_norm": 0.0, - "learning_rate": 6.2970896880141794e-06, - "loss": 0.9289, + "learning_rate": 8.366895354339999e-07, + "loss": 0.9787, "step": 22307 }, { - "epoch": 0.6321516619909887, + "epoch": 0.8728382502543235, "grad_norm": 0.0, - "learning_rate": 6.296237154667253e-06, - "loss": 0.7888, + "learning_rate": 8.361821823712757e-07, + "loss": 1.0415, "step": 22308 }, { - "epoch": 0.6321799994332512, + "epoch": 0.872877376946553, "grad_norm": 0.0, - "learning_rate": 6.295384652518491e-06, - "loss": 0.8324, + "learning_rate": 8.356749764679816e-07, + "loss": 0.9037, "step": 22309 }, { - "epoch": 0.6322083368755136, + "epoch": 0.8729165036387824, "grad_norm": 0.0, - "learning_rate": 6.29453218157508e-06, - "loss": 0.9084, + "learning_rate": 8.351679177322647e-07, + "loss": 1.1341, "step": 22310 }, { - "epoch": 0.6322366743177761, + "epoch": 0.8729556303310119, "grad_norm": 0.0, - "learning_rate": 6.293679741844205e-06, - "loss": 0.7559, + "learning_rate": 8.346610061722682e-07, + "loss": 0.8856, "step": 22311 }, { - "epoch": 0.6322650117600386, + "epoch": 0.8729947570232413, "grad_norm": 0.0, - "learning_rate": 6.292827333333037e-06, - "loss": 0.9102, + "learning_rate": 8.341542417961301e-07, + "loss": 0.8675, "step": 22312 }, { - "epoch": 0.632293349202301, + "epoch": 0.8730338837154707, "grad_norm": 0.0, - "learning_rate": 6.2919749560487606e-06, - "loss": 0.871, + "learning_rate": 8.336476246119884e-07, + "loss": 0.9735, "step": 22313 }, { - "epoch": 0.6323216866445635, + "epoch": 0.8730730104077001, "grad_norm": 0.0, - "learning_rate": 6.291122609998559e-06, - "loss": 0.7963, + "learning_rate": 8.3314115462798e-07, + "loss": 1.098, "step": 22314 }, { - "epoch": 0.632350024086826, + "epoch": 0.8731121370999295, "grad_norm": 0.0, - "learning_rate": 6.290270295189607e-06, - "loss": 0.8168, + "learning_rate": 8.326348318522404e-07, + "loss": 1.0031, "step": 22315 }, { - "epoch": 0.6323783615290884, + "epoch": 0.873151263792159, "grad_norm": 0.0, - "learning_rate": 6.289418011629089e-06, - "loss": 0.8248, + "learning_rate": 8.321286562928954e-07, + "loss": 0.8869, "step": 22316 }, { - "epoch": 0.6324066989713508, + "epoch": 0.8731903904843884, "grad_norm": 0.0, - "learning_rate": 6.288565759324179e-06, - "loss": 0.9098, + "learning_rate": 8.316226279580775e-07, + "loss": 1.0378, "step": 22317 }, { - "epoch": 0.6324350364136133, + "epoch": 0.8732295171766179, "grad_norm": 0.0, - "learning_rate": 6.287713538282057e-06, - "loss": 0.8451, + "learning_rate": 8.311167468559122e-07, + "loss": 0.9711, "step": 22318 }, { - "epoch": 0.6324633738558758, + "epoch": 0.8732686438688473, "grad_norm": 0.0, - "learning_rate": 6.286861348509903e-06, - "loss": 0.9047, + "learning_rate": 8.30611012994521e-07, + "loss": 0.9857, "step": 22319 }, { - "epoch": 0.6324917112981382, + "epoch": 0.8733077705610768, "grad_norm": 0.0, - "learning_rate": 6.2860091900148935e-06, - "loss": 0.8292, + "learning_rate": 8.301054263820274e-07, + "loss": 0.9532, "step": 22320 }, { - "epoch": 0.6325200487404007, + "epoch": 0.8733468972533062, "grad_norm": 0.0, - "learning_rate": 6.285157062804213e-06, - "loss": 0.8363, + "learning_rate": 8.295999870265514e-07, + "loss": 0.985, "step": 22321 }, { - "epoch": 0.6325483861826632, + "epoch": 0.8733860239455357, "grad_norm": 0.0, - "learning_rate": 6.284304966885027e-06, - "loss": 0.8997, + "learning_rate": 8.290946949362078e-07, + "loss": 0.9747, "step": 22322 }, { - "epoch": 0.6325767236249256, + "epoch": 0.873425150637765, "grad_norm": 0.0, - "learning_rate": 6.283452902264522e-06, - "loss": 0.8425, + "learning_rate": 8.28589550119111e-07, + "loss": 1.0844, "step": 22323 }, { - "epoch": 0.6326050610671881, + "epoch": 0.8734642773299945, "grad_norm": 0.0, - "learning_rate": 6.282600868949877e-06, - "loss": 0.8928, + "learning_rate": 8.280845525833747e-07, + "loss": 0.9318, "step": 22324 }, { - "epoch": 0.6326333985094506, + "epoch": 0.8735034040222239, "grad_norm": 0.0, - "learning_rate": 6.281748866948262e-06, - "loss": 0.9257, + "learning_rate": 8.275797023371058e-07, + "loss": 0.9062, "step": 22325 }, { - "epoch": 0.632661735951713, + "epoch": 0.8735425307144534, "grad_norm": 0.0, - "learning_rate": 6.280896896266857e-06, - "loss": 0.7826, + "learning_rate": 8.270749993884142e-07, + "loss": 0.9713, "step": 22326 }, { - "epoch": 0.6326900733939754, + "epoch": 0.8735816574066828, "grad_norm": 0.0, - "learning_rate": 6.28004495691284e-06, - "loss": 0.9311, + "learning_rate": 8.265704437454025e-07, + "loss": 0.9932, "step": 22327 }, { - "epoch": 0.6327184108362379, + "epoch": 0.8736207840989123, "grad_norm": 0.0, - "learning_rate": 6.279193048893384e-06, - "loss": 0.782, + "learning_rate": 8.260660354161776e-07, + "loss": 0.8292, "step": 22328 }, { - "epoch": 0.6327467482785004, + "epoch": 0.8736599107911417, "grad_norm": 0.0, - "learning_rate": 6.278341172215669e-06, - "loss": 0.8193, + "learning_rate": 8.25561774408834e-07, + "loss": 0.9619, "step": 22329 }, { - "epoch": 0.6327750857207628, + "epoch": 0.8736990374833712, "grad_norm": 0.0, - "learning_rate": 6.277489326886866e-06, - "loss": 0.876, + "learning_rate": 8.250576607314742e-07, + "loss": 0.93, "step": 22330 }, { - "epoch": 0.6328034231630253, + "epoch": 0.8737381641756006, "grad_norm": 0.0, - "learning_rate": 6.276637512914152e-06, - "loss": 0.894, + "learning_rate": 8.245536943921884e-07, + "loss": 0.994, "step": 22331 }, { - "epoch": 0.6328317606052878, + "epoch": 0.8737772908678301, "grad_norm": 0.0, - "learning_rate": 6.2757857303047055e-06, - "loss": 0.9033, + "learning_rate": 8.240498753990756e-07, + "loss": 0.9922, "step": 22332 }, { - "epoch": 0.6328600980475503, + "epoch": 0.8738164175600595, "grad_norm": 0.0, - "learning_rate": 6.274933979065696e-06, - "loss": 0.9079, + "learning_rate": 8.235462037602215e-07, + "loss": 0.9343, "step": 22333 }, { - "epoch": 0.6328884354898127, + "epoch": 0.873855544252289, "grad_norm": 0.0, - "learning_rate": 6.274082259204302e-06, - "loss": 0.7741, + "learning_rate": 8.230426794837187e-07, + "loss": 1.0423, "step": 22334 }, { - "epoch": 0.6329167729320752, + "epoch": 0.8738946709445183, "grad_norm": 0.0, - "learning_rate": 6.2732305707277e-06, - "loss": 0.7737, + "learning_rate": 8.225393025776484e-07, + "loss": 1.0516, "step": 22335 }, { - "epoch": 0.6329451103743376, + "epoch": 0.8739337976367478, "grad_norm": 0.0, - "learning_rate": 6.272378913643057e-06, - "loss": 0.9423, + "learning_rate": 8.220360730500998e-07, + "loss": 1.0626, "step": 22336 }, { - "epoch": 0.6329734478166, + "epoch": 0.8739729243289772, "grad_norm": 0.0, - "learning_rate": 6.271527287957555e-06, - "loss": 0.887, + "learning_rate": 8.215329909091496e-07, + "loss": 0.8868, "step": 22337 }, { - "epoch": 0.6330017852588625, + "epoch": 0.8740120510212067, "grad_norm": 0.0, - "learning_rate": 6.270675693678359e-06, - "loss": 0.8067, + "learning_rate": 8.210300561628803e-07, + "loss": 0.8873, "step": 22338 }, { - "epoch": 0.633030122701125, + "epoch": 0.8740511777134361, "grad_norm": 0.0, - "learning_rate": 6.269824130812645e-06, - "loss": 0.8459, + "learning_rate": 8.205272688193644e-07, + "loss": 1.0515, "step": 22339 }, { - "epoch": 0.6330584601433874, + "epoch": 0.8740903044056656, "grad_norm": 0.0, - "learning_rate": 6.268972599367591e-06, - "loss": 0.921, + "learning_rate": 8.200246288866775e-07, + "loss": 0.9733, "step": 22340 }, { - "epoch": 0.6330867975856499, + "epoch": 0.874129431097895, "grad_norm": 0.0, - "learning_rate": 6.268121099350364e-06, - "loss": 0.7581, + "learning_rate": 8.195221363728923e-07, + "loss": 0.9679, "step": 22341 }, { - "epoch": 0.6331151350279124, + "epoch": 0.8741685577901244, "grad_norm": 0.0, - "learning_rate": 6.267269630768138e-06, - "loss": 0.8309, + "learning_rate": 8.190197912860798e-07, + "loss": 0.9716, "step": 22342 }, { - "epoch": 0.6331434724701749, + "epoch": 0.8742076844823539, "grad_norm": 0.0, - "learning_rate": 6.266418193628092e-06, - "loss": 0.8219, + "learning_rate": 8.185175936343037e-07, + "loss": 0.92, "step": 22343 }, { - "epoch": 0.6331718099124373, + "epoch": 0.8742468111745832, "grad_norm": 0.0, - "learning_rate": 6.265566787937386e-06, - "loss": 0.734, + "learning_rate": 8.180155434256288e-07, + "loss": 0.9814, "step": 22344 }, { - "epoch": 0.6332001473546998, + "epoch": 0.8742859378668127, "grad_norm": 0.0, - "learning_rate": 6.264715413703201e-06, - "loss": 0.7977, + "learning_rate": 8.175136406681194e-07, + "loss": 0.9376, "step": 22345 }, { - "epoch": 0.6332284847969623, + "epoch": 0.8743250645590421, "grad_norm": 0.0, - "learning_rate": 6.263864070932702e-06, - "loss": 0.7978, + "learning_rate": 8.170118853698361e-07, + "loss": 0.7532, "step": 22346 }, { - "epoch": 0.6332568222392246, + "epoch": 0.8743641912512716, "grad_norm": 0.0, - "learning_rate": 6.263012759633063e-06, - "loss": 0.8712, + "learning_rate": 8.165102775388334e-07, + "loss": 0.9606, "step": 22347 }, { - "epoch": 0.6332851596814871, + "epoch": 0.874403317943501, "grad_norm": 0.0, - "learning_rate": 6.2621614798114615e-06, - "loss": 0.7636, + "learning_rate": 8.160088171831704e-07, + "loss": 0.9185, "step": 22348 }, { - "epoch": 0.6333134971237496, + "epoch": 0.8744424446357305, "grad_norm": 0.0, - "learning_rate": 6.261310231475055e-06, - "loss": 0.898, + "learning_rate": 8.155075043108928e-07, + "loss": 1.1293, "step": 22349 }, { - "epoch": 0.6333418345660121, + "epoch": 0.8744815713279599, "grad_norm": 0.0, - "learning_rate": 6.260459014631027e-06, - "loss": 0.8145, + "learning_rate": 8.150063389300611e-07, + "loss": 0.8429, "step": 22350 }, { - "epoch": 0.6333701720082745, + "epoch": 0.8745206980201894, "grad_norm": 0.0, - "learning_rate": 6.259607829286537e-06, - "loss": 0.8293, + "learning_rate": 8.145053210487152e-07, + "loss": 1.1121, "step": 22351 }, { - "epoch": 0.633398509450537, + "epoch": 0.8745598247124188, "grad_norm": 0.0, - "learning_rate": 6.258756675448759e-06, - "loss": 0.9812, + "learning_rate": 8.140044506749056e-07, + "loss": 1.0145, "step": 22352 }, { - "epoch": 0.6334268468927995, + "epoch": 0.8745989514046483, "grad_norm": 0.0, - "learning_rate": 6.257905553124864e-06, - "loss": 1.0024, + "learning_rate": 8.135037278166702e-07, + "loss": 1.0116, "step": 22353 }, { - "epoch": 0.6334551843350619, + "epoch": 0.8746380780968777, "grad_norm": 0.0, - "learning_rate": 6.257054462322019e-06, - "loss": 0.876, + "learning_rate": 8.130031524820569e-07, + "loss": 0.9271, "step": 22354 }, { - "epoch": 0.6334835217773244, + "epoch": 0.8746772047891072, "grad_norm": 0.0, - "learning_rate": 6.256203403047394e-06, - "loss": 0.7786, + "learning_rate": 8.125027246791006e-07, + "loss": 0.9393, "step": 22355 }, { - "epoch": 0.6335118592195869, + "epoch": 0.8747163314813365, "grad_norm": 0.0, - "learning_rate": 6.255352375308162e-06, - "loss": 0.8001, + "learning_rate": 8.120024444158381e-07, + "loss": 1.0332, "step": 22356 }, { - "epoch": 0.6335401966618494, + "epoch": 0.874755458173566, "grad_norm": 0.0, - "learning_rate": 6.254501379111483e-06, - "loss": 0.8141, + "learning_rate": 8.115023117003029e-07, + "loss": 0.9536, "step": 22357 }, { - "epoch": 0.6335685341041117, + "epoch": 0.8747945848657954, "grad_norm": 0.0, - "learning_rate": 6.253650414464531e-06, - "loss": 0.6763, + "learning_rate": 8.110023265405253e-07, + "loss": 1.0251, "step": 22358 }, { - "epoch": 0.6335968715463742, + "epoch": 0.8748337115580249, "grad_norm": 0.0, - "learning_rate": 6.252799481374472e-06, - "loss": 0.9032, + "learning_rate": 8.105024889445367e-07, + "loss": 0.9708, "step": 22359 }, { - "epoch": 0.6336252089886367, + "epoch": 0.8748728382502543, "grad_norm": 0.0, - "learning_rate": 6.251948579848475e-06, - "loss": 0.7761, + "learning_rate": 8.100027989203651e-07, + "loss": 0.9133, "step": 22360 }, { - "epoch": 0.6336535464308991, + "epoch": 0.8749119649424838, "grad_norm": 0.0, - "learning_rate": 6.25109770989371e-06, - "loss": 0.75, + "learning_rate": 8.095032564760308e-07, + "loss": 1.0164, "step": 22361 }, { - "epoch": 0.6336818838731616, + "epoch": 0.8749510916347132, "grad_norm": 0.0, - "learning_rate": 6.250246871517338e-06, - "loss": 0.889, + "learning_rate": 8.090038616195572e-07, + "loss": 1.0364, "step": 22362 }, { - "epoch": 0.6337102213154241, + "epoch": 0.8749902183269427, "grad_norm": 0.0, - "learning_rate": 6.249396064726527e-06, - "loss": 0.7396, + "learning_rate": 8.08504614358967e-07, + "loss": 0.963, "step": 22363 }, { - "epoch": 0.6337385587576865, + "epoch": 0.8750293450191721, "grad_norm": 0.0, - "learning_rate": 6.248545289528452e-06, - "loss": 0.9053, + "learning_rate": 8.080055147022737e-07, + "loss": 1.0439, "step": 22364 }, { - "epoch": 0.633766896199949, + "epoch": 0.8750684717114016, "grad_norm": 0.0, - "learning_rate": 6.247694545930267e-06, - "loss": 0.8086, + "learning_rate": 8.075065626574929e-07, + "loss": 0.8164, "step": 22365 }, { - "epoch": 0.6337952336422115, + "epoch": 0.875107598403631, "grad_norm": 0.0, - "learning_rate": 6.246843833939148e-06, - "loss": 0.8184, + "learning_rate": 8.070077582326374e-07, + "loss": 0.9867, "step": 22366 }, { - "epoch": 0.633823571084474, + "epoch": 0.8751467250958604, "grad_norm": 0.0, - "learning_rate": 6.2459931535622554e-06, - "loss": 0.8567, + "learning_rate": 8.065091014357207e-07, + "loss": 0.9694, "step": 22367 }, { - "epoch": 0.6338519085267363, + "epoch": 0.8751858517880898, "grad_norm": 0.0, - "learning_rate": 6.245142504806755e-06, - "loss": 0.7433, + "learning_rate": 8.060105922747463e-07, + "loss": 0.9016, "step": 22368 }, { - "epoch": 0.6338802459689988, + "epoch": 0.8752249784803193, "grad_norm": 0.0, - "learning_rate": 6.244291887679819e-06, - "loss": 0.8401, + "learning_rate": 8.055122307577212e-07, + "loss": 1.0017, "step": 22369 }, { - "epoch": 0.6339085834112613, + "epoch": 0.8752641051725487, "grad_norm": 0.0, - "learning_rate": 6.243441302188601e-06, - "loss": 0.7254, + "learning_rate": 8.050140168926479e-07, + "loss": 0.9897, "step": 22370 }, { - "epoch": 0.6339369208535237, + "epoch": 0.8753032318647781, "grad_norm": 0.0, - "learning_rate": 6.2425907483402735e-06, - "loss": 0.9436, + "learning_rate": 8.045159506875266e-07, + "loss": 0.8854, "step": 22371 }, { - "epoch": 0.6339652582957862, + "epoch": 0.8753423585570076, "grad_norm": 0.0, - "learning_rate": 6.241740226142002e-06, - "loss": 0.8777, + "learning_rate": 8.040180321503577e-07, + "loss": 1.028, "step": 22372 }, { - "epoch": 0.6339935957380487, + "epoch": 0.875381485249237, "grad_norm": 0.0, - "learning_rate": 6.240889735600943e-06, - "loss": 0.8345, + "learning_rate": 8.035202612891368e-07, + "loss": 0.9697, "step": 22373 }, { - "epoch": 0.6340219331803112, + "epoch": 0.8754206119414665, "grad_norm": 0.0, - "learning_rate": 6.240039276724273e-06, - "loss": 0.8724, + "learning_rate": 8.030226381118555e-07, + "loss": 1.0604, "step": 22374 }, { - "epoch": 0.6340502706225736, + "epoch": 0.8754597386336959, "grad_norm": 0.0, - "learning_rate": 6.239188849519142e-06, - "loss": 0.7078, + "learning_rate": 8.025251626265063e-07, + "loss": 0.9396, "step": 22375 }, { - "epoch": 0.6340786080648361, + "epoch": 0.8754988653259254, "grad_norm": 0.0, - "learning_rate": 6.2383384539927214e-06, - "loss": 0.8817, + "learning_rate": 8.02027834841077e-07, + "loss": 0.9443, "step": 22376 }, { - "epoch": 0.6341069455070986, + "epoch": 0.8755379920181547, "grad_norm": 0.0, - "learning_rate": 6.237488090152177e-06, - "loss": 0.8213, + "learning_rate": 8.015306547635571e-07, + "loss": 0.9459, "step": 22377 }, { - "epoch": 0.634135282949361, + "epoch": 0.8755771187103842, "grad_norm": 0.0, - "learning_rate": 6.2366377580046634e-06, - "loss": 0.915, + "learning_rate": 8.010336224019278e-07, + "loss": 0.8435, "step": 22378 }, { - "epoch": 0.6341636203916234, + "epoch": 0.8756162454026136, "grad_norm": 0.0, - "learning_rate": 6.23578745755735e-06, - "loss": 0.8171, + "learning_rate": 8.005367377641715e-07, + "loss": 0.9863, "step": 22379 }, { - "epoch": 0.6341919578338859, + "epoch": 0.8756553720948431, "grad_norm": 0.0, - "learning_rate": 6.234937188817393e-06, - "loss": 0.8698, + "learning_rate": 8.000400008582654e-07, + "loss": 0.8686, "step": 22380 }, { - "epoch": 0.6342202952761484, + "epoch": 0.8756944987870725, "grad_norm": 0.0, - "learning_rate": 6.234086951791959e-06, - "loss": 0.9381, + "learning_rate": 7.995434116921919e-07, + "loss": 0.9555, "step": 22381 }, { - "epoch": 0.6342486327184108, + "epoch": 0.875733625479302, "grad_norm": 0.0, - "learning_rate": 6.233236746488213e-06, - "loss": 0.8281, + "learning_rate": 7.990469702739212e-07, + "loss": 0.8575, "step": 22382 }, { - "epoch": 0.6342769701606733, + "epoch": 0.8757727521715314, "grad_norm": 0.0, - "learning_rate": 6.23238657291331e-06, - "loss": 0.9281, + "learning_rate": 7.98550676611427e-07, + "loss": 0.9482, "step": 22383 }, { - "epoch": 0.6343053076029358, + "epoch": 0.8758118788637609, "grad_norm": 0.0, - "learning_rate": 6.231536431074411e-06, - "loss": 0.6666, + "learning_rate": 7.980545307126763e-07, + "loss": 0.8688, "step": 22384 }, { - "epoch": 0.6343336450451982, + "epoch": 0.8758510055559903, "grad_norm": 0.0, - "learning_rate": 6.230686320978684e-06, - "loss": 0.8066, + "learning_rate": 7.975585325856427e-07, + "loss": 0.8751, "step": 22385 }, { - "epoch": 0.6343619824874607, + "epoch": 0.8758901322482198, "grad_norm": 0.0, - "learning_rate": 6.2298362426332825e-06, - "loss": 0.7854, + "learning_rate": 7.970626822382866e-07, + "loss": 0.9321, "step": 22386 }, { - "epoch": 0.6343903199297232, + "epoch": 0.8759292589404492, "grad_norm": 0.0, - "learning_rate": 6.228986196045377e-06, - "loss": 0.845, + "learning_rate": 7.965669796785725e-07, + "loss": 0.991, "step": 22387 }, { - "epoch": 0.6344186573719856, + "epoch": 0.8759683856326786, "grad_norm": 0.0, - "learning_rate": 6.228136181222115e-06, - "loss": 0.8856, + "learning_rate": 7.960714249144586e-07, + "loss": 1.0127, "step": 22388 }, { - "epoch": 0.634446994814248, + "epoch": 0.876007512324908, "grad_norm": 0.0, - "learning_rate": 6.227286198170663e-06, - "loss": 0.8857, + "learning_rate": 7.955760179539052e-07, + "loss": 1.0278, "step": 22389 }, { - "epoch": 0.6344753322565105, + "epoch": 0.8760466390171375, "grad_norm": 0.0, - "learning_rate": 6.226436246898184e-06, - "loss": 0.8398, + "learning_rate": 7.95080758804867e-07, + "loss": 0.9273, "step": 22390 }, { - "epoch": 0.634503669698773, + "epoch": 0.8760857657093669, "grad_norm": 0.0, - "learning_rate": 6.22558632741183e-06, - "loss": 0.7742, + "learning_rate": 7.945856474752989e-07, + "loss": 0.9779, "step": 22391 }, { - "epoch": 0.6345320071410354, + "epoch": 0.8761248924015964, "grad_norm": 0.0, - "learning_rate": 6.224736439718764e-06, - "loss": 0.8649, + "learning_rate": 7.940906839731477e-07, + "loss": 0.935, "step": 22392 }, { - "epoch": 0.6345603445832979, + "epoch": 0.8761640190938258, "grad_norm": 0.0, - "learning_rate": 6.223886583826147e-06, - "loss": 0.8202, + "learning_rate": 7.93595868306366e-07, + "loss": 0.9799, "step": 22393 }, { - "epoch": 0.6345886820255604, + "epoch": 0.8762031457860553, "grad_norm": 0.0, - "learning_rate": 6.223036759741133e-06, - "loss": 0.7968, + "learning_rate": 7.931012004828975e-07, + "loss": 0.9501, "step": 22394 }, { - "epoch": 0.6346170194678228, + "epoch": 0.8762422724782847, "grad_norm": 0.0, - "learning_rate": 6.222186967470888e-06, - "loss": 0.9445, + "learning_rate": 7.92606680510688e-07, + "loss": 0.9021, "step": 22395 }, { - "epoch": 0.6346453569100853, + "epoch": 0.8762813991705142, "grad_norm": 0.0, - "learning_rate": 6.221337207022561e-06, - "loss": 0.7812, + "learning_rate": 7.921123083976768e-07, + "loss": 0.9234, "step": 22396 }, { - "epoch": 0.6346736943523478, + "epoch": 0.8763205258627436, "grad_norm": 0.0, - "learning_rate": 6.220487478403313e-06, - "loss": 0.8654, + "learning_rate": 7.916180841518062e-07, + "loss": 0.9703, "step": 22397 }, { - "epoch": 0.6347020317946103, + "epoch": 0.8763596525549731, "grad_norm": 0.0, - "learning_rate": 6.2196377816203045e-06, - "loss": 0.7878, + "learning_rate": 7.911240077810057e-07, + "loss": 1.1051, "step": 22398 }, { - "epoch": 0.6347303692368726, + "epoch": 0.8763987792472024, "grad_norm": 0.0, - "learning_rate": 6.218788116680689e-06, - "loss": 0.9779, + "learning_rate": 7.906300792932186e-07, + "loss": 0.8607, "step": 22399 }, { - "epoch": 0.6347587066791351, + "epoch": 0.8764379059394318, "grad_norm": 0.0, - "learning_rate": 6.2179384835916256e-06, - "loss": 0.7271, + "learning_rate": 7.901362986963701e-07, + "loss": 0.8866, "step": 22400 }, { - "epoch": 0.6347870441213976, + "epoch": 0.8764770326316613, "grad_norm": 0.0, - "learning_rate": 6.217088882360274e-06, - "loss": 0.8263, + "learning_rate": 7.896426659983936e-07, + "loss": 0.9772, "step": 22401 }, { - "epoch": 0.63481538156366, + "epoch": 0.8765161593238907, "grad_norm": 0.0, - "learning_rate": 6.216239312993783e-06, - "loss": 0.7711, + "learning_rate": 7.891491812072139e-07, + "loss": 1.0525, "step": 22402 }, { - "epoch": 0.6348437190059225, + "epoch": 0.8765552860161202, "grad_norm": 0.0, - "learning_rate": 6.215389775499319e-06, - "loss": 0.8097, + "learning_rate": 7.886558443307557e-07, + "loss": 1.0947, "step": 22403 }, { - "epoch": 0.634872056448185, + "epoch": 0.8765944127083496, "grad_norm": 0.0, - "learning_rate": 6.214540269884026e-06, - "loss": 0.8271, + "learning_rate": 7.881626553769417e-07, + "loss": 0.944, "step": 22404 }, { - "epoch": 0.6349003938904475, + "epoch": 0.8766335394005791, "grad_norm": 0.0, - "learning_rate": 6.2136907961550676e-06, - "loss": 0.8895, + "learning_rate": 7.876696143536955e-07, + "loss": 0.9263, "step": 22405 }, { - "epoch": 0.6349287313327099, + "epoch": 0.8766726660928085, "grad_norm": 0.0, - "learning_rate": 6.212841354319603e-06, - "loss": 0.855, + "learning_rate": 7.871767212689285e-07, + "loss": 0.9839, "step": 22406 }, { - "epoch": 0.6349570687749724, + "epoch": 0.876711792785038, "grad_norm": 0.0, - "learning_rate": 6.211991944384776e-06, - "loss": 0.8771, + "learning_rate": 7.8668397613056e-07, + "loss": 1.0064, "step": 22407 }, { - "epoch": 0.6349854062172349, + "epoch": 0.8767509194772674, "grad_norm": 0.0, - "learning_rate": 6.211142566357753e-06, - "loss": 0.8764, + "learning_rate": 7.861913789465037e-07, + "loss": 0.9384, "step": 22408 }, { - "epoch": 0.6350137436594973, + "epoch": 0.8767900461694969, "grad_norm": 0.0, - "learning_rate": 6.210293220245678e-06, - "loss": 0.8748, + "learning_rate": 7.856989297246664e-07, + "loss": 0.9458, "step": 22409 }, { - "epoch": 0.6350420811017597, + "epoch": 0.8768291728617262, "grad_norm": 0.0, - "learning_rate": 6.20944390605571e-06, - "loss": 0.8628, + "learning_rate": 7.852066284729576e-07, + "loss": 0.8389, "step": 22410 }, { - "epoch": 0.6350704185440222, + "epoch": 0.8768682995539557, "grad_norm": 0.0, - "learning_rate": 6.208594623795007e-06, - "loss": 0.7544, + "learning_rate": 7.847144751992842e-07, + "loss": 0.8421, "step": 22411 }, { - "epoch": 0.6350987559862846, + "epoch": 0.8769074262461851, "grad_norm": 0.0, - "learning_rate": 6.207745373470717e-06, - "loss": 0.9001, + "learning_rate": 7.842224699115497e-07, + "loss": 0.8266, "step": 22412 }, { - "epoch": 0.6351270934285471, + "epoch": 0.8769465529384146, "grad_norm": 0.0, - "learning_rate": 6.206896155089995e-06, - "loss": 0.8413, + "learning_rate": 7.837306126176536e-07, + "loss": 1.0367, "step": 22413 }, { - "epoch": 0.6351554308708096, + "epoch": 0.876985679630644, "grad_norm": 0.0, - "learning_rate": 6.20604696866e-06, - "loss": 0.9223, + "learning_rate": 7.83238903325495e-07, + "loss": 0.9177, "step": 22414 }, { - "epoch": 0.6351837683130721, + "epoch": 0.8770248063228735, "grad_norm": 0.0, - "learning_rate": 6.2051978141878755e-06, - "loss": 0.8862, + "learning_rate": 7.827473420429721e-07, + "loss": 0.9724, "step": 22415 }, { - "epoch": 0.6352121057553345, + "epoch": 0.8770639330151029, "grad_norm": 0.0, - "learning_rate": 6.204348691680781e-06, - "loss": 0.9129, + "learning_rate": 7.822559287779752e-07, + "loss": 1.0638, "step": 22416 }, { - "epoch": 0.635240443197597, + "epoch": 0.8771030597073324, "grad_norm": 0.0, - "learning_rate": 6.203499601145867e-06, - "loss": 0.8947, + "learning_rate": 7.817646635383969e-07, + "loss": 0.8951, "step": 22417 }, { - "epoch": 0.6352687806398595, + "epoch": 0.8771421863995618, "grad_norm": 0.0, - "learning_rate": 6.202650542590284e-06, - "loss": 0.7961, + "learning_rate": 7.812735463321297e-07, + "loss": 1.0584, "step": 22418 }, { - "epoch": 0.6352971180821219, + "epoch": 0.8771813130917913, "grad_norm": 0.0, - "learning_rate": 6.20180151602119e-06, - "loss": 0.9257, + "learning_rate": 7.807825771670552e-07, + "loss": 0.8747, "step": 22419 }, { - "epoch": 0.6353254555243844, + "epoch": 0.8772204397840206, "grad_norm": 0.0, - "learning_rate": 6.200952521445728e-06, - "loss": 0.8748, + "learning_rate": 7.802917560510614e-07, + "loss": 1.0057, "step": 22420 }, { - "epoch": 0.6353537929666468, + "epoch": 0.8772595664762501, "grad_norm": 0.0, - "learning_rate": 6.200103558871054e-06, - "loss": 0.8901, + "learning_rate": 7.798010829920278e-07, + "loss": 1.0438, "step": 22421 }, { - "epoch": 0.6353821304089093, + "epoch": 0.8772986931684795, "grad_norm": 0.0, - "learning_rate": 6.199254628304323e-06, - "loss": 0.8132, + "learning_rate": 7.793105579978377e-07, + "loss": 0.9338, "step": 22422 }, { - "epoch": 0.6354104678511717, + "epoch": 0.877337819860709, "grad_norm": 0.0, - "learning_rate": 6.198405729752677e-06, - "loss": 0.8784, + "learning_rate": 7.78820181076364e-07, + "loss": 0.9167, "step": 22423 }, { - "epoch": 0.6354388052934342, + "epoch": 0.8773769465529384, "grad_norm": 0.0, - "learning_rate": 6.197556863223273e-06, - "loss": 0.8747, + "learning_rate": 7.783299522354826e-07, + "loss": 1.0577, "step": 22424 }, { - "epoch": 0.6354671427356967, + "epoch": 0.8774160732451679, "grad_norm": 0.0, - "learning_rate": 6.196708028723257e-06, - "loss": 0.8694, + "learning_rate": 7.778398714830682e-07, + "loss": 0.8948, "step": 22425 }, { - "epoch": 0.6354954801779591, + "epoch": 0.8774551999373973, "grad_norm": 0.0, - "learning_rate": 6.195859226259784e-06, - "loss": 0.9125, + "learning_rate": 7.773499388269901e-07, + "loss": 1.03, "step": 22426 }, { - "epoch": 0.6355238176202216, + "epoch": 0.8774943266296268, "grad_norm": 0.0, - "learning_rate": 6.195010455840003e-06, - "loss": 0.9148, + "learning_rate": 7.768601542751142e-07, + "loss": 0.9119, "step": 22427 }, { - "epoch": 0.6355521550624841, + "epoch": 0.8775334533218562, "grad_norm": 0.0, - "learning_rate": 6.194161717471059e-06, - "loss": 0.9079, + "learning_rate": 7.763705178353076e-07, + "loss": 0.9741, "step": 22428 }, { - "epoch": 0.6355804925047466, + "epoch": 0.8775725800140856, "grad_norm": 0.0, - "learning_rate": 6.193313011160104e-06, - "loss": 0.8843, + "learning_rate": 7.758810295154306e-07, + "loss": 1.0368, "step": 22429 }, { - "epoch": 0.635608829947009, + "epoch": 0.877611706706315, "grad_norm": 0.0, - "learning_rate": 6.192464336914289e-06, - "loss": 0.8386, + "learning_rate": 7.753916893233482e-07, + "loss": 1.0504, "step": 22430 }, { - "epoch": 0.6356371673892715, + "epoch": 0.8776508333985444, "grad_norm": 0.0, - "learning_rate": 6.191615694740758e-06, - "loss": 0.8441, + "learning_rate": 7.74902497266915e-07, + "loss": 1.0062, "step": 22431 }, { - "epoch": 0.635665504831534, + "epoch": 0.8776899600907739, "grad_norm": 0.0, - "learning_rate": 6.190767084646667e-06, - "loss": 0.7898, + "learning_rate": 7.744134533539905e-07, + "loss": 1.0487, "step": 22432 }, { - "epoch": 0.6356938422737963, + "epoch": 0.8777290867830033, "grad_norm": 0.0, - "learning_rate": 6.1899185066391565e-06, - "loss": 0.8814, + "learning_rate": 7.739245575924215e-07, + "loss": 1.0267, "step": 22433 }, { - "epoch": 0.6357221797160588, + "epoch": 0.8777682134752328, "grad_norm": 0.0, - "learning_rate": 6.189069960725375e-06, - "loss": 0.8246, + "learning_rate": 7.734358099900663e-07, + "loss": 1.0336, "step": 22434 }, { - "epoch": 0.6357505171583213, + "epoch": 0.8778073401674622, "grad_norm": 0.0, - "learning_rate": 6.188221446912478e-06, - "loss": 0.8047, + "learning_rate": 7.729472105547687e-07, + "loss": 0.9543, "step": 22435 }, { - "epoch": 0.6357788546005837, + "epoch": 0.8778464668596917, "grad_norm": 0.0, - "learning_rate": 6.187372965207603e-06, - "loss": 0.8742, + "learning_rate": 7.724587592943788e-07, + "loss": 1.0169, "step": 22436 }, { - "epoch": 0.6358071920428462, + "epoch": 0.8778855935519211, "grad_norm": 0.0, - "learning_rate": 6.186524515617902e-06, - "loss": 0.905, + "learning_rate": 7.719704562167363e-07, + "loss": 0.8957, "step": 22437 }, { - "epoch": 0.6358355294851087, + "epoch": 0.8779247202441506, "grad_norm": 0.0, - "learning_rate": 6.1856760981505205e-06, - "loss": 0.8189, + "learning_rate": 7.714823013296857e-07, + "loss": 1.0315, "step": 22438 }, { - "epoch": 0.6358638669273712, + "epoch": 0.87796384693638, "grad_norm": 0.0, - "learning_rate": 6.184827712812605e-06, - "loss": 0.7761, + "learning_rate": 7.709942946410642e-07, + "loss": 0.8509, "step": 22439 }, { - "epoch": 0.6358922043696336, + "epoch": 0.8780029736286095, "grad_norm": 0.0, - "learning_rate": 6.183979359611308e-06, - "loss": 0.8152, + "learning_rate": 7.705064361587122e-07, + "loss": 0.8886, "step": 22440 }, { - "epoch": 0.6359205418118961, + "epoch": 0.8780421003208388, "grad_norm": 0.0, - "learning_rate": 6.183131038553763e-06, - "loss": 0.8253, + "learning_rate": 7.700187258904601e-07, + "loss": 0.9606, "step": 22441 }, { - "epoch": 0.6359488792541586, + "epoch": 0.8780812270130683, "grad_norm": 0.0, - "learning_rate": 6.182282749647124e-06, - "loss": 0.8146, + "learning_rate": 7.695311638441416e-07, + "loss": 0.8965, "step": 22442 }, { - "epoch": 0.6359772166964209, + "epoch": 0.8781203537052977, "grad_norm": 0.0, - "learning_rate": 6.181434492898537e-06, - "loss": 0.7491, + "learning_rate": 7.69043750027586e-07, + "loss": 0.8753, "step": 22443 }, { - "epoch": 0.6360055541386834, + "epoch": 0.8781594803975272, "grad_norm": 0.0, - "learning_rate": 6.180586268315144e-06, - "loss": 0.7813, + "learning_rate": 7.685564844486215e-07, + "loss": 1.014, "step": 22444 }, { - "epoch": 0.6360338915809459, + "epoch": 0.8781986070897566, "grad_norm": 0.0, - "learning_rate": 6.179738075904095e-06, - "loss": 0.7823, + "learning_rate": 7.680693671150718e-07, + "loss": 0.9984, "step": 22445 }, { - "epoch": 0.6360622290232084, + "epoch": 0.8782377337819861, "grad_norm": 0.0, - "learning_rate": 6.178889915672526e-06, - "loss": 0.8586, + "learning_rate": 7.675823980347607e-07, + "loss": 0.9872, "step": 22446 }, { - "epoch": 0.6360905664654708, + "epoch": 0.8782768604742155, "grad_norm": 0.0, - "learning_rate": 6.178041787627587e-06, - "loss": 0.7582, + "learning_rate": 7.670955772155042e-07, + "loss": 0.9504, "step": 22447 }, { - "epoch": 0.6361189039077333, + "epoch": 0.878315987166445, "grad_norm": 0.0, - "learning_rate": 6.177193691776424e-06, - "loss": 0.8793, + "learning_rate": 7.66608904665127e-07, + "loss": 0.9352, "step": 22448 }, { - "epoch": 0.6361472413499958, + "epoch": 0.8783551138586744, "grad_norm": 0.0, - "learning_rate": 6.176345628126176e-06, - "loss": 0.8223, + "learning_rate": 7.661223803914386e-07, + "loss": 0.9648, "step": 22449 }, { - "epoch": 0.6361755787922582, + "epoch": 0.8783942405509039, "grad_norm": 0.0, - "learning_rate": 6.175497596683988e-06, - "loss": 0.8041, + "learning_rate": 7.656360044022559e-07, + "loss": 0.9465, "step": 22450 }, { - "epoch": 0.6362039162345207, + "epoch": 0.8784333672431333, "grad_norm": 0.0, - "learning_rate": 6.174649597457005e-06, - "loss": 0.8164, + "learning_rate": 7.651497767053862e-07, + "loss": 1.0341, "step": 22451 }, { - "epoch": 0.6362322536767832, + "epoch": 0.8784724939353628, "grad_norm": 0.0, - "learning_rate": 6.1738016304523675e-06, - "loss": 0.9121, + "learning_rate": 7.646636973086396e-07, + "loss": 0.8611, "step": 22452 }, { - "epoch": 0.6362605911190456, + "epoch": 0.8785116206275921, "grad_norm": 0.0, - "learning_rate": 6.172953695677224e-06, - "loss": 0.8694, + "learning_rate": 7.641777662198202e-07, + "loss": 0.8713, "step": 22453 }, { - "epoch": 0.636288928561308, + "epoch": 0.8785507473198216, "grad_norm": 0.0, - "learning_rate": 6.1721057931387075e-06, - "loss": 0.9329, + "learning_rate": 7.636919834467349e-07, + "loss": 1.04, "step": 22454 }, { - "epoch": 0.6363172660035705, + "epoch": 0.878589874012051, "grad_norm": 0.0, - "learning_rate": 6.171257922843968e-06, - "loss": 0.8648, + "learning_rate": 7.632063489971819e-07, + "loss": 1.0602, "step": 22455 }, { - "epoch": 0.636345603445833, + "epoch": 0.8786290007042804, "grad_norm": 0.0, - "learning_rate": 6.1704100848001446e-06, - "loss": 0.8448, + "learning_rate": 7.627208628789595e-07, + "loss": 0.9766, "step": 22456 }, { - "epoch": 0.6363739408880954, + "epoch": 0.8786681273965099, "grad_norm": 0.0, - "learning_rate": 6.169562279014376e-06, - "loss": 1.0143, + "learning_rate": 7.62235525099867e-07, + "loss": 0.9255, "step": 22457 }, { - "epoch": 0.6364022783303579, + "epoch": 0.8787072540887393, "grad_norm": 0.0, - "learning_rate": 6.1687145054938095e-06, - "loss": 0.8163, + "learning_rate": 7.617503356676948e-07, + "loss": 0.9931, "step": 22458 }, { - "epoch": 0.6364306157726204, + "epoch": 0.8787463807809688, "grad_norm": 0.0, - "learning_rate": 6.167866764245586e-06, - "loss": 0.8019, + "learning_rate": 7.612652945902366e-07, + "loss": 0.9057, "step": 22459 }, { - "epoch": 0.6364589532148828, + "epoch": 0.8787855074731982, "grad_norm": 0.0, - "learning_rate": 6.16701905527684e-06, - "loss": 0.8824, + "learning_rate": 7.607804018752795e-07, + "loss": 0.7593, "step": 22460 }, { - "epoch": 0.6364872906571453, + "epoch": 0.8788246341654277, "grad_norm": 0.0, - "learning_rate": 6.16617137859472e-06, - "loss": 0.7146, + "learning_rate": 7.602956575306153e-07, + "loss": 0.891, "step": 22461 }, { - "epoch": 0.6365156280994078, + "epoch": 0.878863760857657, "grad_norm": 0.0, - "learning_rate": 6.1653237342063575e-06, - "loss": 0.9413, + "learning_rate": 7.59811061564022e-07, + "loss": 0.9714, "step": 22462 }, { - "epoch": 0.6365439655416703, + "epoch": 0.8789028875498865, "grad_norm": 0.0, - "learning_rate": 6.1644761221188984e-06, - "loss": 0.7882, + "learning_rate": 7.593266139832856e-07, + "loss": 0.9388, "step": 22463 }, { - "epoch": 0.6365723029839326, + "epoch": 0.8789420142421159, "grad_norm": 0.0, - "learning_rate": 6.163628542339482e-06, - "loss": 0.8656, + "learning_rate": 7.588423147961843e-07, + "loss": 0.9855, "step": 22464 }, { - "epoch": 0.6366006404261951, + "epoch": 0.8789811409343454, "grad_norm": 0.0, - "learning_rate": 6.162780994875246e-06, - "loss": 0.9232, + "learning_rate": 7.583581640104942e-07, + "loss": 0.9472, "step": 22465 }, { - "epoch": 0.6366289778684576, + "epoch": 0.8790202676265748, "grad_norm": 0.0, - "learning_rate": 6.161933479733333e-06, - "loss": 0.9099, + "learning_rate": 7.578741616339925e-07, + "loss": 1.0173, "step": 22466 }, { - "epoch": 0.63665731531072, + "epoch": 0.8790593943188043, "grad_norm": 0.0, - "learning_rate": 6.161085996920877e-06, - "loss": 0.7876, + "learning_rate": 7.573903076744527e-07, + "loss": 0.8638, "step": 22467 }, { - "epoch": 0.6366856527529825, + "epoch": 0.8790985210110337, "grad_norm": 0.0, - "learning_rate": 6.160238546445019e-06, - "loss": 0.7876, + "learning_rate": 7.569066021396409e-07, + "loss": 1.0818, "step": 22468 }, { - "epoch": 0.636713990195245, + "epoch": 0.8791376477032632, "grad_norm": 0.0, - "learning_rate": 6.159391128312899e-06, - "loss": 0.8031, + "learning_rate": 7.564230450373267e-07, + "loss": 0.9486, "step": 22469 }, { - "epoch": 0.6367423276375075, + "epoch": 0.8791767743954926, "grad_norm": 0.0, - "learning_rate": 6.158543742531652e-06, - "loss": 0.8878, + "learning_rate": 7.559396363752747e-07, + "loss": 0.8549, "step": 22470 }, { - "epoch": 0.6367706650797699, + "epoch": 0.8792159010877221, "grad_norm": 0.0, - "learning_rate": 6.1576963891084175e-06, - "loss": 0.9208, + "learning_rate": 7.554563761612521e-07, + "loss": 0.921, "step": 22471 }, { - "epoch": 0.6367990025220324, + "epoch": 0.8792550277799515, "grad_norm": 0.0, - "learning_rate": 6.156849068050336e-06, - "loss": 0.8974, + "learning_rate": 7.549732644030127e-07, + "loss": 1.0699, "step": 22472 }, { - "epoch": 0.6368273399642949, + "epoch": 0.879294154472181, "grad_norm": 0.0, - "learning_rate": 6.15600177936454e-06, - "loss": 0.7932, + "learning_rate": 7.544903011083205e-07, + "loss": 0.7946, "step": 22473 }, { - "epoch": 0.6368556774065572, + "epoch": 0.8793332811644103, "grad_norm": 0.0, - "learning_rate": 6.155154523058172e-06, - "loss": 0.8085, + "learning_rate": 7.540074862849256e-07, + "loss": 1.0449, "step": 22474 }, { - "epoch": 0.6368840148488197, + "epoch": 0.8793724078566398, "grad_norm": 0.0, - "learning_rate": 6.15430729913836e-06, - "loss": 0.8072, + "learning_rate": 7.535248199405875e-07, + "loss": 0.9833, "step": 22475 }, { - "epoch": 0.6369123522910822, + "epoch": 0.8794115345488692, "grad_norm": 0.0, - "learning_rate": 6.153460107612248e-06, - "loss": 0.8309, + "learning_rate": 7.530423020830536e-07, + "loss": 0.931, "step": 22476 }, { - "epoch": 0.6369406897333447, + "epoch": 0.8794506612410987, "grad_norm": 0.0, - "learning_rate": 6.152612948486969e-06, - "loss": 0.905, + "learning_rate": 7.525599327200739e-07, + "loss": 0.9348, "step": 22477 }, { - "epoch": 0.6369690271756071, + "epoch": 0.8794897879333281, "grad_norm": 0.0, - "learning_rate": 6.1517658217696596e-06, - "loss": 0.8017, + "learning_rate": 7.520777118593903e-07, + "loss": 1.0197, "step": 22478 }, { - "epoch": 0.6369973646178696, + "epoch": 0.8795289146255576, "grad_norm": 0.0, - "learning_rate": 6.150918727467455e-06, - "loss": 0.9927, + "learning_rate": 7.515956395087542e-07, + "loss": 0.947, "step": 22479 }, { - "epoch": 0.6370257020601321, + "epoch": 0.879568041317787, "grad_norm": 0.0, - "learning_rate": 6.150071665587496e-06, - "loss": 0.8134, + "learning_rate": 7.511137156759019e-07, + "loss": 0.9897, "step": 22480 }, { - "epoch": 0.6370540395023945, + "epoch": 0.8796071680100165, "grad_norm": 0.0, - "learning_rate": 6.149224636136908e-06, - "loss": 0.8559, + "learning_rate": 7.506319403685758e-07, + "loss": 1.0482, "step": 22481 }, { - "epoch": 0.637082376944657, + "epoch": 0.8796462947022459, "grad_norm": 0.0, - "learning_rate": 6.1483776391228335e-06, - "loss": 0.8978, + "learning_rate": 7.501503135945065e-07, + "loss": 0.97, "step": 22482 }, { - "epoch": 0.6371107143869195, + "epoch": 0.8796854213944754, "grad_norm": 0.0, - "learning_rate": 6.147530674552402e-06, - "loss": 0.808, + "learning_rate": 7.496688353614357e-07, + "loss": 1.0237, "step": 22483 }, { - "epoch": 0.6371390518291818, + "epoch": 0.8797245480867047, "grad_norm": 0.0, - "learning_rate": 6.1466837424327505e-06, - "loss": 0.7672, + "learning_rate": 7.491875056770914e-07, + "loss": 0.9319, "step": 22484 }, { - "epoch": 0.6371673892714443, + "epoch": 0.8797636747789341, "grad_norm": 0.0, - "learning_rate": 6.145836842771018e-06, - "loss": 0.8511, + "learning_rate": 7.487063245492043e-07, + "loss": 0.929, "step": 22485 }, { - "epoch": 0.6371957267137068, + "epoch": 0.8798028014711636, "grad_norm": 0.0, - "learning_rate": 6.144989975574327e-06, - "loss": 0.9761, + "learning_rate": 7.482252919855004e-07, + "loss": 0.9977, "step": 22486 }, { - "epoch": 0.6372240641559693, + "epoch": 0.879841928163393, "grad_norm": 0.0, - "learning_rate": 6.1441431408498175e-06, - "loss": 0.8099, + "learning_rate": 7.477444079937046e-07, + "loss": 0.9604, "step": 22487 }, { - "epoch": 0.6372524015982317, + "epoch": 0.8798810548556225, "grad_norm": 0.0, - "learning_rate": 6.143296338604626e-06, - "loss": 0.7717, + "learning_rate": 7.472636725815396e-07, + "loss": 1.0021, "step": 22488 }, { - "epoch": 0.6372807390404942, + "epoch": 0.8799201815478519, "grad_norm": 0.0, - "learning_rate": 6.1424495688458785e-06, - "loss": 0.8074, + "learning_rate": 7.467830857567282e-07, + "loss": 0.8386, "step": 22489 }, { - "epoch": 0.6373090764827567, + "epoch": 0.8799593082400814, "grad_norm": 0.0, - "learning_rate": 6.141602831580712e-06, - "loss": 0.8446, + "learning_rate": 7.463026475269841e-07, + "loss": 0.9355, "step": 22490 }, { - "epoch": 0.6373374139250191, + "epoch": 0.8799984349323108, "grad_norm": 0.0, - "learning_rate": 6.140756126816256e-06, - "loss": 0.8508, + "learning_rate": 7.458223579000223e-07, + "loss": 0.9367, "step": 22491 }, { - "epoch": 0.6373657513672816, + "epoch": 0.8800375616245403, "grad_norm": 0.0, - "learning_rate": 6.139909454559644e-06, - "loss": 0.7495, + "learning_rate": 7.453422168835589e-07, + "loss": 0.9306, "step": 22492 }, { - "epoch": 0.6373940888095441, + "epoch": 0.8800766883167697, "grad_norm": 0.0, - "learning_rate": 6.139062814818012e-06, - "loss": 0.8856, + "learning_rate": 7.448622244853043e-07, + "loss": 0.8927, "step": 22493 }, { - "epoch": 0.6374224262518066, + "epoch": 0.8801158150089992, "grad_norm": 0.0, - "learning_rate": 6.138216207598484e-06, - "loss": 0.8185, + "learning_rate": 7.443823807129624e-07, + "loss": 0.9789, "step": 22494 }, { - "epoch": 0.637450763694069, + "epoch": 0.8801549417012285, "grad_norm": 0.0, - "learning_rate": 6.137369632908197e-06, - "loss": 0.9044, + "learning_rate": 7.439026855742437e-07, + "loss": 1.0062, "step": 22495 }, { - "epoch": 0.6374791011363314, + "epoch": 0.880194068393458, "grad_norm": 0.0, - "learning_rate": 6.136523090754277e-06, - "loss": 0.8935, + "learning_rate": 7.434231390768476e-07, + "loss": 1.0394, "step": 22496 }, { - "epoch": 0.6375074385785939, + "epoch": 0.8802331950856874, "grad_norm": 0.0, - "learning_rate": 6.135676581143859e-06, - "loss": 0.7762, + "learning_rate": 7.429437412284768e-07, + "loss": 0.8344, "step": 22497 }, { - "epoch": 0.6375357760208563, + "epoch": 0.8802723217779169, "grad_norm": 0.0, - "learning_rate": 6.134830104084075e-06, - "loss": 0.9546, + "learning_rate": 7.424644920368296e-07, + "loss": 0.9894, "step": 22498 }, { - "epoch": 0.6375641134631188, + "epoch": 0.8803114484701463, "grad_norm": 0.0, - "learning_rate": 6.133983659582048e-06, - "loss": 0.8586, + "learning_rate": 7.419853915096042e-07, + "loss": 0.9491, "step": 22499 }, { - "epoch": 0.6375924509053813, + "epoch": 0.8803505751623758, "grad_norm": 0.0, - "learning_rate": 6.133137247644914e-06, - "loss": 0.7573, + "learning_rate": 7.415064396544913e-07, + "loss": 1.0403, "step": 22500 }, { - "epoch": 0.6376207883476438, + "epoch": 0.8803897018546052, "grad_norm": 0.0, - "learning_rate": 6.132290868279803e-06, - "loss": 0.7776, + "learning_rate": 7.410276364791824e-07, + "loss": 0.9932, "step": 22501 }, { - "epoch": 0.6376491257899062, + "epoch": 0.8804288285468347, "grad_norm": 0.0, - "learning_rate": 6.131444521493839e-06, - "loss": 0.899, + "learning_rate": 7.405489819913703e-07, + "loss": 0.9932, "step": 22502 }, { - "epoch": 0.6376774632321687, + "epoch": 0.8804679552390641, "grad_norm": 0.0, - "learning_rate": 6.130598207294156e-06, - "loss": 0.8148, + "learning_rate": 7.400704761987365e-07, + "loss": 0.9647, "step": 22503 }, { - "epoch": 0.6377058006744312, + "epoch": 0.8805070819312936, "grad_norm": 0.0, - "learning_rate": 6.1297519256878815e-06, - "loss": 0.8305, + "learning_rate": 7.395921191089673e-07, + "loss": 0.8388, "step": 22504 }, { - "epoch": 0.6377341381166935, + "epoch": 0.880546208623523, "grad_norm": 0.0, - "learning_rate": 6.128905676682141e-06, - "loss": 0.846, + "learning_rate": 7.391139107297451e-07, + "loss": 0.9584, "step": 22505 }, { - "epoch": 0.637762475558956, + "epoch": 0.8805853353157524, "grad_norm": 0.0, - "learning_rate": 6.12805946028407e-06, - "loss": 0.857, + "learning_rate": 7.386358510687508e-07, + "loss": 0.9847, "step": 22506 }, { - "epoch": 0.6377908130012185, + "epoch": 0.8806244620079818, "grad_norm": 0.0, - "learning_rate": 6.127213276500789e-06, - "loss": 0.8434, + "learning_rate": 7.381579401336581e-07, + "loss": 0.9308, "step": 22507 }, { - "epoch": 0.6378191504434809, + "epoch": 0.8806635887002113, "grad_norm": 0.0, - "learning_rate": 6.126367125339428e-06, - "loss": 0.9032, + "learning_rate": 7.376801779321441e-07, + "loss": 0.9805, "step": 22508 }, { - "epoch": 0.6378474878857434, + "epoch": 0.8807027153924407, "grad_norm": 0.0, - "learning_rate": 6.125521006807116e-06, - "loss": 0.9016, + "learning_rate": 7.372025644718772e-07, + "loss": 0.9412, "step": 22509 }, { - "epoch": 0.6378758253280059, + "epoch": 0.8807418420846702, "grad_norm": 0.0, - "learning_rate": 6.124674920910979e-06, - "loss": 0.8227, + "learning_rate": 7.367250997605324e-07, + "loss": 1.0331, "step": 22510 }, { - "epoch": 0.6379041627702684, + "epoch": 0.8807809687768996, "grad_norm": 0.0, - "learning_rate": 6.123828867658148e-06, - "loss": 0.8299, + "learning_rate": 7.362477838057747e-07, + "loss": 1.0016, "step": 22511 }, { - "epoch": 0.6379325002125308, + "epoch": 0.8808200954691291, "grad_norm": 0.0, - "learning_rate": 6.1229828470557405e-06, - "loss": 0.9016, + "learning_rate": 7.357706166152711e-07, + "loss": 0.968, "step": 22512 }, { - "epoch": 0.6379608376547933, + "epoch": 0.8808592221613585, "grad_norm": 0.0, - "learning_rate": 6.1221368591108895e-06, - "loss": 0.9101, + "learning_rate": 7.352935981966802e-07, + "loss": 1.0514, "step": 22513 }, { - "epoch": 0.6379891750970558, + "epoch": 0.8808983488535879, "grad_norm": 0.0, - "learning_rate": 6.1212909038307215e-06, - "loss": 0.8067, + "learning_rate": 7.348167285576646e-07, + "loss": 0.9411, "step": 22514 }, { - "epoch": 0.6380175125393182, + "epoch": 0.8809374755458174, "grad_norm": 0.0, - "learning_rate": 6.120444981222359e-06, - "loss": 0.9126, + "learning_rate": 7.343400077058838e-07, + "loss": 0.9743, "step": 22515 }, { - "epoch": 0.6380458499815806, + "epoch": 0.8809766022380467, "grad_norm": 0.0, - "learning_rate": 6.11959909129293e-06, - "loss": 0.8717, + "learning_rate": 7.338634356489926e-07, + "loss": 0.9916, "step": 22516 }, { - "epoch": 0.6380741874238431, + "epoch": 0.8810157289302762, "grad_norm": 0.0, - "learning_rate": 6.118753234049559e-06, - "loss": 0.805, + "learning_rate": 7.333870123946418e-07, + "loss": 0.9701, "step": 22517 }, { - "epoch": 0.6381025248661056, + "epoch": 0.8810548556225056, "grad_norm": 0.0, - "learning_rate": 6.1179074094993695e-06, - "loss": 0.7874, + "learning_rate": 7.32910737950484e-07, + "loss": 0.9656, "step": 22518 }, { - "epoch": 0.638130862308368, + "epoch": 0.8810939823147351, "grad_norm": 0.0, - "learning_rate": 6.1170616176494916e-06, - "loss": 0.9473, + "learning_rate": 7.324346123241677e-07, + "loss": 0.8786, "step": 22519 }, { - "epoch": 0.6381591997506305, + "epoch": 0.8811331090069645, "grad_norm": 0.0, - "learning_rate": 6.11621585850704e-06, - "loss": 0.8539, + "learning_rate": 7.319586355233399e-07, + "loss": 0.9621, "step": 22520 }, { - "epoch": 0.638187537192893, + "epoch": 0.881172235699194, "grad_norm": 0.0, - "learning_rate": 6.1153701320791455e-06, - "loss": 0.9067, + "learning_rate": 7.314828075556412e-07, + "loss": 1.0544, "step": 22521 }, { - "epoch": 0.6382158746351554, + "epoch": 0.8812113623914234, "grad_norm": 0.0, - "learning_rate": 6.114524438372933e-06, - "loss": 0.8741, + "learning_rate": 7.310071284287168e-07, + "loss": 0.9908, "step": 22522 }, { - "epoch": 0.6382442120774179, + "epoch": 0.8812504890836529, "grad_norm": 0.0, - "learning_rate": 6.113678777395522e-06, - "loss": 0.7831, + "learning_rate": 7.305315981501993e-07, + "loss": 0.9103, "step": 22523 }, { - "epoch": 0.6382725495196804, + "epoch": 0.8812896157758823, "grad_norm": 0.0, - "learning_rate": 6.112833149154042e-06, - "loss": 0.7641, + "learning_rate": 7.300562167277325e-07, + "loss": 0.9457, "step": 22524 }, { - "epoch": 0.6383008869619429, + "epoch": 0.8813287424681118, "grad_norm": 0.0, - "learning_rate": 6.111987553655607e-06, - "loss": 0.7165, + "learning_rate": 7.29580984168946e-07, + "loss": 0.8667, "step": 22525 }, { - "epoch": 0.6383292244042053, + "epoch": 0.8813678691603412, "grad_norm": 0.0, - "learning_rate": 6.111141990907346e-06, - "loss": 0.8968, + "learning_rate": 7.291059004814738e-07, + "loss": 1.0176, "step": 22526 }, { - "epoch": 0.6383575618464677, + "epoch": 0.8814069958525707, "grad_norm": 0.0, - "learning_rate": 6.1102964609163804e-06, - "loss": 0.818, + "learning_rate": 7.286309656729396e-07, + "loss": 0.9993, "step": 22527 }, { - "epoch": 0.6383858992887302, + "epoch": 0.8814461225448, "grad_norm": 0.0, - "learning_rate": 6.109450963689831e-06, - "loss": 0.9341, + "learning_rate": 7.281561797509784e-07, + "loss": 1.0472, "step": 22528 }, { - "epoch": 0.6384142367309926, + "epoch": 0.8814852492370295, "grad_norm": 0.0, - "learning_rate": 6.108605499234821e-06, - "loss": 0.6427, + "learning_rate": 7.276815427232087e-07, + "loss": 0.9362, "step": 22529 }, { - "epoch": 0.6384425741732551, + "epoch": 0.8815243759292589, "grad_norm": 0.0, - "learning_rate": 6.107760067558476e-06, - "loss": 0.9236, + "learning_rate": 7.272070545972564e-07, + "loss": 1.0342, "step": 22530 }, { - "epoch": 0.6384709116155176, + "epoch": 0.8815635026214884, "grad_norm": 0.0, - "learning_rate": 6.106914668667909e-06, - "loss": 0.8951, + "learning_rate": 7.26732715380738e-07, + "loss": 0.9798, "step": 22531 }, { - "epoch": 0.63849924905778, + "epoch": 0.8816026293137178, "grad_norm": 0.0, - "learning_rate": 6.10606930257025e-06, - "loss": 0.8456, + "learning_rate": 7.262585250812715e-07, + "loss": 1.0612, "step": 22532 }, { - "epoch": 0.6385275865000425, + "epoch": 0.8816417560059473, "grad_norm": 0.0, - "learning_rate": 6.10522396927261e-06, - "loss": 0.8579, + "learning_rate": 7.257844837064732e-07, + "loss": 0.9357, "step": 22533 }, { - "epoch": 0.638555923942305, + "epoch": 0.8816808826981767, "grad_norm": 0.0, - "learning_rate": 6.104378668782116e-06, - "loss": 0.8745, + "learning_rate": 7.253105912639557e-07, + "loss": 1.0216, "step": 22534 }, { - "epoch": 0.6385842613845675, + "epoch": 0.8817200093904062, "grad_norm": 0.0, - "learning_rate": 6.103533401105888e-06, - "loss": 0.8693, + "learning_rate": 7.248368477613265e-07, + "loss": 0.9619, "step": 22535 }, { - "epoch": 0.6386125988268299, + "epoch": 0.8817591360826356, "grad_norm": 0.0, - "learning_rate": 6.102688166251044e-06, - "loss": 0.9093, + "learning_rate": 7.243632532061962e-07, + "loss": 1.012, "step": 22536 }, { - "epoch": 0.6386409362690924, + "epoch": 0.8817982627748651, "grad_norm": 0.0, - "learning_rate": 6.1018429642247045e-06, - "loss": 0.7548, + "learning_rate": 7.238898076061685e-07, + "loss": 0.967, "step": 22537 }, { - "epoch": 0.6386692737113548, + "epoch": 0.8818373894670944, "grad_norm": 0.0, - "learning_rate": 6.1009977950339926e-06, - "loss": 0.865, + "learning_rate": 7.234165109688485e-07, + "loss": 1.0094, "step": 22538 }, { - "epoch": 0.6386976111536172, + "epoch": 0.8818765161593239, "grad_norm": 0.0, - "learning_rate": 6.10015265868602e-06, - "loss": 0.7569, + "learning_rate": 7.229433633018335e-07, + "loss": 0.9038, "step": 22539 }, { - "epoch": 0.6387259485958797, + "epoch": 0.8819156428515533, "grad_norm": 0.0, - "learning_rate": 6.099307555187913e-06, - "loss": 0.8556, + "learning_rate": 7.224703646127229e-07, + "loss": 0.8047, "step": 22540 }, { - "epoch": 0.6387542860381422, + "epoch": 0.8819547695437828, "grad_norm": 0.0, - "learning_rate": 6.098462484546785e-06, - "loss": 0.8999, + "learning_rate": 7.219975149091141e-07, + "loss": 0.8658, "step": 22541 }, { - "epoch": 0.6387826234804047, + "epoch": 0.8819938962360122, "grad_norm": 0.0, - "learning_rate": 6.097617446769755e-06, - "loss": 0.8736, + "learning_rate": 7.215248141985986e-07, + "loss": 1.0084, "step": 22542 }, { - "epoch": 0.6388109609226671, + "epoch": 0.8820330229282416, "grad_norm": 0.0, - "learning_rate": 6.0967724418639474e-06, - "loss": 0.8438, + "learning_rate": 7.210522624887672e-07, + "loss": 0.9093, "step": 22543 }, { - "epoch": 0.6388392983649296, + "epoch": 0.8820721496204711, "grad_norm": 0.0, - "learning_rate": 6.095927469836471e-06, - "loss": 0.915, + "learning_rate": 7.205798597872116e-07, + "loss": 0.9005, "step": 22544 }, { - "epoch": 0.6388676358071921, + "epoch": 0.8821112763127005, "grad_norm": 0.0, - "learning_rate": 6.09508253069445e-06, - "loss": 0.9162, + "learning_rate": 7.201076061015144e-07, + "loss": 0.8975, "step": 22545 }, { - "epoch": 0.6388959732494545, + "epoch": 0.88215040300493, "grad_norm": 0.0, - "learning_rate": 6.0942376244449965e-06, - "loss": 0.8084, + "learning_rate": 7.196355014392597e-07, + "loss": 0.9329, "step": 22546 }, { - "epoch": 0.638924310691717, + "epoch": 0.8821895296971594, "grad_norm": 0.0, - "learning_rate": 6.093392751095228e-06, - "loss": 0.8058, + "learning_rate": 7.191635458080326e-07, + "loss": 0.8822, "step": 22547 }, { - "epoch": 0.6389526481339795, + "epoch": 0.8822286563893889, "grad_norm": 0.0, - "learning_rate": 6.092547910652267e-06, - "loss": 0.9222, + "learning_rate": 7.186917392154069e-07, + "loss": 0.9309, "step": 22548 }, { - "epoch": 0.6389809855762418, + "epoch": 0.8822677830816182, "grad_norm": 0.0, - "learning_rate": 6.091703103123223e-06, - "loss": 0.8311, + "learning_rate": 7.182200816689622e-07, + "loss": 1.0071, "step": 22549 }, { - "epoch": 0.6390093230185043, + "epoch": 0.8823069097738477, "grad_norm": 0.0, - "learning_rate": 6.0908583285152154e-06, - "loss": 0.9202, + "learning_rate": 7.177485731762712e-07, + "loss": 0.8661, "step": 22550 }, { - "epoch": 0.6390376604607668, + "epoch": 0.8823460364660771, "grad_norm": 0.0, - "learning_rate": 6.0900135868353635e-06, - "loss": 0.7928, + "learning_rate": 7.172772137449091e-07, + "loss": 0.9456, "step": 22551 }, { - "epoch": 0.6390659979030293, + "epoch": 0.8823851631583066, "grad_norm": 0.0, - "learning_rate": 6.089168878090776e-06, - "loss": 0.8931, + "learning_rate": 7.16806003382442e-07, + "loss": 0.9865, "step": 22552 }, { - "epoch": 0.6390943353452917, + "epoch": 0.882424289850536, "grad_norm": 0.0, - "learning_rate": 6.0883242022885716e-06, - "loss": 0.8331, + "learning_rate": 7.163349420964394e-07, + "loss": 0.9559, "step": 22553 }, { - "epoch": 0.6391226727875542, + "epoch": 0.8824634165427655, "grad_norm": 0.0, - "learning_rate": 6.0874795594358635e-06, - "loss": 0.8906, + "learning_rate": 7.158640298944608e-07, + "loss": 1.0328, "step": 22554 }, { - "epoch": 0.6391510102298167, + "epoch": 0.8825025432349949, "grad_norm": 0.0, - "learning_rate": 6.086634949539769e-06, - "loss": 0.892, + "learning_rate": 7.153932667840757e-07, + "loss": 1.0034, "step": 22555 }, { - "epoch": 0.6391793476720791, + "epoch": 0.8825416699272244, "grad_norm": 0.0, - "learning_rate": 6.085790372607404e-06, - "loss": 0.9627, + "learning_rate": 7.149226527728393e-07, + "loss": 0.9597, "step": 22556 }, { - "epoch": 0.6392076851143416, + "epoch": 0.8825807966194538, "grad_norm": 0.0, - "learning_rate": 6.084945828645878e-06, - "loss": 0.8511, + "learning_rate": 7.144521878683108e-07, + "loss": 0.9551, "step": 22557 }, { - "epoch": 0.639236022556604, + "epoch": 0.8826199233116833, "grad_norm": 0.0, - "learning_rate": 6.0841013176623056e-06, - "loss": 0.8148, + "learning_rate": 7.139818720780423e-07, + "loss": 0.9291, "step": 22558 }, { - "epoch": 0.6392643599988665, + "epoch": 0.8826590500039126, "grad_norm": 0.0, - "learning_rate": 6.083256839663807e-06, - "loss": 0.6747, + "learning_rate": 7.135117054095919e-07, + "loss": 0.9557, "step": 22559 }, { - "epoch": 0.6392926974411289, + "epoch": 0.8826981766961421, "grad_norm": 0.0, - "learning_rate": 6.082412394657485e-06, - "loss": 0.8281, + "learning_rate": 7.130416878705059e-07, + "loss": 1.0315, "step": 22560 }, { - "epoch": 0.6393210348833914, + "epoch": 0.8827373033883715, "grad_norm": 0.0, - "learning_rate": 6.08156798265046e-06, - "loss": 0.8918, + "learning_rate": 7.12571819468334e-07, + "loss": 0.9082, "step": 22561 }, { - "epoch": 0.6393493723256539, + "epoch": 0.882776430080601, "grad_norm": 0.0, - "learning_rate": 6.080723603649843e-06, - "loss": 0.7901, + "learning_rate": 7.121021002106198e-07, + "loss": 0.9467, "step": 22562 }, { - "epoch": 0.6393777097679163, + "epoch": 0.8828155567728304, "grad_norm": 0.0, - "learning_rate": 6.079879257662746e-06, - "loss": 0.9155, + "learning_rate": 7.116325301049076e-07, + "loss": 0.9481, "step": 22563 }, { - "epoch": 0.6394060472101788, + "epoch": 0.8828546834650599, "grad_norm": 0.0, - "learning_rate": 6.079034944696285e-06, - "loss": 0.766, + "learning_rate": 7.111631091587368e-07, + "loss": 0.9393, "step": 22564 }, { - "epoch": 0.6394343846524413, + "epoch": 0.8828938101572893, "grad_norm": 0.0, - "learning_rate": 6.078190664757564e-06, - "loss": 0.8865, + "learning_rate": 7.106938373796501e-07, + "loss": 0.9964, "step": 22565 }, { - "epoch": 0.6394627220947038, + "epoch": 0.8829329368495188, "grad_norm": 0.0, - "learning_rate": 6.0773464178537e-06, - "loss": 0.8566, + "learning_rate": 7.102247147751773e-07, + "loss": 0.8131, "step": 22566 }, { - "epoch": 0.6394910595369662, + "epoch": 0.8829720635417482, "grad_norm": 0.0, - "learning_rate": 6.076502203991808e-06, - "loss": 0.7186, + "learning_rate": 7.097557413528555e-07, + "loss": 0.9211, "step": 22567 }, { - "epoch": 0.6395193969792287, + "epoch": 0.8830111902339777, "grad_norm": 0.0, - "learning_rate": 6.07565802317899e-06, - "loss": 0.8786, + "learning_rate": 7.092869171202155e-07, + "loss": 0.9861, "step": 22568 }, { - "epoch": 0.6395477344214912, + "epoch": 0.8830503169262071, "grad_norm": 0.0, - "learning_rate": 6.0748138754223665e-06, - "loss": 0.7912, + "learning_rate": 7.088182420847867e-07, + "loss": 0.8969, "step": 22569 }, { - "epoch": 0.6395760718637535, + "epoch": 0.8830894436184364, "grad_norm": 0.0, - "learning_rate": 6.073969760729039e-06, - "loss": 0.8561, + "learning_rate": 7.083497162540931e-07, + "loss": 0.7963, "step": 22570 }, { - "epoch": 0.639604409306016, + "epoch": 0.8831285703106659, "grad_norm": 0.0, - "learning_rate": 6.073125679106122e-06, - "loss": 0.927, + "learning_rate": 7.07881339635661e-07, + "loss": 0.8327, "step": 22571 }, { - "epoch": 0.6396327467482785, + "epoch": 0.8831676970028953, "grad_norm": 0.0, - "learning_rate": 6.0722816305607315e-06, - "loss": 0.8762, + "learning_rate": 7.074131122370076e-07, + "loss": 1.0116, "step": 22572 }, { - "epoch": 0.6396610841905409, + "epoch": 0.8832068236951248, "grad_norm": 0.0, - "learning_rate": 6.071437615099966e-06, - "loss": 0.8074, + "learning_rate": 7.069450340656592e-07, + "loss": 0.9156, "step": 22573 }, { - "epoch": 0.6396894216328034, + "epoch": 0.8832459503873542, "grad_norm": 0.0, - "learning_rate": 6.070593632730941e-06, - "loss": 0.8734, + "learning_rate": 7.064771051291275e-07, + "loss": 0.9906, "step": 22574 }, { - "epoch": 0.6397177590750659, + "epoch": 0.8832850770795837, "grad_norm": 0.0, - "learning_rate": 6.069749683460765e-06, - "loss": 0.8902, + "learning_rate": 7.060093254349287e-07, + "loss": 0.92, "step": 22575 }, { - "epoch": 0.6397460965173284, + "epoch": 0.8833242037718131, "grad_norm": 0.0, - "learning_rate": 6.068905767296547e-06, - "loss": 0.946, + "learning_rate": 7.055416949905714e-07, + "loss": 0.953, "step": 22576 }, { - "epoch": 0.6397744339595908, + "epoch": 0.8833633304640426, "grad_norm": 0.0, - "learning_rate": 6.068061884245398e-06, - "loss": 0.9108, + "learning_rate": 7.050742138035716e-07, + "loss": 0.937, "step": 22577 }, { - "epoch": 0.6398027714018533, + "epoch": 0.883402457156272, "grad_norm": 0.0, - "learning_rate": 6.0672180343144204e-06, - "loss": 0.7775, + "learning_rate": 7.0460688188143e-07, + "loss": 0.9542, "step": 22578 }, { - "epoch": 0.6398311088441158, + "epoch": 0.8834415838485015, "grad_norm": 0.0, - "learning_rate": 6.066374217510725e-06, - "loss": 0.9016, + "learning_rate": 7.041396992316563e-07, + "loss": 0.9977, "step": 22579 }, { - "epoch": 0.6398594462863781, + "epoch": 0.8834807105407309, "grad_norm": 0.0, - "learning_rate": 6.065530433841424e-06, - "loss": 0.7682, + "learning_rate": 7.036726658617499e-07, + "loss": 0.9673, "step": 22580 }, { - "epoch": 0.6398877837286406, + "epoch": 0.8835198372329603, "grad_norm": 0.0, - "learning_rate": 6.064686683313619e-06, - "loss": 0.8654, + "learning_rate": 7.032057817792104e-07, + "loss": 0.8721, "step": 22581 }, { - "epoch": 0.6399161211709031, + "epoch": 0.8835589639251897, "grad_norm": 0.0, - "learning_rate": 6.0638429659344215e-06, - "loss": 0.8913, + "learning_rate": 7.027390469915363e-07, + "loss": 0.8285, "step": 22582 }, { - "epoch": 0.6399444586131656, + "epoch": 0.8835980906174192, "grad_norm": 0.0, - "learning_rate": 6.062999281710934e-06, - "loss": 0.7636, + "learning_rate": 7.022724615062249e-07, + "loss": 0.9515, "step": 22583 }, { - "epoch": 0.639972796055428, + "epoch": 0.8836372173096486, "grad_norm": 0.0, - "learning_rate": 6.062155630650265e-06, - "loss": 0.9593, + "learning_rate": 7.018060253307657e-07, + "loss": 0.9835, "step": 22584 }, { - "epoch": 0.6400011334976905, + "epoch": 0.8836763440018781, "grad_norm": 0.0, - "learning_rate": 6.061312012759526e-06, - "loss": 0.8336, + "learning_rate": 7.013397384726505e-07, + "loss": 0.9628, "step": 22585 }, { - "epoch": 0.640029470939953, + "epoch": 0.8837154706941075, "grad_norm": 0.0, - "learning_rate": 6.0604684280458135e-06, - "loss": 0.7606, + "learning_rate": 7.00873600939369e-07, + "loss": 1.0005, "step": 22586 }, { - "epoch": 0.6400578083822154, + "epoch": 0.883754597386337, "grad_norm": 0.0, - "learning_rate": 6.059624876516239e-06, - "loss": 0.8391, + "learning_rate": 7.00407612738403e-07, + "loss": 0.9797, "step": 22587 }, { - "epoch": 0.6400861458244779, + "epoch": 0.8837937240785664, "grad_norm": 0.0, - "learning_rate": 6.058781358177909e-06, - "loss": 0.9048, + "learning_rate": 6.999417738772374e-07, + "loss": 0.9345, "step": 22588 }, { - "epoch": 0.6401144832667404, + "epoch": 0.8838328507707959, "grad_norm": 0.0, - "learning_rate": 6.057937873037925e-06, - "loss": 0.7471, + "learning_rate": 6.994760843633552e-07, + "loss": 0.9498, "step": 22589 }, { - "epoch": 0.6401428207090029, + "epoch": 0.8838719774630253, "grad_norm": 0.0, - "learning_rate": 6.057094421103398e-06, - "loss": 0.8887, + "learning_rate": 6.990105442042316e-07, + "loss": 0.7824, "step": 22590 }, { - "epoch": 0.6401711581512652, + "epoch": 0.8839111041552548, "grad_norm": 0.0, - "learning_rate": 6.0562510023814256e-06, - "loss": 0.8817, + "learning_rate": 6.985451534073439e-07, + "loss": 0.8839, "step": 22591 }, { - "epoch": 0.6401994955935277, + "epoch": 0.8839502308474841, "grad_norm": 0.0, - "learning_rate": 6.055407616879115e-06, - "loss": 0.8784, + "learning_rate": 6.980799119801674e-07, + "loss": 0.8634, "step": 22592 }, { - "epoch": 0.6402278330357902, + "epoch": 0.8839893575397136, "grad_norm": 0.0, - "learning_rate": 6.054564264603573e-06, - "loss": 0.8398, + "learning_rate": 6.976148199301691e-07, + "loss": 0.9239, "step": 22593 }, { - "epoch": 0.6402561704780526, + "epoch": 0.884028484231943, "grad_norm": 0.0, - "learning_rate": 6.053720945561901e-06, - "loss": 0.8462, + "learning_rate": 6.971498772648211e-07, + "loss": 1.0374, "step": 22594 }, { - "epoch": 0.6402845079203151, + "epoch": 0.8840676109241725, "grad_norm": 0.0, - "learning_rate": 6.0528776597612e-06, - "loss": 0.8501, + "learning_rate": 6.966850839915884e-07, + "loss": 0.937, "step": 22595 }, { - "epoch": 0.6403128453625776, + "epoch": 0.8841067376164019, "grad_norm": 0.0, - "learning_rate": 6.052034407208582e-06, - "loss": 0.7558, + "learning_rate": 6.962204401179373e-07, + "loss": 0.9185, "step": 22596 }, { - "epoch": 0.64034118280484, + "epoch": 0.8841458643086314, "grad_norm": 0.0, - "learning_rate": 6.051191187911138e-06, - "loss": 0.8915, + "learning_rate": 6.957559456513263e-07, + "loss": 0.9421, "step": 22597 }, { - "epoch": 0.6403695202471025, + "epoch": 0.8841849910008608, "grad_norm": 0.0, - "learning_rate": 6.050348001875983e-06, - "loss": 0.8427, + "learning_rate": 6.95291600599215e-07, + "loss": 0.9937, "step": 22598 }, { - "epoch": 0.640397857689365, + "epoch": 0.8842241176930902, "grad_norm": 0.0, - "learning_rate": 6.04950484911021e-06, - "loss": 0.823, + "learning_rate": 6.948274049690618e-07, + "loss": 0.8995, "step": 22599 }, { - "epoch": 0.6404261951316275, + "epoch": 0.8842632443853197, "grad_norm": 0.0, - "learning_rate": 6.048661729620924e-06, - "loss": 0.8723, + "learning_rate": 6.943633587683218e-07, + "loss": 0.9795, "step": 22600 }, { - "epoch": 0.6404545325738898, + "epoch": 0.884302371077549, "grad_norm": 0.0, - "learning_rate": 6.047818643415229e-06, - "loss": 0.8431, + "learning_rate": 6.938994620044448e-07, + "loss": 1.019, "step": 22601 }, { - "epoch": 0.6404828700161523, + "epoch": 0.8843414977697786, "grad_norm": 0.0, - "learning_rate": 6.046975590500223e-06, - "loss": 0.703, + "learning_rate": 6.934357146848824e-07, + "loss": 0.9497, "step": 22602 }, { - "epoch": 0.6405112074584148, + "epoch": 0.8843806244620079, "grad_norm": 0.0, - "learning_rate": 6.046132570883015e-06, - "loss": 0.7347, + "learning_rate": 6.929721168170778e-07, + "loss": 0.9496, "step": 22603 }, { - "epoch": 0.6405395449006772, + "epoch": 0.8844197511542374, "grad_norm": 0.0, - "learning_rate": 6.045289584570695e-06, - "loss": 0.8593, + "learning_rate": 6.925086684084814e-07, + "loss": 0.8582, "step": 22604 }, { - "epoch": 0.6405678823429397, + "epoch": 0.8844588778464668, "grad_norm": 0.0, - "learning_rate": 6.0444466315703695e-06, - "loss": 0.8349, + "learning_rate": 6.920453694665308e-07, + "loss": 0.8946, "step": 22605 }, { - "epoch": 0.6405962197852022, + "epoch": 0.8844980045386963, "grad_norm": 0.0, - "learning_rate": 6.043603711889141e-06, - "loss": 0.7545, + "learning_rate": 6.915822199986699e-07, + "loss": 0.9387, "step": 22606 }, { - "epoch": 0.6406245572274647, + "epoch": 0.8845371312309257, "grad_norm": 0.0, - "learning_rate": 6.0427608255341064e-06, - "loss": 0.868, + "learning_rate": 6.911192200123318e-07, + "loss": 0.8708, "step": 22607 }, { - "epoch": 0.6406528946697271, + "epoch": 0.8845762579231552, "grad_norm": 0.0, - "learning_rate": 6.041917972512367e-06, - "loss": 0.827, + "learning_rate": 6.906563695149571e-07, + "loss": 0.9456, "step": 22608 }, { - "epoch": 0.6406812321119896, + "epoch": 0.8846153846153846, "grad_norm": 0.0, - "learning_rate": 6.041075152831025e-06, - "loss": 0.8335, + "learning_rate": 6.901936685139743e-07, + "loss": 0.9271, "step": 22609 }, { - "epoch": 0.6407095695542521, + "epoch": 0.8846545113076141, "grad_norm": 0.0, - "learning_rate": 6.040232366497174e-06, - "loss": 0.9903, + "learning_rate": 6.897311170168175e-07, + "loss": 0.9729, "step": 22610 }, { - "epoch": 0.6407379069965145, + "epoch": 0.8846936379998435, "grad_norm": 0.0, - "learning_rate": 6.0393896135179205e-06, - "loss": 0.7785, + "learning_rate": 6.892687150309108e-07, + "loss": 1.0097, "step": 22611 }, { - "epoch": 0.6407662444387769, + "epoch": 0.884732764692073, "grad_norm": 0.0, - "learning_rate": 6.038546893900354e-06, - "loss": 0.948, + "learning_rate": 6.888064625636803e-07, + "loss": 0.8854, "step": 22612 }, { - "epoch": 0.6407945818810394, + "epoch": 0.8847718913843023, "grad_norm": 0.0, - "learning_rate": 6.037704207651578e-06, - "loss": 0.7975, + "learning_rate": 6.883443596225514e-07, + "loss": 0.887, "step": 22613 }, { - "epoch": 0.6408229193233019, + "epoch": 0.8848110180765318, "grad_norm": 0.0, - "learning_rate": 6.036861554778695e-06, - "loss": 0.8745, + "learning_rate": 6.878824062149459e-07, + "loss": 0.9062, "step": 22614 }, { - "epoch": 0.6408512567655643, + "epoch": 0.8848501447687612, "grad_norm": 0.0, - "learning_rate": 6.036018935288794e-06, - "loss": 0.7764, + "learning_rate": 6.874206023482777e-07, + "loss": 1.0052, "step": 22615 }, { - "epoch": 0.6408795942078268, + "epoch": 0.8848892714609907, "grad_norm": 0.0, - "learning_rate": 6.035176349188978e-06, - "loss": 0.7312, + "learning_rate": 6.869589480299665e-07, + "loss": 0.8767, "step": 22616 }, { - "epoch": 0.6409079316500893, + "epoch": 0.8849283981532201, "grad_norm": 0.0, - "learning_rate": 6.034333796486349e-06, - "loss": 0.9398, + "learning_rate": 6.864974432674232e-07, + "loss": 0.9019, "step": 22617 }, { - "epoch": 0.6409362690923517, + "epoch": 0.8849675248454496, "grad_norm": 0.0, - "learning_rate": 6.033491277187995e-06, - "loss": 0.6857, + "learning_rate": 6.860360880680639e-07, + "loss": 0.9432, "step": 22618 }, { - "epoch": 0.6409646065346142, + "epoch": 0.885006651537679, "grad_norm": 0.0, - "learning_rate": 6.032648791301019e-06, - "loss": 0.985, + "learning_rate": 6.855748824392904e-07, + "loss": 1.0101, "step": 22619 }, { - "epoch": 0.6409929439768767, + "epoch": 0.8850457782299085, "grad_norm": 0.0, - "learning_rate": 6.0318063388325134e-06, - "loss": 0.9353, + "learning_rate": 6.85113826388516e-07, + "loss": 0.9594, "step": 22620 }, { - "epoch": 0.641021281419139, + "epoch": 0.8850849049221379, "grad_norm": 0.0, - "learning_rate": 6.030963919789575e-06, - "loss": 0.8219, + "learning_rate": 6.846529199231366e-07, + "loss": 0.9415, "step": 22621 }, { - "epoch": 0.6410496188614015, + "epoch": 0.8851240316143674, "grad_norm": 0.0, - "learning_rate": 6.030121534179307e-06, - "loss": 0.8858, + "learning_rate": 6.841921630505632e-07, + "loss": 0.9663, "step": 22622 }, { - "epoch": 0.641077956303664, + "epoch": 0.8851631583065968, "grad_norm": 0.0, - "learning_rate": 6.029279182008795e-06, - "loss": 0.7829, + "learning_rate": 6.837315557781876e-07, + "loss": 1.1804, "step": 22623 }, { - "epoch": 0.6411062937459265, + "epoch": 0.8852022849988262, "grad_norm": 0.0, - "learning_rate": 6.0284368632851386e-06, - "loss": 0.8745, + "learning_rate": 6.832710981134116e-07, + "loss": 1.0216, "step": 22624 }, { - "epoch": 0.6411346311881889, + "epoch": 0.8852414116910556, "grad_norm": 0.0, - "learning_rate": 6.0275945780154365e-06, - "loss": 0.8672, + "learning_rate": 6.828107900636249e-07, + "loss": 0.9599, "step": 22625 }, { - "epoch": 0.6411629686304514, + "epoch": 0.8852805383832851, "grad_norm": 0.0, - "learning_rate": 6.026752326206777e-06, - "loss": 0.7994, + "learning_rate": 6.823506316362227e-07, + "loss": 0.9185, "step": 22626 }, { - "epoch": 0.6411913060727139, + "epoch": 0.8853196650755145, "grad_norm": 0.0, - "learning_rate": 6.025910107866263e-06, - "loss": 0.9336, + "learning_rate": 6.818906228385924e-07, + "loss": 0.9799, "step": 22627 }, { - "epoch": 0.6412196435149763, + "epoch": 0.8853587917677439, "grad_norm": 0.0, - "learning_rate": 6.02506792300098e-06, - "loss": 0.8084, + "learning_rate": 6.814307636781248e-07, + "loss": 0.97, "step": 22628 }, { - "epoch": 0.6412479809572388, + "epoch": 0.8853979184599734, "grad_norm": 0.0, - "learning_rate": 6.024225771618024e-06, - "loss": 0.8177, + "learning_rate": 6.809710541622017e-07, + "loss": 0.8942, "step": 22629 }, { - "epoch": 0.6412763183995013, + "epoch": 0.8854370451522028, "grad_norm": 0.0, - "learning_rate": 6.0233836537244975e-06, - "loss": 0.8899, + "learning_rate": 6.80511494298205e-07, + "loss": 1.024, "step": 22630 }, { - "epoch": 0.6413046558417638, + "epoch": 0.8854761718444323, "grad_norm": 0.0, - "learning_rate": 6.022541569327481e-06, - "loss": 0.9047, + "learning_rate": 6.800520840935176e-07, + "loss": 1.0364, "step": 22631 }, { - "epoch": 0.6413329932840262, + "epoch": 0.8855152985366617, "grad_norm": 0.0, - "learning_rate": 6.021699518434077e-06, - "loss": 0.869, + "learning_rate": 6.795928235555127e-07, + "loss": 0.9239, "step": 22632 }, { - "epoch": 0.6413613307262886, + "epoch": 0.8855544252288912, "grad_norm": 0.0, - "learning_rate": 6.0208575010513735e-06, - "loss": 0.9428, + "learning_rate": 6.791337126915687e-07, + "loss": 0.9063, "step": 22633 }, { - "epoch": 0.6413896681685511, + "epoch": 0.8855935519211205, "grad_norm": 0.0, - "learning_rate": 6.0200155171864635e-06, - "loss": 0.8303, + "learning_rate": 6.786747515090574e-07, + "loss": 0.881, "step": 22634 }, { - "epoch": 0.6414180056108135, + "epoch": 0.88563267861335, "grad_norm": 0.0, - "learning_rate": 6.019173566846446e-06, - "loss": 0.7404, + "learning_rate": 6.782159400153521e-07, + "loss": 0.989, "step": 22635 }, { - "epoch": 0.641446343053076, + "epoch": 0.8856718053055794, "grad_norm": 0.0, - "learning_rate": 6.0183316500384035e-06, - "loss": 0.7889, + "learning_rate": 6.777572782178155e-07, + "loss": 0.9739, "step": 22636 }, { - "epoch": 0.6414746804953385, + "epoch": 0.8857109319978089, "grad_norm": 0.0, - "learning_rate": 6.017489766769432e-06, - "loss": 0.7188, + "learning_rate": 6.772987661238161e-07, + "loss": 0.9613, "step": 22637 }, { - "epoch": 0.641503017937601, + "epoch": 0.8857500586900383, "grad_norm": 0.0, - "learning_rate": 6.016647917046625e-06, - "loss": 0.9417, + "learning_rate": 6.768404037407162e-07, + "loss": 1.0044, "step": 22638 }, { - "epoch": 0.6415313553798634, + "epoch": 0.8857891853822678, "grad_norm": 0.0, - "learning_rate": 6.015806100877069e-06, - "loss": 0.84, + "learning_rate": 6.763821910758761e-07, + "loss": 1.0416, "step": 22639 }, { - "epoch": 0.6415596928221259, + "epoch": 0.8858283120744972, "grad_norm": 0.0, - "learning_rate": 6.014964318267863e-06, - "loss": 0.7878, + "learning_rate": 6.759241281366558e-07, + "loss": 1.0006, "step": 22640 }, { - "epoch": 0.6415880302643884, + "epoch": 0.8858674387667267, "grad_norm": 0.0, - "learning_rate": 6.014122569226088e-06, - "loss": 0.8533, + "learning_rate": 6.754662149304115e-07, + "loss": 0.8714, "step": 22641 }, { - "epoch": 0.6416163677066508, + "epoch": 0.8859065654589561, "grad_norm": 0.0, - "learning_rate": 6.013280853758839e-06, - "loss": 0.7908, + "learning_rate": 6.750084514644939e-07, + "loss": 0.9409, "step": 22642 }, { - "epoch": 0.6416447051489133, + "epoch": 0.8859456921511856, "grad_norm": 0.0, - "learning_rate": 6.012439171873209e-06, - "loss": 0.9259, + "learning_rate": 6.745508377462551e-07, + "loss": 0.9551, "step": 22643 }, { - "epoch": 0.6416730425911757, + "epoch": 0.885984818843415, "grad_norm": 0.0, - "learning_rate": 6.01159752357628e-06, - "loss": 0.9253, + "learning_rate": 6.740933737830446e-07, + "loss": 0.9692, "step": 22644 }, { - "epoch": 0.6417013800334381, + "epoch": 0.8860239455356445, "grad_norm": 0.0, - "learning_rate": 6.0107559088751475e-06, - "loss": 0.785, + "learning_rate": 6.7363605958221e-07, + "loss": 0.9919, "step": 22645 }, { - "epoch": 0.6417297174757006, + "epoch": 0.8860630722278738, "grad_norm": 0.0, - "learning_rate": 6.009914327776901e-06, - "loss": 0.8609, + "learning_rate": 6.731788951510932e-07, + "loss": 1.0277, "step": 22646 }, { - "epoch": 0.6417580549179631, + "epoch": 0.8861021989201033, "grad_norm": 0.0, - "learning_rate": 6.009072780288626e-06, - "loss": 0.824, + "learning_rate": 6.72721880497037e-07, + "loss": 0.8544, "step": 22647 }, { - "epoch": 0.6417863923602256, + "epoch": 0.8861413256123327, "grad_norm": 0.0, - "learning_rate": 6.008231266417417e-06, - "loss": 0.8675, + "learning_rate": 6.722650156273758e-07, + "loss": 0.9209, "step": 22648 }, { - "epoch": 0.641814729802488, + "epoch": 0.8861804523045622, "grad_norm": 0.0, - "learning_rate": 6.007389786170355e-06, - "loss": 0.8021, + "learning_rate": 6.718083005494547e-07, + "loss": 0.9441, "step": 22649 }, { - "epoch": 0.6418430672447505, + "epoch": 0.8862195789967916, "grad_norm": 0.0, - "learning_rate": 6.00654833955453e-06, - "loss": 0.7954, + "learning_rate": 6.713517352706012e-07, + "loss": 1.0588, "step": 22650 }, { - "epoch": 0.641871404687013, + "epoch": 0.8862587056890211, "grad_norm": 0.0, - "learning_rate": 6.005706926577033e-06, - "loss": 0.7844, + "learning_rate": 6.708953197981504e-07, + "loss": 1.0248, "step": 22651 }, { - "epoch": 0.6418997421292754, + "epoch": 0.8862978323812505, "grad_norm": 0.0, - "learning_rate": 6.004865547244949e-06, - "loss": 0.786, + "learning_rate": 6.704390541394278e-07, + "loss": 0.952, "step": 22652 }, { - "epoch": 0.6419280795715379, + "epoch": 0.88633695907348, "grad_norm": 0.0, - "learning_rate": 6.004024201565366e-06, - "loss": 0.8409, + "learning_rate": 6.699829383017675e-07, + "loss": 0.8991, "step": 22653 }, { - "epoch": 0.6419564170138004, + "epoch": 0.8863760857657094, "grad_norm": 0.0, - "learning_rate": 6.003182889545374e-06, - "loss": 0.8383, + "learning_rate": 6.69526972292488e-07, + "loss": 0.9296, "step": 22654 }, { - "epoch": 0.6419847544560628, + "epoch": 0.8864152124579388, "grad_norm": 0.0, - "learning_rate": 6.002341611192053e-06, - "loss": 0.8643, + "learning_rate": 6.690711561189145e-07, + "loss": 0.8551, "step": 22655 }, { - "epoch": 0.6420130918983252, + "epoch": 0.8864543391501682, "grad_norm": 0.0, - "learning_rate": 6.001500366512498e-06, - "loss": 0.8505, + "learning_rate": 6.686154897883634e-07, + "loss": 0.9582, "step": 22656 }, { - "epoch": 0.6420414293405877, + "epoch": 0.8864934658423976, "grad_norm": 0.0, - "learning_rate": 6.000659155513786e-06, - "loss": 0.8874, + "learning_rate": 6.681599733081579e-07, + "loss": 0.9601, "step": 22657 }, { - "epoch": 0.6420697667828502, + "epoch": 0.8865325925346271, "grad_norm": 0.0, - "learning_rate": 5.999817978203006e-06, - "loss": 0.8539, + "learning_rate": 6.677046066856075e-07, + "loss": 0.8555, "step": 22658 }, { - "epoch": 0.6420981042251126, + "epoch": 0.8865717192268565, "grad_norm": 0.0, - "learning_rate": 5.998976834587246e-06, - "loss": 0.8603, + "learning_rate": 6.672493899280297e-07, + "loss": 0.856, "step": 22659 }, { - "epoch": 0.6421264416673751, + "epoch": 0.886610845919086, "grad_norm": 0.0, - "learning_rate": 5.998135724673591e-06, - "loss": 0.8493, + "learning_rate": 6.667943230427298e-07, + "loss": 1.0015, "step": 22660 }, { - "epoch": 0.6421547791096376, + "epoch": 0.8866499726113154, "grad_norm": 0.0, - "learning_rate": 5.997294648469128e-06, - "loss": 0.754, + "learning_rate": 6.663394060370177e-07, + "loss": 1.042, "step": 22661 }, { - "epoch": 0.6421831165519001, + "epoch": 0.8866890993035449, "grad_norm": 0.0, - "learning_rate": 5.996453605980932e-06, - "loss": 0.7707, + "learning_rate": 6.658846389181994e-07, + "loss": 1.0369, "step": 22662 }, { - "epoch": 0.6422114539941625, + "epoch": 0.8867282259957743, "grad_norm": 0.0, - "learning_rate": 5.995612597216096e-06, - "loss": 0.8517, + "learning_rate": 6.654300216935794e-07, + "loss": 0.9235, "step": 22663 }, { - "epoch": 0.642239791436425, + "epoch": 0.8867673526880038, "grad_norm": 0.0, - "learning_rate": 5.994771622181703e-06, - "loss": 0.8126, + "learning_rate": 6.649755543704539e-07, + "loss": 0.9993, "step": 22664 }, { - "epoch": 0.6422681288786874, + "epoch": 0.8868064793802332, "grad_norm": 0.0, - "learning_rate": 5.993930680884834e-06, - "loss": 0.7765, + "learning_rate": 6.645212369561249e-07, + "loss": 0.8969, "step": 22665 }, { - "epoch": 0.6422964663209498, + "epoch": 0.8868456060724627, "grad_norm": 0.0, - "learning_rate": 5.993089773332577e-06, - "loss": 0.9115, + "learning_rate": 6.640670694578855e-07, + "loss": 0.9924, "step": 22666 }, { - "epoch": 0.6423248037632123, + "epoch": 0.886884732764692, "grad_norm": 0.0, - "learning_rate": 5.992248899532014e-06, - "loss": 0.8693, + "learning_rate": 6.63613051883033e-07, + "loss": 0.9899, "step": 22667 }, { - "epoch": 0.6423531412054748, + "epoch": 0.8869238594569215, "grad_norm": 0.0, - "learning_rate": 5.991408059490223e-06, - "loss": 0.9438, + "learning_rate": 6.631591842388529e-07, + "loss": 0.8168, "step": 22668 }, { - "epoch": 0.6423814786477372, + "epoch": 0.8869629861491509, "grad_norm": 0.0, - "learning_rate": 5.9905672532142955e-06, - "loss": 0.7922, + "learning_rate": 6.627054665326394e-07, + "loss": 0.9896, "step": 22669 }, { - "epoch": 0.6424098160899997, + "epoch": 0.8870021128413804, "grad_norm": 0.0, - "learning_rate": 5.989726480711304e-06, - "loss": 0.7391, + "learning_rate": 6.622518987716742e-07, + "loss": 0.8826, "step": 22670 }, { - "epoch": 0.6424381535322622, + "epoch": 0.8870412395336098, "grad_norm": 0.0, - "learning_rate": 5.988885741988336e-06, - "loss": 0.8326, + "learning_rate": 6.617984809632416e-07, + "loss": 0.8347, "step": 22671 }, { - "epoch": 0.6424664909745247, + "epoch": 0.8870803662258393, "grad_norm": 0.0, - "learning_rate": 5.9880450370524744e-06, - "loss": 0.7913, + "learning_rate": 6.613452131146248e-07, + "loss": 0.8989, "step": 22672 }, { - "epoch": 0.6424948284167871, + "epoch": 0.8871194929180687, "grad_norm": 0.0, - "learning_rate": 5.987204365910798e-06, - "loss": 0.9471, + "learning_rate": 6.608920952331033e-07, + "loss": 1.0107, "step": 22673 }, { - "epoch": 0.6425231658590496, + "epoch": 0.8871586196102982, "grad_norm": 0.0, - "learning_rate": 5.98636372857039e-06, - "loss": 0.7513, + "learning_rate": 6.604391273259503e-07, + "loss": 1.0276, "step": 22674 }, { - "epoch": 0.642551503301312, + "epoch": 0.8871977463025276, "grad_norm": 0.0, - "learning_rate": 5.985523125038333e-06, - "loss": 0.8769, + "learning_rate": 6.599863094004422e-07, + "loss": 0.986, "step": 22675 }, { - "epoch": 0.6425798407435744, + "epoch": 0.8872368729947571, "grad_norm": 0.0, - "learning_rate": 5.984682555321702e-06, - "loss": 0.7896, + "learning_rate": 6.59533641463852e-07, + "loss": 0.8634, "step": 22676 }, { - "epoch": 0.6426081781858369, + "epoch": 0.8872759996869864, "grad_norm": 0.0, - "learning_rate": 5.983842019427583e-06, - "loss": 0.7218, + "learning_rate": 6.590811235234451e-07, + "loss": 1.0439, "step": 22677 }, { - "epoch": 0.6426365156280994, + "epoch": 0.887315126379216, "grad_norm": 0.0, - "learning_rate": 5.983001517363053e-06, - "loss": 0.8828, + "learning_rate": 6.586287555864912e-07, + "loss": 1.0029, "step": 22678 }, { - "epoch": 0.6426648530703619, + "epoch": 0.8873542530714453, "grad_norm": 0.0, - "learning_rate": 5.982161049135191e-06, - "loss": 0.9589, + "learning_rate": 6.581765376602533e-07, + "loss": 1.0038, "step": 22679 }, { - "epoch": 0.6426931905126243, + "epoch": 0.8873933797636748, "grad_norm": 0.0, - "learning_rate": 5.981320614751085e-06, - "loss": 0.8207, + "learning_rate": 6.577244697519969e-07, + "loss": 0.9881, "step": 22680 }, { - "epoch": 0.6427215279548868, + "epoch": 0.8874325064559042, "grad_norm": 0.0, - "learning_rate": 5.980480214217801e-06, - "loss": 0.8981, + "learning_rate": 6.57272551868976e-07, + "loss": 0.9061, "step": 22681 }, { - "epoch": 0.6427498653971493, + "epoch": 0.8874716331481337, "grad_norm": 0.0, - "learning_rate": 5.979639847542427e-06, - "loss": 0.937, + "learning_rate": 6.568207840184537e-07, + "loss": 0.9963, "step": 22682 }, { - "epoch": 0.6427782028394117, + "epoch": 0.8875107598403631, "grad_norm": 0.0, - "learning_rate": 5.978799514732042e-06, - "loss": 0.8481, + "learning_rate": 6.563691662076777e-07, + "loss": 0.9812, "step": 22683 }, { - "epoch": 0.6428065402816742, + "epoch": 0.8875498865325925, "grad_norm": 0.0, - "learning_rate": 5.977959215793718e-06, - "loss": 0.9326, + "learning_rate": 6.559176984439087e-07, + "loss": 0.9401, "step": 22684 }, { - "epoch": 0.6428348777239367, + "epoch": 0.887589013224822, "grad_norm": 0.0, - "learning_rate": 5.97711895073454e-06, - "loss": 0.7931, + "learning_rate": 6.554663807343908e-07, + "loss": 1.0536, "step": 22685 }, { - "epoch": 0.6428632151661992, + "epoch": 0.8876281399170514, "grad_norm": 0.0, - "learning_rate": 5.976278719561581e-06, - "loss": 0.7885, + "learning_rate": 6.550152130863751e-07, + "loss": 0.998, "step": 22686 }, { - "epoch": 0.6428915526084615, + "epoch": 0.8876672666092809, "grad_norm": 0.0, - "learning_rate": 5.97543852228192e-06, - "loss": 0.7956, + "learning_rate": 6.545641955071036e-07, + "loss": 0.9379, "step": 22687 }, { - "epoch": 0.642919890050724, + "epoch": 0.8877063933015102, "grad_norm": 0.0, - "learning_rate": 5.974598358902639e-06, - "loss": 0.8537, + "learning_rate": 6.541133280038203e-07, + "loss": 1.0473, "step": 22688 }, { - "epoch": 0.6429482274929865, + "epoch": 0.8877455199937397, "grad_norm": 0.0, - "learning_rate": 5.973758229430806e-06, - "loss": 0.8214, + "learning_rate": 6.536626105837662e-07, + "loss": 0.9875, "step": 22689 }, { - "epoch": 0.6429765649352489, + "epoch": 0.8877846466859691, "grad_norm": 0.0, - "learning_rate": 5.972918133873506e-06, - "loss": 0.8855, + "learning_rate": 6.53212043254181e-07, + "loss": 1.0521, "step": 22690 }, { - "epoch": 0.6430049023775114, + "epoch": 0.8878237733781986, "grad_norm": 0.0, - "learning_rate": 5.972078072237808e-06, - "loss": 0.8312, + "learning_rate": 6.527616260222958e-07, + "loss": 0.853, "step": 22691 }, { - "epoch": 0.6430332398197739, + "epoch": 0.887862900070428, "grad_norm": 0.0, - "learning_rate": 5.971238044530794e-06, - "loss": 0.7873, + "learning_rate": 6.523113588953466e-07, + "loss": 0.931, "step": 22692 }, { - "epoch": 0.6430615772620363, + "epoch": 0.8879020267626575, "grad_norm": 0.0, - "learning_rate": 5.97039805075954e-06, - "loss": 0.9144, + "learning_rate": 6.518612418805637e-07, + "loss": 1.0115, "step": 22693 }, { - "epoch": 0.6430899147042988, + "epoch": 0.8879411534548869, "grad_norm": 0.0, - "learning_rate": 5.969558090931118e-06, - "loss": 0.7454, + "learning_rate": 6.514112749851764e-07, + "loss": 0.8738, "step": 22694 }, { - "epoch": 0.6431182521465613, + "epoch": 0.8879802801471164, "grad_norm": 0.0, - "learning_rate": 5.968718165052604e-06, - "loss": 0.7321, + "learning_rate": 6.509614582164081e-07, + "loss": 1.0117, "step": 22695 }, { - "epoch": 0.6431465895888238, + "epoch": 0.8880194068393458, "grad_norm": 0.0, - "learning_rate": 5.967878273131078e-06, - "loss": 0.8087, + "learning_rate": 6.505117915814863e-07, + "loss": 1.0169, "step": 22696 }, { - "epoch": 0.6431749270310861, + "epoch": 0.8880585335315753, "grad_norm": 0.0, - "learning_rate": 5.967038415173605e-06, - "loss": 0.7836, + "learning_rate": 6.500622750876251e-07, + "loss": 0.8741, "step": 22697 }, { - "epoch": 0.6432032644733486, + "epoch": 0.8880976602238047, "grad_norm": 0.0, - "learning_rate": 5.966198591187269e-06, - "loss": 0.8678, + "learning_rate": 6.496129087420511e-07, + "loss": 1.1364, "step": 22698 }, { - "epoch": 0.6432316019156111, + "epoch": 0.8881367869160341, "grad_norm": 0.0, - "learning_rate": 5.965358801179138e-06, - "loss": 0.7941, + "learning_rate": 6.491636925519762e-07, + "loss": 0.9612, "step": 22699 }, { - "epoch": 0.6432599393578735, + "epoch": 0.8881759136082635, "grad_norm": 0.0, - "learning_rate": 5.964519045156286e-06, - "loss": 0.8523, + "learning_rate": 6.487146265246169e-07, + "loss": 0.8698, "step": 22700 }, { - "epoch": 0.643288276800136, + "epoch": 0.888215040300493, "grad_norm": 0.0, - "learning_rate": 5.963679323125795e-06, - "loss": 0.8796, + "learning_rate": 6.482657106671785e-07, + "loss": 1.0555, "step": 22701 }, { - "epoch": 0.6433166142423985, + "epoch": 0.8882541669927224, "grad_norm": 0.0, - "learning_rate": 5.962839635094726e-06, - "loss": 0.9088, + "learning_rate": 6.478169449868787e-07, + "loss": 0.894, "step": 22702 }, { - "epoch": 0.643344951684661, + "epoch": 0.8882932936849519, "grad_norm": 0.0, - "learning_rate": 5.961999981070159e-06, - "loss": 0.9044, + "learning_rate": 6.473683294909172e-07, + "loss": 0.8911, "step": 22703 }, { - "epoch": 0.6433732891269234, + "epoch": 0.8883324203771813, "grad_norm": 0.0, - "learning_rate": 5.961160361059168e-06, - "loss": 0.7755, + "learning_rate": 6.469198641865038e-07, + "loss": 1.0596, "step": 22704 }, { - "epoch": 0.6434016265691859, + "epoch": 0.8883715470694108, "grad_norm": 0.0, - "learning_rate": 5.960320775068821e-06, - "loss": 0.8923, + "learning_rate": 6.464715490808349e-07, + "loss": 0.9179, "step": 22705 }, { - "epoch": 0.6434299640114484, + "epoch": 0.8884106737616402, "grad_norm": 0.0, - "learning_rate": 5.959481223106196e-06, - "loss": 0.6872, + "learning_rate": 6.460233841811125e-07, + "loss": 0.9383, "step": 22706 }, { - "epoch": 0.6434583014537107, + "epoch": 0.8884498004538697, "grad_norm": 0.0, - "learning_rate": 5.958641705178356e-06, - "loss": 0.8778, + "learning_rate": 6.455753694945332e-07, + "loss": 0.989, "step": 22707 }, { - "epoch": 0.6434866388959732, + "epoch": 0.8884889271460991, "grad_norm": 0.0, - "learning_rate": 5.957802221292379e-06, - "loss": 0.7938, + "learning_rate": 6.451275050282935e-07, + "loss": 0.8073, "step": 22708 }, { - "epoch": 0.6435149763382357, + "epoch": 0.8885280538383286, "grad_norm": 0.0, - "learning_rate": 5.956962771455338e-06, - "loss": 0.8431, + "learning_rate": 6.446797907895819e-07, + "loss": 1.0069, "step": 22709 }, { - "epoch": 0.6435433137804982, + "epoch": 0.8885671805305579, "grad_norm": 0.0, - "learning_rate": 5.956123355674297e-06, - "loss": 0.8423, + "learning_rate": 6.442322267855894e-07, + "loss": 0.9676, "step": 22710 }, { - "epoch": 0.6435716512227606, + "epoch": 0.8886063072227874, "grad_norm": 0.0, - "learning_rate": 5.955283973956332e-06, - "loss": 0.7718, + "learning_rate": 6.437848130235047e-07, + "loss": 0.8445, "step": 22711 }, { - "epoch": 0.6435999886650231, + "epoch": 0.8886454339150168, "grad_norm": 0.0, - "learning_rate": 5.954444626308513e-06, - "loss": 0.9233, + "learning_rate": 6.433375495105132e-07, + "loss": 0.9001, "step": 22712 }, { - "epoch": 0.6436283261072856, + "epoch": 0.8886845606072462, "grad_norm": 0.0, - "learning_rate": 5.953605312737907e-06, - "loss": 0.836, + "learning_rate": 6.428904362537946e-07, + "loss": 0.8872, "step": 22713 }, { - "epoch": 0.643656663549548, + "epoch": 0.8887236872994757, "grad_norm": 0.0, - "learning_rate": 5.95276603325159e-06, - "loss": 0.8731, + "learning_rate": 6.424434732605312e-07, + "loss": 0.9418, "step": 22714 }, { - "epoch": 0.6436850009918105, + "epoch": 0.8887628139917051, "grad_norm": 0.0, - "learning_rate": 5.9519267878566235e-06, - "loss": 0.8175, + "learning_rate": 6.419966605379002e-07, + "loss": 0.93, "step": 22715 }, { - "epoch": 0.643713338434073, + "epoch": 0.8888019406839346, "grad_norm": 0.0, - "learning_rate": 5.951087576560081e-06, - "loss": 0.8231, + "learning_rate": 6.415499980930761e-07, + "loss": 1.0034, "step": 22716 }, { - "epoch": 0.6437416758763354, + "epoch": 0.888841067376164, "grad_norm": 0.0, - "learning_rate": 5.950248399369034e-06, - "loss": 0.8302, + "learning_rate": 6.411034859332321e-07, + "loss": 1.052, "step": 22717 }, { - "epoch": 0.6437700133185978, + "epoch": 0.8888801940683935, "grad_norm": 0.0, - "learning_rate": 5.949409256290546e-06, - "loss": 0.9914, + "learning_rate": 6.406571240655402e-07, + "loss": 0.9336, "step": 22718 }, { - "epoch": 0.6437983507608603, + "epoch": 0.8889193207606229, "grad_norm": 0.0, - "learning_rate": 5.9485701473316925e-06, - "loss": 0.7914, + "learning_rate": 6.402109124971645e-07, + "loss": 1.0294, "step": 22719 }, { - "epoch": 0.6438266882031228, + "epoch": 0.8889584474528524, "grad_norm": 0.0, - "learning_rate": 5.947731072499533e-06, - "loss": 0.818, + "learning_rate": 6.397648512352739e-07, + "loss": 0.9839, "step": 22720 }, { - "epoch": 0.6438550256453852, + "epoch": 0.8889975741450817, "grad_norm": 0.0, - "learning_rate": 5.946892031801139e-06, - "loss": 0.865, + "learning_rate": 6.393189402870315e-07, + "loss": 0.8857, "step": 22721 }, { - "epoch": 0.6438833630876477, + "epoch": 0.8890367008373112, "grad_norm": 0.0, - "learning_rate": 5.946053025243584e-06, - "loss": 0.7211, + "learning_rate": 6.388731796595971e-07, + "loss": 0.9547, "step": 22722 }, { - "epoch": 0.6439117005299102, + "epoch": 0.8890758275295406, "grad_norm": 0.0, - "learning_rate": 5.945214052833923e-06, - "loss": 0.8202, + "learning_rate": 6.384275693601293e-07, + "loss": 0.9612, "step": 22723 }, { - "epoch": 0.6439400379721726, + "epoch": 0.8891149542217701, "grad_norm": 0.0, - "learning_rate": 5.944375114579232e-06, - "loss": 0.746, + "learning_rate": 6.379821093957838e-07, + "loss": 0.9548, "step": 22724 }, { - "epoch": 0.6439683754144351, + "epoch": 0.8891540809139995, "grad_norm": 0.0, - "learning_rate": 5.943536210486577e-06, - "loss": 0.7875, + "learning_rate": 6.375367997737147e-07, + "loss": 0.8759, "step": 22725 }, { - "epoch": 0.6439967128566976, + "epoch": 0.889193207606229, "grad_norm": 0.0, - "learning_rate": 5.942697340563019e-06, - "loss": 0.8196, + "learning_rate": 6.37091640501073e-07, + "loss": 0.9813, "step": 22726 }, { - "epoch": 0.6440250502989601, + "epoch": 0.8892323342984584, "grad_norm": 0.0, - "learning_rate": 5.941858504815634e-06, - "loss": 0.9056, + "learning_rate": 6.366466315850062e-07, + "loss": 0.9853, "step": 22727 }, { - "epoch": 0.6440533877412224, + "epoch": 0.8892714609906879, "grad_norm": 0.0, - "learning_rate": 5.9410197032514785e-06, - "loss": 0.8107, + "learning_rate": 6.362017730326609e-07, + "loss": 0.9607, "step": 22728 }, { - "epoch": 0.6440817251834849, + "epoch": 0.8893105876829173, "grad_norm": 0.0, - "learning_rate": 5.94018093587762e-06, - "loss": 0.9, + "learning_rate": 6.357570648511846e-07, + "loss": 0.981, "step": 22729 }, { - "epoch": 0.6441100626257474, + "epoch": 0.8893497143751468, "grad_norm": 0.0, - "learning_rate": 5.939342202701126e-06, - "loss": 0.8747, + "learning_rate": 6.353125070477129e-07, + "loss": 1.0011, "step": 22730 }, { - "epoch": 0.6441384000680098, + "epoch": 0.8893888410673761, "grad_norm": 0.0, - "learning_rate": 5.93850350372906e-06, - "loss": 0.8754, + "learning_rate": 6.348680996293899e-07, + "loss": 0.957, "step": 22731 }, { - "epoch": 0.6441667375102723, + "epoch": 0.8894279677596056, "grad_norm": 0.0, - "learning_rate": 5.937664838968487e-06, - "loss": 0.9167, + "learning_rate": 6.344238426033478e-07, + "loss": 0.8621, "step": 22732 }, { - "epoch": 0.6441950749525348, + "epoch": 0.889467094451835, "grad_norm": 0.0, - "learning_rate": 5.936826208426475e-06, - "loss": 0.8706, + "learning_rate": 6.339797359767253e-07, + "loss": 1.0065, "step": 22733 }, { - "epoch": 0.6442234123947972, + "epoch": 0.8895062211440645, "grad_norm": 0.0, - "learning_rate": 5.935987612110081e-06, - "loss": 0.8357, + "learning_rate": 6.335357797566499e-07, + "loss": 0.9495, "step": 22734 }, { - "epoch": 0.6442517498370597, + "epoch": 0.8895453478362939, "grad_norm": 0.0, - "learning_rate": 5.935149050026374e-06, - "loss": 0.9044, + "learning_rate": 6.33091973950255e-07, + "loss": 0.9383, "step": 22735 }, { - "epoch": 0.6442800872793222, + "epoch": 0.8895844745285234, "grad_norm": 0.0, - "learning_rate": 5.934310522182415e-06, - "loss": 0.9314, + "learning_rate": 6.326483185646648e-07, + "loss": 0.8921, "step": 22736 }, { - "epoch": 0.6443084247215847, + "epoch": 0.8896236012207528, "grad_norm": 0.0, - "learning_rate": 5.93347202858527e-06, - "loss": 0.7881, + "learning_rate": 6.322048136070036e-07, + "loss": 0.9894, "step": 22737 }, { - "epoch": 0.644336762163847, + "epoch": 0.8896627279129823, "grad_norm": 0.0, - "learning_rate": 5.932633569242e-06, - "loss": 0.9097, + "learning_rate": 6.317614590843945e-07, + "loss": 0.9338, "step": 22738 }, { - "epoch": 0.6443650996061095, + "epoch": 0.8897018546052117, "grad_norm": 0.0, - "learning_rate": 5.9317951441596656e-06, - "loss": 0.9255, + "learning_rate": 6.313182550039598e-07, + "loss": 1.1033, "step": 22739 }, { - "epoch": 0.644393437048372, + "epoch": 0.8897409812974412, "grad_norm": 0.0, - "learning_rate": 5.930956753345332e-06, - "loss": 0.876, + "learning_rate": 6.308752013728126e-07, + "loss": 0.889, "step": 22740 }, { - "epoch": 0.6444217744906344, + "epoch": 0.8897801079896706, "grad_norm": 0.0, - "learning_rate": 5.930118396806064e-06, - "loss": 0.9145, + "learning_rate": 6.304322981980693e-07, + "loss": 1.0337, "step": 22741 }, { - "epoch": 0.6444501119328969, + "epoch": 0.8898192346818999, "grad_norm": 0.0, - "learning_rate": 5.929280074548915e-06, - "loss": 0.9534, + "learning_rate": 6.299895454868421e-07, + "loss": 0.8551, "step": 22742 }, { - "epoch": 0.6444784493751594, + "epoch": 0.8898583613741294, "grad_norm": 0.0, - "learning_rate": 5.928441786580957e-06, - "loss": 0.8628, + "learning_rate": 6.295469432462442e-07, + "loss": 0.9005, "step": 22743 }, { - "epoch": 0.6445067868174219, + "epoch": 0.8898974880663588, "grad_norm": 0.0, - "learning_rate": 5.927603532909241e-06, - "loss": 0.8197, + "learning_rate": 6.291044914833777e-07, + "loss": 0.923, "step": 22744 }, { - "epoch": 0.6445351242596843, + "epoch": 0.8899366147585883, "grad_norm": 0.0, - "learning_rate": 5.926765313540832e-06, - "loss": 0.8092, + "learning_rate": 6.286621902053524e-07, + "loss": 0.9675, "step": 22745 }, { - "epoch": 0.6445634617019468, + "epoch": 0.8899757414508177, "grad_norm": 0.0, - "learning_rate": 5.9259271284827965e-06, - "loss": 0.8627, + "learning_rate": 6.282200394192673e-07, + "loss": 0.9979, "step": 22746 }, { - "epoch": 0.6445917991442093, + "epoch": 0.8900148681430472, "grad_norm": 0.0, - "learning_rate": 5.925088977742186e-06, - "loss": 0.8521, + "learning_rate": 6.277780391322275e-07, + "loss": 0.969, "step": 22747 }, { - "epoch": 0.6446201365864717, + "epoch": 0.8900539948352766, "grad_norm": 0.0, - "learning_rate": 5.924250861326066e-06, - "loss": 0.8177, + "learning_rate": 6.273361893513264e-07, + "loss": 0.844, "step": 22748 }, { - "epoch": 0.6446484740287342, + "epoch": 0.8900931215275061, "grad_norm": 0.0, - "learning_rate": 5.923412779241493e-06, - "loss": 0.9145, + "learning_rate": 6.268944900836638e-07, + "loss": 0.9794, "step": 22749 }, { - "epoch": 0.6446768114709966, + "epoch": 0.8901322482197355, "grad_norm": 0.0, - "learning_rate": 5.922574731495528e-06, - "loss": 0.8644, + "learning_rate": 6.264529413363263e-07, + "loss": 1.0135, "step": 22750 }, { - "epoch": 0.6447051489132591, + "epoch": 0.890171374911965, "grad_norm": 0.0, - "learning_rate": 5.921736718095232e-06, - "loss": 0.8698, + "learning_rate": 6.260115431164127e-07, + "loss": 0.8951, "step": 22751 }, { - "epoch": 0.6447334863555215, + "epoch": 0.8902105016041943, "grad_norm": 0.0, - "learning_rate": 5.92089873904766e-06, - "loss": 0.7928, + "learning_rate": 6.255702954310061e-07, + "loss": 1.0095, "step": 22752 }, { - "epoch": 0.644761823797784, + "epoch": 0.8902496282964238, "grad_norm": 0.0, - "learning_rate": 5.920060794359872e-06, - "loss": 0.859, + "learning_rate": 6.251291982871943e-07, + "loss": 0.9602, "step": 22753 }, { - "epoch": 0.6447901612400465, + "epoch": 0.8902887549886532, "grad_norm": 0.0, - "learning_rate": 5.919222884038932e-06, - "loss": 0.8843, + "learning_rate": 6.246882516920593e-07, + "loss": 0.9048, "step": 22754 }, { - "epoch": 0.6448184986823089, + "epoch": 0.8903278816808827, "grad_norm": 0.0, - "learning_rate": 5.9183850080918885e-06, - "loss": 0.8425, + "learning_rate": 6.242474556526834e-07, + "loss": 0.9836, "step": 22755 }, { - "epoch": 0.6448468361245714, + "epoch": 0.8903670083731121, "grad_norm": 0.0, - "learning_rate": 5.917547166525806e-06, - "loss": 0.9469, + "learning_rate": 6.238068101761452e-07, + "loss": 0.944, "step": 22756 }, { - "epoch": 0.6448751735668339, + "epoch": 0.8904061350653416, "grad_norm": 0.0, - "learning_rate": 5.916709359347737e-06, - "loss": 0.7899, + "learning_rate": 6.233663152695213e-07, + "loss": 0.9632, "step": 22757 }, { - "epoch": 0.6449035110090963, + "epoch": 0.890445261757571, "grad_norm": 0.0, - "learning_rate": 5.915871586564741e-06, - "loss": 0.8485, + "learning_rate": 6.229259709398828e-07, + "loss": 0.8817, "step": 22758 }, { - "epoch": 0.6449318484513588, + "epoch": 0.8904843884498005, "grad_norm": 0.0, - "learning_rate": 5.91503384818388e-06, - "loss": 0.7884, + "learning_rate": 6.224857771943038e-07, + "loss": 0.9438, "step": 22759 }, { - "epoch": 0.6449601858936213, + "epoch": 0.8905235151420299, "grad_norm": 0.0, - "learning_rate": 5.914196144212201e-06, - "loss": 0.9317, + "learning_rate": 6.220457340398533e-07, + "loss": 0.9459, "step": 22760 }, { - "epoch": 0.6449885233358837, + "epoch": 0.8905626418342594, "grad_norm": 0.0, - "learning_rate": 5.913358474656766e-06, - "loss": 0.7666, + "learning_rate": 6.216058414835969e-07, + "loss": 1.0088, "step": 22761 }, { - "epoch": 0.6450168607781461, + "epoch": 0.8906017685264888, "grad_norm": 0.0, - "learning_rate": 5.9125208395246315e-06, - "loss": 0.819, + "learning_rate": 6.211660995325986e-07, + "loss": 1.1251, "step": 22762 }, { - "epoch": 0.6450451982204086, + "epoch": 0.8906408952187183, "grad_norm": 0.0, - "learning_rate": 5.911683238822851e-06, - "loss": 0.8214, + "learning_rate": 6.207265081939218e-07, + "loss": 0.872, "step": 22763 }, { - "epoch": 0.6450735356626711, + "epoch": 0.8906800219109476, "grad_norm": 0.0, - "learning_rate": 5.910845672558483e-06, - "loss": 0.8681, + "learning_rate": 6.202870674746231e-07, + "loss": 0.9238, "step": 22764 }, { - "epoch": 0.6451018731049335, + "epoch": 0.8907191486031771, "grad_norm": 0.0, - "learning_rate": 5.910008140738578e-06, - "loss": 0.9241, + "learning_rate": 6.198477773817601e-07, + "loss": 0.9868, "step": 22765 }, { - "epoch": 0.645130210547196, + "epoch": 0.8907582752954065, "grad_norm": 0.0, - "learning_rate": 5.909170643370192e-06, - "loss": 0.8841, + "learning_rate": 6.194086379223874e-07, + "loss": 0.9898, "step": 22766 }, { - "epoch": 0.6451585479894585, + "epoch": 0.890797401987636, "grad_norm": 0.0, - "learning_rate": 5.9083331804603865e-06, - "loss": 0.8526, + "learning_rate": 6.18969649103559e-07, + "loss": 0.901, "step": 22767 }, { - "epoch": 0.645186885431721, + "epoch": 0.8908365286798654, "grad_norm": 0.0, - "learning_rate": 5.907495752016203e-06, - "loss": 0.7688, + "learning_rate": 6.18530810932323e-07, + "loss": 0.975, "step": 22768 }, { - "epoch": 0.6452152228739834, + "epoch": 0.8908756553720948, "grad_norm": 0.0, - "learning_rate": 5.906658358044704e-06, - "loss": 0.8059, + "learning_rate": 6.180921234157245e-07, + "loss": 0.9812, "step": 22769 }, { - "epoch": 0.6452435603162459, + "epoch": 0.8909147820643243, "grad_norm": 0.0, - "learning_rate": 5.905820998552944e-06, - "loss": 0.8459, + "learning_rate": 6.176535865608135e-07, + "loss": 0.8414, "step": 22770 }, { - "epoch": 0.6452718977585084, + "epoch": 0.8909539087565537, "grad_norm": 0.0, - "learning_rate": 5.9049836735479725e-06, - "loss": 0.8045, + "learning_rate": 6.172152003746268e-07, + "loss": 0.9874, "step": 22771 }, { - "epoch": 0.6453002352007707, + "epoch": 0.8909930354487832, "grad_norm": 0.0, - "learning_rate": 5.904146383036849e-06, - "loss": 0.8515, + "learning_rate": 6.167769648642064e-07, + "loss": 0.8965, "step": 22772 }, { - "epoch": 0.6453285726430332, + "epoch": 0.8910321621410126, "grad_norm": 0.0, - "learning_rate": 5.903309127026615e-06, - "loss": 0.8468, + "learning_rate": 6.16338880036591e-07, + "loss": 0.9372, "step": 22773 }, { - "epoch": 0.6453569100852957, + "epoch": 0.891071288833242, "grad_norm": 0.0, - "learning_rate": 5.902471905524331e-06, - "loss": 0.8117, + "learning_rate": 6.159009458988152e-07, + "loss": 0.896, "step": 22774 }, { - "epoch": 0.6453852475275582, + "epoch": 0.8911104155254714, "grad_norm": 0.0, - "learning_rate": 5.901634718537048e-06, - "loss": 0.8751, + "learning_rate": 6.154631624579111e-07, + "loss": 0.9361, "step": 22775 }, { - "epoch": 0.6454135849698206, + "epoch": 0.8911495422177009, "grad_norm": 0.0, - "learning_rate": 5.900797566071818e-06, - "loss": 0.8468, + "learning_rate": 6.150255297209095e-07, + "loss": 1.0624, "step": 22776 }, { - "epoch": 0.6454419224120831, + "epoch": 0.8911886689099303, "grad_norm": 0.0, - "learning_rate": 5.8999604481356955e-06, - "loss": 0.81, + "learning_rate": 6.145880476948352e-07, + "loss": 0.9848, "step": 22777 }, { - "epoch": 0.6454702598543456, + "epoch": 0.8912277956021598, "grad_norm": 0.0, - "learning_rate": 5.899123364735724e-06, - "loss": 0.9099, + "learning_rate": 6.1415071638672e-07, + "loss": 1.0491, "step": 22778 }, { - "epoch": 0.645498597296608, + "epoch": 0.8912669222943892, "grad_norm": 0.0, - "learning_rate": 5.8982863158789605e-06, - "loss": 0.8295, + "learning_rate": 6.137135358035806e-07, + "loss": 1.0195, "step": 22779 }, { - "epoch": 0.6455269347388705, + "epoch": 0.8913060489866187, "grad_norm": 0.0, - "learning_rate": 5.8974493015724576e-06, - "loss": 0.9638, + "learning_rate": 6.132765059524426e-07, + "loss": 0.8699, "step": 22780 }, { - "epoch": 0.645555272181133, + "epoch": 0.8913451756788481, "grad_norm": 0.0, - "learning_rate": 5.896612321823258e-06, - "loss": 0.8484, + "learning_rate": 6.12839626840318e-07, + "loss": 0.9015, "step": 22781 }, { - "epoch": 0.6455836096233953, + "epoch": 0.8913843023710776, "grad_norm": 0.0, - "learning_rate": 5.895775376638417e-06, - "loss": 0.8275, + "learning_rate": 6.124028984742292e-07, + "loss": 0.9929, "step": 22782 }, { - "epoch": 0.6456119470656578, + "epoch": 0.891423429063307, "grad_norm": 0.0, - "learning_rate": 5.894938466024986e-06, - "loss": 0.9358, + "learning_rate": 6.119663208611848e-07, + "loss": 0.9096, "step": 22783 }, { - "epoch": 0.6456402845079203, + "epoch": 0.8914625557555365, "grad_norm": 0.0, - "learning_rate": 5.894101589990011e-06, - "loss": 0.8509, + "learning_rate": 6.115298940081993e-07, + "loss": 1.0396, "step": 22784 }, { - "epoch": 0.6456686219501828, + "epoch": 0.8915016824477658, "grad_norm": 0.0, - "learning_rate": 5.893264748540548e-06, - "loss": 0.8495, + "learning_rate": 6.110936179222782e-07, + "loss": 0.8902, "step": 22785 }, { - "epoch": 0.6456969593924452, + "epoch": 0.8915408091399953, "grad_norm": 0.0, - "learning_rate": 5.892427941683636e-06, - "loss": 0.6925, + "learning_rate": 6.106574926104281e-07, + "loss": 0.9189, "step": 22786 }, { - "epoch": 0.6457252968347077, + "epoch": 0.8915799358322247, "grad_norm": 0.0, - "learning_rate": 5.8915911694263296e-06, - "loss": 0.8377, + "learning_rate": 6.102215180796533e-07, + "loss": 0.9279, "step": 22787 }, { - "epoch": 0.6457536342769702, + "epoch": 0.8916190625244542, "grad_norm": 0.0, - "learning_rate": 5.890754431775676e-06, - "loss": 0.8547, + "learning_rate": 6.097856943369562e-07, + "loss": 0.9472, "step": 22788 }, { - "epoch": 0.6457819717192326, + "epoch": 0.8916581892166836, "grad_norm": 0.0, - "learning_rate": 5.889917728738725e-06, - "loss": 0.8532, + "learning_rate": 6.093500213893333e-07, + "loss": 0.9686, "step": 22789 }, { - "epoch": 0.6458103091614951, + "epoch": 0.8916973159089131, "grad_norm": 0.0, - "learning_rate": 5.889081060322521e-06, - "loss": 0.8059, + "learning_rate": 6.089144992437812e-07, + "loss": 1.0892, "step": 22790 }, { - "epoch": 0.6458386466037576, + "epoch": 0.8917364426011425, "grad_norm": 0.0, - "learning_rate": 5.888244426534118e-06, - "loss": 0.8423, + "learning_rate": 6.084791279072955e-07, + "loss": 1.0433, "step": 22791 }, { - "epoch": 0.64586698404602, + "epoch": 0.891775569293372, "grad_norm": 0.0, - "learning_rate": 5.887407827380556e-06, - "loss": 0.7263, + "learning_rate": 6.080439073868671e-07, + "loss": 0.964, "step": 22792 }, { - "epoch": 0.6458953214882824, + "epoch": 0.8918146959856014, "grad_norm": 0.0, - "learning_rate": 5.886571262868888e-06, - "loss": 0.9137, + "learning_rate": 6.07608837689484e-07, + "loss": 0.8936, "step": 22793 }, { - "epoch": 0.6459236589305449, + "epoch": 0.8918538226778309, "grad_norm": 0.0, - "learning_rate": 5.8857347330061545e-06, - "loss": 0.8671, + "learning_rate": 6.071739188221349e-07, + "loss": 1.0885, "step": 22794 }, { - "epoch": 0.6459519963728074, + "epoch": 0.8918929493700603, "grad_norm": 0.0, - "learning_rate": 5.884898237799405e-06, - "loss": 0.8964, + "learning_rate": 6.067391507917997e-07, + "loss": 0.8761, "step": 22795 }, { - "epoch": 0.6459803338150698, + "epoch": 0.8919320760622897, "grad_norm": 0.0, - "learning_rate": 5.884061777255688e-06, - "loss": 0.9017, + "learning_rate": 6.063045336054674e-07, + "loss": 0.9616, "step": 22796 }, { - "epoch": 0.6460086712573323, + "epoch": 0.8919712027545191, "grad_norm": 0.0, - "learning_rate": 5.883225351382044e-06, - "loss": 0.9044, + "learning_rate": 6.058700672701101e-07, + "loss": 0.9719, "step": 22797 }, { - "epoch": 0.6460370086995948, + "epoch": 0.8920103294467485, "grad_norm": 0.0, - "learning_rate": 5.882388960185522e-06, - "loss": 0.9329, + "learning_rate": 6.054357517927112e-07, + "loss": 0.9478, "step": 22798 }, { - "epoch": 0.6460653461418573, + "epoch": 0.892049456138978, "grad_norm": 0.0, - "learning_rate": 5.881552603673171e-06, - "loss": 0.8552, + "learning_rate": 6.050015871802384e-07, + "loss": 0.9443, "step": 22799 }, { - "epoch": 0.6460936835841197, + "epoch": 0.8920885828312074, "grad_norm": 0.0, - "learning_rate": 5.880716281852028e-06, - "loss": 0.7964, + "learning_rate": 6.045675734396694e-07, + "loss": 0.974, "step": 22800 }, { - "epoch": 0.6461220210263822, + "epoch": 0.8921277095234369, "grad_norm": 0.0, - "learning_rate": 5.879879994729143e-06, - "loss": 0.8964, + "learning_rate": 6.041337105779721e-07, + "loss": 0.9928, "step": 22801 }, { - "epoch": 0.6461503584686447, + "epoch": 0.8921668362156663, "grad_norm": 0.0, - "learning_rate": 5.879043742311556e-06, - "loss": 0.7261, + "learning_rate": 6.036999986021141e-07, + "loss": 1.0072, "step": 22802 }, { - "epoch": 0.646178695910907, + "epoch": 0.8922059629078958, "grad_norm": 0.0, - "learning_rate": 5.878207524606316e-06, - "loss": 0.8391, + "learning_rate": 6.032664375190588e-07, + "loss": 0.7997, "step": 22803 }, { - "epoch": 0.6462070333531695, + "epoch": 0.8922450896001252, "grad_norm": 0.0, - "learning_rate": 5.877371341620468e-06, - "loss": 0.8768, + "learning_rate": 6.028330273357708e-07, + "loss": 0.9991, "step": 22804 }, { - "epoch": 0.646235370795432, + "epoch": 0.8922842162923547, "grad_norm": 0.0, - "learning_rate": 5.8765351933610474e-06, - "loss": 0.8396, + "learning_rate": 6.023997680592075e-07, + "loss": 1.0437, "step": 22805 }, { - "epoch": 0.6462637082376944, + "epoch": 0.892323342984584, "grad_norm": 0.0, - "learning_rate": 5.875699079835107e-06, - "loss": 0.7616, + "learning_rate": 6.019666596963303e-07, + "loss": 0.9711, "step": 22806 }, { - "epoch": 0.6462920456799569, + "epoch": 0.8923624696768135, "grad_norm": 0.0, - "learning_rate": 5.8748630010496795e-06, - "loss": 0.8158, + "learning_rate": 6.015337022540912e-07, + "loss": 0.9565, "step": 22807 }, { - "epoch": 0.6463203831222194, + "epoch": 0.8924015963690429, "grad_norm": 0.0, - "learning_rate": 5.874026957011814e-06, - "loss": 0.8793, + "learning_rate": 6.011008957394426e-07, + "loss": 0.9762, "step": 22808 }, { - "epoch": 0.6463487205644819, + "epoch": 0.8924407230612724, "grad_norm": 0.0, - "learning_rate": 5.873190947728552e-06, - "loss": 0.936, + "learning_rate": 6.006682401593389e-07, + "loss": 0.9028, "step": 22809 }, { - "epoch": 0.6463770580067443, + "epoch": 0.8924798497535018, "grad_norm": 0.0, - "learning_rate": 5.872354973206934e-06, - "loss": 0.7799, + "learning_rate": 6.002357355207234e-07, + "loss": 0.9807, "step": 22810 }, { - "epoch": 0.6464053954490068, + "epoch": 0.8925189764457313, "grad_norm": 0.0, - "learning_rate": 5.871519033454003e-06, - "loss": 0.8604, + "learning_rate": 5.998033818305426e-07, + "loss": 0.9211, "step": 22811 }, { - "epoch": 0.6464337328912693, + "epoch": 0.8925581031379607, "grad_norm": 0.0, - "learning_rate": 5.870683128476804e-06, - "loss": 0.8753, + "learning_rate": 5.993711790957423e-07, + "loss": 0.9962, "step": 22812 }, { - "epoch": 0.6464620703335316, + "epoch": 0.8925972298301902, "grad_norm": 0.0, - "learning_rate": 5.8698472582823705e-06, - "loss": 0.8923, + "learning_rate": 5.989391273232603e-07, + "loss": 0.935, "step": 22813 }, { - "epoch": 0.6464904077757941, + "epoch": 0.8926363565224196, "grad_norm": 0.0, - "learning_rate": 5.869011422877748e-06, - "loss": 0.8031, + "learning_rate": 5.985072265200354e-07, + "loss": 0.9581, "step": 22814 }, { - "epoch": 0.6465187452180566, + "epoch": 0.8926754832146491, "grad_norm": 0.0, - "learning_rate": 5.868175622269976e-06, - "loss": 0.756, + "learning_rate": 5.980754766930052e-07, + "loss": 1.1258, "step": 22815 }, { - "epoch": 0.6465470826603191, + "epoch": 0.8927146099068785, "grad_norm": 0.0, - "learning_rate": 5.8673398564660946e-06, - "loss": 0.8988, + "learning_rate": 5.976438778491001e-07, + "loss": 1.0111, "step": 22816 }, { - "epoch": 0.6465754201025815, + "epoch": 0.892753736599108, "grad_norm": 0.0, - "learning_rate": 5.866504125473149e-06, - "loss": 0.9424, + "learning_rate": 5.97212429995252e-07, + "loss": 0.8847, "step": 22817 }, { - "epoch": 0.646603757544844, + "epoch": 0.8927928632913373, "grad_norm": 0.0, - "learning_rate": 5.8656684292981685e-06, - "loss": 0.8852, + "learning_rate": 5.967811331383899e-07, + "loss": 0.9876, "step": 22818 }, { - "epoch": 0.6466320949871065, + "epoch": 0.8928319899835668, "grad_norm": 0.0, - "learning_rate": 5.8648327679481984e-06, - "loss": 0.8169, + "learning_rate": 5.963499872854417e-07, + "loss": 0.9872, "step": 22819 }, { - "epoch": 0.6466604324293689, + "epoch": 0.8928711166757962, "grad_norm": 0.0, - "learning_rate": 5.863997141430282e-06, - "loss": 0.8064, + "learning_rate": 5.959189924433284e-07, + "loss": 1.1448, "step": 22820 }, { - "epoch": 0.6466887698716314, + "epoch": 0.8929102433680257, "grad_norm": 0.0, - "learning_rate": 5.8631615497514506e-06, - "loss": 0.8453, + "learning_rate": 5.954881486189734e-07, + "loss": 0.9286, "step": 22821 }, { - "epoch": 0.6467171073138939, + "epoch": 0.8929493700602551, "grad_norm": 0.0, - "learning_rate": 5.8623259929187445e-06, - "loss": 0.89, + "learning_rate": 5.9505745581929e-07, + "loss": 0.983, "step": 22822 }, { - "epoch": 0.6467454447561564, + "epoch": 0.8929884967524846, "grad_norm": 0.0, - "learning_rate": 5.861490470939204e-06, - "loss": 0.8572, + "learning_rate": 5.946269140512028e-07, + "loss": 0.921, "step": 22823 }, { - "epoch": 0.6467737821984187, + "epoch": 0.893027623444714, "grad_norm": 0.0, - "learning_rate": 5.860654983819865e-06, - "loss": 0.9245, + "learning_rate": 5.941965233216207e-07, + "loss": 0.8663, "step": 22824 }, { - "epoch": 0.6468021196406812, + "epoch": 0.8930667501369435, "grad_norm": 0.0, - "learning_rate": 5.85981953156777e-06, - "loss": 0.8912, + "learning_rate": 5.937662836374569e-07, + "loss": 0.9111, "step": 22825 }, { - "epoch": 0.6468304570829437, + "epoch": 0.8931058768291729, "grad_norm": 0.0, - "learning_rate": 5.85898411418995e-06, - "loss": 0.7847, + "learning_rate": 5.933361950056183e-07, + "loss": 1.022, "step": 22826 }, { - "epoch": 0.6468587945252061, + "epoch": 0.8931450035214022, "grad_norm": 0.0, - "learning_rate": 5.858148731693445e-06, - "loss": 0.9228, + "learning_rate": 5.929062574330147e-07, + "loss": 0.982, "step": 22827 }, { - "epoch": 0.6468871319674686, + "epoch": 0.8931841302136317, "grad_norm": 0.0, - "learning_rate": 5.8573133840852895e-06, - "loss": 0.7625, + "learning_rate": 5.924764709265473e-07, + "loss": 0.9588, "step": 22828 }, { - "epoch": 0.6469154694097311, + "epoch": 0.8932232569058611, "grad_norm": 0.0, - "learning_rate": 5.856478071372521e-06, - "loss": 0.8477, + "learning_rate": 5.920468354931219e-07, + "loss": 0.9557, "step": 22829 }, { - "epoch": 0.6469438068519935, + "epoch": 0.8932623835980906, "grad_norm": 0.0, - "learning_rate": 5.855642793562182e-06, - "loss": 0.792, + "learning_rate": 5.916173511396328e-07, + "loss": 0.9292, "step": 22830 }, { - "epoch": 0.646972144294256, + "epoch": 0.89330151029032, "grad_norm": 0.0, - "learning_rate": 5.854807550661296e-06, - "loss": 0.7133, + "learning_rate": 5.911880178729812e-07, + "loss": 0.8717, "step": 22831 }, { - "epoch": 0.6470004817365185, + "epoch": 0.8933406369825495, "grad_norm": 0.0, - "learning_rate": 5.853972342676908e-06, - "loss": 0.837, + "learning_rate": 5.907588357000604e-07, + "loss": 0.8399, "step": 22832 }, { - "epoch": 0.647028819178781, + "epoch": 0.8933797636747789, "grad_norm": 0.0, - "learning_rate": 5.853137169616054e-06, - "loss": 0.8135, + "learning_rate": 5.903298046277628e-07, + "loss": 1.0154, "step": 22833 }, { - "epoch": 0.6470571566210434, + "epoch": 0.8934188903670084, "grad_norm": 0.0, - "learning_rate": 5.852302031485762e-06, - "loss": 0.7645, + "learning_rate": 5.899009246629761e-07, + "loss": 1.0159, "step": 22834 }, { - "epoch": 0.6470854940633058, + "epoch": 0.8934580170592378, "grad_norm": 0.0, - "learning_rate": 5.85146692829307e-06, - "loss": 0.7252, + "learning_rate": 5.894721958125882e-07, + "loss": 1.0387, "step": 22835 }, { - "epoch": 0.6471138315055683, + "epoch": 0.8934971437514673, "grad_norm": 0.0, - "learning_rate": 5.850631860045013e-06, - "loss": 0.893, + "learning_rate": 5.890436180834857e-07, + "loss": 0.9453, "step": 22836 }, { - "epoch": 0.6471421689478307, + "epoch": 0.8935362704436967, "grad_norm": 0.0, - "learning_rate": 5.849796826748623e-06, - "loss": 0.7925, + "learning_rate": 5.886151914825522e-07, + "loss": 0.9271, "step": 22837 }, { - "epoch": 0.6471705063900932, + "epoch": 0.8935753971359262, "grad_norm": 0.0, - "learning_rate": 5.848961828410939e-06, - "loss": 0.8354, + "learning_rate": 5.881869160166632e-07, + "loss": 0.8808, "step": 22838 }, { - "epoch": 0.6471988438323557, + "epoch": 0.8936145238281555, "grad_norm": 0.0, - "learning_rate": 5.84812686503899e-06, - "loss": 0.8323, + "learning_rate": 5.877587916926986e-07, + "loss": 0.8932, "step": 22839 }, { - "epoch": 0.6472271812746182, + "epoch": 0.893653650520385, "grad_norm": 0.0, - "learning_rate": 5.8472919366398075e-06, - "loss": 0.8162, + "learning_rate": 5.873308185175341e-07, + "loss": 1.0164, "step": 22840 }, { - "epoch": 0.6472555187168806, + "epoch": 0.8936927772126144, "grad_norm": 0.0, - "learning_rate": 5.84645704322043e-06, - "loss": 0.9224, + "learning_rate": 5.869029964980433e-07, + "loss": 0.8884, "step": 22841 }, { - "epoch": 0.6472838561591431, + "epoch": 0.8937319039048439, "grad_norm": 0.0, - "learning_rate": 5.845622184787885e-06, - "loss": 0.8375, + "learning_rate": 5.864753256410938e-07, + "loss": 0.9575, "step": 22842 }, { - "epoch": 0.6473121936014056, + "epoch": 0.8937710305970733, "grad_norm": 0.0, - "learning_rate": 5.844787361349211e-06, - "loss": 0.7974, + "learning_rate": 5.860478059535557e-07, + "loss": 1.0115, "step": 22843 }, { - "epoch": 0.647340531043668, + "epoch": 0.8938101572893028, "grad_norm": 0.0, - "learning_rate": 5.843952572911432e-06, - "loss": 0.8499, + "learning_rate": 5.856204374422903e-07, + "loss": 0.9097, "step": 22844 }, { - "epoch": 0.6473688684859304, + "epoch": 0.8938492839815322, "grad_norm": 0.0, - "learning_rate": 5.843117819481584e-06, - "loss": 0.8436, + "learning_rate": 5.851932201141674e-07, + "loss": 0.892, "step": 22845 }, { - "epoch": 0.6473972059281929, + "epoch": 0.8938884106737617, "grad_norm": 0.0, - "learning_rate": 5.8422831010667e-06, - "loss": 0.8195, + "learning_rate": 5.84766153976043e-07, + "loss": 0.8847, "step": 22846 }, { - "epoch": 0.6474255433704554, + "epoch": 0.8939275373659911, "grad_norm": 0.0, - "learning_rate": 5.84144841767381e-06, - "loss": 0.8544, + "learning_rate": 5.843392390347768e-07, + "loss": 0.9562, "step": 22847 }, { - "epoch": 0.6474538808127178, + "epoch": 0.8939666640582206, "grad_norm": 0.0, - "learning_rate": 5.840613769309942e-06, - "loss": 0.9016, + "learning_rate": 5.839124752972225e-07, + "loss": 1.0186, "step": 22848 }, { - "epoch": 0.6474822182549803, + "epoch": 0.89400579075045, "grad_norm": 0.0, - "learning_rate": 5.839779155982131e-06, - "loss": 0.9496, + "learning_rate": 5.834858627702355e-07, + "loss": 0.9224, "step": 22849 }, { - "epoch": 0.6475105556972428, + "epoch": 0.8940449174426794, "grad_norm": 0.0, - "learning_rate": 5.838944577697401e-06, - "loss": 0.8959, + "learning_rate": 5.83059401460665e-07, + "loss": 0.9339, "step": 22850 }, { - "epoch": 0.6475388931395052, + "epoch": 0.8940840441349088, "grad_norm": 0.0, - "learning_rate": 5.8381100344627915e-06, - "loss": 0.838, + "learning_rate": 5.826330913753631e-07, + "loss": 0.8146, "step": 22851 }, { - "epoch": 0.6475672305817677, + "epoch": 0.8941231708271383, "grad_norm": 0.0, - "learning_rate": 5.837275526285323e-06, - "loss": 0.7475, + "learning_rate": 5.822069325211699e-07, + "loss": 0.9494, "step": 22852 }, { - "epoch": 0.6475955680240302, + "epoch": 0.8941622975193677, "grad_norm": 0.0, - "learning_rate": 5.8364410531720285e-06, - "loss": 0.866, + "learning_rate": 5.817809249049333e-07, + "loss": 0.9738, "step": 22853 }, { - "epoch": 0.6476239054662926, + "epoch": 0.8942014242115972, "grad_norm": 0.0, - "learning_rate": 5.83560661512994e-06, - "loss": 0.7586, + "learning_rate": 5.813550685334957e-07, + "loss": 0.9386, "step": 22854 }, { - "epoch": 0.647652242908555, + "epoch": 0.8942405509038266, "grad_norm": 0.0, - "learning_rate": 5.834772212166081e-06, - "loss": 0.8596, + "learning_rate": 5.809293634136903e-07, + "loss": 1.0068, "step": 22855 }, { - "epoch": 0.6476805803508175, + "epoch": 0.894279677596056, "grad_norm": 0.0, - "learning_rate": 5.833937844287482e-06, - "loss": 0.9268, + "learning_rate": 5.805038095523574e-07, + "loss": 0.9564, "step": 22856 }, { - "epoch": 0.64770891779308, + "epoch": 0.8943188042882855, "grad_norm": 0.0, - "learning_rate": 5.833103511501171e-06, - "loss": 0.8298, + "learning_rate": 5.800784069563304e-07, + "loss": 0.8528, "step": 22857 }, { - "epoch": 0.6477372552353424, + "epoch": 0.8943579309805149, "grad_norm": 0.0, - "learning_rate": 5.832269213814177e-06, - "loss": 0.9272, + "learning_rate": 5.796531556324414e-07, + "loss": 0.9633, "step": 22858 }, { - "epoch": 0.6477655926776049, + "epoch": 0.8943970576727444, "grad_norm": 0.0, - "learning_rate": 5.83143495123353e-06, - "loss": 0.8178, + "learning_rate": 5.792280555875174e-07, + "loss": 1.0733, "step": 22859 }, { - "epoch": 0.6477939301198674, + "epoch": 0.8944361843649737, "grad_norm": 0.0, - "learning_rate": 5.830600723766251e-06, - "loss": 0.846, + "learning_rate": 5.788031068283872e-07, + "loss": 0.8915, "step": 22860 }, { - "epoch": 0.6478222675621298, + "epoch": 0.8944753110572032, "grad_norm": 0.0, - "learning_rate": 5.82976653141937e-06, - "loss": 0.801, + "learning_rate": 5.783783093618711e-07, + "loss": 0.9064, "step": 22861 }, { - "epoch": 0.6478506050043923, + "epoch": 0.8945144377494326, "grad_norm": 0.0, - "learning_rate": 5.8289323741999185e-06, - "loss": 0.7885, + "learning_rate": 5.779536631947947e-07, + "loss": 0.9995, "step": 22862 }, { - "epoch": 0.6478789424466548, + "epoch": 0.8945535644416621, "grad_norm": 0.0, - "learning_rate": 5.8280982521149154e-06, - "loss": 0.8597, + "learning_rate": 5.775291683339757e-07, + "loss": 0.9598, "step": 22863 }, { - "epoch": 0.6479072798889173, + "epoch": 0.8945926911338915, "grad_norm": 0.0, - "learning_rate": 5.827264165171393e-06, - "loss": 0.7116, + "learning_rate": 5.77104824786232e-07, + "loss": 0.9848, "step": 22864 }, { - "epoch": 0.6479356173311797, + "epoch": 0.894631817826121, "grad_norm": 0.0, - "learning_rate": 5.826430113376369e-06, - "loss": 0.9195, + "learning_rate": 5.766806325583763e-07, + "loss": 1.0122, "step": 22865 }, { - "epoch": 0.6479639547734422, + "epoch": 0.8946709445183504, "grad_norm": 0.0, - "learning_rate": 5.825596096736876e-06, - "loss": 0.7863, + "learning_rate": 5.762565916572216e-07, + "loss": 1.0132, "step": 22866 }, { - "epoch": 0.6479922922157046, + "epoch": 0.8947100712105799, "grad_norm": 0.0, - "learning_rate": 5.82476211525994e-06, - "loss": 0.8704, + "learning_rate": 5.758327020895782e-07, + "loss": 0.9467, "step": 22867 }, { - "epoch": 0.648020629657967, + "epoch": 0.8947491979028093, "grad_norm": 0.0, - "learning_rate": 5.823928168952579e-06, - "loss": 0.8474, + "learning_rate": 5.754089638622529e-07, + "loss": 0.8951, "step": 22868 }, { - "epoch": 0.6480489671002295, + "epoch": 0.8947883245950388, "grad_norm": 0.0, - "learning_rate": 5.823094257821822e-06, - "loss": 0.9605, + "learning_rate": 5.74985376982049e-07, + "loss": 0.9189, "step": 22869 }, { - "epoch": 0.648077304542492, + "epoch": 0.8948274512872681, "grad_norm": 0.0, - "learning_rate": 5.822260381874694e-06, - "loss": 0.9452, + "learning_rate": 5.745619414557713e-07, + "loss": 1.022, "step": 22870 }, { - "epoch": 0.6481056419847545, + "epoch": 0.8948665779794976, "grad_norm": 0.0, - "learning_rate": 5.821426541118218e-06, - "loss": 0.8087, + "learning_rate": 5.741386572902141e-07, + "loss": 1.0281, "step": 22871 }, { - "epoch": 0.6481339794270169, + "epoch": 0.894905704671727, "grad_norm": 0.0, - "learning_rate": 5.820592735559421e-06, - "loss": 0.784, + "learning_rate": 5.737155244921833e-07, + "loss": 0.9371, "step": 22872 }, { - "epoch": 0.6481623168692794, + "epoch": 0.8949448313639565, "grad_norm": 0.0, - "learning_rate": 5.81975896520532e-06, - "loss": 0.8359, + "learning_rate": 5.732925430684666e-07, + "loss": 1.0244, "step": 22873 }, { - "epoch": 0.6481906543115419, + "epoch": 0.8949839580561859, "grad_norm": 0.0, - "learning_rate": 5.818925230062941e-06, - "loss": 0.8896, + "learning_rate": 5.72869713025861e-07, + "loss": 0.9384, "step": 22874 }, { - "epoch": 0.6482189917538043, + "epoch": 0.8950230847484154, "grad_norm": 0.0, - "learning_rate": 5.818091530139311e-06, - "loss": 0.8891, + "learning_rate": 5.72447034371153e-07, + "loss": 0.8918, "step": 22875 }, { - "epoch": 0.6482473291960668, + "epoch": 0.8950622114406448, "grad_norm": 0.0, - "learning_rate": 5.817257865441446e-06, - "loss": 0.8153, + "learning_rate": 5.720245071111341e-07, + "loss": 0.9835, "step": 22876 }, { - "epoch": 0.6482756666383293, + "epoch": 0.8951013381328743, "grad_norm": 0.0, - "learning_rate": 5.8164242359763705e-06, - "loss": 0.7588, + "learning_rate": 5.716021312525866e-07, + "loss": 0.9273, "step": 22877 }, { - "epoch": 0.6483040040805916, + "epoch": 0.8951404648251037, "grad_norm": 0.0, - "learning_rate": 5.8155906417511125e-06, - "loss": 0.8057, + "learning_rate": 5.71179906802296e-07, + "loss": 0.867, "step": 22878 }, { - "epoch": 0.6483323415228541, + "epoch": 0.8951795915173332, "grad_norm": 0.0, - "learning_rate": 5.814757082772683e-06, - "loss": 0.946, + "learning_rate": 5.707578337670394e-07, + "loss": 1.0039, "step": 22879 }, { - "epoch": 0.6483606789651166, + "epoch": 0.8952187182095626, "grad_norm": 0.0, - "learning_rate": 5.813923559048114e-06, - "loss": 0.849, + "learning_rate": 5.703359121535967e-07, + "loss": 1.0027, "step": 22880 }, { - "epoch": 0.6483890164073791, + "epoch": 0.895257844901792, "grad_norm": 0.0, - "learning_rate": 5.813090070584415e-06, - "loss": 0.9043, + "learning_rate": 5.699141419687437e-07, + "loss": 0.9341, "step": 22881 }, { - "epoch": 0.6484173538496415, + "epoch": 0.8952969715940214, "grad_norm": 0.0, - "learning_rate": 5.812256617388614e-06, - "loss": 0.8554, + "learning_rate": 5.694925232192538e-07, + "loss": 0.875, "step": 22882 }, { - "epoch": 0.648445691291904, + "epoch": 0.8953360982862508, "grad_norm": 0.0, - "learning_rate": 5.8114231994677315e-06, - "loss": 0.8577, + "learning_rate": 5.69071055911895e-07, + "loss": 0.9489, "step": 22883 }, { - "epoch": 0.6484740287341665, + "epoch": 0.8953752249784803, "grad_norm": 0.0, - "learning_rate": 5.810589816828786e-06, - "loss": 0.7392, + "learning_rate": 5.686497400534385e-07, + "loss": 1.0004, "step": 22884 }, { - "epoch": 0.6485023661764289, + "epoch": 0.8954143516707097, "grad_norm": 0.0, - "learning_rate": 5.809756469478804e-06, - "loss": 0.8073, + "learning_rate": 5.682285756506489e-07, + "loss": 1.0436, "step": 22885 }, { - "epoch": 0.6485307036186914, + "epoch": 0.8954534783629392, "grad_norm": 0.0, - "learning_rate": 5.8089231574247926e-06, - "loss": 0.8898, + "learning_rate": 5.678075627102908e-07, + "loss": 0.9962, "step": 22886 }, { - "epoch": 0.6485590410609539, + "epoch": 0.8954926050551686, "grad_norm": 0.0, - "learning_rate": 5.808089880673779e-06, - "loss": 0.7988, + "learning_rate": 5.673867012391221e-07, + "loss": 0.9557, "step": 22887 }, { - "epoch": 0.6485873785032163, + "epoch": 0.8955317317473981, "grad_norm": 0.0, - "learning_rate": 5.807256639232785e-06, - "loss": 0.7439, + "learning_rate": 5.669659912439041e-07, + "loss": 0.8781, "step": 22888 }, { - "epoch": 0.6486157159454787, + "epoch": 0.8955708584396275, "grad_norm": 0.0, - "learning_rate": 5.806423433108822e-06, - "loss": 0.7628, + "learning_rate": 5.665454327313913e-07, + "loss": 0.8287, "step": 22889 }, { - "epoch": 0.6486440533877412, + "epoch": 0.895609985131857, "grad_norm": 0.0, - "learning_rate": 5.805590262308911e-06, - "loss": 0.7893, + "learning_rate": 5.661250257083395e-07, + "loss": 0.967, "step": 22890 }, { - "epoch": 0.6486723908300037, + "epoch": 0.8956491118240864, "grad_norm": 0.0, - "learning_rate": 5.804757126840075e-06, - "loss": 0.8775, + "learning_rate": 5.657047701814978e-07, + "loss": 0.9195, "step": 22891 }, { - "epoch": 0.6487007282722661, + "epoch": 0.8956882385163158, "grad_norm": 0.0, - "learning_rate": 5.803924026709323e-06, - "loss": 0.9193, + "learning_rate": 5.652846661576172e-07, + "loss": 0.9608, "step": 22892 }, { - "epoch": 0.6487290657145286, + "epoch": 0.8957273652085452, "grad_norm": 0.0, - "learning_rate": 5.803090961923682e-06, - "loss": 0.8597, + "learning_rate": 5.648647136434404e-07, + "loss": 0.9372, "step": 22893 }, { - "epoch": 0.6487574031567911, + "epoch": 0.8957664919007747, "grad_norm": 0.0, - "learning_rate": 5.80225793249016e-06, - "loss": 0.8957, + "learning_rate": 5.64444912645713e-07, + "loss": 0.9079, "step": 22894 }, { - "epoch": 0.6487857405990536, + "epoch": 0.8958056185930041, "grad_norm": 0.0, - "learning_rate": 5.801424938415778e-06, - "loss": 0.6831, + "learning_rate": 5.640252631711784e-07, + "loss": 0.9039, "step": 22895 }, { - "epoch": 0.648814078041316, + "epoch": 0.8958447452852336, "grad_norm": 0.0, - "learning_rate": 5.800591979707553e-06, - "loss": 0.8301, + "learning_rate": 5.636057652265747e-07, + "loss": 0.8531, "step": 22896 }, { - "epoch": 0.6488424154835785, + "epoch": 0.895883871977463, "grad_norm": 0.0, - "learning_rate": 5.7997590563725e-06, - "loss": 0.8134, + "learning_rate": 5.631864188186364e-07, + "loss": 0.98, "step": 22897 }, { - "epoch": 0.648870752925841, + "epoch": 0.8959229986696925, "grad_norm": 0.0, - "learning_rate": 5.798926168417638e-06, - "loss": 0.7987, + "learning_rate": 5.627672239540993e-07, + "loss": 0.8947, "step": 22898 }, { - "epoch": 0.6488990903681033, + "epoch": 0.8959621253619219, "grad_norm": 0.0, - "learning_rate": 5.798093315849984e-06, - "loss": 0.8442, + "learning_rate": 5.623481806396958e-07, + "loss": 0.9676, "step": 22899 }, { - "epoch": 0.6489274278103658, + "epoch": 0.8960012520541514, "grad_norm": 0.0, - "learning_rate": 5.7972604986765456e-06, - "loss": 0.8512, + "learning_rate": 5.619292888821537e-07, + "loss": 0.9006, "step": 22900 }, { - "epoch": 0.6489557652526283, + "epoch": 0.8960403787463808, "grad_norm": 0.0, - "learning_rate": 5.796427716904347e-06, - "loss": 0.8521, + "learning_rate": 5.615105486882011e-07, + "loss": 1.0062, "step": 22901 }, { - "epoch": 0.6489841026948907, + "epoch": 0.8960795054386103, "grad_norm": 0.0, - "learning_rate": 5.795594970540395e-06, - "loss": 0.9499, + "learning_rate": 5.610919600645615e-07, + "loss": 0.9988, "step": 22902 }, { - "epoch": 0.6490124401371532, + "epoch": 0.8961186321308396, "grad_norm": 0.0, - "learning_rate": 5.794762259591709e-06, - "loss": 0.849, + "learning_rate": 5.606735230179594e-07, + "loss": 0.9261, "step": 22903 }, { - "epoch": 0.6490407775794157, + "epoch": 0.8961577588230691, "grad_norm": 0.0, - "learning_rate": 5.793929584065306e-06, - "loss": 0.8329, + "learning_rate": 5.602552375551107e-07, + "loss": 0.9679, "step": 22904 }, { - "epoch": 0.6490691150216782, + "epoch": 0.8961968855152985, "grad_norm": 0.0, - "learning_rate": 5.793096943968191e-06, - "loss": 0.7378, + "learning_rate": 5.598371036827355e-07, + "loss": 0.9779, "step": 22905 }, { - "epoch": 0.6490974524639406, + "epoch": 0.896236012207528, "grad_norm": 0.0, - "learning_rate": 5.792264339307382e-06, - "loss": 0.7989, + "learning_rate": 5.59419121407544e-07, + "loss": 0.9579, "step": 22906 }, { - "epoch": 0.6491257899062031, + "epoch": 0.8962751388997574, "grad_norm": 0.0, - "learning_rate": 5.791431770089897e-06, - "loss": 0.8459, + "learning_rate": 5.590012907362552e-07, + "loss": 1.045, "step": 22907 }, { - "epoch": 0.6491541273484656, + "epoch": 0.8963142655919869, "grad_norm": 0.0, - "learning_rate": 5.790599236322743e-06, - "loss": 0.8138, + "learning_rate": 5.585836116755739e-07, + "loss": 0.9374, "step": 22908 }, { - "epoch": 0.6491824647907279, + "epoch": 0.8963533922842163, "grad_norm": 0.0, - "learning_rate": 5.789766738012932e-06, - "loss": 0.8116, + "learning_rate": 5.581660842322101e-07, + "loss": 0.9898, "step": 22909 }, { - "epoch": 0.6492108022329904, + "epoch": 0.8963925189764458, "grad_norm": 0.0, - "learning_rate": 5.788934275167482e-06, - "loss": 0.8833, + "learning_rate": 5.577487084128664e-07, + "loss": 0.9587, "step": 22910 }, { - "epoch": 0.6492391396752529, + "epoch": 0.8964316456686752, "grad_norm": 0.0, - "learning_rate": 5.788101847793399e-06, - "loss": 0.9381, + "learning_rate": 5.573314842242461e-07, + "loss": 0.9898, "step": 22911 }, { - "epoch": 0.6492674771175154, + "epoch": 0.8964707723609046, "grad_norm": 0.0, - "learning_rate": 5.787269455897705e-06, - "loss": 0.7884, + "learning_rate": 5.569144116730507e-07, + "loss": 0.9548, "step": 22912 }, { - "epoch": 0.6492958145597778, + "epoch": 0.896509899053134, "grad_norm": 0.0, - "learning_rate": 5.786437099487401e-06, - "loss": 0.9291, + "learning_rate": 5.564974907659781e-07, + "loss": 0.9802, "step": 22913 }, { - "epoch": 0.6493241520020403, + "epoch": 0.8965490257453634, "grad_norm": 0.0, - "learning_rate": 5.785604778569505e-06, - "loss": 0.8843, + "learning_rate": 5.560807215097208e-07, + "loss": 0.9271, "step": 22914 }, { - "epoch": 0.6493524894443028, + "epoch": 0.8965881524375929, "grad_norm": 0.0, - "learning_rate": 5.784772493151019e-06, - "loss": 0.8812, + "learning_rate": 5.556641039109734e-07, + "loss": 0.9481, "step": 22915 }, { - "epoch": 0.6493808268865652, + "epoch": 0.8966272791298223, "grad_norm": 0.0, - "learning_rate": 5.783940243238963e-06, - "loss": 0.8548, + "learning_rate": 5.55247637976426e-07, + "loss": 0.9677, "step": 22916 }, { - "epoch": 0.6494091643288277, + "epoch": 0.8966664058220518, "grad_norm": 0.0, - "learning_rate": 5.783108028840345e-06, - "loss": 0.8021, + "learning_rate": 5.548313237127689e-07, + "loss": 0.8231, "step": 22917 }, { - "epoch": 0.6494375017710902, + "epoch": 0.8967055325142812, "grad_norm": 0.0, - "learning_rate": 5.7822758499621715e-06, - "loss": 0.7787, + "learning_rate": 5.544151611266823e-07, + "loss": 1.0363, "step": 22918 }, { - "epoch": 0.6494658392133527, + "epoch": 0.8967446592065107, "grad_norm": 0.0, - "learning_rate": 5.781443706611455e-06, - "loss": 0.8185, + "learning_rate": 5.539991502248554e-07, + "loss": 0.9777, "step": 22919 }, { - "epoch": 0.649494176655615, + "epoch": 0.8967837858987401, "grad_norm": 0.0, - "learning_rate": 5.780611598795207e-06, - "loss": 0.7133, + "learning_rate": 5.535832910139616e-07, + "loss": 0.893, "step": 22920 }, { - "epoch": 0.6495225140978775, + "epoch": 0.8968229125909696, "grad_norm": 0.0, - "learning_rate": 5.779779526520433e-06, - "loss": 0.8437, + "learning_rate": 5.531675835006867e-07, + "loss": 0.8738, "step": 22921 }, { - "epoch": 0.64955085154014, + "epoch": 0.896862039283199, "grad_norm": 0.0, - "learning_rate": 5.778947489794141e-06, - "loss": 0.8655, + "learning_rate": 5.527520276917009e-07, + "loss": 1.0215, "step": 22922 }, { - "epoch": 0.6495791889824024, + "epoch": 0.8969011659754285, "grad_norm": 0.0, - "learning_rate": 5.778115488623343e-06, - "loss": 0.791, + "learning_rate": 5.523366235936811e-07, + "loss": 0.9836, "step": 22923 }, { - "epoch": 0.6496075264246649, + "epoch": 0.8969402926676578, "grad_norm": 0.0, - "learning_rate": 5.777283523015045e-06, - "loss": 0.823, + "learning_rate": 5.519213712132931e-07, + "loss": 0.9236, "step": 22924 }, { - "epoch": 0.6496358638669274, + "epoch": 0.8969794193598873, "grad_norm": 0.0, - "learning_rate": 5.77645159297626e-06, - "loss": 0.8754, + "learning_rate": 5.515062705572116e-07, + "loss": 1.1003, "step": 22925 }, { - "epoch": 0.6496642013091898, + "epoch": 0.8970185460521167, "grad_norm": 0.0, - "learning_rate": 5.7756196985139875e-06, - "loss": 0.8744, + "learning_rate": 5.51091321632099e-07, + "loss": 0.9622, "step": 22926 }, { - "epoch": 0.6496925387514523, + "epoch": 0.8970576727443462, "grad_norm": 0.0, - "learning_rate": 5.774787839635241e-06, - "loss": 0.8004, + "learning_rate": 5.506765244446211e-07, + "loss": 1.004, "step": 22927 }, { - "epoch": 0.6497208761937148, + "epoch": 0.8970967994365756, "grad_norm": 0.0, - "learning_rate": 5.7739560163470275e-06, - "loss": 0.8029, + "learning_rate": 5.502618790014358e-07, + "loss": 1.0068, "step": 22928 }, { - "epoch": 0.6497492136359773, + "epoch": 0.8971359261288051, "grad_norm": 0.0, - "learning_rate": 5.773124228656348e-06, - "loss": 0.8264, + "learning_rate": 5.498473853092034e-07, + "loss": 1.0617, "step": 22929 }, { - "epoch": 0.6497775510782396, + "epoch": 0.8971750528210345, "grad_norm": 0.0, - "learning_rate": 5.772292476570218e-06, - "loss": 0.8656, + "learning_rate": 5.494330433745809e-07, + "loss": 0.8661, "step": 22930 }, { - "epoch": 0.6498058885205021, + "epoch": 0.897214179513264, "grad_norm": 0.0, - "learning_rate": 5.771460760095633e-06, - "loss": 0.9036, + "learning_rate": 5.490188532042229e-07, + "loss": 1.0596, "step": 22931 }, { - "epoch": 0.6498342259627646, + "epoch": 0.8972533062054934, "grad_norm": 0.0, - "learning_rate": 5.770629079239605e-06, - "loss": 0.918, + "learning_rate": 5.486048148047774e-07, + "loss": 0.9015, "step": 22932 }, { - "epoch": 0.649862563405027, + "epoch": 0.8972924328977229, "grad_norm": 0.0, - "learning_rate": 5.769797434009141e-06, - "loss": 0.9077, + "learning_rate": 5.481909281828956e-07, + "loss": 1.0402, "step": 22933 }, { - "epoch": 0.6498909008472895, + "epoch": 0.8973315595899523, "grad_norm": 0.0, - "learning_rate": 5.768965824411242e-06, - "loss": 0.9136, + "learning_rate": 5.477771933452237e-07, + "loss": 0.9067, "step": 22934 }, { - "epoch": 0.649919238289552, + "epoch": 0.8973706862821818, "grad_norm": 0.0, - "learning_rate": 5.768134250452915e-06, - "loss": 0.8923, + "learning_rate": 5.47363610298407e-07, + "loss": 1.0216, "step": 22935 }, { - "epoch": 0.6499475757318145, + "epoch": 0.8974098129744111, "grad_norm": 0.0, - "learning_rate": 5.767302712141164e-06, - "loss": 0.8738, + "learning_rate": 5.46950179049085e-07, + "loss": 0.9436, "step": 22936 }, { - "epoch": 0.6499759131740769, + "epoch": 0.8974489396666406, "grad_norm": 0.0, - "learning_rate": 5.766471209482994e-06, - "loss": 0.8753, + "learning_rate": 5.465368996038989e-07, + "loss": 0.8922, "step": 22937 }, { - "epoch": 0.6500042506163394, + "epoch": 0.89748806635887, "grad_norm": 0.0, - "learning_rate": 5.765639742485414e-06, - "loss": 0.8537, + "learning_rate": 5.461237719694823e-07, + "loss": 0.9341, "step": 22938 }, { - "epoch": 0.6500325880586019, + "epoch": 0.8975271930510995, "grad_norm": 0.0, - "learning_rate": 5.764808311155419e-06, - "loss": 0.8237, + "learning_rate": 5.457107961524721e-07, + "loss": 0.958, "step": 22939 }, { - "epoch": 0.6500609255008643, + "epoch": 0.8975663197433289, "grad_norm": 0.0, - "learning_rate": 5.763976915500013e-06, - "loss": 0.909, + "learning_rate": 5.452979721594997e-07, + "loss": 1.0278, "step": 22940 }, { - "epoch": 0.6500892629431267, + "epoch": 0.8976054464355583, "grad_norm": 0.0, - "learning_rate": 5.763145555526211e-06, - "loss": 0.8711, + "learning_rate": 5.448852999971965e-07, + "loss": 0.9628, "step": 22941 }, { - "epoch": 0.6501176003853892, + "epoch": 0.8976445731277878, "grad_norm": 0.0, - "learning_rate": 5.762314231241001e-06, - "loss": 0.8682, + "learning_rate": 5.444727796721849e-07, + "loss": 0.9497, "step": 22942 }, { - "epoch": 0.6501459378276516, + "epoch": 0.8976836998200172, "grad_norm": 0.0, - "learning_rate": 5.761482942651395e-06, - "loss": 0.8882, + "learning_rate": 5.440604111910929e-07, + "loss": 0.9217, "step": 22943 }, { - "epoch": 0.6501742752699141, + "epoch": 0.8977228265122467, "grad_norm": 0.0, - "learning_rate": 5.76065168976439e-06, - "loss": 0.8224, + "learning_rate": 5.43648194560542e-07, + "loss": 0.7874, "step": 22944 }, { - "epoch": 0.6502026127121766, + "epoch": 0.897761953204476, "grad_norm": 0.0, - "learning_rate": 5.759820472586989e-06, - "loss": 0.7342, + "learning_rate": 5.432361297871513e-07, + "loss": 1.0131, "step": 22945 }, { - "epoch": 0.6502309501544391, + "epoch": 0.8978010798967055, "grad_norm": 0.0, - "learning_rate": 5.758989291126199e-06, - "loss": 0.8589, + "learning_rate": 5.428242168775378e-07, + "loss": 0.8692, "step": 22946 }, { - "epoch": 0.6502592875967015, + "epoch": 0.8978402065889349, "grad_norm": 0.0, - "learning_rate": 5.758158145389012e-06, - "loss": 0.7042, + "learning_rate": 5.42412455838317e-07, + "loss": 0.913, "step": 22947 }, { - "epoch": 0.650287625038964, + "epoch": 0.8978793332811644, "grad_norm": 0.0, - "learning_rate": 5.757327035382436e-06, - "loss": 0.9609, + "learning_rate": 5.420008466761028e-07, + "loss": 0.8535, "step": 22948 }, { - "epoch": 0.6503159624812265, + "epoch": 0.8979184599733938, "grad_norm": 0.0, - "learning_rate": 5.7564959611134685e-06, - "loss": 0.8152, + "learning_rate": 5.415893893975022e-07, + "loss": 0.9939, "step": 22949 }, { - "epoch": 0.6503442999234889, + "epoch": 0.8979575866656233, "grad_norm": 0.0, - "learning_rate": 5.7556649225891125e-06, - "loss": 0.8165, + "learning_rate": 5.411780840091252e-07, + "loss": 0.9348, "step": 22950 }, { - "epoch": 0.6503726373657513, + "epoch": 0.8979967133578527, "grad_norm": 0.0, - "learning_rate": 5.75483391981637e-06, - "loss": 0.7804, + "learning_rate": 5.407669305175723e-07, + "loss": 1.0634, "step": 22951 }, { - "epoch": 0.6504009748080138, + "epoch": 0.8980358400500822, "grad_norm": 0.0, - "learning_rate": 5.754002952802233e-06, - "loss": 0.7874, + "learning_rate": 5.403559289294525e-07, + "loss": 0.9693, "step": 22952 }, { - "epoch": 0.6504293122502763, + "epoch": 0.8980749667423116, "grad_norm": 0.0, - "learning_rate": 5.753172021553708e-06, - "loss": 0.8639, + "learning_rate": 5.399450792513616e-07, + "loss": 0.8897, "step": 22953 }, { - "epoch": 0.6504576496925387, + "epoch": 0.8981140934345411, "grad_norm": 0.0, - "learning_rate": 5.752341126077795e-06, - "loss": 0.9188, + "learning_rate": 5.395343814899001e-07, + "loss": 1.0205, "step": 22954 }, { - "epoch": 0.6504859871348012, + "epoch": 0.8981532201267705, "grad_norm": 0.0, - "learning_rate": 5.7515102663814855e-06, - "loss": 0.792, + "learning_rate": 5.391238356516593e-07, + "loss": 0.8204, "step": 22955 }, { - "epoch": 0.6505143245770637, + "epoch": 0.898192346819, "grad_norm": 0.0, - "learning_rate": 5.750679442471783e-06, - "loss": 0.8335, + "learning_rate": 5.387134417432372e-07, + "loss": 0.9935, "step": 22956 }, { - "epoch": 0.6505426620193261, + "epoch": 0.8982314735112293, "grad_norm": 0.0, - "learning_rate": 5.7498486543556896e-06, - "loss": 0.7662, + "learning_rate": 5.383031997712195e-07, + "loss": 0.9595, "step": 22957 }, { - "epoch": 0.6505709994615886, + "epoch": 0.8982706002034588, "grad_norm": 0.0, - "learning_rate": 5.749017902040196e-06, - "loss": 0.8817, + "learning_rate": 5.37893109742198e-07, + "loss": 0.9753, "step": 22958 }, { - "epoch": 0.6505993369038511, + "epoch": 0.8983097268956882, "grad_norm": 0.0, - "learning_rate": 5.748187185532306e-06, - "loss": 0.7761, + "learning_rate": 5.374831716627549e-07, + "loss": 0.9406, "step": 22959 }, { - "epoch": 0.6506276743461136, + "epoch": 0.8983488535879177, "grad_norm": 0.0, - "learning_rate": 5.7473565048390115e-06, - "loss": 0.9093, + "learning_rate": 5.37073385539475e-07, + "loss": 1.007, "step": 22960 }, { - "epoch": 0.650656011788376, + "epoch": 0.8983879802801471, "grad_norm": 0.0, - "learning_rate": 5.7465258599673115e-06, - "loss": 0.9086, + "learning_rate": 5.366637513789397e-07, + "loss": 1.0695, "step": 22961 }, { - "epoch": 0.6506843492306384, + "epoch": 0.8984271069723766, "grad_norm": 0.0, - "learning_rate": 5.745695250924204e-06, - "loss": 0.814, + "learning_rate": 5.36254269187726e-07, + "loss": 0.9558, "step": 22962 }, { - "epoch": 0.6507126866729009, + "epoch": 0.898466233664606, "grad_norm": 0.0, - "learning_rate": 5.744864677716684e-06, - "loss": 0.8713, + "learning_rate": 5.358449389724097e-07, + "loss": 0.9221, "step": 22963 }, { - "epoch": 0.6507410241151633, + "epoch": 0.8985053603568355, "grad_norm": 0.0, - "learning_rate": 5.74403414035175e-06, - "loss": 0.7885, + "learning_rate": 5.354357607395644e-07, + "loss": 0.8939, "step": 22964 }, { - "epoch": 0.6507693615574258, + "epoch": 0.8985444870490649, "grad_norm": 0.0, - "learning_rate": 5.743203638836401e-06, - "loss": 0.9006, + "learning_rate": 5.350267344957605e-07, + "loss": 0.9977, "step": 22965 }, { - "epoch": 0.6507976989996883, + "epoch": 0.8985836137412944, "grad_norm": 0.0, - "learning_rate": 5.742373173177625e-06, - "loss": 0.8987, + "learning_rate": 5.346178602475693e-07, + "loss": 0.9047, "step": 22966 }, { - "epoch": 0.6508260364419507, + "epoch": 0.8986227404335237, "grad_norm": 0.0, - "learning_rate": 5.741542743382422e-06, - "loss": 0.7313, + "learning_rate": 5.342091380015524e-07, + "loss": 0.9758, "step": 22967 }, { - "epoch": 0.6508543738842132, + "epoch": 0.8986618671257532, "grad_norm": 0.0, - "learning_rate": 5.740712349457785e-06, - "loss": 0.8148, + "learning_rate": 5.338005677642776e-07, + "loss": 0.9716, "step": 22968 }, { - "epoch": 0.6508827113264757, + "epoch": 0.8987009938179826, "grad_norm": 0.0, - "learning_rate": 5.739881991410707e-06, - "loss": 0.7987, + "learning_rate": 5.333921495423e-07, + "loss": 0.9274, "step": 22969 }, { - "epoch": 0.6509110487687382, + "epoch": 0.898740120510212, "grad_norm": 0.0, - "learning_rate": 5.7390516692481905e-06, - "loss": 0.7944, + "learning_rate": 5.329838833421852e-07, + "loss": 0.8904, "step": 22970 }, { - "epoch": 0.6509393862110006, + "epoch": 0.8987792472024415, "grad_norm": 0.0, - "learning_rate": 5.7382213829772205e-06, - "loss": 0.8259, + "learning_rate": 5.325757691704858e-07, + "loss": 0.9369, "step": 22971 }, { - "epoch": 0.650967723653263, + "epoch": 0.8988183738946709, "grad_norm": 0.0, - "learning_rate": 5.737391132604797e-06, - "loss": 0.8073, + "learning_rate": 5.321678070337566e-07, + "loss": 0.8996, "step": 22972 }, { - "epoch": 0.6509960610955255, + "epoch": 0.8988575005869004, "grad_norm": 0.0, - "learning_rate": 5.736560918137908e-06, - "loss": 0.8797, + "learning_rate": 5.317599969385456e-07, + "loss": 0.9541, "step": 22973 }, { - "epoch": 0.6510243985377879, + "epoch": 0.8988966272791298, "grad_norm": 0.0, - "learning_rate": 5.73573073958355e-06, - "loss": 0.8326, + "learning_rate": 5.313523388914088e-07, + "loss": 0.9253, "step": 22974 }, { - "epoch": 0.6510527359800504, + "epoch": 0.8989357539713593, "grad_norm": 0.0, - "learning_rate": 5.734900596948713e-06, - "loss": 0.8007, + "learning_rate": 5.309448328988865e-07, + "loss": 1.0359, "step": 22975 }, { - "epoch": 0.6510810734223129, + "epoch": 0.8989748806635887, "grad_norm": 0.0, - "learning_rate": 5.734070490240393e-06, - "loss": 0.9255, + "learning_rate": 5.305374789675255e-07, + "loss": 0.8906, "step": 22976 }, { - "epoch": 0.6511094108645754, + "epoch": 0.8990140073558182, "grad_norm": 0.0, - "learning_rate": 5.7332404194655824e-06, - "loss": 0.8319, + "learning_rate": 5.301302771038663e-07, + "loss": 1.018, "step": 22977 }, { - "epoch": 0.6511377483068378, + "epoch": 0.8990531340480475, "grad_norm": 0.0, - "learning_rate": 5.732410384631276e-06, - "loss": 0.9471, + "learning_rate": 5.297232273144481e-07, + "loss": 0.938, "step": 22978 }, { - "epoch": 0.6511660857491003, + "epoch": 0.899092260740277, "grad_norm": 0.0, - "learning_rate": 5.731580385744457e-06, - "loss": 0.7667, + "learning_rate": 5.293163296058079e-07, + "loss": 0.7455, "step": 22979 }, { - "epoch": 0.6511944231913628, + "epoch": 0.8991313874325064, "grad_norm": 0.0, - "learning_rate": 5.730750422812126e-06, - "loss": 0.8148, + "learning_rate": 5.289095839844816e-07, + "loss": 0.9966, "step": 22980 }, { - "epoch": 0.6512227606336252, + "epoch": 0.8991705141247359, "grad_norm": 0.0, - "learning_rate": 5.729920495841266e-06, - "loss": 0.9673, + "learning_rate": 5.285029904569972e-07, + "loss": 0.8277, "step": 22981 }, { - "epoch": 0.6512510980758877, + "epoch": 0.8992096408169653, "grad_norm": 0.0, - "learning_rate": 5.72909060483887e-06, - "loss": 0.8413, + "learning_rate": 5.280965490298873e-07, + "loss": 0.9574, "step": 22982 }, { - "epoch": 0.6512794355181502, + "epoch": 0.8992487675091948, "grad_norm": 0.0, - "learning_rate": 5.728260749811936e-06, - "loss": 0.855, + "learning_rate": 5.276902597096789e-07, + "loss": 0.9005, "step": 22983 }, { - "epoch": 0.6513077729604126, + "epoch": 0.8992878942014242, "grad_norm": 0.0, - "learning_rate": 5.727430930767441e-06, - "loss": 0.8806, + "learning_rate": 5.272841225028935e-07, + "loss": 0.9218, "step": 22984 }, { - "epoch": 0.651336110402675, + "epoch": 0.8993270208936537, "grad_norm": 0.0, - "learning_rate": 5.726601147712384e-06, - "loss": 0.8706, + "learning_rate": 5.268781374160559e-07, + "loss": 0.929, "step": 22985 }, { - "epoch": 0.6513644478449375, + "epoch": 0.8993661475858831, "grad_norm": 0.0, - "learning_rate": 5.725771400653756e-06, - "loss": 0.8399, + "learning_rate": 5.264723044556863e-07, + "loss": 1.019, "step": 22986 }, { - "epoch": 0.6513927852872, + "epoch": 0.8994052742781126, "grad_norm": 0.0, - "learning_rate": 5.724941689598538e-06, - "loss": 0.8966, + "learning_rate": 5.260666236282985e-07, + "loss": 1.1081, "step": 22987 }, { - "epoch": 0.6514211227294624, + "epoch": 0.899444400970342, "grad_norm": 0.0, - "learning_rate": 5.724112014553725e-06, - "loss": 0.9018, + "learning_rate": 5.256610949404106e-07, + "loss": 0.8484, "step": 22988 }, { - "epoch": 0.6514494601717249, + "epoch": 0.8994835276625714, "grad_norm": 0.0, - "learning_rate": 5.723282375526302e-06, - "loss": 0.8515, + "learning_rate": 5.25255718398533e-07, + "loss": 1.0097, "step": 22989 }, { - "epoch": 0.6514777976139874, + "epoch": 0.8995226543548008, "grad_norm": 0.0, - "learning_rate": 5.7224527725232614e-06, - "loss": 0.8968, + "learning_rate": 5.248504940091758e-07, + "loss": 0.9402, "step": 22990 }, { - "epoch": 0.6515061350562498, + "epoch": 0.8995617810470303, "grad_norm": 0.0, - "learning_rate": 5.721623205551594e-06, - "loss": 0.8529, + "learning_rate": 5.244454217788464e-07, + "loss": 0.8556, "step": 22991 }, { - "epoch": 0.6515344724985123, + "epoch": 0.8996009077392597, "grad_norm": 0.0, - "learning_rate": 5.720793674618278e-06, - "loss": 0.7927, + "learning_rate": 5.240405017140504e-07, + "loss": 1.0044, "step": 22992 }, { - "epoch": 0.6515628099407748, + "epoch": 0.8996400344314892, "grad_norm": 0.0, - "learning_rate": 5.719964179730306e-06, - "loss": 0.8624, + "learning_rate": 5.236357338212905e-07, + "loss": 0.9397, "step": 22993 }, { - "epoch": 0.6515911473830373, + "epoch": 0.8996791611237186, "grad_norm": 0.0, - "learning_rate": 5.71913472089467e-06, - "loss": 0.8434, + "learning_rate": 5.23231118107066e-07, + "loss": 0.9519, "step": 22994 }, { - "epoch": 0.6516194848252996, + "epoch": 0.8997182878159481, "grad_norm": 0.0, - "learning_rate": 5.718305298118347e-06, - "loss": 0.7674, + "learning_rate": 5.228266545778737e-07, + "loss": 0.8594, "step": 22995 }, { - "epoch": 0.6516478222675621, + "epoch": 0.8997574145081775, "grad_norm": 0.0, - "learning_rate": 5.717475911408332e-06, - "loss": 0.8356, + "learning_rate": 5.224223432402098e-07, + "loss": 0.9578, "step": 22996 }, { - "epoch": 0.6516761597098246, + "epoch": 0.8997965412004069, "grad_norm": 0.0, - "learning_rate": 5.716646560771605e-06, - "loss": 0.878, + "learning_rate": 5.220181841005689e-07, + "loss": 1.0099, "step": 22997 }, { - "epoch": 0.651704497152087, + "epoch": 0.8998356678926364, "grad_norm": 0.0, - "learning_rate": 5.715817246215154e-06, - "loss": 0.8523, + "learning_rate": 5.216141771654371e-07, + "loss": 0.996, "step": 22998 }, { - "epoch": 0.6517328345943495, + "epoch": 0.8998747945848657, "grad_norm": 0.0, - "learning_rate": 5.714987967745969e-06, - "loss": 0.7879, + "learning_rate": 5.212103224413068e-07, + "loss": 0.9897, "step": 22999 }, { - "epoch": 0.651761172036612, + "epoch": 0.8999139212770952, "grad_norm": 0.0, - "learning_rate": 5.714158725371027e-06, - "loss": 0.8118, + "learning_rate": 5.208066199346573e-07, + "loss": 0.8578, "step": 23000 }, { - "epoch": 0.6517895094788745, + "epoch": 0.8999530479693246, "grad_norm": 0.0, - "learning_rate": 5.713329519097319e-06, - "loss": 0.8863, + "learning_rate": 5.204030696519791e-07, + "loss": 1.0436, "step": 23001 }, { - "epoch": 0.6518178469211369, + "epoch": 0.8999921746615541, "grad_norm": 0.0, - "learning_rate": 5.712500348931828e-06, - "loss": 0.8689, + "learning_rate": 5.19999671599748e-07, + "loss": 0.9948, "step": 23002 }, { - "epoch": 0.6518461843633994, + "epoch": 0.9000313013537835, "grad_norm": 0.0, - "learning_rate": 5.7116712148815375e-06, - "loss": 0.8705, + "learning_rate": 5.195964257844433e-07, + "loss": 1.0574, "step": 23003 }, { - "epoch": 0.6518745218056619, + "epoch": 0.900070428046013, "grad_norm": 0.0, - "learning_rate": 5.7108421169534376e-06, - "loss": 0.7594, + "learning_rate": 5.191933322125387e-07, + "loss": 0.9391, "step": 23004 }, { - "epoch": 0.6519028592479242, + "epoch": 0.9001095547382424, "grad_norm": 0.0, - "learning_rate": 5.710013055154503e-06, - "loss": 0.8108, + "learning_rate": 5.187903908905112e-07, + "loss": 1.0819, "step": 23005 }, { - "epoch": 0.6519311966901867, + "epoch": 0.9001486814304719, "grad_norm": 0.0, - "learning_rate": 5.709184029491721e-06, - "loss": 0.8862, + "learning_rate": 5.18387601824828e-07, + "loss": 0.8499, "step": 23006 }, { - "epoch": 0.6519595341324492, + "epoch": 0.9001878081227013, "grad_norm": 0.0, - "learning_rate": 5.708355039972081e-06, - "loss": 0.7383, + "learning_rate": 5.179849650219604e-07, + "loss": 0.9538, "step": 23007 }, { - "epoch": 0.6519878715747117, + "epoch": 0.9002269348149308, "grad_norm": 0.0, - "learning_rate": 5.707526086602555e-06, - "loss": 0.7904, + "learning_rate": 5.175824804883711e-07, + "loss": 0.9648, "step": 23008 }, { - "epoch": 0.6520162090169741, + "epoch": 0.9002660615071602, "grad_norm": 0.0, - "learning_rate": 5.706697169390134e-06, - "loss": 0.8372, + "learning_rate": 5.17180148230525e-07, + "loss": 0.8849, "step": 23009 }, { - "epoch": 0.6520445464592366, + "epoch": 0.9003051881993896, "grad_norm": 0.0, - "learning_rate": 5.705868288341795e-06, - "loss": 0.9494, + "learning_rate": 5.167779682548824e-07, + "loss": 0.9657, "step": 23010 }, { - "epoch": 0.6520728839014991, + "epoch": 0.900344314891619, "grad_norm": 0.0, - "learning_rate": 5.705039443464521e-06, - "loss": 0.8244, + "learning_rate": 5.163759405679048e-07, + "loss": 0.8697, "step": 23011 }, { - "epoch": 0.6521012213437615, + "epoch": 0.9003834415838485, "grad_norm": 0.0, - "learning_rate": 5.704210634765295e-06, - "loss": 0.9152, + "learning_rate": 5.159740651760447e-07, + "loss": 0.9877, "step": 23012 }, { - "epoch": 0.652129558786024, + "epoch": 0.9004225682760779, "grad_norm": 0.0, - "learning_rate": 5.7033818622511e-06, - "loss": 0.8528, + "learning_rate": 5.155723420857561e-07, + "loss": 0.8987, "step": 23013 }, { - "epoch": 0.6521578962282865, + "epoch": 0.9004616949683074, "grad_norm": 0.0, - "learning_rate": 5.7025531259289134e-06, - "loss": 0.7973, + "learning_rate": 5.151707713034926e-07, + "loss": 1.0528, "step": 23014 }, { - "epoch": 0.6521862336705488, + "epoch": 0.9005008216605368, "grad_norm": 0.0, - "learning_rate": 5.701724425805717e-06, - "loss": 0.9018, + "learning_rate": 5.147693528357012e-07, + "loss": 0.9085, "step": 23015 }, { - "epoch": 0.6522145711128113, + "epoch": 0.9005399483527663, "grad_norm": 0.0, - "learning_rate": 5.700895761888492e-06, - "loss": 0.9352, + "learning_rate": 5.143680866888279e-07, + "loss": 0.9512, "step": 23016 }, { - "epoch": 0.6522429085550738, + "epoch": 0.9005790750449957, "grad_norm": 0.0, - "learning_rate": 5.7000671341842215e-06, - "loss": 0.8098, + "learning_rate": 5.139669728693176e-07, + "loss": 0.9741, "step": 23017 }, { - "epoch": 0.6522712459973363, + "epoch": 0.9006182017372252, "grad_norm": 0.0, - "learning_rate": 5.699238542699879e-06, - "loss": 0.9016, + "learning_rate": 5.135660113836083e-07, + "loss": 0.9113, "step": 23018 }, { - "epoch": 0.6522995834395987, + "epoch": 0.9006573284294546, "grad_norm": 0.0, - "learning_rate": 5.698409987442448e-06, - "loss": 0.8266, + "learning_rate": 5.13165202238145e-07, + "loss": 1.0478, "step": 23019 }, { - "epoch": 0.6523279208818612, + "epoch": 0.9006964551216841, "grad_norm": 0.0, - "learning_rate": 5.697581468418909e-06, - "loss": 0.8771, + "learning_rate": 5.127645454393593e-07, + "loss": 0.9611, "step": 23020 }, { - "epoch": 0.6523562583241237, + "epoch": 0.9007355818139134, "grad_norm": 0.0, - "learning_rate": 5.696752985636237e-06, - "loss": 0.9121, + "learning_rate": 5.123640409936881e-07, + "loss": 0.9846, "step": 23021 }, { - "epoch": 0.6523845957663861, + "epoch": 0.9007747085061429, "grad_norm": 0.0, - "learning_rate": 5.695924539101412e-06, - "loss": 0.8389, + "learning_rate": 5.119636889075608e-07, + "loss": 0.9161, "step": 23022 }, { - "epoch": 0.6524129332086486, + "epoch": 0.9008138351983723, "grad_norm": 0.0, - "learning_rate": 5.695096128821417e-06, - "loss": 0.8447, + "learning_rate": 5.115634891874066e-07, + "loss": 0.8085, "step": 23023 }, { - "epoch": 0.6524412706509111, + "epoch": 0.9008529618906018, "grad_norm": 0.0, - "learning_rate": 5.694267754803221e-06, - "loss": 0.8156, + "learning_rate": 5.111634418396538e-07, + "loss": 1.1064, "step": 23024 }, { - "epoch": 0.6524696080931736, + "epoch": 0.9008920885828312, "grad_norm": 0.0, - "learning_rate": 5.6934394170538075e-06, - "loss": 0.8174, + "learning_rate": 5.107635468707273e-07, + "loss": 1.0209, "step": 23025 }, { - "epoch": 0.6524979455354359, + "epoch": 0.9009312152750606, "grad_norm": 0.0, - "learning_rate": 5.692611115580153e-06, - "loss": 0.8687, + "learning_rate": 5.103638042870462e-07, + "loss": 0.8593, "step": 23026 }, { - "epoch": 0.6525262829776984, + "epoch": 0.9009703419672901, "grad_norm": 0.0, - "learning_rate": 5.691782850389233e-06, - "loss": 0.8092, + "learning_rate": 5.099642140950301e-07, + "loss": 0.9346, "step": 23027 }, { - "epoch": 0.6525546204199609, + "epoch": 0.9010094686595195, "grad_norm": 0.0, - "learning_rate": 5.690954621488031e-06, - "loss": 0.8545, + "learning_rate": 5.095647763010981e-07, + "loss": 0.8778, "step": 23028 }, { - "epoch": 0.6525829578622233, + "epoch": 0.901048595351749, "grad_norm": 0.0, - "learning_rate": 5.690126428883516e-06, - "loss": 0.9595, + "learning_rate": 5.09165490911665e-07, + "loss": 0.8849, "step": 23029 }, { - "epoch": 0.6526112953044858, + "epoch": 0.9010877220439784, "grad_norm": 0.0, - "learning_rate": 5.6892982725826684e-06, - "loss": 0.7885, + "learning_rate": 5.087663579331403e-07, + "loss": 0.961, "step": 23030 }, { - "epoch": 0.6526396327467483, + "epoch": 0.9011268487362079, "grad_norm": 0.0, - "learning_rate": 5.688470152592459e-06, - "loss": 0.8671, + "learning_rate": 5.083673773719344e-07, + "loss": 1.0215, "step": 23031 }, { - "epoch": 0.6526679701890108, + "epoch": 0.9011659754284372, "grad_norm": 0.0, - "learning_rate": 5.687642068919866e-06, - "loss": 0.8071, + "learning_rate": 5.079685492344555e-07, + "loss": 0.9437, "step": 23032 }, { - "epoch": 0.6526963076312732, + "epoch": 0.9012051021206667, "grad_norm": 0.0, - "learning_rate": 5.6868140215718695e-06, - "loss": 0.8212, + "learning_rate": 5.075698735271073e-07, + "loss": 0.9305, "step": 23033 }, { - "epoch": 0.6527246450735357, + "epoch": 0.9012442288128961, "grad_norm": 0.0, - "learning_rate": 5.685986010555437e-06, - "loss": 0.8916, + "learning_rate": 5.071713502562913e-07, + "loss": 0.9535, "step": 23034 }, { - "epoch": 0.6527529825157982, + "epoch": 0.9012833555051256, "grad_norm": 0.0, - "learning_rate": 5.6851580358775445e-06, - "loss": 0.8247, + "learning_rate": 5.067729794284104e-07, + "loss": 0.9733, "step": 23035 }, { - "epoch": 0.6527813199580605, + "epoch": 0.901322482197355, "grad_norm": 0.0, - "learning_rate": 5.684330097545174e-06, - "loss": 0.738, + "learning_rate": 5.063747610498571e-07, + "loss": 0.9503, "step": 23036 }, { - "epoch": 0.652809657400323, + "epoch": 0.9013616088895845, "grad_norm": 0.0, - "learning_rate": 5.683502195565289e-06, - "loss": 0.872, + "learning_rate": 5.059766951270307e-07, + "loss": 0.9876, "step": 23037 }, { - "epoch": 0.6528379948425855, + "epoch": 0.9014007355818139, "grad_norm": 0.0, - "learning_rate": 5.682674329944867e-06, - "loss": 0.917, + "learning_rate": 5.055787816663216e-07, + "loss": 0.9729, "step": 23038 }, { - "epoch": 0.6528663322848479, + "epoch": 0.9014398622740434, "grad_norm": 0.0, - "learning_rate": 5.681846500690884e-06, - "loss": 0.8312, + "learning_rate": 5.051810206741192e-07, + "loss": 0.9375, "step": 23039 }, { - "epoch": 0.6528946697271104, + "epoch": 0.9014789889662728, "grad_norm": 0.0, - "learning_rate": 5.68101870781031e-06, - "loss": 0.8306, + "learning_rate": 5.047834121568129e-07, + "loss": 0.9747, "step": 23040 }, { - "epoch": 0.6529230071693729, + "epoch": 0.9015181156585023, "grad_norm": 0.0, - "learning_rate": 5.6801909513101235e-06, - "loss": 0.7673, + "learning_rate": 5.043859561207853e-07, + "loss": 1.0063, "step": 23041 }, { - "epoch": 0.6529513446116354, + "epoch": 0.9015572423507316, "grad_norm": 0.0, - "learning_rate": 5.679363231197289e-06, - "loss": 0.8614, + "learning_rate": 5.039886525724236e-07, + "loss": 0.9421, "step": 23042 }, { - "epoch": 0.6529796820538978, + "epoch": 0.9015963690429611, "grad_norm": 0.0, - "learning_rate": 5.678535547478783e-06, - "loss": 0.7861, + "learning_rate": 5.035915015181025e-07, + "loss": 0.9474, "step": 23043 }, { - "epoch": 0.6530080194961603, + "epoch": 0.9016354957351905, "grad_norm": 0.0, - "learning_rate": 5.67770790016158e-06, - "loss": 0.8464, + "learning_rate": 5.031945029642038e-07, + "loss": 0.9208, "step": 23044 }, { - "epoch": 0.6530363569384228, + "epoch": 0.90167462242742, "grad_norm": 0.0, - "learning_rate": 5.676880289252644e-06, - "loss": 0.8571, + "learning_rate": 5.027976569170989e-07, + "loss": 0.801, "step": 23045 }, { - "epoch": 0.6530646943806852, + "epoch": 0.9017137491196494, "grad_norm": 0.0, - "learning_rate": 5.676052714758955e-06, - "loss": 0.9189, + "learning_rate": 5.02400963383165e-07, + "loss": 0.9681, "step": 23046 }, { - "epoch": 0.6530930318229476, + "epoch": 0.9017528758118789, "grad_norm": 0.0, - "learning_rate": 5.675225176687477e-06, - "loss": 0.9058, + "learning_rate": 5.020044223687692e-07, + "loss": 1.0904, "step": 23047 }, { - "epoch": 0.6531213692652101, + "epoch": 0.9017920025041083, "grad_norm": 0.0, - "learning_rate": 5.674397675045182e-06, - "loss": 0.818, + "learning_rate": 5.016080338802831e-07, + "loss": 0.8055, "step": 23048 }, { - "epoch": 0.6531497067074726, + "epoch": 0.9018311291963378, "grad_norm": 0.0, - "learning_rate": 5.6735702098390454e-06, - "loss": 0.8049, + "learning_rate": 5.01211797924066e-07, + "loss": 0.9458, "step": 23049 }, { - "epoch": 0.653178044149735, + "epoch": 0.9018702558885672, "grad_norm": 0.0, - "learning_rate": 5.6727427810760305e-06, - "loss": 0.7722, + "learning_rate": 5.008157145064885e-07, + "loss": 1.0672, "step": 23050 }, { - "epoch": 0.6532063815919975, + "epoch": 0.9019093825807967, "grad_norm": 0.0, - "learning_rate": 5.67191538876311e-06, - "loss": 0.8562, + "learning_rate": 5.004197836339054e-07, + "loss": 0.903, "step": 23051 }, { - "epoch": 0.65323471903426, + "epoch": 0.9019485092730261, "grad_norm": 0.0, - "learning_rate": 5.671088032907252e-06, - "loss": 0.8539, + "learning_rate": 5.000240053126781e-07, + "loss": 0.9299, "step": 23052 }, { - "epoch": 0.6532630564765224, + "epoch": 0.9019876359652556, "grad_norm": 0.0, - "learning_rate": 5.670260713515429e-06, - "loss": 0.7446, + "learning_rate": 4.996283795491597e-07, + "loss": 1.0328, "step": 23053 }, { - "epoch": 0.6532913939187849, + "epoch": 0.9020267626574849, "grad_norm": 0.0, - "learning_rate": 5.669433430594611e-06, - "loss": 0.9697, + "learning_rate": 4.992329063497059e-07, + "loss": 0.8948, "step": 23054 }, { - "epoch": 0.6533197313610474, + "epoch": 0.9020658893497143, "grad_norm": 0.0, - "learning_rate": 5.6686061841517585e-06, - "loss": 0.831, + "learning_rate": 4.988375857206651e-07, + "loss": 0.9977, "step": 23055 }, { - "epoch": 0.6533480688033099, + "epoch": 0.9021050160419438, "grad_norm": 0.0, - "learning_rate": 5.667778974193845e-06, - "loss": 0.8784, + "learning_rate": 4.984424176683888e-07, + "loss": 1.1035, "step": 23056 }, { - "epoch": 0.6533764062455722, + "epoch": 0.9021441427341732, "grad_norm": 0.0, - "learning_rate": 5.6669518007278425e-06, - "loss": 0.8282, + "learning_rate": 4.98047402199221e-07, + "loss": 1.0863, "step": 23057 }, { - "epoch": 0.6534047436878347, + "epoch": 0.9021832694264027, "grad_norm": 0.0, - "learning_rate": 5.6661246637607085e-06, - "loss": 0.7471, + "learning_rate": 4.976525393195042e-07, + "loss": 0.929, "step": 23058 }, { - "epoch": 0.6534330811300972, + "epoch": 0.9022223961186321, "grad_norm": 0.0, - "learning_rate": 5.6652975632994214e-06, - "loss": 0.89, + "learning_rate": 4.972578290355812e-07, + "loss": 0.991, "step": 23059 }, { - "epoch": 0.6534614185723596, + "epoch": 0.9022615228108616, "grad_norm": 0.0, - "learning_rate": 5.664470499350938e-06, - "loss": 0.7586, + "learning_rate": 4.968632713537902e-07, + "loss": 0.9799, "step": 23060 }, { - "epoch": 0.6534897560146221, + "epoch": 0.902300649503091, "grad_norm": 0.0, - "learning_rate": 5.663643471922229e-06, - "loss": 0.819, + "learning_rate": 4.964688662804662e-07, + "loss": 1.0324, "step": 23061 }, { - "epoch": 0.6535180934568846, + "epoch": 0.9023397761953205, "grad_norm": 0.0, - "learning_rate": 5.6628164810202655e-06, - "loss": 0.8713, + "learning_rate": 4.960746138219441e-07, + "loss": 0.8607, "step": 23062 }, { - "epoch": 0.653546430899147, + "epoch": 0.9023789028875498, "grad_norm": 0.0, - "learning_rate": 5.661989526652007e-06, - "loss": 0.8683, + "learning_rate": 4.956805139845533e-07, + "loss": 0.9626, "step": 23063 }, { - "epoch": 0.6535747683414095, + "epoch": 0.9024180295797793, "grad_norm": 0.0, - "learning_rate": 5.66116260882442e-06, - "loss": 0.8402, + "learning_rate": 4.952865667746265e-07, + "loss": 0.9174, "step": 23064 }, { - "epoch": 0.653603105783672, + "epoch": 0.9024571562720087, "grad_norm": 0.0, - "learning_rate": 5.660335727544472e-06, - "loss": 0.8332, + "learning_rate": 4.948927721984853e-07, + "loss": 0.8894, "step": 23065 }, { - "epoch": 0.6536314432259345, + "epoch": 0.9024962829642382, "grad_norm": 0.0, - "learning_rate": 5.659508882819127e-06, - "loss": 0.891, + "learning_rate": 4.94499130262458e-07, + "loss": 0.8726, "step": 23066 }, { - "epoch": 0.6536597806681969, + "epoch": 0.9025354096564676, "grad_norm": 0.0, - "learning_rate": 5.658682074655355e-06, - "loss": 0.7632, + "learning_rate": 4.941056409728595e-07, + "loss": 0.9938, "step": 23067 }, { - "epoch": 0.6536881181104593, + "epoch": 0.9025745363486971, "grad_norm": 0.0, - "learning_rate": 5.657855303060112e-06, - "loss": 0.8176, + "learning_rate": 4.937123043360159e-07, + "loss": 0.952, "step": 23068 }, { - "epoch": 0.6537164555527218, + "epoch": 0.9026136630409265, "grad_norm": 0.0, - "learning_rate": 5.657028568040366e-06, - "loss": 0.8459, + "learning_rate": 4.933191203582399e-07, + "loss": 0.9529, "step": 23069 }, { - "epoch": 0.6537447929949842, + "epoch": 0.902652789733156, "grad_norm": 0.0, - "learning_rate": 5.656201869603087e-06, - "loss": 0.8387, + "learning_rate": 4.929260890458476e-07, + "loss": 0.8703, "step": 23070 }, { - "epoch": 0.6537731304372467, + "epoch": 0.9026919164253854, "grad_norm": 0.0, - "learning_rate": 5.655375207755226e-06, - "loss": 0.8616, + "learning_rate": 4.925332104051472e-07, + "loss": 0.9361, "step": 23071 }, { - "epoch": 0.6538014678795092, + "epoch": 0.9027310431176149, "grad_norm": 0.0, - "learning_rate": 5.6545485825037534e-06, - "loss": 0.8383, + "learning_rate": 4.921404844424504e-07, + "loss": 1.0046, "step": 23072 }, { - "epoch": 0.6538298053217717, + "epoch": 0.9027701698098443, "grad_norm": 0.0, - "learning_rate": 5.6537219938556365e-06, - "loss": 0.8196, + "learning_rate": 4.917479111640633e-07, + "loss": 0.9817, "step": 23073 }, { - "epoch": 0.6538581427640341, + "epoch": 0.9028092965020738, "grad_norm": 0.0, - "learning_rate": 5.652895441817829e-06, - "loss": 0.8119, + "learning_rate": 4.913554905762919e-07, + "loss": 1.008, "step": 23074 }, { - "epoch": 0.6538864802062966, + "epoch": 0.9028484231943031, "grad_norm": 0.0, - "learning_rate": 5.6520689263973014e-06, - "loss": 0.8773, + "learning_rate": 4.909632226854343e-07, + "loss": 0.9096, "step": 23075 }, { - "epoch": 0.6539148176485591, + "epoch": 0.9028875498865326, "grad_norm": 0.0, - "learning_rate": 5.6512424476010085e-06, - "loss": 0.7895, + "learning_rate": 4.905711074977926e-07, + "loss": 0.8721, "step": 23076 }, { - "epoch": 0.6539431550908215, + "epoch": 0.902926676578762, "grad_norm": 0.0, - "learning_rate": 5.650416005435915e-06, - "loss": 0.8454, + "learning_rate": 4.901791450196646e-07, + "loss": 1.0278, "step": 23077 }, { - "epoch": 0.653971492533084, + "epoch": 0.9029658032709915, "grad_norm": 0.0, - "learning_rate": 5.6495895999089826e-06, - "loss": 0.8744, + "learning_rate": 4.897873352573401e-07, + "loss": 0.9332, "step": 23078 }, { - "epoch": 0.6539998299753464, + "epoch": 0.9030049299632209, "grad_norm": 0.0, - "learning_rate": 5.648763231027171e-06, - "loss": 0.7627, + "learning_rate": 4.893956782171161e-07, + "loss": 0.9294, "step": 23079 }, { - "epoch": 0.6540281674176089, + "epoch": 0.9030440566554504, "grad_norm": 0.0, - "learning_rate": 5.647936898797444e-06, - "loss": 0.9123, + "learning_rate": 4.890041739052786e-07, + "loss": 1.1138, "step": 23080 }, { - "epoch": 0.6540565048598713, + "epoch": 0.9030831833476798, "grad_norm": 0.0, - "learning_rate": 5.647110603226764e-06, - "loss": 0.8069, + "learning_rate": 4.886128223281173e-07, + "loss": 0.8341, "step": 23081 }, { - "epoch": 0.6540848423021338, + "epoch": 0.9031223100399092, "grad_norm": 0.0, - "learning_rate": 5.646284344322084e-06, - "loss": 0.8675, + "learning_rate": 4.882216234919157e-07, + "loss": 0.9985, "step": 23082 }, { - "epoch": 0.6541131797443963, + "epoch": 0.9031614367321387, "grad_norm": 0.0, - "learning_rate": 5.645458122090372e-06, - "loss": 0.8132, + "learning_rate": 4.878305774029557e-07, + "loss": 0.9915, "step": 23083 }, { - "epoch": 0.6541415171866587, + "epoch": 0.903200563424368, "grad_norm": 0.0, - "learning_rate": 5.644631936538578e-06, - "loss": 0.8503, + "learning_rate": 4.874396840675166e-07, + "loss": 1.0164, "step": 23084 }, { - "epoch": 0.6541698546289212, + "epoch": 0.9032396901165975, "grad_norm": 0.0, - "learning_rate": 5.643805787673668e-06, - "loss": 0.961, + "learning_rate": 4.870489434918768e-07, + "loss": 1.0433, "step": 23085 }, { - "epoch": 0.6541981920711837, + "epoch": 0.9032788168088269, "grad_norm": 0.0, - "learning_rate": 5.642979675502603e-06, - "loss": 0.8343, + "learning_rate": 4.86658355682309e-07, + "loss": 0.9368, "step": 23086 }, { - "epoch": 0.6542265295134461, + "epoch": 0.9033179435010564, "grad_norm": 0.0, - "learning_rate": 5.642153600032334e-06, - "loss": 0.8542, + "learning_rate": 4.862679206450904e-07, + "loss": 0.9626, "step": 23087 }, { - "epoch": 0.6542548669557086, + "epoch": 0.9033570701932858, "grad_norm": 0.0, - "learning_rate": 5.641327561269828e-06, - "loss": 0.8353, + "learning_rate": 4.858776383864849e-07, + "loss": 1.0632, "step": 23088 }, { - "epoch": 0.654283204397971, + "epoch": 0.9033961968855153, "grad_norm": 0.0, - "learning_rate": 5.640501559222034e-06, - "loss": 0.8341, + "learning_rate": 4.854875089127631e-07, + "loss": 0.9496, "step": 23089 }, { - "epoch": 0.6543115418402335, + "epoch": 0.9034353235777447, "grad_norm": 0.0, - "learning_rate": 5.639675593895915e-06, - "loss": 0.7728, + "learning_rate": 4.850975322301898e-07, + "loss": 0.9659, "step": 23090 }, { - "epoch": 0.6543398792824959, + "epoch": 0.9034744502699742, "grad_norm": 0.0, - "learning_rate": 5.638849665298427e-06, - "loss": 0.8019, + "learning_rate": 4.84707708345028e-07, + "loss": 0.7902, "step": 23091 }, { - "epoch": 0.6543682167247584, + "epoch": 0.9035135769622036, "grad_norm": 0.0, - "learning_rate": 5.6380237734365275e-06, - "loss": 0.8392, + "learning_rate": 4.843180372635358e-07, + "loss": 0.9164, "step": 23092 }, { - "epoch": 0.6543965541670209, + "epoch": 0.9035527036544331, "grad_norm": 0.0, - "learning_rate": 5.637197918317174e-06, - "loss": 0.7889, + "learning_rate": 4.83928518991974e-07, + "loss": 0.9153, "step": 23093 }, { - "epoch": 0.6544248916092833, + "epoch": 0.9035918303466625, "grad_norm": 0.0, - "learning_rate": 5.636372099947327e-06, - "loss": 0.8005, + "learning_rate": 4.83539153536593e-07, + "loss": 0.8767, "step": 23094 }, { - "epoch": 0.6544532290515458, + "epoch": 0.903630957038892, "grad_norm": 0.0, - "learning_rate": 5.635546318333933e-06, - "loss": 0.8816, + "learning_rate": 4.83149940903651e-07, + "loss": 0.8177, "step": 23095 }, { - "epoch": 0.6544815664938083, + "epoch": 0.9036700837311213, "grad_norm": 0.0, - "learning_rate": 5.634720573483957e-06, - "loss": 0.9515, + "learning_rate": 4.827608810993945e-07, + "loss": 0.8258, "step": 23096 }, { - "epoch": 0.6545099039360708, + "epoch": 0.9037092104233508, "grad_norm": 0.0, - "learning_rate": 5.633894865404348e-06, - "loss": 0.9091, + "learning_rate": 4.823719741300737e-07, + "loss": 0.9571, "step": 23097 }, { - "epoch": 0.6545382413783332, + "epoch": 0.9037483371155802, "grad_norm": 0.0, - "learning_rate": 5.633069194102063e-06, - "loss": 0.6696, + "learning_rate": 4.819832200019303e-07, + "loss": 0.808, "step": 23098 }, { - "epoch": 0.6545665788205957, + "epoch": 0.9037874638078097, "grad_norm": 0.0, - "learning_rate": 5.632243559584061e-06, - "loss": 0.8004, + "learning_rate": 4.815946187212117e-07, + "loss": 0.8951, "step": 23099 }, { - "epoch": 0.6545949162628582, + "epoch": 0.9038265905000391, "grad_norm": 0.0, - "learning_rate": 5.631417961857291e-06, - "loss": 0.7814, + "learning_rate": 4.812061702941562e-07, + "loss": 0.8775, "step": 23100 }, { - "epoch": 0.6546232537051205, + "epoch": 0.9038657171922686, "grad_norm": 0.0, - "learning_rate": 5.630592400928709e-06, - "loss": 0.8404, + "learning_rate": 4.808178747270021e-07, + "loss": 0.9213, "step": 23101 }, { - "epoch": 0.654651591147383, + "epoch": 0.903904843884498, "grad_norm": 0.0, - "learning_rate": 5.629766876805274e-06, - "loss": 0.9209, + "learning_rate": 4.804297320259832e-07, + "loss": 0.8976, "step": 23102 }, { - "epoch": 0.6546799285896455, + "epoch": 0.9039439705767275, "grad_norm": 0.0, - "learning_rate": 5.628941389493932e-06, - "loss": 0.8408, + "learning_rate": 4.800417421973347e-07, + "loss": 1.0203, "step": 23103 }, { - "epoch": 0.654708266031908, + "epoch": 0.9039830972689569, "grad_norm": 0.0, - "learning_rate": 5.628115939001637e-06, - "loss": 0.8176, + "learning_rate": 4.79653905247287e-07, + "loss": 0.9476, "step": 23104 }, { - "epoch": 0.6547366034741704, + "epoch": 0.9040222239611864, "grad_norm": 0.0, - "learning_rate": 5.627290525335347e-06, - "loss": 0.8541, + "learning_rate": 4.792662211820687e-07, + "loss": 0.8837, "step": 23105 }, { - "epoch": 0.6547649409164329, + "epoch": 0.9040613506534158, "grad_norm": 0.0, - "learning_rate": 5.626465148502014e-06, - "loss": 0.8644, + "learning_rate": 4.788786900079034e-07, + "loss": 0.9589, "step": 23106 }, { - "epoch": 0.6547932783586954, + "epoch": 0.9041004773456452, "grad_norm": 0.0, - "learning_rate": 5.625639808508592e-06, - "loss": 0.887, + "learning_rate": 4.784913117310153e-07, + "loss": 0.9474, "step": 23107 }, { - "epoch": 0.6548216158009578, + "epoch": 0.9041396040378746, "grad_norm": 0.0, - "learning_rate": 5.624814505362025e-06, - "loss": 0.9205, + "learning_rate": 4.781040863576258e-07, + "loss": 0.8238, "step": 23108 }, { - "epoch": 0.6548499532432203, + "epoch": 0.9041787307301041, "grad_norm": 0.0, - "learning_rate": 5.623989239069275e-06, - "loss": 0.8113, + "learning_rate": 4.777170138939546e-07, + "loss": 0.9742, "step": 23109 }, { - "epoch": 0.6548782906854828, + "epoch": 0.9042178574223335, "grad_norm": 0.0, - "learning_rate": 5.6231640096372856e-06, - "loss": 0.8967, + "learning_rate": 4.773300943462156e-07, + "loss": 1.0412, "step": 23110 }, { - "epoch": 0.6549066281277451, + "epoch": 0.9042569841145629, "grad_norm": 0.0, - "learning_rate": 5.62233881707301e-06, - "loss": 0.8086, + "learning_rate": 4.769433277206226e-07, + "loss": 0.9565, "step": 23111 }, { - "epoch": 0.6549349655700076, + "epoch": 0.9042961108067924, "grad_norm": 0.0, - "learning_rate": 5.621513661383404e-06, - "loss": 0.8723, + "learning_rate": 4.765567140233851e-07, + "loss": 0.9282, "step": 23112 }, { - "epoch": 0.6549633030122701, + "epoch": 0.9043352374990218, "grad_norm": 0.0, - "learning_rate": 5.62068854257541e-06, - "loss": 0.7288, + "learning_rate": 4.7617025326071597e-07, + "loss": 0.9337, "step": 23113 }, { - "epoch": 0.6549916404545326, + "epoch": 0.9043743641912513, "grad_norm": 0.0, - "learning_rate": 5.619863460655983e-06, - "loss": 0.7469, + "learning_rate": 4.757839454388169e-07, + "loss": 0.986, "step": 23114 }, { - "epoch": 0.655019977896795, + "epoch": 0.9044134908834807, "grad_norm": 0.0, - "learning_rate": 5.619038415632078e-06, - "loss": 0.805, + "learning_rate": 4.7539779056389404e-07, + "loss": 0.9196, "step": 23115 }, { - "epoch": 0.6550483153390575, + "epoch": 0.9044526175757102, "grad_norm": 0.0, - "learning_rate": 5.618213407510633e-06, - "loss": 0.7984, + "learning_rate": 4.750117886421468e-07, + "loss": 0.989, "step": 23116 }, { - "epoch": 0.65507665278132, + "epoch": 0.9044917442679395, "grad_norm": 0.0, - "learning_rate": 5.617388436298605e-06, - "loss": 0.8368, + "learning_rate": 4.7462593967977475e-07, + "loss": 0.9102, "step": 23117 }, { - "epoch": 0.6551049902235824, + "epoch": 0.904530870960169, "grad_norm": 0.0, - "learning_rate": 5.616563502002942e-06, - "loss": 0.7711, + "learning_rate": 4.7424024368297296e-07, + "loss": 0.9811, "step": 23118 }, { - "epoch": 0.6551333276658449, + "epoch": 0.9045699976523984, "grad_norm": 0.0, - "learning_rate": 5.615738604630592e-06, - "loss": 0.8075, + "learning_rate": 4.738547006579397e-07, + "loss": 0.9804, "step": 23119 }, { - "epoch": 0.6551616651081074, + "epoch": 0.9046091243446279, "grad_norm": 0.0, - "learning_rate": 5.614913744188508e-06, - "loss": 0.8084, + "learning_rate": 4.734693106108601e-07, + "loss": 1.1262, "step": 23120 }, { - "epoch": 0.6551900025503699, + "epoch": 0.9046482510368573, "grad_norm": 0.0, - "learning_rate": 5.614088920683629e-06, - "loss": 0.7842, + "learning_rate": 4.73084073547927e-07, + "loss": 0.9222, "step": 23121 }, { - "epoch": 0.6552183399926322, + "epoch": 0.9046873777290868, "grad_norm": 0.0, - "learning_rate": 5.6132641341229085e-06, - "loss": 0.8065, + "learning_rate": 4.7269898947532644e-07, + "loss": 0.956, "step": 23122 }, { - "epoch": 0.6552466774348947, + "epoch": 0.9047265044213162, "grad_norm": 0.0, - "learning_rate": 5.6124393845132964e-06, - "loss": 0.8434, + "learning_rate": 4.723140583992414e-07, + "loss": 0.8679, "step": 23123 }, { - "epoch": 0.6552750148771572, + "epoch": 0.9047656311135457, "grad_norm": 0.0, - "learning_rate": 5.611614671861733e-06, - "loss": 0.819, + "learning_rate": 4.719292803258524e-07, + "loss": 0.8937, "step": 23124 }, { - "epoch": 0.6553033523194196, + "epoch": 0.9048047578057751, "grad_norm": 0.0, - "learning_rate": 5.610789996175172e-06, - "loss": 0.8412, + "learning_rate": 4.715446552613401e-07, + "loss": 0.8972, "step": 23125 }, { - "epoch": 0.6553316897616821, + "epoch": 0.9048438844980046, "grad_norm": 0.0, - "learning_rate": 5.609965357460554e-06, - "loss": 0.9042, + "learning_rate": 4.711601832118828e-07, + "loss": 1.0111, "step": 23126 }, { - "epoch": 0.6553600272039446, + "epoch": 0.904883011190234, "grad_norm": 0.0, - "learning_rate": 5.609140755724829e-06, - "loss": 0.9582, + "learning_rate": 4.7077586418365126e-07, + "loss": 1.0006, "step": 23127 }, { - "epoch": 0.655388364646207, + "epoch": 0.9049221378824635, "grad_norm": 0.0, - "learning_rate": 5.608316190974944e-06, - "loss": 0.8207, + "learning_rate": 4.703916981828194e-07, + "loss": 0.9576, "step": 23128 }, { - "epoch": 0.6554167020884695, + "epoch": 0.9049612645746928, "grad_norm": 0.0, - "learning_rate": 5.607491663217839e-06, - "loss": 0.9238, + "learning_rate": 4.700076852155533e-07, + "loss": 1.0029, "step": 23129 }, { - "epoch": 0.655445039530732, + "epoch": 0.9050003912669223, "grad_norm": 0.0, - "learning_rate": 5.606667172460462e-06, - "loss": 0.7588, + "learning_rate": 4.6962382528802476e-07, + "loss": 0.8291, "step": 23130 }, { - "epoch": 0.6554733769729945, + "epoch": 0.9050395179591517, "grad_norm": 0.0, - "learning_rate": 5.605842718709759e-06, - "loss": 0.7971, + "learning_rate": 4.6924011840639327e-07, + "loss": 0.9539, "step": 23131 }, { - "epoch": 0.6555017144152568, + "epoch": 0.9050786446513812, "grad_norm": 0.0, - "learning_rate": 5.605018301972677e-06, - "loss": 0.891, + "learning_rate": 4.6885656457682505e-07, + "loss": 0.8795, "step": 23132 }, { - "epoch": 0.6555300518575193, + "epoch": 0.9051177713436106, "grad_norm": 0.0, - "learning_rate": 5.60419392225616e-06, - "loss": 0.8469, + "learning_rate": 4.6847316380547513e-07, + "loss": 0.8474, "step": 23133 }, { - "epoch": 0.6555583892997818, + "epoch": 0.9051568980358401, "grad_norm": 0.0, - "learning_rate": 5.6033695795671465e-06, - "loss": 0.91, + "learning_rate": 4.6808991609850307e-07, + "loss": 0.953, "step": 23134 }, { - "epoch": 0.6555867267420442, + "epoch": 0.9051960247280695, "grad_norm": 0.0, - "learning_rate": 5.602545273912583e-06, - "loss": 0.7542, + "learning_rate": 4.6770682146206283e-07, + "loss": 0.8096, "step": 23135 }, { - "epoch": 0.6556150641843067, + "epoch": 0.905235151420299, "grad_norm": 0.0, - "learning_rate": 5.601721005299418e-06, - "loss": 0.887, + "learning_rate": 4.673238799023072e-07, + "loss": 1.0114, "step": 23136 }, { - "epoch": 0.6556434016265692, + "epoch": 0.9052742781125284, "grad_norm": 0.0, - "learning_rate": 5.600896773734585e-06, - "loss": 0.9162, + "learning_rate": 4.6694109142538467e-07, + "loss": 1.0103, "step": 23137 }, { - "epoch": 0.6556717390688317, + "epoch": 0.9053134048047579, "grad_norm": 0.0, - "learning_rate": 5.600072579225038e-06, - "loss": 0.8535, + "learning_rate": 4.665584560374414e-07, + "loss": 0.9208, "step": 23138 }, { - "epoch": 0.6557000765110941, + "epoch": 0.9053525314969872, "grad_norm": 0.0, - "learning_rate": 5.5992484217777074e-06, - "loss": 0.8609, + "learning_rate": 4.6617597374462366e-07, + "loss": 0.9167, "step": 23139 }, { - "epoch": 0.6557284139533566, + "epoch": 0.9053916581892166, "grad_norm": 0.0, - "learning_rate": 5.598424301399543e-06, - "loss": 0.8788, + "learning_rate": 4.6579364455307527e-07, + "loss": 0.9716, "step": 23140 }, { - "epoch": 0.6557567513956191, + "epoch": 0.9054307848814461, "grad_norm": 0.0, - "learning_rate": 5.597600218097489e-06, - "loss": 0.909, + "learning_rate": 4.654114684689315e-07, + "loss": 0.8672, "step": 23141 }, { - "epoch": 0.6557850888378814, + "epoch": 0.9054699115736755, "grad_norm": 0.0, - "learning_rate": 5.5967761718784795e-06, - "loss": 0.8413, + "learning_rate": 4.6502944549833397e-07, + "loss": 1.0201, "step": 23142 }, { - "epoch": 0.6558134262801439, + "epoch": 0.905509038265905, "grad_norm": 0.0, - "learning_rate": 5.595952162749459e-06, - "loss": 0.7628, + "learning_rate": 4.6464757564741223e-07, + "loss": 0.9678, "step": 23143 }, { - "epoch": 0.6558417637224064, + "epoch": 0.9055481649581344, "grad_norm": 0.0, - "learning_rate": 5.595128190717369e-06, - "loss": 0.8969, + "learning_rate": 4.6426585892230593e-07, + "loss": 1.0081, "step": 23144 }, { - "epoch": 0.6558701011646689, + "epoch": 0.9055872916503639, "grad_norm": 0.0, - "learning_rate": 5.59430425578915e-06, - "loss": 0.9005, + "learning_rate": 4.638842953291389e-07, + "loss": 0.8066, "step": 23145 }, { - "epoch": 0.6558984386069313, + "epoch": 0.9056264183425933, "grad_norm": 0.0, - "learning_rate": 5.5934803579717455e-06, - "loss": 0.7923, + "learning_rate": 4.6350288487404194e-07, + "loss": 0.9656, "step": 23146 }, { - "epoch": 0.6559267760491938, + "epoch": 0.9056655450348228, "grad_norm": 0.0, - "learning_rate": 5.59265649727209e-06, - "loss": 0.8592, + "learning_rate": 4.631216275631356e-07, + "loss": 1.082, "step": 23147 }, { - "epoch": 0.6559551134914563, + "epoch": 0.9057046717270522, "grad_norm": 0.0, - "learning_rate": 5.591832673697124e-06, - "loss": 0.8652, + "learning_rate": 4.627405234025495e-07, + "loss": 1.061, "step": 23148 }, { - "epoch": 0.6559834509337187, + "epoch": 0.9057437984192817, "grad_norm": 0.0, - "learning_rate": 5.591008887253792e-06, - "loss": 0.7237, + "learning_rate": 4.6235957239839755e-07, + "loss": 0.8877, "step": 23149 }, { - "epoch": 0.6560117883759812, + "epoch": 0.905782925111511, "grad_norm": 0.0, - "learning_rate": 5.590185137949027e-06, - "loss": 0.8219, + "learning_rate": 4.619787745568005e-07, + "loss": 0.849, "step": 23150 }, { - "epoch": 0.6560401258182437, + "epoch": 0.9058220518037405, "grad_norm": 0.0, - "learning_rate": 5.589361425789769e-06, - "loss": 0.8213, + "learning_rate": 4.615981298838712e-07, + "loss": 0.935, "step": 23151 }, { - "epoch": 0.656068463260506, + "epoch": 0.9058611784959699, "grad_norm": 0.0, - "learning_rate": 5.588537750782961e-06, - "loss": 0.6799, + "learning_rate": 4.6121763838572473e-07, + "loss": 1.0701, "step": 23152 }, { - "epoch": 0.6560968007027685, + "epoch": 0.9059003051881994, "grad_norm": 0.0, - "learning_rate": 5.587714112935535e-06, - "loss": 0.7655, + "learning_rate": 4.6083730006846963e-07, + "loss": 0.9994, "step": 23153 }, { - "epoch": 0.656125138145031, + "epoch": 0.9059394318804288, "grad_norm": 0.0, - "learning_rate": 5.5868905122544344e-06, - "loss": 0.8544, + "learning_rate": 4.604571149382153e-07, + "loss": 0.9815, "step": 23154 }, { - "epoch": 0.6561534755872935, + "epoch": 0.9059785585726583, "grad_norm": 0.0, - "learning_rate": 5.5860669487465915e-06, - "loss": 0.8286, + "learning_rate": 4.600770830010648e-07, + "loss": 0.972, "step": 23155 }, { - "epoch": 0.6561818130295559, + "epoch": 0.9060176852648877, "grad_norm": 0.0, - "learning_rate": 5.585243422418945e-06, - "loss": 0.738, + "learning_rate": 4.5969720426312204e-07, + "loss": 1.009, "step": 23156 }, { - "epoch": 0.6562101504718184, + "epoch": 0.9060568119571172, "grad_norm": 0.0, - "learning_rate": 5.584419933278431e-06, - "loss": 0.8212, + "learning_rate": 4.593174787304877e-07, + "loss": 0.7866, "step": 23157 }, { - "epoch": 0.6562384879140809, + "epoch": 0.9060959386493466, "grad_norm": 0.0, - "learning_rate": 5.583596481331987e-06, - "loss": 0.7737, + "learning_rate": 4.5893790640926137e-07, + "loss": 0.8508, "step": 23158 }, { - "epoch": 0.6562668253563433, + "epoch": 0.9061350653415761, "grad_norm": 0.0, - "learning_rate": 5.582773066586553e-06, - "loss": 0.703, + "learning_rate": 4.5855848730553486e-07, + "loss": 0.9852, "step": 23159 }, { - "epoch": 0.6562951627986058, + "epoch": 0.9061741920338054, "grad_norm": 0.0, - "learning_rate": 5.581949689049064e-06, - "loss": 0.8953, + "learning_rate": 4.581792214254044e-07, + "loss": 0.888, "step": 23160 }, { - "epoch": 0.6563235002408683, + "epoch": 0.9062133187260349, "grad_norm": 0.0, - "learning_rate": 5.58112634872645e-06, - "loss": 0.8868, + "learning_rate": 4.578001087749573e-07, + "loss": 1.0849, "step": 23161 }, { - "epoch": 0.6563518376831308, + "epoch": 0.9062524454182643, "grad_norm": 0.0, - "learning_rate": 5.580303045625652e-06, - "loss": 0.7894, + "learning_rate": 4.5742114936028315e-07, + "loss": 1.0714, "step": 23162 }, { - "epoch": 0.6563801751253932, + "epoch": 0.9062915721104938, "grad_norm": 0.0, - "learning_rate": 5.5794797797536e-06, - "loss": 0.8148, + "learning_rate": 4.570423431874693e-07, + "loss": 1.0134, "step": 23163 }, { - "epoch": 0.6564085125676556, + "epoch": 0.9063306988027232, "grad_norm": 0.0, - "learning_rate": 5.578656551117232e-06, - "loss": 0.8071, + "learning_rate": 4.566636902625976e-07, + "loss": 1.071, "step": 23164 }, { - "epoch": 0.6564368500099181, + "epoch": 0.9063698254949527, "grad_norm": 0.0, - "learning_rate": 5.577833359723485e-06, - "loss": 0.9266, + "learning_rate": 4.562851905917476e-07, + "loss": 0.9601, "step": 23165 }, { - "epoch": 0.6564651874521805, + "epoch": 0.9064089521871821, "grad_norm": 0.0, - "learning_rate": 5.577010205579285e-06, - "loss": 0.7932, + "learning_rate": 4.5590684418099776e-07, + "loss": 0.9662, "step": 23166 }, { - "epoch": 0.656493524894443, + "epoch": 0.9064480788794116, "grad_norm": 0.0, - "learning_rate": 5.576187088691576e-06, - "loss": 0.8637, + "learning_rate": 4.555286510364265e-07, + "loss": 0.9605, "step": 23167 }, { - "epoch": 0.6565218623367055, + "epoch": 0.906487205571641, "grad_norm": 0.0, - "learning_rate": 5.575364009067281e-06, - "loss": 0.82, + "learning_rate": 4.551506111641035e-07, + "loss": 0.9545, "step": 23168 }, { - "epoch": 0.656550199778968, + "epoch": 0.9065263322638704, "grad_norm": 0.0, - "learning_rate": 5.574540966713338e-06, - "loss": 0.6781, + "learning_rate": 4.547727245701028e-07, + "loss": 1.0606, "step": 23169 }, { - "epoch": 0.6565785372212304, + "epoch": 0.9065654589560999, "grad_norm": 0.0, - "learning_rate": 5.57371796163668e-06, - "loss": 0.7947, + "learning_rate": 4.5439499126048945e-07, + "loss": 0.9661, "step": 23170 }, { - "epoch": 0.6566068746634929, + "epoch": 0.9066045856483292, "grad_norm": 0.0, - "learning_rate": 5.57289499384424e-06, - "loss": 0.8794, + "learning_rate": 4.5401741124133315e-07, + "loss": 0.9839, "step": 23171 }, { - "epoch": 0.6566352121057554, + "epoch": 0.9066437123405587, "grad_norm": 0.0, - "learning_rate": 5.572072063342948e-06, - "loss": 0.7768, + "learning_rate": 4.536399845186945e-07, + "loss": 0.9582, "step": 23172 }, { - "epoch": 0.6566635495480178, + "epoch": 0.9066828390327881, "grad_norm": 0.0, - "learning_rate": 5.571249170139742e-06, - "loss": 0.8528, + "learning_rate": 4.532627110986365e-07, + "loss": 0.9417, "step": 23173 }, { - "epoch": 0.6566918869902802, + "epoch": 0.9067219657250176, "grad_norm": 0.0, - "learning_rate": 5.570426314241544e-06, - "loss": 0.8896, + "learning_rate": 4.5288559098721427e-07, + "loss": 1.0028, "step": 23174 }, { - "epoch": 0.6567202244325427, + "epoch": 0.906761092417247, "grad_norm": 0.0, - "learning_rate": 5.569603495655296e-06, - "loss": 0.8723, + "learning_rate": 4.5250862419048856e-07, + "loss": 0.914, "step": 23175 }, { - "epoch": 0.6567485618748051, + "epoch": 0.9068002191094765, "grad_norm": 0.0, - "learning_rate": 5.568780714387917e-06, - "loss": 0.9019, + "learning_rate": 4.5213181071450894e-07, + "loss": 1.0586, "step": 23176 }, { - "epoch": 0.6567768993170676, + "epoch": 0.9068393458017059, "grad_norm": 0.0, - "learning_rate": 5.567957970446344e-06, - "loss": 0.7492, + "learning_rate": 4.517551505653306e-07, + "loss": 0.9457, "step": 23177 }, { - "epoch": 0.6568052367593301, + "epoch": 0.9068784724939354, "grad_norm": 0.0, - "learning_rate": 5.567135263837511e-06, - "loss": 0.8448, + "learning_rate": 4.513786437489964e-07, + "loss": 0.9736, "step": 23178 }, { - "epoch": 0.6568335742015926, + "epoch": 0.9069175991861648, "grad_norm": 0.0, - "learning_rate": 5.56631259456834e-06, - "loss": 0.9129, + "learning_rate": 4.510022902715594e-07, + "loss": 0.8966, "step": 23179 }, { - "epoch": 0.656861911643855, + "epoch": 0.9069567258783943, "grad_norm": 0.0, - "learning_rate": 5.5654899626457645e-06, - "loss": 0.9888, + "learning_rate": 4.50626090139058e-07, + "loss": 0.9882, "step": 23180 }, { - "epoch": 0.6568902490861175, + "epoch": 0.9069958525706237, "grad_norm": 0.0, - "learning_rate": 5.564667368076717e-06, - "loss": 0.8492, + "learning_rate": 4.502500433575374e-07, + "loss": 0.8731, "step": 23181 }, { - "epoch": 0.65691858652838, + "epoch": 0.9070349792628531, "grad_norm": 0.0, - "learning_rate": 5.5638448108681195e-06, - "loss": 0.8794, + "learning_rate": 4.498741499330339e-07, + "loss": 0.9521, "step": 23182 }, { - "epoch": 0.6569469239706424, + "epoch": 0.9070741059550825, "grad_norm": 0.0, - "learning_rate": 5.563022291026905e-06, - "loss": 0.9305, + "learning_rate": 4.4949840987158377e-07, + "loss": 1.0666, "step": 23183 }, { - "epoch": 0.6569752614129049, + "epoch": 0.907113232647312, "grad_norm": 0.0, - "learning_rate": 5.562199808560001e-06, - "loss": 0.8692, + "learning_rate": 4.4912282317922107e-07, + "loss": 0.9822, "step": 23184 }, { - "epoch": 0.6570035988551673, + "epoch": 0.9071523593395414, "grad_norm": 0.0, - "learning_rate": 5.561377363474336e-06, - "loss": 0.9046, + "learning_rate": 4.4874738986198096e-07, + "loss": 0.9977, "step": 23185 }, { - "epoch": 0.6570319362974298, + "epoch": 0.9071914860317709, "grad_norm": 0.0, - "learning_rate": 5.560554955776843e-06, - "loss": 0.9234, + "learning_rate": 4.4837210992588643e-07, + "loss": 0.9733, "step": 23186 }, { - "epoch": 0.6570602737396922, + "epoch": 0.9072306127240003, "grad_norm": 0.0, - "learning_rate": 5.559732585474438e-06, - "loss": 0.9326, + "learning_rate": 4.4799698337696815e-07, + "loss": 0.9299, "step": 23187 }, { - "epoch": 0.6570886111819547, + "epoch": 0.9072697394162298, "grad_norm": 0.0, - "learning_rate": 5.558910252574054e-06, - "loss": 0.9013, + "learning_rate": 4.476220102212481e-07, + "loss": 0.9525, "step": 23188 }, { - "epoch": 0.6571169486242172, + "epoch": 0.9073088661084592, "grad_norm": 0.0, - "learning_rate": 5.558087957082624e-06, - "loss": 0.9432, + "learning_rate": 4.472471904647502e-07, + "loss": 0.9221, "step": 23189 }, { - "epoch": 0.6571452860664796, + "epoch": 0.9073479928006887, "grad_norm": 0.0, - "learning_rate": 5.557265699007064e-06, - "loss": 0.8482, + "learning_rate": 4.468725241134908e-07, + "loss": 0.9942, "step": 23190 }, { - "epoch": 0.6571736235087421, + "epoch": 0.9073871194929181, "grad_norm": 0.0, - "learning_rate": 5.556443478354309e-06, - "loss": 0.8298, + "learning_rate": 4.4649801117348957e-07, + "loss": 1.0428, "step": 23191 }, { - "epoch": 0.6572019609510046, + "epoch": 0.9074262461851476, "grad_norm": 0.0, - "learning_rate": 5.555621295131276e-06, - "loss": 0.8527, + "learning_rate": 4.4612365165075724e-07, + "loss": 0.9601, "step": 23192 }, { - "epoch": 0.6572302983932671, + "epoch": 0.9074653728773769, "grad_norm": 0.0, - "learning_rate": 5.554799149344896e-06, - "loss": 0.7162, + "learning_rate": 4.4574944555130895e-07, + "loss": 1.0178, "step": 23193 }, { - "epoch": 0.6572586358355295, + "epoch": 0.9075044995696064, "grad_norm": 0.0, - "learning_rate": 5.553977041002097e-06, - "loss": 0.8598, + "learning_rate": 4.4537539288115106e-07, + "loss": 0.8322, "step": 23194 }, { - "epoch": 0.657286973277792, + "epoch": 0.9075436262618358, "grad_norm": 0.0, - "learning_rate": 5.553154970109795e-06, - "loss": 0.8951, + "learning_rate": 4.4500149364629317e-07, + "loss": 0.9697, "step": 23195 }, { - "epoch": 0.6573153107200544, + "epoch": 0.9075827529540652, "grad_norm": 0.0, - "learning_rate": 5.552332936674922e-06, - "loss": 0.8578, + "learning_rate": 4.446277478527361e-07, + "loss": 0.9897, "step": 23196 }, { - "epoch": 0.6573436481623168, + "epoch": 0.9076218796462947, "grad_norm": 0.0, - "learning_rate": 5.5515109407044e-06, - "loss": 0.7899, + "learning_rate": 4.442541555064861e-07, + "loss": 0.9321, "step": 23197 }, { - "epoch": 0.6573719856045793, + "epoch": 0.9076610063385241, "grad_norm": 0.0, - "learning_rate": 5.550688982205152e-06, - "loss": 0.7688, + "learning_rate": 4.438807166135384e-07, + "loss": 0.9341, "step": 23198 }, { - "epoch": 0.6574003230468418, + "epoch": 0.9077001330307536, "grad_norm": 0.0, - "learning_rate": 5.549867061184108e-06, - "loss": 0.7832, + "learning_rate": 4.435074311798948e-07, + "loss": 0.8796, "step": 23199 }, { - "epoch": 0.6574286604891042, + "epoch": 0.907739259722983, "grad_norm": 0.0, - "learning_rate": 5.549045177648182e-06, - "loss": 0.866, + "learning_rate": 4.4313429921154394e-07, + "loss": 0.9157, "step": 23200 }, { - "epoch": 0.6574569979313667, + "epoch": 0.9077783864152125, "grad_norm": 0.0, - "learning_rate": 5.548223331604302e-06, - "loss": 0.6911, + "learning_rate": 4.427613207144821e-07, + "loss": 1.0139, "step": 23201 }, { - "epoch": 0.6574853353736292, + "epoch": 0.9078175131074419, "grad_norm": 0.0, - "learning_rate": 5.547401523059393e-06, - "loss": 0.698, + "learning_rate": 4.4238849569469664e-07, + "loss": 0.9472, "step": 23202 }, { - "epoch": 0.6575136728158917, + "epoch": 0.9078566397996713, "grad_norm": 0.0, - "learning_rate": 5.546579752020371e-06, - "loss": 0.9396, + "learning_rate": 4.4201582415817734e-07, + "loss": 0.9547, "step": 23203 }, { - "epoch": 0.6575420102581541, + "epoch": 0.9078957664919007, "grad_norm": 0.0, - "learning_rate": 5.545758018494165e-06, - "loss": 0.8969, + "learning_rate": 4.416433061109049e-07, + "loss": 0.88, "step": 23204 }, { - "epoch": 0.6575703477004166, + "epoch": 0.9079348931841302, "grad_norm": 0.0, - "learning_rate": 5.5449363224876905e-06, - "loss": 0.8533, + "learning_rate": 4.412709415588645e-07, + "loss": 0.8768, "step": 23205 }, { - "epoch": 0.657598685142679, + "epoch": 0.9079740198763596, "grad_norm": 0.0, - "learning_rate": 5.5441146640078705e-06, - "loss": 0.8432, + "learning_rate": 4.40898730508037e-07, + "loss": 0.8793, "step": 23206 }, { - "epoch": 0.6576270225849414, + "epoch": 0.9080131465685891, "grad_norm": 0.0, - "learning_rate": 5.54329304306163e-06, - "loss": 0.8607, + "learning_rate": 4.4052667296439533e-07, + "loss": 0.9285, "step": 23207 }, { - "epoch": 0.6576553600272039, + "epoch": 0.9080522732608185, "grad_norm": 0.0, - "learning_rate": 5.542471459655886e-06, - "loss": 0.7587, + "learning_rate": 4.4015476893391695e-07, + "loss": 0.8887, "step": 23208 }, { - "epoch": 0.6576836974694664, + "epoch": 0.908091399953048, "grad_norm": 0.0, - "learning_rate": 5.541649913797559e-06, - "loss": 0.8338, + "learning_rate": 4.3978301842257486e-07, + "loss": 0.8936, "step": 23209 }, { - "epoch": 0.6577120349117289, + "epoch": 0.9081305266452774, "grad_norm": 0.0, - "learning_rate": 5.54082840549357e-06, - "loss": 0.8899, + "learning_rate": 4.3941142143633654e-07, + "loss": 0.9705, "step": 23210 }, { - "epoch": 0.6577403723539913, + "epoch": 0.9081696533375069, "grad_norm": 0.0, - "learning_rate": 5.540006934750839e-06, - "loss": 0.824, + "learning_rate": 4.390399779811716e-07, + "loss": 0.9309, "step": 23211 }, { - "epoch": 0.6577687097962538, + "epoch": 0.9082087800297363, "grad_norm": 0.0, - "learning_rate": 5.539185501576291e-06, - "loss": 0.8142, + "learning_rate": 4.386686880630442e-07, + "loss": 0.8754, "step": 23212 }, { - "epoch": 0.6577970472385163, + "epoch": 0.9082479067219658, "grad_norm": 0.0, - "learning_rate": 5.538364105976835e-06, - "loss": 0.8943, + "learning_rate": 4.3829755168791623e-07, + "loss": 0.981, "step": 23213 }, { - "epoch": 0.6578253846807787, + "epoch": 0.9082870334141951, "grad_norm": 0.0, - "learning_rate": 5.5375427479593945e-06, - "loss": 0.837, + "learning_rate": 4.3792656886174733e-07, + "loss": 0.9762, "step": 23214 }, { - "epoch": 0.6578537221230412, + "epoch": 0.9083261601064246, "grad_norm": 0.0, - "learning_rate": 5.536721427530894e-06, - "loss": 0.911, + "learning_rate": 4.375557395904961e-07, + "loss": 0.8978, "step": 23215 }, { - "epoch": 0.6578820595653037, + "epoch": 0.908365286798654, "grad_norm": 0.0, - "learning_rate": 5.535900144698241e-06, - "loss": 0.7876, + "learning_rate": 4.3718506388011895e-07, + "loss": 1.0379, "step": 23216 }, { - "epoch": 0.6579103970075662, + "epoch": 0.9084044134908835, "grad_norm": 0.0, - "learning_rate": 5.535078899468359e-06, - "loss": 0.7131, + "learning_rate": 4.3681454173656546e-07, + "loss": 0.9753, "step": 23217 }, { - "epoch": 0.6579387344498285, + "epoch": 0.9084435401831129, "grad_norm": 0.0, - "learning_rate": 5.53425769184817e-06, - "loss": 0.8504, + "learning_rate": 4.364441731657876e-07, + "loss": 0.9093, "step": 23218 }, { - "epoch": 0.657967071892091, + "epoch": 0.9084826668753424, "grad_norm": 0.0, - "learning_rate": 5.533436521844582e-06, - "loss": 0.8527, + "learning_rate": 4.3607395817373056e-07, + "loss": 0.9253, "step": 23219 }, { - "epoch": 0.6579954093343535, + "epoch": 0.9085217935675718, "grad_norm": 0.0, - "learning_rate": 5.532615389464522e-06, - "loss": 0.8183, + "learning_rate": 4.357038967663441e-07, + "loss": 0.9512, "step": 23220 }, { - "epoch": 0.6580237467766159, + "epoch": 0.9085609202598013, "grad_norm": 0.0, - "learning_rate": 5.531794294714896e-06, - "loss": 0.8232, + "learning_rate": 4.353339889495667e-07, + "loss": 0.9917, "step": 23221 }, { - "epoch": 0.6580520842188784, + "epoch": 0.9086000469520307, "grad_norm": 0.0, - "learning_rate": 5.530973237602627e-06, - "loss": 0.9154, + "learning_rate": 4.3496423472934146e-07, + "loss": 0.9695, "step": 23222 }, { - "epoch": 0.6580804216611409, + "epoch": 0.9086391736442602, "grad_norm": 0.0, - "learning_rate": 5.53015221813463e-06, - "loss": 0.7372, + "learning_rate": 4.345946341116025e-07, + "loss": 1.0082, "step": 23223 }, { - "epoch": 0.6581087591034033, + "epoch": 0.9086783003364896, "grad_norm": 0.0, - "learning_rate": 5.5293312363178194e-06, - "loss": 0.8388, + "learning_rate": 4.3422518710229067e-07, + "loss": 1.0195, "step": 23224 }, { - "epoch": 0.6581370965456658, + "epoch": 0.9087174270287189, "grad_norm": 0.0, - "learning_rate": 5.528510292159117e-06, - "loss": 0.8791, + "learning_rate": 4.338558937073345e-07, + "loss": 0.9702, "step": 23225 }, { - "epoch": 0.6581654339879283, + "epoch": 0.9087565537209484, "grad_norm": 0.0, - "learning_rate": 5.527689385665429e-06, - "loss": 0.8137, + "learning_rate": 4.3348675393266594e-07, + "loss": 1.0085, "step": 23226 }, { - "epoch": 0.6581937714301908, + "epoch": 0.9087956804131778, "grad_norm": 0.0, - "learning_rate": 5.526868516843673e-06, - "loss": 0.8867, + "learning_rate": 4.3311776778421243e-07, + "loss": 0.8757, "step": 23227 }, { - "epoch": 0.6582221088724531, + "epoch": 0.9088348071054073, "grad_norm": 0.0, - "learning_rate": 5.526047685700771e-06, - "loss": 0.9226, + "learning_rate": 4.3274893526789816e-07, + "loss": 0.9982, "step": 23228 }, { - "epoch": 0.6582504463147156, + "epoch": 0.9088739337976367, "grad_norm": 0.0, - "learning_rate": 5.525226892243623e-06, - "loss": 0.8207, + "learning_rate": 4.3238025638964843e-07, + "loss": 0.8718, "step": 23229 }, { - "epoch": 0.6582787837569781, + "epoch": 0.9089130604898662, "grad_norm": 0.0, - "learning_rate": 5.524406136479153e-06, - "loss": 1.0178, + "learning_rate": 4.3201173115538507e-07, + "loss": 0.8979, "step": 23230 }, { - "epoch": 0.6583071211992405, + "epoch": 0.9089521871820956, "grad_norm": 0.0, - "learning_rate": 5.5235854184142755e-06, - "loss": 0.8893, + "learning_rate": 4.316433595710212e-07, + "loss": 0.8713, "step": 23231 }, { - "epoch": 0.658335458641503, + "epoch": 0.9089913138743251, "grad_norm": 0.0, - "learning_rate": 5.522764738055897e-06, - "loss": 0.83, + "learning_rate": 4.3127514164247543e-07, + "loss": 0.9207, "step": 23232 }, { - "epoch": 0.6583637960837655, + "epoch": 0.9090304405665545, "grad_norm": 0.0, - "learning_rate": 5.521944095410936e-06, - "loss": 0.9121, + "learning_rate": 4.309070773756607e-07, + "loss": 1.0287, "step": 23233 }, { - "epoch": 0.658392133526028, + "epoch": 0.909069567258784, "grad_norm": 0.0, - "learning_rate": 5.521123490486301e-06, - "loss": 0.8707, + "learning_rate": 4.305391667764891e-07, + "loss": 0.9038, "step": 23234 }, { - "epoch": 0.6584204709682904, + "epoch": 0.9091086939510133, "grad_norm": 0.0, - "learning_rate": 5.520302923288905e-06, - "loss": 0.8352, + "learning_rate": 4.301714098508658e-07, + "loss": 0.9436, "step": 23235 }, { - "epoch": 0.6584488084105529, + "epoch": 0.9091478206432428, "grad_norm": 0.0, - "learning_rate": 5.51948239382566e-06, - "loss": 0.7978, + "learning_rate": 4.2980380660469834e-07, + "loss": 0.8722, "step": 23236 }, { - "epoch": 0.6584771458528154, + "epoch": 0.9091869473354722, "grad_norm": 0.0, - "learning_rate": 5.518661902103479e-06, - "loss": 0.9237, + "learning_rate": 4.2943635704388973e-07, + "loss": 1.0272, "step": 23237 }, { - "epoch": 0.6585054832950777, + "epoch": 0.9092260740277017, "grad_norm": 0.0, - "learning_rate": 5.5178414481292745e-06, - "loss": 0.9124, + "learning_rate": 4.29069061174342e-07, + "loss": 0.8857, "step": 23238 }, { - "epoch": 0.6585338207373402, + "epoch": 0.9092652007199311, "grad_norm": 0.0, - "learning_rate": 5.5170210319099595e-06, - "loss": 0.8892, + "learning_rate": 4.2870191900195034e-07, + "loss": 0.9233, "step": 23239 }, { - "epoch": 0.6585621581796027, + "epoch": 0.9093043274121606, "grad_norm": 0.0, - "learning_rate": 5.5162006534524346e-06, - "loss": 0.8419, + "learning_rate": 4.2833493053261343e-07, + "loss": 1.0267, "step": 23240 }, { - "epoch": 0.6585904956218652, + "epoch": 0.90934345410439, "grad_norm": 0.0, - "learning_rate": 5.5153803127636225e-06, - "loss": 0.9222, + "learning_rate": 4.27968095772221e-07, + "loss": 0.9586, "step": 23241 }, { - "epoch": 0.6586188330641276, + "epoch": 0.9093825807966195, "grad_norm": 0.0, - "learning_rate": 5.514560009850423e-06, - "loss": 0.8024, + "learning_rate": 4.276014147266694e-07, + "loss": 1.0316, "step": 23242 }, { - "epoch": 0.6586471705063901, + "epoch": 0.9094217074888489, "grad_norm": 0.0, - "learning_rate": 5.513739744719751e-06, - "loss": 0.7844, + "learning_rate": 4.2723488740184285e-07, + "loss": 0.95, "step": 23243 }, { - "epoch": 0.6586755079486526, + "epoch": 0.9094608341810784, "grad_norm": 0.0, - "learning_rate": 5.5129195173785184e-06, - "loss": 0.7712, + "learning_rate": 4.2686851380362994e-07, + "loss": 0.9313, "step": 23244 }, { - "epoch": 0.658703845390915, + "epoch": 0.9094999608733078, "grad_norm": 0.0, - "learning_rate": 5.512099327833627e-06, - "loss": 0.8749, + "learning_rate": 4.2650229393791156e-07, + "loss": 1.035, "step": 23245 }, { - "epoch": 0.6587321828331775, + "epoch": 0.9095390875655373, "grad_norm": 0.0, - "learning_rate": 5.511279176091989e-06, - "loss": 0.8749, + "learning_rate": 4.261362278105707e-07, + "loss": 0.9529, "step": 23246 }, { - "epoch": 0.65876052027544, + "epoch": 0.9095782142577666, "grad_norm": 0.0, - "learning_rate": 5.5104590621605135e-06, - "loss": 0.8734, + "learning_rate": 4.2577031542748393e-07, + "loss": 0.947, "step": 23247 }, { - "epoch": 0.6587888577177023, + "epoch": 0.9096173409499961, "grad_norm": 0.0, - "learning_rate": 5.509638986046109e-06, - "loss": 0.9249, + "learning_rate": 4.254045567945309e-07, + "loss": 1.043, "step": 23248 }, { - "epoch": 0.6588171951599648, + "epoch": 0.9096564676422255, "grad_norm": 0.0, - "learning_rate": 5.508818947755687e-06, - "loss": 0.8003, + "learning_rate": 4.250389519175824e-07, + "loss": 1.0964, "step": 23249 }, { - "epoch": 0.6588455326022273, + "epoch": 0.909695594334455, "grad_norm": 0.0, - "learning_rate": 5.507998947296147e-06, - "loss": 0.8706, + "learning_rate": 4.2467350080250934e-07, + "loss": 0.9472, "step": 23250 }, { - "epoch": 0.6588738700444898, + "epoch": 0.9097347210266844, "grad_norm": 0.0, - "learning_rate": 5.507178984674399e-06, - "loss": 0.7957, + "learning_rate": 4.2430820345518265e-07, + "loss": 0.9388, "step": 23251 }, { - "epoch": 0.6589022074867522, + "epoch": 0.9097738477189139, "grad_norm": 0.0, - "learning_rate": 5.5063590598973545e-06, - "loss": 0.7733, + "learning_rate": 4.2394305988146643e-07, + "loss": 0.9264, "step": 23252 }, { - "epoch": 0.6589305449290147, + "epoch": 0.9098129744111433, "grad_norm": 0.0, - "learning_rate": 5.505539172971912e-06, - "loss": 0.8243, + "learning_rate": 4.235780700872238e-07, + "loss": 0.8279, "step": 23253 }, { - "epoch": 0.6589588823712772, + "epoch": 0.9098521011033727, "grad_norm": 0.0, - "learning_rate": 5.504719323904988e-06, - "loss": 0.9214, + "learning_rate": 4.2321323407831907e-07, + "loss": 0.828, "step": 23254 }, { - "epoch": 0.6589872198135396, + "epoch": 0.9098912277956022, "grad_norm": 0.0, - "learning_rate": 5.503899512703477e-06, - "loss": 0.9048, + "learning_rate": 4.228485518606096e-07, + "loss": 1.111, "step": 23255 }, { - "epoch": 0.6590155572558021, + "epoch": 0.9099303544878315, "grad_norm": 0.0, - "learning_rate": 5.5030797393742905e-06, - "loss": 0.7772, + "learning_rate": 4.2248402343995076e-07, + "loss": 0.9596, "step": 23256 }, { - "epoch": 0.6590438946980646, + "epoch": 0.909969481180061, "grad_norm": 0.0, - "learning_rate": 5.502260003924337e-06, - "loss": 0.8851, + "learning_rate": 4.22119648822199e-07, + "loss": 0.9842, "step": 23257 }, { - "epoch": 0.6590722321403271, + "epoch": 0.9100086078722904, "grad_norm": 0.0, - "learning_rate": 5.501440306360512e-06, - "loss": 0.9037, + "learning_rate": 4.2175542801320193e-07, + "loss": 0.9204, "step": 23258 }, { - "epoch": 0.6591005695825894, + "epoch": 0.9100477345645199, "grad_norm": 0.0, - "learning_rate": 5.50062064668973e-06, - "loss": 0.8618, + "learning_rate": 4.213913610188103e-07, + "loss": 0.8775, "step": 23259 }, { - "epoch": 0.6591289070248519, + "epoch": 0.9100868612567493, "grad_norm": 0.0, - "learning_rate": 5.499801024918887e-06, - "loss": 0.7934, + "learning_rate": 4.210274478448717e-07, + "loss": 0.8716, "step": 23260 }, { - "epoch": 0.6591572444671144, + "epoch": 0.9101259879489788, "grad_norm": 0.0, - "learning_rate": 5.498981441054894e-06, - "loss": 0.8999, + "learning_rate": 4.206636884972293e-07, + "loss": 0.9865, "step": 23261 }, { - "epoch": 0.6591855819093768, + "epoch": 0.9101651146412082, "grad_norm": 0.0, - "learning_rate": 5.498161895104653e-06, - "loss": 0.8594, + "learning_rate": 4.2030008298172384e-07, + "loss": 0.9368, "step": 23262 }, { - "epoch": 0.6592139193516393, + "epoch": 0.9102042413334377, "grad_norm": 0.0, - "learning_rate": 5.4973423870750645e-06, - "loss": 0.7918, + "learning_rate": 4.1993663130419526e-07, + "loss": 1.0698, "step": 23263 }, { - "epoch": 0.6592422567939018, + "epoch": 0.9102433680256671, "grad_norm": 0.0, - "learning_rate": 5.49652291697303e-06, - "loss": 0.769, + "learning_rate": 4.195733334704788e-07, + "loss": 0.7874, "step": 23264 }, { - "epoch": 0.6592705942361643, + "epoch": 0.9102824947178966, "grad_norm": 0.0, - "learning_rate": 5.495703484805462e-06, - "loss": 0.8216, + "learning_rate": 4.19210189486412e-07, + "loss": 0.9776, "step": 23265 }, { - "epoch": 0.6592989316784267, + "epoch": 0.910321621410126, "grad_norm": 0.0, - "learning_rate": 5.494884090579252e-06, - "loss": 0.8608, + "learning_rate": 4.188471993578225e-07, + "loss": 1.1099, "step": 23266 }, { - "epoch": 0.6593272691206892, + "epoch": 0.9103607481023555, "grad_norm": 0.0, - "learning_rate": 5.4940647343013055e-06, - "loss": 0.852, + "learning_rate": 4.184843630905422e-07, + "loss": 0.9521, "step": 23267 }, { - "epoch": 0.6593556065629517, + "epoch": 0.9103998747945848, "grad_norm": 0.0, - "learning_rate": 5.49324541597853e-06, - "loss": 0.8529, + "learning_rate": 4.1812168069039426e-07, + "loss": 0.9397, "step": 23268 }, { - "epoch": 0.659383944005214, + "epoch": 0.9104390014868143, "grad_norm": 0.0, - "learning_rate": 5.492426135617816e-06, - "loss": 0.7756, + "learning_rate": 4.1775915216320853e-07, + "loss": 0.8338, "step": 23269 }, { - "epoch": 0.6594122814474765, + "epoch": 0.9104781281790437, "grad_norm": 0.0, - "learning_rate": 5.491606893226076e-06, - "loss": 0.8843, + "learning_rate": 4.1739677751480135e-07, + "loss": 0.9187, "step": 23270 }, { - "epoch": 0.659440618889739, + "epoch": 0.9105172548712732, "grad_norm": 0.0, - "learning_rate": 5.490787688810202e-06, - "loss": 0.957, + "learning_rate": 4.17034556750997e-07, + "loss": 1.1099, "step": 23271 }, { - "epoch": 0.6594689563320014, + "epoch": 0.9105563815635026, "grad_norm": 0.0, - "learning_rate": 5.489968522377096e-06, - "loss": 0.8247, + "learning_rate": 4.1667248987760534e-07, + "loss": 1.0247, "step": 23272 }, { - "epoch": 0.6594972937742639, + "epoch": 0.9105955082557321, "grad_norm": 0.0, - "learning_rate": 5.48914939393366e-06, - "loss": 0.749, + "learning_rate": 4.163105769004483e-07, + "loss": 0.9801, "step": 23273 }, { - "epoch": 0.6595256312165264, + "epoch": 0.9106346349479615, "grad_norm": 0.0, - "learning_rate": 5.488330303486795e-06, - "loss": 0.8391, + "learning_rate": 4.1594881782533235e-07, + "loss": 0.9252, "step": 23274 }, { - "epoch": 0.6595539686587889, + "epoch": 0.910673761640191, "grad_norm": 0.0, - "learning_rate": 5.487511251043398e-06, - "loss": 0.8844, + "learning_rate": 4.155872126580718e-07, + "loss": 0.8626, "step": 23275 }, { - "epoch": 0.6595823061010513, + "epoch": 0.9107128883324204, "grad_norm": 0.0, - "learning_rate": 5.486692236610373e-06, - "loss": 0.7613, + "learning_rate": 4.1522576140446747e-07, + "loss": 1.0724, "step": 23276 }, { - "epoch": 0.6596106435433138, + "epoch": 0.9107520150246499, "grad_norm": 0.0, - "learning_rate": 5.485873260194614e-06, - "loss": 0.7878, + "learning_rate": 4.148644640703281e-07, + "loss": 0.9998, "step": 23277 }, { - "epoch": 0.6596389809855763, + "epoch": 0.9107911417168792, "grad_norm": 0.0, - "learning_rate": 5.485054321803023e-06, - "loss": 0.7061, + "learning_rate": 4.145033206614546e-07, + "loss": 0.9914, "step": 23278 }, { - "epoch": 0.6596673184278387, + "epoch": 0.9108302684091087, "grad_norm": 0.0, - "learning_rate": 5.484235421442492e-06, - "loss": 0.7536, + "learning_rate": 4.1414233118364787e-07, + "loss": 1.0145, "step": 23279 }, { - "epoch": 0.6596956558701011, + "epoch": 0.9108693951013381, "grad_norm": 0.0, - "learning_rate": 5.483416559119923e-06, - "loss": 0.7133, + "learning_rate": 4.137814956427011e-07, + "loss": 0.9054, "step": 23280 }, { - "epoch": 0.6597239933123636, + "epoch": 0.9109085217935676, "grad_norm": 0.0, - "learning_rate": 5.482597734842217e-06, - "loss": 0.9392, + "learning_rate": 4.134208140444129e-07, + "loss": 0.9678, "step": 23281 }, { - "epoch": 0.6597523307546261, + "epoch": 0.910947648485797, "grad_norm": 0.0, - "learning_rate": 5.481778948616264e-06, - "loss": 0.876, + "learning_rate": 4.130602863945732e-07, + "loss": 0.8853, "step": 23282 }, { - "epoch": 0.6597806681968885, + "epoch": 0.9109867751780264, "grad_norm": 0.0, - "learning_rate": 5.480960200448969e-06, - "loss": 0.8557, + "learning_rate": 4.126999126989728e-07, + "loss": 0.8927, "step": 23283 }, { - "epoch": 0.659809005639151, + "epoch": 0.9110259018702559, "grad_norm": 0.0, - "learning_rate": 5.480141490347221e-06, - "loss": 0.7025, + "learning_rate": 4.1233969296339716e-07, + "loss": 0.9084, "step": 23284 }, { - "epoch": 0.6598373430814135, + "epoch": 0.9110650285624853, "grad_norm": 0.0, - "learning_rate": 5.479322818317918e-06, - "loss": 0.849, + "learning_rate": 4.1197962719363383e-07, + "loss": 0.9782, "step": 23285 }, { - "epoch": 0.6598656805236759, + "epoch": 0.9111041552547148, "grad_norm": 0.0, - "learning_rate": 5.478504184367959e-06, - "loss": 0.8554, + "learning_rate": 4.116197153954604e-07, + "loss": 0.8494, "step": 23286 }, { - "epoch": 0.6598940179659384, + "epoch": 0.9111432819469442, "grad_norm": 0.0, - "learning_rate": 5.477685588504238e-06, - "loss": 0.7514, + "learning_rate": 4.112599575746623e-07, + "loss": 0.8466, "step": 23287 }, { - "epoch": 0.6599223554082009, + "epoch": 0.9111824086391737, "grad_norm": 0.0, - "learning_rate": 5.47686703073365e-06, - "loss": 0.7408, + "learning_rate": 4.1090035373701154e-07, + "loss": 0.9782, "step": 23288 }, { - "epoch": 0.6599506928504634, + "epoch": 0.911221535331403, "grad_norm": 0.0, - "learning_rate": 5.4760485110630956e-06, - "loss": 0.9654, + "learning_rate": 4.105409038882879e-07, + "loss": 0.9256, "step": 23289 }, { - "epoch": 0.6599790302927258, + "epoch": 0.9112606620236325, "grad_norm": 0.0, - "learning_rate": 5.47523002949946e-06, - "loss": 0.8084, + "learning_rate": 4.101816080342591e-07, + "loss": 1.0403, "step": 23290 }, { - "epoch": 0.6600073677349882, + "epoch": 0.9112997887158619, "grad_norm": 0.0, - "learning_rate": 5.474411586049646e-06, - "loss": 0.8576, + "learning_rate": 4.098224661806971e-07, + "loss": 0.8536, "step": 23291 }, { - "epoch": 0.6600357051772507, + "epoch": 0.9113389154080914, "grad_norm": 0.0, - "learning_rate": 5.47359318072054e-06, - "loss": 0.9352, + "learning_rate": 4.0946347833336954e-07, + "loss": 1.045, "step": 23292 }, { - "epoch": 0.6600640426195131, + "epoch": 0.9113780421003208, "grad_norm": 0.0, - "learning_rate": 5.472774813519039e-06, - "loss": 0.8966, + "learning_rate": 4.0910464449804176e-07, + "loss": 0.9973, "step": 23293 }, { - "epoch": 0.6600923800617756, + "epoch": 0.9114171687925503, "grad_norm": 0.0, - "learning_rate": 5.4719564844520416e-06, - "loss": 0.837, + "learning_rate": 4.087459646804737e-07, + "loss": 1.0389, "step": 23294 }, { - "epoch": 0.6601207175040381, + "epoch": 0.9114562954847797, "grad_norm": 0.0, - "learning_rate": 5.471138193526431e-06, - "loss": 0.8963, + "learning_rate": 4.083874388864273e-07, + "loss": 0.9902, "step": 23295 }, { - "epoch": 0.6601490549463005, + "epoch": 0.9114954221770092, "grad_norm": 0.0, - "learning_rate": 5.470319940749107e-06, - "loss": 0.8255, + "learning_rate": 4.0802906712166134e-07, + "loss": 0.9583, "step": 23296 }, { - "epoch": 0.660177392388563, + "epoch": 0.9115345488692386, "grad_norm": 0.0, - "learning_rate": 5.469501726126962e-06, - "loss": 0.8583, + "learning_rate": 4.076708493919279e-07, + "loss": 1.0055, "step": 23297 }, { - "epoch": 0.6602057298308255, + "epoch": 0.9115736755614681, "grad_norm": 0.0, - "learning_rate": 5.4686835496668845e-06, - "loss": 0.807, + "learning_rate": 4.073127857029802e-07, + "loss": 0.9388, "step": 23298 }, { - "epoch": 0.660234067273088, + "epoch": 0.9116128022536975, "grad_norm": 0.0, - "learning_rate": 5.467865411375766e-06, - "loss": 0.7254, + "learning_rate": 4.0695487606056903e-07, + "loss": 0.9281, "step": 23299 }, { - "epoch": 0.6602624047153504, + "epoch": 0.911651928945927, "grad_norm": 0.0, - "learning_rate": 5.467047311260501e-06, - "loss": 0.7419, + "learning_rate": 4.065971204704433e-07, + "loss": 0.8964, "step": 23300 }, { - "epoch": 0.6602907421576129, + "epoch": 0.9116910556381563, "grad_norm": 0.0, - "learning_rate": 5.466229249327981e-06, - "loss": 0.7665, + "learning_rate": 4.06239518938345e-07, + "loss": 0.8995, "step": 23301 }, { - "epoch": 0.6603190795998753, + "epoch": 0.9117301823303858, "grad_norm": 0.0, - "learning_rate": 5.465411225585098e-06, - "loss": 0.8132, + "learning_rate": 4.0588207147001845e-07, + "loss": 1.1295, "step": 23302 }, { - "epoch": 0.6603474170421377, + "epoch": 0.9117693090226152, "grad_norm": 0.0, - "learning_rate": 5.464593240038736e-06, - "loss": 0.7108, + "learning_rate": 4.055247780712035e-07, + "loss": 1.0333, "step": 23303 }, { - "epoch": 0.6603757544844002, + "epoch": 0.9118084357148447, "grad_norm": 0.0, - "learning_rate": 5.46377529269579e-06, - "loss": 0.7985, + "learning_rate": 4.0516763874763996e-07, + "loss": 0.9405, "step": 23304 }, { - "epoch": 0.6604040919266627, + "epoch": 0.9118475624070741, "grad_norm": 0.0, - "learning_rate": 5.462957383563154e-06, - "loss": 0.833, + "learning_rate": 4.048106535050589e-07, + "loss": 0.9678, "step": 23305 }, { - "epoch": 0.6604324293689252, + "epoch": 0.9118866890993036, "grad_norm": 0.0, - "learning_rate": 5.462139512647708e-06, - "loss": 0.8859, + "learning_rate": 4.0445382234919674e-07, + "loss": 1.0595, "step": 23306 }, { - "epoch": 0.6604607668111876, + "epoch": 0.911925815791533, "grad_norm": 0.0, - "learning_rate": 5.461321679956349e-06, - "loss": 0.7009, + "learning_rate": 4.0409714528578224e-07, + "loss": 0.969, "step": 23307 }, { - "epoch": 0.6604891042534501, + "epoch": 0.9119649424837625, "grad_norm": 0.0, - "learning_rate": 5.46050388549596e-06, - "loss": 0.965, + "learning_rate": 4.03740622320542e-07, + "loss": 0.9544, "step": 23308 }, { - "epoch": 0.6605174416957126, + "epoch": 0.9120040691759919, "grad_norm": 0.0, - "learning_rate": 5.459686129273433e-06, - "loss": 0.763, + "learning_rate": 4.0338425345920364e-07, + "loss": 0.8832, "step": 23309 }, { - "epoch": 0.660545779137975, + "epoch": 0.9120431958682212, "grad_norm": 0.0, - "learning_rate": 5.458868411295659e-06, - "loss": 0.8099, + "learning_rate": 4.030280387074892e-07, + "loss": 1.0022, "step": 23310 }, { - "epoch": 0.6605741165802375, + "epoch": 0.9120823225604507, "grad_norm": 0.0, - "learning_rate": 5.458050731569517e-06, - "loss": 0.7804, + "learning_rate": 4.026719780711175e-07, + "loss": 0.9718, "step": 23311 }, { - "epoch": 0.6606024540225, + "epoch": 0.9121214492526801, "grad_norm": 0.0, - "learning_rate": 5.4572330901019036e-06, - "loss": 0.7895, + "learning_rate": 4.023160715558083e-07, + "loss": 0.8611, "step": 23312 }, { - "epoch": 0.6606307914647624, + "epoch": 0.9121605759449096, "grad_norm": 0.0, - "learning_rate": 5.456415486899701e-06, - "loss": 0.7589, + "learning_rate": 4.0196031916727606e-07, + "loss": 1.1202, "step": 23313 }, { - "epoch": 0.6606591289070248, + "epoch": 0.912199702637139, "grad_norm": 0.0, - "learning_rate": 5.455597921969797e-06, - "loss": 0.811, + "learning_rate": 4.0160472091123616e-07, + "loss": 0.9846, "step": 23314 }, { - "epoch": 0.6606874663492873, + "epoch": 0.9122388293293685, "grad_norm": 0.0, - "learning_rate": 5.454780395319086e-06, - "loss": 0.7819, + "learning_rate": 4.012492767933951e-07, + "loss": 0.958, "step": 23315 }, { - "epoch": 0.6607158037915498, + "epoch": 0.9122779560215979, "grad_norm": 0.0, - "learning_rate": 5.453962906954442e-06, - "loss": 0.8593, + "learning_rate": 4.008939868194639e-07, + "loss": 0.8847, "step": 23316 }, { - "epoch": 0.6607441412338122, + "epoch": 0.9123170827138274, "grad_norm": 0.0, - "learning_rate": 5.453145456882756e-06, - "loss": 0.8064, + "learning_rate": 4.005388509951447e-07, + "loss": 0.8457, "step": 23317 }, { - "epoch": 0.6607724786760747, + "epoch": 0.9123562094060568, "grad_norm": 0.0, - "learning_rate": 5.452328045110918e-06, - "loss": 0.8719, + "learning_rate": 4.0018386932614504e-07, + "loss": 1.0583, "step": 23318 }, { - "epoch": 0.6608008161183372, + "epoch": 0.9123953360982863, "grad_norm": 0.0, - "learning_rate": 5.451510671645806e-06, - "loss": 0.9217, + "learning_rate": 3.9982904181816163e-07, + "loss": 1.0529, "step": 23319 }, { - "epoch": 0.6608291535605996, + "epoch": 0.9124344627905157, "grad_norm": 0.0, - "learning_rate": 5.450693336494314e-06, - "loss": 0.7717, + "learning_rate": 3.9947436847689536e-07, + "loss": 0.9633, "step": 23320 }, { - "epoch": 0.6608574910028621, + "epoch": 0.9124735894827452, "grad_norm": 0.0, - "learning_rate": 5.449876039663316e-06, - "loss": 0.7778, + "learning_rate": 3.991198493080384e-07, + "loss": 0.8371, "step": 23321 }, { - "epoch": 0.6608858284451246, + "epoch": 0.9125127161749745, "grad_norm": 0.0, - "learning_rate": 5.449058781159702e-06, - "loss": 0.8344, + "learning_rate": 3.9876548431728943e-07, + "loss": 0.7608, "step": 23322 }, { - "epoch": 0.660914165887387, + "epoch": 0.912551842867204, "grad_norm": 0.0, - "learning_rate": 5.448241560990361e-06, - "loss": 0.8748, + "learning_rate": 3.9841127351033295e-07, + "loss": 0.8335, "step": 23323 }, { - "epoch": 0.6609425033296494, + "epoch": 0.9125909695594334, "grad_norm": 0.0, - "learning_rate": 5.447424379162165e-06, - "loss": 0.7722, + "learning_rate": 3.9805721689286205e-07, + "loss": 0.9914, "step": 23324 }, { - "epoch": 0.6609708407719119, + "epoch": 0.9126300962516629, "grad_norm": 0.0, - "learning_rate": 5.446607235682007e-06, - "loss": 0.8074, + "learning_rate": 3.9770331447055886e-07, + "loss": 0.9839, "step": 23325 }, { - "epoch": 0.6609991782141744, + "epoch": 0.9126692229438923, "grad_norm": 0.0, - "learning_rate": 5.445790130556765e-06, - "loss": 0.929, + "learning_rate": 3.973495662491089e-07, + "loss": 0.9708, "step": 23326 }, { - "epoch": 0.6610275156564368, + "epoch": 0.9127083496361218, "grad_norm": 0.0, - "learning_rate": 5.444973063793324e-06, - "loss": 0.6883, + "learning_rate": 3.9699597223419097e-07, + "loss": 1.0306, "step": 23327 }, { - "epoch": 0.6610558530986993, + "epoch": 0.9127474763283512, "grad_norm": 0.0, - "learning_rate": 5.4441560353985715e-06, - "loss": 0.8594, + "learning_rate": 3.96642532431486e-07, + "loss": 0.8637, "step": 23328 }, { - "epoch": 0.6610841905409618, + "epoch": 0.9127866030205807, "grad_norm": 0.0, - "learning_rate": 5.44333904537938e-06, - "loss": 0.7588, + "learning_rate": 3.9628924684666727e-07, + "loss": 0.9548, "step": 23329 }, { - "epoch": 0.6611125279832243, + "epoch": 0.9128257297128101, "grad_norm": 0.0, - "learning_rate": 5.442522093742636e-06, - "loss": 0.8243, + "learning_rate": 3.959361154854091e-07, + "loss": 0.9273, "step": 23330 }, { - "epoch": 0.6611408654254867, + "epoch": 0.9128648564050396, "grad_norm": 0.0, - "learning_rate": 5.441705180495225e-06, - "loss": 0.8782, + "learning_rate": 3.9558313835338257e-07, + "loss": 0.997, "step": 23331 }, { - "epoch": 0.6611692028677492, + "epoch": 0.912903983097269, "grad_norm": 0.0, - "learning_rate": 5.440888305644019e-06, - "loss": 0.8487, + "learning_rate": 3.952303154562576e-07, + "loss": 0.963, "step": 23332 }, { - "epoch": 0.6611975403100117, + "epoch": 0.9129431097894984, "grad_norm": 0.0, - "learning_rate": 5.440071469195905e-06, - "loss": 0.6465, + "learning_rate": 3.948776467996962e-07, + "loss": 0.9667, "step": 23333 }, { - "epoch": 0.661225877752274, + "epoch": 0.9129822364817278, "grad_norm": 0.0, - "learning_rate": 5.439254671157764e-06, - "loss": 0.8994, + "learning_rate": 3.9452513238936505e-07, + "loss": 0.9283, "step": 23334 }, { - "epoch": 0.6612542151945365, + "epoch": 0.9130213631739573, "grad_norm": 0.0, - "learning_rate": 5.438437911536473e-06, - "loss": 0.8382, + "learning_rate": 3.94172772230923e-07, + "loss": 0.821, "step": 23335 }, { - "epoch": 0.661282552636799, + "epoch": 0.9130604898661867, "grad_norm": 0.0, - "learning_rate": 5.437621190338915e-06, - "loss": 0.8903, + "learning_rate": 3.9382056633002876e-07, + "loss": 0.9184, "step": 23336 }, { - "epoch": 0.6613108900790614, + "epoch": 0.9130996165584162, "grad_norm": 0.0, - "learning_rate": 5.436804507571966e-06, - "loss": 0.7559, + "learning_rate": 3.9346851469234006e-07, + "loss": 0.9836, "step": 23337 }, { - "epoch": 0.6613392275213239, + "epoch": 0.9131387432506456, "grad_norm": 0.0, - "learning_rate": 5.435987863242507e-06, - "loss": 0.7562, + "learning_rate": 3.931166173235101e-07, + "loss": 0.9754, "step": 23338 }, { - "epoch": 0.6613675649635864, + "epoch": 0.913177869942875, "grad_norm": 0.0, - "learning_rate": 5.435171257357417e-06, - "loss": 0.6937, + "learning_rate": 3.927648742291879e-07, + "loss": 0.9059, "step": 23339 }, { - "epoch": 0.6613959024058489, + "epoch": 0.9132169966351045, "grad_norm": 0.0, - "learning_rate": 5.434354689923574e-06, - "loss": 0.7945, + "learning_rate": 3.924132854150231e-07, + "loss": 0.8716, "step": 23340 }, { - "epoch": 0.6614242398481113, + "epoch": 0.9132561233273339, "grad_norm": 0.0, - "learning_rate": 5.43353816094786e-06, - "loss": 0.8156, + "learning_rate": 3.9206185088666246e-07, + "loss": 1.0188, "step": 23341 }, { - "epoch": 0.6614525772903738, + "epoch": 0.9132952500195634, "grad_norm": 0.0, - "learning_rate": 5.432721670437147e-06, - "loss": 0.6832, + "learning_rate": 3.9171057064975035e-07, + "loss": 1.0767, "step": 23342 }, { - "epoch": 0.6614809147326363, + "epoch": 0.9133343767117927, "grad_norm": 0.0, - "learning_rate": 5.431905218398314e-06, - "loss": 0.8976, + "learning_rate": 3.913594447099245e-07, + "loss": 1.0139, "step": 23343 }, { - "epoch": 0.6615092521748986, + "epoch": 0.9133735034040222, "grad_norm": 0.0, - "learning_rate": 5.431088804838246e-06, - "loss": 0.9025, + "learning_rate": 3.9100847307282696e-07, + "loss": 0.9512, "step": 23344 }, { - "epoch": 0.6615375896171611, + "epoch": 0.9134126300962516, "grad_norm": 0.0, - "learning_rate": 5.430272429763808e-06, - "loss": 0.8161, + "learning_rate": 3.906576557440922e-07, + "loss": 0.9697, "step": 23345 }, { - "epoch": 0.6615659270594236, + "epoch": 0.9134517567884811, "grad_norm": 0.0, - "learning_rate": 5.429456093181883e-06, - "loss": 0.8357, + "learning_rate": 3.9030699272935455e-07, + "loss": 1.0096, "step": 23346 }, { - "epoch": 0.6615942645016861, + "epoch": 0.9134908834807105, "grad_norm": 0.0, - "learning_rate": 5.42863979509935e-06, - "loss": 0.8423, + "learning_rate": 3.8995648403424404e-07, + "loss": 0.944, "step": 23347 }, { - "epoch": 0.6616226019439485, + "epoch": 0.91353001017294, "grad_norm": 0.0, - "learning_rate": 5.427823535523078e-06, - "loss": 0.7613, + "learning_rate": 3.896061296643905e-07, + "loss": 0.9022, "step": 23348 }, { - "epoch": 0.661650939386211, + "epoch": 0.9135691368651694, "grad_norm": 0.0, - "learning_rate": 5.427007314459949e-06, - "loss": 0.7953, + "learning_rate": 3.892559296254217e-07, + "loss": 0.9228, "step": 23349 }, { - "epoch": 0.6616792768284735, + "epoch": 0.9136082635573989, "grad_norm": 0.0, - "learning_rate": 5.4261911319168335e-06, - "loss": 0.8389, + "learning_rate": 3.889058839229587e-07, + "loss": 0.8802, "step": 23350 }, { - "epoch": 0.6617076142707359, + "epoch": 0.9136473902496283, "grad_norm": 0.0, - "learning_rate": 5.425374987900607e-06, - "loss": 0.8153, + "learning_rate": 3.8855599256262475e-07, + "loss": 0.9066, "step": 23351 }, { - "epoch": 0.6617359517129984, + "epoch": 0.9136865169418578, "grad_norm": 0.0, - "learning_rate": 5.424558882418146e-06, - "loss": 0.9834, + "learning_rate": 3.8820625555003543e-07, + "loss": 0.9272, "step": 23352 }, { - "epoch": 0.6617642891552609, + "epoch": 0.9137256436340871, "grad_norm": 0.0, - "learning_rate": 5.423742815476325e-06, - "loss": 0.8808, + "learning_rate": 3.8785667289081066e-07, + "loss": 0.9028, "step": 23353 }, { - "epoch": 0.6617926265975234, + "epoch": 0.9137647703263166, "grad_norm": 0.0, - "learning_rate": 5.422926787082017e-06, - "loss": 0.8777, + "learning_rate": 3.8750724459056367e-07, + "loss": 0.8684, "step": 23354 }, { - "epoch": 0.6618209640397857, + "epoch": 0.913803897018546, "grad_norm": 0.0, - "learning_rate": 5.422110797242102e-06, - "loss": 0.8361, + "learning_rate": 3.8715797065490446e-07, + "loss": 0.9723, "step": 23355 }, { - "epoch": 0.6618493014820482, + "epoch": 0.9138430237107755, "grad_norm": 0.0, - "learning_rate": 5.4212948459634414e-06, - "loss": 0.8971, + "learning_rate": 3.86808851089443e-07, + "loss": 1.0732, "step": 23356 }, { - "epoch": 0.6618776389243107, + "epoch": 0.9138821504030049, "grad_norm": 0.0, - "learning_rate": 5.4204789332529195e-06, - "loss": 0.7741, + "learning_rate": 3.8645988589978477e-07, + "loss": 0.9828, "step": 23357 }, { - "epoch": 0.6619059763665731, + "epoch": 0.9139212770952344, "grad_norm": 0.0, - "learning_rate": 5.419663059117401e-06, - "loss": 0.7863, + "learning_rate": 3.8611107509153423e-07, + "loss": 0.9561, "step": 23358 }, { - "epoch": 0.6619343138088356, + "epoch": 0.9139604037874638, "grad_norm": 0.0, - "learning_rate": 5.418847223563761e-06, - "loss": 0.7833, + "learning_rate": 3.857624186702946e-07, + "loss": 0.9581, "step": 23359 }, { - "epoch": 0.6619626512510981, + "epoch": 0.9139995304796933, "grad_norm": 0.0, - "learning_rate": 5.418031426598875e-06, - "loss": 0.8676, + "learning_rate": 3.854139166416615e-07, + "loss": 1.0844, "step": 23360 }, { - "epoch": 0.6619909886933605, + "epoch": 0.9140386571719227, "grad_norm": 0.0, - "learning_rate": 5.417215668229609e-06, - "loss": 0.8774, + "learning_rate": 3.8506556901123373e-07, + "loss": 0.9275, "step": 23361 }, { - "epoch": 0.662019326135623, + "epoch": 0.9140777838641522, "grad_norm": 0.0, - "learning_rate": 5.416399948462836e-06, - "loss": 0.8564, + "learning_rate": 3.8471737578460453e-07, + "loss": 0.9398, "step": 23362 }, { - "epoch": 0.6620476635778855, + "epoch": 0.9141169105563816, "grad_norm": 0.0, - "learning_rate": 5.415584267305433e-06, - "loss": 0.765, + "learning_rate": 3.8436933696736734e-07, + "loss": 0.9051, "step": 23363 }, { - "epoch": 0.662076001020148, + "epoch": 0.914156037248611, "grad_norm": 0.0, - "learning_rate": 5.414768624764262e-06, - "loss": 0.9439, + "learning_rate": 3.840214525651076e-07, + "loss": 1.0973, "step": 23364 }, { - "epoch": 0.6621043384624103, + "epoch": 0.9141951639408404, "grad_norm": 0.0, - "learning_rate": 5.413953020846197e-06, - "loss": 0.8418, + "learning_rate": 3.8367372258341527e-07, + "loss": 0.9658, "step": 23365 }, { - "epoch": 0.6621326759046728, + "epoch": 0.9142342906330699, "grad_norm": 0.0, - "learning_rate": 5.413137455558111e-06, - "loss": 0.8265, + "learning_rate": 3.8332614702787043e-07, + "loss": 0.9086, "step": 23366 }, { - "epoch": 0.6621610133469353, + "epoch": 0.9142734173252993, "grad_norm": 0.0, - "learning_rate": 5.41232192890687e-06, - "loss": 0.8532, + "learning_rate": 3.829787259040596e-07, + "loss": 0.9568, "step": 23367 }, { - "epoch": 0.6621893507891977, + "epoch": 0.9143125440175287, "grad_norm": 0.0, - "learning_rate": 5.411506440899348e-06, - "loss": 0.8403, + "learning_rate": 3.826314592175584e-07, + "loss": 0.9201, "step": 23368 }, { - "epoch": 0.6622176882314602, + "epoch": 0.9143516707097582, "grad_norm": 0.0, - "learning_rate": 5.4106909915424075e-06, - "loss": 0.6882, + "learning_rate": 3.822843469739468e-07, + "loss": 1.0382, "step": 23369 }, { - "epoch": 0.6622460256737227, + "epoch": 0.9143907974019876, "grad_norm": 0.0, - "learning_rate": 5.409875580842925e-06, - "loss": 0.8047, + "learning_rate": 3.819373891787936e-07, + "loss": 0.9102, "step": 23370 }, { - "epoch": 0.6622743631159852, + "epoch": 0.9144299240942171, "grad_norm": 0.0, - "learning_rate": 5.409060208807761e-06, - "loss": 0.6572, + "learning_rate": 3.815905858376767e-07, + "loss": 0.7984, "step": 23371 }, { - "epoch": 0.6623027005582476, + "epoch": 0.9144690507864465, "grad_norm": 0.0, - "learning_rate": 5.408244875443789e-06, - "loss": 0.8237, + "learning_rate": 3.8124393695616047e-07, + "loss": 0.9283, "step": 23372 }, { - "epoch": 0.6623310380005101, + "epoch": 0.914508177478676, "grad_norm": 0.0, - "learning_rate": 5.407429580757879e-06, - "loss": 0.8918, + "learning_rate": 3.8089744253981596e-07, + "loss": 0.9096, "step": 23373 }, { - "epoch": 0.6623593754427726, + "epoch": 0.9145473041709054, "grad_norm": 0.0, - "learning_rate": 5.40661432475689e-06, - "loss": 0.9016, + "learning_rate": 3.805511025942032e-07, + "loss": 1.035, "step": 23374 }, { - "epoch": 0.662387712885035, + "epoch": 0.9145864308631348, "grad_norm": 0.0, - "learning_rate": 5.405799107447695e-06, - "loss": 0.8013, + "learning_rate": 3.802049171248856e-07, + "loss": 1.0087, "step": 23375 }, { - "epoch": 0.6624160503272974, + "epoch": 0.9146255575553642, "grad_norm": 0.0, - "learning_rate": 5.404983928837164e-06, - "loss": 0.9261, + "learning_rate": 3.7985888613742416e-07, + "loss": 0.8297, "step": 23376 }, { - "epoch": 0.6624443877695599, + "epoch": 0.9146646842475937, "grad_norm": 0.0, - "learning_rate": 5.404168788932156e-06, - "loss": 0.9099, + "learning_rate": 3.7951300963737445e-07, + "loss": 0.9085, "step": 23377 }, { - "epoch": 0.6624727252118224, + "epoch": 0.9147038109398231, "grad_norm": 0.0, - "learning_rate": 5.403353687739541e-06, - "loss": 0.8714, + "learning_rate": 3.7916728763028874e-07, + "loss": 0.8847, "step": 23378 }, { - "epoch": 0.6625010626540848, + "epoch": 0.9147429376320526, "grad_norm": 0.0, - "learning_rate": 5.402538625266184e-06, - "loss": 0.8234, + "learning_rate": 3.788217201217226e-07, + "loss": 1.0375, "step": 23379 }, { - "epoch": 0.6625294000963473, + "epoch": 0.914782064324282, "grad_norm": 0.0, - "learning_rate": 5.4017236015189515e-06, - "loss": 0.8724, + "learning_rate": 3.784763071172226e-07, + "loss": 0.9994, "step": 23380 }, { - "epoch": 0.6625577375386098, + "epoch": 0.9148211910165115, "grad_norm": 0.0, - "learning_rate": 5.400908616504713e-06, - "loss": 0.8755, + "learning_rate": 3.781310486223377e-07, + "loss": 0.8015, "step": 23381 }, { - "epoch": 0.6625860749808722, + "epoch": 0.9148603177087409, "grad_norm": 0.0, - "learning_rate": 5.400093670230324e-06, - "loss": 0.7884, + "learning_rate": 3.7778594464261023e-07, + "loss": 0.9963, "step": 23382 }, { - "epoch": 0.6626144124231347, + "epoch": 0.9148994444009704, "grad_norm": 0.0, - "learning_rate": 5.399278762702655e-06, - "loss": 0.857, + "learning_rate": 3.7744099518358447e-07, + "loss": 0.8451, "step": 23383 }, { - "epoch": 0.6626427498653972, + "epoch": 0.9149385710931998, "grad_norm": 0.0, - "learning_rate": 5.398463893928574e-06, - "loss": 0.7815, + "learning_rate": 3.770962002507972e-07, + "loss": 1.0168, "step": 23384 }, { - "epoch": 0.6626710873076596, + "epoch": 0.9149776977854293, "grad_norm": 0.0, - "learning_rate": 5.397649063914936e-06, - "loss": 0.8816, + "learning_rate": 3.7675155984978726e-07, + "loss": 0.8663, "step": 23385 }, { - "epoch": 0.662699424749922, + "epoch": 0.9150168244776586, "grad_norm": 0.0, - "learning_rate": 5.396834272668614e-06, - "loss": 0.8144, + "learning_rate": 3.764070739860881e-07, + "loss": 0.9877, "step": 23386 }, { - "epoch": 0.6627277621921845, + "epoch": 0.9150559511698881, "grad_norm": 0.0, - "learning_rate": 5.39601952019646e-06, - "loss": 0.8257, + "learning_rate": 3.7606274266523415e-07, + "loss": 0.907, "step": 23387 }, { - "epoch": 0.662756099634447, + "epoch": 0.9150950778621175, "grad_norm": 0.0, - "learning_rate": 5.395204806505345e-06, - "loss": 0.8833, + "learning_rate": 3.7571856589275093e-07, + "loss": 1.0015, "step": 23388 }, { - "epoch": 0.6627844370767094, + "epoch": 0.915134204554347, "grad_norm": 0.0, - "learning_rate": 5.394390131602133e-06, - "loss": 0.7748, + "learning_rate": 3.7537454367416847e-07, + "loss": 0.8974, "step": 23389 }, { - "epoch": 0.6628127745189719, + "epoch": 0.9151733312465764, "grad_norm": 0.0, - "learning_rate": 5.393575495493679e-06, - "loss": 0.8779, + "learning_rate": 3.750306760150113e-07, + "loss": 0.8885, "step": 23390 }, { - "epoch": 0.6628411119612344, + "epoch": 0.9152124579388059, "grad_norm": 0.0, - "learning_rate": 5.392760898186851e-06, - "loss": 0.8441, + "learning_rate": 3.746869629207994e-07, + "loss": 0.9936, "step": 23391 }, { - "epoch": 0.6628694494034968, + "epoch": 0.9152515846310353, "grad_norm": 0.0, - "learning_rate": 5.391946339688506e-06, - "loss": 0.8673, + "learning_rate": 3.7434340439705396e-07, + "loss": 0.9582, "step": 23392 }, { - "epoch": 0.6628977868457593, + "epoch": 0.9152907113232648, "grad_norm": 0.0, - "learning_rate": 5.3911318200055105e-06, - "loss": 0.8019, + "learning_rate": 3.7400000044929273e-07, + "loss": 0.9761, "step": 23393 }, { - "epoch": 0.6629261242880218, + "epoch": 0.9153298380154942, "grad_norm": 0.0, - "learning_rate": 5.390317339144726e-06, - "loss": 0.8647, + "learning_rate": 3.736567510830291e-07, + "loss": 1.073, "step": 23394 }, { - "epoch": 0.6629544617302843, + "epoch": 0.9153689647077236, "grad_norm": 0.0, - "learning_rate": 5.389502897113006e-06, - "loss": 0.7612, + "learning_rate": 3.7331365630377537e-07, + "loss": 0.9568, "step": 23395 }, { - "epoch": 0.6629827991725467, + "epoch": 0.915408091399953, "grad_norm": 0.0, - "learning_rate": 5.388688493917216e-06, - "loss": 0.922, + "learning_rate": 3.729707161170415e-07, + "loss": 0.9865, "step": 23396 }, { - "epoch": 0.6630111366148091, + "epoch": 0.9154472180921824, "grad_norm": 0.0, - "learning_rate": 5.387874129564219e-06, - "loss": 0.808, + "learning_rate": 3.726279305283331e-07, + "loss": 0.9327, "step": 23397 }, { - "epoch": 0.6630394740570716, + "epoch": 0.9154863447844119, "grad_norm": 0.0, - "learning_rate": 5.387059804060866e-06, - "loss": 0.8276, + "learning_rate": 3.722852995431592e-07, + "loss": 1.0692, "step": 23398 }, { - "epoch": 0.663067811499334, + "epoch": 0.9155254714766413, "grad_norm": 0.0, - "learning_rate": 5.386245517414026e-06, - "loss": 0.6897, + "learning_rate": 3.719428231670175e-07, + "loss": 1.1176, "step": 23399 }, { - "epoch": 0.6630961489415965, + "epoch": 0.9155645981688708, "grad_norm": 0.0, - "learning_rate": 5.38543126963055e-06, - "loss": 0.7312, + "learning_rate": 3.716005014054103e-07, + "loss": 0.9748, "step": 23400 }, { - "epoch": 0.663124486383859, + "epoch": 0.9156037248611002, "grad_norm": 0.0, - "learning_rate": 5.3846170607172985e-06, - "loss": 0.7778, + "learning_rate": 3.712583342638332e-07, + "loss": 0.9003, "step": 23401 }, { - "epoch": 0.6631528238261215, + "epoch": 0.9156428515533297, "grad_norm": 0.0, - "learning_rate": 5.383802890681136e-06, - "loss": 0.9092, + "learning_rate": 3.709163217477807e-07, + "loss": 1.0242, "step": 23402 }, { - "epoch": 0.6631811612683839, + "epoch": 0.9156819782455591, "grad_norm": 0.0, - "learning_rate": 5.382988759528912e-06, - "loss": 0.8226, + "learning_rate": 3.705744638627473e-07, + "loss": 0.9986, "step": 23403 }, { - "epoch": 0.6632094987106464, + "epoch": 0.9157211049377886, "grad_norm": 0.0, - "learning_rate": 5.382174667267488e-06, - "loss": 0.8541, + "learning_rate": 3.702327606142231e-07, + "loss": 1.0388, "step": 23404 }, { - "epoch": 0.6632378361529089, + "epoch": 0.915760231630018, "grad_norm": 0.0, - "learning_rate": 5.381360613903722e-06, - "loss": 0.8768, + "learning_rate": 3.698912120076914e-07, + "loss": 0.8084, "step": 23405 }, { - "epoch": 0.6632661735951713, + "epoch": 0.9157993583222475, "grad_norm": 0.0, - "learning_rate": 5.380546599444471e-06, - "loss": 0.7639, + "learning_rate": 3.695498180486412e-07, + "loss": 0.902, "step": 23406 }, { - "epoch": 0.6632945110374338, + "epoch": 0.9158384850144768, "grad_norm": 0.0, - "learning_rate": 5.379732623896594e-06, - "loss": 0.8474, + "learning_rate": 3.692085787425526e-07, + "loss": 0.9641, "step": 23407 }, { - "epoch": 0.6633228484796962, + "epoch": 0.9158776117067063, "grad_norm": 0.0, - "learning_rate": 5.378918687266942e-06, - "loss": 0.8709, + "learning_rate": 3.688674940949066e-07, + "loss": 0.8637, "step": 23408 }, { - "epoch": 0.6633511859219586, + "epoch": 0.9159167383989357, "grad_norm": 0.0, - "learning_rate": 5.378104789562373e-06, - "loss": 0.8562, + "learning_rate": 3.685265641111802e-07, + "loss": 1.0638, "step": 23409 }, { - "epoch": 0.6633795233642211, + "epoch": 0.9159558650911652, "grad_norm": 0.0, - "learning_rate": 5.377290930789749e-06, - "loss": 0.8369, + "learning_rate": 3.681857887968476e-07, + "loss": 0.8258, "step": 23410 }, { - "epoch": 0.6634078608064836, + "epoch": 0.9159949917833946, "grad_norm": 0.0, - "learning_rate": 5.376477110955915e-06, - "loss": 0.7971, + "learning_rate": 3.678451681573825e-07, + "loss": 1.016, "step": 23411 }, { - "epoch": 0.6634361982487461, + "epoch": 0.9160341184756241, "grad_norm": 0.0, - "learning_rate": 5.375663330067731e-06, - "loss": 0.7934, + "learning_rate": 3.675047021982547e-07, + "loss": 0.9604, "step": 23412 }, { - "epoch": 0.6634645356910085, + "epoch": 0.9160732451678535, "grad_norm": 0.0, - "learning_rate": 5.374849588132056e-06, - "loss": 0.7704, + "learning_rate": 3.6716439092493007e-07, + "loss": 0.9963, "step": 23413 }, { - "epoch": 0.663492873133271, + "epoch": 0.916112371860083, "grad_norm": 0.0, - "learning_rate": 5.374035885155737e-06, - "loss": 0.7813, + "learning_rate": 3.668242343428763e-07, + "loss": 1.0038, "step": 23414 }, { - "epoch": 0.6635212105755335, + "epoch": 0.9161514985523124, "grad_norm": 0.0, - "learning_rate": 5.373222221145635e-06, - "loss": 0.8813, + "learning_rate": 3.6648423245755125e-07, + "loss": 0.9799, "step": 23415 }, { - "epoch": 0.6635495480177959, + "epoch": 0.9161906252445419, "grad_norm": 0.0, - "learning_rate": 5.372408596108598e-06, - "loss": 0.8718, + "learning_rate": 3.6614438527442067e-07, + "loss": 0.8788, "step": 23416 }, { - "epoch": 0.6635778854600584, + "epoch": 0.9162297519367713, "grad_norm": 0.0, - "learning_rate": 5.37159501005148e-06, - "loss": 0.8144, + "learning_rate": 3.658046927989389e-07, + "loss": 0.8735, "step": 23417 }, { - "epoch": 0.6636062229023209, + "epoch": 0.9162688786290007, "grad_norm": 0.0, - "learning_rate": 5.370781462981136e-06, - "loss": 0.7909, + "learning_rate": 3.6546515503656176e-07, + "loss": 1.0758, "step": 23418 }, { - "epoch": 0.6636345603445833, + "epoch": 0.9163080053212301, "grad_norm": 0.0, - "learning_rate": 5.36996795490442e-06, - "loss": 0.9869, + "learning_rate": 3.6512577199273924e-07, + "loss": 0.8432, "step": 23419 }, { - "epoch": 0.6636628977868457, + "epoch": 0.9163471320134596, "grad_norm": 0.0, - "learning_rate": 5.369154485828187e-06, - "loss": 0.8407, + "learning_rate": 3.64786543672927e-07, + "loss": 0.9166, "step": 23420 }, { - "epoch": 0.6636912352291082, + "epoch": 0.916386258705689, "grad_norm": 0.0, - "learning_rate": 5.368341055759281e-06, - "loss": 0.8581, + "learning_rate": 3.6444747008256733e-07, + "loss": 0.8458, "step": 23421 }, { - "epoch": 0.6637195726713707, + "epoch": 0.9164253853979185, "grad_norm": 0.0, - "learning_rate": 5.367527664704557e-06, - "loss": 0.6804, + "learning_rate": 3.641085512271081e-07, + "loss": 0.8047, "step": 23422 }, { - "epoch": 0.6637479101136331, + "epoch": 0.9164645120901479, "grad_norm": 0.0, - "learning_rate": 5.366714312670873e-06, - "loss": 0.7005, + "learning_rate": 3.637697871119894e-07, + "loss": 0.9104, "step": 23423 }, { - "epoch": 0.6637762475558956, + "epoch": 0.9165036387823773, "grad_norm": 0.0, - "learning_rate": 5.3659009996650704e-06, - "loss": 0.8829, + "learning_rate": 3.6343117774265467e-07, + "loss": 0.9744, "step": 23424 }, { - "epoch": 0.6638045849981581, + "epoch": 0.9165427654746068, "grad_norm": 0.0, - "learning_rate": 5.3650877256940045e-06, - "loss": 0.9654, + "learning_rate": 3.630927231245385e-07, + "loss": 0.9268, "step": 23425 }, { - "epoch": 0.6638329224404206, + "epoch": 0.9165818921668362, "grad_norm": 0.0, - "learning_rate": 5.36427449076453e-06, - "loss": 0.767, + "learning_rate": 3.6275442326307974e-07, + "loss": 0.9695, "step": 23426 }, { - "epoch": 0.663861259882683, + "epoch": 0.9166210188590657, "grad_norm": 0.0, - "learning_rate": 5.36346129488349e-06, - "loss": 0.8701, + "learning_rate": 3.624162781637064e-07, + "loss": 1.0593, "step": 23427 }, { - "epoch": 0.6638895973249455, + "epoch": 0.916660145551295, "grad_norm": 0.0, - "learning_rate": 5.36264813805774e-06, - "loss": 0.7912, + "learning_rate": 3.6207828783185184e-07, + "loss": 0.9297, "step": 23428 }, { - "epoch": 0.663917934767208, + "epoch": 0.9166992722435245, "grad_norm": 0.0, - "learning_rate": 5.3618350202941225e-06, - "loss": 0.9457, + "learning_rate": 3.617404522729451e-07, + "loss": 0.9387, "step": 23429 }, { - "epoch": 0.6639462722094703, + "epoch": 0.9167383989357539, "grad_norm": 0.0, - "learning_rate": 5.361021941599492e-06, - "loss": 0.7649, + "learning_rate": 3.6140277149240623e-07, + "loss": 1.0067, "step": 23430 }, { - "epoch": 0.6639746096517328, + "epoch": 0.9167775256279834, "grad_norm": 0.0, - "learning_rate": 5.3602089019806955e-06, - "loss": 0.7897, + "learning_rate": 3.6106524549566203e-07, + "loss": 1.0128, "step": 23431 }, { - "epoch": 0.6640029470939953, + "epoch": 0.9168166523202128, "grad_norm": 0.0, - "learning_rate": 5.359395901444583e-06, - "loss": 0.831, + "learning_rate": 3.607278742881326e-07, + "loss": 0.9461, "step": 23432 }, { - "epoch": 0.6640312845362577, + "epoch": 0.9168557790124423, "grad_norm": 0.0, - "learning_rate": 5.358582939998001e-06, - "loss": 0.77, + "learning_rate": 3.6039065787523365e-07, + "loss": 0.9423, "step": 23433 }, { - "epoch": 0.6640596219785202, + "epoch": 0.9168949057046717, "grad_norm": 0.0, - "learning_rate": 5.357770017647803e-06, - "loss": 0.8407, + "learning_rate": 3.600535962623819e-07, + "loss": 0.9315, "step": 23434 }, { - "epoch": 0.6640879594207827, + "epoch": 0.9169340323969012, "grad_norm": 0.0, - "learning_rate": 5.356957134400829e-06, - "loss": 0.8631, + "learning_rate": 3.5971668945499084e-07, + "loss": 0.9306, "step": 23435 }, { - "epoch": 0.6641162968630452, + "epoch": 0.9169731590891306, "grad_norm": 0.0, - "learning_rate": 5.356144290263932e-06, - "loss": 0.8835, + "learning_rate": 3.5937993745846946e-07, + "loss": 0.9206, "step": 23436 }, { - "epoch": 0.6641446343053076, + "epoch": 0.9170122857813601, "grad_norm": 0.0, - "learning_rate": 5.355331485243953e-06, - "loss": 0.8442, + "learning_rate": 3.590433402782245e-07, + "loss": 0.9058, "step": 23437 }, { - "epoch": 0.6641729717475701, + "epoch": 0.9170514124735895, "grad_norm": 0.0, - "learning_rate": 5.354518719347741e-06, - "loss": 0.8319, + "learning_rate": 3.5870689791966394e-07, + "loss": 0.9521, "step": 23438 }, { - "epoch": 0.6642013091898326, + "epoch": 0.917090539165819, "grad_norm": 0.0, - "learning_rate": 5.353705992582147e-06, - "loss": 0.8512, + "learning_rate": 3.583706103881901e-07, + "loss": 0.9733, "step": 23439 }, { - "epoch": 0.6642296466320949, + "epoch": 0.9171296658580483, "grad_norm": 0.0, - "learning_rate": 5.352893304954008e-06, - "loss": 0.8772, + "learning_rate": 3.580344776892009e-07, + "loss": 0.9136, "step": 23440 }, { - "epoch": 0.6642579840743574, + "epoch": 0.9171687925502778, "grad_norm": 0.0, - "learning_rate": 5.352080656470175e-06, - "loss": 0.8854, + "learning_rate": 3.5769849982809746e-07, + "loss": 0.877, "step": 23441 }, { - "epoch": 0.6642863215166199, + "epoch": 0.9172079192425072, "grad_norm": 0.0, - "learning_rate": 5.351268047137497e-06, - "loss": 0.8497, + "learning_rate": 3.5736267681027117e-07, + "loss": 0.9071, "step": 23442 }, { - "epoch": 0.6643146589588824, + "epoch": 0.9172470459347367, "grad_norm": 0.0, - "learning_rate": 5.35045547696281e-06, - "loss": 0.8559, + "learning_rate": 3.5702700864112095e-07, + "loss": 0.9901, "step": 23443 }, { - "epoch": 0.6643429964011448, + "epoch": 0.9172861726269661, "grad_norm": 0.0, - "learning_rate": 5.349642945952964e-06, - "loss": 0.8813, + "learning_rate": 3.566914953260314e-07, + "loss": 0.9427, "step": 23444 }, { - "epoch": 0.6643713338434073, + "epoch": 0.9173252993191956, "grad_norm": 0.0, - "learning_rate": 5.348830454114802e-06, - "loss": 0.8455, + "learning_rate": 3.563561368703938e-07, + "loss": 0.9699, "step": 23445 }, { - "epoch": 0.6643996712856698, + "epoch": 0.917364426011425, "grad_norm": 0.0, - "learning_rate": 5.348018001455167e-06, - "loss": 0.8819, + "learning_rate": 3.560209332795894e-07, + "loss": 0.9138, "step": 23446 }, { - "epoch": 0.6644280087279322, + "epoch": 0.9174035527036545, "grad_norm": 0.0, - "learning_rate": 5.347205587980909e-06, - "loss": 0.8011, + "learning_rate": 3.556858845590083e-07, + "loss": 0.919, "step": 23447 }, { - "epoch": 0.6644563461701947, + "epoch": 0.9174426793958839, "grad_norm": 0.0, - "learning_rate": 5.3463932136988615e-06, - "loss": 0.8306, + "learning_rate": 3.55350990714024e-07, + "loss": 0.9805, "step": 23448 }, { - "epoch": 0.6644846836124572, + "epoch": 0.9174818060881134, "grad_norm": 0.0, - "learning_rate": 5.345580878615877e-06, - "loss": 0.8757, + "learning_rate": 3.5501625175001995e-07, + "loss": 1.0131, "step": 23449 }, { - "epoch": 0.6645130210547197, + "epoch": 0.9175209327803427, "grad_norm": 0.0, - "learning_rate": 5.344768582738789e-06, - "loss": 0.8369, + "learning_rate": 3.5468166767236746e-07, + "loss": 0.8377, "step": 23450 }, { - "epoch": 0.664541358496982, + "epoch": 0.9175600594725722, "grad_norm": 0.0, - "learning_rate": 5.343956326074442e-06, - "loss": 0.7826, + "learning_rate": 3.5434723848644105e-07, + "loss": 0.9454, "step": 23451 }, { - "epoch": 0.6645696959392445, + "epoch": 0.9175991861648016, "grad_norm": 0.0, - "learning_rate": 5.343144108629685e-06, - "loss": 0.8247, + "learning_rate": 3.5401296419761086e-07, + "loss": 0.8293, "step": 23452 }, { - "epoch": 0.664598033381507, + "epoch": 0.917638312857031, "grad_norm": 0.0, - "learning_rate": 5.342331930411352e-06, - "loss": 0.8209, + "learning_rate": 3.5367884481124715e-07, + "loss": 1.0232, "step": 23453 }, { - "epoch": 0.6646263708237694, + "epoch": 0.9176774395492605, "grad_norm": 0.0, - "learning_rate": 5.341519791426285e-06, - "loss": 0.8594, + "learning_rate": 3.533448803327122e-07, + "loss": 0.8145, "step": 23454 }, { - "epoch": 0.6646547082660319, + "epoch": 0.9177165662414899, "grad_norm": 0.0, - "learning_rate": 5.340707691681332e-06, - "loss": 0.8231, + "learning_rate": 3.5301107076737064e-07, + "loss": 0.8745, "step": 23455 }, { - "epoch": 0.6646830457082944, + "epoch": 0.9177556929337194, "grad_norm": 0.0, - "learning_rate": 5.339895631183323e-06, - "loss": 0.8827, + "learning_rate": 3.526774161205826e-07, + "loss": 0.9559, "step": 23456 }, { - "epoch": 0.6647113831505568, + "epoch": 0.9177948196259488, "grad_norm": 0.0, - "learning_rate": 5.339083609939104e-06, - "loss": 0.8614, + "learning_rate": 3.523439163977083e-07, + "loss": 1.0196, "step": 23457 }, { - "epoch": 0.6647397205928193, + "epoch": 0.9178339463181783, "grad_norm": 0.0, - "learning_rate": 5.338271627955515e-06, - "loss": 0.8477, + "learning_rate": 3.5201057160410003e-07, + "loss": 0.7997, "step": 23458 }, { - "epoch": 0.6647680580350818, + "epoch": 0.9178730730104077, "grad_norm": 0.0, - "learning_rate": 5.337459685239395e-06, - "loss": 0.8971, + "learning_rate": 3.5167738174511245e-07, + "loss": 1.078, "step": 23459 }, { - "epoch": 0.6647963954773443, + "epoch": 0.9179121997026372, "grad_norm": 0.0, - "learning_rate": 5.336647781797586e-06, - "loss": 0.9438, + "learning_rate": 3.5134434682609573e-07, + "loss": 1.0104, "step": 23460 }, { - "epoch": 0.6648247329196066, + "epoch": 0.9179513263948665, "grad_norm": 0.0, - "learning_rate": 5.335835917636923e-06, - "loss": 0.7644, + "learning_rate": 3.5101146685240005e-07, + "loss": 0.8406, "step": 23461 }, { - "epoch": 0.6648530703618691, + "epoch": 0.917990453087096, "grad_norm": 0.0, - "learning_rate": 5.335024092764243e-06, - "loss": 0.9191, + "learning_rate": 3.506787418293678e-07, + "loss": 1.0051, "step": 23462 }, { - "epoch": 0.6648814078041316, + "epoch": 0.9180295797793254, "grad_norm": 0.0, - "learning_rate": 5.334212307186394e-06, - "loss": 0.8723, + "learning_rate": 3.503461717623446e-07, + "loss": 1.0674, "step": 23463 }, { - "epoch": 0.664909745246394, + "epoch": 0.9180687064715549, "grad_norm": 0.0, - "learning_rate": 5.333400560910202e-06, - "loss": 0.7869, + "learning_rate": 3.500137566566686e-07, + "loss": 0.8768, "step": 23464 }, { - "epoch": 0.6649380826886565, + "epoch": 0.9181078331637843, "grad_norm": 0.0, - "learning_rate": 5.332588853942515e-06, - "loss": 0.9605, + "learning_rate": 3.49681496517682e-07, + "loss": 0.9545, "step": 23465 }, { - "epoch": 0.664966420130919, + "epoch": 0.9181469598560138, "grad_norm": 0.0, - "learning_rate": 5.33177718629016e-06, - "loss": 0.8151, + "learning_rate": 3.493493913507162e-07, + "loss": 1.0174, "step": 23466 }, { - "epoch": 0.6649947575731815, + "epoch": 0.9181860865482432, "grad_norm": 0.0, - "learning_rate": 5.3309655579599795e-06, - "loss": 0.7334, + "learning_rate": 3.490174411611069e-07, + "loss": 0.9183, "step": 23467 }, { - "epoch": 0.6650230950154439, + "epoch": 0.9182252132404727, "grad_norm": 0.0, - "learning_rate": 5.330153968958811e-06, - "loss": 0.9053, + "learning_rate": 3.486856459541843e-07, + "loss": 0.8568, "step": 23468 }, { - "epoch": 0.6650514324577064, + "epoch": 0.9182643399327021, "grad_norm": 0.0, - "learning_rate": 5.329342419293488e-06, - "loss": 0.8336, + "learning_rate": 3.4835400573527525e-07, + "loss": 0.9152, "step": 23469 }, { - "epoch": 0.6650797698999689, + "epoch": 0.9183034666249316, "grad_norm": 0.0, - "learning_rate": 5.3285309089708545e-06, - "loss": 0.8475, + "learning_rate": 3.4802252050970763e-07, + "loss": 1.0983, "step": 23470 }, { - "epoch": 0.6651081073422312, + "epoch": 0.918342593317161, "grad_norm": 0.0, - "learning_rate": 5.327719437997734e-06, - "loss": 0.8822, + "learning_rate": 3.4769119028280396e-07, + "loss": 0.8281, "step": 23471 }, { - "epoch": 0.6651364447844937, + "epoch": 0.9183817200093904, "grad_norm": 0.0, - "learning_rate": 5.3269080063809685e-06, - "loss": 0.7067, + "learning_rate": 3.473600150598844e-07, + "loss": 1.0073, "step": 23472 }, { - "epoch": 0.6651647822267562, + "epoch": 0.9184208467016198, "grad_norm": 0.0, - "learning_rate": 5.326096614127396e-06, - "loss": 0.8904, + "learning_rate": 3.47028994846268e-07, + "loss": 0.9807, "step": 23473 }, { - "epoch": 0.6651931196690187, + "epoch": 0.9184599733938493, "grad_norm": 0.0, - "learning_rate": 5.325285261243843e-06, - "loss": 0.8453, + "learning_rate": 3.4669812964727043e-07, + "loss": 1.0476, "step": 23474 }, { - "epoch": 0.6652214571112811, + "epoch": 0.9184991000860787, "grad_norm": 0.0, - "learning_rate": 5.324473947737149e-06, - "loss": 0.7978, + "learning_rate": 3.463674194682032e-07, + "loss": 0.9727, "step": 23475 }, { - "epoch": 0.6652497945535436, + "epoch": 0.9185382267783082, "grad_norm": 0.0, - "learning_rate": 5.3236626736141505e-06, - "loss": 0.8782, + "learning_rate": 3.4603686431437855e-07, + "loss": 1.0685, "step": 23476 }, { - "epoch": 0.6652781319958061, + "epoch": 0.9185773534705376, "grad_norm": 0.0, - "learning_rate": 5.322851438881673e-06, - "loss": 0.8217, + "learning_rate": 3.4570646419110564e-07, + "loss": 0.8336, "step": 23477 }, { - "epoch": 0.6653064694380685, + "epoch": 0.9186164801627671, "grad_norm": 0.0, - "learning_rate": 5.32204024354656e-06, - "loss": 0.906, + "learning_rate": 3.4537621910369026e-07, + "loss": 0.9073, "step": 23478 }, { - "epoch": 0.665334806880331, + "epoch": 0.9186556068549965, "grad_norm": 0.0, - "learning_rate": 5.321229087615635e-06, - "loss": 0.799, + "learning_rate": 3.450461290574336e-07, + "loss": 0.8385, "step": 23479 }, { - "epoch": 0.6653631443225935, + "epoch": 0.918694733547226, "grad_norm": 0.0, - "learning_rate": 5.320417971095735e-06, - "loss": 0.87, + "learning_rate": 3.4471619405763825e-07, + "loss": 0.9656, "step": 23480 }, { - "epoch": 0.6653914817648559, + "epoch": 0.9187338602394554, "grad_norm": 0.0, - "learning_rate": 5.319606893993692e-06, - "loss": 0.9475, + "learning_rate": 3.4438641410960203e-07, + "loss": 0.9361, "step": 23481 }, { - "epoch": 0.6654198192071183, + "epoch": 0.9187729869316847, "grad_norm": 0.0, - "learning_rate": 5.318795856316336e-06, - "loss": 0.8925, + "learning_rate": 3.4405678921861976e-07, + "loss": 0.9573, "step": 23482 }, { - "epoch": 0.6654481566493808, + "epoch": 0.9188121136239142, "grad_norm": 0.0, - "learning_rate": 5.317984858070504e-06, - "loss": 0.8215, + "learning_rate": 3.4372731938998703e-07, + "loss": 0.9549, "step": 23483 }, { - "epoch": 0.6654764940916433, + "epoch": 0.9188512403161436, "grad_norm": 0.0, - "learning_rate": 5.3171738992630266e-06, - "loss": 0.8446, + "learning_rate": 3.433980046289942e-07, + "loss": 0.9083, "step": 23484 }, { - "epoch": 0.6655048315339057, + "epoch": 0.9188903670083731, "grad_norm": 0.0, - "learning_rate": 5.316362979900729e-06, - "loss": 0.6868, + "learning_rate": 3.430688449409281e-07, + "loss": 0.9789, "step": 23485 }, { - "epoch": 0.6655331689761682, + "epoch": 0.9189294937006025, "grad_norm": 0.0, - "learning_rate": 5.315552099990449e-06, - "loss": 0.8663, + "learning_rate": 3.4273984033107443e-07, + "loss": 0.9712, "step": 23486 }, { - "epoch": 0.6655615064184307, + "epoch": 0.918968620392832, "grad_norm": 0.0, - "learning_rate": 5.314741259539009e-06, - "loss": 0.9344, + "learning_rate": 3.4241099080471904e-07, + "loss": 1.0545, "step": 23487 }, { - "epoch": 0.6655898438606931, + "epoch": 0.9190077470850614, "grad_norm": 0.0, - "learning_rate": 5.313930458553242e-06, - "loss": 0.9315, + "learning_rate": 3.4208229636714216e-07, + "loss": 0.8659, "step": 23488 }, { - "epoch": 0.6656181813029556, + "epoch": 0.9190468737772909, "grad_norm": 0.0, - "learning_rate": 5.313119697039985e-06, - "loss": 0.7509, + "learning_rate": 3.417537570236207e-07, + "loss": 0.9899, "step": 23489 }, { - "epoch": 0.6656465187452181, + "epoch": 0.9190860004695203, "grad_norm": 0.0, - "learning_rate": 5.3123089750060554e-06, - "loss": 0.8989, + "learning_rate": 3.4142537277943146e-07, + "loss": 0.9702, "step": 23490 }, { - "epoch": 0.6656748561874806, + "epoch": 0.9191251271617498, "grad_norm": 0.0, - "learning_rate": 5.311498292458291e-06, - "loss": 0.8138, + "learning_rate": 3.410971436398469e-07, + "loss": 1.0694, "step": 23491 }, { - "epoch": 0.665703193629743, + "epoch": 0.9191642538539792, "grad_norm": 0.0, - "learning_rate": 5.310687649403518e-06, - "loss": 0.8266, + "learning_rate": 3.4076906961014066e-07, + "loss": 1.0059, "step": 23492 }, { - "epoch": 0.6657315310720054, + "epoch": 0.9192033805462086, "grad_norm": 0.0, - "learning_rate": 5.309877045848561e-06, - "loss": 0.8751, + "learning_rate": 3.4044115069557847e-07, + "loss": 1.073, "step": 23493 }, { - "epoch": 0.6657598685142679, + "epoch": 0.919242507238438, "grad_norm": 0.0, - "learning_rate": 5.309066481800253e-06, - "loss": 0.8394, + "learning_rate": 3.401133869014284e-07, + "loss": 0.9285, "step": 23494 }, { - "epoch": 0.6657882059565303, + "epoch": 0.9192816339306675, "grad_norm": 0.0, - "learning_rate": 5.308255957265418e-06, - "loss": 0.8203, + "learning_rate": 3.3978577823295056e-07, + "loss": 1.0181, "step": 23495 }, { - "epoch": 0.6658165433987928, + "epoch": 0.9193207606228969, "grad_norm": 0.0, - "learning_rate": 5.307445472250887e-06, - "loss": 0.8261, + "learning_rate": 3.394583246954097e-07, + "loss": 0.9625, "step": 23496 }, { - "epoch": 0.6658448808410553, + "epoch": 0.9193598873151264, "grad_norm": 0.0, - "learning_rate": 5.306635026763489e-06, - "loss": 0.9204, + "learning_rate": 3.391310262940628e-07, + "loss": 0.8813, "step": 23497 }, { - "epoch": 0.6658732182833178, + "epoch": 0.9193990140073558, "grad_norm": 0.0, - "learning_rate": 5.305824620810043e-06, - "loss": 0.7321, + "learning_rate": 3.3880388303416666e-07, + "loss": 1.0591, "step": 23498 }, { - "epoch": 0.6659015557255802, + "epoch": 0.9194381406995853, "grad_norm": 0.0, - "learning_rate": 5.305014254397378e-06, - "loss": 0.8232, + "learning_rate": 3.384768949209727e-07, + "loss": 0.8522, "step": 23499 }, { - "epoch": 0.6659298931678427, + "epoch": 0.9194772673918147, "grad_norm": 0.0, - "learning_rate": 5.304203927532327e-06, - "loss": 0.7622, + "learning_rate": 3.3815006195973333e-07, + "loss": 1.0097, "step": 23500 }, { - "epoch": 0.6659582306101052, + "epoch": 0.9195163940840442, "grad_norm": 0.0, - "learning_rate": 5.3033936402217055e-06, - "loss": 0.8122, + "learning_rate": 3.378233841556966e-07, + "loss": 1.0195, "step": 23501 }, { - "epoch": 0.6659865680523676, + "epoch": 0.9195555207762736, "grad_norm": 0.0, - "learning_rate": 5.302583392472347e-06, - "loss": 0.8891, + "learning_rate": 3.3749686151411056e-07, + "loss": 1.0175, "step": 23502 }, { - "epoch": 0.66601490549463, + "epoch": 0.9195946474685031, "grad_norm": 0.0, - "learning_rate": 5.301773184291069e-06, - "loss": 0.8072, + "learning_rate": 3.3717049404021653e-07, + "loss": 0.9975, "step": 23503 }, { - "epoch": 0.6660432429368925, + "epoch": 0.9196337741607324, "grad_norm": 0.0, - "learning_rate": 5.300963015684701e-06, - "loss": 0.8333, + "learning_rate": 3.368442817392548e-07, + "loss": 0.9698, "step": 23504 }, { - "epoch": 0.6660715803791549, + "epoch": 0.9196729008529619, "grad_norm": 0.0, - "learning_rate": 5.300152886660068e-06, - "loss": 0.726, + "learning_rate": 3.365182246164667e-07, + "loss": 0.9247, "step": 23505 }, { - "epoch": 0.6660999178214174, + "epoch": 0.9197120275451913, "grad_norm": 0.0, - "learning_rate": 5.299342797223992e-06, - "loss": 0.8304, + "learning_rate": 3.361923226770869e-07, + "loss": 1.0099, "step": 23506 }, { - "epoch": 0.6661282552636799, + "epoch": 0.9197511542374208, "grad_norm": 0.0, - "learning_rate": 5.298532747383295e-06, - "loss": 0.7721, + "learning_rate": 3.3586657592634797e-07, + "loss": 0.9173, "step": 23507 }, { - "epoch": 0.6661565927059424, + "epoch": 0.9197902809296502, "grad_norm": 0.0, - "learning_rate": 5.297722737144803e-06, - "loss": 0.821, + "learning_rate": 3.3554098436948347e-07, + "loss": 0.747, "step": 23508 }, { - "epoch": 0.6661849301482048, + "epoch": 0.9198294076218796, "grad_norm": 0.0, - "learning_rate": 5.296912766515338e-06, - "loss": 0.7915, + "learning_rate": 3.3521554801171807e-07, + "loss": 0.9306, "step": 23509 }, { - "epoch": 0.6662132675904673, + "epoch": 0.9198685343141091, "grad_norm": 0.0, - "learning_rate": 5.296102835501728e-06, - "loss": 0.8034, + "learning_rate": 3.348902668582832e-07, + "loss": 0.9757, "step": 23510 }, { - "epoch": 0.6662416050327298, + "epoch": 0.9199076610063385, "grad_norm": 0.0, - "learning_rate": 5.295292944110786e-06, - "loss": 0.8649, + "learning_rate": 3.3456514091439796e-07, + "loss": 1.0098, "step": 23511 }, { - "epoch": 0.6662699424749922, + "epoch": 0.919946787698568, "grad_norm": 0.0, - "learning_rate": 5.294483092349338e-06, - "loss": 0.8271, + "learning_rate": 3.3424017018528596e-07, + "loss": 0.8594, "step": 23512 }, { - "epoch": 0.6662982799172547, + "epoch": 0.9199859143907974, "grad_norm": 0.0, - "learning_rate": 5.293673280224212e-06, - "loss": 0.9304, + "learning_rate": 3.339153546761642e-07, + "loss": 1.0376, "step": 23513 }, { - "epoch": 0.6663266173595171, + "epoch": 0.9200250410830269, "grad_norm": 0.0, - "learning_rate": 5.292863507742218e-06, - "loss": 0.9075, + "learning_rate": 3.3359069439224956e-07, + "loss": 0.9754, "step": 23514 }, { - "epoch": 0.6663549548017796, + "epoch": 0.9200641677752562, "grad_norm": 0.0, - "learning_rate": 5.2920537749101885e-06, - "loss": 0.8129, + "learning_rate": 3.3326618933875565e-07, + "loss": 0.9646, "step": 23515 }, { - "epoch": 0.666383292244042, + "epoch": 0.9201032944674857, "grad_norm": 0.0, - "learning_rate": 5.291244081734933e-06, - "loss": 0.8866, + "learning_rate": 3.32941839520895e-07, + "loss": 0.9716, "step": 23516 }, { - "epoch": 0.6664116296863045, + "epoch": 0.9201424211597151, "grad_norm": 0.0, - "learning_rate": 5.290434428223276e-06, - "loss": 0.7765, + "learning_rate": 3.326176449438734e-07, + "loss": 0.9504, "step": 23517 }, { - "epoch": 0.666439967128567, + "epoch": 0.9201815478519446, "grad_norm": 0.0, - "learning_rate": 5.289624814382046e-06, - "loss": 0.854, + "learning_rate": 3.322936056128978e-07, + "loss": 0.939, "step": 23518 }, { - "epoch": 0.6664683045708294, + "epoch": 0.920220674544174, "grad_norm": 0.0, - "learning_rate": 5.288815240218048e-06, - "loss": 0.8801, + "learning_rate": 3.319697215331752e-07, + "loss": 0.9781, "step": 23519 }, { - "epoch": 0.6664966420130919, + "epoch": 0.9202598012364035, "grad_norm": 0.0, - "learning_rate": 5.28800570573811e-06, - "loss": 0.9048, + "learning_rate": 3.316459927099025e-07, + "loss": 0.9346, "step": 23520 }, { - "epoch": 0.6665249794553544, + "epoch": 0.9202989279286329, "grad_norm": 0.0, - "learning_rate": 5.287196210949051e-06, - "loss": 0.8615, + "learning_rate": 3.3132241914828e-07, + "loss": 0.9879, "step": 23521 }, { - "epoch": 0.6665533168976169, + "epoch": 0.9203380546208624, "grad_norm": 0.0, - "learning_rate": 5.286386755857686e-06, - "loss": 0.7559, + "learning_rate": 3.3099900085350355e-07, + "loss": 1.0258, "step": 23522 }, { - "epoch": 0.6665816543398793, + "epoch": 0.9203771813130918, "grad_norm": 0.0, - "learning_rate": 5.285577340470842e-06, - "loss": 0.8492, + "learning_rate": 3.306757378307679e-07, + "loss": 0.9147, "step": 23523 }, { - "epoch": 0.6666099917821418, + "epoch": 0.9204163080053213, "grad_norm": 0.0, - "learning_rate": 5.284767964795325e-06, - "loss": 0.9524, + "learning_rate": 3.303526300852633e-07, + "loss": 0.9182, "step": 23524 }, { - "epoch": 0.6666383292244042, + "epoch": 0.9204554346975506, "grad_norm": 0.0, - "learning_rate": 5.2839586288379595e-06, - "loss": 0.8031, + "learning_rate": 3.300296776221801e-07, + "loss": 0.9681, "step": 23525 }, { - "epoch": 0.6666666666666666, + "epoch": 0.9204945613897801, "grad_norm": 0.0, - "learning_rate": 5.2831493326055634e-06, - "loss": 0.8064, + "learning_rate": 3.297068804466996e-07, + "loss": 0.9376, "step": 23526 }, { - "epoch": 0.6666950041089291, + "epoch": 0.9205336880820095, "grad_norm": 0.0, - "learning_rate": 5.28234007610495e-06, - "loss": 0.8697, + "learning_rate": 3.2938423856401226e-07, + "loss": 0.9518, "step": 23527 }, { - "epoch": 0.6667233415511916, + "epoch": 0.920572814774239, "grad_norm": 0.0, - "learning_rate": 5.281530859342938e-06, - "loss": 0.8807, + "learning_rate": 3.2906175197929493e-07, + "loss": 0.9355, "step": 23528 }, { - "epoch": 0.666751678993454, + "epoch": 0.9206119414664684, "grad_norm": 0.0, - "learning_rate": 5.280721682326349e-06, - "loss": 0.8767, + "learning_rate": 3.28739420697729e-07, + "loss": 0.9675, "step": 23529 }, { - "epoch": 0.6667800164357165, + "epoch": 0.9206510681586979, "grad_norm": 0.0, - "learning_rate": 5.279912545061987e-06, - "loss": 0.8004, + "learning_rate": 3.284172447244871e-07, + "loss": 1.0494, "step": 23530 }, { - "epoch": 0.666808353877979, + "epoch": 0.9206901948509273, "grad_norm": 0.0, - "learning_rate": 5.27910344755668e-06, - "loss": 0.8065, + "learning_rate": 3.2809522406474616e-07, + "loss": 0.9554, "step": 23531 }, { - "epoch": 0.6668366913202415, + "epoch": 0.9207293215431568, "grad_norm": 0.0, - "learning_rate": 5.278294389817233e-06, - "loss": 0.8137, + "learning_rate": 3.2777335872367534e-07, + "loss": 0.832, "step": 23532 }, { - "epoch": 0.6668650287625039, + "epoch": 0.9207684482353862, "grad_norm": 0.0, - "learning_rate": 5.277485371850466e-06, - "loss": 0.8932, + "learning_rate": 3.274516487064461e-07, + "loss": 0.9594, "step": 23533 }, { - "epoch": 0.6668933662047664, + "epoch": 0.9208075749276157, "grad_norm": 0.0, - "learning_rate": 5.276676393663194e-06, - "loss": 0.8324, + "learning_rate": 3.27130094018222e-07, + "loss": 0.9488, "step": 23534 }, { - "epoch": 0.6669217036470289, + "epoch": 0.920846701619845, "grad_norm": 0.0, - "learning_rate": 5.27586745526223e-06, - "loss": 0.9048, + "learning_rate": 3.2680869466416687e-07, + "loss": 1.0786, "step": 23535 }, { - "epoch": 0.6669500410892912, + "epoch": 0.9208858283120746, "grad_norm": 0.0, - "learning_rate": 5.275058556654394e-06, - "loss": 0.8437, + "learning_rate": 3.264874506494442e-07, + "loss": 0.8969, "step": 23536 }, { - "epoch": 0.6669783785315537, + "epoch": 0.9209249550043039, "grad_norm": 0.0, - "learning_rate": 5.274249697846487e-06, - "loss": 0.9232, + "learning_rate": 3.2616636197921106e-07, + "loss": 0.8351, "step": 23537 }, { - "epoch": 0.6670067159738162, + "epoch": 0.9209640816965333, "grad_norm": 0.0, - "learning_rate": 5.2734408788453326e-06, - "loss": 0.8259, + "learning_rate": 3.2584542865862435e-07, + "loss": 1.02, "step": 23538 }, { - "epoch": 0.6670350534160787, + "epoch": 0.9210032083887628, "grad_norm": 0.0, - "learning_rate": 5.272632099657744e-06, - "loss": 0.8006, + "learning_rate": 3.255246506928389e-07, + "loss": 0.919, "step": 23539 }, { - "epoch": 0.6670633908583411, + "epoch": 0.9210423350809922, "grad_norm": 0.0, - "learning_rate": 5.2718233602905265e-06, - "loss": 0.8781, + "learning_rate": 3.252040280870017e-07, + "loss": 0.9568, "step": 23540 }, { - "epoch": 0.6670917283006036, + "epoch": 0.9210814617732217, "grad_norm": 0.0, - "learning_rate": 5.271014660750498e-06, - "loss": 0.9402, + "learning_rate": 3.2488356084626747e-07, + "loss": 0.809, "step": 23541 }, { - "epoch": 0.6671200657428661, + "epoch": 0.9211205884654511, "grad_norm": 0.0, - "learning_rate": 5.270206001044471e-06, - "loss": 0.8158, + "learning_rate": 3.2456324897577774e-07, + "loss": 0.9131, "step": 23542 }, { - "epoch": 0.6671484031851285, + "epoch": 0.9211597151576806, "grad_norm": 0.0, - "learning_rate": 5.269397381179253e-06, - "loss": 0.8833, + "learning_rate": 3.2424309248067944e-07, + "loss": 0.9213, "step": 23543 }, { - "epoch": 0.667176740627391, + "epoch": 0.92119884184991, "grad_norm": 0.0, - "learning_rate": 5.268588801161661e-06, - "loss": 0.8706, + "learning_rate": 3.2392309136611066e-07, + "loss": 0.8675, "step": 23544 }, { - "epoch": 0.6672050780696535, + "epoch": 0.9212379685421395, "grad_norm": 0.0, - "learning_rate": 5.2677802609984974e-06, - "loss": 0.7124, + "learning_rate": 3.2360324563721514e-07, + "loss": 1.0464, "step": 23545 }, { - "epoch": 0.6672334155119158, + "epoch": 0.9212770952343688, "grad_norm": 0.0, - "learning_rate": 5.2669717606965785e-06, - "loss": 0.8363, + "learning_rate": 3.2328355529912423e-07, + "loss": 0.972, "step": 23546 }, { - "epoch": 0.6672617529541783, + "epoch": 0.9213162219265983, "grad_norm": 0.0, - "learning_rate": 5.266163300262714e-06, - "loss": 0.8104, + "learning_rate": 3.2296402035697616e-07, + "loss": 0.9789, "step": 23547 }, { - "epoch": 0.6672900903964408, + "epoch": 0.9213553486188277, "grad_norm": 0.0, - "learning_rate": 5.265354879703715e-06, - "loss": 0.7294, + "learning_rate": 3.2264464081589785e-07, + "loss": 1.0311, "step": 23548 }, { - "epoch": 0.6673184278387033, + "epoch": 0.9213944753110572, "grad_norm": 0.0, - "learning_rate": 5.264546499026388e-06, - "loss": 0.7785, + "learning_rate": 3.223254166810197e-07, + "loss": 0.9504, "step": 23549 }, { - "epoch": 0.6673467652809657, + "epoch": 0.9214336020032866, "grad_norm": 0.0, - "learning_rate": 5.26373815823755e-06, - "loss": 0.8287, + "learning_rate": 3.220063479574698e-07, + "loss": 1.008, "step": 23550 }, { - "epoch": 0.6673751027232282, + "epoch": 0.9214727286955161, "grad_norm": 0.0, - "learning_rate": 5.262929857344e-06, - "loss": 0.8125, + "learning_rate": 3.2168743465037066e-07, + "loss": 1.0579, "step": 23551 }, { - "epoch": 0.6674034401654907, + "epoch": 0.9215118553877455, "grad_norm": 0.0, - "learning_rate": 5.262121596352554e-06, - "loss": 0.8103, + "learning_rate": 3.2136867676484384e-07, + "loss": 0.9471, "step": 23552 }, { - "epoch": 0.6674317776077531, + "epoch": 0.921550982079975, "grad_norm": 0.0, - "learning_rate": 5.2613133752700145e-06, - "loss": 0.8617, + "learning_rate": 3.210500743060074e-07, + "loss": 0.8568, "step": 23553 }, { - "epoch": 0.6674601150500156, + "epoch": 0.9215901087722044, "grad_norm": 0.0, - "learning_rate": 5.260505194103191e-06, - "loss": 0.7888, + "learning_rate": 3.2073162727897847e-07, + "loss": 0.7865, "step": 23554 }, { - "epoch": 0.6674884524922781, + "epoch": 0.9216292354644339, "grad_norm": 0.0, - "learning_rate": 5.259697052858896e-06, - "loss": 0.8651, + "learning_rate": 3.204133356888717e-07, + "loss": 0.8539, "step": 23555 }, { - "epoch": 0.6675167899345406, + "epoch": 0.9216683621566633, "grad_norm": 0.0, - "learning_rate": 5.25888895154393e-06, - "loss": 0.7974, + "learning_rate": 3.2009519954079635e-07, + "loss": 0.8844, "step": 23556 }, { - "epoch": 0.6675451273768029, + "epoch": 0.9217074888488928, "grad_norm": 0.0, - "learning_rate": 5.258080890165102e-06, - "loss": 0.7761, + "learning_rate": 3.19777218839864e-07, + "loss": 0.9618, "step": 23557 }, { - "epoch": 0.6675734648190654, + "epoch": 0.9217466155411221, "grad_norm": 0.0, - "learning_rate": 5.257272868729225e-06, - "loss": 0.7886, + "learning_rate": 3.194593935911783e-07, + "loss": 0.9334, "step": 23558 }, { - "epoch": 0.6676018022613279, + "epoch": 0.9217857422333516, "grad_norm": 0.0, - "learning_rate": 5.256464887243095e-06, - "loss": 0.7823, + "learning_rate": 3.19141723799844e-07, + "loss": 0.9523, "step": 23559 }, { - "epoch": 0.6676301397035903, + "epoch": 0.921824868925581, "grad_norm": 0.0, - "learning_rate": 5.255656945713523e-06, - "loss": 0.9011, + "learning_rate": 3.188242094709637e-07, + "loss": 0.9515, "step": 23560 }, { - "epoch": 0.6676584771458528, + "epoch": 0.9218639956178105, "grad_norm": 0.0, - "learning_rate": 5.254849044147313e-06, - "loss": 0.8492, + "learning_rate": 3.1850685060963557e-07, + "loss": 0.8704, "step": 23561 }, { - "epoch": 0.6676868145881153, + "epoch": 0.9219031223100399, "grad_norm": 0.0, - "learning_rate": 5.2540411825512724e-06, - "loss": 0.8255, + "learning_rate": 3.181896472209556e-07, + "loss": 0.9233, "step": 23562 }, { - "epoch": 0.6677151520303778, + "epoch": 0.9219422490022694, "grad_norm": 0.0, - "learning_rate": 5.2532333609322096e-06, - "loss": 0.8958, + "learning_rate": 3.1787259931001737e-07, + "loss": 0.9778, "step": 23563 }, { - "epoch": 0.6677434894726402, + "epoch": 0.9219813756944988, "grad_norm": 0.0, - "learning_rate": 5.25242557929692e-06, - "loss": 0.7846, + "learning_rate": 3.175557068819135e-07, + "loss": 0.8716, "step": 23564 }, { - "epoch": 0.6677718269149027, + "epoch": 0.9220205023867283, "grad_norm": 0.0, - "learning_rate": 5.251617837652219e-06, - "loss": 0.8226, + "learning_rate": 3.172389699417311e-07, + "loss": 0.8302, "step": 23565 }, { - "epoch": 0.6678001643571652, + "epoch": 0.9220596290789577, "grad_norm": 0.0, - "learning_rate": 5.250810136004898e-06, - "loss": 0.8074, + "learning_rate": 3.1692238849455823e-07, + "loss": 0.9579, "step": 23566 }, { - "epoch": 0.6678285017994275, + "epoch": 0.922098755771187, "grad_norm": 0.0, - "learning_rate": 5.250002474361766e-06, - "loss": 0.827, + "learning_rate": 3.1660596254547873e-07, + "loss": 1.0552, "step": 23567 }, { - "epoch": 0.66785683924169, + "epoch": 0.9221378824634165, "grad_norm": 0.0, - "learning_rate": 5.249194852729633e-06, - "loss": 0.8847, + "learning_rate": 3.1628969209957394e-07, + "loss": 0.8773, "step": 23568 }, { - "epoch": 0.6678851766839525, + "epoch": 0.9221770091556459, "grad_norm": 0.0, - "learning_rate": 5.248387271115292e-06, - "loss": 0.7825, + "learning_rate": 3.15973577161921e-07, + "loss": 1.0496, "step": 23569 }, { - "epoch": 0.6679135141262149, + "epoch": 0.9222161358478754, "grad_norm": 0.0, - "learning_rate": 5.247579729525548e-06, - "loss": 0.7253, + "learning_rate": 3.156576177375992e-07, + "loss": 0.8979, "step": 23570 }, { - "epoch": 0.6679418515684774, + "epoch": 0.9222552625401048, "grad_norm": 0.0, - "learning_rate": 5.24677222796721e-06, - "loss": 0.7648, + "learning_rate": 3.153418138316788e-07, + "loss": 1.0616, "step": 23571 }, { - "epoch": 0.6679701890107399, + "epoch": 0.9222943892323343, "grad_norm": 0.0, - "learning_rate": 5.245964766447069e-06, - "loss": 0.8432, + "learning_rate": 3.150261654492348e-07, + "loss": 1.029, "step": 23572 }, { - "epoch": 0.6679985264530024, + "epoch": 0.9223335159245637, "grad_norm": 0.0, - "learning_rate": 5.245157344971934e-06, - "loss": 0.9088, + "learning_rate": 3.147106725953342e-07, + "loss": 1.0007, "step": 23573 }, { - "epoch": 0.6680268638952648, + "epoch": 0.9223726426167932, "grad_norm": 0.0, - "learning_rate": 5.244349963548603e-06, - "loss": 0.9187, + "learning_rate": 3.1439533527504396e-07, + "loss": 1.0308, "step": 23574 }, { - "epoch": 0.6680552013375273, + "epoch": 0.9224117693090226, "grad_norm": 0.0, - "learning_rate": 5.2435426221838795e-06, - "loss": 0.9318, + "learning_rate": 3.140801534934268e-07, + "loss": 1.0035, "step": 23575 }, { - "epoch": 0.6680835387797898, + "epoch": 0.9224508960012521, "grad_norm": 0.0, - "learning_rate": 5.242735320884566e-06, - "loss": 0.8442, + "learning_rate": 3.1376512725554755e-07, + "loss": 0.9895, "step": 23576 }, { - "epoch": 0.6681118762220521, + "epoch": 0.9224900226934815, "grad_norm": 0.0, - "learning_rate": 5.241928059657455e-06, - "loss": 0.8757, + "learning_rate": 3.13450256566461e-07, + "loss": 0.9444, "step": 23577 }, { - "epoch": 0.6681402136643146, + "epoch": 0.922529149385711, "grad_norm": 0.0, - "learning_rate": 5.2411208385093515e-06, - "loss": 0.8121, + "learning_rate": 3.1313554143122647e-07, + "loss": 0.9124, "step": 23578 }, { - "epoch": 0.6681685511065771, + "epoch": 0.9225682760779403, "grad_norm": 0.0, - "learning_rate": 5.240313657447058e-06, - "loss": 0.7363, + "learning_rate": 3.128209818548955e-07, + "loss": 1.0414, "step": 23579 }, { - "epoch": 0.6681968885488396, + "epoch": 0.9226074027701698, "grad_norm": 0.0, - "learning_rate": 5.2395065164773665e-06, - "loss": 0.9318, + "learning_rate": 3.125065778425218e-07, + "loss": 0.9489, "step": 23580 }, { - "epoch": 0.668225225991102, + "epoch": 0.9226465294623992, "grad_norm": 0.0, - "learning_rate": 5.2386994156070825e-06, - "loss": 0.8033, + "learning_rate": 3.1219232939915244e-07, + "loss": 1.0083, "step": 23581 }, { - "epoch": 0.6682535634333645, + "epoch": 0.9226856561546287, "grad_norm": 0.0, - "learning_rate": 5.237892354842998e-06, - "loss": 0.814, + "learning_rate": 3.1187823652983675e-07, + "loss": 0.9956, "step": 23582 }, { - "epoch": 0.668281900875627, + "epoch": 0.9227247828468581, "grad_norm": 0.0, - "learning_rate": 5.2370853341919145e-06, - "loss": 0.8253, + "learning_rate": 3.115642992396151e-07, + "loss": 0.8947, "step": 23583 }, { - "epoch": 0.6683102383178894, + "epoch": 0.9227639095390876, "grad_norm": 0.0, - "learning_rate": 5.236278353660634e-06, - "loss": 0.9191, + "learning_rate": 3.112505175335312e-07, + "loss": 0.9594, "step": 23584 }, { - "epoch": 0.6683385757601519, + "epoch": 0.922803036231317, "grad_norm": 0.0, - "learning_rate": 5.235471413255946e-06, - "loss": 0.7823, + "learning_rate": 3.109368914166222e-07, + "loss": 0.9595, "step": 23585 }, { - "epoch": 0.6683669132024144, + "epoch": 0.9228421629235465, "grad_norm": 0.0, - "learning_rate": 5.2346645129846504e-06, - "loss": 0.8998, + "learning_rate": 3.1062342089392737e-07, + "loss": 0.9887, "step": 23586 }, { - "epoch": 0.6683952506446769, + "epoch": 0.9228812896157759, "grad_norm": 0.0, - "learning_rate": 5.233857652853547e-06, - "loss": 0.9571, + "learning_rate": 3.103101059704772e-07, + "loss": 0.9371, "step": 23587 }, { - "epoch": 0.6684235880869392, + "epoch": 0.9229204163080054, "grad_norm": 0.0, - "learning_rate": 5.23305083286943e-06, - "loss": 0.8065, + "learning_rate": 3.0999694665130643e-07, + "loss": 0.9871, "step": 23588 }, { - "epoch": 0.6684519255292017, + "epoch": 0.9229595430002348, "grad_norm": 0.0, - "learning_rate": 5.232244053039099e-06, - "loss": 0.7567, + "learning_rate": 3.0968394294144113e-07, + "loss": 0.8835, "step": 23589 }, { - "epoch": 0.6684802629714642, + "epoch": 0.9229986696924642, "grad_norm": 0.0, - "learning_rate": 5.231437313369343e-06, - "loss": 0.9603, + "learning_rate": 3.0937109484590943e-07, + "loss": 0.8665, "step": 23590 }, { - "epoch": 0.6685086004137266, + "epoch": 0.9230377963846936, "grad_norm": 0.0, - "learning_rate": 5.230630613866962e-06, - "loss": 0.8503, + "learning_rate": 3.090584023697352e-07, + "loss": 0.8696, "step": 23591 }, { - "epoch": 0.6685369378559891, + "epoch": 0.9230769230769231, "grad_norm": 0.0, - "learning_rate": 5.229823954538754e-06, - "loss": 0.8599, + "learning_rate": 3.0874586551793983e-07, + "loss": 0.8752, "step": 23592 }, { - "epoch": 0.6685652752982516, + "epoch": 0.9231160497691525, "grad_norm": 0.0, - "learning_rate": 5.229017335391506e-06, - "loss": 0.7991, + "learning_rate": 3.0843348429554054e-07, + "loss": 0.9933, "step": 23593 }, { - "epoch": 0.668593612740514, + "epoch": 0.923155176461382, "grad_norm": 0.0, - "learning_rate": 5.228210756432021e-06, - "loss": 0.8332, + "learning_rate": 3.081212587075577e-07, + "loss": 0.9239, "step": 23594 }, { - "epoch": 0.6686219501827765, + "epoch": 0.9231943031536114, "grad_norm": 0.0, - "learning_rate": 5.227404217667085e-06, - "loss": 0.8324, + "learning_rate": 3.078091887590007e-07, + "loss": 0.9509, "step": 23595 }, { - "epoch": 0.668650287625039, + "epoch": 0.9232334298458408, "grad_norm": 0.0, - "learning_rate": 5.226597719103495e-06, - "loss": 0.8307, + "learning_rate": 3.074972744548854e-07, + "loss": 1.0051, "step": 23596 }, { - "epoch": 0.6686786250673015, + "epoch": 0.9232725565380703, "grad_norm": 0.0, - "learning_rate": 5.22579126074805e-06, - "loss": 0.8668, + "learning_rate": 3.071855158002168e-07, + "loss": 1.0403, "step": 23597 }, { - "epoch": 0.6687069625095639, + "epoch": 0.9233116832302997, "grad_norm": 0.0, - "learning_rate": 5.2249848426075325e-06, - "loss": 0.8052, + "learning_rate": 3.0687391280000313e-07, + "loss": 0.9034, "step": 23598 }, { - "epoch": 0.6687352999518263, + "epoch": 0.9233508099225292, "grad_norm": 0.0, - "learning_rate": 5.224178464688742e-06, - "loss": 0.7452, + "learning_rate": 3.0656246545924804e-07, + "loss": 1.0571, "step": 23599 }, { - "epoch": 0.6687636373940888, + "epoch": 0.9233899366147585, "grad_norm": 0.0, - "learning_rate": 5.2233721269984695e-06, - "loss": 0.7018, + "learning_rate": 3.062511737829543e-07, + "loss": 1.0354, "step": 23600 }, { - "epoch": 0.6687919748363512, + "epoch": 0.923429063306988, "grad_norm": 0.0, - "learning_rate": 5.2225658295435065e-06, - "loss": 0.8467, + "learning_rate": 3.05940037776119e-07, + "loss": 0.9235, "step": 23601 }, { - "epoch": 0.6688203122786137, + "epoch": 0.9234681899992174, "grad_norm": 0.0, - "learning_rate": 5.22175957233065e-06, - "loss": 0.8535, + "learning_rate": 3.0562905744373926e-07, + "loss": 0.8964, "step": 23602 }, { - "epoch": 0.6688486497208762, + "epoch": 0.9235073166914469, "grad_norm": 0.0, - "learning_rate": 5.220953355366684e-06, - "loss": 0.7446, + "learning_rate": 3.0531823279080995e-07, + "loss": 0.9749, "step": 23603 }, { - "epoch": 0.6688769871631387, + "epoch": 0.9235464433836763, "grad_norm": 0.0, - "learning_rate": 5.220147178658401e-06, - "loss": 0.8227, + "learning_rate": 3.0500756382232045e-07, + "loss": 0.9388, "step": 23604 }, { - "epoch": 0.6689053246054011, + "epoch": 0.9235855700759058, "grad_norm": 0.0, - "learning_rate": 5.219341042212598e-06, - "loss": 0.7496, + "learning_rate": 3.0469705054326117e-07, + "loss": 0.9742, "step": 23605 }, { - "epoch": 0.6689336620476636, + "epoch": 0.9236246967681352, "grad_norm": 0.0, - "learning_rate": 5.218534946036057e-06, - "loss": 0.8725, + "learning_rate": 3.043866929586192e-07, + "loss": 0.9178, "step": 23606 }, { - "epoch": 0.6689619994899261, + "epoch": 0.9236638234603647, "grad_norm": 0.0, - "learning_rate": 5.2177288901355714e-06, - "loss": 0.991, + "learning_rate": 3.0407649107337734e-07, + "loss": 0.9548, "step": 23607 }, { - "epoch": 0.6689903369321885, + "epoch": 0.9237029501525941, "grad_norm": 0.0, - "learning_rate": 5.216922874517935e-06, - "loss": 0.8498, + "learning_rate": 3.0376644489251707e-07, + "loss": 0.9504, "step": 23608 }, { - "epoch": 0.669018674374451, + "epoch": 0.9237420768448236, "grad_norm": 0.0, - "learning_rate": 5.216116899189929e-06, - "loss": 0.8183, + "learning_rate": 3.034565544210177e-07, + "loss": 0.8641, "step": 23609 }, { - "epoch": 0.6690470118167134, + "epoch": 0.923781203537053, "grad_norm": 0.0, - "learning_rate": 5.215310964158351e-06, - "loss": 0.85, + "learning_rate": 3.0314681966385653e-07, + "loss": 0.947, "step": 23610 }, { - "epoch": 0.6690753492589759, + "epoch": 0.9238203302292824, "grad_norm": 0.0, - "learning_rate": 5.214505069429982e-06, - "loss": 0.8079, + "learning_rate": 3.0283724062600496e-07, + "loss": 1.0065, "step": 23611 }, { - "epoch": 0.6691036867012383, + "epoch": 0.9238594569215118, "grad_norm": 0.0, - "learning_rate": 5.213699215011614e-06, - "loss": 0.8908, + "learning_rate": 3.0252781731243686e-07, + "loss": 1.0175, "step": 23612 }, { - "epoch": 0.6691320241435008, + "epoch": 0.9238985836137413, "grad_norm": 0.0, - "learning_rate": 5.2128934009100356e-06, - "loss": 0.9901, + "learning_rate": 3.022185497281216e-07, + "loss": 0.9122, "step": 23613 }, { - "epoch": 0.6691603615857633, + "epoch": 0.9239377103059707, "grad_norm": 0.0, - "learning_rate": 5.212087627132032e-06, - "loss": 0.8448, + "learning_rate": 3.01909437878023e-07, + "loss": 1.0042, "step": 23614 }, { - "epoch": 0.6691886990280257, + "epoch": 0.9239768369982002, "grad_norm": 0.0, - "learning_rate": 5.211281893684392e-06, - "loss": 0.7396, + "learning_rate": 3.0160048176710586e-07, + "loss": 1.0674, "step": 23615 }, { - "epoch": 0.6692170364702882, + "epoch": 0.9240159636904296, "grad_norm": 0.0, - "learning_rate": 5.2104762005739084e-06, - "loss": 0.8341, + "learning_rate": 3.0129168140033305e-07, + "loss": 0.8232, "step": 23616 }, { - "epoch": 0.6692453739125507, + "epoch": 0.9240550903826591, "grad_norm": 0.0, - "learning_rate": 5.209670547807357e-06, - "loss": 0.8026, + "learning_rate": 3.0098303678266383e-07, + "loss": 0.8919, "step": 23617 }, { - "epoch": 0.6692737113548131, + "epoch": 0.9240942170748885, "grad_norm": 0.0, - "learning_rate": 5.208864935391536e-06, - "loss": 0.8462, + "learning_rate": 3.0067454791905206e-07, + "loss": 0.9834, "step": 23618 }, { - "epoch": 0.6693020487970756, + "epoch": 0.924133343767118, "grad_norm": 0.0, - "learning_rate": 5.208059363333218e-06, - "loss": 0.8812, + "learning_rate": 3.003662148144537e-07, + "loss": 1.0004, "step": 23619 }, { - "epoch": 0.669330386239338, + "epoch": 0.9241724704593474, "grad_norm": 0.0, - "learning_rate": 5.2072538316391965e-06, - "loss": 0.8881, + "learning_rate": 3.0005803747381824e-07, + "loss": 0.9407, "step": 23620 }, { - "epoch": 0.6693587236816005, + "epoch": 0.9242115971515769, "grad_norm": 0.0, - "learning_rate": 5.2064483403162595e-06, - "loss": 0.9528, + "learning_rate": 2.997500159020983e-07, + "loss": 0.7923, "step": 23621 }, { - "epoch": 0.6693870611238629, + "epoch": 0.9242507238438062, "grad_norm": 0.0, - "learning_rate": 5.205642889371184e-06, - "loss": 0.8599, + "learning_rate": 2.994421501042355e-07, + "loss": 0.9886, "step": 23622 }, { - "epoch": 0.6694153985661254, + "epoch": 0.9242898505360356, "grad_norm": 0.0, - "learning_rate": 5.2048374788107625e-06, - "loss": 0.8675, + "learning_rate": 2.9913444008517924e-07, + "loss": 1.0038, "step": 23623 }, { - "epoch": 0.6694437360083879, + "epoch": 0.9243289772282651, "grad_norm": 0.0, - "learning_rate": 5.204032108641772e-06, - "loss": 0.8242, + "learning_rate": 2.9882688584986554e-07, + "loss": 0.9429, "step": 23624 }, { - "epoch": 0.6694720734506503, + "epoch": 0.9243681039204945, "grad_norm": 0.0, - "learning_rate": 5.203226778871e-06, - "loss": 0.8475, + "learning_rate": 2.98519487403236e-07, + "loss": 0.9324, "step": 23625 }, { - "epoch": 0.6695004108929128, + "epoch": 0.924407230612724, "grad_norm": 0.0, - "learning_rate": 5.202421489505231e-06, - "loss": 0.9568, + "learning_rate": 2.982122447502278e-07, + "loss": 0.9664, "step": 23626 }, { - "epoch": 0.6695287483351753, + "epoch": 0.9244463573049534, "grad_norm": 0.0, - "learning_rate": 5.2016162405512464e-06, - "loss": 0.8136, + "learning_rate": 2.979051578957748e-07, + "loss": 0.9042, "step": 23627 }, { - "epoch": 0.6695570857774378, + "epoch": 0.9244854839971829, "grad_norm": 0.0, - "learning_rate": 5.20081103201583e-06, - "loss": 0.8313, + "learning_rate": 2.9759822684480524e-07, + "loss": 0.9601, "step": 23628 }, { - "epoch": 0.6695854232197002, + "epoch": 0.9245246106894123, "grad_norm": 0.0, - "learning_rate": 5.200005863905768e-06, - "loss": 0.8134, + "learning_rate": 2.972914516022518e-07, + "loss": 0.8759, "step": 23629 }, { - "epoch": 0.6696137606619627, + "epoch": 0.9245637373816418, "grad_norm": 0.0, - "learning_rate": 5.199200736227836e-06, - "loss": 0.7627, + "learning_rate": 2.969848321730384e-07, + "loss": 0.9886, "step": 23630 }, { - "epoch": 0.6696420981042251, + "epoch": 0.9246028640738712, "grad_norm": 0.0, - "learning_rate": 5.198395648988823e-06, - "loss": 0.8902, + "learning_rate": 2.966783685620922e-07, + "loss": 0.9261, "step": 23631 }, { - "epoch": 0.6696704355464875, + "epoch": 0.9246419907661007, "grad_norm": 0.0, - "learning_rate": 5.1975906021955016e-06, - "loss": 0.9271, + "learning_rate": 2.963720607743303e-07, + "loss": 1.0531, "step": 23632 }, { - "epoch": 0.66969877298875, + "epoch": 0.92468111745833, "grad_norm": 0.0, - "learning_rate": 5.196785595854659e-06, - "loss": 0.8417, + "learning_rate": 2.9606590881467445e-07, + "loss": 0.8252, "step": 23633 }, { - "epoch": 0.6697271104310125, + "epoch": 0.9247202441505595, "grad_norm": 0.0, - "learning_rate": 5.195980629973077e-06, - "loss": 0.8773, + "learning_rate": 2.957599126880395e-07, + "loss": 0.8195, "step": 23634 }, { - "epoch": 0.669755447873275, + "epoch": 0.9247593708427889, "grad_norm": 0.0, - "learning_rate": 5.195175704557531e-06, - "loss": 0.8593, + "learning_rate": 2.9545407239934265e-07, + "loss": 0.8981, "step": 23635 }, { - "epoch": 0.6697837853155374, + "epoch": 0.9247984975350184, "grad_norm": 0.0, - "learning_rate": 5.194370819614806e-06, - "loss": 0.8795, + "learning_rate": 2.951483879534911e-07, + "loss": 0.8924, "step": 23636 }, { - "epoch": 0.6698121227577999, + "epoch": 0.9248376242272478, "grad_norm": 0.0, - "learning_rate": 5.193565975151682e-06, - "loss": 0.8241, + "learning_rate": 2.9484285935539645e-07, + "loss": 0.8464, "step": 23637 }, { - "epoch": 0.6698404602000624, + "epoch": 0.9248767509194773, "grad_norm": 0.0, - "learning_rate": 5.192761171174934e-06, - "loss": 0.7895, + "learning_rate": 2.9453748660996264e-07, + "loss": 0.9498, "step": 23638 }, { - "epoch": 0.6698687976423248, + "epoch": 0.9249158776117067, "grad_norm": 0.0, - "learning_rate": 5.191956407691343e-06, - "loss": 0.8492, + "learning_rate": 2.9423226972209673e-07, + "loss": 0.8667, "step": 23639 }, { - "epoch": 0.6698971350845873, + "epoch": 0.9249550043039362, "grad_norm": 0.0, - "learning_rate": 5.191151684707689e-06, - "loss": 0.8823, + "learning_rate": 2.9392720869669823e-07, + "loss": 1.0489, "step": 23640 }, { - "epoch": 0.6699254725268498, + "epoch": 0.9249941309961656, "grad_norm": 0.0, - "learning_rate": 5.190347002230749e-06, - "loss": 0.8301, + "learning_rate": 2.936223035386665e-07, + "loss": 1.0217, "step": 23641 }, { - "epoch": 0.6699538099691121, + "epoch": 0.9250332576883951, "grad_norm": 0.0, - "learning_rate": 5.189542360267308e-06, - "loss": 0.8726, + "learning_rate": 2.933175542528977e-07, + "loss": 0.9865, "step": 23642 }, { - "epoch": 0.6699821474113746, + "epoch": 0.9250723843806244, "grad_norm": 0.0, - "learning_rate": 5.188737758824132e-06, - "loss": 0.8409, + "learning_rate": 2.9301296084428553e-07, + "loss": 0.9487, "step": 23643 }, { - "epoch": 0.6700104848536371, + "epoch": 0.9251115110728539, "grad_norm": 0.0, - "learning_rate": 5.187933197908006e-06, - "loss": 0.7854, + "learning_rate": 2.927085233177218e-07, + "loss": 1.0258, "step": 23644 }, { - "epoch": 0.6700388222958996, + "epoch": 0.9251506377650833, "grad_norm": 0.0, - "learning_rate": 5.187128677525709e-06, - "loss": 0.818, + "learning_rate": 2.924042416780959e-07, + "loss": 0.9954, "step": 23645 }, { - "epoch": 0.670067159738162, + "epoch": 0.9251897644573128, "grad_norm": 0.0, - "learning_rate": 5.18632419768401e-06, - "loss": 0.8979, + "learning_rate": 2.9210011593029276e-07, + "loss": 1.0942, "step": 23646 }, { - "epoch": 0.6700954971804245, + "epoch": 0.9252288911495422, "grad_norm": 0.0, - "learning_rate": 5.185519758389694e-06, - "loss": 0.8598, + "learning_rate": 2.9179614607919737e-07, + "loss": 0.9659, "step": 23647 }, { - "epoch": 0.670123834622687, + "epoch": 0.9252680178417717, "grad_norm": 0.0, - "learning_rate": 5.18471535964953e-06, - "loss": 0.8822, + "learning_rate": 2.914923321296903e-07, + "loss": 1.0544, "step": 23648 }, { - "epoch": 0.6701521720649494, + "epoch": 0.9253071445340011, "grad_norm": 0.0, - "learning_rate": 5.183911001470296e-06, - "loss": 0.8907, + "learning_rate": 2.9118867408665206e-07, + "loss": 0.9491, "step": 23649 }, { - "epoch": 0.6701805095072119, + "epoch": 0.9253462712262306, "grad_norm": 0.0, - "learning_rate": 5.183106683858772e-06, - "loss": 0.7942, + "learning_rate": 2.908851719549566e-07, + "loss": 0.9205, "step": 23650 }, { - "epoch": 0.6702088469494744, + "epoch": 0.92538539791846, "grad_norm": 0.0, - "learning_rate": 5.182302406821724e-06, - "loss": 0.7835, + "learning_rate": 2.905818257394799e-07, + "loss": 0.9175, "step": 23651 }, { - "epoch": 0.6702371843917369, + "epoch": 0.9254245246106894, "grad_norm": 0.0, - "learning_rate": 5.181498170365933e-06, - "loss": 0.8405, + "learning_rate": 2.9027863544509263e-07, + "loss": 1.0089, "step": 23652 }, { - "epoch": 0.6702655218339992, + "epoch": 0.9254636513029189, "grad_norm": 0.0, - "learning_rate": 5.180693974498172e-06, - "loss": 0.8855, + "learning_rate": 2.8997560107666303e-07, + "loss": 0.9325, "step": 23653 }, { - "epoch": 0.6702938592762617, + "epoch": 0.9255027779951482, "grad_norm": 0.0, - "learning_rate": 5.179889819225215e-06, - "loss": 0.8628, + "learning_rate": 2.896727226390572e-07, + "loss": 0.8094, "step": 23654 }, { - "epoch": 0.6703221967185242, + "epoch": 0.9255419046873777, "grad_norm": 0.0, - "learning_rate": 5.179085704553841e-06, - "loss": 0.763, + "learning_rate": 2.8937000013714247e-07, + "loss": 0.8768, "step": 23655 }, { - "epoch": 0.6703505341607866, + "epoch": 0.9255810313796071, "grad_norm": 0.0, - "learning_rate": 5.178281630490814e-06, - "loss": 0.8284, + "learning_rate": 2.890674335757748e-07, + "loss": 0.9825, "step": 23656 }, { - "epoch": 0.6703788716030491, + "epoch": 0.9256201580718366, "grad_norm": 0.0, - "learning_rate": 5.177477597042911e-06, - "loss": 0.8312, + "learning_rate": 2.88765022959816e-07, + "loss": 0.8681, "step": 23657 }, { - "epoch": 0.6704072090453116, + "epoch": 0.925659284764066, "grad_norm": 0.0, - "learning_rate": 5.176673604216908e-06, - "loss": 0.7937, + "learning_rate": 2.884627682941232e-07, + "loss": 0.9251, "step": 23658 }, { - "epoch": 0.6704355464875741, + "epoch": 0.9256984114562955, "grad_norm": 0.0, - "learning_rate": 5.175869652019571e-06, - "loss": 0.9702, + "learning_rate": 2.8816066958354815e-07, + "loss": 0.9507, "step": 23659 }, { - "epoch": 0.6704638839298365, + "epoch": 0.9257375381485249, "grad_norm": 0.0, - "learning_rate": 5.175065740457679e-06, - "loss": 0.7901, + "learning_rate": 2.878587268329436e-07, + "loss": 0.8719, "step": 23660 }, { - "epoch": 0.670492221372099, + "epoch": 0.9257766648407544, "grad_norm": 0.0, - "learning_rate": 5.174261869537997e-06, - "loss": 0.714, + "learning_rate": 2.8755694004715674e-07, + "loss": 0.9123, "step": 23661 }, { - "epoch": 0.6705205588143615, + "epoch": 0.9258157915329838, "grad_norm": 0.0, - "learning_rate": 5.173458039267299e-06, - "loss": 0.7576, + "learning_rate": 2.8725530923103705e-07, + "loss": 1.0323, "step": 23662 }, { - "epoch": 0.6705488962566238, + "epoch": 0.9258549182252133, "grad_norm": 0.0, - "learning_rate": 5.172654249652359e-06, - "loss": 0.8451, + "learning_rate": 2.8695383438942514e-07, + "loss": 0.9443, "step": 23663 }, { - "epoch": 0.6705772336988863, + "epoch": 0.9258940449174426, "grad_norm": 0.0, - "learning_rate": 5.171850500699942e-06, - "loss": 0.7811, + "learning_rate": 2.866525155271649e-07, + "loss": 0.9949, "step": 23664 }, { - "epoch": 0.6706055711411488, + "epoch": 0.9259331716096721, "grad_norm": 0.0, - "learning_rate": 5.17104679241682e-06, - "loss": 0.8963, + "learning_rate": 2.863513526490902e-07, + "loss": 1.0112, "step": 23665 }, { - "epoch": 0.6706339085834112, + "epoch": 0.9259722983019015, "grad_norm": 0.0, - "learning_rate": 5.170243124809766e-06, - "loss": 0.7973, + "learning_rate": 2.8605034576004497e-07, + "loss": 1.0184, "step": 23666 }, { - "epoch": 0.6706622460256737, + "epoch": 0.926011424994131, "grad_norm": 0.0, - "learning_rate": 5.169439497885546e-06, - "loss": 0.7609, + "learning_rate": 2.8574949486485647e-07, + "loss": 0.9799, "step": 23667 }, { - "epoch": 0.6706905834679362, + "epoch": 0.9260505516863604, "grad_norm": 0.0, - "learning_rate": 5.168635911650937e-06, - "loss": 0.7733, + "learning_rate": 2.854487999683597e-07, + "loss": 0.8994, "step": 23668 }, { - "epoch": 0.6707189209101987, + "epoch": 0.9260896783785899, "grad_norm": 0.0, - "learning_rate": 5.167832366112695e-06, - "loss": 0.8119, + "learning_rate": 2.851482610753808e-07, + "loss": 1.0201, "step": 23669 }, { - "epoch": 0.6707472583524611, + "epoch": 0.9261288050708193, "grad_norm": 0.0, - "learning_rate": 5.167028861277597e-06, - "loss": 0.8006, + "learning_rate": 2.848478781907493e-07, + "loss": 1.083, "step": 23670 }, { - "epoch": 0.6707755957947236, + "epoch": 0.9261679317630488, "grad_norm": 0.0, - "learning_rate": 5.166225397152414e-06, - "loss": 0.8847, + "learning_rate": 2.8454765131928573e-07, + "loss": 0.9821, "step": 23671 }, { - "epoch": 0.6708039332369861, + "epoch": 0.9262070584552782, "grad_norm": 0.0, - "learning_rate": 5.165421973743905e-06, - "loss": 0.9137, + "learning_rate": 2.8424758046581405e-07, + "loss": 0.8691, "step": 23672 }, { - "epoch": 0.6708322706792484, + "epoch": 0.9262461851475077, "grad_norm": 0.0, - "learning_rate": 5.164618591058845e-06, - "loss": 0.8882, + "learning_rate": 2.839476656351503e-07, + "loss": 0.9538, "step": 23673 }, { - "epoch": 0.6708606081215109, + "epoch": 0.9262853118397371, "grad_norm": 0.0, - "learning_rate": 5.163815249103994e-06, - "loss": 0.8681, + "learning_rate": 2.8364790683211183e-07, + "loss": 1.0284, "step": 23674 }, { - "epoch": 0.6708889455637734, + "epoch": 0.9263244385319666, "grad_norm": 0.0, - "learning_rate": 5.163011947886125e-06, - "loss": 0.8794, + "learning_rate": 2.833483040615137e-07, + "loss": 1.0294, "step": 23675 }, { - "epoch": 0.6709172830060359, + "epoch": 0.9263635652241959, "grad_norm": 0.0, - "learning_rate": 5.162208687412005e-06, - "loss": 0.8226, + "learning_rate": 2.8304885732816647e-07, + "loss": 0.8692, "step": 23676 }, { - "epoch": 0.6709456204482983, + "epoch": 0.9264026919164254, "grad_norm": 0.0, - "learning_rate": 5.161405467688394e-06, - "loss": 0.7866, + "learning_rate": 2.827495666368774e-07, + "loss": 0.9859, "step": 23677 }, { - "epoch": 0.6709739578905608, + "epoch": 0.9264418186086548, "grad_norm": 0.0, - "learning_rate": 5.160602288722062e-06, - "loss": 0.8721, + "learning_rate": 2.824504319924548e-07, + "loss": 1.0228, "step": 23678 }, { - "epoch": 0.6710022953328233, + "epoch": 0.9264809453008843, "grad_norm": 0.0, - "learning_rate": 5.159799150519773e-06, - "loss": 0.8616, + "learning_rate": 2.821514533997005e-07, + "loss": 0.977, "step": 23679 }, { - "epoch": 0.6710306327750857, + "epoch": 0.9265200719931137, "grad_norm": 0.0, - "learning_rate": 5.158996053088294e-06, - "loss": 0.8755, + "learning_rate": 2.818526308634184e-07, + "loss": 0.8599, "step": 23680 }, { - "epoch": 0.6710589702173482, + "epoch": 0.9265591986853431, "grad_norm": 0.0, - "learning_rate": 5.158192996434393e-06, - "loss": 0.7607, + "learning_rate": 2.8155396438840464e-07, + "loss": 0.9557, "step": 23681 }, { - "epoch": 0.6710873076596107, + "epoch": 0.9265983253775726, "grad_norm": 0.0, - "learning_rate": 5.157389980564827e-06, - "loss": 0.8445, + "learning_rate": 2.8125545397945653e-07, + "loss": 0.982, "step": 23682 }, { - "epoch": 0.6711156451018732, + "epoch": 0.926637452069802, "grad_norm": 0.0, - "learning_rate": 5.156587005486362e-06, - "loss": 0.8591, + "learning_rate": 2.8095709964136573e-07, + "loss": 0.9059, "step": 23683 }, { - "epoch": 0.6711439825441355, + "epoch": 0.9266765787620315, "grad_norm": 0.0, - "learning_rate": 5.155784071205767e-06, - "loss": 0.6894, + "learning_rate": 2.806589013789274e-07, + "loss": 1.0772, "step": 23684 }, { - "epoch": 0.671172319986398, + "epoch": 0.9267157054542609, "grad_norm": 0.0, - "learning_rate": 5.154981177729799e-06, - "loss": 0.8534, + "learning_rate": 2.803608591969276e-07, + "loss": 0.9087, "step": 23685 }, { - "epoch": 0.6712006574286605, + "epoch": 0.9267548321464903, "grad_norm": 0.0, - "learning_rate": 5.154178325065222e-06, - "loss": 0.8455, + "learning_rate": 2.800629731001536e-07, + "loss": 0.9354, "step": 23686 }, { - "epoch": 0.6712289948709229, + "epoch": 0.9267939588387197, "grad_norm": 0.0, - "learning_rate": 5.153375513218806e-06, - "loss": 0.9374, + "learning_rate": 2.7976524309338726e-07, + "loss": 1.048, "step": 23687 }, { - "epoch": 0.6712573323131854, + "epoch": 0.9268330855309492, "grad_norm": 0.0, - "learning_rate": 5.152572742197304e-06, - "loss": 0.8261, + "learning_rate": 2.7946766918141134e-07, + "loss": 1.0855, "step": 23688 }, { - "epoch": 0.6712856697554479, + "epoch": 0.9268722122231786, "grad_norm": 0.0, - "learning_rate": 5.15177001200748e-06, - "loss": 0.8587, + "learning_rate": 2.7917025136900423e-07, + "loss": 0.8244, "step": 23689 }, { - "epoch": 0.6713140071977103, + "epoch": 0.9269113389154081, "grad_norm": 0.0, - "learning_rate": 5.150967322656103e-06, - "loss": 0.8346, + "learning_rate": 2.7887298966094325e-07, + "loss": 1.0112, "step": 23690 }, { - "epoch": 0.6713423446399728, + "epoch": 0.9269504656076375, "grad_norm": 0.0, - "learning_rate": 5.150164674149925e-06, - "loss": 0.8252, + "learning_rate": 2.78575884061999e-07, + "loss": 0.9871, "step": 23691 }, { - "epoch": 0.6713706820822353, + "epoch": 0.926989592299867, "grad_norm": 0.0, - "learning_rate": 5.149362066495709e-06, - "loss": 0.9026, + "learning_rate": 2.7827893457694545e-07, + "loss": 0.9203, "step": 23692 }, { - "epoch": 0.6713990195244978, + "epoch": 0.9270287189920964, "grad_norm": 0.0, - "learning_rate": 5.14855949970022e-06, - "loss": 0.8337, + "learning_rate": 2.7798214121054877e-07, + "loss": 0.9886, "step": 23693 }, { - "epoch": 0.6714273569667601, + "epoch": 0.9270678456843259, "grad_norm": 0.0, - "learning_rate": 5.147756973770215e-06, - "loss": 0.7999, + "learning_rate": 2.7768550396757853e-07, + "loss": 0.9798, "step": 23694 }, { - "epoch": 0.6714556944090226, + "epoch": 0.9271069723765553, "grad_norm": 0.0, - "learning_rate": 5.146954488712458e-06, - "loss": 0.8798, + "learning_rate": 2.7738902285279534e-07, + "loss": 0.9566, "step": 23695 }, { - "epoch": 0.6714840318512851, + "epoch": 0.9271460990687848, "grad_norm": 0.0, - "learning_rate": 5.146152044533702e-06, - "loss": 0.958, + "learning_rate": 2.7709269787096093e-07, + "loss": 0.9229, "step": 23696 }, { - "epoch": 0.6715123692935475, + "epoch": 0.9271852257610141, "grad_norm": 0.0, - "learning_rate": 5.145349641240714e-06, - "loss": 0.8501, + "learning_rate": 2.7679652902683596e-07, + "loss": 0.8352, "step": 23697 }, { - "epoch": 0.67154070673581, + "epoch": 0.9272243524532436, "grad_norm": 0.0, - "learning_rate": 5.144547278840244e-06, - "loss": 0.8731, + "learning_rate": 2.765005163251733e-07, + "loss": 0.9904, "step": 23698 }, { - "epoch": 0.6715690441780725, + "epoch": 0.927263479145473, "grad_norm": 0.0, - "learning_rate": 5.143744957339056e-06, - "loss": 0.8388, + "learning_rate": 2.7620465977072794e-07, + "loss": 0.972, "step": 23699 }, { - "epoch": 0.671597381620335, + "epoch": 0.9273026058377025, "grad_norm": 0.0, - "learning_rate": 5.1429426767439115e-06, - "loss": 0.8605, + "learning_rate": 2.759089593682518e-07, + "loss": 1.1124, "step": 23700 }, { - "epoch": 0.6716257190625974, + "epoch": 0.9273417325299319, "grad_norm": 0.0, - "learning_rate": 5.14214043706156e-06, - "loss": 0.8231, + "learning_rate": 2.7561341512249316e-07, + "loss": 0.9416, "step": 23701 }, { - "epoch": 0.6716540565048599, + "epoch": 0.9273808592221614, "grad_norm": 0.0, - "learning_rate": 5.141338238298766e-06, - "loss": 0.7256, + "learning_rate": 2.753180270381972e-07, + "loss": 0.9547, "step": 23702 }, { - "epoch": 0.6716823939471224, + "epoch": 0.9274199859143908, "grad_norm": 0.0, - "learning_rate": 5.1405360804622815e-06, - "loss": 0.8894, + "learning_rate": 2.7502279512010897e-07, + "loss": 0.8436, "step": 23703 }, { - "epoch": 0.6717107313893848, + "epoch": 0.9274591126066203, "grad_norm": 0.0, - "learning_rate": 5.139733963558866e-06, - "loss": 0.7604, + "learning_rate": 2.7472771937296805e-07, + "loss": 0.9467, "step": 23704 }, { - "epoch": 0.6717390688316472, + "epoch": 0.9274982392988497, "grad_norm": 0.0, - "learning_rate": 5.1389318875952806e-06, - "loss": 0.9774, + "learning_rate": 2.744327998015128e-07, + "loss": 0.7803, "step": 23705 }, { - "epoch": 0.6717674062739097, + "epoch": 0.9275373659910792, "grad_norm": 0.0, - "learning_rate": 5.138129852578273e-06, - "loss": 0.8363, + "learning_rate": 2.7413803641047954e-07, + "loss": 0.9747, "step": 23706 }, { - "epoch": 0.6717957437161722, + "epoch": 0.9275764926833086, "grad_norm": 0.0, - "learning_rate": 5.137327858514605e-06, - "loss": 0.8169, + "learning_rate": 2.738434292046044e-07, + "loss": 0.9894, "step": 23707 }, { - "epoch": 0.6718240811584346, + "epoch": 0.927615619375538, "grad_norm": 0.0, - "learning_rate": 5.136525905411031e-06, - "loss": 0.865, + "learning_rate": 2.735489781886147e-07, + "loss": 1.0663, "step": 23708 }, { - "epoch": 0.6718524186006971, + "epoch": 0.9276547460677674, "grad_norm": 0.0, - "learning_rate": 5.135723993274304e-06, - "loss": 0.8225, + "learning_rate": 2.7325468336724e-07, + "loss": 0.9388, "step": 23709 }, { - "epoch": 0.6718807560429596, + "epoch": 0.9276938727599968, "grad_norm": 0.0, - "learning_rate": 5.1349221221111826e-06, - "loss": 0.8094, + "learning_rate": 2.729605447452077e-07, + "loss": 0.9675, "step": 23710 }, { - "epoch": 0.671909093485222, + "epoch": 0.9277329994522263, "grad_norm": 0.0, - "learning_rate": 5.134120291928415e-06, - "loss": 0.8459, + "learning_rate": 2.7266656232724063e-07, + "loss": 1.0353, "step": 23711 }, { - "epoch": 0.6719374309274845, + "epoch": 0.9277721261444557, "grad_norm": 0.0, - "learning_rate": 5.133318502732759e-06, - "loss": 0.7923, + "learning_rate": 2.7237273611805836e-07, + "loss": 1.0219, "step": 23712 }, { - "epoch": 0.671965768369747, + "epoch": 0.9278112528366852, "grad_norm": 0.0, - "learning_rate": 5.132516754530973e-06, - "loss": 0.8695, + "learning_rate": 2.7207906612238264e-07, + "loss": 0.8874, "step": 23713 }, { - "epoch": 0.6719941058120094, + "epoch": 0.9278503795289146, "grad_norm": 0.0, - "learning_rate": 5.131715047329802e-06, - "loss": 0.8437, + "learning_rate": 2.717855523449242e-07, + "loss": 0.9559, "step": 23714 }, { - "epoch": 0.6720224432542719, + "epoch": 0.9278895062211441, "grad_norm": 0.0, - "learning_rate": 5.130913381136003e-06, - "loss": 0.8755, + "learning_rate": 2.7149219479040257e-07, + "loss": 1.0215, "step": 23715 }, { - "epoch": 0.6720507806965343, + "epoch": 0.9279286329133735, "grad_norm": 0.0, - "learning_rate": 5.130111755956327e-06, - "loss": 0.9099, + "learning_rate": 2.7119899346352395e-07, + "loss": 0.8989, "step": 23716 }, { - "epoch": 0.6720791181387968, + "epoch": 0.927967759605603, "grad_norm": 0.0, - "learning_rate": 5.1293101717975305e-06, - "loss": 0.8287, + "learning_rate": 2.709059483690002e-07, + "loss": 0.949, "step": 23717 }, { - "epoch": 0.6721074555810592, + "epoch": 0.9280068862978323, "grad_norm": 0.0, - "learning_rate": 5.128508628666365e-06, - "loss": 0.8539, + "learning_rate": 2.7061305951153415e-07, + "loss": 0.9102, "step": 23718 }, { - "epoch": 0.6721357930233217, + "epoch": 0.9280460129900618, "grad_norm": 0.0, - "learning_rate": 5.127707126569577e-06, - "loss": 0.8521, + "learning_rate": 2.703203268958321e-07, + "loss": 1.0089, "step": 23719 }, { - "epoch": 0.6721641304655842, + "epoch": 0.9280851396822912, "grad_norm": 0.0, - "learning_rate": 5.126905665513922e-06, - "loss": 0.7702, + "learning_rate": 2.700277505265925e-07, + "loss": 0.9702, "step": 23720 }, { - "epoch": 0.6721924679078466, + "epoch": 0.9281242663745207, "grad_norm": 0.0, - "learning_rate": 5.126104245506153e-06, - "loss": 0.9623, + "learning_rate": 2.69735330408516e-07, + "loss": 1.0567, "step": 23721 }, { - "epoch": 0.6722208053501091, + "epoch": 0.9281633930667501, "grad_norm": 0.0, - "learning_rate": 5.125302866553015e-06, - "loss": 0.825, + "learning_rate": 2.694430665462966e-07, + "loss": 0.9093, "step": 23722 }, { - "epoch": 0.6722491427923716, + "epoch": 0.9282025197589796, "grad_norm": 0.0, - "learning_rate": 5.1245015286612586e-06, - "loss": 0.8795, + "learning_rate": 2.691509589446284e-07, + "loss": 0.9588, "step": 23723 }, { - "epoch": 0.6722774802346341, + "epoch": 0.928241646451209, "grad_norm": 0.0, - "learning_rate": 5.123700231837643e-06, - "loss": 0.8543, + "learning_rate": 2.6885900760820204e-07, + "loss": 0.8401, "step": 23724 }, { - "epoch": 0.6723058176768965, + "epoch": 0.9282807731434385, "grad_norm": 0.0, - "learning_rate": 5.122898976088906e-06, - "loss": 0.9529, + "learning_rate": 2.6856721254170714e-07, + "loss": 0.932, "step": 23725 }, { - "epoch": 0.672334155119159, + "epoch": 0.9283198998356679, "grad_norm": 0.0, - "learning_rate": 5.122097761421806e-06, - "loss": 0.8269, + "learning_rate": 2.682755737498277e-07, + "loss": 0.9313, "step": 23726 }, { - "epoch": 0.6723624925614214, + "epoch": 0.9283590265278974, "grad_norm": 0.0, - "learning_rate": 5.121296587843084e-06, - "loss": 0.7075, + "learning_rate": 2.679840912372489e-07, + "loss": 0.9505, "step": 23727 }, { - "epoch": 0.6723908300036838, + "epoch": 0.9283981532201268, "grad_norm": 0.0, - "learning_rate": 5.120495455359493e-06, - "loss": 0.8622, + "learning_rate": 2.676927650086503e-07, + "loss": 0.9467, "step": 23728 }, { - "epoch": 0.6724191674459463, + "epoch": 0.9284372799123563, "grad_norm": 0.0, - "learning_rate": 5.11969436397778e-06, - "loss": 0.9746, + "learning_rate": 2.6740159506871146e-07, + "loss": 0.9495, "step": 23729 }, { - "epoch": 0.6724475048882088, + "epoch": 0.9284764066045856, "grad_norm": 0.0, - "learning_rate": 5.118893313704693e-06, - "loss": 0.8323, + "learning_rate": 2.6711058142210643e-07, + "loss": 0.8905, "step": 23730 }, { - "epoch": 0.6724758423304712, + "epoch": 0.9285155332968151, "grad_norm": 0.0, - "learning_rate": 5.118092304546987e-06, - "loss": 0.8831, + "learning_rate": 2.668197240735104e-07, + "loss": 0.8708, "step": 23731 }, { - "epoch": 0.6725041797727337, + "epoch": 0.9285546599890445, "grad_norm": 0.0, - "learning_rate": 5.117291336511396e-06, - "loss": 0.7449, + "learning_rate": 2.6652902302759077e-07, + "loss": 1.0479, "step": 23732 }, { - "epoch": 0.6725325172149962, + "epoch": 0.928593786681274, "grad_norm": 0.0, - "learning_rate": 5.116490409604674e-06, - "loss": 0.8734, + "learning_rate": 2.6623847828902037e-07, + "loss": 0.797, "step": 23733 }, { - "epoch": 0.6725608546572587, + "epoch": 0.9286329133735034, "grad_norm": 0.0, - "learning_rate": 5.11568952383357e-06, - "loss": 0.8476, + "learning_rate": 2.659480898624622e-07, + "loss": 1.0309, "step": 23734 }, { - "epoch": 0.6725891920995211, + "epoch": 0.9286720400657329, "grad_norm": 0.0, - "learning_rate": 5.114888679204824e-06, - "loss": 0.8572, + "learning_rate": 2.6565785775258143e-07, + "loss": 0.9853, "step": 23735 }, { - "epoch": 0.6726175295417836, + "epoch": 0.9287111667579623, "grad_norm": 0.0, - "learning_rate": 5.114087875725185e-06, - "loss": 0.8215, + "learning_rate": 2.6536778196403657e-07, + "loss": 0.9418, "step": 23736 }, { - "epoch": 0.672645866984046, + "epoch": 0.9287502934501917, "grad_norm": 0.0, - "learning_rate": 5.1132871134013996e-06, - "loss": 0.8629, + "learning_rate": 2.6507786250148714e-07, + "loss": 0.908, "step": 23737 }, { - "epoch": 0.6726742044263084, + "epoch": 0.9287894201424212, "grad_norm": 0.0, - "learning_rate": 5.1124863922402104e-06, - "loss": 0.8567, + "learning_rate": 2.647880993695884e-07, + "loss": 1.0809, "step": 23738 }, { - "epoch": 0.6727025418685709, + "epoch": 0.9288285468346505, "grad_norm": 0.0, - "learning_rate": 5.111685712248364e-06, - "loss": 0.8715, + "learning_rate": 2.6449849257299545e-07, + "loss": 0.8864, "step": 23739 }, { - "epoch": 0.6727308793108334, + "epoch": 0.92886767352688, "grad_norm": 0.0, - "learning_rate": 5.1108850734326035e-06, - "loss": 0.9169, + "learning_rate": 2.6420904211635567e-07, + "loss": 0.9758, "step": 23740 }, { - "epoch": 0.6727592167530959, + "epoch": 0.9289068002191094, "grad_norm": 0.0, - "learning_rate": 5.110084475799671e-06, - "loss": 0.7758, + "learning_rate": 2.6391974800431985e-07, + "loss": 0.8785, "step": 23741 }, { - "epoch": 0.6727875541953583, + "epoch": 0.9289459269113389, "grad_norm": 0.0, - "learning_rate": 5.109283919356315e-06, - "loss": 0.8125, + "learning_rate": 2.636306102415331e-07, + "loss": 0.9796, "step": 23742 }, { - "epoch": 0.6728158916376208, + "epoch": 0.9289850536035683, "grad_norm": 0.0, - "learning_rate": 5.108483404109275e-06, - "loss": 0.6767, + "learning_rate": 2.6334162883263736e-07, + "loss": 0.9361, "step": 23743 }, { - "epoch": 0.6728442290798833, + "epoch": 0.9290241802957978, "grad_norm": 0.0, - "learning_rate": 5.107682930065296e-06, - "loss": 0.8989, + "learning_rate": 2.630528037822755e-07, + "loss": 0.8847, "step": 23744 }, { - "epoch": 0.6728725665221457, + "epoch": 0.9290633069880272, "grad_norm": 0.0, - "learning_rate": 5.1068824972311245e-06, - "loss": 0.9017, + "learning_rate": 2.627641350950838e-07, + "loss": 0.9603, "step": 23745 }, { - "epoch": 0.6729009039644082, + "epoch": 0.9291024336802567, "grad_norm": 0.0, - "learning_rate": 5.106082105613496e-06, - "loss": 0.745, + "learning_rate": 2.624756227756986e-07, + "loss": 0.9628, "step": 23746 }, { - "epoch": 0.6729292414066707, + "epoch": 0.9291415603724861, "grad_norm": 0.0, - "learning_rate": 5.105281755219158e-06, - "loss": 0.8447, + "learning_rate": 2.6218726682875395e-07, + "loss": 0.9074, "step": 23747 }, { - "epoch": 0.6729575788489331, + "epoch": 0.9291806870647156, "grad_norm": 0.0, - "learning_rate": 5.104481446054845e-06, - "loss": 0.7881, + "learning_rate": 2.6189906725887946e-07, + "loss": 0.9397, "step": 23748 }, { - "epoch": 0.6729859162911955, + "epoch": 0.929219813756945, "grad_norm": 0.0, - "learning_rate": 5.103681178127303e-06, - "loss": 0.803, + "learning_rate": 2.616110240707015e-07, + "loss": 0.9553, "step": 23749 }, { - "epoch": 0.673014253733458, + "epoch": 0.9292589404491745, "grad_norm": 0.0, - "learning_rate": 5.102880951443277e-06, - "loss": 0.8111, + "learning_rate": 2.613231372688496e-07, + "loss": 1.0391, "step": 23750 }, { - "epoch": 0.6730425911757205, + "epoch": 0.9292980671414038, "grad_norm": 0.0, - "learning_rate": 5.102080766009499e-06, - "loss": 0.9313, + "learning_rate": 2.610354068579446e-07, + "loss": 0.8771, "step": 23751 }, { - "epoch": 0.6730709286179829, + "epoch": 0.9293371938336333, "grad_norm": 0.0, - "learning_rate": 5.1012806218327135e-06, - "loss": 0.8624, + "learning_rate": 2.6074783284260717e-07, + "loss": 1.0714, "step": 23752 }, { - "epoch": 0.6730992660602454, + "epoch": 0.9293763205258627, "grad_norm": 0.0, - "learning_rate": 5.100480518919665e-06, - "loss": 0.7888, + "learning_rate": 2.604604152274559e-07, + "loss": 0.8849, "step": 23753 }, { - "epoch": 0.6731276035025079, + "epoch": 0.9294154472180922, "grad_norm": 0.0, - "learning_rate": 5.099680457277083e-06, - "loss": 0.9362, + "learning_rate": 2.6017315401710486e-07, + "loss": 0.963, "step": 23754 }, { - "epoch": 0.6731559409447703, + "epoch": 0.9294545739103216, "grad_norm": 0.0, - "learning_rate": 5.098880436911714e-06, - "loss": 0.7591, + "learning_rate": 2.598860492161692e-07, + "loss": 0.9979, "step": 23755 }, { - "epoch": 0.6731842783870328, + "epoch": 0.9294937006025511, "grad_norm": 0.0, - "learning_rate": 5.098080457830295e-06, - "loss": 0.8662, + "learning_rate": 2.5959910082925975e-07, + "loss": 0.9246, "step": 23756 }, { - "epoch": 0.6732126158292953, + "epoch": 0.9295328272947805, "grad_norm": 0.0, - "learning_rate": 5.097280520039562e-06, - "loss": 0.9429, + "learning_rate": 2.593123088609817e-07, + "loss": 0.9852, "step": 23757 }, { - "epoch": 0.6732409532715578, + "epoch": 0.92957195398701, "grad_norm": 0.0, - "learning_rate": 5.0964806235462625e-06, - "loss": 0.9145, + "learning_rate": 2.590256733159413e-07, + "loss": 0.9777, "step": 23758 }, { - "epoch": 0.6732692907138201, + "epoch": 0.9296110806792394, "grad_norm": 0.0, - "learning_rate": 5.095680768357123e-06, - "loss": 0.7919, + "learning_rate": 2.5873919419874383e-07, + "loss": 0.9919, "step": 23759 }, { - "epoch": 0.6732976281560826, + "epoch": 0.9296502073714689, "grad_norm": 0.0, - "learning_rate": 5.094880954478889e-06, - "loss": 0.6976, + "learning_rate": 2.584528715139889e-07, + "loss": 0.8158, "step": 23760 }, { - "epoch": 0.6733259655983451, + "epoch": 0.9296893340636982, "grad_norm": 0.0, - "learning_rate": 5.094081181918291e-06, - "loss": 0.8129, + "learning_rate": 2.5816670526627285e-07, + "loss": 0.9167, "step": 23761 }, { - "epoch": 0.6733543030406075, + "epoch": 0.9297284607559277, "grad_norm": 0.0, - "learning_rate": 5.0932814506820685e-06, - "loss": 0.8389, + "learning_rate": 2.5788069546019423e-07, + "loss": 0.8373, "step": 23762 }, { - "epoch": 0.67338264048287, + "epoch": 0.9297675874481571, "grad_norm": 0.0, - "learning_rate": 5.092481760776962e-06, - "loss": 0.8444, + "learning_rate": 2.575948421003416e-07, + "loss": 0.9082, "step": 23763 }, { - "epoch": 0.6734109779251325, + "epoch": 0.9298067141403866, "grad_norm": 0.0, - "learning_rate": 5.0916821122097e-06, - "loss": 0.8969, + "learning_rate": 2.57309145191309e-07, + "loss": 0.964, "step": 23764 }, { - "epoch": 0.673439315367395, + "epoch": 0.929845840832616, "grad_norm": 0.0, - "learning_rate": 5.090882504987023e-06, - "loss": 0.8774, + "learning_rate": 2.5702360473768396e-07, + "loss": 1.0624, "step": 23765 }, { - "epoch": 0.6734676528096574, + "epoch": 0.9298849675248454, "grad_norm": 0.0, - "learning_rate": 5.090082939115668e-06, - "loss": 0.8121, + "learning_rate": 2.5673822074405053e-07, + "loss": 0.9474, "step": 23766 }, { - "epoch": 0.6734959902519199, + "epoch": 0.9299240942170749, "grad_norm": 0.0, - "learning_rate": 5.0892834146023665e-06, - "loss": 0.8059, + "learning_rate": 2.564529932149906e-07, + "loss": 0.9472, "step": 23767 }, { - "epoch": 0.6735243276941824, + "epoch": 0.9299632209093043, "grad_norm": 0.0, - "learning_rate": 5.088483931453851e-06, - "loss": 0.8377, + "learning_rate": 2.5616792215508943e-07, + "loss": 1.0247, "step": 23768 }, { - "epoch": 0.6735526651364447, + "epoch": 0.9300023476015338, "grad_norm": 0.0, - "learning_rate": 5.087684489676862e-06, - "loss": 0.8483, + "learning_rate": 2.5588300756892004e-07, + "loss": 0.9884, "step": 23769 }, { - "epoch": 0.6735810025787072, + "epoch": 0.9300414742937632, "grad_norm": 0.0, - "learning_rate": 5.08688508927813e-06, - "loss": 0.7325, + "learning_rate": 2.555982494610598e-07, + "loss": 1.0797, "step": 23770 }, { - "epoch": 0.6736093400209697, + "epoch": 0.9300806009859927, "grad_norm": 0.0, - "learning_rate": 5.086085730264392e-06, - "loss": 0.9034, + "learning_rate": 2.5531364783608073e-07, + "loss": 0.9992, "step": 23771 }, { - "epoch": 0.6736376774632322, + "epoch": 0.930119727678222, "grad_norm": 0.0, - "learning_rate": 5.085286412642376e-06, - "loss": 0.8206, + "learning_rate": 2.5502920269855346e-07, + "loss": 0.8183, "step": 23772 }, { - "epoch": 0.6736660149054946, + "epoch": 0.9301588543704515, "grad_norm": 0.0, - "learning_rate": 5.084487136418816e-06, - "loss": 0.7859, + "learning_rate": 2.547449140530467e-07, + "loss": 0.86, "step": 23773 }, { - "epoch": 0.6736943523477571, + "epoch": 0.9301979810626809, "grad_norm": 0.0, - "learning_rate": 5.083687901600452e-06, - "loss": 0.8461, + "learning_rate": 2.544607819041256e-07, + "loss": 0.8992, "step": 23774 }, { - "epoch": 0.6737226897900196, + "epoch": 0.9302371077549104, "grad_norm": 0.0, - "learning_rate": 5.082888708194006e-06, - "loss": 0.881, + "learning_rate": 2.541768062563521e-07, + "loss": 1.0599, "step": 23775 }, { - "epoch": 0.673751027232282, + "epoch": 0.9302762344471398, "grad_norm": 0.0, - "learning_rate": 5.082089556206218e-06, - "loss": 0.9337, + "learning_rate": 2.53892987114287e-07, + "loss": 0.7633, "step": 23776 }, { - "epoch": 0.6737793646745445, + "epoch": 0.9303153611393693, "grad_norm": 0.0, - "learning_rate": 5.081290445643812e-06, - "loss": 0.8087, + "learning_rate": 2.5360932448248663e-07, + "loss": 0.9028, "step": 23777 }, { - "epoch": 0.673807702116807, + "epoch": 0.9303544878315987, "grad_norm": 0.0, - "learning_rate": 5.080491376513523e-06, - "loss": 0.8075, + "learning_rate": 2.533258183655096e-07, + "loss": 0.9398, "step": 23778 }, { - "epoch": 0.6738360395590693, + "epoch": 0.9303936145238282, "grad_norm": 0.0, - "learning_rate": 5.079692348822085e-06, - "loss": 0.8691, + "learning_rate": 2.530424687679056e-07, + "loss": 0.8362, "step": 23779 }, { - "epoch": 0.6738643770013318, + "epoch": 0.9304327412160576, "grad_norm": 0.0, - "learning_rate": 5.078893362576223e-06, - "loss": 0.914, + "learning_rate": 2.527592756942265e-07, + "loss": 0.9834, "step": 23780 }, { - "epoch": 0.6738927144435943, + "epoch": 0.9304718679082871, "grad_norm": 0.0, - "learning_rate": 5.078094417782669e-06, - "loss": 0.9131, + "learning_rate": 2.5247623914901984e-07, + "loss": 0.9341, "step": 23781 }, { - "epoch": 0.6739210518858568, + "epoch": 0.9305109946005165, "grad_norm": 0.0, - "learning_rate": 5.077295514448154e-06, - "loss": 0.8436, + "learning_rate": 2.5219335913682864e-07, + "loss": 0.9734, "step": 23782 }, { - "epoch": 0.6739493893281192, + "epoch": 0.930550121292746, "grad_norm": 0.0, - "learning_rate": 5.076496652579406e-06, - "loss": 0.8149, + "learning_rate": 2.519106356621981e-07, + "loss": 1.0081, "step": 23783 }, { - "epoch": 0.6739777267703817, + "epoch": 0.9305892479849753, "grad_norm": 0.0, - "learning_rate": 5.07569783218316e-06, - "loss": 0.8433, + "learning_rate": 2.5162806872966903e-07, + "loss": 0.9023, "step": 23784 }, { - "epoch": 0.6740060642126442, + "epoch": 0.9306283746772048, "grad_norm": 0.0, - "learning_rate": 5.0748990532661345e-06, - "loss": 0.8699, + "learning_rate": 2.513456583437768e-07, + "loss": 0.9545, "step": 23785 }, { - "epoch": 0.6740344016549066, + "epoch": 0.9306675013694342, "grad_norm": 0.0, - "learning_rate": 5.074100315835064e-06, - "loss": 0.7818, + "learning_rate": 2.5106340450905763e-07, + "loss": 0.9478, "step": 23786 }, { - "epoch": 0.6740627390971691, + "epoch": 0.9307066280616637, "grad_norm": 0.0, - "learning_rate": 5.073301619896679e-06, - "loss": 0.9143, + "learning_rate": 2.507813072300447e-07, + "loss": 1.0941, "step": 23787 }, { - "epoch": 0.6740910765394316, + "epoch": 0.9307457547538931, "grad_norm": 0.0, - "learning_rate": 5.072502965457701e-06, - "loss": 0.8952, + "learning_rate": 2.504993665112665e-07, + "loss": 0.876, "step": 23788 }, { - "epoch": 0.6741194139816941, + "epoch": 0.9307848814461226, "grad_norm": 0.0, - "learning_rate": 5.071704352524863e-06, - "loss": 0.9445, + "learning_rate": 2.502175823572517e-07, + "loss": 0.9959, "step": 23789 }, { - "epoch": 0.6741477514239564, + "epoch": 0.930824008138352, "grad_norm": 0.0, - "learning_rate": 5.070905781104887e-06, - "loss": 0.8851, + "learning_rate": 2.499359547725255e-07, + "loss": 0.9039, "step": 23790 }, { - "epoch": 0.6741760888662189, + "epoch": 0.9308631348305815, "grad_norm": 0.0, - "learning_rate": 5.0701072512045e-06, - "loss": 0.7864, + "learning_rate": 2.496544837616111e-07, + "loss": 1.0159, "step": 23791 }, { - "epoch": 0.6742044263084814, + "epoch": 0.9309022615228109, "grad_norm": 0.0, - "learning_rate": 5.069308762830436e-06, - "loss": 0.7656, + "learning_rate": 2.493731693290269e-07, + "loss": 1.0103, "step": 23792 }, { - "epoch": 0.6742327637507438, + "epoch": 0.9309413882150404, "grad_norm": 0.0, - "learning_rate": 5.06851031598941e-06, - "loss": 0.8435, + "learning_rate": 2.4909201147929275e-07, + "loss": 0.8198, "step": 23793 }, { - "epoch": 0.6742611011930063, + "epoch": 0.9309805149072697, "grad_norm": 0.0, - "learning_rate": 5.067711910688153e-06, - "loss": 0.8306, + "learning_rate": 2.4881101021691945e-07, + "loss": 0.9025, "step": 23794 }, { - "epoch": 0.6742894386352688, + "epoch": 0.9310196415994991, "grad_norm": 0.0, - "learning_rate": 5.066913546933392e-06, - "loss": 0.8668, + "learning_rate": 2.485301655464256e-07, + "loss": 0.97, "step": 23795 }, { - "epoch": 0.6743177760775313, + "epoch": 0.9310587682917286, "grad_norm": 0.0, - "learning_rate": 5.066115224731848e-06, - "loss": 0.9763, + "learning_rate": 2.4824947747231544e-07, + "loss": 0.8935, "step": 23796 }, { - "epoch": 0.6743461135197937, + "epoch": 0.931097894983958, "grad_norm": 0.0, - "learning_rate": 5.065316944090253e-06, - "loss": 0.8571, + "learning_rate": 2.4796894599910194e-07, + "loss": 1.0655, "step": 23797 }, { - "epoch": 0.6743744509620562, + "epoch": 0.9311370216761875, "grad_norm": 0.0, - "learning_rate": 5.06451870501532e-06, - "loss": 0.8877, + "learning_rate": 2.476885711312849e-07, + "loss": 0.9744, "step": 23798 }, { - "epoch": 0.6744027884043187, + "epoch": 0.9311761483684169, "grad_norm": 0.0, - "learning_rate": 5.063720507513781e-06, - "loss": 0.7797, + "learning_rate": 2.474083528733695e-07, + "loss": 0.9644, "step": 23799 }, { - "epoch": 0.674431125846581, + "epoch": 0.9312152750606464, "grad_norm": 0.0, - "learning_rate": 5.062922351592359e-06, - "loss": 0.7705, + "learning_rate": 2.4712829122985447e-07, + "loss": 0.9426, "step": 23800 }, { - "epoch": 0.6744594632888435, + "epoch": 0.9312544017528758, "grad_norm": 0.0, - "learning_rate": 5.062124237257772e-06, - "loss": 0.8712, + "learning_rate": 2.468483862052407e-07, + "loss": 0.9902, "step": 23801 }, { - "epoch": 0.674487800731106, + "epoch": 0.9312935284451053, "grad_norm": 0.0, - "learning_rate": 5.061326164516747e-06, - "loss": 0.7686, + "learning_rate": 2.4656863780401775e-07, + "loss": 1.0438, "step": 23802 }, { - "epoch": 0.6745161381733684, + "epoch": 0.9313326551373347, "grad_norm": 0.0, - "learning_rate": 5.060528133376009e-06, - "loss": 0.8627, + "learning_rate": 2.462890460306822e-07, + "loss": 1.0042, "step": 23803 }, { - "epoch": 0.6745444756156309, + "epoch": 0.9313717818295641, "grad_norm": 0.0, - "learning_rate": 5.059730143842273e-06, - "loss": 0.8971, + "learning_rate": 2.4600961088972143e-07, + "loss": 1.0106, "step": 23804 }, { - "epoch": 0.6745728130578934, + "epoch": 0.9314109085217935, "grad_norm": 0.0, - "learning_rate": 5.058932195922268e-06, - "loss": 0.8422, + "learning_rate": 2.4573033238562525e-07, + "loss": 0.976, "step": 23805 }, { - "epoch": 0.6746011505001559, + "epoch": 0.931450035214023, "grad_norm": 0.0, - "learning_rate": 5.058134289622709e-06, - "loss": 0.9543, + "learning_rate": 2.454512105228768e-07, + "loss": 0.9888, "step": 23806 }, { - "epoch": 0.6746294879424183, + "epoch": 0.9314891619062524, "grad_norm": 0.0, - "learning_rate": 5.057336424950319e-06, - "loss": 0.8662, + "learning_rate": 2.4517224530595794e-07, + "loss": 0.9837, "step": 23807 }, { - "epoch": 0.6746578253846808, + "epoch": 0.9315282885984819, "grad_norm": 0.0, - "learning_rate": 5.05653860191182e-06, - "loss": 0.885, + "learning_rate": 2.4489343673934964e-07, + "loss": 0.9161, "step": 23808 }, { - "epoch": 0.6746861628269433, + "epoch": 0.9315674152907113, "grad_norm": 0.0, - "learning_rate": 5.055740820513932e-06, - "loss": 0.7328, + "learning_rate": 2.4461478482752933e-07, + "loss": 0.9099, "step": 23809 }, { - "epoch": 0.6747145002692057, + "epoch": 0.9316065419829408, "grad_norm": 0.0, - "learning_rate": 5.054943080763376e-06, - "loss": 0.7733, + "learning_rate": 2.443362895749712e-07, + "loss": 0.9585, "step": 23810 }, { - "epoch": 0.6747428377114681, + "epoch": 0.9316456686751702, "grad_norm": 0.0, - "learning_rate": 5.054145382666874e-06, - "loss": 0.8948, + "learning_rate": 2.440579509861485e-07, + "loss": 0.9683, "step": 23811 }, { - "epoch": 0.6747711751537306, + "epoch": 0.9316847953673997, "grad_norm": 0.0, - "learning_rate": 5.0533477262311384e-06, - "loss": 0.8032, + "learning_rate": 2.4377976906552746e-07, + "loss": 0.8962, "step": 23812 }, { - "epoch": 0.6747995125959931, + "epoch": 0.9317239220596291, "grad_norm": 0.0, - "learning_rate": 5.052550111462895e-06, - "loss": 0.844, + "learning_rate": 2.435017438175813e-07, + "loss": 0.9619, "step": 23813 }, { - "epoch": 0.6748278500382555, + "epoch": 0.9317630487518586, "grad_norm": 0.0, - "learning_rate": 5.051752538368855e-06, - "loss": 0.8369, + "learning_rate": 2.432238752467697e-07, + "loss": 1.0159, "step": 23814 }, { - "epoch": 0.674856187480518, + "epoch": 0.9318021754440879, "grad_norm": 0.0, - "learning_rate": 5.050955006955742e-06, - "loss": 0.8134, + "learning_rate": 2.4294616335755806e-07, + "loss": 1.0676, "step": 23815 }, { - "epoch": 0.6748845249227805, + "epoch": 0.9318413021363174, "grad_norm": 0.0, - "learning_rate": 5.050157517230276e-06, - "loss": 0.8497, + "learning_rate": 2.4266860815440275e-07, + "loss": 0.84, "step": 23816 }, { - "epoch": 0.6749128623650429, + "epoch": 0.9318804288285468, "grad_norm": 0.0, - "learning_rate": 5.049360069199167e-06, - "loss": 0.849, + "learning_rate": 2.4239120964176577e-07, + "loss": 1.0385, "step": 23817 }, { - "epoch": 0.6749411998073054, + "epoch": 0.9319195555207763, "grad_norm": 0.0, - "learning_rate": 5.048562662869142e-06, - "loss": 0.9566, + "learning_rate": 2.421139678240969e-07, + "loss": 1.0818, "step": 23818 }, { - "epoch": 0.6749695372495679, + "epoch": 0.9319586822130057, "grad_norm": 0.0, - "learning_rate": 5.047765298246907e-06, - "loss": 0.8498, + "learning_rate": 2.4183688270585147e-07, + "loss": 0.9781, "step": 23819 }, { - "epoch": 0.6749978746918304, + "epoch": 0.9319978089052352, "grad_norm": 0.0, - "learning_rate": 5.046967975339184e-06, - "loss": 0.7258, + "learning_rate": 2.4155995429147705e-07, + "loss": 1.0082, "step": 23820 }, { - "epoch": 0.6750262121340928, + "epoch": 0.9320369355974646, "grad_norm": 0.0, - "learning_rate": 5.04617069415269e-06, - "loss": 0.8142, + "learning_rate": 2.4128318258542227e-07, + "loss": 0.931, "step": 23821 }, { - "epoch": 0.6750545495763552, + "epoch": 0.932076062289694, "grad_norm": 0.0, - "learning_rate": 5.045373454694139e-06, - "loss": 0.8282, + "learning_rate": 2.4100656759213135e-07, + "loss": 0.849, "step": 23822 }, { - "epoch": 0.6750828870186177, + "epoch": 0.9321151889819235, "grad_norm": 0.0, - "learning_rate": 5.044576256970247e-06, - "loss": 0.8635, + "learning_rate": 2.4073010931604635e-07, + "loss": 0.9645, "step": 23823 }, { - "epoch": 0.6751112244608801, + "epoch": 0.9321543156741529, "grad_norm": 0.0, - "learning_rate": 5.0437791009877335e-06, - "loss": 0.7673, + "learning_rate": 2.4045380776160587e-07, + "loss": 0.909, "step": 23824 }, { - "epoch": 0.6751395619031426, + "epoch": 0.9321934423663824, "grad_norm": 0.0, - "learning_rate": 5.042981986753306e-06, - "loss": 0.8812, + "learning_rate": 2.4017766293324863e-07, + "loss": 0.9326, "step": 23825 }, { - "epoch": 0.6751678993454051, + "epoch": 0.9322325690586117, "grad_norm": 0.0, - "learning_rate": 5.042184914273685e-06, - "loss": 0.8266, + "learning_rate": 2.399016748354088e-07, + "loss": 0.9088, "step": 23826 }, { - "epoch": 0.6751962367876675, + "epoch": 0.9322716957508412, "grad_norm": 0.0, - "learning_rate": 5.041387883555577e-06, - "loss": 0.8131, + "learning_rate": 2.3962584347251737e-07, + "loss": 0.9024, "step": 23827 }, { - "epoch": 0.67522457422993, + "epoch": 0.9323108224430706, "grad_norm": 0.0, - "learning_rate": 5.040590894605701e-06, - "loss": 0.8377, + "learning_rate": 2.393501688490041e-07, + "loss": 1.0126, "step": 23828 }, { - "epoch": 0.6752529116721925, + "epoch": 0.9323499491353001, "grad_norm": 0.0, - "learning_rate": 5.039793947430774e-06, - "loss": 0.8146, + "learning_rate": 2.390746509692976e-07, + "loss": 0.8611, "step": 23829 }, { - "epoch": 0.675281249114455, + "epoch": 0.9323890758275295, "grad_norm": 0.0, - "learning_rate": 5.0389970420375e-06, - "loss": 0.7974, + "learning_rate": 2.3879928983782e-07, + "loss": 0.8925, "step": 23830 }, { - "epoch": 0.6753095865567174, + "epoch": 0.932428202519759, "grad_norm": 0.0, - "learning_rate": 5.0382001784325966e-06, - "loss": 0.8352, + "learning_rate": 2.385240854589943e-07, + "loss": 0.9108, "step": 23831 }, { - "epoch": 0.6753379239989798, + "epoch": 0.9324673292119884, "grad_norm": 0.0, - "learning_rate": 5.037403356622779e-06, - "loss": 0.8939, + "learning_rate": 2.382490378372404e-07, + "loss": 0.9065, "step": 23832 }, { - "epoch": 0.6753662614412423, + "epoch": 0.9325064559042179, "grad_norm": 0.0, - "learning_rate": 5.036606576614751e-06, - "loss": 0.8337, + "learning_rate": 2.3797414697697475e-07, + "loss": 1.0207, "step": 23833 }, { - "epoch": 0.6753945988835047, + "epoch": 0.9325455825964473, "grad_norm": 0.0, - "learning_rate": 5.035809838415231e-06, - "loss": 0.8076, + "learning_rate": 2.3769941288261156e-07, + "loss": 0.959, "step": 23834 }, { - "epoch": 0.6754229363257672, + "epoch": 0.9325847092886768, "grad_norm": 0.0, - "learning_rate": 5.0350131420309265e-06, - "loss": 0.8427, + "learning_rate": 2.374248355585629e-07, + "loss": 0.9266, "step": 23835 }, { - "epoch": 0.6754512737680297, + "epoch": 0.9326238359809061, "grad_norm": 0.0, - "learning_rate": 5.034216487468551e-06, - "loss": 0.8815, + "learning_rate": 2.3715041500923853e-07, + "loss": 0.8086, "step": 23836 }, { - "epoch": 0.6754796112102922, + "epoch": 0.9326629626731356, "grad_norm": 0.0, - "learning_rate": 5.033419874734816e-06, - "loss": 0.8548, + "learning_rate": 2.3687615123904495e-07, + "loss": 1.0641, "step": 23837 }, { - "epoch": 0.6755079486525546, + "epoch": 0.932702089365365, "grad_norm": 0.0, - "learning_rate": 5.032623303836428e-06, - "loss": 0.813, + "learning_rate": 2.3660204425238754e-07, + "loss": 0.9399, "step": 23838 }, { - "epoch": 0.6755362860948171, + "epoch": 0.9327412160575945, "grad_norm": 0.0, - "learning_rate": 5.031826774780098e-06, - "loss": 0.743, + "learning_rate": 2.3632809405366387e-07, + "loss": 0.9348, "step": 23839 }, { - "epoch": 0.6755646235370796, + "epoch": 0.9327803427498239, "grad_norm": 0.0, - "learning_rate": 5.031030287572538e-06, - "loss": 0.8103, + "learning_rate": 2.360543006472793e-07, + "loss": 0.9001, "step": 23840 }, { - "epoch": 0.675592960979342, + "epoch": 0.9328194694420534, "grad_norm": 0.0, - "learning_rate": 5.030233842220453e-06, - "loss": 0.8505, + "learning_rate": 2.3578066403762701e-07, + "loss": 0.9695, "step": 23841 }, { - "epoch": 0.6756212984216045, + "epoch": 0.9328585961342828, "grad_norm": 0.0, - "learning_rate": 5.029437438730558e-06, - "loss": 0.8787, + "learning_rate": 2.3550718422910235e-07, + "loss": 0.9891, "step": 23842 }, { - "epoch": 0.675649635863867, + "epoch": 0.9328977228265123, "grad_norm": 0.0, - "learning_rate": 5.028641077109553e-06, - "loss": 0.8056, + "learning_rate": 2.3523386122609514e-07, + "loss": 0.9372, "step": 23843 }, { - "epoch": 0.6756779733061294, + "epoch": 0.9329368495187417, "grad_norm": 0.0, - "learning_rate": 5.02784475736415e-06, - "loss": 0.775, + "learning_rate": 2.3496069503299745e-07, + "loss": 1.0427, "step": 23844 }, { - "epoch": 0.6757063107483918, + "epoch": 0.9329759762109712, "grad_norm": 0.0, - "learning_rate": 5.0270484795010595e-06, - "loss": 0.7693, + "learning_rate": 2.3468768565419463e-07, + "loss": 0.9077, "step": 23845 }, { - "epoch": 0.6757346481906543, + "epoch": 0.9330151029032006, "grad_norm": 0.0, - "learning_rate": 5.026252243526984e-06, - "loss": 0.8539, + "learning_rate": 2.344148330940721e-07, + "loss": 0.8845, "step": 23846 }, { - "epoch": 0.6757629856329168, + "epoch": 0.93305422959543, "grad_norm": 0.0, - "learning_rate": 5.025456049448634e-06, - "loss": 0.8804, + "learning_rate": 2.3414213735700853e-07, + "loss": 0.9665, "step": 23847 }, { - "epoch": 0.6757913230751792, + "epoch": 0.9330933562876594, "grad_norm": 0.0, - "learning_rate": 5.0246598972727125e-06, - "loss": 0.7795, + "learning_rate": 2.3386959844738598e-07, + "loss": 0.9044, "step": 23848 }, { - "epoch": 0.6758196605174417, + "epoch": 0.9331324829798889, "grad_norm": 0.0, - "learning_rate": 5.0238637870059296e-06, - "loss": 0.7794, + "learning_rate": 2.3359721636957878e-07, + "loss": 0.9747, "step": 23849 }, { - "epoch": 0.6758479979597042, + "epoch": 0.9331716096721183, "grad_norm": 0.0, - "learning_rate": 5.023067718654994e-06, - "loss": 0.8549, + "learning_rate": 2.3332499112796447e-07, + "loss": 0.8276, "step": 23850 }, { - "epoch": 0.6758763354019666, + "epoch": 0.9332107363643477, "grad_norm": 0.0, - "learning_rate": 5.022271692226602e-06, - "loss": 0.793, + "learning_rate": 2.330529227269107e-07, + "loss": 1.0239, "step": 23851 }, { - "epoch": 0.6759046728442291, + "epoch": 0.9332498630565772, "grad_norm": 0.0, - "learning_rate": 5.021475707727466e-06, - "loss": 0.7927, + "learning_rate": 2.3278101117078843e-07, + "loss": 0.9498, "step": 23852 }, { - "epoch": 0.6759330102864916, + "epoch": 0.9332889897488066, "grad_norm": 0.0, - "learning_rate": 5.02067976516429e-06, - "loss": 0.8199, + "learning_rate": 2.3250925646396416e-07, + "loss": 0.9126, "step": 23853 }, { - "epoch": 0.675961347728754, + "epoch": 0.9333281164410361, "grad_norm": 0.0, - "learning_rate": 5.019883864543776e-06, - "loss": 0.9016, + "learning_rate": 2.3223765861080218e-07, + "loss": 0.8869, "step": 23854 }, { - "epoch": 0.6759896851710164, + "epoch": 0.9333672431332655, "grad_norm": 0.0, - "learning_rate": 5.019088005872632e-06, - "loss": 0.7827, + "learning_rate": 2.3196621761566452e-07, + "loss": 0.8913, "step": 23855 }, { - "epoch": 0.6760180226132789, + "epoch": 0.933406369825495, "grad_norm": 0.0, - "learning_rate": 5.018292189157555e-06, - "loss": 0.8128, + "learning_rate": 2.3169493348290884e-07, + "loss": 0.8504, "step": 23856 }, { - "epoch": 0.6760463600555414, + "epoch": 0.9334454965177243, "grad_norm": 0.0, - "learning_rate": 5.017496414405254e-06, - "loss": 0.8806, + "learning_rate": 2.3142380621689052e-07, + "loss": 1.0777, "step": 23857 }, { - "epoch": 0.6760746974978038, + "epoch": 0.9334846232099538, "grad_norm": 0.0, - "learning_rate": 5.016700681622434e-06, - "loss": 0.8531, + "learning_rate": 2.3115283582196725e-07, + "loss": 0.8984, "step": 23858 }, { - "epoch": 0.6761030349400663, + "epoch": 0.9335237499021832, "grad_norm": 0.0, - "learning_rate": 5.015904990815792e-06, - "loss": 0.8354, + "learning_rate": 2.3088202230248768e-07, + "loss": 1.0955, "step": 23859 }, { - "epoch": 0.6761313723823288, + "epoch": 0.9335628765944127, "grad_norm": 0.0, - "learning_rate": 5.015109341992032e-06, - "loss": 0.8177, + "learning_rate": 2.3061136566280175e-07, + "loss": 0.9867, "step": 23860 }, { - "epoch": 0.6761597098245913, + "epoch": 0.9336020032866421, "grad_norm": 0.0, - "learning_rate": 5.014313735157856e-06, - "loss": 0.8887, + "learning_rate": 2.3034086590725368e-07, + "loss": 1.0365, "step": 23861 }, { - "epoch": 0.6761880472668537, + "epoch": 0.9336411299788716, "grad_norm": 0.0, - "learning_rate": 5.013518170319968e-06, - "loss": 0.866, + "learning_rate": 2.3007052304019228e-07, + "loss": 1.0078, "step": 23862 }, { - "epoch": 0.6762163847091162, + "epoch": 0.933680256671101, "grad_norm": 0.0, - "learning_rate": 5.012722647485073e-06, - "loss": 0.9042, + "learning_rate": 2.2980033706595407e-07, + "loss": 1.0023, "step": 23863 }, { - "epoch": 0.6762447221513787, + "epoch": 0.9337193833633305, "grad_norm": 0.0, - "learning_rate": 5.011927166659862e-06, - "loss": 0.8563, + "learning_rate": 2.2953030798888e-07, + "loss": 1.0352, "step": 23864 }, { - "epoch": 0.676273059593641, + "epoch": 0.9337585100555599, "grad_norm": 0.0, - "learning_rate": 5.011131727851042e-06, - "loss": 0.7808, + "learning_rate": 2.292604358133066e-07, + "loss": 0.9934, "step": 23865 }, { - "epoch": 0.6763013970359035, + "epoch": 0.9337976367477894, "grad_norm": 0.0, - "learning_rate": 5.0103363310653154e-06, - "loss": 0.8464, + "learning_rate": 2.2899072054356598e-07, + "loss": 0.9238, "step": 23866 }, { - "epoch": 0.676329734478166, + "epoch": 0.9338367634400188, "grad_norm": 0.0, - "learning_rate": 5.0095409763093725e-06, - "loss": 0.8112, + "learning_rate": 2.287211621839913e-07, + "loss": 0.871, "step": 23867 }, { - "epoch": 0.6763580719204285, + "epoch": 0.9338758901322483, "grad_norm": 0.0, - "learning_rate": 5.008745663589922e-06, - "loss": 0.7828, + "learning_rate": 2.284517607389114e-07, + "loss": 0.861, "step": 23868 }, { - "epoch": 0.6763864093626909, + "epoch": 0.9339150168244776, "grad_norm": 0.0, - "learning_rate": 5.007950392913663e-06, - "loss": 0.8453, + "learning_rate": 2.2818251621265052e-07, + "loss": 0.8417, "step": 23869 }, { - "epoch": 0.6764147468049534, + "epoch": 0.9339541435167071, "grad_norm": 0.0, - "learning_rate": 5.007155164287289e-06, - "loss": 0.8742, + "learning_rate": 2.2791342860953415e-07, + "loss": 0.89, "step": 23870 }, { - "epoch": 0.6764430842472159, + "epoch": 0.9339932702089365, "grad_norm": 0.0, - "learning_rate": 5.006359977717503e-06, - "loss": 0.9038, + "learning_rate": 2.2764449793388432e-07, + "loss": 0.9999, "step": 23871 }, { - "epoch": 0.6764714216894783, + "epoch": 0.934032396901166, "grad_norm": 0.0, - "learning_rate": 5.005564833210998e-06, - "loss": 0.7279, + "learning_rate": 2.2737572419001653e-07, + "loss": 1.007, "step": 23872 }, { - "epoch": 0.6764997591317408, + "epoch": 0.9340715235933954, "grad_norm": 0.0, - "learning_rate": 5.004769730774476e-06, - "loss": 0.8173, + "learning_rate": 2.271071073822495e-07, + "loss": 0.8522, "step": 23873 }, { - "epoch": 0.6765280965740033, + "epoch": 0.9341106502856249, "grad_norm": 0.0, - "learning_rate": 5.003974670414633e-06, - "loss": 0.6814, + "learning_rate": 2.2683864751489537e-07, + "loss": 1.0494, "step": 23874 }, { - "epoch": 0.6765564340162656, + "epoch": 0.9341497769778543, "grad_norm": 0.0, - "learning_rate": 5.003179652138166e-06, - "loss": 0.7803, + "learning_rate": 2.2657034459226735e-07, + "loss": 0.9505, "step": 23875 }, { - "epoch": 0.6765847714585281, + "epoch": 0.9341889036700838, "grad_norm": 0.0, - "learning_rate": 5.002384675951777e-06, - "loss": 0.8645, + "learning_rate": 2.263021986186731e-07, + "loss": 0.9464, "step": 23876 }, { - "epoch": 0.6766131089007906, + "epoch": 0.9342280303623132, "grad_norm": 0.0, - "learning_rate": 5.001589741862153e-06, - "loss": 0.8442, + "learning_rate": 2.2603420959841805e-07, + "loss": 0.9713, "step": 23877 }, { - "epoch": 0.6766414463430531, + "epoch": 0.9342671570545427, "grad_norm": 0.0, - "learning_rate": 5.0007948498759954e-06, - "loss": 0.8901, + "learning_rate": 2.2576637753580544e-07, + "loss": 1.0467, "step": 23878 }, { - "epoch": 0.6766697837853155, + "epoch": 0.934306283746772, "grad_norm": 0.0, - "learning_rate": 5.000000000000003e-06, - "loss": 0.8893, + "learning_rate": 2.2549870243513737e-07, + "loss": 0.8035, "step": 23879 }, { - "epoch": 0.676698121227578, + "epoch": 0.9343454104390014, "grad_norm": 0.0, - "learning_rate": 4.999205192240863e-06, - "loss": 0.8224, + "learning_rate": 2.2523118430071157e-07, + "loss": 1.0486, "step": 23880 }, { - "epoch": 0.6767264586698405, + "epoch": 0.9343845371312309, "grad_norm": 0.0, - "learning_rate": 4.998410426605274e-06, - "loss": 0.9288, + "learning_rate": 2.2496382313682562e-07, + "loss": 0.9953, "step": 23881 }, { - "epoch": 0.6767547961121029, + "epoch": 0.9344236638234603, "grad_norm": 0.0, - "learning_rate": 4.9976157030999365e-06, - "loss": 0.855, + "learning_rate": 2.246966189477706e-07, + "loss": 0.975, "step": 23882 }, { - "epoch": 0.6767831335543654, + "epoch": 0.9344627905156898, "grad_norm": 0.0, - "learning_rate": 4.996821021731535e-06, - "loss": 0.7868, + "learning_rate": 2.2442957173783863e-07, + "loss": 1.0256, "step": 23883 }, { - "epoch": 0.6768114709966279, + "epoch": 0.9345019172079192, "grad_norm": 0.0, - "learning_rate": 4.9960263825067725e-06, - "loss": 0.7689, + "learning_rate": 2.2416268151131848e-07, + "loss": 0.8993, "step": 23884 }, { - "epoch": 0.6768398084388904, + "epoch": 0.9345410439001487, "grad_norm": 0.0, - "learning_rate": 4.995231785432333e-06, - "loss": 0.8869, + "learning_rate": 2.2389594827249784e-07, + "loss": 1.0125, "step": 23885 }, { - "epoch": 0.6768681458811527, + "epoch": 0.9345801705923781, "grad_norm": 0.0, - "learning_rate": 4.994437230514916e-06, - "loss": 0.6864, + "learning_rate": 2.2362937202565549e-07, + "loss": 0.9681, "step": 23886 }, { - "epoch": 0.6768964833234152, + "epoch": 0.9346192972846076, "grad_norm": 0.0, - "learning_rate": 4.993642717761211e-06, - "loss": 0.8025, + "learning_rate": 2.2336295277507691e-07, + "loss": 0.9852, "step": 23887 }, { - "epoch": 0.6769248207656777, + "epoch": 0.934658423976837, "grad_norm": 0.0, - "learning_rate": 4.992848247177913e-06, - "loss": 0.8622, + "learning_rate": 2.2309669052503646e-07, + "loss": 0.9352, "step": 23888 }, { - "epoch": 0.6769531582079401, + "epoch": 0.9346975506690665, "grad_norm": 0.0, - "learning_rate": 4.992053818771715e-06, - "loss": 0.8494, + "learning_rate": 2.2283058527981405e-07, + "loss": 1.0909, "step": 23889 }, { - "epoch": 0.6769814956502026, + "epoch": 0.9347366773612958, "grad_norm": 0.0, - "learning_rate": 4.99125943254931e-06, - "loss": 0.9398, + "learning_rate": 2.225646370436796e-07, + "loss": 0.8703, "step": 23890 }, { - "epoch": 0.6770098330924651, + "epoch": 0.9347758040535253, "grad_norm": 0.0, - "learning_rate": 4.990465088517384e-06, - "loss": 0.9159, + "learning_rate": 2.2229884582090634e-07, + "loss": 0.9474, "step": 23891 }, { - "epoch": 0.6770381705347276, + "epoch": 0.9348149307457547, "grad_norm": 0.0, - "learning_rate": 4.989670786682634e-06, - "loss": 0.7686, + "learning_rate": 2.2203321161575974e-07, + "loss": 0.9508, "step": 23892 }, { - "epoch": 0.67706650797699, + "epoch": 0.9348540574379842, "grad_norm": 0.0, - "learning_rate": 4.988876527051743e-06, - "loss": 0.7874, + "learning_rate": 2.2176773443250865e-07, + "loss": 0.9914, "step": 23893 }, { - "epoch": 0.6770948454192525, + "epoch": 0.9348931841302136, "grad_norm": 0.0, - "learning_rate": 4.988082309631407e-06, - "loss": 0.9088, + "learning_rate": 2.2150241427541518e-07, + "loss": 1.0222, "step": 23894 }, { - "epoch": 0.677123182861515, + "epoch": 0.9349323108224431, "grad_norm": 0.0, - "learning_rate": 4.987288134428318e-06, - "loss": 0.7418, + "learning_rate": 2.2123725114873927e-07, + "loss": 0.9085, "step": 23895 }, { - "epoch": 0.6771515203037773, + "epoch": 0.9349714375146725, "grad_norm": 0.0, - "learning_rate": 4.986494001449159e-06, - "loss": 0.8119, + "learning_rate": 2.2097224505673976e-07, + "loss": 0.9819, "step": 23896 }, { - "epoch": 0.6771798577460398, + "epoch": 0.935010564206902, "grad_norm": 0.0, - "learning_rate": 4.985699910700622e-06, - "loss": 0.8231, + "learning_rate": 2.2070739600367207e-07, + "loss": 0.9702, "step": 23897 }, { - "epoch": 0.6772081951883023, + "epoch": 0.9350496908991314, "grad_norm": 0.0, - "learning_rate": 4.984905862189402e-06, - "loss": 0.856, + "learning_rate": 2.204427039937884e-07, + "loss": 0.9219, "step": 23898 }, { - "epoch": 0.6772365326305647, + "epoch": 0.9350888175913609, "grad_norm": 0.0, - "learning_rate": 4.984111855922177e-06, - "loss": 0.7423, + "learning_rate": 2.2017816903134314e-07, + "loss": 0.9107, "step": 23899 }, { - "epoch": 0.6772648700728272, + "epoch": 0.9351279442835903, "grad_norm": 0.0, - "learning_rate": 4.98331789190564e-06, - "loss": 0.8306, + "learning_rate": 2.1991379112057954e-07, + "loss": 0.9053, "step": 23900 }, { - "epoch": 0.6772932075150897, + "epoch": 0.9351670709758197, "grad_norm": 0.0, - "learning_rate": 4.982523970146481e-06, - "loss": 0.7983, + "learning_rate": 2.196495702657453e-07, + "loss": 0.8531, "step": 23901 }, { - "epoch": 0.6773215449573522, + "epoch": 0.9352061976680491, "grad_norm": 0.0, - "learning_rate": 4.981730090651384e-06, - "loss": 0.8806, + "learning_rate": 2.193855064710837e-07, + "loss": 1.1429, "step": 23902 }, { - "epoch": 0.6773498823996146, + "epoch": 0.9352453243602786, "grad_norm": 0.0, - "learning_rate": 4.980936253427044e-06, - "loss": 0.8526, + "learning_rate": 2.191215997408358e-07, + "loss": 0.9218, "step": 23903 }, { - "epoch": 0.6773782198418771, + "epoch": 0.935284451052508, "grad_norm": 0.0, - "learning_rate": 4.980142458480136e-06, - "loss": 1.0063, + "learning_rate": 2.188578500792382e-07, + "loss": 1.0553, "step": 23904 }, { - "epoch": 0.6774065572841396, + "epoch": 0.9353235777447375, "grad_norm": 0.0, - "learning_rate": 4.979348705817356e-06, - "loss": 0.8943, + "learning_rate": 2.1859425749052865e-07, + "loss": 0.9672, "step": 23905 }, { - "epoch": 0.677434894726402, + "epoch": 0.9353627044369669, "grad_norm": 0.0, - "learning_rate": 4.978554995445384e-06, - "loss": 0.9427, + "learning_rate": 2.1833082197893595e-07, + "loss": 0.9034, "step": 23906 }, { - "epoch": 0.6774632321686644, + "epoch": 0.9354018311291964, "grad_norm": 0.0, - "learning_rate": 4.977761327370907e-06, - "loss": 0.9232, + "learning_rate": 2.1806754354869452e-07, + "loss": 1.0, "step": 23907 }, { - "epoch": 0.6774915696109269, + "epoch": 0.9354409578214258, "grad_norm": 0.0, - "learning_rate": 4.9769677016006145e-06, - "loss": 0.9119, + "learning_rate": 2.1780442220403097e-07, + "loss": 0.8711, "step": 23908 }, { - "epoch": 0.6775199070531894, + "epoch": 0.9354800845136552, "grad_norm": 0.0, - "learning_rate": 4.9761741181411845e-06, - "loss": 0.6942, + "learning_rate": 2.1754145794917082e-07, + "loss": 0.8305, "step": 23909 }, { - "epoch": 0.6775482444954518, + "epoch": 0.9355192112058847, "grad_norm": 0.0, - "learning_rate": 4.975380576999307e-06, - "loss": 0.8709, + "learning_rate": 2.172786507883362e-07, + "loss": 0.9694, "step": 23910 }, { - "epoch": 0.6775765819377143, + "epoch": 0.935558337898114, "grad_norm": 0.0, - "learning_rate": 4.974587078181667e-06, - "loss": 0.845, + "learning_rate": 2.1701600072574825e-07, + "loss": 0.9819, "step": 23911 }, { - "epoch": 0.6776049193799768, + "epoch": 0.9355974645903435, "grad_norm": 0.0, - "learning_rate": 4.973793621694943e-06, - "loss": 0.8713, + "learning_rate": 2.1675350776562466e-07, + "loss": 1.0279, "step": 23912 }, { - "epoch": 0.6776332568222392, + "epoch": 0.9356365912825729, "grad_norm": 0.0, - "learning_rate": 4.973000207545821e-06, - "loss": 0.8066, + "learning_rate": 2.1649117191218094e-07, + "loss": 0.9753, "step": 23913 }, { - "epoch": 0.6776615942645017, + "epoch": 0.9356757179748024, "grad_norm": 0.0, - "learning_rate": 4.972206835740986e-06, - "loss": 0.8007, + "learning_rate": 2.162289931696282e-07, + "loss": 0.9375, "step": 23914 }, { - "epoch": 0.6776899317067642, + "epoch": 0.9357148446670318, "grad_norm": 0.0, - "learning_rate": 4.97141350628712e-06, - "loss": 0.7807, + "learning_rate": 2.1596697154217972e-07, + "loss": 0.9138, "step": 23915 }, { - "epoch": 0.6777182691490267, + "epoch": 0.9357539713592613, "grad_norm": 0.0, - "learning_rate": 4.970620219190908e-06, - "loss": 0.7748, + "learning_rate": 2.15705107034041e-07, + "loss": 1.0131, "step": 23916 }, { - "epoch": 0.677746606591289, + "epoch": 0.9357930980514907, "grad_norm": 0.0, - "learning_rate": 4.969826974459027e-06, - "loss": 0.8939, + "learning_rate": 2.1544339964941762e-07, + "loss": 0.9243, "step": 23917 }, { - "epoch": 0.6777749440335515, + "epoch": 0.9358322247437202, "grad_norm": 0.0, - "learning_rate": 4.96903377209816e-06, - "loss": 0.8871, + "learning_rate": 2.1518184939251174e-07, + "loss": 1.0691, "step": 23918 }, { - "epoch": 0.677803281475814, + "epoch": 0.9358713514359496, "grad_norm": 0.0, - "learning_rate": 4.968240612114995e-06, - "loss": 0.7778, + "learning_rate": 2.1492045626752554e-07, + "loss": 0.9746, "step": 23919 }, { - "epoch": 0.6778316189180764, + "epoch": 0.9359104781281791, "grad_norm": 0.0, - "learning_rate": 4.967447494516203e-06, - "loss": 0.881, + "learning_rate": 2.146592202786557e-07, + "loss": 0.9454, "step": 23920 }, { - "epoch": 0.6778599563603389, + "epoch": 0.9359496048204085, "grad_norm": 0.0, - "learning_rate": 4.9666544193084735e-06, - "loss": 0.7727, + "learning_rate": 2.1439814143009553e-07, + "loss": 1.0503, "step": 23921 }, { - "epoch": 0.6778882938026014, + "epoch": 0.935988731512638, "grad_norm": 0.0, - "learning_rate": 4.965861386498479e-06, - "loss": 0.8541, + "learning_rate": 2.1413721972604052e-07, + "loss": 0.9406, "step": 23922 }, { - "epoch": 0.6779166312448638, + "epoch": 0.9360278582048673, "grad_norm": 0.0, - "learning_rate": 4.965068396092904e-06, - "loss": 0.8653, + "learning_rate": 2.1387645517067845e-07, + "loss": 0.9565, "step": 23923 }, { - "epoch": 0.6779449686871263, + "epoch": 0.9360669848970968, "grad_norm": 0.0, - "learning_rate": 4.964275448098427e-06, - "loss": 0.9411, + "learning_rate": 2.1361584776819933e-07, + "loss": 0.8121, "step": 23924 }, { - "epoch": 0.6779733061293888, + "epoch": 0.9361061115893262, "grad_norm": 0.0, - "learning_rate": 4.963482542521728e-06, - "loss": 0.967, + "learning_rate": 2.1335539752278532e-07, + "loss": 0.8664, "step": 23925 }, { - "epoch": 0.6780016435716513, + "epoch": 0.9361452382815557, "grad_norm": 0.0, - "learning_rate": 4.9626896793694905e-06, - "loss": 0.7994, + "learning_rate": 2.1309510443862092e-07, + "loss": 1.0511, "step": 23926 }, { - "epoch": 0.6780299810139137, + "epoch": 0.9361843649737851, "grad_norm": 0.0, - "learning_rate": 4.961896858648383e-06, - "loss": 0.9133, + "learning_rate": 2.1283496851988495e-07, + "loss": 0.8785, "step": 23927 }, { - "epoch": 0.6780583184561761, + "epoch": 0.9362234916660146, "grad_norm": 0.0, - "learning_rate": 4.96110408036509e-06, - "loss": 0.8373, + "learning_rate": 2.125749897707552e-07, + "loss": 0.9402, "step": 23928 }, { - "epoch": 0.6780866558984386, + "epoch": 0.936262618358244, "grad_norm": 0.0, - "learning_rate": 4.9603113445262915e-06, - "loss": 0.7892, + "learning_rate": 2.123151681954072e-07, + "loss": 0.9164, "step": 23929 }, { - "epoch": 0.678114993340701, + "epoch": 0.9363017450504735, "grad_norm": 0.0, - "learning_rate": 4.95951865113866e-06, - "loss": 0.751, + "learning_rate": 2.1205550379801322e-07, + "loss": 0.908, "step": 23930 }, { - "epoch": 0.6781433307829635, + "epoch": 0.9363408717427029, "grad_norm": 0.0, - "learning_rate": 4.958726000208872e-06, - "loss": 0.7974, + "learning_rate": 2.1179599658274208e-07, + "loss": 1.0064, "step": 23931 }, { - "epoch": 0.678171668225226, + "epoch": 0.9363799984349324, "grad_norm": 0.0, - "learning_rate": 4.957933391743614e-06, - "loss": 0.9037, + "learning_rate": 2.115366465537627e-07, + "loss": 1.1729, "step": 23932 }, { - "epoch": 0.6782000056674885, + "epoch": 0.9364191251271617, "grad_norm": 0.0, - "learning_rate": 4.957140825749549e-06, - "loss": 0.8177, + "learning_rate": 2.1127745371523845e-07, + "loss": 0.8987, "step": 23933 }, { - "epoch": 0.6782283431097509, + "epoch": 0.9364582518193912, "grad_norm": 0.0, - "learning_rate": 4.956348302233364e-06, - "loss": 0.8484, + "learning_rate": 2.110184180713326e-07, + "loss": 0.9733, "step": 23934 }, { - "epoch": 0.6782566805520134, + "epoch": 0.9364973785116206, "grad_norm": 0.0, - "learning_rate": 4.955555821201726e-06, - "loss": 0.7705, + "learning_rate": 2.1075953962620522e-07, + "loss": 1.0069, "step": 23935 }, { - "epoch": 0.6782850179942759, + "epoch": 0.93653650520385, "grad_norm": 0.0, - "learning_rate": 4.954763382661315e-06, - "loss": 0.8147, + "learning_rate": 2.1050081838401294e-07, + "loss": 1.0276, "step": 23936 }, { - "epoch": 0.6783133554365383, + "epoch": 0.9365756318960795, "grad_norm": 0.0, - "learning_rate": 4.953970986618806e-06, - "loss": 0.9164, + "learning_rate": 2.102422543489091e-07, + "loss": 0.9709, "step": 23937 }, { - "epoch": 0.6783416928788008, + "epoch": 0.9366147585883089, "grad_norm": 0.0, - "learning_rate": 4.9531786330808715e-06, - "loss": 0.9014, + "learning_rate": 2.099838475250493e-07, + "loss": 0.7798, "step": 23938 }, { - "epoch": 0.6783700303210632, + "epoch": 0.9366538852805384, "grad_norm": 0.0, - "learning_rate": 4.952386322054189e-06, - "loss": 0.8257, + "learning_rate": 2.097255979165802e-07, + "loss": 0.9615, "step": 23939 }, { - "epoch": 0.6783983677633256, + "epoch": 0.9366930119727678, "grad_norm": 0.0, - "learning_rate": 4.951594053545435e-06, - "loss": 0.8479, + "learning_rate": 2.0946750552765072e-07, + "loss": 0.9855, "step": 23940 }, { - "epoch": 0.6784267052055881, + "epoch": 0.9367321386649973, "grad_norm": 0.0, - "learning_rate": 4.950801827561274e-06, - "loss": 0.7943, + "learning_rate": 2.092095703624042e-07, + "loss": 0.9565, "step": 23941 }, { - "epoch": 0.6784550426478506, + "epoch": 0.9367712653572267, "grad_norm": 0.0, - "learning_rate": 4.950009644108388e-06, - "loss": 0.8367, + "learning_rate": 2.0895179242498398e-07, + "loss": 1.1479, "step": 23942 }, { - "epoch": 0.6784833800901131, + "epoch": 0.9368103920494562, "grad_norm": 0.0, - "learning_rate": 4.949217503193443e-06, - "loss": 0.8725, + "learning_rate": 2.0869417171952899e-07, + "loss": 0.9514, "step": 23943 }, { - "epoch": 0.6785117175323755, + "epoch": 0.9368495187416855, "grad_norm": 0.0, - "learning_rate": 4.948425404823114e-06, - "loss": 0.811, + "learning_rate": 2.0843670825017703e-07, + "loss": 0.9838, "step": 23944 }, { - "epoch": 0.678540054974638, + "epoch": 0.936888645433915, "grad_norm": 0.0, - "learning_rate": 4.947633349004077e-06, - "loss": 0.7941, + "learning_rate": 2.0817940202106145e-07, + "loss": 0.9467, "step": 23945 }, { - "epoch": 0.6785683924169005, + "epoch": 0.9369277721261444, "grad_norm": 0.0, - "learning_rate": 4.946841335742998e-06, - "loss": 0.8074, + "learning_rate": 2.0792225303631452e-07, + "loss": 0.9683, "step": 23946 }, { - "epoch": 0.6785967298591629, + "epoch": 0.9369668988183739, "grad_norm": 0.0, - "learning_rate": 4.94604936504655e-06, - "loss": 0.7444, + "learning_rate": 2.0766526130006626e-07, + "loss": 0.8665, "step": 23947 }, { - "epoch": 0.6786250673014254, + "epoch": 0.9370060255106033, "grad_norm": 0.0, - "learning_rate": 4.945257436921409e-06, - "loss": 0.8504, + "learning_rate": 2.0740842681644334e-07, + "loss": 0.9456, "step": 23948 }, { - "epoch": 0.6786534047436878, + "epoch": 0.9370451522028328, "grad_norm": 0.0, - "learning_rate": 4.944465551374238e-06, - "loss": 0.8286, + "learning_rate": 2.0715174958957029e-07, + "loss": 0.9709, "step": 23949 }, { - "epoch": 0.6786817421859503, + "epoch": 0.9370842788950622, "grad_norm": 0.0, - "learning_rate": 4.94367370841171e-06, - "loss": 0.7104, + "learning_rate": 2.0689522962356933e-07, + "loss": 0.8823, "step": 23950 }, { - "epoch": 0.6787100796282127, + "epoch": 0.9371234055872917, "grad_norm": 0.0, - "learning_rate": 4.942881908040498e-06, - "loss": 0.8195, + "learning_rate": 2.066388669225594e-07, + "loss": 0.914, "step": 23951 }, { - "epoch": 0.6787384170704752, + "epoch": 0.9371625322795211, "grad_norm": 0.0, - "learning_rate": 4.942090150267268e-06, - "loss": 0.8861, + "learning_rate": 2.0638266149065722e-07, + "loss": 1.0077, "step": 23952 }, { - "epoch": 0.6787667545127377, + "epoch": 0.9372016589717506, "grad_norm": 0.0, - "learning_rate": 4.9412984350986945e-06, - "loss": 0.85, + "learning_rate": 2.0612661333197725e-07, + "loss": 0.9843, "step": 23953 }, { - "epoch": 0.6787950919550001, + "epoch": 0.93724078566398, "grad_norm": 0.0, - "learning_rate": 4.94050676254144e-06, - "loss": 0.7703, + "learning_rate": 2.0587072245063289e-07, + "loss": 0.9018, "step": 23954 }, { - "epoch": 0.6788234293972626, + "epoch": 0.9372799123562094, "grad_norm": 0.0, - "learning_rate": 4.939715132602178e-06, - "loss": 0.8552, + "learning_rate": 2.0561498885072973e-07, + "loss": 0.9023, "step": 23955 }, { - "epoch": 0.6788517668395251, + "epoch": 0.9373190390484388, "grad_norm": 0.0, - "learning_rate": 4.938923545287572e-06, - "loss": 0.8643, + "learning_rate": 2.0535941253637892e-07, + "loss": 0.9916, "step": 23956 }, { - "epoch": 0.6788801042817876, + "epoch": 0.9373581657406683, "grad_norm": 0.0, - "learning_rate": 4.938132000604293e-06, - "loss": 0.9381, + "learning_rate": 2.0510399351168165e-07, + "loss": 0.9143, "step": 23957 }, { - "epoch": 0.67890844172405, + "epoch": 0.9373972924328977, "grad_norm": 0.0, - "learning_rate": 4.9373404985590116e-06, - "loss": 0.8744, + "learning_rate": 2.0484873178074128e-07, + "loss": 0.8412, "step": 23958 }, { - "epoch": 0.6789367791663125, + "epoch": 0.9374364191251272, "grad_norm": 0.0, - "learning_rate": 4.936549039158386e-06, - "loss": 0.8323, + "learning_rate": 2.0459362734765454e-07, + "loss": 1.0188, "step": 23959 }, { - "epoch": 0.678965116608575, + "epoch": 0.9374755458173566, "grad_norm": 0.0, - "learning_rate": 4.935757622409089e-06, - "loss": 0.8203, + "learning_rate": 2.043386802165215e-07, + "loss": 1.0173, "step": 23960 }, { - "epoch": 0.6789934540508373, + "epoch": 0.9375146725095861, "grad_norm": 0.0, - "learning_rate": 4.934966248317789e-06, - "loss": 0.8016, + "learning_rate": 2.040838903914333e-07, + "loss": 0.821, "step": 23961 }, { - "epoch": 0.6790217914930998, + "epoch": 0.9375537992018155, "grad_norm": 0.0, - "learning_rate": 4.934174916891146e-06, - "loss": 0.7558, + "learning_rate": 2.0382925787648444e-07, + "loss": 1.0586, "step": 23962 }, { - "epoch": 0.6790501289353623, + "epoch": 0.937592925894045, "grad_norm": 0.0, - "learning_rate": 4.933383628135828e-06, - "loss": 0.7949, + "learning_rate": 2.0357478267576058e-07, + "loss": 0.9701, "step": 23963 }, { - "epoch": 0.6790784663776247, + "epoch": 0.9376320525862744, "grad_norm": 0.0, - "learning_rate": 4.932592382058503e-06, - "loss": 0.6873, + "learning_rate": 2.0332046479335066e-07, + "loss": 0.9232, "step": 23964 }, { - "epoch": 0.6791068038198872, + "epoch": 0.9376711792785037, "grad_norm": 0.0, - "learning_rate": 4.931801178665833e-06, - "loss": 0.9239, + "learning_rate": 2.0306630423333917e-07, + "loss": 1.0096, "step": 23965 }, { - "epoch": 0.6791351412621497, + "epoch": 0.9377103059707332, "grad_norm": 0.0, - "learning_rate": 4.931010017964487e-06, - "loss": 0.8102, + "learning_rate": 2.028123009998062e-07, + "loss": 0.9646, "step": 23966 }, { - "epoch": 0.6791634787044122, + "epoch": 0.9377494326629626, "grad_norm": 0.0, - "learning_rate": 4.930218899961123e-06, - "loss": 0.8685, + "learning_rate": 2.025584550968296e-07, + "loss": 0.8561, "step": 23967 }, { - "epoch": 0.6791918161466746, + "epoch": 0.9377885593551921, "grad_norm": 0.0, - "learning_rate": 4.929427824662408e-06, - "loss": 0.8658, + "learning_rate": 2.023047665284883e-07, + "loss": 0.9902, "step": 23968 }, { - "epoch": 0.6792201535889371, + "epoch": 0.9378276860474215, "grad_norm": 0.0, - "learning_rate": 4.9286367920750075e-06, - "loss": 0.7927, + "learning_rate": 2.0205123529885683e-07, + "loss": 0.8823, "step": 23969 }, { - "epoch": 0.6792484910311996, + "epoch": 0.937866812739651, "grad_norm": 0.0, - "learning_rate": 4.927845802205581e-06, - "loss": 0.9024, + "learning_rate": 2.017978614120031e-07, + "loss": 0.9662, "step": 23970 }, { - "epoch": 0.6792768284734619, + "epoch": 0.9379059394318804, "grad_norm": 0.0, - "learning_rate": 4.927054855060794e-06, - "loss": 0.9369, + "learning_rate": 2.0154464487199932e-07, + "loss": 1.0573, "step": 23971 }, { - "epoch": 0.6793051659157244, + "epoch": 0.9379450661241099, "grad_norm": 0.0, - "learning_rate": 4.9262639506473064e-06, - "loss": 0.8184, + "learning_rate": 2.0129158568290896e-07, + "loss": 1.0137, "step": 23972 }, { - "epoch": 0.6793335033579869, + "epoch": 0.9379841928163393, "grad_norm": 0.0, - "learning_rate": 4.925473088971781e-06, - "loss": 0.7938, + "learning_rate": 2.0103868384879765e-07, + "loss": 0.8624, "step": 23973 }, { - "epoch": 0.6793618408002494, + "epoch": 0.9380233195085688, "grad_norm": 0.0, - "learning_rate": 4.924682270040883e-06, - "loss": 0.7781, + "learning_rate": 2.0078593937372658e-07, + "loss": 1.0421, "step": 23974 }, { - "epoch": 0.6793901782425118, + "epoch": 0.9380624462007982, "grad_norm": 0.0, - "learning_rate": 4.923891493861268e-06, - "loss": 0.7449, + "learning_rate": 2.0053335226175475e-07, + "loss": 0.9154, "step": 23975 }, { - "epoch": 0.6794185156847743, + "epoch": 0.9381015728930276, "grad_norm": 0.0, - "learning_rate": 4.9231007604396e-06, - "loss": 0.818, + "learning_rate": 2.0028092251693664e-07, + "loss": 0.9083, "step": 23976 }, { - "epoch": 0.6794468531270368, + "epoch": 0.938140699585257, "grad_norm": 0.0, - "learning_rate": 4.9223100697825385e-06, - "loss": 0.8565, + "learning_rate": 2.0002865014332795e-07, + "loss": 0.8689, "step": 23977 }, { - "epoch": 0.6794751905692992, + "epoch": 0.9381798262774865, "grad_norm": 0.0, - "learning_rate": 4.921519421896746e-06, - "loss": 0.7773, + "learning_rate": 1.9977653514497764e-07, + "loss": 1.0157, "step": 23978 }, { - "epoch": 0.6795035280115617, + "epoch": 0.9382189529697159, "grad_norm": 0.0, - "learning_rate": 4.920728816788885e-06, - "loss": 0.8595, + "learning_rate": 1.99524577525938e-07, + "loss": 0.9165, "step": 23979 }, { - "epoch": 0.6795318654538242, + "epoch": 0.9382580796619454, "grad_norm": 0.0, - "learning_rate": 4.919938254465606e-06, - "loss": 0.843, + "learning_rate": 1.9927277729025251e-07, + "loss": 0.9064, "step": 23980 }, { - "epoch": 0.6795602028960867, + "epoch": 0.9382972063541748, "grad_norm": 0.0, - "learning_rate": 4.919147734933575e-06, - "loss": 0.8306, + "learning_rate": 1.9902113444196348e-07, + "loss": 1.0819, "step": 23981 }, { - "epoch": 0.679588540338349, + "epoch": 0.9383363330464043, "grad_norm": 0.0, - "learning_rate": 4.9183572581994525e-06, - "loss": 0.8082, + "learning_rate": 1.987696489851154e-07, + "loss": 0.7748, "step": 23982 }, { - "epoch": 0.6796168777806115, + "epoch": 0.9383754597386337, "grad_norm": 0.0, - "learning_rate": 4.91756682426989e-06, - "loss": 0.942, + "learning_rate": 1.9851832092374512e-07, + "loss": 0.9872, "step": 23983 }, { - "epoch": 0.679645215222874, + "epoch": 0.9384145864308632, "grad_norm": 0.0, - "learning_rate": 4.916776433151553e-06, - "loss": 0.7736, + "learning_rate": 1.9826715026188824e-07, + "loss": 0.9576, "step": 23984 }, { - "epoch": 0.6796735526651364, + "epoch": 0.9384537131230926, "grad_norm": 0.0, - "learning_rate": 4.9159860848510936e-06, - "loss": 0.8521, + "learning_rate": 1.9801613700357936e-07, + "loss": 0.9317, "step": 23985 }, { - "epoch": 0.6797018901073989, + "epoch": 0.938492839815322, "grad_norm": 0.0, - "learning_rate": 4.9151957793751695e-06, - "loss": 0.7866, + "learning_rate": 1.977652811528463e-07, + "loss": 0.9574, "step": 23986 }, { - "epoch": 0.6797302275496614, + "epoch": 0.9385319665075514, "grad_norm": 0.0, - "learning_rate": 4.914405516730444e-06, - "loss": 0.8261, + "learning_rate": 1.9751458271372259e-07, + "loss": 0.9349, "step": 23987 }, { - "epoch": 0.6797585649919238, + "epoch": 0.9385710931997809, "grad_norm": 0.0, - "learning_rate": 4.913615296923566e-06, - "loss": 0.8481, + "learning_rate": 1.972640416902305e-07, + "loss": 0.8804, "step": 23988 }, { - "epoch": 0.6797869024341863, + "epoch": 0.9386102198920103, "grad_norm": 0.0, - "learning_rate": 4.912825119961194e-06, - "loss": 0.8627, + "learning_rate": 1.970136580863946e-07, + "loss": 0.9687, "step": 23989 }, { - "epoch": 0.6798152398764488, + "epoch": 0.9386493465842398, "grad_norm": 0.0, - "learning_rate": 4.9120349858499864e-06, - "loss": 0.8191, + "learning_rate": 1.9676343190623505e-07, + "loss": 1.0075, "step": 23990 }, { - "epoch": 0.6798435773187113, + "epoch": 0.9386884732764692, "grad_norm": 0.0, - "learning_rate": 4.911244894596596e-06, - "loss": 0.8471, + "learning_rate": 1.9651336315377079e-07, + "loss": 0.945, "step": 23991 }, { - "epoch": 0.6798719147609736, + "epoch": 0.9387275999686987, "grad_norm": 0.0, - "learning_rate": 4.910454846207685e-06, - "loss": 0.7612, + "learning_rate": 1.9626345183301753e-07, + "loss": 0.9572, "step": 23992 }, { - "epoch": 0.6799002522032361, + "epoch": 0.9387667266609281, "grad_norm": 0.0, - "learning_rate": 4.9096648406898974e-06, - "loss": 0.7385, + "learning_rate": 1.9601369794798986e-07, + "loss": 0.9521, "step": 23993 }, { - "epoch": 0.6799285896454986, + "epoch": 0.9388058533531575, "grad_norm": 0.0, - "learning_rate": 4.908874878049894e-06, - "loss": 0.8247, + "learning_rate": 1.9576410150269566e-07, + "loss": 0.8964, "step": 23994 }, { - "epoch": 0.679956927087761, + "epoch": 0.938844980045387, "grad_norm": 0.0, - "learning_rate": 4.9080849582943324e-06, - "loss": 0.8939, + "learning_rate": 1.9551466250114393e-07, + "loss": 0.8199, "step": 23995 }, { - "epoch": 0.6799852645300235, + "epoch": 0.9388841067376164, "grad_norm": 0.0, - "learning_rate": 4.907295081429857e-06, - "loss": 0.9035, + "learning_rate": 1.952653809473415e-07, + "loss": 0.9801, "step": 23996 }, { - "epoch": 0.680013601972286, + "epoch": 0.9389232334298458, "grad_norm": 0.0, - "learning_rate": 4.906505247463127e-06, - "loss": 0.8488, + "learning_rate": 1.950162568452918e-07, + "loss": 0.9498, "step": 23997 }, { - "epoch": 0.6800419394145485, + "epoch": 0.9389623601220752, "grad_norm": 0.0, - "learning_rate": 4.905715456400798e-06, - "loss": 0.796, + "learning_rate": 1.94767290198995e-07, + "loss": 1.016, "step": 23998 }, { - "epoch": 0.6800702768568109, + "epoch": 0.9390014868143047, "grad_norm": 0.0, - "learning_rate": 4.904925708249516e-06, - "loss": 0.783, + "learning_rate": 1.9451848101244786e-07, + "loss": 0.8599, "step": 23999 }, { - "epoch": 0.6800986142990734, + "epoch": 0.9390406135065341, "grad_norm": 0.0, - "learning_rate": 4.90413600301594e-06, - "loss": 0.8371, + "learning_rate": 1.942698292896461e-07, + "loss": 0.9704, "step": 24000 }, { - "epoch": 0.6801269517413359, + "epoch": 0.9390797401987636, "grad_norm": 0.0, - "learning_rate": 4.903346340706716e-06, - "loss": 0.7429, + "learning_rate": 1.9402133503458543e-07, + "loss": 1.0367, "step": 24001 }, { - "epoch": 0.6801552891835982, + "epoch": 0.939118866890993, "grad_norm": 0.0, - "learning_rate": 4.9025567213284975e-06, - "loss": 0.8316, + "learning_rate": 1.9377299825125373e-07, + "loss": 0.9037, "step": 24002 }, { - "epoch": 0.6801836266258607, + "epoch": 0.9391579935832225, "grad_norm": 0.0, - "learning_rate": 4.901767144887937e-06, - "loss": 0.8022, + "learning_rate": 1.9352481894364117e-07, + "loss": 1.0016, "step": 24003 }, { - "epoch": 0.6802119640681232, + "epoch": 0.9391971202754519, "grad_norm": 0.0, - "learning_rate": 4.900977611391685e-06, - "loss": 0.8837, + "learning_rate": 1.932767971157301e-07, + "loss": 0.8828, "step": 24004 }, { - "epoch": 0.6802403015103857, + "epoch": 0.9392362469676814, "grad_norm": 0.0, - "learning_rate": 4.900188120846392e-06, - "loss": 0.8866, + "learning_rate": 1.9302893277150513e-07, + "loss": 0.8619, "step": 24005 }, { - "epoch": 0.6802686389526481, + "epoch": 0.9392753736599108, "grad_norm": 0.0, - "learning_rate": 4.899398673258712e-06, - "loss": 0.8366, + "learning_rate": 1.9278122591494753e-07, + "loss": 0.9809, "step": 24006 }, { - "epoch": 0.6802969763949106, + "epoch": 0.9393145003521403, "grad_norm": 0.0, - "learning_rate": 4.898609268635287e-06, - "loss": 0.9085, + "learning_rate": 1.9253367655003406e-07, + "loss": 0.8924, "step": 24007 }, { - "epoch": 0.6803253138371731, + "epoch": 0.9393536270443696, "grad_norm": 0.0, - "learning_rate": 4.897819906982775e-06, - "loss": 0.8561, + "learning_rate": 1.922862846807405e-07, + "loss": 0.9828, "step": 24008 }, { - "epoch": 0.6803536512794355, + "epoch": 0.9393927537365991, "grad_norm": 0.0, - "learning_rate": 4.897030588307816e-06, - "loss": 0.8448, + "learning_rate": 1.9203905031103808e-07, + "loss": 1.0045, "step": 24009 }, { - "epoch": 0.680381988721698, + "epoch": 0.9394318804288285, "grad_norm": 0.0, - "learning_rate": 4.896241312617064e-06, - "loss": 0.9072, + "learning_rate": 1.917919734449003e-07, + "loss": 0.8941, "step": 24010 }, { - "epoch": 0.6804103261639605, + "epoch": 0.939471007121058, "grad_norm": 0.0, - "learning_rate": 4.89545207991717e-06, - "loss": 0.8889, + "learning_rate": 1.9154505408629177e-07, + "loss": 1.0043, "step": 24011 }, { - "epoch": 0.6804386636062228, + "epoch": 0.9395101338132874, "grad_norm": 0.0, - "learning_rate": 4.894662890214775e-06, - "loss": 0.8458, + "learning_rate": 1.9129829223917928e-07, + "loss": 1.0169, "step": 24012 }, { - "epoch": 0.6804670010484853, + "epoch": 0.9395492605055169, "grad_norm": 0.0, - "learning_rate": 4.893873743516534e-06, - "loss": 0.8191, + "learning_rate": 1.9105168790752527e-07, + "loss": 0.8351, "step": 24013 }, { - "epoch": 0.6804953384907478, + "epoch": 0.9395883871977463, "grad_norm": 0.0, - "learning_rate": 4.8930846398290865e-06, - "loss": 0.9117, + "learning_rate": 1.9080524109529096e-07, + "loss": 1.0406, "step": 24014 }, { - "epoch": 0.6805236759330103, + "epoch": 0.9396275138899758, "grad_norm": 0.0, - "learning_rate": 4.8922955791590845e-06, - "loss": 0.7569, + "learning_rate": 1.9055895180643213e-07, + "loss": 1.1168, "step": 24015 }, { - "epoch": 0.6805520133752727, + "epoch": 0.9396666405822052, "grad_norm": 0.0, - "learning_rate": 4.891506561513172e-06, - "loss": 0.7976, + "learning_rate": 1.9031282004490447e-07, + "loss": 0.8687, "step": 24016 }, { - "epoch": 0.6805803508175352, + "epoch": 0.9397057672744347, "grad_norm": 0.0, - "learning_rate": 4.890717586897997e-06, - "loss": 0.8845, + "learning_rate": 1.900668458146593e-07, + "loss": 1.0547, "step": 24017 }, { - "epoch": 0.6806086882597977, + "epoch": 0.939744893966664, "grad_norm": 0.0, - "learning_rate": 4.889928655320206e-06, - "loss": 0.8996, + "learning_rate": 1.8982102911965006e-07, + "loss": 0.9464, "step": 24018 }, { - "epoch": 0.6806370257020601, + "epoch": 0.9397840206588935, "grad_norm": 0.0, - "learning_rate": 4.889139766786447e-06, - "loss": 0.8578, + "learning_rate": 1.8957536996382142e-07, + "loss": 0.9719, "step": 24019 }, { - "epoch": 0.6806653631443226, + "epoch": 0.9398231473511229, "grad_norm": 0.0, - "learning_rate": 4.888350921303358e-06, - "loss": 0.8021, + "learning_rate": 1.893298683511202e-07, + "loss": 0.7711, "step": 24020 }, { - "epoch": 0.6806937005865851, + "epoch": 0.9398622740433524, "grad_norm": 0.0, - "learning_rate": 4.887562118877591e-06, - "loss": 0.8662, + "learning_rate": 1.8908452428548663e-07, + "loss": 0.9162, "step": 24021 }, { - "epoch": 0.6807220380288476, + "epoch": 0.9399014007355818, "grad_norm": 0.0, - "learning_rate": 4.886773359515783e-06, - "loss": 0.8758, + "learning_rate": 1.8883933777086194e-07, + "loss": 0.8777, "step": 24022 }, { - "epoch": 0.68075037547111, + "epoch": 0.9399405274278112, "grad_norm": 0.0, - "learning_rate": 4.885984643224581e-06, - "loss": 0.6956, + "learning_rate": 1.8859430881118303e-07, + "loss": 0.9519, "step": 24023 }, { - "epoch": 0.6807787129133724, + "epoch": 0.9399796541200407, "grad_norm": 0.0, - "learning_rate": 4.885195970010634e-06, - "loss": 0.9027, + "learning_rate": 1.8834943741038668e-07, + "loss": 1.0175, "step": 24024 }, { - "epoch": 0.6808070503556349, + "epoch": 0.9400187808122701, "grad_norm": 0.0, - "learning_rate": 4.884407339880577e-06, - "loss": 0.8958, + "learning_rate": 1.8810472357240316e-07, + "loss": 1.1065, "step": 24025 }, { - "epoch": 0.6808353877978973, + "epoch": 0.9400579075044996, "grad_norm": 0.0, - "learning_rate": 4.883618752841056e-06, - "loss": 0.8293, + "learning_rate": 1.8786016730116152e-07, + "loss": 0.841, "step": 24026 }, { - "epoch": 0.6808637252401598, + "epoch": 0.940097034196729, "grad_norm": 0.0, - "learning_rate": 4.882830208898718e-06, - "loss": 0.8577, + "learning_rate": 1.8761576860059084e-07, + "loss": 0.878, "step": 24027 }, { - "epoch": 0.6808920626824223, + "epoch": 0.9401361608889585, "grad_norm": 0.0, - "learning_rate": 4.882041708060198e-06, - "loss": 0.8615, + "learning_rate": 1.8737152747461686e-07, + "loss": 0.9792, "step": 24028 }, { - "epoch": 0.6809204001246848, + "epoch": 0.9401752875811878, "grad_norm": 0.0, - "learning_rate": 4.881253250332141e-06, - "loss": 0.8814, + "learning_rate": 1.8712744392715864e-07, + "loss": 0.9541, "step": 24029 }, { - "epoch": 0.6809487375669472, + "epoch": 0.9402144142734173, "grad_norm": 0.0, - "learning_rate": 4.880464835721188e-06, - "loss": 0.8144, + "learning_rate": 1.8688351796213865e-07, + "loss": 1.0098, "step": 24030 }, { - "epoch": 0.6809770750092097, + "epoch": 0.9402535409656467, "grad_norm": 0.0, - "learning_rate": 4.879676464233982e-06, - "loss": 0.9545, + "learning_rate": 1.866397495834704e-07, + "loss": 0.9143, "step": 24031 }, { - "epoch": 0.6810054124514722, + "epoch": 0.9402926676578762, "grad_norm": 0.0, - "learning_rate": 4.878888135877166e-06, - "loss": 0.7251, + "learning_rate": 1.86396138795073e-07, + "loss": 0.9639, "step": 24032 }, { - "epoch": 0.6810337498937346, + "epoch": 0.9403317943501056, "grad_norm": 0.0, - "learning_rate": 4.878099850657373e-06, - "loss": 0.9009, + "learning_rate": 1.8615268560085665e-07, + "loss": 0.9178, "step": 24033 }, { - "epoch": 0.681062087335997, + "epoch": 0.9403709210423351, "grad_norm": 0.0, - "learning_rate": 4.877311608581246e-06, - "loss": 0.9192, + "learning_rate": 1.859093900047304e-07, + "loss": 0.9475, "step": 24034 }, { - "epoch": 0.6810904247782595, + "epoch": 0.9404100477345645, "grad_norm": 0.0, - "learning_rate": 4.87652340965543e-06, - "loss": 0.8029, + "learning_rate": 1.8566625201060007e-07, + "loss": 0.9142, "step": 24035 }, { - "epoch": 0.6811187622205219, + "epoch": 0.940449174426794, "grad_norm": 0.0, - "learning_rate": 4.875735253886557e-06, - "loss": 0.8564, + "learning_rate": 1.8542327162237361e-07, + "loss": 0.9472, "step": 24036 }, { - "epoch": 0.6811470996627844, + "epoch": 0.9404883011190234, "grad_norm": 0.0, - "learning_rate": 4.874947141281271e-06, - "loss": 0.7873, + "learning_rate": 1.8518044884395015e-07, + "loss": 0.8751, "step": 24037 }, { - "epoch": 0.6811754371050469, + "epoch": 0.9405274278112529, "grad_norm": 0.0, - "learning_rate": 4.874159071846206e-06, - "loss": 0.83, + "learning_rate": 1.8493778367923097e-07, + "loss": 0.9039, "step": 24038 }, { - "epoch": 0.6812037745473094, + "epoch": 0.9405665545034823, "grad_norm": 0.0, - "learning_rate": 4.873371045588002e-06, - "loss": 0.9092, + "learning_rate": 1.8469527613210969e-07, + "loss": 0.8618, "step": 24039 }, { - "epoch": 0.6812321119895718, + "epoch": 0.9406056811957118, "grad_norm": 0.0, - "learning_rate": 4.872583062513301e-06, - "loss": 0.8048, + "learning_rate": 1.8445292620648536e-07, + "loss": 0.9859, "step": 24040 }, { - "epoch": 0.6812604494318343, + "epoch": 0.9406448078879411, "grad_norm": 0.0, - "learning_rate": 4.871795122628733e-06, - "loss": 0.8793, + "learning_rate": 1.842107339062471e-07, + "loss": 1.0376, "step": 24041 }, { - "epoch": 0.6812887868740968, + "epoch": 0.9406839345801706, "grad_norm": 0.0, - "learning_rate": 4.87100722594094e-06, - "loss": 0.7502, + "learning_rate": 1.8396869923528404e-07, + "loss": 0.9941, "step": 24042 }, { - "epoch": 0.6813171243163592, + "epoch": 0.9407230612724, "grad_norm": 0.0, - "learning_rate": 4.8702193724565575e-06, - "loss": 0.8972, + "learning_rate": 1.8372682219748417e-07, + "loss": 1.047, "step": 24043 }, { - "epoch": 0.6813454617586217, + "epoch": 0.9407621879646295, "grad_norm": 0.0, - "learning_rate": 4.869431562182221e-06, - "loss": 0.893, + "learning_rate": 1.8348510279672994e-07, + "loss": 0.9922, "step": 24044 }, { - "epoch": 0.6813737992008841, + "epoch": 0.9408013146568589, "grad_norm": 0.0, - "learning_rate": 4.868643795124572e-06, - "loss": 0.8245, + "learning_rate": 1.8324354103690488e-07, + "loss": 0.9396, "step": 24045 }, { - "epoch": 0.6814021366431466, + "epoch": 0.9408404413490884, "grad_norm": 0.0, - "learning_rate": 4.8678560712902375e-06, - "loss": 0.8752, + "learning_rate": 1.8300213692188816e-07, + "loss": 1.0088, "step": 24046 }, { - "epoch": 0.681430474085409, + "epoch": 0.9408795680413178, "grad_norm": 0.0, - "learning_rate": 4.867068390685858e-06, - "loss": 0.8782, + "learning_rate": 1.8276089045555444e-07, + "loss": 1.0516, "step": 24047 }, { - "epoch": 0.6814588115276715, + "epoch": 0.9409186947335473, "grad_norm": 0.0, - "learning_rate": 4.866280753318071e-06, - "loss": 0.7127, + "learning_rate": 1.8251980164177952e-07, + "loss": 1.0099, "step": 24048 }, { - "epoch": 0.681487148969934, + "epoch": 0.9409578214257767, "grad_norm": 0.0, - "learning_rate": 4.865493159193504e-06, - "loss": 0.8276, + "learning_rate": 1.822788704844347e-07, + "loss": 1.0838, "step": 24049 }, { - "epoch": 0.6815154864121964, + "epoch": 0.940996948118006, "grad_norm": 0.0, - "learning_rate": 4.864705608318798e-06, - "loss": 0.9233, + "learning_rate": 1.8203809698738917e-07, + "loss": 0.9369, "step": 24050 }, { - "epoch": 0.6815438238544589, + "epoch": 0.9410360748102355, "grad_norm": 0.0, - "learning_rate": 4.863918100700581e-06, - "loss": 0.7449, + "learning_rate": 1.8179748115450868e-07, + "loss": 0.9176, "step": 24051 }, { - "epoch": 0.6815721612967214, + "epoch": 0.9410752015024649, "grad_norm": 0.0, - "learning_rate": 4.863130636345488e-06, - "loss": 0.8041, + "learning_rate": 1.8155702298965904e-07, + "loss": 0.9605, "step": 24052 }, { - "epoch": 0.6816004987389839, + "epoch": 0.9411143281946944, "grad_norm": 0.0, - "learning_rate": 4.862343215260157e-06, - "loss": 0.8294, + "learning_rate": 1.813167224966994e-07, + "loss": 0.9547, "step": 24053 }, { - "epoch": 0.6816288361812463, + "epoch": 0.9411534548869238, "grad_norm": 0.0, - "learning_rate": 4.861555837451213e-06, - "loss": 0.8535, + "learning_rate": 1.8107657967948889e-07, + "loss": 0.8102, "step": 24054 }, { - "epoch": 0.6816571736235087, + "epoch": 0.9411925815791533, "grad_norm": 0.0, - "learning_rate": 4.8607685029252924e-06, - "loss": 0.8114, + "learning_rate": 1.8083659454188663e-07, + "loss": 0.957, "step": 24055 }, { - "epoch": 0.6816855110657712, + "epoch": 0.9412317082713827, "grad_norm": 0.0, - "learning_rate": 4.859981211689028e-06, - "loss": 0.9002, + "learning_rate": 1.8059676708774289e-07, + "loss": 0.9097, "step": 24056 }, { - "epoch": 0.6817138485080336, + "epoch": 0.9412708349636122, "grad_norm": 0.0, - "learning_rate": 4.859193963749049e-06, - "loss": 0.8227, + "learning_rate": 1.8035709732091123e-07, + "loss": 1.0832, "step": 24057 }, { - "epoch": 0.6817421859502961, + "epoch": 0.9413099616558416, "grad_norm": 0.0, - "learning_rate": 4.858406759111993e-06, - "loss": 0.8291, + "learning_rate": 1.801175852452397e-07, + "loss": 0.9414, "step": 24058 }, { - "epoch": 0.6817705233925586, + "epoch": 0.9413490883480711, "grad_norm": 0.0, - "learning_rate": 4.8576195977844835e-06, - "loss": 0.9728, + "learning_rate": 1.7987823086457524e-07, + "loss": 0.8999, "step": 24059 }, { - "epoch": 0.681798860834821, + "epoch": 0.9413882150403005, "grad_norm": 0.0, - "learning_rate": 4.856832479773152e-06, - "loss": 0.7981, + "learning_rate": 1.796390341827603e-07, + "loss": 0.9777, "step": 24060 }, { - "epoch": 0.6818271982770835, + "epoch": 0.94142734173253, "grad_norm": 0.0, - "learning_rate": 4.856045405084634e-06, - "loss": 0.8779, + "learning_rate": 1.7939999520363849e-07, + "loss": 0.9825, "step": 24061 }, { - "epoch": 0.681855535719346, + "epoch": 0.9414664684247593, "grad_norm": 0.0, - "learning_rate": 4.855258373725554e-06, - "loss": 0.7804, + "learning_rate": 1.7916111393104452e-07, + "loss": 0.8714, "step": 24062 }, { - "epoch": 0.6818838731616085, + "epoch": 0.9415055951169888, "grad_norm": 0.0, - "learning_rate": 4.854471385702541e-06, - "loss": 0.8627, + "learning_rate": 1.7892239036881864e-07, + "loss": 1.0618, "step": 24063 }, { - "epoch": 0.6819122106038709, + "epoch": 0.9415447218092182, "grad_norm": 0.0, - "learning_rate": 4.853684441022231e-06, - "loss": 0.8733, + "learning_rate": 1.7868382452079224e-07, + "loss": 0.9474, "step": 24064 }, { - "epoch": 0.6819405480461334, + "epoch": 0.9415838485014477, "grad_norm": 0.0, - "learning_rate": 4.8528975396912435e-06, - "loss": 0.8572, + "learning_rate": 1.784454163907967e-07, + "loss": 0.9399, "step": 24065 }, { - "epoch": 0.6819688854883958, + "epoch": 0.9416229751936771, "grad_norm": 0.0, - "learning_rate": 4.852110681716215e-06, - "loss": 0.8568, + "learning_rate": 1.782071659826601e-07, + "loss": 0.8917, "step": 24066 }, { - "epoch": 0.6819972229306582, + "epoch": 0.9416621018859066, "grad_norm": 0.0, - "learning_rate": 4.8513238671037665e-06, - "loss": 0.8616, + "learning_rate": 1.7796907330021042e-07, + "loss": 0.9911, "step": 24067 }, { - "epoch": 0.6820255603729207, + "epoch": 0.941701228578136, "grad_norm": 0.0, - "learning_rate": 4.85053709586053e-06, - "loss": 0.7588, + "learning_rate": 1.7773113834726796e-07, + "loss": 0.8542, "step": 24068 }, { - "epoch": 0.6820538978151832, + "epoch": 0.9417403552703655, "grad_norm": 0.0, - "learning_rate": 4.84975036799313e-06, - "loss": 0.7938, + "learning_rate": 1.7749336112765747e-07, + "loss": 0.8665, "step": 24069 }, { - "epoch": 0.6820822352574457, + "epoch": 0.9417794819625949, "grad_norm": 0.0, - "learning_rate": 4.848963683508196e-06, - "loss": 0.8779, + "learning_rate": 1.7725574164519365e-07, + "loss": 0.9671, "step": 24070 }, { - "epoch": 0.6821105726997081, + "epoch": 0.9418186086548244, "grad_norm": 0.0, - "learning_rate": 4.8481770424123585e-06, - "loss": 0.9055, + "learning_rate": 1.770182799036946e-07, + "loss": 0.914, "step": 24071 }, { - "epoch": 0.6821389101419706, + "epoch": 0.9418577353470537, "grad_norm": 0.0, - "learning_rate": 4.847390444712234e-06, - "loss": 0.8816, + "learning_rate": 1.7678097590697275e-07, + "loss": 0.9966, "step": 24072 }, { - "epoch": 0.6821672475842331, + "epoch": 0.9418968620392832, "grad_norm": 0.0, - "learning_rate": 4.846603890414453e-06, - "loss": 0.7994, + "learning_rate": 1.7654382965884066e-07, + "loss": 0.8588, "step": 24073 }, { - "epoch": 0.6821955850264955, + "epoch": 0.9419359887315126, "grad_norm": 0.0, - "learning_rate": 4.845817379525646e-06, - "loss": 0.8929, + "learning_rate": 1.7630684116310416e-07, + "loss": 1.0259, "step": 24074 }, { - "epoch": 0.682223922468758, + "epoch": 0.9419751154237421, "grad_norm": 0.0, - "learning_rate": 4.845030912052428e-06, - "loss": 0.8902, + "learning_rate": 1.760700104235691e-07, + "loss": 1.1031, "step": 24075 }, { - "epoch": 0.6822522599110205, + "epoch": 0.9420142421159715, "grad_norm": 0.0, - "learning_rate": 4.8442444880014295e-06, - "loss": 0.8871, + "learning_rate": 1.7583333744404018e-07, + "loss": 0.8985, "step": 24076 }, { - "epoch": 0.682280597353283, + "epoch": 0.942053368808201, "grad_norm": 0.0, - "learning_rate": 4.843458107379278e-06, - "loss": 0.9139, + "learning_rate": 1.7559682222831887e-07, + "loss": 0.8401, "step": 24077 }, { - "epoch": 0.6823089347955453, + "epoch": 0.9420924955004304, "grad_norm": 0.0, - "learning_rate": 4.84267177019259e-06, - "loss": 0.8502, + "learning_rate": 1.7536046478019987e-07, + "loss": 1.0553, "step": 24078 }, { - "epoch": 0.6823372722378078, + "epoch": 0.9421316221926598, "grad_norm": 0.0, - "learning_rate": 4.841885476447996e-06, - "loss": 0.827, + "learning_rate": 1.7512426510348234e-07, + "loss": 1.0161, "step": 24079 }, { - "epoch": 0.6823656096800703, + "epoch": 0.9421707488848893, "grad_norm": 0.0, - "learning_rate": 4.841099226152113e-06, - "loss": 0.7851, + "learning_rate": 1.7488822320195665e-07, + "loss": 0.7915, "step": 24080 }, { - "epoch": 0.6823939471223327, + "epoch": 0.9422098755771187, "grad_norm": 0.0, - "learning_rate": 4.840313019311567e-06, - "loss": 0.899, + "learning_rate": 1.7465233907941636e-07, + "loss": 0.9144, "step": 24081 }, { - "epoch": 0.6824222845645952, + "epoch": 0.9422490022693482, "grad_norm": 0.0, - "learning_rate": 4.8395268559329785e-06, - "loss": 0.8804, + "learning_rate": 1.7441661273964628e-07, + "loss": 0.9363, "step": 24082 }, { - "epoch": 0.6824506220068577, + "epoch": 0.9422881289615775, "grad_norm": 0.0, - "learning_rate": 4.838740736022974e-06, - "loss": 0.8065, + "learning_rate": 1.7418104418643335e-07, + "loss": 0.9447, "step": 24083 }, { - "epoch": 0.6824789594491201, + "epoch": 0.942327255653807, "grad_norm": 0.0, - "learning_rate": 4.837954659588172e-06, - "loss": 0.855, + "learning_rate": 1.7394563342356008e-07, + "loss": 0.8745, "step": 24084 }, { - "epoch": 0.6825072968913826, + "epoch": 0.9423663823460364, "grad_norm": 0.0, - "learning_rate": 4.837168626635198e-06, - "loss": 0.8564, + "learning_rate": 1.7371038045480792e-07, + "loss": 0.9037, "step": 24085 }, { - "epoch": 0.6825356343336451, + "epoch": 0.9424055090382659, "grad_norm": 0.0, - "learning_rate": 4.8363826371706665e-06, - "loss": 0.7947, + "learning_rate": 1.7347528528395386e-07, + "loss": 0.9435, "step": 24086 }, { - "epoch": 0.6825639717759076, + "epoch": 0.9424446357304953, "grad_norm": 0.0, - "learning_rate": 4.835596691201207e-06, - "loss": 0.8441, + "learning_rate": 1.7324034791477374e-07, + "loss": 0.9353, "step": 24087 }, { - "epoch": 0.6825923092181699, + "epoch": 0.9424837624227248, "grad_norm": 0.0, - "learning_rate": 4.8348107887334285e-06, - "loss": 0.8288, + "learning_rate": 1.73005568351039e-07, + "loss": 0.9912, "step": 24088 }, { - "epoch": 0.6826206466604324, + "epoch": 0.9425228891149542, "grad_norm": 0.0, - "learning_rate": 4.834024929773956e-06, - "loss": 0.7657, + "learning_rate": 1.7277094659652104e-07, + "loss": 0.9239, "step": 24089 }, { - "epoch": 0.6826489841026949, + "epoch": 0.9425620158071837, "grad_norm": 0.0, - "learning_rate": 4.833239114329415e-06, - "loss": 0.8801, + "learning_rate": 1.72536482654988e-07, + "loss": 0.958, "step": 24090 }, { - "epoch": 0.6826773215449573, + "epoch": 0.9426011424994131, "grad_norm": 0.0, - "learning_rate": 4.832453342406416e-06, - "loss": 0.8021, + "learning_rate": 1.7230217653020576e-07, + "loss": 0.8818, "step": 24091 }, { - "epoch": 0.6827056589872198, + "epoch": 0.9426402691916426, "grad_norm": 0.0, - "learning_rate": 4.831667614011582e-06, - "loss": 0.8478, + "learning_rate": 1.720680282259335e-07, + "loss": 0.8405, "step": 24092 }, { - "epoch": 0.6827339964294823, + "epoch": 0.942679395883872, "grad_norm": 0.0, - "learning_rate": 4.830881929151533e-06, - "loss": 0.7739, + "learning_rate": 1.7183403774593488e-07, + "loss": 1.0781, "step": 24093 }, { - "epoch": 0.6827623338717448, + "epoch": 0.9427185225761014, "grad_norm": 0.0, - "learning_rate": 4.830096287832882e-06, - "loss": 0.7556, + "learning_rate": 1.716002050939658e-07, + "loss": 1.0605, "step": 24094 }, { - "epoch": 0.6827906713140072, + "epoch": 0.9427576492683308, "grad_norm": 0.0, - "learning_rate": 4.82931069006225e-06, - "loss": 0.7685, + "learning_rate": 1.7136653027378214e-07, + "loss": 0.9239, "step": 24095 }, { - "epoch": 0.6828190087562697, + "epoch": 0.9427967759605603, "grad_norm": 0.0, - "learning_rate": 4.8285251358462535e-06, - "loss": 0.839, + "learning_rate": 1.7113301328913535e-07, + "loss": 0.8791, "step": 24096 }, { - "epoch": 0.6828473461985322, + "epoch": 0.9428359026527897, "grad_norm": 0.0, - "learning_rate": 4.827739625191511e-06, - "loss": 0.7725, + "learning_rate": 1.7089965414377685e-07, + "loss": 0.9286, "step": 24097 }, { - "epoch": 0.6828756836407945, + "epoch": 0.9428750293450192, "grad_norm": 0.0, - "learning_rate": 4.826954158104641e-06, - "loss": 0.9173, + "learning_rate": 1.7066645284145367e-07, + "loss": 0.9838, "step": 24098 }, { - "epoch": 0.682904021083057, + "epoch": 0.9429141560372486, "grad_norm": 0.0, - "learning_rate": 4.826168734592254e-06, - "loss": 0.9032, + "learning_rate": 1.7043340938590946e-07, + "loss": 1.0658, "step": 24099 }, { - "epoch": 0.6829323585253195, + "epoch": 0.9429532827294781, "grad_norm": 0.0, - "learning_rate": 4.825383354660973e-06, - "loss": 0.7468, + "learning_rate": 1.7020052378088793e-07, + "loss": 1.0736, "step": 24100 }, { - "epoch": 0.682960695967582, + "epoch": 0.9429924094217075, "grad_norm": 0.0, - "learning_rate": 4.824598018317406e-06, - "loss": 0.7773, + "learning_rate": 1.6996779603012825e-07, + "loss": 0.9186, "step": 24101 }, { - "epoch": 0.6829890334098444, + "epoch": 0.943031536113937, "grad_norm": 0.0, - "learning_rate": 4.823812725568171e-06, - "loss": 0.7787, + "learning_rate": 1.697352261373686e-07, + "loss": 0.8288, "step": 24102 }, { - "epoch": 0.6830173708521069, + "epoch": 0.9430706628061664, "grad_norm": 0.0, - "learning_rate": 4.823027476419887e-06, - "loss": 0.7645, + "learning_rate": 1.6950281410634262e-07, + "loss": 1.0552, "step": 24103 }, { - "epoch": 0.6830457082943694, + "epoch": 0.9431097894983959, "grad_norm": 0.0, - "learning_rate": 4.822242270879161e-06, - "loss": 0.7855, + "learning_rate": 1.69270559940784e-07, + "loss": 0.8926, "step": 24104 }, { - "epoch": 0.6830740457366318, + "epoch": 0.9431489161906252, "grad_norm": 0.0, - "learning_rate": 4.821457108952613e-06, - "loss": 0.7197, + "learning_rate": 1.6903846364442089e-07, + "loss": 0.9177, "step": 24105 }, { - "epoch": 0.6831023831788943, + "epoch": 0.9431880428828547, "grad_norm": 0.0, - "learning_rate": 4.820671990646857e-06, - "loss": 0.7884, + "learning_rate": 1.688065252209814e-07, + "loss": 0.9748, "step": 24106 }, { - "epoch": 0.6831307206211568, + "epoch": 0.9432271695750841, "grad_norm": 0.0, - "learning_rate": 4.819886915968501e-06, - "loss": 0.8858, + "learning_rate": 1.6857474467418921e-07, + "loss": 0.9197, "step": 24107 }, { - "epoch": 0.6831590580634191, + "epoch": 0.9432662962673135, "grad_norm": 0.0, - "learning_rate": 4.819101884924161e-06, - "loss": 0.8335, + "learning_rate": 1.6834312200776804e-07, + "loss": 0.8849, "step": 24108 }, { - "epoch": 0.6831873955056816, + "epoch": 0.943305422959543, "grad_norm": 0.0, - "learning_rate": 4.81831689752045e-06, - "loss": 0.8596, + "learning_rate": 1.681116572254371e-07, + "loss": 0.8537, "step": 24109 }, { - "epoch": 0.6832157329479441, + "epoch": 0.9433445496517724, "grad_norm": 0.0, - "learning_rate": 4.817531953763979e-06, - "loss": 0.8699, + "learning_rate": 1.678803503309123e-07, + "loss": 0.79, "step": 24110 }, { - "epoch": 0.6832440703902066, + "epoch": 0.9433836763440019, "grad_norm": 0.0, - "learning_rate": 4.8167470536613645e-06, - "loss": 0.8582, + "learning_rate": 1.676492013279074e-07, + "loss": 0.8884, "step": 24111 }, { - "epoch": 0.683272407832469, + "epoch": 0.9434228030362313, "grad_norm": 0.0, - "learning_rate": 4.815962197219211e-06, - "loss": 0.9058, + "learning_rate": 1.6741821022013716e-07, + "loss": 1.015, "step": 24112 }, { - "epoch": 0.6833007452747315, + "epoch": 0.9434619297284608, "grad_norm": 0.0, - "learning_rate": 4.815177384444133e-06, - "loss": 0.7606, + "learning_rate": 1.671873770113097e-07, + "loss": 0.8792, "step": 24113 }, { - "epoch": 0.683329082716994, + "epoch": 0.9435010564206902, "grad_norm": 0.0, - "learning_rate": 4.814392615342746e-06, - "loss": 0.7985, + "learning_rate": 1.6695670170513212e-07, + "loss": 1.0586, "step": 24114 }, { - "epoch": 0.6833574201592564, + "epoch": 0.9435401831129197, "grad_norm": 0.0, - "learning_rate": 4.813607889921651e-06, - "loss": 0.8734, + "learning_rate": 1.6672618430530584e-07, + "loss": 1.0432, "step": 24115 }, { - "epoch": 0.6833857576015189, + "epoch": 0.943579309805149, "grad_norm": 0.0, - "learning_rate": 4.8128232081874656e-06, - "loss": 0.7871, + "learning_rate": 1.6649582481553794e-07, + "loss": 1.0432, "step": 24116 }, { - "epoch": 0.6834140950437814, + "epoch": 0.9436184364973785, "grad_norm": 0.0, - "learning_rate": 4.812038570146794e-06, - "loss": 0.7431, + "learning_rate": 1.6626562323952434e-07, + "loss": 0.9116, "step": 24117 }, { - "epoch": 0.6834424324860439, + "epoch": 0.9436575631896079, "grad_norm": 0.0, - "learning_rate": 4.811253975806247e-06, - "loss": 0.9622, + "learning_rate": 1.6603557958096206e-07, + "loss": 1.0153, "step": 24118 }, { - "epoch": 0.6834707699283062, + "epoch": 0.9436966898818374, "grad_norm": 0.0, - "learning_rate": 4.810469425172439e-06, - "loss": 0.8524, + "learning_rate": 1.6580569384354594e-07, + "loss": 0.9883, "step": 24119 }, { - "epoch": 0.6834991073705687, + "epoch": 0.9437358165740668, "grad_norm": 0.0, - "learning_rate": 4.80968491825197e-06, - "loss": 0.7399, + "learning_rate": 1.6557596603096748e-07, + "loss": 1.0235, "step": 24120 }, { - "epoch": 0.6835274448128312, + "epoch": 0.9437749432662963, "grad_norm": 0.0, - "learning_rate": 4.8089004550514525e-06, - "loss": 0.9526, + "learning_rate": 1.6534639614691594e-07, + "loss": 0.9145, "step": 24121 }, { - "epoch": 0.6835557822550936, + "epoch": 0.9438140699585257, "grad_norm": 0.0, - "learning_rate": 4.808116035577495e-06, - "loss": 0.773, + "learning_rate": 1.6511698419507728e-07, + "loss": 0.8923, "step": 24122 }, { - "epoch": 0.6835841196973561, + "epoch": 0.9438531966507552, "grad_norm": 0.0, - "learning_rate": 4.807331659836703e-06, - "loss": 0.8172, + "learning_rate": 1.6488773017913629e-07, + "loss": 0.8952, "step": 24123 }, { - "epoch": 0.6836124571396186, + "epoch": 0.9438923233429846, "grad_norm": 0.0, - "learning_rate": 4.8065473278356885e-06, - "loss": 0.9573, + "learning_rate": 1.646586341027745e-07, + "loss": 0.8384, "step": 24124 }, { - "epoch": 0.683640794581881, + "epoch": 0.9439314500352141, "grad_norm": 0.0, - "learning_rate": 4.80576303958105e-06, - "loss": 0.7693, + "learning_rate": 1.6442969596967006e-07, + "loss": 0.9557, "step": 24125 }, { - "epoch": 0.6836691320241435, + "epoch": 0.9439705767274434, "grad_norm": 0.0, - "learning_rate": 4.8049787950794e-06, - "loss": 0.8392, + "learning_rate": 1.6420091578350117e-07, + "loss": 1.0549, "step": 24126 }, { - "epoch": 0.683697469466406, + "epoch": 0.9440097034196729, "grad_norm": 0.0, - "learning_rate": 4.8041945943373455e-06, - "loss": 0.9366, + "learning_rate": 1.639722935479393e-07, + "loss": 1.0306, "step": 24127 }, { - "epoch": 0.6837258069086685, + "epoch": 0.9440488301119023, "grad_norm": 0.0, - "learning_rate": 4.803410437361485e-06, - "loss": 0.7871, + "learning_rate": 1.6374382926665931e-07, + "loss": 1.0681, "step": 24128 }, { - "epoch": 0.6837541443509308, + "epoch": 0.9440879568041318, "grad_norm": 0.0, - "learning_rate": 4.802626324158432e-06, - "loss": 0.8183, + "learning_rate": 1.6351552294332496e-07, + "loss": 0.8578, "step": 24129 }, { - "epoch": 0.6837824817931933, + "epoch": 0.9441270834963612, "grad_norm": 0.0, - "learning_rate": 4.8018422547347855e-06, - "loss": 0.8721, + "learning_rate": 1.6328737458160771e-07, + "loss": 0.9713, "step": 24130 }, { - "epoch": 0.6838108192354558, + "epoch": 0.9441662101885907, "grad_norm": 0.0, - "learning_rate": 4.801058229097151e-06, - "loss": 0.9084, + "learning_rate": 1.63059384185168e-07, + "loss": 0.8653, "step": 24131 }, { - "epoch": 0.6838391566777182, + "epoch": 0.9442053368808201, "grad_norm": 0.0, - "learning_rate": 4.800274247252137e-06, - "loss": 0.7543, + "learning_rate": 1.628315517576695e-07, + "loss": 0.9437, "step": 24132 }, { - "epoch": 0.6838674941199807, + "epoch": 0.9442444635730496, "grad_norm": 0.0, - "learning_rate": 4.79949030920634e-06, - "loss": 0.8033, + "learning_rate": 1.6260387730276827e-07, + "loss": 1.0034, "step": 24133 }, { - "epoch": 0.6838958315622432, + "epoch": 0.944283590265279, "grad_norm": 0.0, - "learning_rate": 4.798706414966367e-06, - "loss": 0.9229, + "learning_rate": 1.6237636082412135e-07, + "loss": 0.971, "step": 24134 }, { - "epoch": 0.6839241690045057, + "epoch": 0.9443227169575084, "grad_norm": 0.0, - "learning_rate": 4.797922564538822e-06, - "loss": 0.7639, + "learning_rate": 1.6214900232538356e-07, + "loss": 0.9677, "step": 24135 }, { - "epoch": 0.6839525064467681, + "epoch": 0.9443618436497379, "grad_norm": 0.0, - "learning_rate": 4.7971387579303065e-06, - "loss": 0.9434, + "learning_rate": 1.6192180181020645e-07, + "loss": 1.0071, "step": 24136 }, { - "epoch": 0.6839808438890306, + "epoch": 0.9444009703419672, "grad_norm": 0.0, - "learning_rate": 4.796354995147428e-06, - "loss": 0.7726, + "learning_rate": 1.61694759282236e-07, + "loss": 0.9118, "step": 24137 }, { - "epoch": 0.6840091813312931, + "epoch": 0.9444400970341967, "grad_norm": 0.0, - "learning_rate": 4.795571276196779e-06, - "loss": 0.839, + "learning_rate": 1.6146787474512037e-07, + "loss": 0.9053, "step": 24138 }, { - "epoch": 0.6840375187735555, + "epoch": 0.9444792237264261, "grad_norm": 0.0, - "learning_rate": 4.7947876010849655e-06, - "loss": 0.9142, + "learning_rate": 1.6124114820250225e-07, + "loss": 0.9796, "step": 24139 }, { - "epoch": 0.684065856215818, + "epoch": 0.9445183504186556, "grad_norm": 0.0, - "learning_rate": 4.7940039698185935e-06, - "loss": 0.8753, + "learning_rate": 1.6101457965802204e-07, + "loss": 0.8712, "step": 24140 }, { - "epoch": 0.6840941936580804, + "epoch": 0.944557477110885, "grad_norm": 0.0, - "learning_rate": 4.7932203824042555e-06, - "loss": 0.8285, + "learning_rate": 1.6078816911531904e-07, + "loss": 1.0473, "step": 24141 }, { - "epoch": 0.6841225311003429, + "epoch": 0.9445966038031145, "grad_norm": 0.0, - "learning_rate": 4.792436838848555e-06, - "loss": 0.8478, + "learning_rate": 1.605619165780281e-07, + "loss": 0.9238, "step": 24142 }, { - "epoch": 0.6841508685426053, + "epoch": 0.9446357304953439, "grad_norm": 0.0, - "learning_rate": 4.7916533391580975e-06, - "loss": 0.8353, + "learning_rate": 1.6033582204978526e-07, + "loss": 0.7951, "step": 24143 }, { - "epoch": 0.6841792059848678, + "epoch": 0.9446748571875734, "grad_norm": 0.0, - "learning_rate": 4.790869883339473e-06, - "loss": 0.8615, + "learning_rate": 1.6010988553421757e-07, + "loss": 1.0145, "step": 24144 }, { - "epoch": 0.6842075434271303, + "epoch": 0.9447139838798028, "grad_norm": 0.0, - "learning_rate": 4.790086471399287e-06, - "loss": 0.8832, + "learning_rate": 1.5988410703495548e-07, + "loss": 0.8051, "step": 24145 }, { - "epoch": 0.6842358808693927, + "epoch": 0.9447531105720323, "grad_norm": 0.0, - "learning_rate": 4.789303103344138e-06, - "loss": 0.8529, + "learning_rate": 1.59658486555625e-07, + "loss": 0.9729, "step": 24146 }, { - "epoch": 0.6842642183116552, + "epoch": 0.9447922372642616, "grad_norm": 0.0, - "learning_rate": 4.7885197791806245e-06, - "loss": 0.8456, + "learning_rate": 1.594330240998476e-07, + "loss": 1.1395, "step": 24147 }, { - "epoch": 0.6842925557539177, + "epoch": 0.9448313639564911, "grad_norm": 0.0, - "learning_rate": 4.787736498915343e-06, - "loss": 0.9223, + "learning_rate": 1.5920771967124494e-07, + "loss": 1.0254, "step": 24148 }, { - "epoch": 0.6843208931961801, + "epoch": 0.9448704906487205, "grad_norm": 0.0, - "learning_rate": 4.786953262554892e-06, - "loss": 0.7055, + "learning_rate": 1.5898257327343624e-07, + "loss": 0.8835, "step": 24149 }, { - "epoch": 0.6843492306384426, + "epoch": 0.94490961734095, "grad_norm": 0.0, - "learning_rate": 4.78617007010587e-06, - "loss": 0.8262, + "learning_rate": 1.5875758491003422e-07, + "loss": 0.8902, "step": 24150 }, { - "epoch": 0.684377568080705, + "epoch": 0.9449487440331794, "grad_norm": 0.0, - "learning_rate": 4.7853869215748764e-06, - "loss": 0.7906, + "learning_rate": 1.585327545846549e-07, + "loss": 1.0015, "step": 24151 }, { - "epoch": 0.6844059055229675, + "epoch": 0.9449878707254089, "grad_norm": 0.0, - "learning_rate": 4.784603816968502e-06, - "loss": 0.7743, + "learning_rate": 1.5830808230090644e-07, + "loss": 1.0474, "step": 24152 }, { - "epoch": 0.6844342429652299, + "epoch": 0.9450269974176383, "grad_norm": 0.0, - "learning_rate": 4.783820756293349e-06, - "loss": 0.8898, + "learning_rate": 1.5808356806239932e-07, + "loss": 0.983, "step": 24153 }, { - "epoch": 0.6844625804074924, + "epoch": 0.9450661241098678, "grad_norm": 0.0, - "learning_rate": 4.783037739556008e-06, - "loss": 0.8356, + "learning_rate": 1.5785921187273622e-07, + "loss": 0.9866, "step": 24154 }, { - "epoch": 0.6844909178497549, + "epoch": 0.9451052508020972, "grad_norm": 0.0, - "learning_rate": 4.782254766763078e-06, - "loss": 0.7936, + "learning_rate": 1.5763501373552092e-07, + "loss": 0.8979, "step": 24155 }, { - "epoch": 0.6845192552920173, + "epoch": 0.9451443774943267, "grad_norm": 0.0, - "learning_rate": 4.781471837921157e-06, - "loss": 0.8603, + "learning_rate": 1.57410973654355e-07, + "loss": 0.8893, "step": 24156 }, { - "epoch": 0.6845475927342798, + "epoch": 0.9451835041865561, "grad_norm": 0.0, - "learning_rate": 4.780688953036831e-06, - "loss": 0.8816, + "learning_rate": 1.5718709163283663e-07, + "loss": 1.0736, "step": 24157 }, { - "epoch": 0.6845759301765423, + "epoch": 0.9452226308787856, "grad_norm": 0.0, - "learning_rate": 4.779906112116702e-06, - "loss": 0.8677, + "learning_rate": 1.5696336767455743e-07, + "loss": 1.0471, "step": 24158 }, { - "epoch": 0.6846042676188048, + "epoch": 0.9452617575710149, "grad_norm": 0.0, - "learning_rate": 4.779123315167362e-06, - "loss": 0.7912, + "learning_rate": 1.5673980178311455e-07, + "loss": 0.841, "step": 24159 }, { - "epoch": 0.6846326050610672, + "epoch": 0.9453008842632444, "grad_norm": 0.0, - "learning_rate": 4.778340562195405e-06, - "loss": 0.9097, + "learning_rate": 1.5651639396209395e-07, + "loss": 0.9087, "step": 24160 }, { - "epoch": 0.6846609425033297, + "epoch": 0.9453400109554738, "grad_norm": 0.0, - "learning_rate": 4.7775578532074275e-06, - "loss": 0.8354, + "learning_rate": 1.5629314421508724e-07, + "loss": 0.9307, "step": 24161 }, { - "epoch": 0.6846892799455921, + "epoch": 0.9453791376477033, "grad_norm": 0.0, - "learning_rate": 4.776775188210017e-06, - "loss": 0.8957, + "learning_rate": 1.560700525456771e-07, + "loss": 0.9929, "step": 24162 }, { - "epoch": 0.6847176173878545, + "epoch": 0.9454182643399327, "grad_norm": 0.0, - "learning_rate": 4.775992567209767e-06, - "loss": 0.8668, + "learning_rate": 1.5584711895744841e-07, + "loss": 0.9653, "step": 24163 }, { - "epoch": 0.684745954830117, + "epoch": 0.9454573910321621, "grad_norm": 0.0, - "learning_rate": 4.775209990213277e-06, - "loss": 0.82, + "learning_rate": 1.5562434345397725e-07, + "loss": 0.9507, "step": 24164 }, { - "epoch": 0.6847742922723795, + "epoch": 0.9454965177243916, "grad_norm": 0.0, - "learning_rate": 4.774427457227129e-06, - "loss": 0.7543, + "learning_rate": 1.5540172603884407e-07, + "loss": 0.9468, "step": 24165 }, { - "epoch": 0.684802629714642, + "epoch": 0.945535644416621, "grad_norm": 0.0, - "learning_rate": 4.773644968257922e-06, - "loss": 0.9684, + "learning_rate": 1.5517926671562378e-07, + "loss": 0.7769, "step": 24166 }, { - "epoch": 0.6848309671569044, + "epoch": 0.9455747711088505, "grad_norm": 0.0, - "learning_rate": 4.772862523312242e-06, - "loss": 0.7618, + "learning_rate": 1.5495696548788797e-07, + "loss": 1.0319, "step": 24167 }, { - "epoch": 0.6848593045991669, + "epoch": 0.9456138978010799, "grad_norm": 0.0, - "learning_rate": 4.772080122396681e-06, - "loss": 1.0272, + "learning_rate": 1.5473482235920712e-07, + "loss": 0.9677, "step": 24168 }, { - "epoch": 0.6848876420414294, + "epoch": 0.9456530244933093, "grad_norm": 0.0, - "learning_rate": 4.771297765517834e-06, - "loss": 0.8102, + "learning_rate": 1.5451283733314726e-07, + "loss": 0.9844, "step": 24169 }, { - "epoch": 0.6849159794836918, + "epoch": 0.9456921511855387, "grad_norm": 0.0, - "learning_rate": 4.770515452682284e-06, - "loss": 0.823, + "learning_rate": 1.5429101041327444e-07, + "loss": 1.0369, "step": 24170 }, { - "epoch": 0.6849443169259543, + "epoch": 0.9457312778777682, "grad_norm": 0.0, - "learning_rate": 4.769733183896624e-06, - "loss": 0.9285, + "learning_rate": 1.5406934160315135e-07, + "loss": 0.9023, "step": 24171 }, { - "epoch": 0.6849726543682167, + "epoch": 0.9457704045699976, "grad_norm": 0.0, - "learning_rate": 4.768950959167444e-06, - "loss": 0.758, + "learning_rate": 1.5384783090633627e-07, + "loss": 0.9014, "step": 24172 }, { - "epoch": 0.6850009918104791, + "epoch": 0.9458095312622271, "grad_norm": 0.0, - "learning_rate": 4.768168778501333e-06, - "loss": 0.7692, + "learning_rate": 1.5362647832638743e-07, + "loss": 0.8365, "step": 24173 }, { - "epoch": 0.6850293292527416, + "epoch": 0.9458486579544565, "grad_norm": 0.0, - "learning_rate": 4.767386641904883e-06, - "loss": 0.8163, + "learning_rate": 1.5340528386685872e-07, + "loss": 0.9225, "step": 24174 }, { - "epoch": 0.6850576666950041, + "epoch": 0.945887784646686, "grad_norm": 0.0, - "learning_rate": 4.766604549384674e-06, - "loss": 0.7888, + "learning_rate": 1.531842475313039e-07, + "loss": 0.8356, "step": 24175 }, { - "epoch": 0.6850860041372666, + "epoch": 0.9459269113389154, "grad_norm": 0.0, - "learning_rate": 4.765822500947298e-06, - "loss": 0.9234, + "learning_rate": 1.5296336932327016e-07, + "loss": 1.0333, "step": 24176 }, { - "epoch": 0.685114341579529, + "epoch": 0.9459660380311449, "grad_norm": 0.0, - "learning_rate": 4.765040496599347e-06, - "loss": 0.8261, + "learning_rate": 1.5274264924630578e-07, + "loss": 0.9858, "step": 24177 }, { - "epoch": 0.6851426790217915, + "epoch": 0.9460051647233743, "grad_norm": 0.0, - "learning_rate": 4.7642585363474e-06, - "loss": 0.855, + "learning_rate": 1.5252208730395567e-07, + "loss": 0.9409, "step": 24178 }, { - "epoch": 0.685171016464054, + "epoch": 0.9460442914156038, "grad_norm": 0.0, - "learning_rate": 4.763476620198048e-06, - "loss": 0.7923, + "learning_rate": 1.5230168349976037e-07, + "loss": 1.0191, "step": 24179 }, { - "epoch": 0.6851993539063164, + "epoch": 0.9460834181078331, "grad_norm": 0.0, - "learning_rate": 4.76269474815788e-06, - "loss": 0.8832, + "learning_rate": 1.5208143783726038e-07, + "loss": 0.9804, "step": 24180 }, { - "epoch": 0.6852276913485789, + "epoch": 0.9461225448000626, "grad_norm": 0.0, - "learning_rate": 4.761912920233476e-06, - "loss": 0.8601, + "learning_rate": 1.5186135031999394e-07, + "loss": 0.918, "step": 24181 }, { - "epoch": 0.6852560287908414, + "epoch": 0.946161671492292, "grad_norm": 0.0, - "learning_rate": 4.761131136431427e-06, - "loss": 0.8006, + "learning_rate": 1.5164142095149158e-07, + "loss": 0.9146, "step": 24182 }, { - "epoch": 0.6852843662331038, + "epoch": 0.9462007981845215, "grad_norm": 0.0, - "learning_rate": 4.760349396758314e-06, - "loss": 0.8379, + "learning_rate": 1.5142164973528827e-07, + "loss": 0.922, "step": 24183 }, { - "epoch": 0.6853127036753662, + "epoch": 0.9462399248767509, "grad_norm": 0.0, - "learning_rate": 4.759567701220722e-06, - "loss": 0.798, + "learning_rate": 1.512020366749134e-07, + "loss": 0.9545, "step": 24184 }, { - "epoch": 0.6853410411176287, + "epoch": 0.9462790515689804, "grad_norm": 0.0, - "learning_rate": 4.758786049825238e-06, - "loss": 0.7586, + "learning_rate": 1.509825817738908e-07, + "loss": 0.8372, "step": 24185 }, { - "epoch": 0.6853693785598912, + "epoch": 0.9463181782612098, "grad_norm": 0.0, - "learning_rate": 4.758004442578445e-06, - "loss": 0.8079, + "learning_rate": 1.5076328503574766e-07, + "loss": 0.8915, "step": 24186 }, { - "epoch": 0.6853977160021536, + "epoch": 0.9463573049534393, "grad_norm": 0.0, - "learning_rate": 4.757222879486931e-06, - "loss": 0.9299, + "learning_rate": 1.505441464640034e-07, + "loss": 0.9483, "step": 24187 }, { - "epoch": 0.6854260534444161, + "epoch": 0.9463964316456687, "grad_norm": 0.0, - "learning_rate": 4.756441360557272e-06, - "loss": 0.8266, + "learning_rate": 1.5032516606217961e-07, + "loss": 1.0222, "step": 24188 }, { - "epoch": 0.6854543908866786, + "epoch": 0.9464355583378982, "grad_norm": 0.0, - "learning_rate": 4.755659885796054e-06, - "loss": 0.7807, + "learning_rate": 1.501063438337891e-07, + "loss": 0.9301, "step": 24189 }, { - "epoch": 0.6854827283289411, + "epoch": 0.9464746850301275, "grad_norm": 0.0, - "learning_rate": 4.754878455209866e-06, - "loss": 0.8491, + "learning_rate": 1.4988767978235007e-07, + "loss": 0.9544, "step": 24190 }, { - "epoch": 0.6855110657712035, + "epoch": 0.946513811722357, "grad_norm": 0.0, - "learning_rate": 4.754097068805279e-06, - "loss": 0.7752, + "learning_rate": 1.4966917391137093e-07, + "loss": 1.0532, "step": 24191 }, { - "epoch": 0.685539403213466, + "epoch": 0.9465529384145864, "grad_norm": 0.0, - "learning_rate": 4.7533157265888806e-06, - "loss": 0.8579, + "learning_rate": 1.4945082622436212e-07, + "loss": 0.9468, "step": 24192 }, { - "epoch": 0.6855677406557285, + "epoch": 0.9465920651068158, "grad_norm": 0.0, - "learning_rate": 4.752534428567256e-06, - "loss": 0.7679, + "learning_rate": 1.4923263672482978e-07, + "loss": 0.9021, "step": 24193 }, { - "epoch": 0.6855960780979908, + "epoch": 0.9466311917990453, "grad_norm": 0.0, - "learning_rate": 4.7517531747469795e-06, - "loss": 0.662, + "learning_rate": 1.4901460541627777e-07, + "loss": 1.0212, "step": 24194 }, { - "epoch": 0.6856244155402533, + "epoch": 0.9466703184912747, "grad_norm": 0.0, - "learning_rate": 4.750971965134637e-06, - "loss": 0.9403, + "learning_rate": 1.487967323022066e-07, + "loss": 0.8569, "step": 24195 }, { - "epoch": 0.6856527529825158, + "epoch": 0.9467094451835042, "grad_norm": 0.0, - "learning_rate": 4.7501907997368035e-06, - "loss": 0.966, + "learning_rate": 1.4857901738611569e-07, + "loss": 0.9737, "step": 24196 }, { - "epoch": 0.6856810904247782, + "epoch": 0.9467485718757336, "grad_norm": 0.0, - "learning_rate": 4.749409678560063e-06, - "loss": 0.8289, + "learning_rate": 1.4836146067150113e-07, + "loss": 0.9716, "step": 24197 }, { - "epoch": 0.6857094278670407, + "epoch": 0.9467876985679631, "grad_norm": 0.0, - "learning_rate": 4.748628601610995e-06, - "loss": 0.8668, + "learning_rate": 1.4814406216185685e-07, + "loss": 0.8655, "step": 24198 }, { - "epoch": 0.6857377653093032, + "epoch": 0.9468268252601925, "grad_norm": 0.0, - "learning_rate": 4.747847568896178e-06, - "loss": 0.8804, + "learning_rate": 1.4792682186067442e-07, + "loss": 0.9071, "step": 24199 }, { - "epoch": 0.6857661027515657, + "epoch": 0.946865951952422, "grad_norm": 0.0, - "learning_rate": 4.74706658042219e-06, - "loss": 0.8676, + "learning_rate": 1.477097397714411e-07, + "loss": 1.0358, "step": 24200 }, { - "epoch": 0.6857944401938281, + "epoch": 0.9469050786446513, "grad_norm": 0.0, - "learning_rate": 4.746285636195615e-06, - "loss": 0.854, + "learning_rate": 1.474928158976441e-07, + "loss": 0.9501, "step": 24201 }, { - "epoch": 0.6858227776360906, + "epoch": 0.9469442053368808, "grad_norm": 0.0, - "learning_rate": 4.7455047362230246e-06, - "loss": 0.7765, + "learning_rate": 1.4727605024276615e-07, + "loss": 0.961, "step": 24202 }, { - "epoch": 0.6858511150783531, + "epoch": 0.9469833320291102, "grad_norm": 0.0, - "learning_rate": 4.744723880511002e-06, - "loss": 0.7293, + "learning_rate": 1.4705944281028893e-07, + "loss": 0.916, "step": 24203 }, { - "epoch": 0.6858794525206154, + "epoch": 0.9470224587213397, "grad_norm": 0.0, - "learning_rate": 4.743943069066118e-06, - "loss": 0.9052, + "learning_rate": 1.468429936036919e-07, + "loss": 0.8772, "step": 24204 }, { - "epoch": 0.6859077899628779, + "epoch": 0.9470615854135691, "grad_norm": 0.0, - "learning_rate": 4.743162301894952e-06, - "loss": 0.9845, + "learning_rate": 1.4662670262644784e-07, + "loss": 0.8501, "step": 24205 }, { - "epoch": 0.6859361274051404, + "epoch": 0.9471007121057986, "grad_norm": 0.0, - "learning_rate": 4.7423815790040885e-06, - "loss": 0.8587, + "learning_rate": 1.4641056988203285e-07, + "loss": 1.0525, "step": 24206 }, { - "epoch": 0.6859644648474029, + "epoch": 0.947139838798028, "grad_norm": 0.0, - "learning_rate": 4.741600900400092e-06, - "loss": 0.7735, + "learning_rate": 1.4619459537391635e-07, + "loss": 0.8562, "step": 24207 }, { - "epoch": 0.6859928022896653, + "epoch": 0.9471789654902575, "grad_norm": 0.0, - "learning_rate": 4.740820266089547e-06, - "loss": 0.7731, + "learning_rate": 1.459787791055689e-07, + "loss": 0.9306, "step": 24208 }, { - "epoch": 0.6860211397319278, + "epoch": 0.9472180921824869, "grad_norm": 0.0, - "learning_rate": 4.740039676079022e-06, - "loss": 0.8626, + "learning_rate": 1.4576312108045222e-07, + "loss": 0.9281, "step": 24209 }, { - "epoch": 0.6860494771741903, + "epoch": 0.9472572188747164, "grad_norm": 0.0, - "learning_rate": 4.739259130375097e-06, - "loss": 0.8629, + "learning_rate": 1.4554762130203236e-07, + "loss": 0.9322, "step": 24210 }, { - "epoch": 0.6860778146164527, + "epoch": 0.9472963455669458, "grad_norm": 0.0, - "learning_rate": 4.738478628984345e-06, - "loss": 0.8017, + "learning_rate": 1.4533227977376994e-07, + "loss": 0.901, "step": 24211 }, { - "epoch": 0.6861061520587152, + "epoch": 0.9473354722591752, "grad_norm": 0.0, - "learning_rate": 4.737698171913343e-06, - "loss": 0.8099, + "learning_rate": 1.4511709649912332e-07, + "loss": 0.9223, "step": 24212 }, { - "epoch": 0.6861344895009777, + "epoch": 0.9473745989514046, "grad_norm": 0.0, - "learning_rate": 4.736917759168662e-06, - "loss": 0.7354, + "learning_rate": 1.4490207148154522e-07, + "loss": 0.9606, "step": 24213 }, { - "epoch": 0.6861628269432402, + "epoch": 0.9474137256436341, "grad_norm": 0.0, - "learning_rate": 4.7361373907568804e-06, - "loss": 0.842, + "learning_rate": 1.4468720472449182e-07, + "loss": 1.0484, "step": 24214 }, { - "epoch": 0.6861911643855025, + "epoch": 0.9474528523358635, "grad_norm": 0.0, - "learning_rate": 4.7353570666845664e-06, - "loss": 0.8198, + "learning_rate": 1.4447249623141258e-07, + "loss": 0.8711, "step": 24215 }, { - "epoch": 0.686219501827765, + "epoch": 0.947491979028093, "grad_norm": 0.0, - "learning_rate": 4.734576786958297e-06, - "loss": 0.8415, + "learning_rate": 1.442579460057558e-07, + "loss": 0.9466, "step": 24216 }, { - "epoch": 0.6862478392700275, + "epoch": 0.9475311057203224, "grad_norm": 0.0, - "learning_rate": 4.7337965515846384e-06, - "loss": 0.8094, + "learning_rate": 1.4404355405096548e-07, + "loss": 0.9919, "step": 24217 }, { - "epoch": 0.6862761767122899, + "epoch": 0.9475702324125519, "grad_norm": 0.0, - "learning_rate": 4.733016360570169e-06, - "loss": 0.7919, + "learning_rate": 1.4382932037048547e-07, + "loss": 0.8152, "step": 24218 }, { - "epoch": 0.6863045141545524, + "epoch": 0.9476093591047813, "grad_norm": 0.0, - "learning_rate": 4.73223621392146e-06, - "loss": 0.8598, + "learning_rate": 1.4361524496775636e-07, + "loss": 0.9952, "step": 24219 }, { - "epoch": 0.6863328515968149, + "epoch": 0.9476484857970108, "grad_norm": 0.0, - "learning_rate": 4.73145611164508e-06, - "loss": 0.7272, + "learning_rate": 1.434013278462165e-07, + "loss": 0.9566, "step": 24220 }, { - "epoch": 0.6863611890390773, + "epoch": 0.9476876124892402, "grad_norm": 0.0, - "learning_rate": 4.7306760537476e-06, - "loss": 0.9652, + "learning_rate": 1.4318756900929986e-07, + "loss": 0.9388, "step": 24221 }, { - "epoch": 0.6863895264813398, + "epoch": 0.9477267391814695, "grad_norm": 0.0, - "learning_rate": 4.7298960402355966e-06, - "loss": 0.7898, + "learning_rate": 1.429739684604392e-07, + "loss": 1.0205, "step": 24222 }, { - "epoch": 0.6864178639236023, + "epoch": 0.947765865873699, "grad_norm": 0.0, - "learning_rate": 4.729116071115632e-06, - "loss": 0.8241, + "learning_rate": 1.4276052620306513e-07, + "loss": 0.8681, "step": 24223 }, { - "epoch": 0.6864462013658648, + "epoch": 0.9478049925659284, "grad_norm": 0.0, - "learning_rate": 4.72833614639428e-06, - "loss": 0.8371, + "learning_rate": 1.425472422406049e-07, + "loss": 0.8511, "step": 24224 }, { - "epoch": 0.6864745388081271, + "epoch": 0.9478441192581579, "grad_norm": 0.0, - "learning_rate": 4.727556266078111e-06, - "loss": 0.7949, + "learning_rate": 1.4233411657648465e-07, + "loss": 1.0295, "step": 24225 }, { - "epoch": 0.6865028762503896, + "epoch": 0.9478832459503873, "grad_norm": 0.0, - "learning_rate": 4.726776430173693e-06, - "loss": 0.8602, + "learning_rate": 1.421211492141261e-07, + "loss": 0.9023, "step": 24226 }, { - "epoch": 0.6865312136926521, + "epoch": 0.9479223726426168, "grad_norm": 0.0, - "learning_rate": 4.7259966386875985e-06, - "loss": 0.9072, + "learning_rate": 1.4190834015694876e-07, + "loss": 0.8363, "step": 24227 }, { - "epoch": 0.6865595511349145, + "epoch": 0.9479614993348462, "grad_norm": 0.0, - "learning_rate": 4.72521689162639e-06, - "loss": 0.859, + "learning_rate": 1.4169568940837098e-07, + "loss": 0.9357, "step": 24228 }, { - "epoch": 0.686587888577177, + "epoch": 0.9480006260270757, "grad_norm": 0.0, - "learning_rate": 4.7244371889966374e-06, - "loss": 0.8314, + "learning_rate": 1.414831969718067e-07, + "loss": 1.0347, "step": 24229 }, { - "epoch": 0.6866162260194395, + "epoch": 0.9480397527193051, "grad_norm": 0.0, - "learning_rate": 4.7236575308049135e-06, - "loss": 0.7849, + "learning_rate": 1.412708628506698e-07, + "loss": 0.9446, "step": 24230 }, { - "epoch": 0.686644563461702, + "epoch": 0.9480788794115346, "grad_norm": 0.0, - "learning_rate": 4.722877917057777e-06, - "loss": 0.8805, + "learning_rate": 1.4105868704836767e-07, + "loss": 0.9375, "step": 24231 }, { - "epoch": 0.6866729009039644, + "epoch": 0.948118006103764, "grad_norm": 0.0, - "learning_rate": 4.722098347761805e-06, - "loss": 0.7892, + "learning_rate": 1.4084666956831083e-07, + "loss": 0.9092, "step": 24232 }, { - "epoch": 0.6867012383462269, + "epoch": 0.9481571327959935, "grad_norm": 0.0, - "learning_rate": 4.721318822923553e-06, - "loss": 0.8146, + "learning_rate": 1.4063481041390102e-07, + "loss": 0.8464, "step": 24233 }, { - "epoch": 0.6867295757884894, + "epoch": 0.9481962594882228, "grad_norm": 0.0, - "learning_rate": 4.720539342549594e-06, - "loss": 0.8144, + "learning_rate": 1.4042310958854222e-07, + "loss": 1.0892, "step": 24234 }, { - "epoch": 0.6867579132307517, + "epoch": 0.9482353861804523, "grad_norm": 0.0, - "learning_rate": 4.719759906646496e-06, - "loss": 0.782, + "learning_rate": 1.402115670956339e-07, + "loss": 0.9501, "step": 24235 }, { - "epoch": 0.6867862506730142, + "epoch": 0.9482745128726817, "grad_norm": 0.0, - "learning_rate": 4.718980515220817e-06, - "loss": 0.8105, + "learning_rate": 1.400001829385722e-07, + "loss": 0.9875, "step": 24236 }, { - "epoch": 0.6868145881152767, + "epoch": 0.9483136395649112, "grad_norm": 0.0, - "learning_rate": 4.718201168279126e-06, - "loss": 0.7726, + "learning_rate": 1.3978895712075223e-07, + "loss": 0.908, "step": 24237 }, { - "epoch": 0.6868429255575392, + "epoch": 0.9483527662571406, "grad_norm": 0.0, - "learning_rate": 4.717421865827988e-06, - "loss": 0.8595, + "learning_rate": 1.3957788964556685e-07, + "loss": 1.0086, "step": 24238 }, { - "epoch": 0.6868712629998016, + "epoch": 0.9483918929493701, "grad_norm": 0.0, - "learning_rate": 4.716642607873968e-06, - "loss": 0.8409, + "learning_rate": 1.393669805164044e-07, + "loss": 0.8633, "step": 24239 }, { - "epoch": 0.6868996004420641, + "epoch": 0.9484310196415995, "grad_norm": 0.0, - "learning_rate": 4.715863394423632e-06, - "loss": 0.9597, + "learning_rate": 1.391562297366511e-07, + "loss": 1.0252, "step": 24240 }, { - "epoch": 0.6869279378843266, + "epoch": 0.948470146333829, "grad_norm": 0.0, - "learning_rate": 4.715084225483538e-06, - "loss": 0.841, + "learning_rate": 1.3894563730969312e-07, + "loss": 0.7968, "step": 24241 }, { - "epoch": 0.686956275326589, + "epoch": 0.9485092730260584, "grad_norm": 0.0, - "learning_rate": 4.714305101060252e-06, - "loss": 0.7516, + "learning_rate": 1.387352032389122e-07, + "loss": 0.96, "step": 24242 }, { - "epoch": 0.6869846127688515, + "epoch": 0.9485483997182879, "grad_norm": 0.0, - "learning_rate": 4.713526021160339e-06, - "loss": 0.9067, + "learning_rate": 1.3852492752768676e-07, + "loss": 0.9277, "step": 24243 }, { - "epoch": 0.687012950211114, + "epoch": 0.9485875264105172, "grad_norm": 0.0, - "learning_rate": 4.712746985790357e-06, - "loss": 0.889, + "learning_rate": 1.3831481017939406e-07, + "loss": 1.0001, "step": 24244 }, { - "epoch": 0.6870412876533764, + "epoch": 0.9486266531027467, "grad_norm": 0.0, - "learning_rate": 4.711967994956875e-06, - "loss": 0.894, + "learning_rate": 1.3810485119740703e-07, + "loss": 0.9902, "step": 24245 }, { - "epoch": 0.6870696250956388, + "epoch": 0.9486657797949761, "grad_norm": 0.0, - "learning_rate": 4.7111890486664455e-06, - "loss": 0.8953, + "learning_rate": 1.3789505058509955e-07, + "loss": 0.8729, "step": 24246 }, { - "epoch": 0.6870979625379013, + "epoch": 0.9487049064872056, "grad_norm": 0.0, - "learning_rate": 4.710410146925635e-06, - "loss": 0.9018, + "learning_rate": 1.376854083458401e-07, + "loss": 0.7907, "step": 24247 }, { - "epoch": 0.6871262999801638, + "epoch": 0.948744033179435, "grad_norm": 0.0, - "learning_rate": 4.709631289741008e-06, - "loss": 0.9211, + "learning_rate": 1.3747592448299375e-07, + "loss": 0.9111, "step": 24248 }, { - "epoch": 0.6871546374224262, + "epoch": 0.9487831598716644, "grad_norm": 0.0, - "learning_rate": 4.708852477119117e-06, - "loss": 0.8577, + "learning_rate": 1.3726659899992555e-07, + "loss": 0.972, "step": 24249 }, { - "epoch": 0.6871829748646887, + "epoch": 0.9488222865638939, "grad_norm": 0.0, - "learning_rate": 4.708073709066526e-06, - "loss": 0.8003, + "learning_rate": 1.3705743189999732e-07, + "loss": 1.0901, "step": 24250 }, { - "epoch": 0.6872113123069512, + "epoch": 0.9488614132561233, "grad_norm": 0.0, - "learning_rate": 4.707294985589796e-06, - "loss": 0.8696, + "learning_rate": 1.3684842318656854e-07, + "loss": 0.8644, "step": 24251 }, { - "epoch": 0.6872396497492136, + "epoch": 0.9489005399483528, "grad_norm": 0.0, - "learning_rate": 4.7065163066954854e-06, - "loss": 0.8381, + "learning_rate": 1.3663957286299324e-07, + "loss": 1.0313, "step": 24252 }, { - "epoch": 0.6872679871914761, + "epoch": 0.9489396666405822, "grad_norm": 0.0, - "learning_rate": 4.705737672390159e-06, - "loss": 0.6476, + "learning_rate": 1.3643088093262867e-07, + "loss": 1.007, "step": 24253 }, { - "epoch": 0.6872963246337386, + "epoch": 0.9489787933328117, "grad_norm": 0.0, - "learning_rate": 4.704959082680363e-06, - "loss": 0.7766, + "learning_rate": 1.3622234739882335e-07, + "loss": 1.0372, "step": 24254 }, { - "epoch": 0.6873246620760011, + "epoch": 0.949017920025041, "grad_norm": 0.0, - "learning_rate": 4.704180537572666e-06, - "loss": 0.8715, + "learning_rate": 1.3601397226492784e-07, + "loss": 0.9998, "step": 24255 }, { - "epoch": 0.6873529995182635, + "epoch": 0.9490570467172705, "grad_norm": 0.0, - "learning_rate": 4.703402037073624e-06, - "loss": 0.7944, + "learning_rate": 1.358057555342862e-07, + "loss": 0.9665, "step": 24256 }, { - "epoch": 0.687381336960526, + "epoch": 0.9490961734094999, "grad_norm": 0.0, - "learning_rate": 4.7026235811897925e-06, - "loss": 0.764, + "learning_rate": 1.3559769721024573e-07, + "loss": 1.0385, "step": 24257 }, { - "epoch": 0.6874096744027884, + "epoch": 0.9491353001017294, "grad_norm": 0.0, - "learning_rate": 4.7018451699277275e-06, - "loss": 0.8207, + "learning_rate": 1.3538979729614376e-07, + "loss": 1.0237, "step": 24258 }, { - "epoch": 0.6874380118450508, + "epoch": 0.9491744267939588, "grad_norm": 0.0, - "learning_rate": 4.701066803293993e-06, - "loss": 0.8804, + "learning_rate": 1.3518205579532096e-07, + "loss": 1.0861, "step": 24259 }, { - "epoch": 0.6874663492873133, + "epoch": 0.9492135534861883, "grad_norm": 0.0, - "learning_rate": 4.7002884812951365e-06, - "loss": 0.8894, + "learning_rate": 1.3497447271111353e-07, + "loss": 0.8937, "step": 24260 }, { - "epoch": 0.6874946867295758, + "epoch": 0.9492526801784177, "grad_norm": 0.0, - "learning_rate": 4.699510203937722e-06, - "loss": 0.7827, + "learning_rate": 1.3476704804685436e-07, + "loss": 0.8588, "step": 24261 }, { - "epoch": 0.6875230241718383, + "epoch": 0.9492918068706472, "grad_norm": 0.0, - "learning_rate": 4.698731971228298e-06, - "loss": 0.7539, + "learning_rate": 1.3455978180587414e-07, + "loss": 1.0013, "step": 24262 }, { - "epoch": 0.6875513616141007, + "epoch": 0.9493309335628766, "grad_norm": 0.0, - "learning_rate": 4.697953783173423e-06, - "loss": 0.8484, + "learning_rate": 1.3435267399150243e-07, + "loss": 0.9508, "step": 24263 }, { - "epoch": 0.6875796990563632, + "epoch": 0.9493700602551061, "grad_norm": 0.0, - "learning_rate": 4.6971756397796506e-06, - "loss": 0.8604, + "learning_rate": 1.3414572460706432e-07, + "loss": 0.9432, "step": 24264 }, { - "epoch": 0.6876080364986257, + "epoch": 0.9494091869473354, "grad_norm": 0.0, - "learning_rate": 4.6963975410535375e-06, - "loss": 0.9099, + "learning_rate": 1.3393893365588272e-07, + "loss": 0.8395, "step": 24265 }, { - "epoch": 0.6876363739408881, + "epoch": 0.949448313639565, "grad_norm": 0.0, - "learning_rate": 4.695619487001643e-06, - "loss": 0.7751, + "learning_rate": 1.3373230114127943e-07, + "loss": 0.8718, "step": 24266 }, { - "epoch": 0.6876647113831506, + "epoch": 0.9494874403317943, "grad_norm": 0.0, - "learning_rate": 4.694841477630509e-06, - "loss": 0.8027, + "learning_rate": 1.335258270665718e-07, + "loss": 0.9685, "step": 24267 }, { - "epoch": 0.687693048825413, + "epoch": 0.9495265670240238, "grad_norm": 0.0, - "learning_rate": 4.694063512946697e-06, - "loss": 0.8697, + "learning_rate": 1.3331951143507604e-07, + "loss": 1.0168, "step": 24268 }, { - "epoch": 0.6877213862676754, + "epoch": 0.9495656937162532, "grad_norm": 0.0, - "learning_rate": 4.693285592956761e-06, - "loss": 0.7953, + "learning_rate": 1.331133542501062e-07, + "loss": 0.9197, "step": 24269 }, { - "epoch": 0.6877497237099379, + "epoch": 0.9496048204084827, "grad_norm": 0.0, - "learning_rate": 4.692507717667249e-06, - "loss": 0.8117, + "learning_rate": 1.329073555149707e-07, + "loss": 0.9321, "step": 24270 }, { - "epoch": 0.6877780611522004, + "epoch": 0.9496439471007121, "grad_norm": 0.0, - "learning_rate": 4.6917298870847135e-06, - "loss": 0.9424, + "learning_rate": 1.3270151523297915e-07, + "loss": 1.0914, "step": 24271 }, { - "epoch": 0.6878063985944629, + "epoch": 0.9496830737929416, "grad_norm": 0.0, - "learning_rate": 4.690952101215713e-06, - "loss": 0.8283, + "learning_rate": 1.3249583340743778e-07, + "loss": 0.9569, "step": 24272 }, { - "epoch": 0.6878347360367253, + "epoch": 0.949722200485171, "grad_norm": 0.0, - "learning_rate": 4.69017436006679e-06, - "loss": 0.9224, + "learning_rate": 1.3229031004164839e-07, + "loss": 0.9082, "step": 24273 }, { - "epoch": 0.6878630734789878, + "epoch": 0.9497613271774005, "grad_norm": 0.0, - "learning_rate": 4.6893966636445055e-06, - "loss": 0.7808, + "learning_rate": 1.3208494513891168e-07, + "loss": 0.9614, "step": 24274 }, { - "epoch": 0.6878914109212503, + "epoch": 0.9498004538696299, "grad_norm": 0.0, - "learning_rate": 4.6886190119554e-06, - "loss": 0.7868, + "learning_rate": 1.3187973870252501e-07, + "loss": 0.9503, "step": 24275 }, { - "epoch": 0.6879197483635127, + "epoch": 0.9498395805618594, "grad_norm": 0.0, - "learning_rate": 4.687841405006029e-06, - "loss": 0.839, + "learning_rate": 1.3167469073578465e-07, + "loss": 0.8924, "step": 24276 }, { - "epoch": 0.6879480858057752, + "epoch": 0.9498787072540887, "grad_norm": 0.0, - "learning_rate": 4.687063842802943e-06, - "loss": 0.8636, + "learning_rate": 1.3146980124198238e-07, + "loss": 0.9691, "step": 24277 }, { - "epoch": 0.6879764232480376, + "epoch": 0.9499178339463181, "grad_norm": 0.0, - "learning_rate": 4.686286325352689e-06, - "loss": 0.8336, + "learning_rate": 1.3126507022441116e-07, + "loss": 0.9365, "step": 24278 }, { - "epoch": 0.6880047606903001, + "epoch": 0.9499569606385476, "grad_norm": 0.0, - "learning_rate": 4.6855088526618205e-06, - "loss": 0.8952, + "learning_rate": 1.31060497686355e-07, + "loss": 0.9235, "step": 24279 }, { - "epoch": 0.6880330981325625, + "epoch": 0.949996087330777, "grad_norm": 0.0, - "learning_rate": 4.684731424736888e-06, - "loss": 0.835, + "learning_rate": 1.3085608363110014e-07, + "loss": 1.0148, "step": 24280 }, { - "epoch": 0.688061435574825, + "epoch": 0.9500352140230065, "grad_norm": 0.0, - "learning_rate": 4.683954041584432e-06, - "loss": 0.752, + "learning_rate": 1.3065182806193066e-07, + "loss": 0.9775, "step": 24281 }, { - "epoch": 0.6880897730170875, + "epoch": 0.9500743407152359, "grad_norm": 0.0, - "learning_rate": 4.68317670321101e-06, - "loss": 0.8727, + "learning_rate": 1.3044773098212616e-07, + "loss": 0.9955, "step": 24282 }, { - "epoch": 0.6881181104593499, + "epoch": 0.9501134674074654, "grad_norm": 0.0, - "learning_rate": 4.682399409623161e-06, - "loss": 0.7572, + "learning_rate": 1.30243792394964e-07, + "loss": 0.9281, "step": 24283 }, { - "epoch": 0.6881464479016124, + "epoch": 0.9501525940996948, "grad_norm": 0.0, - "learning_rate": 4.681622160827436e-06, - "loss": 0.9151, + "learning_rate": 1.3004001230371932e-07, + "loss": 0.78, "step": 24284 }, { - "epoch": 0.6881747853438749, + "epoch": 0.9501917207919243, "grad_norm": 0.0, - "learning_rate": 4.680844956830386e-06, - "loss": 0.841, + "learning_rate": 1.2983639071166288e-07, + "loss": 1.0145, "step": 24285 }, { - "epoch": 0.6882031227861374, + "epoch": 0.9502308474841537, "grad_norm": 0.0, - "learning_rate": 4.68006779763855e-06, - "loss": 0.8044, + "learning_rate": 1.2963292762206648e-07, + "loss": 0.8477, "step": 24286 }, { - "epoch": 0.6882314602283998, + "epoch": 0.9502699741763831, "grad_norm": 0.0, - "learning_rate": 4.679290683258479e-06, - "loss": 0.8169, + "learning_rate": 1.294296230381975e-07, + "loss": 0.965, "step": 24287 }, { - "epoch": 0.6882597976706623, + "epoch": 0.9503091008686125, "grad_norm": 0.0, - "learning_rate": 4.678513613696724e-06, - "loss": 0.8112, + "learning_rate": 1.2922647696332004e-07, + "loss": 0.8791, "step": 24288 }, { - "epoch": 0.6882881351129247, + "epoch": 0.950348227560842, "grad_norm": 0.0, - "learning_rate": 4.677736588959818e-06, - "loss": 0.7622, + "learning_rate": 1.2902348940069587e-07, + "loss": 1.0397, "step": 24289 }, { - "epoch": 0.6883164725551871, + "epoch": 0.9503873542530714, "grad_norm": 0.0, - "learning_rate": 4.676959609054315e-06, - "loss": 0.8423, + "learning_rate": 1.2882066035358686e-07, + "loss": 0.8734, "step": 24290 }, { - "epoch": 0.6883448099974496, + "epoch": 0.9504264809453009, "grad_norm": 0.0, - "learning_rate": 4.676182673986757e-06, - "loss": 0.7739, + "learning_rate": 1.286179898252482e-07, + "loss": 0.9454, "step": 24291 }, { - "epoch": 0.6883731474397121, + "epoch": 0.9504656076375303, "grad_norm": 0.0, - "learning_rate": 4.6754057837636905e-06, - "loss": 0.7945, + "learning_rate": 1.2841547781893505e-07, + "loss": 1.0169, "step": 24292 }, { - "epoch": 0.6884014848819745, + "epoch": 0.9505047343297598, "grad_norm": 0.0, - "learning_rate": 4.674628938391661e-06, - "loss": 0.8766, + "learning_rate": 1.2821312433789924e-07, + "loss": 0.9689, "step": 24293 }, { - "epoch": 0.688429822324237, + "epoch": 0.9505438610219892, "grad_norm": 0.0, - "learning_rate": 4.6738521378772066e-06, - "loss": 0.8645, + "learning_rate": 1.2801092938539038e-07, + "loss": 0.9008, "step": 24294 }, { - "epoch": 0.6884581597664995, + "epoch": 0.9505829877142187, "grad_norm": 0.0, - "learning_rate": 4.673075382226876e-06, - "loss": 0.8612, + "learning_rate": 1.2780889296465592e-07, + "loss": 0.9336, "step": 24295 }, { - "epoch": 0.688486497208762, + "epoch": 0.9506221144064481, "grad_norm": 0.0, - "learning_rate": 4.672298671447206e-06, - "loss": 0.788, + "learning_rate": 1.2760701507894102e-07, + "loss": 0.9841, "step": 24296 }, { - "epoch": 0.6885148346510244, + "epoch": 0.9506612410986776, "grad_norm": 0.0, - "learning_rate": 4.671522005544743e-06, - "loss": 0.7174, + "learning_rate": 1.274052957314853e-07, + "loss": 0.8757, "step": 24297 }, { - "epoch": 0.6885431720932869, + "epoch": 0.9507003677909069, "grad_norm": 0.0, - "learning_rate": 4.670745384526033e-06, - "loss": 0.8892, + "learning_rate": 1.272037349255306e-07, + "loss": 1.0966, "step": 24298 }, { - "epoch": 0.6885715095355494, + "epoch": 0.9507394944831364, "grad_norm": 0.0, - "learning_rate": 4.669968808397609e-06, - "loss": 0.7827, + "learning_rate": 1.2700233266431217e-07, + "loss": 0.9784, "step": 24299 }, { - "epoch": 0.6885998469778117, + "epoch": 0.9507786211753658, "grad_norm": 0.0, - "learning_rate": 4.669192277166018e-06, - "loss": 0.885, + "learning_rate": 1.2680108895106514e-07, + "loss": 1.0755, "step": 24300 }, { - "epoch": 0.6886281844200742, + "epoch": 0.9508177478675953, "grad_norm": 0.0, - "learning_rate": 4.668415790837804e-06, - "loss": 0.8758, + "learning_rate": 1.2660000378902026e-07, + "loss": 0.9646, "step": 24301 }, { - "epoch": 0.6886565218623367, + "epoch": 0.9508568745598247, "grad_norm": 0.0, - "learning_rate": 4.6676393494194985e-06, - "loss": 0.8857, + "learning_rate": 1.263990771814072e-07, + "loss": 0.8814, "step": 24302 }, { - "epoch": 0.6886848593045992, + "epoch": 0.9508960012520542, "grad_norm": 0.0, - "learning_rate": 4.666862952917647e-06, - "loss": 0.7411, + "learning_rate": 1.2619830913145225e-07, + "loss": 0.9231, "step": 24303 }, { - "epoch": 0.6887131967468616, + "epoch": 0.9509351279442836, "grad_norm": 0.0, - "learning_rate": 4.6660866013387896e-06, - "loss": 0.8948, + "learning_rate": 1.259976996423806e-07, + "loss": 0.9736, "step": 24304 }, { - "epoch": 0.6887415341891241, + "epoch": 0.9509742546365131, "grad_norm": 0.0, - "learning_rate": 4.665310294689466e-06, - "loss": 0.9198, + "learning_rate": 1.2579724871741305e-07, + "loss": 0.9156, "step": 24305 }, { - "epoch": 0.6887698716313866, + "epoch": 0.9510133813287425, "grad_norm": 0.0, - "learning_rate": 4.664534032976218e-06, - "loss": 0.8653, + "learning_rate": 1.255969563597692e-07, + "loss": 0.9755, "step": 24306 }, { - "epoch": 0.688798209073649, + "epoch": 0.9510525080209719, "grad_norm": 0.0, - "learning_rate": 4.663757816205577e-06, - "loss": 0.8152, + "learning_rate": 1.2539682257266316e-07, + "loss": 0.8716, "step": 24307 }, { - "epoch": 0.6888265465159115, + "epoch": 0.9510916347132014, "grad_norm": 0.0, - "learning_rate": 4.662981644384087e-06, - "loss": 0.8855, + "learning_rate": 1.2519684735931125e-07, + "loss": 1.0163, "step": 24308 }, { - "epoch": 0.688854883958174, + "epoch": 0.9511307614054307, "grad_norm": 0.0, - "learning_rate": 4.662205517518286e-06, - "loss": 0.8686, + "learning_rate": 1.2499703072292424e-07, + "loss": 0.9252, "step": 24309 }, { - "epoch": 0.6888832214004365, + "epoch": 0.9511698880976602, "grad_norm": 0.0, - "learning_rate": 4.661429435614708e-06, - "loss": 0.795, + "learning_rate": 1.2479737266671176e-07, + "loss": 1.0435, "step": 24310 }, { - "epoch": 0.6889115588426988, + "epoch": 0.9512090147898896, "grad_norm": 0.0, - "learning_rate": 4.660653398679896e-06, - "loss": 0.8072, + "learning_rate": 1.2459787319387907e-07, + "loss": 1.007, "step": 24311 }, { - "epoch": 0.6889398962849613, + "epoch": 0.9512481414821191, "grad_norm": 0.0, - "learning_rate": 4.659877406720379e-06, - "loss": 0.8971, + "learning_rate": 1.243985323076291e-07, + "loss": 0.8748, "step": 24312 }, { - "epoch": 0.6889682337272238, + "epoch": 0.9512872681743485, "grad_norm": 0.0, - "learning_rate": 4.6591014597426974e-06, - "loss": 0.875, + "learning_rate": 1.241993500111638e-07, + "loss": 0.9427, "step": 24313 }, { - "epoch": 0.6889965711694862, + "epoch": 0.951326394866578, "grad_norm": 0.0, - "learning_rate": 4.658325557753391e-06, - "loss": 0.9238, + "learning_rate": 1.2400032630768278e-07, + "loss": 0.878, "step": 24314 }, { - "epoch": 0.6890249086117487, + "epoch": 0.9513655215588074, "grad_norm": 0.0, - "learning_rate": 4.657549700758989e-06, - "loss": 0.8309, + "learning_rate": 1.2380146120038016e-07, + "loss": 0.8553, "step": 24315 }, { - "epoch": 0.6890532460540112, + "epoch": 0.9514046482510369, "grad_norm": 0.0, - "learning_rate": 4.65677388876603e-06, - "loss": 0.8571, + "learning_rate": 1.236027546924512e-07, + "loss": 0.9627, "step": 24316 }, { - "epoch": 0.6890815834962736, + "epoch": 0.9514437749432663, "grad_norm": 0.0, - "learning_rate": 4.655998121781048e-06, - "loss": 0.9662, + "learning_rate": 1.2340420678708664e-07, + "loss": 1.0632, "step": 24317 }, { - "epoch": 0.6891099209385361, + "epoch": 0.9514829016354958, "grad_norm": 0.0, - "learning_rate": 4.655222399810579e-06, - "loss": 0.763, + "learning_rate": 1.2320581748747286e-07, + "loss": 0.8382, "step": 24318 }, { - "epoch": 0.6891382583807986, + "epoch": 0.9515220283277251, "grad_norm": 0.0, - "learning_rate": 4.654446722861159e-06, - "loss": 0.8693, + "learning_rate": 1.2300758679679835e-07, + "loss": 0.9129, "step": 24319 }, { - "epoch": 0.6891665958230611, + "epoch": 0.9515611550199546, "grad_norm": 0.0, - "learning_rate": 4.6536710909393155e-06, - "loss": 0.771, + "learning_rate": 1.228095147182462e-07, + "loss": 0.9317, "step": 24320 }, { - "epoch": 0.6891949332653234, + "epoch": 0.951600281712184, "grad_norm": 0.0, - "learning_rate": 4.652895504051587e-06, - "loss": 0.8216, + "learning_rate": 1.2261160125499495e-07, + "loss": 1.0356, "step": 24321 }, { - "epoch": 0.6892232707075859, + "epoch": 0.9516394084044135, "grad_norm": 0.0, - "learning_rate": 4.652119962204508e-06, - "loss": 0.8935, + "learning_rate": 1.2241384641022425e-07, + "loss": 0.9543, "step": 24322 }, { - "epoch": 0.6892516081498484, + "epoch": 0.9516785350966429, "grad_norm": 0.0, - "learning_rate": 4.6513444654046044e-06, - "loss": 0.8814, + "learning_rate": 1.2221625018711158e-07, + "loss": 0.8782, "step": 24323 }, { - "epoch": 0.6892799455921108, + "epoch": 0.9517176617888724, "grad_norm": 0.0, - "learning_rate": 4.650569013658417e-06, - "loss": 0.7324, + "learning_rate": 1.2201881258882775e-07, + "loss": 0.9593, "step": 24324 }, { - "epoch": 0.6893082830343733, + "epoch": 0.9517567884811018, "grad_norm": 0.0, - "learning_rate": 4.64979360697247e-06, - "loss": 0.8114, + "learning_rate": 1.2182153361854244e-07, + "loss": 0.9568, "step": 24325 }, { - "epoch": 0.6893366204766358, + "epoch": 0.9517959151733313, "grad_norm": 0.0, - "learning_rate": 4.649018245353297e-06, - "loss": 0.7992, + "learning_rate": 1.2162441327942642e-07, + "loss": 0.8972, "step": 24326 }, { - "epoch": 0.6893649579188983, + "epoch": 0.9518350418655607, "grad_norm": 0.0, - "learning_rate": 4.648242928807435e-06, - "loss": 0.8498, + "learning_rate": 1.2142745157464498e-07, + "loss": 1.0383, "step": 24327 }, { - "epoch": 0.6893932953611607, + "epoch": 0.9518741685577902, "grad_norm": 0.0, - "learning_rate": 4.647467657341407e-06, - "loss": 0.7241, + "learning_rate": 1.212306485073589e-07, + "loss": 1.0292, "step": 24328 }, { - "epoch": 0.6894216328034232, + "epoch": 0.9519132952500196, "grad_norm": 0.0, - "learning_rate": 4.646692430961745e-06, - "loss": 0.8326, + "learning_rate": 1.2103400408073006e-07, + "loss": 0.8845, "step": 24329 }, { - "epoch": 0.6894499702456857, + "epoch": 0.951952421942249, "grad_norm": 0.0, - "learning_rate": 4.645917249674982e-06, - "loss": 0.9173, + "learning_rate": 1.20837518297916e-07, + "loss": 0.9891, "step": 24330 }, { - "epoch": 0.689478307687948, + "epoch": 0.9519915486344784, "grad_norm": 0.0, - "learning_rate": 4.645142113487645e-06, - "loss": 0.7828, + "learning_rate": 1.2064119116207195e-07, + "loss": 0.9549, "step": 24331 }, { - "epoch": 0.6895066451302105, + "epoch": 0.9520306753267079, "grad_norm": 0.0, - "learning_rate": 4.644367022406268e-06, - "loss": 0.8623, + "learning_rate": 1.2044502267635093e-07, + "loss": 0.9012, "step": 24332 }, { - "epoch": 0.689534982572473, + "epoch": 0.9520698020189373, "grad_norm": 0.0, - "learning_rate": 4.6435919764373735e-06, - "loss": 0.8642, + "learning_rate": 1.2024901284390377e-07, + "loss": 0.9177, "step": 24333 }, { - "epoch": 0.6895633200147354, + "epoch": 0.9521089287111668, "grad_norm": 0.0, - "learning_rate": 4.642816975587493e-06, - "loss": 0.8818, + "learning_rate": 1.2005316166787574e-07, + "loss": 0.9815, "step": 24334 }, { - "epoch": 0.6895916574569979, + "epoch": 0.9521480554033962, "grad_norm": 0.0, - "learning_rate": 4.642042019863158e-06, - "loss": 0.7787, + "learning_rate": 1.198574691514154e-07, + "loss": 0.8542, "step": 24335 }, { - "epoch": 0.6896199948992604, + "epoch": 0.9521871820956256, "grad_norm": 0.0, - "learning_rate": 4.641267109270889e-06, - "loss": 0.7617, + "learning_rate": 1.1966193529766356e-07, + "loss": 0.9211, "step": 24336 }, { - "epoch": 0.6896483323415229, + "epoch": 0.9522263087878551, "grad_norm": 0.0, - "learning_rate": 4.640492243817216e-06, - "loss": 0.8826, + "learning_rate": 1.1946656010976e-07, + "loss": 1.0063, "step": 24337 }, { - "epoch": 0.6896766697837853, + "epoch": 0.9522654354800845, "grad_norm": 0.0, - "learning_rate": 4.639717423508672e-06, - "loss": 0.8513, + "learning_rate": 1.1927134359084104e-07, + "loss": 0.8548, "step": 24338 }, { - "epoch": 0.6897050072260478, + "epoch": 0.952304562172314, "grad_norm": 0.0, - "learning_rate": 4.638942648351774e-06, - "loss": 0.7795, + "learning_rate": 1.1907628574404528e-07, + "loss": 0.9097, "step": 24339 }, { - "epoch": 0.6897333446683103, + "epoch": 0.9523436888645433, "grad_norm": 0.0, - "learning_rate": 4.638167918353057e-06, - "loss": 0.8754, + "learning_rate": 1.1888138657250136e-07, + "loss": 0.9707, "step": 24340 }, { - "epoch": 0.6897616821105726, + "epoch": 0.9523828155567728, "grad_norm": 0.0, - "learning_rate": 4.637393233519038e-06, - "loss": 0.7512, + "learning_rate": 1.1868664607934121e-07, + "loss": 0.8904, "step": 24341 }, { - "epoch": 0.6897900195528351, + "epoch": 0.9524219422490022, "grad_norm": 0.0, - "learning_rate": 4.636618593856249e-06, - "loss": 0.7931, + "learning_rate": 1.1849206426769233e-07, + "loss": 0.9095, "step": 24342 }, { - "epoch": 0.6898183569950976, + "epoch": 0.9524610689412317, "grad_norm": 0.0, - "learning_rate": 4.635843999371212e-06, - "loss": 0.952, + "learning_rate": 1.1829764114067666e-07, + "loss": 0.9659, "step": 24343 }, { - "epoch": 0.6898466944373601, + "epoch": 0.9525001956334611, "grad_norm": 0.0, - "learning_rate": 4.635069450070453e-06, - "loss": 0.9039, + "learning_rate": 1.1810337670141947e-07, + "loss": 1.0351, "step": 24344 }, { - "epoch": 0.6898750318796225, + "epoch": 0.9525393223256906, "grad_norm": 0.0, - "learning_rate": 4.634294945960497e-06, - "loss": 0.8186, + "learning_rate": 1.179092709530405e-07, + "loss": 1.0283, "step": 24345 }, { - "epoch": 0.689903369321885, + "epoch": 0.95257844901792, "grad_norm": 0.0, - "learning_rate": 4.63352048704787e-06, - "loss": 0.7511, + "learning_rate": 1.1771532389865393e-07, + "loss": 0.9743, "step": 24346 }, { - "epoch": 0.6899317067641475, + "epoch": 0.9526175757101495, "grad_norm": 0.0, - "learning_rate": 4.63274607333909e-06, - "loss": 0.9048, + "learning_rate": 1.1752153554137724e-07, + "loss": 1.0823, "step": 24347 }, { - "epoch": 0.6899600442064099, + "epoch": 0.9526567024023789, "grad_norm": 0.0, - "learning_rate": 4.631971704840685e-06, - "loss": 0.841, + "learning_rate": 1.1732790588432019e-07, + "loss": 0.9578, "step": 24348 }, { - "epoch": 0.6899883816486724, + "epoch": 0.9526958290946084, "grad_norm": 0.0, - "learning_rate": 4.631197381559173e-06, - "loss": 0.7938, + "learning_rate": 1.1713443493059473e-07, + "loss": 1.0412, "step": 24349 }, { - "epoch": 0.6900167190909349, + "epoch": 0.9527349557868378, "grad_norm": 0.0, - "learning_rate": 4.6304231035010795e-06, - "loss": 0.8701, + "learning_rate": 1.1694112268330505e-07, + "loss": 0.9704, "step": 24350 }, { - "epoch": 0.6900450565331974, + "epoch": 0.9527740824790673, "grad_norm": 0.0, - "learning_rate": 4.6296488706729306e-06, - "loss": 0.9123, + "learning_rate": 1.1674796914555753e-07, + "loss": 1.017, "step": 24351 }, { - "epoch": 0.6900733939754597, + "epoch": 0.9528132091712966, "grad_norm": 0.0, - "learning_rate": 4.6288746830812385e-06, - "loss": 0.8026, + "learning_rate": 1.1655497432045193e-07, + "loss": 0.984, "step": 24352 }, { - "epoch": 0.6901017314177222, + "epoch": 0.9528523358635261, "grad_norm": 0.0, - "learning_rate": 4.628100540732533e-06, - "loss": 0.7885, + "learning_rate": 1.1636213821109021e-07, + "loss": 0.9857, "step": 24353 }, { - "epoch": 0.6901300688599847, + "epoch": 0.9528914625557555, "grad_norm": 0.0, - "learning_rate": 4.627326443633327e-06, - "loss": 0.783, + "learning_rate": 1.1616946082056657e-07, + "loss": 0.9828, "step": 24354 }, { - "epoch": 0.6901584063022471, + "epoch": 0.952930589247985, "grad_norm": 0.0, - "learning_rate": 4.6265523917901476e-06, - "loss": 0.8809, + "learning_rate": 1.1597694215197741e-07, + "loss": 0.8132, "step": 24355 }, { - "epoch": 0.6901867437445096, + "epoch": 0.9529697159402144, "grad_norm": 0.0, - "learning_rate": 4.6257783852095116e-06, - "loss": 0.8126, + "learning_rate": 1.1578458220841249e-07, + "loss": 0.7852, "step": 24356 }, { - "epoch": 0.6902150811867721, + "epoch": 0.9530088426324439, "grad_norm": 0.0, - "learning_rate": 4.62500442389794e-06, - "loss": 0.8472, + "learning_rate": 1.1559238099296155e-07, + "loss": 0.8003, "step": 24357 }, { - "epoch": 0.6902434186290345, + "epoch": 0.9530479693246733, "grad_norm": 0.0, - "learning_rate": 4.624230507861952e-06, - "loss": 0.8799, + "learning_rate": 1.1540033850871102e-07, + "loss": 1.0505, "step": 24358 }, { - "epoch": 0.690271756071297, + "epoch": 0.9530870960169028, "grad_norm": 0.0, - "learning_rate": 4.62345663710807e-06, - "loss": 0.8579, + "learning_rate": 1.152084547587462e-07, + "loss": 0.9155, "step": 24359 }, { - "epoch": 0.6903000935135595, + "epoch": 0.9531262227091322, "grad_norm": 0.0, - "learning_rate": 4.622682811642807e-06, - "loss": 0.7657, + "learning_rate": 1.1501672974614575e-07, + "loss": 1.0239, "step": 24360 }, { - "epoch": 0.690328430955822, + "epoch": 0.9531653494013617, "grad_norm": 0.0, - "learning_rate": 4.621909031472687e-06, - "loss": 0.8133, + "learning_rate": 1.1482516347399052e-07, + "loss": 0.8963, "step": 24361 }, { - "epoch": 0.6903567683980844, + "epoch": 0.953204476093591, "grad_norm": 0.0, - "learning_rate": 4.621135296604219e-06, - "loss": 0.857, + "learning_rate": 1.1463375594535697e-07, + "loss": 0.9327, "step": 24362 }, { - "epoch": 0.6903851058403468, + "epoch": 0.9532436027858204, "grad_norm": 0.0, - "learning_rate": 4.620361607043927e-06, - "loss": 0.8423, + "learning_rate": 1.1444250716331706e-07, + "loss": 0.8922, "step": 24363 }, { - "epoch": 0.6904134432826093, + "epoch": 0.9532827294780499, "grad_norm": 0.0, - "learning_rate": 4.61958796279833e-06, - "loss": 0.7735, + "learning_rate": 1.142514171309439e-07, + "loss": 0.9761, "step": 24364 }, { - "epoch": 0.6904417807248717, + "epoch": 0.9533218561702793, "grad_norm": 0.0, - "learning_rate": 4.618814363873938e-06, - "loss": 0.8072, + "learning_rate": 1.1406048585130503e-07, + "loss": 1.0213, "step": 24365 }, { - "epoch": 0.6904701181671342, + "epoch": 0.9533609828625088, "grad_norm": 0.0, - "learning_rate": 4.618040810277271e-06, - "loss": 0.8395, + "learning_rate": 1.1386971332746798e-07, + "loss": 0.9109, "step": 24366 }, { - "epoch": 0.6904984556093967, + "epoch": 0.9534001095547382, "grad_norm": 0.0, - "learning_rate": 4.617267302014845e-06, - "loss": 0.8852, + "learning_rate": 1.1367909956249479e-07, + "loss": 1.042, "step": 24367 }, { - "epoch": 0.6905267930516592, + "epoch": 0.9534392362469677, "grad_norm": 0.0, - "learning_rate": 4.616493839093179e-06, - "loss": 0.8546, + "learning_rate": 1.1348864455944742e-07, + "loss": 0.9161, "step": 24368 }, { - "epoch": 0.6905551304939216, + "epoch": 0.9534783629391971, "grad_norm": 0.0, - "learning_rate": 4.61572042151878e-06, - "loss": 0.7315, + "learning_rate": 1.1329834832138232e-07, + "loss": 0.9468, "step": 24369 }, { - "epoch": 0.6905834679361841, + "epoch": 0.9535174896314266, "grad_norm": 0.0, - "learning_rate": 4.614947049298169e-06, - "loss": 0.8081, + "learning_rate": 1.1310821085135815e-07, + "loss": 0.9229, "step": 24370 }, { - "epoch": 0.6906118053784466, + "epoch": 0.953556616323656, "grad_norm": 0.0, - "learning_rate": 4.614173722437857e-06, - "loss": 0.9023, + "learning_rate": 1.1291823215242693e-07, + "loss": 0.8056, "step": 24371 }, { - "epoch": 0.690640142820709, + "epoch": 0.9535957430158855, "grad_norm": 0.0, - "learning_rate": 4.613400440944364e-06, - "loss": 0.9157, + "learning_rate": 1.1272841222764063e-07, + "loss": 0.9133, "step": 24372 }, { - "epoch": 0.6906684802629715, + "epoch": 0.9536348697081148, "grad_norm": 0.0, - "learning_rate": 4.612627204824196e-06, - "loss": 0.8587, + "learning_rate": 1.1253875108004575e-07, + "loss": 0.8925, "step": 24373 }, { - "epoch": 0.690696817705234, + "epoch": 0.9536739964003443, "grad_norm": 0.0, - "learning_rate": 4.611854014083868e-06, - "loss": 0.967, + "learning_rate": 1.1234924871268871e-07, + "loss": 0.9238, "step": 24374 }, { - "epoch": 0.6907251551474964, + "epoch": 0.9537131230925737, "grad_norm": 0.0, - "learning_rate": 4.6110808687299e-06, - "loss": 0.8281, + "learning_rate": 1.1215990512861263e-07, + "loss": 0.969, "step": 24375 }, { - "epoch": 0.6907534925897588, + "epoch": 0.9537522497848032, "grad_norm": 0.0, - "learning_rate": 4.610307768768796e-06, - "loss": 0.7585, + "learning_rate": 1.1197072033085844e-07, + "loss": 0.9573, "step": 24376 }, { - "epoch": 0.6907818300320213, + "epoch": 0.9537913764770326, "grad_norm": 0.0, - "learning_rate": 4.609534714207073e-06, - "loss": 0.7978, + "learning_rate": 1.1178169432246477e-07, + "loss": 1.0328, "step": 24377 }, { - "epoch": 0.6908101674742838, + "epoch": 0.9538305031692621, "grad_norm": 0.0, - "learning_rate": 4.608761705051238e-06, - "loss": 0.8387, + "learning_rate": 1.1159282710646591e-07, + "loss": 1.0017, "step": 24378 }, { - "epoch": 0.6908385049165462, + "epoch": 0.9538696298614915, "grad_norm": 0.0, - "learning_rate": 4.607988741307804e-06, - "loss": 0.856, + "learning_rate": 1.1140411868589496e-07, + "loss": 0.9076, "step": 24379 }, { - "epoch": 0.6908668423588087, + "epoch": 0.953908756553721, "grad_norm": 0.0, - "learning_rate": 4.607215822983284e-06, - "loss": 0.8561, + "learning_rate": 1.1121556906378394e-07, + "loss": 0.8911, "step": 24380 }, { - "epoch": 0.6908951798010712, + "epoch": 0.9539478832459504, "grad_norm": 0.0, - "learning_rate": 4.606442950084188e-06, - "loss": 0.8262, + "learning_rate": 1.1102717824315823e-07, + "loss": 0.9653, "step": 24381 }, { - "epoch": 0.6909235172433336, + "epoch": 0.9539870099381799, "grad_norm": 0.0, - "learning_rate": 4.605670122617028e-06, - "loss": 0.8621, + "learning_rate": 1.1083894622704539e-07, + "loss": 0.9962, "step": 24382 }, { - "epoch": 0.6909518546855961, + "epoch": 0.9540261366304092, "grad_norm": 0.0, - "learning_rate": 4.60489734058831e-06, - "loss": 0.8127, + "learning_rate": 1.1065087301846633e-07, + "loss": 1.0513, "step": 24383 }, { - "epoch": 0.6909801921278586, + "epoch": 0.9540652633226387, "grad_norm": 0.0, - "learning_rate": 4.604124604004544e-06, - "loss": 0.8774, + "learning_rate": 1.1046295862044309e-07, + "loss": 0.9991, "step": 24384 }, { - "epoch": 0.691008529570121, + "epoch": 0.9541043900148681, "grad_norm": 0.0, - "learning_rate": 4.603351912872245e-06, - "loss": 0.8757, + "learning_rate": 1.1027520303599215e-07, + "loss": 0.8882, "step": 24385 }, { - "epoch": 0.6910368670123834, + "epoch": 0.9541435167070976, "grad_norm": 0.0, - "learning_rate": 4.602579267197912e-06, - "loss": 0.8348, + "learning_rate": 1.1008760626812997e-07, + "loss": 0.9502, "step": 24386 }, { - "epoch": 0.6910652044546459, + "epoch": 0.954182643399327, "grad_norm": 0.0, - "learning_rate": 4.601806666988058e-06, - "loss": 0.895, + "learning_rate": 1.0990016831986639e-07, + "loss": 0.87, "step": 24387 }, { - "epoch": 0.6910935418969084, + "epoch": 0.9542217700915565, "grad_norm": 0.0, - "learning_rate": 4.601034112249195e-06, - "loss": 0.8602, + "learning_rate": 1.0971288919421341e-07, + "loss": 1.0394, "step": 24388 }, { - "epoch": 0.6911218793391708, + "epoch": 0.9542608967837859, "grad_norm": 0.0, - "learning_rate": 4.6002616029878235e-06, - "loss": 0.7779, + "learning_rate": 1.0952576889417866e-07, + "loss": 1.0267, "step": 24389 }, { - "epoch": 0.6911502167814333, + "epoch": 0.9543000234760154, "grad_norm": 0.0, - "learning_rate": 4.599489139210457e-06, - "loss": 0.7506, + "learning_rate": 1.0933880742276748e-07, + "loss": 0.9649, "step": 24390 }, { - "epoch": 0.6911785542236958, + "epoch": 0.9543391501682448, "grad_norm": 0.0, - "learning_rate": 4.598716720923595e-06, - "loss": 0.8923, + "learning_rate": 1.0915200478297972e-07, + "loss": 0.9312, "step": 24391 }, { - "epoch": 0.6912068916659583, + "epoch": 0.9543782768604742, "grad_norm": 0.0, - "learning_rate": 4.597944348133747e-06, - "loss": 0.8405, + "learning_rate": 1.0896536097781741e-07, + "loss": 0.9712, "step": 24392 }, { - "epoch": 0.6912352291082207, + "epoch": 0.9544174035527037, "grad_norm": 0.0, - "learning_rate": 4.597172020847421e-06, - "loss": 0.8204, + "learning_rate": 1.0877887601027704e-07, + "loss": 0.8967, "step": 24393 }, { - "epoch": 0.6912635665504832, + "epoch": 0.954456530244933, "grad_norm": 0.0, - "learning_rate": 4.596399739071121e-06, - "loss": 0.8488, + "learning_rate": 1.085925498833551e-07, + "loss": 1.0131, "step": 24394 }, { - "epoch": 0.6912919039927456, + "epoch": 0.9544956569371625, "grad_norm": 0.0, - "learning_rate": 4.595627502811351e-06, - "loss": 0.6458, + "learning_rate": 1.0840638260004033e-07, + "loss": 0.9425, "step": 24395 }, { - "epoch": 0.691320241435008, + "epoch": 0.9545347836293919, "grad_norm": 0.0, - "learning_rate": 4.594855312074624e-06, - "loss": 0.937, + "learning_rate": 1.0822037416332475e-07, + "loss": 1.0009, "step": 24396 }, { - "epoch": 0.6913485788772705, + "epoch": 0.9545739103216214, "grad_norm": 0.0, - "learning_rate": 4.594083166867433e-06, - "loss": 0.82, + "learning_rate": 1.080345245761949e-07, + "loss": 1.002, "step": 24397 }, { - "epoch": 0.691376916319533, + "epoch": 0.9546130370138508, "grad_norm": 0.0, - "learning_rate": 4.59331106719629e-06, - "loss": 0.7493, + "learning_rate": 1.0784883384163502e-07, + "loss": 0.9162, "step": 24398 }, { - "epoch": 0.6914052537617955, + "epoch": 0.9546521637060803, "grad_norm": 0.0, - "learning_rate": 4.592539013067692e-06, - "loss": 0.8642, + "learning_rate": 1.0766330196262831e-07, + "loss": 0.9742, "step": 24399 }, { - "epoch": 0.6914335912040579, + "epoch": 0.9546912903983097, "grad_norm": 0.0, - "learning_rate": 4.591767004488147e-06, - "loss": 0.7804, + "learning_rate": 1.0747792894215236e-07, + "loss": 0.9219, "step": 24400 }, { - "epoch": 0.6914619286463204, + "epoch": 0.9547304170905392, "grad_norm": 0.0, - "learning_rate": 4.590995041464159e-06, - "loss": 0.8186, + "learning_rate": 1.0729271478318481e-07, + "loss": 0.8666, "step": 24401 }, { - "epoch": 0.6914902660885829, + "epoch": 0.9547695437827686, "grad_norm": 0.0, - "learning_rate": 4.590223124002225e-06, - "loss": 0.9201, + "learning_rate": 1.0710765948869995e-07, + "loss": 0.9094, "step": 24402 }, { - "epoch": 0.6915186035308453, + "epoch": 0.9548086704749981, "grad_norm": 0.0, - "learning_rate": 4.589451252108851e-06, - "loss": 0.8602, + "learning_rate": 1.0692276306166982e-07, + "loss": 1.0022, "step": 24403 }, { - "epoch": 0.6915469409731078, + "epoch": 0.9548477971672275, "grad_norm": 0.0, - "learning_rate": 4.588679425790542e-06, - "loss": 0.7563, + "learning_rate": 1.0673802550506318e-07, + "loss": 1.0214, "step": 24404 }, { - "epoch": 0.6915752784153703, + "epoch": 0.954886923859457, "grad_norm": 0.0, - "learning_rate": 4.587907645053792e-06, - "loss": 0.7919, + "learning_rate": 1.0655344682184764e-07, + "loss": 0.9637, "step": 24405 }, { - "epoch": 0.6916036158576326, + "epoch": 0.9549260505516863, "grad_norm": 0.0, - "learning_rate": 4.587135909905105e-06, - "loss": 0.7411, + "learning_rate": 1.063690270149853e-07, + "loss": 0.9537, "step": 24406 }, { - "epoch": 0.6916319532998951, + "epoch": 0.9549651772439158, "grad_norm": 0.0, - "learning_rate": 4.5863642203509826e-06, - "loss": 0.806, + "learning_rate": 1.0618476608744043e-07, + "loss": 0.9913, "step": 24407 }, { - "epoch": 0.6916602907421576, + "epoch": 0.9550043039361452, "grad_norm": 0.0, - "learning_rate": 4.585592576397925e-06, - "loss": 0.7857, + "learning_rate": 1.0600066404216957e-07, + "loss": 0.9679, "step": 24408 }, { - "epoch": 0.6916886281844201, + "epoch": 0.9550434306283747, "grad_norm": 0.0, - "learning_rate": 4.584820978052434e-06, - "loss": 0.7303, + "learning_rate": 1.0581672088213036e-07, + "loss": 0.8873, "step": 24409 }, { - "epoch": 0.6917169656266825, + "epoch": 0.9550825573206041, "grad_norm": 0.0, - "learning_rate": 4.584049425321006e-06, - "loss": 0.7886, + "learning_rate": 1.0563293661027596e-07, + "loss": 0.9474, "step": 24410 }, { - "epoch": 0.691745303068945, + "epoch": 0.9551216840128336, "grad_norm": 0.0, - "learning_rate": 4.583277918210142e-06, - "loss": 0.7387, + "learning_rate": 1.054493112295596e-07, + "loss": 0.8578, "step": 24411 }, { - "epoch": 0.6917736405112075, + "epoch": 0.955160810705063, "grad_norm": 0.0, - "learning_rate": 4.582506456726337e-06, - "loss": 0.7961, + "learning_rate": 1.0526584474292778e-07, + "loss": 1.0257, "step": 24412 }, { - "epoch": 0.6918019779534699, + "epoch": 0.9551999373972925, "grad_norm": 0.0, - "learning_rate": 4.581735040876091e-06, - "loss": 0.9285, + "learning_rate": 1.0508253715332817e-07, + "loss": 1.0294, "step": 24413 }, { - "epoch": 0.6918303153957324, + "epoch": 0.9552390640895219, "grad_norm": 0.0, - "learning_rate": 4.580963670665906e-06, - "loss": 0.852, + "learning_rate": 1.0489938846370285e-07, + "loss": 1.0148, "step": 24414 }, { - "epoch": 0.6918586528379949, + "epoch": 0.9552781907817514, "grad_norm": 0.0, - "learning_rate": 4.580192346102275e-06, - "loss": 0.8676, + "learning_rate": 1.0471639867699612e-07, + "loss": 0.9982, "step": 24415 }, { - "epoch": 0.6918869902802574, + "epoch": 0.9553173174739807, "grad_norm": 0.0, - "learning_rate": 4.579421067191695e-06, - "loss": 0.8324, + "learning_rate": 1.0453356779614233e-07, + "loss": 1.0839, "step": 24416 }, { - "epoch": 0.6919153277225197, + "epoch": 0.9553564441662102, "grad_norm": 0.0, - "learning_rate": 4.578649833940667e-06, - "loss": 0.9248, + "learning_rate": 1.0435089582408131e-07, + "loss": 0.9931, "step": 24417 }, { - "epoch": 0.6919436651647822, + "epoch": 0.9553955708584396, "grad_norm": 0.0, - "learning_rate": 4.577878646355682e-06, - "loss": 0.8715, + "learning_rate": 1.041683827637452e-07, + "loss": 0.8781, "step": 24418 }, { - "epoch": 0.6919720026070447, + "epoch": 0.9554346975506691, "grad_norm": 0.0, - "learning_rate": 4.577107504443239e-06, - "loss": 0.8615, + "learning_rate": 1.0398602861806383e-07, + "loss": 0.9516, "step": 24419 }, { - "epoch": 0.6920003400493071, + "epoch": 0.9554738242428985, "grad_norm": 0.0, - "learning_rate": 4.5763364082098326e-06, - "loss": 0.8206, + "learning_rate": 1.0380383338996603e-07, + "loss": 0.944, "step": 24420 }, { - "epoch": 0.6920286774915696, + "epoch": 0.9555129509351279, "grad_norm": 0.0, - "learning_rate": 4.575565357661958e-06, - "loss": 0.7541, + "learning_rate": 1.036217970823783e-07, + "loss": 1.0328, "step": 24421 }, { - "epoch": 0.6920570149338321, + "epoch": 0.9555520776273574, "grad_norm": 0.0, - "learning_rate": 4.574794352806116e-06, - "loss": 0.9009, + "learning_rate": 1.0343991969822387e-07, + "loss": 0.9092, "step": 24422 }, { - "epoch": 0.6920853523760946, + "epoch": 0.9555912043195868, "grad_norm": 0.0, - "learning_rate": 4.574023393648791e-06, - "loss": 0.7546, + "learning_rate": 1.0325820124042374e-07, + "loss": 0.9639, "step": 24423 }, { - "epoch": 0.692113689818357, + "epoch": 0.9556303310118163, "grad_norm": 0.0, - "learning_rate": 4.5732524801964815e-06, - "loss": 0.9113, + "learning_rate": 1.0307664171189446e-07, + "loss": 0.9314, "step": 24424 }, { - "epoch": 0.6921420272606195, + "epoch": 0.9556694577040457, "grad_norm": 0.0, - "learning_rate": 4.5724816124556856e-06, - "loss": 0.7569, + "learning_rate": 1.0289524111555371e-07, + "loss": 1.0127, "step": 24425 }, { - "epoch": 0.692170364702882, + "epoch": 0.9557085843962752, "grad_norm": 0.0, - "learning_rate": 4.571710790432889e-06, - "loss": 0.7384, + "learning_rate": 1.027139994543136e-07, + "loss": 0.9656, "step": 24426 }, { - "epoch": 0.6921987021451443, + "epoch": 0.9557477110885045, "grad_norm": 0.0, - "learning_rate": 4.570940014134593e-06, - "loss": 0.8245, + "learning_rate": 1.0253291673108512e-07, + "loss": 0.9867, "step": 24427 }, { - "epoch": 0.6922270395874068, + "epoch": 0.955786837780734, "grad_norm": 0.0, - "learning_rate": 4.57016928356728e-06, - "loss": 0.8021, + "learning_rate": 1.0235199294877374e-07, + "loss": 0.9221, "step": 24428 }, { - "epoch": 0.6922553770296693, + "epoch": 0.9558259644729634, "grad_norm": 0.0, - "learning_rate": 4.569398598737448e-06, - "loss": 0.8049, + "learning_rate": 1.0217122811028934e-07, + "loss": 0.852, "step": 24429 }, { - "epoch": 0.6922837144719317, + "epoch": 0.9558650911651929, "grad_norm": 0.0, - "learning_rate": 4.568627959651593e-06, - "loss": 0.8147, + "learning_rate": 1.0199062221853074e-07, + "loss": 0.9476, "step": 24430 }, { - "epoch": 0.6923120519141942, + "epoch": 0.9559042178574223, "grad_norm": 0.0, - "learning_rate": 4.567857366316196e-06, - "loss": 0.8178, + "learning_rate": 1.0181017527640114e-07, + "loss": 0.8773, "step": 24431 }, { - "epoch": 0.6923403893564567, + "epoch": 0.9559433445496518, "grad_norm": 0.0, - "learning_rate": 4.567086818737754e-06, - "loss": 0.866, + "learning_rate": 1.016298872867949e-07, + "loss": 1.0283, "step": 24432 }, { - "epoch": 0.6923687267987192, + "epoch": 0.9559824712418812, "grad_norm": 0.0, - "learning_rate": 4.566316316922758e-06, - "loss": 0.9218, + "learning_rate": 1.0144975825261083e-07, + "loss": 1.0731, "step": 24433 }, { - "epoch": 0.6923970642409816, + "epoch": 0.9560215979341107, "grad_norm": 0.0, - "learning_rate": 4.565545860877697e-06, - "loss": 0.8558, + "learning_rate": 1.0126978817673994e-07, + "loss": 0.8925, "step": 24434 }, { - "epoch": 0.6924254016832441, + "epoch": 0.9560607246263401, "grad_norm": 0.0, - "learning_rate": 4.5647754506090645e-06, - "loss": 0.9095, + "learning_rate": 1.0108997706207324e-07, + "loss": 1.022, "step": 24435 }, { - "epoch": 0.6924537391255066, + "epoch": 0.9560998513185696, "grad_norm": 0.0, - "learning_rate": 4.564005086123343e-06, - "loss": 0.8143, + "learning_rate": 1.0091032491149621e-07, + "loss": 1.0272, "step": 24436 }, { - "epoch": 0.692482076567769, + "epoch": 0.956138978010799, "grad_norm": 0.0, - "learning_rate": 4.563234767427026e-06, - "loss": 0.8952, + "learning_rate": 1.0073083172789543e-07, + "loss": 0.943, "step": 24437 }, { - "epoch": 0.6925104140100314, + "epoch": 0.9561781047030284, "grad_norm": 0.0, - "learning_rate": 4.562464494526605e-06, - "loss": 0.9515, + "learning_rate": 1.0055149751415305e-07, + "loss": 1.1159, "step": 24438 }, { - "epoch": 0.6925387514522939, + "epoch": 0.9562172313952578, "grad_norm": 0.0, - "learning_rate": 4.56169426742856e-06, - "loss": 0.9528, + "learning_rate": 1.0037232227314897e-07, + "loss": 0.8768, "step": 24439 }, { - "epoch": 0.6925670888945564, + "epoch": 0.9562563580874873, "grad_norm": 0.0, - "learning_rate": 4.560924086139389e-06, - "loss": 0.7548, + "learning_rate": 1.0019330600776089e-07, + "loss": 0.998, "step": 24440 }, { - "epoch": 0.6925954263368188, + "epoch": 0.9562954847797167, "grad_norm": 0.0, - "learning_rate": 4.560153950665569e-06, - "loss": 0.8006, + "learning_rate": 1.0001444872086208e-07, + "loss": 0.9794, "step": 24441 }, { - "epoch": 0.6926237637790813, + "epoch": 0.9563346114719462, "grad_norm": 0.0, - "learning_rate": 4.559383861013593e-06, - "loss": 0.7903, + "learning_rate": 9.98357504153269e-08, + "loss": 0.906, "step": 24442 }, { - "epoch": 0.6926521012213438, + "epoch": 0.9563737381641756, "grad_norm": 0.0, - "learning_rate": 4.558613817189951e-06, - "loss": 0.8899, + "learning_rate": 9.965721109402415e-08, + "loss": 0.8988, "step": 24443 }, { - "epoch": 0.6926804386636062, + "epoch": 0.9564128648564051, "grad_norm": 0.0, - "learning_rate": 4.557843819201121e-06, - "loss": 0.8517, + "learning_rate": 9.947883075981934e-08, + "loss": 0.9733, "step": 24444 }, { - "epoch": 0.6927087761058687, + "epoch": 0.9564519915486345, "grad_norm": 0.0, - "learning_rate": 4.5570738670535944e-06, - "loss": 0.8, + "learning_rate": 9.930060941557907e-08, + "loss": 0.9901, "step": 24445 }, { - "epoch": 0.6927371135481312, + "epoch": 0.956491118240864, "grad_norm": 0.0, - "learning_rate": 4.556303960753855e-06, - "loss": 0.7974, + "learning_rate": 9.912254706416546e-08, + "loss": 0.8566, "step": 24446 }, { - "epoch": 0.6927654509903937, + "epoch": 0.9565302449330934, "grad_norm": 0.0, - "learning_rate": 4.55553410030839e-06, - "loss": 0.8634, + "learning_rate": 9.894464370843626e-08, + "loss": 0.9263, "step": 24447 }, { - "epoch": 0.692793788432656, + "epoch": 0.9565693716253229, "grad_norm": 0.0, - "learning_rate": 4.554764285723685e-06, - "loss": 0.6914, + "learning_rate": 9.876689935125027e-08, + "loss": 0.9413, "step": 24448 }, { - "epoch": 0.6928221258749185, + "epoch": 0.9566084983175522, "grad_norm": 0.0, - "learning_rate": 4.553994517006219e-06, - "loss": 0.7563, + "learning_rate": 9.858931399546079e-08, + "loss": 1.004, "step": 24449 }, { - "epoch": 0.692850463317181, + "epoch": 0.9566476250097816, "grad_norm": 0.0, - "learning_rate": 4.553224794162481e-06, - "loss": 0.7893, + "learning_rate": 9.841188764391996e-08, + "loss": 0.9507, "step": 24450 }, { - "epoch": 0.6928788007594434, + "epoch": 0.9566867517020111, "grad_norm": 0.0, - "learning_rate": 4.552455117198955e-06, - "loss": 0.7944, + "learning_rate": 9.82346202994755e-08, + "loss": 0.9977, "step": 24451 }, { - "epoch": 0.6929071382017059, + "epoch": 0.9567258783942405, "grad_norm": 0.0, - "learning_rate": 4.55168548612212e-06, - "loss": 0.8533, + "learning_rate": 9.805751196497737e-08, + "loss": 0.7929, "step": 24452 }, { - "epoch": 0.6929354756439684, + "epoch": 0.95676500508647, "grad_norm": 0.0, - "learning_rate": 4.55091590093846e-06, - "loss": 0.7368, + "learning_rate": 9.788056264326661e-08, + "loss": 1.0478, "step": 24453 }, { - "epoch": 0.6929638130862308, + "epoch": 0.9568041317786994, "grad_norm": 0.0, - "learning_rate": 4.550146361654465e-06, - "loss": 0.8181, + "learning_rate": 9.770377233718543e-08, + "loss": 0.9885, "step": 24454 }, { - "epoch": 0.6929921505284933, + "epoch": 0.9568432584709289, "grad_norm": 0.0, - "learning_rate": 4.549376868276606e-06, - "loss": 0.8238, + "learning_rate": 9.752714104957372e-08, + "loss": 0.984, "step": 24455 }, { - "epoch": 0.6930204879707558, + "epoch": 0.9568823851631583, "grad_norm": 0.0, - "learning_rate": 4.548607420811373e-06, - "loss": 0.7987, + "learning_rate": 9.735066878326816e-08, + "loss": 0.957, "step": 24456 }, { - "epoch": 0.6930488254130183, + "epoch": 0.9569215118553878, "grad_norm": 0.0, - "learning_rate": 4.547838019265241e-06, - "loss": 0.7644, + "learning_rate": 9.7174355541102e-08, + "loss": 1.0522, "step": 24457 }, { - "epoch": 0.6930771628552806, + "epoch": 0.9569606385476171, "grad_norm": 0.0, - "learning_rate": 4.5470686636446924e-06, - "loss": 0.8901, + "learning_rate": 9.699820132590742e-08, + "loss": 0.8881, "step": 24458 }, { - "epoch": 0.6931055002975431, + "epoch": 0.9569997652398466, "grad_norm": 0.0, - "learning_rate": 4.546299353956211e-06, - "loss": 0.8289, + "learning_rate": 9.682220614051108e-08, + "loss": 0.8724, "step": 24459 }, { - "epoch": 0.6931338377398056, + "epoch": 0.957038891932076, "grad_norm": 0.0, - "learning_rate": 4.545530090206274e-06, - "loss": 0.821, + "learning_rate": 9.664636998774179e-08, + "loss": 1.0157, "step": 24460 }, { - "epoch": 0.693162175182068, + "epoch": 0.9570780186243055, "grad_norm": 0.0, - "learning_rate": 4.544760872401364e-06, - "loss": 0.7477, + "learning_rate": 9.647069287042176e-08, + "loss": 1.0169, "step": 24461 }, { - "epoch": 0.6931905126243305, + "epoch": 0.9571171453165349, "grad_norm": 0.0, - "learning_rate": 4.543991700547962e-06, - "loss": 0.8624, + "learning_rate": 9.629517479137318e-08, + "loss": 0.9716, "step": 24462 }, { - "epoch": 0.693218850066593, + "epoch": 0.9571562720087644, "grad_norm": 0.0, - "learning_rate": 4.54322257465254e-06, - "loss": 0.8956, + "learning_rate": 9.611981575341267e-08, + "loss": 1.0997, "step": 24463 }, { - "epoch": 0.6932471875088555, + "epoch": 0.9571953987009938, "grad_norm": 0.0, - "learning_rate": 4.542453494721583e-06, - "loss": 0.8037, + "learning_rate": 9.594461575935909e-08, + "loss": 0.9282, "step": 24464 }, { - "epoch": 0.6932755249511179, + "epoch": 0.9572345253932233, "grad_norm": 0.0, - "learning_rate": 4.541684460761565e-06, - "loss": 0.8761, + "learning_rate": 9.576957481202353e-08, + "loss": 1.0157, "step": 24465 }, { - "epoch": 0.6933038623933804, + "epoch": 0.9572736520854527, "grad_norm": 0.0, - "learning_rate": 4.540915472778965e-06, - "loss": 0.7361, + "learning_rate": 9.55946929142193e-08, + "loss": 0.9649, "step": 24466 }, { - "epoch": 0.6933321998356429, + "epoch": 0.9573127787776822, "grad_norm": 0.0, - "learning_rate": 4.540146530780266e-06, - "loss": 0.7799, + "learning_rate": 9.541997006875192e-08, + "loss": 0.9731, "step": 24467 }, { - "epoch": 0.6933605372779053, + "epoch": 0.9573519054699116, "grad_norm": 0.0, - "learning_rate": 4.539377634771935e-06, - "loss": 0.7227, + "learning_rate": 9.524540627842915e-08, + "loss": 0.9422, "step": 24468 }, { - "epoch": 0.6933888747201677, + "epoch": 0.957391032162141, "grad_norm": 0.0, - "learning_rate": 4.538608784760459e-06, - "loss": 0.8911, + "learning_rate": 9.50710015460543e-08, + "loss": 0.9245, "step": 24469 }, { - "epoch": 0.6934172121624302, + "epoch": 0.9574301588543704, "grad_norm": 0.0, - "learning_rate": 4.537839980752305e-06, - "loss": 0.6876, + "learning_rate": 9.489675587442737e-08, + "loss": 0.883, "step": 24470 }, { - "epoch": 0.6934455496046927, + "epoch": 0.9574692855465999, "grad_norm": 0.0, - "learning_rate": 4.537071222753953e-06, - "loss": 0.7824, + "learning_rate": 9.47226692663461e-08, + "loss": 0.9231, "step": 24471 }, { - "epoch": 0.6934738870469551, + "epoch": 0.9575084122388293, "grad_norm": 0.0, - "learning_rate": 4.5363025107718775e-06, - "loss": 0.838, + "learning_rate": 9.454874172460715e-08, + "loss": 1.0245, "step": 24472 }, { - "epoch": 0.6935022244892176, + "epoch": 0.9575475389310588, "grad_norm": 0.0, - "learning_rate": 4.5355338448125566e-06, - "loss": 0.8769, + "learning_rate": 9.437497325200385e-08, + "loss": 0.937, "step": 24473 }, { - "epoch": 0.6935305619314801, + "epoch": 0.9575866656232882, "grad_norm": 0.0, - "learning_rate": 4.534765224882463e-06, - "loss": 0.8855, + "learning_rate": 9.420136385132505e-08, + "loss": 0.9988, "step": 24474 }, { - "epoch": 0.6935588993737425, + "epoch": 0.9576257923155177, "grad_norm": 0.0, - "learning_rate": 4.533996650988074e-06, - "loss": 0.9066, + "learning_rate": 9.402791352536078e-08, + "loss": 1.0546, "step": 24475 }, { - "epoch": 0.693587236816005, + "epoch": 0.9576649190077471, "grad_norm": 0.0, - "learning_rate": 4.533228123135858e-06, - "loss": 0.7601, + "learning_rate": 9.385462227689435e-08, + "loss": 0.9271, "step": 24476 }, { - "epoch": 0.6936155742582675, + "epoch": 0.9577040456999765, "grad_norm": 0.0, - "learning_rate": 4.532459641332295e-06, - "loss": 0.8024, + "learning_rate": 9.368149010870909e-08, + "loss": 0.9547, "step": 24477 }, { - "epoch": 0.6936439117005299, + "epoch": 0.957743172392206, "grad_norm": 0.0, - "learning_rate": 4.531691205583852e-06, - "loss": 0.7986, + "learning_rate": 9.350851702358721e-08, + "loss": 0.9664, "step": 24478 }, { - "epoch": 0.6936722491427924, + "epoch": 0.9577822990844354, "grad_norm": 0.0, - "learning_rate": 4.530922815897003e-06, - "loss": 0.8666, + "learning_rate": 9.333570302430428e-08, + "loss": 0.8338, "step": 24479 }, { - "epoch": 0.6937005865850548, + "epoch": 0.9578214257766648, "grad_norm": 0.0, - "learning_rate": 4.530154472278228e-06, - "loss": 0.808, + "learning_rate": 9.316304811363696e-08, + "loss": 0.9222, "step": 24480 }, { - "epoch": 0.6937289240273173, + "epoch": 0.9578605524688942, "grad_norm": 0.0, - "learning_rate": 4.529386174733987e-06, - "loss": 0.8114, + "learning_rate": 9.299055229435527e-08, + "loss": 0.8606, "step": 24481 }, { - "epoch": 0.6937572614695797, + "epoch": 0.9578996791611237, "grad_norm": 0.0, - "learning_rate": 4.528617923270758e-06, - "loss": 0.8641, + "learning_rate": 9.281821556923365e-08, + "loss": 0.8335, "step": 24482 }, { - "epoch": 0.6937855989118422, + "epoch": 0.9579388058533531, "grad_norm": 0.0, - "learning_rate": 4.527849717895017e-06, - "loss": 0.8674, + "learning_rate": 9.264603794103544e-08, + "loss": 1.0286, "step": 24483 }, { - "epoch": 0.6938139363541047, + "epoch": 0.9579779325455826, "grad_norm": 0.0, - "learning_rate": 4.527081558613225e-06, - "loss": 0.7909, + "learning_rate": 9.247401941252732e-08, + "loss": 0.9164, "step": 24484 }, { - "epoch": 0.6938422737963671, + "epoch": 0.958017059237812, "grad_norm": 0.0, - "learning_rate": 4.526313445431858e-06, - "loss": 0.8614, + "learning_rate": 9.230215998647263e-08, + "loss": 0.8877, "step": 24485 }, { - "epoch": 0.6938706112386296, + "epoch": 0.9580561859300415, "grad_norm": 0.0, - "learning_rate": 4.525545378357385e-06, - "loss": 0.7701, + "learning_rate": 9.213045966562917e-08, + "loss": 1.1136, "step": 24486 }, { - "epoch": 0.6938989486808921, + "epoch": 0.9580953126222709, "grad_norm": 0.0, - "learning_rate": 4.524777357396277e-06, - "loss": 0.9305, + "learning_rate": 9.195891845275473e-08, + "loss": 0.9679, "step": 24487 }, { - "epoch": 0.6939272861231546, + "epoch": 0.9581344393145004, "grad_norm": 0.0, - "learning_rate": 4.524009382555005e-06, - "loss": 0.8597, + "learning_rate": 9.1787536350606e-08, + "loss": 0.9563, "step": 24488 }, { - "epoch": 0.693955623565417, + "epoch": 0.9581735660067298, "grad_norm": 0.0, - "learning_rate": 4.5232414538400336e-06, - "loss": 0.7855, + "learning_rate": 9.1616313361933e-08, + "loss": 0.8988, "step": 24489 }, { - "epoch": 0.6939839610076795, + "epoch": 0.9582126926989593, "grad_norm": 0.0, - "learning_rate": 4.522473571257836e-06, - "loss": 0.7554, + "learning_rate": 9.144524948948685e-08, + "loss": 0.9706, "step": 24490 }, { - "epoch": 0.694012298449942, + "epoch": 0.9582518193911886, "grad_norm": 0.0, - "learning_rate": 4.521705734814873e-06, - "loss": 0.8151, + "learning_rate": 9.127434473601315e-08, + "loss": 1.0472, "step": 24491 }, { - "epoch": 0.6940406358922043, + "epoch": 0.9582909460834181, "grad_norm": 0.0, - "learning_rate": 4.520937944517617e-06, - "loss": 0.8307, + "learning_rate": 9.110359910425748e-08, + "loss": 0.8328, "step": 24492 }, { - "epoch": 0.6940689733344668, + "epoch": 0.9583300727756475, "grad_norm": 0.0, - "learning_rate": 4.52017020037254e-06, - "loss": 0.8589, + "learning_rate": 9.093301259696096e-08, + "loss": 0.9216, "step": 24493 }, { - "epoch": 0.6940973107767293, + "epoch": 0.958369199467877, "grad_norm": 0.0, - "learning_rate": 4.5194025023861e-06, - "loss": 0.7667, + "learning_rate": 9.076258521686366e-08, + "loss": 0.94, "step": 24494 }, { - "epoch": 0.6941256482189918, + "epoch": 0.9584083261601064, "grad_norm": 0.0, - "learning_rate": 4.518634850564768e-06, - "loss": 0.7919, + "learning_rate": 9.059231696670334e-08, + "loss": 0.9345, "step": 24495 }, { - "epoch": 0.6941539856612542, + "epoch": 0.9584474528523359, "grad_norm": 0.0, - "learning_rate": 4.517867244915014e-06, - "loss": 0.8017, + "learning_rate": 9.04222078492123e-08, + "loss": 0.9412, "step": 24496 }, { - "epoch": 0.6941823231035167, + "epoch": 0.9584865795445653, "grad_norm": 0.0, - "learning_rate": 4.5170996854432955e-06, - "loss": 0.7976, + "learning_rate": 9.025225786712388e-08, + "loss": 1.1846, "step": 24497 }, { - "epoch": 0.6942106605457792, + "epoch": 0.9585257062367948, "grad_norm": 0.0, - "learning_rate": 4.5163321721560824e-06, - "loss": 0.8747, + "learning_rate": 9.008246702316481e-08, + "loss": 0.8776, "step": 24498 }, { - "epoch": 0.6942389979880416, + "epoch": 0.9585648329290242, "grad_norm": 0.0, - "learning_rate": 4.515564705059841e-06, - "loss": 0.8651, + "learning_rate": 8.991283532006511e-08, + "loss": 0.9915, "step": 24499 }, { - "epoch": 0.6942673354303041, + "epoch": 0.9586039596212537, "grad_norm": 0.0, - "learning_rate": 4.514797284161033e-06, - "loss": 0.8446, + "learning_rate": 8.974336276054707e-08, + "loss": 1.0645, "step": 24500 }, { - "epoch": 0.6942956728725665, + "epoch": 0.958643086313483, "grad_norm": 0.0, - "learning_rate": 4.51402990946613e-06, - "loss": 0.8749, + "learning_rate": 8.957404934733183e-08, + "loss": 1.0706, "step": 24501 }, { - "epoch": 0.6943240103148289, + "epoch": 0.9586822130057125, "grad_norm": 0.0, - "learning_rate": 4.513262580981586e-06, - "loss": 0.7871, + "learning_rate": 8.940489508313944e-08, + "loss": 0.8938, "step": 24502 }, { - "epoch": 0.6943523477570914, + "epoch": 0.9587213396979419, "grad_norm": 0.0, - "learning_rate": 4.512495298713867e-06, - "loss": 0.8164, + "learning_rate": 8.923589997068549e-08, + "loss": 0.8873, "step": 24503 }, { - "epoch": 0.6943806851993539, + "epoch": 0.9587604663901714, "grad_norm": 0.0, - "learning_rate": 4.511728062669443e-06, - "loss": 0.8527, + "learning_rate": 8.906706401268339e-08, + "loss": 0.8984, "step": 24504 }, { - "epoch": 0.6944090226416164, + "epoch": 0.9587995930824008, "grad_norm": 0.0, - "learning_rate": 4.510960872854767e-06, - "loss": 0.8697, + "learning_rate": 8.889838721184652e-08, + "loss": 0.9448, "step": 24505 }, { - "epoch": 0.6944373600838788, + "epoch": 0.9588387197746302, "grad_norm": 0.0, - "learning_rate": 4.510193729276311e-06, - "loss": 0.8209, + "learning_rate": 8.872986957088159e-08, + "loss": 0.8866, "step": 24506 }, { - "epoch": 0.6944656975261413, + "epoch": 0.9588778464668597, "grad_norm": 0.0, - "learning_rate": 4.509426631940527e-06, - "loss": 0.8811, + "learning_rate": 8.856151109249533e-08, + "loss": 1.0053, "step": 24507 }, { - "epoch": 0.6944940349684038, + "epoch": 0.9589169731590891, "grad_norm": 0.0, - "learning_rate": 4.508659580853881e-06, - "loss": 0.8425, + "learning_rate": 8.839331177939114e-08, + "loss": 0.8881, "step": 24508 }, { - "epoch": 0.6945223724106662, + "epoch": 0.9589560998513186, "grad_norm": 0.0, - "learning_rate": 4.507892576022838e-06, - "loss": 0.8857, + "learning_rate": 8.822527163427131e-08, + "loss": 0.9311, "step": 24509 }, { - "epoch": 0.6945507098529287, + "epoch": 0.958995226543548, "grad_norm": 0.0, - "learning_rate": 4.5071256174538535e-06, - "loss": 0.6905, + "learning_rate": 8.805739065983254e-08, + "loss": 0.8551, "step": 24510 }, { - "epoch": 0.6945790472951912, + "epoch": 0.9590343532357775, "grad_norm": 0.0, - "learning_rate": 4.5063587051533885e-06, - "loss": 0.8197, + "learning_rate": 8.78896688587727e-08, + "loss": 1.0584, "step": 24511 }, { - "epoch": 0.6946073847374536, + "epoch": 0.9590734799280068, "grad_norm": 0.0, - "learning_rate": 4.505591839127904e-06, - "loss": 0.7452, + "learning_rate": 8.772210623378297e-08, + "loss": 0.9682, "step": 24512 }, { - "epoch": 0.694635722179716, + "epoch": 0.9591126066202363, "grad_norm": 0.0, - "learning_rate": 4.504825019383861e-06, - "loss": 0.8799, + "learning_rate": 8.755470278755674e-08, + "loss": 0.9666, "step": 24513 }, { - "epoch": 0.6946640596219785, + "epoch": 0.9591517333124657, "grad_norm": 0.0, - "learning_rate": 4.5040582459277194e-06, - "loss": 0.8501, + "learning_rate": 8.738745852278075e-08, + "loss": 1.0016, "step": 24514 }, { - "epoch": 0.694692397064241, + "epoch": 0.9591908600046952, "grad_norm": 0.0, - "learning_rate": 4.5032915187659334e-06, - "loss": 0.7217, + "learning_rate": 8.722037344214062e-08, + "loss": 0.7907, "step": 24515 }, { - "epoch": 0.6947207345065034, + "epoch": 0.9592299866969246, "grad_norm": 0.0, - "learning_rate": 4.502524837904964e-06, - "loss": 0.8811, + "learning_rate": 8.705344754831979e-08, + "loss": 0.9384, "step": 24516 }, { - "epoch": 0.6947490719487659, + "epoch": 0.9592691133891541, "grad_norm": 0.0, - "learning_rate": 4.501758203351272e-06, - "loss": 0.7791, + "learning_rate": 8.688668084399942e-08, + "loss": 0.9679, "step": 24517 }, { - "epoch": 0.6947774093910284, + "epoch": 0.9593082400813835, "grad_norm": 0.0, - "learning_rate": 4.50099161511131e-06, - "loss": 0.7669, + "learning_rate": 8.672007333185628e-08, + "loss": 1.1091, "step": 24518 }, { - "epoch": 0.6948057468332909, + "epoch": 0.959347366773613, "grad_norm": 0.0, - "learning_rate": 4.50022507319154e-06, - "loss": 0.8418, + "learning_rate": 8.655362501456821e-08, + "loss": 1.037, "step": 24519 }, { - "epoch": 0.6948340842755533, + "epoch": 0.9593864934658424, "grad_norm": 0.0, - "learning_rate": 4.499458577598413e-06, - "loss": 0.8506, + "learning_rate": 8.63873358948053e-08, + "loss": 0.9652, "step": 24520 }, { - "epoch": 0.6948624217178158, + "epoch": 0.9594256201580719, "grad_norm": 0.0, - "learning_rate": 4.49869212833839e-06, - "loss": 0.77, + "learning_rate": 8.622120597523986e-08, + "loss": 0.9344, "step": 24521 }, { - "epoch": 0.6948907591600783, + "epoch": 0.9594647468503013, "grad_norm": 0.0, - "learning_rate": 4.49792572541793e-06, - "loss": 0.9339, + "learning_rate": 8.605523525853865e-08, + "loss": 0.9097, "step": 24522 }, { - "epoch": 0.6949190966023406, + "epoch": 0.9595038735425307, "grad_norm": 0.0, - "learning_rate": 4.49715936884348e-06, - "loss": 0.8961, + "learning_rate": 8.588942374736841e-08, + "loss": 0.9373, "step": 24523 }, { - "epoch": 0.6949474340446031, + "epoch": 0.9595430002347601, "grad_norm": 0.0, - "learning_rate": 4.4963930586215e-06, - "loss": 0.8488, + "learning_rate": 8.572377144439037e-08, + "loss": 1.0536, "step": 24524 }, { - "epoch": 0.6949757714868656, + "epoch": 0.9595821269269896, "grad_norm": 0.0, - "learning_rate": 4.495626794758445e-06, - "loss": 0.8519, + "learning_rate": 8.555827835226571e-08, + "loss": 0.9643, "step": 24525 }, { - "epoch": 0.695004108929128, + "epoch": 0.959621253619219, "grad_norm": 0.0, - "learning_rate": 4.49486057726077e-06, - "loss": 0.7975, + "learning_rate": 8.53929444736501e-08, + "loss": 0.9103, "step": 24526 }, { - "epoch": 0.6950324463713905, + "epoch": 0.9596603803114485, "grad_norm": 0.0, - "learning_rate": 4.4940944061349325e-06, - "loss": 0.8126, + "learning_rate": 8.522776981120251e-08, + "loss": 0.8263, "step": 24527 }, { - "epoch": 0.695060783813653, + "epoch": 0.9596995070036779, "grad_norm": 0.0, - "learning_rate": 4.493328281387378e-06, - "loss": 0.7981, + "learning_rate": 8.506275436757083e-08, + "loss": 0.9179, "step": 24528 }, { - "epoch": 0.6950891212559155, + "epoch": 0.9597386336959074, "grad_norm": 0.0, - "learning_rate": 4.492562203024565e-06, - "loss": 0.9049, + "learning_rate": 8.48978981454096e-08, + "loss": 0.8109, "step": 24529 }, { - "epoch": 0.6951174586981779, + "epoch": 0.9597777603881368, "grad_norm": 0.0, - "learning_rate": 4.4917961710529486e-06, - "loss": 0.8376, + "learning_rate": 8.473320114736228e-08, + "loss": 0.9792, "step": 24530 }, { - "epoch": 0.6951457961404404, + "epoch": 0.9598168870803663, "grad_norm": 0.0, - "learning_rate": 4.491030185478976e-06, - "loss": 0.9393, + "learning_rate": 8.45686633760745e-08, + "loss": 1.0599, "step": 24531 }, { - "epoch": 0.6951741335827029, + "epoch": 0.9598560137725957, "grad_norm": 0.0, - "learning_rate": 4.4902642463091e-06, - "loss": 0.8826, + "learning_rate": 8.440428483418972e-08, + "loss": 0.9282, "step": 24532 }, { - "epoch": 0.6952024710249652, + "epoch": 0.9598951404648252, "grad_norm": 0.0, - "learning_rate": 4.4894983535497805e-06, - "loss": 0.7919, + "learning_rate": 8.424006552434805e-08, + "loss": 0.9941, "step": 24533 }, { - "epoch": 0.6952308084672277, + "epoch": 0.9599342671570545, "grad_norm": 0.0, - "learning_rate": 4.488732507207457e-06, - "loss": 0.8884, + "learning_rate": 8.407600544918515e-08, + "loss": 0.9862, "step": 24534 }, { - "epoch": 0.6952591459094902, + "epoch": 0.9599733938492839, "grad_norm": 0.0, - "learning_rate": 4.4879667072885906e-06, - "loss": 0.7818, + "learning_rate": 8.39121046113367e-08, + "loss": 1.0316, "step": 24535 }, { - "epoch": 0.6952874833517527, + "epoch": 0.9600125205415134, "grad_norm": 0.0, - "learning_rate": 4.487200953799623e-06, - "loss": 0.7338, + "learning_rate": 8.374836301343393e-08, + "loss": 0.9759, "step": 24536 }, { - "epoch": 0.6953158207940151, + "epoch": 0.9600516472337428, "grad_norm": 0.0, - "learning_rate": 4.4864352467470095e-06, - "loss": 0.9207, + "learning_rate": 8.358478065810693e-08, + "loss": 0.8733, "step": 24537 }, { - "epoch": 0.6953441582362776, + "epoch": 0.9600907739259723, "grad_norm": 0.0, - "learning_rate": 4.485669586137199e-06, - "loss": 0.838, + "learning_rate": 8.342135754798253e-08, + "loss": 0.9613, "step": 24538 }, { - "epoch": 0.6953724956785401, + "epoch": 0.9601299006182017, "grad_norm": 0.0, - "learning_rate": 4.484903971976642e-06, - "loss": 0.8319, + "learning_rate": 8.325809368568417e-08, + "loss": 0.9951, "step": 24539 }, { - "epoch": 0.6954008331208025, + "epoch": 0.9601690273104312, "grad_norm": 0.0, - "learning_rate": 4.4841384042717866e-06, - "loss": 0.792, + "learning_rate": 8.309498907383528e-08, + "loss": 0.8785, "step": 24540 }, { - "epoch": 0.695429170563065, + "epoch": 0.9602081540026606, "grad_norm": 0.0, - "learning_rate": 4.483372883029085e-06, - "loss": 0.862, + "learning_rate": 8.293204371505381e-08, + "loss": 0.9879, "step": 24541 }, { - "epoch": 0.6954575080053275, + "epoch": 0.9602472806948901, "grad_norm": 0.0, - "learning_rate": 4.482607408254978e-06, - "loss": 0.8603, + "learning_rate": 8.276925761195654e-08, + "loss": 1.0155, "step": 24542 }, { - "epoch": 0.6954858454475898, + "epoch": 0.9602864073871195, "grad_norm": 0.0, - "learning_rate": 4.481841979955922e-06, - "loss": 0.7835, + "learning_rate": 8.260663076715913e-08, + "loss": 0.9097, "step": 24543 }, { - "epoch": 0.6955141828898523, + "epoch": 0.960325534079349, "grad_norm": 0.0, - "learning_rate": 4.481076598138356e-06, - "loss": 0.8945, + "learning_rate": 8.244416318327175e-08, + "loss": 0.9386, "step": 24544 }, { - "epoch": 0.6955425203321148, + "epoch": 0.9603646607715783, "grad_norm": 0.0, - "learning_rate": 4.480311262808732e-06, - "loss": 0.8767, + "learning_rate": 8.228185486290341e-08, + "loss": 0.9563, "step": 24545 }, { - "epoch": 0.6955708577743773, + "epoch": 0.9604037874638078, "grad_norm": 0.0, - "learning_rate": 4.479545973973499e-06, - "loss": 0.8864, + "learning_rate": 8.211970580866091e-08, + "loss": 0.9284, "step": 24546 }, { - "epoch": 0.6955991952166397, + "epoch": 0.9604429141560372, "grad_norm": 0.0, - "learning_rate": 4.478780731639096e-06, - "loss": 0.7808, + "learning_rate": 8.195771602314773e-08, + "loss": 1.0311, "step": 24547 }, { - "epoch": 0.6956275326589022, + "epoch": 0.9604820408482667, "grad_norm": 0.0, - "learning_rate": 4.478015535811978e-06, - "loss": 0.8685, + "learning_rate": 8.179588550896622e-08, + "loss": 0.8638, "step": 24548 }, { - "epoch": 0.6956558701011647, + "epoch": 0.9605211675404961, "grad_norm": 0.0, - "learning_rate": 4.477250386498582e-06, - "loss": 0.7406, + "learning_rate": 8.163421426871432e-08, + "loss": 0.988, "step": 24549 }, { - "epoch": 0.6956842075434271, + "epoch": 0.9605602942327256, "grad_norm": 0.0, - "learning_rate": 4.476485283705356e-06, - "loss": 0.8248, + "learning_rate": 8.147270230498883e-08, + "loss": 0.8907, "step": 24550 }, { - "epoch": 0.6957125449856896, + "epoch": 0.960599420924955, "grad_norm": 0.0, - "learning_rate": 4.475720227438745e-06, - "loss": 0.7753, + "learning_rate": 8.131134962038211e-08, + "loss": 0.9186, "step": 24551 }, { - "epoch": 0.6957408824279521, + "epoch": 0.9606385476171845, "grad_norm": 0.0, - "learning_rate": 4.474955217705196e-06, - "loss": 0.9244, + "learning_rate": 8.115015621748768e-08, + "loss": 0.9105, "step": 24552 }, { - "epoch": 0.6957692198702146, + "epoch": 0.9606776743094139, "grad_norm": 0.0, - "learning_rate": 4.4741902545111495e-06, - "loss": 0.8272, + "learning_rate": 8.09891220988912e-08, + "loss": 0.9724, "step": 24553 }, { - "epoch": 0.6957975573124769, + "epoch": 0.9607168010016434, "grad_norm": 0.0, - "learning_rate": 4.473425337863055e-06, - "loss": 0.7881, + "learning_rate": 8.082824726718174e-08, + "loss": 0.9421, "step": 24554 }, { - "epoch": 0.6958258947547394, + "epoch": 0.9607559276938727, "grad_norm": 0.0, - "learning_rate": 4.472660467767346e-06, - "loss": 0.8138, + "learning_rate": 8.066753172494057e-08, + "loss": 0.9438, "step": 24555 }, { - "epoch": 0.6958542321970019, + "epoch": 0.9607950543861022, "grad_norm": 0.0, - "learning_rate": 4.471895644230475e-06, - "loss": 0.8782, + "learning_rate": 8.050697547475006e-08, + "loss": 0.8972, "step": 24556 }, { - "epoch": 0.6958825696392643, + "epoch": 0.9608341810783316, "grad_norm": 0.0, - "learning_rate": 4.471130867258876e-06, - "loss": 0.8637, + "learning_rate": 8.034657851918592e-08, + "loss": 0.9304, "step": 24557 }, { - "epoch": 0.6959109070815268, + "epoch": 0.9608733077705611, "grad_norm": 0.0, - "learning_rate": 4.470366136858994e-06, - "loss": 0.7236, + "learning_rate": 8.018634086082721e-08, + "loss": 0.8464, "step": 24558 }, { - "epoch": 0.6959392445237893, + "epoch": 0.9609124344627905, "grad_norm": 0.0, - "learning_rate": 4.469601453037277e-06, - "loss": 0.7946, + "learning_rate": 8.002626250224521e-08, + "loss": 0.8318, "step": 24559 }, { - "epoch": 0.6959675819660518, + "epoch": 0.96095156115502, "grad_norm": 0.0, - "learning_rate": 4.468836815800155e-06, - "loss": 0.6792, + "learning_rate": 7.986634344601119e-08, + "loss": 0.9105, "step": 24560 }, { - "epoch": 0.6959959194083142, + "epoch": 0.9609906878472494, "grad_norm": 0.0, - "learning_rate": 4.468072225154075e-06, - "loss": 0.7965, + "learning_rate": 7.970658369469198e-08, + "loss": 0.9509, "step": 24561 }, { - "epoch": 0.6960242568505767, + "epoch": 0.9610298145394788, "grad_norm": 0.0, - "learning_rate": 4.467307681105481e-06, - "loss": 0.8366, + "learning_rate": 7.954698325085553e-08, + "loss": 0.9307, "step": 24562 }, { - "epoch": 0.6960525942928392, + "epoch": 0.9610689412317083, "grad_norm": 0.0, - "learning_rate": 4.466543183660805e-06, - "loss": 0.8383, + "learning_rate": 7.938754211706312e-08, + "loss": 0.9053, "step": 24563 }, { - "epoch": 0.6960809317351015, + "epoch": 0.9611080679239377, "grad_norm": 0.0, - "learning_rate": 4.46577873282649e-06, - "loss": 0.8536, + "learning_rate": 7.922826029587494e-08, + "loss": 1.0189, "step": 24564 }, { - "epoch": 0.696109269177364, + "epoch": 0.9611471946161672, "grad_norm": 0.0, - "learning_rate": 4.465014328608977e-06, - "loss": 0.8724, + "learning_rate": 7.906913778985004e-08, + "loss": 1.0381, "step": 24565 }, { - "epoch": 0.6961376066196265, + "epoch": 0.9611863213083965, "grad_norm": 0.0, - "learning_rate": 4.4642499710147025e-06, - "loss": 0.8051, + "learning_rate": 7.891017460154305e-08, + "loss": 0.9475, "step": 24566 }, { - "epoch": 0.6961659440618889, + "epoch": 0.961225448000626, "grad_norm": 0.0, - "learning_rate": 4.463485660050111e-06, - "loss": 0.7919, + "learning_rate": 7.875137073350636e-08, + "loss": 1.041, "step": 24567 }, { - "epoch": 0.6961942815041514, + "epoch": 0.9612645746928554, "grad_norm": 0.0, - "learning_rate": 4.462721395721631e-06, - "loss": 0.8541, + "learning_rate": 7.859272618829239e-08, + "loss": 1.0217, "step": 24568 }, { - "epoch": 0.6962226189464139, + "epoch": 0.9613037013850849, "grad_norm": 0.0, - "learning_rate": 4.461957178035705e-06, - "loss": 0.9417, + "learning_rate": 7.843424096844576e-08, + "loss": 1.0377, "step": 24569 }, { - "epoch": 0.6962509563886764, + "epoch": 0.9613428280773143, "grad_norm": 0.0, - "learning_rate": 4.4611930069987755e-06, - "loss": 0.8119, + "learning_rate": 7.827591507651222e-08, + "loss": 0.9392, "step": 24570 }, { - "epoch": 0.6962792938309388, + "epoch": 0.9613819547695438, "grad_norm": 0.0, - "learning_rate": 4.4604288826172685e-06, - "loss": 0.8275, + "learning_rate": 7.811774851503528e-08, + "loss": 0.9292, "step": 24571 }, { - "epoch": 0.6963076312732013, + "epoch": 0.9614210814617732, "grad_norm": 0.0, - "learning_rate": 4.459664804897631e-06, - "loss": 0.7439, + "learning_rate": 7.795974128655514e-08, + "loss": 0.9207, "step": 24572 }, { - "epoch": 0.6963359687154638, + "epoch": 0.9614602081540027, "grad_norm": 0.0, - "learning_rate": 4.45890077384629e-06, - "loss": 0.8039, + "learning_rate": 7.780189339360755e-08, + "loss": 1.0219, "step": 24573 }, { - "epoch": 0.6963643061577262, + "epoch": 0.9614993348462321, "grad_norm": 0.0, - "learning_rate": 4.458136789469685e-06, - "loss": 0.8746, + "learning_rate": 7.764420483872937e-08, + "loss": 0.9216, "step": 24574 }, { - "epoch": 0.6963926435999886, + "epoch": 0.9615384615384616, "grad_norm": 0.0, - "learning_rate": 4.457372851774256e-06, - "loss": 0.8275, + "learning_rate": 7.748667562445078e-08, + "loss": 0.8583, "step": 24575 }, { - "epoch": 0.6964209810422511, + "epoch": 0.961577588230691, "grad_norm": 0.0, - "learning_rate": 4.456608960766429e-06, - "loss": 0.8202, + "learning_rate": 7.732930575330422e-08, + "loss": 0.9688, "step": 24576 }, { - "epoch": 0.6964493184845136, + "epoch": 0.9616167149229204, "grad_norm": 0.0, - "learning_rate": 4.455845116452644e-06, - "loss": 0.8445, + "learning_rate": 7.717209522781322e-08, + "loss": 0.9037, "step": 24577 }, { - "epoch": 0.696477655926776, + "epoch": 0.9616558416151498, "grad_norm": 0.0, - "learning_rate": 4.455081318839335e-06, - "loss": 0.9229, + "learning_rate": 7.701504405050575e-08, + "loss": 1.0445, "step": 24578 }, { - "epoch": 0.6965059933690385, + "epoch": 0.9616949683073793, "grad_norm": 0.0, - "learning_rate": 4.4543175679329345e-06, - "loss": 0.8429, + "learning_rate": 7.685815222390202e-08, + "loss": 0.9482, "step": 24579 }, { - "epoch": 0.696534330811301, + "epoch": 0.9617340949996087, "grad_norm": 0.0, - "learning_rate": 4.453553863739879e-06, - "loss": 0.8931, + "learning_rate": 7.670141975052226e-08, + "loss": 1.0012, "step": 24580 }, { - "epoch": 0.6965626682535634, + "epoch": 0.9617732216918382, "grad_norm": 0.0, - "learning_rate": 4.452790206266597e-06, - "loss": 0.7299, + "learning_rate": 7.65448466328822e-08, + "loss": 0.92, "step": 24581 }, { - "epoch": 0.6965910056958259, + "epoch": 0.9618123483840676, "grad_norm": 0.0, - "learning_rate": 4.452026595519522e-06, - "loss": 0.7606, + "learning_rate": 7.638843287349873e-08, + "loss": 0.9567, "step": 24582 }, { - "epoch": 0.6966193431380884, + "epoch": 0.9618514750762971, "grad_norm": 0.0, - "learning_rate": 4.451263031505091e-06, - "loss": 0.7391, + "learning_rate": 7.623217847488096e-08, + "loss": 0.8846, "step": 24583 }, { - "epoch": 0.6966476805803509, + "epoch": 0.9618906017685265, "grad_norm": 0.0, - "learning_rate": 4.4504995142297294e-06, - "loss": 0.8667, + "learning_rate": 7.607608343953909e-08, + "loss": 0.9672, "step": 24584 }, { - "epoch": 0.6966760180226133, + "epoch": 0.961929728460756, "grad_norm": 0.0, - "learning_rate": 4.449736043699872e-06, - "loss": 0.8023, + "learning_rate": 7.592014776998003e-08, + "loss": 1.0517, "step": 24585 }, { - "epoch": 0.6967043554648757, + "epoch": 0.9619688551529854, "grad_norm": 0.0, - "learning_rate": 4.448972619921949e-06, - "loss": 0.8112, + "learning_rate": 7.57643714687084e-08, + "loss": 0.9053, "step": 24586 }, { - "epoch": 0.6967326929071382, + "epoch": 0.9620079818452149, "grad_norm": 0.0, - "learning_rate": 4.4482092429023886e-06, - "loss": 0.7942, + "learning_rate": 7.560875453822447e-08, + "loss": 1.0745, "step": 24587 }, { - "epoch": 0.6967610303494006, + "epoch": 0.9620471085374442, "grad_norm": 0.0, - "learning_rate": 4.447445912647623e-06, - "loss": 0.7939, + "learning_rate": 7.545329698102843e-08, + "loss": 0.9653, "step": 24588 }, { - "epoch": 0.6967893677916631, + "epoch": 0.9620862352296737, "grad_norm": 0.0, - "learning_rate": 4.446682629164088e-06, - "loss": 0.9494, + "learning_rate": 7.529799879961608e-08, + "loss": 1.0138, "step": 24589 }, { - "epoch": 0.6968177052339256, + "epoch": 0.9621253619219031, "grad_norm": 0.0, - "learning_rate": 4.445919392458203e-06, - "loss": 0.9299, + "learning_rate": 7.514285999648208e-08, + "loss": 0.858, "step": 24590 }, { - "epoch": 0.696846042676188, + "epoch": 0.9621644886141325, "grad_norm": 0.0, - "learning_rate": 4.4451562025364e-06, - "loss": 0.8042, + "learning_rate": 7.498788057411665e-08, + "loss": 0.8233, "step": 24591 }, { - "epoch": 0.6968743801184505, + "epoch": 0.962203615306362, "grad_norm": 0.0, - "learning_rate": 4.44439305940511e-06, - "loss": 0.818, + "learning_rate": 7.483306053500894e-08, + "loss": 1.0699, "step": 24592 }, { - "epoch": 0.696902717560713, + "epoch": 0.9622427419985914, "grad_norm": 0.0, - "learning_rate": 4.443629963070765e-06, - "loss": 0.9069, + "learning_rate": 7.467839988164583e-08, + "loss": 1.0078, "step": 24593 }, { - "epoch": 0.6969310550029755, + "epoch": 0.9622818686908209, "grad_norm": 0.0, - "learning_rate": 4.442866913539783e-06, - "loss": 0.8891, + "learning_rate": 7.452389861650977e-08, + "loss": 0.9777, "step": 24594 }, { - "epoch": 0.6969593924452379, + "epoch": 0.9623209953830503, "grad_norm": 0.0, - "learning_rate": 4.442103910818597e-06, - "loss": 0.8502, + "learning_rate": 7.436955674208324e-08, + "loss": 1.0214, "step": 24595 }, { - "epoch": 0.6969877298875004, + "epoch": 0.9623601220752798, "grad_norm": 0.0, - "learning_rate": 4.441340954913636e-06, - "loss": 0.8652, + "learning_rate": 7.421537426084425e-08, + "loss": 0.9292, "step": 24596 }, { - "epoch": 0.6970160673297628, + "epoch": 0.9623992487675092, "grad_norm": 0.0, - "learning_rate": 4.440578045831323e-06, - "loss": 0.9218, + "learning_rate": 7.406135117526858e-08, + "loss": 0.974, "step": 24597 }, { - "epoch": 0.6970444047720252, + "epoch": 0.9624383754597386, "grad_norm": 0.0, - "learning_rate": 4.439815183578085e-06, - "loss": 0.8043, + "learning_rate": 7.390748748782873e-08, + "loss": 1.0192, "step": 24598 }, { - "epoch": 0.6970727422142877, + "epoch": 0.962477502151968, "grad_norm": 0.0, - "learning_rate": 4.439052368160351e-06, - "loss": 0.7767, + "learning_rate": 7.375378320099824e-08, + "loss": 1.0253, "step": 24599 }, { - "epoch": 0.6971010796565502, + "epoch": 0.9625166288441975, "grad_norm": 0.0, - "learning_rate": 4.438289599584541e-06, - "loss": 0.8297, + "learning_rate": 7.360023831724183e-08, + "loss": 0.9175, "step": 24600 }, { - "epoch": 0.6971294170988127, + "epoch": 0.9625557555364269, "grad_norm": 0.0, - "learning_rate": 4.437526877857083e-06, - "loss": 0.8363, + "learning_rate": 7.344685283902863e-08, + "loss": 0.9059, "step": 24601 }, { - "epoch": 0.6971577545410751, + "epoch": 0.9625948822286564, "grad_norm": 0.0, - "learning_rate": 4.436764202984401e-06, - "loss": 0.7273, + "learning_rate": 7.32936267688189e-08, + "loss": 0.9431, "step": 24602 }, { - "epoch": 0.6971860919833376, + "epoch": 0.9626340089208858, "grad_norm": 0.0, - "learning_rate": 4.436001574972921e-06, - "loss": 0.7772, + "learning_rate": 7.314056010907622e-08, + "loss": 0.9373, "step": 24603 }, { - "epoch": 0.6972144294256001, + "epoch": 0.9626731356131153, "grad_norm": 0.0, - "learning_rate": 4.43523899382907e-06, - "loss": 0.8403, + "learning_rate": 7.298765286225639e-08, + "loss": 0.9217, "step": 24604 }, { - "epoch": 0.6972427668678625, + "epoch": 0.9627122623053447, "grad_norm": 0.0, - "learning_rate": 4.434476459559262e-06, - "loss": 0.7233, + "learning_rate": 7.283490503081636e-08, + "loss": 0.9302, "step": 24605 }, { - "epoch": 0.697271104310125, + "epoch": 0.9627513889975742, "grad_norm": 0.0, - "learning_rate": 4.43371397216993e-06, - "loss": 0.8792, + "learning_rate": 7.268231661720637e-08, + "loss": 0.8644, "step": 24606 }, { - "epoch": 0.6972994417523874, + "epoch": 0.9627905156898036, "grad_norm": 0.0, - "learning_rate": 4.4329515316674884e-06, - "loss": 0.7743, + "learning_rate": 7.252988762388003e-08, + "loss": 0.9348, "step": 24607 }, { - "epoch": 0.6973277791946499, + "epoch": 0.9628296423820331, "grad_norm": 0.0, - "learning_rate": 4.432189138058364e-06, - "loss": 0.8066, + "learning_rate": 7.237761805328425e-08, + "loss": 0.913, "step": 24608 }, { - "epoch": 0.6973561166369123, + "epoch": 0.9628687690742624, "grad_norm": 0.0, - "learning_rate": 4.431426791348981e-06, - "loss": 0.7999, + "learning_rate": 7.222550790786376e-08, + "loss": 0.8613, "step": 24609 }, { - "epoch": 0.6973844540791748, + "epoch": 0.9629078957664919, "grad_norm": 0.0, - "learning_rate": 4.430664491545754e-06, - "loss": 0.9255, + "learning_rate": 7.207355719005993e-08, + "loss": 0.9617, "step": 24610 }, { - "epoch": 0.6974127915214373, + "epoch": 0.9629470224587213, "grad_norm": 0.0, - "learning_rate": 4.429902238655108e-06, - "loss": 0.8956, + "learning_rate": 7.192176590231636e-08, + "loss": 0.8593, "step": 24611 }, { - "epoch": 0.6974411289636997, + "epoch": 0.9629861491509508, "grad_norm": 0.0, - "learning_rate": 4.429140032683469e-06, - "loss": 0.8758, + "learning_rate": 7.177013404706779e-08, + "loss": 0.9818, "step": 24612 }, { - "epoch": 0.6974694664059622, + "epoch": 0.9630252758431802, "grad_norm": 0.0, - "learning_rate": 4.428377873637247e-06, - "loss": 0.8797, + "learning_rate": 7.161866162675001e-08, + "loss": 0.9493, "step": 24613 }, { - "epoch": 0.6974978038482247, + "epoch": 0.9630644025354097, "grad_norm": 0.0, - "learning_rate": 4.427615761522868e-06, - "loss": 0.8062, + "learning_rate": 7.146734864379556e-08, + "loss": 0.9388, "step": 24614 }, { - "epoch": 0.6975261412904871, + "epoch": 0.9631035292276391, "grad_norm": 0.0, - "learning_rate": 4.42685369634675e-06, - "loss": 0.8589, + "learning_rate": 7.13161951006347e-08, + "loss": 0.9362, "step": 24615 }, { - "epoch": 0.6975544787327496, + "epoch": 0.9631426559198686, "grad_norm": 0.0, - "learning_rate": 4.426091678115313e-06, - "loss": 0.7282, + "learning_rate": 7.116520099969437e-08, + "loss": 0.9043, "step": 24616 }, { - "epoch": 0.697582816175012, + "epoch": 0.963181782612098, "grad_norm": 0.0, - "learning_rate": 4.42532970683498e-06, - "loss": 0.8515, + "learning_rate": 7.101436634339931e-08, + "loss": 0.89, "step": 24617 }, { - "epoch": 0.6976111536172745, + "epoch": 0.9632209093043275, "grad_norm": 0.0, - "learning_rate": 4.42456778251216e-06, - "loss": 0.7915, + "learning_rate": 7.086369113417201e-08, + "loss": 0.9571, "step": 24618 }, { - "epoch": 0.6976394910595369, + "epoch": 0.9632600359965569, "grad_norm": 0.0, - "learning_rate": 4.423805905153278e-06, - "loss": 0.8488, + "learning_rate": 7.071317537443167e-08, + "loss": 1.1205, "step": 24619 }, { - "epoch": 0.6976678285017994, + "epoch": 0.9632991626887862, "grad_norm": 0.0, - "learning_rate": 4.423044074764752e-06, - "loss": 0.8187, + "learning_rate": 7.056281906659524e-08, + "loss": 1.0262, "step": 24620 }, { - "epoch": 0.6976961659440619, + "epoch": 0.9633382893810157, "grad_norm": 0.0, - "learning_rate": 4.422282291352993e-06, - "loss": 0.8026, + "learning_rate": 7.041262221307854e-08, + "loss": 0.9196, "step": 24621 }, { - "epoch": 0.6977245033863243, + "epoch": 0.9633774160732451, "grad_norm": 0.0, - "learning_rate": 4.421520554924424e-06, - "loss": 0.8553, + "learning_rate": 7.02625848162919e-08, + "loss": 1.054, "step": 24622 }, { - "epoch": 0.6977528408285868, + "epoch": 0.9634165427654746, "grad_norm": 0.0, - "learning_rate": 4.420758865485457e-06, - "loss": 0.7931, + "learning_rate": 7.011270687864557e-08, + "loss": 0.8467, "step": 24623 }, { - "epoch": 0.6977811782708493, + "epoch": 0.963455669457704, "grad_norm": 0.0, - "learning_rate": 4.419997223042509e-06, - "loss": 0.8494, + "learning_rate": 6.996298840254656e-08, + "loss": 0.9771, "step": 24624 }, { - "epoch": 0.6978095157131118, + "epoch": 0.9634947961499335, "grad_norm": 0.0, - "learning_rate": 4.419235627602001e-06, - "loss": 0.8155, + "learning_rate": 6.981342939039848e-08, + "loss": 1.0265, "step": 24625 }, { - "epoch": 0.6978378531553742, + "epoch": 0.9635339228421629, "grad_norm": 0.0, - "learning_rate": 4.41847407917034e-06, - "loss": 0.7861, + "learning_rate": 6.966402984460385e-08, + "loss": 0.8607, "step": 24626 }, { - "epoch": 0.6978661905976367, + "epoch": 0.9635730495343924, "grad_norm": 0.0, - "learning_rate": 4.4177125777539435e-06, - "loss": 0.7644, + "learning_rate": 6.951478976756188e-08, + "loss": 0.9984, "step": 24627 }, { - "epoch": 0.6978945280398992, + "epoch": 0.9636121762266218, "grad_norm": 0.0, - "learning_rate": 4.416951123359227e-06, - "loss": 0.7575, + "learning_rate": 6.93657091616684e-08, + "loss": 0.9091, "step": 24628 }, { - "epoch": 0.6979228654821615, + "epoch": 0.9636513029188513, "grad_norm": 0.0, - "learning_rate": 4.416189715992605e-06, - "loss": 0.8683, + "learning_rate": 6.92167880293182e-08, + "loss": 0.9171, "step": 24629 }, { - "epoch": 0.697951202924424, + "epoch": 0.9636904296110806, "grad_norm": 0.0, - "learning_rate": 4.415428355660494e-06, - "loss": 0.8049, + "learning_rate": 6.906802637290266e-08, + "loss": 0.9202, "step": 24630 }, { - "epoch": 0.6979795403666865, + "epoch": 0.9637295563033101, "grad_norm": 0.0, - "learning_rate": 4.4146670423692995e-06, - "loss": 0.8425, + "learning_rate": 6.891942419480879e-08, + "loss": 0.9044, "step": 24631 }, { - "epoch": 0.698007877808949, + "epoch": 0.9637686829955395, "grad_norm": 0.0, - "learning_rate": 4.413905776125439e-06, - "loss": 0.9549, + "learning_rate": 6.877098149742579e-08, + "loss": 0.9586, "step": 24632 }, { - "epoch": 0.6980362152512114, + "epoch": 0.963807809687769, "grad_norm": 0.0, - "learning_rate": 4.413144556935328e-06, - "loss": 0.8247, + "learning_rate": 6.862269828313616e-08, + "loss": 0.9932, "step": 24633 }, { - "epoch": 0.6980645526934739, + "epoch": 0.9638469363799984, "grad_norm": 0.0, - "learning_rate": 4.412383384805372e-06, - "loss": 0.7675, + "learning_rate": 6.847457455432249e-08, + "loss": 0.8306, "step": 24634 }, { - "epoch": 0.6980928901357364, + "epoch": 0.9638860630722279, "grad_norm": 0.0, - "learning_rate": 4.411622259741989e-06, - "loss": 0.8303, + "learning_rate": 6.832661031336063e-08, + "loss": 0.9902, "step": 24635 }, { - "epoch": 0.6981212275779988, + "epoch": 0.9639251897644573, "grad_norm": 0.0, - "learning_rate": 4.4108611817515834e-06, - "loss": 0.81, + "learning_rate": 6.817880556262868e-08, + "loss": 0.8657, "step": 24636 }, { - "epoch": 0.6981495650202613, + "epoch": 0.9639643164566868, "grad_norm": 0.0, - "learning_rate": 4.4101001508405695e-06, - "loss": 0.7238, + "learning_rate": 6.803116030449919e-08, + "loss": 1.0145, "step": 24637 }, { - "epoch": 0.6981779024625238, + "epoch": 0.9640034431489162, "grad_norm": 0.0, - "learning_rate": 4.409339167015361e-06, - "loss": 0.8193, + "learning_rate": 6.788367454134581e-08, + "loss": 0.893, "step": 24638 }, { - "epoch": 0.6982062399047861, + "epoch": 0.9640425698411457, "grad_norm": 0.0, - "learning_rate": 4.408578230282361e-06, - "loss": 0.9564, + "learning_rate": 6.773634827553333e-08, + "loss": 0.9937, "step": 24639 }, { - "epoch": 0.6982345773470486, + "epoch": 0.964081696533375, "grad_norm": 0.0, - "learning_rate": 4.407817340647983e-06, - "loss": 0.9365, + "learning_rate": 6.758918150942984e-08, + "loss": 1.0078, "step": 24640 }, { - "epoch": 0.6982629147893111, + "epoch": 0.9641208232256046, "grad_norm": 0.0, - "learning_rate": 4.4070564981186355e-06, - "loss": 0.8884, + "learning_rate": 6.74421742453979e-08, + "loss": 1.0476, "step": 24641 }, { - "epoch": 0.6982912522315736, + "epoch": 0.9641599499178339, "grad_norm": 0.0, - "learning_rate": 4.40629570270073e-06, - "loss": 0.7447, + "learning_rate": 6.729532648579895e-08, + "loss": 0.8524, "step": 24642 }, { - "epoch": 0.698319589673836, + "epoch": 0.9641990766100634, "grad_norm": 0.0, - "learning_rate": 4.405534954400675e-06, - "loss": 0.8044, + "learning_rate": 6.714863823299e-08, + "loss": 0.961, "step": 24643 }, { - "epoch": 0.6983479271160985, + "epoch": 0.9642382033022928, "grad_norm": 0.0, - "learning_rate": 4.404774253224874e-06, - "loss": 0.7544, + "learning_rate": 6.700210948932805e-08, + "loss": 0.9755, "step": 24644 }, { - "epoch": 0.698376264558361, + "epoch": 0.9642773299945223, "grad_norm": 0.0, - "learning_rate": 4.4040135991797366e-06, - "loss": 0.8387, + "learning_rate": 6.685574025716457e-08, + "loss": 1.0093, "step": 24645 }, { - "epoch": 0.6984046020006234, + "epoch": 0.9643164566867517, "grad_norm": 0.0, - "learning_rate": 4.4032529922716735e-06, - "loss": 0.7936, + "learning_rate": 6.670953053885099e-08, + "loss": 0.9266, "step": 24646 }, { - "epoch": 0.6984329394428859, + "epoch": 0.9643555833789812, "grad_norm": 0.0, - "learning_rate": 4.402492432507086e-06, - "loss": 0.7814, + "learning_rate": 6.656348033673654e-08, + "loss": 0.9814, "step": 24647 }, { - "epoch": 0.6984612768851484, + "epoch": 0.9643947100712106, "grad_norm": 0.0, - "learning_rate": 4.401731919892384e-06, - "loss": 0.813, + "learning_rate": 6.64175896531638e-08, + "loss": 0.8839, "step": 24648 }, { - "epoch": 0.6984896143274109, + "epoch": 0.96443383676344, "grad_norm": 0.0, - "learning_rate": 4.4009714544339755e-06, - "loss": 0.792, + "learning_rate": 6.627185849047868e-08, + "loss": 0.8829, "step": 24649 }, { - "epoch": 0.6985179517696732, + "epoch": 0.9644729634556695, "grad_norm": 0.0, - "learning_rate": 4.40021103613826e-06, - "loss": 0.8001, + "learning_rate": 6.61262868510193e-08, + "loss": 0.918, "step": 24650 }, { - "epoch": 0.6985462892119357, + "epoch": 0.9645120901478988, "grad_norm": 0.0, - "learning_rate": 4.39945066501165e-06, - "loss": 0.8591, + "learning_rate": 6.598087473712267e-08, + "loss": 0.9783, "step": 24651 }, { - "epoch": 0.6985746266541982, + "epoch": 0.9645512168401283, "grad_norm": 0.0, - "learning_rate": 4.398690341060543e-06, - "loss": 0.8873, + "learning_rate": 6.583562215112582e-08, + "loss": 0.9221, "step": 24652 }, { - "epoch": 0.6986029640964606, + "epoch": 0.9645903435323577, "grad_norm": 0.0, - "learning_rate": 4.397930064291348e-06, - "loss": 0.8932, + "learning_rate": 6.569052909536134e-08, + "loss": 1.0015, "step": 24653 }, { - "epoch": 0.6986313015387231, + "epoch": 0.9646294702245872, "grad_norm": 0.0, - "learning_rate": 4.397169834710467e-06, - "loss": 0.7339, + "learning_rate": 6.554559557215844e-08, + "loss": 0.9072, "step": 24654 }, { - "epoch": 0.6986596389809856, + "epoch": 0.9646685969168166, "grad_norm": 0.0, - "learning_rate": 4.396409652324306e-06, - "loss": 0.8646, + "learning_rate": 6.540082158384309e-08, + "loss": 0.9428, "step": 24655 }, { - "epoch": 0.6986879764232481, + "epoch": 0.9647077236090461, "grad_norm": 0.0, - "learning_rate": 4.395649517139267e-06, - "loss": 0.8601, + "learning_rate": 6.52562071327445e-08, + "loss": 0.8926, "step": 24656 }, { - "epoch": 0.6987163138655105, + "epoch": 0.9647468503012755, "grad_norm": 0.0, - "learning_rate": 4.3948894291617584e-06, - "loss": 0.7379, + "learning_rate": 6.511175222117971e-08, + "loss": 1.0342, "step": 24657 }, { - "epoch": 0.698744651307773, + "epoch": 0.964785976993505, "grad_norm": 0.0, - "learning_rate": 4.394129388398172e-06, - "loss": 0.7652, + "learning_rate": 6.496745685147355e-08, + "loss": 0.8516, "step": 24658 }, { - "epoch": 0.6987729887500355, + "epoch": 0.9648251036857344, "grad_norm": 0.0, - "learning_rate": 4.39336939485492e-06, - "loss": 0.7471, + "learning_rate": 6.482332102593858e-08, + "loss": 0.9211, "step": 24659 }, { - "epoch": 0.6988013261922978, + "epoch": 0.9648642303779639, "grad_norm": 0.0, - "learning_rate": 4.392609448538395e-06, - "loss": 0.8861, + "learning_rate": 6.467934474689296e-08, + "loss": 1.0056, "step": 24660 }, { - "epoch": 0.6988296636345603, + "epoch": 0.9649033570701933, "grad_norm": 0.0, - "learning_rate": 4.391849549455004e-06, - "loss": 0.8224, + "learning_rate": 6.453552801664708e-08, + "loss": 1.0196, "step": 24661 }, { - "epoch": 0.6988580010768228, + "epoch": 0.9649424837624228, "grad_norm": 0.0, - "learning_rate": 4.391089697611151e-06, - "loss": 0.8033, + "learning_rate": 6.43918708375102e-08, + "loss": 0.8604, "step": 24662 }, { - "epoch": 0.6988863385190852, + "epoch": 0.9649816104546521, "grad_norm": 0.0, - "learning_rate": 4.3903298930132265e-06, - "loss": 0.7438, + "learning_rate": 6.424837321179045e-08, + "loss": 0.9442, "step": 24663 }, { - "epoch": 0.6989146759613477, + "epoch": 0.9650207371468816, "grad_norm": 0.0, - "learning_rate": 4.389570135667642e-06, - "loss": 0.8491, + "learning_rate": 6.410503514179157e-08, + "loss": 0.9676, "step": 24664 }, { - "epoch": 0.6989430134036102, + "epoch": 0.965059863839111, "grad_norm": 0.0, - "learning_rate": 4.388810425580786e-06, - "loss": 0.8322, + "learning_rate": 6.396185662981502e-08, + "loss": 0.981, "step": 24665 }, { - "epoch": 0.6989713508458727, + "epoch": 0.9650989905313405, "grad_norm": 0.0, - "learning_rate": 4.3880507627590655e-06, - "loss": 0.8118, + "learning_rate": 6.381883767816122e-08, + "loss": 0.9347, "step": 24666 }, { - "epoch": 0.6989996882881351, + "epoch": 0.9651381172235699, "grad_norm": 0.0, - "learning_rate": 4.387291147208876e-06, - "loss": 0.8397, + "learning_rate": 6.367597828912609e-08, + "loss": 1.034, "step": 24667 }, { - "epoch": 0.6990280257303976, + "epoch": 0.9651772439157994, "grad_norm": 0.0, - "learning_rate": 4.386531578936618e-06, - "loss": 0.7633, + "learning_rate": 6.353327846500335e-08, + "loss": 1.1054, "step": 24668 }, { - "epoch": 0.6990563631726601, + "epoch": 0.9652163706080288, "grad_norm": 0.0, - "learning_rate": 4.38577205794869e-06, - "loss": 0.7586, + "learning_rate": 6.339073820808561e-08, + "loss": 0.9209, "step": 24669 }, { - "epoch": 0.6990847006149224, + "epoch": 0.9652554973002583, "grad_norm": 0.0, - "learning_rate": 4.38501258425149e-06, - "loss": 0.8877, + "learning_rate": 6.324835752065994e-08, + "loss": 0.9875, "step": 24670 }, { - "epoch": 0.6991130380571849, + "epoch": 0.9652946239924877, "grad_norm": 0.0, - "learning_rate": 4.384253157851413e-06, - "loss": 0.9002, + "learning_rate": 6.310613640501562e-08, + "loss": 0.9606, "step": 24671 }, { - "epoch": 0.6991413754994474, + "epoch": 0.9653337506847172, "grad_norm": 0.0, - "learning_rate": 4.3834937787548585e-06, - "loss": 0.7793, + "learning_rate": 6.296407486343525e-08, + "loss": 1.0735, "step": 24672 }, { - "epoch": 0.6991697129417099, + "epoch": 0.9653728773769465, "grad_norm": 0.0, - "learning_rate": 4.382734446968219e-06, - "loss": 0.9, + "learning_rate": 6.282217289819925e-08, + "loss": 0.9712, "step": 24673 }, { - "epoch": 0.6991980503839723, + "epoch": 0.965412004069176, "grad_norm": 0.0, - "learning_rate": 4.381975162497892e-06, - "loss": 0.7506, + "learning_rate": 6.26804305115869e-08, + "loss": 0.952, "step": 24674 }, { - "epoch": 0.6992263878262348, + "epoch": 0.9654511307614054, "grad_norm": 0.0, - "learning_rate": 4.381215925350279e-06, - "loss": 0.8496, + "learning_rate": 6.253884770587636e-08, + "loss": 0.813, "step": 24675 }, { - "epoch": 0.6992547252684973, + "epoch": 0.9654902574536348, "grad_norm": 0.0, - "learning_rate": 4.380456735531767e-06, - "loss": 0.7908, + "learning_rate": 6.239742448333807e-08, + "loss": 0.9549, "step": 24676 }, { - "epoch": 0.6992830627107597, + "epoch": 0.9655293841458643, "grad_norm": 0.0, - "learning_rate": 4.379697593048755e-06, - "loss": 0.8105, + "learning_rate": 6.225616084624463e-08, + "loss": 0.9749, "step": 24677 }, { - "epoch": 0.6993114001530222, + "epoch": 0.9655685108380937, "grad_norm": 0.0, - "learning_rate": 4.3789384979076414e-06, - "loss": 0.9423, + "learning_rate": 6.211505679686536e-08, + "loss": 0.9926, "step": 24678 }, { - "epoch": 0.6993397375952847, + "epoch": 0.9656076375303232, "grad_norm": 0.0, - "learning_rate": 4.3781794501148116e-06, - "loss": 0.878, + "learning_rate": 6.197411233746509e-08, + "loss": 1.0113, "step": 24679 }, { - "epoch": 0.6993680750375472, + "epoch": 0.9656467642225526, "grad_norm": 0.0, - "learning_rate": 4.377420449676664e-06, - "loss": 0.8918, + "learning_rate": 6.183332747030757e-08, + "loss": 0.9905, "step": 24680 }, { - "epoch": 0.6993964124798095, + "epoch": 0.9656858909147821, "grad_norm": 0.0, - "learning_rate": 4.376661496599593e-06, - "loss": 0.8072, + "learning_rate": 6.169270219765322e-08, + "loss": 0.9931, "step": 24681 }, { - "epoch": 0.699424749922072, + "epoch": 0.9657250176070115, "grad_norm": 0.0, - "learning_rate": 4.375902590889988e-06, - "loss": 0.7816, + "learning_rate": 6.155223652176023e-08, + "loss": 0.8412, "step": 24682 }, { - "epoch": 0.6994530873643345, + "epoch": 0.965764144299241, "grad_norm": 0.0, - "learning_rate": 4.375143732554249e-06, - "loss": 0.8117, + "learning_rate": 6.141193044488569e-08, + "loss": 0.8922, "step": 24683 }, { - "epoch": 0.6994814248065969, + "epoch": 0.9658032709914703, "grad_norm": 0.0, - "learning_rate": 4.37438492159876e-06, - "loss": 0.8687, + "learning_rate": 6.127178396928224e-08, + "loss": 0.9772, "step": 24684 }, { - "epoch": 0.6995097622488594, + "epoch": 0.9658423976836998, "grad_norm": 0.0, - "learning_rate": 4.373626158029915e-06, - "loss": 0.8828, + "learning_rate": 6.113179709719919e-08, + "loss": 0.9875, "step": 24685 }, { - "epoch": 0.6995380996911219, + "epoch": 0.9658815243759292, "grad_norm": 0.0, - "learning_rate": 4.372867441854109e-06, - "loss": 0.8407, + "learning_rate": 6.099196983088473e-08, + "loss": 0.9957, "step": 24686 }, { - "epoch": 0.6995664371333843, + "epoch": 0.9659206510681587, "grad_norm": 0.0, - "learning_rate": 4.3721087730777275e-06, - "loss": 0.8846, + "learning_rate": 6.085230217258597e-08, + "loss": 0.9912, "step": 24687 }, { - "epoch": 0.6995947745756468, + "epoch": 0.9659597777603881, "grad_norm": 0.0, - "learning_rate": 4.371350151707168e-06, - "loss": 0.7708, + "learning_rate": 6.071279412454445e-08, + "loss": 0.8954, "step": 24688 }, { - "epoch": 0.6996231120179093, + "epoch": 0.9659989044526176, "grad_norm": 0.0, - "learning_rate": 4.370591577748811e-06, - "loss": 0.9166, + "learning_rate": 6.05734456890017e-08, + "loss": 0.8686, "step": 24689 }, { - "epoch": 0.6996514494601718, + "epoch": 0.966038031144847, "grad_norm": 0.0, - "learning_rate": 4.3698330512090535e-06, - "loss": 0.775, + "learning_rate": 6.043425686819371e-08, + "loss": 0.9449, "step": 24690 }, { - "epoch": 0.6996797869024342, + "epoch": 0.9660771578370765, "grad_norm": 0.0, - "learning_rate": 4.369074572094285e-06, - "loss": 0.9018, + "learning_rate": 6.029522766435648e-08, + "loss": 1.1002, "step": 24691 }, { - "epoch": 0.6997081243446966, + "epoch": 0.9661162845293059, "grad_norm": 0.0, - "learning_rate": 4.36831614041089e-06, - "loss": 0.8062, + "learning_rate": 6.015635807972265e-08, + "loss": 1.0289, "step": 24692 }, { - "epoch": 0.6997364617869591, + "epoch": 0.9661554112215354, "grad_norm": 0.0, - "learning_rate": 4.367557756165259e-06, - "loss": 0.868, + "learning_rate": 6.001764811652378e-08, + "loss": 0.7967, "step": 24693 }, { - "epoch": 0.6997647992292215, + "epoch": 0.9661945379137648, "grad_norm": 0.0, - "learning_rate": 4.3667994193637794e-06, - "loss": 0.8501, + "learning_rate": 5.987909777698475e-08, + "loss": 0.9453, "step": 24694 }, { - "epoch": 0.699793136671484, + "epoch": 0.9662336646059942, "grad_norm": 0.0, - "learning_rate": 4.366041130012841e-06, - "loss": 0.9832, + "learning_rate": 5.974070706333157e-08, + "loss": 1.0283, "step": 24695 }, { - "epoch": 0.6998214741137465, + "epoch": 0.9662727912982236, "grad_norm": 0.0, - "learning_rate": 4.365282888118834e-06, - "loss": 0.8693, + "learning_rate": 5.96024759777869e-08, + "loss": 0.9999, "step": 24696 }, { - "epoch": 0.699849811556009, + "epoch": 0.9663119179904531, "grad_norm": 0.0, - "learning_rate": 4.364524693688138e-06, - "loss": 0.7939, + "learning_rate": 5.9464404522571185e-08, + "loss": 0.9878, "step": 24697 }, { - "epoch": 0.6998781489982714, + "epoch": 0.9663510446826825, "grad_norm": 0.0, - "learning_rate": 4.363766546727143e-06, - "loss": 0.8161, + "learning_rate": 5.932649269990043e-08, + "loss": 0.9067, "step": 24698 }, { - "epoch": 0.6999064864405339, + "epoch": 0.966390171374912, "grad_norm": 0.0, - "learning_rate": 4.36300844724224e-06, - "loss": 0.8661, + "learning_rate": 5.9188740511989526e-08, + "loss": 0.8684, "step": 24699 }, { - "epoch": 0.6999348238827964, + "epoch": 0.9664292980671414, "grad_norm": 0.0, - "learning_rate": 4.362250395239805e-06, - "loss": 0.8652, + "learning_rate": 5.905114796105116e-08, + "loss": 0.9032, "step": 24700 }, { - "epoch": 0.6999631613250588, + "epoch": 0.9664684247593709, "grad_norm": 0.0, - "learning_rate": 4.361492390726233e-06, - "loss": 0.8326, + "learning_rate": 5.891371504929466e-08, + "loss": 0.9631, "step": 24701 }, { - "epoch": 0.6999914987673213, + "epoch": 0.9665075514516003, "grad_norm": 0.0, - "learning_rate": 4.3607344337079e-06, - "loss": 0.906, + "learning_rate": 5.8776441778927165e-08, + "loss": 0.8541, "step": 24702 }, { - "epoch": 0.7000198362095837, + "epoch": 0.9665466781438298, "grad_norm": 0.0, - "learning_rate": 4.359976524191195e-06, - "loss": 0.8272, + "learning_rate": 5.8639328152153566e-08, + "loss": 0.8713, "step": 24703 }, { - "epoch": 0.7000481736518462, + "epoch": 0.9665858048360592, "grad_norm": 0.0, - "learning_rate": 4.359218662182506e-06, - "loss": 0.7063, + "learning_rate": 5.8502374171174324e-08, + "loss": 1.0372, "step": 24704 }, { - "epoch": 0.7000765110941086, + "epoch": 0.9666249315282885, "grad_norm": 0.0, - "learning_rate": 4.35846084768821e-06, - "loss": 0.8027, + "learning_rate": 5.836557983818991e-08, + "loss": 1.0568, "step": 24705 }, { - "epoch": 0.7001048485363711, + "epoch": 0.966664058220518, "grad_norm": 0.0, - "learning_rate": 4.3577030807146925e-06, - "loss": 0.8402, + "learning_rate": 5.8228945155396344e-08, + "loss": 1.0135, "step": 24706 }, { - "epoch": 0.7001331859786336, + "epoch": 0.9667031849127474, "grad_norm": 0.0, - "learning_rate": 4.356945361268337e-06, - "loss": 0.798, + "learning_rate": 5.809247012498853e-08, + "loss": 0.9566, "step": 24707 }, { - "epoch": 0.700161523420896, + "epoch": 0.9667423116049769, "grad_norm": 0.0, - "learning_rate": 4.3561876893555264e-06, - "loss": 0.8911, + "learning_rate": 5.795615474915694e-08, + "loss": 1.1037, "step": 24708 }, { - "epoch": 0.7001898608631585, + "epoch": 0.9667814382972063, "grad_norm": 0.0, - "learning_rate": 4.355430064982647e-06, - "loss": 0.8899, + "learning_rate": 5.781999903009206e-08, + "loss": 0.9851, "step": 24709 }, { - "epoch": 0.700218198305421, + "epoch": 0.9668205649894358, "grad_norm": 0.0, - "learning_rate": 4.354672488156071e-06, - "loss": 0.8425, + "learning_rate": 5.768400296997989e-08, + "loss": 0.9993, "step": 24710 }, { - "epoch": 0.7002465357476834, + "epoch": 0.9668596916816652, "grad_norm": 0.0, - "learning_rate": 4.353914958882186e-06, - "loss": 0.7869, + "learning_rate": 5.754816657100426e-08, + "loss": 0.849, "step": 24711 }, { - "epoch": 0.7002748731899459, + "epoch": 0.9668988183738947, "grad_norm": 0.0, - "learning_rate": 4.353157477167375e-06, - "loss": 0.8389, + "learning_rate": 5.741248983534675e-08, + "loss": 1.0215, "step": 24712 }, { - "epoch": 0.7003032106322084, + "epoch": 0.9669379450661241, "grad_norm": 0.0, - "learning_rate": 4.3524000430180125e-06, - "loss": 0.9022, + "learning_rate": 5.727697276518451e-08, + "loss": 1.1736, "step": 24713 }, { - "epoch": 0.7003315480744708, + "epoch": 0.9669770717583536, "grad_norm": 0.0, - "learning_rate": 4.351642656440482e-06, - "loss": 0.9137, + "learning_rate": 5.714161536269691e-08, + "loss": 1.0106, "step": 24714 }, { - "epoch": 0.7003598855167332, + "epoch": 0.967016198450583, "grad_norm": 0.0, - "learning_rate": 4.350885317441166e-06, - "loss": 0.8029, + "learning_rate": 5.700641763005443e-08, + "loss": 0.9835, "step": 24715 }, { - "epoch": 0.7003882229589957, + "epoch": 0.9670553251428124, "grad_norm": 0.0, - "learning_rate": 4.350128026026437e-06, - "loss": 0.7618, + "learning_rate": 5.6871379569430894e-08, + "loss": 0.9855, "step": 24716 }, { - "epoch": 0.7004165604012582, + "epoch": 0.9670944518350418, "grad_norm": 0.0, - "learning_rate": 4.349370782202681e-06, - "loss": 0.8442, + "learning_rate": 5.673650118299234e-08, + "loss": 0.881, "step": 24717 }, { - "epoch": 0.7004448978435206, + "epoch": 0.9671335785272713, "grad_norm": 0.0, - "learning_rate": 4.3486135859762705e-06, - "loss": 0.8896, + "learning_rate": 5.660178247290704e-08, + "loss": 0.9125, "step": 24718 }, { - "epoch": 0.7004732352857831, + "epoch": 0.9671727052195007, "grad_norm": 0.0, - "learning_rate": 4.347856437353584e-06, - "loss": 0.7712, + "learning_rate": 5.646722344133659e-08, + "loss": 0.9156, "step": 24719 }, { - "epoch": 0.7005015727280456, + "epoch": 0.9672118319117302, "grad_norm": 0.0, - "learning_rate": 4.347099336341004e-06, - "loss": 0.8077, + "learning_rate": 5.633282409044372e-08, + "loss": 0.9674, "step": 24720 }, { - "epoch": 0.7005299101703081, + "epoch": 0.9672509586039596, "grad_norm": 0.0, - "learning_rate": 4.346342282944905e-06, - "loss": 0.8614, + "learning_rate": 5.619858442238446e-08, + "loss": 0.8871, "step": 24721 }, { - "epoch": 0.7005582476125705, + "epoch": 0.9672900852961891, "grad_norm": 0.0, - "learning_rate": 4.3455852771716675e-06, - "loss": 0.725, + "learning_rate": 5.606450443931488e-08, + "loss": 0.9152, "step": 24722 }, { - "epoch": 0.700586585054833, + "epoch": 0.9673292119884185, "grad_norm": 0.0, - "learning_rate": 4.344828319027662e-06, - "loss": 0.8218, + "learning_rate": 5.593058414338992e-08, + "loss": 0.8873, "step": 24723 }, { - "epoch": 0.7006149224970954, + "epoch": 0.967368338680648, "grad_norm": 0.0, - "learning_rate": 4.344071408519267e-06, - "loss": 0.8876, + "learning_rate": 5.579682353676008e-08, + "loss": 0.9205, "step": 24724 }, { - "epoch": 0.7006432599393578, + "epoch": 0.9674074653728774, "grad_norm": 0.0, - "learning_rate": 4.343314545652863e-06, - "loss": 0.8359, + "learning_rate": 5.566322262157142e-08, + "loss": 0.8906, "step": 24725 }, { - "epoch": 0.7006715973816203, + "epoch": 0.9674465920651069, "grad_norm": 0.0, - "learning_rate": 4.342557730434818e-06, - "loss": 0.8204, + "learning_rate": 5.5529781399970005e-08, + "loss": 0.8352, "step": 24726 }, { - "epoch": 0.7006999348238828, + "epoch": 0.9674857187573362, "grad_norm": 0.0, - "learning_rate": 4.341800962871508e-06, - "loss": 0.7468, + "learning_rate": 5.539649987409967e-08, + "loss": 1.014, "step": 24727 }, { - "epoch": 0.7007282722661452, + "epoch": 0.9675248454495657, "grad_norm": 0.0, - "learning_rate": 4.341044242969315e-06, - "loss": 0.7929, + "learning_rate": 5.526337804610094e-08, + "loss": 0.971, "step": 24728 }, { - "epoch": 0.7007566097084077, + "epoch": 0.9675639721417951, "grad_norm": 0.0, - "learning_rate": 4.340287570734604e-06, - "loss": 0.894, + "learning_rate": 5.513041591810986e-08, + "loss": 1.0051, "step": 24729 }, { - "epoch": 0.7007849471506702, + "epoch": 0.9676030988340246, "grad_norm": 0.0, - "learning_rate": 4.339530946173754e-06, - "loss": 0.7406, + "learning_rate": 5.4997613492263626e-08, + "loss": 0.9812, "step": 24730 }, { - "epoch": 0.7008132845929327, + "epoch": 0.967642225526254, "grad_norm": 0.0, - "learning_rate": 4.3387743692931365e-06, - "loss": 0.8473, + "learning_rate": 5.486497077069275e-08, + "loss": 1.0636, "step": 24731 }, { - "epoch": 0.7008416220351951, + "epoch": 0.9676813522184835, "grad_norm": 0.0, - "learning_rate": 4.3380178400991225e-06, - "loss": 0.8008, + "learning_rate": 5.473248775552997e-08, + "loss": 0.9588, "step": 24732 }, { - "epoch": 0.7008699594774576, + "epoch": 0.9677204789107129, "grad_norm": 0.0, - "learning_rate": 4.337261358598087e-06, - "loss": 0.911, + "learning_rate": 5.460016444890026e-08, + "loss": 1.05, "step": 24733 }, { - "epoch": 0.70089829691972, + "epoch": 0.9677596056029423, "grad_norm": 0.0, - "learning_rate": 4.336504924796402e-06, - "loss": 0.7617, + "learning_rate": 5.4468000852929695e-08, + "loss": 1.0006, "step": 24734 }, { - "epoch": 0.7009266343619824, + "epoch": 0.9677987322951718, "grad_norm": 0.0, - "learning_rate": 4.335748538700439e-06, - "loss": 0.8669, + "learning_rate": 5.433599696974101e-08, + "loss": 0.9129, "step": 24735 }, { - "epoch": 0.7009549718042449, + "epoch": 0.9678378589874012, "grad_norm": 0.0, - "learning_rate": 4.334992200316573e-06, - "loss": 0.7324, + "learning_rate": 5.420415280145364e-08, + "loss": 0.939, "step": 24736 }, { - "epoch": 0.7009833092465074, + "epoch": 0.9678769856796307, "grad_norm": 0.0, - "learning_rate": 4.334235909651169e-06, - "loss": 0.8788, + "learning_rate": 5.407246835018365e-08, + "loss": 0.9743, "step": 24737 }, { - "epoch": 0.7010116466887699, + "epoch": 0.96791611237186, "grad_norm": 0.0, - "learning_rate": 4.333479666710603e-06, - "loss": 0.8039, + "learning_rate": 5.394094361804825e-08, + "loss": 1.0306, "step": 24738 }, { - "epoch": 0.7010399841310323, + "epoch": 0.9679552390640895, "grad_norm": 0.0, - "learning_rate": 4.332723471501238e-06, - "loss": 0.9565, + "learning_rate": 5.380957860715796e-08, + "loss": 0.9191, "step": 24739 }, { - "epoch": 0.7010683215732948, + "epoch": 0.9679943657563189, "grad_norm": 0.0, - "learning_rate": 4.331967324029447e-06, - "loss": 0.8183, + "learning_rate": 5.367837331962222e-08, + "loss": 0.8243, "step": 24740 }, { - "epoch": 0.7010966590155573, + "epoch": 0.9680334924485484, "grad_norm": 0.0, - "learning_rate": 4.331211224301605e-06, - "loss": 0.7677, + "learning_rate": 5.354732775754823e-08, + "loss": 0.8731, "step": 24741 }, { - "epoch": 0.7011249964578197, + "epoch": 0.9680726191407778, "grad_norm": 0.0, - "learning_rate": 4.3304551723240705e-06, - "loss": 0.7738, + "learning_rate": 5.3416441923040964e-08, + "loss": 0.98, "step": 24742 }, { - "epoch": 0.7011533339000822, + "epoch": 0.9681117458330073, "grad_norm": 0.0, - "learning_rate": 4.329699168103218e-06, - "loss": 0.8636, + "learning_rate": 5.328571581820208e-08, + "loss": 0.9997, "step": 24743 }, { - "epoch": 0.7011816713423447, + "epoch": 0.9681508725252367, "grad_norm": 0.0, - "learning_rate": 4.32894321164542e-06, - "loss": 0.7869, + "learning_rate": 5.3155149445129895e-08, + "loss": 0.8943, "step": 24744 }, { - "epoch": 0.7012100087846072, + "epoch": 0.9681899992174662, "grad_norm": 0.0, - "learning_rate": 4.328187302957034e-06, - "loss": 0.82, + "learning_rate": 5.3024742805921625e-08, + "loss": 0.9525, "step": 24745 }, { - "epoch": 0.7012383462268695, + "epoch": 0.9682291259096956, "grad_norm": 0.0, - "learning_rate": 4.327431442044434e-06, - "loss": 0.816, + "learning_rate": 5.289449590267337e-08, + "loss": 0.8919, "step": 24746 }, { - "epoch": 0.701266683669132, + "epoch": 0.9682682526019251, "grad_norm": 0.0, - "learning_rate": 4.326675628913985e-06, - "loss": 0.8785, + "learning_rate": 5.2764408737473464e-08, + "loss": 0.9094, "step": 24747 }, { - "epoch": 0.7012950211113945, + "epoch": 0.9683073792941544, "grad_norm": 0.0, - "learning_rate": 4.325919863572052e-06, - "loss": 0.8598, + "learning_rate": 5.2634481312413555e-08, + "loss": 0.9357, "step": 24748 }, { - "epoch": 0.7013233585536569, + "epoch": 0.9683465059863839, "grad_norm": 0.0, - "learning_rate": 4.325164146025009e-06, - "loss": 0.8462, + "learning_rate": 5.250471362957865e-08, + "loss": 0.9712, "step": 24749 }, { - "epoch": 0.7013516959959194, + "epoch": 0.9683856326786133, "grad_norm": 0.0, - "learning_rate": 4.324408476279211e-06, - "loss": 0.8169, + "learning_rate": 5.237510569105375e-08, + "loss": 0.9611, "step": 24750 }, { - "epoch": 0.7013800334381819, + "epoch": 0.9684247593708428, "grad_norm": 0.0, - "learning_rate": 4.323652854341032e-06, - "loss": 0.6307, + "learning_rate": 5.22456574989183e-08, + "loss": 0.9405, "step": 24751 }, { - "epoch": 0.7014083708804443, + "epoch": 0.9684638860630722, "grad_norm": 0.0, - "learning_rate": 4.322897280216829e-06, - "loss": 0.763, + "learning_rate": 5.211636905525397e-08, + "loss": 0.8693, "step": 24752 }, { - "epoch": 0.7014367083227068, + "epoch": 0.9685030127553017, "grad_norm": 0.0, - "learning_rate": 4.322141753912971e-06, - "loss": 0.8719, + "learning_rate": 5.198724036213465e-08, + "loss": 0.9077, "step": 24753 }, { - "epoch": 0.7014650457649693, + "epoch": 0.9685421394475311, "grad_norm": 0.0, - "learning_rate": 4.321386275435824e-06, - "loss": 0.8416, + "learning_rate": 5.1858271421634245e-08, + "loss": 1.0311, "step": 24754 }, { - "epoch": 0.7014933832072318, + "epoch": 0.9685812661397606, "grad_norm": 0.0, - "learning_rate": 4.320630844791746e-06, - "loss": 0.823, + "learning_rate": 5.172946223582442e-08, + "loss": 0.9852, "step": 24755 }, { - "epoch": 0.7015217206494941, + "epoch": 0.96862039283199, "grad_norm": 0.0, - "learning_rate": 4.319875461987103e-06, - "loss": 0.8489, + "learning_rate": 5.160081280677465e-08, + "loss": 0.9435, "step": 24756 }, { - "epoch": 0.7015500580917566, + "epoch": 0.9686595195242195, "grad_norm": 0.0, - "learning_rate": 4.319120127028263e-06, - "loss": 0.8095, + "learning_rate": 5.147232313654882e-08, + "loss": 1.0289, "step": 24757 }, { - "epoch": 0.7015783955340191, + "epoch": 0.9686986462164489, "grad_norm": 0.0, - "learning_rate": 4.318364839921579e-06, - "loss": 0.9977, + "learning_rate": 5.1343993227210845e-08, + "loss": 0.901, "step": 24758 }, { - "epoch": 0.7016067329762815, + "epoch": 0.9687377729086784, "grad_norm": 0.0, - "learning_rate": 4.317609600673418e-06, - "loss": 0.9116, + "learning_rate": 5.12158230808224e-08, + "loss": 0.841, "step": 24759 }, { - "epoch": 0.701635070418544, + "epoch": 0.9687768996009077, "grad_norm": 0.0, - "learning_rate": 4.316854409290141e-06, - "loss": 0.827, + "learning_rate": 5.108781269944185e-08, + "loss": 1.0639, "step": 24760 }, { - "epoch": 0.7016634078608065, + "epoch": 0.9688160262931372, "grad_norm": 0.0, - "learning_rate": 4.316099265778111e-06, - "loss": 0.9016, + "learning_rate": 5.095996208512311e-08, + "loss": 0.9027, "step": 24761 }, { - "epoch": 0.701691745303069, + "epoch": 0.9688551529853666, "grad_norm": 0.0, - "learning_rate": 4.315344170143691e-06, - "loss": 0.7761, + "learning_rate": 5.083227123992118e-08, + "loss": 0.941, "step": 24762 }, { - "epoch": 0.7017200827453314, + "epoch": 0.968894279677596, "grad_norm": 0.0, - "learning_rate": 4.314589122393232e-06, - "loss": 0.8273, + "learning_rate": 5.070474016588667e-08, + "loss": 1.0144, "step": 24763 }, { - "epoch": 0.7017484201875939, + "epoch": 0.9689334063698255, "grad_norm": 0.0, - "learning_rate": 4.313834122533102e-06, - "loss": 0.8924, + "learning_rate": 5.057736886506681e-08, + "loss": 0.8707, "step": 24764 }, { - "epoch": 0.7017767576298564, + "epoch": 0.9689725330620549, "grad_norm": 0.0, - "learning_rate": 4.3130791705696626e-06, - "loss": 0.8748, + "learning_rate": 5.0450157339506646e-08, + "loss": 1.0321, "step": 24765 }, { - "epoch": 0.7018050950721187, + "epoch": 0.9690116597542844, "grad_norm": 0.0, - "learning_rate": 4.312324266509265e-06, - "loss": 0.7044, + "learning_rate": 5.032310559124898e-08, + "loss": 0.8693, "step": 24766 }, { - "epoch": 0.7018334325143812, + "epoch": 0.9690507864465138, "grad_norm": 0.0, - "learning_rate": 4.3115694103582764e-06, - "loss": 0.8157, + "learning_rate": 5.019621362233551e-08, + "loss": 0.9349, "step": 24767 }, { - "epoch": 0.7018617699566437, + "epoch": 0.9690899131387433, "grad_norm": 0.0, - "learning_rate": 4.3108146021230465e-06, - "loss": 0.8399, + "learning_rate": 5.0069481434802395e-08, + "loss": 0.9041, "step": 24768 }, { - "epoch": 0.7018901073989062, + "epoch": 0.9691290398309726, "grad_norm": 0.0, - "learning_rate": 4.310059841809938e-06, - "loss": 0.9192, + "learning_rate": 4.9942909030685774e-08, + "loss": 1.0312, "step": 24769 }, { - "epoch": 0.7019184448411686, + "epoch": 0.9691681665232021, "grad_norm": 0.0, - "learning_rate": 4.309305129425312e-06, - "loss": 0.9233, + "learning_rate": 4.981649641201736e-08, + "loss": 1.0197, "step": 24770 }, { - "epoch": 0.7019467822834311, + "epoch": 0.9692072932154315, "grad_norm": 0.0, - "learning_rate": 4.308550464975518e-06, - "loss": 0.8326, + "learning_rate": 4.969024358082775e-08, + "loss": 0.8978, "step": 24771 }, { - "epoch": 0.7019751197256936, + "epoch": 0.969246419907661, "grad_norm": 0.0, - "learning_rate": 4.307795848466918e-06, - "loss": 0.866, + "learning_rate": 4.956415053914532e-08, + "loss": 0.9238, "step": 24772 }, { - "epoch": 0.702003457167956, + "epoch": 0.9692855465998904, "grad_norm": 0.0, - "learning_rate": 4.307041279905867e-06, - "loss": 0.867, + "learning_rate": 4.9438217288994005e-08, + "loss": 0.9944, "step": 24773 }, { - "epoch": 0.7020317946102185, + "epoch": 0.9693246732921199, "grad_norm": 0.0, - "learning_rate": 4.306286759298721e-06, - "loss": 0.8818, + "learning_rate": 4.931244383239553e-08, + "loss": 0.8781, "step": 24774 }, { - "epoch": 0.702060132052481, + "epoch": 0.9693637999843493, "grad_norm": 0.0, - "learning_rate": 4.30553228665184e-06, - "loss": 0.9192, + "learning_rate": 4.918683017137049e-08, + "loss": 0.9746, "step": 24775 }, { - "epoch": 0.7020884694947434, + "epoch": 0.9694029266765788, "grad_norm": 0.0, - "learning_rate": 4.30477786197157e-06, - "loss": 0.8319, + "learning_rate": 4.906137630793506e-08, + "loss": 0.9104, "step": 24776 }, { - "epoch": 0.7021168069370058, + "epoch": 0.9694420533688082, "grad_norm": 0.0, - "learning_rate": 4.304023485264273e-06, - "loss": 0.8362, + "learning_rate": 4.89360822441054e-08, + "loss": 1.0447, "step": 24777 }, { - "epoch": 0.7021451443792683, + "epoch": 0.9694811800610377, "grad_norm": 0.0, - "learning_rate": 4.3032691565363034e-06, - "loss": 0.7917, + "learning_rate": 4.881094798189323e-08, + "loss": 0.9453, "step": 24778 }, { - "epoch": 0.7021734818215308, + "epoch": 0.9695203067532671, "grad_norm": 0.0, - "learning_rate": 4.30251487579401e-06, - "loss": 0.8275, + "learning_rate": 4.8685973523308064e-08, + "loss": 0.9132, "step": 24779 }, { - "epoch": 0.7022018192637932, + "epoch": 0.9695594334454966, "grad_norm": 0.0, - "learning_rate": 4.301760643043754e-06, - "loss": 0.7965, + "learning_rate": 4.856115887035495e-08, + "loss": 0.9016, "step": 24780 }, { - "epoch": 0.7022301567060557, + "epoch": 0.9695985601377259, "grad_norm": 0.0, - "learning_rate": 4.301006458291879e-06, - "loss": 0.776, + "learning_rate": 4.8436504025041185e-08, + "loss": 0.9466, "step": 24781 }, { - "epoch": 0.7022584941483182, + "epoch": 0.9696376868299554, "grad_norm": 0.0, - "learning_rate": 4.300252321544744e-06, - "loss": 0.7542, + "learning_rate": 4.831200898936628e-08, + "loss": 1.0258, "step": 24782 }, { - "epoch": 0.7022868315905806, + "epoch": 0.9696768135221848, "grad_norm": 0.0, - "learning_rate": 4.299498232808704e-06, - "loss": 0.8538, + "learning_rate": 4.818767376533084e-08, + "loss": 0.9744, "step": 24783 }, { - "epoch": 0.7023151690328431, + "epoch": 0.9697159402144143, "grad_norm": 0.0, - "learning_rate": 4.298744192090103e-06, - "loss": 0.9374, + "learning_rate": 4.806349835492996e-08, + "loss": 0.9736, "step": 24784 }, { - "epoch": 0.7023435064751056, + "epoch": 0.9697550669066437, "grad_norm": 0.0, - "learning_rate": 4.2979901993952975e-06, - "loss": 0.7764, + "learning_rate": 4.7939482760160915e-08, + "loss": 0.8615, "step": 24785 }, { - "epoch": 0.7023718439173681, + "epoch": 0.9697941935988732, "grad_norm": 0.0, - "learning_rate": 4.297236254730637e-06, - "loss": 0.8104, + "learning_rate": 4.781562698301101e-08, + "loss": 1.0284, "step": 24786 }, { - "epoch": 0.7024001813596304, + "epoch": 0.9698333202911026, "grad_norm": 0.0, - "learning_rate": 4.296482358102474e-06, - "loss": 0.7859, + "learning_rate": 4.7691931025473095e-08, + "loss": 0.9913, "step": 24787 }, { - "epoch": 0.7024285188018929, + "epoch": 0.9698724469833321, "grad_norm": 0.0, - "learning_rate": 4.29572850951716e-06, - "loss": 0.8721, + "learning_rate": 4.756839488953113e-08, + "loss": 0.9852, "step": 24788 }, { - "epoch": 0.7024568562441554, + "epoch": 0.9699115736755615, "grad_norm": 0.0, - "learning_rate": 4.294974708981041e-06, - "loss": 0.8511, + "learning_rate": 4.74450185771691e-08, + "loss": 1.0404, "step": 24789 }, { - "epoch": 0.7024851936864178, + "epoch": 0.9699507003677909, "grad_norm": 0.0, - "learning_rate": 4.294220956500469e-06, - "loss": 0.8437, + "learning_rate": 4.732180209036985e-08, + "loss": 0.8931, "step": 24790 }, { - "epoch": 0.7025135311286803, + "epoch": 0.9699898270600203, "grad_norm": 0.0, - "learning_rate": 4.2934672520817944e-06, - "loss": 0.7963, + "learning_rate": 4.719874543111069e-08, + "loss": 0.9093, "step": 24791 }, { - "epoch": 0.7025418685709428, + "epoch": 0.9700289537522497, "grad_norm": 0.0, - "learning_rate": 4.292713595731363e-06, - "loss": 0.816, + "learning_rate": 4.7075848601367825e-08, + "loss": 0.9077, "step": 24792 }, { - "epoch": 0.7025702060132053, + "epoch": 0.9700680804444792, "grad_norm": 0.0, - "learning_rate": 4.291959987455522e-06, - "loss": 0.797, + "learning_rate": 4.695311160311522e-08, + "loss": 0.982, "step": 24793 }, { - "epoch": 0.7025985434554677, + "epoch": 0.9701072071367086, "grad_norm": 0.0, - "learning_rate": 4.2912064272606255e-06, - "loss": 0.7771, + "learning_rate": 4.6830534438323524e-08, + "loss": 0.8763, "step": 24794 }, { - "epoch": 0.7026268808977302, + "epoch": 0.9701463338289381, "grad_norm": 0.0, - "learning_rate": 4.290452915153015e-06, - "loss": 0.7818, + "learning_rate": 4.670811710896117e-08, + "loss": 0.9045, "step": 24795 }, { - "epoch": 0.7026552183399927, + "epoch": 0.9701854605211675, "grad_norm": 0.0, - "learning_rate": 4.289699451139043e-06, - "loss": 0.8118, + "learning_rate": 4.6585859616995464e-08, + "loss": 0.8967, "step": 24796 }, { - "epoch": 0.702683555782255, + "epoch": 0.970224587213397, "grad_norm": 0.0, - "learning_rate": 4.288946035225049e-06, - "loss": 0.8533, + "learning_rate": 4.6463761964388176e-08, + "loss": 1.0052, "step": 24797 }, { - "epoch": 0.7027118932245175, + "epoch": 0.9702637139056264, "grad_norm": 0.0, - "learning_rate": 4.288192667417384e-06, - "loss": 0.9007, + "learning_rate": 4.634182415309996e-08, + "loss": 0.8797, "step": 24798 }, { - "epoch": 0.70274023066678, + "epoch": 0.9703028405978559, "grad_norm": 0.0, - "learning_rate": 4.2874393477223915e-06, - "loss": 0.8212, + "learning_rate": 4.622004618508924e-08, + "loss": 0.9446, "step": 24799 }, { - "epoch": 0.7027685681090424, + "epoch": 0.9703419672900853, "grad_norm": 0.0, - "learning_rate": 4.2866860761464205e-06, - "loss": 0.803, + "learning_rate": 4.609842806231224e-08, + "loss": 0.9263, "step": 24800 }, { - "epoch": 0.7027969055513049, + "epoch": 0.9703810939823148, "grad_norm": 0.0, - "learning_rate": 4.2859328526958165e-06, - "loss": 0.7882, + "learning_rate": 4.597696978672073e-08, + "loss": 0.9798, "step": 24801 }, { - "epoch": 0.7028252429935674, + "epoch": 0.9704202206745441, "grad_norm": 0.0, - "learning_rate": 4.285179677376919e-06, - "loss": 0.8109, + "learning_rate": 4.5855671360267585e-08, + "loss": 1.0037, "step": 24802 }, { - "epoch": 0.7028535804358299, + "epoch": 0.9704593473667736, "grad_norm": 0.0, - "learning_rate": 4.2844265501960745e-06, - "loss": 0.79, + "learning_rate": 4.573453278489792e-08, + "loss": 0.8477, "step": 24803 }, { - "epoch": 0.7028819178780923, + "epoch": 0.970498474059003, "grad_norm": 0.0, - "learning_rate": 4.283673471159632e-06, - "loss": 0.7631, + "learning_rate": 4.561355406255796e-08, + "loss": 1.0697, "step": 24804 }, { - "epoch": 0.7029102553203548, + "epoch": 0.9705376007512325, "grad_norm": 0.0, - "learning_rate": 4.282920440273927e-06, - "loss": 0.8741, + "learning_rate": 4.549273519519171e-08, + "loss": 0.9758, "step": 24805 }, { - "epoch": 0.7029385927626173, + "epoch": 0.9705767274434619, "grad_norm": 0.0, - "learning_rate": 4.282167457545306e-06, - "loss": 0.8008, + "learning_rate": 4.53720761847376e-08, + "loss": 1.0359, "step": 24806 }, { - "epoch": 0.7029669302048797, + "epoch": 0.9706158541356914, "grad_norm": 0.0, - "learning_rate": 4.2814145229801155e-06, - "loss": 0.7778, + "learning_rate": 4.525157703313521e-08, + "loss": 0.915, "step": 24807 }, { - "epoch": 0.7029952676471422, + "epoch": 0.9706549808279208, "grad_norm": 0.0, - "learning_rate": 4.28066163658469e-06, - "loss": 0.8914, + "learning_rate": 4.513123774231742e-08, + "loss": 0.9809, "step": 24808 }, { - "epoch": 0.7030236050894046, + "epoch": 0.9706941075201503, "grad_norm": 0.0, - "learning_rate": 4.279908798365379e-06, - "loss": 0.7858, + "learning_rate": 4.501105831421826e-08, + "loss": 0.9409, "step": 24809 }, { - "epoch": 0.7030519425316671, + "epoch": 0.9707332342123797, "grad_norm": 0.0, - "learning_rate": 4.279156008328517e-06, - "loss": 0.7952, + "learning_rate": 4.489103875076728e-08, + "loss": 0.9937, "step": 24810 }, { - "epoch": 0.7030802799739295, + "epoch": 0.9707723609046092, "grad_norm": 0.0, - "learning_rate": 4.2784032664804474e-06, - "loss": 0.8152, + "learning_rate": 4.4771179053891835e-08, + "loss": 0.933, "step": 24811 }, { - "epoch": 0.703108617416192, + "epoch": 0.9708114875968386, "grad_norm": 0.0, - "learning_rate": 4.277650572827513e-06, - "loss": 0.832, + "learning_rate": 4.465147922551705e-08, + "loss": 0.9627, "step": 24812 }, { - "epoch": 0.7031369548584545, + "epoch": 0.970850614289068, "grad_norm": 0.0, - "learning_rate": 4.2768979273760524e-06, - "loss": 0.7571, + "learning_rate": 4.453193926756361e-08, + "loss": 0.9924, "step": 24813 }, { - "epoch": 0.7031652923007169, + "epoch": 0.9708897409812974, "grad_norm": 0.0, - "learning_rate": 4.276145330132405e-06, - "loss": 0.8126, + "learning_rate": 4.441255918195331e-08, + "loss": 0.8808, "step": 24814 }, { - "epoch": 0.7031936297429794, + "epoch": 0.9709288676735269, "grad_norm": 0.0, - "learning_rate": 4.275392781102916e-06, - "loss": 0.8651, + "learning_rate": 4.4293338970601284e-08, + "loss": 0.8302, "step": 24815 }, { - "epoch": 0.7032219671852419, + "epoch": 0.9709679943657563, "grad_norm": 0.0, - "learning_rate": 4.274640280293915e-06, - "loss": 0.8272, + "learning_rate": 4.4174278635423787e-08, + "loss": 0.9002, "step": 24816 }, { - "epoch": 0.7032503046275044, + "epoch": 0.9710071210579858, "grad_norm": 0.0, - "learning_rate": 4.273887827711749e-06, - "loss": 0.7985, + "learning_rate": 4.40553781783315e-08, + "loss": 0.8951, "step": 24817 }, { - "epoch": 0.7032786420697668, + "epoch": 0.9710462477502152, "grad_norm": 0.0, - "learning_rate": 4.273135423362748e-06, - "loss": 0.7643, + "learning_rate": 4.393663760123512e-08, + "loss": 0.994, "step": 24818 }, { - "epoch": 0.7033069795120293, + "epoch": 0.9710853744424446, "grad_norm": 0.0, - "learning_rate": 4.272383067253254e-06, - "loss": 0.8918, + "learning_rate": 4.381805690604091e-08, + "loss": 0.9181, "step": 24819 }, { - "epoch": 0.7033353169542917, + "epoch": 0.9711245011346741, "grad_norm": 0.0, - "learning_rate": 4.271630759389607e-06, - "loss": 0.9901, + "learning_rate": 4.3699636094651773e-08, + "loss": 0.9265, "step": 24820 }, { - "epoch": 0.7033636543965541, + "epoch": 0.9711636278269035, "grad_norm": 0.0, - "learning_rate": 4.27087849977814e-06, - "loss": 0.8229, + "learning_rate": 4.3581375168970654e-08, + "loss": 0.9022, "step": 24821 }, { - "epoch": 0.7033919918388166, + "epoch": 0.971202754519133, "grad_norm": 0.0, - "learning_rate": 4.270126288425189e-06, - "loss": 0.8522, + "learning_rate": 4.346327413089713e-08, + "loss": 0.946, "step": 24822 }, { - "epoch": 0.7034203292810791, + "epoch": 0.9712418812113623, "grad_norm": 0.0, - "learning_rate": 4.269374125337092e-06, - "loss": 0.8003, + "learning_rate": 4.334533298232746e-08, + "loss": 0.9781, "step": 24823 }, { - "epoch": 0.7034486667233415, + "epoch": 0.9712810079035918, "grad_norm": 0.0, - "learning_rate": 4.268622010520186e-06, - "loss": 0.8548, + "learning_rate": 4.322755172515458e-08, + "loss": 0.8765, "step": 24824 }, { - "epoch": 0.703477004165604, + "epoch": 0.9713201345958212, "grad_norm": 0.0, - "learning_rate": 4.267869943980808e-06, - "loss": 0.7651, + "learning_rate": 4.310993036127142e-08, + "loss": 0.9482, "step": 24825 }, { - "epoch": 0.7035053416078665, + "epoch": 0.9713592612880507, "grad_norm": 0.0, - "learning_rate": 4.267117925725287e-06, - "loss": 0.8203, + "learning_rate": 4.2992468892565365e-08, + "loss": 1.0087, "step": 24826 }, { - "epoch": 0.703533679050129, + "epoch": 0.9713983879802801, "grad_norm": 0.0, - "learning_rate": 4.26636595575996e-06, - "loss": 0.8772, + "learning_rate": 4.2875167320923786e-08, + "loss": 1.0706, "step": 24827 }, { - "epoch": 0.7035620164923914, + "epoch": 0.9714375146725096, "grad_norm": 0.0, - "learning_rate": 4.2656140340911655e-06, - "loss": 0.7842, + "learning_rate": 4.275802564823073e-08, + "loss": 0.9214, "step": 24828 }, { - "epoch": 0.7035903539346539, + "epoch": 0.971476641364739, "grad_norm": 0.0, - "learning_rate": 4.264862160725229e-06, - "loss": 0.7758, + "learning_rate": 4.2641043876364694e-08, + "loss": 0.9458, "step": 24829 }, { - "epoch": 0.7036186913769163, + "epoch": 0.9715157680569685, "grad_norm": 0.0, - "learning_rate": 4.264110335668493e-06, - "loss": 0.8822, + "learning_rate": 4.252422200720863e-08, + "loss": 0.9017, "step": 24830 }, { - "epoch": 0.7036470288191787, + "epoch": 0.9715548947491979, "grad_norm": 0.0, - "learning_rate": 4.263358558927281e-06, - "loss": 0.8189, + "learning_rate": 4.2407560042634355e-08, + "loss": 0.994, "step": 24831 }, { - "epoch": 0.7036753662614412, + "epoch": 0.9715940214414274, "grad_norm": 0.0, - "learning_rate": 4.26260683050793e-06, - "loss": 0.7778, + "learning_rate": 4.229105798451816e-08, + "loss": 0.9113, "step": 24832 }, { - "epoch": 0.7037037037037037, + "epoch": 0.9716331481336568, "grad_norm": 0.0, - "learning_rate": 4.2618551504167774e-06, - "loss": 0.836, + "learning_rate": 4.217471583472965e-08, + "loss": 0.9156, "step": 24833 }, { - "epoch": 0.7037320411459662, + "epoch": 0.9716722748258863, "grad_norm": 0.0, - "learning_rate": 4.2611035186601445e-06, - "loss": 0.7625, + "learning_rate": 4.205853359513623e-08, + "loss": 1.0217, "step": 24834 }, { - "epoch": 0.7037603785882286, + "epoch": 0.9717114015181156, "grad_norm": 0.0, - "learning_rate": 4.260351935244369e-06, - "loss": 0.8168, + "learning_rate": 4.19425112676064e-08, + "loss": 0.9496, "step": 24835 }, { - "epoch": 0.7037887160304911, + "epoch": 0.9717505282103451, "grad_norm": 0.0, - "learning_rate": 4.259600400175779e-06, - "loss": 0.794, + "learning_rate": 4.182664885400089e-08, + "loss": 0.8763, "step": 24836 }, { - "epoch": 0.7038170534727536, + "epoch": 0.9717896549025745, "grad_norm": 0.0, - "learning_rate": 4.258848913460708e-06, - "loss": 0.7599, + "learning_rate": 4.171094635618045e-08, + "loss": 0.9692, "step": 24837 }, { - "epoch": 0.703845390915016, + "epoch": 0.971828781594804, "grad_norm": 0.0, - "learning_rate": 4.258097475105487e-06, - "loss": 0.8143, + "learning_rate": 4.1595403776004684e-08, + "loss": 1.1253, "step": 24838 }, { - "epoch": 0.7038737283572785, + "epoch": 0.9718679082870334, "grad_norm": 0.0, - "learning_rate": 4.257346085116441e-06, - "loss": 0.9501, + "learning_rate": 4.148002111532767e-08, + "loss": 0.9952, "step": 24839 }, { - "epoch": 0.703902065799541, + "epoch": 0.9719070349792629, "grad_norm": 0.0, - "learning_rate": 4.2565947434999e-06, - "loss": 0.8868, + "learning_rate": 4.136479837600349e-08, + "loss": 1.0059, "step": 24840 }, { - "epoch": 0.7039304032418034, + "epoch": 0.9719461616714923, "grad_norm": 0.0, - "learning_rate": 4.255843450262198e-06, - "loss": 0.8165, + "learning_rate": 4.1249735559881764e-08, + "loss": 0.8981, "step": 24841 }, { - "epoch": 0.7039587406840658, + "epoch": 0.9719852883637218, "grad_norm": 0.0, - "learning_rate": 4.255092205409657e-06, - "loss": 0.7723, + "learning_rate": 4.113483266880991e-08, + "loss": 1.0327, "step": 24842 }, { - "epoch": 0.7039870781263283, + "epoch": 0.9720244150559512, "grad_norm": 0.0, - "learning_rate": 4.2543410089486055e-06, - "loss": 0.7618, + "learning_rate": 4.1020089704633115e-08, + "loss": 0.9272, "step": 24843 }, { - "epoch": 0.7040154155685908, + "epoch": 0.9720635417481807, "grad_norm": 0.0, - "learning_rate": 4.2535898608853784e-06, - "loss": 0.7982, + "learning_rate": 4.090550666919546e-08, + "loss": 0.8524, "step": 24844 }, { - "epoch": 0.7040437530108532, + "epoch": 0.97210266844041, "grad_norm": 0.0, - "learning_rate": 4.252838761226295e-06, - "loss": 0.7669, + "learning_rate": 4.0791083564334365e-08, + "loss": 0.9849, "step": 24845 }, { - "epoch": 0.7040720904531157, + "epoch": 0.9721417951326395, "grad_norm": 0.0, - "learning_rate": 4.252087709977687e-06, - "loss": 0.8829, + "learning_rate": 4.067682039189058e-08, + "loss": 1.0291, "step": 24846 }, { - "epoch": 0.7041004278953782, + "epoch": 0.9721809218248689, "grad_norm": 0.0, - "learning_rate": 4.251336707145876e-06, - "loss": 0.7014, + "learning_rate": 4.056271715369597e-08, + "loss": 0.9669, "step": 24847 }, { - "epoch": 0.7041287653376406, + "epoch": 0.9722200485170983, "grad_norm": 0.0, - "learning_rate": 4.250585752737189e-06, - "loss": 0.7773, + "learning_rate": 4.0448773851584635e-08, + "loss": 1.0045, "step": 24848 }, { - "epoch": 0.7041571027799031, + "epoch": 0.9722591752093278, "grad_norm": 0.0, - "learning_rate": 4.2498348467579555e-06, - "loss": 0.8284, + "learning_rate": 4.0334990487386206e-08, + "loss": 0.9182, "step": 24849 }, { - "epoch": 0.7041854402221656, + "epoch": 0.9722983019015572, "grad_norm": 0.0, - "learning_rate": 4.2490839892144975e-06, - "loss": 0.866, + "learning_rate": 4.022136706292812e-08, + "loss": 1.0236, "step": 24850 }, { - "epoch": 0.704213777664428, + "epoch": 0.9723374285937867, "grad_norm": 0.0, - "learning_rate": 4.24833318011314e-06, - "loss": 0.9954, + "learning_rate": 4.0107903580033355e-08, + "loss": 0.9249, "step": 24851 }, { - "epoch": 0.7042421151066904, + "epoch": 0.9723765552860161, "grad_norm": 0.0, - "learning_rate": 4.247582419460212e-06, - "loss": 0.9324, + "learning_rate": 3.999460004052602e-08, + "loss": 0.87, "step": 24852 }, { - "epoch": 0.7042704525489529, + "epoch": 0.9724156819782456, "grad_norm": 0.0, - "learning_rate": 4.24683170726203e-06, - "loss": 0.8566, + "learning_rate": 3.988145644622465e-08, + "loss": 0.8744, "step": 24853 }, { - "epoch": 0.7042987899912154, + "epoch": 0.972454808670475, "grad_norm": 0.0, - "learning_rate": 4.246081043524925e-06, - "loss": 0.8768, + "learning_rate": 3.976847279894669e-08, + "loss": 0.9207, "step": 24854 }, { - "epoch": 0.7043271274334778, + "epoch": 0.9724939353627045, "grad_norm": 0.0, - "learning_rate": 4.245330428255211e-06, - "loss": 0.7853, + "learning_rate": 3.9655649100506235e-08, + "loss": 1.0703, "step": 24855 }, { - "epoch": 0.7043554648757403, + "epoch": 0.9725330620549338, "grad_norm": 0.0, - "learning_rate": 4.244579861459217e-06, - "loss": 0.7987, + "learning_rate": 3.954298535271406e-08, + "loss": 0.9681, "step": 24856 }, { - "epoch": 0.7043838023180028, + "epoch": 0.9725721887471633, "grad_norm": 0.0, - "learning_rate": 4.2438293431432665e-06, - "loss": 0.9213, + "learning_rate": 3.943048155738094e-08, + "loss": 0.9852, "step": 24857 }, { - "epoch": 0.7044121397602653, + "epoch": 0.9726113154393927, "grad_norm": 0.0, - "learning_rate": 4.243078873313677e-06, - "loss": 0.8595, + "learning_rate": 3.931813771631321e-08, + "loss": 0.8936, "step": 24858 }, { - "epoch": 0.7044404772025277, + "epoch": 0.9726504421316222, "grad_norm": 0.0, - "learning_rate": 4.242328451976774e-06, - "loss": 0.8976, + "learning_rate": 3.920595383131498e-08, + "loss": 0.8725, "step": 24859 }, { - "epoch": 0.7044688146447902, + "epoch": 0.9726895688238516, "grad_norm": 0.0, - "learning_rate": 4.241578079138873e-06, - "loss": 0.8337, + "learning_rate": 3.909392990418703e-08, + "loss": 1.0589, "step": 24860 }, { - "epoch": 0.7044971520870527, + "epoch": 0.9727286955160811, "grad_norm": 0.0, - "learning_rate": 4.240827754806299e-06, - "loss": 0.8382, + "learning_rate": 3.898206593672904e-08, + "loss": 0.835, "step": 24861 }, { - "epoch": 0.704525489529315, + "epoch": 0.9727678222083105, "grad_norm": 0.0, - "learning_rate": 4.2400774789853705e-06, - "loss": 0.8465, + "learning_rate": 3.887036193073734e-08, + "loss": 1.0273, "step": 24862 }, { - "epoch": 0.7045538269715775, + "epoch": 0.97280694890054, "grad_norm": 0.0, - "learning_rate": 4.239327251682409e-06, - "loss": 0.8416, + "learning_rate": 3.875881788800606e-08, + "loss": 0.9886, "step": 24863 }, { - "epoch": 0.70458216441384, + "epoch": 0.9728460755927694, "grad_norm": 0.0, - "learning_rate": 4.2385770729037336e-06, - "loss": 0.8178, + "learning_rate": 3.864743381032487e-08, + "loss": 1.0786, "step": 24864 }, { - "epoch": 0.7046105018561025, + "epoch": 0.9728852022849989, "grad_norm": 0.0, - "learning_rate": 4.237826942655666e-06, - "loss": 0.9171, + "learning_rate": 3.853620969948457e-08, + "loss": 0.8999, "step": 24865 }, { - "epoch": 0.7046388392983649, + "epoch": 0.9729243289772282, "grad_norm": 0.0, - "learning_rate": 4.237076860944518e-06, - "loss": 0.8958, + "learning_rate": 3.842514555726928e-08, + "loss": 0.939, "step": 24866 }, { - "epoch": 0.7046671767406274, + "epoch": 0.9729634556694577, "grad_norm": 0.0, - "learning_rate": 4.236326827776615e-06, - "loss": 1.0288, + "learning_rate": 3.8314241385465354e-08, + "loss": 1.0297, "step": 24867 }, { - "epoch": 0.7046955141828899, + "epoch": 0.9730025823616871, "grad_norm": 0.0, - "learning_rate": 4.235576843158269e-06, - "loss": 0.7317, + "learning_rate": 3.8203497185850256e-08, + "loss": 0.9004, "step": 24868 }, { - "epoch": 0.7047238516251523, + "epoch": 0.9730417090539166, "grad_norm": 0.0, - "learning_rate": 4.2348269070957986e-06, - "loss": 0.738, + "learning_rate": 3.809291296020479e-08, + "loss": 1.0915, "step": 24869 }, { - "epoch": 0.7047521890674148, + "epoch": 0.973080835746146, "grad_norm": 0.0, - "learning_rate": 4.2340770195955264e-06, - "loss": 0.7833, + "learning_rate": 3.7982488710304186e-08, + "loss": 0.993, "step": 24870 }, { - "epoch": 0.7047805265096773, + "epoch": 0.9731199624383755, "grad_norm": 0.0, - "learning_rate": 4.23332718066376e-06, - "loss": 0.8192, + "learning_rate": 3.78722244379226e-08, + "loss": 0.8806, "step": 24871 }, { - "epoch": 0.7048088639519396, + "epoch": 0.9731590891306049, "grad_norm": 0.0, - "learning_rate": 4.232577390306821e-06, - "loss": 0.8474, + "learning_rate": 3.776212014482861e-08, + "loss": 0.8769, "step": 24872 }, { - "epoch": 0.7048372013942021, + "epoch": 0.9731982158228344, "grad_norm": 0.0, - "learning_rate": 4.231827648531028e-06, - "loss": 0.784, + "learning_rate": 3.7652175832791906e-08, + "loss": 0.9807, "step": 24873 }, { - "epoch": 0.7048655388364646, + "epoch": 0.9732373425150638, "grad_norm": 0.0, - "learning_rate": 4.231077955342688e-06, - "loss": 0.8419, + "learning_rate": 3.7542391503577745e-08, + "loss": 0.9148, "step": 24874 }, { - "epoch": 0.7048938762787271, + "epoch": 0.9732764692072932, "grad_norm": 0.0, - "learning_rate": 4.230328310748122e-06, - "loss": 0.8747, + "learning_rate": 3.743276715894917e-08, + "loss": 0.922, "step": 24875 }, { - "epoch": 0.7049222137209895, + "epoch": 0.9733155958995227, "grad_norm": 0.0, - "learning_rate": 4.229578714753642e-06, - "loss": 0.8645, + "learning_rate": 3.7323302800666986e-08, + "loss": 1.1012, "step": 24876 }, { - "epoch": 0.704950551163252, + "epoch": 0.973354722591752, "grad_norm": 0.0, - "learning_rate": 4.228829167365565e-06, - "loss": 0.8596, + "learning_rate": 3.721399843048867e-08, + "loss": 0.8488, "step": 24877 }, { - "epoch": 0.7049788886055145, + "epoch": 0.9733938492839815, "grad_norm": 0.0, - "learning_rate": 4.228079668590205e-06, - "loss": 0.8337, + "learning_rate": 3.710485405016839e-08, + "loss": 0.8474, "step": 24878 }, { - "epoch": 0.7050072260477769, + "epoch": 0.9734329759762109, "grad_norm": 0.0, - "learning_rate": 4.22733021843387e-06, - "loss": 0.8579, + "learning_rate": 3.6995869661460294e-08, + "loss": 0.8397, "step": 24879 }, { - "epoch": 0.7050355634900394, + "epoch": 0.9734721026684404, "grad_norm": 0.0, - "learning_rate": 4.226580816902876e-06, - "loss": 0.7639, + "learning_rate": 3.6887045266115196e-08, + "loss": 1.0853, "step": 24880 }, { - "epoch": 0.7050639009323019, + "epoch": 0.9735112293606698, "grad_norm": 0.0, - "learning_rate": 4.225831464003541e-06, - "loss": 0.7856, + "learning_rate": 3.6778380865878374e-08, + "loss": 1.0428, "step": 24881 }, { - "epoch": 0.7050922383745644, + "epoch": 0.9735503560528993, "grad_norm": 0.0, - "learning_rate": 4.225082159742166e-06, - "loss": 0.8726, + "learning_rate": 3.666987646249731e-08, + "loss": 1.0081, "step": 24882 }, { - "epoch": 0.7051205758168267, + "epoch": 0.9735894827451287, "grad_norm": 0.0, - "learning_rate": 4.224332904125072e-06, - "loss": 0.7828, + "learning_rate": 3.6561532057712844e-08, + "loss": 1.085, "step": 24883 }, { - "epoch": 0.7051489132590892, + "epoch": 0.9736286094373582, "grad_norm": 0.0, - "learning_rate": 4.223583697158564e-06, - "loss": 0.761, + "learning_rate": 3.6453347653264695e-08, + "loss": 0.8243, "step": 24884 }, { - "epoch": 0.7051772507013517, + "epoch": 0.9736677361295876, "grad_norm": 0.0, - "learning_rate": 4.222834538848956e-06, - "loss": 0.7578, + "learning_rate": 3.634532325089146e-08, + "loss": 0.9739, "step": 24885 }, { - "epoch": 0.7052055881436141, + "epoch": 0.9737068628218171, "grad_norm": 0.0, - "learning_rate": 4.222085429202561e-06, - "loss": 0.7129, + "learning_rate": 3.623745885232621e-08, + "loss": 1.0761, "step": 24886 }, { - "epoch": 0.7052339255858766, + "epoch": 0.9737459895140465, "grad_norm": 0.0, - "learning_rate": 4.221336368225682e-06, - "loss": 0.7348, + "learning_rate": 3.6129754459303114e-08, + "loss": 0.8801, "step": 24887 }, { - "epoch": 0.7052622630281391, + "epoch": 0.973785116206276, "grad_norm": 0.0, - "learning_rate": 4.220587355924634e-06, - "loss": 0.8367, + "learning_rate": 3.602221007355078e-08, + "loss": 0.9505, "step": 24888 }, { - "epoch": 0.7052906004704016, + "epoch": 0.9738242428985053, "grad_norm": 0.0, - "learning_rate": 4.219838392305723e-06, - "loss": 0.8071, + "learning_rate": 3.591482569679561e-08, + "loss": 0.9425, "step": 24889 }, { - "epoch": 0.705318937912664, + "epoch": 0.9738633695907348, "grad_norm": 0.0, - "learning_rate": 4.219089477375261e-06, - "loss": 0.8255, + "learning_rate": 3.5807601330762886e-08, + "loss": 0.9821, "step": 24890 }, { - "epoch": 0.7053472753549265, + "epoch": 0.9739024962829642, "grad_norm": 0.0, - "learning_rate": 4.218340611139559e-06, - "loss": 0.9721, + "learning_rate": 3.570053697717457e-08, + "loss": 0.8313, "step": 24891 }, { - "epoch": 0.705375612797189, + "epoch": 0.9739416229751937, "grad_norm": 0.0, - "learning_rate": 4.217591793604916e-06, - "loss": 0.962, + "learning_rate": 3.559363263774929e-08, + "loss": 0.942, "step": 24892 }, { - "epoch": 0.7054039502394513, + "epoch": 0.9739807496674231, "grad_norm": 0.0, - "learning_rate": 4.216843024777645e-06, - "loss": 0.8925, + "learning_rate": 3.548688831420344e-08, + "loss": 0.8935, "step": 24893 }, { - "epoch": 0.7054322876817138, + "epoch": 0.9740198763596526, "grad_norm": 0.0, - "learning_rate": 4.216094304664056e-06, - "loss": 0.8984, + "learning_rate": 3.538030400825343e-08, + "loss": 0.9167, "step": 24894 }, { - "epoch": 0.7054606251239763, + "epoch": 0.974059003051882, "grad_norm": 0.0, - "learning_rate": 4.215345633270449e-06, - "loss": 0.7894, + "learning_rate": 3.527387972160901e-08, + "loss": 0.9675, "step": 24895 }, { - "epoch": 0.7054889625662387, + "epoch": 0.9740981297441115, "grad_norm": 0.0, - "learning_rate": 4.2145970106031385e-06, - "loss": 0.9135, + "learning_rate": 3.51676154559788e-08, + "loss": 0.9069, "step": 24896 }, { - "epoch": 0.7055173000085012, + "epoch": 0.9741372564363409, "grad_norm": 0.0, - "learning_rate": 4.213848436668421e-06, - "loss": 0.7743, + "learning_rate": 3.506151121307033e-08, + "loss": 0.9838, "step": 24897 }, { - "epoch": 0.7055456374507637, + "epoch": 0.9741763831285704, "grad_norm": 0.0, - "learning_rate": 4.213099911472607e-06, - "loss": 0.8415, + "learning_rate": 3.49555669945878e-08, + "loss": 0.9749, "step": 24898 }, { - "epoch": 0.7055739748930262, + "epoch": 0.9742155098207997, "grad_norm": 0.0, - "learning_rate": 4.212351435022005e-06, - "loss": 0.8461, + "learning_rate": 3.484978280223095e-08, + "loss": 1.0253, "step": 24899 }, { - "epoch": 0.7056023123352886, + "epoch": 0.9742546365130292, "grad_norm": 0.0, - "learning_rate": 4.211603007322913e-06, - "loss": 0.711, + "learning_rate": 3.474415863770064e-08, + "loss": 0.939, "step": 24900 }, { - "epoch": 0.7056306497775511, + "epoch": 0.9742937632052586, "grad_norm": 0.0, - "learning_rate": 4.210854628381637e-06, - "loss": 0.6815, + "learning_rate": 3.4638694502691083e-08, + "loss": 0.9979, "step": 24901 }, { - "epoch": 0.7056589872198136, + "epoch": 0.9743328898974881, "grad_norm": 0.0, - "learning_rate": 4.210106298204483e-06, - "loss": 0.9104, + "learning_rate": 3.453339039889758e-08, + "loss": 0.8939, "step": 24902 }, { - "epoch": 0.705687324662076, + "epoch": 0.9743720165897175, "grad_norm": 0.0, - "learning_rate": 4.209358016797754e-06, - "loss": 0.8468, + "learning_rate": 3.44282463280099e-08, + "loss": 0.931, "step": 24903 }, { - "epoch": 0.7057156621043384, + "epoch": 0.9744111432819469, "grad_norm": 0.0, - "learning_rate": 4.208609784167756e-06, - "loss": 0.764, + "learning_rate": 3.432326229171668e-08, + "loss": 1.0627, "step": 24904 }, { - "epoch": 0.7057439995466009, + "epoch": 0.9744502699741764, "grad_norm": 0.0, - "learning_rate": 4.207861600320785e-06, - "loss": 0.8036, + "learning_rate": 3.4218438291704346e-08, + "loss": 1.0049, "step": 24905 }, { - "epoch": 0.7057723369888634, + "epoch": 0.9744893966664058, "grad_norm": 0.0, - "learning_rate": 4.207113465263146e-06, - "loss": 0.7853, + "learning_rate": 3.411377432965712e-08, + "loss": 0.9355, "step": 24906 }, { - "epoch": 0.7058006744311258, + "epoch": 0.9745285233586353, "grad_norm": 0.0, - "learning_rate": 4.206365379001146e-06, - "loss": 0.8973, + "learning_rate": 3.4009270407253656e-08, + "loss": 0.9168, "step": 24907 }, { - "epoch": 0.7058290118733883, + "epoch": 0.9745676500508647, "grad_norm": 0.0, - "learning_rate": 4.205617341541078e-06, - "loss": 0.896, + "learning_rate": 3.390492652617372e-08, + "loss": 0.9342, "step": 24908 }, { - "epoch": 0.7058573493156508, + "epoch": 0.9746067767430941, "grad_norm": 0.0, - "learning_rate": 4.204869352889246e-06, - "loss": 0.8276, + "learning_rate": 3.3800742688091526e-08, + "loss": 0.972, "step": 24909 }, { - "epoch": 0.7058856867579132, + "epoch": 0.9746459034353235, "grad_norm": 0.0, - "learning_rate": 4.204121413051956e-06, - "loss": 0.8494, + "learning_rate": 3.36967188946824e-08, + "loss": 1.0905, "step": 24910 }, { - "epoch": 0.7059140242001757, + "epoch": 0.974685030127553, "grad_norm": 0.0, - "learning_rate": 4.2033735220355e-06, - "loss": 0.7709, + "learning_rate": 3.35928551476139e-08, + "loss": 1.0095, "step": 24911 }, { - "epoch": 0.7059423616424382, + "epoch": 0.9747241568197824, "grad_norm": 0.0, - "learning_rate": 4.202625679846184e-06, - "loss": 0.7474, + "learning_rate": 3.348915144855691e-08, + "loss": 0.9077, "step": 24912 }, { - "epoch": 0.7059706990847007, + "epoch": 0.9747632835120119, "grad_norm": 0.0, - "learning_rate": 4.201877886490301e-06, - "loss": 0.788, + "learning_rate": 3.3385607799175654e-08, + "loss": 0.8055, "step": 24913 }, { - "epoch": 0.705999036526963, + "epoch": 0.9748024102042413, "grad_norm": 0.0, - "learning_rate": 4.201130141974154e-06, - "loss": 0.9567, + "learning_rate": 3.328222420113103e-08, + "loss": 0.876, "step": 24914 }, { - "epoch": 0.7060273739692255, + "epoch": 0.9748415368964708, "grad_norm": 0.0, - "learning_rate": 4.200382446304042e-06, - "loss": 0.7646, + "learning_rate": 3.3179000656086147e-08, + "loss": 1.0064, "step": 24915 }, { - "epoch": 0.706055711411488, + "epoch": 0.9748806635887002, "grad_norm": 0.0, - "learning_rate": 4.199634799486262e-06, - "loss": 0.8382, + "learning_rate": 3.307593716569746e-08, + "loss": 1.0275, "step": 24916 }, { - "epoch": 0.7060840488537504, + "epoch": 0.9749197902809297, "grad_norm": 0.0, - "learning_rate": 4.198887201527114e-06, - "loss": 0.7964, + "learning_rate": 3.2973033731619197e-08, + "loss": 1.0579, "step": 24917 }, { - "epoch": 0.7061123862960129, + "epoch": 0.9749589169731591, "grad_norm": 0.0, - "learning_rate": 4.198139652432892e-06, - "loss": 0.876, + "learning_rate": 3.2870290355505593e-08, + "loss": 0.9195, "step": 24918 }, { - "epoch": 0.7061407237382754, + "epoch": 0.9749980436653886, "grad_norm": 0.0, - "learning_rate": 4.197392152209892e-06, - "loss": 0.7601, + "learning_rate": 3.276770703900423e-08, + "loss": 0.977, "step": 24919 }, { - "epoch": 0.7061690611805378, + "epoch": 0.9750371703576179, "grad_norm": 0.0, - "learning_rate": 4.196644700864419e-06, - "loss": 0.82, + "learning_rate": 3.2665283783764876e-08, + "loss": 1.0087, "step": 24920 }, { - "epoch": 0.7061973986228003, + "epoch": 0.9750762970498474, "grad_norm": 0.0, - "learning_rate": 4.195897298402757e-06, - "loss": 0.7822, + "learning_rate": 3.2563020591431794e-08, + "loss": 0.8713, "step": 24921 }, { - "epoch": 0.7062257360650628, + "epoch": 0.9751154237420768, "grad_norm": 0.0, - "learning_rate": 4.195149944831208e-06, - "loss": 0.8112, + "learning_rate": 3.246091746364588e-08, + "loss": 0.9636, "step": 24922 }, { - "epoch": 0.7062540735073253, + "epoch": 0.9751545504343063, "grad_norm": 0.0, - "learning_rate": 4.19440264015607e-06, - "loss": 0.8271, + "learning_rate": 3.235897440204694e-08, + "loss": 0.9666, "step": 24923 }, { - "epoch": 0.7062824109495877, + "epoch": 0.9751936771265357, "grad_norm": 0.0, - "learning_rate": 4.193655384383631e-06, - "loss": 0.9294, + "learning_rate": 3.225719140827477e-08, + "loss": 0.9637, "step": 24924 }, { - "epoch": 0.7063107483918502, + "epoch": 0.9752328038187652, "grad_norm": 0.0, - "learning_rate": 4.192908177520192e-06, - "loss": 0.8134, + "learning_rate": 3.215556848396029e-08, + "loss": 0.8696, "step": 24925 }, { - "epoch": 0.7063390858341126, + "epoch": 0.9752719305109946, "grad_norm": 0.0, - "learning_rate": 4.19216101957204e-06, - "loss": 0.9067, + "learning_rate": 3.2054105630737745e-08, + "loss": 0.9944, "step": 24926 }, { - "epoch": 0.706367423276375, + "epoch": 0.9753110572032241, "grad_norm": 0.0, - "learning_rate": 4.191413910545473e-06, - "loss": 0.7872, + "learning_rate": 3.195280285023472e-08, + "loss": 0.8889, "step": 24927 }, { - "epoch": 0.7063957607186375, + "epoch": 0.9753501838954535, "grad_norm": 0.0, - "learning_rate": 4.190666850446784e-06, - "loss": 0.7328, + "learning_rate": 3.185166014407881e-08, + "loss": 0.8674, "step": 24928 }, { - "epoch": 0.7064240981609, + "epoch": 0.975389310587683, "grad_norm": 0.0, - "learning_rate": 4.189919839282265e-06, - "loss": 0.7841, + "learning_rate": 3.1750677513895376e-08, + "loss": 0.9333, "step": 24929 }, { - "epoch": 0.7064524356031625, + "epoch": 0.9754284372799124, "grad_norm": 0.0, - "learning_rate": 4.1891728770582075e-06, - "loss": 0.8871, + "learning_rate": 3.164985496130535e-08, + "loss": 0.9079, "step": 24930 }, { - "epoch": 0.7064807730454249, + "epoch": 0.9754675639721418, "grad_norm": 0.0, - "learning_rate": 4.18842596378091e-06, - "loss": 0.7925, + "learning_rate": 3.154919248792743e-08, + "loss": 0.8865, "step": 24931 }, { - "epoch": 0.7065091104876874, + "epoch": 0.9755066906643712, "grad_norm": 0.0, - "learning_rate": 4.187679099456654e-06, - "loss": 0.877, + "learning_rate": 3.14486900953781e-08, + "loss": 0.8175, "step": 24932 }, { - "epoch": 0.7065374479299499, + "epoch": 0.9755458173566006, "grad_norm": 0.0, - "learning_rate": 4.186932284091739e-06, - "loss": 0.8156, + "learning_rate": 3.1348347785271625e-08, + "loss": 0.9655, "step": 24933 }, { - "epoch": 0.7065657853722123, + "epoch": 0.9755849440488301, "grad_norm": 0.0, - "learning_rate": 4.186185517692449e-06, - "loss": 0.7819, + "learning_rate": 3.1248165559218946e-08, + "loss": 0.8647, "step": 24934 }, { - "epoch": 0.7065941228144748, + "epoch": 0.9756240707410595, "grad_norm": 0.0, - "learning_rate": 4.185438800265077e-06, - "loss": 0.7048, + "learning_rate": 3.1148143418828766e-08, + "loss": 1.1038, "step": 24935 }, { - "epoch": 0.7066224602567373, + "epoch": 0.975663197433289, "grad_norm": 0.0, - "learning_rate": 4.1846921318159175e-06, - "loss": 0.7705, + "learning_rate": 3.10482813657087e-08, + "loss": 0.8619, "step": 24936 }, { - "epoch": 0.7066507976989996, + "epoch": 0.9757023241255184, "grad_norm": 0.0, - "learning_rate": 4.183945512351251e-06, - "loss": 0.8531, + "learning_rate": 3.0948579401461896e-08, + "loss": 0.8943, "step": 24937 }, { - "epoch": 0.7066791351412621, + "epoch": 0.9757414508177479, "grad_norm": 0.0, - "learning_rate": 4.1831989418773714e-06, - "loss": 0.8024, + "learning_rate": 3.084903752768709e-08, + "loss": 0.8374, "step": 24938 }, { - "epoch": 0.7067074725835246, + "epoch": 0.9757805775099773, "grad_norm": 0.0, - "learning_rate": 4.182452420400571e-06, - "loss": 0.8215, + "learning_rate": 3.074965574598632e-08, + "loss": 1.0001, "step": 24939 }, { - "epoch": 0.7067358100257871, + "epoch": 0.9758197042022068, "grad_norm": 0.0, - "learning_rate": 4.181705947927131e-06, - "loss": 0.8742, + "learning_rate": 3.065043405795387e-08, + "loss": 0.9639, "step": 24940 }, { - "epoch": 0.7067641474680495, + "epoch": 0.9758588308944361, "grad_norm": 0.0, - "learning_rate": 4.18095952446334e-06, - "loss": 0.7616, + "learning_rate": 3.0551372465181805e-08, + "loss": 1.0133, "step": 24941 }, { - "epoch": 0.706792484910312, + "epoch": 0.9758979575866656, "grad_norm": 0.0, - "learning_rate": 4.18021315001549e-06, - "loss": 0.828, + "learning_rate": 3.0452470969263293e-08, + "loss": 1.0516, "step": 24942 }, { - "epoch": 0.7068208223525745, + "epoch": 0.975937084278895, "grad_norm": 0.0, - "learning_rate": 4.1794668245898664e-06, - "loss": 0.8581, + "learning_rate": 3.035372957178595e-08, + "loss": 0.883, "step": 24943 }, { - "epoch": 0.7068491597948369, + "epoch": 0.9759762109711245, "grad_norm": 0.0, - "learning_rate": 4.1787205481927575e-06, - "loss": 0.8822, + "learning_rate": 3.025514827433407e-08, + "loss": 1.0369, "step": 24944 }, { - "epoch": 0.7068774972370994, + "epoch": 0.9760153376633539, "grad_norm": 0.0, - "learning_rate": 4.1779743208304435e-06, - "loss": 0.7949, + "learning_rate": 3.0156727078491935e-08, + "loss": 0.7977, "step": 24945 }, { - "epoch": 0.7069058346793619, + "epoch": 0.9760544643555834, "grad_norm": 0.0, - "learning_rate": 4.177228142509218e-06, - "loss": 0.8066, + "learning_rate": 3.00584659858405e-08, + "loss": 0.9962, "step": 24946 }, { - "epoch": 0.7069341721216243, + "epoch": 0.9760935910478128, "grad_norm": 0.0, - "learning_rate": 4.176482013235357e-06, - "loss": 0.7504, + "learning_rate": 2.9960364997956294e-08, + "loss": 0.8554, "step": 24947 }, { - "epoch": 0.7069625095638867, + "epoch": 0.9761327177400423, "grad_norm": 0.0, - "learning_rate": 4.175735933015151e-06, - "loss": 0.697, + "learning_rate": 2.986242411641582e-08, + "loss": 0.9188, "step": 24948 }, { - "epoch": 0.7069908470061492, + "epoch": 0.9761718444322717, "grad_norm": 0.0, - "learning_rate": 4.174989901854889e-06, - "loss": 0.8763, + "learning_rate": 2.9764643342792276e-08, + "loss": 0.9971, "step": 24949 }, { - "epoch": 0.7070191844484117, + "epoch": 0.9762109711245012, "grad_norm": 0.0, - "learning_rate": 4.174243919760845e-06, - "loss": 0.8291, + "learning_rate": 2.966702267865329e-08, + "loss": 0.8167, "step": 24950 }, { - "epoch": 0.7070475218906741, + "epoch": 0.9762500978167306, "grad_norm": 0.0, - "learning_rate": 4.173497986739309e-06, - "loss": 0.8715, + "learning_rate": 2.9569562125569827e-08, + "loss": 0.9917, "step": 24951 }, { - "epoch": 0.7070758593329366, + "epoch": 0.97628922450896, "grad_norm": 0.0, - "learning_rate": 4.172752102796565e-06, - "loss": 0.8381, + "learning_rate": 2.9472261685105084e-08, + "loss": 0.9572, "step": 24952 }, { - "epoch": 0.7071041967751991, + "epoch": 0.9763283512011894, "grad_norm": 0.0, - "learning_rate": 4.172006267938893e-06, - "loss": 0.8091, + "learning_rate": 2.9375121358822257e-08, + "loss": 1.0069, "step": 24953 }, { - "epoch": 0.7071325342174616, + "epoch": 0.9763674778934189, "grad_norm": 0.0, - "learning_rate": 4.171260482172574e-06, - "loss": 0.8624, + "learning_rate": 2.9278141148281202e-08, + "loss": 1.0588, "step": 24954 }, { - "epoch": 0.707160871659724, + "epoch": 0.9764066045856483, "grad_norm": 0.0, - "learning_rate": 4.170514745503893e-06, - "loss": 0.8698, + "learning_rate": 2.9181321055039567e-08, + "loss": 0.9898, "step": 24955 }, { - "epoch": 0.7071892091019865, + "epoch": 0.9764457312778778, "grad_norm": 0.0, - "learning_rate": 4.169769057939132e-06, - "loss": 0.8749, + "learning_rate": 2.9084661080650556e-08, + "loss": 0.9224, "step": 24956 }, { - "epoch": 0.707217546544249, + "epoch": 0.9764848579701072, "grad_norm": 0.0, - "learning_rate": 4.169023419484574e-06, - "loss": 0.8483, + "learning_rate": 2.8988161226668478e-08, + "loss": 1.0451, "step": 24957 }, { - "epoch": 0.7072458839865113, + "epoch": 0.9765239846623367, "grad_norm": 0.0, - "learning_rate": 4.168277830146493e-06, - "loss": 0.8497, + "learning_rate": 2.8891821494642092e-08, + "loss": 0.9453, "step": 24958 }, { - "epoch": 0.7072742214287738, + "epoch": 0.9765631113545661, "grad_norm": 0.0, - "learning_rate": 4.167532289931175e-06, - "loss": 0.7722, + "learning_rate": 2.8795641886117943e-08, + "loss": 0.9236, "step": 24959 }, { - "epoch": 0.7073025588710363, + "epoch": 0.9766022380467956, "grad_norm": 0.0, - "learning_rate": 4.1667867988448995e-06, - "loss": 0.8762, + "learning_rate": 2.8699622402641457e-08, + "loss": 0.9047, "step": 24960 }, { - "epoch": 0.7073308963132987, + "epoch": 0.976641364739025, "grad_norm": 0.0, - "learning_rate": 4.166041356893943e-06, - "loss": 0.7983, + "learning_rate": 2.860376304575474e-08, + "loss": 0.9185, "step": 24961 }, { - "epoch": 0.7073592337555612, + "epoch": 0.9766804914312543, "grad_norm": 0.0, - "learning_rate": 4.1652959640845906e-06, - "loss": 0.8345, + "learning_rate": 2.8508063816996557e-08, + "loss": 0.9091, "step": 24962 }, { - "epoch": 0.7073875711978237, + "epoch": 0.9767196181234838, "grad_norm": 0.0, - "learning_rate": 4.164550620423112e-06, - "loss": 0.8035, + "learning_rate": 2.8412524717903454e-08, + "loss": 0.9367, "step": 24963 }, { - "epoch": 0.7074159086400862, + "epoch": 0.9767587448157132, "grad_norm": 0.0, - "learning_rate": 4.16380532591579e-06, - "loss": 0.7188, + "learning_rate": 2.8317145750010876e-08, + "loss": 1.0349, "step": 24964 }, { - "epoch": 0.7074442460823486, + "epoch": 0.9767978715079427, "grad_norm": 0.0, - "learning_rate": 4.163060080568908e-06, - "loss": 0.8347, + "learning_rate": 2.822192691484982e-08, + "loss": 0.8641, "step": 24965 }, { - "epoch": 0.7074725835246111, + "epoch": 0.9768369982001721, "grad_norm": 0.0, - "learning_rate": 4.162314884388735e-06, - "loss": 0.9629, + "learning_rate": 2.8126868213947945e-08, + "loss": 1.0262, "step": 24966 }, { - "epoch": 0.7075009209668736, + "epoch": 0.9768761248924016, "grad_norm": 0.0, - "learning_rate": 4.161569737381551e-06, - "loss": 0.9005, + "learning_rate": 2.8031969648834033e-08, + "loss": 0.9973, "step": 24967 }, { - "epoch": 0.7075292584091359, + "epoch": 0.976915251584631, "grad_norm": 0.0, - "learning_rate": 4.160824639553634e-06, - "loss": 0.8211, + "learning_rate": 2.7937231221031313e-08, + "loss": 1.0684, "step": 24968 }, { - "epoch": 0.7075575958513984, + "epoch": 0.9769543782768605, "grad_norm": 0.0, - "learning_rate": 4.160079590911257e-06, - "loss": 0.7914, + "learning_rate": 2.7842652932060786e-08, + "loss": 0.9927, "step": 24969 }, { - "epoch": 0.7075859332936609, + "epoch": 0.9769935049690899, "grad_norm": 0.0, - "learning_rate": 4.159334591460703e-06, - "loss": 0.7917, + "learning_rate": 2.774823478344124e-08, + "loss": 1.0079, "step": 24970 }, { - "epoch": 0.7076142707359234, + "epoch": 0.9770326316613194, "grad_norm": 0.0, - "learning_rate": 4.158589641208239e-06, - "loss": 0.9084, + "learning_rate": 2.765397677668924e-08, + "loss": 0.9887, "step": 24971 }, { - "epoch": 0.7076426081781858, + "epoch": 0.9770717583535488, "grad_norm": 0.0, - "learning_rate": 4.1578447401601455e-06, - "loss": 0.7555, + "learning_rate": 2.7559878913318018e-08, + "loss": 0.9496, "step": 24972 }, { - "epoch": 0.7076709456204483, + "epoch": 0.9771108850457783, "grad_norm": 0.0, - "learning_rate": 4.157099888322697e-06, - "loss": 0.8551, + "learning_rate": 2.7465941194839697e-08, + "loss": 0.9626, "step": 24973 }, { - "epoch": 0.7076992830627108, + "epoch": 0.9771500117380076, "grad_norm": 0.0, - "learning_rate": 4.156355085702162e-06, - "loss": 0.7333, + "learning_rate": 2.7372163622760852e-08, + "loss": 0.7837, "step": 24974 }, { - "epoch": 0.7077276205049732, + "epoch": 0.9771891384302371, "grad_norm": 0.0, - "learning_rate": 4.155610332304823e-06, - "loss": 0.8838, + "learning_rate": 2.7278546198590272e-08, + "loss": 0.8459, "step": 24975 }, { - "epoch": 0.7077559579472357, + "epoch": 0.9772282651224665, "grad_norm": 0.0, - "learning_rate": 4.154865628136942e-06, - "loss": 0.758, + "learning_rate": 2.7185088923828984e-08, + "loss": 1.0299, "step": 24976 }, { - "epoch": 0.7077842953894982, + "epoch": 0.977267391814696, "grad_norm": 0.0, - "learning_rate": 4.154120973204802e-06, - "loss": 0.8335, + "learning_rate": 2.7091791799978005e-08, + "loss": 0.8895, "step": 24977 }, { - "epoch": 0.7078126328317607, + "epoch": 0.9773065185069254, "grad_norm": 0.0, - "learning_rate": 4.1533763675146736e-06, - "loss": 0.7853, + "learning_rate": 2.699865482853614e-08, + "loss": 0.9478, "step": 24978 }, { - "epoch": 0.707840970274023, + "epoch": 0.9773456451991549, "grad_norm": 0.0, - "learning_rate": 4.152631811072822e-06, - "loss": 0.8702, + "learning_rate": 2.690567801099997e-08, + "loss": 0.9321, "step": 24979 }, { - "epoch": 0.7078693077162855, + "epoch": 0.9773847718913843, "grad_norm": 0.0, - "learning_rate": 4.151887303885527e-06, - "loss": 0.8813, + "learning_rate": 2.681286134886052e-08, + "loss": 1.0222, "step": 24980 }, { - "epoch": 0.707897645158548, + "epoch": 0.9774238985836138, "grad_norm": 0.0, - "learning_rate": 4.151142845959055e-06, - "loss": 0.8445, + "learning_rate": 2.672020484360993e-08, + "loss": 1.0127, "step": 24981 }, { - "epoch": 0.7079259826008104, + "epoch": 0.9774630252758432, "grad_norm": 0.0, - "learning_rate": 4.15039843729968e-06, - "loss": 0.8649, + "learning_rate": 2.6627708496735906e-08, + "loss": 1.0012, "step": 24982 }, { - "epoch": 0.7079543200430729, + "epoch": 0.9775021519680727, "grad_norm": 0.0, - "learning_rate": 4.149654077913674e-06, - "loss": 0.7882, + "learning_rate": 2.6535372309722807e-08, + "loss": 1.0135, "step": 24983 }, { - "epoch": 0.7079826574853354, + "epoch": 0.977541278660302, "grad_norm": 0.0, - "learning_rate": 4.1489097678073e-06, - "loss": 0.8065, + "learning_rate": 2.6443196284055007e-08, + "loss": 1.0175, "step": 24984 }, { - "epoch": 0.7080109949275978, + "epoch": 0.9775804053525315, "grad_norm": 0.0, - "learning_rate": 4.148165506986834e-06, - "loss": 0.7724, + "learning_rate": 2.635118042121132e-08, + "loss": 1.0232, "step": 24985 }, { - "epoch": 0.7080393323698603, + "epoch": 0.9776195320447609, "grad_norm": 0.0, - "learning_rate": 4.147421295458543e-06, - "loss": 0.7323, + "learning_rate": 2.6259324722670565e-08, + "loss": 0.9837, "step": 24986 }, { - "epoch": 0.7080676698121228, + "epoch": 0.9776586587369904, "grad_norm": 0.0, - "learning_rate": 4.146677133228695e-06, - "loss": 0.8705, + "learning_rate": 2.6167629189907117e-08, + "loss": 1.0153, "step": 24987 }, { - "epoch": 0.7080960072543853, + "epoch": 0.9776977854292198, "grad_norm": 0.0, - "learning_rate": 4.145933020303558e-06, - "loss": 0.8278, + "learning_rate": 2.6076093824393134e-08, + "loss": 0.8685, "step": 24988 }, { - "epoch": 0.7081243446966476, + "epoch": 0.9777369121214492, "grad_norm": 0.0, - "learning_rate": 4.145188956689405e-06, - "loss": 0.7708, + "learning_rate": 2.598471862759966e-08, + "loss": 1.0073, "step": 24989 }, { - "epoch": 0.7081526821389101, + "epoch": 0.9777760388136787, "grad_norm": 0.0, - "learning_rate": 4.144444942392496e-06, - "loss": 0.7783, + "learning_rate": 2.5893503600993296e-08, + "loss": 0.9479, "step": 24990 }, { - "epoch": 0.7081810195811726, + "epoch": 0.9778151655059081, "grad_norm": 0.0, - "learning_rate": 4.143700977419105e-06, - "loss": 0.8538, + "learning_rate": 2.5802448746038432e-08, + "loss": 1.0182, "step": 24991 }, { - "epoch": 0.708209357023435, + "epoch": 0.9778542921981376, "grad_norm": 0.0, - "learning_rate": 4.142957061775491e-06, - "loss": 0.853, + "learning_rate": 2.571155406419834e-08, + "loss": 0.8631, "step": 24992 }, { - "epoch": 0.7082376944656975, + "epoch": 0.977893418890367, "grad_norm": 0.0, - "learning_rate": 4.142213195467926e-06, - "loss": 0.848, + "learning_rate": 2.5620819556931852e-08, + "loss": 1.0001, "step": 24993 }, { - "epoch": 0.70826603190796, + "epoch": 0.9779325455825965, "grad_norm": 0.0, - "learning_rate": 4.141469378502674e-06, - "loss": 0.7613, + "learning_rate": 2.5530245225696693e-08, + "loss": 0.8804, "step": 24994 }, { - "epoch": 0.7082943693502225, + "epoch": 0.9779716722748258, "grad_norm": 0.0, - "learning_rate": 4.140725610886e-06, - "loss": 0.8904, + "learning_rate": 2.543983107194725e-08, + "loss": 0.8802, "step": 24995 }, { - "epoch": 0.7083227067924849, + "epoch": 0.9780107989670553, "grad_norm": 0.0, - "learning_rate": 4.139981892624172e-06, - "loss": 0.7244, + "learning_rate": 2.5349577097135703e-08, + "loss": 0.9431, "step": 24996 }, { - "epoch": 0.7083510442347474, + "epoch": 0.9780499256592847, "grad_norm": 0.0, - "learning_rate": 4.139238223723454e-06, - "loss": 0.7555, + "learning_rate": 2.5259483302709776e-08, + "loss": 0.9883, "step": 24997 }, { - "epoch": 0.7083793816770099, + "epoch": 0.9780890523515142, "grad_norm": 0.0, - "learning_rate": 4.138494604190106e-06, - "loss": 0.8929, + "learning_rate": 2.5169549690118312e-08, + "loss": 1.0005, "step": 24998 }, { - "epoch": 0.7084077191192723, + "epoch": 0.9781281790437436, "grad_norm": 0.0, - "learning_rate": 4.1377510340304e-06, - "loss": 0.9534, + "learning_rate": 2.50797762608046e-08, + "loss": 0.9396, "step": 24999 }, { - "epoch": 0.7084360565615347, + "epoch": 0.9781673057359731, "grad_norm": 0.0, - "learning_rate": 4.137007513250587e-06, - "loss": 0.8159, + "learning_rate": 2.499016301621082e-08, + "loss": 0.8438, "step": 25000 }, { - "epoch": 0.7084643940037972, + "epoch": 0.9782064324282025, "grad_norm": 0.0, - "learning_rate": 4.136264041856939e-06, - "loss": 0.8632, + "learning_rate": 2.4900709957774715e-08, + "loss": 0.9647, "step": 25001 }, { - "epoch": 0.7084927314460597, + "epoch": 0.978245559120432, "grad_norm": 0.0, - "learning_rate": 4.135520619855719e-06, - "loss": 0.848, + "learning_rate": 2.481141708693513e-08, + "loss": 0.9506, "step": 25002 }, { - "epoch": 0.7085210688883221, + "epoch": 0.9782846858126614, "grad_norm": 0.0, - "learning_rate": 4.134777247253184e-06, - "loss": 0.8098, + "learning_rate": 2.4722284405123142e-08, + "loss": 1.0718, "step": 25003 }, { - "epoch": 0.7085494063305846, + "epoch": 0.9783238125048909, "grad_norm": 0.0, - "learning_rate": 4.134033924055601e-06, - "loss": 0.8265, + "learning_rate": 2.4633311913772052e-08, + "loss": 0.9495, "step": 25004 }, { - "epoch": 0.7085777437728471, + "epoch": 0.9783629391971203, "grad_norm": 0.0, - "learning_rate": 4.133290650269226e-06, - "loss": 0.8097, + "learning_rate": 2.45444996143096e-08, + "loss": 0.9, "step": 25005 }, { - "epoch": 0.7086060812151095, + "epoch": 0.9784020658893497, "grad_norm": 0.0, - "learning_rate": 4.132547425900322e-06, - "loss": 0.8422, + "learning_rate": 2.4455847508163545e-08, + "loss": 0.9526, "step": 25006 }, { - "epoch": 0.708634418657372, + "epoch": 0.9784411925815791, "grad_norm": 0.0, - "learning_rate": 4.131804250955149e-06, - "loss": 0.7621, + "learning_rate": 2.436735559675607e-08, + "loss": 0.9257, "step": 25007 }, { - "epoch": 0.7086627560996345, + "epoch": 0.9784803192738086, "grad_norm": 0.0, - "learning_rate": 4.131061125439969e-06, - "loss": 0.7948, + "learning_rate": 2.427902388150827e-08, + "loss": 0.7621, "step": 25008 }, { - "epoch": 0.7086910935418969, + "epoch": 0.978519445966038, "grad_norm": 0.0, - "learning_rate": 4.130318049361039e-06, - "loss": 0.7708, + "learning_rate": 2.419085236384011e-08, + "loss": 0.966, "step": 25009 }, { - "epoch": 0.7087194309841593, + "epoch": 0.9785585726582675, "grad_norm": 0.0, - "learning_rate": 4.1295750227246245e-06, - "loss": 0.96, + "learning_rate": 2.4102841045166027e-08, + "loss": 0.8676, "step": 25010 }, { - "epoch": 0.7087477684264218, + "epoch": 0.9785976993504969, "grad_norm": 0.0, - "learning_rate": 4.128832045536976e-06, - "loss": 0.762, + "learning_rate": 2.4014989926899323e-08, + "loss": 1.0123, "step": 25011 }, { - "epoch": 0.7087761058686843, + "epoch": 0.9786368260427264, "grad_norm": 0.0, - "learning_rate": 4.128089117804359e-06, - "loss": 0.6761, + "learning_rate": 2.39272990104511e-08, + "loss": 0.9865, "step": 25012 }, { - "epoch": 0.7088044433109467, + "epoch": 0.9786759527349558, "grad_norm": 0.0, - "learning_rate": 4.127346239533023e-06, - "loss": 0.7932, + "learning_rate": 2.3839768297230226e-08, + "loss": 0.9601, "step": 25013 }, { - "epoch": 0.7088327807532092, + "epoch": 0.9787150794271853, "grad_norm": 0.0, - "learning_rate": 4.126603410729232e-06, - "loss": 0.861, + "learning_rate": 2.3752397788642245e-08, + "loss": 0.9891, "step": 25014 }, { - "epoch": 0.7088611181954717, + "epoch": 0.9787542061194147, "grad_norm": 0.0, - "learning_rate": 4.1258606313992445e-06, - "loss": 0.959, + "learning_rate": 2.366518748608937e-08, + "loss": 0.968, "step": 25015 }, { - "epoch": 0.7088894556377341, + "epoch": 0.9787933328116442, "grad_norm": 0.0, - "learning_rate": 4.12511790154931e-06, - "loss": 0.7225, + "learning_rate": 2.35781373909727e-08, + "loss": 0.9463, "step": 25016 }, { - "epoch": 0.7089177930799966, + "epoch": 0.9788324595038735, "grad_norm": 0.0, - "learning_rate": 4.1243752211856904e-06, - "loss": 0.7553, + "learning_rate": 2.3491247504690008e-08, + "loss": 0.9601, "step": 25017 }, { - "epoch": 0.7089461305222591, + "epoch": 0.9788715861961029, "grad_norm": 0.0, - "learning_rate": 4.123632590314643e-06, - "loss": 0.8143, + "learning_rate": 2.3404517828636843e-08, + "loss": 0.8114, "step": 25018 }, { - "epoch": 0.7089744679645216, + "epoch": 0.9789107128883324, "grad_norm": 0.0, - "learning_rate": 4.122890008942417e-06, - "loss": 0.8595, + "learning_rate": 2.3317948364205424e-08, + "loss": 0.8564, "step": 25019 }, { - "epoch": 0.709002805406784, + "epoch": 0.9789498395805618, "grad_norm": 0.0, - "learning_rate": 4.12214747707527e-06, - "loss": 0.7589, + "learning_rate": 2.323153911278575e-08, + "loss": 0.9978, "step": 25020 }, { - "epoch": 0.7090311428490464, + "epoch": 0.9789889662727913, "grad_norm": 0.0, - "learning_rate": 4.121404994719458e-06, - "loss": 0.9237, + "learning_rate": 2.3145290075766713e-08, + "loss": 1.0533, "step": 25021 }, { - "epoch": 0.7090594802913089, + "epoch": 0.9790280929650207, "grad_norm": 0.0, - "learning_rate": 4.120662561881235e-06, - "loss": 0.7935, + "learning_rate": 2.305920125453165e-08, + "loss": 0.9923, "step": 25022 }, { - "epoch": 0.7090878177335713, + "epoch": 0.9790672196572502, "grad_norm": 0.0, - "learning_rate": 4.119920178566858e-06, - "loss": 0.8662, + "learning_rate": 2.2973272650465006e-08, + "loss": 0.912, "step": 25023 }, { - "epoch": 0.7091161551758338, + "epoch": 0.9791063463494796, "grad_norm": 0.0, - "learning_rate": 4.119177844782573e-06, - "loss": 0.8229, + "learning_rate": 2.2887504264945682e-08, + "loss": 0.9853, "step": 25024 }, { - "epoch": 0.7091444926180963, + "epoch": 0.9791454730417091, "grad_norm": 0.0, - "learning_rate": 4.118435560534636e-06, - "loss": 0.8568, + "learning_rate": 2.2801896099350352e-08, + "loss": 0.9438, "step": 25025 }, { - "epoch": 0.7091728300603588, + "epoch": 0.9791845997339385, "grad_norm": 0.0, - "learning_rate": 4.117693325829305e-06, - "loss": 0.7318, + "learning_rate": 2.2716448155054583e-08, + "loss": 0.9293, "step": 25026 }, { - "epoch": 0.7092011675026212, + "epoch": 0.979223726426168, "grad_norm": 0.0, - "learning_rate": 4.116951140672824e-06, - "loss": 0.7428, + "learning_rate": 2.263116043343172e-08, + "loss": 0.873, "step": 25027 }, { - "epoch": 0.7092295049448837, + "epoch": 0.9792628531183973, "grad_norm": 0.0, - "learning_rate": 4.116209005071451e-06, - "loss": 0.736, + "learning_rate": 2.254603293584845e-08, + "loss": 1.0212, "step": 25028 }, { - "epoch": 0.7092578423871462, + "epoch": 0.9793019798106268, "grad_norm": 0.0, - "learning_rate": 4.1154669190314315e-06, - "loss": 0.7632, + "learning_rate": 2.2461065663673676e-08, + "loss": 1.0181, "step": 25029 }, { - "epoch": 0.7092861798294086, + "epoch": 0.9793411065028562, "grad_norm": 0.0, - "learning_rate": 4.114724882559019e-06, - "loss": 0.7636, + "learning_rate": 2.237625861827186e-08, + "loss": 0.8589, "step": 25030 }, { - "epoch": 0.709314517271671, + "epoch": 0.9793802331950857, "grad_norm": 0.0, - "learning_rate": 4.113982895660467e-06, - "loss": 0.7917, + "learning_rate": 2.2291611801005253e-08, + "loss": 0.9352, "step": 25031 }, { - "epoch": 0.7093428547139335, + "epoch": 0.9794193598873151, "grad_norm": 0.0, - "learning_rate": 4.113240958342022e-06, - "loss": 0.7877, + "learning_rate": 2.220712521323165e-08, + "loss": 0.9957, "step": 25032 }, { - "epoch": 0.7093711921561959, + "epoch": 0.9794584865795446, "grad_norm": 0.0, - "learning_rate": 4.112499070609932e-06, - "loss": 0.8065, + "learning_rate": 2.2122798856309967e-08, + "loss": 1.0684, "step": 25033 }, { - "epoch": 0.7093995295984584, + "epoch": 0.979497613271774, "grad_norm": 0.0, - "learning_rate": 4.111757232470449e-06, - "loss": 0.8821, + "learning_rate": 2.2038632731592456e-08, + "loss": 0.9655, "step": 25034 }, { - "epoch": 0.7094278670407209, + "epoch": 0.9795367399640035, "grad_norm": 0.0, - "learning_rate": 4.1110154439298214e-06, - "loss": 0.8117, + "learning_rate": 2.195462684043137e-08, + "loss": 0.898, "step": 25035 }, { - "epoch": 0.7094562044829834, + "epoch": 0.9795758666562329, "grad_norm": 0.0, - "learning_rate": 4.110273704994301e-06, - "loss": 0.8641, + "learning_rate": 2.1870781184175625e-08, + "loss": 1.1117, "step": 25036 }, { - "epoch": 0.7094845419252458, + "epoch": 0.9796149933484624, "grad_norm": 0.0, - "learning_rate": 4.109532015670129e-06, - "loss": 0.7982, + "learning_rate": 2.1787095764171928e-08, + "loss": 0.9159, "step": 25037 }, { - "epoch": 0.7095128793675083, + "epoch": 0.9796541200406917, "grad_norm": 0.0, - "learning_rate": 4.108790375963556e-06, - "loss": 0.8281, + "learning_rate": 2.1703570581763644e-08, + "loss": 0.9619, "step": 25038 }, { - "epoch": 0.7095412168097708, + "epoch": 0.9796932467329212, "grad_norm": 0.0, - "learning_rate": 4.1080487858808335e-06, - "loss": 0.7965, + "learning_rate": 2.1620205638293033e-08, + "loss": 0.9063, "step": 25039 }, { - "epoch": 0.7095695542520332, + "epoch": 0.9797323734251506, "grad_norm": 0.0, - "learning_rate": 4.1073072454282e-06, - "loss": 0.8551, + "learning_rate": 2.153700093509903e-08, + "loss": 0.9005, "step": 25040 }, { - "epoch": 0.7095978916942957, + "epoch": 0.9797715001173801, "grad_norm": 0.0, - "learning_rate": 4.10656575461191e-06, - "loss": 0.8454, + "learning_rate": 2.1453956473516114e-08, + "loss": 1.0088, "step": 25041 }, { - "epoch": 0.7096262291365582, + "epoch": 0.9798106268096095, "grad_norm": 0.0, - "learning_rate": 4.1058243134382e-06, - "loss": 0.7649, + "learning_rate": 2.1371072254878778e-08, + "loss": 0.9791, "step": 25042 }, { - "epoch": 0.7096545665788206, + "epoch": 0.979849753501839, "grad_norm": 0.0, - "learning_rate": 4.105082921913322e-06, - "loss": 0.7878, + "learning_rate": 2.1288348280518178e-08, + "loss": 0.7656, "step": 25043 }, { - "epoch": 0.709682904021083, + "epoch": 0.9798888801940684, "grad_norm": 0.0, - "learning_rate": 4.104341580043518e-06, - "loss": 0.7456, + "learning_rate": 2.1205784551763253e-08, + "loss": 1.0596, "step": 25044 }, { - "epoch": 0.7097112414633455, + "epoch": 0.9799280068862979, "grad_norm": 0.0, - "learning_rate": 4.1036002878350354e-06, - "loss": 0.7373, + "learning_rate": 2.1123381069938497e-08, + "loss": 0.9298, "step": 25045 }, { - "epoch": 0.709739578905608, + "epoch": 0.9799671335785273, "grad_norm": 0.0, - "learning_rate": 4.102859045294121e-06, - "loss": 0.78, + "learning_rate": 2.104113783636841e-08, + "loss": 0.9487, "step": 25046 }, { - "epoch": 0.7097679163478704, + "epoch": 0.9800062602707567, "grad_norm": 0.0, - "learning_rate": 4.10211785242701e-06, - "loss": 0.8562, + "learning_rate": 2.095905485237415e-08, + "loss": 0.8761, "step": 25047 }, { - "epoch": 0.7097962537901329, + "epoch": 0.9800453869629862, "grad_norm": 0.0, - "learning_rate": 4.101376709239951e-06, - "loss": 0.889, + "learning_rate": 2.0877132119271336e-08, + "loss": 1.0067, "step": 25048 }, { - "epoch": 0.7098245912323954, + "epoch": 0.9800845136552155, "grad_norm": 0.0, - "learning_rate": 4.10063561573919e-06, - "loss": 0.8229, + "learning_rate": 2.0795369638378915e-08, + "loss": 0.8296, "step": 25049 }, { - "epoch": 0.7098529286746579, + "epoch": 0.980123640347445, "grad_norm": 0.0, - "learning_rate": 4.099894571930962e-06, - "loss": 0.8992, + "learning_rate": 2.0713767411006946e-08, + "loss": 1.017, "step": 25050 }, { - "epoch": 0.7098812661169203, + "epoch": 0.9801627670396744, "grad_norm": 0.0, - "learning_rate": 4.0991535778215145e-06, - "loss": 0.7535, + "learning_rate": 2.0632325438467714e-08, + "loss": 0.9494, "step": 25051 }, { - "epoch": 0.7099096035591828, + "epoch": 0.9802018937319039, "grad_norm": 0.0, - "learning_rate": 4.098412633417089e-06, - "loss": 0.7981, + "learning_rate": 2.0551043722067953e-08, + "loss": 0.9113, "step": 25052 }, { - "epoch": 0.7099379410014452, + "epoch": 0.9802410204241333, "grad_norm": 0.0, - "learning_rate": 4.097671738723923e-06, - "loss": 0.8545, + "learning_rate": 2.0469922263114396e-08, + "loss": 1.0266, "step": 25053 }, { - "epoch": 0.7099662784437076, + "epoch": 0.9802801471163628, "grad_norm": 0.0, - "learning_rate": 4.096930893748264e-06, - "loss": 0.7875, + "learning_rate": 2.038896106290822e-08, + "loss": 0.9891, "step": 25054 }, { - "epoch": 0.7099946158859701, + "epoch": 0.9803192738085922, "grad_norm": 0.0, - "learning_rate": 4.096190098496343e-06, - "loss": 0.6873, + "learning_rate": 2.030816012275061e-08, + "loss": 0.937, "step": 25055 }, { - "epoch": 0.7100229533282326, + "epoch": 0.9803584005008217, "grad_norm": 0.0, - "learning_rate": 4.095449352974406e-06, - "loss": 0.8641, + "learning_rate": 2.0227519443938305e-08, + "loss": 0.9979, "step": 25056 }, { - "epoch": 0.710051290770495, + "epoch": 0.9803975271930511, "grad_norm": 0.0, - "learning_rate": 4.094708657188692e-06, - "loss": 0.7929, + "learning_rate": 2.0147039027766935e-08, + "loss": 0.8228, "step": 25057 }, { - "epoch": 0.7100796282127575, + "epoch": 0.9804366538852806, "grad_norm": 0.0, - "learning_rate": 4.093968011145438e-06, - "loss": 0.7877, + "learning_rate": 2.006671887552769e-08, + "loss": 0.9192, "step": 25058 }, { - "epoch": 0.71010796565502, + "epoch": 0.98047578057751, "grad_norm": 0.0, - "learning_rate": 4.093227414850887e-06, - "loss": 0.8027, + "learning_rate": 1.998655898851287e-08, + "loss": 0.9266, "step": 25059 }, { - "epoch": 0.7101363030972825, + "epoch": 0.9805149072697394, "grad_norm": 0.0, - "learning_rate": 4.092486868311277e-06, - "loss": 0.71, + "learning_rate": 1.9906559368007006e-08, + "loss": 0.7606, "step": 25060 }, { - "epoch": 0.7101646405395449, + "epoch": 0.9805540339619688, "grad_norm": 0.0, - "learning_rate": 4.091746371532841e-06, - "loss": 0.8219, + "learning_rate": 1.9826720015296842e-08, + "loss": 0.9813, "step": 25061 }, { - "epoch": 0.7101929779818074, + "epoch": 0.9805931606541983, "grad_norm": 0.0, - "learning_rate": 4.0910059245218225e-06, - "loss": 0.8658, + "learning_rate": 1.9747040931663573e-08, + "loss": 0.9668, "step": 25062 }, { - "epoch": 0.7102213154240699, + "epoch": 0.9806322873464277, "grad_norm": 0.0, - "learning_rate": 4.090265527284452e-06, - "loss": 0.9857, + "learning_rate": 1.9667522118387295e-08, + "loss": 0.953, "step": 25063 }, { - "epoch": 0.7102496528663322, + "epoch": 0.9806714140386572, "grad_norm": 0.0, - "learning_rate": 4.089525179826969e-06, - "loss": 0.8752, + "learning_rate": 1.9588163576743647e-08, + "loss": 0.9724, "step": 25064 }, { - "epoch": 0.7102779903085947, + "epoch": 0.9807105407308866, "grad_norm": 0.0, - "learning_rate": 4.088784882155614e-06, - "loss": 0.858, + "learning_rate": 1.9508965308008276e-08, + "loss": 0.959, "step": 25065 }, { - "epoch": 0.7103063277508572, + "epoch": 0.9807496674231161, "grad_norm": 0.0, - "learning_rate": 4.088044634276616e-06, - "loss": 0.7544, + "learning_rate": 1.942992731345239e-08, + "loss": 0.8081, "step": 25066 }, { - "epoch": 0.7103346651931197, + "epoch": 0.9807887941153455, "grad_norm": 0.0, - "learning_rate": 4.087304436196212e-06, - "loss": 0.7622, + "learning_rate": 1.9351049594346083e-08, + "loss": 0.8514, "step": 25067 }, { - "epoch": 0.7103630026353821, + "epoch": 0.980827920807575, "grad_norm": 0.0, - "learning_rate": 4.086564287920643e-06, - "loss": 0.8228, + "learning_rate": 1.927233215195501e-08, + "loss": 0.8455, "step": 25068 }, { - "epoch": 0.7103913400776446, + "epoch": 0.9808670474998044, "grad_norm": 0.0, - "learning_rate": 4.085824189456136e-06, - "loss": 0.9228, + "learning_rate": 1.919377498754482e-08, + "loss": 0.9047, "step": 25069 }, { - "epoch": 0.7104196775199071, + "epoch": 0.9809061741920339, "grad_norm": 0.0, - "learning_rate": 4.085084140808927e-06, - "loss": 0.7871, + "learning_rate": 1.9115378102374517e-08, + "loss": 0.9184, "step": 25070 }, { - "epoch": 0.7104480149621695, + "epoch": 0.9809453008842632, "grad_norm": 0.0, - "learning_rate": 4.08434414198525e-06, - "loss": 0.7722, + "learning_rate": 1.9037141497705302e-08, + "loss": 0.9105, "step": 25071 }, { - "epoch": 0.710476352404432, + "epoch": 0.9809844275764927, "grad_norm": 0.0, - "learning_rate": 4.08360419299134e-06, - "loss": 0.913, + "learning_rate": 1.8959065174792844e-08, + "loss": 0.9628, "step": 25072 }, { - "epoch": 0.7105046898466945, + "epoch": 0.9810235542687221, "grad_norm": 0.0, - "learning_rate": 4.082864293833433e-06, - "loss": 0.9001, + "learning_rate": 1.8881149134889477e-08, + "loss": 0.8631, "step": 25073 }, { - "epoch": 0.710533027288957, + "epoch": 0.9810626809609516, "grad_norm": 0.0, - "learning_rate": 4.0821244445177535e-06, - "loss": 0.8034, + "learning_rate": 1.8803393379248635e-08, + "loss": 0.8385, "step": 25074 }, { - "epoch": 0.7105613647312193, + "epoch": 0.981101807653181, "grad_norm": 0.0, - "learning_rate": 4.081384645050537e-06, - "loss": 0.8115, + "learning_rate": 1.8725797909119324e-08, + "loss": 0.8446, "step": 25075 }, { - "epoch": 0.7105897021734818, + "epoch": 0.9811409343454104, "grad_norm": 0.0, - "learning_rate": 4.080644895438019e-06, - "loss": 0.8838, + "learning_rate": 1.8648362725744995e-08, + "loss": 0.9757, "step": 25076 }, { - "epoch": 0.7106180396157443, + "epoch": 0.9811800610376399, "grad_norm": 0.0, - "learning_rate": 4.079905195686424e-06, - "loss": 0.8216, + "learning_rate": 1.8571087830371315e-08, + "loss": 0.959, "step": 25077 }, { - "epoch": 0.7106463770580067, + "epoch": 0.9812191877298693, "grad_norm": 0.0, - "learning_rate": 4.079165545801988e-06, - "loss": 0.9032, + "learning_rate": 1.8493973224238405e-08, + "loss": 0.941, "step": 25078 }, { - "epoch": 0.7106747145002692, + "epoch": 0.9812583144220988, "grad_norm": 0.0, - "learning_rate": 4.078425945790937e-06, - "loss": 0.7825, + "learning_rate": 1.8417018908584163e-08, + "loss": 0.918, "step": 25079 }, { - "epoch": 0.7107030519425317, + "epoch": 0.9812974411143282, "grad_norm": 0.0, - "learning_rate": 4.077686395659504e-06, - "loss": 0.7755, + "learning_rate": 1.8340224884646484e-08, + "loss": 1.0732, "step": 25080 }, { - "epoch": 0.7107313893847941, + "epoch": 0.9813365678065576, "grad_norm": 0.0, - "learning_rate": 4.076946895413919e-06, - "loss": 0.8889, + "learning_rate": 1.82635911536555e-08, + "loss": 0.9844, "step": 25081 }, { - "epoch": 0.7107597268270566, + "epoch": 0.981375694498787, "grad_norm": 0.0, - "learning_rate": 4.0762074450604085e-06, - "loss": 0.8291, + "learning_rate": 1.8187117716844672e-08, + "loss": 0.8883, "step": 25082 }, { - "epoch": 0.7107880642693191, + "epoch": 0.9814148211910165, "grad_norm": 0.0, - "learning_rate": 4.075468044605201e-06, - "loss": 0.8774, + "learning_rate": 1.8110804575439677e-08, + "loss": 1.0151, "step": 25083 }, { - "epoch": 0.7108164017115816, + "epoch": 0.9814539478832459, "grad_norm": 0.0, - "learning_rate": 4.074728694054526e-06, - "loss": 0.8293, + "learning_rate": 1.803465173066732e-08, + "loss": 0.9949, "step": 25084 }, { - "epoch": 0.7108447391538439, + "epoch": 0.9814930745754754, "grad_norm": 0.0, - "learning_rate": 4.073989393414613e-06, - "loss": 0.8308, + "learning_rate": 1.795865918375106e-08, + "loss": 1.0101, "step": 25085 }, { - "epoch": 0.7108730765961064, + "epoch": 0.9815322012677048, "grad_norm": 0.0, - "learning_rate": 4.073250142691691e-06, - "loss": 0.7718, + "learning_rate": 1.7882826935909924e-08, + "loss": 1.0247, "step": 25086 }, { - "epoch": 0.7109014140383689, + "epoch": 0.9815713279599343, "grad_norm": 0.0, - "learning_rate": 4.072510941891978e-06, - "loss": 0.8297, + "learning_rate": 1.780715498836294e-08, + "loss": 0.868, "step": 25087 }, { - "epoch": 0.7109297514806313, + "epoch": 0.9816104546521637, "grad_norm": 0.0, - "learning_rate": 4.071771791021709e-06, - "loss": 0.7753, + "learning_rate": 1.773164334232358e-08, + "loss": 0.8685, "step": 25088 }, { - "epoch": 0.7109580889228938, + "epoch": 0.9816495813443932, "grad_norm": 0.0, - "learning_rate": 4.071032690087111e-06, - "loss": 0.9356, + "learning_rate": 1.7656291999006424e-08, + "loss": 1.1204, "step": 25089 }, { - "epoch": 0.7109864263651563, + "epoch": 0.9816887080366226, "grad_norm": 0.0, - "learning_rate": 4.070293639094401e-06, - "loss": 0.7845, + "learning_rate": 1.7581100959620513e-08, + "loss": 0.8547, "step": 25090 }, { - "epoch": 0.7110147638074188, + "epoch": 0.9817278347288521, "grad_norm": 0.0, - "learning_rate": 4.069554638049815e-06, - "loss": 0.7407, + "learning_rate": 1.7506070225373763e-08, + "loss": 0.9001, "step": 25091 }, { - "epoch": 0.7110431012496812, + "epoch": 0.9817669614210814, "grad_norm": 0.0, - "learning_rate": 4.068815686959568e-06, - "loss": 0.9319, + "learning_rate": 1.7431199797469654e-08, + "loss": 1.0395, "step": 25092 }, { - "epoch": 0.7110714386919437, + "epoch": 0.9818060881133109, "grad_norm": 0.0, - "learning_rate": 4.06807678582989e-06, - "loss": 0.8133, + "learning_rate": 1.7356489677112788e-08, + "loss": 0.9382, "step": 25093 }, { - "epoch": 0.7110997761342062, + "epoch": 0.9818452148055403, "grad_norm": 0.0, - "learning_rate": 4.067337934667007e-06, - "loss": 0.825, + "learning_rate": 1.7281939865501084e-08, + "loss": 0.9655, "step": 25094 }, { - "epoch": 0.7111281135764685, + "epoch": 0.9818843414977698, "grad_norm": 0.0, - "learning_rate": 4.066599133477136e-06, - "loss": 0.7741, + "learning_rate": 1.720755036383248e-08, + "loss": 0.9609, "step": 25095 }, { - "epoch": 0.711156451018731, + "epoch": 0.9819234681899992, "grad_norm": 0.0, - "learning_rate": 4.065860382266504e-06, - "loss": 0.9644, + "learning_rate": 1.7133321173301576e-08, + "loss": 1.0184, "step": 25096 }, { - "epoch": 0.7111847884609935, + "epoch": 0.9819625948822287, "grad_norm": 0.0, - "learning_rate": 4.065121681041333e-06, - "loss": 0.8818, + "learning_rate": 1.7059252295100748e-08, + "loss": 1.0668, "step": 25097 }, { - "epoch": 0.711213125903256, + "epoch": 0.9820017215744581, "grad_norm": 0.0, - "learning_rate": 4.0643830298078456e-06, - "loss": 0.8067, + "learning_rate": 1.6985343730417937e-08, + "loss": 0.9811, "step": 25098 }, { - "epoch": 0.7112414633455184, + "epoch": 0.9820408482666876, "grad_norm": 0.0, - "learning_rate": 4.063644428572268e-06, - "loss": 0.9167, + "learning_rate": 1.691159548044219e-08, + "loss": 0.9842, "step": 25099 }, { - "epoch": 0.7112698007877809, + "epoch": 0.982079974958917, "grad_norm": 0.0, - "learning_rate": 4.062905877340816e-06, - "loss": 0.7546, + "learning_rate": 1.6838007546355895e-08, + "loss": 0.8777, "step": 25100 }, { - "epoch": 0.7112981382300434, + "epoch": 0.9821191016511465, "grad_norm": 0.0, - "learning_rate": 4.06216737611971e-06, - "loss": 0.9047, + "learning_rate": 1.676457992934255e-08, + "loss": 0.9727, "step": 25101 }, { - "epoch": 0.7113264756723058, + "epoch": 0.9821582283433758, "grad_norm": 0.0, - "learning_rate": 4.0614289249151785e-06, - "loss": 0.7573, + "learning_rate": 1.6691312630578992e-08, + "loss": 1.022, "step": 25102 }, { - "epoch": 0.7113548131145683, + "epoch": 0.9821973550356052, "grad_norm": 0.0, - "learning_rate": 4.060690523733432e-06, - "loss": 0.9403, + "learning_rate": 1.661820565124428e-08, + "loss": 0.915, "step": 25103 }, { - "epoch": 0.7113831505568308, + "epoch": 0.9822364817278347, "grad_norm": 0.0, - "learning_rate": 4.059952172580694e-06, - "loss": 0.8961, + "learning_rate": 1.6545258992510803e-08, + "loss": 0.8058, "step": 25104 }, { - "epoch": 0.7114114879990932, + "epoch": 0.9822756084200641, "grad_norm": 0.0, - "learning_rate": 4.059213871463189e-06, - "loss": 0.7896, + "learning_rate": 1.6472472655550964e-08, + "loss": 1.0163, "step": 25105 }, { - "epoch": 0.7114398254413556, + "epoch": 0.9823147351122936, "grad_norm": 0.0, - "learning_rate": 4.058475620387129e-06, - "loss": 0.875, + "learning_rate": 1.6399846641532714e-08, + "loss": 0.8256, "step": 25106 }, { - "epoch": 0.7114681628836181, + "epoch": 0.982353861804523, "grad_norm": 0.0, - "learning_rate": 4.057737419358737e-06, - "loss": 0.8067, + "learning_rate": 1.6327380951622895e-08, + "loss": 1.0329, "step": 25107 }, { - "epoch": 0.7114965003258806, + "epoch": 0.9823929884967525, "grad_norm": 0.0, - "learning_rate": 4.056999268384227e-06, - "loss": 0.7674, + "learning_rate": 1.6255075586985024e-08, + "loss": 0.9308, "step": 25108 }, { - "epoch": 0.711524837768143, + "epoch": 0.9824321151889819, "grad_norm": 0.0, - "learning_rate": 4.0562611674698186e-06, - "loss": 0.8129, + "learning_rate": 1.6182930548780395e-08, + "loss": 0.9229, "step": 25109 }, { - "epoch": 0.7115531752104055, + "epoch": 0.9824712418812114, "grad_norm": 0.0, - "learning_rate": 4.055523116621729e-06, - "loss": 0.8802, + "learning_rate": 1.611094583816808e-08, + "loss": 0.9373, "step": 25110 }, { - "epoch": 0.711581512652668, + "epoch": 0.9825103685734408, "grad_norm": 0.0, - "learning_rate": 4.054785115846176e-06, - "loss": 0.8515, + "learning_rate": 1.6039121456302708e-08, + "loss": 0.9251, "step": 25111 }, { - "epoch": 0.7116098500949304, + "epoch": 0.9825494952656703, "grad_norm": 0.0, - "learning_rate": 4.05404716514938e-06, - "loss": 0.7795, + "learning_rate": 1.5967457404337805e-08, + "loss": 1.0459, "step": 25112 }, { - "epoch": 0.7116381875371929, + "epoch": 0.9825886219578996, "grad_norm": 0.0, - "learning_rate": 4.053309264537549e-06, - "loss": 0.8111, + "learning_rate": 1.589595368342578e-08, + "loss": 1.0492, "step": 25113 }, { - "epoch": 0.7116665249794554, + "epoch": 0.9826277486501291, "grad_norm": 0.0, - "learning_rate": 4.0525714140169015e-06, - "loss": 0.7897, + "learning_rate": 1.5824610294714605e-08, + "loss": 0.908, "step": 25114 }, { - "epoch": 0.7116948624217179, + "epoch": 0.9826668753423585, "grad_norm": 0.0, - "learning_rate": 4.051833613593657e-06, - "loss": 0.8812, + "learning_rate": 1.5753427239347807e-08, + "loss": 1.0454, "step": 25115 }, { - "epoch": 0.7117231998639802, + "epoch": 0.982706002034588, "grad_norm": 0.0, - "learning_rate": 4.051095863274024e-06, - "loss": 0.8329, + "learning_rate": 1.568240451847114e-08, + "loss": 0.8614, "step": 25116 }, { - "epoch": 0.7117515373062427, + "epoch": 0.9827451287268174, "grad_norm": 0.0, - "learning_rate": 4.050358163064219e-06, - "loss": 0.7572, + "learning_rate": 1.561154213322369e-08, + "loss": 1.0404, "step": 25117 }, { - "epoch": 0.7117798747485052, + "epoch": 0.9827842554190469, "grad_norm": 0.0, - "learning_rate": 4.04962051297046e-06, - "loss": 0.8323, + "learning_rate": 1.5540840084743435e-08, + "loss": 0.8182, "step": 25118 }, { - "epoch": 0.7118082121907676, + "epoch": 0.9828233821112763, "grad_norm": 0.0, - "learning_rate": 4.0488829129989536e-06, - "loss": 0.7494, + "learning_rate": 1.547029837416614e-08, + "loss": 0.8887, "step": 25119 }, { - "epoch": 0.7118365496330301, + "epoch": 0.9828625088035058, "grad_norm": 0.0, - "learning_rate": 4.048145363155921e-06, - "loss": 0.755, + "learning_rate": 1.539991700262422e-08, + "loss": 1.031, "step": 25120 }, { - "epoch": 0.7118648870752926, + "epoch": 0.9829016354957352, "grad_norm": 0.0, - "learning_rate": 4.047407863447565e-06, - "loss": 0.812, + "learning_rate": 1.5329695971249003e-08, + "loss": 0.8049, "step": 25121 }, { - "epoch": 0.711893224517555, + "epoch": 0.9829407621879647, "grad_norm": 0.0, - "learning_rate": 4.0466704138801035e-06, - "loss": 0.7305, + "learning_rate": 1.525963528116736e-08, + "loss": 0.977, "step": 25122 }, { - "epoch": 0.7119215619598175, + "epoch": 0.982979888880194, "grad_norm": 0.0, - "learning_rate": 4.0459330144597485e-06, - "loss": 0.9158, + "learning_rate": 1.5189734933503954e-08, + "loss": 0.9941, "step": 25123 }, { - "epoch": 0.71194989940208, + "epoch": 0.9830190155724235, "grad_norm": 0.0, - "learning_rate": 4.045195665192711e-06, - "loss": 0.7751, + "learning_rate": 1.511999492938121e-08, + "loss": 0.9906, "step": 25124 }, { - "epoch": 0.7119782368443425, + "epoch": 0.9830581422646529, "grad_norm": 0.0, - "learning_rate": 4.0444583660852e-06, - "loss": 0.7676, + "learning_rate": 1.505041526992046e-08, + "loss": 0.8321, "step": 25125 }, { - "epoch": 0.7120065742866049, + "epoch": 0.9830972689568824, "grad_norm": 0.0, - "learning_rate": 4.043721117143432e-06, - "loss": 0.8278, + "learning_rate": 1.4980995956238588e-08, + "loss": 1.0897, "step": 25126 }, { - "epoch": 0.7120349117288673, + "epoch": 0.9831363956491118, "grad_norm": 0.0, - "learning_rate": 4.04298391837361e-06, - "loss": 0.8934, + "learning_rate": 1.4911736989449143e-08, + "loss": 0.8412, "step": 25127 }, { - "epoch": 0.7120632491711298, + "epoch": 0.9831755223413413, "grad_norm": 0.0, - "learning_rate": 4.042246769781949e-06, - "loss": 0.8303, + "learning_rate": 1.4842638370664575e-08, + "loss": 0.9904, "step": 25128 }, { - "epoch": 0.7120915866133922, + "epoch": 0.9832146490335707, "grad_norm": 0.0, - "learning_rate": 4.041509671374653e-06, - "loss": 0.8898, + "learning_rate": 1.4773700100996214e-08, + "loss": 0.8316, "step": 25129 }, { - "epoch": 0.7121199240556547, + "epoch": 0.9832537757258002, "grad_norm": 0.0, - "learning_rate": 4.040772623157933e-06, - "loss": 0.8108, + "learning_rate": 1.470492218154984e-08, + "loss": 0.9759, "step": 25130 }, { - "epoch": 0.7121482614979172, + "epoch": 0.9832929024180296, "grad_norm": 0.0, - "learning_rate": 4.040035625138002e-06, - "loss": 0.8242, + "learning_rate": 1.463630461343013e-08, + "loss": 0.9777, "step": 25131 }, { - "epoch": 0.7121765989401797, + "epoch": 0.983332029110259, "grad_norm": 0.0, - "learning_rate": 4.039298677321062e-06, - "loss": 0.808, + "learning_rate": 1.4567847397739532e-08, + "loss": 1.135, "step": 25132 }, { - "epoch": 0.7122049363824421, + "epoch": 0.9833711558024885, "grad_norm": 0.0, - "learning_rate": 4.0385617797133205e-06, - "loss": 0.8903, + "learning_rate": 1.4499550535576057e-08, + "loss": 0.9434, "step": 25133 }, { - "epoch": 0.7122332738247046, + "epoch": 0.9834102824947178, "grad_norm": 0.0, - "learning_rate": 4.0378249323209915e-06, - "loss": 0.8704, + "learning_rate": 1.4431414028037715e-08, + "loss": 0.9412, "step": 25134 }, { - "epoch": 0.7122616112669671, + "epoch": 0.9834494091869473, "grad_norm": 0.0, - "learning_rate": 4.0370881351502735e-06, - "loss": 0.7846, + "learning_rate": 1.4363437876218078e-08, + "loss": 1.0269, "step": 25135 }, { - "epoch": 0.7122899487092295, + "epoch": 0.9834885358791767, "grad_norm": 0.0, - "learning_rate": 4.036351388207376e-06, - "loss": 0.8854, + "learning_rate": 1.4295622081208493e-08, + "loss": 0.9069, "step": 25136 }, { - "epoch": 0.712318286151492, + "epoch": 0.9835276625714062, "grad_norm": 0.0, - "learning_rate": 4.035614691498505e-06, - "loss": 0.7664, + "learning_rate": 1.42279666440992e-08, + "loss": 1.0283, "step": 25137 }, { - "epoch": 0.7123466235937544, + "epoch": 0.9835667892636356, "grad_norm": 0.0, - "learning_rate": 4.034878045029867e-06, - "loss": 0.7613, + "learning_rate": 1.4160471565974887e-08, + "loss": 0.9736, "step": 25138 }, { - "epoch": 0.7123749610360169, + "epoch": 0.9836059159558651, "grad_norm": 0.0, - "learning_rate": 4.03414144880767e-06, - "loss": 0.9572, + "learning_rate": 1.4093136847921352e-08, + "loss": 0.9291, "step": 25139 }, { - "epoch": 0.7124032984782793, + "epoch": 0.9836450426480945, "grad_norm": 0.0, - "learning_rate": 4.0334049028381116e-06, - "loss": 0.8197, + "learning_rate": 1.402596249101884e-08, + "loss": 0.9961, "step": 25140 }, { - "epoch": 0.7124316359205418, + "epoch": 0.983684169340324, "grad_norm": 0.0, - "learning_rate": 4.032668407127403e-06, - "loss": 0.8439, + "learning_rate": 1.3958948496346492e-08, + "loss": 1.0621, "step": 25141 }, { - "epoch": 0.7124599733628043, + "epoch": 0.9837232960325534, "grad_norm": 0.0, - "learning_rate": 4.031931961681738e-06, - "loss": 0.7784, + "learning_rate": 1.3892094864979e-08, + "loss": 1.0552, "step": 25142 }, { - "epoch": 0.7124883108050667, + "epoch": 0.9837624227247829, "grad_norm": 0.0, - "learning_rate": 4.03119556650733e-06, - "loss": 0.8644, + "learning_rate": 1.3825401597992172e-08, + "loss": 0.8802, "step": 25143 }, { - "epoch": 0.7125166482473292, + "epoch": 0.9838015494170123, "grad_norm": 0.0, - "learning_rate": 4.0304592216103795e-06, - "loss": 0.7996, + "learning_rate": 1.3758868696456263e-08, + "loss": 0.961, "step": 25144 }, { - "epoch": 0.7125449856895917, + "epoch": 0.9838406761092418, "grad_norm": 0.0, - "learning_rate": 4.029722926997085e-06, - "loss": 0.8612, + "learning_rate": 1.3692496161439305e-08, + "loss": 0.9027, "step": 25145 }, { - "epoch": 0.7125733231318541, + "epoch": 0.9838798028014711, "grad_norm": 0.0, - "learning_rate": 4.028986682673651e-06, - "loss": 0.8612, + "learning_rate": 1.3626283994007116e-08, + "loss": 0.9408, "step": 25146 }, { - "epoch": 0.7126016605741166, + "epoch": 0.9839189294937006, "grad_norm": 0.0, - "learning_rate": 4.028250488646284e-06, - "loss": 0.7989, + "learning_rate": 1.3560232195223288e-08, + "loss": 0.8061, "step": 25147 }, { - "epoch": 0.712629998016379, + "epoch": 0.98395805618593, "grad_norm": 0.0, - "learning_rate": 4.027514344921175e-06, - "loss": 0.8505, + "learning_rate": 1.3494340766148084e-08, + "loss": 0.9141, "step": 25148 }, { - "epoch": 0.7126583354586415, + "epoch": 0.9839971828781595, "grad_norm": 0.0, - "learning_rate": 4.026778251504533e-06, - "loss": 0.7387, + "learning_rate": 1.3428609707839547e-08, + "loss": 0.9228, "step": 25149 }, { - "epoch": 0.7126866729009039, + "epoch": 0.9840363095703889, "grad_norm": 0.0, - "learning_rate": 4.026042208402554e-06, - "loss": 0.8828, + "learning_rate": 1.3363039021354607e-08, + "loss": 0.9161, "step": 25150 }, { - "epoch": 0.7127150103431664, + "epoch": 0.9840754362626184, "grad_norm": 0.0, - "learning_rate": 4.0253062156214406e-06, - "loss": 0.8198, + "learning_rate": 1.3297628707744647e-08, + "loss": 1.0183, "step": 25151 }, { - "epoch": 0.7127433477854289, + "epoch": 0.9841145629548478, "grad_norm": 0.0, - "learning_rate": 4.024570273167395e-06, - "loss": 0.7533, + "learning_rate": 1.323237876806105e-08, + "loss": 0.975, "step": 25152 }, { - "epoch": 0.7127716852276913, + "epoch": 0.9841536896470773, "grad_norm": 0.0, - "learning_rate": 4.023834381046609e-06, - "loss": 0.8051, + "learning_rate": 1.3167289203351862e-08, + "loss": 0.9297, "step": 25153 }, { - "epoch": 0.7128000226699538, + "epoch": 0.9841928163393067, "grad_norm": 0.0, - "learning_rate": 4.023098539265285e-06, - "loss": 0.8174, + "learning_rate": 1.3102360014660697e-08, + "loss": 1.023, "step": 25154 }, { - "epoch": 0.7128283601122163, + "epoch": 0.9842319430315362, "grad_norm": 0.0, - "learning_rate": 4.022362747829627e-06, - "loss": 0.8852, + "learning_rate": 1.3037591203032273e-08, + "loss": 1.0357, "step": 25155 }, { - "epoch": 0.7128566975544788, + "epoch": 0.9842710697237655, "grad_norm": 0.0, - "learning_rate": 4.0216270067458215e-06, - "loss": 0.8529, + "learning_rate": 1.297298276950576e-08, + "loss": 0.9297, "step": 25156 }, { - "epoch": 0.7128850349967412, + "epoch": 0.984310196415995, "grad_norm": 0.0, - "learning_rate": 4.0208913160200765e-06, - "loss": 0.853, + "learning_rate": 1.2908534715118104e-08, + "loss": 1.0075, "step": 25157 }, { - "epoch": 0.7129133724390037, + "epoch": 0.9843493231082244, "grad_norm": 0.0, - "learning_rate": 4.02015567565858e-06, - "loss": 0.9067, + "learning_rate": 1.2844247040905145e-08, + "loss": 0.8972, "step": 25158 }, { - "epoch": 0.7129417098812662, + "epoch": 0.9843884498004539, "grad_norm": 0.0, - "learning_rate": 4.019420085667534e-06, - "loss": 0.9476, + "learning_rate": 1.2780119747899388e-08, + "loss": 0.9428, "step": 25159 }, { - "epoch": 0.7129700473235285, + "epoch": 0.9844275764926833, "grad_norm": 0.0, - "learning_rate": 4.018684546053137e-06, - "loss": 0.8427, + "learning_rate": 1.2716152837128904e-08, + "loss": 0.994, "step": 25160 }, { - "epoch": 0.712998384765791, + "epoch": 0.9844667031849127, "grad_norm": 0.0, - "learning_rate": 4.017949056821576e-06, - "loss": 0.8687, + "learning_rate": 1.2652346309623974e-08, + "loss": 1.002, "step": 25161 }, { - "epoch": 0.7130267222080535, + "epoch": 0.9845058298771422, "grad_norm": 0.0, - "learning_rate": 4.017213617979052e-06, - "loss": 0.7848, + "learning_rate": 1.258870016640601e-08, + "loss": 0.9723, "step": 25162 }, { - "epoch": 0.713055059650316, + "epoch": 0.9845449565693716, "grad_norm": 0.0, - "learning_rate": 4.01647822953176e-06, - "loss": 0.9075, + "learning_rate": 1.2525214408499743e-08, + "loss": 1.059, "step": 25163 }, { - "epoch": 0.7130833970925784, + "epoch": 0.9845840832616011, "grad_norm": 0.0, - "learning_rate": 4.015742891485893e-06, - "loss": 0.8586, + "learning_rate": 1.2461889036923247e-08, + "loss": 0.8935, "step": 25164 }, { - "epoch": 0.7131117345348409, + "epoch": 0.9846232099538305, "grad_norm": 0.0, - "learning_rate": 4.015007603847651e-06, - "loss": 0.9027, + "learning_rate": 1.239872405269349e-08, + "loss": 0.9281, "step": 25165 }, { - "epoch": 0.7131400719771034, + "epoch": 0.98466233664606, "grad_norm": 0.0, - "learning_rate": 4.014272366623216e-06, - "loss": 0.8019, + "learning_rate": 1.2335719456825212e-08, + "loss": 0.9085, "step": 25166 }, { - "epoch": 0.7131684094193658, + "epoch": 0.9847014633382893, "grad_norm": 0.0, - "learning_rate": 4.0135371798187895e-06, - "loss": 0.9007, + "learning_rate": 1.227287525032983e-08, + "loss": 0.8549, "step": 25167 }, { - "epoch": 0.7131967468616283, + "epoch": 0.9847405900305188, "grad_norm": 0.0, - "learning_rate": 4.012802043440565e-06, - "loss": 0.839, + "learning_rate": 1.2210191434216534e-08, + "loss": 0.7346, "step": 25168 }, { - "epoch": 0.7132250843038908, + "epoch": 0.9847797167227482, "grad_norm": 0.0, - "learning_rate": 4.01206695749473e-06, - "loss": 0.7747, + "learning_rate": 1.2147668009492297e-08, + "loss": 0.9544, "step": 25169 }, { - "epoch": 0.7132534217461531, + "epoch": 0.9848188434149777, "grad_norm": 0.0, - "learning_rate": 4.011331921987481e-06, - "loss": 0.7573, + "learning_rate": 1.208530497716076e-08, + "loss": 1.0396, "step": 25170 }, { - "epoch": 0.7132817591884156, + "epoch": 0.9848579701072071, "grad_norm": 0.0, - "learning_rate": 4.010596936925005e-06, - "loss": 0.8031, + "learning_rate": 1.2023102338224457e-08, + "loss": 0.8653, "step": 25171 }, { - "epoch": 0.7133100966306781, + "epoch": 0.9848970967994366, "grad_norm": 0.0, - "learning_rate": 4.009862002313494e-06, - "loss": 0.7992, + "learning_rate": 1.1961060093681476e-08, + "loss": 0.9381, "step": 25172 }, { - "epoch": 0.7133384340729406, + "epoch": 0.984936223491666, "grad_norm": 0.0, - "learning_rate": 4.009127118159143e-06, - "loss": 0.8125, + "learning_rate": 1.1899178244527687e-08, + "loss": 0.9143, "step": 25173 }, { - "epoch": 0.713366771515203, + "epoch": 0.9849753501838955, "grad_norm": 0.0, - "learning_rate": 4.008392284468136e-06, - "loss": 0.9113, + "learning_rate": 1.183745679175785e-08, + "loss": 0.9657, "step": 25174 }, { - "epoch": 0.7133951089574655, + "epoch": 0.9850144768761249, "grad_norm": 0.0, - "learning_rate": 4.007657501246666e-06, - "loss": 0.8522, + "learning_rate": 1.1775895736361176e-08, + "loss": 1.0356, "step": 25175 }, { - "epoch": 0.713423446399728, + "epoch": 0.9850536035683544, "grad_norm": 0.0, - "learning_rate": 4.00692276850092e-06, - "loss": 0.8699, + "learning_rate": 1.1714495079329091e-08, + "loss": 0.9308, "step": 25176 }, { - "epoch": 0.7134517838419904, + "epoch": 0.9850927302605837, "grad_norm": 0.0, - "learning_rate": 4.006188086237091e-06, - "loss": 0.7188, + "learning_rate": 1.1653254821645254e-08, + "loss": 0.953, "step": 25177 }, { - "epoch": 0.7134801212842529, + "epoch": 0.9851318569528132, "grad_norm": 0.0, - "learning_rate": 4.005453454461369e-06, - "loss": 0.8094, + "learning_rate": 1.1592174964295545e-08, + "loss": 0.9095, "step": 25178 }, { - "epoch": 0.7135084587265154, + "epoch": 0.9851709836450426, "grad_norm": 0.0, - "learning_rate": 4.0047188731799345e-06, - "loss": 0.7692, + "learning_rate": 1.1531255508258066e-08, + "loss": 0.896, "step": 25179 }, { - "epoch": 0.7135367961687779, + "epoch": 0.9852101103372721, "grad_norm": 0.0, - "learning_rate": 4.00398434239898e-06, - "loss": 0.731, + "learning_rate": 1.1470496454512037e-08, + "loss": 0.9126, "step": 25180 }, { - "epoch": 0.7135651336110402, + "epoch": 0.9852492370295015, "grad_norm": 0.0, - "learning_rate": 4.003249862124694e-06, - "loss": 0.8112, + "learning_rate": 1.1409897804034452e-08, + "loss": 0.9451, "step": 25181 }, { - "epoch": 0.7135934710533027, + "epoch": 0.985288363721731, "grad_norm": 0.0, - "learning_rate": 4.002515432363259e-06, - "loss": 0.83, + "learning_rate": 1.1349459557796761e-08, + "loss": 0.8111, "step": 25182 }, { - "epoch": 0.7136218084955652, + "epoch": 0.9853274904139604, "grad_norm": 0.0, - "learning_rate": 4.001781053120863e-06, - "loss": 0.8869, + "learning_rate": 1.1289181716771513e-08, + "loss": 0.9482, "step": 25183 }, { - "epoch": 0.7136501459378276, + "epoch": 0.9853666171061899, "grad_norm": 0.0, - "learning_rate": 4.001046724403697e-06, - "loss": 0.9385, + "learning_rate": 1.1229064281924606e-08, + "loss": 0.8894, "step": 25184 }, { - "epoch": 0.7136784833800901, + "epoch": 0.9854057437984193, "grad_norm": 0.0, - "learning_rate": 4.000312446217937e-06, - "loss": 0.8116, + "learning_rate": 1.1169107254221934e-08, + "loss": 0.963, "step": 25185 }, { - "epoch": 0.7137068208223526, + "epoch": 0.9854448704906488, "grad_norm": 0.0, - "learning_rate": 3.999578218569777e-06, - "loss": 0.8383, + "learning_rate": 1.1109310634627169e-08, + "loss": 0.8792, "step": 25186 }, { - "epoch": 0.7137351582646151, + "epoch": 0.9854839971828782, "grad_norm": 0.0, - "learning_rate": 3.998844041465395e-06, - "loss": 0.818, + "learning_rate": 1.1049674424099543e-08, + "loss": 0.9288, "step": 25187 }, { - "epoch": 0.7137634957068775, + "epoch": 0.9855231238751077, "grad_norm": 0.0, - "learning_rate": 3.998109914910978e-06, - "loss": 0.8108, + "learning_rate": 1.0990198623598292e-08, + "loss": 0.8907, "step": 25188 }, { - "epoch": 0.71379183314914, + "epoch": 0.985562250567337, "grad_norm": 0.0, - "learning_rate": 3.99737583891271e-06, - "loss": 0.7324, + "learning_rate": 1.0930883234077095e-08, + "loss": 0.9964, "step": 25189 }, { - "epoch": 0.7138201705914025, + "epoch": 0.9856013772595664, "grad_norm": 0.0, - "learning_rate": 3.9966418134767745e-06, - "loss": 0.8867, + "learning_rate": 1.0871728256488524e-08, + "loss": 0.9114, "step": 25190 }, { - "epoch": 0.7138485080336648, + "epoch": 0.9856405039517959, "grad_norm": 0.0, - "learning_rate": 3.995907838609354e-06, - "loss": 0.8343, + "learning_rate": 1.0812733691782929e-08, + "loss": 0.9035, "step": 25191 }, { - "epoch": 0.7138768454759273, + "epoch": 0.9856796306440253, "grad_norm": 0.0, - "learning_rate": 3.995173914316635e-06, - "loss": 0.8649, + "learning_rate": 1.0753899540908441e-08, + "loss": 0.9666, "step": 25192 }, { - "epoch": 0.7139051829181898, + "epoch": 0.9857187573362548, "grad_norm": 0.0, - "learning_rate": 3.994440040604792e-06, - "loss": 0.7997, + "learning_rate": 1.0695225804808752e-08, + "loss": 0.9028, "step": 25193 }, { - "epoch": 0.7139335203604522, + "epoch": 0.9857578840284842, "grad_norm": 0.0, - "learning_rate": 3.993706217480015e-06, - "loss": 0.8255, + "learning_rate": 1.0636712484426436e-08, + "loss": 0.9865, "step": 25194 }, { - "epoch": 0.7139618578027147, + "epoch": 0.9857970107207137, "grad_norm": 0.0, - "learning_rate": 3.992972444948476e-06, - "loss": 0.8035, + "learning_rate": 1.0578359580700747e-08, + "loss": 0.9758, "step": 25195 }, { - "epoch": 0.7139901952449772, + "epoch": 0.9858361374129431, "grad_norm": 0.0, - "learning_rate": 3.992238723016363e-06, - "loss": 0.8644, + "learning_rate": 1.0520167094568712e-08, + "loss": 1.0177, "step": 25196 }, { - "epoch": 0.7140185326872397, + "epoch": 0.9858752641051726, "grad_norm": 0.0, - "learning_rate": 3.9915050516898554e-06, - "loss": 0.8081, + "learning_rate": 1.0462135026966246e-08, + "loss": 0.9051, "step": 25197 }, { - "epoch": 0.7140468701295021, + "epoch": 0.985914390797402, "grad_norm": 0.0, - "learning_rate": 3.99077143097513e-06, - "loss": 0.7404, + "learning_rate": 1.040426337882372e-08, + "loss": 0.8505, "step": 25198 }, { - "epoch": 0.7140752075717646, + "epoch": 0.9859535174896314, "grad_norm": 0.0, - "learning_rate": 3.990037860878371e-06, - "loss": 0.915, + "learning_rate": 1.0346552151071499e-08, + "loss": 0.9023, "step": 25199 }, { - "epoch": 0.7141035450140271, + "epoch": 0.9859926441818608, "grad_norm": 0.0, - "learning_rate": 3.989304341405752e-06, - "loss": 0.8997, + "learning_rate": 1.0289001344635507e-08, + "loss": 0.8795, "step": 25200 }, { - "epoch": 0.7141318824562894, + "epoch": 0.9860317708740903, "grad_norm": 0.0, - "learning_rate": 3.988570872563454e-06, - "loss": 0.9245, + "learning_rate": 1.0231610960440564e-08, + "loss": 0.9352, "step": 25201 }, { - "epoch": 0.7141602198985519, + "epoch": 0.9860708975663197, "grad_norm": 0.0, - "learning_rate": 3.987837454357656e-06, - "loss": 0.8386, + "learning_rate": 1.017438099940815e-08, + "loss": 0.9483, "step": 25202 }, { - "epoch": 0.7141885573408144, + "epoch": 0.9861100242585492, "grad_norm": 0.0, - "learning_rate": 3.987104086794536e-06, - "loss": 0.7986, + "learning_rate": 1.0117311462457535e-08, + "loss": 1.0206, "step": 25203 }, { - "epoch": 0.7142168947830769, + "epoch": 0.9861491509507786, "grad_norm": 0.0, - "learning_rate": 3.98637076988027e-06, - "loss": 0.8504, + "learning_rate": 1.006040235050465e-08, + "loss": 0.961, "step": 25204 }, { - "epoch": 0.7142452322253393, + "epoch": 0.9861882776430081, "grad_norm": 0.0, - "learning_rate": 3.98563750362104e-06, - "loss": 0.7336, + "learning_rate": 1.000365366446321e-08, + "loss": 0.9384, "step": 25205 }, { - "epoch": 0.7142735696676018, + "epoch": 0.9862274043352375, "grad_norm": 0.0, - "learning_rate": 3.984904288023016e-06, - "loss": 0.7671, + "learning_rate": 9.947065405245815e-09, + "loss": 1.1445, "step": 25206 }, { - "epoch": 0.7143019071098643, + "epoch": 0.986266531027467, "grad_norm": 0.0, - "learning_rate": 3.984171123092379e-06, - "loss": 0.7566, + "learning_rate": 9.89063757375952e-09, + "loss": 0.9789, "step": 25207 }, { - "epoch": 0.7143302445521267, + "epoch": 0.9863056577196964, "grad_norm": 0.0, - "learning_rate": 3.983438008835301e-06, - "loss": 0.8113, + "learning_rate": 9.834370170911378e-09, + "loss": 0.9557, "step": 25208 }, { - "epoch": 0.7143585819943892, + "epoch": 0.9863447844119259, "grad_norm": 0.0, - "learning_rate": 3.982704945257957e-06, - "loss": 0.8218, + "learning_rate": 9.77826319760511e-09, + "loss": 0.9898, "step": 25209 }, { - "epoch": 0.7143869194366517, + "epoch": 0.9863839111041552, "grad_norm": 0.0, - "learning_rate": 3.9819719323665285e-06, - "loss": 0.8379, + "learning_rate": 9.722316654741103e-09, + "loss": 1.0626, "step": 25210 }, { - "epoch": 0.7144152568789142, + "epoch": 0.9864230377963847, "grad_norm": 0.0, - "learning_rate": 3.98123897016718e-06, - "loss": 0.9229, + "learning_rate": 9.666530543218644e-09, + "loss": 1.0765, "step": 25211 }, { - "epoch": 0.7144435943211765, + "epoch": 0.9864621644886141, "grad_norm": 0.0, - "learning_rate": 3.980506058666092e-06, - "loss": 0.909, + "learning_rate": 9.610904863932568e-09, + "loss": 0.9713, "step": 25212 }, { - "epoch": 0.714471931763439, + "epoch": 0.9865012911808436, "grad_norm": 0.0, - "learning_rate": 3.979773197869441e-06, - "loss": 0.8745, + "learning_rate": 9.555439617776607e-09, + "loss": 0.8608, "step": 25213 }, { - "epoch": 0.7145002692057015, + "epoch": 0.986540417873073, "grad_norm": 0.0, - "learning_rate": 3.979040387783391e-06, - "loss": 0.8804, + "learning_rate": 9.500134805642269e-09, + "loss": 1.0038, "step": 25214 }, { - "epoch": 0.7145286066479639, + "epoch": 0.9865795445653025, "grad_norm": 0.0, - "learning_rate": 3.97830762841412e-06, - "loss": 0.7994, + "learning_rate": 9.444990428416623e-09, + "loss": 0.8657, "step": 25215 }, { - "epoch": 0.7145569440902264, + "epoch": 0.9866186712575319, "grad_norm": 0.0, - "learning_rate": 3.977574919767801e-06, - "loss": 0.7664, + "learning_rate": 9.390006486985625e-09, + "loss": 0.8609, "step": 25216 }, { - "epoch": 0.7145852815324889, + "epoch": 0.9866577979497613, "grad_norm": 0.0, - "learning_rate": 3.976842261850603e-06, - "loss": 0.9216, + "learning_rate": 9.335182982231905e-09, + "loss": 0.8777, "step": 25217 }, { - "epoch": 0.7146136189747513, + "epoch": 0.9866969246419908, "grad_norm": 0.0, - "learning_rate": 3.976109654668704e-06, - "loss": 0.8345, + "learning_rate": 9.280519915036979e-09, + "loss": 0.9535, "step": 25218 }, { - "epoch": 0.7146419564170138, + "epoch": 0.9867360513342202, "grad_norm": 0.0, - "learning_rate": 3.975377098228266e-06, - "loss": 0.9031, + "learning_rate": 9.226017286276812e-09, + "loss": 0.8087, "step": 25219 }, { - "epoch": 0.7146702938592763, + "epoch": 0.9867751780264497, "grad_norm": 0.0, - "learning_rate": 3.974644592535464e-06, - "loss": 0.889, + "learning_rate": 9.17167509682848e-09, + "loss": 0.9134, "step": 25220 }, { - "epoch": 0.7146986313015388, + "epoch": 0.986814304718679, "grad_norm": 0.0, - "learning_rate": 3.973912137596472e-06, - "loss": 0.7455, + "learning_rate": 9.11749334756351e-09, + "loss": 1.0157, "step": 25221 }, { - "epoch": 0.7147269687438011, + "epoch": 0.9868534314109085, "grad_norm": 0.0, - "learning_rate": 3.973179733417453e-06, - "loss": 0.7297, + "learning_rate": 9.063472039352317e-09, + "loss": 1.0501, "step": 25222 }, { - "epoch": 0.7147553061860636, + "epoch": 0.9868925581031379, "grad_norm": 0.0, - "learning_rate": 3.972447380004581e-06, - "loss": 0.8102, + "learning_rate": 9.009611173061983e-09, + "loss": 1.0752, "step": 25223 }, { - "epoch": 0.7147836436283261, + "epoch": 0.9869316847953674, "grad_norm": 0.0, - "learning_rate": 3.97171507736402e-06, - "loss": 0.8138, + "learning_rate": 8.955910749558482e-09, + "loss": 1.0298, "step": 25224 }, { - "epoch": 0.7148119810705885, + "epoch": 0.9869708114875968, "grad_norm": 0.0, - "learning_rate": 3.970982825501942e-06, - "loss": 0.885, + "learning_rate": 8.90237076970335e-09, + "loss": 0.849, "step": 25225 }, { - "epoch": 0.714840318512851, + "epoch": 0.9870099381798263, "grad_norm": 0.0, - "learning_rate": 3.970250624424517e-06, - "loss": 0.8147, + "learning_rate": 8.848991234357007e-09, + "loss": 0.7822, "step": 25226 }, { - "epoch": 0.7148686559551135, + "epoch": 0.9870490648720557, "grad_norm": 0.0, - "learning_rate": 3.969518474137908e-06, - "loss": 0.7577, + "learning_rate": 8.795772144375436e-09, + "loss": 0.9424, "step": 25227 }, { - "epoch": 0.714896993397376, + "epoch": 0.9870881915642852, "grad_norm": 0.0, - "learning_rate": 3.968786374648283e-06, - "loss": 0.9041, + "learning_rate": 8.742713500614619e-09, + "loss": 0.936, "step": 25228 }, { - "epoch": 0.7149253308396384, + "epoch": 0.9871273182565146, "grad_norm": 0.0, - "learning_rate": 3.9680543259618105e-06, - "loss": 0.7018, + "learning_rate": 8.689815303926096e-09, + "loss": 0.8778, "step": 25229 }, { - "epoch": 0.7149536682819009, + "epoch": 0.9871664449487441, "grad_norm": 0.0, - "learning_rate": 3.967322328084657e-06, - "loss": 0.781, + "learning_rate": 8.63707755515919e-09, + "loss": 1.0781, "step": 25230 }, { - "epoch": 0.7149820057241634, + "epoch": 0.9872055716409734, "grad_norm": 0.0, - "learning_rate": 3.96659038102299e-06, - "loss": 0.8702, + "learning_rate": 8.584500255160999e-09, + "loss": 0.9939, "step": 25231 }, { - "epoch": 0.7150103431664258, + "epoch": 0.9872446983332029, "grad_norm": 0.0, - "learning_rate": 3.96585848478297e-06, - "loss": 0.7905, + "learning_rate": 8.532083404775294e-09, + "loss": 0.876, "step": 25232 }, { - "epoch": 0.7150386806086882, + "epoch": 0.9872838250254323, "grad_norm": 0.0, - "learning_rate": 3.965126639370764e-06, - "loss": 0.7757, + "learning_rate": 8.479827004844732e-09, + "loss": 0.8642, "step": 25233 }, { - "epoch": 0.7150670180509507, + "epoch": 0.9873229517176618, "grad_norm": 0.0, - "learning_rate": 3.964394844792542e-06, - "loss": 0.7582, + "learning_rate": 8.427731056208643e-09, + "loss": 1.0251, "step": 25234 }, { - "epoch": 0.7150953554932132, + "epoch": 0.9873620784098912, "grad_norm": 0.0, - "learning_rate": 3.963663101054459e-06, - "loss": 0.8359, + "learning_rate": 8.375795559701916e-09, + "loss": 0.8993, "step": 25235 }, { - "epoch": 0.7151236929354756, + "epoch": 0.9874012051021207, "grad_norm": 0.0, - "learning_rate": 3.9629314081626864e-06, - "loss": 0.7924, + "learning_rate": 8.324020516160547e-09, + "loss": 0.9094, "step": 25236 }, { - "epoch": 0.7151520303777381, + "epoch": 0.9874403317943501, "grad_norm": 0.0, - "learning_rate": 3.962199766123382e-06, - "loss": 0.8651, + "learning_rate": 8.272405926414984e-09, + "loss": 0.9103, "step": 25237 }, { - "epoch": 0.7151803678200006, + "epoch": 0.9874794584865796, "grad_norm": 0.0, - "learning_rate": 3.9614681749427105e-06, - "loss": 0.8179, + "learning_rate": 8.220951791294562e-09, + "loss": 0.9036, "step": 25238 }, { - "epoch": 0.715208705262263, + "epoch": 0.987518585178809, "grad_norm": 0.0, - "learning_rate": 3.960736634626838e-06, - "loss": 0.7392, + "learning_rate": 8.169658111624179e-09, + "loss": 1.0174, "step": 25239 }, { - "epoch": 0.7152370427045255, + "epoch": 0.9875577118710385, "grad_norm": 0.0, - "learning_rate": 3.960005145181921e-06, - "loss": 0.7914, + "learning_rate": 8.118524888229839e-09, + "loss": 0.9272, "step": 25240 }, { - "epoch": 0.715265380146788, + "epoch": 0.9875968385632679, "grad_norm": 0.0, - "learning_rate": 3.959273706614123e-06, - "loss": 0.8658, + "learning_rate": 8.067552121930888e-09, + "loss": 0.998, "step": 25241 }, { - "epoch": 0.7152937175890504, + "epoch": 0.9876359652554974, "grad_norm": 0.0, - "learning_rate": 3.958542318929606e-06, - "loss": 0.7976, + "learning_rate": 8.01673981354667e-09, + "loss": 0.8709, "step": 25242 }, { - "epoch": 0.7153220550313129, + "epoch": 0.9876750919477267, "grad_norm": 0.0, - "learning_rate": 3.95781098213453e-06, - "loss": 0.8598, + "learning_rate": 7.966087963892089e-09, + "loss": 0.9101, "step": 25243 }, { - "epoch": 0.7153503924735753, + "epoch": 0.9877142186399562, "grad_norm": 0.0, - "learning_rate": 3.957079696235059e-06, - "loss": 0.8001, + "learning_rate": 7.915596573782047e-09, + "loss": 1.0387, "step": 25244 }, { - "epoch": 0.7153787299158378, + "epoch": 0.9877533453321856, "grad_norm": 0.0, - "learning_rate": 3.956348461237347e-06, - "loss": 0.8344, + "learning_rate": 7.865265644027009e-09, + "loss": 1.0246, "step": 25245 }, { - "epoch": 0.7154070673581002, + "epoch": 0.987792472024415, "grad_norm": 0.0, - "learning_rate": 3.9556172771475554e-06, - "loss": 0.8975, + "learning_rate": 7.815095175435217e-09, + "loss": 0.8973, "step": 25246 }, { - "epoch": 0.7154354048003627, + "epoch": 0.9878315987166445, "grad_norm": 0.0, - "learning_rate": 3.954886143971848e-06, - "loss": 0.8208, + "learning_rate": 7.765085168811581e-09, + "loss": 0.9626, "step": 25247 }, { - "epoch": 0.7154637422426252, + "epoch": 0.9878707254088739, "grad_norm": 0.0, - "learning_rate": 3.954155061716376e-06, - "loss": 0.7851, + "learning_rate": 7.715235624958794e-09, + "loss": 0.905, "step": 25248 }, { - "epoch": 0.7154920796848876, + "epoch": 0.9879098521011034, "grad_norm": 0.0, - "learning_rate": 3.953424030387301e-06, - "loss": 0.7638, + "learning_rate": 7.665546544679547e-09, + "loss": 0.9594, "step": 25249 }, { - "epoch": 0.7155204171271501, + "epoch": 0.9879489787933328, "grad_norm": 0.0, - "learning_rate": 3.952693049990784e-06, - "loss": 0.841, + "learning_rate": 7.61601792877098e-09, + "loss": 0.8748, "step": 25250 }, { - "epoch": 0.7155487545694126, + "epoch": 0.9879881054855623, "grad_norm": 0.0, - "learning_rate": 3.951962120532975e-06, - "loss": 0.8132, + "learning_rate": 7.566649778026902e-09, + "loss": 1.0295, "step": 25251 }, { - "epoch": 0.7155770920116751, + "epoch": 0.9880272321777916, "grad_norm": 0.0, - "learning_rate": 3.951231242020039e-06, - "loss": 0.7917, + "learning_rate": 7.517442093241123e-09, + "loss": 0.9415, "step": 25252 }, { - "epoch": 0.7156054294539375, + "epoch": 0.9880663588700211, "grad_norm": 0.0, - "learning_rate": 3.950500414458126e-06, - "loss": 0.8697, + "learning_rate": 7.468394875204121e-09, + "loss": 0.9737, "step": 25253 }, { - "epoch": 0.7156337668962, + "epoch": 0.9881054855622505, "grad_norm": 0.0, - "learning_rate": 3.949769637853393e-06, - "loss": 0.7736, + "learning_rate": 7.419508124704156e-09, + "loss": 0.807, "step": 25254 }, { - "epoch": 0.7156621043384624, + "epoch": 0.98814461225448, "grad_norm": 0.0, - "learning_rate": 3.9490389122119974e-06, - "loss": 0.7441, + "learning_rate": 7.370781842525044e-09, + "loss": 0.9513, "step": 25255 }, { - "epoch": 0.7156904417807248, + "epoch": 0.9881837389467094, "grad_norm": 0.0, - "learning_rate": 3.948308237540094e-06, - "loss": 0.8141, + "learning_rate": 7.322216029449491e-09, + "loss": 1.0239, "step": 25256 }, { - "epoch": 0.7157187792229873, + "epoch": 0.9882228656389389, "grad_norm": 0.0, - "learning_rate": 3.9475776138438414e-06, - "loss": 0.7726, + "learning_rate": 7.273810686257987e-09, + "loss": 1.0386, "step": 25257 }, { - "epoch": 0.7157471166652498, + "epoch": 0.9882619923311683, "grad_norm": 0.0, - "learning_rate": 3.946847041129386e-06, - "loss": 0.8421, + "learning_rate": 7.225565813726576e-09, + "loss": 0.9669, "step": 25258 }, { - "epoch": 0.7157754541075123, + "epoch": 0.9883011190233978, "grad_norm": 0.0, - "learning_rate": 3.946116519402886e-06, - "loss": 0.7924, + "learning_rate": 7.177481412632414e-09, + "loss": 0.9381, "step": 25259 }, { - "epoch": 0.7158037915497747, + "epoch": 0.9883402457156272, "grad_norm": 0.0, - "learning_rate": 3.9453860486704975e-06, - "loss": 0.7825, + "learning_rate": 7.129557483744886e-09, + "loss": 0.9245, "step": 25260 }, { - "epoch": 0.7158321289920372, + "epoch": 0.9883793724078567, "grad_norm": 0.0, - "learning_rate": 3.944655628938369e-06, - "loss": 0.7776, + "learning_rate": 7.081794027835598e-09, + "loss": 1.0129, "step": 25261 }, { - "epoch": 0.7158604664342997, + "epoch": 0.9884184991000861, "grad_norm": 0.0, - "learning_rate": 3.943925260212653e-06, - "loss": 0.8086, + "learning_rate": 7.034191045670602e-09, + "loss": 0.945, "step": 25262 }, { - "epoch": 0.7158888038765621, + "epoch": 0.9884576257923156, "grad_norm": 0.0, - "learning_rate": 3.9431949424995075e-06, - "loss": 0.8311, + "learning_rate": 6.986748538014843e-09, + "loss": 0.9533, "step": 25263 }, { - "epoch": 0.7159171413188246, + "epoch": 0.9884967524845449, "grad_norm": 0.0, - "learning_rate": 3.942464675805077e-06, - "loss": 0.8917, + "learning_rate": 6.939466505629933e-09, + "loss": 0.866, "step": 25264 }, { - "epoch": 0.715945478761087, + "epoch": 0.9885358791767744, "grad_norm": 0.0, - "learning_rate": 3.941734460135514e-06, - "loss": 0.849, + "learning_rate": 6.8923449492741545e-09, + "loss": 1.0103, "step": 25265 }, { - "epoch": 0.7159738162033494, + "epoch": 0.9885750058690038, "grad_norm": 0.0, - "learning_rate": 3.941004295496977e-06, - "loss": 0.8582, + "learning_rate": 6.845383869706901e-09, + "loss": 1.03, "step": 25266 }, { - "epoch": 0.7160021536456119, + "epoch": 0.9886141325612333, "grad_norm": 0.0, - "learning_rate": 3.940274181895607e-06, - "loss": 0.7599, + "learning_rate": 6.7985832676797925e-09, + "loss": 0.9434, "step": 25267 }, { - "epoch": 0.7160304910878744, + "epoch": 0.9886532592534627, "grad_norm": 0.0, - "learning_rate": 3.939544119337557e-06, - "loss": 0.8455, + "learning_rate": 6.751943143945561e-09, + "loss": 1.0506, "step": 25268 }, { - "epoch": 0.7160588285301369, + "epoch": 0.9886923859456922, "grad_norm": 0.0, - "learning_rate": 3.9388141078289775e-06, - "loss": 0.825, + "learning_rate": 6.705463499252496e-09, + "loss": 0.9866, "step": 25269 }, { - "epoch": 0.7160871659723993, + "epoch": 0.9887315126379216, "grad_norm": 0.0, - "learning_rate": 3.938084147376018e-06, - "loss": 0.8221, + "learning_rate": 6.659144334347778e-09, + "loss": 0.9744, "step": 25270 }, { - "epoch": 0.7161155034146618, + "epoch": 0.9887706393301511, "grad_norm": 0.0, - "learning_rate": 3.9373542379848305e-06, - "loss": 0.7876, + "learning_rate": 6.612985649974146e-09, + "loss": 1.1151, "step": 25271 }, { - "epoch": 0.7161438408569243, + "epoch": 0.9888097660223805, "grad_norm": 0.0, - "learning_rate": 3.936624379661556e-06, - "loss": 0.7593, + "learning_rate": 6.56698744687434e-09, + "loss": 1.0247, "step": 25272 }, { - "epoch": 0.7161721782991867, + "epoch": 0.98884889271461, "grad_norm": 0.0, - "learning_rate": 3.9358945724123484e-06, - "loss": 0.6998, + "learning_rate": 6.521149725785547e-09, + "loss": 0.9201, "step": 25273 }, { - "epoch": 0.7162005157414492, + "epoch": 0.9888880194068393, "grad_norm": 0.0, - "learning_rate": 3.9351648162433495e-06, - "loss": 0.7514, + "learning_rate": 6.475472487444956e-09, + "loss": 0.9477, "step": 25274 }, { - "epoch": 0.7162288531837117, + "epoch": 0.9889271460990687, "grad_norm": 0.0, - "learning_rate": 3.93443511116071e-06, - "loss": 0.8277, + "learning_rate": 6.429955732585313e-09, + "loss": 1.1053, "step": 25275 }, { - "epoch": 0.7162571906259741, + "epoch": 0.9889662727912982, "grad_norm": 0.0, - "learning_rate": 3.933705457170579e-06, - "loss": 0.8486, + "learning_rate": 6.384599461938256e-09, + "loss": 1.043, "step": 25276 }, { - "epoch": 0.7162855280682365, + "epoch": 0.9890053994835276, "grad_norm": 0.0, - "learning_rate": 3.932975854279097e-06, - "loss": 0.9141, + "learning_rate": 6.339403676232092e-09, + "loss": 0.9462, "step": 25277 }, { - "epoch": 0.716313865510499, + "epoch": 0.9890445261757571, "grad_norm": 0.0, - "learning_rate": 3.932246302492411e-06, - "loss": 0.8163, + "learning_rate": 6.294368376192906e-09, + "loss": 0.9736, "step": 25278 }, { - "epoch": 0.7163422029527615, + "epoch": 0.9890836528679865, "grad_norm": 0.0, - "learning_rate": 3.931516801816668e-06, - "loss": 0.7481, + "learning_rate": 6.2494935625423415e-09, + "loss": 1.0001, "step": 25279 }, { - "epoch": 0.7163705403950239, + "epoch": 0.989122779560216, "grad_norm": 0.0, - "learning_rate": 3.930787352258013e-06, - "loss": 0.7662, + "learning_rate": 6.204779236002045e-09, + "loss": 0.9451, "step": 25280 }, { - "epoch": 0.7163988778372864, + "epoch": 0.9891619062524454, "grad_norm": 0.0, - "learning_rate": 3.930057953822594e-06, - "loss": 0.7766, + "learning_rate": 6.160225397290332e-09, + "loss": 0.9874, "step": 25281 }, { - "epoch": 0.7164272152795489, + "epoch": 0.9892010329446749, "grad_norm": 0.0, - "learning_rate": 3.929328606516546e-06, - "loss": 0.926, + "learning_rate": 6.1158320471221834e-09, + "loss": 0.9353, "step": 25282 }, { - "epoch": 0.7164555527218114, + "epoch": 0.9892401596369043, "grad_norm": 0.0, - "learning_rate": 3.928599310346017e-06, - "loss": 0.8127, + "learning_rate": 6.071599186211474e-09, + "loss": 0.9409, "step": 25283 }, { - "epoch": 0.7164838901640738, + "epoch": 0.9892792863291338, "grad_norm": 0.0, - "learning_rate": 3.927870065317156e-06, - "loss": 0.7736, + "learning_rate": 6.0275268152665264e-09, + "loss": 1.0053, "step": 25284 }, { - "epoch": 0.7165122276063363, + "epoch": 0.9893184130213631, "grad_norm": 0.0, - "learning_rate": 3.927140871436095e-06, - "loss": 0.8632, + "learning_rate": 5.983614934997883e-09, + "loss": 0.9123, "step": 25285 }, { - "epoch": 0.7165405650485988, + "epoch": 0.9893575397135926, "grad_norm": 0.0, - "learning_rate": 3.9264117287089865e-06, - "loss": 0.9079, + "learning_rate": 5.9398635461083155e-09, + "loss": 0.8828, "step": 25286 }, { - "epoch": 0.7165689024908611, + "epoch": 0.989396666405822, "grad_norm": 0.0, - "learning_rate": 3.9256826371419635e-06, - "loss": 0.8718, + "learning_rate": 5.896272649300594e-09, + "loss": 0.9272, "step": 25287 }, { - "epoch": 0.7165972399331236, + "epoch": 0.9894357930980515, "grad_norm": 0.0, - "learning_rate": 3.92495359674117e-06, - "loss": 0.8418, + "learning_rate": 5.85284224527638e-09, + "loss": 1.0089, "step": 25288 }, { - "epoch": 0.7166255773753861, + "epoch": 0.9894749197902809, "grad_norm": 0.0, - "learning_rate": 3.9242246075127536e-06, - "loss": 0.8187, + "learning_rate": 5.809572334731783e-09, + "loss": 0.9034, "step": 25289 }, { - "epoch": 0.7166539148176485, + "epoch": 0.9895140464825104, "grad_norm": 0.0, - "learning_rate": 3.923495669462844e-06, - "loss": 0.7919, + "learning_rate": 5.766462918362914e-09, + "loss": 0.9064, "step": 25290 }, { - "epoch": 0.716682252259911, + "epoch": 0.9895531731747398, "grad_norm": 0.0, - "learning_rate": 3.922766782597588e-06, - "loss": 0.8373, + "learning_rate": 5.72351399686033e-09, + "loss": 0.9739, "step": 25291 }, { - "epoch": 0.7167105897021735, + "epoch": 0.9895922998669693, "grad_norm": 0.0, - "learning_rate": 3.922037946923124e-06, - "loss": 0.9172, + "learning_rate": 5.680725570914592e-09, + "loss": 0.9735, "step": 25292 }, { - "epoch": 0.716738927144436, + "epoch": 0.9896314265591987, "grad_norm": 0.0, - "learning_rate": 3.921309162445591e-06, - "loss": 0.8798, + "learning_rate": 5.638097641212925e-09, + "loss": 0.8776, "step": 25293 }, { - "epoch": 0.7167672645866984, + "epoch": 0.9896705532514282, "grad_norm": 0.0, - "learning_rate": 3.920580429171132e-06, - "loss": 0.7483, + "learning_rate": 5.595630208439229e-09, + "loss": 1.0125, "step": 25294 }, { - "epoch": 0.7167956020289609, + "epoch": 0.9897096799436575, "grad_norm": 0.0, - "learning_rate": 3.919851747105879e-06, - "loss": 0.86, + "learning_rate": 5.5533232732774e-09, + "loss": 1.0084, "step": 25295 }, { - "epoch": 0.7168239394712234, + "epoch": 0.989748806635887, "grad_norm": 0.0, - "learning_rate": 3.9191231162559715e-06, - "loss": 0.8668, + "learning_rate": 5.511176836404674e-09, + "loss": 0.7971, "step": 25296 }, { - "epoch": 0.7168522769134857, + "epoch": 0.9897879333281164, "grad_norm": 0.0, - "learning_rate": 3.9183945366275524e-06, - "loss": 0.8997, + "learning_rate": 5.469190898498289e-09, + "loss": 0.9065, "step": 25297 }, { - "epoch": 0.7168806143557482, + "epoch": 0.9898270600203459, "grad_norm": 0.0, - "learning_rate": 3.91766600822675e-06, - "loss": 0.808, + "learning_rate": 5.427365460233258e-09, + "loss": 1.0209, "step": 25298 }, { - "epoch": 0.7169089517980107, + "epoch": 0.9898661867125753, "grad_norm": 0.0, - "learning_rate": 3.916937531059706e-06, - "loss": 0.8499, + "learning_rate": 5.385700522280158e-09, + "loss": 0.8334, "step": 25299 }, { - "epoch": 0.7169372892402732, + "epoch": 0.9899053134048048, "grad_norm": 0.0, - "learning_rate": 3.916209105132559e-06, - "loss": 0.7916, + "learning_rate": 5.344196085309561e-09, + "loss": 0.823, "step": 25300 }, { - "epoch": 0.7169656266825356, + "epoch": 0.9899444400970342, "grad_norm": 0.0, - "learning_rate": 3.915480730451438e-06, - "loss": 0.8275, + "learning_rate": 5.302852149987603e-09, + "loss": 0.93, "step": 25301 }, { - "epoch": 0.7169939641247981, + "epoch": 0.9899835667892636, "grad_norm": 0.0, - "learning_rate": 3.914752407022487e-06, - "loss": 0.9285, + "learning_rate": 5.261668716975976e-09, + "loss": 0.9764, "step": 25302 }, { - "epoch": 0.7170223015670606, + "epoch": 0.9900226934814931, "grad_norm": 0.0, - "learning_rate": 3.914024134851833e-06, - "loss": 0.8262, + "learning_rate": 5.2206457869397045e-09, + "loss": 0.9151, "step": 25303 }, { - "epoch": 0.717050639009323, + "epoch": 0.9900618201737225, "grad_norm": 0.0, - "learning_rate": 3.9132959139456125e-06, - "loss": 0.8784, + "learning_rate": 5.179783360534929e-09, + "loss": 0.9906, "step": 25304 }, { - "epoch": 0.7170789764515855, + "epoch": 0.990100946865952, "grad_norm": 0.0, - "learning_rate": 3.912567744309961e-06, - "loss": 0.8057, + "learning_rate": 5.139081438418902e-09, + "loss": 0.8925, "step": 25305 }, { - "epoch": 0.717107313893848, + "epoch": 0.9901400735581813, "grad_norm": 0.0, - "learning_rate": 3.911839625951012e-06, - "loss": 0.7997, + "learning_rate": 5.0985400212444355e-09, + "loss": 0.9497, "step": 25306 }, { - "epoch": 0.7171356513361105, + "epoch": 0.9901792002504108, "grad_norm": 0.0, - "learning_rate": 3.911111558874898e-06, - "loss": 0.8064, + "learning_rate": 5.05815910966323e-09, + "loss": 0.9435, "step": 25307 }, { - "epoch": 0.7171639887783728, + "epoch": 0.9902183269426402, "grad_norm": 0.0, - "learning_rate": 3.910383543087757e-06, - "loss": 0.8696, + "learning_rate": 5.017938704323655e-09, + "loss": 0.9777, "step": 25308 }, { - "epoch": 0.7171923262206353, + "epoch": 0.9902574536348697, "grad_norm": 0.0, - "learning_rate": 3.909655578595714e-06, - "loss": 0.8445, + "learning_rate": 4.977878805871861e-09, + "loss": 1.0546, "step": 25309 }, { - "epoch": 0.7172206636628978, + "epoch": 0.9902965803270991, "grad_norm": 0.0, - "learning_rate": 3.9089276654049045e-06, - "loss": 0.7503, + "learning_rate": 4.937979414950667e-09, + "loss": 0.9198, "step": 25310 }, { - "epoch": 0.7172490011051602, + "epoch": 0.9903357070193286, "grad_norm": 0.0, - "learning_rate": 3.9081998035214576e-06, - "loss": 0.9069, + "learning_rate": 4.898240532201781e-09, + "loss": 0.8762, "step": 25311 }, { - "epoch": 0.7172773385474227, + "epoch": 0.990374833711558, "grad_norm": 0.0, - "learning_rate": 3.907471992951505e-06, - "loss": 0.7502, + "learning_rate": 4.858662158261363e-09, + "loss": 0.8662, "step": 25312 }, { - "epoch": 0.7173056759896852, + "epoch": 0.9904139604037875, "grad_norm": 0.0, - "learning_rate": 3.9067442337011816e-06, - "loss": 0.8666, + "learning_rate": 4.819244293766678e-09, + "loss": 0.9675, "step": 25313 }, { - "epoch": 0.7173340134319476, + "epoch": 0.9904530870960169, "grad_norm": 0.0, - "learning_rate": 3.9060165257766116e-06, - "loss": 0.8911, + "learning_rate": 4.779986939350556e-09, + "loss": 0.9151, "step": 25314 }, { - "epoch": 0.7173623508742101, + "epoch": 0.9904922137882464, "grad_norm": 0.0, - "learning_rate": 3.9052888691839305e-06, - "loss": 0.6581, + "learning_rate": 4.740890095642492e-09, + "loss": 0.9321, "step": 25315 }, { - "epoch": 0.7173906883164726, + "epoch": 0.9905313404804758, "grad_norm": 0.0, - "learning_rate": 3.904561263929261e-06, - "loss": 0.8192, + "learning_rate": 4.701953763271983e-09, + "loss": 0.905, "step": 25316 }, { - "epoch": 0.7174190257587351, + "epoch": 0.9905704671727052, "grad_norm": 0.0, - "learning_rate": 3.903833710018735e-06, - "loss": 0.8827, + "learning_rate": 4.663177942862973e-09, + "loss": 0.9695, "step": 25317 }, { - "epoch": 0.7174473632009974, + "epoch": 0.9906095938649346, "grad_norm": 0.0, - "learning_rate": 3.903106207458482e-06, - "loss": 0.8973, + "learning_rate": 4.624562635038299e-09, + "loss": 0.9002, "step": 25318 }, { - "epoch": 0.7174757006432599, + "epoch": 0.9906487205571641, "grad_norm": 0.0, - "learning_rate": 3.902378756254629e-06, - "loss": 0.8601, + "learning_rate": 4.586107840417464e-09, + "loss": 0.847, "step": 25319 }, { - "epoch": 0.7175040380855224, + "epoch": 0.9906878472493935, "grad_norm": 0.0, - "learning_rate": 3.901651356413304e-06, - "loss": 0.7753, + "learning_rate": 4.547813559619974e-09, + "loss": 0.9629, "step": 25320 }, { - "epoch": 0.7175323755277848, + "epoch": 0.990726973941623, "grad_norm": 0.0, - "learning_rate": 3.900924007940638e-06, - "loss": 0.8154, + "learning_rate": 4.50967979325867e-09, + "loss": 0.9675, "step": 25321 }, { - "epoch": 0.7175607129700473, + "epoch": 0.9907661006338524, "grad_norm": 0.0, - "learning_rate": 3.900196710842751e-06, - "loss": 0.7994, + "learning_rate": 4.471706541947507e-09, + "loss": 0.9732, "step": 25322 }, { - "epoch": 0.7175890504123098, + "epoch": 0.9908052273260819, "grad_norm": 0.0, - "learning_rate": 3.899469465125774e-06, - "loss": 0.8758, + "learning_rate": 4.433893806294887e-09, + "loss": 1.0008, "step": 25323 }, { - "epoch": 0.7176173878545723, + "epoch": 0.9908443540183113, "grad_norm": 0.0, - "learning_rate": 3.898742270795829e-06, - "loss": 0.8552, + "learning_rate": 4.396241586909211e-09, + "loss": 0.9811, "step": 25324 }, { - "epoch": 0.7176457252968347, + "epoch": 0.9908834807105408, "grad_norm": 0.0, - "learning_rate": 3.898015127859043e-06, - "loss": 0.7925, + "learning_rate": 4.358749884395552e-09, + "loss": 0.816, "step": 25325 }, { - "epoch": 0.7176740627390972, + "epoch": 0.9909226074027702, "grad_norm": 0.0, - "learning_rate": 3.897288036321545e-06, - "loss": 0.8209, + "learning_rate": 4.321418699354541e-09, + "loss": 0.8692, "step": 25326 }, { - "epoch": 0.7177024001813597, + "epoch": 0.9909617340949997, "grad_norm": 0.0, - "learning_rate": 3.896560996189454e-06, - "loss": 0.9202, + "learning_rate": 4.284248032385696e-09, + "loss": 1.025, "step": 25327 }, { - "epoch": 0.717730737623622, + "epoch": 0.991000860787229, "grad_norm": 0.0, - "learning_rate": 3.895834007468894e-06, - "loss": 0.8266, + "learning_rate": 4.247237884086319e-09, + "loss": 0.9405, "step": 25328 }, { - "epoch": 0.7177590750658845, + "epoch": 0.9910399874794585, "grad_norm": 0.0, - "learning_rate": 3.895107070165995e-06, - "loss": 0.8151, + "learning_rate": 4.210388255051489e-09, + "loss": 1.0406, "step": 25329 }, { - "epoch": 0.717787412508147, + "epoch": 0.9910791141716879, "grad_norm": 0.0, - "learning_rate": 3.894380184286874e-06, - "loss": 0.8499, + "learning_rate": 4.173699145872956e-09, + "loss": 0.9977, "step": 25330 }, { - "epoch": 0.7178157499504094, + "epoch": 0.9911182408639173, "grad_norm": 0.0, - "learning_rate": 3.8936533498376535e-06, - "loss": 0.7503, + "learning_rate": 4.137170557139136e-09, + "loss": 0.8588, "step": 25331 }, { - "epoch": 0.7178440873926719, + "epoch": 0.9911573675561468, "grad_norm": 0.0, - "learning_rate": 3.89292656682446e-06, - "loss": 0.7229, + "learning_rate": 4.100802489436228e-09, + "loss": 1.0609, "step": 25332 }, { - "epoch": 0.7178724248349344, + "epoch": 0.9911964942483762, "grad_norm": 0.0, - "learning_rate": 3.892199835253413e-06, - "loss": 0.8606, + "learning_rate": 4.06459494334932e-09, + "loss": 0.8818, "step": 25333 }, { - "epoch": 0.7179007622771969, + "epoch": 0.9912356209406057, "grad_norm": 0.0, - "learning_rate": 3.89147315513064e-06, - "loss": 0.7603, + "learning_rate": 4.028547919459058e-09, + "loss": 1.04, "step": 25334 }, { - "epoch": 0.7179290997194593, + "epoch": 0.9912747476328351, "grad_norm": 0.0, - "learning_rate": 3.890746526462252e-06, - "loss": 0.7909, + "learning_rate": 3.9926614183449785e-09, + "loss": 0.9131, "step": 25335 }, { - "epoch": 0.7179574371617218, + "epoch": 0.9913138743250646, "grad_norm": 0.0, - "learning_rate": 3.890019949254378e-06, - "loss": 0.9529, + "learning_rate": 3.956935440582177e-09, + "loss": 0.835, "step": 25336 }, { - "epoch": 0.7179857746039843, + "epoch": 0.991353001017294, "grad_norm": 0.0, - "learning_rate": 3.889293423513132e-06, - "loss": 0.8305, + "learning_rate": 3.921369986746859e-09, + "loss": 0.9862, "step": 25337 }, { - "epoch": 0.7180141120462467, + "epoch": 0.9913921277095235, "grad_norm": 0.0, - "learning_rate": 3.8885669492446364e-06, - "loss": 0.7921, + "learning_rate": 3.885965057407459e-09, + "loss": 0.8332, "step": 25338 }, { - "epoch": 0.7180424494885091, + "epoch": 0.9914312544017528, "grad_norm": 0.0, - "learning_rate": 3.887840526455014e-06, - "loss": 0.8626, + "learning_rate": 3.850720653133522e-09, + "loss": 1.0427, "step": 25339 }, { - "epoch": 0.7180707869307716, + "epoch": 0.9914703810939823, "grad_norm": 0.0, - "learning_rate": 3.887114155150377e-06, - "loss": 0.8374, + "learning_rate": 3.8156367744912604e-09, + "loss": 0.9232, "step": 25340 }, { - "epoch": 0.7180991243730341, + "epoch": 0.9915095077862117, "grad_norm": 0.0, - "learning_rate": 3.886387835336849e-06, - "loss": 0.788, + "learning_rate": 3.780713422044668e-09, + "loss": 1.0403, "step": 25341 }, { - "epoch": 0.7181274618152965, + "epoch": 0.9915486344784412, "grad_norm": 0.0, - "learning_rate": 3.885661567020549e-06, - "loss": 0.8862, + "learning_rate": 3.745950596353298e-09, + "loss": 1.018, "step": 25342 }, { - "epoch": 0.718155799257559, + "epoch": 0.9915877611706706, "grad_norm": 0.0, - "learning_rate": 3.884935350207588e-06, - "loss": 0.8098, + "learning_rate": 3.711348297975592e-09, + "loss": 0.982, "step": 25343 }, { - "epoch": 0.7181841366998215, + "epoch": 0.9916268878629001, "grad_norm": 0.0, - "learning_rate": 3.884209184904088e-06, - "loss": 0.7806, + "learning_rate": 3.6769065274677718e-09, + "loss": 1.0253, "step": 25344 }, { - "epoch": 0.7182124741420839, + "epoch": 0.9916660145551295, "grad_norm": 0.0, - "learning_rate": 3.883483071116165e-06, - "loss": 0.8551, + "learning_rate": 3.6426252853838385e-09, + "loss": 0.8203, "step": 25345 }, { - "epoch": 0.7182408115843464, + "epoch": 0.991705141247359, "grad_norm": 0.0, - "learning_rate": 3.882757008849936e-06, - "loss": 0.7328, + "learning_rate": 3.6085045722722424e-09, + "loss": 0.9079, "step": 25346 }, { - "epoch": 0.7182691490266089, + "epoch": 0.9917442679395884, "grad_norm": 0.0, - "learning_rate": 3.882030998111518e-06, - "loss": 0.8282, + "learning_rate": 3.5745443886814334e-09, + "loss": 0.985, "step": 25347 }, { - "epoch": 0.7182974864688714, + "epoch": 0.9917833946318179, "grad_norm": 0.0, - "learning_rate": 3.881305038907023e-06, - "loss": 0.8528, + "learning_rate": 3.540744735157642e-09, + "loss": 0.8976, "step": 25348 }, { - "epoch": 0.7183258239111338, + "epoch": 0.9918225213240472, "grad_norm": 0.0, - "learning_rate": 3.880579131242567e-06, - "loss": 0.8967, + "learning_rate": 3.5071056122437663e-09, + "loss": 0.8921, "step": 25349 }, { - "epoch": 0.7183541613533962, + "epoch": 0.9918616480162767, "grad_norm": 0.0, - "learning_rate": 3.879853275124269e-06, - "loss": 0.8871, + "learning_rate": 3.4736270204793752e-09, + "loss": 0.9816, "step": 25350 }, { - "epoch": 0.7183824987956587, + "epoch": 0.9919007747085061, "grad_norm": 0.0, - "learning_rate": 3.879127470558236e-06, - "loss": 0.8216, + "learning_rate": 3.4403089604007066e-09, + "loss": 0.961, "step": 25351 }, { - "epoch": 0.7184108362379211, + "epoch": 0.9919399014007356, "grad_norm": 0.0, - "learning_rate": 3.8784017175505886e-06, - "loss": 0.7688, + "learning_rate": 3.4071514325462184e-09, + "loss": 0.8623, "step": 25352 }, { - "epoch": 0.7184391736801836, + "epoch": 0.991979028092965, "grad_norm": 0.0, - "learning_rate": 3.877676016107433e-06, - "loss": 0.8034, + "learning_rate": 3.374154437445487e-09, + "loss": 0.9294, "step": 25353 }, { - "epoch": 0.7184675111224461, + "epoch": 0.9920181547851945, "grad_norm": 0.0, - "learning_rate": 3.8769503662348865e-06, - "loss": 0.7658, + "learning_rate": 3.341317975629199e-09, + "loss": 0.9157, "step": 25354 }, { - "epoch": 0.7184958485647085, + "epoch": 0.9920572814774239, "grad_norm": 0.0, - "learning_rate": 3.876224767939064e-06, - "loss": 0.7475, + "learning_rate": 3.308642047625821e-09, + "loss": 1.0764, "step": 25355 }, { - "epoch": 0.718524186006971, + "epoch": 0.9920964081696534, "grad_norm": 0.0, - "learning_rate": 3.8754992212260714e-06, - "loss": 0.7917, + "learning_rate": 3.276126653958267e-09, + "loss": 0.8987, "step": 25356 }, { - "epoch": 0.7185525234492335, + "epoch": 0.9921355348618828, "grad_norm": 0.0, - "learning_rate": 3.874773726102022e-06, - "loss": 0.78, + "learning_rate": 3.243771795149453e-09, + "loss": 0.9694, "step": 25357 }, { - "epoch": 0.718580860891496, + "epoch": 0.9921746615541123, "grad_norm": 0.0, - "learning_rate": 3.874048282573029e-06, - "loss": 0.794, + "learning_rate": 3.211577471718963e-09, + "loss": 1.0568, "step": 25358 }, { - "epoch": 0.7186091983337584, + "epoch": 0.9922137882463417, "grad_norm": 0.0, - "learning_rate": 3.873322890645202e-06, - "loss": 0.838, + "learning_rate": 3.179543684184161e-09, + "loss": 0.9232, "step": 25359 }, { - "epoch": 0.7186375357760209, + "epoch": 0.992252914938571, "grad_norm": 0.0, - "learning_rate": 3.872597550324654e-06, - "loss": 0.735, + "learning_rate": 3.14767043305908e-09, + "loss": 0.9827, "step": 25360 }, { - "epoch": 0.7186658732182833, + "epoch": 0.9922920416308005, "grad_norm": 0.0, - "learning_rate": 3.871872261617489e-06, - "loss": 0.7533, + "learning_rate": 3.1159577188555334e-09, + "loss": 0.94, "step": 25361 }, { - "epoch": 0.7186942106605457, + "epoch": 0.9923311683230299, "grad_norm": 0.0, - "learning_rate": 3.8711470245298195e-06, - "loss": 0.8858, + "learning_rate": 3.084405542083113e-09, + "loss": 0.9204, "step": 25362 }, { - "epoch": 0.7187225481028082, + "epoch": 0.9923702950152594, "grad_norm": 0.0, - "learning_rate": 3.870421839067759e-06, - "loss": 0.8969, + "learning_rate": 3.053013903249191e-09, + "loss": 1.0146, "step": 25363 }, { - "epoch": 0.7187508855450707, + "epoch": 0.9924094217074888, "grad_norm": 0.0, - "learning_rate": 3.869696705237407e-06, - "loss": 0.7277, + "learning_rate": 3.0217828028555883e-09, + "loss": 0.8834, "step": 25364 }, { - "epoch": 0.7187792229873332, + "epoch": 0.9924485483997183, "grad_norm": 0.0, - "learning_rate": 3.868971623044881e-06, - "loss": 0.763, + "learning_rate": 2.990712241405236e-09, + "loss": 0.825, "step": 25365 }, { - "epoch": 0.7188075604295956, + "epoch": 0.9924876750919477, "grad_norm": 0.0, - "learning_rate": 3.8682465924962776e-06, - "loss": 0.7481, + "learning_rate": 2.959802219397734e-09, + "loss": 0.8803, "step": 25366 }, { - "epoch": 0.7188358978718581, + "epoch": 0.9925268017841772, "grad_norm": 0.0, - "learning_rate": 3.867521613597712e-06, - "loss": 0.7876, + "learning_rate": 2.929052737329352e-09, + "loss": 0.806, "step": 25367 }, { - "epoch": 0.7188642353141206, + "epoch": 0.9925659284764066, "grad_norm": 0.0, - "learning_rate": 3.866796686355292e-06, - "loss": 0.9061, + "learning_rate": 2.8984637956930297e-09, + "loss": 1.0035, "step": 25368 }, { - "epoch": 0.718892572756383, + "epoch": 0.9926050551686361, "grad_norm": 0.0, - "learning_rate": 3.866071810775118e-06, - "loss": 0.8193, + "learning_rate": 2.868035394979485e-09, + "loss": 0.9926, "step": 25369 }, { - "epoch": 0.7189209101986455, + "epoch": 0.9926441818608654, "grad_norm": 0.0, - "learning_rate": 3.8653469868632986e-06, - "loss": 0.812, + "learning_rate": 2.8377675356783262e-09, + "loss": 0.9287, "step": 25370 }, { - "epoch": 0.718949247640908, + "epoch": 0.992683308553095, "grad_norm": 0.0, - "learning_rate": 3.86462221462594e-06, - "loss": 0.8245, + "learning_rate": 2.8076602182758317e-09, + "loss": 0.9663, "step": 25371 }, { - "epoch": 0.7189775850831704, + "epoch": 0.9927224352453243, "grad_norm": 0.0, - "learning_rate": 3.863897494069147e-06, - "loss": 0.8608, + "learning_rate": 2.7777134432549478e-09, + "loss": 1.021, "step": 25372 }, { - "epoch": 0.7190059225254328, + "epoch": 0.9927615619375538, "grad_norm": 0.0, - "learning_rate": 3.863172825199026e-06, - "loss": 0.8206, + "learning_rate": 2.7479272110975117e-09, + "loss": 0.897, "step": 25373 }, { - "epoch": 0.7190342599676953, + "epoch": 0.9928006886297832, "grad_norm": 0.0, - "learning_rate": 3.862448208021677e-06, - "loss": 0.9344, + "learning_rate": 2.7183015222809197e-09, + "loss": 0.9592, "step": 25374 }, { - "epoch": 0.7190625974099578, + "epoch": 0.9928398153220127, "grad_norm": 0.0, - "learning_rate": 3.861723642543206e-06, - "loss": 0.8201, + "learning_rate": 2.688836377280346e-09, + "loss": 0.8389, "step": 25375 }, { - "epoch": 0.7190909348522202, + "epoch": 0.9928789420142421, "grad_norm": 0.0, - "learning_rate": 3.860999128769719e-06, - "loss": 0.9171, + "learning_rate": 2.659531776569857e-09, + "loss": 1.0742, "step": 25376 }, { - "epoch": 0.7191192722944827, + "epoch": 0.9929180687064716, "grad_norm": 0.0, - "learning_rate": 3.860274666707312e-06, - "loss": 0.8531, + "learning_rate": 2.6303877206201868e-09, + "loss": 1.0544, "step": 25377 }, { - "epoch": 0.7191476097367452, + "epoch": 0.992957195398701, "grad_norm": 0.0, - "learning_rate": 3.8595502563620924e-06, - "loss": 0.8717, + "learning_rate": 2.601404209898739e-09, + "loss": 0.9054, "step": 25378 }, { - "epoch": 0.7191759471790076, + "epoch": 0.9929963220909305, "grad_norm": 0.0, - "learning_rate": 3.858825897740164e-06, - "loss": 0.7526, + "learning_rate": 2.572581244871808e-09, + "loss": 0.7827, "step": 25379 }, { - "epoch": 0.7192042846212701, + "epoch": 0.9930354487831599, "grad_norm": 0.0, - "learning_rate": 3.858101590847623e-06, - "loss": 0.7155, + "learning_rate": 2.543918826002356e-09, + "loss": 0.8216, "step": 25380 }, { - "epoch": 0.7192326220635326, + "epoch": 0.9930745754753894, "grad_norm": 0.0, - "learning_rate": 3.857377335690575e-06, - "loss": 0.7943, + "learning_rate": 2.515416953748906e-09, + "loss": 0.9793, "step": 25381 }, { - "epoch": 0.719260959505795, + "epoch": 0.9931137021676187, "grad_norm": 0.0, - "learning_rate": 3.856653132275117e-06, - "loss": 0.825, + "learning_rate": 2.4870756285699795e-09, + "loss": 1.0358, "step": 25382 }, { - "epoch": 0.7192892969480574, + "epoch": 0.9931528288598482, "grad_norm": 0.0, - "learning_rate": 3.85592898060735e-06, - "loss": 0.8064, + "learning_rate": 2.4588948509218778e-09, + "loss": 0.9019, "step": 25383 }, { - "epoch": 0.7193176343903199, + "epoch": 0.9931919555520776, "grad_norm": 0.0, - "learning_rate": 3.855204880693374e-06, - "loss": 0.8164, + "learning_rate": 2.4308746212564627e-09, + "loss": 0.929, "step": 25384 }, { - "epoch": 0.7193459718325824, + "epoch": 0.9932310822443071, "grad_norm": 0.0, - "learning_rate": 3.85448083253929e-06, - "loss": 0.6662, + "learning_rate": 2.4030149400233737e-09, + "loss": 0.9156, "step": 25385 }, { - "epoch": 0.7193743092748448, + "epoch": 0.9932702089365365, "grad_norm": 0.0, - "learning_rate": 3.853756836151197e-06, - "loss": 0.7895, + "learning_rate": 2.375315807670031e-09, + "loss": 0.91, "step": 25386 }, { - "epoch": 0.7194026467171073, + "epoch": 0.993309335628766, "grad_norm": 0.0, - "learning_rate": 3.8530328915351946e-06, - "loss": 0.7607, + "learning_rate": 2.3477772246416342e-09, + "loss": 1.0054, "step": 25387 }, { - "epoch": 0.7194309841593698, + "epoch": 0.9933484623209954, "grad_norm": 0.0, - "learning_rate": 3.852308998697375e-06, - "loss": 0.7904, + "learning_rate": 2.3203991913800515e-09, + "loss": 1.0016, "step": 25388 }, { - "epoch": 0.7194593216016323, + "epoch": 0.9933875890132248, "grad_norm": 0.0, - "learning_rate": 3.851585157643845e-06, - "loss": 0.8665, + "learning_rate": 2.293181708324932e-09, + "loss": 0.911, "step": 25389 }, { - "epoch": 0.7194876590438947, + "epoch": 0.9934267157054543, "grad_norm": 0.0, - "learning_rate": 3.850861368380691e-06, - "loss": 0.7955, + "learning_rate": 2.2661247759125927e-09, + "loss": 1.0106, "step": 25390 }, { - "epoch": 0.7195159964861572, + "epoch": 0.9934658423976837, "grad_norm": 0.0, - "learning_rate": 3.850137630914015e-06, - "loss": 0.9097, + "learning_rate": 2.2392283945793513e-09, + "loss": 0.9062, "step": 25391 }, { - "epoch": 0.7195443339284197, + "epoch": 0.9935049690899131, "grad_norm": 0.0, - "learning_rate": 3.849413945249918e-06, - "loss": 0.7656, + "learning_rate": 2.212492564757085e-09, + "loss": 0.9956, "step": 25392 }, { - "epoch": 0.719572671370682, + "epoch": 0.9935440957821425, "grad_norm": 0.0, - "learning_rate": 3.848690311394487e-06, - "loss": 0.7932, + "learning_rate": 2.185917286873229e-09, + "loss": 0.9537, "step": 25393 }, { - "epoch": 0.7196010088129445, + "epoch": 0.993583222474372, "grad_norm": 0.0, - "learning_rate": 3.847966729353826e-06, - "loss": 0.7746, + "learning_rate": 2.15950256135522e-09, + "loss": 1.0121, "step": 25394 }, { - "epoch": 0.719629346255207, + "epoch": 0.9936223491666014, "grad_norm": 0.0, - "learning_rate": 3.847243199134022e-06, - "loss": 0.7977, + "learning_rate": 2.133248388628273e-09, + "loss": 0.918, "step": 25395 }, { - "epoch": 0.7196576836974695, + "epoch": 0.9936614758588309, "grad_norm": 0.0, - "learning_rate": 3.846519720741173e-06, - "loss": 0.7488, + "learning_rate": 2.1071547691131624e-09, + "loss": 0.9384, "step": 25396 }, { - "epoch": 0.7196860211397319, + "epoch": 0.9937006025510603, "grad_norm": 0.0, - "learning_rate": 3.845796294181374e-06, - "loss": 0.8185, + "learning_rate": 2.0812217032295523e-09, + "loss": 0.9226, "step": 25397 }, { - "epoch": 0.7197143585819944, + "epoch": 0.9937397292432898, "grad_norm": 0.0, - "learning_rate": 3.845072919460717e-06, - "loss": 0.7934, + "learning_rate": 2.055449191392667e-09, + "loss": 1.0703, "step": 25398 }, { - "epoch": 0.7197426960242569, + "epoch": 0.9937788559355192, "grad_norm": 0.0, - "learning_rate": 3.844349596585299e-06, - "loss": 0.9194, + "learning_rate": 2.0298372340177287e-09, + "loss": 1.019, "step": 25399 }, { - "epoch": 0.7197710334665193, + "epoch": 0.9938179826277487, "grad_norm": 0.0, - "learning_rate": 3.843626325561211e-06, - "loss": 0.6618, + "learning_rate": 2.0043858315144103e-09, + "loss": 0.8479, "step": 25400 }, { - "epoch": 0.7197993709087818, + "epoch": 0.9938571093199781, "grad_norm": 0.0, - "learning_rate": 3.842903106394542e-06, - "loss": 0.8481, + "learning_rate": 1.979094984293495e-09, + "loss": 0.906, "step": 25401 }, { - "epoch": 0.7198277083510443, + "epoch": 0.9938962360122076, "grad_norm": 0.0, - "learning_rate": 3.842179939091389e-06, - "loss": 0.7711, + "learning_rate": 1.953964692760213e-09, + "loss": 0.9326, "step": 25402 }, { - "epoch": 0.7198560457933066, + "epoch": 0.9939353627044369, "grad_norm": 0.0, - "learning_rate": 3.841456823657839e-06, - "loss": 0.7678, + "learning_rate": 1.928994957317576e-09, + "loss": 0.921, "step": 25403 }, { - "epoch": 0.7198843832355691, + "epoch": 0.9939744893966664, "grad_norm": 0.0, - "learning_rate": 3.840733760099985e-06, - "loss": 0.9597, + "learning_rate": 1.904185778366374e-09, + "loss": 0.8656, "step": 25404 }, { - "epoch": 0.7199127206778316, + "epoch": 0.9940136160888958, "grad_norm": 0.0, - "learning_rate": 3.84001074842392e-06, - "loss": 0.7599, + "learning_rate": 1.879537156306288e-09, + "loss": 1.0543, "step": 25405 }, { - "epoch": 0.7199410581200941, + "epoch": 0.9940527427811253, "grad_norm": 0.0, - "learning_rate": 3.8392877886357296e-06, - "loss": 0.8021, + "learning_rate": 1.855049091532557e-09, + "loss": 0.8835, "step": 25406 }, { - "epoch": 0.7199693955623565, + "epoch": 0.9940918694733547, "grad_norm": 0.0, - "learning_rate": 3.838564880741506e-06, - "loss": 0.8451, + "learning_rate": 1.8307215844381998e-09, + "loss": 1.0612, "step": 25407 }, { - "epoch": 0.719997733004619, + "epoch": 0.9941309961655842, "grad_norm": 0.0, - "learning_rate": 3.837842024747341e-06, - "loss": 0.9204, + "learning_rate": 1.806554635414015e-09, + "loss": 1.0072, "step": 25408 }, { - "epoch": 0.7200260704468815, + "epoch": 0.9941701228578136, "grad_norm": 0.0, - "learning_rate": 3.837119220659318e-06, - "loss": 0.8494, + "learning_rate": 1.7825482448485808e-09, + "loss": 1.0204, "step": 25409 }, { - "epoch": 0.7200544078891439, + "epoch": 0.9942092495500431, "grad_norm": 0.0, - "learning_rate": 3.836396468483528e-06, - "loss": 0.8271, + "learning_rate": 1.7587024131260344e-09, + "loss": 0.8279, "step": 25410 }, { - "epoch": 0.7200827453314064, + "epoch": 0.9942483762422725, "grad_norm": 0.0, - "learning_rate": 3.835673768226059e-06, - "loss": 0.752, + "learning_rate": 1.7350171406316229e-09, + "loss": 0.9156, "step": 25411 }, { - "epoch": 0.7201110827736689, + "epoch": 0.994287502934502, "grad_norm": 0.0, - "learning_rate": 3.834951119893e-06, - "loss": 0.9171, + "learning_rate": 1.7114924277428223e-09, + "loss": 0.941, "step": 25412 }, { - "epoch": 0.7201394202159314, + "epoch": 0.9943266296267314, "grad_norm": 0.0, - "learning_rate": 3.83422852349044e-06, - "loss": 0.8375, + "learning_rate": 1.6881282748404392e-09, + "loss": 1.0009, "step": 25413 }, { - "epoch": 0.7201677576581937, + "epoch": 0.9943657563189608, "grad_norm": 0.0, - "learning_rate": 3.8335059790244585e-06, - "loss": 0.8471, + "learning_rate": 1.6649246822963982e-09, + "loss": 0.7682, "step": 25414 }, { - "epoch": 0.7201960951004562, + "epoch": 0.9944048830111902, "grad_norm": 0.0, - "learning_rate": 3.8327834865011475e-06, - "loss": 0.8956, + "learning_rate": 1.6418816504859548e-09, + "loss": 0.9282, "step": 25415 }, { - "epoch": 0.7202244325427187, + "epoch": 0.9944440097034196, "grad_norm": 0.0, - "learning_rate": 3.832061045926594e-06, - "loss": 0.8236, + "learning_rate": 1.618999179778813e-09, + "loss": 0.967, "step": 25416 }, { - "epoch": 0.7202527699849811, + "epoch": 0.9944831363956491, "grad_norm": 0.0, - "learning_rate": 3.831338657306877e-06, - "loss": 0.8432, + "learning_rate": 1.5962772705413465e-09, + "loss": 1.0701, "step": 25417 }, { - "epoch": 0.7202811074272436, + "epoch": 0.9945222630878785, "grad_norm": 0.0, - "learning_rate": 3.830616320648089e-06, - "loss": 0.9287, + "learning_rate": 1.5737159231388188e-09, + "loss": 1.0514, "step": 25418 }, { - "epoch": 0.7203094448695061, + "epoch": 0.994561389780108, "grad_norm": 0.0, - "learning_rate": 3.829894035956306e-06, - "loss": 0.8253, + "learning_rate": 1.5513151379331625e-09, + "loss": 0.9099, "step": 25419 }, { - "epoch": 0.7203377823117686, + "epoch": 0.9946005164723374, "grad_norm": 0.0, - "learning_rate": 3.829171803237618e-06, - "loss": 0.851, + "learning_rate": 1.5290749152852003e-09, + "loss": 0.9549, "step": 25420 }, { - "epoch": 0.720366119754031, + "epoch": 0.9946396431645669, "grad_norm": 0.0, - "learning_rate": 3.8284496224981116e-06, - "loss": 0.839, + "learning_rate": 1.5069952555513135e-09, + "loss": 0.8736, "step": 25421 }, { - "epoch": 0.7203944571962935, + "epoch": 0.9946787698567963, "grad_norm": 0.0, - "learning_rate": 3.827727493743861e-06, - "loss": 0.8826, + "learning_rate": 1.4850761590867734e-09, + "loss": 1.0094, "step": 25422 }, { - "epoch": 0.720422794638556, + "epoch": 0.9947178965490258, "grad_norm": 0.0, - "learning_rate": 3.827005416980955e-06, - "loss": 0.8314, + "learning_rate": 1.463317626242411e-09, + "loss": 0.8381, "step": 25423 }, { - "epoch": 0.7204511320808183, + "epoch": 0.9947570232412551, "grad_norm": 0.0, - "learning_rate": 3.826283392215473e-06, - "loss": 0.8381, + "learning_rate": 1.4417196573690561e-09, + "loss": 0.8656, "step": 25424 }, { - "epoch": 0.7204794695230808, + "epoch": 0.9947961499334846, "grad_norm": 0.0, - "learning_rate": 3.8255614194535e-06, - "loss": 0.8942, + "learning_rate": 1.4202822528119886e-09, + "loss": 0.9139, "step": 25425 }, { - "epoch": 0.7205078069653433, + "epoch": 0.994835276625714, "grad_norm": 0.0, - "learning_rate": 3.824839498701118e-06, - "loss": 0.804, + "learning_rate": 1.3990054129175979e-09, + "loss": 0.8534, "step": 25426 }, { - "epoch": 0.7205361444076057, + "epoch": 0.9948744033179435, "grad_norm": 0.0, - "learning_rate": 3.824117629964404e-06, - "loss": 0.784, + "learning_rate": 1.3778891380245018e-09, + "loss": 0.9438, "step": 25427 }, { - "epoch": 0.7205644818498682, + "epoch": 0.9949135300101729, "grad_norm": 0.0, - "learning_rate": 3.823395813249439e-06, - "loss": 0.7995, + "learning_rate": 1.3569334284746493e-09, + "loss": 0.9642, "step": 25428 }, { - "epoch": 0.7205928192921307, + "epoch": 0.9949526567024024, "grad_norm": 0.0, - "learning_rate": 3.822674048562309e-06, - "loss": 0.9778, + "learning_rate": 1.3361382846033278e-09, + "loss": 0.9786, "step": 25429 }, { - "epoch": 0.7206211567343932, + "epoch": 0.9949917833946318, "grad_norm": 0.0, - "learning_rate": 3.821952335909086e-06, - "loss": 0.8684, + "learning_rate": 1.315503706744714e-09, + "loss": 0.9281, "step": 25430 }, { - "epoch": 0.7206494941766556, + "epoch": 0.9950309100868613, "grad_norm": 0.0, - "learning_rate": 3.821230675295856e-06, - "loss": 0.7611, + "learning_rate": 1.295029695229655e-09, + "loss": 1.1008, "step": 25431 }, { - "epoch": 0.7206778316189181, + "epoch": 0.9950700367790907, "grad_norm": 0.0, - "learning_rate": 3.820509066728691e-06, - "loss": 0.8024, + "learning_rate": 1.274716250386776e-09, + "loss": 0.9137, "step": 25432 }, { - "epoch": 0.7207061690611806, + "epoch": 0.9951091634713202, "grad_norm": 0.0, - "learning_rate": 3.819787510213673e-06, - "loss": 0.8854, + "learning_rate": 1.2545633725435936e-09, + "loss": 0.8369, "step": 25433 }, { - "epoch": 0.720734506503443, + "epoch": 0.9951482901635496, "grad_norm": 0.0, - "learning_rate": 3.819066005756883e-06, - "loss": 0.9162, + "learning_rate": 1.234571062023182e-09, + "loss": 0.9237, "step": 25434 }, { - "epoch": 0.7207628439457054, + "epoch": 0.995187416855779, "grad_norm": 0.0, - "learning_rate": 3.818344553364392e-06, - "loss": 0.7787, + "learning_rate": 1.2147393191463963e-09, + "loss": 1.0163, "step": 25435 }, { - "epoch": 0.7207911813879679, + "epoch": 0.9952265435480084, "grad_norm": 0.0, - "learning_rate": 3.817623153042281e-06, - "loss": 0.8629, + "learning_rate": 1.1950681442307599e-09, + "loss": 0.9152, "step": 25436 }, { - "epoch": 0.7208195188302304, + "epoch": 0.9952656702402379, "grad_norm": 0.0, - "learning_rate": 3.816901804796624e-06, - "loss": 0.8081, + "learning_rate": 1.1755575375937966e-09, + "loss": 0.9299, "step": 25437 }, { - "epoch": 0.7208478562724928, + "epoch": 0.9953047969324673, "grad_norm": 0.0, - "learning_rate": 3.8161805086335e-06, - "loss": 0.7637, + "learning_rate": 1.1562074995485895e-09, + "loss": 0.8853, "step": 25438 }, { - "epoch": 0.7208761937147553, + "epoch": 0.9953439236246968, "grad_norm": 0.0, - "learning_rate": 3.815459264558988e-06, - "loss": 0.8999, + "learning_rate": 1.1370180304048906e-09, + "loss": 0.9113, "step": 25439 }, { - "epoch": 0.7209045311570178, + "epoch": 0.9953830503169262, "grad_norm": 0.0, - "learning_rate": 3.814738072579156e-06, - "loss": 0.8361, + "learning_rate": 1.117989130471342e-09, + "loss": 0.8663, "step": 25440 }, { - "epoch": 0.7209328685992802, + "epoch": 0.9954221770091557, "grad_norm": 0.0, - "learning_rate": 3.814016932700081e-06, - "loss": 0.7152, + "learning_rate": 1.099120800054365e-09, + "loss": 0.9245, "step": 25441 }, { - "epoch": 0.7209612060415427, + "epoch": 0.9954613037013851, "grad_norm": 0.0, - "learning_rate": 3.813295844927842e-06, - "loss": 0.8287, + "learning_rate": 1.0804130394548306e-09, + "loss": 1.0305, "step": 25442 }, { - "epoch": 0.7209895434838052, + "epoch": 0.9955004303936146, "grad_norm": 0.0, - "learning_rate": 3.8125748092685066e-06, - "loss": 0.8747, + "learning_rate": 1.0618658489758293e-09, + "loss": 0.982, "step": 25443 }, { - "epoch": 0.7210178809260677, + "epoch": 0.995539557085844, "grad_norm": 0.0, - "learning_rate": 3.811853825728151e-06, - "loss": 0.7555, + "learning_rate": 1.0434792289137908e-09, + "loss": 0.8974, "step": 25444 }, { - "epoch": 0.72104621836833, + "epoch": 0.9955786837780733, "grad_norm": 0.0, - "learning_rate": 3.8111328943128524e-06, - "loss": 0.9672, + "learning_rate": 1.0252531795629238e-09, + "loss": 1.0247, "step": 25445 }, { - "epoch": 0.7210745558105925, + "epoch": 0.9956178104703028, "grad_norm": 0.0, - "learning_rate": 3.810412015028676e-06, - "loss": 0.833, + "learning_rate": 1.0071877012185483e-09, + "loss": 0.9789, "step": 25446 }, { - "epoch": 0.721102893252855, + "epoch": 0.9956569371625322, "grad_norm": 0.0, - "learning_rate": 3.809691187881701e-06, - "loss": 0.8504, + "learning_rate": 9.892827941682114e-10, + "loss": 0.801, "step": 25447 }, { - "epoch": 0.7211312306951174, + "epoch": 0.9956960638547617, "grad_norm": 0.0, - "learning_rate": 3.8089704128779913e-06, - "loss": 0.8659, + "learning_rate": 9.715384587005715e-10, + "loss": 0.9821, "step": 25448 }, { - "epoch": 0.7211595681373799, + "epoch": 0.9957351905469911, "grad_norm": 0.0, - "learning_rate": 3.8082496900236244e-06, - "loss": 0.7809, + "learning_rate": 9.539546951009559e-10, + "loss": 0.8754, "step": 25449 }, { - "epoch": 0.7211879055796424, + "epoch": 0.9957743172392206, "grad_norm": 0.0, - "learning_rate": 3.8075290193246685e-06, - "loss": 0.7847, + "learning_rate": 9.36531503651361e-10, + "loss": 1.0411, "step": 25450 }, { - "epoch": 0.7212162430219048, + "epoch": 0.99581344393145, "grad_norm": 0.0, - "learning_rate": 3.8068084007871965e-06, - "loss": 0.7328, + "learning_rate": 9.192688846315634e-10, + "loss": 0.9886, "step": 25451 }, { - "epoch": 0.7212445804641673, + "epoch": 0.9958525706236795, "grad_norm": 0.0, - "learning_rate": 3.8060878344172793e-06, - "loss": 0.7584, + "learning_rate": 9.021668383180082e-10, + "loss": 1.0372, "step": 25452 }, { - "epoch": 0.7212729179064298, + "epoch": 0.9958916973159089, "grad_norm": 0.0, - "learning_rate": 3.805367320220981e-06, - "loss": 0.8385, + "learning_rate": 8.852253649871412e-10, + "loss": 0.9976, "step": 25453 }, { - "epoch": 0.7213012553486923, + "epoch": 0.9959308240081384, "grad_norm": 0.0, - "learning_rate": 3.804646858204375e-06, - "loss": 0.8528, + "learning_rate": 8.684444649087465e-10, + "loss": 0.9872, "step": 25454 }, { - "epoch": 0.7213295927909547, + "epoch": 0.9959699507003678, "grad_norm": 0.0, - "learning_rate": 3.8039264483735317e-06, - "loss": 0.8782, + "learning_rate": 8.518241383548286e-10, + "loss": 0.9803, "step": 25455 }, { - "epoch": 0.7213579302332171, + "epoch": 0.9960090773925973, "grad_norm": 0.0, - "learning_rate": 3.8032060907345137e-06, - "loss": 0.6863, + "learning_rate": 8.353643855907312e-10, + "loss": 0.8208, "step": 25456 }, { - "epoch": 0.7213862676754796, + "epoch": 0.9960482040848266, "grad_norm": 0.0, - "learning_rate": 3.8024857852933915e-06, - "loss": 0.8196, + "learning_rate": 8.19065206880687e-10, + "loss": 0.9204, "step": 25457 }, { - "epoch": 0.721414605117742, + "epoch": 0.9960873307770561, "grad_norm": 0.0, - "learning_rate": 3.8017655320562363e-06, - "loss": 0.7551, + "learning_rate": 8.029266024867088e-10, + "loss": 0.9674, "step": 25458 }, { - "epoch": 0.7214429425600045, + "epoch": 0.9961264574692855, "grad_norm": 0.0, - "learning_rate": 3.8010453310291086e-06, - "loss": 0.7667, + "learning_rate": 7.869485726674786e-10, + "loss": 0.9371, "step": 25459 }, { - "epoch": 0.721471280002267, + "epoch": 0.996165584161515, "grad_norm": 0.0, - "learning_rate": 3.800325182218082e-06, - "loss": 0.7127, + "learning_rate": 7.71131117680568e-10, + "loss": 0.8357, "step": 25460 }, { - "epoch": 0.7214996174445295, + "epoch": 0.9962047108537444, "grad_norm": 0.0, - "learning_rate": 3.7996050856292143e-06, - "loss": 0.9542, + "learning_rate": 7.554742377802183e-10, + "loss": 0.8564, "step": 25461 }, { - "epoch": 0.7215279548867919, + "epoch": 0.9962438375459739, "grad_norm": 0.0, - "learning_rate": 3.798885041268575e-06, - "loss": 0.9118, + "learning_rate": 7.399779332162293e-10, + "loss": 0.9948, "step": 25462 }, { - "epoch": 0.7215562923290544, + "epoch": 0.9962829642382033, "grad_norm": 0.0, - "learning_rate": 3.7981650491422305e-06, - "loss": 0.7828, + "learning_rate": 7.246422042395118e-10, + "loss": 1.0504, "step": 25463 }, { - "epoch": 0.7215846297713169, + "epoch": 0.9963220909304328, "grad_norm": 0.0, - "learning_rate": 3.7974451092562447e-06, - "loss": 0.857, + "learning_rate": 7.094670510943147e-10, + "loss": 0.9886, "step": 25464 }, { - "epoch": 0.7216129672135793, + "epoch": 0.9963612176226622, "grad_norm": 0.0, - "learning_rate": 3.7967252216166827e-06, - "loss": 0.6856, + "learning_rate": 6.944524740259973e-10, + "loss": 0.9395, "step": 25465 }, { - "epoch": 0.7216413046558418, + "epoch": 0.9964003443148917, "grad_norm": 0.0, - "learning_rate": 3.79600538622961e-06, - "loss": 0.791, + "learning_rate": 6.795984732743677e-10, + "loss": 1.0415, "step": 25466 }, { - "epoch": 0.7216696420981042, + "epoch": 0.996439471007121, "grad_norm": 0.0, - "learning_rate": 3.795285603101085e-06, - "loss": 0.8726, + "learning_rate": 6.649050490792341e-10, + "loss": 0.9453, "step": 25467 }, { - "epoch": 0.7216979795403667, + "epoch": 0.9964785976993505, "grad_norm": 0.0, - "learning_rate": 3.7945658722371768e-06, - "loss": 0.9086, + "learning_rate": 6.503722016748537e-10, + "loss": 0.9736, "step": 25468 }, { - "epoch": 0.7217263169826291, + "epoch": 0.9965177243915799, "grad_norm": 0.0, - "learning_rate": 3.793846193643941e-06, - "loss": 0.7799, + "learning_rate": 6.359999312965937e-10, + "loss": 0.9827, "step": 25469 }, { - "epoch": 0.7217546544248916, + "epoch": 0.9965568510838094, "grad_norm": 0.0, - "learning_rate": 3.793126567327443e-06, - "loss": 0.7441, + "learning_rate": 6.217882381731599e-10, + "loss": 0.9613, "step": 25470 }, { - "epoch": 0.7217829918671541, + "epoch": 0.9965959777760388, "grad_norm": 0.0, - "learning_rate": 3.792406993293748e-06, - "loss": 0.7984, + "learning_rate": 6.077371225343687e-10, + "loss": 0.9748, "step": 25471 }, { - "epoch": 0.7218113293094165, + "epoch": 0.9966351044682683, "grad_norm": 0.0, - "learning_rate": 3.7916874715489117e-06, - "loss": 0.8309, + "learning_rate": 5.938465846055952e-10, + "loss": 0.9392, "step": 25472 }, { - "epoch": 0.721839666751679, + "epoch": 0.9966742311604977, "grad_norm": 0.0, - "learning_rate": 3.790968002098997e-06, - "loss": 0.8179, + "learning_rate": 5.801166246099943e-10, + "loss": 0.9371, "step": 25473 }, { - "epoch": 0.7218680041939415, + "epoch": 0.9967133578527271, "grad_norm": 0.0, - "learning_rate": 3.790248584950067e-06, - "loss": 0.758, + "learning_rate": 5.665472427673901e-10, + "loss": 0.9531, "step": 25474 }, { - "epoch": 0.7218963416362039, + "epoch": 0.9967524845449566, "grad_norm": 0.0, - "learning_rate": 3.789529220108176e-06, - "loss": 0.7924, + "learning_rate": 5.531384392953865e-10, + "loss": 0.9703, "step": 25475 }, { - "epoch": 0.7219246790784664, + "epoch": 0.996791611237186, "grad_norm": 0.0, - "learning_rate": 3.788809907579387e-06, - "loss": 0.9009, + "learning_rate": 5.398902144104768e-10, + "loss": 1.0028, "step": 25476 }, { - "epoch": 0.7219530165207289, + "epoch": 0.9968307379294155, "grad_norm": 0.0, - "learning_rate": 3.788090647369759e-06, - "loss": 0.7622, + "learning_rate": 5.268025683258238e-10, + "loss": 0.9872, "step": 25477 }, { - "epoch": 0.7219813539629913, + "epoch": 0.9968698646216448, "grad_norm": 0.0, - "learning_rate": 3.787371439485349e-06, - "loss": 0.8817, + "learning_rate": 5.138755012501495e-10, + "loss": 0.885, "step": 25478 }, { - "epoch": 0.7220096914052537, + "epoch": 0.9969089913138743, "grad_norm": 0.0, - "learning_rate": 3.7866522839322207e-06, - "loss": 0.8478, + "learning_rate": 5.011090133910657e-10, + "loss": 0.9457, "step": 25479 }, { - "epoch": 0.7220380288475162, + "epoch": 0.9969481180061037, "grad_norm": 0.0, - "learning_rate": 3.7859331807164236e-06, - "loss": 0.9488, + "learning_rate": 4.885031049539635e-10, + "loss": 1.0286, "step": 25480 }, { - "epoch": 0.7220663662897787, + "epoch": 0.9969872446983332, "grad_norm": 0.0, - "learning_rate": 3.7852141298440227e-06, - "loss": 0.7889, + "learning_rate": 4.760577761420137e-10, + "loss": 0.9785, "step": 25481 }, { - "epoch": 0.7220947037320411, + "epoch": 0.9970263713905626, "grad_norm": 0.0, - "learning_rate": 3.7844951313210663e-06, - "loss": 0.9106, + "learning_rate": 4.6377302715394644e-10, + "loss": 0.7946, "step": 25482 }, { - "epoch": 0.7221230411743036, + "epoch": 0.9970654980827921, "grad_norm": 0.0, - "learning_rate": 3.783776185153617e-06, - "loss": 0.7728, + "learning_rate": 4.516488581884915e-10, + "loss": 0.9927, "step": 25483 }, { - "epoch": 0.7221513786165661, + "epoch": 0.9971046247750215, "grad_norm": 0.0, - "learning_rate": 3.7830572913477325e-06, - "loss": 0.8377, + "learning_rate": 4.396852694388276e-10, + "loss": 0.9003, "step": 25484 }, { - "epoch": 0.7221797160588286, + "epoch": 0.997143751467251, "grad_norm": 0.0, - "learning_rate": 3.7823384499094608e-06, - "loss": 0.8006, + "learning_rate": 4.278822610970235e-10, + "loss": 1.0372, "step": 25485 }, { - "epoch": 0.722208053501091, + "epoch": 0.9971828781594804, "grad_norm": 0.0, - "learning_rate": 3.7816196608448617e-06, - "loss": 0.8338, + "learning_rate": 4.162398333540374e-10, + "loss": 0.8668, "step": 25486 }, { - "epoch": 0.7222363909433535, + "epoch": 0.9972220048517099, "grad_norm": 0.0, - "learning_rate": 3.7809009241599927e-06, - "loss": 0.8025, + "learning_rate": 4.047579863963869e-10, + "loss": 1.0959, "step": 25487 }, { - "epoch": 0.722264728385616, + "epoch": 0.9972611315439392, "grad_norm": 0.0, - "learning_rate": 3.7801822398609013e-06, - "loss": 0.9322, + "learning_rate": 3.9343672040725866e-10, + "loss": 0.9311, "step": 25488 }, { - "epoch": 0.7222930658278783, + "epoch": 0.9973002582361687, "grad_norm": 0.0, - "learning_rate": 3.779463607953644e-06, - "loss": 0.8139, + "learning_rate": 3.822760355698396e-10, + "loss": 0.9571, "step": 25489 }, { - "epoch": 0.7223214032701408, + "epoch": 0.9973393849283981, "grad_norm": 0.0, - "learning_rate": 3.778745028444275e-06, - "loss": 0.7935, + "learning_rate": 3.712759320628756e-10, + "loss": 0.9797, "step": 25490 }, { - "epoch": 0.7223497407124033, + "epoch": 0.9973785116206276, "grad_norm": 0.0, - "learning_rate": 3.7780265013388475e-06, - "loss": 0.8061, + "learning_rate": 3.6043641006289207e-10, + "loss": 1.0429, "step": 25491 }, { - "epoch": 0.7223780781546658, + "epoch": 0.997417638312857, "grad_norm": 0.0, - "learning_rate": 3.777308026643416e-06, - "loss": 0.8547, + "learning_rate": 3.4975746974419413e-10, + "loss": 0.9656, "step": 25492 }, { - "epoch": 0.7224064155969282, + "epoch": 0.9974567650050865, "grad_norm": 0.0, - "learning_rate": 3.776589604364027e-06, - "loss": 0.8118, + "learning_rate": 3.39239111277756e-10, + "loss": 0.9276, "step": 25493 }, { - "epoch": 0.7224347530391907, + "epoch": 0.9974958916973159, "grad_norm": 0.0, - "learning_rate": 3.775871234506734e-06, - "loss": 0.87, + "learning_rate": 3.2888133483233163e-10, + "loss": 1.0541, "step": 25494 }, { - "epoch": 0.7224630904814532, + "epoch": 0.9975350183895454, "grad_norm": 0.0, - "learning_rate": 3.7751529170775925e-06, - "loss": 0.8482, + "learning_rate": 3.1868414057556475e-10, + "loss": 0.9312, "step": 25495 }, { - "epoch": 0.7224914279237156, + "epoch": 0.9975741450817748, "grad_norm": 0.0, - "learning_rate": 3.774434652082646e-06, - "loss": 0.8357, + "learning_rate": 3.0864752867065807e-10, + "loss": 1.0227, "step": 25496 }, { - "epoch": 0.7225197653659781, + "epoch": 0.9976132717740043, "grad_norm": 0.0, - "learning_rate": 3.7737164395279514e-06, - "loss": 0.8852, + "learning_rate": 2.987714992785939e-10, + "loss": 1.0142, "step": 25497 }, { - "epoch": 0.7225481028082406, + "epoch": 0.9976523984662337, "grad_norm": 0.0, - "learning_rate": 3.772998279419553e-06, - "loss": 0.7622, + "learning_rate": 2.8905605255702405e-10, + "loss": 0.8493, "step": 25498 }, { - "epoch": 0.7225764402505029, + "epoch": 0.9976915251584632, "grad_norm": 0.0, - "learning_rate": 3.7722801717635016e-06, - "loss": 0.8, + "learning_rate": 2.795011886636001e-10, + "loss": 1.065, "step": 25499 }, { - "epoch": 0.7226047776927654, + "epoch": 0.9977306518506925, "grad_norm": 0.0, - "learning_rate": 3.7715621165658466e-06, - "loss": 0.7889, + "learning_rate": 2.701069077504226e-10, + "loss": 0.8917, "step": 25500 }, { - "epoch": 0.7226331151350279, + "epoch": 0.997769778542922, "grad_norm": 0.0, - "learning_rate": 3.770844113832637e-06, - "loss": 0.8131, + "learning_rate": 2.608732099695921e-10, + "loss": 0.972, "step": 25501 }, { - "epoch": 0.7226614525772904, + "epoch": 0.9978089052351514, "grad_norm": 0.0, - "learning_rate": 3.7701261635699205e-06, - "loss": 0.83, + "learning_rate": 2.518000954687683e-10, + "loss": 0.9539, "step": 25502 }, { - "epoch": 0.7226897900195528, + "epoch": 0.9978480319273808, "grad_norm": 0.0, - "learning_rate": 3.7694082657837495e-06, - "loss": 0.8341, + "learning_rate": 2.4288756439339034e-10, + "loss": 0.9171, "step": 25503 }, { - "epoch": 0.7227181274618153, + "epoch": 0.9978871586196103, "grad_norm": 0.0, - "learning_rate": 3.768690420480161e-06, - "loss": 0.7022, + "learning_rate": 2.3413561688667706e-10, + "loss": 0.8685, "step": 25504 }, { - "epoch": 0.7227464649040778, + "epoch": 0.9979262853118397, "grad_norm": 0.0, - "learning_rate": 3.767972627665212e-06, - "loss": 0.9263, + "learning_rate": 2.255442530896268e-10, + "loss": 0.8392, "step": 25505 }, { - "epoch": 0.7227748023463402, + "epoch": 0.9979654120040692, "grad_norm": 0.0, - "learning_rate": 3.7672548873449388e-06, - "loss": 0.7941, + "learning_rate": 2.1711347313990715e-10, + "loss": 1.0085, "step": 25506 }, { - "epoch": 0.7228031397886027, + "epoch": 0.9980045386962986, "grad_norm": 0.0, - "learning_rate": 3.766537199525393e-06, - "loss": 0.8699, + "learning_rate": 2.0884327717296538e-10, + "loss": 1.0119, "step": 25507 }, { - "epoch": 0.7228314772308652, + "epoch": 0.9980436653885281, "grad_norm": 0.0, - "learning_rate": 3.765819564212623e-06, - "loss": 0.8845, + "learning_rate": 2.0073366532202821e-10, + "loss": 0.8276, "step": 25508 }, { - "epoch": 0.7228598146731277, + "epoch": 0.9980827920807575, "grad_norm": 0.0, - "learning_rate": 3.7651019814126656e-06, - "loss": 0.758, + "learning_rate": 1.927846377158815e-10, + "loss": 0.9329, "step": 25509 }, { - "epoch": 0.72288815211539, + "epoch": 0.998121918772987, "grad_norm": 0.0, - "learning_rate": 3.764384451131573e-06, - "loss": 0.8044, + "learning_rate": 1.8499619448331118e-10, + "loss": 1.004, "step": 25510 }, { - "epoch": 0.7229164895576525, + "epoch": 0.9981610454652163, "grad_norm": 0.0, - "learning_rate": 3.763666973375383e-06, - "loss": 0.8695, + "learning_rate": 1.773683357497724e-10, + "loss": 0.9632, "step": 25511 }, { - "epoch": 0.722944826999915, + "epoch": 0.9982001721574458, "grad_norm": 0.0, - "learning_rate": 3.762949548150141e-06, - "loss": 0.7318, + "learning_rate": 1.6990106163738972e-10, + "loss": 0.9595, "step": 25512 }, { - "epoch": 0.7229731644421774, + "epoch": 0.9982392988496752, "grad_norm": 0.0, - "learning_rate": 3.762232175461892e-06, - "loss": 0.8179, + "learning_rate": 1.62594372264957e-10, + "loss": 0.9314, "step": 25513 }, { - "epoch": 0.7230015018844399, + "epoch": 0.9982784255419047, "grad_norm": 0.0, - "learning_rate": 3.761514855316677e-06, - "loss": 0.837, + "learning_rate": 1.5544826775126808e-10, + "loss": 1.0457, "step": 25514 }, { - "epoch": 0.7230298393267024, + "epoch": 0.9983175522341341, "grad_norm": 0.0, - "learning_rate": 3.760797587720538e-06, - "loss": 0.7783, + "learning_rate": 1.4846274821067596e-10, + "loss": 1.0126, "step": 25515 }, { - "epoch": 0.7230581767689649, + "epoch": 0.9983566789263636, "grad_norm": 0.0, - "learning_rate": 3.760080372679523e-06, - "loss": 0.8323, + "learning_rate": 1.4163781375531316e-10, + "loss": 1.0237, "step": 25516 }, { - "epoch": 0.7230865142112273, + "epoch": 0.998395805618593, "grad_norm": 0.0, - "learning_rate": 3.759363210199665e-06, - "loss": 0.8494, + "learning_rate": 1.3497346449509175e-10, + "loss": 1.0689, "step": 25517 }, { - "epoch": 0.7231148516534898, + "epoch": 0.9984349323108225, "grad_norm": 0.0, - "learning_rate": 3.75864610028701e-06, - "loss": 0.8007, + "learning_rate": 1.2846970053548292e-10, + "loss": 1.0157, "step": 25518 }, { - "epoch": 0.7231431890957523, + "epoch": 0.9984740590030519, "grad_norm": 0.0, - "learning_rate": 3.7579290429475933e-06, - "loss": 0.8617, + "learning_rate": 1.2212652198306807e-10, + "loss": 0.9278, "step": 25519 }, { - "epoch": 0.7231715265380146, + "epoch": 0.9985131856952814, "grad_norm": 0.0, - "learning_rate": 3.757212038187459e-06, - "loss": 0.8515, + "learning_rate": 1.1594392893776729e-10, + "loss": 0.9198, "step": 25520 }, { - "epoch": 0.7231998639802771, + "epoch": 0.9985523123875107, "grad_norm": 0.0, - "learning_rate": 3.756495086012648e-06, - "loss": 0.8503, + "learning_rate": 1.0992192150061087e-10, + "loss": 1.0786, "step": 25521 }, { - "epoch": 0.7232282014225396, + "epoch": 0.9985914390797402, "grad_norm": 0.0, - "learning_rate": 3.7557781864291954e-06, - "loss": 0.7979, + "learning_rate": 1.0406049976818822e-10, + "loss": 0.9662, "step": 25522 }, { - "epoch": 0.723256538864802, + "epoch": 0.9986305657719696, "grad_norm": 0.0, - "learning_rate": 3.7550613394431413e-06, - "loss": 0.9012, + "learning_rate": 9.835966383264784e-11, + "loss": 0.9218, "step": 25523 }, { - "epoch": 0.7232848763070645, + "epoch": 0.9986696924641991, "grad_norm": 0.0, - "learning_rate": 3.754344545060529e-06, - "loss": 0.7919, + "learning_rate": 9.281941378835869e-11, + "loss": 0.8834, "step": 25524 }, { - "epoch": 0.723313213749327, + "epoch": 0.9987088191564285, "grad_norm": 0.0, - "learning_rate": 3.753627803287386e-06, - "loss": 0.791, + "learning_rate": 8.743974972191816e-11, + "loss": 1.013, "step": 25525 }, { - "epoch": 0.7233415511915895, + "epoch": 0.998747945848658, "grad_norm": 0.0, - "learning_rate": 3.7529111141297582e-06, - "loss": 0.8268, + "learning_rate": 8.222067171992365e-11, + "loss": 0.9872, "step": 25526 }, { - "epoch": 0.7233698886338519, + "epoch": 0.9987870725408874, "grad_norm": 0.0, - "learning_rate": 3.7521944775936782e-06, - "loss": 0.7338, + "learning_rate": 7.716217986786234e-11, + "loss": 1.0458, "step": 25527 }, { - "epoch": 0.7233982260761144, + "epoch": 0.9988261992331169, "grad_norm": 0.0, - "learning_rate": 3.7514778936851846e-06, - "loss": 0.8963, + "learning_rate": 7.226427424567028e-11, + "loss": 0.9969, "step": 25528 }, { - "epoch": 0.7234265635183769, + "epoch": 0.9988653259253463, "grad_norm": 0.0, - "learning_rate": 3.7507613624103167e-06, - "loss": 0.8047, + "learning_rate": 6.752695493217331e-11, + "loss": 1.0835, "step": 25529 }, { - "epoch": 0.7234549009606392, + "epoch": 0.9989044526175757, "grad_norm": 0.0, - "learning_rate": 3.7500448837751024e-06, - "loss": 0.8091, + "learning_rate": 6.295022200397682e-11, + "loss": 0.8658, "step": 25530 }, { - "epoch": 0.7234832384029017, + "epoch": 0.9989435793098052, "grad_norm": 0.0, - "learning_rate": 3.7493284577855816e-06, - "loss": 0.7816, + "learning_rate": 5.85340755332453e-11, + "loss": 0.9093, "step": 25531 }, { - "epoch": 0.7235115758451642, + "epoch": 0.9989827060020345, "grad_norm": 0.0, - "learning_rate": 3.7486120844477914e-06, - "loss": 0.8206, + "learning_rate": 5.427851559325348e-11, + "loss": 0.9929, "step": 25532 }, { - "epoch": 0.7235399132874267, + "epoch": 0.999021832694264, "grad_norm": 0.0, - "learning_rate": 3.7478957637677593e-06, - "loss": 0.8794, + "learning_rate": 5.01835422495045e-11, + "loss": 0.9086, "step": 25533 }, { - "epoch": 0.7235682507296891, + "epoch": 0.9990609593864934, "grad_norm": 0.0, - "learning_rate": 3.747179495751527e-06, - "loss": 0.7966, + "learning_rate": 4.6249155569721983e-11, + "loss": 0.8902, "step": 25534 }, { - "epoch": 0.7235965881719516, + "epoch": 0.9991000860787229, "grad_norm": 0.0, - "learning_rate": 3.7464632804051204e-06, - "loss": 0.7625, + "learning_rate": 4.247535561718863e-11, + "loss": 1.0076, "step": 25535 }, { - "epoch": 0.7236249256142141, + "epoch": 0.9991392127709523, "grad_norm": 0.0, - "learning_rate": 3.745747117734575e-06, - "loss": 0.9256, + "learning_rate": 3.886214245074627e-11, + "loss": 0.9008, "step": 25536 }, { - "epoch": 0.7236532630564765, + "epoch": 0.9991783394631818, "grad_norm": 0.0, - "learning_rate": 3.7450310077459274e-06, - "loss": 0.7854, + "learning_rate": 3.5409516130346936e-11, + "loss": 0.9891, "step": 25537 }, { - "epoch": 0.723681600498739, + "epoch": 0.9992174661554112, "grad_norm": 0.0, - "learning_rate": 3.7443149504452036e-06, - "loss": 0.7628, + "learning_rate": 3.211747671039156e-11, + "loss": 0.994, "step": 25538 }, { - "epoch": 0.7237099379410015, + "epoch": 0.9992565928476407, "grad_norm": 0.0, - "learning_rate": 3.743598945838438e-06, - "loss": 0.9047, + "learning_rate": 2.898602424417085e-11, + "loss": 0.9009, "step": 25539 }, { - "epoch": 0.7237382753832639, + "epoch": 0.9992957195398701, "grad_norm": 0.0, - "learning_rate": 3.7428829939316615e-06, - "loss": 0.7528, + "learning_rate": 2.6015158781644844e-11, + "loss": 0.9208, "step": 25540 }, { - "epoch": 0.7237666128255263, + "epoch": 0.9993348462320996, "grad_norm": 0.0, - "learning_rate": 3.7421670947309054e-06, - "loss": 0.784, + "learning_rate": 2.320488037055313e-11, + "loss": 0.9936, "step": 25541 }, { - "epoch": 0.7237949502677888, + "epoch": 0.999373972924329, "grad_norm": 0.0, - "learning_rate": 3.7414512482422026e-06, - "loss": 0.8374, + "learning_rate": 2.0555189055304627e-11, + "loss": 1.0728, "step": 25542 }, { - "epoch": 0.7238232877100513, + "epoch": 0.9994130996165584, "grad_norm": 0.0, - "learning_rate": 3.740735454471577e-06, - "loss": 0.8699, + "learning_rate": 1.8066084880308255e-11, + "loss": 0.9539, "step": 25543 }, { - "epoch": 0.7238516251523137, + "epoch": 0.9994522263087878, "grad_norm": 0.0, - "learning_rate": 3.7400197134250606e-06, - "loss": 0.8563, + "learning_rate": 1.5737567884421824e-11, + "loss": 0.8921, "step": 25544 }, { - "epoch": 0.7238799625945762, + "epoch": 0.9994913530010173, "grad_norm": 0.0, - "learning_rate": 3.7393040251086854e-06, - "loss": 0.8396, + "learning_rate": 1.3569638105392913e-11, + "loss": 0.9147, "step": 25545 }, { - "epoch": 0.7239083000368387, + "epoch": 0.9995304796932467, "grad_norm": 0.0, - "learning_rate": 3.7385883895284747e-06, - "loss": 0.8111, + "learning_rate": 1.1562295576528216e-11, + "loss": 1.0902, "step": 25546 }, { - "epoch": 0.7239366374791011, + "epoch": 0.9995696063854762, "grad_norm": 0.0, - "learning_rate": 3.7378728066904624e-06, - "loss": 0.8832, + "learning_rate": 9.715540332244645e-12, + "loss": 1.018, "step": 25547 }, { - "epoch": 0.7239649749213636, + "epoch": 0.9996087330777056, "grad_norm": 0.0, - "learning_rate": 3.7371572766006705e-06, - "loss": 0.8614, + "learning_rate": 8.029372401407997e-12, + "loss": 0.8777, "step": 25548 }, { - "epoch": 0.7239933123636261, + "epoch": 0.9996478597699351, "grad_norm": 0.0, - "learning_rate": 3.736441799265127e-06, - "loss": 0.7997, + "learning_rate": 6.503791810663629e-12, + "loss": 0.892, "step": 25549 }, { - "epoch": 0.7240216498058886, + "epoch": 0.9996869864621645, "grad_norm": 0.0, - "learning_rate": 3.735726374689864e-06, - "loss": 0.8374, + "learning_rate": 5.138798584436444e-12, + "loss": 0.9996, "step": 25550 }, { - "epoch": 0.724049987248151, + "epoch": 0.999726113154394, "grad_norm": 0.0, - "learning_rate": 3.7350110028809004e-06, - "loss": 0.7078, + "learning_rate": 3.934392744930904e-12, + "loss": 1.0075, "step": 25551 }, { - "epoch": 0.7240783246904134, + "epoch": 0.9997652398466234, "grad_norm": 0.0, - "learning_rate": 3.7342956838442658e-06, - "loss": 0.7569, + "learning_rate": 2.8905743121310225e-12, + "loss": 0.9733, "step": 25552 }, { - "epoch": 0.7241066621326759, + "epoch": 0.9998043665388529, "grad_norm": 0.0, - "learning_rate": 3.7335804175859856e-06, - "loss": 0.7597, + "learning_rate": 2.0073433015799227e-12, + "loss": 1.0447, "step": 25553 }, { - "epoch": 0.7241349995749383, + "epoch": 0.9998434932310822, "grad_norm": 0.0, - "learning_rate": 3.732865204112084e-06, - "loss": 0.7938, + "learning_rate": 1.2846997288207263e-12, + "loss": 1.0254, "step": 25554 }, { - "epoch": 0.7241633370172008, + "epoch": 0.9998826199233117, "grad_norm": 0.0, - "learning_rate": 3.7321500434285895e-06, - "loss": 0.7309, + "learning_rate": 7.22643603845441e-13, + "loss": 1.0316, "step": 25555 }, { - "epoch": 0.7241916744594633, + "epoch": 0.9999217466155411, "grad_norm": 0.0, - "learning_rate": 3.7314349355415193e-06, - "loss": 0.7868, + "learning_rate": 3.211749377562967e-13, + "loss": 1.054, "step": 25556 }, { - "epoch": 0.7242200119017258, + "epoch": 0.9999608733077706, "grad_norm": 0.0, - "learning_rate": 3.7307198804569013e-06, - "loss": 0.9165, + "learning_rate": 8.029373499418569e-14, + "loss": 0.9539, "step": 25557 }, { - "epoch": 0.7242483493439882, + "epoch": 1.0, "grad_norm": 0.0, - "learning_rate": 3.7300048781807598e-06, - "loss": 0.7546, + "learning_rate": 0.0, + "loss": 0.8918, "step": 25558 }, - { - "epoch": 0.7242766867862507, - "grad_norm": 0.0, - "learning_rate": 3.729289928719113e-06, - "loss": 0.9181, - "step": 25559 - }, - { - "epoch": 0.7243050242285132, - "grad_norm": 0.0, - "learning_rate": 3.7285750320779857e-06, - "loss": 0.8793, - "step": 25560 - }, - { - "epoch": 0.7243333616707756, - "grad_norm": 0.0, - "learning_rate": 3.7278601882634026e-06, - "loss": 0.7757, - "step": 25561 - }, - { - "epoch": 0.724361699113038, - "grad_norm": 0.0, - "learning_rate": 3.72714539728138e-06, - "loss": 0.8867, - "step": 25562 - }, - { - "epoch": 0.7243900365553005, - "grad_norm": 0.0, - "learning_rate": 3.726430659137943e-06, - "loss": 0.9992, - "step": 25563 - }, - { - "epoch": 0.7244183739975629, - "grad_norm": 0.0, - "learning_rate": 3.725715973839108e-06, - "loss": 0.7459, - "step": 25564 - }, - { - "epoch": 0.7244467114398254, - "grad_norm": 0.0, - "learning_rate": 3.725001341390896e-06, - "loss": 0.8133, - "step": 25565 - }, - { - "epoch": 0.7244750488820879, - "grad_norm": 0.0, - "learning_rate": 3.7242867617993283e-06, - "loss": 0.8824, - "step": 25566 - }, - { - "epoch": 0.7245033863243504, - "grad_norm": 0.0, - "learning_rate": 3.723572235070424e-06, - "loss": 0.7807, - "step": 25567 - }, - { - "epoch": 0.7245317237666128, - "grad_norm": 0.0, - "learning_rate": 3.7228577612102047e-06, - "loss": 0.736, - "step": 25568 - }, - { - "epoch": 0.7245600612088753, - "grad_norm": 0.0, - "learning_rate": 3.722143340224682e-06, - "loss": 0.7706, - "step": 25569 - }, - { - "epoch": 0.7245883986511378, - "grad_norm": 0.0, - "learning_rate": 3.721428972119878e-06, - "loss": 0.7727, - "step": 25570 - }, - { - "epoch": 0.7246167360934002, - "grad_norm": 0.0, - "learning_rate": 3.7207146569018114e-06, - "loss": 0.7787, - "step": 25571 - }, - { - "epoch": 0.7246450735356627, - "grad_norm": 0.0, - "learning_rate": 3.720000394576494e-06, - "loss": 0.8673, - "step": 25572 - }, - { - "epoch": 0.7246734109779251, - "grad_norm": 0.0, - "learning_rate": 3.7192861851499463e-06, - "loss": 0.7845, - "step": 25573 - }, - { - "epoch": 0.7247017484201876, - "grad_norm": 0.0, - "learning_rate": 3.7185720286281855e-06, - "loss": 0.8259, - "step": 25574 - }, - { - "epoch": 0.72473008586245, - "grad_norm": 0.0, - "learning_rate": 3.717857925017222e-06, - "loss": 0.6802, - "step": 25575 - }, - { - "epoch": 0.7247584233047125, - "grad_norm": 0.0, - "learning_rate": 3.717143874323077e-06, - "loss": 0.7677, - "step": 25576 - }, - { - "epoch": 0.724786760746975, - "grad_norm": 0.0, - "learning_rate": 3.7164298765517594e-06, - "loss": 0.8788, - "step": 25577 - }, - { - "epoch": 0.7248150981892374, - "grad_norm": 0.0, - "learning_rate": 3.715715931709286e-06, - "loss": 0.8348, - "step": 25578 - }, - { - "epoch": 0.7248434356314999, - "grad_norm": 0.0, - "learning_rate": 3.7150020398016717e-06, - "loss": 0.9135, - "step": 25579 - }, - { - "epoch": 0.7248717730737624, - "grad_norm": 0.0, - "learning_rate": 3.7142882008349277e-06, - "loss": 0.8042, - "step": 25580 - }, - { - "epoch": 0.7249001105160249, - "grad_norm": 0.0, - "learning_rate": 3.7135744148150698e-06, - "loss": 0.9196, - "step": 25581 - }, - { - "epoch": 0.7249284479582873, - "grad_norm": 0.0, - "learning_rate": 3.712860681748112e-06, - "loss": 0.8806, - "step": 25582 - }, - { - "epoch": 0.7249567854005498, - "grad_norm": 0.0, - "learning_rate": 3.7121470016400585e-06, - "loss": 0.8356, - "step": 25583 - }, - { - "epoch": 0.7249851228428122, - "grad_norm": 0.0, - "learning_rate": 3.7114333744969312e-06, - "loss": 0.8871, - "step": 25584 - }, - { - "epoch": 0.7250134602850746, - "grad_norm": 0.0, - "learning_rate": 3.7107198003247314e-06, - "loss": 0.866, - "step": 25585 - }, - { - "epoch": 0.7250417977273371, - "grad_norm": 0.0, - "learning_rate": 3.7100062791294746e-06, - "loss": 0.8787, - "step": 25586 - }, - { - "epoch": 0.7250701351695996, - "grad_norm": 0.0, - "learning_rate": 3.7092928109171734e-06, - "loss": 0.7182, - "step": 25587 - }, - { - "epoch": 0.725098472611862, - "grad_norm": 0.0, - "learning_rate": 3.708579395693831e-06, - "loss": 0.763, - "step": 25588 - }, - { - "epoch": 0.7251268100541245, - "grad_norm": 0.0, - "learning_rate": 3.7078660334654616e-06, - "loss": 0.8615, - "step": 25589 - }, - { - "epoch": 0.725155147496387, - "grad_norm": 0.0, - "learning_rate": 3.7071527242380767e-06, - "loss": 0.8326, - "step": 25590 - }, - { - "epoch": 0.7251834849386495, - "grad_norm": 0.0, - "learning_rate": 3.7064394680176774e-06, - "loss": 0.7224, - "step": 25591 - }, - { - "epoch": 0.7252118223809119, - "grad_norm": 0.0, - "learning_rate": 3.7057262648102755e-06, - "loss": 0.8044, - "step": 25592 - }, - { - "epoch": 0.7252401598231744, - "grad_norm": 0.0, - "learning_rate": 3.705013114621878e-06, - "loss": 0.7938, - "step": 25593 - }, - { - "epoch": 0.7252684972654369, - "grad_norm": 0.0, - "learning_rate": 3.7043000174584932e-06, - "loss": 0.8547, - "step": 25594 - }, - { - "epoch": 0.7252968347076992, - "grad_norm": 0.0, - "learning_rate": 3.703586973326131e-06, - "loss": 0.8883, - "step": 25595 - }, - { - "epoch": 0.7253251721499617, - "grad_norm": 0.0, - "learning_rate": 3.7028739822307902e-06, - "loss": 0.8617, - "step": 25596 - }, - { - "epoch": 0.7253535095922242, - "grad_norm": 0.0, - "learning_rate": 3.702161044178484e-06, - "loss": 0.8053, - "step": 25597 - }, - { - "epoch": 0.7253818470344867, - "grad_norm": 0.0, - "learning_rate": 3.7014481591752093e-06, - "loss": 0.7933, - "step": 25598 - }, - { - "epoch": 0.7254101844767491, - "grad_norm": 0.0, - "learning_rate": 3.7007353272269764e-06, - "loss": 0.7148, - "step": 25599 - }, - { - "epoch": 0.7254385219190116, - "grad_norm": 0.0, - "learning_rate": 3.700022548339792e-06, - "loss": 0.8287, - "step": 25600 - }, - { - "epoch": 0.7254668593612741, - "grad_norm": 0.0, - "learning_rate": 3.6993098225196544e-06, - "loss": 0.9185, - "step": 25601 - }, - { - "epoch": 0.7254951968035365, - "grad_norm": 0.0, - "learning_rate": 3.6985971497725705e-06, - "loss": 0.9228, - "step": 25602 - }, - { - "epoch": 0.725523534245799, - "grad_norm": 0.0, - "learning_rate": 3.6978845301045452e-06, - "loss": 0.9017, - "step": 25603 - }, - { - "epoch": 0.7255518716880615, - "grad_norm": 0.0, - "learning_rate": 3.6971719635215753e-06, - "loss": 0.7758, - "step": 25604 - }, - { - "epoch": 0.725580209130324, - "grad_norm": 0.0, - "learning_rate": 3.696459450029666e-06, - "loss": 0.8337, - "step": 25605 - }, - { - "epoch": 0.7256085465725863, - "grad_norm": 0.0, - "learning_rate": 3.695746989634821e-06, - "loss": 0.9484, - "step": 25606 - }, - { - "epoch": 0.7256368840148488, - "grad_norm": 0.0, - "learning_rate": 3.695034582343039e-06, - "loss": 0.8218, - "step": 25607 - }, - { - "epoch": 0.7256652214571113, - "grad_norm": 0.0, - "learning_rate": 3.6943222281603252e-06, - "loss": 0.8634, - "step": 25608 - }, - { - "epoch": 0.7256935588993737, - "grad_norm": 0.0, - "learning_rate": 3.693609927092674e-06, - "loss": 0.8205, - "step": 25609 - }, - { - "epoch": 0.7257218963416362, - "grad_norm": 0.0, - "learning_rate": 3.692897679146088e-06, - "loss": 0.8152, - "step": 25610 - }, - { - "epoch": 0.7257502337838987, - "grad_norm": 0.0, - "learning_rate": 3.6921854843265704e-06, - "loss": 0.8511, - "step": 25611 - }, - { - "epoch": 0.7257785712261611, - "grad_norm": 0.0, - "learning_rate": 3.691473342640114e-06, - "loss": 0.711, - "step": 25612 - }, - { - "epoch": 0.7258069086684236, - "grad_norm": 0.0, - "learning_rate": 3.6907612540927227e-06, - "loss": 0.7704, - "step": 25613 - }, - { - "epoch": 0.7258352461106861, - "grad_norm": 0.0, - "learning_rate": 3.6900492186903893e-06, - "loss": 0.9429, - "step": 25614 - }, - { - "epoch": 0.7258635835529486, - "grad_norm": 0.0, - "learning_rate": 3.6893372364391145e-06, - "loss": 0.7647, - "step": 25615 - }, - { - "epoch": 0.7258919209952109, - "grad_norm": 0.0, - "learning_rate": 3.6886253073448984e-06, - "loss": 0.8686, - "step": 25616 - }, - { - "epoch": 0.7259202584374734, - "grad_norm": 0.0, - "learning_rate": 3.687913431413732e-06, - "loss": 0.8996, - "step": 25617 - }, - { - "epoch": 0.7259485958797359, - "grad_norm": 0.0, - "learning_rate": 3.6872016086516148e-06, - "loss": 0.8498, - "step": 25618 - }, - { - "epoch": 0.7259769333219983, - "grad_norm": 0.0, - "learning_rate": 3.6864898390645434e-06, - "loss": 0.8876, - "step": 25619 - }, - { - "epoch": 0.7260052707642608, - "grad_norm": 0.0, - "learning_rate": 3.685778122658511e-06, - "loss": 0.9671, - "step": 25620 - }, - { - "epoch": 0.7260336082065233, - "grad_norm": 0.0, - "learning_rate": 3.685066459439518e-06, - "loss": 0.8271, - "step": 25621 - }, - { - "epoch": 0.7260619456487858, - "grad_norm": 0.0, - "learning_rate": 3.6843548494135527e-06, - "loss": 0.7942, - "step": 25622 - }, - { - "epoch": 0.7260902830910482, - "grad_norm": 0.0, - "learning_rate": 3.6836432925866107e-06, - "loss": 0.8008, - "step": 25623 - }, - { - "epoch": 0.7261186205333107, - "grad_norm": 0.0, - "learning_rate": 3.682931788964691e-06, - "loss": 0.7757, - "step": 25624 - }, - { - "epoch": 0.7261469579755732, - "grad_norm": 0.0, - "learning_rate": 3.682220338553779e-06, - "loss": 0.9131, - "step": 25625 - }, - { - "epoch": 0.7261752954178355, - "grad_norm": 0.0, - "learning_rate": 3.6815089413598746e-06, - "loss": 0.7909, - "step": 25626 - }, - { - "epoch": 0.726203632860098, - "grad_norm": 0.0, - "learning_rate": 3.680797597388963e-06, - "loss": 0.8005, - "step": 25627 - }, - { - "epoch": 0.7262319703023605, - "grad_norm": 0.0, - "learning_rate": 3.6800863066470406e-06, - "loss": 0.8765, - "step": 25628 - }, - { - "epoch": 0.726260307744623, - "grad_norm": 0.0, - "learning_rate": 3.6793750691400996e-06, - "loss": 0.8338, - "step": 25629 - }, - { - "epoch": 0.7262886451868854, - "grad_norm": 0.0, - "learning_rate": 3.6786638848741273e-06, - "loss": 0.8036, - "step": 25630 - }, - { - "epoch": 0.7263169826291479, - "grad_norm": 0.0, - "learning_rate": 3.6779527538551173e-06, - "loss": 0.7961, - "step": 25631 - }, - { - "epoch": 0.7263453200714104, - "grad_norm": 0.0, - "learning_rate": 3.6772416760890584e-06, - "loss": 0.7928, - "step": 25632 - }, - { - "epoch": 0.7263736575136728, - "grad_norm": 0.0, - "learning_rate": 3.6765306515819398e-06, - "loss": 0.7697, - "step": 25633 - }, - { - "epoch": 0.7264019949559353, - "grad_norm": 0.0, - "learning_rate": 3.675819680339756e-06, - "loss": 0.8113, - "step": 25634 - }, - { - "epoch": 0.7264303323981978, - "grad_norm": 0.0, - "learning_rate": 3.6751087623684877e-06, - "loss": 0.9429, - "step": 25635 - }, - { - "epoch": 0.7264586698404601, - "grad_norm": 0.0, - "learning_rate": 3.6743978976741267e-06, - "loss": 0.9535, - "step": 25636 - }, - { - "epoch": 0.7264870072827226, - "grad_norm": 0.0, - "learning_rate": 3.673687086262665e-06, - "loss": 0.8932, - "step": 25637 - }, - { - "epoch": 0.7265153447249851, - "grad_norm": 0.0, - "learning_rate": 3.672976328140083e-06, - "loss": 0.8467, - "step": 25638 - }, - { - "epoch": 0.7265436821672476, - "grad_norm": 0.0, - "learning_rate": 3.672265623312371e-06, - "loss": 0.8172, - "step": 25639 - }, - { - "epoch": 0.72657201960951, - "grad_norm": 0.0, - "learning_rate": 3.671554971785518e-06, - "loss": 0.8488, - "step": 25640 - }, - { - "epoch": 0.7266003570517725, - "grad_norm": 0.0, - "learning_rate": 3.6708443735655054e-06, - "loss": 0.8903, - "step": 25641 - }, - { - "epoch": 0.726628694494035, - "grad_norm": 0.0, - "learning_rate": 3.6701338286583234e-06, - "loss": 0.8171, - "step": 25642 - }, - { - "epoch": 0.7266570319362974, - "grad_norm": 0.0, - "learning_rate": 3.6694233370699508e-06, - "loss": 0.7446, - "step": 25643 - }, - { - "epoch": 0.7266853693785599, - "grad_norm": 0.0, - "learning_rate": 3.6687128988063768e-06, - "loss": 0.7875, - "step": 25644 - }, - { - "epoch": 0.7267137068208224, - "grad_norm": 0.0, - "learning_rate": 3.6680025138735853e-06, - "loss": 0.7607, - "step": 25645 - }, - { - "epoch": 0.7267420442630849, - "grad_norm": 0.0, - "learning_rate": 3.66729218227756e-06, - "loss": 0.9339, - "step": 25646 - }, - { - "epoch": 0.7267703817053472, - "grad_norm": 0.0, - "learning_rate": 3.6665819040242867e-06, - "loss": 0.863, - "step": 25647 - }, - { - "epoch": 0.7267987191476097, - "grad_norm": 0.0, - "learning_rate": 3.6658716791197436e-06, - "loss": 0.6953, - "step": 25648 - }, - { - "epoch": 0.7268270565898722, - "grad_norm": 0.0, - "learning_rate": 3.665161507569914e-06, - "loss": 0.8357, - "step": 25649 - }, - { - "epoch": 0.7268553940321346, - "grad_norm": 0.0, - "learning_rate": 3.6644513893807866e-06, - "loss": 0.8233, - "step": 25650 - }, - { - "epoch": 0.7268837314743971, - "grad_norm": 0.0, - "learning_rate": 3.663741324558333e-06, - "loss": 0.7356, - "step": 25651 - }, - { - "epoch": 0.7269120689166596, - "grad_norm": 0.0, - "learning_rate": 3.6630313131085404e-06, - "loss": 0.8119, - "step": 25652 - }, - { - "epoch": 0.7269404063589221, - "grad_norm": 0.0, - "learning_rate": 3.6623213550373903e-06, - "loss": 0.7773, - "step": 25653 - }, - { - "epoch": 0.7269687438011845, - "grad_norm": 0.0, - "learning_rate": 3.661611450350858e-06, - "loss": 0.8887, - "step": 25654 - }, - { - "epoch": 0.726997081243447, - "grad_norm": 0.0, - "learning_rate": 3.66090159905493e-06, - "loss": 0.7913, - "step": 25655 - }, - { - "epoch": 0.7270254186857095, - "grad_norm": 0.0, - "learning_rate": 3.6601918011555783e-06, - "loss": 0.731, - "step": 25656 - }, - { - "epoch": 0.7270537561279719, - "grad_norm": 0.0, - "learning_rate": 3.6594820566587854e-06, - "loss": 0.8457, - "step": 25657 - }, - { - "epoch": 0.7270820935702343, - "grad_norm": 0.0, - "learning_rate": 3.658772365570529e-06, - "loss": 0.905, - "step": 25658 - }, - { - "epoch": 0.7271104310124968, - "grad_norm": 0.0, - "learning_rate": 3.6580627278967883e-06, - "loss": 0.9763, - "step": 25659 - }, - { - "epoch": 0.7271387684547592, - "grad_norm": 0.0, - "learning_rate": 3.6573531436435395e-06, - "loss": 0.8227, - "step": 25660 - }, - { - "epoch": 0.7271671058970217, - "grad_norm": 0.0, - "learning_rate": 3.6566436128167647e-06, - "loss": 0.775, - "step": 25661 - }, - { - "epoch": 0.7271954433392842, - "grad_norm": 0.0, - "learning_rate": 3.655934135422432e-06, - "loss": 0.8144, - "step": 25662 - }, - { - "epoch": 0.7272237807815467, - "grad_norm": 0.0, - "learning_rate": 3.6552247114665264e-06, - "loss": 0.8392, - "step": 25663 - }, - { - "epoch": 0.7272521182238091, - "grad_norm": 0.0, - "learning_rate": 3.654515340955015e-06, - "loss": 0.7952, - "step": 25664 - }, - { - "epoch": 0.7272804556660716, - "grad_norm": 0.0, - "learning_rate": 3.6538060238938777e-06, - "loss": 0.8141, - "step": 25665 - }, - { - "epoch": 0.7273087931083341, - "grad_norm": 0.0, - "learning_rate": 3.6530967602890923e-06, - "loss": 0.7624, - "step": 25666 - }, - { - "epoch": 0.7273371305505965, - "grad_norm": 0.0, - "learning_rate": 3.6523875501466255e-06, - "loss": 0.7942, - "step": 25667 - }, - { - "epoch": 0.727365467992859, - "grad_norm": 0.0, - "learning_rate": 3.6516783934724563e-06, - "loss": 0.753, - "step": 25668 - }, - { - "epoch": 0.7273938054351214, - "grad_norm": 0.0, - "learning_rate": 3.65096929027256e-06, - "loss": 0.8079, - "step": 25669 - }, - { - "epoch": 0.7274221428773839, - "grad_norm": 0.0, - "learning_rate": 3.6502602405529044e-06, - "loss": 0.7184, - "step": 25670 - }, - { - "epoch": 0.7274504803196463, - "grad_norm": 0.0, - "learning_rate": 3.6495512443194647e-06, - "loss": 0.831, - "step": 25671 - }, - { - "epoch": 0.7274788177619088, - "grad_norm": 0.0, - "learning_rate": 3.6488423015782128e-06, - "loss": 0.8488, - "step": 25672 - }, - { - "epoch": 0.7275071552041713, - "grad_norm": 0.0, - "learning_rate": 3.6481334123351196e-06, - "loss": 0.87, - "step": 25673 - }, - { - "epoch": 0.7275354926464337, - "grad_norm": 0.0, - "learning_rate": 3.6474245765961623e-06, - "loss": 0.7637, - "step": 25674 - }, - { - "epoch": 0.7275638300886962, - "grad_norm": 0.0, - "learning_rate": 3.6467157943673028e-06, - "loss": 0.785, - "step": 25675 - }, - { - "epoch": 0.7275921675309587, - "grad_norm": 0.0, - "learning_rate": 3.646007065654519e-06, - "loss": 0.7625, - "step": 25676 - }, - { - "epoch": 0.7276205049732212, - "grad_norm": 0.0, - "learning_rate": 3.6452983904637738e-06, - "loss": 0.8841, - "step": 25677 - }, - { - "epoch": 0.7276488424154836, - "grad_norm": 0.0, - "learning_rate": 3.6445897688010403e-06, - "loss": 0.84, - "step": 25678 - }, - { - "epoch": 0.727677179857746, - "grad_norm": 0.0, - "learning_rate": 3.643881200672289e-06, - "loss": 0.7947, - "step": 25679 - }, - { - "epoch": 0.7277055173000085, - "grad_norm": 0.0, - "learning_rate": 3.6431726860834848e-06, - "loss": 0.7665, - "step": 25680 - }, - { - "epoch": 0.7277338547422709, - "grad_norm": 0.0, - "learning_rate": 3.6424642250405974e-06, - "loss": 0.865, - "step": 25681 - }, - { - "epoch": 0.7277621921845334, - "grad_norm": 0.0, - "learning_rate": 3.6417558175495983e-06, - "loss": 0.8333, - "step": 25682 - }, - { - "epoch": 0.7277905296267959, - "grad_norm": 0.0, - "learning_rate": 3.6410474636164463e-06, - "loss": 0.7903, - "step": 25683 - }, - { - "epoch": 0.7278188670690583, - "grad_norm": 0.0, - "learning_rate": 3.6403391632471142e-06, - "loss": 0.8064, - "step": 25684 - }, - { - "epoch": 0.7278472045113208, - "grad_norm": 0.0, - "learning_rate": 3.6396309164475665e-06, - "loss": 0.7737, - "step": 25685 - }, - { - "epoch": 0.7278755419535833, - "grad_norm": 0.0, - "learning_rate": 3.638922723223769e-06, - "loss": 0.7423, - "step": 25686 - }, - { - "epoch": 0.7279038793958458, - "grad_norm": 0.0, - "learning_rate": 3.6382145835816916e-06, - "loss": 0.9021, - "step": 25687 - }, - { - "epoch": 0.7279322168381082, - "grad_norm": 0.0, - "learning_rate": 3.637506497527291e-06, - "loss": 0.8279, - "step": 25688 - }, - { - "epoch": 0.7279605542803707, - "grad_norm": 0.0, - "learning_rate": 3.636798465066537e-06, - "loss": 0.8441, - "step": 25689 - }, - { - "epoch": 0.7279888917226331, - "grad_norm": 0.0, - "learning_rate": 3.6360904862053947e-06, - "loss": 0.7867, - "step": 25690 - }, - { - "epoch": 0.7280172291648955, - "grad_norm": 0.0, - "learning_rate": 3.6353825609498227e-06, - "loss": 0.8168, - "step": 25691 - }, - { - "epoch": 0.728045566607158, - "grad_norm": 0.0, - "learning_rate": 3.6346746893057896e-06, - "loss": 0.835, - "step": 25692 - }, - { - "epoch": 0.7280739040494205, - "grad_norm": 0.0, - "learning_rate": 3.6339668712792533e-06, - "loss": 0.754, - "step": 25693 - }, - { - "epoch": 0.728102241491683, - "grad_norm": 0.0, - "learning_rate": 3.6332591068761756e-06, - "loss": 0.9351, - "step": 25694 - }, - { - "epoch": 0.7281305789339454, - "grad_norm": 0.0, - "learning_rate": 3.632551396102526e-06, - "loss": 0.799, - "step": 25695 - }, - { - "epoch": 0.7281589163762079, - "grad_norm": 0.0, - "learning_rate": 3.631843738964257e-06, - "loss": 0.8311, - "step": 25696 - }, - { - "epoch": 0.7281872538184704, - "grad_norm": 0.0, - "learning_rate": 3.6311361354673324e-06, - "loss": 0.8888, - "step": 25697 - }, - { - "epoch": 0.7282155912607328, - "grad_norm": 0.0, - "learning_rate": 3.6304285856177134e-06, - "loss": 0.8455, - "step": 25698 - }, - { - "epoch": 0.7282439287029953, - "grad_norm": 0.0, - "learning_rate": 3.62972108942136e-06, - "loss": 0.8409, - "step": 25699 - }, - { - "epoch": 0.7282722661452578, - "grad_norm": 0.0, - "learning_rate": 3.6290136468842343e-06, - "loss": 0.8293, - "step": 25700 - }, - { - "epoch": 0.7283006035875202, - "grad_norm": 0.0, - "learning_rate": 3.6283062580122884e-06, - "loss": 0.779, - "step": 25701 - }, - { - "epoch": 0.7283289410297826, - "grad_norm": 0.0, - "learning_rate": 3.6275989228114863e-06, - "loss": 0.8199, - "step": 25702 - }, - { - "epoch": 0.7283572784720451, - "grad_norm": 0.0, - "learning_rate": 3.6268916412877875e-06, - "loss": 0.8215, - "step": 25703 - }, - { - "epoch": 0.7283856159143076, - "grad_norm": 0.0, - "learning_rate": 3.6261844134471434e-06, - "loss": 0.7584, - "step": 25704 - }, - { - "epoch": 0.72841395335657, - "grad_norm": 0.0, - "learning_rate": 3.6254772392955183e-06, - "loss": 0.8326, - "step": 25705 - }, - { - "epoch": 0.7284422907988325, - "grad_norm": 0.0, - "learning_rate": 3.6247701188388627e-06, - "loss": 0.8153, - "step": 25706 - }, - { - "epoch": 0.728470628241095, - "grad_norm": 0.0, - "learning_rate": 3.6240630520831354e-06, - "loss": 0.8561, - "step": 25707 - }, - { - "epoch": 0.7284989656833574, - "grad_norm": 0.0, - "learning_rate": 3.6233560390342957e-06, - "loss": 0.7949, - "step": 25708 - }, - { - "epoch": 0.7285273031256199, - "grad_norm": 0.0, - "learning_rate": 3.622649079698293e-06, - "loss": 0.739, - "step": 25709 - }, - { - "epoch": 0.7285556405678824, - "grad_norm": 0.0, - "learning_rate": 3.6219421740810855e-06, - "loss": 0.8008, - "step": 25710 - }, - { - "epoch": 0.7285839780101449, - "grad_norm": 0.0, - "learning_rate": 3.6212353221886276e-06, - "loss": 0.8262, - "step": 25711 - }, - { - "epoch": 0.7286123154524072, - "grad_norm": 0.0, - "learning_rate": 3.620528524026873e-06, - "loss": 0.7852, - "step": 25712 - }, - { - "epoch": 0.7286406528946697, - "grad_norm": 0.0, - "learning_rate": 3.6198217796017788e-06, - "loss": 0.8105, - "step": 25713 - }, - { - "epoch": 0.7286689903369322, - "grad_norm": 0.0, - "learning_rate": 3.6191150889192915e-06, - "loss": 0.9106, - "step": 25714 - }, - { - "epoch": 0.7286973277791946, - "grad_norm": 0.0, - "learning_rate": 3.618408451985368e-06, - "loss": 0.8577, - "step": 25715 - }, - { - "epoch": 0.7287256652214571, - "grad_norm": 0.0, - "learning_rate": 3.617701868805963e-06, - "loss": 0.842, - "step": 25716 - }, - { - "epoch": 0.7287540026637196, - "grad_norm": 0.0, - "learning_rate": 3.6169953393870203e-06, - "loss": 0.8186, - "step": 25717 - }, - { - "epoch": 0.7287823401059821, - "grad_norm": 0.0, - "learning_rate": 3.616288863734498e-06, - "loss": 0.616, - "step": 25718 - }, - { - "epoch": 0.7288106775482445, - "grad_norm": 0.0, - "learning_rate": 3.6155824418543482e-06, - "loss": 0.8608, - "step": 25719 - }, - { - "epoch": 0.728839014990507, - "grad_norm": 0.0, - "learning_rate": 3.6148760737525145e-06, - "loss": 0.8541, - "step": 25720 - }, - { - "epoch": 0.7288673524327695, - "grad_norm": 0.0, - "learning_rate": 3.61416975943495e-06, - "loss": 0.8039, - "step": 25721 - }, - { - "epoch": 0.7288956898750318, - "grad_norm": 0.0, - "learning_rate": 3.6134634989076065e-06, - "loss": 0.7396, - "step": 25722 - }, - { - "epoch": 0.7289240273172943, - "grad_norm": 0.0, - "learning_rate": 3.612757292176434e-06, - "loss": 0.8385, - "step": 25723 - }, - { - "epoch": 0.7289523647595568, - "grad_norm": 0.0, - "learning_rate": 3.6120511392473756e-06, - "loss": 0.726, - "step": 25724 - }, - { - "epoch": 0.7289807022018192, - "grad_norm": 0.0, - "learning_rate": 3.6113450401263815e-06, - "loss": 0.8416, - "step": 25725 - }, - { - "epoch": 0.7290090396440817, - "grad_norm": 0.0, - "learning_rate": 3.610638994819402e-06, - "loss": 0.837, - "step": 25726 - }, - { - "epoch": 0.7290373770863442, - "grad_norm": 0.0, - "learning_rate": 3.6099330033323854e-06, - "loss": 0.7184, - "step": 25727 - }, - { - "epoch": 0.7290657145286067, - "grad_norm": 0.0, - "learning_rate": 3.6092270656712723e-06, - "loss": 0.8926, - "step": 25728 - }, - { - "epoch": 0.7290940519708691, - "grad_norm": 0.0, - "learning_rate": 3.6085211818420176e-06, - "loss": 0.805, - "step": 25729 - }, - { - "epoch": 0.7291223894131316, - "grad_norm": 0.0, - "learning_rate": 3.6078153518505578e-06, - "loss": 0.7212, - "step": 25730 - }, - { - "epoch": 0.7291507268553941, - "grad_norm": 0.0, - "learning_rate": 3.607109575702843e-06, - "loss": 0.8494, - "step": 25731 - }, - { - "epoch": 0.7291790642976564, - "grad_norm": 0.0, - "learning_rate": 3.606403853404823e-06, - "loss": 0.8952, - "step": 25732 - }, - { - "epoch": 0.7292074017399189, - "grad_norm": 0.0, - "learning_rate": 3.605698184962433e-06, - "loss": 0.8364, - "step": 25733 - }, - { - "epoch": 0.7292357391821814, - "grad_norm": 0.0, - "learning_rate": 3.604992570381621e-06, - "loss": 0.7719, - "step": 25734 - }, - { - "epoch": 0.7292640766244439, - "grad_norm": 0.0, - "learning_rate": 3.6042870096683324e-06, - "loss": 0.7917, - "step": 25735 - }, - { - "epoch": 0.7292924140667063, - "grad_norm": 0.0, - "learning_rate": 3.603581502828508e-06, - "loss": 0.7293, - "step": 25736 - }, - { - "epoch": 0.7293207515089688, - "grad_norm": 0.0, - "learning_rate": 3.602876049868097e-06, - "loss": 0.8424, - "step": 25737 - }, - { - "epoch": 0.7293490889512313, - "grad_norm": 0.0, - "learning_rate": 3.602170650793032e-06, - "loss": 0.8587, - "step": 25738 - }, - { - "epoch": 0.7293774263934937, - "grad_norm": 0.0, - "learning_rate": 3.6014653056092598e-06, - "loss": 0.8432, - "step": 25739 - }, - { - "epoch": 0.7294057638357562, - "grad_norm": 0.0, - "learning_rate": 3.600760014322725e-06, - "loss": 0.8315, - "step": 25740 - }, - { - "epoch": 0.7294341012780187, - "grad_norm": 0.0, - "learning_rate": 3.600054776939361e-06, - "loss": 0.7737, - "step": 25741 - }, - { - "epoch": 0.7294624387202812, - "grad_norm": 0.0, - "learning_rate": 3.5993495934651157e-06, - "loss": 0.8318, - "step": 25742 - }, - { - "epoch": 0.7294907761625435, - "grad_norm": 0.0, - "learning_rate": 3.5986444639059214e-06, - "loss": 0.8063, - "step": 25743 - }, - { - "epoch": 0.729519113604806, - "grad_norm": 0.0, - "learning_rate": 3.597939388267724e-06, - "loss": 0.8634, - "step": 25744 - }, - { - "epoch": 0.7295474510470685, - "grad_norm": 0.0, - "learning_rate": 3.5972343665564625e-06, - "loss": 0.8081, - "step": 25745 - }, - { - "epoch": 0.7295757884893309, - "grad_norm": 0.0, - "learning_rate": 3.596529398778069e-06, - "loss": 0.7734, - "step": 25746 - }, - { - "epoch": 0.7296041259315934, - "grad_norm": 0.0, - "learning_rate": 3.595824484938488e-06, - "loss": 0.8627, - "step": 25747 - }, - { - "epoch": 0.7296324633738559, - "grad_norm": 0.0, - "learning_rate": 3.595119625043655e-06, - "loss": 0.7954, - "step": 25748 - }, - { - "epoch": 0.7296608008161183, - "grad_norm": 0.0, - "learning_rate": 3.5944148190995077e-06, - "loss": 0.9048, - "step": 25749 - }, - { - "epoch": 0.7296891382583808, - "grad_norm": 0.0, - "learning_rate": 3.5937100671119864e-06, - "loss": 0.8785, - "step": 25750 - }, - { - "epoch": 0.7297174757006433, - "grad_norm": 0.0, - "learning_rate": 3.5930053690870217e-06, - "loss": 0.6731, - "step": 25751 - }, - { - "epoch": 0.7297458131429058, - "grad_norm": 0.0, - "learning_rate": 3.5923007250305507e-06, - "loss": 0.8014, - "step": 25752 - }, - { - "epoch": 0.7297741505851681, - "grad_norm": 0.0, - "learning_rate": 3.591596134948514e-06, - "loss": 0.7245, - "step": 25753 - }, - { - "epoch": 0.7298024880274306, - "grad_norm": 0.0, - "learning_rate": 3.5908915988468386e-06, - "loss": 0.8781, - "step": 25754 - }, - { - "epoch": 0.7298308254696931, - "grad_norm": 0.0, - "learning_rate": 3.590187116731464e-06, - "loss": 0.7834, - "step": 25755 - }, - { - "epoch": 0.7298591629119555, - "grad_norm": 0.0, - "learning_rate": 3.5894826886083268e-06, - "loss": 0.7641, - "step": 25756 - }, - { - "epoch": 0.729887500354218, - "grad_norm": 0.0, - "learning_rate": 3.5887783144833544e-06, - "loss": 0.8021, - "step": 25757 - }, - { - "epoch": 0.7299158377964805, - "grad_norm": 0.0, - "learning_rate": 3.5880739943624855e-06, - "loss": 0.8023, - "step": 25758 - }, - { - "epoch": 0.729944175238743, - "grad_norm": 0.0, - "learning_rate": 3.587369728251647e-06, - "loss": 0.7828, - "step": 25759 - }, - { - "epoch": 0.7299725126810054, - "grad_norm": 0.0, - "learning_rate": 3.5866655161567754e-06, - "loss": 0.8782, - "step": 25760 - }, - { - "epoch": 0.7300008501232679, - "grad_norm": 0.0, - "learning_rate": 3.5859613580838015e-06, - "loss": 0.7994, - "step": 25761 - }, - { - "epoch": 0.7300291875655304, - "grad_norm": 0.0, - "learning_rate": 3.5852572540386564e-06, - "loss": 0.826, - "step": 25762 - }, - { - "epoch": 0.7300575250077928, - "grad_norm": 0.0, - "learning_rate": 3.5845532040272758e-06, - "loss": 0.7223, - "step": 25763 - }, - { - "epoch": 0.7300858624500552, - "grad_norm": 0.0, - "learning_rate": 3.583849208055582e-06, - "loss": 0.8365, - "step": 25764 - }, - { - "epoch": 0.7301141998923177, - "grad_norm": 0.0, - "learning_rate": 3.5831452661295085e-06, - "loss": 0.8321, - "step": 25765 - }, - { - "epoch": 0.7301425373345802, - "grad_norm": 0.0, - "learning_rate": 3.5824413782549893e-06, - "loss": 0.8318, - "step": 25766 - }, - { - "epoch": 0.7301708747768426, - "grad_norm": 0.0, - "learning_rate": 3.5817375444379454e-06, - "loss": 0.846, - "step": 25767 - }, - { - "epoch": 0.7301992122191051, - "grad_norm": 0.0, - "learning_rate": 3.5810337646843096e-06, - "loss": 0.7359, - "step": 25768 - }, - { - "epoch": 0.7302275496613676, - "grad_norm": 0.0, - "learning_rate": 3.580330039000014e-06, - "loss": 0.8145, - "step": 25769 - }, - { - "epoch": 0.73025588710363, - "grad_norm": 0.0, - "learning_rate": 3.579626367390978e-06, - "loss": 0.8527, - "step": 25770 - }, - { - "epoch": 0.7302842245458925, - "grad_norm": 0.0, - "learning_rate": 3.5789227498631376e-06, - "loss": 0.7578, - "step": 25771 - }, - { - "epoch": 0.730312561988155, - "grad_norm": 0.0, - "learning_rate": 3.5782191864224113e-06, - "loss": 0.738, - "step": 25772 - }, - { - "epoch": 0.7303408994304174, - "grad_norm": 0.0, - "learning_rate": 3.57751567707473e-06, - "loss": 0.8239, - "step": 25773 - }, - { - "epoch": 0.7303692368726799, - "grad_norm": 0.0, - "learning_rate": 3.5768122218260193e-06, - "loss": 0.9188, - "step": 25774 - }, - { - "epoch": 0.7303975743149423, - "grad_norm": 0.0, - "learning_rate": 3.5761088206822035e-06, - "loss": 0.8367, - "step": 25775 - }, - { - "epoch": 0.7304259117572048, - "grad_norm": 0.0, - "learning_rate": 3.5754054736492096e-06, - "loss": 0.8741, - "step": 25776 - }, - { - "epoch": 0.7304542491994672, - "grad_norm": 0.0, - "learning_rate": 3.574702180732964e-06, - "loss": 0.9326, - "step": 25777 - }, - { - "epoch": 0.7304825866417297, - "grad_norm": 0.0, - "learning_rate": 3.573998941939384e-06, - "loss": 0.7753, - "step": 25778 - }, - { - "epoch": 0.7305109240839922, - "grad_norm": 0.0, - "learning_rate": 3.573295757274401e-06, - "loss": 0.8791, - "step": 25779 - }, - { - "epoch": 0.7305392615262546, - "grad_norm": 0.0, - "learning_rate": 3.5725926267439304e-06, - "loss": 0.6748, - "step": 25780 - }, - { - "epoch": 0.7305675989685171, - "grad_norm": 0.0, - "learning_rate": 3.5718895503538997e-06, - "loss": 0.7674, - "step": 25781 - }, - { - "epoch": 0.7305959364107796, - "grad_norm": 0.0, - "learning_rate": 3.5711865281102333e-06, - "loss": 0.8139, - "step": 25782 - }, - { - "epoch": 0.7306242738530421, - "grad_norm": 0.0, - "learning_rate": 3.570483560018847e-06, - "loss": 0.8177, - "step": 25783 - }, - { - "epoch": 0.7306526112953045, - "grad_norm": 0.0, - "learning_rate": 3.5697806460856655e-06, - "loss": 0.8825, - "step": 25784 - }, - { - "epoch": 0.730680948737567, - "grad_norm": 0.0, - "learning_rate": 3.5690777863166115e-06, - "loss": 0.7308, - "step": 25785 - }, - { - "epoch": 0.7307092861798294, - "grad_norm": 0.0, - "learning_rate": 3.5683749807176015e-06, - "loss": 0.8382, - "step": 25786 - }, - { - "epoch": 0.7307376236220918, - "grad_norm": 0.0, - "learning_rate": 3.5676722292945567e-06, - "loss": 0.7584, - "step": 25787 - }, - { - "epoch": 0.7307659610643543, - "grad_norm": 0.0, - "learning_rate": 3.566969532053397e-06, - "loss": 0.7953, - "step": 25788 - }, - { - "epoch": 0.7307942985066168, - "grad_norm": 0.0, - "learning_rate": 3.5662668890000416e-06, - "loss": 0.8474, - "step": 25789 - }, - { - "epoch": 0.7308226359488793, - "grad_norm": 0.0, - "learning_rate": 3.565564300140414e-06, - "loss": 0.7517, - "step": 25790 - }, - { - "epoch": 0.7308509733911417, - "grad_norm": 0.0, - "learning_rate": 3.5648617654804228e-06, - "loss": 0.824, - "step": 25791 - }, - { - "epoch": 0.7308793108334042, - "grad_norm": 0.0, - "learning_rate": 3.5641592850259943e-06, - "loss": 0.9108, - "step": 25792 - }, - { - "epoch": 0.7309076482756667, - "grad_norm": 0.0, - "learning_rate": 3.5634568587830386e-06, - "loss": 0.8123, - "step": 25793 - }, - { - "epoch": 0.7309359857179291, - "grad_norm": 0.0, - "learning_rate": 3.562754486757477e-06, - "loss": 0.7462, - "step": 25794 - }, - { - "epoch": 0.7309643231601916, - "grad_norm": 0.0, - "learning_rate": 3.562052168955227e-06, - "loss": 0.8199, - "step": 25795 - }, - { - "epoch": 0.730992660602454, - "grad_norm": 0.0, - "learning_rate": 3.5613499053821997e-06, - "loss": 0.7394, - "step": 25796 - }, - { - "epoch": 0.7310209980447164, - "grad_norm": 0.0, - "learning_rate": 3.5606476960443126e-06, - "loss": 0.8026, - "step": 25797 - }, - { - "epoch": 0.7310493354869789, - "grad_norm": 0.0, - "learning_rate": 3.5599455409474847e-06, - "loss": 0.9248, - "step": 25798 - }, - { - "epoch": 0.7310776729292414, - "grad_norm": 0.0, - "learning_rate": 3.559243440097623e-06, - "loss": 0.8516, - "step": 25799 - }, - { - "epoch": 0.7311060103715039, - "grad_norm": 0.0, - "learning_rate": 3.5585413935006465e-06, - "loss": 0.7289, - "step": 25800 - }, - { - "epoch": 0.7311343478137663, - "grad_norm": 0.0, - "learning_rate": 3.5578394011624674e-06, - "loss": 0.795, - "step": 25801 - }, - { - "epoch": 0.7311626852560288, - "grad_norm": 0.0, - "learning_rate": 3.557137463088999e-06, - "loss": 0.807, - "step": 25802 - }, - { - "epoch": 0.7311910226982913, - "grad_norm": 0.0, - "learning_rate": 3.5564355792861573e-06, - "loss": 0.8168, - "step": 25803 - }, - { - "epoch": 0.7312193601405537, - "grad_norm": 0.0, - "learning_rate": 3.5557337497598487e-06, - "loss": 0.8682, - "step": 25804 - }, - { - "epoch": 0.7312476975828162, - "grad_norm": 0.0, - "learning_rate": 3.555031974515988e-06, - "loss": 0.8301, - "step": 25805 - }, - { - "epoch": 0.7312760350250787, - "grad_norm": 0.0, - "learning_rate": 3.5543302535604897e-06, - "loss": 0.9084, - "step": 25806 - }, - { - "epoch": 0.7313043724673411, - "grad_norm": 0.0, - "learning_rate": 3.553628586899257e-06, - "loss": 0.9161, - "step": 25807 - }, - { - "epoch": 0.7313327099096035, - "grad_norm": 0.0, - "learning_rate": 3.552926974538208e-06, - "loss": 0.8125, - "step": 25808 - }, - { - "epoch": 0.731361047351866, - "grad_norm": 0.0, - "learning_rate": 3.5522254164832458e-06, - "loss": 0.7974, - "step": 25809 - }, - { - "epoch": 0.7313893847941285, - "grad_norm": 0.0, - "learning_rate": 3.5515239127402845e-06, - "loss": 0.8519, - "step": 25810 - }, - { - "epoch": 0.7314177222363909, - "grad_norm": 0.0, - "learning_rate": 3.5508224633152333e-06, - "loss": 0.9138, - "step": 25811 - }, - { - "epoch": 0.7314460596786534, - "grad_norm": 0.0, - "learning_rate": 3.5501210682139977e-06, - "loss": 0.7748, - "step": 25812 - }, - { - "epoch": 0.7314743971209159, - "grad_norm": 0.0, - "learning_rate": 3.549419727442487e-06, - "loss": 0.7746, - "step": 25813 - }, - { - "epoch": 0.7315027345631784, - "grad_norm": 0.0, - "learning_rate": 3.5487184410066076e-06, - "loss": 0.8485, - "step": 25814 - }, - { - "epoch": 0.7315310720054408, - "grad_norm": 0.0, - "learning_rate": 3.54801720891227e-06, - "loss": 0.6954, - "step": 25815 - }, - { - "epoch": 0.7315594094477033, - "grad_norm": 0.0, - "learning_rate": 3.5473160311653833e-06, - "loss": 0.7725, - "step": 25816 - }, - { - "epoch": 0.7315877468899658, - "grad_norm": 0.0, - "learning_rate": 3.546614907771845e-06, - "loss": 0.7196, - "step": 25817 - }, - { - "epoch": 0.7316160843322281, - "grad_norm": 0.0, - "learning_rate": 3.545913838737567e-06, - "loss": 0.9089, - "step": 25818 - }, - { - "epoch": 0.7316444217744906, - "grad_norm": 0.0, - "learning_rate": 3.545212824068456e-06, - "loss": 0.8164, - "step": 25819 - }, - { - "epoch": 0.7316727592167531, - "grad_norm": 0.0, - "learning_rate": 3.5445118637704112e-06, - "loss": 0.7598, - "step": 25820 - }, - { - "epoch": 0.7317010966590155, - "grad_norm": 0.0, - "learning_rate": 3.543810957849343e-06, - "loss": 0.8507, - "step": 25821 - }, - { - "epoch": 0.731729434101278, - "grad_norm": 0.0, - "learning_rate": 3.5431101063111495e-06, - "loss": 0.9435, - "step": 25822 - }, - { - "epoch": 0.7317577715435405, - "grad_norm": 0.0, - "learning_rate": 3.5424093091617375e-06, - "loss": 0.8064, - "step": 25823 - }, - { - "epoch": 0.731786108985803, - "grad_norm": 0.0, - "learning_rate": 3.5417085664070127e-06, - "loss": 0.8952, - "step": 25824 - }, - { - "epoch": 0.7318144464280654, - "grad_norm": 0.0, - "learning_rate": 3.5410078780528723e-06, - "loss": 0.799, - "step": 25825 - }, - { - "epoch": 0.7318427838703279, - "grad_norm": 0.0, - "learning_rate": 3.54030724410522e-06, - "loss": 0.834, - "step": 25826 - }, - { - "epoch": 0.7318711213125904, - "grad_norm": 0.0, - "learning_rate": 3.53960666456996e-06, - "loss": 0.8056, - "step": 25827 - }, - { - "epoch": 0.7318994587548527, - "grad_norm": 0.0, - "learning_rate": 3.538906139452991e-06, - "loss": 0.8799, - "step": 25828 - }, - { - "epoch": 0.7319277961971152, - "grad_norm": 0.0, - "learning_rate": 3.5382056687602185e-06, - "loss": 0.8271, - "step": 25829 - }, - { - "epoch": 0.7319561336393777, - "grad_norm": 0.0, - "learning_rate": 3.537505252497535e-06, - "loss": 0.9871, - "step": 25830 - }, - { - "epoch": 0.7319844710816402, - "grad_norm": 0.0, - "learning_rate": 3.536804890670846e-06, - "loss": 0.8384, - "step": 25831 - }, - { - "epoch": 0.7320128085239026, - "grad_norm": 0.0, - "learning_rate": 3.536104583286052e-06, - "loss": 0.7978, - "step": 25832 - }, - { - "epoch": 0.7320411459661651, - "grad_norm": 0.0, - "learning_rate": 3.535404330349046e-06, - "loss": 0.8332, - "step": 25833 - }, - { - "epoch": 0.7320694834084276, - "grad_norm": 0.0, - "learning_rate": 3.5347041318657304e-06, - "loss": 0.751, - "step": 25834 - }, - { - "epoch": 0.73209782085069, - "grad_norm": 0.0, - "learning_rate": 3.534003987842005e-06, - "loss": 0.7213, - "step": 25835 - }, - { - "epoch": 0.7321261582929525, - "grad_norm": 0.0, - "learning_rate": 3.533303898283763e-06, - "loss": 0.9025, - "step": 25836 - }, - { - "epoch": 0.732154495735215, - "grad_norm": 0.0, - "learning_rate": 3.5326038631969064e-06, - "loss": 0.8133, - "step": 25837 - }, - { - "epoch": 0.7321828331774775, - "grad_norm": 0.0, - "learning_rate": 3.5319038825873254e-06, - "loss": 0.7165, - "step": 25838 - }, - { - "epoch": 0.7322111706197398, - "grad_norm": 0.0, - "learning_rate": 3.5312039564609203e-06, - "loss": 0.8219, - "step": 25839 - }, - { - "epoch": 0.7322395080620023, - "grad_norm": 0.0, - "learning_rate": 3.530504084823586e-06, - "loss": 0.7973, - "step": 25840 - }, - { - "epoch": 0.7322678455042648, - "grad_norm": 0.0, - "learning_rate": 3.5298042676812195e-06, - "loss": 0.7939, - "step": 25841 - }, - { - "epoch": 0.7322961829465272, - "grad_norm": 0.0, - "learning_rate": 3.529104505039713e-06, - "loss": 0.794, - "step": 25842 - }, - { - "epoch": 0.7323245203887897, - "grad_norm": 0.0, - "learning_rate": 3.528404796904966e-06, - "loss": 0.8331, - "step": 25843 - }, - { - "epoch": 0.7323528578310522, - "grad_norm": 0.0, - "learning_rate": 3.5277051432828648e-06, - "loss": 0.8925, - "step": 25844 - }, - { - "epoch": 0.7323811952733146, - "grad_norm": 0.0, - "learning_rate": 3.5270055441793106e-06, - "loss": 0.8223, - "step": 25845 - }, - { - "epoch": 0.7324095327155771, - "grad_norm": 0.0, - "learning_rate": 3.526305999600188e-06, - "loss": 0.7833, - "step": 25846 - }, - { - "epoch": 0.7324378701578396, - "grad_norm": 0.0, - "learning_rate": 3.5256065095513947e-06, - "loss": 0.9341, - "step": 25847 - }, - { - "epoch": 0.7324662076001021, - "grad_norm": 0.0, - "learning_rate": 3.5249070740388246e-06, - "loss": 0.7918, - "step": 25848 - }, - { - "epoch": 0.7324945450423644, - "grad_norm": 0.0, - "learning_rate": 3.5242076930683644e-06, - "loss": 0.7361, - "step": 25849 - }, - { - "epoch": 0.7325228824846269, - "grad_norm": 0.0, - "learning_rate": 3.5235083666459104e-06, - "loss": 0.8474, - "step": 25850 - }, - { - "epoch": 0.7325512199268894, - "grad_norm": 0.0, - "learning_rate": 3.5228090947773473e-06, - "loss": 0.8838, - "step": 25851 - }, - { - "epoch": 0.7325795573691518, - "grad_norm": 0.0, - "learning_rate": 3.522109877468568e-06, - "loss": 0.8568, - "step": 25852 - }, - { - "epoch": 0.7326078948114143, - "grad_norm": 0.0, - "learning_rate": 3.521410714725463e-06, - "loss": 0.8479, - "step": 25853 - }, - { - "epoch": 0.7326362322536768, - "grad_norm": 0.0, - "learning_rate": 3.5207116065539214e-06, - "loss": 0.8182, - "step": 25854 - }, - { - "epoch": 0.7326645696959393, - "grad_norm": 0.0, - "learning_rate": 3.520012552959832e-06, - "loss": 0.8316, - "step": 25855 - }, - { - "epoch": 0.7326929071382017, - "grad_norm": 0.0, - "learning_rate": 3.5193135539490854e-06, - "loss": 0.8605, - "step": 25856 - }, - { - "epoch": 0.7327212445804642, - "grad_norm": 0.0, - "learning_rate": 3.518614609527565e-06, - "loss": 0.8022, - "step": 25857 - }, - { - "epoch": 0.7327495820227267, - "grad_norm": 0.0, - "learning_rate": 3.517915719701164e-06, - "loss": 0.8857, - "step": 25858 - }, - { - "epoch": 0.732777919464989, - "grad_norm": 0.0, - "learning_rate": 3.5172168844757625e-06, - "loss": 0.8463, - "step": 25859 - }, - { - "epoch": 0.7328062569072515, - "grad_norm": 0.0, - "learning_rate": 3.5165181038572505e-06, - "loss": 0.7564, - "step": 25860 - }, - { - "epoch": 0.732834594349514, - "grad_norm": 0.0, - "learning_rate": 3.515819377851517e-06, - "loss": 0.7805, - "step": 25861 - }, - { - "epoch": 0.7328629317917765, - "grad_norm": 0.0, - "learning_rate": 3.515120706464441e-06, - "loss": 0.8261, - "step": 25862 - }, - { - "epoch": 0.7328912692340389, - "grad_norm": 0.0, - "learning_rate": 3.5144220897019122e-06, - "loss": 0.8148, - "step": 25863 - }, - { - "epoch": 0.7329196066763014, - "grad_norm": 0.0, - "learning_rate": 3.513723527569818e-06, - "loss": 0.7952, - "step": 25864 - }, - { - "epoch": 0.7329479441185639, - "grad_norm": 0.0, - "learning_rate": 3.5130250200740355e-06, - "loss": 0.7081, - "step": 25865 - }, - { - "epoch": 0.7329762815608263, - "grad_norm": 0.0, - "learning_rate": 3.512326567220452e-06, - "loss": 0.6999, - "step": 25866 - }, - { - "epoch": 0.7330046190030888, - "grad_norm": 0.0, - "learning_rate": 3.5116281690149514e-06, - "loss": 0.7844, - "step": 25867 - }, - { - "epoch": 0.7330329564453513, - "grad_norm": 0.0, - "learning_rate": 3.510929825463415e-06, - "loss": 0.7589, - "step": 25868 - }, - { - "epoch": 0.7330612938876137, - "grad_norm": 0.0, - "learning_rate": 3.510231536571731e-06, - "loss": 0.8419, - "step": 25869 - }, - { - "epoch": 0.7330896313298761, - "grad_norm": 0.0, - "learning_rate": 3.5095333023457723e-06, - "loss": 0.758, - "step": 25870 - }, - { - "epoch": 0.7331179687721386, - "grad_norm": 0.0, - "learning_rate": 3.508835122791425e-06, - "loss": 0.8338, - "step": 25871 - }, - { - "epoch": 0.7331463062144011, - "grad_norm": 0.0, - "learning_rate": 3.5081369979145743e-06, - "loss": 0.7645, - "step": 25872 - }, - { - "epoch": 0.7331746436566635, - "grad_norm": 0.0, - "learning_rate": 3.507438927721092e-06, - "loss": 0.8703, - "step": 25873 - }, - { - "epoch": 0.733202981098926, - "grad_norm": 0.0, - "learning_rate": 3.5067409122168663e-06, - "loss": 0.8819, - "step": 25874 - }, - { - "epoch": 0.7332313185411885, - "grad_norm": 0.0, - "learning_rate": 3.5060429514077697e-06, - "loss": 0.807, - "step": 25875 - }, - { - "epoch": 0.7332596559834509, - "grad_norm": 0.0, - "learning_rate": 3.505345045299684e-06, - "loss": 0.7937, - "step": 25876 - }, - { - "epoch": 0.7332879934257134, - "grad_norm": 0.0, - "learning_rate": 3.504647193898494e-06, - "loss": 0.8427, - "step": 25877 - }, - { - "epoch": 0.7333163308679759, - "grad_norm": 0.0, - "learning_rate": 3.5039493972100667e-06, - "loss": 0.7725, - "step": 25878 - }, - { - "epoch": 0.7333446683102384, - "grad_norm": 0.0, - "learning_rate": 3.5032516552402885e-06, - "loss": 0.8325, - "step": 25879 - }, - { - "epoch": 0.7333730057525008, - "grad_norm": 0.0, - "learning_rate": 3.5025539679950326e-06, - "loss": 0.7516, - "step": 25880 - }, - { - "epoch": 0.7334013431947632, - "grad_norm": 0.0, - "learning_rate": 3.501856335480177e-06, - "loss": 0.8894, - "step": 25881 - }, - { - "epoch": 0.7334296806370257, - "grad_norm": 0.0, - "learning_rate": 3.5011587577016027e-06, - "loss": 0.7808, - "step": 25882 - }, - { - "epoch": 0.7334580180792881, - "grad_norm": 0.0, - "learning_rate": 3.500461234665178e-06, - "loss": 0.7608, - "step": 25883 - }, - { - "epoch": 0.7334863555215506, - "grad_norm": 0.0, - "learning_rate": 3.4997637663767827e-06, - "loss": 0.9291, - "step": 25884 - }, - { - "epoch": 0.7335146929638131, - "grad_norm": 0.0, - "learning_rate": 3.499066352842293e-06, - "loss": 0.7639, - "step": 25885 - }, - { - "epoch": 0.7335430304060756, - "grad_norm": 0.0, - "learning_rate": 3.498368994067578e-06, - "loss": 0.744, - "step": 25886 - }, - { - "epoch": 0.733571367848338, - "grad_norm": 0.0, - "learning_rate": 3.4976716900585194e-06, - "loss": 0.8599, - "step": 25887 - }, - { - "epoch": 0.7335997052906005, - "grad_norm": 0.0, - "learning_rate": 3.496974440820984e-06, - "loss": 0.8463, - "step": 25888 - }, - { - "epoch": 0.733628042732863, - "grad_norm": 0.0, - "learning_rate": 3.4962772463608463e-06, - "loss": 0.8352, - "step": 25889 - }, - { - "epoch": 0.7336563801751254, - "grad_norm": 0.0, - "learning_rate": 3.4955801066839847e-06, - "loss": 0.9104, - "step": 25890 - }, - { - "epoch": 0.7336847176173878, - "grad_norm": 0.0, - "learning_rate": 3.494883021796264e-06, - "loss": 0.8345, - "step": 25891 - }, - { - "epoch": 0.7337130550596503, - "grad_norm": 0.0, - "learning_rate": 3.494185991703558e-06, - "loss": 0.8315, - "step": 25892 - }, - { - "epoch": 0.7337413925019127, - "grad_norm": 0.0, - "learning_rate": 3.4934890164117407e-06, - "loss": 0.8596, - "step": 25893 - }, - { - "epoch": 0.7337697299441752, - "grad_norm": 0.0, - "learning_rate": 3.4927920959266804e-06, - "loss": 0.847, - "step": 25894 - }, - { - "epoch": 0.7337980673864377, - "grad_norm": 0.0, - "learning_rate": 3.4920952302542533e-06, - "loss": 0.7815, - "step": 25895 - }, - { - "epoch": 0.7338264048287002, - "grad_norm": 0.0, - "learning_rate": 3.4913984194003205e-06, - "loss": 0.7503, - "step": 25896 - }, - { - "epoch": 0.7338547422709626, - "grad_norm": 0.0, - "learning_rate": 3.4907016633707557e-06, - "loss": 0.734, - "step": 25897 - }, - { - "epoch": 0.7338830797132251, - "grad_norm": 0.0, - "learning_rate": 3.4900049621714315e-06, - "loss": 0.7383, - "step": 25898 - }, - { - "epoch": 0.7339114171554876, - "grad_norm": 0.0, - "learning_rate": 3.4893083158082096e-06, - "loss": 0.8352, - "step": 25899 - }, - { - "epoch": 0.73393975459775, - "grad_norm": 0.0, - "learning_rate": 3.4886117242869643e-06, - "loss": 0.8415, - "step": 25900 - }, - { - "epoch": 0.7339680920400125, - "grad_norm": 0.0, - "learning_rate": 3.4879151876135574e-06, - "loss": 0.6712, - "step": 25901 - }, - { - "epoch": 0.733996429482275, - "grad_norm": 0.0, - "learning_rate": 3.487218705793859e-06, - "loss": 0.8392, - "step": 25902 - }, - { - "epoch": 0.7340247669245374, - "grad_norm": 0.0, - "learning_rate": 3.4865222788337393e-06, - "loss": 0.8972, - "step": 25903 - }, - { - "epoch": 0.7340531043667998, - "grad_norm": 0.0, - "learning_rate": 3.4858259067390586e-06, - "loss": 0.8767, - "step": 25904 - }, - { - "epoch": 0.7340814418090623, - "grad_norm": 0.0, - "learning_rate": 3.4851295895156846e-06, - "loss": 0.8148, - "step": 25905 - }, - { - "epoch": 0.7341097792513248, - "grad_norm": 0.0, - "learning_rate": 3.484433327169483e-06, - "loss": 0.7446, - "step": 25906 - }, - { - "epoch": 0.7341381166935872, - "grad_norm": 0.0, - "learning_rate": 3.48373711970632e-06, - "loss": 0.8096, - "step": 25907 - }, - { - "epoch": 0.7341664541358497, - "grad_norm": 0.0, - "learning_rate": 3.483040967132061e-06, - "loss": 0.9123, - "step": 25908 - }, - { - "epoch": 0.7341947915781122, - "grad_norm": 0.0, - "learning_rate": 3.482344869452565e-06, - "loss": 0.8642, - "step": 25909 - }, - { - "epoch": 0.7342231290203747, - "grad_norm": 0.0, - "learning_rate": 3.4816488266736992e-06, - "loss": 0.9236, - "step": 25910 - }, - { - "epoch": 0.7342514664626371, - "grad_norm": 0.0, - "learning_rate": 3.480952838801328e-06, - "loss": 0.8131, - "step": 25911 - }, - { - "epoch": 0.7342798039048996, - "grad_norm": 0.0, - "learning_rate": 3.4802569058413093e-06, - "loss": 0.8617, - "step": 25912 - }, - { - "epoch": 0.734308141347162, - "grad_norm": 0.0, - "learning_rate": 3.4795610277995075e-06, - "loss": 0.7665, - "step": 25913 - }, - { - "epoch": 0.7343364787894244, - "grad_norm": 0.0, - "learning_rate": 3.4788652046817885e-06, - "loss": 0.8354, - "step": 25914 - }, - { - "epoch": 0.7343648162316869, - "grad_norm": 0.0, - "learning_rate": 3.4781694364940054e-06, - "loss": 0.7868, - "step": 25915 - }, - { - "epoch": 0.7343931536739494, - "grad_norm": 0.0, - "learning_rate": 3.4774737232420264e-06, - "loss": 0.8033, - "step": 25916 - }, - { - "epoch": 0.7344214911162118, - "grad_norm": 0.0, - "learning_rate": 3.4767780649317053e-06, - "loss": 0.7933, - "step": 25917 - }, - { - "epoch": 0.7344498285584743, - "grad_norm": 0.0, - "learning_rate": 3.4760824615689036e-06, - "loss": 0.7028, - "step": 25918 - }, - { - "epoch": 0.7344781660007368, - "grad_norm": 0.0, - "learning_rate": 3.4753869131594832e-06, - "loss": 0.8211, - "step": 25919 - }, - { - "epoch": 0.7345065034429993, - "grad_norm": 0.0, - "learning_rate": 3.474691419709302e-06, - "loss": 0.8347, - "step": 25920 - }, - { - "epoch": 0.7345348408852617, - "grad_norm": 0.0, - "learning_rate": 3.4739959812242175e-06, - "loss": 0.809, - "step": 25921 - }, - { - "epoch": 0.7345631783275242, - "grad_norm": 0.0, - "learning_rate": 3.473300597710091e-06, - "loss": 0.8826, - "step": 25922 - }, - { - "epoch": 0.7345915157697867, - "grad_norm": 0.0, - "learning_rate": 3.472605269172774e-06, - "loss": 0.82, - "step": 25923 - }, - { - "epoch": 0.734619853212049, - "grad_norm": 0.0, - "learning_rate": 3.4719099956181313e-06, - "loss": 0.8896, - "step": 25924 - }, - { - "epoch": 0.7346481906543115, - "grad_norm": 0.0, - "learning_rate": 3.47121477705201e-06, - "loss": 0.7729, - "step": 25925 - }, - { - "epoch": 0.734676528096574, - "grad_norm": 0.0, - "learning_rate": 3.4705196134802723e-06, - "loss": 0.8434, - "step": 25926 - }, - { - "epoch": 0.7347048655388365, - "grad_norm": 0.0, - "learning_rate": 3.4698245049087755e-06, - "loss": 0.8494, - "step": 25927 - }, - { - "epoch": 0.7347332029810989, - "grad_norm": 0.0, - "learning_rate": 3.469129451343367e-06, - "loss": 0.701, - "step": 25928 - }, - { - "epoch": 0.7347615404233614, - "grad_norm": 0.0, - "learning_rate": 3.4684344527899117e-06, - "loss": 0.7722, - "step": 25929 - }, - { - "epoch": 0.7347898778656239, - "grad_norm": 0.0, - "learning_rate": 3.4677395092542542e-06, - "loss": 0.8567, - "step": 25930 - }, - { - "epoch": 0.7348182153078863, - "grad_norm": 0.0, - "learning_rate": 3.4670446207422525e-06, - "loss": 0.7685, - "step": 25931 - }, - { - "epoch": 0.7348465527501488, - "grad_norm": 0.0, - "learning_rate": 3.4663497872597596e-06, - "loss": 0.9728, - "step": 25932 - }, - { - "epoch": 0.7348748901924113, - "grad_norm": 0.0, - "learning_rate": 3.4656550088126294e-06, - "loss": 0.8317, - "step": 25933 - }, - { - "epoch": 0.7349032276346736, - "grad_norm": 0.0, - "learning_rate": 3.464960285406713e-06, - "loss": 0.7681, - "step": 25934 - }, - { - "epoch": 0.7349315650769361, - "grad_norm": 0.0, - "learning_rate": 3.464265617047866e-06, - "loss": 0.7317, - "step": 25935 - }, - { - "epoch": 0.7349599025191986, - "grad_norm": 0.0, - "learning_rate": 3.4635710037419345e-06, - "loss": 0.86, - "step": 25936 - }, - { - "epoch": 0.7349882399614611, - "grad_norm": 0.0, - "learning_rate": 3.462876445494774e-06, - "loss": 0.7562, - "step": 25937 - }, - { - "epoch": 0.7350165774037235, - "grad_norm": 0.0, - "learning_rate": 3.4621819423122295e-06, - "loss": 0.7946, - "step": 25938 - }, - { - "epoch": 0.735044914845986, - "grad_norm": 0.0, - "learning_rate": 3.4614874942001543e-06, - "loss": 0.957, - "step": 25939 - }, - { - "epoch": 0.7350732522882485, - "grad_norm": 0.0, - "learning_rate": 3.4607931011644013e-06, - "loss": 0.8384, - "step": 25940 - }, - { - "epoch": 0.7351015897305109, - "grad_norm": 0.0, - "learning_rate": 3.460098763210813e-06, - "loss": 0.8239, - "step": 25941 - }, - { - "epoch": 0.7351299271727734, - "grad_norm": 0.0, - "learning_rate": 3.459404480345242e-06, - "loss": 0.8275, - "step": 25942 - }, - { - "epoch": 0.7351582646150359, - "grad_norm": 0.0, - "learning_rate": 3.458710252573535e-06, - "loss": 0.7444, - "step": 25943 - }, - { - "epoch": 0.7351866020572984, - "grad_norm": 0.0, - "learning_rate": 3.458016079901544e-06, - "loss": 0.8831, - "step": 25944 - }, - { - "epoch": 0.7352149394995607, - "grad_norm": 0.0, - "learning_rate": 3.4573219623351097e-06, - "loss": 0.8398, - "step": 25945 - }, - { - "epoch": 0.7352432769418232, - "grad_norm": 0.0, - "learning_rate": 3.456627899880082e-06, - "loss": 0.9684, - "step": 25946 - }, - { - "epoch": 0.7352716143840857, - "grad_norm": 0.0, - "learning_rate": 3.455933892542308e-06, - "loss": 0.759, - "step": 25947 - }, - { - "epoch": 0.7352999518263481, - "grad_norm": 0.0, - "learning_rate": 3.455239940327635e-06, - "loss": 0.8983, - "step": 25948 - }, - { - "epoch": 0.7353282892686106, - "grad_norm": 0.0, - "learning_rate": 3.454546043241904e-06, - "loss": 0.7625, - "step": 25949 - }, - { - "epoch": 0.7353566267108731, - "grad_norm": 0.0, - "learning_rate": 3.4538522012909616e-06, - "loss": 0.847, - "step": 25950 - }, - { - "epoch": 0.7353849641531356, - "grad_norm": 0.0, - "learning_rate": 3.4531584144806564e-06, - "loss": 0.8079, - "step": 25951 - }, - { - "epoch": 0.735413301595398, - "grad_norm": 0.0, - "learning_rate": 3.452464682816826e-06, - "loss": 0.8861, - "step": 25952 - }, - { - "epoch": 0.7354416390376605, - "grad_norm": 0.0, - "learning_rate": 3.45177100630532e-06, - "loss": 0.8539, - "step": 25953 - }, - { - "epoch": 0.735469976479923, - "grad_norm": 0.0, - "learning_rate": 3.4510773849519752e-06, - "loss": 0.7264, - "step": 25954 - }, - { - "epoch": 0.7354983139221853, - "grad_norm": 0.0, - "learning_rate": 3.450383818762638e-06, - "loss": 0.8147, - "step": 25955 - }, - { - "epoch": 0.7355266513644478, - "grad_norm": 0.0, - "learning_rate": 3.449690307743149e-06, - "loss": 0.7364, - "step": 25956 - }, - { - "epoch": 0.7355549888067103, - "grad_norm": 0.0, - "learning_rate": 3.4489968518993513e-06, - "loss": 0.7715, - "step": 25957 - }, - { - "epoch": 0.7355833262489727, - "grad_norm": 0.0, - "learning_rate": 3.4483034512370896e-06, - "loss": 0.8167, - "step": 25958 - }, - { - "epoch": 0.7356116636912352, - "grad_norm": 0.0, - "learning_rate": 3.447610105762197e-06, - "loss": 0.7985, - "step": 25959 - }, - { - "epoch": 0.7356400011334977, - "grad_norm": 0.0, - "learning_rate": 3.4469168154805177e-06, - "loss": 0.89, - "step": 25960 - }, - { - "epoch": 0.7356683385757602, - "grad_norm": 0.0, - "learning_rate": 3.446223580397895e-06, - "loss": 0.8694, - "step": 25961 - }, - { - "epoch": 0.7356966760180226, - "grad_norm": 0.0, - "learning_rate": 3.4455304005201617e-06, - "loss": 0.8591, - "step": 25962 - }, - { - "epoch": 0.7357250134602851, - "grad_norm": 0.0, - "learning_rate": 3.444837275853159e-06, - "loss": 0.8582, - "step": 25963 - }, - { - "epoch": 0.7357533509025476, - "grad_norm": 0.0, - "learning_rate": 3.4441442064027297e-06, - "loss": 0.8042, - "step": 25964 - }, - { - "epoch": 0.73578168834481, - "grad_norm": 0.0, - "learning_rate": 3.443451192174706e-06, - "loss": 0.8201, - "step": 25965 - }, - { - "epoch": 0.7358100257870724, - "grad_norm": 0.0, - "learning_rate": 3.44275823317493e-06, - "loss": 0.8141, - "step": 25966 - }, - { - "epoch": 0.7358383632293349, - "grad_norm": 0.0, - "learning_rate": 3.442065329409232e-06, - "loss": 0.8014, - "step": 25967 - }, - { - "epoch": 0.7358667006715974, - "grad_norm": 0.0, - "learning_rate": 3.441372480883455e-06, - "loss": 0.8844, - "step": 25968 - }, - { - "epoch": 0.7358950381138598, - "grad_norm": 0.0, - "learning_rate": 3.4406796876034323e-06, - "loss": 0.8212, - "step": 25969 - }, - { - "epoch": 0.7359233755561223, - "grad_norm": 0.0, - "learning_rate": 3.4399869495749995e-06, - "loss": 0.8208, - "step": 25970 - }, - { - "epoch": 0.7359517129983848, - "grad_norm": 0.0, - "learning_rate": 3.439294266803993e-06, - "loss": 0.8112, - "step": 25971 - }, - { - "epoch": 0.7359800504406472, - "grad_norm": 0.0, - "learning_rate": 3.4386016392962507e-06, - "loss": 0.847, - "step": 25972 - }, - { - "epoch": 0.7360083878829097, - "grad_norm": 0.0, - "learning_rate": 3.4379090670576e-06, - "loss": 0.8886, - "step": 25973 - }, - { - "epoch": 0.7360367253251722, - "grad_norm": 0.0, - "learning_rate": 3.4372165500938813e-06, - "loss": 0.7421, - "step": 25974 - }, - { - "epoch": 0.7360650627674347, - "grad_norm": 0.0, - "learning_rate": 3.4365240884109217e-06, - "loss": 0.8244, - "step": 25975 - }, - { - "epoch": 0.736093400209697, - "grad_norm": 0.0, - "learning_rate": 3.4358316820145564e-06, - "loss": 0.8674, - "step": 25976 - }, - { - "epoch": 0.7361217376519595, - "grad_norm": 0.0, - "learning_rate": 3.435139330910622e-06, - "loss": 0.7845, - "step": 25977 - }, - { - "epoch": 0.736150075094222, - "grad_norm": 0.0, - "learning_rate": 3.434447035104944e-06, - "loss": 0.8256, - "step": 25978 - }, - { - "epoch": 0.7361784125364844, - "grad_norm": 0.0, - "learning_rate": 3.4337547946033557e-06, - "loss": 0.8234, - "step": 25979 - }, - { - "epoch": 0.7362067499787469, - "grad_norm": 0.0, - "learning_rate": 3.4330626094116927e-06, - "loss": 0.9092, - "step": 25980 - }, - { - "epoch": 0.7362350874210094, - "grad_norm": 0.0, - "learning_rate": 3.4323704795357794e-06, - "loss": 0.6948, - "step": 25981 - }, - { - "epoch": 0.7362634248632718, - "grad_norm": 0.0, - "learning_rate": 3.431678404981448e-06, - "loss": 0.8849, - "step": 25982 - }, - { - "epoch": 0.7362917623055343, - "grad_norm": 0.0, - "learning_rate": 3.430986385754528e-06, - "loss": 0.8648, - "step": 25983 - }, - { - "epoch": 0.7363200997477968, - "grad_norm": 0.0, - "learning_rate": 3.4302944218608493e-06, - "loss": 0.7755, - "step": 25984 - }, - { - "epoch": 0.7363484371900593, - "grad_norm": 0.0, - "learning_rate": 3.429602513306243e-06, - "loss": 0.8425, - "step": 25985 - }, - { - "epoch": 0.7363767746323217, - "grad_norm": 0.0, - "learning_rate": 3.4289106600965317e-06, - "loss": 0.7996, - "step": 25986 - }, - { - "epoch": 0.7364051120745841, - "grad_norm": 0.0, - "learning_rate": 3.4282188622375488e-06, - "loss": 0.8132, - "step": 25987 - }, - { - "epoch": 0.7364334495168466, - "grad_norm": 0.0, - "learning_rate": 3.4275271197351166e-06, - "loss": 0.7923, - "step": 25988 - }, - { - "epoch": 0.736461786959109, - "grad_norm": 0.0, - "learning_rate": 3.4268354325950637e-06, - "loss": 0.8479, - "step": 25989 - }, - { - "epoch": 0.7364901244013715, - "grad_norm": 0.0, - "learning_rate": 3.4261438008232205e-06, - "loss": 0.8029, - "step": 25990 - }, - { - "epoch": 0.736518461843634, - "grad_norm": 0.0, - "learning_rate": 3.4254522244254053e-06, - "loss": 0.827, - "step": 25991 - }, - { - "epoch": 0.7365467992858965, - "grad_norm": 0.0, - "learning_rate": 3.424760703407447e-06, - "loss": 0.843, - "step": 25992 - }, - { - "epoch": 0.7365751367281589, - "grad_norm": 0.0, - "learning_rate": 3.424069237775175e-06, - "loss": 0.8768, - "step": 25993 - }, - { - "epoch": 0.7366034741704214, - "grad_norm": 0.0, - "learning_rate": 3.4233778275344065e-06, - "loss": 0.8125, - "step": 25994 - }, - { - "epoch": 0.7366318116126839, - "grad_norm": 0.0, - "learning_rate": 3.4226864726909683e-06, - "loss": 0.7975, - "step": 25995 - }, - { - "epoch": 0.7366601490549463, - "grad_norm": 0.0, - "learning_rate": 3.421995173250684e-06, - "loss": 0.807, - "step": 25996 - }, - { - "epoch": 0.7366884864972087, - "grad_norm": 0.0, - "learning_rate": 3.4213039292193785e-06, - "loss": 0.8717, - "step": 25997 - }, - { - "epoch": 0.7367168239394712, - "grad_norm": 0.0, - "learning_rate": 3.4206127406028744e-06, - "loss": 0.8466, - "step": 25998 - }, - { - "epoch": 0.7367451613817337, - "grad_norm": 0.0, - "learning_rate": 3.4199216074069906e-06, - "loss": 0.7768, - "step": 25999 - }, - { - "epoch": 0.7367734988239961, - "grad_norm": 0.0, - "learning_rate": 3.4192305296375493e-06, - "loss": 0.8647, - "step": 26000 - }, - { - "epoch": 0.7368018362662586, - "grad_norm": 0.0, - "learning_rate": 3.4185395073003768e-06, - "loss": 0.8513, - "step": 26001 - }, - { - "epoch": 0.7368301737085211, - "grad_norm": 0.0, - "learning_rate": 3.4178485404012874e-06, - "loss": 0.7891, - "step": 26002 - }, - { - "epoch": 0.7368585111507835, - "grad_norm": 0.0, - "learning_rate": 3.4171576289461063e-06, - "loss": 0.8108, - "step": 26003 - }, - { - "epoch": 0.736886848593046, - "grad_norm": 0.0, - "learning_rate": 3.4164667729406487e-06, - "loss": 0.9887, - "step": 26004 - }, - { - "epoch": 0.7369151860353085, - "grad_norm": 0.0, - "learning_rate": 3.4157759723907347e-06, - "loss": 0.8151, - "step": 26005 - }, - { - "epoch": 0.7369435234775709, - "grad_norm": 0.0, - "learning_rate": 3.4150852273021896e-06, - "loss": 0.7991, - "step": 26006 - }, - { - "epoch": 0.7369718609198334, - "grad_norm": 0.0, - "learning_rate": 3.414394537680823e-06, - "loss": 0.8664, - "step": 26007 - }, - { - "epoch": 0.7370001983620958, - "grad_norm": 0.0, - "learning_rate": 3.413703903532456e-06, - "loss": 0.8108, - "step": 26008 - }, - { - "epoch": 0.7370285358043583, - "grad_norm": 0.0, - "learning_rate": 3.413013324862907e-06, - "loss": 0.8617, - "step": 26009 - }, - { - "epoch": 0.7370568732466207, - "grad_norm": 0.0, - "learning_rate": 3.412322801677993e-06, - "loss": 0.931, - "step": 26010 - }, - { - "epoch": 0.7370852106888832, - "grad_norm": 0.0, - "learning_rate": 3.4116323339835344e-06, - "loss": 0.7735, - "step": 26011 - }, - { - "epoch": 0.7371135481311457, - "grad_norm": 0.0, - "learning_rate": 3.410941921785339e-06, - "loss": 0.8043, - "step": 26012 - }, - { - "epoch": 0.7371418855734081, - "grad_norm": 0.0, - "learning_rate": 3.4102515650892266e-06, - "loss": 0.7571, - "step": 26013 - }, - { - "epoch": 0.7371702230156706, - "grad_norm": 0.0, - "learning_rate": 3.4095612639010158e-06, - "loss": 0.8048, - "step": 26014 - }, - { - "epoch": 0.7371985604579331, - "grad_norm": 0.0, - "learning_rate": 3.4088710182265138e-06, - "loss": 0.8411, - "step": 26015 - }, - { - "epoch": 0.7372268979001956, - "grad_norm": 0.0, - "learning_rate": 3.4081808280715435e-06, - "loss": 0.7796, - "step": 26016 - }, - { - "epoch": 0.737255235342458, - "grad_norm": 0.0, - "learning_rate": 3.4074906934419094e-06, - "loss": 0.9478, - "step": 26017 - }, - { - "epoch": 0.7372835727847205, - "grad_norm": 0.0, - "learning_rate": 3.4068006143434296e-06, - "loss": 0.8084, - "step": 26018 - }, - { - "epoch": 0.737311910226983, - "grad_norm": 0.0, - "learning_rate": 3.4061105907819202e-06, - "loss": 0.7949, - "step": 26019 - }, - { - "epoch": 0.7373402476692453, - "grad_norm": 0.0, - "learning_rate": 3.4054206227631857e-06, - "loss": 0.8687, - "step": 26020 - }, - { - "epoch": 0.7373685851115078, - "grad_norm": 0.0, - "learning_rate": 3.4047307102930425e-06, - "loss": 0.7951, - "step": 26021 - }, - { - "epoch": 0.7373969225537703, - "grad_norm": 0.0, - "learning_rate": 3.4040408533773017e-06, - "loss": 0.7644, - "step": 26022 - }, - { - "epoch": 0.7374252599960328, - "grad_norm": 0.0, - "learning_rate": 3.403351052021775e-06, - "loss": 0.8536, - "step": 26023 - }, - { - "epoch": 0.7374535974382952, - "grad_norm": 0.0, - "learning_rate": 3.4026613062322743e-06, - "loss": 0.8022, - "step": 26024 - }, - { - "epoch": 0.7374819348805577, - "grad_norm": 0.0, - "learning_rate": 3.4019716160146043e-06, - "loss": 0.8329, - "step": 26025 - }, - { - "epoch": 0.7375102723228202, - "grad_norm": 0.0, - "learning_rate": 3.401281981374578e-06, - "loss": 0.8508, - "step": 26026 - }, - { - "epoch": 0.7375386097650826, - "grad_norm": 0.0, - "learning_rate": 3.400592402318006e-06, - "loss": 0.8365, - "step": 26027 - }, - { - "epoch": 0.7375669472073451, - "grad_norm": 0.0, - "learning_rate": 3.399902878850693e-06, - "loss": 0.8106, - "step": 26028 - }, - { - "epoch": 0.7375952846496076, - "grad_norm": 0.0, - "learning_rate": 3.399213410978447e-06, - "loss": 0.7682, - "step": 26029 - }, - { - "epoch": 0.7376236220918699, - "grad_norm": 0.0, - "learning_rate": 3.398523998707083e-06, - "loss": 0.8368, - "step": 26030 - }, - { - "epoch": 0.7376519595341324, - "grad_norm": 0.0, - "learning_rate": 3.397834642042398e-06, - "loss": 0.816, - "step": 26031 - }, - { - "epoch": 0.7376802969763949, - "grad_norm": 0.0, - "learning_rate": 3.3971453409902067e-06, - "loss": 0.8565, - "step": 26032 - }, - { - "epoch": 0.7377086344186574, - "grad_norm": 0.0, - "learning_rate": 3.3964560955563097e-06, - "loss": 0.8395, - "step": 26033 - }, - { - "epoch": 0.7377369718609198, - "grad_norm": 0.0, - "learning_rate": 3.395766905746515e-06, - "loss": 0.8071, - "step": 26034 - }, - { - "epoch": 0.7377653093031823, - "grad_norm": 0.0, - "learning_rate": 3.3950777715666285e-06, - "loss": 0.757, - "step": 26035 - }, - { - "epoch": 0.7377936467454448, - "grad_norm": 0.0, - "learning_rate": 3.3943886930224536e-06, - "loss": 0.8353, - "step": 26036 - }, - { - "epoch": 0.7378219841877072, - "grad_norm": 0.0, - "learning_rate": 3.3936996701197955e-06, - "loss": 0.7597, - "step": 26037 - }, - { - "epoch": 0.7378503216299697, - "grad_norm": 0.0, - "learning_rate": 3.393010702864462e-06, - "loss": 0.7251, - "step": 26038 - }, - { - "epoch": 0.7378786590722322, - "grad_norm": 0.0, - "learning_rate": 3.3923217912622495e-06, - "loss": 0.8294, - "step": 26039 - }, - { - "epoch": 0.7379069965144947, - "grad_norm": 0.0, - "learning_rate": 3.391632935318968e-06, - "loss": 0.8593, - "step": 26040 - }, - { - "epoch": 0.737935333956757, - "grad_norm": 0.0, - "learning_rate": 3.3909441350404125e-06, - "loss": 0.803, - "step": 26041 - }, - { - "epoch": 0.7379636713990195, - "grad_norm": 0.0, - "learning_rate": 3.390255390432389e-06, - "loss": 0.8332, - "step": 26042 - }, - { - "epoch": 0.737992008841282, - "grad_norm": 0.0, - "learning_rate": 3.3895667015007027e-06, - "loss": 0.7971, - "step": 26043 - }, - { - "epoch": 0.7380203462835444, - "grad_norm": 0.0, - "learning_rate": 3.388878068251147e-06, - "loss": 0.8484, - "step": 26044 - }, - { - "epoch": 0.7380486837258069, - "grad_norm": 0.0, - "learning_rate": 3.3881894906895287e-06, - "loss": 0.82, - "step": 26045 - }, - { - "epoch": 0.7380770211680694, - "grad_norm": 0.0, - "learning_rate": 3.387500968821643e-06, - "loss": 0.7809, - "step": 26046 - }, - { - "epoch": 0.7381053586103319, - "grad_norm": 0.0, - "learning_rate": 3.3868125026532917e-06, - "loss": 0.8026, - "step": 26047 - }, - { - "epoch": 0.7381336960525943, - "grad_norm": 0.0, - "learning_rate": 3.3861240921902747e-06, - "loss": 0.7395, - "step": 26048 - }, - { - "epoch": 0.7381620334948568, - "grad_norm": 0.0, - "learning_rate": 3.3854357374383905e-06, - "loss": 0.796, - "step": 26049 - }, - { - "epoch": 0.7381903709371193, - "grad_norm": 0.0, - "learning_rate": 3.3847474384034383e-06, - "loss": 0.7709, - "step": 26050 - }, - { - "epoch": 0.7382187083793816, - "grad_norm": 0.0, - "learning_rate": 3.3840591950912172e-06, - "loss": 0.7708, - "step": 26051 - }, - { - "epoch": 0.7382470458216441, - "grad_norm": 0.0, - "learning_rate": 3.383371007507519e-06, - "loss": 0.8345, - "step": 26052 - }, - { - "epoch": 0.7382753832639066, - "grad_norm": 0.0, - "learning_rate": 3.3826828756581476e-06, - "loss": 0.8306, - "step": 26053 - }, - { - "epoch": 0.738303720706169, - "grad_norm": 0.0, - "learning_rate": 3.381994799548892e-06, - "loss": 0.7842, - "step": 26054 - }, - { - "epoch": 0.7383320581484315, - "grad_norm": 0.0, - "learning_rate": 3.3813067791855513e-06, - "loss": 0.7913, - "step": 26055 - }, - { - "epoch": 0.738360395590694, - "grad_norm": 0.0, - "learning_rate": 3.380618814573925e-06, - "loss": 0.8211, - "step": 26056 - }, - { - "epoch": 0.7383887330329565, - "grad_norm": 0.0, - "learning_rate": 3.3799309057198016e-06, - "loss": 0.8382, - "step": 26057 - }, - { - "epoch": 0.7384170704752189, - "grad_norm": 0.0, - "learning_rate": 3.379243052628979e-06, - "loss": 0.8374, - "step": 26058 - }, - { - "epoch": 0.7384454079174814, - "grad_norm": 0.0, - "learning_rate": 3.378555255307252e-06, - "loss": 0.8151, - "step": 26059 - }, - { - "epoch": 0.7384737453597439, - "grad_norm": 0.0, - "learning_rate": 3.377867513760411e-06, - "loss": 0.715, - "step": 26060 - }, - { - "epoch": 0.7385020828020062, - "grad_norm": 0.0, - "learning_rate": 3.3771798279942513e-06, - "loss": 0.8684, - "step": 26061 - }, - { - "epoch": 0.7385304202442687, - "grad_norm": 0.0, - "learning_rate": 3.376492198014565e-06, - "loss": 0.749, - "step": 26062 - }, - { - "epoch": 0.7385587576865312, - "grad_norm": 0.0, - "learning_rate": 3.3758046238271436e-06, - "loss": 0.7476, - "step": 26063 - }, - { - "epoch": 0.7385870951287937, - "grad_norm": 0.0, - "learning_rate": 3.375117105437784e-06, - "loss": 0.8554, - "step": 26064 - }, - { - "epoch": 0.7386154325710561, - "grad_norm": 0.0, - "learning_rate": 3.3744296428522693e-06, - "loss": 0.7609, - "step": 26065 - }, - { - "epoch": 0.7386437700133186, - "grad_norm": 0.0, - "learning_rate": 3.3737422360763938e-06, - "loss": 0.7339, - "step": 26066 - }, - { - "epoch": 0.7386721074555811, - "grad_norm": 0.0, - "learning_rate": 3.3730548851159517e-06, - "loss": 0.8185, - "step": 26067 - }, - { - "epoch": 0.7387004448978435, - "grad_norm": 0.0, - "learning_rate": 3.372367589976726e-06, - "loss": 0.7019, - "step": 26068 - }, - { - "epoch": 0.738728782340106, - "grad_norm": 0.0, - "learning_rate": 3.3716803506645125e-06, - "loss": 0.8078, - "step": 26069 - }, - { - "epoch": 0.7387571197823685, - "grad_norm": 0.0, - "learning_rate": 3.3709931671850935e-06, - "loss": 0.7923, - "step": 26070 - }, - { - "epoch": 0.738785457224631, - "grad_norm": 0.0, - "learning_rate": 3.370306039544261e-06, - "loss": 0.8686, - "step": 26071 - }, - { - "epoch": 0.7388137946668933, - "grad_norm": 0.0, - "learning_rate": 3.3696189677478053e-06, - "loss": 0.8273, - "step": 26072 - }, - { - "epoch": 0.7388421321091558, - "grad_norm": 0.0, - "learning_rate": 3.3689319518015083e-06, - "loss": 0.8522, - "step": 26073 - }, - { - "epoch": 0.7388704695514183, - "grad_norm": 0.0, - "learning_rate": 3.368244991711159e-06, - "loss": 0.8489, - "step": 26074 - }, - { - "epoch": 0.7388988069936807, - "grad_norm": 0.0, - "learning_rate": 3.367558087482545e-06, - "loss": 0.8238, - "step": 26075 - }, - { - "epoch": 0.7389271444359432, - "grad_norm": 0.0, - "learning_rate": 3.366871239121453e-06, - "loss": 0.7658, - "step": 26076 - }, - { - "epoch": 0.7389554818782057, - "grad_norm": 0.0, - "learning_rate": 3.36618444663367e-06, - "loss": 0.7682, - "step": 26077 - }, - { - "epoch": 0.7389838193204681, - "grad_norm": 0.0, - "learning_rate": 3.365497710024976e-06, - "loss": 0.8577, - "step": 26078 - }, - { - "epoch": 0.7390121567627306, - "grad_norm": 0.0, - "learning_rate": 3.3648110293011592e-06, - "loss": 0.8181, - "step": 26079 - }, - { - "epoch": 0.7390404942049931, - "grad_norm": 0.0, - "learning_rate": 3.3641244044680053e-06, - "loss": 0.8946, - "step": 26080 - }, - { - "epoch": 0.7390688316472556, - "grad_norm": 0.0, - "learning_rate": 3.3634378355312925e-06, - "loss": 0.7586, - "step": 26081 - }, - { - "epoch": 0.739097169089518, - "grad_norm": 0.0, - "learning_rate": 3.362751322496811e-06, - "loss": 0.8041, - "step": 26082 - }, - { - "epoch": 0.7391255065317804, - "grad_norm": 0.0, - "learning_rate": 3.362064865370336e-06, - "loss": 0.8302, - "step": 26083 - }, - { - "epoch": 0.7391538439740429, - "grad_norm": 0.0, - "learning_rate": 3.361378464157654e-06, - "loss": 0.795, - "step": 26084 - }, - { - "epoch": 0.7391821814163053, - "grad_norm": 0.0, - "learning_rate": 3.360692118864549e-06, - "loss": 0.7388, - "step": 26085 - }, - { - "epoch": 0.7392105188585678, - "grad_norm": 0.0, - "learning_rate": 3.3600058294967974e-06, - "loss": 0.8412, - "step": 26086 - }, - { - "epoch": 0.7392388563008303, - "grad_norm": 0.0, - "learning_rate": 3.3593195960601822e-06, - "loss": 0.8183, - "step": 26087 - }, - { - "epoch": 0.7392671937430928, - "grad_norm": 0.0, - "learning_rate": 3.3586334185604828e-06, - "loss": 0.894, - "step": 26088 - }, - { - "epoch": 0.7392955311853552, - "grad_norm": 0.0, - "learning_rate": 3.357947297003482e-06, - "loss": 0.8541, - "step": 26089 - }, - { - "epoch": 0.7393238686276177, - "grad_norm": 0.0, - "learning_rate": 3.3572612313949606e-06, - "loss": 0.9086, - "step": 26090 - }, - { - "epoch": 0.7393522060698802, - "grad_norm": 0.0, - "learning_rate": 3.35657522174069e-06, - "loss": 0.8422, - "step": 26091 - }, - { - "epoch": 0.7393805435121426, - "grad_norm": 0.0, - "learning_rate": 3.3558892680464538e-06, - "loss": 0.7365, - "step": 26092 - }, - { - "epoch": 0.739408880954405, - "grad_norm": 0.0, - "learning_rate": 3.355203370318033e-06, - "loss": 0.8759, - "step": 26093 - }, - { - "epoch": 0.7394372183966675, - "grad_norm": 0.0, - "learning_rate": 3.3545175285611986e-06, - "loss": 0.7865, - "step": 26094 - }, - { - "epoch": 0.73946555583893, - "grad_norm": 0.0, - "learning_rate": 3.3538317427817315e-06, - "loss": 0.8346, - "step": 26095 - }, - { - "epoch": 0.7394938932811924, - "grad_norm": 0.0, - "learning_rate": 3.35314601298541e-06, - "loss": 0.7845, - "step": 26096 - }, - { - "epoch": 0.7395222307234549, - "grad_norm": 0.0, - "learning_rate": 3.3524603391780043e-06, - "loss": 0.844, - "step": 26097 - }, - { - "epoch": 0.7395505681657174, - "grad_norm": 0.0, - "learning_rate": 3.3517747213652973e-06, - "loss": 0.8643, - "step": 26098 - }, - { - "epoch": 0.7395789056079798, - "grad_norm": 0.0, - "learning_rate": 3.351089159553057e-06, - "loss": 0.7595, - "step": 26099 - }, - { - "epoch": 0.7396072430502423, - "grad_norm": 0.0, - "learning_rate": 3.350403653747062e-06, - "loss": 0.7286, - "step": 26100 - }, - { - "epoch": 0.7396355804925048, - "grad_norm": 0.0, - "learning_rate": 3.349718203953086e-06, - "loss": 0.8008, - "step": 26101 - }, - { - "epoch": 0.7396639179347672, - "grad_norm": 0.0, - "learning_rate": 3.3490328101769044e-06, - "loss": 0.8466, - "step": 26102 - }, - { - "epoch": 0.7396922553770297, - "grad_norm": 0.0, - "learning_rate": 3.3483474724242915e-06, - "loss": 0.7221, - "step": 26103 - }, - { - "epoch": 0.7397205928192921, - "grad_norm": 0.0, - "learning_rate": 3.3476621907010142e-06, - "loss": 0.87, - "step": 26104 - }, - { - "epoch": 0.7397489302615546, - "grad_norm": 0.0, - "learning_rate": 3.346976965012849e-06, - "loss": 0.844, - "step": 26105 - }, - { - "epoch": 0.739777267703817, - "grad_norm": 0.0, - "learning_rate": 3.3462917953655706e-06, - "loss": 0.8325, - "step": 26106 - }, - { - "epoch": 0.7398056051460795, - "grad_norm": 0.0, - "learning_rate": 3.3456066817649446e-06, - "loss": 0.7698, - "step": 26107 - }, - { - "epoch": 0.739833942588342, - "grad_norm": 0.0, - "learning_rate": 3.344921624216744e-06, - "loss": 0.8965, - "step": 26108 - }, - { - "epoch": 0.7398622800306044, - "grad_norm": 0.0, - "learning_rate": 3.344236622726743e-06, - "loss": 0.8485, - "step": 26109 - }, - { - "epoch": 0.7398906174728669, - "grad_norm": 0.0, - "learning_rate": 3.3435516773007047e-06, - "loss": 0.9396, - "step": 26110 - }, - { - "epoch": 0.7399189549151294, - "grad_norm": 0.0, - "learning_rate": 3.3428667879444067e-06, - "loss": 0.8774, - "step": 26111 - }, - { - "epoch": 0.7399472923573919, - "grad_norm": 0.0, - "learning_rate": 3.3421819546636104e-06, - "loss": 0.9077, - "step": 26112 - }, - { - "epoch": 0.7399756297996543, - "grad_norm": 0.0, - "learning_rate": 3.341497177464087e-06, - "loss": 0.7416, - "step": 26113 - }, - { - "epoch": 0.7400039672419167, - "grad_norm": 0.0, - "learning_rate": 3.340812456351605e-06, - "loss": 0.7569, - "step": 26114 - }, - { - "epoch": 0.7400323046841792, - "grad_norm": 0.0, - "learning_rate": 3.340127791331934e-06, - "loss": 0.8505, - "step": 26115 - }, - { - "epoch": 0.7400606421264416, - "grad_norm": 0.0, - "learning_rate": 3.3394431824108397e-06, - "loss": 0.8188, - "step": 26116 - }, - { - "epoch": 0.7400889795687041, - "grad_norm": 0.0, - "learning_rate": 3.338758629594091e-06, - "loss": 0.7709, - "step": 26117 - }, - { - "epoch": 0.7401173170109666, - "grad_norm": 0.0, - "learning_rate": 3.33807413288745e-06, - "loss": 0.8464, - "step": 26118 - }, - { - "epoch": 0.740145654453229, - "grad_norm": 0.0, - "learning_rate": 3.3373896922966863e-06, - "loss": 0.8655, - "step": 26119 - }, - { - "epoch": 0.7401739918954915, - "grad_norm": 0.0, - "learning_rate": 3.3367053078275614e-06, - "loss": 0.7697, - "step": 26120 - }, - { - "epoch": 0.740202329337754, - "grad_norm": 0.0, - "learning_rate": 3.336020979485841e-06, - "loss": 0.8719, - "step": 26121 - }, - { - "epoch": 0.7402306667800165, - "grad_norm": 0.0, - "learning_rate": 3.3353367072772935e-06, - "loss": 0.7299, - "step": 26122 - }, - { - "epoch": 0.7402590042222789, - "grad_norm": 0.0, - "learning_rate": 3.3346524912076774e-06, - "loss": 0.8083, - "step": 26123 - }, - { - "epoch": 0.7402873416645414, - "grad_norm": 0.0, - "learning_rate": 3.333968331282759e-06, - "loss": 0.8598, - "step": 26124 - }, - { - "epoch": 0.7403156791068038, - "grad_norm": 0.0, - "learning_rate": 3.3332842275083023e-06, - "loss": 0.8491, - "step": 26125 - }, - { - "epoch": 0.7403440165490662, - "grad_norm": 0.0, - "learning_rate": 3.3326001798900664e-06, - "loss": 0.7717, - "step": 26126 - }, - { - "epoch": 0.7403723539913287, - "grad_norm": 0.0, - "learning_rate": 3.3319161884338135e-06, - "loss": 0.8896, - "step": 26127 - }, - { - "epoch": 0.7404006914335912, - "grad_norm": 0.0, - "learning_rate": 3.331232253145308e-06, - "loss": 0.7987, - "step": 26128 - }, - { - "epoch": 0.7404290288758537, - "grad_norm": 0.0, - "learning_rate": 3.330548374030309e-06, - "loss": 0.7968, - "step": 26129 - }, - { - "epoch": 0.7404573663181161, - "grad_norm": 0.0, - "learning_rate": 3.3298645510945813e-06, - "loss": 0.7838, - "step": 26130 - }, - { - "epoch": 0.7404857037603786, - "grad_norm": 0.0, - "learning_rate": 3.3291807843438784e-06, - "loss": 0.9209, - "step": 26131 - }, - { - "epoch": 0.7405140412026411, - "grad_norm": 0.0, - "learning_rate": 3.328497073783966e-06, - "loss": 0.8187, - "step": 26132 - }, - { - "epoch": 0.7405423786449035, - "grad_norm": 0.0, - "learning_rate": 3.327813419420597e-06, - "loss": 0.8694, - "step": 26133 - }, - { - "epoch": 0.740570716087166, - "grad_norm": 0.0, - "learning_rate": 3.3271298212595325e-06, - "loss": 0.8366, - "step": 26134 - }, - { - "epoch": 0.7405990535294285, - "grad_norm": 0.0, - "learning_rate": 3.3264462793065343e-06, - "loss": 0.8032, - "step": 26135 - }, - { - "epoch": 0.740627390971691, - "grad_norm": 0.0, - "learning_rate": 3.3257627935673554e-06, - "loss": 0.8243, - "step": 26136 - }, - { - "epoch": 0.7406557284139533, - "grad_norm": 0.0, - "learning_rate": 3.3250793640477543e-06, - "loss": 0.7166, - "step": 26137 - }, - { - "epoch": 0.7406840658562158, - "grad_norm": 0.0, - "learning_rate": 3.3243959907534917e-06, - "loss": 0.8132, - "step": 26138 - }, - { - "epoch": 0.7407124032984783, - "grad_norm": 0.0, - "learning_rate": 3.3237126736903168e-06, - "loss": 0.8865, - "step": 26139 - }, - { - "epoch": 0.7407407407407407, - "grad_norm": 0.0, - "learning_rate": 3.3230294128639894e-06, - "loss": 0.8822, - "step": 26140 - }, - { - "epoch": 0.7407690781830032, - "grad_norm": 0.0, - "learning_rate": 3.322346208280265e-06, - "loss": 0.7725, - "step": 26141 - }, - { - "epoch": 0.7407974156252657, - "grad_norm": 0.0, - "learning_rate": 3.3216630599448985e-06, - "loss": 0.877, - "step": 26142 - }, - { - "epoch": 0.7408257530675281, - "grad_norm": 0.0, - "learning_rate": 3.3209799678636466e-06, - "loss": 0.8433, - "step": 26143 - }, - { - "epoch": 0.7408540905097906, - "grad_norm": 0.0, - "learning_rate": 3.3202969320422586e-06, - "loss": 0.6403, - "step": 26144 - }, - { - "epoch": 0.7408824279520531, - "grad_norm": 0.0, - "learning_rate": 3.319613952486488e-06, - "loss": 0.7995, - "step": 26145 - }, - { - "epoch": 0.7409107653943156, - "grad_norm": 0.0, - "learning_rate": 3.3189310292020948e-06, - "loss": 0.8067, - "step": 26146 - }, - { - "epoch": 0.7409391028365779, - "grad_norm": 0.0, - "learning_rate": 3.3182481621948225e-06, - "loss": 0.7986, - "step": 26147 - }, - { - "epoch": 0.7409674402788404, - "grad_norm": 0.0, - "learning_rate": 3.31756535147043e-06, - "loss": 0.8573, - "step": 26148 - }, - { - "epoch": 0.7409957777211029, - "grad_norm": 0.0, - "learning_rate": 3.316882597034663e-06, - "loss": 0.8485, - "step": 26149 - }, - { - "epoch": 0.7410241151633653, - "grad_norm": 0.0, - "learning_rate": 3.3161998988932762e-06, - "loss": 0.8698, - "step": 26150 - }, - { - "epoch": 0.7410524526056278, - "grad_norm": 0.0, - "learning_rate": 3.3155172570520223e-06, - "loss": 0.8332, - "step": 26151 - }, - { - "epoch": 0.7410807900478903, - "grad_norm": 0.0, - "learning_rate": 3.3148346715166457e-06, - "loss": 0.8667, - "step": 26152 - }, - { - "epoch": 0.7411091274901528, - "grad_norm": 0.0, - "learning_rate": 3.314152142292899e-06, - "loss": 0.7596, - "step": 26153 - }, - { - "epoch": 0.7411374649324152, - "grad_norm": 0.0, - "learning_rate": 3.313469669386532e-06, - "loss": 0.8028, - "step": 26154 - }, - { - "epoch": 0.7411658023746777, - "grad_norm": 0.0, - "learning_rate": 3.3127872528032924e-06, - "loss": 0.859, - "step": 26155 - }, - { - "epoch": 0.7411941398169402, - "grad_norm": 0.0, - "learning_rate": 3.312104892548932e-06, - "loss": 0.7053, - "step": 26156 - }, - { - "epoch": 0.7412224772592025, - "grad_norm": 0.0, - "learning_rate": 3.311422588629193e-06, - "loss": 0.8131, - "step": 26157 - }, - { - "epoch": 0.741250814701465, - "grad_norm": 0.0, - "learning_rate": 3.310740341049825e-06, - "loss": 0.7378, - "step": 26158 - }, - { - "epoch": 0.7412791521437275, - "grad_norm": 0.0, - "learning_rate": 3.3100581498165783e-06, - "loss": 0.9601, - "step": 26159 - }, - { - "epoch": 0.74130748958599, - "grad_norm": 0.0, - "learning_rate": 3.3093760149351926e-06, - "loss": 0.823, - "step": 26160 - }, - { - "epoch": 0.7413358270282524, - "grad_norm": 0.0, - "learning_rate": 3.308693936411421e-06, - "loss": 0.9286, - "step": 26161 - }, - { - "epoch": 0.7413641644705149, - "grad_norm": 0.0, - "learning_rate": 3.3080119142510014e-06, - "loss": 0.9536, - "step": 26162 - }, - { - "epoch": 0.7413925019127774, - "grad_norm": 0.0, - "learning_rate": 3.3073299484596834e-06, - "loss": 0.8145, - "step": 26163 - }, - { - "epoch": 0.7414208393550398, - "grad_norm": 0.0, - "learning_rate": 3.3066480390432085e-06, - "loss": 0.7743, - "step": 26164 - }, - { - "epoch": 0.7414491767973023, - "grad_norm": 0.0, - "learning_rate": 3.305966186007328e-06, - "loss": 0.8432, - "step": 26165 - }, - { - "epoch": 0.7414775142395648, - "grad_norm": 0.0, - "learning_rate": 3.3052843893577757e-06, - "loss": 0.7718, - "step": 26166 - }, - { - "epoch": 0.7415058516818271, - "grad_norm": 0.0, - "learning_rate": 3.3046026491003004e-06, - "loss": 0.842, - "step": 26167 - }, - { - "epoch": 0.7415341891240896, - "grad_norm": 0.0, - "learning_rate": 3.303920965240641e-06, - "loss": 0.8181, - "step": 26168 - }, - { - "epoch": 0.7415625265663521, - "grad_norm": 0.0, - "learning_rate": 3.303239337784547e-06, - "loss": 0.7871, - "step": 26169 - }, - { - "epoch": 0.7415908640086146, - "grad_norm": 0.0, - "learning_rate": 3.3025577667377507e-06, - "loss": 0.8155, - "step": 26170 - }, - { - "epoch": 0.741619201450877, - "grad_norm": 0.0, - "learning_rate": 3.3018762521059976e-06, - "loss": 0.7873, - "step": 26171 - }, - { - "epoch": 0.7416475388931395, - "grad_norm": 0.0, - "learning_rate": 3.3011947938950317e-06, - "loss": 0.7752, - "step": 26172 - }, - { - "epoch": 0.741675876335402, - "grad_norm": 0.0, - "learning_rate": 3.300513392110586e-06, - "loss": 0.8488, - "step": 26173 - }, - { - "epoch": 0.7417042137776644, - "grad_norm": 0.0, - "learning_rate": 3.2998320467584034e-06, - "loss": 0.7407, - "step": 26174 - }, - { - "epoch": 0.7417325512199269, - "grad_norm": 0.0, - "learning_rate": 3.2991507578442272e-06, - "loss": 0.7151, - "step": 26175 - }, - { - "epoch": 0.7417608886621894, - "grad_norm": 0.0, - "learning_rate": 3.29846952537379e-06, - "loss": 0.7699, - "step": 26176 - }, - { - "epoch": 0.7417892261044519, - "grad_norm": 0.0, - "learning_rate": 3.2977883493528307e-06, - "loss": 0.7631, - "step": 26177 - }, - { - "epoch": 0.7418175635467142, - "grad_norm": 0.0, - "learning_rate": 3.2971072297870897e-06, - "loss": 0.7561, - "step": 26178 - }, - { - "epoch": 0.7418459009889767, - "grad_norm": 0.0, - "learning_rate": 3.296426166682304e-06, - "loss": 0.8104, - "step": 26179 - }, - { - "epoch": 0.7418742384312392, - "grad_norm": 0.0, - "learning_rate": 3.295745160044214e-06, - "loss": 0.8453, - "step": 26180 - }, - { - "epoch": 0.7419025758735016, - "grad_norm": 0.0, - "learning_rate": 3.295064209878547e-06, - "loss": 0.812, - "step": 26181 - }, - { - "epoch": 0.7419309133157641, - "grad_norm": 0.0, - "learning_rate": 3.294383316191049e-06, - "loss": 0.9092, - "step": 26182 - }, - { - "epoch": 0.7419592507580266, - "grad_norm": 0.0, - "learning_rate": 3.2937024789874462e-06, - "loss": 0.9791, - "step": 26183 - }, - { - "epoch": 0.7419875882002891, - "grad_norm": 0.0, - "learning_rate": 3.2930216982734775e-06, - "loss": 0.7465, - "step": 26184 - }, - { - "epoch": 0.7420159256425515, - "grad_norm": 0.0, - "learning_rate": 3.2923409740548805e-06, - "loss": 0.8589, - "step": 26185 - }, - { - "epoch": 0.742044263084814, - "grad_norm": 0.0, - "learning_rate": 3.291660306337384e-06, - "loss": 0.7761, - "step": 26186 - }, - { - "epoch": 0.7420726005270765, - "grad_norm": 0.0, - "learning_rate": 3.290979695126724e-06, - "loss": 0.8689, - "step": 26187 - }, - { - "epoch": 0.7421009379693388, - "grad_norm": 0.0, - "learning_rate": 3.2902991404286354e-06, - "loss": 0.8085, - "step": 26188 - }, - { - "epoch": 0.7421292754116013, - "grad_norm": 0.0, - "learning_rate": 3.2896186422488463e-06, - "loss": 0.7607, - "step": 26189 - }, - { - "epoch": 0.7421576128538638, - "grad_norm": 0.0, - "learning_rate": 3.2889382005930912e-06, - "loss": 0.7593, - "step": 26190 - }, - { - "epoch": 0.7421859502961262, - "grad_norm": 0.0, - "learning_rate": 3.2882578154671017e-06, - "loss": 0.772, - "step": 26191 - }, - { - "epoch": 0.7422142877383887, - "grad_norm": 0.0, - "learning_rate": 3.2875774868766087e-06, - "loss": 0.9029, - "step": 26192 - }, - { - "epoch": 0.7422426251806512, - "grad_norm": 0.0, - "learning_rate": 3.286897214827347e-06, - "loss": 0.7836, - "step": 26193 - }, - { - "epoch": 0.7422709626229137, - "grad_norm": 0.0, - "learning_rate": 3.286216999325039e-06, - "loss": 0.9167, - "step": 26194 - }, - { - "epoch": 0.7422993000651761, - "grad_norm": 0.0, - "learning_rate": 3.2855368403754185e-06, - "loss": 0.7318, - "step": 26195 - }, - { - "epoch": 0.7423276375074386, - "grad_norm": 0.0, - "learning_rate": 3.2848567379842177e-06, - "loss": 0.7206, - "step": 26196 - }, - { - "epoch": 0.7423559749497011, - "grad_norm": 0.0, - "learning_rate": 3.2841766921571593e-06, - "loss": 0.7099, - "step": 26197 - }, - { - "epoch": 0.7423843123919635, - "grad_norm": 0.0, - "learning_rate": 3.2834967028999777e-06, - "loss": 0.7578, - "step": 26198 - }, - { - "epoch": 0.742412649834226, - "grad_norm": 0.0, - "learning_rate": 3.2828167702183945e-06, - "loss": 0.9524, - "step": 26199 - }, - { - "epoch": 0.7424409872764884, - "grad_norm": 0.0, - "learning_rate": 3.2821368941181396e-06, - "loss": 0.8287, - "step": 26200 - }, - { - "epoch": 0.7424693247187509, - "grad_norm": 0.0, - "learning_rate": 3.2814570746049435e-06, - "loss": 0.827, - "step": 26201 - }, - { - "epoch": 0.7424976621610133, - "grad_norm": 0.0, - "learning_rate": 3.2807773116845267e-06, - "loss": 0.8531, - "step": 26202 - }, - { - "epoch": 0.7425259996032758, - "grad_norm": 0.0, - "learning_rate": 3.2800976053626168e-06, - "loss": 0.7614, - "step": 26203 - }, - { - "epoch": 0.7425543370455383, - "grad_norm": 0.0, - "learning_rate": 3.27941795564494e-06, - "loss": 0.9561, - "step": 26204 - }, - { - "epoch": 0.7425826744878007, - "grad_norm": 0.0, - "learning_rate": 3.2787383625372214e-06, - "loss": 0.8697, - "step": 26205 - }, - { - "epoch": 0.7426110119300632, - "grad_norm": 0.0, - "learning_rate": 3.2780588260451896e-06, - "loss": 0.8267, - "step": 26206 - }, - { - "epoch": 0.7426393493723257, - "grad_norm": 0.0, - "learning_rate": 3.2773793461745608e-06, - "loss": 0.8924, - "step": 26207 - }, - { - "epoch": 0.7426676868145882, - "grad_norm": 0.0, - "learning_rate": 3.2766999229310627e-06, - "loss": 0.8301, - "step": 26208 - }, - { - "epoch": 0.7426960242568506, - "grad_norm": 0.0, - "learning_rate": 3.2760205563204195e-06, - "loss": 0.8431, - "step": 26209 - }, - { - "epoch": 0.742724361699113, - "grad_norm": 0.0, - "learning_rate": 3.2753412463483505e-06, - "loss": 0.7377, - "step": 26210 - }, - { - "epoch": 0.7427526991413755, - "grad_norm": 0.0, - "learning_rate": 3.2746619930205815e-06, - "loss": 0.8406, - "step": 26211 - }, - { - "epoch": 0.7427810365836379, - "grad_norm": 0.0, - "learning_rate": 3.2739827963428296e-06, - "loss": 0.9456, - "step": 26212 - }, - { - "epoch": 0.7428093740259004, - "grad_norm": 0.0, - "learning_rate": 3.2733036563208165e-06, - "loss": 0.7807, - "step": 26213 - }, - { - "epoch": 0.7428377114681629, - "grad_norm": 0.0, - "learning_rate": 3.272624572960269e-06, - "loss": 0.8459, - "step": 26214 - }, - { - "epoch": 0.7428660489104253, - "grad_norm": 0.0, - "learning_rate": 3.2719455462669003e-06, - "loss": 0.7729, - "step": 26215 - }, - { - "epoch": 0.7428943863526878, - "grad_norm": 0.0, - "learning_rate": 3.2712665762464313e-06, - "loss": 0.7842, - "step": 26216 - }, - { - "epoch": 0.7429227237949503, - "grad_norm": 0.0, - "learning_rate": 3.270587662904584e-06, - "loss": 0.8059, - "step": 26217 - }, - { - "epoch": 0.7429510612372128, - "grad_norm": 0.0, - "learning_rate": 3.269908806247074e-06, - "loss": 0.8456, - "step": 26218 - }, - { - "epoch": 0.7429793986794752, - "grad_norm": 0.0, - "learning_rate": 3.2692300062796257e-06, - "loss": 0.7174, - "step": 26219 - }, - { - "epoch": 0.7430077361217376, - "grad_norm": 0.0, - "learning_rate": 3.2685512630079497e-06, - "loss": 0.7975, - "step": 26220 - }, - { - "epoch": 0.7430360735640001, - "grad_norm": 0.0, - "learning_rate": 3.267872576437765e-06, - "loss": 0.6948, - "step": 26221 - }, - { - "epoch": 0.7430644110062625, - "grad_norm": 0.0, - "learning_rate": 3.2671939465747937e-06, - "loss": 0.8484, - "step": 26222 - }, - { - "epoch": 0.743092748448525, - "grad_norm": 0.0, - "learning_rate": 3.2665153734247436e-06, - "loss": 0.8096, - "step": 26223 - }, - { - "epoch": 0.7431210858907875, - "grad_norm": 0.0, - "learning_rate": 3.265836856993335e-06, - "loss": 0.8711, - "step": 26224 - }, - { - "epoch": 0.74314942333305, - "grad_norm": 0.0, - "learning_rate": 3.2651583972862877e-06, - "loss": 0.8127, - "step": 26225 - }, - { - "epoch": 0.7431777607753124, - "grad_norm": 0.0, - "learning_rate": 3.2644799943093075e-06, - "loss": 0.8664, - "step": 26226 - }, - { - "epoch": 0.7432060982175749, - "grad_norm": 0.0, - "learning_rate": 3.263801648068118e-06, - "loss": 0.8847, - "step": 26227 - }, - { - "epoch": 0.7432344356598374, - "grad_norm": 0.0, - "learning_rate": 3.2631233585684243e-06, - "loss": 0.839, - "step": 26228 - }, - { - "epoch": 0.7432627731020998, - "grad_norm": 0.0, - "learning_rate": 3.262445125815945e-06, - "loss": 0.8351, - "step": 26229 - }, - { - "epoch": 0.7432911105443623, - "grad_norm": 0.0, - "learning_rate": 3.2617669498163917e-06, - "loss": 0.8573, - "step": 26230 - }, - { - "epoch": 0.7433194479866247, - "grad_norm": 0.0, - "learning_rate": 3.2610888305754783e-06, - "loss": 0.7338, - "step": 26231 - }, - { - "epoch": 0.7433477854288872, - "grad_norm": 0.0, - "learning_rate": 3.260410768098916e-06, - "loss": 0.8511, - "step": 26232 - }, - { - "epoch": 0.7433761228711496, - "grad_norm": 0.0, - "learning_rate": 3.2597327623924193e-06, - "loss": 0.8009, - "step": 26233 - }, - { - "epoch": 0.7434044603134121, - "grad_norm": 0.0, - "learning_rate": 3.259054813461693e-06, - "loss": 0.791, - "step": 26234 - }, - { - "epoch": 0.7434327977556746, - "grad_norm": 0.0, - "learning_rate": 3.258376921312455e-06, - "loss": 0.8208, - "step": 26235 - }, - { - "epoch": 0.743461135197937, - "grad_norm": 0.0, - "learning_rate": 3.2576990859504075e-06, - "loss": 0.8232, - "step": 26236 - }, - { - "epoch": 0.7434894726401995, - "grad_norm": 0.0, - "learning_rate": 3.257021307381265e-06, - "loss": 0.8598, - "step": 26237 - }, - { - "epoch": 0.743517810082462, - "grad_norm": 0.0, - "learning_rate": 3.2563435856107396e-06, - "loss": 0.8388, - "step": 26238 - }, - { - "epoch": 0.7435461475247244, - "grad_norm": 0.0, - "learning_rate": 3.255665920644533e-06, - "loss": 0.8271, - "step": 26239 - }, - { - "epoch": 0.7435744849669869, - "grad_norm": 0.0, - "learning_rate": 3.2549883124883595e-06, - "loss": 0.8759, - "step": 26240 - }, - { - "epoch": 0.7436028224092494, - "grad_norm": 0.0, - "learning_rate": 3.254310761147922e-06, - "loss": 0.8632, - "step": 26241 - }, - { - "epoch": 0.7436311598515118, - "grad_norm": 0.0, - "learning_rate": 3.253633266628928e-06, - "loss": 0.7767, - "step": 26242 - }, - { - "epoch": 0.7436594972937742, - "grad_norm": 0.0, - "learning_rate": 3.2529558289370877e-06, - "loss": 0.8418, - "step": 26243 - }, - { - "epoch": 0.7436878347360367, - "grad_norm": 0.0, - "learning_rate": 3.2522784480781057e-06, - "loss": 0.7964, - "step": 26244 - }, - { - "epoch": 0.7437161721782992, - "grad_norm": 0.0, - "learning_rate": 3.2516011240576874e-06, - "loss": 0.8875, - "step": 26245 - }, - { - "epoch": 0.7437445096205616, - "grad_norm": 0.0, - "learning_rate": 3.2509238568815426e-06, - "loss": 0.846, - "step": 26246 - }, - { - "epoch": 0.7437728470628241, - "grad_norm": 0.0, - "learning_rate": 3.2502466465553697e-06, - "loss": 0.8103, - "step": 26247 - }, - { - "epoch": 0.7438011845050866, - "grad_norm": 0.0, - "learning_rate": 3.2495694930848777e-06, - "loss": 0.9223, - "step": 26248 - }, - { - "epoch": 0.7438295219473491, - "grad_norm": 0.0, - "learning_rate": 3.2488923964757656e-06, - "loss": 0.9297, - "step": 26249 - }, - { - "epoch": 0.7438578593896115, - "grad_norm": 0.0, - "learning_rate": 3.2482153567337405e-06, - "loss": 0.8323, - "step": 26250 - }, - { - "epoch": 0.743886196831874, - "grad_norm": 0.0, - "learning_rate": 3.2475383738645072e-06, - "loss": 0.8323, - "step": 26251 - }, - { - "epoch": 0.7439145342741365, - "grad_norm": 0.0, - "learning_rate": 3.2468614478737626e-06, - "loss": 0.8413, - "step": 26252 - }, - { - "epoch": 0.7439428717163988, - "grad_norm": 0.0, - "learning_rate": 3.246184578767212e-06, - "loss": 0.7935, - "step": 26253 - }, - { - "epoch": 0.7439712091586613, - "grad_norm": 0.0, - "learning_rate": 3.2455077665505597e-06, - "loss": 0.8752, - "step": 26254 - }, - { - "epoch": 0.7439995466009238, - "grad_norm": 0.0, - "learning_rate": 3.2448310112295012e-06, - "loss": 0.7594, - "step": 26255 - }, - { - "epoch": 0.7440278840431863, - "grad_norm": 0.0, - "learning_rate": 3.2441543128097386e-06, - "loss": 0.7526, - "step": 26256 - }, - { - "epoch": 0.7440562214854487, - "grad_norm": 0.0, - "learning_rate": 3.243477671296973e-06, - "loss": 0.8232, - "step": 26257 - }, - { - "epoch": 0.7440845589277112, - "grad_norm": 0.0, - "learning_rate": 3.242801086696904e-06, - "loss": 0.7826, - "step": 26258 - }, - { - "epoch": 0.7441128963699737, - "grad_norm": 0.0, - "learning_rate": 3.242124559015234e-06, - "loss": 0.83, - "step": 26259 - }, - { - "epoch": 0.7441412338122361, - "grad_norm": 0.0, - "learning_rate": 3.241448088257655e-06, - "loss": 0.8057, - "step": 26260 - }, - { - "epoch": 0.7441695712544986, - "grad_norm": 0.0, - "learning_rate": 3.240771674429869e-06, - "loss": 0.7887, - "step": 26261 - }, - { - "epoch": 0.7441979086967611, - "grad_norm": 0.0, - "learning_rate": 3.240095317537576e-06, - "loss": 0.77, - "step": 26262 - }, - { - "epoch": 0.7442262461390234, - "grad_norm": 0.0, - "learning_rate": 3.239419017586467e-06, - "loss": 0.7677, - "step": 26263 - }, - { - "epoch": 0.7442545835812859, - "grad_norm": 0.0, - "learning_rate": 3.2387427745822453e-06, - "loss": 0.7943, - "step": 26264 - }, - { - "epoch": 0.7442829210235484, - "grad_norm": 0.0, - "learning_rate": 3.2380665885306017e-06, - "loss": 0.8052, - "step": 26265 - }, - { - "epoch": 0.7443112584658109, - "grad_norm": 0.0, - "learning_rate": 3.237390459437233e-06, - "loss": 0.7875, - "step": 26266 - }, - { - "epoch": 0.7443395959080733, - "grad_norm": 0.0, - "learning_rate": 3.236714387307839e-06, - "loss": 0.7577, - "step": 26267 - }, - { - "epoch": 0.7443679333503358, - "grad_norm": 0.0, - "learning_rate": 3.2360383721481082e-06, - "loss": 0.9586, - "step": 26268 - }, - { - "epoch": 0.7443962707925983, - "grad_norm": 0.0, - "learning_rate": 3.2353624139637383e-06, - "loss": 0.9119, - "step": 26269 - }, - { - "epoch": 0.7444246082348607, - "grad_norm": 0.0, - "learning_rate": 3.234686512760422e-06, - "loss": 0.7638, - "step": 26270 - }, - { - "epoch": 0.7444529456771232, - "grad_norm": 0.0, - "learning_rate": 3.2340106685438545e-06, - "loss": 0.7975, - "step": 26271 - }, - { - "epoch": 0.7444812831193857, - "grad_norm": 0.0, - "learning_rate": 3.2333348813197306e-06, - "loss": 0.8454, - "step": 26272 - }, - { - "epoch": 0.7445096205616482, - "grad_norm": 0.0, - "learning_rate": 3.2326591510937353e-06, - "loss": 0.7642, - "step": 26273 - }, - { - "epoch": 0.7445379580039105, - "grad_norm": 0.0, - "learning_rate": 3.2319834778715662e-06, - "loss": 0.7983, - "step": 26274 - }, - { - "epoch": 0.744566295446173, - "grad_norm": 0.0, - "learning_rate": 3.2313078616589166e-06, - "loss": 0.8977, - "step": 26275 - }, - { - "epoch": 0.7445946328884355, - "grad_norm": 0.0, - "learning_rate": 3.2306323024614717e-06, - "loss": 0.6588, - "step": 26276 - }, - { - "epoch": 0.7446229703306979, - "grad_norm": 0.0, - "learning_rate": 3.229956800284927e-06, - "loss": 0.7842, - "step": 26277 - }, - { - "epoch": 0.7446513077729604, - "grad_norm": 0.0, - "learning_rate": 3.2292813551349666e-06, - "loss": 0.7995, - "step": 26278 - }, - { - "epoch": 0.7446796452152229, - "grad_norm": 0.0, - "learning_rate": 3.228605967017284e-06, - "loss": 0.8452, - "step": 26279 - }, - { - "epoch": 0.7447079826574854, - "grad_norm": 0.0, - "learning_rate": 3.2279306359375718e-06, - "loss": 0.7321, - "step": 26280 - }, - { - "epoch": 0.7447363200997478, - "grad_norm": 0.0, - "learning_rate": 3.22725536190151e-06, - "loss": 0.9177, - "step": 26281 - }, - { - "epoch": 0.7447646575420103, - "grad_norm": 0.0, - "learning_rate": 3.2265801449147927e-06, - "loss": 0.8528, - "step": 26282 - }, - { - "epoch": 0.7447929949842728, - "grad_norm": 0.0, - "learning_rate": 3.2259049849831047e-06, - "loss": 0.8498, - "step": 26283 - }, - { - "epoch": 0.7448213324265351, - "grad_norm": 0.0, - "learning_rate": 3.225229882112135e-06, - "loss": 0.8267, - "step": 26284 - }, - { - "epoch": 0.7448496698687976, - "grad_norm": 0.0, - "learning_rate": 3.224554836307573e-06, - "loss": 0.8639, - "step": 26285 - }, - { - "epoch": 0.7448780073110601, - "grad_norm": 0.0, - "learning_rate": 3.223879847575099e-06, - "loss": 0.8673, - "step": 26286 - }, - { - "epoch": 0.7449063447533225, - "grad_norm": 0.0, - "learning_rate": 3.2232049159204005e-06, - "loss": 0.7779, - "step": 26287 - }, - { - "epoch": 0.744934682195585, - "grad_norm": 0.0, - "learning_rate": 3.222530041349168e-06, - "loss": 0.7322, - "step": 26288 - }, - { - "epoch": 0.7449630196378475, - "grad_norm": 0.0, - "learning_rate": 3.221855223867076e-06, - "loss": 0.831, - "step": 26289 - }, - { - "epoch": 0.74499135708011, - "grad_norm": 0.0, - "learning_rate": 3.221180463479817e-06, - "loss": 0.8156, - "step": 26290 - }, - { - "epoch": 0.7450196945223724, - "grad_norm": 0.0, - "learning_rate": 3.2205057601930744e-06, - "loss": 0.7772, - "step": 26291 - }, - { - "epoch": 0.7450480319646349, - "grad_norm": 0.0, - "learning_rate": 3.219831114012526e-06, - "loss": 0.7385, - "step": 26292 - }, - { - "epoch": 0.7450763694068974, - "grad_norm": 0.0, - "learning_rate": 3.219156524943862e-06, - "loss": 0.8115, - "step": 26293 - }, - { - "epoch": 0.7451047068491597, - "grad_norm": 0.0, - "learning_rate": 3.2184819929927557e-06, - "loss": 0.8425, - "step": 26294 - }, - { - "epoch": 0.7451330442914222, - "grad_norm": 0.0, - "learning_rate": 3.2178075181648947e-06, - "loss": 0.8279, - "step": 26295 - }, - { - "epoch": 0.7451613817336847, - "grad_norm": 0.0, - "learning_rate": 3.21713310046596e-06, - "loss": 0.8014, - "step": 26296 - }, - { - "epoch": 0.7451897191759472, - "grad_norm": 0.0, - "learning_rate": 3.216458739901631e-06, - "loss": 0.714, - "step": 26297 - }, - { - "epoch": 0.7452180566182096, - "grad_norm": 0.0, - "learning_rate": 3.2157844364775924e-06, - "loss": 0.8363, - "step": 26298 - }, - { - "epoch": 0.7452463940604721, - "grad_norm": 0.0, - "learning_rate": 3.2151101901995184e-06, - "loss": 0.8549, - "step": 26299 - }, - { - "epoch": 0.7452747315027346, - "grad_norm": 0.0, - "learning_rate": 3.21443600107309e-06, - "loss": 0.8853, - "step": 26300 - }, - { - "epoch": 0.745303068944997, - "grad_norm": 0.0, - "learning_rate": 3.2137618691039908e-06, - "loss": 0.857, - "step": 26301 - }, - { - "epoch": 0.7453314063872595, - "grad_norm": 0.0, - "learning_rate": 3.213087794297891e-06, - "loss": 0.7012, - "step": 26302 - }, - { - "epoch": 0.745359743829522, - "grad_norm": 0.0, - "learning_rate": 3.2124137766604735e-06, - "loss": 0.7343, - "step": 26303 - }, - { - "epoch": 0.7453880812717845, - "grad_norm": 0.0, - "learning_rate": 3.211739816197419e-06, - "loss": 0.7837, - "step": 26304 - }, - { - "epoch": 0.7454164187140468, - "grad_norm": 0.0, - "learning_rate": 3.211065912914397e-06, - "loss": 0.6702, - "step": 26305 - }, - { - "epoch": 0.7454447561563093, - "grad_norm": 0.0, - "learning_rate": 3.2103920668170916e-06, - "loss": 0.6604, - "step": 26306 - }, - { - "epoch": 0.7454730935985718, - "grad_norm": 0.0, - "learning_rate": 3.209718277911171e-06, - "loss": 0.7401, - "step": 26307 - }, - { - "epoch": 0.7455014310408342, - "grad_norm": 0.0, - "learning_rate": 3.2090445462023156e-06, - "loss": 0.744, - "step": 26308 - }, - { - "epoch": 0.7455297684830967, - "grad_norm": 0.0, - "learning_rate": 3.208370871696199e-06, - "loss": 0.7884, - "step": 26309 - }, - { - "epoch": 0.7455581059253592, - "grad_norm": 0.0, - "learning_rate": 3.2076972543984975e-06, - "loss": 0.8614, - "step": 26310 - }, - { - "epoch": 0.7455864433676216, - "grad_norm": 0.0, - "learning_rate": 3.2070236943148834e-06, - "loss": 0.8351, - "step": 26311 - }, - { - "epoch": 0.7456147808098841, - "grad_norm": 0.0, - "learning_rate": 3.2063501914510355e-06, - "loss": 0.8477, - "step": 26312 - }, - { - "epoch": 0.7456431182521466, - "grad_norm": 0.0, - "learning_rate": 3.205676745812619e-06, - "loss": 0.7808, - "step": 26313 - }, - { - "epoch": 0.7456714556944091, - "grad_norm": 0.0, - "learning_rate": 3.205003357405313e-06, - "loss": 0.8806, - "step": 26314 - }, - { - "epoch": 0.7456997931366715, - "grad_norm": 0.0, - "learning_rate": 3.2043300262347842e-06, - "loss": 0.8377, - "step": 26315 - }, - { - "epoch": 0.745728130578934, - "grad_norm": 0.0, - "learning_rate": 3.2036567523067074e-06, - "loss": 0.783, - "step": 26316 - }, - { - "epoch": 0.7457564680211964, - "grad_norm": 0.0, - "learning_rate": 3.2029835356267567e-06, - "loss": 0.8602, - "step": 26317 - }, - { - "epoch": 0.7457848054634588, - "grad_norm": 0.0, - "learning_rate": 3.202310376200596e-06, - "loss": 0.7524, - "step": 26318 - }, - { - "epoch": 0.7458131429057213, - "grad_norm": 0.0, - "learning_rate": 3.2016372740339e-06, - "loss": 0.7508, - "step": 26319 - }, - { - "epoch": 0.7458414803479838, - "grad_norm": 0.0, - "learning_rate": 3.2009642291323397e-06, - "loss": 0.8872, - "step": 26320 - }, - { - "epoch": 0.7458698177902463, - "grad_norm": 0.0, - "learning_rate": 3.20029124150158e-06, - "loss": 0.8483, - "step": 26321 - }, - { - "epoch": 0.7458981552325087, - "grad_norm": 0.0, - "learning_rate": 3.199618311147292e-06, - "loss": 0.7885, - "step": 26322 - }, - { - "epoch": 0.7459264926747712, - "grad_norm": 0.0, - "learning_rate": 3.198945438075144e-06, - "loss": 0.8075, - "step": 26323 - }, - { - "epoch": 0.7459548301170337, - "grad_norm": 0.0, - "learning_rate": 3.1982726222908046e-06, - "loss": 0.7656, - "step": 26324 - }, - { - "epoch": 0.7459831675592961, - "grad_norm": 0.0, - "learning_rate": 3.197599863799944e-06, - "loss": 0.8523, - "step": 26325 - }, - { - "epoch": 0.7460115050015586, - "grad_norm": 0.0, - "learning_rate": 3.196927162608222e-06, - "loss": 0.754, - "step": 26326 - }, - { - "epoch": 0.746039842443821, - "grad_norm": 0.0, - "learning_rate": 3.1962545187213123e-06, - "loss": 0.8722, - "step": 26327 - }, - { - "epoch": 0.7460681798860834, - "grad_norm": 0.0, - "learning_rate": 3.1955819321448744e-06, - "loss": 0.7943, - "step": 26328 - }, - { - "epoch": 0.7460965173283459, - "grad_norm": 0.0, - "learning_rate": 3.194909402884576e-06, - "loss": 0.8173, - "step": 26329 - }, - { - "epoch": 0.7461248547706084, - "grad_norm": 0.0, - "learning_rate": 3.1942369309460864e-06, - "loss": 0.7786, - "step": 26330 - }, - { - "epoch": 0.7461531922128709, - "grad_norm": 0.0, - "learning_rate": 3.1935645163350628e-06, - "loss": 0.8167, - "step": 26331 - }, - { - "epoch": 0.7461815296551333, - "grad_norm": 0.0, - "learning_rate": 3.1928921590571726e-06, - "loss": 0.8788, - "step": 26332 - }, - { - "epoch": 0.7462098670973958, - "grad_norm": 0.0, - "learning_rate": 3.192219859118083e-06, - "loss": 0.7719, - "step": 26333 - }, - { - "epoch": 0.7462382045396583, - "grad_norm": 0.0, - "learning_rate": 3.1915476165234505e-06, - "loss": 0.7803, - "step": 26334 - }, - { - "epoch": 0.7462665419819207, - "grad_norm": 0.0, - "learning_rate": 3.1908754312789412e-06, - "loss": 0.8823, - "step": 26335 - }, - { - "epoch": 0.7462948794241832, - "grad_norm": 0.0, - "learning_rate": 3.1902033033902156e-06, - "loss": 0.9208, - "step": 26336 - }, - { - "epoch": 0.7463232168664456, - "grad_norm": 0.0, - "learning_rate": 3.1895312328629368e-06, - "loss": 0.7367, - "step": 26337 - }, - { - "epoch": 0.7463515543087081, - "grad_norm": 0.0, - "learning_rate": 3.188859219702769e-06, - "loss": 0.7116, - "step": 26338 - }, - { - "epoch": 0.7463798917509705, - "grad_norm": 0.0, - "learning_rate": 3.1881872639153655e-06, - "loss": 0.7538, - "step": 26339 - }, - { - "epoch": 0.746408229193233, - "grad_norm": 0.0, - "learning_rate": 3.1875153655063907e-06, - "loss": 0.9167, - "step": 26340 - }, - { - "epoch": 0.7464365666354955, - "grad_norm": 0.0, - "learning_rate": 3.1868435244815057e-06, - "loss": 0.8076, - "step": 26341 - }, - { - "epoch": 0.7464649040777579, - "grad_norm": 0.0, - "learning_rate": 3.1861717408463656e-06, - "loss": 0.8445, - "step": 26342 - }, - { - "epoch": 0.7464932415200204, - "grad_norm": 0.0, - "learning_rate": 3.185500014606634e-06, - "loss": 0.7734, - "step": 26343 - }, - { - "epoch": 0.7465215789622829, - "grad_norm": 0.0, - "learning_rate": 3.184828345767963e-06, - "loss": 0.8076, - "step": 26344 - }, - { - "epoch": 0.7465499164045454, - "grad_norm": 0.0, - "learning_rate": 3.1841567343360136e-06, - "loss": 0.8234, - "step": 26345 - }, - { - "epoch": 0.7465782538468078, - "grad_norm": 0.0, - "learning_rate": 3.183485180316447e-06, - "loss": 0.9036, - "step": 26346 - }, - { - "epoch": 0.7466065912890703, - "grad_norm": 0.0, - "learning_rate": 3.1828136837149128e-06, - "loss": 0.8834, - "step": 26347 - }, - { - "epoch": 0.7466349287313327, - "grad_norm": 0.0, - "learning_rate": 3.1821422445370688e-06, - "loss": 0.8256, - "step": 26348 - }, - { - "epoch": 0.7466632661735951, - "grad_norm": 0.0, - "learning_rate": 3.1814708627885736e-06, - "loss": 0.8386, - "step": 26349 - }, - { - "epoch": 0.7466916036158576, - "grad_norm": 0.0, - "learning_rate": 3.180799538475081e-06, - "loss": 0.7784, - "step": 26350 - }, - { - "epoch": 0.7467199410581201, - "grad_norm": 0.0, - "learning_rate": 3.1801282716022498e-06, - "loss": 0.7423, - "step": 26351 - }, - { - "epoch": 0.7467482785003825, - "grad_norm": 0.0, - "learning_rate": 3.179457062175727e-06, - "loss": 0.6747, - "step": 26352 - }, - { - "epoch": 0.746776615942645, - "grad_norm": 0.0, - "learning_rate": 3.178785910201171e-06, - "loss": 0.7979, - "step": 26353 - }, - { - "epoch": 0.7468049533849075, - "grad_norm": 0.0, - "learning_rate": 3.1781148156842368e-06, - "loss": 0.8829, - "step": 26354 - }, - { - "epoch": 0.74683329082717, - "grad_norm": 0.0, - "learning_rate": 3.177443778630571e-06, - "loss": 0.746, - "step": 26355 - }, - { - "epoch": 0.7468616282694324, - "grad_norm": 0.0, - "learning_rate": 3.176772799045834e-06, - "loss": 0.9665, - "step": 26356 - }, - { - "epoch": 0.7468899657116949, - "grad_norm": 0.0, - "learning_rate": 3.176101876935669e-06, - "loss": 0.8636, - "step": 26357 - }, - { - "epoch": 0.7469183031539574, - "grad_norm": 0.0, - "learning_rate": 3.175431012305733e-06, - "loss": 0.8409, - "step": 26358 - }, - { - "epoch": 0.7469466405962197, - "grad_norm": 0.0, - "learning_rate": 3.1747602051616787e-06, - "loss": 0.9571, - "step": 26359 - }, - { - "epoch": 0.7469749780384822, - "grad_norm": 0.0, - "learning_rate": 3.1740894555091504e-06, - "loss": 0.8835, - "step": 26360 - }, - { - "epoch": 0.7470033154807447, - "grad_norm": 0.0, - "learning_rate": 3.173418763353802e-06, - "loss": 0.8127, - "step": 26361 - }, - { - "epoch": 0.7470316529230072, - "grad_norm": 0.0, - "learning_rate": 3.172748128701281e-06, - "loss": 0.8152, - "step": 26362 - }, - { - "epoch": 0.7470599903652696, - "grad_norm": 0.0, - "learning_rate": 3.172077551557239e-06, - "loss": 0.8197, - "step": 26363 - }, - { - "epoch": 0.7470883278075321, - "grad_norm": 0.0, - "learning_rate": 3.171407031927325e-06, - "loss": 0.6929, - "step": 26364 - }, - { - "epoch": 0.7471166652497946, - "grad_norm": 0.0, - "learning_rate": 3.170736569817183e-06, - "loss": 0.8228, - "step": 26365 - }, - { - "epoch": 0.747145002692057, - "grad_norm": 0.0, - "learning_rate": 3.170066165232464e-06, - "loss": 0.8213, - "step": 26366 - }, - { - "epoch": 0.7471733401343195, - "grad_norm": 0.0, - "learning_rate": 3.1693958181788154e-06, - "loss": 0.7896, - "step": 26367 - }, - { - "epoch": 0.747201677576582, - "grad_norm": 0.0, - "learning_rate": 3.16872552866188e-06, - "loss": 0.7833, - "step": 26368 - }, - { - "epoch": 0.7472300150188445, - "grad_norm": 0.0, - "learning_rate": 3.1680552966873057e-06, - "loss": 0.744, - "step": 26369 - }, - { - "epoch": 0.7472583524611068, - "grad_norm": 0.0, - "learning_rate": 3.167385122260742e-06, - "loss": 0.7665, - "step": 26370 - }, - { - "epoch": 0.7472866899033693, - "grad_norm": 0.0, - "learning_rate": 3.166715005387827e-06, - "loss": 0.8706, - "step": 26371 - }, - { - "epoch": 0.7473150273456318, - "grad_norm": 0.0, - "learning_rate": 3.1660449460742137e-06, - "loss": 0.9063, - "step": 26372 - }, - { - "epoch": 0.7473433647878942, - "grad_norm": 0.0, - "learning_rate": 3.1653749443255367e-06, - "loss": 0.9068, - "step": 26373 - }, - { - "epoch": 0.7473717022301567, - "grad_norm": 0.0, - "learning_rate": 3.1647050001474454e-06, - "loss": 0.761, - "step": 26374 - }, - { - "epoch": 0.7474000396724192, - "grad_norm": 0.0, - "learning_rate": 3.1640351135455814e-06, - "loss": 0.827, - "step": 26375 - }, - { - "epoch": 0.7474283771146816, - "grad_norm": 0.0, - "learning_rate": 3.163365284525589e-06, - "loss": 0.8561, - "step": 26376 - }, - { - "epoch": 0.7474567145569441, - "grad_norm": 0.0, - "learning_rate": 3.162695513093109e-06, - "loss": 0.7253, - "step": 26377 - }, - { - "epoch": 0.7474850519992066, - "grad_norm": 0.0, - "learning_rate": 3.1620257992537872e-06, - "loss": 0.8561, - "step": 26378 - }, - { - "epoch": 0.7475133894414691, - "grad_norm": 0.0, - "learning_rate": 3.161356143013258e-06, - "loss": 0.7798, - "step": 26379 - }, - { - "epoch": 0.7475417268837314, - "grad_norm": 0.0, - "learning_rate": 3.1606865443771685e-06, - "loss": 0.7626, - "step": 26380 - }, - { - "epoch": 0.7475700643259939, - "grad_norm": 0.0, - "learning_rate": 3.1600170033511525e-06, - "loss": 0.7842, - "step": 26381 - }, - { - "epoch": 0.7475984017682564, - "grad_norm": 0.0, - "learning_rate": 3.159347519940853e-06, - "loss": 0.8311, - "step": 26382 - }, - { - "epoch": 0.7476267392105188, - "grad_norm": 0.0, - "learning_rate": 3.1586780941519135e-06, - "loss": 0.8532, - "step": 26383 - }, - { - "epoch": 0.7476550766527813, - "grad_norm": 0.0, - "learning_rate": 3.1580087259899662e-06, - "loss": 0.9638, - "step": 26384 - }, - { - "epoch": 0.7476834140950438, - "grad_norm": 0.0, - "learning_rate": 3.157339415460654e-06, - "loss": 0.7889, - "step": 26385 - }, - { - "epoch": 0.7477117515373063, - "grad_norm": 0.0, - "learning_rate": 3.1566701625696108e-06, - "loss": 0.7064, - "step": 26386 - }, - { - "epoch": 0.7477400889795687, - "grad_norm": 0.0, - "learning_rate": 3.1560009673224758e-06, - "loss": 0.7848, - "step": 26387 - }, - { - "epoch": 0.7477684264218312, - "grad_norm": 0.0, - "learning_rate": 3.1553318297248847e-06, - "loss": 0.7417, - "step": 26388 - }, - { - "epoch": 0.7477967638640937, - "grad_norm": 0.0, - "learning_rate": 3.1546627497824767e-06, - "loss": 0.8725, - "step": 26389 - }, - { - "epoch": 0.747825101306356, - "grad_norm": 0.0, - "learning_rate": 3.1539937275008857e-06, - "loss": 0.8755, - "step": 26390 - }, - { - "epoch": 0.7478534387486185, - "grad_norm": 0.0, - "learning_rate": 3.1533247628857523e-06, - "loss": 0.8289, - "step": 26391 - }, - { - "epoch": 0.747881776190881, - "grad_norm": 0.0, - "learning_rate": 3.1526558559427023e-06, - "loss": 0.7409, - "step": 26392 - }, - { - "epoch": 0.7479101136331435, - "grad_norm": 0.0, - "learning_rate": 3.1519870066773783e-06, - "loss": 0.8721, - "step": 26393 - }, - { - "epoch": 0.7479384510754059, - "grad_norm": 0.0, - "learning_rate": 3.1513182150954067e-06, - "loss": 0.8368, - "step": 26394 - }, - { - "epoch": 0.7479667885176684, - "grad_norm": 0.0, - "learning_rate": 3.150649481202426e-06, - "loss": 0.857, - "step": 26395 - }, - { - "epoch": 0.7479951259599309, - "grad_norm": 0.0, - "learning_rate": 3.1499808050040713e-06, - "loss": 0.8633, - "step": 26396 - }, - { - "epoch": 0.7480234634021933, - "grad_norm": 0.0, - "learning_rate": 3.1493121865059684e-06, - "loss": 0.7414, - "step": 26397 - }, - { - "epoch": 0.7480518008444558, - "grad_norm": 0.0, - "learning_rate": 3.148643625713753e-06, - "loss": 0.9167, - "step": 26398 - }, - { - "epoch": 0.7480801382867183, - "grad_norm": 0.0, - "learning_rate": 3.1479751226330567e-06, - "loss": 0.8107, - "step": 26399 - }, - { - "epoch": 0.7481084757289806, - "grad_norm": 0.0, - "learning_rate": 3.1473066772695105e-06, - "loss": 0.7158, - "step": 26400 - }, - { - "epoch": 0.7481368131712431, - "grad_norm": 0.0, - "learning_rate": 3.1466382896287474e-06, - "loss": 0.9784, - "step": 26401 - }, - { - "epoch": 0.7481651506135056, - "grad_norm": 0.0, - "learning_rate": 3.1459699597163917e-06, - "loss": 0.8781, - "step": 26402 - }, - { - "epoch": 0.7481934880557681, - "grad_norm": 0.0, - "learning_rate": 3.145301687538077e-06, - "loss": 0.8453, - "step": 26403 - }, - { - "epoch": 0.7482218254980305, - "grad_norm": 0.0, - "learning_rate": 3.144633473099434e-06, - "loss": 0.8829, - "step": 26404 - }, - { - "epoch": 0.748250162940293, - "grad_norm": 0.0, - "learning_rate": 3.143965316406087e-06, - "loss": 0.8037, - "step": 26405 - }, - { - "epoch": 0.7482785003825555, - "grad_norm": 0.0, - "learning_rate": 3.1432972174636646e-06, - "loss": 0.9703, - "step": 26406 - }, - { - "epoch": 0.7483068378248179, - "grad_norm": 0.0, - "learning_rate": 3.1426291762777994e-06, - "loss": 0.8461, - "step": 26407 - }, - { - "epoch": 0.7483351752670804, - "grad_norm": 0.0, - "learning_rate": 3.141961192854113e-06, - "loss": 0.7662, - "step": 26408 - }, - { - "epoch": 0.7483635127093429, - "grad_norm": 0.0, - "learning_rate": 3.1412932671982368e-06, - "loss": 0.8395, - "step": 26409 - }, - { - "epoch": 0.7483918501516054, - "grad_norm": 0.0, - "learning_rate": 3.14062539931579e-06, - "loss": 0.8722, - "step": 26410 - }, - { - "epoch": 0.7484201875938677, - "grad_norm": 0.0, - "learning_rate": 3.1399575892124035e-06, - "loss": 0.7667, - "step": 26411 - }, - { - "epoch": 0.7484485250361302, - "grad_norm": 0.0, - "learning_rate": 3.139289836893702e-06, - "loss": 0.8328, - "step": 26412 - }, - { - "epoch": 0.7484768624783927, - "grad_norm": 0.0, - "learning_rate": 3.1386221423653096e-06, - "loss": 0.7208, - "step": 26413 - }, - { - "epoch": 0.7485051999206551, - "grad_norm": 0.0, - "learning_rate": 3.137954505632854e-06, - "loss": 0.8379, - "step": 26414 - }, - { - "epoch": 0.7485335373629176, - "grad_norm": 0.0, - "learning_rate": 3.1372869267019525e-06, - "loss": 0.8113, - "step": 26415 - }, - { - "epoch": 0.7485618748051801, - "grad_norm": 0.0, - "learning_rate": 3.136619405578232e-06, - "loss": 0.7983, - "step": 26416 - }, - { - "epoch": 0.7485902122474426, - "grad_norm": 0.0, - "learning_rate": 3.135951942267317e-06, - "loss": 0.785, - "step": 26417 - }, - { - "epoch": 0.748618549689705, - "grad_norm": 0.0, - "learning_rate": 3.135284536774825e-06, - "loss": 0.7421, - "step": 26418 - }, - { - "epoch": 0.7486468871319675, - "grad_norm": 0.0, - "learning_rate": 3.13461718910638e-06, - "loss": 0.7493, - "step": 26419 - }, - { - "epoch": 0.74867522457423, - "grad_norm": 0.0, - "learning_rate": 3.1339498992676087e-06, - "loss": 0.8804, - "step": 26420 - }, - { - "epoch": 0.7487035620164924, - "grad_norm": 0.0, - "learning_rate": 3.1332826672641227e-06, - "loss": 0.8482, - "step": 26421 - }, - { - "epoch": 0.7487318994587548, - "grad_norm": 0.0, - "learning_rate": 3.1326154931015496e-06, - "loss": 0.8099, - "step": 26422 - }, - { - "epoch": 0.7487602369010173, - "grad_norm": 0.0, - "learning_rate": 3.1319483767855042e-06, - "loss": 0.972, - "step": 26423 - }, - { - "epoch": 0.7487885743432797, - "grad_norm": 0.0, - "learning_rate": 3.131281318321607e-06, - "loss": 0.9625, - "step": 26424 - }, - { - "epoch": 0.7488169117855422, - "grad_norm": 0.0, - "learning_rate": 3.130614317715478e-06, - "loss": 0.7817, - "step": 26425 - }, - { - "epoch": 0.7488452492278047, - "grad_norm": 0.0, - "learning_rate": 3.129947374972736e-06, - "loss": 0.9031, - "step": 26426 - }, - { - "epoch": 0.7488735866700672, - "grad_norm": 0.0, - "learning_rate": 3.1292804900989983e-06, - "loss": 0.8438, - "step": 26427 - }, - { - "epoch": 0.7489019241123296, - "grad_norm": 0.0, - "learning_rate": 3.128613663099885e-06, - "loss": 0.8067, - "step": 26428 - }, - { - "epoch": 0.7489302615545921, - "grad_norm": 0.0, - "learning_rate": 3.127946893981009e-06, - "loss": 0.92, - "step": 26429 - }, - { - "epoch": 0.7489585989968546, - "grad_norm": 0.0, - "learning_rate": 3.1272801827479894e-06, - "loss": 0.8387, - "step": 26430 - }, - { - "epoch": 0.748986936439117, - "grad_norm": 0.0, - "learning_rate": 3.1266135294064383e-06, - "loss": 0.6937, - "step": 26431 - }, - { - "epoch": 0.7490152738813795, - "grad_norm": 0.0, - "learning_rate": 3.125946933961974e-06, - "loss": 0.8052, - "step": 26432 - }, - { - "epoch": 0.749043611323642, - "grad_norm": 0.0, - "learning_rate": 3.125280396420214e-06, - "loss": 0.8303, - "step": 26433 - }, - { - "epoch": 0.7490719487659044, - "grad_norm": 0.0, - "learning_rate": 3.124613916786767e-06, - "loss": 0.8443, - "step": 26434 - }, - { - "epoch": 0.7491002862081668, - "grad_norm": 0.0, - "learning_rate": 3.123947495067251e-06, - "loss": 0.8054, - "step": 26435 - }, - { - "epoch": 0.7491286236504293, - "grad_norm": 0.0, - "learning_rate": 3.1232811312672817e-06, - "loss": 0.8101, - "step": 26436 - }, - { - "epoch": 0.7491569610926918, - "grad_norm": 0.0, - "learning_rate": 3.122614825392465e-06, - "loss": 0.8535, - "step": 26437 - }, - { - "epoch": 0.7491852985349542, - "grad_norm": 0.0, - "learning_rate": 3.121948577448418e-06, - "loss": 0.7896, - "step": 26438 - }, - { - "epoch": 0.7492136359772167, - "grad_norm": 0.0, - "learning_rate": 3.1212823874407517e-06, - "loss": 0.778, - "step": 26439 - }, - { - "epoch": 0.7492419734194792, - "grad_norm": 0.0, - "learning_rate": 3.1206162553750785e-06, - "loss": 0.7732, - "step": 26440 - }, - { - "epoch": 0.7492703108617417, - "grad_norm": 0.0, - "learning_rate": 3.1199501812570133e-06, - "loss": 0.8661, - "step": 26441 - }, - { - "epoch": 0.7492986483040041, - "grad_norm": 0.0, - "learning_rate": 3.119284165092158e-06, - "loss": 0.7926, - "step": 26442 - }, - { - "epoch": 0.7493269857462665, - "grad_norm": 0.0, - "learning_rate": 3.1186182068861306e-06, - "loss": 0.8074, - "step": 26443 - }, - { - "epoch": 0.749355323188529, - "grad_norm": 0.0, - "learning_rate": 3.117952306644535e-06, - "loss": 0.7644, - "step": 26444 - }, - { - "epoch": 0.7493836606307914, - "grad_norm": 0.0, - "learning_rate": 3.1172864643729815e-06, - "loss": 0.7837, - "step": 26445 - }, - { - "epoch": 0.7494119980730539, - "grad_norm": 0.0, - "learning_rate": 3.1166206800770847e-06, - "loss": 0.7702, - "step": 26446 - }, - { - "epoch": 0.7494403355153164, - "grad_norm": 0.0, - "learning_rate": 3.1159549537624434e-06, - "loss": 0.8267, - "step": 26447 - }, - { - "epoch": 0.7494686729575788, - "grad_norm": 0.0, - "learning_rate": 3.1152892854346707e-06, - "loss": 0.9099, - "step": 26448 - }, - { - "epoch": 0.7494970103998413, - "grad_norm": 0.0, - "learning_rate": 3.1146236750993763e-06, - "loss": 0.9514, - "step": 26449 - }, - { - "epoch": 0.7495253478421038, - "grad_norm": 0.0, - "learning_rate": 3.1139581227621595e-06, - "loss": 0.8658, - "step": 26450 - }, - { - "epoch": 0.7495536852843663, - "grad_norm": 0.0, - "learning_rate": 3.113292628428631e-06, - "loss": 0.8081, - "step": 26451 - }, - { - "epoch": 0.7495820227266287, - "grad_norm": 0.0, - "learning_rate": 3.112627192104396e-06, - "loss": 0.8371, - "step": 26452 - }, - { - "epoch": 0.7496103601688912, - "grad_norm": 0.0, - "learning_rate": 3.1119618137950593e-06, - "loss": 0.8218, - "step": 26453 - }, - { - "epoch": 0.7496386976111536, - "grad_norm": 0.0, - "learning_rate": 3.1112964935062297e-06, - "loss": 0.8061, - "step": 26454 - }, - { - "epoch": 0.749667035053416, - "grad_norm": 0.0, - "learning_rate": 3.110631231243505e-06, - "loss": 0.842, - "step": 26455 - }, - { - "epoch": 0.7496953724956785, - "grad_norm": 0.0, - "learning_rate": 3.1099660270124908e-06, - "loss": 0.7786, - "step": 26456 - }, - { - "epoch": 0.749723709937941, - "grad_norm": 0.0, - "learning_rate": 3.1093008808187952e-06, - "loss": 0.7883, - "step": 26457 - }, - { - "epoch": 0.7497520473802035, - "grad_norm": 0.0, - "learning_rate": 3.1086357926680134e-06, - "loss": 0.7974, - "step": 26458 - }, - { - "epoch": 0.7497803848224659, - "grad_norm": 0.0, - "learning_rate": 3.107970762565755e-06, - "loss": 0.8358, - "step": 26459 - }, - { - "epoch": 0.7498087222647284, - "grad_norm": 0.0, - "learning_rate": 3.107305790517614e-06, - "loss": 0.7216, - "step": 26460 - }, - { - "epoch": 0.7498370597069909, - "grad_norm": 0.0, - "learning_rate": 3.1066408765291966e-06, - "loss": 0.7375, - "step": 26461 - }, - { - "epoch": 0.7498653971492533, - "grad_norm": 0.0, - "learning_rate": 3.105976020606106e-06, - "loss": 0.871, - "step": 26462 - }, - { - "epoch": 0.7498937345915158, - "grad_norm": 0.0, - "learning_rate": 3.105311222753936e-06, - "loss": 0.7946, - "step": 26463 - }, - { - "epoch": 0.7499220720337783, - "grad_norm": 0.0, - "learning_rate": 3.1046464829782906e-06, - "loss": 0.7703, - "step": 26464 - }, - { - "epoch": 0.7499504094760407, - "grad_norm": 0.0, - "learning_rate": 3.1039818012847676e-06, - "loss": 0.8259, - "step": 26465 - }, - { - "epoch": 0.7499787469183031, - "grad_norm": 0.0, - "learning_rate": 3.103317177678967e-06, - "loss": 0.7812, - "step": 26466 - }, - { - "epoch": 0.7500070843605656, - "grad_norm": 0.0, - "learning_rate": 3.1026526121664903e-06, - "loss": 0.7534, - "step": 26467 - }, - { - "epoch": 0.7500354218028281, - "grad_norm": 0.0, - "learning_rate": 3.1019881047529286e-06, - "loss": 0.8024, - "step": 26468 - }, - { - "epoch": 0.7500637592450905, - "grad_norm": 0.0, - "learning_rate": 3.101323655443882e-06, - "loss": 0.7974, - "step": 26469 - }, - { - "epoch": 0.750092096687353, - "grad_norm": 0.0, - "learning_rate": 3.1006592642449516e-06, - "loss": 0.7015, - "step": 26470 - }, - { - "epoch": 0.7501204341296155, - "grad_norm": 0.0, - "learning_rate": 3.0999949311617273e-06, - "loss": 0.7338, - "step": 26471 - }, - { - "epoch": 0.7501487715718779, - "grad_norm": 0.0, - "learning_rate": 3.0993306561998116e-06, - "loss": 0.8303, - "step": 26472 - }, - { - "epoch": 0.7501771090141404, - "grad_norm": 0.0, - "learning_rate": 3.0986664393647925e-06, - "loss": 0.8657, - "step": 26473 - }, - { - "epoch": 0.7502054464564029, - "grad_norm": 0.0, - "learning_rate": 3.098002280662268e-06, - "loss": 0.7922, - "step": 26474 - }, - { - "epoch": 0.7502337838986654, - "grad_norm": 0.0, - "learning_rate": 3.0973381800978374e-06, - "loss": 0.845, - "step": 26475 - }, - { - "epoch": 0.7502621213409277, - "grad_norm": 0.0, - "learning_rate": 3.096674137677087e-06, - "loss": 0.8509, - "step": 26476 - }, - { - "epoch": 0.7502904587831902, - "grad_norm": 0.0, - "learning_rate": 3.096010153405614e-06, - "loss": 0.843, - "step": 26477 - }, - { - "epoch": 0.7503187962254527, - "grad_norm": 0.0, - "learning_rate": 3.095346227289011e-06, - "loss": 0.8356, - "step": 26478 - }, - { - "epoch": 0.7503471336677151, - "grad_norm": 0.0, - "learning_rate": 3.094682359332871e-06, - "loss": 0.8668, - "step": 26479 - }, - { - "epoch": 0.7503754711099776, - "grad_norm": 0.0, - "learning_rate": 3.0940185495427887e-06, - "loss": 0.862, - "step": 26480 - }, - { - "epoch": 0.7504038085522401, - "grad_norm": 0.0, - "learning_rate": 3.0933547979243494e-06, - "loss": 0.7942, - "step": 26481 - }, - { - "epoch": 0.7504321459945026, - "grad_norm": 0.0, - "learning_rate": 3.0926911044831476e-06, - "loss": 0.8271, - "step": 26482 - }, - { - "epoch": 0.750460483436765, - "grad_norm": 0.0, - "learning_rate": 3.0920274692247765e-06, - "loss": 0.8836, - "step": 26483 - }, - { - "epoch": 0.7504888208790275, - "grad_norm": 0.0, - "learning_rate": 3.0913638921548195e-06, - "loss": 0.8419, - "step": 26484 - }, - { - "epoch": 0.75051715832129, - "grad_norm": 0.0, - "learning_rate": 3.090700373278871e-06, - "loss": 0.761, - "step": 26485 - }, - { - "epoch": 0.7505454957635523, - "grad_norm": 0.0, - "learning_rate": 3.090036912602522e-06, - "loss": 0.7756, - "step": 26486 - }, - { - "epoch": 0.7505738332058148, - "grad_norm": 0.0, - "learning_rate": 3.089373510131354e-06, - "loss": 0.7025, - "step": 26487 - }, - { - "epoch": 0.7506021706480773, - "grad_norm": 0.0, - "learning_rate": 3.088710165870963e-06, - "loss": 0.7255, - "step": 26488 - }, - { - "epoch": 0.7506305080903398, - "grad_norm": 0.0, - "learning_rate": 3.0880468798269293e-06, - "loss": 0.8099, - "step": 26489 - }, - { - "epoch": 0.7506588455326022, - "grad_norm": 0.0, - "learning_rate": 3.087383652004844e-06, - "loss": 0.7915, - "step": 26490 - }, - { - "epoch": 0.7506871829748647, - "grad_norm": 0.0, - "learning_rate": 3.0867204824102926e-06, - "loss": 0.7259, - "step": 26491 - }, - { - "epoch": 0.7507155204171272, - "grad_norm": 0.0, - "learning_rate": 3.0860573710488616e-06, - "loss": 0.9174, - "step": 26492 - }, - { - "epoch": 0.7507438578593896, - "grad_norm": 0.0, - "learning_rate": 3.0853943179261405e-06, - "loss": 0.9349, - "step": 26493 - }, - { - "epoch": 0.7507721953016521, - "grad_norm": 0.0, - "learning_rate": 3.084731323047707e-06, - "loss": 0.9392, - "step": 26494 - }, - { - "epoch": 0.7508005327439146, - "grad_norm": 0.0, - "learning_rate": 3.084068386419149e-06, - "loss": 0.7043, - "step": 26495 - }, - { - "epoch": 0.7508288701861769, - "grad_norm": 0.0, - "learning_rate": 3.0834055080460558e-06, - "loss": 0.8461, - "step": 26496 - }, - { - "epoch": 0.7508572076284394, - "grad_norm": 0.0, - "learning_rate": 3.0827426879340017e-06, - "loss": 0.8472, - "step": 26497 - }, - { - "epoch": 0.7508855450707019, - "grad_norm": 0.0, - "learning_rate": 3.082079926088576e-06, - "loss": 0.8297, - "step": 26498 - }, - { - "epoch": 0.7509138825129644, - "grad_norm": 0.0, - "learning_rate": 3.0814172225153626e-06, - "loss": 0.8779, - "step": 26499 - }, - { - "epoch": 0.7509422199552268, - "grad_norm": 0.0, - "learning_rate": 3.0807545772199377e-06, - "loss": 0.8387, - "step": 26500 - }, - { - "epoch": 0.7509705573974893, - "grad_norm": 0.0, - "learning_rate": 3.0800919902078897e-06, - "loss": 0.7773, - "step": 26501 - }, - { - "epoch": 0.7509988948397518, - "grad_norm": 0.0, - "learning_rate": 3.079429461484793e-06, - "loss": 0.8048, - "step": 26502 - }, - { - "epoch": 0.7510272322820142, - "grad_norm": 0.0, - "learning_rate": 3.0787669910562323e-06, - "loss": 0.7966, - "step": 26503 - }, - { - "epoch": 0.7510555697242767, - "grad_norm": 0.0, - "learning_rate": 3.0781045789277875e-06, - "loss": 0.898, - "step": 26504 - }, - { - "epoch": 0.7510839071665392, - "grad_norm": 0.0, - "learning_rate": 3.0774422251050386e-06, - "loss": 0.8036, - "step": 26505 - }, - { - "epoch": 0.7511122446088017, - "grad_norm": 0.0, - "learning_rate": 3.076779929593563e-06, - "loss": 0.8391, - "step": 26506 - }, - { - "epoch": 0.751140582051064, - "grad_norm": 0.0, - "learning_rate": 3.0761176923989456e-06, - "loss": 0.7831, - "step": 26507 - }, - { - "epoch": 0.7511689194933265, - "grad_norm": 0.0, - "learning_rate": 3.075455513526756e-06, - "loss": 0.8285, - "step": 26508 - }, - { - "epoch": 0.751197256935589, - "grad_norm": 0.0, - "learning_rate": 3.074793392982579e-06, - "loss": 0.8391, - "step": 26509 - }, - { - "epoch": 0.7512255943778514, - "grad_norm": 0.0, - "learning_rate": 3.0741313307719865e-06, - "loss": 0.799, - "step": 26510 - }, - { - "epoch": 0.7512539318201139, - "grad_norm": 0.0, - "learning_rate": 3.0734693269005567e-06, - "loss": 0.7972, - "step": 26511 - }, - { - "epoch": 0.7512822692623764, - "grad_norm": 0.0, - "learning_rate": 3.072807381373871e-06, - "loss": 0.7682, - "step": 26512 - }, - { - "epoch": 0.7513106067046389, - "grad_norm": 0.0, - "learning_rate": 3.072145494197497e-06, - "loss": 0.8956, - "step": 26513 - }, - { - "epoch": 0.7513389441469013, - "grad_norm": 0.0, - "learning_rate": 3.0714836653770153e-06, - "loss": 0.7857, - "step": 26514 - }, - { - "epoch": 0.7513672815891638, - "grad_norm": 0.0, - "learning_rate": 3.0708218949180015e-06, - "loss": 0.8489, - "step": 26515 - }, - { - "epoch": 0.7513956190314263, - "grad_norm": 0.0, - "learning_rate": 3.0701601828260253e-06, - "loss": 0.7859, - "step": 26516 - }, - { - "epoch": 0.7514239564736886, - "grad_norm": 0.0, - "learning_rate": 3.0694985291066627e-06, - "loss": 0.9057, - "step": 26517 - }, - { - "epoch": 0.7514522939159511, - "grad_norm": 0.0, - "learning_rate": 3.0688369337654876e-06, - "loss": 0.9119, - "step": 26518 - }, - { - "epoch": 0.7514806313582136, - "grad_norm": 0.0, - "learning_rate": 3.068175396808074e-06, - "loss": 0.886, - "step": 26519 - }, - { - "epoch": 0.751508968800476, - "grad_norm": 0.0, - "learning_rate": 3.067513918239995e-06, - "loss": 0.7161, - "step": 26520 - }, - { - "epoch": 0.7515373062427385, - "grad_norm": 0.0, - "learning_rate": 3.066852498066818e-06, - "loss": 0.7945, - "step": 26521 - }, - { - "epoch": 0.751565643685001, - "grad_norm": 0.0, - "learning_rate": 3.06619113629412e-06, - "loss": 0.7996, - "step": 26522 - }, - { - "epoch": 0.7515939811272635, - "grad_norm": 0.0, - "learning_rate": 3.0655298329274663e-06, - "loss": 0.8524, - "step": 26523 - }, - { - "epoch": 0.7516223185695259, - "grad_norm": 0.0, - "learning_rate": 3.0648685879724296e-06, - "loss": 0.6957, - "step": 26524 - }, - { - "epoch": 0.7516506560117884, - "grad_norm": 0.0, - "learning_rate": 3.0642074014345824e-06, - "loss": 0.8222, - "step": 26525 - }, - { - "epoch": 0.7516789934540509, - "grad_norm": 0.0, - "learning_rate": 3.0635462733194886e-06, - "loss": 0.8723, - "step": 26526 - }, - { - "epoch": 0.7517073308963133, - "grad_norm": 0.0, - "learning_rate": 3.0628852036327215e-06, - "loss": 0.754, - "step": 26527 - }, - { - "epoch": 0.7517356683385757, - "grad_norm": 0.0, - "learning_rate": 3.0622241923798513e-06, - "loss": 0.7442, - "step": 26528 - }, - { - "epoch": 0.7517640057808382, - "grad_norm": 0.0, - "learning_rate": 3.0615632395664395e-06, - "loss": 0.894, - "step": 26529 - }, - { - "epoch": 0.7517923432231007, - "grad_norm": 0.0, - "learning_rate": 3.0609023451980568e-06, - "loss": 0.7904, - "step": 26530 - }, - { - "epoch": 0.7518206806653631, - "grad_norm": 0.0, - "learning_rate": 3.0602415092802706e-06, - "loss": 0.7705, - "step": 26531 - }, - { - "epoch": 0.7518490181076256, - "grad_norm": 0.0, - "learning_rate": 3.0595807318186476e-06, - "loss": 0.8509, - "step": 26532 - }, - { - "epoch": 0.7518773555498881, - "grad_norm": 0.0, - "learning_rate": 3.058920012818756e-06, - "loss": 0.904, - "step": 26533 - }, - { - "epoch": 0.7519056929921505, - "grad_norm": 0.0, - "learning_rate": 3.058259352286156e-06, - "loss": 0.8163, - "step": 26534 - }, - { - "epoch": 0.751934030434413, - "grad_norm": 0.0, - "learning_rate": 3.057598750226415e-06, - "loss": 0.872, - "step": 26535 - }, - { - "epoch": 0.7519623678766755, - "grad_norm": 0.0, - "learning_rate": 3.0569382066451005e-06, - "loss": 0.7794, - "step": 26536 - }, - { - "epoch": 0.7519907053189379, - "grad_norm": 0.0, - "learning_rate": 3.05627772154777e-06, - "loss": 0.878, - "step": 26537 - }, - { - "epoch": 0.7520190427612004, - "grad_norm": 0.0, - "learning_rate": 3.0556172949399955e-06, - "loss": 0.8539, - "step": 26538 - }, - { - "epoch": 0.7520473802034628, - "grad_norm": 0.0, - "learning_rate": 3.0549569268273316e-06, - "loss": 0.7953, - "step": 26539 - }, - { - "epoch": 0.7520757176457253, - "grad_norm": 0.0, - "learning_rate": 3.0542966172153433e-06, - "loss": 0.7458, - "step": 26540 - }, - { - "epoch": 0.7521040550879877, - "grad_norm": 0.0, - "learning_rate": 3.053636366109598e-06, - "loss": 0.7804, - "step": 26541 - }, - { - "epoch": 0.7521323925302502, - "grad_norm": 0.0, - "learning_rate": 3.0529761735156505e-06, - "loss": 0.7982, - "step": 26542 - }, - { - "epoch": 0.7521607299725127, - "grad_norm": 0.0, - "learning_rate": 3.052316039439064e-06, - "loss": 0.7638, - "step": 26543 - }, - { - "epoch": 0.7521890674147751, - "grad_norm": 0.0, - "learning_rate": 3.051655963885398e-06, - "loss": 0.8297, - "step": 26544 - }, - { - "epoch": 0.7522174048570376, - "grad_norm": 0.0, - "learning_rate": 3.0509959468602157e-06, - "loss": 0.8915, - "step": 26545 - }, - { - "epoch": 0.7522457422993001, - "grad_norm": 0.0, - "learning_rate": 3.0503359883690776e-06, - "loss": 0.8823, - "step": 26546 - }, - { - "epoch": 0.7522740797415626, - "grad_norm": 0.0, - "learning_rate": 3.049676088417537e-06, - "loss": 0.8085, - "step": 26547 - }, - { - "epoch": 0.752302417183825, - "grad_norm": 0.0, - "learning_rate": 3.049016247011156e-06, - "loss": 0.7729, - "step": 26548 - }, - { - "epoch": 0.7523307546260875, - "grad_norm": 0.0, - "learning_rate": 3.0483564641554953e-06, - "loss": 0.8291, - "step": 26549 - }, - { - "epoch": 0.7523590920683499, - "grad_norm": 0.0, - "learning_rate": 3.0476967398561053e-06, - "loss": 0.7908, - "step": 26550 - }, - { - "epoch": 0.7523874295106123, - "grad_norm": 0.0, - "learning_rate": 3.047037074118552e-06, - "loss": 0.9335, - "step": 26551 - }, - { - "epoch": 0.7524157669528748, - "grad_norm": 0.0, - "learning_rate": 3.0463774669483838e-06, - "loss": 0.8369, - "step": 26552 - }, - { - "epoch": 0.7524441043951373, - "grad_norm": 0.0, - "learning_rate": 3.04571791835116e-06, - "loss": 0.8371, - "step": 26553 - }, - { - "epoch": 0.7524724418373998, - "grad_norm": 0.0, - "learning_rate": 3.0450584283324404e-06, - "loss": 0.797, - "step": 26554 - }, - { - "epoch": 0.7525007792796622, - "grad_norm": 0.0, - "learning_rate": 3.0443989968977717e-06, - "loss": 0.8178, - "step": 26555 - }, - { - "epoch": 0.7525291167219247, - "grad_norm": 0.0, - "learning_rate": 3.043739624052715e-06, - "loss": 0.7963, - "step": 26556 - }, - { - "epoch": 0.7525574541641872, - "grad_norm": 0.0, - "learning_rate": 3.043080309802822e-06, - "loss": 0.7672, - "step": 26557 - }, - { - "epoch": 0.7525857916064496, - "grad_norm": 0.0, - "learning_rate": 3.0424210541536456e-06, - "loss": 0.8459, - "step": 26558 - }, - { - "epoch": 0.752614129048712, - "grad_norm": 0.0, - "learning_rate": 3.0417618571107443e-06, - "loss": 0.8353, - "step": 26559 - }, - { - "epoch": 0.7526424664909745, - "grad_norm": 0.0, - "learning_rate": 3.041102718679664e-06, - "loss": 0.8531, - "step": 26560 - }, - { - "epoch": 0.7526708039332369, - "grad_norm": 0.0, - "learning_rate": 3.0404436388659597e-06, - "loss": 0.7437, - "step": 26561 - }, - { - "epoch": 0.7526991413754994, - "grad_norm": 0.0, - "learning_rate": 3.0397846176751853e-06, - "loss": 0.7764, - "step": 26562 - }, - { - "epoch": 0.7527274788177619, - "grad_norm": 0.0, - "learning_rate": 3.0391256551128877e-06, - "loss": 0.9196, - "step": 26563 - }, - { - "epoch": 0.7527558162600244, - "grad_norm": 0.0, - "learning_rate": 3.038466751184619e-06, - "loss": 0.7389, - "step": 26564 - }, - { - "epoch": 0.7527841537022868, - "grad_norm": 0.0, - "learning_rate": 3.037807905895933e-06, - "loss": 0.8777, - "step": 26565 - }, - { - "epoch": 0.7528124911445493, - "grad_norm": 0.0, - "learning_rate": 3.037149119252374e-06, - "loss": 0.7788, - "step": 26566 - }, - { - "epoch": 0.7528408285868118, - "grad_norm": 0.0, - "learning_rate": 3.0364903912594958e-06, - "loss": 0.8211, - "step": 26567 - }, - { - "epoch": 0.7528691660290742, - "grad_norm": 0.0, - "learning_rate": 3.0358317219228415e-06, - "loss": 0.8508, - "step": 26568 - }, - { - "epoch": 0.7528975034713367, - "grad_norm": 0.0, - "learning_rate": 3.035173111247963e-06, - "loss": 0.8065, - "step": 26569 - }, - { - "epoch": 0.7529258409135992, - "grad_norm": 0.0, - "learning_rate": 3.0345145592404077e-06, - "loss": 0.7396, - "step": 26570 - }, - { - "epoch": 0.7529541783558616, - "grad_norm": 0.0, - "learning_rate": 3.0338560659057226e-06, - "loss": 0.9401, - "step": 26571 - }, - { - "epoch": 0.752982515798124, - "grad_norm": 0.0, - "learning_rate": 3.033197631249456e-06, - "loss": 0.8646, - "step": 26572 - }, - { - "epoch": 0.7530108532403865, - "grad_norm": 0.0, - "learning_rate": 3.032539255277155e-06, - "loss": 0.8186, - "step": 26573 - }, - { - "epoch": 0.753039190682649, - "grad_norm": 0.0, - "learning_rate": 3.0318809379943594e-06, - "loss": 0.8181, - "step": 26574 - }, - { - "epoch": 0.7530675281249114, - "grad_norm": 0.0, - "learning_rate": 3.0312226794066225e-06, - "loss": 0.8981, - "step": 26575 - }, - { - "epoch": 0.7530958655671739, - "grad_norm": 0.0, - "learning_rate": 3.030564479519481e-06, - "loss": 0.8619, - "step": 26576 - }, - { - "epoch": 0.7531242030094364, - "grad_norm": 0.0, - "learning_rate": 3.029906338338483e-06, - "loss": 0.8473, - "step": 26577 - }, - { - "epoch": 0.7531525404516989, - "grad_norm": 0.0, - "learning_rate": 3.0292482558691748e-06, - "loss": 0.838, - "step": 26578 - }, - { - "epoch": 0.7531808778939613, - "grad_norm": 0.0, - "learning_rate": 3.0285902321170945e-06, - "loss": 0.8042, - "step": 26579 - }, - { - "epoch": 0.7532092153362238, - "grad_norm": 0.0, - "learning_rate": 3.027932267087791e-06, - "loss": 0.703, - "step": 26580 - }, - { - "epoch": 0.7532375527784863, - "grad_norm": 0.0, - "learning_rate": 3.0272743607867995e-06, - "loss": 0.8057, - "step": 26581 - }, - { - "epoch": 0.7532658902207486, - "grad_norm": 0.0, - "learning_rate": 3.0266165132196645e-06, - "loss": 0.7469, - "step": 26582 - }, - { - "epoch": 0.7532942276630111, - "grad_norm": 0.0, - "learning_rate": 3.0259587243919286e-06, - "loss": 0.8448, - "step": 26583 - }, - { - "epoch": 0.7533225651052736, - "grad_norm": 0.0, - "learning_rate": 3.025300994309133e-06, - "loss": 0.8326, - "step": 26584 - }, - { - "epoch": 0.753350902547536, - "grad_norm": 0.0, - "learning_rate": 3.024643322976816e-06, - "loss": 0.8443, - "step": 26585 - }, - { - "epoch": 0.7533792399897985, - "grad_norm": 0.0, - "learning_rate": 3.0239857104005223e-06, - "loss": 0.8158, - "step": 26586 - }, - { - "epoch": 0.753407577432061, - "grad_norm": 0.0, - "learning_rate": 3.0233281565857843e-06, - "loss": 0.7993, - "step": 26587 - }, - { - "epoch": 0.7534359148743235, - "grad_norm": 0.0, - "learning_rate": 3.022670661538147e-06, - "loss": 0.6771, - "step": 26588 - }, - { - "epoch": 0.7534642523165859, - "grad_norm": 0.0, - "learning_rate": 3.022013225263142e-06, - "loss": 0.7406, - "step": 26589 - }, - { - "epoch": 0.7534925897588484, - "grad_norm": 0.0, - "learning_rate": 3.021355847766312e-06, - "loss": 0.8322, - "step": 26590 - }, - { - "epoch": 0.7535209272011109, - "grad_norm": 0.0, - "learning_rate": 3.0206985290531956e-06, - "loss": 0.8094, - "step": 26591 - }, - { - "epoch": 0.7535492646433732, - "grad_norm": 0.0, - "learning_rate": 3.0200412691293237e-06, - "loss": 0.8351, - "step": 26592 - }, - { - "epoch": 0.7535776020856357, - "grad_norm": 0.0, - "learning_rate": 3.0193840680002364e-06, - "loss": 0.8559, - "step": 26593 - }, - { - "epoch": 0.7536059395278982, - "grad_norm": 0.0, - "learning_rate": 3.0187269256714724e-06, - "loss": 0.8818, - "step": 26594 - }, - { - "epoch": 0.7536342769701607, - "grad_norm": 0.0, - "learning_rate": 3.018069842148561e-06, - "loss": 0.7343, - "step": 26595 - }, - { - "epoch": 0.7536626144124231, - "grad_norm": 0.0, - "learning_rate": 3.0174128174370397e-06, - "loss": 0.7916, - "step": 26596 - }, - { - "epoch": 0.7536909518546856, - "grad_norm": 0.0, - "learning_rate": 3.0167558515424434e-06, - "loss": 0.828, - "step": 26597 - }, - { - "epoch": 0.7537192892969481, - "grad_norm": 0.0, - "learning_rate": 3.016098944470306e-06, - "loss": 0.7905, - "step": 26598 - }, - { - "epoch": 0.7537476267392105, - "grad_norm": 0.0, - "learning_rate": 3.015442096226163e-06, - "loss": 0.8137, - "step": 26599 - }, - { - "epoch": 0.753775964181473, - "grad_norm": 0.0, - "learning_rate": 3.014785306815542e-06, - "loss": 0.8697, - "step": 26600 - }, - { - "epoch": 0.7538043016237355, - "grad_norm": 0.0, - "learning_rate": 3.0141285762439785e-06, - "loss": 0.7065, - "step": 26601 - }, - { - "epoch": 0.753832639065998, - "grad_norm": 0.0, - "learning_rate": 3.013471904517007e-06, - "loss": 0.7987, - "step": 26602 - }, - { - "epoch": 0.7538609765082603, - "grad_norm": 0.0, - "learning_rate": 3.012815291640153e-06, - "loss": 0.807, - "step": 26603 - }, - { - "epoch": 0.7538893139505228, - "grad_norm": 0.0, - "learning_rate": 3.0121587376189544e-06, - "loss": 0.9132, - "step": 26604 - }, - { - "epoch": 0.7539176513927853, - "grad_norm": 0.0, - "learning_rate": 3.0115022424589336e-06, - "loss": 0.8351, - "step": 26605 - }, - { - "epoch": 0.7539459888350477, - "grad_norm": 0.0, - "learning_rate": 3.010845806165624e-06, - "loss": 1.0003, - "step": 26606 - }, - { - "epoch": 0.7539743262773102, - "grad_norm": 0.0, - "learning_rate": 3.01018942874456e-06, - "loss": 0.7714, - "step": 26607 - }, - { - "epoch": 0.7540026637195727, - "grad_norm": 0.0, - "learning_rate": 3.0095331102012616e-06, - "loss": 0.8602, - "step": 26608 - }, - { - "epoch": 0.7540310011618351, - "grad_norm": 0.0, - "learning_rate": 3.0088768505412623e-06, - "loss": 0.7573, - "step": 26609 - }, - { - "epoch": 0.7540593386040976, - "grad_norm": 0.0, - "learning_rate": 3.0082206497700894e-06, - "loss": 0.8071, - "step": 26610 - }, - { - "epoch": 0.7540876760463601, - "grad_norm": 0.0, - "learning_rate": 3.0075645078932703e-06, - "loss": 0.8413, - "step": 26611 - }, - { - "epoch": 0.7541160134886226, - "grad_norm": 0.0, - "learning_rate": 3.006908424916335e-06, - "loss": 0.7877, - "step": 26612 - }, - { - "epoch": 0.7541443509308849, - "grad_norm": 0.0, - "learning_rate": 3.0062524008448036e-06, - "loss": 0.8512, - "step": 26613 - }, - { - "epoch": 0.7541726883731474, - "grad_norm": 0.0, - "learning_rate": 3.005596435684206e-06, - "loss": 0.8511, - "step": 26614 - }, - { - "epoch": 0.7542010258154099, - "grad_norm": 0.0, - "learning_rate": 3.004940529440069e-06, - "loss": 0.7877, - "step": 26615 - }, - { - "epoch": 0.7542293632576723, - "grad_norm": 0.0, - "learning_rate": 3.004284682117913e-06, - "loss": 0.8972, - "step": 26616 - }, - { - "epoch": 0.7542577006999348, - "grad_norm": 0.0, - "learning_rate": 3.0036288937232683e-06, - "loss": 0.7795, - "step": 26617 - }, - { - "epoch": 0.7542860381421973, - "grad_norm": 0.0, - "learning_rate": 3.0029731642616522e-06, - "loss": 0.837, - "step": 26618 - }, - { - "epoch": 0.7543143755844598, - "grad_norm": 0.0, - "learning_rate": 3.00231749373859e-06, - "loss": 0.8401, - "step": 26619 - }, - { - "epoch": 0.7543427130267222, - "grad_norm": 0.0, - "learning_rate": 3.0016618821596077e-06, - "loss": 0.8155, - "step": 26620 - }, - { - "epoch": 0.7543710504689847, - "grad_norm": 0.0, - "learning_rate": 3.0010063295302262e-06, - "loss": 0.7862, - "step": 26621 - }, - { - "epoch": 0.7543993879112472, - "grad_norm": 0.0, - "learning_rate": 3.0003508358559697e-06, - "loss": 0.8157, - "step": 26622 - }, - { - "epoch": 0.7544277253535095, - "grad_norm": 0.0, - "learning_rate": 2.999695401142354e-06, - "loss": 0.7802, - "step": 26623 - }, - { - "epoch": 0.754456062795772, - "grad_norm": 0.0, - "learning_rate": 2.999040025394905e-06, - "loss": 0.7561, - "step": 26624 - }, - { - "epoch": 0.7544844002380345, - "grad_norm": 0.0, - "learning_rate": 2.998384708619143e-06, - "loss": 0.8274, - "step": 26625 - }, - { - "epoch": 0.754512737680297, - "grad_norm": 0.0, - "learning_rate": 2.9977294508205844e-06, - "loss": 0.8453, - "step": 26626 - }, - { - "epoch": 0.7545410751225594, - "grad_norm": 0.0, - "learning_rate": 2.9970742520047504e-06, - "loss": 0.8264, - "step": 26627 - }, - { - "epoch": 0.7545694125648219, - "grad_norm": 0.0, - "learning_rate": 2.9964191121771634e-06, - "loss": 0.8526, - "step": 26628 - }, - { - "epoch": 0.7545977500070844, - "grad_norm": 0.0, - "learning_rate": 2.9957640313433366e-06, - "loss": 0.7924, - "step": 26629 - }, - { - "epoch": 0.7546260874493468, - "grad_norm": 0.0, - "learning_rate": 2.9951090095087897e-06, - "loss": 0.8304, - "step": 26630 - }, - { - "epoch": 0.7546544248916093, - "grad_norm": 0.0, - "learning_rate": 2.994454046679045e-06, - "loss": 0.8203, - "step": 26631 - }, - { - "epoch": 0.7546827623338718, - "grad_norm": 0.0, - "learning_rate": 2.9937991428596115e-06, - "loss": 0.9019, - "step": 26632 - }, - { - "epoch": 0.7547110997761342, - "grad_norm": 0.0, - "learning_rate": 2.993144298056009e-06, - "loss": 0.8249, - "step": 26633 - }, - { - "epoch": 0.7547394372183966, - "grad_norm": 0.0, - "learning_rate": 2.992489512273754e-06, - "loss": 0.823, - "step": 26634 - }, - { - "epoch": 0.7547677746606591, - "grad_norm": 0.0, - "learning_rate": 2.9918347855183627e-06, - "loss": 0.7756, - "step": 26635 - }, - { - "epoch": 0.7547961121029216, - "grad_norm": 0.0, - "learning_rate": 2.9911801177953513e-06, - "loss": 0.7382, - "step": 26636 - }, - { - "epoch": 0.754824449545184, - "grad_norm": 0.0, - "learning_rate": 2.9905255091102313e-06, - "loss": 0.9416, - "step": 26637 - }, - { - "epoch": 0.7548527869874465, - "grad_norm": 0.0, - "learning_rate": 2.9898709594685195e-06, - "loss": 0.7806, - "step": 26638 - }, - { - "epoch": 0.754881124429709, - "grad_norm": 0.0, - "learning_rate": 2.989216468875725e-06, - "loss": 0.8174, - "step": 26639 - }, - { - "epoch": 0.7549094618719714, - "grad_norm": 0.0, - "learning_rate": 2.988562037337364e-06, - "loss": 0.8219, - "step": 26640 - }, - { - "epoch": 0.7549377993142339, - "grad_norm": 0.0, - "learning_rate": 2.987907664858951e-06, - "loss": 0.8518, - "step": 26641 - }, - { - "epoch": 0.7549661367564964, - "grad_norm": 0.0, - "learning_rate": 2.987253351445992e-06, - "loss": 0.8537, - "step": 26642 - }, - { - "epoch": 0.7549944741987589, - "grad_norm": 0.0, - "learning_rate": 2.9865990971040037e-06, - "loss": 0.8892, - "step": 26643 - }, - { - "epoch": 0.7550228116410213, - "grad_norm": 0.0, - "learning_rate": 2.985944901838498e-06, - "loss": 0.8953, - "step": 26644 - }, - { - "epoch": 0.7550511490832837, - "grad_norm": 0.0, - "learning_rate": 2.98529076565498e-06, - "loss": 0.7489, - "step": 26645 - }, - { - "epoch": 0.7550794865255462, - "grad_norm": 0.0, - "learning_rate": 2.984636688558963e-06, - "loss": 0.8536, - "step": 26646 - }, - { - "epoch": 0.7551078239678086, - "grad_norm": 0.0, - "learning_rate": 2.983982670555955e-06, - "loss": 0.8599, - "step": 26647 - }, - { - "epoch": 0.7551361614100711, - "grad_norm": 0.0, - "learning_rate": 2.9833287116514674e-06, - "loss": 0.7887, - "step": 26648 - }, - { - "epoch": 0.7551644988523336, - "grad_norm": 0.0, - "learning_rate": 2.9826748118510107e-06, - "loss": 0.9228, - "step": 26649 - }, - { - "epoch": 0.7551928362945961, - "grad_norm": 0.0, - "learning_rate": 2.9820209711600858e-06, - "loss": 0.9024, - "step": 26650 - }, - { - "epoch": 0.7552211737368585, - "grad_norm": 0.0, - "learning_rate": 2.9813671895842057e-06, - "loss": 0.8083, - "step": 26651 - }, - { - "epoch": 0.755249511179121, - "grad_norm": 0.0, - "learning_rate": 2.9807134671288785e-06, - "loss": 0.7518, - "step": 26652 - }, - { - "epoch": 0.7552778486213835, - "grad_norm": 0.0, - "learning_rate": 2.9800598037996055e-06, - "loss": 0.7099, - "step": 26653 - }, - { - "epoch": 0.7553061860636459, - "grad_norm": 0.0, - "learning_rate": 2.9794061996018973e-06, - "loss": 0.8311, - "step": 26654 - }, - { - "epoch": 0.7553345235059084, - "grad_norm": 0.0, - "learning_rate": 2.978752654541256e-06, - "loss": 0.9308, - "step": 26655 - }, - { - "epoch": 0.7553628609481708, - "grad_norm": 0.0, - "learning_rate": 2.9780991686231887e-06, - "loss": 0.8622, - "step": 26656 - }, - { - "epoch": 0.7553911983904332, - "grad_norm": 0.0, - "learning_rate": 2.977445741853202e-06, - "loss": 0.7979, - "step": 26657 - }, - { - "epoch": 0.7554195358326957, - "grad_norm": 0.0, - "learning_rate": 2.9767923742367945e-06, - "loss": 0.9454, - "step": 26658 - }, - { - "epoch": 0.7554478732749582, - "grad_norm": 0.0, - "learning_rate": 2.976139065779473e-06, - "loss": 0.8064, - "step": 26659 - }, - { - "epoch": 0.7554762107172207, - "grad_norm": 0.0, - "learning_rate": 2.9754858164867394e-06, - "loss": 0.8964, - "step": 26660 - }, - { - "epoch": 0.7555045481594831, - "grad_norm": 0.0, - "learning_rate": 2.974832626364099e-06, - "loss": 0.9595, - "step": 26661 - }, - { - "epoch": 0.7555328856017456, - "grad_norm": 0.0, - "learning_rate": 2.9741794954170546e-06, - "loss": 0.7353, - "step": 26662 - }, - { - "epoch": 0.7555612230440081, - "grad_norm": 0.0, - "learning_rate": 2.9735264236511018e-06, - "loss": 0.8919, - "step": 26663 - }, - { - "epoch": 0.7555895604862705, - "grad_norm": 0.0, - "learning_rate": 2.972873411071745e-06, - "loss": 0.8719, - "step": 26664 - }, - { - "epoch": 0.755617897928533, - "grad_norm": 0.0, - "learning_rate": 2.9722204576844883e-06, - "loss": 0.7792, - "step": 26665 - }, - { - "epoch": 0.7556462353707954, - "grad_norm": 0.0, - "learning_rate": 2.971567563494825e-06, - "loss": 0.8621, - "step": 26666 - }, - { - "epoch": 0.7556745728130579, - "grad_norm": 0.0, - "learning_rate": 2.970914728508262e-06, - "loss": 0.833, - "step": 26667 - }, - { - "epoch": 0.7557029102553203, - "grad_norm": 0.0, - "learning_rate": 2.9702619527302913e-06, - "loss": 0.8218, - "step": 26668 - }, - { - "epoch": 0.7557312476975828, - "grad_norm": 0.0, - "learning_rate": 2.969609236166413e-06, - "loss": 0.7901, - "step": 26669 - }, - { - "epoch": 0.7557595851398453, - "grad_norm": 0.0, - "learning_rate": 2.968956578822132e-06, - "loss": 0.7478, - "step": 26670 - }, - { - "epoch": 0.7557879225821077, - "grad_norm": 0.0, - "learning_rate": 2.9683039807029358e-06, - "loss": 0.9256, - "step": 26671 - }, - { - "epoch": 0.7558162600243702, - "grad_norm": 0.0, - "learning_rate": 2.9676514418143276e-06, - "loss": 0.7889, - "step": 26672 - }, - { - "epoch": 0.7558445974666327, - "grad_norm": 0.0, - "learning_rate": 2.9669989621618023e-06, - "loss": 0.7791, - "step": 26673 - }, - { - "epoch": 0.7558729349088952, - "grad_norm": 0.0, - "learning_rate": 2.9663465417508554e-06, - "loss": 0.8541, - "step": 26674 - }, - { - "epoch": 0.7559012723511576, - "grad_norm": 0.0, - "learning_rate": 2.965694180586988e-06, - "loss": 0.8324, - "step": 26675 - }, - { - "epoch": 0.75592960979342, - "grad_norm": 0.0, - "learning_rate": 2.9650418786756863e-06, - "loss": 0.8727, - "step": 26676 - }, - { - "epoch": 0.7559579472356825, - "grad_norm": 0.0, - "learning_rate": 2.96438963602245e-06, - "loss": 0.825, - "step": 26677 - }, - { - "epoch": 0.7559862846779449, - "grad_norm": 0.0, - "learning_rate": 2.9637374526327755e-06, - "loss": 0.8373, - "step": 26678 - }, - { - "epoch": 0.7560146221202074, - "grad_norm": 0.0, - "learning_rate": 2.9630853285121506e-06, - "loss": 0.7147, - "step": 26679 - }, - { - "epoch": 0.7560429595624699, - "grad_norm": 0.0, - "learning_rate": 2.96243326366607e-06, - "loss": 0.8006, - "step": 26680 - }, - { - "epoch": 0.7560712970047323, - "grad_norm": 0.0, - "learning_rate": 2.9617812581000318e-06, - "loss": 0.7584, - "step": 26681 - }, - { - "epoch": 0.7560996344469948, - "grad_norm": 0.0, - "learning_rate": 2.9611293118195197e-06, - "loss": 0.8683, - "step": 26682 - }, - { - "epoch": 0.7561279718892573, - "grad_norm": 0.0, - "learning_rate": 2.960477424830032e-06, - "loss": 0.846, - "step": 26683 - }, - { - "epoch": 0.7561563093315198, - "grad_norm": 0.0, - "learning_rate": 2.9598255971370538e-06, - "loss": 0.7802, - "step": 26684 - }, - { - "epoch": 0.7561846467737822, - "grad_norm": 0.0, - "learning_rate": 2.95917382874608e-06, - "loss": 0.8596, - "step": 26685 - }, - { - "epoch": 0.7562129842160447, - "grad_norm": 0.0, - "learning_rate": 2.958522119662599e-06, - "loss": 0.846, - "step": 26686 - }, - { - "epoch": 0.7562413216583072, - "grad_norm": 0.0, - "learning_rate": 2.9578704698921e-06, - "loss": 0.7905, - "step": 26687 - }, - { - "epoch": 0.7562696591005695, - "grad_norm": 0.0, - "learning_rate": 2.9572188794400745e-06, - "loss": 0.8088, - "step": 26688 - }, - { - "epoch": 0.756297996542832, - "grad_norm": 0.0, - "learning_rate": 2.9565673483120126e-06, - "loss": 0.8215, - "step": 26689 - }, - { - "epoch": 0.7563263339850945, - "grad_norm": 0.0, - "learning_rate": 2.9559158765133955e-06, - "loss": 0.8863, - "step": 26690 - }, - { - "epoch": 0.756354671427357, - "grad_norm": 0.0, - "learning_rate": 2.9552644640497185e-06, - "loss": 0.7841, - "step": 26691 - }, - { - "epoch": 0.7563830088696194, - "grad_norm": 0.0, - "learning_rate": 2.954613110926462e-06, - "loss": 0.8447, - "step": 26692 - }, - { - "epoch": 0.7564113463118819, - "grad_norm": 0.0, - "learning_rate": 2.9539618171491145e-06, - "loss": 0.7648, - "step": 26693 - }, - { - "epoch": 0.7564396837541444, - "grad_norm": 0.0, - "learning_rate": 2.9533105827231677e-06, - "loss": 0.8541, - "step": 26694 - }, - { - "epoch": 0.7564680211964068, - "grad_norm": 0.0, - "learning_rate": 2.9526594076540983e-06, - "loss": 0.9091, - "step": 26695 - }, - { - "epoch": 0.7564963586386693, - "grad_norm": 0.0, - "learning_rate": 2.952008291947399e-06, - "loss": 0.7866, - "step": 26696 - }, - { - "epoch": 0.7565246960809318, - "grad_norm": 0.0, - "learning_rate": 2.9513572356085485e-06, - "loss": 0.8074, - "step": 26697 - }, - { - "epoch": 0.7565530335231943, - "grad_norm": 0.0, - "learning_rate": 2.950706238643033e-06, - "loss": 0.9, - "step": 26698 - }, - { - "epoch": 0.7565813709654566, - "grad_norm": 0.0, - "learning_rate": 2.950055301056336e-06, - "loss": 0.8724, - "step": 26699 - }, - { - "epoch": 0.7566097084077191, - "grad_norm": 0.0, - "learning_rate": 2.949404422853942e-06, - "loss": 0.9584, - "step": 26700 - }, - { - "epoch": 0.7566380458499816, - "grad_norm": 0.0, - "learning_rate": 2.948753604041332e-06, - "loss": 0.7894, - "step": 26701 - }, - { - "epoch": 0.756666383292244, - "grad_norm": 0.0, - "learning_rate": 2.948102844623992e-06, - "loss": 0.7971, - "step": 26702 - }, - { - "epoch": 0.7566947207345065, - "grad_norm": 0.0, - "learning_rate": 2.9474521446073978e-06, - "loss": 0.9042, - "step": 26703 - }, - { - "epoch": 0.756723058176769, - "grad_norm": 0.0, - "learning_rate": 2.9468015039970365e-06, - "loss": 0.8268, - "step": 26704 - }, - { - "epoch": 0.7567513956190314, - "grad_norm": 0.0, - "learning_rate": 2.946150922798382e-06, - "loss": 0.8683, - "step": 26705 - }, - { - "epoch": 0.7567797330612939, - "grad_norm": 0.0, - "learning_rate": 2.9455004010169174e-06, - "loss": 0.7638, - "step": 26706 - }, - { - "epoch": 0.7568080705035564, - "grad_norm": 0.0, - "learning_rate": 2.9448499386581254e-06, - "loss": 0.7891, - "step": 26707 - }, - { - "epoch": 0.7568364079458189, - "grad_norm": 0.0, - "learning_rate": 2.9441995357274787e-06, - "loss": 0.8, - "step": 26708 - }, - { - "epoch": 0.7568647453880812, - "grad_norm": 0.0, - "learning_rate": 2.9435491922304603e-06, - "loss": 0.809, - "step": 26709 - }, - { - "epoch": 0.7568930828303437, - "grad_norm": 0.0, - "learning_rate": 2.94289890817255e-06, - "loss": 0.7583, - "step": 26710 - }, - { - "epoch": 0.7569214202726062, - "grad_norm": 0.0, - "learning_rate": 2.94224868355922e-06, - "loss": 0.8894, - "step": 26711 - }, - { - "epoch": 0.7569497577148686, - "grad_norm": 0.0, - "learning_rate": 2.9415985183959505e-06, - "loss": 0.7924, - "step": 26712 - }, - { - "epoch": 0.7569780951571311, - "grad_norm": 0.0, - "learning_rate": 2.940948412688217e-06, - "loss": 0.7608, - "step": 26713 - }, - { - "epoch": 0.7570064325993936, - "grad_norm": 0.0, - "learning_rate": 2.9402983664414963e-06, - "loss": 0.8679, - "step": 26714 - }, - { - "epoch": 0.7570347700416561, - "grad_norm": 0.0, - "learning_rate": 2.9396483796612685e-06, - "loss": 0.7461, - "step": 26715 - }, - { - "epoch": 0.7570631074839185, - "grad_norm": 0.0, - "learning_rate": 2.938998452353e-06, - "loss": 0.8711, - "step": 26716 - }, - { - "epoch": 0.757091444926181, - "grad_norm": 0.0, - "learning_rate": 2.9383485845221695e-06, - "loss": 0.8892, - "step": 26717 - }, - { - "epoch": 0.7571197823684435, - "grad_norm": 0.0, - "learning_rate": 2.9376987761742546e-06, - "loss": 0.8064, - "step": 26718 - }, - { - "epoch": 0.7571481198107058, - "grad_norm": 0.0, - "learning_rate": 2.9370490273147224e-06, - "loss": 0.8595, - "step": 26719 - }, - { - "epoch": 0.7571764572529683, - "grad_norm": 0.0, - "learning_rate": 2.9363993379490517e-06, - "loss": 0.8365, - "step": 26720 - }, - { - "epoch": 0.7572047946952308, - "grad_norm": 0.0, - "learning_rate": 2.935749708082709e-06, - "loss": 0.796, - "step": 26721 - }, - { - "epoch": 0.7572331321374932, - "grad_norm": 0.0, - "learning_rate": 2.9351001377211707e-06, - "loss": 0.8046, - "step": 26722 - }, - { - "epoch": 0.7572614695797557, - "grad_norm": 0.0, - "learning_rate": 2.934450626869909e-06, - "loss": 0.8272, - "step": 26723 - }, - { - "epoch": 0.7572898070220182, - "grad_norm": 0.0, - "learning_rate": 2.933801175534392e-06, - "loss": 0.8644, - "step": 26724 - }, - { - "epoch": 0.7573181444642807, - "grad_norm": 0.0, - "learning_rate": 2.9331517837200905e-06, - "loss": 0.926, - "step": 26725 - }, - { - "epoch": 0.7573464819065431, - "grad_norm": 0.0, - "learning_rate": 2.9325024514324764e-06, - "loss": 0.8326, - "step": 26726 - }, - { - "epoch": 0.7573748193488056, - "grad_norm": 0.0, - "learning_rate": 2.9318531786770186e-06, - "loss": 0.7754, - "step": 26727 - }, - { - "epoch": 0.7574031567910681, - "grad_norm": 0.0, - "learning_rate": 2.9312039654591896e-06, - "loss": 0.7662, - "step": 26728 - }, - { - "epoch": 0.7574314942333304, - "grad_norm": 0.0, - "learning_rate": 2.930554811784451e-06, - "loss": 0.8111, - "step": 26729 - }, - { - "epoch": 0.7574598316755929, - "grad_norm": 0.0, - "learning_rate": 2.9299057176582733e-06, - "loss": 0.8507, - "step": 26730 - }, - { - "epoch": 0.7574881691178554, - "grad_norm": 0.0, - "learning_rate": 2.9292566830861303e-06, - "loss": 0.868, - "step": 26731 - }, - { - "epoch": 0.7575165065601179, - "grad_norm": 0.0, - "learning_rate": 2.9286077080734786e-06, - "loss": 0.7108, - "step": 26732 - }, - { - "epoch": 0.7575448440023803, - "grad_norm": 0.0, - "learning_rate": 2.9279587926257945e-06, - "loss": 0.8077, - "step": 26733 - }, - { - "epoch": 0.7575731814446428, - "grad_norm": 0.0, - "learning_rate": 2.927309936748537e-06, - "loss": 0.9798, - "step": 26734 - }, - { - "epoch": 0.7576015188869053, - "grad_norm": 0.0, - "learning_rate": 2.9266611404471736e-06, - "loss": 0.7833, - "step": 26735 - }, - { - "epoch": 0.7576298563291677, - "grad_norm": 0.0, - "learning_rate": 2.926012403727173e-06, - "loss": 0.7053, - "step": 26736 - }, - { - "epoch": 0.7576581937714302, - "grad_norm": 0.0, - "learning_rate": 2.9253637265939936e-06, - "loss": 0.8428, - "step": 26737 - }, - { - "epoch": 0.7576865312136927, - "grad_norm": 0.0, - "learning_rate": 2.9247151090531032e-06, - "loss": 0.7755, - "step": 26738 - }, - { - "epoch": 0.7577148686559552, - "grad_norm": 0.0, - "learning_rate": 2.9240665511099643e-06, - "loss": 0.8152, - "step": 26739 - }, - { - "epoch": 0.7577432060982175, - "grad_norm": 0.0, - "learning_rate": 2.9234180527700407e-06, - "loss": 0.879, - "step": 26740 - }, - { - "epoch": 0.75777154354048, - "grad_norm": 0.0, - "learning_rate": 2.9227696140387974e-06, - "loss": 0.8525, - "step": 26741 - }, - { - "epoch": 0.7577998809827425, - "grad_norm": 0.0, - "learning_rate": 2.922121234921691e-06, - "loss": 0.7903, - "step": 26742 - }, - { - "epoch": 0.7578282184250049, - "grad_norm": 0.0, - "learning_rate": 2.921472915424186e-06, - "loss": 0.7416, - "step": 26743 - }, - { - "epoch": 0.7578565558672674, - "grad_norm": 0.0, - "learning_rate": 2.920824655551746e-06, - "loss": 0.7641, - "step": 26744 - }, - { - "epoch": 0.7578848933095299, - "grad_norm": 0.0, - "learning_rate": 2.9201764553098254e-06, - "loss": 0.9058, - "step": 26745 - }, - { - "epoch": 0.7579132307517923, - "grad_norm": 0.0, - "learning_rate": 2.919528314703891e-06, - "loss": 0.8375, - "step": 26746 - }, - { - "epoch": 0.7579415681940548, - "grad_norm": 0.0, - "learning_rate": 2.9188802337393953e-06, - "loss": 0.7196, - "step": 26747 - }, - { - "epoch": 0.7579699056363173, - "grad_norm": 0.0, - "learning_rate": 2.918232212421801e-06, - "loss": 0.907, - "step": 26748 - }, - { - "epoch": 0.7579982430785798, - "grad_norm": 0.0, - "learning_rate": 2.91758425075657e-06, - "loss": 0.7833, - "step": 26749 - }, - { - "epoch": 0.7580265805208422, - "grad_norm": 0.0, - "learning_rate": 2.916936348749153e-06, - "loss": 0.817, - "step": 26750 - }, - { - "epoch": 0.7580549179631046, - "grad_norm": 0.0, - "learning_rate": 2.9162885064050117e-06, - "loss": 0.8343, - "step": 26751 - }, - { - "epoch": 0.7580832554053671, - "grad_norm": 0.0, - "learning_rate": 2.915640723729604e-06, - "loss": 0.9245, - "step": 26752 - }, - { - "epoch": 0.7581115928476295, - "grad_norm": 0.0, - "learning_rate": 2.914993000728383e-06, - "loss": 0.8463, - "step": 26753 - }, - { - "epoch": 0.758139930289892, - "grad_norm": 0.0, - "learning_rate": 2.914345337406812e-06, - "loss": 0.8209, - "step": 26754 - }, - { - "epoch": 0.7581682677321545, - "grad_norm": 0.0, - "learning_rate": 2.913697733770338e-06, - "loss": 0.7537, - "step": 26755 - }, - { - "epoch": 0.758196605174417, - "grad_norm": 0.0, - "learning_rate": 2.9130501898244177e-06, - "loss": 0.7744, - "step": 26756 - }, - { - "epoch": 0.7582249426166794, - "grad_norm": 0.0, - "learning_rate": 2.9124027055745118e-06, - "loss": 0.7706, - "step": 26757 - }, - { - "epoch": 0.7582532800589419, - "grad_norm": 0.0, - "learning_rate": 2.9117552810260675e-06, - "loss": 0.6784, - "step": 26758 - }, - { - "epoch": 0.7582816175012044, - "grad_norm": 0.0, - "learning_rate": 2.911107916184539e-06, - "loss": 0.8004, - "step": 26759 - }, - { - "epoch": 0.7583099549434668, - "grad_norm": 0.0, - "learning_rate": 2.9104606110553844e-06, - "loss": 0.8488, - "step": 26760 - }, - { - "epoch": 0.7583382923857293, - "grad_norm": 0.0, - "learning_rate": 2.909813365644051e-06, - "loss": 0.7761, - "step": 26761 - }, - { - "epoch": 0.7583666298279917, - "grad_norm": 0.0, - "learning_rate": 2.9091661799559934e-06, - "loss": 0.9879, - "step": 26762 - }, - { - "epoch": 0.7583949672702542, - "grad_norm": 0.0, - "learning_rate": 2.9085190539966602e-06, - "loss": 0.9129, - "step": 26763 - }, - { - "epoch": 0.7584233047125166, - "grad_norm": 0.0, - "learning_rate": 2.9078719877715046e-06, - "loss": 0.8429, - "step": 26764 - }, - { - "epoch": 0.7584516421547791, - "grad_norm": 0.0, - "learning_rate": 2.9072249812859767e-06, - "loss": 0.7552, - "step": 26765 - }, - { - "epoch": 0.7584799795970416, - "grad_norm": 0.0, - "learning_rate": 2.9065780345455265e-06, - "loss": 0.9059, - "step": 26766 - }, - { - "epoch": 0.758508317039304, - "grad_norm": 0.0, - "learning_rate": 2.905931147555604e-06, - "loss": 0.9203, - "step": 26767 - }, - { - "epoch": 0.7585366544815665, - "grad_norm": 0.0, - "learning_rate": 2.9052843203216597e-06, - "loss": 0.7653, - "step": 26768 - }, - { - "epoch": 0.758564991923829, - "grad_norm": 0.0, - "learning_rate": 2.9046375528491378e-06, - "loss": 0.8005, - "step": 26769 - }, - { - "epoch": 0.7585933293660914, - "grad_norm": 0.0, - "learning_rate": 2.903990845143492e-06, - "loss": 0.8997, - "step": 26770 - }, - { - "epoch": 0.7586216668083539, - "grad_norm": 0.0, - "learning_rate": 2.9033441972101628e-06, - "loss": 0.832, - "step": 26771 - }, - { - "epoch": 0.7586500042506163, - "grad_norm": 0.0, - "learning_rate": 2.902697609054601e-06, - "loss": 0.8101, - "step": 26772 - }, - { - "epoch": 0.7586783416928788, - "grad_norm": 0.0, - "learning_rate": 2.9020510806822555e-06, - "loss": 0.8817, - "step": 26773 - }, - { - "epoch": 0.7587066791351412, - "grad_norm": 0.0, - "learning_rate": 2.901404612098567e-06, - "loss": 0.8452, - "step": 26774 - }, - { - "epoch": 0.7587350165774037, - "grad_norm": 0.0, - "learning_rate": 2.9007582033089865e-06, - "loss": 0.7219, - "step": 26775 - }, - { - "epoch": 0.7587633540196662, - "grad_norm": 0.0, - "learning_rate": 2.900111854318952e-06, - "loss": 0.7423, - "step": 26776 - }, - { - "epoch": 0.7587916914619286, - "grad_norm": 0.0, - "learning_rate": 2.899465565133912e-06, - "loss": 0.8068, - "step": 26777 - }, - { - "epoch": 0.7588200289041911, - "grad_norm": 0.0, - "learning_rate": 2.898819335759311e-06, - "loss": 0.8183, - "step": 26778 - }, - { - "epoch": 0.7588483663464536, - "grad_norm": 0.0, - "learning_rate": 2.898173166200591e-06, - "loss": 0.8089, - "step": 26779 - }, - { - "epoch": 0.7588767037887161, - "grad_norm": 0.0, - "learning_rate": 2.8975270564631963e-06, - "loss": 0.7563, - "step": 26780 - }, - { - "epoch": 0.7589050412309785, - "grad_norm": 0.0, - "learning_rate": 2.896881006552571e-06, - "loss": 0.808, - "step": 26781 - }, - { - "epoch": 0.758933378673241, - "grad_norm": 0.0, - "learning_rate": 2.8962350164741515e-06, - "loss": 0.8265, - "step": 26782 - }, - { - "epoch": 0.7589617161155034, - "grad_norm": 0.0, - "learning_rate": 2.895589086233386e-06, - "loss": 0.7182, - "step": 26783 - }, - { - "epoch": 0.7589900535577658, - "grad_norm": 0.0, - "learning_rate": 2.8949432158357083e-06, - "loss": 0.7499, - "step": 26784 - }, - { - "epoch": 0.7590183910000283, - "grad_norm": 0.0, - "learning_rate": 2.8942974052865624e-06, - "loss": 0.826, - "step": 26785 - }, - { - "epoch": 0.7590467284422908, - "grad_norm": 0.0, - "learning_rate": 2.8936516545913917e-06, - "loss": 0.8432, - "step": 26786 - }, - { - "epoch": 0.7590750658845533, - "grad_norm": 0.0, - "learning_rate": 2.8930059637556286e-06, - "loss": 0.8381, - "step": 26787 - }, - { - "epoch": 0.7591034033268157, - "grad_norm": 0.0, - "learning_rate": 2.8923603327847148e-06, - "loss": 0.8524, - "step": 26788 - }, - { - "epoch": 0.7591317407690782, - "grad_norm": 0.0, - "learning_rate": 2.8917147616840933e-06, - "loss": 0.8191, - "step": 26789 - }, - { - "epoch": 0.7591600782113407, - "grad_norm": 0.0, - "learning_rate": 2.891069250459194e-06, - "loss": 0.8036, - "step": 26790 - }, - { - "epoch": 0.7591884156536031, - "grad_norm": 0.0, - "learning_rate": 2.8904237991154594e-06, - "loss": 0.8525, - "step": 26791 - }, - { - "epoch": 0.7592167530958656, - "grad_norm": 0.0, - "learning_rate": 2.8897784076583237e-06, - "loss": 0.818, - "step": 26792 - }, - { - "epoch": 0.759245090538128, - "grad_norm": 0.0, - "learning_rate": 2.889133076093226e-06, - "loss": 0.7514, - "step": 26793 - }, - { - "epoch": 0.7592734279803904, - "grad_norm": 0.0, - "learning_rate": 2.8884878044256037e-06, - "loss": 0.7791, - "step": 26794 - }, - { - "epoch": 0.7593017654226529, - "grad_norm": 0.0, - "learning_rate": 2.8878425926608856e-06, - "loss": 0.6821, - "step": 26795 - }, - { - "epoch": 0.7593301028649154, - "grad_norm": 0.0, - "learning_rate": 2.887197440804511e-06, - "loss": 0.7885, - "step": 26796 - }, - { - "epoch": 0.7593584403071779, - "grad_norm": 0.0, - "learning_rate": 2.8865523488619174e-06, - "loss": 0.799, - "step": 26797 - }, - { - "epoch": 0.7593867777494403, - "grad_norm": 0.0, - "learning_rate": 2.885907316838531e-06, - "loss": 0.7574, - "step": 26798 - }, - { - "epoch": 0.7594151151917028, - "grad_norm": 0.0, - "learning_rate": 2.885262344739792e-06, - "loss": 0.7204, - "step": 26799 - }, - { - "epoch": 0.7594434526339653, - "grad_norm": 0.0, - "learning_rate": 2.8846174325711272e-06, - "loss": 0.854, - "step": 26800 - }, - { - "epoch": 0.7594717900762277, - "grad_norm": 0.0, - "learning_rate": 2.8839725803379724e-06, - "loss": 0.7798, - "step": 26801 - }, - { - "epoch": 0.7595001275184902, - "grad_norm": 0.0, - "learning_rate": 2.8833277880457622e-06, - "loss": 0.7363, - "step": 26802 - }, - { - "epoch": 0.7595284649607527, - "grad_norm": 0.0, - "learning_rate": 2.8826830556999207e-06, - "loss": 0.878, - "step": 26803 - }, - { - "epoch": 0.7595568024030152, - "grad_norm": 0.0, - "learning_rate": 2.882038383305884e-06, - "loss": 0.8765, - "step": 26804 - }, - { - "epoch": 0.7595851398452775, - "grad_norm": 0.0, - "learning_rate": 2.881393770869081e-06, - "loss": 0.8282, - "step": 26805 - }, - { - "epoch": 0.75961347728754, - "grad_norm": 0.0, - "learning_rate": 2.8807492183949404e-06, - "loss": 0.6954, - "step": 26806 - }, - { - "epoch": 0.7596418147298025, - "grad_norm": 0.0, - "learning_rate": 2.880104725888897e-06, - "loss": 0.8761, - "step": 26807 - }, - { - "epoch": 0.7596701521720649, - "grad_norm": 0.0, - "learning_rate": 2.879460293356372e-06, - "loss": 0.8906, - "step": 26808 - }, - { - "epoch": 0.7596984896143274, - "grad_norm": 0.0, - "learning_rate": 2.8788159208027975e-06, - "loss": 0.8934, - "step": 26809 - }, - { - "epoch": 0.7597268270565899, - "grad_norm": 0.0, - "learning_rate": 2.8781716082336042e-06, - "loss": 0.8245, - "step": 26810 - }, - { - "epoch": 0.7597551644988524, - "grad_norm": 0.0, - "learning_rate": 2.8775273556542116e-06, - "loss": 0.7406, - "step": 26811 - }, - { - "epoch": 0.7597835019411148, - "grad_norm": 0.0, - "learning_rate": 2.8768831630700555e-06, - "loss": 0.7799, - "step": 26812 - }, - { - "epoch": 0.7598118393833773, - "grad_norm": 0.0, - "learning_rate": 2.876239030486554e-06, - "loss": 0.789, - "step": 26813 - }, - { - "epoch": 0.7598401768256398, - "grad_norm": 0.0, - "learning_rate": 2.875594957909136e-06, - "loss": 0.7605, - "step": 26814 - }, - { - "epoch": 0.7598685142679021, - "grad_norm": 0.0, - "learning_rate": 2.874950945343231e-06, - "loss": 0.8615, - "step": 26815 - }, - { - "epoch": 0.7598968517101646, - "grad_norm": 0.0, - "learning_rate": 2.874306992794257e-06, - "loss": 0.741, - "step": 26816 - }, - { - "epoch": 0.7599251891524271, - "grad_norm": 0.0, - "learning_rate": 2.873663100267641e-06, - "loss": 0.8712, - "step": 26817 - }, - { - "epoch": 0.7599535265946895, - "grad_norm": 0.0, - "learning_rate": 2.873019267768806e-06, - "loss": 0.8089, - "step": 26818 - }, - { - "epoch": 0.759981864036952, - "grad_norm": 0.0, - "learning_rate": 2.872375495303178e-06, - "loss": 0.7132, - "step": 26819 - }, - { - "epoch": 0.7600102014792145, - "grad_norm": 0.0, - "learning_rate": 2.8717317828761805e-06, - "loss": 0.7898, - "step": 26820 - }, - { - "epoch": 0.760038538921477, - "grad_norm": 0.0, - "learning_rate": 2.8710881304932293e-06, - "loss": 0.852, - "step": 26821 - }, - { - "epoch": 0.7600668763637394, - "grad_norm": 0.0, - "learning_rate": 2.8704445381597513e-06, - "loss": 0.9038, - "step": 26822 - }, - { - "epoch": 0.7600952138060019, - "grad_norm": 0.0, - "learning_rate": 2.8698010058811686e-06, - "loss": 0.8311, - "step": 26823 - }, - { - "epoch": 0.7601235512482644, - "grad_norm": 0.0, - "learning_rate": 2.8691575336628973e-06, - "loss": 0.8277, - "step": 26824 - }, - { - "epoch": 0.7601518886905267, - "grad_norm": 0.0, - "learning_rate": 2.8685141215103594e-06, - "loss": 0.8202, - "step": 26825 - }, - { - "epoch": 0.7601802261327892, - "grad_norm": 0.0, - "learning_rate": 2.867870769428979e-06, - "loss": 0.8495, - "step": 26826 - }, - { - "epoch": 0.7602085635750517, - "grad_norm": 0.0, - "learning_rate": 2.867227477424168e-06, - "loss": 0.8295, - "step": 26827 - }, - { - "epoch": 0.7602369010173142, - "grad_norm": 0.0, - "learning_rate": 2.8665842455013513e-06, - "loss": 0.7648, - "step": 26828 - }, - { - "epoch": 0.7602652384595766, - "grad_norm": 0.0, - "learning_rate": 2.865941073665942e-06, - "loss": 0.757, - "step": 26829 - }, - { - "epoch": 0.7602935759018391, - "grad_norm": 0.0, - "learning_rate": 2.86529796192336e-06, - "loss": 0.8091, - "step": 26830 - }, - { - "epoch": 0.7603219133441016, - "grad_norm": 0.0, - "learning_rate": 2.8646549102790232e-06, - "loss": 0.7972, - "step": 26831 - }, - { - "epoch": 0.760350250786364, - "grad_norm": 0.0, - "learning_rate": 2.8640119187383475e-06, - "loss": 0.8632, - "step": 26832 - }, - { - "epoch": 0.7603785882286265, - "grad_norm": 0.0, - "learning_rate": 2.863368987306753e-06, - "loss": 0.904, - "step": 26833 - }, - { - "epoch": 0.760406925670889, - "grad_norm": 0.0, - "learning_rate": 2.8627261159896467e-06, - "loss": 0.8398, - "step": 26834 - }, - { - "epoch": 0.7604352631131515, - "grad_norm": 0.0, - "learning_rate": 2.8620833047924502e-06, - "loss": 0.8087, - "step": 26835 - }, - { - "epoch": 0.7604636005554138, - "grad_norm": 0.0, - "learning_rate": 2.8614405537205793e-06, - "loss": 0.8675, - "step": 26836 - }, - { - "epoch": 0.7604919379976763, - "grad_norm": 0.0, - "learning_rate": 2.860797862779442e-06, - "loss": 0.7908, - "step": 26837 - }, - { - "epoch": 0.7605202754399388, - "grad_norm": 0.0, - "learning_rate": 2.8601552319744564e-06, - "loss": 0.7421, - "step": 26838 - }, - { - "epoch": 0.7605486128822012, - "grad_norm": 0.0, - "learning_rate": 2.859512661311037e-06, - "loss": 0.9404, - "step": 26839 - }, - { - "epoch": 0.7605769503244637, - "grad_norm": 0.0, - "learning_rate": 2.8588701507945904e-06, - "loss": 0.8952, - "step": 26840 - }, - { - "epoch": 0.7606052877667262, - "grad_norm": 0.0, - "learning_rate": 2.8582277004305326e-06, - "loss": 0.7658, - "step": 26841 - }, - { - "epoch": 0.7606336252089886, - "grad_norm": 0.0, - "learning_rate": 2.857585310224279e-06, - "loss": 0.9089, - "step": 26842 - }, - { - "epoch": 0.7606619626512511, - "grad_norm": 0.0, - "learning_rate": 2.856942980181232e-06, - "loss": 0.7845, - "step": 26843 - }, - { - "epoch": 0.7606903000935136, - "grad_norm": 0.0, - "learning_rate": 2.8563007103068075e-06, - "loss": 0.8256, - "step": 26844 - }, - { - "epoch": 0.7607186375357761, - "grad_norm": 0.0, - "learning_rate": 2.8556585006064153e-06, - "loss": 0.8076, - "step": 26845 - }, - { - "epoch": 0.7607469749780384, - "grad_norm": 0.0, - "learning_rate": 2.8550163510854647e-06, - "loss": 0.787, - "step": 26846 - }, - { - "epoch": 0.7607753124203009, - "grad_norm": 0.0, - "learning_rate": 2.8543742617493665e-06, - "loss": 0.7976, - "step": 26847 - }, - { - "epoch": 0.7608036498625634, - "grad_norm": 0.0, - "learning_rate": 2.8537322326035253e-06, - "loss": 0.8303, - "step": 26848 - }, - { - "epoch": 0.7608319873048258, - "grad_norm": 0.0, - "learning_rate": 2.853090263653354e-06, - "loss": 0.8739, - "step": 26849 - }, - { - "epoch": 0.7608603247470883, - "grad_norm": 0.0, - "learning_rate": 2.8524483549042537e-06, - "loss": 0.8042, - "step": 26850 - }, - { - "epoch": 0.7608886621893508, - "grad_norm": 0.0, - "learning_rate": 2.8518065063616353e-06, - "loss": 0.7961, - "step": 26851 - }, - { - "epoch": 0.7609169996316133, - "grad_norm": 0.0, - "learning_rate": 2.8511647180309087e-06, - "loss": 0.8029, - "step": 26852 - }, - { - "epoch": 0.7609453370738757, - "grad_norm": 0.0, - "learning_rate": 2.8505229899174734e-06, - "loss": 0.8207, - "step": 26853 - }, - { - "epoch": 0.7609736745161382, - "grad_norm": 0.0, - "learning_rate": 2.8498813220267373e-06, - "loss": 0.8276, - "step": 26854 - }, - { - "epoch": 0.7610020119584007, - "grad_norm": 0.0, - "learning_rate": 2.849239714364106e-06, - "loss": 0.852, - "step": 26855 - }, - { - "epoch": 0.761030349400663, - "grad_norm": 0.0, - "learning_rate": 2.848598166934984e-06, - "loss": 0.8941, - "step": 26856 - }, - { - "epoch": 0.7610586868429255, - "grad_norm": 0.0, - "learning_rate": 2.847956679744779e-06, - "loss": 0.7535, - "step": 26857 - }, - { - "epoch": 0.761087024285188, - "grad_norm": 0.0, - "learning_rate": 2.847315252798887e-06, - "loss": 0.8969, - "step": 26858 - }, - { - "epoch": 0.7611153617274505, - "grad_norm": 0.0, - "learning_rate": 2.8466738861027143e-06, - "loss": 0.8151, - "step": 26859 - }, - { - "epoch": 0.7611436991697129, - "grad_norm": 0.0, - "learning_rate": 2.846032579661667e-06, - "loss": 0.7988, - "step": 26860 - }, - { - "epoch": 0.7611720366119754, - "grad_norm": 0.0, - "learning_rate": 2.845391333481141e-06, - "loss": 0.9037, - "step": 26861 - }, - { - "epoch": 0.7612003740542379, - "grad_norm": 0.0, - "learning_rate": 2.844750147566544e-06, - "loss": 0.7809, - "step": 26862 - }, - { - "epoch": 0.7612287114965003, - "grad_norm": 0.0, - "learning_rate": 2.84410902192327e-06, - "loss": 0.8574, - "step": 26863 - }, - { - "epoch": 0.7612570489387628, - "grad_norm": 0.0, - "learning_rate": 2.8434679565567236e-06, - "loss": 0.8169, - "step": 26864 - }, - { - "epoch": 0.7612853863810253, - "grad_norm": 0.0, - "learning_rate": 2.842826951472306e-06, - "loss": 0.8764, - "step": 26865 - }, - { - "epoch": 0.7613137238232877, - "grad_norm": 0.0, - "learning_rate": 2.8421860066754126e-06, - "loss": 0.8084, - "step": 26866 - }, - { - "epoch": 0.7613420612655502, - "grad_norm": 0.0, - "learning_rate": 2.841545122171445e-06, - "loss": 0.7886, - "step": 26867 - }, - { - "epoch": 0.7613703987078126, - "grad_norm": 0.0, - "learning_rate": 2.8409042979657997e-06, - "loss": 0.7565, - "step": 26868 - }, - { - "epoch": 0.7613987361500751, - "grad_norm": 0.0, - "learning_rate": 2.8402635340638775e-06, - "loss": 0.8076, - "step": 26869 - }, - { - "epoch": 0.7614270735923375, - "grad_norm": 0.0, - "learning_rate": 2.839622830471076e-06, - "loss": 0.8498, - "step": 26870 - }, - { - "epoch": 0.7614554110346, - "grad_norm": 0.0, - "learning_rate": 2.8389821871927882e-06, - "loss": 0.7803, - "step": 26871 - }, - { - "epoch": 0.7614837484768625, - "grad_norm": 0.0, - "learning_rate": 2.8383416042344114e-06, - "loss": 0.7967, - "step": 26872 - }, - { - "epoch": 0.7615120859191249, - "grad_norm": 0.0, - "learning_rate": 2.8377010816013463e-06, - "loss": 0.8056, - "step": 26873 - }, - { - "epoch": 0.7615404233613874, - "grad_norm": 0.0, - "learning_rate": 2.8370606192989826e-06, - "loss": 0.8004, - "step": 26874 - }, - { - "epoch": 0.7615687608036499, - "grad_norm": 0.0, - "learning_rate": 2.836420217332716e-06, - "loss": 0.8096, - "step": 26875 - }, - { - "epoch": 0.7615970982459124, - "grad_norm": 0.0, - "learning_rate": 2.835779875707946e-06, - "loss": 0.8175, - "step": 26876 - }, - { - "epoch": 0.7616254356881748, - "grad_norm": 0.0, - "learning_rate": 2.8351395944300586e-06, - "loss": 0.8075, - "step": 26877 - }, - { - "epoch": 0.7616537731304373, - "grad_norm": 0.0, - "learning_rate": 2.8344993735044546e-06, - "loss": 0.8321, - "step": 26878 - }, - { - "epoch": 0.7616821105726997, - "grad_norm": 0.0, - "learning_rate": 2.8338592129365194e-06, - "loss": 0.824, - "step": 26879 - }, - { - "epoch": 0.7617104480149621, - "grad_norm": 0.0, - "learning_rate": 2.83321911273165e-06, - "loss": 0.8326, - "step": 26880 - }, - { - "epoch": 0.7617387854572246, - "grad_norm": 0.0, - "learning_rate": 2.8325790728952364e-06, - "loss": 0.8308, - "step": 26881 - }, - { - "epoch": 0.7617671228994871, - "grad_norm": 0.0, - "learning_rate": 2.831939093432672e-06, - "loss": 0.8247, - "step": 26882 - }, - { - "epoch": 0.7617954603417496, - "grad_norm": 0.0, - "learning_rate": 2.8312991743493457e-06, - "loss": 0.826, - "step": 26883 - }, - { - "epoch": 0.761823797784012, - "grad_norm": 0.0, - "learning_rate": 2.830659315650651e-06, - "loss": 0.7347, - "step": 26884 - }, - { - "epoch": 0.7618521352262745, - "grad_norm": 0.0, - "learning_rate": 2.830019517341973e-06, - "loss": 0.7853, - "step": 26885 - }, - { - "epoch": 0.761880472668537, - "grad_norm": 0.0, - "learning_rate": 2.829379779428706e-06, - "loss": 0.8872, - "step": 26886 - }, - { - "epoch": 0.7619088101107994, - "grad_norm": 0.0, - "learning_rate": 2.8287401019162332e-06, - "loss": 0.8272, - "step": 26887 - }, - { - "epoch": 0.7619371475530619, - "grad_norm": 0.0, - "learning_rate": 2.828100484809945e-06, - "loss": 0.8768, - "step": 26888 - }, - { - "epoch": 0.7619654849953243, - "grad_norm": 0.0, - "learning_rate": 2.8274609281152322e-06, - "loss": 0.7922, - "step": 26889 - }, - { - "epoch": 0.7619938224375867, - "grad_norm": 0.0, - "learning_rate": 2.8268214318374764e-06, - "loss": 0.7268, - "step": 26890 - }, - { - "epoch": 0.7620221598798492, - "grad_norm": 0.0, - "learning_rate": 2.8261819959820713e-06, - "loss": 0.9297, - "step": 26891 - }, - { - "epoch": 0.7620504973221117, - "grad_norm": 0.0, - "learning_rate": 2.8255426205543957e-06, - "loss": 0.8552, - "step": 26892 - }, - { - "epoch": 0.7620788347643742, - "grad_norm": 0.0, - "learning_rate": 2.8249033055598387e-06, - "loss": 0.8178, - "step": 26893 - }, - { - "epoch": 0.7621071722066366, - "grad_norm": 0.0, - "learning_rate": 2.8242640510037853e-06, - "loss": 0.7923, - "step": 26894 - }, - { - "epoch": 0.7621355096488991, - "grad_norm": 0.0, - "learning_rate": 2.8236248568916215e-06, - "loss": 1.0781, - "step": 26895 - }, - { - "epoch": 0.7621638470911616, - "grad_norm": 0.0, - "learning_rate": 2.8229857232287293e-06, - "loss": 0.832, - "step": 26896 - }, - { - "epoch": 0.762192184533424, - "grad_norm": 0.0, - "learning_rate": 2.822346650020498e-06, - "loss": 0.8611, - "step": 26897 - }, - { - "epoch": 0.7622205219756865, - "grad_norm": 0.0, - "learning_rate": 2.8217076372723017e-06, - "loss": 0.7281, - "step": 26898 - }, - { - "epoch": 0.762248859417949, - "grad_norm": 0.0, - "learning_rate": 2.821068684989531e-06, - "loss": 0.7729, - "step": 26899 - }, - { - "epoch": 0.7622771968602114, - "grad_norm": 0.0, - "learning_rate": 2.8204297931775615e-06, - "loss": 0.7008, - "step": 26900 - }, - { - "epoch": 0.7623055343024738, - "grad_norm": 0.0, - "learning_rate": 2.8197909618417786e-06, - "loss": 0.9041, - "step": 26901 - }, - { - "epoch": 0.7623338717447363, - "grad_norm": 0.0, - "learning_rate": 2.819152190987565e-06, - "loss": 0.8251, - "step": 26902 - }, - { - "epoch": 0.7623622091869988, - "grad_norm": 0.0, - "learning_rate": 2.818513480620296e-06, - "loss": 0.7669, - "step": 26903 - }, - { - "epoch": 0.7623905466292612, - "grad_norm": 0.0, - "learning_rate": 2.8178748307453552e-06, - "loss": 0.7689, - "step": 26904 - }, - { - "epoch": 0.7624188840715237, - "grad_norm": 0.0, - "learning_rate": 2.8172362413681243e-06, - "loss": 0.731, - "step": 26905 - }, - { - "epoch": 0.7624472215137862, - "grad_norm": 0.0, - "learning_rate": 2.816597712493977e-06, - "loss": 0.8451, - "step": 26906 - }, - { - "epoch": 0.7624755589560487, - "grad_norm": 0.0, - "learning_rate": 2.8159592441282948e-06, - "loss": 0.876, - "step": 26907 - }, - { - "epoch": 0.7625038963983111, - "grad_norm": 0.0, - "learning_rate": 2.815320836276455e-06, - "loss": 0.8327, - "step": 26908 - }, - { - "epoch": 0.7625322338405736, - "grad_norm": 0.0, - "learning_rate": 2.814682488943836e-06, - "loss": 0.7852, - "step": 26909 - }, - { - "epoch": 0.762560571282836, - "grad_norm": 0.0, - "learning_rate": 2.8140442021358185e-06, - "loss": 0.8958, - "step": 26910 - }, - { - "epoch": 0.7625889087250984, - "grad_norm": 0.0, - "learning_rate": 2.8134059758577714e-06, - "loss": 0.8584, - "step": 26911 - }, - { - "epoch": 0.7626172461673609, - "grad_norm": 0.0, - "learning_rate": 2.8127678101150744e-06, - "loss": 0.8098, - "step": 26912 - }, - { - "epoch": 0.7626455836096234, - "grad_norm": 0.0, - "learning_rate": 2.8121297049131057e-06, - "loss": 0.7666, - "step": 26913 - }, - { - "epoch": 0.7626739210518858, - "grad_norm": 0.0, - "learning_rate": 2.811491660257235e-06, - "loss": 0.7876, - "step": 26914 - }, - { - "epoch": 0.7627022584941483, - "grad_norm": 0.0, - "learning_rate": 2.8108536761528426e-06, - "loss": 0.7601, - "step": 26915 - }, - { - "epoch": 0.7627305959364108, - "grad_norm": 0.0, - "learning_rate": 2.8102157526052963e-06, - "loss": 0.8619, - "step": 26916 - }, - { - "epoch": 0.7627589333786733, - "grad_norm": 0.0, - "learning_rate": 2.809577889619972e-06, - "loss": 0.7665, - "step": 26917 - }, - { - "epoch": 0.7627872708209357, - "grad_norm": 0.0, - "learning_rate": 2.8089400872022475e-06, - "loss": 0.8813, - "step": 26918 - }, - { - "epoch": 0.7628156082631982, - "grad_norm": 0.0, - "learning_rate": 2.8083023453574867e-06, - "loss": 0.8743, - "step": 26919 - }, - { - "epoch": 0.7628439457054607, - "grad_norm": 0.0, - "learning_rate": 2.8076646640910666e-06, - "loss": 0.8001, - "step": 26920 - }, - { - "epoch": 0.762872283147723, - "grad_norm": 0.0, - "learning_rate": 2.807027043408358e-06, - "loss": 0.8083, - "step": 26921 - }, - { - "epoch": 0.7629006205899855, - "grad_norm": 0.0, - "learning_rate": 2.8063894833147308e-06, - "loss": 0.8499, - "step": 26922 - }, - { - "epoch": 0.762928958032248, - "grad_norm": 0.0, - "learning_rate": 2.80575198381556e-06, - "loss": 0.7451, - "step": 26923 - }, - { - "epoch": 0.7629572954745105, - "grad_norm": 0.0, - "learning_rate": 2.8051145449162075e-06, - "loss": 0.7696, - "step": 26924 - }, - { - "epoch": 0.7629856329167729, - "grad_norm": 0.0, - "learning_rate": 2.8044771666220483e-06, - "loss": 0.7453, - "step": 26925 - }, - { - "epoch": 0.7630139703590354, - "grad_norm": 0.0, - "learning_rate": 2.8038398489384522e-06, - "loss": 0.7651, - "step": 26926 - }, - { - "epoch": 0.7630423078012979, - "grad_norm": 0.0, - "learning_rate": 2.8032025918707828e-06, - "loss": 0.7592, - "step": 26927 - }, - { - "epoch": 0.7630706452435603, - "grad_norm": 0.0, - "learning_rate": 2.8025653954244135e-06, - "loss": 0.8349, - "step": 26928 - }, - { - "epoch": 0.7630989826858228, - "grad_norm": 0.0, - "learning_rate": 2.801928259604705e-06, - "loss": 0.7732, - "step": 26929 - }, - { - "epoch": 0.7631273201280853, - "grad_norm": 0.0, - "learning_rate": 2.8012911844170277e-06, - "loss": 0.8064, - "step": 26930 - }, - { - "epoch": 0.7631556575703476, - "grad_norm": 0.0, - "learning_rate": 2.8006541698667512e-06, - "loss": 0.7159, - "step": 26931 - }, - { - "epoch": 0.7631839950126101, - "grad_norm": 0.0, - "learning_rate": 2.8000172159592353e-06, - "loss": 0.8452, - "step": 26932 - }, - { - "epoch": 0.7632123324548726, - "grad_norm": 0.0, - "learning_rate": 2.7993803226998485e-06, - "loss": 0.7677, - "step": 26933 - }, - { - "epoch": 0.7632406698971351, - "grad_norm": 0.0, - "learning_rate": 2.7987434900939537e-06, - "loss": 0.8142, - "step": 26934 - }, - { - "epoch": 0.7632690073393975, - "grad_norm": 0.0, - "learning_rate": 2.798106718146918e-06, - "loss": 0.7989, - "step": 26935 - }, - { - "epoch": 0.76329734478166, - "grad_norm": 0.0, - "learning_rate": 2.797470006864106e-06, - "loss": 0.8725, - "step": 26936 - }, - { - "epoch": 0.7633256822239225, - "grad_norm": 0.0, - "learning_rate": 2.7968333562508754e-06, - "loss": 0.7712, - "step": 26937 - }, - { - "epoch": 0.7633540196661849, - "grad_norm": 0.0, - "learning_rate": 2.7961967663125924e-06, - "loss": 0.801, - "step": 26938 - }, - { - "epoch": 0.7633823571084474, - "grad_norm": 0.0, - "learning_rate": 2.795560237054623e-06, - "loss": 0.6979, - "step": 26939 - }, - { - "epoch": 0.7634106945507099, - "grad_norm": 0.0, - "learning_rate": 2.7949237684823217e-06, - "loss": 0.8677, - "step": 26940 - }, - { - "epoch": 0.7634390319929724, - "grad_norm": 0.0, - "learning_rate": 2.7942873606010524e-06, - "loss": 0.7477, - "step": 26941 - }, - { - "epoch": 0.7634673694352347, - "grad_norm": 0.0, - "learning_rate": 2.79365101341618e-06, - "loss": 0.8114, - "step": 26942 - }, - { - "epoch": 0.7634957068774972, - "grad_norm": 0.0, - "learning_rate": 2.7930147269330577e-06, - "loss": 0.869, - "step": 26943 - }, - { - "epoch": 0.7635240443197597, - "grad_norm": 0.0, - "learning_rate": 2.7923785011570513e-06, - "loss": 0.7404, - "step": 26944 - }, - { - "epoch": 0.7635523817620221, - "grad_norm": 0.0, - "learning_rate": 2.7917423360935147e-06, - "loss": 0.7714, - "step": 26945 - }, - { - "epoch": 0.7635807192042846, - "grad_norm": 0.0, - "learning_rate": 2.79110623174781e-06, - "loss": 0.9005, - "step": 26946 - }, - { - "epoch": 0.7636090566465471, - "grad_norm": 0.0, - "learning_rate": 2.7904701881252936e-06, - "loss": 0.9427, - "step": 26947 - }, - { - "epoch": 0.7636373940888096, - "grad_norm": 0.0, - "learning_rate": 2.7898342052313233e-06, - "loss": 0.7796, - "step": 26948 - }, - { - "epoch": 0.763665731531072, - "grad_norm": 0.0, - "learning_rate": 2.7891982830712614e-06, - "loss": 0.7212, - "step": 26949 - }, - { - "epoch": 0.7636940689733345, - "grad_norm": 0.0, - "learning_rate": 2.788562421650456e-06, - "loss": 0.8625, - "step": 26950 - }, - { - "epoch": 0.763722406415597, - "grad_norm": 0.0, - "learning_rate": 2.787926620974267e-06, - "loss": 0.8551, - "step": 26951 - }, - { - "epoch": 0.7637507438578593, - "grad_norm": 0.0, - "learning_rate": 2.787290881048055e-06, - "loss": 0.8, - "step": 26952 - }, - { - "epoch": 0.7637790813001218, - "grad_norm": 0.0, - "learning_rate": 2.7866552018771652e-06, - "loss": 0.827, - "step": 26953 - }, - { - "epoch": 0.7638074187423843, - "grad_norm": 0.0, - "learning_rate": 2.786019583466958e-06, - "loss": 0.8557, - "step": 26954 - }, - { - "epoch": 0.7638357561846467, - "grad_norm": 0.0, - "learning_rate": 2.7853840258227905e-06, - "loss": 0.7764, - "step": 26955 - }, - { - "epoch": 0.7638640936269092, - "grad_norm": 0.0, - "learning_rate": 2.7847485289500085e-06, - "loss": 0.7391, - "step": 26956 - }, - { - "epoch": 0.7638924310691717, - "grad_norm": 0.0, - "learning_rate": 2.7841130928539716e-06, - "loss": 0.7411, - "step": 26957 - }, - { - "epoch": 0.7639207685114342, - "grad_norm": 0.0, - "learning_rate": 2.783477717540027e-06, - "loss": 0.8417, - "step": 26958 - }, - { - "epoch": 0.7639491059536966, - "grad_norm": 0.0, - "learning_rate": 2.7828424030135305e-06, - "loss": 0.9581, - "step": 26959 - }, - { - "epoch": 0.7639774433959591, - "grad_norm": 0.0, - "learning_rate": 2.7822071492798307e-06, - "loss": 0.8283, - "step": 26960 - }, - { - "epoch": 0.7640057808382216, - "grad_norm": 0.0, - "learning_rate": 2.781571956344282e-06, - "loss": 0.7682, - "step": 26961 - }, - { - "epoch": 0.764034118280484, - "grad_norm": 0.0, - "learning_rate": 2.7809368242122327e-06, - "loss": 0.8165, - "step": 26962 - }, - { - "epoch": 0.7640624557227464, - "grad_norm": 0.0, - "learning_rate": 2.780301752889035e-06, - "loss": 0.8147, - "step": 26963 - }, - { - "epoch": 0.7640907931650089, - "grad_norm": 0.0, - "learning_rate": 2.779666742380035e-06, - "loss": 0.8795, - "step": 26964 - }, - { - "epoch": 0.7641191306072714, - "grad_norm": 0.0, - "learning_rate": 2.7790317926905865e-06, - "loss": 0.7748, - "step": 26965 - }, - { - "epoch": 0.7641474680495338, - "grad_norm": 0.0, - "learning_rate": 2.7783969038260306e-06, - "loss": 0.8454, - "step": 26966 - }, - { - "epoch": 0.7641758054917963, - "grad_norm": 0.0, - "learning_rate": 2.77776207579172e-06, - "loss": 0.8919, - "step": 26967 - }, - { - "epoch": 0.7642041429340588, - "grad_norm": 0.0, - "learning_rate": 2.777127308593004e-06, - "loss": 0.7395, - "step": 26968 - }, - { - "epoch": 0.7642324803763212, - "grad_norm": 0.0, - "learning_rate": 2.7764926022352232e-06, - "loss": 0.8613, - "step": 26969 - }, - { - "epoch": 0.7642608178185837, - "grad_norm": 0.0, - "learning_rate": 2.7758579567237286e-06, - "loss": 0.9025, - "step": 26970 - }, - { - "epoch": 0.7642891552608462, - "grad_norm": 0.0, - "learning_rate": 2.7752233720638678e-06, - "loss": 0.8205, - "step": 26971 - }, - { - "epoch": 0.7643174927031087, - "grad_norm": 0.0, - "learning_rate": 2.7745888482609796e-06, - "loss": 0.7346, - "step": 26972 - }, - { - "epoch": 0.764345830145371, - "grad_norm": 0.0, - "learning_rate": 2.773954385320413e-06, - "loss": 0.7494, - "step": 26973 - }, - { - "epoch": 0.7643741675876335, - "grad_norm": 0.0, - "learning_rate": 2.773319983247513e-06, - "loss": 0.6812, - "step": 26974 - }, - { - "epoch": 0.764402505029896, - "grad_norm": 0.0, - "learning_rate": 2.772685642047621e-06, - "loss": 0.7317, - "step": 26975 - }, - { - "epoch": 0.7644308424721584, - "grad_norm": 0.0, - "learning_rate": 2.7720513617260857e-06, - "loss": 0.7711, - "step": 26976 - }, - { - "epoch": 0.7644591799144209, - "grad_norm": 0.0, - "learning_rate": 2.771417142288242e-06, - "loss": 0.8067, - "step": 26977 - }, - { - "epoch": 0.7644875173566834, - "grad_norm": 0.0, - "learning_rate": 2.7707829837394394e-06, - "loss": 0.8474, - "step": 26978 - }, - { - "epoch": 0.7645158547989458, - "grad_norm": 0.0, - "learning_rate": 2.7701488860850134e-06, - "loss": 0.8189, - "step": 26979 - }, - { - "epoch": 0.7645441922412083, - "grad_norm": 0.0, - "learning_rate": 2.769514849330308e-06, - "loss": 0.8309, - "step": 26980 - }, - { - "epoch": 0.7645725296834708, - "grad_norm": 0.0, - "learning_rate": 2.768880873480666e-06, - "loss": 0.8073, - "step": 26981 - }, - { - "epoch": 0.7646008671257333, - "grad_norm": 0.0, - "learning_rate": 2.768246958541424e-06, - "loss": 0.72, - "step": 26982 - }, - { - "epoch": 0.7646292045679957, - "grad_norm": 0.0, - "learning_rate": 2.767613104517922e-06, - "loss": 0.8513, - "step": 26983 - }, - { - "epoch": 0.7646575420102582, - "grad_norm": 0.0, - "learning_rate": 2.766979311415505e-06, - "loss": 0.8147, - "step": 26984 - }, - { - "epoch": 0.7646858794525206, - "grad_norm": 0.0, - "learning_rate": 2.766345579239503e-06, - "loss": 0.9265, - "step": 26985 - }, - { - "epoch": 0.764714216894783, - "grad_norm": 0.0, - "learning_rate": 2.7657119079952588e-06, - "loss": 0.6989, - "step": 26986 - }, - { - "epoch": 0.7647425543370455, - "grad_norm": 0.0, - "learning_rate": 2.7650782976881096e-06, - "loss": 0.8923, - "step": 26987 - }, - { - "epoch": 0.764770891779308, - "grad_norm": 0.0, - "learning_rate": 2.764444748323393e-06, - "loss": 0.7433, - "step": 26988 - }, - { - "epoch": 0.7647992292215705, - "grad_norm": 0.0, - "learning_rate": 2.763811259906447e-06, - "loss": 0.7883, - "step": 26989 - }, - { - "epoch": 0.7648275666638329, - "grad_norm": 0.0, - "learning_rate": 2.763177832442603e-06, - "loss": 0.8686, - "step": 26990 - }, - { - "epoch": 0.7648559041060954, - "grad_norm": 0.0, - "learning_rate": 2.7625444659372e-06, - "loss": 0.7798, - "step": 26991 - }, - { - "epoch": 0.7648842415483579, - "grad_norm": 0.0, - "learning_rate": 2.7619111603955763e-06, - "loss": 0.8181, - "step": 26992 - }, - { - "epoch": 0.7649125789906203, - "grad_norm": 0.0, - "learning_rate": 2.7612779158230583e-06, - "loss": 0.7403, - "step": 26993 - }, - { - "epoch": 0.7649409164328828, - "grad_norm": 0.0, - "learning_rate": 2.7606447322249876e-06, - "loss": 0.8081, - "step": 26994 - }, - { - "epoch": 0.7649692538751452, - "grad_norm": 0.0, - "learning_rate": 2.760011609606692e-06, - "loss": 0.896, - "step": 26995 - }, - { - "epoch": 0.7649975913174077, - "grad_norm": 0.0, - "learning_rate": 2.759378547973507e-06, - "loss": 0.8057, - "step": 26996 - }, - { - "epoch": 0.7650259287596701, - "grad_norm": 0.0, - "learning_rate": 2.758745547330769e-06, - "loss": 0.8546, - "step": 26997 - }, - { - "epoch": 0.7650542662019326, - "grad_norm": 0.0, - "learning_rate": 2.7581126076838017e-06, - "loss": 0.8987, - "step": 26998 - }, - { - "epoch": 0.7650826036441951, - "grad_norm": 0.0, - "learning_rate": 2.757479729037942e-06, - "loss": 0.8495, - "step": 26999 - }, - { - "epoch": 0.7651109410864575, - "grad_norm": 0.0, - "learning_rate": 2.7568469113985197e-06, - "loss": 0.7148, - "step": 27000 - }, - { - "epoch": 0.76513927852872, - "grad_norm": 0.0, - "learning_rate": 2.7562141547708663e-06, - "loss": 0.8463, - "step": 27001 - }, - { - "epoch": 0.7651676159709825, - "grad_norm": 0.0, - "learning_rate": 2.755581459160314e-06, - "loss": 0.7814, - "step": 27002 - }, - { - "epoch": 0.7651959534132449, - "grad_norm": 0.0, - "learning_rate": 2.7549488245721845e-06, - "loss": 0.8639, - "step": 27003 - }, - { - "epoch": 0.7652242908555074, - "grad_norm": 0.0, - "learning_rate": 2.7543162510118125e-06, - "loss": 0.9053, - "step": 27004 - }, - { - "epoch": 0.7652526282977699, - "grad_norm": 0.0, - "learning_rate": 2.7536837384845296e-06, - "loss": 0.8438, - "step": 27005 - }, - { - "epoch": 0.7652809657400323, - "grad_norm": 0.0, - "learning_rate": 2.753051286995655e-06, - "loss": 0.8302, - "step": 27006 - }, - { - "epoch": 0.7653093031822947, - "grad_norm": 0.0, - "learning_rate": 2.752418896550524e-06, - "loss": 0.7483, - "step": 27007 - }, - { - "epoch": 0.7653376406245572, - "grad_norm": 0.0, - "learning_rate": 2.751786567154456e-06, - "loss": 0.6518, - "step": 27008 - }, - { - "epoch": 0.7653659780668197, - "grad_norm": 0.0, - "learning_rate": 2.7511542988127815e-06, - "loss": 0.7654, - "step": 27009 - }, - { - "epoch": 0.7653943155090821, - "grad_norm": 0.0, - "learning_rate": 2.7505220915308304e-06, - "loss": 0.8078, - "step": 27010 - }, - { - "epoch": 0.7654226529513446, - "grad_norm": 0.0, - "learning_rate": 2.7498899453139193e-06, - "loss": 0.8265, - "step": 27011 - }, - { - "epoch": 0.7654509903936071, - "grad_norm": 0.0, - "learning_rate": 2.7492578601673793e-06, - "loss": 0.8463, - "step": 27012 - }, - { - "epoch": 0.7654793278358696, - "grad_norm": 0.0, - "learning_rate": 2.748625836096531e-06, - "loss": 0.8338, - "step": 27013 - }, - { - "epoch": 0.765507665278132, - "grad_norm": 0.0, - "learning_rate": 2.7479938731067e-06, - "loss": 0.8434, - "step": 27014 - }, - { - "epoch": 0.7655360027203945, - "grad_norm": 0.0, - "learning_rate": 2.747361971203214e-06, - "loss": 0.8719, - "step": 27015 - }, - { - "epoch": 0.765564340162657, - "grad_norm": 0.0, - "learning_rate": 2.7467301303913874e-06, - "loss": 0.7602, - "step": 27016 - }, - { - "epoch": 0.7655926776049193, - "grad_norm": 0.0, - "learning_rate": 2.7460983506765472e-06, - "loss": 0.7467, - "step": 27017 - }, - { - "epoch": 0.7656210150471818, - "grad_norm": 0.0, - "learning_rate": 2.7454666320640165e-06, - "loss": 0.7846, - "step": 27018 - }, - { - "epoch": 0.7656493524894443, - "grad_norm": 0.0, - "learning_rate": 2.7448349745591108e-06, - "loss": 0.8039, - "step": 27019 - }, - { - "epoch": 0.7656776899317068, - "grad_norm": 0.0, - "learning_rate": 2.7442033781671553e-06, - "loss": 0.8383, - "step": 27020 - }, - { - "epoch": 0.7657060273739692, - "grad_norm": 0.0, - "learning_rate": 2.743571842893471e-06, - "loss": 0.8502, - "step": 27021 - }, - { - "epoch": 0.7657343648162317, - "grad_norm": 0.0, - "learning_rate": 2.742940368743373e-06, - "loss": 0.7192, - "step": 27022 - }, - { - "epoch": 0.7657627022584942, - "grad_norm": 0.0, - "learning_rate": 2.742308955722187e-06, - "loss": 0.7508, - "step": 27023 - }, - { - "epoch": 0.7657910397007566, - "grad_norm": 0.0, - "learning_rate": 2.7416776038352246e-06, - "loss": 0.7594, - "step": 27024 - }, - { - "epoch": 0.7658193771430191, - "grad_norm": 0.0, - "learning_rate": 2.7410463130878063e-06, - "loss": 0.7471, - "step": 27025 - }, - { - "epoch": 0.7658477145852816, - "grad_norm": 0.0, - "learning_rate": 2.7404150834852506e-06, - "loss": 0.833, - "step": 27026 - }, - { - "epoch": 0.7658760520275439, - "grad_norm": 0.0, - "learning_rate": 2.7397839150328744e-06, - "loss": 0.831, - "step": 27027 - }, - { - "epoch": 0.7659043894698064, - "grad_norm": 0.0, - "learning_rate": 2.7391528077359975e-06, - "loss": 0.8495, - "step": 27028 - }, - { - "epoch": 0.7659327269120689, - "grad_norm": 0.0, - "learning_rate": 2.7385217615999303e-06, - "loss": 0.8195, - "step": 27029 - }, - { - "epoch": 0.7659610643543314, - "grad_norm": 0.0, - "learning_rate": 2.73789077662999e-06, - "loss": 0.7941, - "step": 27030 - }, - { - "epoch": 0.7659894017965938, - "grad_norm": 0.0, - "learning_rate": 2.7372598528314955e-06, - "loss": 0.9313, - "step": 27031 - }, - { - "epoch": 0.7660177392388563, - "grad_norm": 0.0, - "learning_rate": 2.7366289902097555e-06, - "loss": 0.7934, - "step": 27032 - }, - { - "epoch": 0.7660460766811188, - "grad_norm": 0.0, - "learning_rate": 2.735998188770087e-06, - "loss": 0.874, - "step": 27033 - }, - { - "epoch": 0.7660744141233812, - "grad_norm": 0.0, - "learning_rate": 2.735367448517805e-06, - "loss": 0.7773, - "step": 27034 - }, - { - "epoch": 0.7661027515656437, - "grad_norm": 0.0, - "learning_rate": 2.7347367694582183e-06, - "loss": 0.931, - "step": 27035 - }, - { - "epoch": 0.7661310890079062, - "grad_norm": 0.0, - "learning_rate": 2.734106151596645e-06, - "loss": 0.8207, - "step": 27036 - }, - { - "epoch": 0.7661594264501687, - "grad_norm": 0.0, - "learning_rate": 2.7334755949383905e-06, - "loss": 0.9189, - "step": 27037 - }, - { - "epoch": 0.766187763892431, - "grad_norm": 0.0, - "learning_rate": 2.732845099488769e-06, - "loss": 0.865, - "step": 27038 - }, - { - "epoch": 0.7662161013346935, - "grad_norm": 0.0, - "learning_rate": 2.732214665253092e-06, - "loss": 0.8018, - "step": 27039 - }, - { - "epoch": 0.766244438776956, - "grad_norm": 0.0, - "learning_rate": 2.7315842922366708e-06, - "loss": 0.8741, - "step": 27040 - }, - { - "epoch": 0.7662727762192184, - "grad_norm": 0.0, - "learning_rate": 2.7309539804448127e-06, - "loss": 0.772, - "step": 27041 - }, - { - "epoch": 0.7663011136614809, - "grad_norm": 0.0, - "learning_rate": 2.7303237298828323e-06, - "loss": 0.8656, - "step": 27042 - }, - { - "epoch": 0.7663294511037434, - "grad_norm": 0.0, - "learning_rate": 2.729693540556032e-06, - "loss": 0.874, - "step": 27043 - }, - { - "epoch": 0.7663577885460059, - "grad_norm": 0.0, - "learning_rate": 2.7290634124697248e-06, - "loss": 0.7304, - "step": 27044 - }, - { - "epoch": 0.7663861259882683, - "grad_norm": 0.0, - "learning_rate": 2.7284333456292135e-06, - "loss": 0.8883, - "step": 27045 - }, - { - "epoch": 0.7664144634305308, - "grad_norm": 0.0, - "learning_rate": 2.7278033400398095e-06, - "loss": 0.7863, - "step": 27046 - }, - { - "epoch": 0.7664428008727933, - "grad_norm": 0.0, - "learning_rate": 2.7271733957068203e-06, - "loss": 0.8157, - "step": 27047 - }, - { - "epoch": 0.7664711383150556, - "grad_norm": 0.0, - "learning_rate": 2.726543512635548e-06, - "loss": 0.8549, - "step": 27048 - }, - { - "epoch": 0.7664994757573181, - "grad_norm": 0.0, - "learning_rate": 2.7259136908313e-06, - "loss": 0.8409, - "step": 27049 - }, - { - "epoch": 0.7665278131995806, - "grad_norm": 0.0, - "learning_rate": 2.725283930299385e-06, - "loss": 0.8435, - "step": 27050 - }, - { - "epoch": 0.766556150641843, - "grad_norm": 0.0, - "learning_rate": 2.724654231045103e-06, - "loss": 0.8222, - "step": 27051 - }, - { - "epoch": 0.7665844880841055, - "grad_norm": 0.0, - "learning_rate": 2.7240245930737586e-06, - "loss": 0.8193, - "step": 27052 - }, - { - "epoch": 0.766612825526368, - "grad_norm": 0.0, - "learning_rate": 2.7233950163906577e-06, - "loss": 0.7025, - "step": 27053 - }, - { - "epoch": 0.7666411629686305, - "grad_norm": 0.0, - "learning_rate": 2.7227655010011034e-06, - "loss": 0.7967, - "step": 27054 - }, - { - "epoch": 0.7666695004108929, - "grad_norm": 0.0, - "learning_rate": 2.7221360469103997e-06, - "loss": 0.759, - "step": 27055 - }, - { - "epoch": 0.7666978378531554, - "grad_norm": 0.0, - "learning_rate": 2.7215066541238433e-06, - "loss": 0.9316, - "step": 27056 - }, - { - "epoch": 0.7667261752954179, - "grad_norm": 0.0, - "learning_rate": 2.7208773226467433e-06, - "loss": 0.8134, - "step": 27057 - }, - { - "epoch": 0.7667545127376802, - "grad_norm": 0.0, - "learning_rate": 2.7202480524843924e-06, - "loss": 0.9733, - "step": 27058 - }, - { - "epoch": 0.7667828501799427, - "grad_norm": 0.0, - "learning_rate": 2.7196188436420955e-06, - "loss": 0.8744, - "step": 27059 - }, - { - "epoch": 0.7668111876222052, - "grad_norm": 0.0, - "learning_rate": 2.718989696125157e-06, - "loss": 0.7885, - "step": 27060 - }, - { - "epoch": 0.7668395250644677, - "grad_norm": 0.0, - "learning_rate": 2.718360609938868e-06, - "loss": 0.7965, - "step": 27061 - }, - { - "epoch": 0.7668678625067301, - "grad_norm": 0.0, - "learning_rate": 2.717731585088531e-06, - "loss": 0.8539, - "step": 27062 - }, - { - "epoch": 0.7668961999489926, - "grad_norm": 0.0, - "learning_rate": 2.717102621579449e-06, - "loss": 0.7648, - "step": 27063 - }, - { - "epoch": 0.7669245373912551, - "grad_norm": 0.0, - "learning_rate": 2.7164737194169132e-06, - "loss": 0.7664, - "step": 27064 - }, - { - "epoch": 0.7669528748335175, - "grad_norm": 0.0, - "learning_rate": 2.715844878606223e-06, - "loss": 0.8203, - "step": 27065 - }, - { - "epoch": 0.76698121227578, - "grad_norm": 0.0, - "learning_rate": 2.7152160991526768e-06, - "loss": 0.7223, - "step": 27066 - }, - { - "epoch": 0.7670095497180425, - "grad_norm": 0.0, - "learning_rate": 2.714587381061571e-06, - "loss": 0.7688, - "step": 27067 - }, - { - "epoch": 0.767037887160305, - "grad_norm": 0.0, - "learning_rate": 2.7139587243382037e-06, - "loss": 0.8391, - "step": 27068 - }, - { - "epoch": 0.7670662246025673, - "grad_norm": 0.0, - "learning_rate": 2.7133301289878644e-06, - "loss": 0.8765, - "step": 27069 - }, - { - "epoch": 0.7670945620448298, - "grad_norm": 0.0, - "learning_rate": 2.712701595015852e-06, - "loss": 0.8645, - "step": 27070 - }, - { - "epoch": 0.7671228994870923, - "grad_norm": 0.0, - "learning_rate": 2.7120731224274623e-06, - "loss": 0.7526, - "step": 27071 - }, - { - "epoch": 0.7671512369293547, - "grad_norm": 0.0, - "learning_rate": 2.711444711227984e-06, - "loss": 0.9221, - "step": 27072 - }, - { - "epoch": 0.7671795743716172, - "grad_norm": 0.0, - "learning_rate": 2.7108163614227168e-06, - "loss": 0.784, - "step": 27073 - }, - { - "epoch": 0.7672079118138797, - "grad_norm": 0.0, - "learning_rate": 2.710188073016947e-06, - "loss": 0.9271, - "step": 27074 - }, - { - "epoch": 0.7672362492561421, - "grad_norm": 0.0, - "learning_rate": 2.70955984601597e-06, - "loss": 0.819, - "step": 27075 - }, - { - "epoch": 0.7672645866984046, - "grad_norm": 0.0, - "learning_rate": 2.7089316804250777e-06, - "loss": 0.7563, - "step": 27076 - }, - { - "epoch": 0.7672929241406671, - "grad_norm": 0.0, - "learning_rate": 2.708303576249561e-06, - "loss": 0.8537, - "step": 27077 - }, - { - "epoch": 0.7673212615829296, - "grad_norm": 0.0, - "learning_rate": 2.7076755334947126e-06, - "loss": 0.7807, - "step": 27078 - }, - { - "epoch": 0.767349599025192, - "grad_norm": 0.0, - "learning_rate": 2.7070475521658226e-06, - "loss": 0.8645, - "step": 27079 - }, - { - "epoch": 0.7673779364674544, - "grad_norm": 0.0, - "learning_rate": 2.7064196322681767e-06, - "loss": 0.7799, - "step": 27080 - }, - { - "epoch": 0.7674062739097169, - "grad_norm": 0.0, - "learning_rate": 2.705791773807069e-06, - "loss": 0.7095, - "step": 27081 - }, - { - "epoch": 0.7674346113519793, - "grad_norm": 0.0, - "learning_rate": 2.7051639767877836e-06, - "loss": 0.7765, - "step": 27082 - }, - { - "epoch": 0.7674629487942418, - "grad_norm": 0.0, - "learning_rate": 2.7045362412156107e-06, - "loss": 0.7681, - "step": 27083 - }, - { - "epoch": 0.7674912862365043, - "grad_norm": 0.0, - "learning_rate": 2.703908567095841e-06, - "loss": 0.7939, - "step": 27084 - }, - { - "epoch": 0.7675196236787668, - "grad_norm": 0.0, - "learning_rate": 2.7032809544337556e-06, - "loss": 0.8468, - "step": 27085 - }, - { - "epoch": 0.7675479611210292, - "grad_norm": 0.0, - "learning_rate": 2.7026534032346472e-06, - "loss": 0.8824, - "step": 27086 - }, - { - "epoch": 0.7675762985632917, - "grad_norm": 0.0, - "learning_rate": 2.702025913503796e-06, - "loss": 0.8144, - "step": 27087 - }, - { - "epoch": 0.7676046360055542, - "grad_norm": 0.0, - "learning_rate": 2.7013984852464912e-06, - "loss": 0.7733, - "step": 27088 - }, - { - "epoch": 0.7676329734478166, - "grad_norm": 0.0, - "learning_rate": 2.7007711184680176e-06, - "loss": 0.8641, - "step": 27089 - }, - { - "epoch": 0.767661310890079, - "grad_norm": 0.0, - "learning_rate": 2.700143813173658e-06, - "loss": 0.7844, - "step": 27090 - }, - { - "epoch": 0.7676896483323415, - "grad_norm": 0.0, - "learning_rate": 2.6995165693686986e-06, - "loss": 0.8306, - "step": 27091 - }, - { - "epoch": 0.767717985774604, - "grad_norm": 0.0, - "learning_rate": 2.698889387058425e-06, - "loss": 0.8434, - "step": 27092 - }, - { - "epoch": 0.7677463232168664, - "grad_norm": 0.0, - "learning_rate": 2.698262266248115e-06, - "loss": 0.792, - "step": 27093 - }, - { - "epoch": 0.7677746606591289, - "grad_norm": 0.0, - "learning_rate": 2.6976352069430554e-06, - "loss": 0.9187, - "step": 27094 - }, - { - "epoch": 0.7678029981013914, - "grad_norm": 0.0, - "learning_rate": 2.6970082091485228e-06, - "loss": 0.7837, - "step": 27095 - }, - { - "epoch": 0.7678313355436538, - "grad_norm": 0.0, - "learning_rate": 2.6963812728698024e-06, - "loss": 0.7726, - "step": 27096 - }, - { - "epoch": 0.7678596729859163, - "grad_norm": 0.0, - "learning_rate": 2.695754398112178e-06, - "loss": 0.9157, - "step": 27097 - }, - { - "epoch": 0.7678880104281788, - "grad_norm": 0.0, - "learning_rate": 2.695127584880923e-06, - "loss": 0.7495, - "step": 27098 - }, - { - "epoch": 0.7679163478704412, - "grad_norm": 0.0, - "learning_rate": 2.694500833181323e-06, - "loss": 0.8438, - "step": 27099 - }, - { - "epoch": 0.7679446853127037, - "grad_norm": 0.0, - "learning_rate": 2.6938741430186555e-06, - "loss": 0.7631, - "step": 27100 - }, - { - "epoch": 0.7679730227549662, - "grad_norm": 0.0, - "learning_rate": 2.6932475143981975e-06, - "loss": 0.9176, - "step": 27101 - }, - { - "epoch": 0.7680013601972286, - "grad_norm": 0.0, - "learning_rate": 2.6926209473252294e-06, - "loss": 0.9221, - "step": 27102 - }, - { - "epoch": 0.768029697639491, - "grad_norm": 0.0, - "learning_rate": 2.691994441805028e-06, - "loss": 0.813, - "step": 27103 - }, - { - "epoch": 0.7680580350817535, - "grad_norm": 0.0, - "learning_rate": 2.6913679978428707e-06, - "loss": 0.8479, - "step": 27104 - }, - { - "epoch": 0.768086372524016, - "grad_norm": 0.0, - "learning_rate": 2.690741615444039e-06, - "loss": 0.7522, - "step": 27105 - }, - { - "epoch": 0.7681147099662784, - "grad_norm": 0.0, - "learning_rate": 2.690115294613801e-06, - "loss": 0.7392, - "step": 27106 - }, - { - "epoch": 0.7681430474085409, - "grad_norm": 0.0, - "learning_rate": 2.6894890353574364e-06, - "loss": 0.7909, - "step": 27107 - }, - { - "epoch": 0.7681713848508034, - "grad_norm": 0.0, - "learning_rate": 2.688862837680223e-06, - "loss": 0.8375, - "step": 27108 - }, - { - "epoch": 0.7681997222930659, - "grad_norm": 0.0, - "learning_rate": 2.6882367015874313e-06, - "loss": 0.7197, - "step": 27109 - }, - { - "epoch": 0.7682280597353283, - "grad_norm": 0.0, - "learning_rate": 2.6876106270843382e-06, - "loss": 0.8019, - "step": 27110 - }, - { - "epoch": 0.7682563971775908, - "grad_norm": 0.0, - "learning_rate": 2.6869846141762148e-06, - "loss": 0.7038, - "step": 27111 - }, - { - "epoch": 0.7682847346198532, - "grad_norm": 0.0, - "learning_rate": 2.6863586628683345e-06, - "loss": 0.899, - "step": 27112 - }, - { - "epoch": 0.7683130720621156, - "grad_norm": 0.0, - "learning_rate": 2.685732773165974e-06, - "loss": 0.7659, - "step": 27113 - }, - { - "epoch": 0.7683414095043781, - "grad_norm": 0.0, - "learning_rate": 2.6851069450743996e-06, - "loss": 0.746, - "step": 27114 - }, - { - "epoch": 0.7683697469466406, - "grad_norm": 0.0, - "learning_rate": 2.6844811785988866e-06, - "loss": 0.7961, - "step": 27115 - }, - { - "epoch": 0.768398084388903, - "grad_norm": 0.0, - "learning_rate": 2.683855473744704e-06, - "loss": 0.8304, - "step": 27116 - }, - { - "epoch": 0.7684264218311655, - "grad_norm": 0.0, - "learning_rate": 2.6832298305171246e-06, - "loss": 0.8567, - "step": 27117 - }, - { - "epoch": 0.768454759273428, - "grad_norm": 0.0, - "learning_rate": 2.6826042489214186e-06, - "loss": 0.7952, - "step": 27118 - }, - { - "epoch": 0.7684830967156905, - "grad_norm": 0.0, - "learning_rate": 2.681978728962853e-06, - "loss": 0.8721, - "step": 27119 - }, - { - "epoch": 0.7685114341579529, - "grad_norm": 0.0, - "learning_rate": 2.6813532706466973e-06, - "loss": 0.8123, - "step": 27120 - }, - { - "epoch": 0.7685397716002154, - "grad_norm": 0.0, - "learning_rate": 2.6807278739782238e-06, - "loss": 0.8331, - "step": 27121 - }, - { - "epoch": 0.7685681090424779, - "grad_norm": 0.0, - "learning_rate": 2.6801025389626945e-06, - "loss": 0.9057, - "step": 27122 - }, - { - "epoch": 0.7685964464847402, - "grad_norm": 0.0, - "learning_rate": 2.6794772656053824e-06, - "loss": 0.7392, - "step": 27123 - }, - { - "epoch": 0.7686247839270027, - "grad_norm": 0.0, - "learning_rate": 2.6788520539115492e-06, - "loss": 0.821, - "step": 27124 - }, - { - "epoch": 0.7686531213692652, - "grad_norm": 0.0, - "learning_rate": 2.678226903886464e-06, - "loss": 0.7705, - "step": 27125 - }, - { - "epoch": 0.7686814588115277, - "grad_norm": 0.0, - "learning_rate": 2.6776018155353946e-06, - "loss": 0.8316, - "step": 27126 - }, - { - "epoch": 0.7687097962537901, - "grad_norm": 0.0, - "learning_rate": 2.676976788863602e-06, - "loss": 0.8271, - "step": 27127 - }, - { - "epoch": 0.7687381336960526, - "grad_norm": 0.0, - "learning_rate": 2.676351823876353e-06, - "loss": 0.8089, - "step": 27128 - }, - { - "epoch": 0.7687664711383151, - "grad_norm": 0.0, - "learning_rate": 2.6757269205789118e-06, - "loss": 0.8369, - "step": 27129 - }, - { - "epoch": 0.7687948085805775, - "grad_norm": 0.0, - "learning_rate": 2.6751020789765423e-06, - "loss": 0.9176, - "step": 27130 - }, - { - "epoch": 0.76882314602284, - "grad_norm": 0.0, - "learning_rate": 2.6744772990745117e-06, - "loss": 0.8175, - "step": 27131 - }, - { - "epoch": 0.7688514834651025, - "grad_norm": 0.0, - "learning_rate": 2.6738525808780757e-06, - "loss": 0.8136, - "step": 27132 - }, - { - "epoch": 0.768879820907365, - "grad_norm": 0.0, - "learning_rate": 2.673227924392501e-06, - "loss": 0.8659, - "step": 27133 - }, - { - "epoch": 0.7689081583496273, - "grad_norm": 0.0, - "learning_rate": 2.6726033296230492e-06, - "loss": 0.7881, - "step": 27134 - }, - { - "epoch": 0.7689364957918898, - "grad_norm": 0.0, - "learning_rate": 2.671978796574979e-06, - "loss": 0.8563, - "step": 27135 - }, - { - "epoch": 0.7689648332341523, - "grad_norm": 0.0, - "learning_rate": 2.6713543252535523e-06, - "loss": 0.8868, - "step": 27136 - }, - { - "epoch": 0.7689931706764147, - "grad_norm": 0.0, - "learning_rate": 2.6707299156640322e-06, - "loss": 0.7954, - "step": 27137 - }, - { - "epoch": 0.7690215081186772, - "grad_norm": 0.0, - "learning_rate": 2.6701055678116727e-06, - "loss": 0.7613, - "step": 27138 - }, - { - "epoch": 0.7690498455609397, - "grad_norm": 0.0, - "learning_rate": 2.669481281701739e-06, - "loss": 0.8693, - "step": 27139 - }, - { - "epoch": 0.7690781830032021, - "grad_norm": 0.0, - "learning_rate": 2.6688570573394844e-06, - "loss": 0.8134, - "step": 27140 - }, - { - "epoch": 0.7691065204454646, - "grad_norm": 0.0, - "learning_rate": 2.6682328947301685e-06, - "loss": 0.8657, - "step": 27141 - }, - { - "epoch": 0.7691348578877271, - "grad_norm": 0.0, - "learning_rate": 2.6676087938790496e-06, - "loss": 0.8955, - "step": 27142 - }, - { - "epoch": 0.7691631953299896, - "grad_norm": 0.0, - "learning_rate": 2.666984754791384e-06, - "loss": 0.7366, - "step": 27143 - }, - { - "epoch": 0.7691915327722519, - "grad_norm": 0.0, - "learning_rate": 2.666360777472432e-06, - "loss": 0.8001, - "step": 27144 - }, - { - "epoch": 0.7692198702145144, - "grad_norm": 0.0, - "learning_rate": 2.6657368619274447e-06, - "loss": 0.8838, - "step": 27145 - }, - { - "epoch": 0.7692482076567769, - "grad_norm": 0.0, - "learning_rate": 2.665113008161678e-06, - "loss": 0.9005, - "step": 27146 - }, - { - "epoch": 0.7692765450990393, - "grad_norm": 0.0, - "learning_rate": 2.6644892161803917e-06, - "loss": 0.89, - "step": 27147 - }, - { - "epoch": 0.7693048825413018, - "grad_norm": 0.0, - "learning_rate": 2.6638654859888335e-06, - "loss": 0.8573, - "step": 27148 - }, - { - "epoch": 0.7693332199835643, - "grad_norm": 0.0, - "learning_rate": 2.6632418175922613e-06, - "loss": 0.8338, - "step": 27149 - }, - { - "epoch": 0.7693615574258268, - "grad_norm": 0.0, - "learning_rate": 2.66261821099593e-06, - "loss": 0.8394, - "step": 27150 - }, - { - "epoch": 0.7693898948680892, - "grad_norm": 0.0, - "learning_rate": 2.6619946662050866e-06, - "loss": 0.8655, - "step": 27151 - }, - { - "epoch": 0.7694182323103517, - "grad_norm": 0.0, - "learning_rate": 2.6613711832249912e-06, - "loss": 0.8218, - "step": 27152 - }, - { - "epoch": 0.7694465697526142, - "grad_norm": 0.0, - "learning_rate": 2.6607477620608877e-06, - "loss": 0.8754, - "step": 27153 - }, - { - "epoch": 0.7694749071948765, - "grad_norm": 0.0, - "learning_rate": 2.660124402718032e-06, - "loss": 0.7453, - "step": 27154 - }, - { - "epoch": 0.769503244637139, - "grad_norm": 0.0, - "learning_rate": 2.659501105201673e-06, - "loss": 0.818, - "step": 27155 - }, - { - "epoch": 0.7695315820794015, - "grad_norm": 0.0, - "learning_rate": 2.6588778695170625e-06, - "loss": 0.751, - "step": 27156 - }, - { - "epoch": 0.769559919521664, - "grad_norm": 0.0, - "learning_rate": 2.65825469566945e-06, - "loss": 0.8011, - "step": 27157 - }, - { - "epoch": 0.7695882569639264, - "grad_norm": 0.0, - "learning_rate": 2.6576315836640866e-06, - "loss": 0.7693, - "step": 27158 - }, - { - "epoch": 0.7696165944061889, - "grad_norm": 0.0, - "learning_rate": 2.6570085335062166e-06, - "loss": 0.7589, - "step": 27159 - }, - { - "epoch": 0.7696449318484514, - "grad_norm": 0.0, - "learning_rate": 2.6563855452010933e-06, - "loss": 0.8518, - "step": 27160 - }, - { - "epoch": 0.7696732692907138, - "grad_norm": 0.0, - "learning_rate": 2.6557626187539586e-06, - "loss": 0.8072, - "step": 27161 - }, - { - "epoch": 0.7697016067329763, - "grad_norm": 0.0, - "learning_rate": 2.655139754170063e-06, - "loss": 0.8668, - "step": 27162 - }, - { - "epoch": 0.7697299441752388, - "grad_norm": 0.0, - "learning_rate": 2.6545169514546554e-06, - "loss": 0.8259, - "step": 27163 - }, - { - "epoch": 0.7697582816175011, - "grad_norm": 0.0, - "learning_rate": 2.6538942106129762e-06, - "loss": 0.8297, - "step": 27164 - }, - { - "epoch": 0.7697866190597636, - "grad_norm": 0.0, - "learning_rate": 2.6532715316502734e-06, - "loss": 0.8162, - "step": 27165 - }, - { - "epoch": 0.7698149565020261, - "grad_norm": 0.0, - "learning_rate": 2.652648914571796e-06, - "loss": 0.7263, - "step": 27166 - }, - { - "epoch": 0.7698432939442886, - "grad_norm": 0.0, - "learning_rate": 2.652026359382782e-06, - "loss": 0.8579, - "step": 27167 - }, - { - "epoch": 0.769871631386551, - "grad_norm": 0.0, - "learning_rate": 2.651403866088479e-06, - "loss": 0.7989, - "step": 27168 - }, - { - "epoch": 0.7698999688288135, - "grad_norm": 0.0, - "learning_rate": 2.65078143469413e-06, - "loss": 0.7563, - "step": 27169 - }, - { - "epoch": 0.769928306271076, - "grad_norm": 0.0, - "learning_rate": 2.650159065204978e-06, - "loss": 0.7946, - "step": 27170 - }, - { - "epoch": 0.7699566437133384, - "grad_norm": 0.0, - "learning_rate": 2.6495367576262687e-06, - "loss": 0.8221, - "step": 27171 - }, - { - "epoch": 0.7699849811556009, - "grad_norm": 0.0, - "learning_rate": 2.6489145119632374e-06, - "loss": 0.916, - "step": 27172 - }, - { - "epoch": 0.7700133185978634, - "grad_norm": 0.0, - "learning_rate": 2.6482923282211313e-06, - "loss": 0.818, - "step": 27173 - }, - { - "epoch": 0.7700416560401259, - "grad_norm": 0.0, - "learning_rate": 2.6476702064051873e-06, - "loss": 0.7312, - "step": 27174 - }, - { - "epoch": 0.7700699934823882, - "grad_norm": 0.0, - "learning_rate": 2.6470481465206468e-06, - "loss": 0.7601, - "step": 27175 - }, - { - "epoch": 0.7700983309246507, - "grad_norm": 0.0, - "learning_rate": 2.646426148572753e-06, - "loss": 0.8018, - "step": 27176 - }, - { - "epoch": 0.7701266683669132, - "grad_norm": 0.0, - "learning_rate": 2.6458042125667393e-06, - "loss": 0.8052, - "step": 27177 - }, - { - "epoch": 0.7701550058091756, - "grad_norm": 0.0, - "learning_rate": 2.6451823385078477e-06, - "loss": 0.8479, - "step": 27178 - }, - { - "epoch": 0.7701833432514381, - "grad_norm": 0.0, - "learning_rate": 2.6445605264013206e-06, - "loss": 0.8426, - "step": 27179 - }, - { - "epoch": 0.7702116806937006, - "grad_norm": 0.0, - "learning_rate": 2.6439387762523873e-06, - "loss": 0.8483, - "step": 27180 - }, - { - "epoch": 0.7702400181359631, - "grad_norm": 0.0, - "learning_rate": 2.6433170880662895e-06, - "loss": 0.9337, - "step": 27181 - }, - { - "epoch": 0.7702683555782255, - "grad_norm": 0.0, - "learning_rate": 2.6426954618482638e-06, - "loss": 0.6728, - "step": 27182 - }, - { - "epoch": 0.770296693020488, - "grad_norm": 0.0, - "learning_rate": 2.6420738976035463e-06, - "loss": 0.7821, - "step": 27183 - }, - { - "epoch": 0.7703250304627505, - "grad_norm": 0.0, - "learning_rate": 2.641452395337376e-06, - "loss": 0.7966, - "step": 27184 - }, - { - "epoch": 0.7703533679050129, - "grad_norm": 0.0, - "learning_rate": 2.6408309550549817e-06, - "loss": 0.8589, - "step": 27185 - }, - { - "epoch": 0.7703817053472753, - "grad_norm": 0.0, - "learning_rate": 2.6402095767615997e-06, - "loss": 0.8567, - "step": 27186 - }, - { - "epoch": 0.7704100427895378, - "grad_norm": 0.0, - "learning_rate": 2.63958826046247e-06, - "loss": 0.7852, - "step": 27187 - }, - { - "epoch": 0.7704383802318002, - "grad_norm": 0.0, - "learning_rate": 2.638967006162818e-06, - "loss": 0.7017, - "step": 27188 - }, - { - "epoch": 0.7704667176740627, - "grad_norm": 0.0, - "learning_rate": 2.638345813867883e-06, - "loss": 0.721, - "step": 27189 - }, - { - "epoch": 0.7704950551163252, - "grad_norm": 0.0, - "learning_rate": 2.637724683582893e-06, - "loss": 0.8036, - "step": 27190 - }, - { - "epoch": 0.7705233925585877, - "grad_norm": 0.0, - "learning_rate": 2.63710361531308e-06, - "loss": 0.7912, - "step": 27191 - }, - { - "epoch": 0.7705517300008501, - "grad_norm": 0.0, - "learning_rate": 2.636482609063682e-06, - "loss": 0.7689, - "step": 27192 - }, - { - "epoch": 0.7705800674431126, - "grad_norm": 0.0, - "learning_rate": 2.6358616648399216e-06, - "loss": 0.7697, - "step": 27193 - }, - { - "epoch": 0.7706084048853751, - "grad_norm": 0.0, - "learning_rate": 2.6352407826470338e-06, - "loss": 0.7851, - "step": 27194 - }, - { - "epoch": 0.7706367423276375, - "grad_norm": 0.0, - "learning_rate": 2.634619962490247e-06, - "loss": 0.8113, - "step": 27195 - }, - { - "epoch": 0.7706650797699, - "grad_norm": 0.0, - "learning_rate": 2.633999204374792e-06, - "loss": 0.8994, - "step": 27196 - }, - { - "epoch": 0.7706934172121624, - "grad_norm": 0.0, - "learning_rate": 2.633378508305899e-06, - "loss": 0.8519, - "step": 27197 - }, - { - "epoch": 0.7707217546544249, - "grad_norm": 0.0, - "learning_rate": 2.632757874288793e-06, - "loss": 0.8746, - "step": 27198 - }, - { - "epoch": 0.7707500920966873, - "grad_norm": 0.0, - "learning_rate": 2.632137302328701e-06, - "loss": 0.8675, - "step": 27199 - }, - { - "epoch": 0.7707784295389498, - "grad_norm": 0.0, - "learning_rate": 2.631516792430857e-06, - "loss": 0.8825, - "step": 27200 - }, - { - "epoch": 0.7708067669812123, - "grad_norm": 0.0, - "learning_rate": 2.63089634460048e-06, - "loss": 0.814, - "step": 27201 - }, - { - "epoch": 0.7708351044234747, - "grad_norm": 0.0, - "learning_rate": 2.630275958842802e-06, - "loss": 0.8298, - "step": 27202 - }, - { - "epoch": 0.7708634418657372, - "grad_norm": 0.0, - "learning_rate": 2.629655635163044e-06, - "loss": 0.8118, - "step": 27203 - }, - { - "epoch": 0.7708917793079997, - "grad_norm": 0.0, - "learning_rate": 2.629035373566433e-06, - "loss": 0.7574, - "step": 27204 - }, - { - "epoch": 0.7709201167502622, - "grad_norm": 0.0, - "learning_rate": 2.6284151740581974e-06, - "loss": 0.8696, - "step": 27205 - }, - { - "epoch": 0.7709484541925246, - "grad_norm": 0.0, - "learning_rate": 2.6277950366435556e-06, - "loss": 0.8436, - "step": 27206 - }, - { - "epoch": 0.770976791634787, - "grad_norm": 0.0, - "learning_rate": 2.6271749613277333e-06, - "loss": 0.834, - "step": 27207 - }, - { - "epoch": 0.7710051290770495, - "grad_norm": 0.0, - "learning_rate": 2.6265549481159538e-06, - "loss": 0.6682, - "step": 27208 - }, - { - "epoch": 0.7710334665193119, - "grad_norm": 0.0, - "learning_rate": 2.6259349970134406e-06, - "loss": 0.782, - "step": 27209 - }, - { - "epoch": 0.7710618039615744, - "grad_norm": 0.0, - "learning_rate": 2.625315108025418e-06, - "loss": 0.8048, - "step": 27210 - }, - { - "epoch": 0.7710901414038369, - "grad_norm": 0.0, - "learning_rate": 2.6246952811571015e-06, - "loss": 0.8943, - "step": 27211 - }, - { - "epoch": 0.7711184788460993, - "grad_norm": 0.0, - "learning_rate": 2.6240755164137156e-06, - "loss": 0.8548, - "step": 27212 - }, - { - "epoch": 0.7711468162883618, - "grad_norm": 0.0, - "learning_rate": 2.623455813800484e-06, - "loss": 0.7739, - "step": 27213 - }, - { - "epoch": 0.7711751537306243, - "grad_norm": 0.0, - "learning_rate": 2.6228361733226204e-06, - "loss": 0.8743, - "step": 27214 - }, - { - "epoch": 0.7712034911728868, - "grad_norm": 0.0, - "learning_rate": 2.622216594985346e-06, - "loss": 0.9149, - "step": 27215 - }, - { - "epoch": 0.7712318286151492, - "grad_norm": 0.0, - "learning_rate": 2.621597078793885e-06, - "loss": 0.8079, - "step": 27216 - }, - { - "epoch": 0.7712601660574117, - "grad_norm": 0.0, - "learning_rate": 2.620977624753448e-06, - "loss": 0.7951, - "step": 27217 - }, - { - "epoch": 0.7712885034996741, - "grad_norm": 0.0, - "learning_rate": 2.62035823286926e-06, - "loss": 0.884, - "step": 27218 - }, - { - "epoch": 0.7713168409419365, - "grad_norm": 0.0, - "learning_rate": 2.6197389031465328e-06, - "loss": 0.8027, - "step": 27219 - }, - { - "epoch": 0.771345178384199, - "grad_norm": 0.0, - "learning_rate": 2.6191196355904834e-06, - "loss": 0.7532, - "step": 27220 - }, - { - "epoch": 0.7713735158264615, - "grad_norm": 0.0, - "learning_rate": 2.618500430206331e-06, - "loss": 0.9007, - "step": 27221 - }, - { - "epoch": 0.771401853268724, - "grad_norm": 0.0, - "learning_rate": 2.617881286999291e-06, - "loss": 0.7609, - "step": 27222 - }, - { - "epoch": 0.7714301907109864, - "grad_norm": 0.0, - "learning_rate": 2.617262205974578e-06, - "loss": 0.7785, - "step": 27223 - }, - { - "epoch": 0.7714585281532489, - "grad_norm": 0.0, - "learning_rate": 2.6166431871374096e-06, - "loss": 0.7304, - "step": 27224 - }, - { - "epoch": 0.7714868655955114, - "grad_norm": 0.0, - "learning_rate": 2.6160242304929952e-06, - "loss": 0.7981, - "step": 27225 - }, - { - "epoch": 0.7715152030377738, - "grad_norm": 0.0, - "learning_rate": 2.6154053360465536e-06, - "loss": 0.8286, - "step": 27226 - }, - { - "epoch": 0.7715435404800363, - "grad_norm": 0.0, - "learning_rate": 2.6147865038032915e-06, - "loss": 0.7921, - "step": 27227 - }, - { - "epoch": 0.7715718779222988, - "grad_norm": 0.0, - "learning_rate": 2.6141677337684245e-06, - "loss": 0.7682, - "step": 27228 - }, - { - "epoch": 0.7716002153645612, - "grad_norm": 0.0, - "learning_rate": 2.6135490259471695e-06, - "loss": 0.8296, - "step": 27229 - }, - { - "epoch": 0.7716285528068236, - "grad_norm": 0.0, - "learning_rate": 2.6129303803447302e-06, - "loss": 0.8578, - "step": 27230 - }, - { - "epoch": 0.7716568902490861, - "grad_norm": 0.0, - "learning_rate": 2.612311796966325e-06, - "loss": 0.7074, - "step": 27231 - }, - { - "epoch": 0.7716852276913486, - "grad_norm": 0.0, - "learning_rate": 2.6116932758171574e-06, - "loss": 0.8486, - "step": 27232 - }, - { - "epoch": 0.771713565133611, - "grad_norm": 0.0, - "learning_rate": 2.611074816902441e-06, - "loss": 0.7887, - "step": 27233 - }, - { - "epoch": 0.7717419025758735, - "grad_norm": 0.0, - "learning_rate": 2.610456420227386e-06, - "loss": 0.7963, - "step": 27234 - }, - { - "epoch": 0.771770240018136, - "grad_norm": 0.0, - "learning_rate": 2.6098380857972007e-06, - "loss": 0.9099, - "step": 27235 - }, - { - "epoch": 0.7717985774603984, - "grad_norm": 0.0, - "learning_rate": 2.609219813617092e-06, - "loss": 0.8496, - "step": 27236 - }, - { - "epoch": 0.7718269149026609, - "grad_norm": 0.0, - "learning_rate": 2.6086016036922736e-06, - "loss": 0.8759, - "step": 27237 - }, - { - "epoch": 0.7718552523449234, - "grad_norm": 0.0, - "learning_rate": 2.607983456027945e-06, - "loss": 0.7226, - "step": 27238 - }, - { - "epoch": 0.7718835897871859, - "grad_norm": 0.0, - "learning_rate": 2.6073653706293202e-06, - "loss": 0.7867, - "step": 27239 - }, - { - "epoch": 0.7719119272294482, - "grad_norm": 0.0, - "learning_rate": 2.606747347501598e-06, - "loss": 0.7983, - "step": 27240 - }, - { - "epoch": 0.7719402646717107, - "grad_norm": 0.0, - "learning_rate": 2.6061293866499894e-06, - "loss": 0.7545, - "step": 27241 - }, - { - "epoch": 0.7719686021139732, - "grad_norm": 0.0, - "learning_rate": 2.6055114880797008e-06, - "loss": 0.7902, - "step": 27242 - }, - { - "epoch": 0.7719969395562356, - "grad_norm": 0.0, - "learning_rate": 2.604893651795932e-06, - "loss": 0.7396, - "step": 27243 - }, - { - "epoch": 0.7720252769984981, - "grad_norm": 0.0, - "learning_rate": 2.60427587780389e-06, - "loss": 0.7402, - "step": 27244 - }, - { - "epoch": 0.7720536144407606, - "grad_norm": 0.0, - "learning_rate": 2.603658166108782e-06, - "loss": 0.839, - "step": 27245 - }, - { - "epoch": 0.7720819518830231, - "grad_norm": 0.0, - "learning_rate": 2.603040516715806e-06, - "loss": 0.7391, - "step": 27246 - }, - { - "epoch": 0.7721102893252855, - "grad_norm": 0.0, - "learning_rate": 2.602422929630165e-06, - "loss": 0.8553, - "step": 27247 - }, - { - "epoch": 0.772138626767548, - "grad_norm": 0.0, - "learning_rate": 2.601805404857063e-06, - "loss": 0.789, - "step": 27248 - }, - { - "epoch": 0.7721669642098105, - "grad_norm": 0.0, - "learning_rate": 2.6011879424017006e-06, - "loss": 0.8086, - "step": 27249 - }, - { - "epoch": 0.7721953016520728, - "grad_norm": 0.0, - "learning_rate": 2.600570542269284e-06, - "loss": 0.8964, - "step": 27250 - }, - { - "epoch": 0.7722236390943353, - "grad_norm": 0.0, - "learning_rate": 2.5999532044650056e-06, - "loss": 0.8268, - "step": 27251 - }, - { - "epoch": 0.7722519765365978, - "grad_norm": 0.0, - "learning_rate": 2.599335928994069e-06, - "loss": 0.8696, - "step": 27252 - }, - { - "epoch": 0.7722803139788603, - "grad_norm": 0.0, - "learning_rate": 2.5987187158616777e-06, - "loss": 0.8139, - "step": 27253 - }, - { - "epoch": 0.7723086514211227, - "grad_norm": 0.0, - "learning_rate": 2.5981015650730234e-06, - "loss": 0.762, - "step": 27254 - }, - { - "epoch": 0.7723369888633852, - "grad_norm": 0.0, - "learning_rate": 2.597484476633312e-06, - "loss": 0.771, - "step": 27255 - }, - { - "epoch": 0.7723653263056477, - "grad_norm": 0.0, - "learning_rate": 2.5968674505477342e-06, - "loss": 0.841, - "step": 27256 - }, - { - "epoch": 0.7723936637479101, - "grad_norm": 0.0, - "learning_rate": 2.596250486821491e-06, - "loss": 0.8098, - "step": 27257 - }, - { - "epoch": 0.7724220011901726, - "grad_norm": 0.0, - "learning_rate": 2.5956335854597826e-06, - "loss": 0.7704, - "step": 27258 - }, - { - "epoch": 0.7724503386324351, - "grad_norm": 0.0, - "learning_rate": 2.595016746467799e-06, - "loss": 0.7568, - "step": 27259 - }, - { - "epoch": 0.7724786760746974, - "grad_norm": 0.0, - "learning_rate": 2.5943999698507394e-06, - "loss": 0.7341, - "step": 27260 - }, - { - "epoch": 0.7725070135169599, - "grad_norm": 0.0, - "learning_rate": 2.5937832556137986e-06, - "loss": 0.7164, - "step": 27261 - }, - { - "epoch": 0.7725353509592224, - "grad_norm": 0.0, - "learning_rate": 2.5931666037621718e-06, - "loss": 0.8514, - "step": 27262 - }, - { - "epoch": 0.7725636884014849, - "grad_norm": 0.0, - "learning_rate": 2.592550014301055e-06, - "loss": 0.8326, - "step": 27263 - }, - { - "epoch": 0.7725920258437473, - "grad_norm": 0.0, - "learning_rate": 2.5919334872356384e-06, - "loss": 0.7695, - "step": 27264 - }, - { - "epoch": 0.7726203632860098, - "grad_norm": 0.0, - "learning_rate": 2.591317022571116e-06, - "loss": 0.7376, - "step": 27265 - }, - { - "epoch": 0.7726487007282723, - "grad_norm": 0.0, - "learning_rate": 2.590700620312685e-06, - "loss": 0.7328, - "step": 27266 - }, - { - "epoch": 0.7726770381705347, - "grad_norm": 0.0, - "learning_rate": 2.590084280465531e-06, - "loss": 0.8178, - "step": 27267 - }, - { - "epoch": 0.7727053756127972, - "grad_norm": 0.0, - "learning_rate": 2.5894680030348516e-06, - "loss": 0.778, - "step": 27268 - }, - { - "epoch": 0.7727337130550597, - "grad_norm": 0.0, - "learning_rate": 2.5888517880258323e-06, - "loss": 0.7776, - "step": 27269 - }, - { - "epoch": 0.7727620504973222, - "grad_norm": 0.0, - "learning_rate": 2.588235635443667e-06, - "loss": 0.8596, - "step": 27270 - }, - { - "epoch": 0.7727903879395845, - "grad_norm": 0.0, - "learning_rate": 2.587619545293547e-06, - "loss": 0.7837, - "step": 27271 - }, - { - "epoch": 0.772818725381847, - "grad_norm": 0.0, - "learning_rate": 2.5870035175806584e-06, - "loss": 0.7695, - "step": 27272 - }, - { - "epoch": 0.7728470628241095, - "grad_norm": 0.0, - "learning_rate": 2.586387552310191e-06, - "loss": 0.737, - "step": 27273 - }, - { - "epoch": 0.7728754002663719, - "grad_norm": 0.0, - "learning_rate": 2.5857716494873343e-06, - "loss": 0.8598, - "step": 27274 - }, - { - "epoch": 0.7729037377086344, - "grad_norm": 0.0, - "learning_rate": 2.5851558091172767e-06, - "loss": 0.7751, - "step": 27275 - }, - { - "epoch": 0.7729320751508969, - "grad_norm": 0.0, - "learning_rate": 2.5845400312052073e-06, - "loss": 0.8211, - "step": 27276 - }, - { - "epoch": 0.7729604125931594, - "grad_norm": 0.0, - "learning_rate": 2.5839243157563087e-06, - "loss": 0.872, - "step": 27277 - }, - { - "epoch": 0.7729887500354218, - "grad_norm": 0.0, - "learning_rate": 2.5833086627757684e-06, - "loss": 0.7363, - "step": 27278 - }, - { - "epoch": 0.7730170874776843, - "grad_norm": 0.0, - "learning_rate": 2.582693072268778e-06, - "loss": 0.7525, - "step": 27279 - }, - { - "epoch": 0.7730454249199468, - "grad_norm": 0.0, - "learning_rate": 2.5820775442405142e-06, - "loss": 0.7977, - "step": 27280 - }, - { - "epoch": 0.7730737623622091, - "grad_norm": 0.0, - "learning_rate": 2.5814620786961654e-06, - "loss": 0.6359, - "step": 27281 - }, - { - "epoch": 0.7731020998044716, - "grad_norm": 0.0, - "learning_rate": 2.58084667564092e-06, - "loss": 0.8377, - "step": 27282 - }, - { - "epoch": 0.7731304372467341, - "grad_norm": 0.0, - "learning_rate": 2.580231335079956e-06, - "loss": 0.794, - "step": 27283 - }, - { - "epoch": 0.7731587746889965, - "grad_norm": 0.0, - "learning_rate": 2.57961605701846e-06, - "loss": 0.7228, - "step": 27284 - }, - { - "epoch": 0.773187112131259, - "grad_norm": 0.0, - "learning_rate": 2.5790008414616107e-06, - "loss": 0.8378, - "step": 27285 - }, - { - "epoch": 0.7732154495735215, - "grad_norm": 0.0, - "learning_rate": 2.578385688414594e-06, - "loss": 0.8221, - "step": 27286 - }, - { - "epoch": 0.773243787015784, - "grad_norm": 0.0, - "learning_rate": 2.5777705978825894e-06, - "loss": 0.8249, - "step": 27287 - }, - { - "epoch": 0.7732721244580464, - "grad_norm": 0.0, - "learning_rate": 2.5771555698707805e-06, - "loss": 0.7414, - "step": 27288 - }, - { - "epoch": 0.7733004619003089, - "grad_norm": 0.0, - "learning_rate": 2.576540604384349e-06, - "loss": 0.8325, - "step": 27289 - }, - { - "epoch": 0.7733287993425714, - "grad_norm": 0.0, - "learning_rate": 2.575925701428469e-06, - "loss": 0.8379, - "step": 27290 - }, - { - "epoch": 0.7733571367848338, - "grad_norm": 0.0, - "learning_rate": 2.5753108610083243e-06, - "loss": 0.7365, - "step": 27291 - }, - { - "epoch": 0.7733854742270962, - "grad_norm": 0.0, - "learning_rate": 2.5746960831290967e-06, - "loss": 0.7921, - "step": 27292 - }, - { - "epoch": 0.7734138116693587, - "grad_norm": 0.0, - "learning_rate": 2.5740813677959576e-06, - "loss": 0.8515, - "step": 27293 - }, - { - "epoch": 0.7734421491116212, - "grad_norm": 0.0, - "learning_rate": 2.573466715014089e-06, - "loss": 0.7863, - "step": 27294 - }, - { - "epoch": 0.7734704865538836, - "grad_norm": 0.0, - "learning_rate": 2.572852124788672e-06, - "loss": 0.7498, - "step": 27295 - }, - { - "epoch": 0.7734988239961461, - "grad_norm": 0.0, - "learning_rate": 2.5722375971248747e-06, - "loss": 0.8651, - "step": 27296 - }, - { - "epoch": 0.7735271614384086, - "grad_norm": 0.0, - "learning_rate": 2.5716231320278794e-06, - "loss": 0.7852, - "step": 27297 - }, - { - "epoch": 0.773555498880671, - "grad_norm": 0.0, - "learning_rate": 2.571008729502861e-06, - "loss": 0.8303, - "step": 27298 - }, - { - "epoch": 0.7735838363229335, - "grad_norm": 0.0, - "learning_rate": 2.5703943895549974e-06, - "loss": 0.7635, - "step": 27299 - }, - { - "epoch": 0.773612173765196, - "grad_norm": 0.0, - "learning_rate": 2.569780112189458e-06, - "loss": 0.7582, - "step": 27300 - }, - { - "epoch": 0.7736405112074585, - "grad_norm": 0.0, - "learning_rate": 2.5691658974114197e-06, - "loss": 0.872, - "step": 27301 - }, - { - "epoch": 0.7736688486497209, - "grad_norm": 0.0, - "learning_rate": 2.5685517452260566e-06, - "loss": 0.8386, - "step": 27302 - }, - { - "epoch": 0.7736971860919833, - "grad_norm": 0.0, - "learning_rate": 2.567937655638545e-06, - "loss": 0.8004, - "step": 27303 - }, - { - "epoch": 0.7737255235342458, - "grad_norm": 0.0, - "learning_rate": 2.5673236286540506e-06, - "loss": 0.7637, - "step": 27304 - }, - { - "epoch": 0.7737538609765082, - "grad_norm": 0.0, - "learning_rate": 2.566709664277752e-06, - "loss": 0.759, - "step": 27305 - }, - { - "epoch": 0.7737821984187707, - "grad_norm": 0.0, - "learning_rate": 2.5660957625148164e-06, - "loss": 0.7845, - "step": 27306 - }, - { - "epoch": 0.7738105358610332, - "grad_norm": 0.0, - "learning_rate": 2.5654819233704164e-06, - "loss": 0.7842, - "step": 27307 - }, - { - "epoch": 0.7738388733032956, - "grad_norm": 0.0, - "learning_rate": 2.564868146849726e-06, - "loss": 0.6569, - "step": 27308 - }, - { - "epoch": 0.7738672107455581, - "grad_norm": 0.0, - "learning_rate": 2.5642544329579088e-06, - "loss": 0.8211, - "step": 27309 - }, - { - "epoch": 0.7738955481878206, - "grad_norm": 0.0, - "learning_rate": 2.5636407817001374e-06, - "loss": 0.8884, - "step": 27310 - }, - { - "epoch": 0.7739238856300831, - "grad_norm": 0.0, - "learning_rate": 2.563027193081582e-06, - "loss": 0.789, - "step": 27311 - }, - { - "epoch": 0.7739522230723455, - "grad_norm": 0.0, - "learning_rate": 2.5624136671074096e-06, - "loss": 0.7077, - "step": 27312 - }, - { - "epoch": 0.773980560514608, - "grad_norm": 0.0, - "learning_rate": 2.5618002037827916e-06, - "loss": 0.8411, - "step": 27313 - }, - { - "epoch": 0.7740088979568704, - "grad_norm": 0.0, - "learning_rate": 2.5611868031128894e-06, - "loss": 0.8574, - "step": 27314 - }, - { - "epoch": 0.7740372353991328, - "grad_norm": 0.0, - "learning_rate": 2.5605734651028737e-06, - "loss": 0.8324, - "step": 27315 - }, - { - "epoch": 0.7740655728413953, - "grad_norm": 0.0, - "learning_rate": 2.5599601897579128e-06, - "loss": 0.801, - "step": 27316 - }, - { - "epoch": 0.7740939102836578, - "grad_norm": 0.0, - "learning_rate": 2.559346977083168e-06, - "loss": 1.0109, - "step": 27317 - }, - { - "epoch": 0.7741222477259203, - "grad_norm": 0.0, - "learning_rate": 2.558733827083809e-06, - "loss": 0.8192, - "step": 27318 - }, - { - "epoch": 0.7741505851681827, - "grad_norm": 0.0, - "learning_rate": 2.5581207397649953e-06, - "loss": 0.7735, - "step": 27319 - }, - { - "epoch": 0.7741789226104452, - "grad_norm": 0.0, - "learning_rate": 2.557507715131894e-06, - "loss": 0.8659, - "step": 27320 - }, - { - "epoch": 0.7742072600527077, - "grad_norm": 0.0, - "learning_rate": 2.5568947531896714e-06, - "loss": 0.7072, - "step": 27321 - }, - { - "epoch": 0.7742355974949701, - "grad_norm": 0.0, - "learning_rate": 2.5562818539434864e-06, - "loss": 0.8158, - "step": 27322 - }, - { - "epoch": 0.7742639349372326, - "grad_norm": 0.0, - "learning_rate": 2.555669017398502e-06, - "loss": 0.7994, - "step": 27323 - }, - { - "epoch": 0.774292272379495, - "grad_norm": 0.0, - "learning_rate": 2.5550562435598834e-06, - "loss": 0.7058, - "step": 27324 - }, - { - "epoch": 0.7743206098217574, - "grad_norm": 0.0, - "learning_rate": 2.5544435324327898e-06, - "loss": 0.7987, - "step": 27325 - }, - { - "epoch": 0.7743489472640199, - "grad_norm": 0.0, - "learning_rate": 2.5538308840223856e-06, - "loss": 0.8268, - "step": 27326 - }, - { - "epoch": 0.7743772847062824, - "grad_norm": 0.0, - "learning_rate": 2.553218298333827e-06, - "loss": 0.8448, - "step": 27327 - }, - { - "epoch": 0.7744056221485449, - "grad_norm": 0.0, - "learning_rate": 2.5526057753722754e-06, - "loss": 0.6967, - "step": 27328 - }, - { - "epoch": 0.7744339595908073, - "grad_norm": 0.0, - "learning_rate": 2.5519933151428943e-06, - "loss": 0.7997, - "step": 27329 - }, - { - "epoch": 0.7744622970330698, - "grad_norm": 0.0, - "learning_rate": 2.5513809176508364e-06, - "loss": 0.7656, - "step": 27330 - }, - { - "epoch": 0.7744906344753323, - "grad_norm": 0.0, - "learning_rate": 2.5507685829012616e-06, - "loss": 0.8837, - "step": 27331 - }, - { - "epoch": 0.7745189719175947, - "grad_norm": 0.0, - "learning_rate": 2.5501563108993346e-06, - "loss": 0.6979, - "step": 27332 - }, - { - "epoch": 0.7745473093598572, - "grad_norm": 0.0, - "learning_rate": 2.549544101650202e-06, - "loss": 0.7846, - "step": 27333 - }, - { - "epoch": 0.7745756468021197, - "grad_norm": 0.0, - "learning_rate": 2.5489319551590307e-06, - "loss": 0.7956, - "step": 27334 - }, - { - "epoch": 0.7746039842443821, - "grad_norm": 0.0, - "learning_rate": 2.5483198714309687e-06, - "loss": 0.8527, - "step": 27335 - }, - { - "epoch": 0.7746323216866445, - "grad_norm": 0.0, - "learning_rate": 2.547707850471176e-06, - "loss": 0.7409, - "step": 27336 - }, - { - "epoch": 0.774660659128907, - "grad_norm": 0.0, - "learning_rate": 2.5470958922848064e-06, - "loss": 0.7998, - "step": 27337 - }, - { - "epoch": 0.7746889965711695, - "grad_norm": 0.0, - "learning_rate": 2.546483996877017e-06, - "loss": 0.787, - "step": 27338 - }, - { - "epoch": 0.7747173340134319, - "grad_norm": 0.0, - "learning_rate": 2.5458721642529637e-06, - "loss": 0.7489, - "step": 27339 - }, - { - "epoch": 0.7747456714556944, - "grad_norm": 0.0, - "learning_rate": 2.545260394417793e-06, - "loss": 0.7974, - "step": 27340 - }, - { - "epoch": 0.7747740088979569, - "grad_norm": 0.0, - "learning_rate": 2.544648687376663e-06, - "loss": 0.9048, - "step": 27341 - }, - { - "epoch": 0.7748023463402194, - "grad_norm": 0.0, - "learning_rate": 2.544037043134728e-06, - "loss": 0.7188, - "step": 27342 - }, - { - "epoch": 0.7748306837824818, - "grad_norm": 0.0, - "learning_rate": 2.5434254616971356e-06, - "loss": 0.7513, - "step": 27343 - }, - { - "epoch": 0.7748590212247443, - "grad_norm": 0.0, - "learning_rate": 2.542813943069039e-06, - "loss": 0.785, - "step": 27344 - }, - { - "epoch": 0.7748873586670068, - "grad_norm": 0.0, - "learning_rate": 2.542202487255593e-06, - "loss": 0.8709, - "step": 27345 - }, - { - "epoch": 0.7749156961092691, - "grad_norm": 0.0, - "learning_rate": 2.5415910942619416e-06, - "loss": 0.7994, - "step": 27346 - }, - { - "epoch": 0.7749440335515316, - "grad_norm": 0.0, - "learning_rate": 2.540979764093241e-06, - "loss": 0.8105, - "step": 27347 - }, - { - "epoch": 0.7749723709937941, - "grad_norm": 0.0, - "learning_rate": 2.540368496754634e-06, - "loss": 0.8225, - "step": 27348 - }, - { - "epoch": 0.7750007084360565, - "grad_norm": 0.0, - "learning_rate": 2.5397572922512735e-06, - "loss": 0.7648, - "step": 27349 - }, - { - "epoch": 0.775029045878319, - "grad_norm": 0.0, - "learning_rate": 2.5391461505883087e-06, - "loss": 0.8256, - "step": 27350 - }, - { - "epoch": 0.7750573833205815, - "grad_norm": 0.0, - "learning_rate": 2.5385350717708857e-06, - "loss": 0.7415, - "step": 27351 - }, - { - "epoch": 0.775085720762844, - "grad_norm": 0.0, - "learning_rate": 2.5379240558041527e-06, - "loss": 0.8433, - "step": 27352 - }, - { - "epoch": 0.7751140582051064, - "grad_norm": 0.0, - "learning_rate": 2.5373131026932585e-06, - "loss": 0.7924, - "step": 27353 - }, - { - "epoch": 0.7751423956473689, - "grad_norm": 0.0, - "learning_rate": 2.536702212443345e-06, - "loss": 0.8972, - "step": 27354 - }, - { - "epoch": 0.7751707330896314, - "grad_norm": 0.0, - "learning_rate": 2.5360913850595635e-06, - "loss": 0.8043, - "step": 27355 - }, - { - "epoch": 0.7751990705318937, - "grad_norm": 0.0, - "learning_rate": 2.535480620547053e-06, - "loss": 0.8488, - "step": 27356 - }, - { - "epoch": 0.7752274079741562, - "grad_norm": 0.0, - "learning_rate": 2.5348699189109606e-06, - "loss": 0.8779, - "step": 27357 - }, - { - "epoch": 0.7752557454164187, - "grad_norm": 0.0, - "learning_rate": 2.5342592801564336e-06, - "loss": 0.7386, - "step": 27358 - }, - { - "epoch": 0.7752840828586812, - "grad_norm": 0.0, - "learning_rate": 2.5336487042886106e-06, - "loss": 0.7924, - "step": 27359 - }, - { - "epoch": 0.7753124203009436, - "grad_norm": 0.0, - "learning_rate": 2.5330381913126368e-06, - "loss": 0.7841, - "step": 27360 - }, - { - "epoch": 0.7753407577432061, - "grad_norm": 0.0, - "learning_rate": 2.5324277412336585e-06, - "loss": 0.6884, - "step": 27361 - }, - { - "epoch": 0.7753690951854686, - "grad_norm": 0.0, - "learning_rate": 2.53181735405681e-06, - "loss": 0.8085, - "step": 27362 - }, - { - "epoch": 0.775397432627731, - "grad_norm": 0.0, - "learning_rate": 2.531207029787239e-06, - "loss": 0.8027, - "step": 27363 - }, - { - "epoch": 0.7754257700699935, - "grad_norm": 0.0, - "learning_rate": 2.5305967684300836e-06, - "loss": 0.7956, - "step": 27364 - }, - { - "epoch": 0.775454107512256, - "grad_norm": 0.0, - "learning_rate": 2.529986569990486e-06, - "loss": 0.7408, - "step": 27365 - }, - { - "epoch": 0.7754824449545185, - "grad_norm": 0.0, - "learning_rate": 2.529376434473588e-06, - "loss": 0.8527, - "step": 27366 - }, - { - "epoch": 0.7755107823967808, - "grad_norm": 0.0, - "learning_rate": 2.5287663618845236e-06, - "loss": 0.8809, - "step": 27367 - }, - { - "epoch": 0.7755391198390433, - "grad_norm": 0.0, - "learning_rate": 2.528156352228437e-06, - "loss": 0.801, - "step": 27368 - }, - { - "epoch": 0.7755674572813058, - "grad_norm": 0.0, - "learning_rate": 2.5275464055104615e-06, - "loss": 0.8192, - "step": 27369 - }, - { - "epoch": 0.7755957947235682, - "grad_norm": 0.0, - "learning_rate": 2.5269365217357376e-06, - "loss": 0.8581, - "step": 27370 - }, - { - "epoch": 0.7756241321658307, - "grad_norm": 0.0, - "learning_rate": 2.526326700909405e-06, - "loss": 0.8395, - "step": 27371 - }, - { - "epoch": 0.7756524696080932, - "grad_norm": 0.0, - "learning_rate": 2.5257169430365956e-06, - "loss": 0.7807, - "step": 27372 - }, - { - "epoch": 0.7756808070503556, - "grad_norm": 0.0, - "learning_rate": 2.525107248122447e-06, - "loss": 0.7753, - "step": 27373 - }, - { - "epoch": 0.7757091444926181, - "grad_norm": 0.0, - "learning_rate": 2.5244976161720993e-06, - "loss": 0.8293, - "step": 27374 - }, - { - "epoch": 0.7757374819348806, - "grad_norm": 0.0, - "learning_rate": 2.523888047190681e-06, - "loss": 0.813, - "step": 27375 - }, - { - "epoch": 0.7757658193771431, - "grad_norm": 0.0, - "learning_rate": 2.5232785411833306e-06, - "loss": 0.743, - "step": 27376 - }, - { - "epoch": 0.7757941568194054, - "grad_norm": 0.0, - "learning_rate": 2.5226690981551806e-06, - "loss": 0.7951, - "step": 27377 - }, - { - "epoch": 0.7758224942616679, - "grad_norm": 0.0, - "learning_rate": 2.522059718111366e-06, - "loss": 0.7457, - "step": 27378 - }, - { - "epoch": 0.7758508317039304, - "grad_norm": 0.0, - "learning_rate": 2.5214504010570217e-06, - "loss": 0.8579, - "step": 27379 - }, - { - "epoch": 0.7758791691461928, - "grad_norm": 0.0, - "learning_rate": 2.520841146997275e-06, - "loss": 0.8159, - "step": 27380 - }, - { - "epoch": 0.7759075065884553, - "grad_norm": 0.0, - "learning_rate": 2.52023195593726e-06, - "loss": 0.7419, - "step": 27381 - }, - { - "epoch": 0.7759358440307178, - "grad_norm": 0.0, - "learning_rate": 2.5196228278821123e-06, - "loss": 0.8276, - "step": 27382 - }, - { - "epoch": 0.7759641814729803, - "grad_norm": 0.0, - "learning_rate": 2.519013762836957e-06, - "loss": 0.892, - "step": 27383 - }, - { - "epoch": 0.7759925189152427, - "grad_norm": 0.0, - "learning_rate": 2.5184047608069283e-06, - "loss": 0.8462, - "step": 27384 - }, - { - "epoch": 0.7760208563575052, - "grad_norm": 0.0, - "learning_rate": 2.517795821797153e-06, - "loss": 0.8279, - "step": 27385 - }, - { - "epoch": 0.7760491937997677, - "grad_norm": 0.0, - "learning_rate": 2.5171869458127605e-06, - "loss": 0.8799, - "step": 27386 - }, - { - "epoch": 0.77607753124203, - "grad_norm": 0.0, - "learning_rate": 2.5165781328588855e-06, - "loss": 0.8706, - "step": 27387 - }, - { - "epoch": 0.7761058686842925, - "grad_norm": 0.0, - "learning_rate": 2.5159693829406485e-06, - "loss": 0.7798, - "step": 27388 - }, - { - "epoch": 0.776134206126555, - "grad_norm": 0.0, - "learning_rate": 2.515360696063179e-06, - "loss": 0.7584, - "step": 27389 - }, - { - "epoch": 0.7761625435688175, - "grad_norm": 0.0, - "learning_rate": 2.514752072231608e-06, - "loss": 0.7691, - "step": 27390 - }, - { - "epoch": 0.7761908810110799, - "grad_norm": 0.0, - "learning_rate": 2.5141435114510583e-06, - "loss": 0.828, - "step": 27391 - }, - { - "epoch": 0.7762192184533424, - "grad_norm": 0.0, - "learning_rate": 2.513535013726661e-06, - "loss": 0.8008, - "step": 27392 - }, - { - "epoch": 0.7762475558956049, - "grad_norm": 0.0, - "learning_rate": 2.5129265790635347e-06, - "loss": 0.8054, - "step": 27393 - }, - { - "epoch": 0.7762758933378673, - "grad_norm": 0.0, - "learning_rate": 2.5123182074668098e-06, - "loss": 0.7901, - "step": 27394 - }, - { - "epoch": 0.7763042307801298, - "grad_norm": 0.0, - "learning_rate": 2.51170989894161e-06, - "loss": 0.7754, - "step": 27395 - }, - { - "epoch": 0.7763325682223923, - "grad_norm": 0.0, - "learning_rate": 2.5111016534930565e-06, - "loss": 0.755, - "step": 27396 - }, - { - "epoch": 0.7763609056646547, - "grad_norm": 0.0, - "learning_rate": 2.5104934711262774e-06, - "loss": 0.8028, - "step": 27397 - }, - { - "epoch": 0.7763892431069171, - "grad_norm": 0.0, - "learning_rate": 2.5098853518463907e-06, - "loss": 0.87, - "step": 27398 - }, - { - "epoch": 0.7764175805491796, - "grad_norm": 0.0, - "learning_rate": 2.509277295658521e-06, - "loss": 0.7579, - "step": 27399 - }, - { - "epoch": 0.7764459179914421, - "grad_norm": 0.0, - "learning_rate": 2.508669302567792e-06, - "loss": 0.7911, - "step": 27400 - }, - { - "epoch": 0.7764742554337045, - "grad_norm": 0.0, - "learning_rate": 2.5080613725793212e-06, - "loss": 0.8052, - "step": 27401 - }, - { - "epoch": 0.776502592875967, - "grad_norm": 0.0, - "learning_rate": 2.507453505698232e-06, - "loss": 0.7323, - "step": 27402 - }, - { - "epoch": 0.7765309303182295, - "grad_norm": 0.0, - "learning_rate": 2.506845701929643e-06, - "loss": 0.7838, - "step": 27403 - }, - { - "epoch": 0.7765592677604919, - "grad_norm": 0.0, - "learning_rate": 2.5062379612786757e-06, - "loss": 0.7775, - "step": 27404 - }, - { - "epoch": 0.7765876052027544, - "grad_norm": 0.0, - "learning_rate": 2.505630283750452e-06, - "loss": 0.7557, - "step": 27405 - }, - { - "epoch": 0.7766159426450169, - "grad_norm": 0.0, - "learning_rate": 2.5050226693500843e-06, - "loss": 0.7914, - "step": 27406 - }, - { - "epoch": 0.7766442800872794, - "grad_norm": 0.0, - "learning_rate": 2.5044151180826947e-06, - "loss": 0.8148, - "step": 27407 - }, - { - "epoch": 0.7766726175295418, - "grad_norm": 0.0, - "learning_rate": 2.503807629953402e-06, - "loss": 0.717, - "step": 27408 - }, - { - "epoch": 0.7767009549718042, - "grad_norm": 0.0, - "learning_rate": 2.5032002049673175e-06, - "loss": 0.7716, - "step": 27409 - }, - { - "epoch": 0.7767292924140667, - "grad_norm": 0.0, - "learning_rate": 2.5025928431295634e-06, - "loss": 0.8113, - "step": 27410 - }, - { - "epoch": 0.7767576298563291, - "grad_norm": 0.0, - "learning_rate": 2.5019855444452556e-06, - "loss": 0.8386, - "step": 27411 - }, - { - "epoch": 0.7767859672985916, - "grad_norm": 0.0, - "learning_rate": 2.5013783089195055e-06, - "loss": 0.8658, - "step": 27412 - }, - { - "epoch": 0.7768143047408541, - "grad_norm": 0.0, - "learning_rate": 2.5007711365574326e-06, - "loss": 0.8289, - "step": 27413 - }, - { - "epoch": 0.7768426421831166, - "grad_norm": 0.0, - "learning_rate": 2.500164027364147e-06, - "loss": 0.8303, - "step": 27414 - }, - { - "epoch": 0.776870979625379, - "grad_norm": 0.0, - "learning_rate": 2.499556981344764e-06, - "loss": 0.7456, - "step": 27415 - }, - { - "epoch": 0.7768993170676415, - "grad_norm": 0.0, - "learning_rate": 2.4989499985043986e-06, - "loss": 0.8287, - "step": 27416 - }, - { - "epoch": 0.776927654509904, - "grad_norm": 0.0, - "learning_rate": 2.498343078848162e-06, - "loss": 0.8696, - "step": 27417 - }, - { - "epoch": 0.7769559919521664, - "grad_norm": 0.0, - "learning_rate": 2.4977362223811684e-06, - "loss": 0.7905, - "step": 27418 - }, - { - "epoch": 0.7769843293944289, - "grad_norm": 0.0, - "learning_rate": 2.4971294291085313e-06, - "loss": 0.7772, - "step": 27419 - }, - { - "epoch": 0.7770126668366913, - "grad_norm": 0.0, - "learning_rate": 2.496522699035355e-06, - "loss": 0.7574, - "step": 27420 - }, - { - "epoch": 0.7770410042789537, - "grad_norm": 0.0, - "learning_rate": 2.4959160321667586e-06, - "loss": 0.812, - "step": 27421 - }, - { - "epoch": 0.7770693417212162, - "grad_norm": 0.0, - "learning_rate": 2.495309428507844e-06, - "loss": 0.7974, - "step": 27422 - }, - { - "epoch": 0.7770976791634787, - "grad_norm": 0.0, - "learning_rate": 2.494702888063726e-06, - "loss": 0.7312, - "step": 27423 - }, - { - "epoch": 0.7771260166057412, - "grad_norm": 0.0, - "learning_rate": 2.494096410839515e-06, - "loss": 0.7508, - "step": 27424 - }, - { - "epoch": 0.7771543540480036, - "grad_norm": 0.0, - "learning_rate": 2.4934899968403135e-06, - "loss": 0.7784, - "step": 27425 - }, - { - "epoch": 0.7771826914902661, - "grad_norm": 0.0, - "learning_rate": 2.492883646071237e-06, - "loss": 0.8121, - "step": 27426 - }, - { - "epoch": 0.7772110289325286, - "grad_norm": 0.0, - "learning_rate": 2.492277358537386e-06, - "loss": 0.831, - "step": 27427 - }, - { - "epoch": 0.777239366374791, - "grad_norm": 0.0, - "learning_rate": 2.4916711342438717e-06, - "loss": 0.8517, - "step": 27428 - }, - { - "epoch": 0.7772677038170535, - "grad_norm": 0.0, - "learning_rate": 2.491064973195798e-06, - "loss": 0.7989, - "step": 27429 - }, - { - "epoch": 0.777296041259316, - "grad_norm": 0.0, - "learning_rate": 2.4904588753982738e-06, - "loss": 0.8296, - "step": 27430 - }, - { - "epoch": 0.7773243787015784, - "grad_norm": 0.0, - "learning_rate": 2.4898528408564025e-06, - "loss": 0.8321, - "step": 27431 - }, - { - "epoch": 0.7773527161438408, - "grad_norm": 0.0, - "learning_rate": 2.4892468695752924e-06, - "loss": 0.8116, - "step": 27432 - }, - { - "epoch": 0.7773810535861033, - "grad_norm": 0.0, - "learning_rate": 2.4886409615600425e-06, - "loss": 0.8293, - "step": 27433 - }, - { - "epoch": 0.7774093910283658, - "grad_norm": 0.0, - "learning_rate": 2.4880351168157614e-06, - "loss": 0.7636, - "step": 27434 - }, - { - "epoch": 0.7774377284706282, - "grad_norm": 0.0, - "learning_rate": 2.4874293353475477e-06, - "loss": 0.8172, - "step": 27435 - }, - { - "epoch": 0.7774660659128907, - "grad_norm": 0.0, - "learning_rate": 2.486823617160505e-06, - "loss": 0.8473, - "step": 27436 - }, - { - "epoch": 0.7774944033551532, - "grad_norm": 0.0, - "learning_rate": 2.4862179622597415e-06, - "loss": 0.8235, - "step": 27437 - }, - { - "epoch": 0.7775227407974157, - "grad_norm": 0.0, - "learning_rate": 2.4856123706503508e-06, - "loss": 0.7724, - "step": 27438 - }, - { - "epoch": 0.7775510782396781, - "grad_norm": 0.0, - "learning_rate": 2.4850068423374376e-06, - "loss": 0.8174, - "step": 27439 - }, - { - "epoch": 0.7775794156819406, - "grad_norm": 0.0, - "learning_rate": 2.4844013773261044e-06, - "loss": 0.828, - "step": 27440 - }, - { - "epoch": 0.777607753124203, - "grad_norm": 0.0, - "learning_rate": 2.483795975621448e-06, - "loss": 0.8001, - "step": 27441 - }, - { - "epoch": 0.7776360905664654, - "grad_norm": 0.0, - "learning_rate": 2.4831906372285676e-06, - "loss": 0.7811, - "step": 27442 - }, - { - "epoch": 0.7776644280087279, - "grad_norm": 0.0, - "learning_rate": 2.482585362152564e-06, - "loss": 0.7292, - "step": 27443 - }, - { - "epoch": 0.7776927654509904, - "grad_norm": 0.0, - "learning_rate": 2.4819801503985365e-06, - "loss": 0.812, - "step": 27444 - }, - { - "epoch": 0.7777211028932528, - "grad_norm": 0.0, - "learning_rate": 2.4813750019715844e-06, - "loss": 0.8347, - "step": 27445 - }, - { - "epoch": 0.7777494403355153, - "grad_norm": 0.0, - "learning_rate": 2.4807699168767995e-06, - "loss": 0.779, - "step": 27446 - }, - { - "epoch": 0.7777777777777778, - "grad_norm": 0.0, - "learning_rate": 2.4801648951192815e-06, - "loss": 0.724, - "step": 27447 - }, - { - "epoch": 0.7778061152200403, - "grad_norm": 0.0, - "learning_rate": 2.47955993670413e-06, - "loss": 0.7924, - "step": 27448 - }, - { - "epoch": 0.7778344526623027, - "grad_norm": 0.0, - "learning_rate": 2.478955041636435e-06, - "loss": 0.8624, - "step": 27449 - }, - { - "epoch": 0.7778627901045652, - "grad_norm": 0.0, - "learning_rate": 2.4783502099212974e-06, - "loss": 0.7629, - "step": 27450 - }, - { - "epoch": 0.7778911275468277, - "grad_norm": 0.0, - "learning_rate": 2.4777454415638067e-06, - "loss": 0.8789, - "step": 27451 - }, - { - "epoch": 0.77791946498909, - "grad_norm": 0.0, - "learning_rate": 2.477140736569059e-06, - "loss": 0.745, - "step": 27452 - }, - { - "epoch": 0.7779478024313525, - "grad_norm": 0.0, - "learning_rate": 2.476536094942151e-06, - "loss": 0.7658, - "step": 27453 - }, - { - "epoch": 0.777976139873615, - "grad_norm": 0.0, - "learning_rate": 2.4759315166881713e-06, - "loss": 0.853, - "step": 27454 - }, - { - "epoch": 0.7780044773158775, - "grad_norm": 0.0, - "learning_rate": 2.4753270018122142e-06, - "loss": 0.8347, - "step": 27455 - }, - { - "epoch": 0.7780328147581399, - "grad_norm": 0.0, - "learning_rate": 2.4747225503193717e-06, - "loss": 0.8758, - "step": 27456 - }, - { - "epoch": 0.7780611522004024, - "grad_norm": 0.0, - "learning_rate": 2.474118162214735e-06, - "loss": 0.7337, - "step": 27457 - }, - { - "epoch": 0.7780894896426649, - "grad_norm": 0.0, - "learning_rate": 2.4735138375033995e-06, - "loss": 0.7796, - "step": 27458 - }, - { - "epoch": 0.7781178270849273, - "grad_norm": 0.0, - "learning_rate": 2.4729095761904487e-06, - "loss": 0.8302, - "step": 27459 - }, - { - "epoch": 0.7781461645271898, - "grad_norm": 0.0, - "learning_rate": 2.472305378280976e-06, - "loss": 0.8055, - "step": 27460 - }, - { - "epoch": 0.7781745019694523, - "grad_norm": 0.0, - "learning_rate": 2.4717012437800724e-06, - "loss": 0.8061, - "step": 27461 - }, - { - "epoch": 0.7782028394117148, - "grad_norm": 0.0, - "learning_rate": 2.4710971726928224e-06, - "loss": 0.7748, - "step": 27462 - }, - { - "epoch": 0.7782311768539771, - "grad_norm": 0.0, - "learning_rate": 2.470493165024319e-06, - "loss": 0.7437, - "step": 27463 - }, - { - "epoch": 0.7782595142962396, - "grad_norm": 0.0, - "learning_rate": 2.4698892207796453e-06, - "loss": 0.7357, - "step": 27464 - }, - { - "epoch": 0.7782878517385021, - "grad_norm": 0.0, - "learning_rate": 2.469285339963892e-06, - "loss": 0.9117, - "step": 27465 - }, - { - "epoch": 0.7783161891807645, - "grad_norm": 0.0, - "learning_rate": 2.4686815225821457e-06, - "loss": 0.834, - "step": 27466 - }, - { - "epoch": 0.778344526623027, - "grad_norm": 0.0, - "learning_rate": 2.4680777686394895e-06, - "loss": 0.8761, - "step": 27467 - }, - { - "epoch": 0.7783728640652895, - "grad_norm": 0.0, - "learning_rate": 2.4674740781410122e-06, - "loss": 0.892, - "step": 27468 - }, - { - "epoch": 0.7784012015075519, - "grad_norm": 0.0, - "learning_rate": 2.466870451091796e-06, - "loss": 0.808, - "step": 27469 - }, - { - "epoch": 0.7784295389498144, - "grad_norm": 0.0, - "learning_rate": 2.4662668874969287e-06, - "loss": 0.9325, - "step": 27470 - }, - { - "epoch": 0.7784578763920769, - "grad_norm": 0.0, - "learning_rate": 2.4656633873614953e-06, - "loss": 0.8305, - "step": 27471 - }, - { - "epoch": 0.7784862138343394, - "grad_norm": 0.0, - "learning_rate": 2.4650599506905746e-06, - "loss": 0.8156, - "step": 27472 - }, - { - "epoch": 0.7785145512766017, - "grad_norm": 0.0, - "learning_rate": 2.464456577489253e-06, - "loss": 0.8323, - "step": 27473 - }, - { - "epoch": 0.7785428887188642, - "grad_norm": 0.0, - "learning_rate": 2.4638532677626124e-06, - "loss": 0.8374, - "step": 27474 - }, - { - "epoch": 0.7785712261611267, - "grad_norm": 0.0, - "learning_rate": 2.463250021515733e-06, - "loss": 0.8293, - "step": 27475 - }, - { - "epoch": 0.7785995636033891, - "grad_norm": 0.0, - "learning_rate": 2.462646838753696e-06, - "loss": 0.797, - "step": 27476 - }, - { - "epoch": 0.7786279010456516, - "grad_norm": 0.0, - "learning_rate": 2.462043719481587e-06, - "loss": 0.9395, - "step": 27477 - }, - { - "epoch": 0.7786562384879141, - "grad_norm": 0.0, - "learning_rate": 2.4614406637044808e-06, - "loss": 0.8499, - "step": 27478 - }, - { - "epoch": 0.7786845759301766, - "grad_norm": 0.0, - "learning_rate": 2.4608376714274617e-06, - "loss": 0.8799, - "step": 27479 - }, - { - "epoch": 0.778712913372439, - "grad_norm": 0.0, - "learning_rate": 2.4602347426556037e-06, - "loss": 0.7471, - "step": 27480 - }, - { - "epoch": 0.7787412508147015, - "grad_norm": 0.0, - "learning_rate": 2.4596318773939885e-06, - "loss": 0.7945, - "step": 27481 - }, - { - "epoch": 0.778769588256964, - "grad_norm": 0.0, - "learning_rate": 2.4590290756476933e-06, - "loss": 0.8744, - "step": 27482 - }, - { - "epoch": 0.7787979256992263, - "grad_norm": 0.0, - "learning_rate": 2.4584263374217963e-06, - "loss": 0.8316, - "step": 27483 - }, - { - "epoch": 0.7788262631414888, - "grad_norm": 0.0, - "learning_rate": 2.457823662721379e-06, - "loss": 0.9414, - "step": 27484 - }, - { - "epoch": 0.7788546005837513, - "grad_norm": 0.0, - "learning_rate": 2.4572210515515093e-06, - "loss": 0.8941, - "step": 27485 - }, - { - "epoch": 0.7788829380260138, - "grad_norm": 0.0, - "learning_rate": 2.4566185039172687e-06, - "loss": 0.8203, - "step": 27486 - }, - { - "epoch": 0.7789112754682762, - "grad_norm": 0.0, - "learning_rate": 2.4560160198237337e-06, - "loss": 0.8393, - "step": 27487 - }, - { - "epoch": 0.7789396129105387, - "grad_norm": 0.0, - "learning_rate": 2.4554135992759753e-06, - "loss": 0.7803, - "step": 27488 - }, - { - "epoch": 0.7789679503528012, - "grad_norm": 0.0, - "learning_rate": 2.4548112422790695e-06, - "loss": 0.8033, - "step": 27489 - }, - { - "epoch": 0.7789962877950636, - "grad_norm": 0.0, - "learning_rate": 2.4542089488380925e-06, - "loss": 0.8664, - "step": 27490 - }, - { - "epoch": 0.7790246252373261, - "grad_norm": 0.0, - "learning_rate": 2.453606718958114e-06, - "loss": 0.7804, - "step": 27491 - }, - { - "epoch": 0.7790529626795886, - "grad_norm": 0.0, - "learning_rate": 2.4530045526442105e-06, - "loss": 0.89, - "step": 27492 - }, - { - "epoch": 0.779081300121851, - "grad_norm": 0.0, - "learning_rate": 2.4524024499014488e-06, - "loss": 0.8454, - "step": 27493 - }, - { - "epoch": 0.7791096375641134, - "grad_norm": 0.0, - "learning_rate": 2.451800410734905e-06, - "loss": 0.8537, - "step": 27494 - }, - { - "epoch": 0.7791379750063759, - "grad_norm": 0.0, - "learning_rate": 2.4511984351496477e-06, - "loss": 0.8444, - "step": 27495 - }, - { - "epoch": 0.7791663124486384, - "grad_norm": 0.0, - "learning_rate": 2.45059652315075e-06, - "loss": 0.7712, - "step": 27496 - }, - { - "epoch": 0.7791946498909008, - "grad_norm": 0.0, - "learning_rate": 2.4499946747432814e-06, - "loss": 0.7674, - "step": 27497 - }, - { - "epoch": 0.7792229873331633, - "grad_norm": 0.0, - "learning_rate": 2.449392889932315e-06, - "loss": 0.8188, - "step": 27498 - }, - { - "epoch": 0.7792513247754258, - "grad_norm": 0.0, - "learning_rate": 2.4487911687229116e-06, - "loss": 0.8887, - "step": 27499 - }, - { - "epoch": 0.7792796622176882, - "grad_norm": 0.0, - "learning_rate": 2.448189511120148e-06, - "loss": 0.765, - "step": 27500 - }, - { - "epoch": 0.7793079996599507, - "grad_norm": 0.0, - "learning_rate": 2.4475879171290863e-06, - "loss": 0.8183, - "step": 27501 - }, - { - "epoch": 0.7793363371022132, - "grad_norm": 0.0, - "learning_rate": 2.446986386754795e-06, - "loss": 0.8897, - "step": 27502 - }, - { - "epoch": 0.7793646745444757, - "grad_norm": 0.0, - "learning_rate": 2.446384920002345e-06, - "loss": 0.9255, - "step": 27503 - }, - { - "epoch": 0.779393011986738, - "grad_norm": 0.0, - "learning_rate": 2.4457835168767975e-06, - "loss": 0.6553, - "step": 27504 - }, - { - "epoch": 0.7794213494290005, - "grad_norm": 0.0, - "learning_rate": 2.4451821773832197e-06, - "loss": 0.9094, - "step": 27505 - }, - { - "epoch": 0.779449686871263, - "grad_norm": 0.0, - "learning_rate": 2.444580901526682e-06, - "loss": 0.8049, - "step": 27506 - }, - { - "epoch": 0.7794780243135254, - "grad_norm": 0.0, - "learning_rate": 2.4439796893122415e-06, - "loss": 0.717, - "step": 27507 - }, - { - "epoch": 0.7795063617557879, - "grad_norm": 0.0, - "learning_rate": 2.443378540744965e-06, - "loss": 0.7226, - "step": 27508 - }, - { - "epoch": 0.7795346991980504, - "grad_norm": 0.0, - "learning_rate": 2.442777455829919e-06, - "loss": 0.8981, - "step": 27509 - }, - { - "epoch": 0.7795630366403129, - "grad_norm": 0.0, - "learning_rate": 2.442176434572163e-06, - "loss": 0.8265, - "step": 27510 - }, - { - "epoch": 0.7795913740825753, - "grad_norm": 0.0, - "learning_rate": 2.4415754769767654e-06, - "loss": 0.8063, - "step": 27511 - }, - { - "epoch": 0.7796197115248378, - "grad_norm": 0.0, - "learning_rate": 2.440974583048781e-06, - "loss": 0.7294, - "step": 27512 - }, - { - "epoch": 0.7796480489671003, - "grad_norm": 0.0, - "learning_rate": 2.440373752793278e-06, - "loss": 0.692, - "step": 27513 - }, - { - "epoch": 0.7796763864093627, - "grad_norm": 0.0, - "learning_rate": 2.4397729862153107e-06, - "loss": 0.7637, - "step": 27514 - }, - { - "epoch": 0.7797047238516251, - "grad_norm": 0.0, - "learning_rate": 2.4391722833199436e-06, - "loss": 0.796, - "step": 27515 - }, - { - "epoch": 0.7797330612938876, - "grad_norm": 0.0, - "learning_rate": 2.4385716441122385e-06, - "loss": 0.8748, - "step": 27516 - }, - { - "epoch": 0.77976139873615, - "grad_norm": 0.0, - "learning_rate": 2.437971068597249e-06, - "loss": 0.7331, - "step": 27517 - }, - { - "epoch": 0.7797897361784125, - "grad_norm": 0.0, - "learning_rate": 2.437370556780038e-06, - "loss": 0.7761, - "step": 27518 - }, - { - "epoch": 0.779818073620675, - "grad_norm": 0.0, - "learning_rate": 2.4367701086656625e-06, - "loss": 0.8519, - "step": 27519 - }, - { - "epoch": 0.7798464110629375, - "grad_norm": 0.0, - "learning_rate": 2.4361697242591844e-06, - "loss": 0.8687, - "step": 27520 - }, - { - "epoch": 0.7798747485051999, - "grad_norm": 0.0, - "learning_rate": 2.435569403565654e-06, - "loss": 0.8615, - "step": 27521 - }, - { - "epoch": 0.7799030859474624, - "grad_norm": 0.0, - "learning_rate": 2.4349691465901313e-06, - "loss": 0.8986, - "step": 27522 - }, - { - "epoch": 0.7799314233897249, - "grad_norm": 0.0, - "learning_rate": 2.4343689533376734e-06, - "loss": 0.69, - "step": 27523 - }, - { - "epoch": 0.7799597608319873, - "grad_norm": 0.0, - "learning_rate": 2.433768823813337e-06, - "loss": 0.8712, - "step": 27524 - }, - { - "epoch": 0.7799880982742498, - "grad_norm": 0.0, - "learning_rate": 2.4331687580221743e-06, - "loss": 0.7674, - "step": 27525 - }, - { - "epoch": 0.7800164357165122, - "grad_norm": 0.0, - "learning_rate": 2.43256875596924e-06, - "loss": 0.7971, - "step": 27526 - }, - { - "epoch": 0.7800447731587747, - "grad_norm": 0.0, - "learning_rate": 2.4319688176595922e-06, - "loss": 0.6126, - "step": 27527 - }, - { - "epoch": 0.7800731106010371, - "grad_norm": 0.0, - "learning_rate": 2.4313689430982777e-06, - "loss": 0.7747, - "step": 27528 - }, - { - "epoch": 0.7801014480432996, - "grad_norm": 0.0, - "learning_rate": 2.430769132290357e-06, - "loss": 0.8734, - "step": 27529 - }, - { - "epoch": 0.7801297854855621, - "grad_norm": 0.0, - "learning_rate": 2.4301693852408757e-06, - "loss": 0.8374, - "step": 27530 - }, - { - "epoch": 0.7801581229278245, - "grad_norm": 0.0, - "learning_rate": 2.429569701954889e-06, - "loss": 0.7324, - "step": 27531 - }, - { - "epoch": 0.780186460370087, - "grad_norm": 0.0, - "learning_rate": 2.4289700824374473e-06, - "loss": 0.7983, - "step": 27532 - }, - { - "epoch": 0.7802147978123495, - "grad_norm": 0.0, - "learning_rate": 2.4283705266936018e-06, - "loss": 0.8305, - "step": 27533 - }, - { - "epoch": 0.7802431352546119, - "grad_norm": 0.0, - "learning_rate": 2.4277710347284035e-06, - "loss": 0.7948, - "step": 27534 - }, - { - "epoch": 0.7802714726968744, - "grad_norm": 0.0, - "learning_rate": 2.427171606546904e-06, - "loss": 0.8138, - "step": 27535 - }, - { - "epoch": 0.7802998101391369, - "grad_norm": 0.0, - "learning_rate": 2.4265722421541482e-06, - "loss": 0.7781, - "step": 27536 - }, - { - "epoch": 0.7803281475813993, - "grad_norm": 0.0, - "learning_rate": 2.425972941555189e-06, - "loss": 0.7348, - "step": 27537 - }, - { - "epoch": 0.7803564850236617, - "grad_norm": 0.0, - "learning_rate": 2.4253737047550697e-06, - "loss": 0.8481, - "step": 27538 - }, - { - "epoch": 0.7803848224659242, - "grad_norm": 0.0, - "learning_rate": 2.42477453175884e-06, - "loss": 0.7261, - "step": 27539 - }, - { - "epoch": 0.7804131599081867, - "grad_norm": 0.0, - "learning_rate": 2.4241754225715507e-06, - "loss": 0.807, - "step": 27540 - }, - { - "epoch": 0.7804414973504491, - "grad_norm": 0.0, - "learning_rate": 2.4235763771982414e-06, - "loss": 0.8019, - "step": 27541 - }, - { - "epoch": 0.7804698347927116, - "grad_norm": 0.0, - "learning_rate": 2.422977395643965e-06, - "loss": 0.8888, - "step": 27542 - }, - { - "epoch": 0.7804981722349741, - "grad_norm": 0.0, - "learning_rate": 2.4223784779137614e-06, - "loss": 0.7316, - "step": 27543 - }, - { - "epoch": 0.7805265096772366, - "grad_norm": 0.0, - "learning_rate": 2.4217796240126767e-06, - "loss": 0.7155, - "step": 27544 - }, - { - "epoch": 0.780554847119499, - "grad_norm": 0.0, - "learning_rate": 2.4211808339457576e-06, - "loss": 0.8316, - "step": 27545 - }, - { - "epoch": 0.7805831845617615, - "grad_norm": 0.0, - "learning_rate": 2.4205821077180457e-06, - "loss": 0.984, - "step": 27546 - }, - { - "epoch": 0.780611522004024, - "grad_norm": 0.0, - "learning_rate": 2.419983445334586e-06, - "loss": 0.9177, - "step": 27547 - }, - { - "epoch": 0.7806398594462863, - "grad_norm": 0.0, - "learning_rate": 2.4193848468004224e-06, - "loss": 0.8052, - "step": 27548 - }, - { - "epoch": 0.7806681968885488, - "grad_norm": 0.0, - "learning_rate": 2.4187863121205933e-06, - "loss": 0.741, - "step": 27549 - }, - { - "epoch": 0.7806965343308113, - "grad_norm": 0.0, - "learning_rate": 2.4181878413001447e-06, - "loss": 0.768, - "step": 27550 - }, - { - "epoch": 0.7807248717730738, - "grad_norm": 0.0, - "learning_rate": 2.417589434344112e-06, - "loss": 0.9, - "step": 27551 - }, - { - "epoch": 0.7807532092153362, - "grad_norm": 0.0, - "learning_rate": 2.4169910912575402e-06, - "loss": 0.8936, - "step": 27552 - }, - { - "epoch": 0.7807815466575987, - "grad_norm": 0.0, - "learning_rate": 2.4163928120454705e-06, - "loss": 0.8428, - "step": 27553 - }, - { - "epoch": 0.7808098840998612, - "grad_norm": 0.0, - "learning_rate": 2.415794596712937e-06, - "loss": 0.9125, - "step": 27554 - }, - { - "epoch": 0.7808382215421236, - "grad_norm": 0.0, - "learning_rate": 2.4151964452649823e-06, - "loss": 0.7683, - "step": 27555 - }, - { - "epoch": 0.7808665589843861, - "grad_norm": 0.0, - "learning_rate": 2.414598357706648e-06, - "loss": 0.8478, - "step": 27556 - }, - { - "epoch": 0.7808948964266486, - "grad_norm": 0.0, - "learning_rate": 2.414000334042965e-06, - "loss": 0.758, - "step": 27557 - }, - { - "epoch": 0.7809232338689109, - "grad_norm": 0.0, - "learning_rate": 2.4134023742789745e-06, - "loss": 0.8331, - "step": 27558 - }, - { - "epoch": 0.7809515713111734, - "grad_norm": 0.0, - "learning_rate": 2.4128044784197124e-06, - "loss": 0.836, - "step": 27559 - }, - { - "epoch": 0.7809799087534359, - "grad_norm": 0.0, - "learning_rate": 2.4122066464702165e-06, - "loss": 0.7946, - "step": 27560 - }, - { - "epoch": 0.7810082461956984, - "grad_norm": 0.0, - "learning_rate": 2.411608878435524e-06, - "loss": 0.8046, - "step": 27561 - }, - { - "epoch": 0.7810365836379608, - "grad_norm": 0.0, - "learning_rate": 2.4110111743206655e-06, - "loss": 0.8582, - "step": 27562 - }, - { - "epoch": 0.7810649210802233, - "grad_norm": 0.0, - "learning_rate": 2.4104135341306777e-06, - "loss": 0.8674, - "step": 27563 - }, - { - "epoch": 0.7810932585224858, - "grad_norm": 0.0, - "learning_rate": 2.409815957870597e-06, - "loss": 0.7782, - "step": 27564 - }, - { - "epoch": 0.7811215959647482, - "grad_norm": 0.0, - "learning_rate": 2.4092184455454537e-06, - "loss": 0.8782, - "step": 27565 - }, - { - "epoch": 0.7811499334070107, - "grad_norm": 0.0, - "learning_rate": 2.4086209971602836e-06, - "loss": 0.8141, - "step": 27566 - }, - { - "epoch": 0.7811782708492732, - "grad_norm": 0.0, - "learning_rate": 2.4080236127201162e-06, - "loss": 0.8442, - "step": 27567 - }, - { - "epoch": 0.7812066082915357, - "grad_norm": 0.0, - "learning_rate": 2.407426292229984e-06, - "loss": 0.7345, - "step": 27568 - }, - { - "epoch": 0.781234945733798, - "grad_norm": 0.0, - "learning_rate": 2.406829035694923e-06, - "loss": 0.8305, - "step": 27569 - }, - { - "epoch": 0.7812632831760605, - "grad_norm": 0.0, - "learning_rate": 2.4062318431199584e-06, - "loss": 0.7723, - "step": 27570 - }, - { - "epoch": 0.781291620618323, - "grad_norm": 0.0, - "learning_rate": 2.405634714510122e-06, - "loss": 0.8622, - "step": 27571 - }, - { - "epoch": 0.7813199580605854, - "grad_norm": 0.0, - "learning_rate": 2.405037649870444e-06, - "loss": 0.8659, - "step": 27572 - }, - { - "epoch": 0.7813482955028479, - "grad_norm": 0.0, - "learning_rate": 2.404440649205956e-06, - "loss": 0.8, - "step": 27573 - }, - { - "epoch": 0.7813766329451104, - "grad_norm": 0.0, - "learning_rate": 2.4038437125216862e-06, - "loss": 0.8236, - "step": 27574 - }, - { - "epoch": 0.7814049703873729, - "grad_norm": 0.0, - "learning_rate": 2.4032468398226595e-06, - "loss": 0.7759, - "step": 27575 - }, - { - "epoch": 0.7814333078296353, - "grad_norm": 0.0, - "learning_rate": 2.4026500311139056e-06, - "loss": 0.8407, - "step": 27576 - }, - { - "epoch": 0.7814616452718978, - "grad_norm": 0.0, - "learning_rate": 2.4020532864004543e-06, - "loss": 0.7338, - "step": 27577 - }, - { - "epoch": 0.7814899827141603, - "grad_norm": 0.0, - "learning_rate": 2.401456605687327e-06, - "loss": 0.7724, - "step": 27578 - }, - { - "epoch": 0.7815183201564226, - "grad_norm": 0.0, - "learning_rate": 2.400859988979555e-06, - "loss": 0.7957, - "step": 27579 - }, - { - "epoch": 0.7815466575986851, - "grad_norm": 0.0, - "learning_rate": 2.4002634362821573e-06, - "loss": 0.7863, - "step": 27580 - }, - { - "epoch": 0.7815749950409476, - "grad_norm": 0.0, - "learning_rate": 2.3996669476001645e-06, - "loss": 0.777, - "step": 27581 - }, - { - "epoch": 0.78160333248321, - "grad_norm": 0.0, - "learning_rate": 2.3990705229386015e-06, - "loss": 0.9422, - "step": 27582 - }, - { - "epoch": 0.7816316699254725, - "grad_norm": 0.0, - "learning_rate": 2.398474162302488e-06, - "loss": 0.753, - "step": 27583 - }, - { - "epoch": 0.781660007367735, - "grad_norm": 0.0, - "learning_rate": 2.3978778656968472e-06, - "loss": 0.8007, - "step": 27584 - }, - { - "epoch": 0.7816883448099975, - "grad_norm": 0.0, - "learning_rate": 2.3972816331267056e-06, - "loss": 0.8011, - "step": 27585 - }, - { - "epoch": 0.7817166822522599, - "grad_norm": 0.0, - "learning_rate": 2.3966854645970838e-06, - "loss": 0.7269, - "step": 27586 - }, - { - "epoch": 0.7817450196945224, - "grad_norm": 0.0, - "learning_rate": 2.3960893601130075e-06, - "loss": 0.8297, - "step": 27587 - }, - { - "epoch": 0.7817733571367849, - "grad_norm": 0.0, - "learning_rate": 2.3954933196794905e-06, - "loss": 0.8787, - "step": 27588 - }, - { - "epoch": 0.7818016945790472, - "grad_norm": 0.0, - "learning_rate": 2.3948973433015564e-06, - "loss": 0.9092, - "step": 27589 - }, - { - "epoch": 0.7818300320213097, - "grad_norm": 0.0, - "learning_rate": 2.3943014309842294e-06, - "loss": 0.8394, - "step": 27590 - }, - { - "epoch": 0.7818583694635722, - "grad_norm": 0.0, - "learning_rate": 2.393705582732524e-06, - "loss": 0.823, - "step": 27591 - }, - { - "epoch": 0.7818867069058347, - "grad_norm": 0.0, - "learning_rate": 2.3931097985514627e-06, - "loss": 0.9499, - "step": 27592 - }, - { - "epoch": 0.7819150443480971, - "grad_norm": 0.0, - "learning_rate": 2.39251407844606e-06, - "loss": 0.8085, - "step": 27593 - }, - { - "epoch": 0.7819433817903596, - "grad_norm": 0.0, - "learning_rate": 2.3919184224213354e-06, - "loss": 0.7068, - "step": 27594 - }, - { - "epoch": 0.7819717192326221, - "grad_norm": 0.0, - "learning_rate": 2.391322830482311e-06, - "loss": 0.8722, - "step": 27595 - }, - { - "epoch": 0.7820000566748845, - "grad_norm": 0.0, - "learning_rate": 2.390727302633996e-06, - "loss": 0.7644, - "step": 27596 - }, - { - "epoch": 0.782028394117147, - "grad_norm": 0.0, - "learning_rate": 2.390131838881411e-06, - "loss": 0.8512, - "step": 27597 - }, - { - "epoch": 0.7820567315594095, - "grad_norm": 0.0, - "learning_rate": 2.3895364392295717e-06, - "loss": 0.7827, - "step": 27598 - }, - { - "epoch": 0.782085069001672, - "grad_norm": 0.0, - "learning_rate": 2.388941103683493e-06, - "loss": 0.6978, - "step": 27599 - }, - { - "epoch": 0.7821134064439343, - "grad_norm": 0.0, - "learning_rate": 2.3883458322481924e-06, - "loss": 0.7328, - "step": 27600 - }, - { - "epoch": 0.7821417438861968, - "grad_norm": 0.0, - "learning_rate": 2.3877506249286787e-06, - "loss": 0.7743, - "step": 27601 - }, - { - "epoch": 0.7821700813284593, - "grad_norm": 0.0, - "learning_rate": 2.387155481729967e-06, - "loss": 0.8029, - "step": 27602 - }, - { - "epoch": 0.7821984187707217, - "grad_norm": 0.0, - "learning_rate": 2.3865604026570755e-06, - "loss": 0.7813, - "step": 27603 - }, - { - "epoch": 0.7822267562129842, - "grad_norm": 0.0, - "learning_rate": 2.38596538771501e-06, - "loss": 0.7388, - "step": 27604 - }, - { - "epoch": 0.7822550936552467, - "grad_norm": 0.0, - "learning_rate": 2.385370436908786e-06, - "loss": 0.8267, - "step": 27605 - }, - { - "epoch": 0.7822834310975091, - "grad_norm": 0.0, - "learning_rate": 2.3847755502434166e-06, - "loss": 0.7637, - "step": 27606 - }, - { - "epoch": 0.7823117685397716, - "grad_norm": 0.0, - "learning_rate": 2.3841807277239083e-06, - "loss": 0.8279, - "step": 27607 - }, - { - "epoch": 0.7823401059820341, - "grad_norm": 0.0, - "learning_rate": 2.3835859693552754e-06, - "loss": 0.7292, - "step": 27608 - }, - { - "epoch": 0.7823684434242966, - "grad_norm": 0.0, - "learning_rate": 2.3829912751425244e-06, - "loss": 0.8709, - "step": 27609 - }, - { - "epoch": 0.782396780866559, - "grad_norm": 0.0, - "learning_rate": 2.3823966450906667e-06, - "loss": 0.8214, - "step": 27610 - }, - { - "epoch": 0.7824251183088214, - "grad_norm": 0.0, - "learning_rate": 2.381802079204709e-06, - "loss": 0.8642, - "step": 27611 - }, - { - "epoch": 0.7824534557510839, - "grad_norm": 0.0, - "learning_rate": 2.381207577489664e-06, - "loss": 0.8639, - "step": 27612 - }, - { - "epoch": 0.7824817931933463, - "grad_norm": 0.0, - "learning_rate": 2.380613139950535e-06, - "loss": 0.7994, - "step": 27613 - }, - { - "epoch": 0.7825101306356088, - "grad_norm": 0.0, - "learning_rate": 2.3800187665923337e-06, - "loss": 0.809, - "step": 27614 - }, - { - "epoch": 0.7825384680778713, - "grad_norm": 0.0, - "learning_rate": 2.3794244574200607e-06, - "loss": 0.7676, - "step": 27615 - }, - { - "epoch": 0.7825668055201338, - "grad_norm": 0.0, - "learning_rate": 2.3788302124387295e-06, - "loss": 0.7754, - "step": 27616 - }, - { - "epoch": 0.7825951429623962, - "grad_norm": 0.0, - "learning_rate": 2.378236031653338e-06, - "loss": 0.7556, - "step": 27617 - }, - { - "epoch": 0.7826234804046587, - "grad_norm": 0.0, - "learning_rate": 2.3776419150688947e-06, - "loss": 0.8349, - "step": 27618 - }, - { - "epoch": 0.7826518178469212, - "grad_norm": 0.0, - "learning_rate": 2.377047862690407e-06, - "loss": 0.7538, - "step": 27619 - }, - { - "epoch": 0.7826801552891836, - "grad_norm": 0.0, - "learning_rate": 2.376453874522873e-06, - "loss": 0.7768, - "step": 27620 - }, - { - "epoch": 0.782708492731446, - "grad_norm": 0.0, - "learning_rate": 2.375859950571302e-06, - "loss": 0.8001, - "step": 27621 - }, - { - "epoch": 0.7827368301737085, - "grad_norm": 0.0, - "learning_rate": 2.37526609084069e-06, - "loss": 0.8628, - "step": 27622 - }, - { - "epoch": 0.782765167615971, - "grad_norm": 0.0, - "learning_rate": 2.3746722953360437e-06, - "loss": 0.7625, - "step": 27623 - }, - { - "epoch": 0.7827935050582334, - "grad_norm": 0.0, - "learning_rate": 2.374078564062364e-06, - "loss": 0.8074, - "step": 27624 - }, - { - "epoch": 0.7828218425004959, - "grad_norm": 0.0, - "learning_rate": 2.3734848970246537e-06, - "loss": 0.8839, - "step": 27625 - }, - { - "epoch": 0.7828501799427584, - "grad_norm": 0.0, - "learning_rate": 2.3728912942279105e-06, - "loss": 0.8219, - "step": 27626 - }, - { - "epoch": 0.7828785173850208, - "grad_norm": 0.0, - "learning_rate": 2.37229775567714e-06, - "loss": 0.8412, - "step": 27627 - }, - { - "epoch": 0.7829068548272833, - "grad_norm": 0.0, - "learning_rate": 2.371704281377335e-06, - "loss": 0.7377, - "step": 27628 - }, - { - "epoch": 0.7829351922695458, - "grad_norm": 0.0, - "learning_rate": 2.3711108713334995e-06, - "loss": 0.8034, - "step": 27629 - }, - { - "epoch": 0.7829635297118082, - "grad_norm": 0.0, - "learning_rate": 2.3705175255506285e-06, - "loss": 0.8694, - "step": 27630 - }, - { - "epoch": 0.7829918671540707, - "grad_norm": 0.0, - "learning_rate": 2.369924244033721e-06, - "loss": 0.8463, - "step": 27631 - }, - { - "epoch": 0.7830202045963331, - "grad_norm": 0.0, - "learning_rate": 2.369331026787778e-06, - "loss": 0.6942, - "step": 27632 - }, - { - "epoch": 0.7830485420385956, - "grad_norm": 0.0, - "learning_rate": 2.368737873817789e-06, - "loss": 0.8897, - "step": 27633 - }, - { - "epoch": 0.783076879480858, - "grad_norm": 0.0, - "learning_rate": 2.3681447851287566e-06, - "loss": 0.8447, - "step": 27634 - }, - { - "epoch": 0.7831052169231205, - "grad_norm": 0.0, - "learning_rate": 2.367551760725677e-06, - "loss": 0.8709, - "step": 27635 - }, - { - "epoch": 0.783133554365383, - "grad_norm": 0.0, - "learning_rate": 2.3669588006135403e-06, - "loss": 0.8883, - "step": 27636 - }, - { - "epoch": 0.7831618918076454, - "grad_norm": 0.0, - "learning_rate": 2.366365904797343e-06, - "loss": 0.7403, - "step": 27637 - }, - { - "epoch": 0.7831902292499079, - "grad_norm": 0.0, - "learning_rate": 2.365773073282082e-06, - "loss": 0.8279, - "step": 27638 - }, - { - "epoch": 0.7832185666921704, - "grad_norm": 0.0, - "learning_rate": 2.3651803060727484e-06, - "loss": 0.7922, - "step": 27639 - }, - { - "epoch": 0.7832469041344329, - "grad_norm": 0.0, - "learning_rate": 2.3645876031743387e-06, - "loss": 0.7306, - "step": 27640 - }, - { - "epoch": 0.7832752415766953, - "grad_norm": 0.0, - "learning_rate": 2.3639949645918415e-06, - "loss": 0.8559, - "step": 27641 - }, - { - "epoch": 0.7833035790189578, - "grad_norm": 0.0, - "learning_rate": 2.3634023903302485e-06, - "loss": 0.7919, - "step": 27642 - }, - { - "epoch": 0.7833319164612202, - "grad_norm": 0.0, - "learning_rate": 2.3628098803945576e-06, - "loss": 0.7988, - "step": 27643 - }, - { - "epoch": 0.7833602539034826, - "grad_norm": 0.0, - "learning_rate": 2.362217434789751e-06, - "loss": 0.7939, - "step": 27644 - }, - { - "epoch": 0.7833885913457451, - "grad_norm": 0.0, - "learning_rate": 2.3616250535208263e-06, - "loss": 0.729, - "step": 27645 - }, - { - "epoch": 0.7834169287880076, - "grad_norm": 0.0, - "learning_rate": 2.3610327365927677e-06, - "loss": 0.7434, - "step": 27646 - }, - { - "epoch": 0.7834452662302701, - "grad_norm": 0.0, - "learning_rate": 2.3604404840105665e-06, - "loss": 0.7939, - "step": 27647 - }, - { - "epoch": 0.7834736036725325, - "grad_norm": 0.0, - "learning_rate": 2.3598482957792146e-06, - "loss": 0.8377, - "step": 27648 - }, - { - "epoch": 0.783501941114795, - "grad_norm": 0.0, - "learning_rate": 2.3592561719036956e-06, - "loss": 0.8018, - "step": 27649 - }, - { - "epoch": 0.7835302785570575, - "grad_norm": 0.0, - "learning_rate": 2.3586641123889984e-06, - "loss": 0.8894, - "step": 27650 - }, - { - "epoch": 0.7835586159993199, - "grad_norm": 0.0, - "learning_rate": 2.358072117240112e-06, - "loss": 0.8306, - "step": 27651 - }, - { - "epoch": 0.7835869534415824, - "grad_norm": 0.0, - "learning_rate": 2.357480186462021e-06, - "loss": 0.705, - "step": 27652 - }, - { - "epoch": 0.7836152908838449, - "grad_norm": 0.0, - "learning_rate": 2.356888320059716e-06, - "loss": 0.8229, - "step": 27653 - }, - { - "epoch": 0.7836436283261072, - "grad_norm": 0.0, - "learning_rate": 2.3562965180381746e-06, - "loss": 0.838, - "step": 27654 - }, - { - "epoch": 0.7836719657683697, - "grad_norm": 0.0, - "learning_rate": 2.355704780402387e-06, - "loss": 0.7015, - "step": 27655 - }, - { - "epoch": 0.7837003032106322, - "grad_norm": 0.0, - "learning_rate": 2.3551131071573397e-06, - "loss": 0.7659, - "step": 27656 - }, - { - "epoch": 0.7837286406528947, - "grad_norm": 0.0, - "learning_rate": 2.354521498308011e-06, - "loss": 0.8265, - "step": 27657 - }, - { - "epoch": 0.7837569780951571, - "grad_norm": 0.0, - "learning_rate": 2.3539299538593883e-06, - "loss": 0.783, - "step": 27658 - }, - { - "epoch": 0.7837853155374196, - "grad_norm": 0.0, - "learning_rate": 2.353338473816451e-06, - "loss": 0.7982, - "step": 27659 - }, - { - "epoch": 0.7838136529796821, - "grad_norm": 0.0, - "learning_rate": 2.3527470581841837e-06, - "loss": 0.9108, - "step": 27660 - }, - { - "epoch": 0.7838419904219445, - "grad_norm": 0.0, - "learning_rate": 2.3521557069675693e-06, - "loss": 0.9449, - "step": 27661 - }, - { - "epoch": 0.783870327864207, - "grad_norm": 0.0, - "learning_rate": 2.3515644201715858e-06, - "loss": 0.8467, - "step": 27662 - }, - { - "epoch": 0.7838986653064695, - "grad_norm": 0.0, - "learning_rate": 2.350973197801214e-06, - "loss": 0.777, - "step": 27663 - }, - { - "epoch": 0.783927002748732, - "grad_norm": 0.0, - "learning_rate": 2.3503820398614365e-06, - "loss": 0.7768, - "step": 27664 - }, - { - "epoch": 0.7839553401909943, - "grad_norm": 0.0, - "learning_rate": 2.349790946357231e-06, - "loss": 0.8707, - "step": 27665 - }, - { - "epoch": 0.7839836776332568, - "grad_norm": 0.0, - "learning_rate": 2.3491999172935807e-06, - "loss": 0.7836, - "step": 27666 - }, - { - "epoch": 0.7840120150755193, - "grad_norm": 0.0, - "learning_rate": 2.3486089526754572e-06, - "loss": 0.7652, - "step": 27667 - }, - { - "epoch": 0.7840403525177817, - "grad_norm": 0.0, - "learning_rate": 2.348018052507842e-06, - "loss": 0.8693, - "step": 27668 - }, - { - "epoch": 0.7840686899600442, - "grad_norm": 0.0, - "learning_rate": 2.3474272167957144e-06, - "loss": 0.8177, - "step": 27669 - }, - { - "epoch": 0.7840970274023067, - "grad_norm": 0.0, - "learning_rate": 2.3468364455440472e-06, - "loss": 0.7978, - "step": 27670 - }, - { - "epoch": 0.7841253648445692, - "grad_norm": 0.0, - "learning_rate": 2.3462457387578185e-06, - "loss": 0.8986, - "step": 27671 - }, - { - "epoch": 0.7841537022868316, - "grad_norm": 0.0, - "learning_rate": 2.3456550964420068e-06, - "loss": 0.8312, - "step": 27672 - }, - { - "epoch": 0.7841820397290941, - "grad_norm": 0.0, - "learning_rate": 2.3450645186015806e-06, - "loss": 0.8106, - "step": 27673 - }, - { - "epoch": 0.7842103771713566, - "grad_norm": 0.0, - "learning_rate": 2.3444740052415228e-06, - "loss": 0.768, - "step": 27674 - }, - { - "epoch": 0.7842387146136189, - "grad_norm": 0.0, - "learning_rate": 2.3438835563667993e-06, - "loss": 0.7443, - "step": 27675 - }, - { - "epoch": 0.7842670520558814, - "grad_norm": 0.0, - "learning_rate": 2.343293171982388e-06, - "loss": 0.6809, - "step": 27676 - }, - { - "epoch": 0.7842953894981439, - "grad_norm": 0.0, - "learning_rate": 2.3427028520932628e-06, - "loss": 0.8743, - "step": 27677 - }, - { - "epoch": 0.7843237269404063, - "grad_norm": 0.0, - "learning_rate": 2.3421125967043934e-06, - "loss": 0.8554, - "step": 27678 - }, - { - "epoch": 0.7843520643826688, - "grad_norm": 0.0, - "learning_rate": 2.3415224058207565e-06, - "loss": 0.806, - "step": 27679 - }, - { - "epoch": 0.7843804018249313, - "grad_norm": 0.0, - "learning_rate": 2.340932279447318e-06, - "loss": 0.7967, - "step": 27680 - }, - { - "epoch": 0.7844087392671938, - "grad_norm": 0.0, - "learning_rate": 2.3403422175890513e-06, - "loss": 0.8356, - "step": 27681 - }, - { - "epoch": 0.7844370767094562, - "grad_norm": 0.0, - "learning_rate": 2.3397522202509284e-06, - "loss": 0.8753, - "step": 27682 - }, - { - "epoch": 0.7844654141517187, - "grad_norm": 0.0, - "learning_rate": 2.339162287437914e-06, - "loss": 0.8019, - "step": 27683 - }, - { - "epoch": 0.7844937515939812, - "grad_norm": 0.0, - "learning_rate": 2.3385724191549807e-06, - "loss": 0.8907, - "step": 27684 - }, - { - "epoch": 0.7845220890362435, - "grad_norm": 0.0, - "learning_rate": 2.3379826154071006e-06, - "loss": 0.787, - "step": 27685 - }, - { - "epoch": 0.784550426478506, - "grad_norm": 0.0, - "learning_rate": 2.3373928761992347e-06, - "loss": 0.8446, - "step": 27686 - }, - { - "epoch": 0.7845787639207685, - "grad_norm": 0.0, - "learning_rate": 2.3368032015363564e-06, - "loss": 0.736, - "step": 27687 - }, - { - "epoch": 0.784607101363031, - "grad_norm": 0.0, - "learning_rate": 2.3362135914234284e-06, - "loss": 0.7096, - "step": 27688 - }, - { - "epoch": 0.7846354388052934, - "grad_norm": 0.0, - "learning_rate": 2.335624045865419e-06, - "loss": 0.8513, - "step": 27689 - }, - { - "epoch": 0.7846637762475559, - "grad_norm": 0.0, - "learning_rate": 2.3350345648672945e-06, - "loss": 0.8098, - "step": 27690 - }, - { - "epoch": 0.7846921136898184, - "grad_norm": 0.0, - "learning_rate": 2.334445148434019e-06, - "loss": 0.7664, - "step": 27691 - }, - { - "epoch": 0.7847204511320808, - "grad_norm": 0.0, - "learning_rate": 2.33385579657056e-06, - "loss": 0.8127, - "step": 27692 - }, - { - "epoch": 0.7847487885743433, - "grad_norm": 0.0, - "learning_rate": 2.333266509281883e-06, - "loss": 0.7906, - "step": 27693 - }, - { - "epoch": 0.7847771260166058, - "grad_norm": 0.0, - "learning_rate": 2.332677286572945e-06, - "loss": 0.8631, - "step": 27694 - }, - { - "epoch": 0.7848054634588683, - "grad_norm": 0.0, - "learning_rate": 2.332088128448717e-06, - "loss": 0.8105, - "step": 27695 - }, - { - "epoch": 0.7848338009011306, - "grad_norm": 0.0, - "learning_rate": 2.331499034914156e-06, - "loss": 0.7371, - "step": 27696 - }, - { - "epoch": 0.7848621383433931, - "grad_norm": 0.0, - "learning_rate": 2.330910005974226e-06, - "loss": 0.7923, - "step": 27697 - }, - { - "epoch": 0.7848904757856556, - "grad_norm": 0.0, - "learning_rate": 2.330321041633892e-06, - "loss": 0.7711, - "step": 27698 - }, - { - "epoch": 0.784918813227918, - "grad_norm": 0.0, - "learning_rate": 2.3297321418981077e-06, - "loss": 0.8709, - "step": 27699 - }, - { - "epoch": 0.7849471506701805, - "grad_norm": 0.0, - "learning_rate": 2.3291433067718385e-06, - "loss": 0.7805, - "step": 27700 - }, - { - "epoch": 0.784975488112443, - "grad_norm": 0.0, - "learning_rate": 2.328554536260047e-06, - "loss": 0.8282, - "step": 27701 - }, - { - "epoch": 0.7850038255547054, - "grad_norm": 0.0, - "learning_rate": 2.3279658303676868e-06, - "loss": 0.9275, - "step": 27702 - }, - { - "epoch": 0.7850321629969679, - "grad_norm": 0.0, - "learning_rate": 2.3273771890997187e-06, - "loss": 0.787, - "step": 27703 - }, - { - "epoch": 0.7850605004392304, - "grad_norm": 0.0, - "learning_rate": 2.3267886124611015e-06, - "loss": 0.6927, - "step": 27704 - }, - { - "epoch": 0.7850888378814929, - "grad_norm": 0.0, - "learning_rate": 2.3262001004567936e-06, - "loss": 0.8659, - "step": 27705 - }, - { - "epoch": 0.7851171753237552, - "grad_norm": 0.0, - "learning_rate": 2.325611653091755e-06, - "loss": 0.793, - "step": 27706 - }, - { - "epoch": 0.7851455127660177, - "grad_norm": 0.0, - "learning_rate": 2.3250232703709353e-06, - "loss": 0.7717, - "step": 27707 - }, - { - "epoch": 0.7851738502082802, - "grad_norm": 0.0, - "learning_rate": 2.324434952299298e-06, - "loss": 0.8262, - "step": 27708 - }, - { - "epoch": 0.7852021876505426, - "grad_norm": 0.0, - "learning_rate": 2.3238466988817934e-06, - "loss": 0.7922, - "step": 27709 - }, - { - "epoch": 0.7852305250928051, - "grad_norm": 0.0, - "learning_rate": 2.3232585101233775e-06, - "loss": 0.8392, - "step": 27710 - }, - { - "epoch": 0.7852588625350676, - "grad_norm": 0.0, - "learning_rate": 2.322670386029009e-06, - "loss": 0.822, - "step": 27711 - }, - { - "epoch": 0.7852871999773301, - "grad_norm": 0.0, - "learning_rate": 2.322082326603636e-06, - "loss": 0.6776, - "step": 27712 - }, - { - "epoch": 0.7853155374195925, - "grad_norm": 0.0, - "learning_rate": 2.3214943318522143e-06, - "loss": 0.7844, - "step": 27713 - }, - { - "epoch": 0.785343874861855, - "grad_norm": 0.0, - "learning_rate": 2.3209064017797014e-06, - "loss": 0.7267, - "step": 27714 - }, - { - "epoch": 0.7853722123041175, - "grad_norm": 0.0, - "learning_rate": 2.3203185363910408e-06, - "loss": 0.8209, - "step": 27715 - }, - { - "epoch": 0.7854005497463799, - "grad_norm": 0.0, - "learning_rate": 2.31973073569119e-06, - "loss": 0.8119, - "step": 27716 - }, - { - "epoch": 0.7854288871886423, - "grad_norm": 0.0, - "learning_rate": 2.319142999685099e-06, - "loss": 0.81, - "step": 27717 - }, - { - "epoch": 0.7854572246309048, - "grad_norm": 0.0, - "learning_rate": 2.3185553283777185e-06, - "loss": 0.7736, - "step": 27718 - }, - { - "epoch": 0.7854855620731672, - "grad_norm": 0.0, - "learning_rate": 2.3179677217740015e-06, - "loss": 0.7933, - "step": 27719 - }, - { - "epoch": 0.7855138995154297, - "grad_norm": 0.0, - "learning_rate": 2.317380179878893e-06, - "loss": 0.8244, - "step": 27720 - }, - { - "epoch": 0.7855422369576922, - "grad_norm": 0.0, - "learning_rate": 2.3167927026973436e-06, - "loss": 0.7208, - "step": 27721 - }, - { - "epoch": 0.7855705743999547, - "grad_norm": 0.0, - "learning_rate": 2.3162052902343044e-06, - "loss": 0.8596, - "step": 27722 - }, - { - "epoch": 0.7855989118422171, - "grad_norm": 0.0, - "learning_rate": 2.31561794249472e-06, - "loss": 0.8975, - "step": 27723 - }, - { - "epoch": 0.7856272492844796, - "grad_norm": 0.0, - "learning_rate": 2.3150306594835413e-06, - "loss": 0.7167, - "step": 27724 - }, - { - "epoch": 0.7856555867267421, - "grad_norm": 0.0, - "learning_rate": 2.3144434412057106e-06, - "loss": 0.8966, - "step": 27725 - }, - { - "epoch": 0.7856839241690045, - "grad_norm": 0.0, - "learning_rate": 2.3138562876661765e-06, - "loss": 0.7815, - "step": 27726 - }, - { - "epoch": 0.785712261611267, - "grad_norm": 0.0, - "learning_rate": 2.3132691988698874e-06, - "loss": 0.6661, - "step": 27727 - }, - { - "epoch": 0.7857405990535294, - "grad_norm": 0.0, - "learning_rate": 2.3126821748217844e-06, - "loss": 0.8576, - "step": 27728 - }, - { - "epoch": 0.7857689364957919, - "grad_norm": 0.0, - "learning_rate": 2.312095215526814e-06, - "loss": 0.7391, - "step": 27729 - }, - { - "epoch": 0.7857972739380543, - "grad_norm": 0.0, - "learning_rate": 2.3115083209899193e-06, - "loss": 0.7028, - "step": 27730 - }, - { - "epoch": 0.7858256113803168, - "grad_norm": 0.0, - "learning_rate": 2.3109214912160462e-06, - "loss": 0.7176, - "step": 27731 - }, - { - "epoch": 0.7858539488225793, - "grad_norm": 0.0, - "learning_rate": 2.3103347262101392e-06, - "loss": 0.7652, - "step": 27732 - }, - { - "epoch": 0.7858822862648417, - "grad_norm": 0.0, - "learning_rate": 2.3097480259771352e-06, - "loss": 0.7875, - "step": 27733 - }, - { - "epoch": 0.7859106237071042, - "grad_norm": 0.0, - "learning_rate": 2.309161390521979e-06, - "loss": 0.718, - "step": 27734 - }, - { - "epoch": 0.7859389611493667, - "grad_norm": 0.0, - "learning_rate": 2.308574819849616e-06, - "loss": 0.9418, - "step": 27735 - }, - { - "epoch": 0.7859672985916292, - "grad_norm": 0.0, - "learning_rate": 2.3079883139649806e-06, - "loss": 0.8501, - "step": 27736 - }, - { - "epoch": 0.7859956360338916, - "grad_norm": 0.0, - "learning_rate": 2.3074018728730175e-06, - "loss": 0.8933, - "step": 27737 - }, - { - "epoch": 0.786023973476154, - "grad_norm": 0.0, - "learning_rate": 2.3068154965786637e-06, - "loss": 0.8646, - "step": 27738 - }, - { - "epoch": 0.7860523109184165, - "grad_norm": 0.0, - "learning_rate": 2.306229185086859e-06, - "loss": 0.7315, - "step": 27739 - }, - { - "epoch": 0.7860806483606789, - "grad_norm": 0.0, - "learning_rate": 2.3056429384025424e-06, - "loss": 0.8324, - "step": 27740 - }, - { - "epoch": 0.7861089858029414, - "grad_norm": 0.0, - "learning_rate": 2.305056756530657e-06, - "loss": 0.8242, - "step": 27741 - }, - { - "epoch": 0.7861373232452039, - "grad_norm": 0.0, - "learning_rate": 2.3044706394761316e-06, - "loss": 0.6463, - "step": 27742 - }, - { - "epoch": 0.7861656606874663, - "grad_norm": 0.0, - "learning_rate": 2.303884587243909e-06, - "loss": 0.7773, - "step": 27743 - }, - { - "epoch": 0.7861939981297288, - "grad_norm": 0.0, - "learning_rate": 2.3032985998389236e-06, - "loss": 0.8306, - "step": 27744 - }, - { - "epoch": 0.7862223355719913, - "grad_norm": 0.0, - "learning_rate": 2.3027126772661146e-06, - "loss": 0.8609, - "step": 27745 - }, - { - "epoch": 0.7862506730142538, - "grad_norm": 0.0, - "learning_rate": 2.3021268195304124e-06, - "loss": 0.8503, - "step": 27746 - }, - { - "epoch": 0.7862790104565162, - "grad_norm": 0.0, - "learning_rate": 2.3015410266367543e-06, - "loss": 0.892, - "step": 27747 - }, - { - "epoch": 0.7863073478987787, - "grad_norm": 0.0, - "learning_rate": 2.3009552985900786e-06, - "loss": 0.7899, - "step": 27748 - }, - { - "epoch": 0.7863356853410411, - "grad_norm": 0.0, - "learning_rate": 2.300369635395312e-06, - "loss": 0.817, - "step": 27749 - }, - { - "epoch": 0.7863640227833035, - "grad_norm": 0.0, - "learning_rate": 2.299784037057392e-06, - "loss": 0.729, - "step": 27750 - }, - { - "epoch": 0.786392360225566, - "grad_norm": 0.0, - "learning_rate": 2.2991985035812523e-06, - "loss": 0.8152, - "step": 27751 - }, - { - "epoch": 0.7864206976678285, - "grad_norm": 0.0, - "learning_rate": 2.2986130349718203e-06, - "loss": 0.7179, - "step": 27752 - }, - { - "epoch": 0.786449035110091, - "grad_norm": 0.0, - "learning_rate": 2.298027631234031e-06, - "loss": 0.8984, - "step": 27753 - }, - { - "epoch": 0.7864773725523534, - "grad_norm": 0.0, - "learning_rate": 2.2974422923728155e-06, - "loss": 0.6956, - "step": 27754 - }, - { - "epoch": 0.7865057099946159, - "grad_norm": 0.0, - "learning_rate": 2.2968570183931026e-06, - "loss": 0.7459, - "step": 27755 - }, - { - "epoch": 0.7865340474368784, - "grad_norm": 0.0, - "learning_rate": 2.296271809299828e-06, - "loss": 0.7996, - "step": 27756 - }, - { - "epoch": 0.7865623848791408, - "grad_norm": 0.0, - "learning_rate": 2.2956866650979125e-06, - "loss": 0.821, - "step": 27757 - }, - { - "epoch": 0.7865907223214033, - "grad_norm": 0.0, - "learning_rate": 2.2951015857922896e-06, - "loss": 0.8072, - "step": 27758 - }, - { - "epoch": 0.7866190597636658, - "grad_norm": 0.0, - "learning_rate": 2.29451657138789e-06, - "loss": 0.8392, - "step": 27759 - }, - { - "epoch": 0.7866473972059282, - "grad_norm": 0.0, - "learning_rate": 2.2939316218896357e-06, - "loss": 0.8399, - "step": 27760 - }, - { - "epoch": 0.7866757346481906, - "grad_norm": 0.0, - "learning_rate": 2.29334673730246e-06, - "loss": 0.7421, - "step": 27761 - }, - { - "epoch": 0.7867040720904531, - "grad_norm": 0.0, - "learning_rate": 2.292761917631283e-06, - "loss": 0.8859, - "step": 27762 - }, - { - "epoch": 0.7867324095327156, - "grad_norm": 0.0, - "learning_rate": 2.292177162881035e-06, - "loss": 0.7326, - "step": 27763 - }, - { - "epoch": 0.786760746974978, - "grad_norm": 0.0, - "learning_rate": 2.291592473056644e-06, - "loss": 0.8481, - "step": 27764 - }, - { - "epoch": 0.7867890844172405, - "grad_norm": 0.0, - "learning_rate": 2.2910078481630295e-06, - "loss": 0.8109, - "step": 27765 - }, - { - "epoch": 0.786817421859503, - "grad_norm": 0.0, - "learning_rate": 2.2904232882051182e-06, - "loss": 0.7915, - "step": 27766 - }, - { - "epoch": 0.7868457593017654, - "grad_norm": 0.0, - "learning_rate": 2.289838793187834e-06, - "loss": 0.8456, - "step": 27767 - }, - { - "epoch": 0.7868740967440279, - "grad_norm": 0.0, - "learning_rate": 2.289254363116101e-06, - "loss": 0.8554, - "step": 27768 - }, - { - "epoch": 0.7869024341862904, - "grad_norm": 0.0, - "learning_rate": 2.2886699979948445e-06, - "loss": 0.8539, - "step": 27769 - }, - { - "epoch": 0.7869307716285528, - "grad_norm": 0.0, - "learning_rate": 2.2880856978289813e-06, - "loss": 0.8998, - "step": 27770 - }, - { - "epoch": 0.7869591090708152, - "grad_norm": 0.0, - "learning_rate": 2.287501462623436e-06, - "loss": 0.7713, - "step": 27771 - }, - { - "epoch": 0.7869874465130777, - "grad_norm": 0.0, - "learning_rate": 2.286917292383133e-06, - "loss": 0.7118, - "step": 27772 - }, - { - "epoch": 0.7870157839553402, - "grad_norm": 0.0, - "learning_rate": 2.286333187112987e-06, - "loss": 0.7968, - "step": 27773 - }, - { - "epoch": 0.7870441213976026, - "grad_norm": 0.0, - "learning_rate": 2.285749146817924e-06, - "loss": 0.7195, - "step": 27774 - }, - { - "epoch": 0.7870724588398651, - "grad_norm": 0.0, - "learning_rate": 2.2851651715028565e-06, - "loss": 0.8528, - "step": 27775 - }, - { - "epoch": 0.7871007962821276, - "grad_norm": 0.0, - "learning_rate": 2.284581261172709e-06, - "loss": 0.8428, - "step": 27776 - }, - { - "epoch": 0.7871291337243901, - "grad_norm": 0.0, - "learning_rate": 2.2839974158324006e-06, - "loss": 0.8504, - "step": 27777 - }, - { - "epoch": 0.7871574711666525, - "grad_norm": 0.0, - "learning_rate": 2.2834136354868454e-06, - "loss": 0.8683, - "step": 27778 - }, - { - "epoch": 0.787185808608915, - "grad_norm": 0.0, - "learning_rate": 2.282829920140962e-06, - "loss": 0.8557, - "step": 27779 - }, - { - "epoch": 0.7872141460511775, - "grad_norm": 0.0, - "learning_rate": 2.2822462697996672e-06, - "loss": 0.7354, - "step": 27780 - }, - { - "epoch": 0.7872424834934398, - "grad_norm": 0.0, - "learning_rate": 2.2816626844678792e-06, - "loss": 0.807, - "step": 27781 - }, - { - "epoch": 0.7872708209357023, - "grad_norm": 0.0, - "learning_rate": 2.2810791641505147e-06, - "loss": 0.8528, - "step": 27782 - }, - { - "epoch": 0.7872991583779648, - "grad_norm": 0.0, - "learning_rate": 2.2804957088524837e-06, - "loss": 0.8797, - "step": 27783 - }, - { - "epoch": 0.7873274958202273, - "grad_norm": 0.0, - "learning_rate": 2.2799123185787043e-06, - "loss": 0.8013, - "step": 27784 - }, - { - "epoch": 0.7873558332624897, - "grad_norm": 0.0, - "learning_rate": 2.279328993334092e-06, - "loss": 0.808, - "step": 27785 - }, - { - "epoch": 0.7873841707047522, - "grad_norm": 0.0, - "learning_rate": 2.278745733123557e-06, - "loss": 0.7298, - "step": 27786 - }, - { - "epoch": 0.7874125081470147, - "grad_norm": 0.0, - "learning_rate": 2.2781625379520146e-06, - "loss": 0.7767, - "step": 27787 - }, - { - "epoch": 0.7874408455892771, - "grad_norm": 0.0, - "learning_rate": 2.2775794078243786e-06, - "loss": 0.7676, - "step": 27788 - }, - { - "epoch": 0.7874691830315396, - "grad_norm": 0.0, - "learning_rate": 2.2769963427455555e-06, - "loss": 0.8017, - "step": 27789 - }, - { - "epoch": 0.7874975204738021, - "grad_norm": 0.0, - "learning_rate": 2.2764133427204628e-06, - "loss": 0.8115, - "step": 27790 - }, - { - "epoch": 0.7875258579160644, - "grad_norm": 0.0, - "learning_rate": 2.275830407754006e-06, - "loss": 0.8305, - "step": 27791 - }, - { - "epoch": 0.7875541953583269, - "grad_norm": 0.0, - "learning_rate": 2.2752475378510985e-06, - "loss": 0.8613, - "step": 27792 - }, - { - "epoch": 0.7875825328005894, - "grad_norm": 0.0, - "learning_rate": 2.274664733016649e-06, - "loss": 0.8097, - "step": 27793 - }, - { - "epoch": 0.7876108702428519, - "grad_norm": 0.0, - "learning_rate": 2.274081993255568e-06, - "loss": 0.7579, - "step": 27794 - }, - { - "epoch": 0.7876392076851143, - "grad_norm": 0.0, - "learning_rate": 2.273499318572766e-06, - "loss": 0.6508, - "step": 27795 - }, - { - "epoch": 0.7876675451273768, - "grad_norm": 0.0, - "learning_rate": 2.272916708973145e-06, - "loss": 0.8677, - "step": 27796 - }, - { - "epoch": 0.7876958825696393, - "grad_norm": 0.0, - "learning_rate": 2.272334164461616e-06, - "loss": 0.8095, - "step": 27797 - }, - { - "epoch": 0.7877242200119017, - "grad_norm": 0.0, - "learning_rate": 2.2717516850430885e-06, - "loss": 0.8111, - "step": 27798 - }, - { - "epoch": 0.7877525574541642, - "grad_norm": 0.0, - "learning_rate": 2.271169270722464e-06, - "loss": 0.8914, - "step": 27799 - }, - { - "epoch": 0.7877808948964267, - "grad_norm": 0.0, - "learning_rate": 2.2705869215046506e-06, - "loss": 0.7763, - "step": 27800 - }, - { - "epoch": 0.7878092323386892, - "grad_norm": 0.0, - "learning_rate": 2.2700046373945573e-06, - "loss": 0.9079, - "step": 27801 - }, - { - "epoch": 0.7878375697809515, - "grad_norm": 0.0, - "learning_rate": 2.2694224183970815e-06, - "loss": 0.8079, - "step": 27802 - }, - { - "epoch": 0.787865907223214, - "grad_norm": 0.0, - "learning_rate": 2.268840264517135e-06, - "loss": 0.8582, - "step": 27803 - }, - { - "epoch": 0.7878942446654765, - "grad_norm": 0.0, - "learning_rate": 2.2682581757596144e-06, - "loss": 0.8407, - "step": 27804 - }, - { - "epoch": 0.7879225821077389, - "grad_norm": 0.0, - "learning_rate": 2.2676761521294264e-06, - "loss": 0.851, - "step": 27805 - }, - { - "epoch": 0.7879509195500014, - "grad_norm": 0.0, - "learning_rate": 2.267094193631474e-06, - "loss": 0.8746, - "step": 27806 - }, - { - "epoch": 0.7879792569922639, - "grad_norm": 0.0, - "learning_rate": 2.266512300270658e-06, - "loss": 0.7885, - "step": 27807 - }, - { - "epoch": 0.7880075944345264, - "grad_norm": 0.0, - "learning_rate": 2.2659304720518814e-06, - "loss": 0.7238, - "step": 27808 - }, - { - "epoch": 0.7880359318767888, - "grad_norm": 0.0, - "learning_rate": 2.265348708980046e-06, - "loss": 0.8072, - "step": 27809 - }, - { - "epoch": 0.7880642693190513, - "grad_norm": 0.0, - "learning_rate": 2.2647670110600493e-06, - "loss": 0.8365, - "step": 27810 - }, - { - "epoch": 0.7880926067613138, - "grad_norm": 0.0, - "learning_rate": 2.264185378296795e-06, - "loss": 0.8773, - "step": 27811 - }, - { - "epoch": 0.7881209442035761, - "grad_norm": 0.0, - "learning_rate": 2.2636038106951763e-06, - "loss": 0.6928, - "step": 27812 - }, - { - "epoch": 0.7881492816458386, - "grad_norm": 0.0, - "learning_rate": 2.2630223082600965e-06, - "loss": 0.9026, - "step": 27813 - }, - { - "epoch": 0.7881776190881011, - "grad_norm": 0.0, - "learning_rate": 2.262440870996455e-06, - "loss": 0.8035, - "step": 27814 - }, - { - "epoch": 0.7882059565303635, - "grad_norm": 0.0, - "learning_rate": 2.2618594989091447e-06, - "loss": 0.843, - "step": 27815 - }, - { - "epoch": 0.788234293972626, - "grad_norm": 0.0, - "learning_rate": 2.2612781920030658e-06, - "loss": 0.7859, - "step": 27816 - }, - { - "epoch": 0.7882626314148885, - "grad_norm": 0.0, - "learning_rate": 2.2606969502831165e-06, - "loss": 0.9004, - "step": 27817 - }, - { - "epoch": 0.788290968857151, - "grad_norm": 0.0, - "learning_rate": 2.260115773754188e-06, - "loss": 0.8237, - "step": 27818 - }, - { - "epoch": 0.7883193062994134, - "grad_norm": 0.0, - "learning_rate": 2.259534662421179e-06, - "loss": 0.8349, - "step": 27819 - }, - { - "epoch": 0.7883476437416759, - "grad_norm": 0.0, - "learning_rate": 2.2589536162889824e-06, - "loss": 0.8462, - "step": 27820 - }, - { - "epoch": 0.7883759811839384, - "grad_norm": 0.0, - "learning_rate": 2.2583726353624956e-06, - "loss": 0.9072, - "step": 27821 - }, - { - "epoch": 0.7884043186262008, - "grad_norm": 0.0, - "learning_rate": 2.257791719646614e-06, - "loss": 0.8437, - "step": 27822 - }, - { - "epoch": 0.7884326560684632, - "grad_norm": 0.0, - "learning_rate": 2.2572108691462234e-06, - "loss": 0.8454, - "step": 27823 - }, - { - "epoch": 0.7884609935107257, - "grad_norm": 0.0, - "learning_rate": 2.2566300838662247e-06, - "loss": 0.8285, - "step": 27824 - }, - { - "epoch": 0.7884893309529882, - "grad_norm": 0.0, - "learning_rate": 2.2560493638115034e-06, - "loss": 0.9254, - "step": 27825 - }, - { - "epoch": 0.7885176683952506, - "grad_norm": 0.0, - "learning_rate": 2.2554687089869533e-06, - "loss": 0.8199, - "step": 27826 - }, - { - "epoch": 0.7885460058375131, - "grad_norm": 0.0, - "learning_rate": 2.2548881193974694e-06, - "loss": 0.9413, - "step": 27827 - }, - { - "epoch": 0.7885743432797756, - "grad_norm": 0.0, - "learning_rate": 2.2543075950479356e-06, - "loss": 0.7939, - "step": 27828 - }, - { - "epoch": 0.788602680722038, - "grad_norm": 0.0, - "learning_rate": 2.2537271359432457e-06, - "loss": 0.7579, - "step": 27829 - }, - { - "epoch": 0.7886310181643005, - "grad_norm": 0.0, - "learning_rate": 2.2531467420882913e-06, - "loss": 0.8076, - "step": 27830 - }, - { - "epoch": 0.788659355606563, - "grad_norm": 0.0, - "learning_rate": 2.2525664134879565e-06, - "loss": 0.7624, - "step": 27831 - }, - { - "epoch": 0.7886876930488255, - "grad_norm": 0.0, - "learning_rate": 2.2519861501471306e-06, - "loss": 0.834, - "step": 27832 - }, - { - "epoch": 0.7887160304910878, - "grad_norm": 0.0, - "learning_rate": 2.2514059520707033e-06, - "loss": 0.8336, - "step": 27833 - }, - { - "epoch": 0.7887443679333503, - "grad_norm": 0.0, - "learning_rate": 2.2508258192635614e-06, - "loss": 0.8461, - "step": 27834 - }, - { - "epoch": 0.7887727053756128, - "grad_norm": 0.0, - "learning_rate": 2.250245751730593e-06, - "loss": 0.8293, - "step": 27835 - }, - { - "epoch": 0.7888010428178752, - "grad_norm": 0.0, - "learning_rate": 2.2496657494766805e-06, - "loss": 0.7599, - "step": 27836 - }, - { - "epoch": 0.7888293802601377, - "grad_norm": 0.0, - "learning_rate": 2.2490858125067103e-06, - "loss": 0.8573, - "step": 27837 - }, - { - "epoch": 0.7888577177024002, - "grad_norm": 0.0, - "learning_rate": 2.2485059408255726e-06, - "loss": 0.9152, - "step": 27838 - }, - { - "epoch": 0.7888860551446626, - "grad_norm": 0.0, - "learning_rate": 2.247926134438144e-06, - "loss": 0.7666, - "step": 27839 - }, - { - "epoch": 0.7889143925869251, - "grad_norm": 0.0, - "learning_rate": 2.2473463933493157e-06, - "loss": 0.9085, - "step": 27840 - }, - { - "epoch": 0.7889427300291876, - "grad_norm": 0.0, - "learning_rate": 2.246766717563964e-06, - "loss": 0.8306, - "step": 27841 - }, - { - "epoch": 0.7889710674714501, - "grad_norm": 0.0, - "learning_rate": 2.246187107086977e-06, - "loss": 0.9183, - "step": 27842 - }, - { - "epoch": 0.7889994049137125, - "grad_norm": 0.0, - "learning_rate": 2.2456075619232366e-06, - "loss": 0.8498, - "step": 27843 - }, - { - "epoch": 0.789027742355975, - "grad_norm": 0.0, - "learning_rate": 2.2450280820776205e-06, - "loss": 0.7895, - "step": 27844 - }, - { - "epoch": 0.7890560797982374, - "grad_norm": 0.0, - "learning_rate": 2.2444486675550125e-06, - "loss": 0.8776, - "step": 27845 - }, - { - "epoch": 0.7890844172404998, - "grad_norm": 0.0, - "learning_rate": 2.2438693183602945e-06, - "loss": 0.8309, - "step": 27846 - }, - { - "epoch": 0.7891127546827623, - "grad_norm": 0.0, - "learning_rate": 2.2432900344983445e-06, - "loss": 0.8368, - "step": 27847 - }, - { - "epoch": 0.7891410921250248, - "grad_norm": 0.0, - "learning_rate": 2.242710815974045e-06, - "loss": 0.8792, - "step": 27848 - }, - { - "epoch": 0.7891694295672873, - "grad_norm": 0.0, - "learning_rate": 2.242131662792272e-06, - "loss": 0.9001, - "step": 27849 - }, - { - "epoch": 0.7891977670095497, - "grad_norm": 0.0, - "learning_rate": 2.2415525749579036e-06, - "loss": 0.9062, - "step": 27850 - }, - { - "epoch": 0.7892261044518122, - "grad_norm": 0.0, - "learning_rate": 2.240973552475821e-06, - "loss": 0.9113, - "step": 27851 - }, - { - "epoch": 0.7892544418940747, - "grad_norm": 0.0, - "learning_rate": 2.2403945953508975e-06, - "loss": 0.8533, - "step": 27852 - }, - { - "epoch": 0.7892827793363371, - "grad_norm": 0.0, - "learning_rate": 2.2398157035880154e-06, - "loss": 0.8185, - "step": 27853 - }, - { - "epoch": 0.7893111167785996, - "grad_norm": 0.0, - "learning_rate": 2.2392368771920435e-06, - "loss": 0.869, - "step": 27854 - }, - { - "epoch": 0.789339454220862, - "grad_norm": 0.0, - "learning_rate": 2.238658116167861e-06, - "loss": 0.8642, - "step": 27855 - }, - { - "epoch": 0.7893677916631245, - "grad_norm": 0.0, - "learning_rate": 2.2380794205203473e-06, - "loss": 0.7887, - "step": 27856 - }, - { - "epoch": 0.7893961291053869, - "grad_norm": 0.0, - "learning_rate": 2.237500790254369e-06, - "loss": 0.7719, - "step": 27857 - }, - { - "epoch": 0.7894244665476494, - "grad_norm": 0.0, - "learning_rate": 2.2369222253748046e-06, - "loss": 0.8893, - "step": 27858 - }, - { - "epoch": 0.7894528039899119, - "grad_norm": 0.0, - "learning_rate": 2.2363437258865273e-06, - "loss": 0.866, - "step": 27859 - }, - { - "epoch": 0.7894811414321743, - "grad_norm": 0.0, - "learning_rate": 2.2357652917944083e-06, - "loss": 0.8224, - "step": 27860 - }, - { - "epoch": 0.7895094788744368, - "grad_norm": 0.0, - "learning_rate": 2.2351869231033265e-06, - "loss": 0.7415, - "step": 27861 - }, - { - "epoch": 0.7895378163166993, - "grad_norm": 0.0, - "learning_rate": 2.234608619818144e-06, - "loss": 0.7966, - "step": 27862 - }, - { - "epoch": 0.7895661537589617, - "grad_norm": 0.0, - "learning_rate": 2.234030381943737e-06, - "loss": 0.9384, - "step": 27863 - }, - { - "epoch": 0.7895944912012242, - "grad_norm": 0.0, - "learning_rate": 2.2334522094849798e-06, - "loss": 0.7777, - "step": 27864 - }, - { - "epoch": 0.7896228286434867, - "grad_norm": 0.0, - "learning_rate": 2.232874102446735e-06, - "loss": 0.9527, - "step": 27865 - }, - { - "epoch": 0.7896511660857491, - "grad_norm": 0.0, - "learning_rate": 2.2322960608338763e-06, - "loss": 0.7546, - "step": 27866 - }, - { - "epoch": 0.7896795035280115, - "grad_norm": 0.0, - "learning_rate": 2.2317180846512744e-06, - "loss": 0.7455, - "step": 27867 - }, - { - "epoch": 0.789707840970274, - "grad_norm": 0.0, - "learning_rate": 2.231140173903793e-06, - "loss": 0.7388, - "step": 27868 - }, - { - "epoch": 0.7897361784125365, - "grad_norm": 0.0, - "learning_rate": 2.230562328596306e-06, - "loss": 0.8696, - "step": 27869 - }, - { - "epoch": 0.7897645158547989, - "grad_norm": 0.0, - "learning_rate": 2.2299845487336746e-06, - "loss": 0.7573, - "step": 27870 - }, - { - "epoch": 0.7897928532970614, - "grad_norm": 0.0, - "learning_rate": 2.22940683432077e-06, - "loss": 0.8859, - "step": 27871 - }, - { - "epoch": 0.7898211907393239, - "grad_norm": 0.0, - "learning_rate": 2.2288291853624556e-06, - "loss": 0.7762, - "step": 27872 - }, - { - "epoch": 0.7898495281815864, - "grad_norm": 0.0, - "learning_rate": 2.2282516018635992e-06, - "loss": 0.8092, - "step": 27873 - }, - { - "epoch": 0.7898778656238488, - "grad_norm": 0.0, - "learning_rate": 2.2276740838290678e-06, - "loss": 0.8544, - "step": 27874 - }, - { - "epoch": 0.7899062030661113, - "grad_norm": 0.0, - "learning_rate": 2.227096631263722e-06, - "loss": 0.7191, - "step": 27875 - }, - { - "epoch": 0.7899345405083738, - "grad_norm": 0.0, - "learning_rate": 2.2265192441724272e-06, - "loss": 0.8557, - "step": 27876 - }, - { - "epoch": 0.7899628779506361, - "grad_norm": 0.0, - "learning_rate": 2.2259419225600497e-06, - "loss": 0.7273, - "step": 27877 - }, - { - "epoch": 0.7899912153928986, - "grad_norm": 0.0, - "learning_rate": 2.2253646664314488e-06, - "loss": 0.9011, - "step": 27878 - }, - { - "epoch": 0.7900195528351611, - "grad_norm": 0.0, - "learning_rate": 2.2247874757914865e-06, - "loss": 0.8932, - "step": 27879 - }, - { - "epoch": 0.7900478902774236, - "grad_norm": 0.0, - "learning_rate": 2.2242103506450307e-06, - "loss": 0.8802, - "step": 27880 - }, - { - "epoch": 0.790076227719686, - "grad_norm": 0.0, - "learning_rate": 2.2236332909969362e-06, - "loss": 0.8753, - "step": 27881 - }, - { - "epoch": 0.7901045651619485, - "grad_norm": 0.0, - "learning_rate": 2.2230562968520675e-06, - "loss": 0.781, - "step": 27882 - }, - { - "epoch": 0.790132902604211, - "grad_norm": 0.0, - "learning_rate": 2.222479368215281e-06, - "loss": 0.7885, - "step": 27883 - }, - { - "epoch": 0.7901612400464734, - "grad_norm": 0.0, - "learning_rate": 2.22190250509144e-06, - "loss": 0.7742, - "step": 27884 - }, - { - "epoch": 0.7901895774887359, - "grad_norm": 0.0, - "learning_rate": 2.221325707485401e-06, - "loss": 0.7878, - "step": 27885 - }, - { - "epoch": 0.7902179149309984, - "grad_norm": 0.0, - "learning_rate": 2.220748975402025e-06, - "loss": 0.8234, - "step": 27886 - }, - { - "epoch": 0.7902462523732607, - "grad_norm": 0.0, - "learning_rate": 2.2201723088461693e-06, - "loss": 0.8032, - "step": 27887 - }, - { - "epoch": 0.7902745898155232, - "grad_norm": 0.0, - "learning_rate": 2.2195957078226935e-06, - "loss": 0.7184, - "step": 27888 - }, - { - "epoch": 0.7903029272577857, - "grad_norm": 0.0, - "learning_rate": 2.2190191723364495e-06, - "loss": 0.9767, - "step": 27889 - }, - { - "epoch": 0.7903312647000482, - "grad_norm": 0.0, - "learning_rate": 2.2184427023922994e-06, - "loss": 0.6841, - "step": 27890 - }, - { - "epoch": 0.7903596021423106, - "grad_norm": 0.0, - "learning_rate": 2.217866297995094e-06, - "loss": 0.7687, - "step": 27891 - }, - { - "epoch": 0.7903879395845731, - "grad_norm": 0.0, - "learning_rate": 2.21728995914969e-06, - "loss": 0.9306, - "step": 27892 - }, - { - "epoch": 0.7904162770268356, - "grad_norm": 0.0, - "learning_rate": 2.216713685860945e-06, - "loss": 0.7979, - "step": 27893 - }, - { - "epoch": 0.790444614469098, - "grad_norm": 0.0, - "learning_rate": 2.2161374781337084e-06, - "loss": 0.8131, - "step": 27894 - }, - { - "epoch": 0.7904729519113605, - "grad_norm": 0.0, - "learning_rate": 2.2155613359728356e-06, - "loss": 0.7997, - "step": 27895 - }, - { - "epoch": 0.790501289353623, - "grad_norm": 0.0, - "learning_rate": 2.2149852593831845e-06, - "loss": 0.8899, - "step": 27896 - }, - { - "epoch": 0.7905296267958855, - "grad_norm": 0.0, - "learning_rate": 2.2144092483696e-06, - "loss": 0.809, - "step": 27897 - }, - { - "epoch": 0.7905579642381478, - "grad_norm": 0.0, - "learning_rate": 2.2138333029369376e-06, - "loss": 0.8841, - "step": 27898 - }, - { - "epoch": 0.7905863016804103, - "grad_norm": 0.0, - "learning_rate": 2.2132574230900484e-06, - "loss": 0.7757, - "step": 27899 - }, - { - "epoch": 0.7906146391226728, - "grad_norm": 0.0, - "learning_rate": 2.2126816088337834e-06, - "loss": 0.8903, - "step": 27900 - }, - { - "epoch": 0.7906429765649352, - "grad_norm": 0.0, - "learning_rate": 2.212105860172996e-06, - "loss": 0.8128, - "step": 27901 - }, - { - "epoch": 0.7906713140071977, - "grad_norm": 0.0, - "learning_rate": 2.2115301771125296e-06, - "loss": 0.7341, - "step": 27902 - }, - { - "epoch": 0.7906996514494602, - "grad_norm": 0.0, - "learning_rate": 2.21095455965724e-06, - "loss": 0.7405, - "step": 27903 - }, - { - "epoch": 0.7907279888917227, - "grad_norm": 0.0, - "learning_rate": 2.2103790078119703e-06, - "loss": 0.7598, - "step": 27904 - }, - { - "epoch": 0.7907563263339851, - "grad_norm": 0.0, - "learning_rate": 2.2098035215815694e-06, - "loss": 0.7874, - "step": 27905 - }, - { - "epoch": 0.7907846637762476, - "grad_norm": 0.0, - "learning_rate": 2.2092281009708906e-06, - "loss": 0.8682, - "step": 27906 - }, - { - "epoch": 0.7908130012185101, - "grad_norm": 0.0, - "learning_rate": 2.208652745984773e-06, - "loss": 0.8647, - "step": 27907 - }, - { - "epoch": 0.7908413386607724, - "grad_norm": 0.0, - "learning_rate": 2.208077456628066e-06, - "loss": 0.7244, - "step": 27908 - }, - { - "epoch": 0.7908696761030349, - "grad_norm": 0.0, - "learning_rate": 2.2075022329056193e-06, - "loss": 0.7907, - "step": 27909 - }, - { - "epoch": 0.7908980135452974, - "grad_norm": 0.0, - "learning_rate": 2.2069270748222726e-06, - "loss": 0.9384, - "step": 27910 - }, - { - "epoch": 0.7909263509875598, - "grad_norm": 0.0, - "learning_rate": 2.2063519823828727e-06, - "loss": 0.8403, - "step": 27911 - }, - { - "epoch": 0.7909546884298223, - "grad_norm": 0.0, - "learning_rate": 2.2057769555922637e-06, - "loss": 0.8554, - "step": 27912 - }, - { - "epoch": 0.7909830258720848, - "grad_norm": 0.0, - "learning_rate": 2.2052019944552903e-06, - "loss": 0.7872, - "step": 27913 - }, - { - "epoch": 0.7910113633143473, - "grad_norm": 0.0, - "learning_rate": 2.2046270989767983e-06, - "loss": 0.8434, - "step": 27914 - }, - { - "epoch": 0.7910397007566097, - "grad_norm": 0.0, - "learning_rate": 2.204052269161623e-06, - "loss": 0.854, - "step": 27915 - }, - { - "epoch": 0.7910680381988722, - "grad_norm": 0.0, - "learning_rate": 2.2034775050146107e-06, - "loss": 0.7727, - "step": 27916 - }, - { - "epoch": 0.7910963756411347, - "grad_norm": 0.0, - "learning_rate": 2.2029028065406056e-06, - "loss": 0.8009, - "step": 27917 - }, - { - "epoch": 0.791124713083397, - "grad_norm": 0.0, - "learning_rate": 2.2023281737444434e-06, - "loss": 0.8385, - "step": 27918 - }, - { - "epoch": 0.7911530505256595, - "grad_norm": 0.0, - "learning_rate": 2.2017536066309687e-06, - "loss": 0.806, - "step": 27919 - }, - { - "epoch": 0.791181387967922, - "grad_norm": 0.0, - "learning_rate": 2.201179105205016e-06, - "loss": 0.7695, - "step": 27920 - }, - { - "epoch": 0.7912097254101845, - "grad_norm": 0.0, - "learning_rate": 2.2006046694714277e-06, - "loss": 0.7815, - "step": 27921 - }, - { - "epoch": 0.7912380628524469, - "grad_norm": 0.0, - "learning_rate": 2.2000302994350463e-06, - "loss": 0.8451, - "step": 27922 - }, - { - "epoch": 0.7912664002947094, - "grad_norm": 0.0, - "learning_rate": 2.1994559951007033e-06, - "loss": 0.792, - "step": 27923 - }, - { - "epoch": 0.7912947377369719, - "grad_norm": 0.0, - "learning_rate": 2.198881756473238e-06, - "loss": 0.7505, - "step": 27924 - }, - { - "epoch": 0.7913230751792343, - "grad_norm": 0.0, - "learning_rate": 2.19830758355749e-06, - "loss": 0.7986, - "step": 27925 - }, - { - "epoch": 0.7913514126214968, - "grad_norm": 0.0, - "learning_rate": 2.1977334763582924e-06, - "loss": 0.8455, - "step": 27926 - }, - { - "epoch": 0.7913797500637593, - "grad_norm": 0.0, - "learning_rate": 2.197159434880487e-06, - "loss": 0.8228, - "step": 27927 - }, - { - "epoch": 0.7914080875060217, - "grad_norm": 0.0, - "learning_rate": 2.1965854591289025e-06, - "loss": 0.7402, - "step": 27928 - }, - { - "epoch": 0.7914364249482841, - "grad_norm": 0.0, - "learning_rate": 2.1960115491083754e-06, - "loss": 0.802, - "step": 27929 - }, - { - "epoch": 0.7914647623905466, - "grad_norm": 0.0, - "learning_rate": 2.1954377048237452e-06, - "loss": 0.8233, - "step": 27930 - }, - { - "epoch": 0.7914930998328091, - "grad_norm": 0.0, - "learning_rate": 2.194863926279838e-06, - "loss": 0.9305, - "step": 27931 - }, - { - "epoch": 0.7915214372750715, - "grad_norm": 0.0, - "learning_rate": 2.1942902134814924e-06, - "loss": 0.7208, - "step": 27932 - }, - { - "epoch": 0.791549774717334, - "grad_norm": 0.0, - "learning_rate": 2.193716566433537e-06, - "loss": 0.7525, - "step": 27933 - }, - { - "epoch": 0.7915781121595965, - "grad_norm": 0.0, - "learning_rate": 2.193142985140806e-06, - "loss": 0.8333, - "step": 27934 - }, - { - "epoch": 0.7916064496018589, - "grad_norm": 0.0, - "learning_rate": 2.1925694696081325e-06, - "loss": 0.8998, - "step": 27935 - }, - { - "epoch": 0.7916347870441214, - "grad_norm": 0.0, - "learning_rate": 2.1919960198403435e-06, - "loss": 0.9037, - "step": 27936 - }, - { - "epoch": 0.7916631244863839, - "grad_norm": 0.0, - "learning_rate": 2.191422635842271e-06, - "loss": 0.8275, - "step": 27937 - }, - { - "epoch": 0.7916914619286464, - "grad_norm": 0.0, - "learning_rate": 2.190849317618745e-06, - "loss": 0.7641, - "step": 27938 - }, - { - "epoch": 0.7917197993709088, - "grad_norm": 0.0, - "learning_rate": 2.190276065174596e-06, - "loss": 0.7998, - "step": 27939 - }, - { - "epoch": 0.7917481368131712, - "grad_norm": 0.0, - "learning_rate": 2.1897028785146534e-06, - "loss": 0.8275, - "step": 27940 - }, - { - "epoch": 0.7917764742554337, - "grad_norm": 0.0, - "learning_rate": 2.189129757643742e-06, - "loss": 0.8189, - "step": 27941 - }, - { - "epoch": 0.7918048116976961, - "grad_norm": 0.0, - "learning_rate": 2.188556702566691e-06, - "loss": 0.8196, - "step": 27942 - }, - { - "epoch": 0.7918331491399586, - "grad_norm": 0.0, - "learning_rate": 2.1879837132883298e-06, - "loss": 0.9447, - "step": 27943 - }, - { - "epoch": 0.7918614865822211, - "grad_norm": 0.0, - "learning_rate": 2.18741078981348e-06, - "loss": 0.8427, - "step": 27944 - }, - { - "epoch": 0.7918898240244836, - "grad_norm": 0.0, - "learning_rate": 2.186837932146971e-06, - "loss": 0.7379, - "step": 27945 - }, - { - "epoch": 0.791918161466746, - "grad_norm": 0.0, - "learning_rate": 2.18626514029363e-06, - "loss": 0.8244, - "step": 27946 - }, - { - "epoch": 0.7919464989090085, - "grad_norm": 0.0, - "learning_rate": 2.185692414258276e-06, - "loss": 0.7392, - "step": 27947 - }, - { - "epoch": 0.791974836351271, - "grad_norm": 0.0, - "learning_rate": 2.1851197540457393e-06, - "loss": 0.8717, - "step": 27948 - }, - { - "epoch": 0.7920031737935334, - "grad_norm": 0.0, - "learning_rate": 2.1845471596608382e-06, - "loss": 0.784, - "step": 27949 - }, - { - "epoch": 0.7920315112357958, - "grad_norm": 0.0, - "learning_rate": 2.1839746311083988e-06, - "loss": 0.9454, - "step": 27950 - }, - { - "epoch": 0.7920598486780583, - "grad_norm": 0.0, - "learning_rate": 2.183402168393244e-06, - "loss": 0.797, - "step": 27951 - }, - { - "epoch": 0.7920881861203207, - "grad_norm": 0.0, - "learning_rate": 2.182829771520194e-06, - "loss": 0.7384, - "step": 27952 - }, - { - "epoch": 0.7921165235625832, - "grad_norm": 0.0, - "learning_rate": 2.182257440494073e-06, - "loss": 0.8344, - "step": 27953 - }, - { - "epoch": 0.7921448610048457, - "grad_norm": 0.0, - "learning_rate": 2.1816851753197023e-06, - "loss": 0.8105, - "step": 27954 - }, - { - "epoch": 0.7921731984471082, - "grad_norm": 0.0, - "learning_rate": 2.181112976001899e-06, - "loss": 0.818, - "step": 27955 - }, - { - "epoch": 0.7922015358893706, - "grad_norm": 0.0, - "learning_rate": 2.1805408425454865e-06, - "loss": 0.7314, - "step": 27956 - }, - { - "epoch": 0.7922298733316331, - "grad_norm": 0.0, - "learning_rate": 2.17996877495528e-06, - "loss": 0.6973, - "step": 27957 - }, - { - "epoch": 0.7922582107738956, - "grad_norm": 0.0, - "learning_rate": 2.1793967732360997e-06, - "loss": 0.8205, - "step": 27958 - }, - { - "epoch": 0.792286548216158, - "grad_norm": 0.0, - "learning_rate": 2.178824837392768e-06, - "loss": 0.8308, - "step": 27959 - }, - { - "epoch": 0.7923148856584205, - "grad_norm": 0.0, - "learning_rate": 2.1782529674300955e-06, - "loss": 0.8321, - "step": 27960 - }, - { - "epoch": 0.792343223100683, - "grad_norm": 0.0, - "learning_rate": 2.177681163352906e-06, - "loss": 0.854, - "step": 27961 - }, - { - "epoch": 0.7923715605429454, - "grad_norm": 0.0, - "learning_rate": 2.1771094251660096e-06, - "loss": 0.8181, - "step": 27962 - }, - { - "epoch": 0.7923998979852078, - "grad_norm": 0.0, - "learning_rate": 2.176537752874226e-06, - "loss": 0.769, - "step": 27963 - }, - { - "epoch": 0.7924282354274703, - "grad_norm": 0.0, - "learning_rate": 2.17596614648237e-06, - "loss": 0.7024, - "step": 27964 - }, - { - "epoch": 0.7924565728697328, - "grad_norm": 0.0, - "learning_rate": 2.1753946059952567e-06, - "loss": 0.8717, - "step": 27965 - }, - { - "epoch": 0.7924849103119952, - "grad_norm": 0.0, - "learning_rate": 2.1748231314177004e-06, - "loss": 0.7634, - "step": 27966 - }, - { - "epoch": 0.7925132477542577, - "grad_norm": 0.0, - "learning_rate": 2.1742517227545167e-06, - "loss": 0.746, - "step": 27967 - }, - { - "epoch": 0.7925415851965202, - "grad_norm": 0.0, - "learning_rate": 2.1736803800105142e-06, - "loss": 0.8543, - "step": 27968 - }, - { - "epoch": 0.7925699226387827, - "grad_norm": 0.0, - "learning_rate": 2.1731091031905118e-06, - "loss": 0.9109, - "step": 27969 - }, - { - "epoch": 0.7925982600810451, - "grad_norm": 0.0, - "learning_rate": 2.1725378922993133e-06, - "loss": 0.937, - "step": 27970 - }, - { - "epoch": 0.7926265975233076, - "grad_norm": 0.0, - "learning_rate": 2.171966747341736e-06, - "loss": 0.9096, - "step": 27971 - }, - { - "epoch": 0.79265493496557, - "grad_norm": 0.0, - "learning_rate": 2.171395668322592e-06, - "loss": 0.8247, - "step": 27972 - }, - { - "epoch": 0.7926832724078324, - "grad_norm": 0.0, - "learning_rate": 2.170824655246687e-06, - "loss": 0.7822, - "step": 27973 - }, - { - "epoch": 0.7927116098500949, - "grad_norm": 0.0, - "learning_rate": 2.1702537081188336e-06, - "loss": 0.7903, - "step": 27974 - }, - { - "epoch": 0.7927399472923574, - "grad_norm": 0.0, - "learning_rate": 2.16968282694384e-06, - "loss": 0.6878, - "step": 27975 - }, - { - "epoch": 0.7927682847346198, - "grad_norm": 0.0, - "learning_rate": 2.169112011726515e-06, - "loss": 0.8662, - "step": 27976 - }, - { - "epoch": 0.7927966221768823, - "grad_norm": 0.0, - "learning_rate": 2.1685412624716716e-06, - "loss": 0.8123, - "step": 27977 - }, - { - "epoch": 0.7928249596191448, - "grad_norm": 0.0, - "learning_rate": 2.1679705791841097e-06, - "loss": 0.8361, - "step": 27978 - }, - { - "epoch": 0.7928532970614073, - "grad_norm": 0.0, - "learning_rate": 2.1673999618686403e-06, - "loss": 0.8322, - "step": 27979 - }, - { - "epoch": 0.7928816345036697, - "grad_norm": 0.0, - "learning_rate": 2.1668294105300723e-06, - "loss": 0.8895, - "step": 27980 - }, - { - "epoch": 0.7929099719459322, - "grad_norm": 0.0, - "learning_rate": 2.166258925173206e-06, - "loss": 0.8355, - "step": 27981 - }, - { - "epoch": 0.7929383093881947, - "grad_norm": 0.0, - "learning_rate": 2.16568850580285e-06, - "loss": 0.865, - "step": 27982 - }, - { - "epoch": 0.792966646830457, - "grad_norm": 0.0, - "learning_rate": 2.1651181524238117e-06, - "loss": 0.8279, - "step": 27983 - }, - { - "epoch": 0.7929949842727195, - "grad_norm": 0.0, - "learning_rate": 2.164547865040889e-06, - "loss": 0.7778, - "step": 27984 - }, - { - "epoch": 0.793023321714982, - "grad_norm": 0.0, - "learning_rate": 2.1639776436588932e-06, - "loss": 0.8519, - "step": 27985 - }, - { - "epoch": 0.7930516591572445, - "grad_norm": 0.0, - "learning_rate": 2.1634074882826206e-06, - "loss": 0.8224, - "step": 27986 - }, - { - "epoch": 0.7930799965995069, - "grad_norm": 0.0, - "learning_rate": 2.162837398916876e-06, - "loss": 0.9296, - "step": 27987 - }, - { - "epoch": 0.7931083340417694, - "grad_norm": 0.0, - "learning_rate": 2.1622673755664634e-06, - "loss": 0.7418, - "step": 27988 - }, - { - "epoch": 0.7931366714840319, - "grad_norm": 0.0, - "learning_rate": 2.1616974182361826e-06, - "loss": 0.7942, - "step": 27989 - }, - { - "epoch": 0.7931650089262943, - "grad_norm": 0.0, - "learning_rate": 2.161127526930837e-06, - "loss": 0.9018, - "step": 27990 - }, - { - "epoch": 0.7931933463685568, - "grad_norm": 0.0, - "learning_rate": 2.160557701655224e-06, - "loss": 0.7703, - "step": 27991 - }, - { - "epoch": 0.7932216838108193, - "grad_norm": 0.0, - "learning_rate": 2.1599879424141434e-06, - "loss": 0.8326, - "step": 27992 - }, - { - "epoch": 0.7932500212530817, - "grad_norm": 0.0, - "learning_rate": 2.1594182492123995e-06, - "loss": 0.7484, - "step": 27993 - }, - { - "epoch": 0.7932783586953441, - "grad_norm": 0.0, - "learning_rate": 2.158848622054783e-06, - "loss": 0.8048, - "step": 27994 - }, - { - "epoch": 0.7933066961376066, - "grad_norm": 0.0, - "learning_rate": 2.158279060946097e-06, - "loss": 0.8238, - "step": 27995 - }, - { - "epoch": 0.7933350335798691, - "grad_norm": 0.0, - "learning_rate": 2.1577095658911417e-06, - "loss": 0.8194, - "step": 27996 - }, - { - "epoch": 0.7933633710221315, - "grad_norm": 0.0, - "learning_rate": 2.1571401368947077e-06, - "loss": 0.7649, - "step": 27997 - }, - { - "epoch": 0.793391708464394, - "grad_norm": 0.0, - "learning_rate": 2.156570773961597e-06, - "loss": 0.7087, - "step": 27998 - }, - { - "epoch": 0.7934200459066565, - "grad_norm": 0.0, - "learning_rate": 2.156001477096601e-06, - "loss": 0.7731, - "step": 27999 - }, - { - "epoch": 0.7934483833489189, - "grad_norm": 0.0, - "learning_rate": 2.1554322463045173e-06, - "loss": 0.8659, - "step": 28000 - }, - { - "epoch": 0.7934767207911814, - "grad_norm": 0.0, - "learning_rate": 2.1548630815901407e-06, - "loss": 0.8644, - "step": 28001 - }, - { - "epoch": 0.7935050582334439, - "grad_norm": 0.0, - "learning_rate": 2.1542939829582656e-06, - "loss": 0.8676, - "step": 28002 - }, - { - "epoch": 0.7935333956757064, - "grad_norm": 0.0, - "learning_rate": 2.1537249504136857e-06, - "loss": 0.7978, - "step": 28003 - }, - { - "epoch": 0.7935617331179687, - "grad_norm": 0.0, - "learning_rate": 2.153155983961197e-06, - "loss": 0.7462, - "step": 28004 - }, - { - "epoch": 0.7935900705602312, - "grad_norm": 0.0, - "learning_rate": 2.1525870836055873e-06, - "loss": 0.8111, - "step": 28005 - }, - { - "epoch": 0.7936184080024937, - "grad_norm": 0.0, - "learning_rate": 2.152018249351653e-06, - "loss": 0.7925, - "step": 28006 - }, - { - "epoch": 0.7936467454447561, - "grad_norm": 0.0, - "learning_rate": 2.15144948120418e-06, - "loss": 0.7549, - "step": 28007 - }, - { - "epoch": 0.7936750828870186, - "grad_norm": 0.0, - "learning_rate": 2.150880779167962e-06, - "loss": 0.8176, - "step": 28008 - }, - { - "epoch": 0.7937034203292811, - "grad_norm": 0.0, - "learning_rate": 2.1503121432477936e-06, - "loss": 0.861, - "step": 28009 - }, - { - "epoch": 0.7937317577715436, - "grad_norm": 0.0, - "learning_rate": 2.1497435734484585e-06, - "loss": 0.7678, - "step": 28010 - }, - { - "epoch": 0.793760095213806, - "grad_norm": 0.0, - "learning_rate": 2.149175069774747e-06, - "loss": 0.9372, - "step": 28011 - }, - { - "epoch": 0.7937884326560685, - "grad_norm": 0.0, - "learning_rate": 2.1486066322314526e-06, - "loss": 0.7945, - "step": 28012 - }, - { - "epoch": 0.793816770098331, - "grad_norm": 0.0, - "learning_rate": 2.1480382608233574e-06, - "loss": 0.7381, - "step": 28013 - }, - { - "epoch": 0.7938451075405933, - "grad_norm": 0.0, - "learning_rate": 2.1474699555552527e-06, - "loss": 0.7241, - "step": 28014 - }, - { - "epoch": 0.7938734449828558, - "grad_norm": 0.0, - "learning_rate": 2.146901716431923e-06, - "loss": 0.986, - "step": 28015 - }, - { - "epoch": 0.7939017824251183, - "grad_norm": 0.0, - "learning_rate": 2.1463335434581566e-06, - "loss": 0.7848, - "step": 28016 - }, - { - "epoch": 0.7939301198673808, - "grad_norm": 0.0, - "learning_rate": 2.1457654366387427e-06, - "loss": 0.9301, - "step": 28017 - }, - { - "epoch": 0.7939584573096432, - "grad_norm": 0.0, - "learning_rate": 2.14519739597846e-06, - "loss": 0.8928, - "step": 28018 - }, - { - "epoch": 0.7939867947519057, - "grad_norm": 0.0, - "learning_rate": 2.1446294214820995e-06, - "loss": 0.9571, - "step": 28019 - }, - { - "epoch": 0.7940151321941682, - "grad_norm": 0.0, - "learning_rate": 2.1440615131544395e-06, - "loss": 0.7284, - "step": 28020 - }, - { - "epoch": 0.7940434696364306, - "grad_norm": 0.0, - "learning_rate": 2.1434936710002663e-06, - "loss": 0.7268, - "step": 28021 - }, - { - "epoch": 0.7940718070786931, - "grad_norm": 0.0, - "learning_rate": 2.142925895024366e-06, - "loss": 0.8307, - "step": 28022 - }, - { - "epoch": 0.7941001445209556, - "grad_norm": 0.0, - "learning_rate": 2.1423581852315156e-06, - "loss": 0.8959, - "step": 28023 - }, - { - "epoch": 0.794128481963218, - "grad_norm": 0.0, - "learning_rate": 2.1417905416265006e-06, - "loss": 0.8685, - "step": 28024 - }, - { - "epoch": 0.7941568194054804, - "grad_norm": 0.0, - "learning_rate": 2.1412229642141047e-06, - "loss": 0.8206, - "step": 28025 - }, - { - "epoch": 0.7941851568477429, - "grad_norm": 0.0, - "learning_rate": 2.140655452999103e-06, - "loss": 0.7806, - "step": 28026 - }, - { - "epoch": 0.7942134942900054, - "grad_norm": 0.0, - "learning_rate": 2.1400880079862795e-06, - "loss": 0.818, - "step": 28027 - }, - { - "epoch": 0.7942418317322678, - "grad_norm": 0.0, - "learning_rate": 2.1395206291804127e-06, - "loss": 0.9071, - "step": 28028 - }, - { - "epoch": 0.7942701691745303, - "grad_norm": 0.0, - "learning_rate": 2.138953316586283e-06, - "loss": 0.8687, - "step": 28029 - }, - { - "epoch": 0.7942985066167928, - "grad_norm": 0.0, - "learning_rate": 2.138386070208671e-06, - "loss": 0.8513, - "step": 28030 - }, - { - "epoch": 0.7943268440590552, - "grad_norm": 0.0, - "learning_rate": 2.13781889005235e-06, - "loss": 0.9061, - "step": 28031 - }, - { - "epoch": 0.7943551815013177, - "grad_norm": 0.0, - "learning_rate": 2.1372517761221e-06, - "loss": 0.8025, - "step": 28032 - }, - { - "epoch": 0.7943835189435802, - "grad_norm": 0.0, - "learning_rate": 2.136684728422701e-06, - "loss": 0.8127, - "step": 28033 - }, - { - "epoch": 0.7944118563858427, - "grad_norm": 0.0, - "learning_rate": 2.1361177469589234e-06, - "loss": 0.8839, - "step": 28034 - }, - { - "epoch": 0.794440193828105, - "grad_norm": 0.0, - "learning_rate": 2.135550831735549e-06, - "loss": 0.7402, - "step": 28035 - }, - { - "epoch": 0.7944685312703675, - "grad_norm": 0.0, - "learning_rate": 2.134983982757347e-06, - "loss": 0.8909, - "step": 28036 - }, - { - "epoch": 0.79449686871263, - "grad_norm": 0.0, - "learning_rate": 2.1344172000290964e-06, - "loss": 0.809, - "step": 28037 - }, - { - "epoch": 0.7945252061548924, - "grad_norm": 0.0, - "learning_rate": 2.133850483555573e-06, - "loss": 0.8506, - "step": 28038 - }, - { - "epoch": 0.7945535435971549, - "grad_norm": 0.0, - "learning_rate": 2.133283833341545e-06, - "loss": 0.7338, - "step": 28039 - }, - { - "epoch": 0.7945818810394174, - "grad_norm": 0.0, - "learning_rate": 2.1327172493917893e-06, - "loss": 0.8938, - "step": 28040 - }, - { - "epoch": 0.7946102184816799, - "grad_norm": 0.0, - "learning_rate": 2.132150731711078e-06, - "loss": 0.755, - "step": 28041 - }, - { - "epoch": 0.7946385559239423, - "grad_norm": 0.0, - "learning_rate": 2.1315842803041807e-06, - "loss": 0.8201, - "step": 28042 - }, - { - "epoch": 0.7946668933662048, - "grad_norm": 0.0, - "learning_rate": 2.131017895175875e-06, - "loss": 0.8864, - "step": 28043 - }, - { - "epoch": 0.7946952308084673, - "grad_norm": 0.0, - "learning_rate": 2.130451576330925e-06, - "loss": 0.9208, - "step": 28044 - }, - { - "epoch": 0.7947235682507297, - "grad_norm": 0.0, - "learning_rate": 2.1298853237741034e-06, - "loss": 0.7283, - "step": 28045 - }, - { - "epoch": 0.7947519056929921, - "grad_norm": 0.0, - "learning_rate": 2.129319137510183e-06, - "loss": 0.8738, - "step": 28046 - }, - { - "epoch": 0.7947802431352546, - "grad_norm": 0.0, - "learning_rate": 2.1287530175439277e-06, - "loss": 0.9072, - "step": 28047 - }, - { - "epoch": 0.794808580577517, - "grad_norm": 0.0, - "learning_rate": 2.1281869638801113e-06, - "loss": 0.7947, - "step": 28048 - }, - { - "epoch": 0.7948369180197795, - "grad_norm": 0.0, - "learning_rate": 2.1276209765234956e-06, - "loss": 0.8203, - "step": 28049 - }, - { - "epoch": 0.794865255462042, - "grad_norm": 0.0, - "learning_rate": 2.1270550554788528e-06, - "loss": 0.8383, - "step": 28050 - }, - { - "epoch": 0.7948935929043045, - "grad_norm": 0.0, - "learning_rate": 2.1264892007509507e-06, - "loss": 0.7493, - "step": 28051 - }, - { - "epoch": 0.7949219303465669, - "grad_norm": 0.0, - "learning_rate": 2.1259234123445515e-06, - "loss": 0.7345, - "step": 28052 - }, - { - "epoch": 0.7949502677888294, - "grad_norm": 0.0, - "learning_rate": 2.1253576902644234e-06, - "loss": 0.8624, - "step": 28053 - }, - { - "epoch": 0.7949786052310919, - "grad_norm": 0.0, - "learning_rate": 2.124792034515333e-06, - "loss": 0.8993, - "step": 28054 - }, - { - "epoch": 0.7950069426733543, - "grad_norm": 0.0, - "learning_rate": 2.1242264451020412e-06, - "loss": 0.8651, - "step": 28055 - }, - { - "epoch": 0.7950352801156167, - "grad_norm": 0.0, - "learning_rate": 2.123660922029319e-06, - "loss": 0.7969, - "step": 28056 - }, - { - "epoch": 0.7950636175578792, - "grad_norm": 0.0, - "learning_rate": 2.123095465301922e-06, - "loss": 0.7926, - "step": 28057 - }, - { - "epoch": 0.7950919550001417, - "grad_norm": 0.0, - "learning_rate": 2.1225300749246182e-06, - "loss": 0.7918, - "step": 28058 - }, - { - "epoch": 0.7951202924424041, - "grad_norm": 0.0, - "learning_rate": 2.12196475090217e-06, - "loss": 0.7844, - "step": 28059 - }, - { - "epoch": 0.7951486298846666, - "grad_norm": 0.0, - "learning_rate": 2.1213994932393367e-06, - "loss": 0.8258, - "step": 28060 - }, - { - "epoch": 0.7951769673269291, - "grad_norm": 0.0, - "learning_rate": 2.1208343019408807e-06, - "loss": 0.7704, - "step": 28061 - }, - { - "epoch": 0.7952053047691915, - "grad_norm": 0.0, - "learning_rate": 2.120269177011566e-06, - "loss": 0.7249, - "step": 28062 - }, - { - "epoch": 0.795233642211454, - "grad_norm": 0.0, - "learning_rate": 2.119704118456146e-06, - "loss": 0.8212, - "step": 28063 - }, - { - "epoch": 0.7952619796537165, - "grad_norm": 0.0, - "learning_rate": 2.119139126279389e-06, - "loss": 0.6883, - "step": 28064 - }, - { - "epoch": 0.795290317095979, - "grad_norm": 0.0, - "learning_rate": 2.1185742004860465e-06, - "loss": 0.8581, - "step": 28065 - }, - { - "epoch": 0.7953186545382414, - "grad_norm": 0.0, - "learning_rate": 2.11800934108088e-06, - "loss": 0.7789, - "step": 28066 - }, - { - "epoch": 0.7953469919805038, - "grad_norm": 0.0, - "learning_rate": 2.117444548068648e-06, - "loss": 0.7367, - "step": 28067 - }, - { - "epoch": 0.7953753294227663, - "grad_norm": 0.0, - "learning_rate": 2.1168798214541075e-06, - "loss": 0.8672, - "step": 28068 - }, - { - "epoch": 0.7954036668650287, - "grad_norm": 0.0, - "learning_rate": 2.1163151612420153e-06, - "loss": 0.7834, - "step": 28069 - }, - { - "epoch": 0.7954320043072912, - "grad_norm": 0.0, - "learning_rate": 2.1157505674371305e-06, - "loss": 0.8042, - "step": 28070 - }, - { - "epoch": 0.7954603417495537, - "grad_norm": 0.0, - "learning_rate": 2.115186040044205e-06, - "loss": 0.8503, - "step": 28071 - }, - { - "epoch": 0.7954886791918161, - "grad_norm": 0.0, - "learning_rate": 2.114621579067997e-06, - "loss": 0.7169, - "step": 28072 - }, - { - "epoch": 0.7955170166340786, - "grad_norm": 0.0, - "learning_rate": 2.114057184513256e-06, - "loss": 0.8415, - "step": 28073 - }, - { - "epoch": 0.7955453540763411, - "grad_norm": 0.0, - "learning_rate": 2.113492856384741e-06, - "loss": 0.834, - "step": 28074 - }, - { - "epoch": 0.7955736915186036, - "grad_norm": 0.0, - "learning_rate": 2.112928594687208e-06, - "loss": 0.7303, - "step": 28075 - }, - { - "epoch": 0.795602028960866, - "grad_norm": 0.0, - "learning_rate": 2.1123643994254016e-06, - "loss": 0.8035, - "step": 28076 - }, - { - "epoch": 0.7956303664031285, - "grad_norm": 0.0, - "learning_rate": 2.111800270604083e-06, - "loss": 0.7039, - "step": 28077 - }, - { - "epoch": 0.795658703845391, - "grad_norm": 0.0, - "learning_rate": 2.1112362082279957e-06, - "loss": 0.8556, - "step": 28078 - }, - { - "epoch": 0.7956870412876533, - "grad_norm": 0.0, - "learning_rate": 2.1106722123018965e-06, - "loss": 0.7682, - "step": 28079 - }, - { - "epoch": 0.7957153787299158, - "grad_norm": 0.0, - "learning_rate": 2.110108282830534e-06, - "loss": 0.7594, - "step": 28080 - }, - { - "epoch": 0.7957437161721783, - "grad_norm": 0.0, - "learning_rate": 2.109544419818661e-06, - "loss": 0.8502, - "step": 28081 - }, - { - "epoch": 0.7957720536144408, - "grad_norm": 0.0, - "learning_rate": 2.108980623271024e-06, - "loss": 0.7769, - "step": 28082 - }, - { - "epoch": 0.7958003910567032, - "grad_norm": 0.0, - "learning_rate": 2.1084168931923766e-06, - "loss": 0.8229, - "step": 28083 - }, - { - "epoch": 0.7958287284989657, - "grad_norm": 0.0, - "learning_rate": 2.107853229587461e-06, - "loss": 0.8048, - "step": 28084 - }, - { - "epoch": 0.7958570659412282, - "grad_norm": 0.0, - "learning_rate": 2.1072896324610305e-06, - "loss": 0.8282, - "step": 28085 - }, - { - "epoch": 0.7958854033834906, - "grad_norm": 0.0, - "learning_rate": 2.1067261018178287e-06, - "loss": 0.7545, - "step": 28086 - }, - { - "epoch": 0.7959137408257531, - "grad_norm": 0.0, - "learning_rate": 2.106162637662603e-06, - "loss": 0.8895, - "step": 28087 - }, - { - "epoch": 0.7959420782680156, - "grad_norm": 0.0, - "learning_rate": 2.1055992400001046e-06, - "loss": 0.7329, - "step": 28088 - }, - { - "epoch": 0.795970415710278, - "grad_norm": 0.0, - "learning_rate": 2.1050359088350724e-06, - "loss": 0.866, - "step": 28089 - }, - { - "epoch": 0.7959987531525404, - "grad_norm": 0.0, - "learning_rate": 2.104472644172254e-06, - "loss": 0.8306, - "step": 28090 - }, - { - "epoch": 0.7960270905948029, - "grad_norm": 0.0, - "learning_rate": 2.1039094460163978e-06, - "loss": 0.8162, - "step": 28091 - }, - { - "epoch": 0.7960554280370654, - "grad_norm": 0.0, - "learning_rate": 2.103346314372241e-06, - "loss": 0.7989, - "step": 28092 - }, - { - "epoch": 0.7960837654793278, - "grad_norm": 0.0, - "learning_rate": 2.102783249244531e-06, - "loss": 0.8901, - "step": 28093 - }, - { - "epoch": 0.7961121029215903, - "grad_norm": 0.0, - "learning_rate": 2.1022202506380097e-06, - "loss": 0.6738, - "step": 28094 - }, - { - "epoch": 0.7961404403638528, - "grad_norm": 0.0, - "learning_rate": 2.1016573185574206e-06, - "loss": 0.7461, - "step": 28095 - }, - { - "epoch": 0.7961687778061152, - "grad_norm": 0.0, - "learning_rate": 2.1010944530075074e-06, - "loss": 0.8838, - "step": 28096 - }, - { - "epoch": 0.7961971152483777, - "grad_norm": 0.0, - "learning_rate": 2.1005316539930064e-06, - "loss": 0.7732, - "step": 28097 - }, - { - "epoch": 0.7962254526906402, - "grad_norm": 0.0, - "learning_rate": 2.0999689215186603e-06, - "loss": 0.8648, - "step": 28098 - }, - { - "epoch": 0.7962537901329027, - "grad_norm": 0.0, - "learning_rate": 2.0994062555892123e-06, - "loss": 0.7302, - "step": 28099 - }, - { - "epoch": 0.796282127575165, - "grad_norm": 0.0, - "learning_rate": 2.098843656209396e-06, - "loss": 0.8085, - "step": 28100 - }, - { - "epoch": 0.7963104650174275, - "grad_norm": 0.0, - "learning_rate": 2.098281123383957e-06, - "loss": 0.8076, - "step": 28101 - }, - { - "epoch": 0.79633880245969, - "grad_norm": 0.0, - "learning_rate": 2.097718657117628e-06, - "loss": 0.8088, - "step": 28102 - }, - { - "epoch": 0.7963671399019524, - "grad_norm": 0.0, - "learning_rate": 2.0971562574151483e-06, - "loss": 0.7762, - "step": 28103 - }, - { - "epoch": 0.7963954773442149, - "grad_norm": 0.0, - "learning_rate": 2.0965939242812594e-06, - "loss": 0.8867, - "step": 28104 - }, - { - "epoch": 0.7964238147864774, - "grad_norm": 0.0, - "learning_rate": 2.096031657720692e-06, - "loss": 0.8228, - "step": 28105 - }, - { - "epoch": 0.7964521522287399, - "grad_norm": 0.0, - "learning_rate": 2.095469457738185e-06, - "loss": 0.7543, - "step": 28106 - }, - { - "epoch": 0.7964804896710023, - "grad_norm": 0.0, - "learning_rate": 2.094907324338473e-06, - "loss": 0.6374, - "step": 28107 - }, - { - "epoch": 0.7965088271132648, - "grad_norm": 0.0, - "learning_rate": 2.0943452575262935e-06, - "loss": 0.9035, - "step": 28108 - }, - { - "epoch": 0.7965371645555273, - "grad_norm": 0.0, - "learning_rate": 2.0937832573063823e-06, - "loss": 0.7816, - "step": 28109 - }, - { - "epoch": 0.7965655019977896, - "grad_norm": 0.0, - "learning_rate": 2.0932213236834663e-06, - "loss": 0.7554, - "step": 28110 - }, - { - "epoch": 0.7965938394400521, - "grad_norm": 0.0, - "learning_rate": 2.0926594566622847e-06, - "loss": 0.7658, - "step": 28111 - }, - { - "epoch": 0.7966221768823146, - "grad_norm": 0.0, - "learning_rate": 2.0920976562475714e-06, - "loss": 0.8488, - "step": 28112 - }, - { - "epoch": 0.796650514324577, - "grad_norm": 0.0, - "learning_rate": 2.0915359224440535e-06, - "loss": 0.7847, - "step": 28113 - }, - { - "epoch": 0.7966788517668395, - "grad_norm": 0.0, - "learning_rate": 2.090974255256467e-06, - "loss": 0.7645, - "step": 28114 - }, - { - "epoch": 0.796707189209102, - "grad_norm": 0.0, - "learning_rate": 2.0904126546895385e-06, - "loss": 0.7967, - "step": 28115 - }, - { - "epoch": 0.7967355266513645, - "grad_norm": 0.0, - "learning_rate": 2.089851120748002e-06, - "loss": 0.776, - "step": 28116 - }, - { - "epoch": 0.7967638640936269, - "grad_norm": 0.0, - "learning_rate": 2.08928965343659e-06, - "loss": 0.844, - "step": 28117 - }, - { - "epoch": 0.7967922015358894, - "grad_norm": 0.0, - "learning_rate": 2.088728252760026e-06, - "loss": 0.8049, - "step": 28118 - }, - { - "epoch": 0.7968205389781519, - "grad_norm": 0.0, - "learning_rate": 2.0881669187230415e-06, - "loss": 0.8976, - "step": 28119 - }, - { - "epoch": 0.7968488764204142, - "grad_norm": 0.0, - "learning_rate": 2.0876056513303644e-06, - "loss": 0.8206, - "step": 28120 - }, - { - "epoch": 0.7968772138626767, - "grad_norm": 0.0, - "learning_rate": 2.087044450586724e-06, - "loss": 0.7701, - "step": 28121 - }, - { - "epoch": 0.7969055513049392, - "grad_norm": 0.0, - "learning_rate": 2.086483316496849e-06, - "loss": 0.8652, - "step": 28122 - }, - { - "epoch": 0.7969338887472017, - "grad_norm": 0.0, - "learning_rate": 2.0859222490654608e-06, - "loss": 0.8466, - "step": 28123 - }, - { - "epoch": 0.7969622261894641, - "grad_norm": 0.0, - "learning_rate": 2.0853612482972887e-06, - "loss": 0.7194, - "step": 28124 - }, - { - "epoch": 0.7969905636317266, - "grad_norm": 0.0, - "learning_rate": 2.0848003141970597e-06, - "loss": 0.8566, - "step": 28125 - }, - { - "epoch": 0.7970189010739891, - "grad_norm": 0.0, - "learning_rate": 2.0842394467694947e-06, - "loss": 0.8454, - "step": 28126 - }, - { - "epoch": 0.7970472385162515, - "grad_norm": 0.0, - "learning_rate": 2.0836786460193203e-06, - "loss": 0.8079, - "step": 28127 - }, - { - "epoch": 0.797075575958514, - "grad_norm": 0.0, - "learning_rate": 2.0831179119512623e-06, - "loss": 0.7901, - "step": 28128 - }, - { - "epoch": 0.7971039134007765, - "grad_norm": 0.0, - "learning_rate": 2.0825572445700406e-06, - "loss": 0.8611, - "step": 28129 - }, - { - "epoch": 0.797132250843039, - "grad_norm": 0.0, - "learning_rate": 2.0819966438803806e-06, - "loss": 0.8606, - "step": 28130 - }, - { - "epoch": 0.7971605882853013, - "grad_norm": 0.0, - "learning_rate": 2.0814361098870016e-06, - "loss": 0.9009, - "step": 28131 - }, - { - "epoch": 0.7971889257275638, - "grad_norm": 0.0, - "learning_rate": 2.0808756425946262e-06, - "loss": 0.7625, - "step": 28132 - }, - { - "epoch": 0.7972172631698263, - "grad_norm": 0.0, - "learning_rate": 2.0803152420079763e-06, - "loss": 0.8191, - "step": 28133 - }, - { - "epoch": 0.7972456006120887, - "grad_norm": 0.0, - "learning_rate": 2.0797549081317724e-06, - "loss": 0.8284, - "step": 28134 - }, - { - "epoch": 0.7972739380543512, - "grad_norm": 0.0, - "learning_rate": 2.0791946409707353e-06, - "loss": 0.8239, - "step": 28135 - }, - { - "epoch": 0.7973022754966137, - "grad_norm": 0.0, - "learning_rate": 2.0786344405295822e-06, - "loss": 0.8809, - "step": 28136 - }, - { - "epoch": 0.7973306129388761, - "grad_norm": 0.0, - "learning_rate": 2.0780743068130316e-06, - "loss": 0.8584, - "step": 28137 - }, - { - "epoch": 0.7973589503811386, - "grad_norm": 0.0, - "learning_rate": 2.077514239825805e-06, - "loss": 0.8122, - "step": 28138 - }, - { - "epoch": 0.7973872878234011, - "grad_norm": 0.0, - "learning_rate": 2.076954239572616e-06, - "loss": 0.8515, - "step": 28139 - }, - { - "epoch": 0.7974156252656636, - "grad_norm": 0.0, - "learning_rate": 2.0763943060581836e-06, - "loss": 0.7602, - "step": 28140 - }, - { - "epoch": 0.797443962707926, - "grad_norm": 0.0, - "learning_rate": 2.0758344392872265e-06, - "loss": 0.7893, - "step": 28141 - }, - { - "epoch": 0.7974723001501884, - "grad_norm": 0.0, - "learning_rate": 2.0752746392644563e-06, - "loss": 0.8811, - "step": 28142 - }, - { - "epoch": 0.7975006375924509, - "grad_norm": 0.0, - "learning_rate": 2.0747149059945937e-06, - "loss": 0.7294, - "step": 28143 - }, - { - "epoch": 0.7975289750347133, - "grad_norm": 0.0, - "learning_rate": 2.074155239482347e-06, - "loss": 0.7651, - "step": 28144 - }, - { - "epoch": 0.7975573124769758, - "grad_norm": 0.0, - "learning_rate": 2.0735956397324344e-06, - "loss": 0.796, - "step": 28145 - }, - { - "epoch": 0.7975856499192383, - "grad_norm": 0.0, - "learning_rate": 2.0730361067495685e-06, - "loss": 0.731, - "step": 28146 - }, - { - "epoch": 0.7976139873615008, - "grad_norm": 0.0, - "learning_rate": 2.072476640538463e-06, - "loss": 0.7866, - "step": 28147 - }, - { - "epoch": 0.7976423248037632, - "grad_norm": 0.0, - "learning_rate": 2.071917241103831e-06, - "loss": 0.8329, - "step": 28148 - }, - { - "epoch": 0.7976706622460257, - "grad_norm": 0.0, - "learning_rate": 2.0713579084503877e-06, - "loss": 0.8065, - "step": 28149 - }, - { - "epoch": 0.7976989996882882, - "grad_norm": 0.0, - "learning_rate": 2.0707986425828363e-06, - "loss": 0.6681, - "step": 28150 - }, - { - "epoch": 0.7977273371305506, - "grad_norm": 0.0, - "learning_rate": 2.070239443505897e-06, - "loss": 0.7018, - "step": 28151 - }, - { - "epoch": 0.797755674572813, - "grad_norm": 0.0, - "learning_rate": 2.0696803112242716e-06, - "loss": 0.8727, - "step": 28152 - }, - { - "epoch": 0.7977840120150755, - "grad_norm": 0.0, - "learning_rate": 2.0691212457426748e-06, - "loss": 0.8461, - "step": 28153 - }, - { - "epoch": 0.797812349457338, - "grad_norm": 0.0, - "learning_rate": 2.068562247065816e-06, - "loss": 0.764, - "step": 28154 - }, - { - "epoch": 0.7978406868996004, - "grad_norm": 0.0, - "learning_rate": 2.068003315198401e-06, - "loss": 0.8519, - "step": 28155 - }, - { - "epoch": 0.7978690243418629, - "grad_norm": 0.0, - "learning_rate": 2.067444450145142e-06, - "loss": 0.7843, - "step": 28156 - }, - { - "epoch": 0.7978973617841254, - "grad_norm": 0.0, - "learning_rate": 2.0668856519107415e-06, - "loss": 0.7012, - "step": 28157 - }, - { - "epoch": 0.7979256992263878, - "grad_norm": 0.0, - "learning_rate": 2.0663269204999094e-06, - "loss": 0.8944, - "step": 28158 - }, - { - "epoch": 0.7979540366686503, - "grad_norm": 0.0, - "learning_rate": 2.065768255917351e-06, - "loss": 0.795, - "step": 28159 - }, - { - "epoch": 0.7979823741109128, - "grad_norm": 0.0, - "learning_rate": 2.065209658167773e-06, - "loss": 0.7629, - "step": 28160 - }, - { - "epoch": 0.7980107115531752, - "grad_norm": 0.0, - "learning_rate": 2.064651127255881e-06, - "loss": 0.8751, - "step": 28161 - }, - { - "epoch": 0.7980390489954376, - "grad_norm": 0.0, - "learning_rate": 2.064092663186381e-06, - "loss": 0.7558, - "step": 28162 - }, - { - "epoch": 0.7980673864377001, - "grad_norm": 0.0, - "learning_rate": 2.0635342659639734e-06, - "loss": 0.8123, - "step": 28163 - }, - { - "epoch": 0.7980957238799626, - "grad_norm": 0.0, - "learning_rate": 2.0629759355933665e-06, - "loss": 0.7497, - "step": 28164 - }, - { - "epoch": 0.798124061322225, - "grad_norm": 0.0, - "learning_rate": 2.0624176720792587e-06, - "loss": 0.7443, - "step": 28165 - }, - { - "epoch": 0.7981523987644875, - "grad_norm": 0.0, - "learning_rate": 2.0618594754263534e-06, - "loss": 0.7894, - "step": 28166 - }, - { - "epoch": 0.79818073620675, - "grad_norm": 0.0, - "learning_rate": 2.061301345639356e-06, - "loss": 0.9195, - "step": 28167 - }, - { - "epoch": 0.7982090736490124, - "grad_norm": 0.0, - "learning_rate": 2.0607432827229635e-06, - "loss": 0.8908, - "step": 28168 - }, - { - "epoch": 0.7982374110912749, - "grad_norm": 0.0, - "learning_rate": 2.0601852866818784e-06, - "loss": 0.8413, - "step": 28169 - }, - { - "epoch": 0.7982657485335374, - "grad_norm": 0.0, - "learning_rate": 2.0596273575208035e-06, - "loss": 0.8235, - "step": 28170 - }, - { - "epoch": 0.7982940859757999, - "grad_norm": 0.0, - "learning_rate": 2.0590694952444333e-06, - "loss": 0.6608, - "step": 28171 - }, - { - "epoch": 0.7983224234180623, - "grad_norm": 0.0, - "learning_rate": 2.0585116998574693e-06, - "loss": 0.8655, - "step": 28172 - }, - { - "epoch": 0.7983507608603247, - "grad_norm": 0.0, - "learning_rate": 2.05795397136461e-06, - "loss": 0.8109, - "step": 28173 - }, - { - "epoch": 0.7983790983025872, - "grad_norm": 0.0, - "learning_rate": 2.057396309770554e-06, - "loss": 0.851, - "step": 28174 - }, - { - "epoch": 0.7984074357448496, - "grad_norm": 0.0, - "learning_rate": 2.056838715080001e-06, - "loss": 0.8013, - "step": 28175 - }, - { - "epoch": 0.7984357731871121, - "grad_norm": 0.0, - "learning_rate": 2.056281187297643e-06, - "loss": 0.7916, - "step": 28176 - }, - { - "epoch": 0.7984641106293746, - "grad_norm": 0.0, - "learning_rate": 2.0557237264281772e-06, - "loss": 0.7334, - "step": 28177 - }, - { - "epoch": 0.7984924480716371, - "grad_norm": 0.0, - "learning_rate": 2.0551663324763037e-06, - "loss": 0.8475, - "step": 28178 - }, - { - "epoch": 0.7985207855138995, - "grad_norm": 0.0, - "learning_rate": 2.0546090054467118e-06, - "loss": 0.8448, - "step": 28179 - }, - { - "epoch": 0.798549122956162, - "grad_norm": 0.0, - "learning_rate": 2.054051745344101e-06, - "loss": 0.8131, - "step": 28180 - }, - { - "epoch": 0.7985774603984245, - "grad_norm": 0.0, - "learning_rate": 2.0534945521731607e-06, - "loss": 0.7973, - "step": 28181 - }, - { - "epoch": 0.7986057978406869, - "grad_norm": 0.0, - "learning_rate": 2.052937425938587e-06, - "loss": 0.8139, - "step": 28182 - }, - { - "epoch": 0.7986341352829494, - "grad_norm": 0.0, - "learning_rate": 2.0523803666450746e-06, - "loss": 0.8239, - "step": 28183 - }, - { - "epoch": 0.7986624727252118, - "grad_norm": 0.0, - "learning_rate": 2.0518233742973114e-06, - "loss": 0.7664, - "step": 28184 - }, - { - "epoch": 0.7986908101674742, - "grad_norm": 0.0, - "learning_rate": 2.051266448899991e-06, - "loss": 0.7428, - "step": 28185 - }, - { - "epoch": 0.7987191476097367, - "grad_norm": 0.0, - "learning_rate": 2.0507095904578043e-06, - "loss": 0.7445, - "step": 28186 - }, - { - "epoch": 0.7987474850519992, - "grad_norm": 0.0, - "learning_rate": 2.0501527989754444e-06, - "loss": 0.8344, - "step": 28187 - }, - { - "epoch": 0.7987758224942617, - "grad_norm": 0.0, - "learning_rate": 2.0495960744576017e-06, - "loss": 0.8431, - "step": 28188 - }, - { - "epoch": 0.7988041599365241, - "grad_norm": 0.0, - "learning_rate": 2.04903941690896e-06, - "loss": 0.8042, - "step": 28189 - }, - { - "epoch": 0.7988324973787866, - "grad_norm": 0.0, - "learning_rate": 2.0484828263342114e-06, - "loss": 0.8106, - "step": 28190 - }, - { - "epoch": 0.7988608348210491, - "grad_norm": 0.0, - "learning_rate": 2.0479263027380493e-06, - "loss": 0.8661, - "step": 28191 - }, - { - "epoch": 0.7988891722633115, - "grad_norm": 0.0, - "learning_rate": 2.0473698461251523e-06, - "loss": 0.8172, - "step": 28192 - }, - { - "epoch": 0.798917509705574, - "grad_norm": 0.0, - "learning_rate": 2.0468134565002163e-06, - "loss": 0.7657, - "step": 28193 - }, - { - "epoch": 0.7989458471478365, - "grad_norm": 0.0, - "learning_rate": 2.0462571338679204e-06, - "loss": 0.7907, - "step": 28194 - }, - { - "epoch": 0.798974184590099, - "grad_norm": 0.0, - "learning_rate": 2.0457008782329546e-06, - "loss": 0.8459, - "step": 28195 - }, - { - "epoch": 0.7990025220323613, - "grad_norm": 0.0, - "learning_rate": 2.0451446896000038e-06, - "loss": 0.7632, - "step": 28196 - }, - { - "epoch": 0.7990308594746238, - "grad_norm": 0.0, - "learning_rate": 2.0445885679737533e-06, - "loss": 0.788, - "step": 28197 - }, - { - "epoch": 0.7990591969168863, - "grad_norm": 0.0, - "learning_rate": 2.04403251335889e-06, - "loss": 0.8492, - "step": 28198 - }, - { - "epoch": 0.7990875343591487, - "grad_norm": 0.0, - "learning_rate": 2.043476525760093e-06, - "loss": 0.7338, - "step": 28199 - }, - { - "epoch": 0.7991158718014112, - "grad_norm": 0.0, - "learning_rate": 2.042920605182048e-06, - "loss": 0.7321, - "step": 28200 - }, - { - "epoch": 0.7991442092436737, - "grad_norm": 0.0, - "learning_rate": 2.0423647516294398e-06, - "loss": 0.8003, - "step": 28201 - }, - { - "epoch": 0.7991725466859362, - "grad_norm": 0.0, - "learning_rate": 2.041808965106945e-06, - "loss": 0.8971, - "step": 28202 - }, - { - "epoch": 0.7992008841281986, - "grad_norm": 0.0, - "learning_rate": 2.0412532456192492e-06, - "loss": 0.8134, - "step": 28203 - }, - { - "epoch": 0.7992292215704611, - "grad_norm": 0.0, - "learning_rate": 2.040697593171036e-06, - "loss": 0.8448, - "step": 28204 - }, - { - "epoch": 0.7992575590127236, - "grad_norm": 0.0, - "learning_rate": 2.0401420077669788e-06, - "loss": 0.8366, - "step": 28205 - }, - { - "epoch": 0.7992858964549859, - "grad_norm": 0.0, - "learning_rate": 2.0395864894117613e-06, - "loss": 0.6802, - "step": 28206 - }, - { - "epoch": 0.7993142338972484, - "grad_norm": 0.0, - "learning_rate": 2.0390310381100664e-06, - "loss": 0.8021, - "step": 28207 - }, - { - "epoch": 0.7993425713395109, - "grad_norm": 0.0, - "learning_rate": 2.038475653866566e-06, - "loss": 0.8995, - "step": 28208 - }, - { - "epoch": 0.7993709087817733, - "grad_norm": 0.0, - "learning_rate": 2.0379203366859413e-06, - "loss": 0.8059, - "step": 28209 - }, - { - "epoch": 0.7993992462240358, - "grad_norm": 0.0, - "learning_rate": 2.037365086572871e-06, - "loss": 0.8449, - "step": 28210 - }, - { - "epoch": 0.7994275836662983, - "grad_norm": 0.0, - "learning_rate": 2.036809903532031e-06, - "loss": 0.743, - "step": 28211 - }, - { - "epoch": 0.7994559211085608, - "grad_norm": 0.0, - "learning_rate": 2.0362547875681006e-06, - "loss": 0.8348, - "step": 28212 - }, - { - "epoch": 0.7994842585508232, - "grad_norm": 0.0, - "learning_rate": 2.0356997386857515e-06, - "loss": 0.735, - "step": 28213 - }, - { - "epoch": 0.7995125959930857, - "grad_norm": 0.0, - "learning_rate": 2.035144756889663e-06, - "loss": 0.8539, - "step": 28214 - }, - { - "epoch": 0.7995409334353482, - "grad_norm": 0.0, - "learning_rate": 2.0345898421845056e-06, - "loss": 0.8313, - "step": 28215 - }, - { - "epoch": 0.7995692708776105, - "grad_norm": 0.0, - "learning_rate": 2.034034994574956e-06, - "loss": 0.9464, - "step": 28216 - }, - { - "epoch": 0.799597608319873, - "grad_norm": 0.0, - "learning_rate": 2.0334802140656896e-06, - "loss": 0.7657, - "step": 28217 - }, - { - "epoch": 0.7996259457621355, - "grad_norm": 0.0, - "learning_rate": 2.032925500661376e-06, - "loss": 0.7319, - "step": 28218 - }, - { - "epoch": 0.799654283204398, - "grad_norm": 0.0, - "learning_rate": 2.0323708543666888e-06, - "loss": 0.8215, - "step": 28219 - }, - { - "epoch": 0.7996826206466604, - "grad_norm": 0.0, - "learning_rate": 2.0318162751863037e-06, - "loss": 0.7924, - "step": 28220 - }, - { - "epoch": 0.7997109580889229, - "grad_norm": 0.0, - "learning_rate": 2.0312617631248855e-06, - "loss": 0.817, - "step": 28221 - }, - { - "epoch": 0.7997392955311854, - "grad_norm": 0.0, - "learning_rate": 2.0307073181871095e-06, - "loss": 0.7407, - "step": 28222 - }, - { - "epoch": 0.7997676329734478, - "grad_norm": 0.0, - "learning_rate": 2.030152940377644e-06, - "loss": 0.8474, - "step": 28223 - }, - { - "epoch": 0.7997959704157103, - "grad_norm": 0.0, - "learning_rate": 2.0295986297011603e-06, - "loss": 0.8151, - "step": 28224 - }, - { - "epoch": 0.7998243078579728, - "grad_norm": 0.0, - "learning_rate": 2.029044386162331e-06, - "loss": 0.7105, - "step": 28225 - }, - { - "epoch": 0.7998526453002353, - "grad_norm": 0.0, - "learning_rate": 2.0284902097658166e-06, - "loss": 0.7275, - "step": 28226 - }, - { - "epoch": 0.7998809827424976, - "grad_norm": 0.0, - "learning_rate": 2.02793610051629e-06, - "loss": 0.7628, - "step": 28227 - }, - { - "epoch": 0.7999093201847601, - "grad_norm": 0.0, - "learning_rate": 2.02738205841842e-06, - "loss": 0.8118, - "step": 28228 - }, - { - "epoch": 0.7999376576270226, - "grad_norm": 0.0, - "learning_rate": 2.0268280834768695e-06, - "loss": 0.8629, - "step": 28229 - }, - { - "epoch": 0.799965995069285, - "grad_norm": 0.0, - "learning_rate": 2.026274175696309e-06, - "loss": 0.764, - "step": 28230 - }, - { - "epoch": 0.7999943325115475, - "grad_norm": 0.0, - "learning_rate": 2.025720335081399e-06, - "loss": 0.8286, - "step": 28231 - }, - { - "epoch": 0.80002266995381, - "grad_norm": 0.0, - "learning_rate": 2.025166561636809e-06, - "loss": 0.7882, - "step": 28232 - }, - { - "epoch": 0.8000510073960724, - "grad_norm": 0.0, - "learning_rate": 2.024612855367205e-06, - "loss": 0.8938, - "step": 28233 - }, - { - "epoch": 0.8000793448383349, - "grad_norm": 0.0, - "learning_rate": 2.0240592162772454e-06, - "loss": 0.8009, - "step": 28234 - }, - { - "epoch": 0.8001076822805974, - "grad_norm": 0.0, - "learning_rate": 2.0235056443715962e-06, - "loss": 0.8517, - "step": 28235 - }, - { - "epoch": 0.8001360197228599, - "grad_norm": 0.0, - "learning_rate": 2.022952139654922e-06, - "loss": 0.7931, - "step": 28236 - }, - { - "epoch": 0.8001643571651222, - "grad_norm": 0.0, - "learning_rate": 2.0223987021318835e-06, - "loss": 0.7972, - "step": 28237 - }, - { - "epoch": 0.8001926946073847, - "grad_norm": 0.0, - "learning_rate": 2.0218453318071462e-06, - "loss": 0.7682, - "step": 28238 - }, - { - "epoch": 0.8002210320496472, - "grad_norm": 0.0, - "learning_rate": 2.0212920286853656e-06, - "loss": 0.8135, - "step": 28239 - }, - { - "epoch": 0.8002493694919096, - "grad_norm": 0.0, - "learning_rate": 2.020738792771204e-06, - "loss": 0.7517, - "step": 28240 - }, - { - "epoch": 0.8002777069341721, - "grad_norm": 0.0, - "learning_rate": 2.0201856240693264e-06, - "loss": 0.7712, - "step": 28241 - }, - { - "epoch": 0.8003060443764346, - "grad_norm": 0.0, - "learning_rate": 2.019632522584386e-06, - "loss": 0.842, - "step": 28242 - }, - { - "epoch": 0.8003343818186971, - "grad_norm": 0.0, - "learning_rate": 2.0190794883210462e-06, - "loss": 0.7464, - "step": 28243 - }, - { - "epoch": 0.8003627192609595, - "grad_norm": 0.0, - "learning_rate": 2.0185265212839612e-06, - "loss": 0.8402, - "step": 28244 - }, - { - "epoch": 0.800391056703222, - "grad_norm": 0.0, - "learning_rate": 2.0179736214777913e-06, - "loss": 0.8063, - "step": 28245 - }, - { - "epoch": 0.8004193941454845, - "grad_norm": 0.0, - "learning_rate": 2.017420788907196e-06, - "loss": 0.8833, - "step": 28246 - }, - { - "epoch": 0.8004477315877468, - "grad_norm": 0.0, - "learning_rate": 2.0168680235768267e-06, - "loss": 0.8431, - "step": 28247 - }, - { - "epoch": 0.8004760690300093, - "grad_norm": 0.0, - "learning_rate": 2.0163153254913436e-06, - "loss": 0.7838, - "step": 28248 - }, - { - "epoch": 0.8005044064722718, - "grad_norm": 0.0, - "learning_rate": 2.0157626946554e-06, - "loss": 0.8373, - "step": 28249 - }, - { - "epoch": 0.8005327439145343, - "grad_norm": 0.0, - "learning_rate": 2.0152101310736527e-06, - "loss": 0.7799, - "step": 28250 - }, - { - "epoch": 0.8005610813567967, - "grad_norm": 0.0, - "learning_rate": 2.0146576347507586e-06, - "loss": 0.7894, - "step": 28251 - }, - { - "epoch": 0.8005894187990592, - "grad_norm": 0.0, - "learning_rate": 2.0141052056913644e-06, - "loss": 0.8429, - "step": 28252 - }, - { - "epoch": 0.8006177562413217, - "grad_norm": 0.0, - "learning_rate": 2.0135528439001294e-06, - "loss": 0.8737, - "step": 28253 - }, - { - "epoch": 0.8006460936835841, - "grad_norm": 0.0, - "learning_rate": 2.0130005493817063e-06, - "loss": 0.8359, - "step": 28254 - }, - { - "epoch": 0.8006744311258466, - "grad_norm": 0.0, - "learning_rate": 2.012448322140742e-06, - "loss": 0.7909, - "step": 28255 - }, - { - "epoch": 0.8007027685681091, - "grad_norm": 0.0, - "learning_rate": 2.0118961621818923e-06, - "loss": 0.7097, - "step": 28256 - }, - { - "epoch": 0.8007311060103715, - "grad_norm": 0.0, - "learning_rate": 2.0113440695098107e-06, - "loss": 0.8443, - "step": 28257 - }, - { - "epoch": 0.800759443452634, - "grad_norm": 0.0, - "learning_rate": 2.010792044129142e-06, - "loss": 0.874, - "step": 28258 - }, - { - "epoch": 0.8007877808948964, - "grad_norm": 0.0, - "learning_rate": 2.01024008604454e-06, - "loss": 0.8446, - "step": 28259 - }, - { - "epoch": 0.8008161183371589, - "grad_norm": 0.0, - "learning_rate": 2.0096881952606507e-06, - "loss": 0.7935, - "step": 28260 - }, - { - "epoch": 0.8008444557794213, - "grad_norm": 0.0, - "learning_rate": 2.0091363717821255e-06, - "loss": 0.7846, - "step": 28261 - }, - { - "epoch": 0.8008727932216838, - "grad_norm": 0.0, - "learning_rate": 2.0085846156136113e-06, - "loss": 0.7994, - "step": 28262 - }, - { - "epoch": 0.8009011306639463, - "grad_norm": 0.0, - "learning_rate": 2.0080329267597567e-06, - "loss": 0.8354, - "step": 28263 - }, - { - "epoch": 0.8009294681062087, - "grad_norm": 0.0, - "learning_rate": 2.0074813052252075e-06, - "loss": 0.8149, - "step": 28264 - }, - { - "epoch": 0.8009578055484712, - "grad_norm": 0.0, - "learning_rate": 2.0069297510146145e-06, - "loss": 0.777, - "step": 28265 - }, - { - "epoch": 0.8009861429907337, - "grad_norm": 0.0, - "learning_rate": 2.006378264132618e-06, - "loss": 0.7332, - "step": 28266 - }, - { - "epoch": 0.8010144804329962, - "grad_norm": 0.0, - "learning_rate": 2.005826844583868e-06, - "loss": 0.7744, - "step": 28267 - }, - { - "epoch": 0.8010428178752586, - "grad_norm": 0.0, - "learning_rate": 2.0052754923730055e-06, - "loss": 0.791, - "step": 28268 - }, - { - "epoch": 0.801071155317521, - "grad_norm": 0.0, - "learning_rate": 2.004724207504675e-06, - "loss": 0.7703, - "step": 28269 - }, - { - "epoch": 0.8010994927597835, - "grad_norm": 0.0, - "learning_rate": 2.004172989983525e-06, - "loss": 0.7831, - "step": 28270 - }, - { - "epoch": 0.8011278302020459, - "grad_norm": 0.0, - "learning_rate": 2.0036218398141917e-06, - "loss": 0.74, - "step": 28271 - }, - { - "epoch": 0.8011561676443084, - "grad_norm": 0.0, - "learning_rate": 2.003070757001324e-06, - "loss": 0.7873, - "step": 28272 - }, - { - "epoch": 0.8011845050865709, - "grad_norm": 0.0, - "learning_rate": 2.0025197415495578e-06, - "loss": 0.8224, - "step": 28273 - }, - { - "epoch": 0.8012128425288334, - "grad_norm": 0.0, - "learning_rate": 2.0019687934635378e-06, - "loss": 0.8797, - "step": 28274 - }, - { - "epoch": 0.8012411799710958, - "grad_norm": 0.0, - "learning_rate": 2.001417912747905e-06, - "loss": 0.8646, - "step": 28275 - }, - { - "epoch": 0.8012695174133583, - "grad_norm": 0.0, - "learning_rate": 2.0008670994073e-06, - "loss": 0.7499, - "step": 28276 - }, - { - "epoch": 0.8012978548556208, - "grad_norm": 0.0, - "learning_rate": 2.000316353446361e-06, - "loss": 0.743, - "step": 28277 - }, - { - "epoch": 0.8013261922978832, - "grad_norm": 0.0, - "learning_rate": 1.99976567486973e-06, - "loss": 0.8867, - "step": 28278 - }, - { - "epoch": 0.8013545297401456, - "grad_norm": 0.0, - "learning_rate": 1.999215063682042e-06, - "loss": 0.7451, - "step": 28279 - }, - { - "epoch": 0.8013828671824081, - "grad_norm": 0.0, - "learning_rate": 1.9986645198879385e-06, - "loss": 0.7792, - "step": 28280 - }, - { - "epoch": 0.8014112046246705, - "grad_norm": 0.0, - "learning_rate": 1.998114043492053e-06, - "loss": 0.8606, - "step": 28281 - }, - { - "epoch": 0.801439542066933, - "grad_norm": 0.0, - "learning_rate": 1.9975636344990233e-06, - "loss": 0.7161, - "step": 28282 - }, - { - "epoch": 0.8014678795091955, - "grad_norm": 0.0, - "learning_rate": 1.99701329291349e-06, - "loss": 0.8242, - "step": 28283 - }, - { - "epoch": 0.801496216951458, - "grad_norm": 0.0, - "learning_rate": 1.9964630187400834e-06, - "loss": 0.8103, - "step": 28284 - }, - { - "epoch": 0.8015245543937204, - "grad_norm": 0.0, - "learning_rate": 1.99591281198344e-06, - "loss": 0.8122, - "step": 28285 - }, - { - "epoch": 0.8015528918359829, - "grad_norm": 0.0, - "learning_rate": 1.995362672648198e-06, - "loss": 0.7589, - "step": 28286 - }, - { - "epoch": 0.8015812292782454, - "grad_norm": 0.0, - "learning_rate": 1.9948126007389866e-06, - "loss": 0.8677, - "step": 28287 - }, - { - "epoch": 0.8016095667205078, - "grad_norm": 0.0, - "learning_rate": 1.994262596260441e-06, - "loss": 0.7821, - "step": 28288 - }, - { - "epoch": 0.8016379041627703, - "grad_norm": 0.0, - "learning_rate": 1.993712659217194e-06, - "loss": 0.9125, - "step": 28289 - }, - { - "epoch": 0.8016662416050327, - "grad_norm": 0.0, - "learning_rate": 1.9931627896138785e-06, - "loss": 0.8001, - "step": 28290 - }, - { - "epoch": 0.8016945790472952, - "grad_norm": 0.0, - "learning_rate": 1.9926129874551294e-06, - "loss": 0.8516, - "step": 28291 - }, - { - "epoch": 0.8017229164895576, - "grad_norm": 0.0, - "learning_rate": 1.9920632527455707e-06, - "loss": 0.7959, - "step": 28292 - }, - { - "epoch": 0.8017512539318201, - "grad_norm": 0.0, - "learning_rate": 1.9915135854898383e-06, - "loss": 0.8265, - "step": 28293 - }, - { - "epoch": 0.8017795913740826, - "grad_norm": 0.0, - "learning_rate": 1.9909639856925623e-06, - "loss": 0.8286, - "step": 28294 - }, - { - "epoch": 0.801807928816345, - "grad_norm": 0.0, - "learning_rate": 1.9904144533583693e-06, - "loss": 0.8275, - "step": 28295 - }, - { - "epoch": 0.8018362662586075, - "grad_norm": 0.0, - "learning_rate": 1.989864988491891e-06, - "loss": 0.7905, - "step": 28296 - }, - { - "epoch": 0.80186460370087, - "grad_norm": 0.0, - "learning_rate": 1.9893155910977523e-06, - "loss": 0.9146, - "step": 28297 - }, - { - "epoch": 0.8018929411431325, - "grad_norm": 0.0, - "learning_rate": 1.9887662611805834e-06, - "loss": 0.7398, - "step": 28298 - }, - { - "epoch": 0.8019212785853949, - "grad_norm": 0.0, - "learning_rate": 1.988216998745014e-06, - "loss": 0.8267, - "step": 28299 - }, - { - "epoch": 0.8019496160276574, - "grad_norm": 0.0, - "learning_rate": 1.9876678037956645e-06, - "loss": 0.7058, - "step": 28300 - }, - { - "epoch": 0.8019779534699198, - "grad_norm": 0.0, - "learning_rate": 1.987118676337164e-06, - "loss": 0.871, - "step": 28301 - }, - { - "epoch": 0.8020062909121822, - "grad_norm": 0.0, - "learning_rate": 1.98656961637414e-06, - "loss": 0.8767, - "step": 28302 - }, - { - "epoch": 0.8020346283544447, - "grad_norm": 0.0, - "learning_rate": 1.9860206239112144e-06, - "loss": 0.717, - "step": 28303 - }, - { - "epoch": 0.8020629657967072, - "grad_norm": 0.0, - "learning_rate": 1.9854716989530166e-06, - "loss": 0.7603, - "step": 28304 - }, - { - "epoch": 0.8020913032389696, - "grad_norm": 0.0, - "learning_rate": 1.9849228415041633e-06, - "loss": 0.7244, - "step": 28305 - }, - { - "epoch": 0.8021196406812321, - "grad_norm": 0.0, - "learning_rate": 1.9843740515692824e-06, - "loss": 0.7087, - "step": 28306 - }, - { - "epoch": 0.8021479781234946, - "grad_norm": 0.0, - "learning_rate": 1.9838253291529973e-06, - "loss": 0.8484, - "step": 28307 - }, - { - "epoch": 0.8021763155657571, - "grad_norm": 0.0, - "learning_rate": 1.983276674259925e-06, - "loss": 0.8324, - "step": 28308 - }, - { - "epoch": 0.8022046530080195, - "grad_norm": 0.0, - "learning_rate": 1.982728086894694e-06, - "loss": 0.8551, - "step": 28309 - }, - { - "epoch": 0.802232990450282, - "grad_norm": 0.0, - "learning_rate": 1.982179567061918e-06, - "loss": 0.7818, - "step": 28310 - }, - { - "epoch": 0.8022613278925445, - "grad_norm": 0.0, - "learning_rate": 1.9816311147662216e-06, - "loss": 0.8183, - "step": 28311 - }, - { - "epoch": 0.8022896653348068, - "grad_norm": 0.0, - "learning_rate": 1.981082730012226e-06, - "loss": 0.8663, - "step": 28312 - }, - { - "epoch": 0.8023180027770693, - "grad_norm": 0.0, - "learning_rate": 1.9805344128045456e-06, - "loss": 0.8116, - "step": 28313 - }, - { - "epoch": 0.8023463402193318, - "grad_norm": 0.0, - "learning_rate": 1.9799861631478013e-06, - "loss": 0.8186, - "step": 28314 - }, - { - "epoch": 0.8023746776615943, - "grad_norm": 0.0, - "learning_rate": 1.9794379810466123e-06, - "loss": 0.8362, - "step": 28315 - }, - { - "epoch": 0.8024030151038567, - "grad_norm": 0.0, - "learning_rate": 1.9788898665055958e-06, - "loss": 0.7553, - "step": 28316 - }, - { - "epoch": 0.8024313525461192, - "grad_norm": 0.0, - "learning_rate": 1.97834181952937e-06, - "loss": 0.7835, - "step": 28317 - }, - { - "epoch": 0.8024596899883817, - "grad_norm": 0.0, - "learning_rate": 1.9777938401225483e-06, - "loss": 0.8012, - "step": 28318 - }, - { - "epoch": 0.8024880274306441, - "grad_norm": 0.0, - "learning_rate": 1.9772459282897484e-06, - "loss": 0.8078, - "step": 28319 - }, - { - "epoch": 0.8025163648729066, - "grad_norm": 0.0, - "learning_rate": 1.9766980840355876e-06, - "loss": 0.7942, - "step": 28320 - }, - { - "epoch": 0.8025447023151691, - "grad_norm": 0.0, - "learning_rate": 1.976150307364675e-06, - "loss": 0.7371, - "step": 28321 - }, - { - "epoch": 0.8025730397574314, - "grad_norm": 0.0, - "learning_rate": 1.9756025982816284e-06, - "loss": 0.6754, - "step": 28322 - }, - { - "epoch": 0.8026013771996939, - "grad_norm": 0.0, - "learning_rate": 1.9750549567910627e-06, - "loss": 0.8174, - "step": 28323 - }, - { - "epoch": 0.8026297146419564, - "grad_norm": 0.0, - "learning_rate": 1.974507382897588e-06, - "loss": 0.6738, - "step": 28324 - }, - { - "epoch": 0.8026580520842189, - "grad_norm": 0.0, - "learning_rate": 1.973959876605819e-06, - "loss": 0.8439, - "step": 28325 - }, - { - "epoch": 0.8026863895264813, - "grad_norm": 0.0, - "learning_rate": 1.9734124379203645e-06, - "loss": 0.8873, - "step": 28326 - }, - { - "epoch": 0.8027147269687438, - "grad_norm": 0.0, - "learning_rate": 1.9728650668458373e-06, - "loss": 0.7632, - "step": 28327 - }, - { - "epoch": 0.8027430644110063, - "grad_norm": 0.0, - "learning_rate": 1.9723177633868483e-06, - "loss": 0.8242, - "step": 28328 - }, - { - "epoch": 0.8027714018532687, - "grad_norm": 0.0, - "learning_rate": 1.971770527548008e-06, - "loss": 0.8072, - "step": 28329 - }, - { - "epoch": 0.8027997392955312, - "grad_norm": 0.0, - "learning_rate": 1.971223359333929e-06, - "loss": 0.7491, - "step": 28330 - }, - { - "epoch": 0.8028280767377937, - "grad_norm": 0.0, - "learning_rate": 1.9706762587492134e-06, - "loss": 0.8146, - "step": 28331 - }, - { - "epoch": 0.8028564141800562, - "grad_norm": 0.0, - "learning_rate": 1.970129225798474e-06, - "loss": 0.8583, - "step": 28332 - }, - { - "epoch": 0.8028847516223185, - "grad_norm": 0.0, - "learning_rate": 1.9695822604863203e-06, - "loss": 0.7586, - "step": 28333 - }, - { - "epoch": 0.802913089064581, - "grad_norm": 0.0, - "learning_rate": 1.969035362817354e-06, - "loss": 0.8511, - "step": 28334 - }, - { - "epoch": 0.8029414265068435, - "grad_norm": 0.0, - "learning_rate": 1.9684885327961866e-06, - "loss": 0.7618, - "step": 28335 - }, - { - "epoch": 0.8029697639491059, - "grad_norm": 0.0, - "learning_rate": 1.9679417704274238e-06, - "loss": 0.8634, - "step": 28336 - }, - { - "epoch": 0.8029981013913684, - "grad_norm": 0.0, - "learning_rate": 1.967395075715668e-06, - "loss": 0.7762, - "step": 28337 - }, - { - "epoch": 0.8030264388336309, - "grad_norm": 0.0, - "learning_rate": 1.9668484486655294e-06, - "loss": 0.8931, - "step": 28338 - }, - { - "epoch": 0.8030547762758934, - "grad_norm": 0.0, - "learning_rate": 1.966301889281607e-06, - "loss": 0.8044, - "step": 28339 - }, - { - "epoch": 0.8030831137181558, - "grad_norm": 0.0, - "learning_rate": 1.9657553975685063e-06, - "loss": 0.9227, - "step": 28340 - }, - { - "epoch": 0.8031114511604183, - "grad_norm": 0.0, - "learning_rate": 1.965208973530831e-06, - "loss": 0.7324, - "step": 28341 - }, - { - "epoch": 0.8031397886026808, - "grad_norm": 0.0, - "learning_rate": 1.964662617173184e-06, - "loss": 0.7412, - "step": 28342 - }, - { - "epoch": 0.8031681260449431, - "grad_norm": 0.0, - "learning_rate": 1.9641163285001686e-06, - "loss": 0.8055, - "step": 28343 - }, - { - "epoch": 0.8031964634872056, - "grad_norm": 0.0, - "learning_rate": 1.9635701075163884e-06, - "loss": 0.7581, - "step": 28344 - }, - { - "epoch": 0.8032248009294681, - "grad_norm": 0.0, - "learning_rate": 1.9630239542264373e-06, - "loss": 0.8138, - "step": 28345 - }, - { - "epoch": 0.8032531383717305, - "grad_norm": 0.0, - "learning_rate": 1.9624778686349232e-06, - "loss": 0.8341, - "step": 28346 - }, - { - "epoch": 0.803281475813993, - "grad_norm": 0.0, - "learning_rate": 1.96193185074644e-06, - "loss": 0.7397, - "step": 28347 - }, - { - "epoch": 0.8033098132562555, - "grad_norm": 0.0, - "learning_rate": 1.96138590056559e-06, - "loss": 0.8689, - "step": 28348 - }, - { - "epoch": 0.803338150698518, - "grad_norm": 0.0, - "learning_rate": 1.9608400180969743e-06, - "loss": 0.7206, - "step": 28349 - }, - { - "epoch": 0.8033664881407804, - "grad_norm": 0.0, - "learning_rate": 1.9602942033451853e-06, - "loss": 0.7699, - "step": 28350 - }, - { - "epoch": 0.8033948255830429, - "grad_norm": 0.0, - "learning_rate": 1.959748456314824e-06, - "loss": 0.7613, - "step": 28351 - }, - { - "epoch": 0.8034231630253054, - "grad_norm": 0.0, - "learning_rate": 1.9592027770104905e-06, - "loss": 0.7489, - "step": 28352 - }, - { - "epoch": 0.8034515004675677, - "grad_norm": 0.0, - "learning_rate": 1.9586571654367737e-06, - "loss": 0.8121, - "step": 28353 - }, - { - "epoch": 0.8034798379098302, - "grad_norm": 0.0, - "learning_rate": 1.9581116215982743e-06, - "loss": 0.7269, - "step": 28354 - }, - { - "epoch": 0.8035081753520927, - "grad_norm": 0.0, - "learning_rate": 1.9575661454995877e-06, - "loss": 0.7795, - "step": 28355 - }, - { - "epoch": 0.8035365127943552, - "grad_norm": 0.0, - "learning_rate": 1.9570207371453075e-06, - "loss": 0.9281, - "step": 28356 - }, - { - "epoch": 0.8035648502366176, - "grad_norm": 0.0, - "learning_rate": 1.956475396540031e-06, - "loss": 0.7192, - "step": 28357 - }, - { - "epoch": 0.8035931876788801, - "grad_norm": 0.0, - "learning_rate": 1.9559301236883454e-06, - "loss": 0.8565, - "step": 28358 - }, - { - "epoch": 0.8036215251211426, - "grad_norm": 0.0, - "learning_rate": 1.9553849185948514e-06, - "loss": 0.6497, - "step": 28359 - }, - { - "epoch": 0.803649862563405, - "grad_norm": 0.0, - "learning_rate": 1.954839781264135e-06, - "loss": 0.7893, - "step": 28360 - }, - { - "epoch": 0.8036782000056675, - "grad_norm": 0.0, - "learning_rate": 1.9542947117007894e-06, - "loss": 0.9075, - "step": 28361 - }, - { - "epoch": 0.80370653744793, - "grad_norm": 0.0, - "learning_rate": 1.9537497099094104e-06, - "loss": 0.7892, - "step": 28362 - }, - { - "epoch": 0.8037348748901925, - "grad_norm": 0.0, - "learning_rate": 1.953204775894584e-06, - "loss": 0.8381, - "step": 28363 - }, - { - "epoch": 0.8037632123324548, - "grad_norm": 0.0, - "learning_rate": 1.9526599096609e-06, - "loss": 0.8076, - "step": 28364 - }, - { - "epoch": 0.8037915497747173, - "grad_norm": 0.0, - "learning_rate": 1.952115111212953e-06, - "loss": 0.8724, - "step": 28365 - }, - { - "epoch": 0.8038198872169798, - "grad_norm": 0.0, - "learning_rate": 1.9515703805553277e-06, - "loss": 0.8456, - "step": 28366 - }, - { - "epoch": 0.8038482246592422, - "grad_norm": 0.0, - "learning_rate": 1.9510257176926117e-06, - "loss": 0.8114, - "step": 28367 - }, - { - "epoch": 0.8038765621015047, - "grad_norm": 0.0, - "learning_rate": 1.9504811226293964e-06, - "loss": 0.8231, - "step": 28368 - }, - { - "epoch": 0.8039048995437672, - "grad_norm": 0.0, - "learning_rate": 1.9499365953702678e-06, - "loss": 0.7719, - "step": 28369 - }, - { - "epoch": 0.8039332369860296, - "grad_norm": 0.0, - "learning_rate": 1.949392135919814e-06, - "loss": 0.733, - "step": 28370 - }, - { - "epoch": 0.8039615744282921, - "grad_norm": 0.0, - "learning_rate": 1.9488477442826183e-06, - "loss": 0.8317, - "step": 28371 - }, - { - "epoch": 0.8039899118705546, - "grad_norm": 0.0, - "learning_rate": 1.948303420463268e-06, - "loss": 0.8102, - "step": 28372 - }, - { - "epoch": 0.8040182493128171, - "grad_norm": 0.0, - "learning_rate": 1.9477591644663496e-06, - "loss": 0.9002, - "step": 28373 - }, - { - "epoch": 0.8040465867550795, - "grad_norm": 0.0, - "learning_rate": 1.947214976296443e-06, - "loss": 0.8601, - "step": 28374 - }, - { - "epoch": 0.804074924197342, - "grad_norm": 0.0, - "learning_rate": 1.946670855958138e-06, - "loss": 0.7625, - "step": 28375 - }, - { - "epoch": 0.8041032616396044, - "grad_norm": 0.0, - "learning_rate": 1.946126803456013e-06, - "loss": 0.7308, - "step": 28376 - }, - { - "epoch": 0.8041315990818668, - "grad_norm": 0.0, - "learning_rate": 1.945582818794652e-06, - "loss": 0.8467, - "step": 28377 - }, - { - "epoch": 0.8041599365241293, - "grad_norm": 0.0, - "learning_rate": 1.9450389019786407e-06, - "loss": 0.8591, - "step": 28378 - }, - { - "epoch": 0.8041882739663918, - "grad_norm": 0.0, - "learning_rate": 1.944495053012555e-06, - "loss": 0.7797, - "step": 28379 - }, - { - "epoch": 0.8042166114086543, - "grad_norm": 0.0, - "learning_rate": 1.943951271900979e-06, - "loss": 0.9085, - "step": 28380 - }, - { - "epoch": 0.8042449488509167, - "grad_norm": 0.0, - "learning_rate": 1.9434075586484922e-06, - "loss": 0.8677, - "step": 28381 - }, - { - "epoch": 0.8042732862931792, - "grad_norm": 0.0, - "learning_rate": 1.9428639132596774e-06, - "loss": 0.7141, - "step": 28382 - }, - { - "epoch": 0.8043016237354417, - "grad_norm": 0.0, - "learning_rate": 1.942320335739112e-06, - "loss": 0.8048, - "step": 28383 - }, - { - "epoch": 0.8043299611777041, - "grad_norm": 0.0, - "learning_rate": 1.9417768260913726e-06, - "loss": 0.9076, - "step": 28384 - }, - { - "epoch": 0.8043582986199665, - "grad_norm": 0.0, - "learning_rate": 1.941233384321041e-06, - "loss": 0.7601, - "step": 28385 - }, - { - "epoch": 0.804386636062229, - "grad_norm": 0.0, - "learning_rate": 1.9406900104326944e-06, - "loss": 0.8434, - "step": 28386 - }, - { - "epoch": 0.8044149735044915, - "grad_norm": 0.0, - "learning_rate": 1.9401467044309054e-06, - "loss": 0.7891, - "step": 28387 - }, - { - "epoch": 0.8044433109467539, - "grad_norm": 0.0, - "learning_rate": 1.939603466320257e-06, - "loss": 0.7388, - "step": 28388 - }, - { - "epoch": 0.8044716483890164, - "grad_norm": 0.0, - "learning_rate": 1.9390602961053194e-06, - "loss": 0.761, - "step": 28389 - }, - { - "epoch": 0.8044999858312789, - "grad_norm": 0.0, - "learning_rate": 1.93851719379067e-06, - "loss": 0.8247, - "step": 28390 - }, - { - "epoch": 0.8045283232735413, - "grad_norm": 0.0, - "learning_rate": 1.9379741593808865e-06, - "loss": 0.7778, - "step": 28391 - }, - { - "epoch": 0.8045566607158038, - "grad_norm": 0.0, - "learning_rate": 1.937431192880537e-06, - "loss": 0.7858, - "step": 28392 - }, - { - "epoch": 0.8045849981580663, - "grad_norm": 0.0, - "learning_rate": 1.9368882942941992e-06, - "loss": 0.8848, - "step": 28393 - }, - { - "epoch": 0.8046133356003287, - "grad_norm": 0.0, - "learning_rate": 1.9363454636264455e-06, - "loss": 0.7411, - "step": 28394 - }, - { - "epoch": 0.8046416730425912, - "grad_norm": 0.0, - "learning_rate": 1.935802700881848e-06, - "loss": 0.8004, - "step": 28395 - }, - { - "epoch": 0.8046700104848536, - "grad_norm": 0.0, - "learning_rate": 1.935260006064983e-06, - "loss": 0.7778, - "step": 28396 - }, - { - "epoch": 0.8046983479271161, - "grad_norm": 0.0, - "learning_rate": 1.934717379180413e-06, - "loss": 0.8773, - "step": 28397 - }, - { - "epoch": 0.8047266853693785, - "grad_norm": 0.0, - "learning_rate": 1.934174820232715e-06, - "loss": 0.887, - "step": 28398 - }, - { - "epoch": 0.804755022811641, - "grad_norm": 0.0, - "learning_rate": 1.933632329226459e-06, - "loss": 0.7593, - "step": 28399 - }, - { - "epoch": 0.8047833602539035, - "grad_norm": 0.0, - "learning_rate": 1.933089906166212e-06, - "loss": 0.843, - "step": 28400 - }, - { - "epoch": 0.8048116976961659, - "grad_norm": 0.0, - "learning_rate": 1.932547551056544e-06, - "loss": 0.8336, - "step": 28401 - }, - { - "epoch": 0.8048400351384284, - "grad_norm": 0.0, - "learning_rate": 1.9320052639020257e-06, - "loss": 0.7788, - "step": 28402 - }, - { - "epoch": 0.8048683725806909, - "grad_norm": 0.0, - "learning_rate": 1.9314630447072202e-06, - "loss": 0.7853, - "step": 28403 - }, - { - "epoch": 0.8048967100229534, - "grad_norm": 0.0, - "learning_rate": 1.930920893476701e-06, - "loss": 0.7485, - "step": 28404 - }, - { - "epoch": 0.8049250474652158, - "grad_norm": 0.0, - "learning_rate": 1.930378810215029e-06, - "loss": 0.7955, - "step": 28405 - }, - { - "epoch": 0.8049533849074783, - "grad_norm": 0.0, - "learning_rate": 1.9298367949267726e-06, - "loss": 0.6941, - "step": 28406 - }, - { - "epoch": 0.8049817223497407, - "grad_norm": 0.0, - "learning_rate": 1.9292948476164976e-06, - "loss": 0.8331, - "step": 28407 - }, - { - "epoch": 0.8050100597920031, - "grad_norm": 0.0, - "learning_rate": 1.9287529682887685e-06, - "loss": 0.8215, - "step": 28408 - }, - { - "epoch": 0.8050383972342656, - "grad_norm": 0.0, - "learning_rate": 1.928211156948151e-06, - "loss": 0.8037, - "step": 28409 - }, - { - "epoch": 0.8050667346765281, - "grad_norm": 0.0, - "learning_rate": 1.9276694135992115e-06, - "loss": 0.9016, - "step": 28410 - }, - { - "epoch": 0.8050950721187906, - "grad_norm": 0.0, - "learning_rate": 1.927127738246507e-06, - "loss": 0.8015, - "step": 28411 - }, - { - "epoch": 0.805123409561053, - "grad_norm": 0.0, - "learning_rate": 1.9265861308946055e-06, - "loss": 0.8407, - "step": 28412 - }, - { - "epoch": 0.8051517470033155, - "grad_norm": 0.0, - "learning_rate": 1.926044591548064e-06, - "loss": 0.8726, - "step": 28413 - }, - { - "epoch": 0.805180084445578, - "grad_norm": 0.0, - "learning_rate": 1.925503120211448e-06, - "loss": 0.8087, - "step": 28414 - }, - { - "epoch": 0.8052084218878404, - "grad_norm": 0.0, - "learning_rate": 1.924961716889321e-06, - "loss": 0.8925, - "step": 28415 - }, - { - "epoch": 0.8052367593301029, - "grad_norm": 0.0, - "learning_rate": 1.924420381586236e-06, - "loss": 0.7489, - "step": 28416 - }, - { - "epoch": 0.8052650967723654, - "grad_norm": 0.0, - "learning_rate": 1.923879114306757e-06, - "loss": 0.8089, - "step": 28417 - }, - { - "epoch": 0.8052934342146277, - "grad_norm": 0.0, - "learning_rate": 1.9233379150554466e-06, - "loss": 0.8566, - "step": 28418 - }, - { - "epoch": 0.8053217716568902, - "grad_norm": 0.0, - "learning_rate": 1.9227967838368566e-06, - "loss": 0.8283, - "step": 28419 - }, - { - "epoch": 0.8053501090991527, - "grad_norm": 0.0, - "learning_rate": 1.9222557206555494e-06, - "loss": 0.8073, - "step": 28420 - }, - { - "epoch": 0.8053784465414152, - "grad_norm": 0.0, - "learning_rate": 1.9217147255160816e-06, - "loss": 0.8315, - "step": 28421 - }, - { - "epoch": 0.8054067839836776, - "grad_norm": 0.0, - "learning_rate": 1.9211737984230107e-06, - "loss": 0.9609, - "step": 28422 - }, - { - "epoch": 0.8054351214259401, - "grad_norm": 0.0, - "learning_rate": 1.9206329393808955e-06, - "loss": 0.7244, - "step": 28423 - }, - { - "epoch": 0.8054634588682026, - "grad_norm": 0.0, - "learning_rate": 1.920092148394287e-06, - "loss": 0.7892, - "step": 28424 - }, - { - "epoch": 0.805491796310465, - "grad_norm": 0.0, - "learning_rate": 1.919551425467744e-06, - "loss": 0.7275, - "step": 28425 - }, - { - "epoch": 0.8055201337527275, - "grad_norm": 0.0, - "learning_rate": 1.919010770605818e-06, - "loss": 0.8242, - "step": 28426 - }, - { - "epoch": 0.80554847119499, - "grad_norm": 0.0, - "learning_rate": 1.918470183813066e-06, - "loss": 0.8296, - "step": 28427 - }, - { - "epoch": 0.8055768086372525, - "grad_norm": 0.0, - "learning_rate": 1.9179296650940425e-06, - "loss": 0.8089, - "step": 28428 - }, - { - "epoch": 0.8056051460795148, - "grad_norm": 0.0, - "learning_rate": 1.9173892144532957e-06, - "loss": 0.8465, - "step": 28429 - }, - { - "epoch": 0.8056334835217773, - "grad_norm": 0.0, - "learning_rate": 1.9168488318953814e-06, - "loss": 0.7544, - "step": 28430 - }, - { - "epoch": 0.8056618209640398, - "grad_norm": 0.0, - "learning_rate": 1.9163085174248506e-06, - "loss": 0.8003, - "step": 28431 - }, - { - "epoch": 0.8056901584063022, - "grad_norm": 0.0, - "learning_rate": 1.9157682710462553e-06, - "loss": 0.8391, - "step": 28432 - }, - { - "epoch": 0.8057184958485647, - "grad_norm": 0.0, - "learning_rate": 1.915228092764149e-06, - "loss": 0.7722, - "step": 28433 - }, - { - "epoch": 0.8057468332908272, - "grad_norm": 0.0, - "learning_rate": 1.9146879825830753e-06, - "loss": 0.8777, - "step": 28434 - }, - { - "epoch": 0.8057751707330897, - "grad_norm": 0.0, - "learning_rate": 1.914147940507587e-06, - "loss": 0.7726, - "step": 28435 - }, - { - "epoch": 0.8058035081753521, - "grad_norm": 0.0, - "learning_rate": 1.913607966542236e-06, - "loss": 0.7516, - "step": 28436 - }, - { - "epoch": 0.8058318456176146, - "grad_norm": 0.0, - "learning_rate": 1.9130680606915653e-06, - "loss": 0.7173, - "step": 28437 - }, - { - "epoch": 0.805860183059877, - "grad_norm": 0.0, - "learning_rate": 1.9125282229601284e-06, - "loss": 0.8951, - "step": 28438 - }, - { - "epoch": 0.8058885205021394, - "grad_norm": 0.0, - "learning_rate": 1.911988453352467e-06, - "loss": 0.8766, - "step": 28439 - }, - { - "epoch": 0.8059168579444019, - "grad_norm": 0.0, - "learning_rate": 1.9114487518731296e-06, - "loss": 0.9544, - "step": 28440 - }, - { - "epoch": 0.8059451953866644, - "grad_norm": 0.0, - "learning_rate": 1.910909118526666e-06, - "loss": 0.7445, - "step": 28441 - }, - { - "epoch": 0.8059735328289268, - "grad_norm": 0.0, - "learning_rate": 1.9103695533176157e-06, - "loss": 0.7459, - "step": 28442 - }, - { - "epoch": 0.8060018702711893, - "grad_norm": 0.0, - "learning_rate": 1.9098300562505266e-06, - "loss": 0.8805, - "step": 28443 - }, - { - "epoch": 0.8060302077134518, - "grad_norm": 0.0, - "learning_rate": 1.9092906273299427e-06, - "loss": 0.9172, - "step": 28444 - }, - { - "epoch": 0.8060585451557143, - "grad_norm": 0.0, - "learning_rate": 1.908751266560409e-06, - "loss": 0.801, - "step": 28445 - }, - { - "epoch": 0.8060868825979767, - "grad_norm": 0.0, - "learning_rate": 1.908211973946471e-06, - "loss": 0.8115, - "step": 28446 - }, - { - "epoch": 0.8061152200402392, - "grad_norm": 0.0, - "learning_rate": 1.907672749492665e-06, - "loss": 0.7677, - "step": 28447 - }, - { - "epoch": 0.8061435574825017, - "grad_norm": 0.0, - "learning_rate": 1.9071335932035373e-06, - "loss": 0.7986, - "step": 28448 - }, - { - "epoch": 0.806171894924764, - "grad_norm": 0.0, - "learning_rate": 1.9065945050836299e-06, - "loss": 0.7994, - "step": 28449 - }, - { - "epoch": 0.8062002323670265, - "grad_norm": 0.0, - "learning_rate": 1.9060554851374813e-06, - "loss": 0.837, - "step": 28450 - }, - { - "epoch": 0.806228569809289, - "grad_norm": 0.0, - "learning_rate": 1.9055165333696324e-06, - "loss": 0.7935, - "step": 28451 - }, - { - "epoch": 0.8062569072515515, - "grad_norm": 0.0, - "learning_rate": 1.9049776497846251e-06, - "loss": 0.7932, - "step": 28452 - }, - { - "epoch": 0.8062852446938139, - "grad_norm": 0.0, - "learning_rate": 1.9044388343869958e-06, - "loss": 0.6978, - "step": 28453 - }, - { - "epoch": 0.8063135821360764, - "grad_norm": 0.0, - "learning_rate": 1.9039000871812863e-06, - "loss": 0.8028, - "step": 28454 - }, - { - "epoch": 0.8063419195783389, - "grad_norm": 0.0, - "learning_rate": 1.9033614081720297e-06, - "loss": 0.7915, - "step": 28455 - }, - { - "epoch": 0.8063702570206013, - "grad_norm": 0.0, - "learning_rate": 1.902822797363768e-06, - "loss": 0.8278, - "step": 28456 - }, - { - "epoch": 0.8063985944628638, - "grad_norm": 0.0, - "learning_rate": 1.9022842547610354e-06, - "loss": 0.7998, - "step": 28457 - }, - { - "epoch": 0.8064269319051263, - "grad_norm": 0.0, - "learning_rate": 1.9017457803683704e-06, - "loss": 0.7027, - "step": 28458 - }, - { - "epoch": 0.8064552693473888, - "grad_norm": 0.0, - "learning_rate": 1.9012073741903069e-06, - "loss": 0.9221, - "step": 28459 - }, - { - "epoch": 0.8064836067896511, - "grad_norm": 0.0, - "learning_rate": 1.900669036231385e-06, - "loss": 0.7655, - "step": 28460 - }, - { - "epoch": 0.8065119442319136, - "grad_norm": 0.0, - "learning_rate": 1.9001307664961322e-06, - "loss": 0.8473, - "step": 28461 - }, - { - "epoch": 0.8065402816741761, - "grad_norm": 0.0, - "learning_rate": 1.899592564989088e-06, - "loss": 0.8581, - "step": 28462 - }, - { - "epoch": 0.8065686191164385, - "grad_norm": 0.0, - "learning_rate": 1.8990544317147818e-06, - "loss": 0.7745, - "step": 28463 - }, - { - "epoch": 0.806596956558701, - "grad_norm": 0.0, - "learning_rate": 1.8985163666777473e-06, - "loss": 0.756, - "step": 28464 - }, - { - "epoch": 0.8066252940009635, - "grad_norm": 0.0, - "learning_rate": 1.8979783698825216e-06, - "loss": 0.8717, - "step": 28465 - }, - { - "epoch": 0.8066536314432259, - "grad_norm": 0.0, - "learning_rate": 1.89744044133363e-06, - "loss": 0.8784, - "step": 28466 - }, - { - "epoch": 0.8066819688854884, - "grad_norm": 0.0, - "learning_rate": 1.896902581035608e-06, - "loss": 0.8716, - "step": 28467 - }, - { - "epoch": 0.8067103063277509, - "grad_norm": 0.0, - "learning_rate": 1.8963647889929826e-06, - "loss": 0.8239, - "step": 28468 - }, - { - "epoch": 0.8067386437700134, - "grad_norm": 0.0, - "learning_rate": 1.8958270652102862e-06, - "loss": 0.7481, - "step": 28469 - }, - { - "epoch": 0.8067669812122757, - "grad_norm": 0.0, - "learning_rate": 1.8952894096920472e-06, - "loss": 0.8252, - "step": 28470 - }, - { - "epoch": 0.8067953186545382, - "grad_norm": 0.0, - "learning_rate": 1.8947518224427951e-06, - "loss": 0.7386, - "step": 28471 - }, - { - "epoch": 0.8068236560968007, - "grad_norm": 0.0, - "learning_rate": 1.894214303467058e-06, - "loss": 0.7472, - "step": 28472 - }, - { - "epoch": 0.8068519935390631, - "grad_norm": 0.0, - "learning_rate": 1.8936768527693673e-06, - "loss": 0.8141, - "step": 28473 - }, - { - "epoch": 0.8068803309813256, - "grad_norm": 0.0, - "learning_rate": 1.8931394703542437e-06, - "loss": 0.8249, - "step": 28474 - }, - { - "epoch": 0.8069086684235881, - "grad_norm": 0.0, - "learning_rate": 1.8926021562262187e-06, - "loss": 0.7908, - "step": 28475 - }, - { - "epoch": 0.8069370058658506, - "grad_norm": 0.0, - "learning_rate": 1.8920649103898148e-06, - "loss": 0.8365, - "step": 28476 - }, - { - "epoch": 0.806965343308113, - "grad_norm": 0.0, - "learning_rate": 1.8915277328495584e-06, - "loss": 0.8578, - "step": 28477 - }, - { - "epoch": 0.8069936807503755, - "grad_norm": 0.0, - "learning_rate": 1.8909906236099774e-06, - "loss": 0.8756, - "step": 28478 - }, - { - "epoch": 0.807022018192638, - "grad_norm": 0.0, - "learning_rate": 1.890453582675591e-06, - "loss": 0.8282, - "step": 28479 - }, - { - "epoch": 0.8070503556349004, - "grad_norm": 0.0, - "learning_rate": 1.889916610050926e-06, - "loss": 0.8993, - "step": 28480 - }, - { - "epoch": 0.8070786930771628, - "grad_norm": 0.0, - "learning_rate": 1.8893797057405072e-06, - "loss": 0.8537, - "step": 28481 - }, - { - "epoch": 0.8071070305194253, - "grad_norm": 0.0, - "learning_rate": 1.8888428697488525e-06, - "loss": 0.6697, - "step": 28482 - }, - { - "epoch": 0.8071353679616878, - "grad_norm": 0.0, - "learning_rate": 1.888306102080486e-06, - "loss": 0.8183, - "step": 28483 - }, - { - "epoch": 0.8071637054039502, - "grad_norm": 0.0, - "learning_rate": 1.8877694027399296e-06, - "loss": 0.7586, - "step": 28484 - }, - { - "epoch": 0.8071920428462127, - "grad_norm": 0.0, - "learning_rate": 1.887232771731704e-06, - "loss": 0.7153, - "step": 28485 - }, - { - "epoch": 0.8072203802884752, - "grad_norm": 0.0, - "learning_rate": 1.8866962090603314e-06, - "loss": 0.8432, - "step": 28486 - }, - { - "epoch": 0.8072487177307376, - "grad_norm": 0.0, - "learning_rate": 1.886159714730328e-06, - "loss": 0.8187, - "step": 28487 - }, - { - "epoch": 0.8072770551730001, - "grad_norm": 0.0, - "learning_rate": 1.8856232887462134e-06, - "loss": 0.68, - "step": 28488 - }, - { - "epoch": 0.8073053926152626, - "grad_norm": 0.0, - "learning_rate": 1.8850869311125098e-06, - "loss": 0.7888, - "step": 28489 - }, - { - "epoch": 0.807333730057525, - "grad_norm": 0.0, - "learning_rate": 1.88455064183373e-06, - "loss": 0.7726, - "step": 28490 - }, - { - "epoch": 0.8073620674997875, - "grad_norm": 0.0, - "learning_rate": 1.8840144209143963e-06, - "loss": 0.7548, - "step": 28491 - }, - { - "epoch": 0.8073904049420499, - "grad_norm": 0.0, - "learning_rate": 1.8834782683590202e-06, - "loss": 0.8792, - "step": 28492 - }, - { - "epoch": 0.8074187423843124, - "grad_norm": 0.0, - "learning_rate": 1.8829421841721206e-06, - "loss": 0.8982, - "step": 28493 - }, - { - "epoch": 0.8074470798265748, - "grad_norm": 0.0, - "learning_rate": 1.882406168358215e-06, - "loss": 0.8582, - "step": 28494 - }, - { - "epoch": 0.8074754172688373, - "grad_norm": 0.0, - "learning_rate": 1.8818702209218153e-06, - "loss": 0.8009, - "step": 28495 - }, - { - "epoch": 0.8075037547110998, - "grad_norm": 0.0, - "learning_rate": 1.8813343418674367e-06, - "loss": 0.817, - "step": 28496 - }, - { - "epoch": 0.8075320921533622, - "grad_norm": 0.0, - "learning_rate": 1.8807985311995948e-06, - "loss": 0.8222, - "step": 28497 - }, - { - "epoch": 0.8075604295956247, - "grad_norm": 0.0, - "learning_rate": 1.8802627889228008e-06, - "loss": 0.8287, - "step": 28498 - }, - { - "epoch": 0.8075887670378872, - "grad_norm": 0.0, - "learning_rate": 1.8797271150415709e-06, - "loss": 0.9437, - "step": 28499 - }, - { - "epoch": 0.8076171044801497, - "grad_norm": 0.0, - "learning_rate": 1.879191509560413e-06, - "loss": 0.8644, - "step": 28500 - }, - { - "epoch": 0.807645441922412, - "grad_norm": 0.0, - "learning_rate": 1.8786559724838405e-06, - "loss": 0.69, - "step": 28501 - }, - { - "epoch": 0.8076737793646745, - "grad_norm": 0.0, - "learning_rate": 1.8781205038163663e-06, - "loss": 0.7253, - "step": 28502 - }, - { - "epoch": 0.807702116806937, - "grad_norm": 0.0, - "learning_rate": 1.877585103562497e-06, - "loss": 0.7714, - "step": 28503 - }, - { - "epoch": 0.8077304542491994, - "grad_norm": 0.0, - "learning_rate": 1.8770497717267477e-06, - "loss": 0.9132, - "step": 28504 - }, - { - "epoch": 0.8077587916914619, - "grad_norm": 0.0, - "learning_rate": 1.8765145083136216e-06, - "loss": 0.9194, - "step": 28505 - }, - { - "epoch": 0.8077871291337244, - "grad_norm": 0.0, - "learning_rate": 1.8759793133276306e-06, - "loss": 0.9137, - "step": 28506 - }, - { - "epoch": 0.8078154665759869, - "grad_norm": 0.0, - "learning_rate": 1.8754441867732842e-06, - "loss": 0.8407, - "step": 28507 - }, - { - "epoch": 0.8078438040182493, - "grad_norm": 0.0, - "learning_rate": 1.874909128655087e-06, - "loss": 0.7618, - "step": 28508 - }, - { - "epoch": 0.8078721414605118, - "grad_norm": 0.0, - "learning_rate": 1.8743741389775472e-06, - "loss": 0.7835, - "step": 28509 - }, - { - "epoch": 0.8079004789027743, - "grad_norm": 0.0, - "learning_rate": 1.8738392177451703e-06, - "loss": 0.7291, - "step": 28510 - }, - { - "epoch": 0.8079288163450367, - "grad_norm": 0.0, - "learning_rate": 1.873304364962465e-06, - "loss": 0.6626, - "step": 28511 - }, - { - "epoch": 0.8079571537872992, - "grad_norm": 0.0, - "learning_rate": 1.8727695806339363e-06, - "loss": 0.801, - "step": 28512 - }, - { - "epoch": 0.8079854912295616, - "grad_norm": 0.0, - "learning_rate": 1.8722348647640842e-06, - "loss": 0.7979, - "step": 28513 - }, - { - "epoch": 0.808013828671824, - "grad_norm": 0.0, - "learning_rate": 1.8717002173574173e-06, - "loss": 0.7914, - "step": 28514 - }, - { - "epoch": 0.8080421661140865, - "grad_norm": 0.0, - "learning_rate": 1.8711656384184396e-06, - "loss": 0.8878, - "step": 28515 - }, - { - "epoch": 0.808070503556349, - "grad_norm": 0.0, - "learning_rate": 1.8706311279516499e-06, - "loss": 0.88, - "step": 28516 - }, - { - "epoch": 0.8080988409986115, - "grad_norm": 0.0, - "learning_rate": 1.8700966859615533e-06, - "loss": 0.7991, - "step": 28517 - }, - { - "epoch": 0.8081271784408739, - "grad_norm": 0.0, - "learning_rate": 1.8695623124526541e-06, - "loss": 0.9601, - "step": 28518 - }, - { - "epoch": 0.8081555158831364, - "grad_norm": 0.0, - "learning_rate": 1.8690280074294475e-06, - "loss": 0.8395, - "step": 28519 - }, - { - "epoch": 0.8081838533253989, - "grad_norm": 0.0, - "learning_rate": 1.8684937708964402e-06, - "loss": 0.7484, - "step": 28520 - }, - { - "epoch": 0.8082121907676613, - "grad_norm": 0.0, - "learning_rate": 1.8679596028581271e-06, - "loss": 0.7794, - "step": 28521 - }, - { - "epoch": 0.8082405282099238, - "grad_norm": 0.0, - "learning_rate": 1.867425503319009e-06, - "loss": 0.8037, - "step": 28522 - }, - { - "epoch": 0.8082688656521863, - "grad_norm": 0.0, - "learning_rate": 1.8668914722835873e-06, - "loss": 0.8232, - "step": 28523 - }, - { - "epoch": 0.8082972030944487, - "grad_norm": 0.0, - "learning_rate": 1.866357509756358e-06, - "loss": 0.7739, - "step": 28524 - }, - { - "epoch": 0.8083255405367111, - "grad_norm": 0.0, - "learning_rate": 1.865823615741822e-06, - "loss": 0.8165, - "step": 28525 - }, - { - "epoch": 0.8083538779789736, - "grad_norm": 0.0, - "learning_rate": 1.8652897902444721e-06, - "loss": 0.8069, - "step": 28526 - }, - { - "epoch": 0.8083822154212361, - "grad_norm": 0.0, - "learning_rate": 1.8647560332688076e-06, - "loss": 0.7755, - "step": 28527 - }, - { - "epoch": 0.8084105528634985, - "grad_norm": 0.0, - "learning_rate": 1.8642223448193253e-06, - "loss": 0.7341, - "step": 28528 - }, - { - "epoch": 0.808438890305761, - "grad_norm": 0.0, - "learning_rate": 1.8636887249005176e-06, - "loss": 0.9131, - "step": 28529 - }, - { - "epoch": 0.8084672277480235, - "grad_norm": 0.0, - "learning_rate": 1.8631551735168806e-06, - "loss": 0.9237, - "step": 28530 - }, - { - "epoch": 0.8084955651902859, - "grad_norm": 0.0, - "learning_rate": 1.8626216906729123e-06, - "loss": 0.7023, - "step": 28531 - }, - { - "epoch": 0.8085239026325484, - "grad_norm": 0.0, - "learning_rate": 1.8620882763731008e-06, - "loss": 0.761, - "step": 28532 - }, - { - "epoch": 0.8085522400748109, - "grad_norm": 0.0, - "learning_rate": 1.8615549306219438e-06, - "loss": 0.7398, - "step": 28533 - }, - { - "epoch": 0.8085805775170734, - "grad_norm": 0.0, - "learning_rate": 1.861021653423929e-06, - "loss": 0.7489, - "step": 28534 - }, - { - "epoch": 0.8086089149593357, - "grad_norm": 0.0, - "learning_rate": 1.8604884447835515e-06, - "loss": 0.7755, - "step": 28535 - }, - { - "epoch": 0.8086372524015982, - "grad_norm": 0.0, - "learning_rate": 1.8599553047053032e-06, - "loss": 0.7096, - "step": 28536 - }, - { - "epoch": 0.8086655898438607, - "grad_norm": 0.0, - "learning_rate": 1.8594222331936728e-06, - "loss": 0.8338, - "step": 28537 - }, - { - "epoch": 0.8086939272861231, - "grad_norm": 0.0, - "learning_rate": 1.8588892302531525e-06, - "loss": 0.8666, - "step": 28538 - }, - { - "epoch": 0.8087222647283856, - "grad_norm": 0.0, - "learning_rate": 1.8583562958882329e-06, - "loss": 0.8711, - "step": 28539 - }, - { - "epoch": 0.8087506021706481, - "grad_norm": 0.0, - "learning_rate": 1.8578234301034004e-06, - "loss": 0.8247, - "step": 28540 - }, - { - "epoch": 0.8087789396129106, - "grad_norm": 0.0, - "learning_rate": 1.857290632903146e-06, - "loss": 0.8364, - "step": 28541 - }, - { - "epoch": 0.808807277055173, - "grad_norm": 0.0, - "learning_rate": 1.8567579042919548e-06, - "loss": 0.8435, - "step": 28542 - }, - { - "epoch": 0.8088356144974355, - "grad_norm": 0.0, - "learning_rate": 1.8562252442743156e-06, - "loss": 0.776, - "step": 28543 - }, - { - "epoch": 0.808863951939698, - "grad_norm": 0.0, - "learning_rate": 1.855692652854717e-06, - "loss": 0.7802, - "step": 28544 - }, - { - "epoch": 0.8088922893819603, - "grad_norm": 0.0, - "learning_rate": 1.855160130037641e-06, - "loss": 0.8467, - "step": 28545 - }, - { - "epoch": 0.8089206268242228, - "grad_norm": 0.0, - "learning_rate": 1.854627675827576e-06, - "loss": 0.7268, - "step": 28546 - }, - { - "epoch": 0.8089489642664853, - "grad_norm": 0.0, - "learning_rate": 1.85409529022901e-06, - "loss": 0.8524, - "step": 28547 - }, - { - "epoch": 0.8089773017087478, - "grad_norm": 0.0, - "learning_rate": 1.8535629732464211e-06, - "loss": 0.7778, - "step": 28548 - }, - { - "epoch": 0.8090056391510102, - "grad_norm": 0.0, - "learning_rate": 1.853030724884297e-06, - "loss": 0.8227, - "step": 28549 - }, - { - "epoch": 0.8090339765932727, - "grad_norm": 0.0, - "learning_rate": 1.85249854514712e-06, - "loss": 0.8754, - "step": 28550 - }, - { - "epoch": 0.8090623140355352, - "grad_norm": 0.0, - "learning_rate": 1.8519664340393729e-06, - "loss": 0.7769, - "step": 28551 - }, - { - "epoch": 0.8090906514777976, - "grad_norm": 0.0, - "learning_rate": 1.851434391565541e-06, - "loss": 0.8679, - "step": 28552 - }, - { - "epoch": 0.8091189889200601, - "grad_norm": 0.0, - "learning_rate": 1.8509024177301004e-06, - "loss": 0.8381, - "step": 28553 - }, - { - "epoch": 0.8091473263623226, - "grad_norm": 0.0, - "learning_rate": 1.8503705125375382e-06, - "loss": 0.7746, - "step": 28554 - }, - { - "epoch": 0.8091756638045849, - "grad_norm": 0.0, - "learning_rate": 1.8498386759923282e-06, - "loss": 0.7475, - "step": 28555 - }, - { - "epoch": 0.8092040012468474, - "grad_norm": 0.0, - "learning_rate": 1.8493069080989534e-06, - "loss": 0.8773, - "step": 28556 - }, - { - "epoch": 0.8092323386891099, - "grad_norm": 0.0, - "learning_rate": 1.8487752088618959e-06, - "loss": 0.7012, - "step": 28557 - }, - { - "epoch": 0.8092606761313724, - "grad_norm": 0.0, - "learning_rate": 1.8482435782856289e-06, - "loss": 0.7659, - "step": 28558 - }, - { - "epoch": 0.8092890135736348, - "grad_norm": 0.0, - "learning_rate": 1.8477120163746343e-06, - "loss": 0.9258, - "step": 28559 - }, - { - "epoch": 0.8093173510158973, - "grad_norm": 0.0, - "learning_rate": 1.8471805231333906e-06, - "loss": 0.7881, - "step": 28560 - }, - { - "epoch": 0.8093456884581598, - "grad_norm": 0.0, - "learning_rate": 1.84664909856637e-06, - "loss": 0.7956, - "step": 28561 - }, - { - "epoch": 0.8093740259004222, - "grad_norm": 0.0, - "learning_rate": 1.846117742678052e-06, - "loss": 0.9267, - "step": 28562 - }, - { - "epoch": 0.8094023633426847, - "grad_norm": 0.0, - "learning_rate": 1.8455864554729119e-06, - "loss": 0.8295, - "step": 28563 - }, - { - "epoch": 0.8094307007849472, - "grad_norm": 0.0, - "learning_rate": 1.8450552369554254e-06, - "loss": 0.8293, - "step": 28564 - }, - { - "epoch": 0.8094590382272097, - "grad_norm": 0.0, - "learning_rate": 1.8445240871300696e-06, - "loss": 0.863, - "step": 28565 - }, - { - "epoch": 0.809487375669472, - "grad_norm": 0.0, - "learning_rate": 1.8439930060013134e-06, - "loss": 0.8383, - "step": 28566 - }, - { - "epoch": 0.8095157131117345, - "grad_norm": 0.0, - "learning_rate": 1.843461993573632e-06, - "loss": 0.7216, - "step": 28567 - }, - { - "epoch": 0.809544050553997, - "grad_norm": 0.0, - "learning_rate": 1.842931049851502e-06, - "loss": 0.7571, - "step": 28568 - }, - { - "epoch": 0.8095723879962594, - "grad_norm": 0.0, - "learning_rate": 1.8424001748393905e-06, - "loss": 0.8224, - "step": 28569 - }, - { - "epoch": 0.8096007254385219, - "grad_norm": 0.0, - "learning_rate": 1.8418693685417743e-06, - "loss": 0.8709, - "step": 28570 - }, - { - "epoch": 0.8096290628807844, - "grad_norm": 0.0, - "learning_rate": 1.8413386309631188e-06, - "loss": 0.8333, - "step": 28571 - }, - { - "epoch": 0.8096574003230469, - "grad_norm": 0.0, - "learning_rate": 1.8408079621078977e-06, - "loss": 0.8759, - "step": 28572 - }, - { - "epoch": 0.8096857377653093, - "grad_norm": 0.0, - "learning_rate": 1.8402773619805837e-06, - "loss": 0.8056, - "step": 28573 - }, - { - "epoch": 0.8097140752075718, - "grad_norm": 0.0, - "learning_rate": 1.8397468305856413e-06, - "loss": 0.8678, - "step": 28574 - }, - { - "epoch": 0.8097424126498343, - "grad_norm": 0.0, - "learning_rate": 1.839216367927541e-06, - "loss": 0.7531, - "step": 28575 - }, - { - "epoch": 0.8097707500920966, - "grad_norm": 0.0, - "learning_rate": 1.838685974010752e-06, - "loss": 0.8835, - "step": 28576 - }, - { - "epoch": 0.8097990875343591, - "grad_norm": 0.0, - "learning_rate": 1.8381556488397411e-06, - "loss": 0.7483, - "step": 28577 - }, - { - "epoch": 0.8098274249766216, - "grad_norm": 0.0, - "learning_rate": 1.8376253924189791e-06, - "loss": 0.8434, - "step": 28578 - }, - { - "epoch": 0.809855762418884, - "grad_norm": 0.0, - "learning_rate": 1.8370952047529267e-06, - "loss": 0.8222, - "step": 28579 - }, - { - "epoch": 0.8098840998611465, - "grad_norm": 0.0, - "learning_rate": 1.8365650858460527e-06, - "loss": 0.8187, - "step": 28580 - }, - { - "epoch": 0.809912437303409, - "grad_norm": 0.0, - "learning_rate": 1.8360350357028256e-06, - "loss": 0.6897, - "step": 28581 - }, - { - "epoch": 0.8099407747456715, - "grad_norm": 0.0, - "learning_rate": 1.835505054327703e-06, - "loss": 0.8492, - "step": 28582 - }, - { - "epoch": 0.8099691121879339, - "grad_norm": 0.0, - "learning_rate": 1.8349751417251571e-06, - "loss": 0.737, - "step": 28583 - }, - { - "epoch": 0.8099974496301964, - "grad_norm": 0.0, - "learning_rate": 1.8344452978996441e-06, - "loss": 0.8331, - "step": 28584 - }, - { - "epoch": 0.8100257870724589, - "grad_norm": 0.0, - "learning_rate": 1.8339155228556315e-06, - "loss": 0.7176, - "step": 28585 - }, - { - "epoch": 0.8100541245147213, - "grad_norm": 0.0, - "learning_rate": 1.8333858165975827e-06, - "loss": 0.8217, - "step": 28586 - }, - { - "epoch": 0.8100824619569837, - "grad_norm": 0.0, - "learning_rate": 1.8328561791299548e-06, - "loss": 0.8269, - "step": 28587 - }, - { - "epoch": 0.8101107993992462, - "grad_norm": 0.0, - "learning_rate": 1.8323266104572135e-06, - "loss": 0.7921, - "step": 28588 - }, - { - "epoch": 0.8101391368415087, - "grad_norm": 0.0, - "learning_rate": 1.8317971105838173e-06, - "loss": 0.9049, - "step": 28589 - }, - { - "epoch": 0.8101674742837711, - "grad_norm": 0.0, - "learning_rate": 1.831267679514227e-06, - "loss": 0.876, - "step": 28590 - }, - { - "epoch": 0.8101958117260336, - "grad_norm": 0.0, - "learning_rate": 1.8307383172529046e-06, - "loss": 0.8401, - "step": 28591 - }, - { - "epoch": 0.8102241491682961, - "grad_norm": 0.0, - "learning_rate": 1.8302090238043057e-06, - "loss": 0.8354, - "step": 28592 - }, - { - "epoch": 0.8102524866105585, - "grad_norm": 0.0, - "learning_rate": 1.8296797991728887e-06, - "loss": 0.8234, - "step": 28593 - }, - { - "epoch": 0.810280824052821, - "grad_norm": 0.0, - "learning_rate": 1.8291506433631156e-06, - "loss": 0.7856, - "step": 28594 - }, - { - "epoch": 0.8103091614950835, - "grad_norm": 0.0, - "learning_rate": 1.8286215563794386e-06, - "loss": 0.7243, - "step": 28595 - }, - { - "epoch": 0.810337498937346, - "grad_norm": 0.0, - "learning_rate": 1.828092538226317e-06, - "loss": 0.7821, - "step": 28596 - }, - { - "epoch": 0.8103658363796084, - "grad_norm": 0.0, - "learning_rate": 1.8275635889082088e-06, - "loss": 0.8163, - "step": 28597 - }, - { - "epoch": 0.8103941738218708, - "grad_norm": 0.0, - "learning_rate": 1.8270347084295636e-06, - "loss": 0.6885, - "step": 28598 - }, - { - "epoch": 0.8104225112641333, - "grad_norm": 0.0, - "learning_rate": 1.8265058967948434e-06, - "loss": 0.8235, - "step": 28599 - }, - { - "epoch": 0.8104508487063957, - "grad_norm": 0.0, - "learning_rate": 1.825977154008497e-06, - "loss": 0.8029, - "step": 28600 - }, - { - "epoch": 0.8104791861486582, - "grad_norm": 0.0, - "learning_rate": 1.8254484800749794e-06, - "loss": 0.9368, - "step": 28601 - }, - { - "epoch": 0.8105075235909207, - "grad_norm": 0.0, - "learning_rate": 1.8249198749987463e-06, - "loss": 0.9131, - "step": 28602 - }, - { - "epoch": 0.8105358610331831, - "grad_norm": 0.0, - "learning_rate": 1.824391338784247e-06, - "loss": 0.8035, - "step": 28603 - }, - { - "epoch": 0.8105641984754456, - "grad_norm": 0.0, - "learning_rate": 1.8238628714359375e-06, - "loss": 0.8852, - "step": 28604 - }, - { - "epoch": 0.8105925359177081, - "grad_norm": 0.0, - "learning_rate": 1.823334472958268e-06, - "loss": 0.7789, - "step": 28605 - }, - { - "epoch": 0.8106208733599706, - "grad_norm": 0.0, - "learning_rate": 1.8228061433556866e-06, - "loss": 0.8364, - "step": 28606 - }, - { - "epoch": 0.810649210802233, - "grad_norm": 0.0, - "learning_rate": 1.8222778826326482e-06, - "loss": 0.7294, - "step": 28607 - }, - { - "epoch": 0.8106775482444954, - "grad_norm": 0.0, - "learning_rate": 1.8217496907935971e-06, - "loss": 0.7418, - "step": 28608 - }, - { - "epoch": 0.8107058856867579, - "grad_norm": 0.0, - "learning_rate": 1.8212215678429856e-06, - "loss": 0.8607, - "step": 28609 - }, - { - "epoch": 0.8107342231290203, - "grad_norm": 0.0, - "learning_rate": 1.8206935137852644e-06, - "loss": 0.6863, - "step": 28610 - }, - { - "epoch": 0.8107625605712828, - "grad_norm": 0.0, - "learning_rate": 1.8201655286248766e-06, - "loss": 0.7438, - "step": 28611 - }, - { - "epoch": 0.8107908980135453, - "grad_norm": 0.0, - "learning_rate": 1.819637612366274e-06, - "loss": 0.8421, - "step": 28612 - }, - { - "epoch": 0.8108192354558078, - "grad_norm": 0.0, - "learning_rate": 1.819109765013899e-06, - "loss": 0.8374, - "step": 28613 - }, - { - "epoch": 0.8108475728980702, - "grad_norm": 0.0, - "learning_rate": 1.818581986572201e-06, - "loss": 0.784, - "step": 28614 - }, - { - "epoch": 0.8108759103403327, - "grad_norm": 0.0, - "learning_rate": 1.8180542770456244e-06, - "loss": 0.8058, - "step": 28615 - }, - { - "epoch": 0.8109042477825952, - "grad_norm": 0.0, - "learning_rate": 1.8175266364386157e-06, - "loss": 0.8275, - "step": 28616 - }, - { - "epoch": 0.8109325852248576, - "grad_norm": 0.0, - "learning_rate": 1.8169990647556179e-06, - "loss": 0.7766, - "step": 28617 - }, - { - "epoch": 0.81096092266712, - "grad_norm": 0.0, - "learning_rate": 1.8164715620010788e-06, - "loss": 0.826, - "step": 28618 - }, - { - "epoch": 0.8109892601093825, - "grad_norm": 0.0, - "learning_rate": 1.8159441281794355e-06, - "loss": 0.7421, - "step": 28619 - }, - { - "epoch": 0.811017597551645, - "grad_norm": 0.0, - "learning_rate": 1.815416763295137e-06, - "loss": 0.7963, - "step": 28620 - }, - { - "epoch": 0.8110459349939074, - "grad_norm": 0.0, - "learning_rate": 1.8148894673526196e-06, - "loss": 0.7684, - "step": 28621 - }, - { - "epoch": 0.8110742724361699, - "grad_norm": 0.0, - "learning_rate": 1.8143622403563277e-06, - "loss": 0.8713, - "step": 28622 - }, - { - "epoch": 0.8111026098784324, - "grad_norm": 0.0, - "learning_rate": 1.8138350823107043e-06, - "loss": 0.834, - "step": 28623 - }, - { - "epoch": 0.8111309473206948, - "grad_norm": 0.0, - "learning_rate": 1.8133079932201857e-06, - "loss": 0.8129, - "step": 28624 - }, - { - "epoch": 0.8111592847629573, - "grad_norm": 0.0, - "learning_rate": 1.8127809730892143e-06, - "loss": 0.7866, - "step": 28625 - }, - { - "epoch": 0.8111876222052198, - "grad_norm": 0.0, - "learning_rate": 1.8122540219222306e-06, - "loss": 0.8448, - "step": 28626 - }, - { - "epoch": 0.8112159596474822, - "grad_norm": 0.0, - "learning_rate": 1.8117271397236703e-06, - "loss": 0.7285, - "step": 28627 - }, - { - "epoch": 0.8112442970897447, - "grad_norm": 0.0, - "learning_rate": 1.8112003264979728e-06, - "loss": 0.8181, - "step": 28628 - }, - { - "epoch": 0.8112726345320072, - "grad_norm": 0.0, - "learning_rate": 1.8106735822495746e-06, - "loss": 0.7952, - "step": 28629 - }, - { - "epoch": 0.8113009719742696, - "grad_norm": 0.0, - "learning_rate": 1.8101469069829148e-06, - "loss": 0.8836, - "step": 28630 - }, - { - "epoch": 0.811329309416532, - "grad_norm": 0.0, - "learning_rate": 1.8096203007024315e-06, - "loss": 0.739, - "step": 28631 - }, - { - "epoch": 0.8113576468587945, - "grad_norm": 0.0, - "learning_rate": 1.809093763412555e-06, - "loss": 0.8115, - "step": 28632 - }, - { - "epoch": 0.811385984301057, - "grad_norm": 0.0, - "learning_rate": 1.8085672951177236e-06, - "loss": 0.8182, - "step": 28633 - }, - { - "epoch": 0.8114143217433194, - "grad_norm": 0.0, - "learning_rate": 1.8080408958223738e-06, - "loss": 0.8697, - "step": 28634 - }, - { - "epoch": 0.8114426591855819, - "grad_norm": 0.0, - "learning_rate": 1.8075145655309356e-06, - "loss": 0.7, - "step": 28635 - }, - { - "epoch": 0.8114709966278444, - "grad_norm": 0.0, - "learning_rate": 1.8069883042478464e-06, - "loss": 0.8642, - "step": 28636 - }, - { - "epoch": 0.8114993340701069, - "grad_norm": 0.0, - "learning_rate": 1.806462111977535e-06, - "loss": 0.7817, - "step": 28637 - }, - { - "epoch": 0.8115276715123693, - "grad_norm": 0.0, - "learning_rate": 1.8059359887244353e-06, - "loss": 0.7726, - "step": 28638 - }, - { - "epoch": 0.8115560089546318, - "grad_norm": 0.0, - "learning_rate": 1.8054099344929833e-06, - "loss": 0.8387, - "step": 28639 - }, - { - "epoch": 0.8115843463968943, - "grad_norm": 0.0, - "learning_rate": 1.8048839492876024e-06, - "loss": 0.7922, - "step": 28640 - }, - { - "epoch": 0.8116126838391566, - "grad_norm": 0.0, - "learning_rate": 1.8043580331127275e-06, - "loss": 0.8675, - "step": 28641 - }, - { - "epoch": 0.8116410212814191, - "grad_norm": 0.0, - "learning_rate": 1.8038321859727891e-06, - "loss": 0.883, - "step": 28642 - }, - { - "epoch": 0.8116693587236816, - "grad_norm": 0.0, - "learning_rate": 1.803306407872215e-06, - "loss": 0.7382, - "step": 28643 - }, - { - "epoch": 0.8116976961659441, - "grad_norm": 0.0, - "learning_rate": 1.8027806988154373e-06, - "loss": 0.8146, - "step": 28644 - }, - { - "epoch": 0.8117260336082065, - "grad_norm": 0.0, - "learning_rate": 1.8022550588068799e-06, - "loss": 0.8305, - "step": 28645 - }, - { - "epoch": 0.811754371050469, - "grad_norm": 0.0, - "learning_rate": 1.8017294878509716e-06, - "loss": 0.7656, - "step": 28646 - }, - { - "epoch": 0.8117827084927315, - "grad_norm": 0.0, - "learning_rate": 1.8012039859521425e-06, - "loss": 0.8917, - "step": 28647 - }, - { - "epoch": 0.8118110459349939, - "grad_norm": 0.0, - "learning_rate": 1.8006785531148153e-06, - "loss": 0.8327, - "step": 28648 - }, - { - "epoch": 0.8118393833772564, - "grad_norm": 0.0, - "learning_rate": 1.8001531893434188e-06, - "loss": 0.7618, - "step": 28649 - }, - { - "epoch": 0.8118677208195189, - "grad_norm": 0.0, - "learning_rate": 1.799627894642375e-06, - "loss": 0.7718, - "step": 28650 - }, - { - "epoch": 0.8118960582617812, - "grad_norm": 0.0, - "learning_rate": 1.7991026690161107e-06, - "loss": 0.9865, - "step": 28651 - }, - { - "epoch": 0.8119243957040437, - "grad_norm": 0.0, - "learning_rate": 1.7985775124690496e-06, - "loss": 0.8698, - "step": 28652 - }, - { - "epoch": 0.8119527331463062, - "grad_norm": 0.0, - "learning_rate": 1.7980524250056153e-06, - "loss": 0.8542, - "step": 28653 - }, - { - "epoch": 0.8119810705885687, - "grad_norm": 0.0, - "learning_rate": 1.7975274066302317e-06, - "loss": 0.8359, - "step": 28654 - }, - { - "epoch": 0.8120094080308311, - "grad_norm": 0.0, - "learning_rate": 1.7970024573473233e-06, - "loss": 0.7393, - "step": 28655 - }, - { - "epoch": 0.8120377454730936, - "grad_norm": 0.0, - "learning_rate": 1.7964775771613064e-06, - "loss": 0.8516, - "step": 28656 - }, - { - "epoch": 0.8120660829153561, - "grad_norm": 0.0, - "learning_rate": 1.7959527660766074e-06, - "loss": 0.8159, - "step": 28657 - }, - { - "epoch": 0.8120944203576185, - "grad_norm": 0.0, - "learning_rate": 1.795428024097643e-06, - "loss": 0.775, - "step": 28658 - }, - { - "epoch": 0.812122757799881, - "grad_norm": 0.0, - "learning_rate": 1.794903351228835e-06, - "loss": 0.8292, - "step": 28659 - }, - { - "epoch": 0.8121510952421435, - "grad_norm": 0.0, - "learning_rate": 1.7943787474746044e-06, - "loss": 0.8217, - "step": 28660 - }, - { - "epoch": 0.812179432684406, - "grad_norm": 0.0, - "learning_rate": 1.7938542128393677e-06, - "loss": 0.759, - "step": 28661 - }, - { - "epoch": 0.8122077701266683, - "grad_norm": 0.0, - "learning_rate": 1.7933297473275435e-06, - "loss": 0.77, - "step": 28662 - }, - { - "epoch": 0.8122361075689308, - "grad_norm": 0.0, - "learning_rate": 1.7928053509435527e-06, - "loss": 0.7601, - "step": 28663 - }, - { - "epoch": 0.8122644450111933, - "grad_norm": 0.0, - "learning_rate": 1.7922810236918077e-06, - "loss": 0.8857, - "step": 28664 - }, - { - "epoch": 0.8122927824534557, - "grad_norm": 0.0, - "learning_rate": 1.7917567655767277e-06, - "loss": 0.7785, - "step": 28665 - }, - { - "epoch": 0.8123211198957182, - "grad_norm": 0.0, - "learning_rate": 1.7912325766027282e-06, - "loss": 0.6933, - "step": 28666 - }, - { - "epoch": 0.8123494573379807, - "grad_norm": 0.0, - "learning_rate": 1.790708456774225e-06, - "loss": 0.8925, - "step": 28667 - }, - { - "epoch": 0.8123777947802432, - "grad_norm": 0.0, - "learning_rate": 1.7901844060956353e-06, - "loss": 0.9857, - "step": 28668 - }, - { - "epoch": 0.8124061322225056, - "grad_norm": 0.0, - "learning_rate": 1.7896604245713688e-06, - "loss": 0.7842, - "step": 28669 - }, - { - "epoch": 0.8124344696647681, - "grad_norm": 0.0, - "learning_rate": 1.7891365122058435e-06, - "loss": 0.7381, - "step": 28670 - }, - { - "epoch": 0.8124628071070306, - "grad_norm": 0.0, - "learning_rate": 1.7886126690034688e-06, - "loss": 0.7828, - "step": 28671 - }, - { - "epoch": 0.8124911445492929, - "grad_norm": 0.0, - "learning_rate": 1.788088894968658e-06, - "loss": 0.9276, - "step": 28672 - }, - { - "epoch": 0.8125194819915554, - "grad_norm": 0.0, - "learning_rate": 1.7875651901058266e-06, - "loss": 0.7706, - "step": 28673 - }, - { - "epoch": 0.8125478194338179, - "grad_norm": 0.0, - "learning_rate": 1.7870415544193808e-06, - "loss": 0.8839, - "step": 28674 - }, - { - "epoch": 0.8125761568760803, - "grad_norm": 0.0, - "learning_rate": 1.786517987913734e-06, - "loss": 0.7878, - "step": 28675 - }, - { - "epoch": 0.8126044943183428, - "grad_norm": 0.0, - "learning_rate": 1.785994490593298e-06, - "loss": 0.8313, - "step": 28676 - }, - { - "epoch": 0.8126328317606053, - "grad_norm": 0.0, - "learning_rate": 1.7854710624624782e-06, - "loss": 0.8317, - "step": 28677 - }, - { - "epoch": 0.8126611692028678, - "grad_norm": 0.0, - "learning_rate": 1.7849477035256868e-06, - "loss": 0.8214, - "step": 28678 - }, - { - "epoch": 0.8126895066451302, - "grad_norm": 0.0, - "learning_rate": 1.7844244137873302e-06, - "loss": 0.7649, - "step": 28679 - }, - { - "epoch": 0.8127178440873927, - "grad_norm": 0.0, - "learning_rate": 1.783901193251819e-06, - "loss": 0.8218, - "step": 28680 - }, - { - "epoch": 0.8127461815296552, - "grad_norm": 0.0, - "learning_rate": 1.7833780419235603e-06, - "loss": 0.7683, - "step": 28681 - }, - { - "epoch": 0.8127745189719175, - "grad_norm": 0.0, - "learning_rate": 1.7828549598069578e-06, - "loss": 0.9675, - "step": 28682 - }, - { - "epoch": 0.81280285641418, - "grad_norm": 0.0, - "learning_rate": 1.7823319469064194e-06, - "loss": 0.8831, - "step": 28683 - }, - { - "epoch": 0.8128311938564425, - "grad_norm": 0.0, - "learning_rate": 1.7818090032263524e-06, - "loss": 0.7558, - "step": 28684 - }, - { - "epoch": 0.812859531298705, - "grad_norm": 0.0, - "learning_rate": 1.781286128771158e-06, - "loss": 0.8466, - "step": 28685 - }, - { - "epoch": 0.8128878687409674, - "grad_norm": 0.0, - "learning_rate": 1.7807633235452449e-06, - "loss": 0.7594, - "step": 28686 - }, - { - "epoch": 0.8129162061832299, - "grad_norm": 0.0, - "learning_rate": 1.7802405875530116e-06, - "loss": 0.8461, - "step": 28687 - }, - { - "epoch": 0.8129445436254924, - "grad_norm": 0.0, - "learning_rate": 1.7797179207988635e-06, - "loss": 0.7689, - "step": 28688 - }, - { - "epoch": 0.8129728810677548, - "grad_norm": 0.0, - "learning_rate": 1.7791953232872083e-06, - "loss": 0.8311, - "step": 28689 - }, - { - "epoch": 0.8130012185100173, - "grad_norm": 0.0, - "learning_rate": 1.778672795022439e-06, - "loss": 0.8588, - "step": 28690 - }, - { - "epoch": 0.8130295559522798, - "grad_norm": 0.0, - "learning_rate": 1.7781503360089635e-06, - "loss": 0.8206, - "step": 28691 - }, - { - "epoch": 0.8130578933945423, - "grad_norm": 0.0, - "learning_rate": 1.7776279462511803e-06, - "loss": 0.903, - "step": 28692 - }, - { - "epoch": 0.8130862308368046, - "grad_norm": 0.0, - "learning_rate": 1.777105625753489e-06, - "loss": 0.8216, - "step": 28693 - }, - { - "epoch": 0.8131145682790671, - "grad_norm": 0.0, - "learning_rate": 1.7765833745202943e-06, - "loss": 0.7496, - "step": 28694 - }, - { - "epoch": 0.8131429057213296, - "grad_norm": 0.0, - "learning_rate": 1.776061192555989e-06, - "loss": 0.7057, - "step": 28695 - }, - { - "epoch": 0.813171243163592, - "grad_norm": 0.0, - "learning_rate": 1.7755390798649741e-06, - "loss": 0.6081, - "step": 28696 - }, - { - "epoch": 0.8131995806058545, - "grad_norm": 0.0, - "learning_rate": 1.7750170364516495e-06, - "loss": 0.7129, - "step": 28697 - }, - { - "epoch": 0.813227918048117, - "grad_norm": 0.0, - "learning_rate": 1.7744950623204082e-06, - "loss": 0.8238, - "step": 28698 - }, - { - "epoch": 0.8132562554903794, - "grad_norm": 0.0, - "learning_rate": 1.7739731574756524e-06, - "loss": 0.7628, - "step": 28699 - }, - { - "epoch": 0.8132845929326419, - "grad_norm": 0.0, - "learning_rate": 1.773451321921773e-06, - "loss": 0.8006, - "step": 28700 - }, - { - "epoch": 0.8133129303749044, - "grad_norm": 0.0, - "learning_rate": 1.7729295556631677e-06, - "loss": 0.8805, - "step": 28701 - }, - { - "epoch": 0.8133412678171669, - "grad_norm": 0.0, - "learning_rate": 1.7724078587042347e-06, - "loss": 0.761, - "step": 28702 - }, - { - "epoch": 0.8133696052594293, - "grad_norm": 0.0, - "learning_rate": 1.7718862310493624e-06, - "loss": 0.906, - "step": 28703 - }, - { - "epoch": 0.8133979427016917, - "grad_norm": 0.0, - "learning_rate": 1.7713646727029476e-06, - "loss": 0.8292, - "step": 28704 - }, - { - "epoch": 0.8134262801439542, - "grad_norm": 0.0, - "learning_rate": 1.770843183669385e-06, - "loss": 0.7415, - "step": 28705 - }, - { - "epoch": 0.8134546175862166, - "grad_norm": 0.0, - "learning_rate": 1.7703217639530646e-06, - "loss": 0.7794, - "step": 28706 - }, - { - "epoch": 0.8134829550284791, - "grad_norm": 0.0, - "learning_rate": 1.7698004135583835e-06, - "loss": 0.8856, - "step": 28707 - }, - { - "epoch": 0.8135112924707416, - "grad_norm": 0.0, - "learning_rate": 1.7692791324897274e-06, - "loss": 0.8194, - "step": 28708 - }, - { - "epoch": 0.8135396299130041, - "grad_norm": 0.0, - "learning_rate": 1.7687579207514893e-06, - "loss": 0.8609, - "step": 28709 - }, - { - "epoch": 0.8135679673552665, - "grad_norm": 0.0, - "learning_rate": 1.7682367783480614e-06, - "loss": 0.8473, - "step": 28710 - }, - { - "epoch": 0.813596304797529, - "grad_norm": 0.0, - "learning_rate": 1.76771570528383e-06, - "loss": 0.8191, - "step": 28711 - }, - { - "epoch": 0.8136246422397915, - "grad_norm": 0.0, - "learning_rate": 1.7671947015631875e-06, - "loss": 0.7636, - "step": 28712 - }, - { - "epoch": 0.8136529796820539, - "grad_norm": 0.0, - "learning_rate": 1.766673767190522e-06, - "loss": 0.8136, - "step": 28713 - }, - { - "epoch": 0.8136813171243164, - "grad_norm": 0.0, - "learning_rate": 1.7661529021702184e-06, - "loss": 0.8783, - "step": 28714 - }, - { - "epoch": 0.8137096545665788, - "grad_norm": 0.0, - "learning_rate": 1.7656321065066696e-06, - "loss": 0.7793, - "step": 28715 - }, - { - "epoch": 0.8137379920088412, - "grad_norm": 0.0, - "learning_rate": 1.7651113802042563e-06, - "loss": 0.6947, - "step": 28716 - }, - { - "epoch": 0.8137663294511037, - "grad_norm": 0.0, - "learning_rate": 1.7645907232673686e-06, - "loss": 0.7625, - "step": 28717 - }, - { - "epoch": 0.8137946668933662, - "grad_norm": 0.0, - "learning_rate": 1.764070135700391e-06, - "loss": 0.8545, - "step": 28718 - }, - { - "epoch": 0.8138230043356287, - "grad_norm": 0.0, - "learning_rate": 1.7635496175077082e-06, - "loss": 0.784, - "step": 28719 - }, - { - "epoch": 0.8138513417778911, - "grad_norm": 0.0, - "learning_rate": 1.7630291686937096e-06, - "loss": 0.8195, - "step": 28720 - }, - { - "epoch": 0.8138796792201536, - "grad_norm": 0.0, - "learning_rate": 1.7625087892627713e-06, - "loss": 0.8493, - "step": 28721 - }, - { - "epoch": 0.8139080166624161, - "grad_norm": 0.0, - "learning_rate": 1.761988479219281e-06, - "loss": 0.8564, - "step": 28722 - }, - { - "epoch": 0.8139363541046785, - "grad_norm": 0.0, - "learning_rate": 1.761468238567623e-06, - "loss": 0.7417, - "step": 28723 - }, - { - "epoch": 0.813964691546941, - "grad_norm": 0.0, - "learning_rate": 1.7609480673121748e-06, - "loss": 0.7885, - "step": 28724 - }, - { - "epoch": 0.8139930289892034, - "grad_norm": 0.0, - "learning_rate": 1.760427965457321e-06, - "loss": 0.8878, - "step": 28725 - }, - { - "epoch": 0.8140213664314659, - "grad_norm": 0.0, - "learning_rate": 1.7599079330074443e-06, - "loss": 0.7871, - "step": 28726 - }, - { - "epoch": 0.8140497038737283, - "grad_norm": 0.0, - "learning_rate": 1.7593879699669202e-06, - "loss": 0.6866, - "step": 28727 - }, - { - "epoch": 0.8140780413159908, - "grad_norm": 0.0, - "learning_rate": 1.7588680763401333e-06, - "loss": 0.7098, - "step": 28728 - }, - { - "epoch": 0.8141063787582533, - "grad_norm": 0.0, - "learning_rate": 1.7583482521314598e-06, - "loss": 0.7317, - "step": 28729 - }, - { - "epoch": 0.8141347162005157, - "grad_norm": 0.0, - "learning_rate": 1.7578284973452786e-06, - "loss": 0.7132, - "step": 28730 - }, - { - "epoch": 0.8141630536427782, - "grad_norm": 0.0, - "learning_rate": 1.7573088119859684e-06, - "loss": 0.7885, - "step": 28731 - }, - { - "epoch": 0.8141913910850407, - "grad_norm": 0.0, - "learning_rate": 1.7567891960579075e-06, - "loss": 0.8171, - "step": 28732 - }, - { - "epoch": 0.8142197285273032, - "grad_norm": 0.0, - "learning_rate": 1.756269649565472e-06, - "loss": 0.7173, - "step": 28733 - }, - { - "epoch": 0.8142480659695656, - "grad_norm": 0.0, - "learning_rate": 1.755750172513041e-06, - "loss": 0.7185, - "step": 28734 - }, - { - "epoch": 0.814276403411828, - "grad_norm": 0.0, - "learning_rate": 1.755230764904985e-06, - "loss": 0.7858, - "step": 28735 - }, - { - "epoch": 0.8143047408540905, - "grad_norm": 0.0, - "learning_rate": 1.7547114267456845e-06, - "loss": 0.7718, - "step": 28736 - }, - { - "epoch": 0.8143330782963529, - "grad_norm": 0.0, - "learning_rate": 1.7541921580395094e-06, - "loss": 0.6812, - "step": 28737 - }, - { - "epoch": 0.8143614157386154, - "grad_norm": 0.0, - "learning_rate": 1.7536729587908353e-06, - "loss": 0.7862, - "step": 28738 - }, - { - "epoch": 0.8143897531808779, - "grad_norm": 0.0, - "learning_rate": 1.7531538290040384e-06, - "loss": 0.8655, - "step": 28739 - }, - { - "epoch": 0.8144180906231403, - "grad_norm": 0.0, - "learning_rate": 1.7526347686834866e-06, - "loss": 0.8244, - "step": 28740 - }, - { - "epoch": 0.8144464280654028, - "grad_norm": 0.0, - "learning_rate": 1.752115777833555e-06, - "loss": 0.8151, - "step": 28741 - }, - { - "epoch": 0.8144747655076653, - "grad_norm": 0.0, - "learning_rate": 1.7515968564586183e-06, - "loss": 0.7788, - "step": 28742 - }, - { - "epoch": 0.8145031029499278, - "grad_norm": 0.0, - "learning_rate": 1.7510780045630405e-06, - "loss": 0.7148, - "step": 28743 - }, - { - "epoch": 0.8145314403921902, - "grad_norm": 0.0, - "learning_rate": 1.750559222151197e-06, - "loss": 0.7869, - "step": 28744 - }, - { - "epoch": 0.8145597778344527, - "grad_norm": 0.0, - "learning_rate": 1.7500405092274553e-06, - "loss": 0.7367, - "step": 28745 - }, - { - "epoch": 0.8145881152767152, - "grad_norm": 0.0, - "learning_rate": 1.7495218657961866e-06, - "loss": 0.7662, - "step": 28746 - }, - { - "epoch": 0.8146164527189775, - "grad_norm": 0.0, - "learning_rate": 1.7490032918617617e-06, - "loss": 0.9321, - "step": 28747 - }, - { - "epoch": 0.81464479016124, - "grad_norm": 0.0, - "learning_rate": 1.7484847874285426e-06, - "loss": 0.8622, - "step": 28748 - }, - { - "epoch": 0.8146731276035025, - "grad_norm": 0.0, - "learning_rate": 1.747966352500904e-06, - "loss": 0.8482, - "step": 28749 - }, - { - "epoch": 0.814701465045765, - "grad_norm": 0.0, - "learning_rate": 1.7474479870832063e-06, - "loss": 0.769, - "step": 28750 - }, - { - "epoch": 0.8147298024880274, - "grad_norm": 0.0, - "learning_rate": 1.7469296911798172e-06, - "loss": 0.7684, - "step": 28751 - }, - { - "epoch": 0.8147581399302899, - "grad_norm": 0.0, - "learning_rate": 1.7464114647951081e-06, - "loss": 0.9321, - "step": 28752 - }, - { - "epoch": 0.8147864773725524, - "grad_norm": 0.0, - "learning_rate": 1.7458933079334374e-06, - "loss": 0.7725, - "step": 28753 - }, - { - "epoch": 0.8148148148148148, - "grad_norm": 0.0, - "learning_rate": 1.7453752205991726e-06, - "loss": 0.8662, - "step": 28754 - }, - { - "epoch": 0.8148431522570773, - "grad_norm": 0.0, - "learning_rate": 1.7448572027966792e-06, - "loss": 0.751, - "step": 28755 - }, - { - "epoch": 0.8148714896993398, - "grad_norm": 0.0, - "learning_rate": 1.7443392545303172e-06, - "loss": 0.8787, - "step": 28756 - }, - { - "epoch": 0.8148998271416023, - "grad_norm": 0.0, - "learning_rate": 1.743821375804451e-06, - "loss": 0.7598, - "step": 28757 - }, - { - "epoch": 0.8149281645838646, - "grad_norm": 0.0, - "learning_rate": 1.7433035666234443e-06, - "loss": 0.7406, - "step": 28758 - }, - { - "epoch": 0.8149565020261271, - "grad_norm": 0.0, - "learning_rate": 1.7427858269916565e-06, - "loss": 0.7939, - "step": 28759 - }, - { - "epoch": 0.8149848394683896, - "grad_norm": 0.0, - "learning_rate": 1.7422681569134525e-06, - "loss": 0.7026, - "step": 28760 - }, - { - "epoch": 0.815013176910652, - "grad_norm": 0.0, - "learning_rate": 1.741750556393188e-06, - "loss": 0.7879, - "step": 28761 - }, - { - "epoch": 0.8150415143529145, - "grad_norm": 0.0, - "learning_rate": 1.741233025435225e-06, - "loss": 0.8437, - "step": 28762 - }, - { - "epoch": 0.815069851795177, - "grad_norm": 0.0, - "learning_rate": 1.7407155640439265e-06, - "loss": 0.8056, - "step": 28763 - }, - { - "epoch": 0.8150981892374394, - "grad_norm": 0.0, - "learning_rate": 1.7401981722236438e-06, - "loss": 0.7955, - "step": 28764 - }, - { - "epoch": 0.8151265266797019, - "grad_norm": 0.0, - "learning_rate": 1.7396808499787431e-06, - "loss": 0.8474, - "step": 28765 - }, - { - "epoch": 0.8151548641219644, - "grad_norm": 0.0, - "learning_rate": 1.7391635973135746e-06, - "loss": 0.8122, - "step": 28766 - }, - { - "epoch": 0.8151832015642269, - "grad_norm": 0.0, - "learning_rate": 1.7386464142324989e-06, - "loss": 0.8701, - "step": 28767 - }, - { - "epoch": 0.8152115390064892, - "grad_norm": 0.0, - "learning_rate": 1.7381293007398747e-06, - "loss": 0.7614, - "step": 28768 - }, - { - "epoch": 0.8152398764487517, - "grad_norm": 0.0, - "learning_rate": 1.7376122568400533e-06, - "loss": 0.6645, - "step": 28769 - }, - { - "epoch": 0.8152682138910142, - "grad_norm": 0.0, - "learning_rate": 1.737095282537391e-06, - "loss": 0.7663, - "step": 28770 - }, - { - "epoch": 0.8152965513332766, - "grad_norm": 0.0, - "learning_rate": 1.7365783778362443e-06, - "loss": 0.8135, - "step": 28771 - }, - { - "epoch": 0.8153248887755391, - "grad_norm": 0.0, - "learning_rate": 1.7360615427409667e-06, - "loss": 0.8066, - "step": 28772 - }, - { - "epoch": 0.8153532262178016, - "grad_norm": 0.0, - "learning_rate": 1.735544777255913e-06, - "loss": 0.7372, - "step": 28773 - }, - { - "epoch": 0.8153815636600641, - "grad_norm": 0.0, - "learning_rate": 1.7350280813854326e-06, - "loss": 0.8259, - "step": 28774 - }, - { - "epoch": 0.8154099011023265, - "grad_norm": 0.0, - "learning_rate": 1.7345114551338793e-06, - "loss": 0.8454, - "step": 28775 - }, - { - "epoch": 0.815438238544589, - "grad_norm": 0.0, - "learning_rate": 1.7339948985056087e-06, - "loss": 0.8066, - "step": 28776 - }, - { - "epoch": 0.8154665759868515, - "grad_norm": 0.0, - "learning_rate": 1.7334784115049663e-06, - "loss": 0.9094, - "step": 28777 - }, - { - "epoch": 0.8154949134291138, - "grad_norm": 0.0, - "learning_rate": 1.7329619941363062e-06, - "loss": 0.7922, - "step": 28778 - }, - { - "epoch": 0.8155232508713763, - "grad_norm": 0.0, - "learning_rate": 1.7324456464039751e-06, - "loss": 0.8397, - "step": 28779 - }, - { - "epoch": 0.8155515883136388, - "grad_norm": 0.0, - "learning_rate": 1.731929368312325e-06, - "loss": 0.8689, - "step": 28780 - }, - { - "epoch": 0.8155799257559013, - "grad_norm": 0.0, - "learning_rate": 1.7314131598657058e-06, - "loss": 0.8328, - "step": 28781 - }, - { - "epoch": 0.8156082631981637, - "grad_norm": 0.0, - "learning_rate": 1.7308970210684618e-06, - "loss": 0.8981, - "step": 28782 - }, - { - "epoch": 0.8156366006404262, - "grad_norm": 0.0, - "learning_rate": 1.7303809519249426e-06, - "loss": 0.7619, - "step": 28783 - }, - { - "epoch": 0.8156649380826887, - "grad_norm": 0.0, - "learning_rate": 1.7298649524394962e-06, - "loss": 0.6938, - "step": 28784 - }, - { - "epoch": 0.8156932755249511, - "grad_norm": 0.0, - "learning_rate": 1.7293490226164677e-06, - "loss": 0.7921, - "step": 28785 - }, - { - "epoch": 0.8157216129672136, - "grad_norm": 0.0, - "learning_rate": 1.7288331624602073e-06, - "loss": 0.697, - "step": 28786 - }, - { - "epoch": 0.8157499504094761, - "grad_norm": 0.0, - "learning_rate": 1.7283173719750523e-06, - "loss": 0.7467, - "step": 28787 - }, - { - "epoch": 0.8157782878517384, - "grad_norm": 0.0, - "learning_rate": 1.7278016511653529e-06, - "loss": 0.8225, - "step": 28788 - }, - { - "epoch": 0.8158066252940009, - "grad_norm": 0.0, - "learning_rate": 1.727286000035454e-06, - "loss": 0.8978, - "step": 28789 - }, - { - "epoch": 0.8158349627362634, - "grad_norm": 0.0, - "learning_rate": 1.726770418589695e-06, - "loss": 0.8556, - "step": 28790 - }, - { - "epoch": 0.8158633001785259, - "grad_norm": 0.0, - "learning_rate": 1.7262549068324208e-06, - "loss": 0.8089, - "step": 28791 - }, - { - "epoch": 0.8158916376207883, - "grad_norm": 0.0, - "learning_rate": 1.725739464767976e-06, - "loss": 0.7428, - "step": 28792 - }, - { - "epoch": 0.8159199750630508, - "grad_norm": 0.0, - "learning_rate": 1.725224092400698e-06, - "loss": 0.8214, - "step": 28793 - }, - { - "epoch": 0.8159483125053133, - "grad_norm": 0.0, - "learning_rate": 1.7247087897349334e-06, - "loss": 0.7379, - "step": 28794 - }, - { - "epoch": 0.8159766499475757, - "grad_norm": 0.0, - "learning_rate": 1.7241935567750157e-06, - "loss": 0.829, - "step": 28795 - }, - { - "epoch": 0.8160049873898382, - "grad_norm": 0.0, - "learning_rate": 1.7236783935252898e-06, - "loss": 0.7653, - "step": 28796 - }, - { - "epoch": 0.8160333248321007, - "grad_norm": 0.0, - "learning_rate": 1.7231632999900939e-06, - "loss": 0.7616, - "step": 28797 - }, - { - "epoch": 0.8160616622743632, - "grad_norm": 0.0, - "learning_rate": 1.7226482761737674e-06, - "loss": 0.8571, - "step": 28798 - }, - { - "epoch": 0.8160899997166255, - "grad_norm": 0.0, - "learning_rate": 1.722133322080648e-06, - "loss": 0.8594, - "step": 28799 - }, - { - "epoch": 0.816118337158888, - "grad_norm": 0.0, - "learning_rate": 1.7216184377150758e-06, - "loss": 0.7679, - "step": 28800 - }, - { - "epoch": 0.8161466746011505, - "grad_norm": 0.0, - "learning_rate": 1.7211036230813838e-06, - "loss": 0.8167, - "step": 28801 - }, - { - "epoch": 0.8161750120434129, - "grad_norm": 0.0, - "learning_rate": 1.720588878183912e-06, - "loss": 0.7751, - "step": 28802 - }, - { - "epoch": 0.8162033494856754, - "grad_norm": 0.0, - "learning_rate": 1.7200742030269924e-06, - "loss": 0.8111, - "step": 28803 - }, - { - "epoch": 0.8162316869279379, - "grad_norm": 0.0, - "learning_rate": 1.7195595976149615e-06, - "loss": 0.8432, - "step": 28804 - }, - { - "epoch": 0.8162600243702004, - "grad_norm": 0.0, - "learning_rate": 1.7190450619521593e-06, - "loss": 0.7854, - "step": 28805 - }, - { - "epoch": 0.8162883618124628, - "grad_norm": 0.0, - "learning_rate": 1.718530596042911e-06, - "loss": 0.8087, - "step": 28806 - }, - { - "epoch": 0.8163166992547253, - "grad_norm": 0.0, - "learning_rate": 1.718016199891559e-06, - "loss": 0.9446, - "step": 28807 - }, - { - "epoch": 0.8163450366969878, - "grad_norm": 0.0, - "learning_rate": 1.7175018735024284e-06, - "loss": 0.7875, - "step": 28808 - }, - { - "epoch": 0.8163733741392502, - "grad_norm": 0.0, - "learning_rate": 1.7169876168798561e-06, - "loss": 0.7549, - "step": 28809 - }, - { - "epoch": 0.8164017115815126, - "grad_norm": 0.0, - "learning_rate": 1.7164734300281727e-06, - "loss": 0.7949, - "step": 28810 - }, - { - "epoch": 0.8164300490237751, - "grad_norm": 0.0, - "learning_rate": 1.7159593129517095e-06, - "loss": 0.8143, - "step": 28811 - }, - { - "epoch": 0.8164583864660375, - "grad_norm": 0.0, - "learning_rate": 1.715445265654796e-06, - "loss": 0.8415, - "step": 28812 - }, - { - "epoch": 0.8164867239083, - "grad_norm": 0.0, - "learning_rate": 1.714931288141768e-06, - "loss": 0.7895, - "step": 28813 - }, - { - "epoch": 0.8165150613505625, - "grad_norm": 0.0, - "learning_rate": 1.7144173804169462e-06, - "loss": 0.7766, - "step": 28814 - }, - { - "epoch": 0.816543398792825, - "grad_norm": 0.0, - "learning_rate": 1.7139035424846673e-06, - "loss": 0.9133, - "step": 28815 - }, - { - "epoch": 0.8165717362350874, - "grad_norm": 0.0, - "learning_rate": 1.7133897743492522e-06, - "loss": 0.8463, - "step": 28816 - }, - { - "epoch": 0.8166000736773499, - "grad_norm": 0.0, - "learning_rate": 1.7128760760150331e-06, - "loss": 0.7711, - "step": 28817 - }, - { - "epoch": 0.8166284111196124, - "grad_norm": 0.0, - "learning_rate": 1.7123624474863377e-06, - "loss": 0.7261, - "step": 28818 - }, - { - "epoch": 0.8166567485618748, - "grad_norm": 0.0, - "learning_rate": 1.7118488887674889e-06, - "loss": 0.7805, - "step": 28819 - }, - { - "epoch": 0.8166850860041373, - "grad_norm": 0.0, - "learning_rate": 1.7113353998628147e-06, - "loss": 0.7932, - "step": 28820 - }, - { - "epoch": 0.8167134234463997, - "grad_norm": 0.0, - "learning_rate": 1.7108219807766436e-06, - "loss": 0.7666, - "step": 28821 - }, - { - "epoch": 0.8167417608886622, - "grad_norm": 0.0, - "learning_rate": 1.7103086315132933e-06, - "loss": 0.853, - "step": 28822 - }, - { - "epoch": 0.8167700983309246, - "grad_norm": 0.0, - "learning_rate": 1.7097953520770927e-06, - "loss": 0.7123, - "step": 28823 - }, - { - "epoch": 0.8167984357731871, - "grad_norm": 0.0, - "learning_rate": 1.7092821424723637e-06, - "loss": 0.7651, - "step": 28824 - }, - { - "epoch": 0.8168267732154496, - "grad_norm": 0.0, - "learning_rate": 1.7087690027034298e-06, - "loss": 0.7666, - "step": 28825 - }, - { - "epoch": 0.816855110657712, - "grad_norm": 0.0, - "learning_rate": 1.7082559327746161e-06, - "loss": 0.8024, - "step": 28826 - }, - { - "epoch": 0.8168834480999745, - "grad_norm": 0.0, - "learning_rate": 1.7077429326902396e-06, - "loss": 0.8077, - "step": 28827 - }, - { - "epoch": 0.816911785542237, - "grad_norm": 0.0, - "learning_rate": 1.707230002454624e-06, - "loss": 0.8355, - "step": 28828 - }, - { - "epoch": 0.8169401229844995, - "grad_norm": 0.0, - "learning_rate": 1.7067171420720908e-06, - "loss": 0.8167, - "step": 28829 - }, - { - "epoch": 0.8169684604267619, - "grad_norm": 0.0, - "learning_rate": 1.7062043515469572e-06, - "loss": 0.8411, - "step": 28830 - }, - { - "epoch": 0.8169967978690243, - "grad_norm": 0.0, - "learning_rate": 1.7056916308835457e-06, - "loss": 0.8098, - "step": 28831 - }, - { - "epoch": 0.8170251353112868, - "grad_norm": 0.0, - "learning_rate": 1.7051789800861729e-06, - "loss": 0.7156, - "step": 28832 - }, - { - "epoch": 0.8170534727535492, - "grad_norm": 0.0, - "learning_rate": 1.7046663991591562e-06, - "loss": 0.7324, - "step": 28833 - }, - { - "epoch": 0.8170818101958117, - "grad_norm": 0.0, - "learning_rate": 1.7041538881068176e-06, - "loss": 0.86, - "step": 28834 - }, - { - "epoch": 0.8171101476380742, - "grad_norm": 0.0, - "learning_rate": 1.7036414469334694e-06, - "loss": 0.8351, - "step": 28835 - }, - { - "epoch": 0.8171384850803366, - "grad_norm": 0.0, - "learning_rate": 1.7031290756434293e-06, - "loss": 0.7685, - "step": 28836 - }, - { - "epoch": 0.8171668225225991, - "grad_norm": 0.0, - "learning_rate": 1.7026167742410148e-06, - "loss": 0.8591, - "step": 28837 - }, - { - "epoch": 0.8171951599648616, - "grad_norm": 0.0, - "learning_rate": 1.7021045427305394e-06, - "loss": 0.8444, - "step": 28838 - }, - { - "epoch": 0.8172234974071241, - "grad_norm": 0.0, - "learning_rate": 1.7015923811163225e-06, - "loss": 0.8677, - "step": 28839 - }, - { - "epoch": 0.8172518348493865, - "grad_norm": 0.0, - "learning_rate": 1.7010802894026713e-06, - "loss": 0.8339, - "step": 28840 - }, - { - "epoch": 0.817280172291649, - "grad_norm": 0.0, - "learning_rate": 1.7005682675939027e-06, - "loss": 0.8399, - "step": 28841 - }, - { - "epoch": 0.8173085097339114, - "grad_norm": 0.0, - "learning_rate": 1.7000563156943316e-06, - "loss": 0.7779, - "step": 28842 - }, - { - "epoch": 0.8173368471761738, - "grad_norm": 0.0, - "learning_rate": 1.6995444337082656e-06, - "loss": 0.9197, - "step": 28843 - }, - { - "epoch": 0.8173651846184363, - "grad_norm": 0.0, - "learning_rate": 1.6990326216400221e-06, - "loss": 0.7775, - "step": 28844 - }, - { - "epoch": 0.8173935220606988, - "grad_norm": 0.0, - "learning_rate": 1.6985208794939068e-06, - "loss": 0.7564, - "step": 28845 - }, - { - "epoch": 0.8174218595029613, - "grad_norm": 0.0, - "learning_rate": 1.6980092072742338e-06, - "loss": 0.7382, - "step": 28846 - }, - { - "epoch": 0.8174501969452237, - "grad_norm": 0.0, - "learning_rate": 1.697497604985313e-06, - "loss": 0.7688, - "step": 28847 - }, - { - "epoch": 0.8174785343874862, - "grad_norm": 0.0, - "learning_rate": 1.6969860726314513e-06, - "loss": 0.8311, - "step": 28848 - }, - { - "epoch": 0.8175068718297487, - "grad_norm": 0.0, - "learning_rate": 1.6964746102169582e-06, - "loss": 0.7989, - "step": 28849 - }, - { - "epoch": 0.8175352092720111, - "grad_norm": 0.0, - "learning_rate": 1.6959632177461437e-06, - "loss": 0.7817, - "step": 28850 - }, - { - "epoch": 0.8175635467142736, - "grad_norm": 0.0, - "learning_rate": 1.6954518952233145e-06, - "loss": 0.852, - "step": 28851 - }, - { - "epoch": 0.817591884156536, - "grad_norm": 0.0, - "learning_rate": 1.6949406426527815e-06, - "loss": 0.7379, - "step": 28852 - }, - { - "epoch": 0.8176202215987985, - "grad_norm": 0.0, - "learning_rate": 1.6944294600388434e-06, - "loss": 0.8092, - "step": 28853 - }, - { - "epoch": 0.8176485590410609, - "grad_norm": 0.0, - "learning_rate": 1.6939183473858101e-06, - "loss": 0.8419, - "step": 28854 - }, - { - "epoch": 0.8176768964833234, - "grad_norm": 0.0, - "learning_rate": 1.6934073046979905e-06, - "loss": 0.6861, - "step": 28855 - }, - { - "epoch": 0.8177052339255859, - "grad_norm": 0.0, - "learning_rate": 1.692896331979682e-06, - "loss": 0.8245, - "step": 28856 - }, - { - "epoch": 0.8177335713678483, - "grad_norm": 0.0, - "learning_rate": 1.6923854292351938e-06, - "loss": 0.6965, - "step": 28857 - }, - { - "epoch": 0.8177619088101108, - "grad_norm": 0.0, - "learning_rate": 1.69187459646883e-06, - "loss": 0.8604, - "step": 28858 - }, - { - "epoch": 0.8177902462523733, - "grad_norm": 0.0, - "learning_rate": 1.6913638336848892e-06, - "loss": 0.7567, - "step": 28859 - }, - { - "epoch": 0.8178185836946357, - "grad_norm": 0.0, - "learning_rate": 1.6908531408876783e-06, - "loss": 0.9297, - "step": 28860 - }, - { - "epoch": 0.8178469211368982, - "grad_norm": 0.0, - "learning_rate": 1.6903425180814947e-06, - "loss": 0.8952, - "step": 28861 - }, - { - "epoch": 0.8178752585791607, - "grad_norm": 0.0, - "learning_rate": 1.6898319652706418e-06, - "loss": 0.8722, - "step": 28862 - }, - { - "epoch": 0.8179035960214232, - "grad_norm": 0.0, - "learning_rate": 1.6893214824594195e-06, - "loss": 0.6927, - "step": 28863 - }, - { - "epoch": 0.8179319334636855, - "grad_norm": 0.0, - "learning_rate": 1.6888110696521299e-06, - "loss": 0.8244, - "step": 28864 - }, - { - "epoch": 0.817960270905948, - "grad_norm": 0.0, - "learning_rate": 1.6883007268530726e-06, - "loss": 0.7656, - "step": 28865 - }, - { - "epoch": 0.8179886083482105, - "grad_norm": 0.0, - "learning_rate": 1.6877904540665423e-06, - "loss": 0.8033, - "step": 28866 - }, - { - "epoch": 0.8180169457904729, - "grad_norm": 0.0, - "learning_rate": 1.6872802512968388e-06, - "loss": 0.7538, - "step": 28867 - }, - { - "epoch": 0.8180452832327354, - "grad_norm": 0.0, - "learning_rate": 1.6867701185482644e-06, - "loss": 0.8095, - "step": 28868 - }, - { - "epoch": 0.8180736206749979, - "grad_norm": 0.0, - "learning_rate": 1.6862600558251097e-06, - "loss": 0.7707, - "step": 28869 - }, - { - "epoch": 0.8181019581172604, - "grad_norm": 0.0, - "learning_rate": 1.6857500631316726e-06, - "loss": 0.7555, - "step": 28870 - }, - { - "epoch": 0.8181302955595228, - "grad_norm": 0.0, - "learning_rate": 1.685240140472254e-06, - "loss": 0.7682, - "step": 28871 - }, - { - "epoch": 0.8181586330017853, - "grad_norm": 0.0, - "learning_rate": 1.6847302878511417e-06, - "loss": 0.8995, - "step": 28872 - }, - { - "epoch": 0.8181869704440478, - "grad_norm": 0.0, - "learning_rate": 1.6842205052726346e-06, - "loss": 0.7812, - "step": 28873 - }, - { - "epoch": 0.8182153078863101, - "grad_norm": 0.0, - "learning_rate": 1.6837107927410268e-06, - "loss": 0.9042, - "step": 28874 - }, - { - "epoch": 0.8182436453285726, - "grad_norm": 0.0, - "learning_rate": 1.6832011502606127e-06, - "loss": 0.6829, - "step": 28875 - }, - { - "epoch": 0.8182719827708351, - "grad_norm": 0.0, - "learning_rate": 1.682691577835681e-06, - "loss": 0.7726, - "step": 28876 - }, - { - "epoch": 0.8183003202130976, - "grad_norm": 0.0, - "learning_rate": 1.6821820754705275e-06, - "loss": 0.8498, - "step": 28877 - }, - { - "epoch": 0.81832865765536, - "grad_norm": 0.0, - "learning_rate": 1.681672643169443e-06, - "loss": 0.9044, - "step": 28878 - }, - { - "epoch": 0.8183569950976225, - "grad_norm": 0.0, - "learning_rate": 1.6811632809367207e-06, - "loss": 0.8179, - "step": 28879 - }, - { - "epoch": 0.818385332539885, - "grad_norm": 0.0, - "learning_rate": 1.680653988776647e-06, - "loss": 0.8602, - "step": 28880 - }, - { - "epoch": 0.8184136699821474, - "grad_norm": 0.0, - "learning_rate": 1.6801447666935167e-06, - "loss": 0.811, - "step": 28881 - }, - { - "epoch": 0.8184420074244099, - "grad_norm": 0.0, - "learning_rate": 1.6796356146916149e-06, - "loss": 0.8054, - "step": 28882 - }, - { - "epoch": 0.8184703448666724, - "grad_norm": 0.0, - "learning_rate": 1.6791265327752304e-06, - "loss": 0.9067, - "step": 28883 - }, - { - "epoch": 0.8184986823089347, - "grad_norm": 0.0, - "learning_rate": 1.6786175209486565e-06, - "loss": 0.8227, - "step": 28884 - }, - { - "epoch": 0.8185270197511972, - "grad_norm": 0.0, - "learning_rate": 1.6781085792161744e-06, - "loss": 0.7273, - "step": 28885 - }, - { - "epoch": 0.8185553571934597, - "grad_norm": 0.0, - "learning_rate": 1.6775997075820738e-06, - "loss": 0.7832, - "step": 28886 - }, - { - "epoch": 0.8185836946357222, - "grad_norm": 0.0, - "learning_rate": 1.6770909060506412e-06, - "loss": 0.8344, - "step": 28887 - }, - { - "epoch": 0.8186120320779846, - "grad_norm": 0.0, - "learning_rate": 1.6765821746261635e-06, - "loss": 0.6688, - "step": 28888 - }, - { - "epoch": 0.8186403695202471, - "grad_norm": 0.0, - "learning_rate": 1.6760735133129269e-06, - "loss": 0.7344, - "step": 28889 - }, - { - "epoch": 0.8186687069625096, - "grad_norm": 0.0, - "learning_rate": 1.6755649221152114e-06, - "loss": 0.8146, - "step": 28890 - }, - { - "epoch": 0.818697044404772, - "grad_norm": 0.0, - "learning_rate": 1.6750564010373037e-06, - "loss": 0.8795, - "step": 28891 - }, - { - "epoch": 0.8187253818470345, - "grad_norm": 0.0, - "learning_rate": 1.6745479500834904e-06, - "loss": 0.8722, - "step": 28892 - }, - { - "epoch": 0.818753719289297, - "grad_norm": 0.0, - "learning_rate": 1.674039569258048e-06, - "loss": 0.8116, - "step": 28893 - }, - { - "epoch": 0.8187820567315595, - "grad_norm": 0.0, - "learning_rate": 1.6735312585652651e-06, - "loss": 0.7826, - "step": 28894 - }, - { - "epoch": 0.8188103941738218, - "grad_norm": 0.0, - "learning_rate": 1.6730230180094188e-06, - "loss": 0.6834, - "step": 28895 - }, - { - "epoch": 0.8188387316160843, - "grad_norm": 0.0, - "learning_rate": 1.6725148475947906e-06, - "loss": 0.7899, - "step": 28896 - }, - { - "epoch": 0.8188670690583468, - "grad_norm": 0.0, - "learning_rate": 1.6720067473256641e-06, - "loss": 0.7606, - "step": 28897 - }, - { - "epoch": 0.8188954065006092, - "grad_norm": 0.0, - "learning_rate": 1.6714987172063157e-06, - "loss": 0.8151, - "step": 28898 - }, - { - "epoch": 0.8189237439428717, - "grad_norm": 0.0, - "learning_rate": 1.6709907572410266e-06, - "loss": 0.833, - "step": 28899 - }, - { - "epoch": 0.8189520813851342, - "grad_norm": 0.0, - "learning_rate": 1.6704828674340745e-06, - "loss": 0.8111, - "step": 28900 - }, - { - "epoch": 0.8189804188273967, - "grad_norm": 0.0, - "learning_rate": 1.6699750477897391e-06, - "loss": 0.7948, - "step": 28901 - }, - { - "epoch": 0.8190087562696591, - "grad_norm": 0.0, - "learning_rate": 1.6694672983122983e-06, - "loss": 0.7555, - "step": 28902 - }, - { - "epoch": 0.8190370937119216, - "grad_norm": 0.0, - "learning_rate": 1.6689596190060264e-06, - "loss": 0.7919, - "step": 28903 - }, - { - "epoch": 0.8190654311541841, - "grad_norm": 0.0, - "learning_rate": 1.668452009875201e-06, - "loss": 0.7738, - "step": 28904 - }, - { - "epoch": 0.8190937685964464, - "grad_norm": 0.0, - "learning_rate": 1.667944470924101e-06, - "loss": 0.8129, - "step": 28905 - }, - { - "epoch": 0.8191221060387089, - "grad_norm": 0.0, - "learning_rate": 1.6674370021569964e-06, - "loss": 0.8638, - "step": 28906 - }, - { - "epoch": 0.8191504434809714, - "grad_norm": 0.0, - "learning_rate": 1.6669296035781635e-06, - "loss": 0.6804, - "step": 28907 - }, - { - "epoch": 0.8191787809232338, - "grad_norm": 0.0, - "learning_rate": 1.6664222751918801e-06, - "loss": 0.8799, - "step": 28908 - }, - { - "epoch": 0.8192071183654963, - "grad_norm": 0.0, - "learning_rate": 1.665915017002414e-06, - "loss": 0.8247, - "step": 28909 - }, - { - "epoch": 0.8192354558077588, - "grad_norm": 0.0, - "learning_rate": 1.6654078290140418e-06, - "loss": 0.9199, - "step": 28910 - }, - { - "epoch": 0.8192637932500213, - "grad_norm": 0.0, - "learning_rate": 1.6649007112310334e-06, - "loss": 0.8698, - "step": 28911 - }, - { - "epoch": 0.8192921306922837, - "grad_norm": 0.0, - "learning_rate": 1.6643936636576608e-06, - "loss": 0.7946, - "step": 28912 - }, - { - "epoch": 0.8193204681345462, - "grad_norm": 0.0, - "learning_rate": 1.663886686298196e-06, - "loss": 0.8002, - "step": 28913 - }, - { - "epoch": 0.8193488055768087, - "grad_norm": 0.0, - "learning_rate": 1.6633797791569085e-06, - "loss": 0.7948, - "step": 28914 - }, - { - "epoch": 0.819377143019071, - "grad_norm": 0.0, - "learning_rate": 1.6628729422380695e-06, - "loss": 0.7813, - "step": 28915 - }, - { - "epoch": 0.8194054804613335, - "grad_norm": 0.0, - "learning_rate": 1.6623661755459498e-06, - "loss": 0.7233, - "step": 28916 - }, - { - "epoch": 0.819433817903596, - "grad_norm": 0.0, - "learning_rate": 1.6618594790848143e-06, - "loss": 0.8357, - "step": 28917 - }, - { - "epoch": 0.8194621553458585, - "grad_norm": 0.0, - "learning_rate": 1.661352852858934e-06, - "loss": 0.7788, - "step": 28918 - }, - { - "epoch": 0.8194904927881209, - "grad_norm": 0.0, - "learning_rate": 1.6608462968725736e-06, - "loss": 0.8457, - "step": 28919 - }, - { - "epoch": 0.8195188302303834, - "grad_norm": 0.0, - "learning_rate": 1.6603398111300006e-06, - "loss": 0.8006, - "step": 28920 - }, - { - "epoch": 0.8195471676726459, - "grad_norm": 0.0, - "learning_rate": 1.659833395635485e-06, - "loss": 0.8231, - "step": 28921 - }, - { - "epoch": 0.8195755051149083, - "grad_norm": 0.0, - "learning_rate": 1.6593270503932867e-06, - "loss": 0.7596, - "step": 28922 - }, - { - "epoch": 0.8196038425571708, - "grad_norm": 0.0, - "learning_rate": 1.658820775407678e-06, - "loss": 0.7516, - "step": 28923 - }, - { - "epoch": 0.8196321799994333, - "grad_norm": 0.0, - "learning_rate": 1.6583145706829152e-06, - "loss": 0.804, - "step": 28924 - }, - { - "epoch": 0.8196605174416957, - "grad_norm": 0.0, - "learning_rate": 1.6578084362232672e-06, - "loss": 0.8311, - "step": 28925 - }, - { - "epoch": 0.8196888548839582, - "grad_norm": 0.0, - "learning_rate": 1.657302372032996e-06, - "loss": 0.7819, - "step": 28926 - }, - { - "epoch": 0.8197171923262206, - "grad_norm": 0.0, - "learning_rate": 1.6567963781163653e-06, - "loss": 0.8555, - "step": 28927 - }, - { - "epoch": 0.8197455297684831, - "grad_norm": 0.0, - "learning_rate": 1.656290454477637e-06, - "loss": 0.7642, - "step": 28928 - }, - { - "epoch": 0.8197738672107455, - "grad_norm": 0.0, - "learning_rate": 1.6557846011210753e-06, - "loss": 0.7817, - "step": 28929 - }, - { - "epoch": 0.819802204653008, - "grad_norm": 0.0, - "learning_rate": 1.655278818050936e-06, - "loss": 0.8105, - "step": 28930 - }, - { - "epoch": 0.8198305420952705, - "grad_norm": 0.0, - "learning_rate": 1.6547731052714834e-06, - "loss": 0.7722, - "step": 28931 - }, - { - "epoch": 0.8198588795375329, - "grad_norm": 0.0, - "learning_rate": 1.6542674627869738e-06, - "loss": 0.8116, - "step": 28932 - }, - { - "epoch": 0.8198872169797954, - "grad_norm": 0.0, - "learning_rate": 1.6537618906016695e-06, - "loss": 0.7141, - "step": 28933 - }, - { - "epoch": 0.8199155544220579, - "grad_norm": 0.0, - "learning_rate": 1.6532563887198295e-06, - "loss": 0.8387, - "step": 28934 - }, - { - "epoch": 0.8199438918643204, - "grad_norm": 0.0, - "learning_rate": 1.6527509571457078e-06, - "loss": 0.8105, - "step": 28935 - }, - { - "epoch": 0.8199722293065828, - "grad_norm": 0.0, - "learning_rate": 1.6522455958835648e-06, - "loss": 0.8741, - "step": 28936 - }, - { - "epoch": 0.8200005667488452, - "grad_norm": 0.0, - "learning_rate": 1.65174030493766e-06, - "loss": 0.8626, - "step": 28937 - }, - { - "epoch": 0.8200289041911077, - "grad_norm": 0.0, - "learning_rate": 1.6512350843122438e-06, - "loss": 0.7509, - "step": 28938 - }, - { - "epoch": 0.8200572416333701, - "grad_norm": 0.0, - "learning_rate": 1.6507299340115746e-06, - "loss": 0.7776, - "step": 28939 - }, - { - "epoch": 0.8200855790756326, - "grad_norm": 0.0, - "learning_rate": 1.650224854039907e-06, - "loss": 0.8747, - "step": 28940 - }, - { - "epoch": 0.8201139165178951, - "grad_norm": 0.0, - "learning_rate": 1.6497198444014973e-06, - "loss": 0.7309, - "step": 28941 - }, - { - "epoch": 0.8201422539601576, - "grad_norm": 0.0, - "learning_rate": 1.6492149051006002e-06, - "loss": 0.7699, - "step": 28942 - }, - { - "epoch": 0.82017059140242, - "grad_norm": 0.0, - "learning_rate": 1.6487100361414642e-06, - "loss": 0.8561, - "step": 28943 - }, - { - "epoch": 0.8201989288446825, - "grad_norm": 0.0, - "learning_rate": 1.6482052375283442e-06, - "loss": 0.9211, - "step": 28944 - }, - { - "epoch": 0.820227266286945, - "grad_norm": 0.0, - "learning_rate": 1.6477005092654963e-06, - "loss": 0.8943, - "step": 28945 - }, - { - "epoch": 0.8202556037292074, - "grad_norm": 0.0, - "learning_rate": 1.6471958513571662e-06, - "loss": 0.8937, - "step": 28946 - }, - { - "epoch": 0.8202839411714699, - "grad_norm": 0.0, - "learning_rate": 1.6466912638076082e-06, - "loss": 0.8901, - "step": 28947 - }, - { - "epoch": 0.8203122786137323, - "grad_norm": 0.0, - "learning_rate": 1.6461867466210702e-06, - "loss": 0.7475, - "step": 28948 - }, - { - "epoch": 0.8203406160559947, - "grad_norm": 0.0, - "learning_rate": 1.645682299801804e-06, - "loss": 0.7041, - "step": 28949 - }, - { - "epoch": 0.8203689534982572, - "grad_norm": 0.0, - "learning_rate": 1.64517792335406e-06, - "loss": 0.6994, - "step": 28950 - }, - { - "epoch": 0.8203972909405197, - "grad_norm": 0.0, - "learning_rate": 1.644673617282082e-06, - "loss": 0.8054, - "step": 28951 - }, - { - "epoch": 0.8204256283827822, - "grad_norm": 0.0, - "learning_rate": 1.6441693815901205e-06, - "loss": 0.7959, - "step": 28952 - }, - { - "epoch": 0.8204539658250446, - "grad_norm": 0.0, - "learning_rate": 1.6436652162824241e-06, - "loss": 0.7537, - "step": 28953 - }, - { - "epoch": 0.8204823032673071, - "grad_norm": 0.0, - "learning_rate": 1.6431611213632381e-06, - "loss": 0.9058, - "step": 28954 - }, - { - "epoch": 0.8205106407095696, - "grad_norm": 0.0, - "learning_rate": 1.6426570968368116e-06, - "loss": 0.7818, - "step": 28955 - }, - { - "epoch": 0.820538978151832, - "grad_norm": 0.0, - "learning_rate": 1.6421531427073856e-06, - "loss": 0.7891, - "step": 28956 - }, - { - "epoch": 0.8205673155940945, - "grad_norm": 0.0, - "learning_rate": 1.6416492589792076e-06, - "loss": 0.8108, - "step": 28957 - }, - { - "epoch": 0.820595653036357, - "grad_norm": 0.0, - "learning_rate": 1.6411454456565234e-06, - "loss": 0.7131, - "step": 28958 - }, - { - "epoch": 0.8206239904786194, - "grad_norm": 0.0, - "learning_rate": 1.6406417027435728e-06, - "loss": 0.8456, - "step": 28959 - }, - { - "epoch": 0.8206523279208818, - "grad_norm": 0.0, - "learning_rate": 1.6401380302446046e-06, - "loss": 0.7271, - "step": 28960 - }, - { - "epoch": 0.8206806653631443, - "grad_norm": 0.0, - "learning_rate": 1.6396344281638544e-06, - "loss": 0.795, - "step": 28961 - }, - { - "epoch": 0.8207090028054068, - "grad_norm": 0.0, - "learning_rate": 1.6391308965055685e-06, - "loss": 0.745, - "step": 28962 - }, - { - "epoch": 0.8207373402476692, - "grad_norm": 0.0, - "learning_rate": 1.6386274352739906e-06, - "loss": 0.8578, - "step": 28963 - }, - { - "epoch": 0.8207656776899317, - "grad_norm": 0.0, - "learning_rate": 1.638124044473356e-06, - "loss": 0.7935, - "step": 28964 - }, - { - "epoch": 0.8207940151321942, - "grad_norm": 0.0, - "learning_rate": 1.6376207241079078e-06, - "loss": 0.7175, - "step": 28965 - }, - { - "epoch": 0.8208223525744567, - "grad_norm": 0.0, - "learning_rate": 1.6371174741818851e-06, - "loss": 0.7941, - "step": 28966 - }, - { - "epoch": 0.8208506900167191, - "grad_norm": 0.0, - "learning_rate": 1.6366142946995278e-06, - "loss": 0.8066, - "step": 28967 - }, - { - "epoch": 0.8208790274589816, - "grad_norm": 0.0, - "learning_rate": 1.6361111856650768e-06, - "loss": 0.931, - "step": 28968 - }, - { - "epoch": 0.820907364901244, - "grad_norm": 0.0, - "learning_rate": 1.6356081470827635e-06, - "loss": 0.8437, - "step": 28969 - }, - { - "epoch": 0.8209357023435064, - "grad_norm": 0.0, - "learning_rate": 1.6351051789568296e-06, - "loss": 0.8608, - "step": 28970 - }, - { - "epoch": 0.8209640397857689, - "grad_norm": 0.0, - "learning_rate": 1.6346022812915141e-06, - "loss": 0.8173, - "step": 28971 - }, - { - "epoch": 0.8209923772280314, - "grad_norm": 0.0, - "learning_rate": 1.634099454091046e-06, - "loss": 0.7907, - "step": 28972 - }, - { - "epoch": 0.8210207146702938, - "grad_norm": 0.0, - "learning_rate": 1.6335966973596662e-06, - "loss": 0.8506, - "step": 28973 - }, - { - "epoch": 0.8210490521125563, - "grad_norm": 0.0, - "learning_rate": 1.6330940111016103e-06, - "loss": 0.7816, - "step": 28974 - }, - { - "epoch": 0.8210773895548188, - "grad_norm": 0.0, - "learning_rate": 1.632591395321107e-06, - "loss": 0.9137, - "step": 28975 - }, - { - "epoch": 0.8211057269970813, - "grad_norm": 0.0, - "learning_rate": 1.6320888500223975e-06, - "loss": 0.8108, - "step": 28976 - }, - { - "epoch": 0.8211340644393437, - "grad_norm": 0.0, - "learning_rate": 1.6315863752097071e-06, - "loss": 0.7457, - "step": 28977 - }, - { - "epoch": 0.8211624018816062, - "grad_norm": 0.0, - "learning_rate": 1.6310839708872738e-06, - "loss": 0.8892, - "step": 28978 - }, - { - "epoch": 0.8211907393238687, - "grad_norm": 0.0, - "learning_rate": 1.6305816370593263e-06, - "loss": 0.8286, - "step": 28979 - }, - { - "epoch": 0.821219076766131, - "grad_norm": 0.0, - "learning_rate": 1.630079373730099e-06, - "loss": 0.8281, - "step": 28980 - }, - { - "epoch": 0.8212474142083935, - "grad_norm": 0.0, - "learning_rate": 1.629577180903823e-06, - "loss": 0.835, - "step": 28981 - }, - { - "epoch": 0.821275751650656, - "grad_norm": 0.0, - "learning_rate": 1.6290750585847237e-06, - "loss": 0.827, - "step": 28982 - }, - { - "epoch": 0.8213040890929185, - "grad_norm": 0.0, - "learning_rate": 1.6285730067770344e-06, - "loss": 0.9044, - "step": 28983 - }, - { - "epoch": 0.8213324265351809, - "grad_norm": 0.0, - "learning_rate": 1.6280710254849862e-06, - "loss": 0.8045, - "step": 28984 - }, - { - "epoch": 0.8213607639774434, - "grad_norm": 0.0, - "learning_rate": 1.6275691147128014e-06, - "loss": 0.7732, - "step": 28985 - }, - { - "epoch": 0.8213891014197059, - "grad_norm": 0.0, - "learning_rate": 1.627067274464711e-06, - "loss": 0.7358, - "step": 28986 - }, - { - "epoch": 0.8214174388619683, - "grad_norm": 0.0, - "learning_rate": 1.6265655047449448e-06, - "loss": 0.7457, - "step": 28987 - }, - { - "epoch": 0.8214457763042308, - "grad_norm": 0.0, - "learning_rate": 1.6260638055577238e-06, - "loss": 0.8849, - "step": 28988 - }, - { - "epoch": 0.8214741137464933, - "grad_norm": 0.0, - "learning_rate": 1.6255621769072805e-06, - "loss": 0.8881, - "step": 28989 - }, - { - "epoch": 0.8215024511887558, - "grad_norm": 0.0, - "learning_rate": 1.6250606187978334e-06, - "loss": 0.7931, - "step": 28990 - }, - { - "epoch": 0.8215307886310181, - "grad_norm": 0.0, - "learning_rate": 1.6245591312336106e-06, - "loss": 0.8167, - "step": 28991 - }, - { - "epoch": 0.8215591260732806, - "grad_norm": 0.0, - "learning_rate": 1.6240577142188362e-06, - "loss": 0.8539, - "step": 28992 - }, - { - "epoch": 0.8215874635155431, - "grad_norm": 0.0, - "learning_rate": 1.623556367757735e-06, - "loss": 0.7707, - "step": 28993 - }, - { - "epoch": 0.8216158009578055, - "grad_norm": 0.0, - "learning_rate": 1.6230550918545286e-06, - "loss": 0.8289, - "step": 28994 - }, - { - "epoch": 0.821644138400068, - "grad_norm": 0.0, - "learning_rate": 1.6225538865134427e-06, - "loss": 0.792, - "step": 28995 - }, - { - "epoch": 0.8216724758423305, - "grad_norm": 0.0, - "learning_rate": 1.6220527517386931e-06, - "loss": 0.8506, - "step": 28996 - }, - { - "epoch": 0.8217008132845929, - "grad_norm": 0.0, - "learning_rate": 1.6215516875345073e-06, - "loss": 0.8215, - "step": 28997 - }, - { - "epoch": 0.8217291507268554, - "grad_norm": 0.0, - "learning_rate": 1.6210506939050996e-06, - "loss": 0.7953, - "step": 28998 - }, - { - "epoch": 0.8217574881691179, - "grad_norm": 0.0, - "learning_rate": 1.6205497708546936e-06, - "loss": 0.7771, - "step": 28999 - }, - { - "epoch": 0.8217858256113804, - "grad_norm": 0.0, - "learning_rate": 1.6200489183875112e-06, - "loss": 0.7816, - "step": 29000 - }, - { - "epoch": 0.8218141630536427, - "grad_norm": 0.0, - "learning_rate": 1.6195481365077658e-06, - "loss": 0.8289, - "step": 29001 - }, - { - "epoch": 0.8218425004959052, - "grad_norm": 0.0, - "learning_rate": 1.6190474252196819e-06, - "loss": 0.8992, - "step": 29002 - }, - { - "epoch": 0.8218708379381677, - "grad_norm": 0.0, - "learning_rate": 1.6185467845274704e-06, - "loss": 0.8206, - "step": 29003 - }, - { - "epoch": 0.8218991753804301, - "grad_norm": 0.0, - "learning_rate": 1.6180462144353526e-06, - "loss": 0.7723, - "step": 29004 - }, - { - "epoch": 0.8219275128226926, - "grad_norm": 0.0, - "learning_rate": 1.6175457149475427e-06, - "loss": 0.8106, - "step": 29005 - }, - { - "epoch": 0.8219558502649551, - "grad_norm": 0.0, - "learning_rate": 1.6170452860682595e-06, - "loss": 0.8176, - "step": 29006 - }, - { - "epoch": 0.8219841877072176, - "grad_norm": 0.0, - "learning_rate": 1.6165449278017153e-06, - "loss": 0.7368, - "step": 29007 - }, - { - "epoch": 0.82201252514948, - "grad_norm": 0.0, - "learning_rate": 1.61604464015213e-06, - "loss": 0.7601, - "step": 29008 - }, - { - "epoch": 0.8220408625917425, - "grad_norm": 0.0, - "learning_rate": 1.6155444231237106e-06, - "loss": 0.8006, - "step": 29009 - }, - { - "epoch": 0.822069200034005, - "grad_norm": 0.0, - "learning_rate": 1.6150442767206775e-06, - "loss": 0.8197, - "step": 29010 - }, - { - "epoch": 0.8220975374762673, - "grad_norm": 0.0, - "learning_rate": 1.6145442009472367e-06, - "loss": 0.8316, - "step": 29011 - }, - { - "epoch": 0.8221258749185298, - "grad_norm": 0.0, - "learning_rate": 1.6140441958076047e-06, - "loss": 0.825, - "step": 29012 - }, - { - "epoch": 0.8221542123607923, - "grad_norm": 0.0, - "learning_rate": 1.6135442613059936e-06, - "loss": 0.8958, - "step": 29013 - }, - { - "epoch": 0.8221825498030548, - "grad_norm": 0.0, - "learning_rate": 1.6130443974466126e-06, - "loss": 0.711, - "step": 29014 - }, - { - "epoch": 0.8222108872453172, - "grad_norm": 0.0, - "learning_rate": 1.6125446042336723e-06, - "loss": 0.8173, - "step": 29015 - }, - { - "epoch": 0.8222392246875797, - "grad_norm": 0.0, - "learning_rate": 1.6120448816713863e-06, - "loss": 0.8007, - "step": 29016 - }, - { - "epoch": 0.8222675621298422, - "grad_norm": 0.0, - "learning_rate": 1.6115452297639579e-06, - "loss": 0.8254, - "step": 29017 - }, - { - "epoch": 0.8222958995721046, - "grad_norm": 0.0, - "learning_rate": 1.611045648515599e-06, - "loss": 0.7705, - "step": 29018 - }, - { - "epoch": 0.8223242370143671, - "grad_norm": 0.0, - "learning_rate": 1.6105461379305187e-06, - "loss": 0.8101, - "step": 29019 - }, - { - "epoch": 0.8223525744566296, - "grad_norm": 0.0, - "learning_rate": 1.6100466980129226e-06, - "loss": 0.7583, - "step": 29020 - }, - { - "epoch": 0.822380911898892, - "grad_norm": 0.0, - "learning_rate": 1.6095473287670215e-06, - "loss": 0.8704, - "step": 29021 - }, - { - "epoch": 0.8224092493411544, - "grad_norm": 0.0, - "learning_rate": 1.6090480301970168e-06, - "loss": 0.8754, - "step": 29022 - }, - { - "epoch": 0.8224375867834169, - "grad_norm": 0.0, - "learning_rate": 1.608548802307116e-06, - "loss": 0.7704, - "step": 29023 - }, - { - "epoch": 0.8224659242256794, - "grad_norm": 0.0, - "learning_rate": 1.6080496451015282e-06, - "loss": 0.8331, - "step": 29024 - }, - { - "epoch": 0.8224942616679418, - "grad_norm": 0.0, - "learning_rate": 1.607550558584451e-06, - "loss": 0.7407, - "step": 29025 - }, - { - "epoch": 0.8225225991102043, - "grad_norm": 0.0, - "learning_rate": 1.6070515427600941e-06, - "loss": 0.8636, - "step": 29026 - }, - { - "epoch": 0.8225509365524668, - "grad_norm": 0.0, - "learning_rate": 1.6065525976326568e-06, - "loss": 0.9063, - "step": 29027 - }, - { - "epoch": 0.8225792739947292, - "grad_norm": 0.0, - "learning_rate": 1.6060537232063445e-06, - "loss": 0.7334, - "step": 29028 - }, - { - "epoch": 0.8226076114369917, - "grad_norm": 0.0, - "learning_rate": 1.6055549194853604e-06, - "loss": 0.993, - "step": 29029 - }, - { - "epoch": 0.8226359488792542, - "grad_norm": 0.0, - "learning_rate": 1.6050561864739012e-06, - "loss": 0.8695, - "step": 29030 - }, - { - "epoch": 0.8226642863215167, - "grad_norm": 0.0, - "learning_rate": 1.6045575241761724e-06, - "loss": 0.7433, - "step": 29031 - }, - { - "epoch": 0.822692623763779, - "grad_norm": 0.0, - "learning_rate": 1.604058932596373e-06, - "loss": 0.7265, - "step": 29032 - }, - { - "epoch": 0.8227209612060415, - "grad_norm": 0.0, - "learning_rate": 1.6035604117387028e-06, - "loss": 0.8355, - "step": 29033 - }, - { - "epoch": 0.822749298648304, - "grad_norm": 0.0, - "learning_rate": 1.6030619616073628e-06, - "loss": 0.8754, - "step": 29034 - }, - { - "epoch": 0.8227776360905664, - "grad_norm": 0.0, - "learning_rate": 1.602563582206549e-06, - "loss": 0.7799, - "step": 29035 - }, - { - "epoch": 0.8228059735328289, - "grad_norm": 0.0, - "learning_rate": 1.6020652735404595e-06, - "loss": 0.9523, - "step": 29036 - }, - { - "epoch": 0.8228343109750914, - "grad_norm": 0.0, - "learning_rate": 1.601567035613295e-06, - "loss": 0.7898, - "step": 29037 - }, - { - "epoch": 0.8228626484173539, - "grad_norm": 0.0, - "learning_rate": 1.6010688684292485e-06, - "loss": 0.7555, - "step": 29038 - }, - { - "epoch": 0.8228909858596163, - "grad_norm": 0.0, - "learning_rate": 1.600570771992519e-06, - "loss": 0.8185, - "step": 29039 - }, - { - "epoch": 0.8229193233018788, - "grad_norm": 0.0, - "learning_rate": 1.6000727463072995e-06, - "loss": 0.8656, - "step": 29040 - }, - { - "epoch": 0.8229476607441413, - "grad_norm": 0.0, - "learning_rate": 1.5995747913777858e-06, - "loss": 0.816, - "step": 29041 - }, - { - "epoch": 0.8229759981864037, - "grad_norm": 0.0, - "learning_rate": 1.5990769072081758e-06, - "loss": 0.7194, - "step": 29042 - }, - { - "epoch": 0.8230043356286662, - "grad_norm": 0.0, - "learning_rate": 1.5985790938026591e-06, - "loss": 0.769, - "step": 29043 - }, - { - "epoch": 0.8230326730709286, - "grad_norm": 0.0, - "learning_rate": 1.5980813511654291e-06, - "loss": 0.7992, - "step": 29044 - }, - { - "epoch": 0.823061010513191, - "grad_norm": 0.0, - "learning_rate": 1.5975836793006805e-06, - "loss": 0.8527, - "step": 29045 - }, - { - "epoch": 0.8230893479554535, - "grad_norm": 0.0, - "learning_rate": 1.5970860782126053e-06, - "loss": 0.8666, - "step": 29046 - }, - { - "epoch": 0.823117685397716, - "grad_norm": 0.0, - "learning_rate": 1.5965885479053956e-06, - "loss": 0.7603, - "step": 29047 - }, - { - "epoch": 0.8231460228399785, - "grad_norm": 0.0, - "learning_rate": 1.5960910883832391e-06, - "loss": 0.8501, - "step": 29048 - }, - { - "epoch": 0.8231743602822409, - "grad_norm": 0.0, - "learning_rate": 1.5955936996503285e-06, - "loss": 0.8583, - "step": 29049 - }, - { - "epoch": 0.8232026977245034, - "grad_norm": 0.0, - "learning_rate": 1.5950963817108545e-06, - "loss": 0.7612, - "step": 29050 - }, - { - "epoch": 0.8232310351667659, - "grad_norm": 0.0, - "learning_rate": 1.5945991345690037e-06, - "loss": 0.7446, - "step": 29051 - }, - { - "epoch": 0.8232593726090283, - "grad_norm": 0.0, - "learning_rate": 1.594101958228965e-06, - "loss": 0.7736, - "step": 29052 - }, - { - "epoch": 0.8232877100512908, - "grad_norm": 0.0, - "learning_rate": 1.5936048526949288e-06, - "loss": 0.8291, - "step": 29053 - }, - { - "epoch": 0.8233160474935532, - "grad_norm": 0.0, - "learning_rate": 1.5931078179710791e-06, - "loss": 0.802, - "step": 29054 - }, - { - "epoch": 0.8233443849358157, - "grad_norm": 0.0, - "learning_rate": 1.592610854061606e-06, - "loss": 0.8403, - "step": 29055 - }, - { - "epoch": 0.8233727223780781, - "grad_norm": 0.0, - "learning_rate": 1.5921139609706915e-06, - "loss": 0.8961, - "step": 29056 - }, - { - "epoch": 0.8234010598203406, - "grad_norm": 0.0, - "learning_rate": 1.5916171387025237e-06, - "loss": 0.7959, - "step": 29057 - }, - { - "epoch": 0.8234293972626031, - "grad_norm": 0.0, - "learning_rate": 1.5911203872612858e-06, - "loss": 0.7842, - "step": 29058 - }, - { - "epoch": 0.8234577347048655, - "grad_norm": 0.0, - "learning_rate": 1.5906237066511643e-06, - "loss": 0.8989, - "step": 29059 - }, - { - "epoch": 0.823486072147128, - "grad_norm": 0.0, - "learning_rate": 1.590127096876345e-06, - "loss": 0.7945, - "step": 29060 - }, - { - "epoch": 0.8235144095893905, - "grad_norm": 0.0, - "learning_rate": 1.5896305579410042e-06, - "loss": 0.8693, - "step": 29061 - }, - { - "epoch": 0.823542747031653, - "grad_norm": 0.0, - "learning_rate": 1.5891340898493302e-06, - "loss": 0.8185, - "step": 29062 - }, - { - "epoch": 0.8235710844739154, - "grad_norm": 0.0, - "learning_rate": 1.5886376926055037e-06, - "loss": 0.9116, - "step": 29063 - }, - { - "epoch": 0.8235994219161779, - "grad_norm": 0.0, - "learning_rate": 1.5881413662137047e-06, - "loss": 0.8405, - "step": 29064 - }, - { - "epoch": 0.8236277593584403, - "grad_norm": 0.0, - "learning_rate": 1.5876451106781132e-06, - "loss": 0.7672, - "step": 29065 - }, - { - "epoch": 0.8236560968007027, - "grad_norm": 0.0, - "learning_rate": 1.5871489260029138e-06, - "loss": 0.9601, - "step": 29066 - }, - { - "epoch": 0.8236844342429652, - "grad_norm": 0.0, - "learning_rate": 1.5866528121922808e-06, - "loss": 0.8777, - "step": 29067 - }, - { - "epoch": 0.8237127716852277, - "grad_norm": 0.0, - "learning_rate": 1.5861567692503977e-06, - "loss": 0.9039, - "step": 29068 - }, - { - "epoch": 0.8237411091274901, - "grad_norm": 0.0, - "learning_rate": 1.5856607971814375e-06, - "loss": 0.7304, - "step": 29069 - }, - { - "epoch": 0.8237694465697526, - "grad_norm": 0.0, - "learning_rate": 1.5851648959895815e-06, - "loss": 0.8108, - "step": 29070 - }, - { - "epoch": 0.8237977840120151, - "grad_norm": 0.0, - "learning_rate": 1.5846690656790066e-06, - "loss": 0.8274, - "step": 29071 - }, - { - "epoch": 0.8238261214542776, - "grad_norm": 0.0, - "learning_rate": 1.5841733062538877e-06, - "loss": 0.8845, - "step": 29072 - }, - { - "epoch": 0.82385445889654, - "grad_norm": 0.0, - "learning_rate": 1.5836776177184044e-06, - "loss": 0.8495, - "step": 29073 - }, - { - "epoch": 0.8238827963388025, - "grad_norm": 0.0, - "learning_rate": 1.5831820000767307e-06, - "loss": 0.7887, - "step": 29074 - }, - { - "epoch": 0.823911133781065, - "grad_norm": 0.0, - "learning_rate": 1.5826864533330387e-06, - "loss": 0.7769, - "step": 29075 - }, - { - "epoch": 0.8239394712233273, - "grad_norm": 0.0, - "learning_rate": 1.5821909774915068e-06, - "loss": 0.8536, - "step": 29076 - }, - { - "epoch": 0.8239678086655898, - "grad_norm": 0.0, - "learning_rate": 1.5816955725563031e-06, - "loss": 0.7775, - "step": 29077 - }, - { - "epoch": 0.8239961461078523, - "grad_norm": 0.0, - "learning_rate": 1.5812002385316038e-06, - "loss": 0.7687, - "step": 29078 - }, - { - "epoch": 0.8240244835501148, - "grad_norm": 0.0, - "learning_rate": 1.580704975421584e-06, - "loss": 0.8343, - "step": 29079 - }, - { - "epoch": 0.8240528209923772, - "grad_norm": 0.0, - "learning_rate": 1.580209783230411e-06, - "loss": 0.8076, - "step": 29080 - }, - { - "epoch": 0.8240811584346397, - "grad_norm": 0.0, - "learning_rate": 1.5797146619622561e-06, - "loss": 0.8062, - "step": 29081 - }, - { - "epoch": 0.8241094958769022, - "grad_norm": 0.0, - "learning_rate": 1.5792196116212944e-06, - "loss": 0.854, - "step": 29082 - }, - { - "epoch": 0.8241378333191646, - "grad_norm": 0.0, - "learning_rate": 1.5787246322116911e-06, - "loss": 0.8744, - "step": 29083 - }, - { - "epoch": 0.8241661707614271, - "grad_norm": 0.0, - "learning_rate": 1.5782297237376165e-06, - "loss": 0.8188, - "step": 29084 - }, - { - "epoch": 0.8241945082036896, - "grad_norm": 0.0, - "learning_rate": 1.5777348862032405e-06, - "loss": 0.7489, - "step": 29085 - }, - { - "epoch": 0.824222845645952, - "grad_norm": 0.0, - "learning_rate": 1.5772401196127318e-06, - "loss": 0.6846, - "step": 29086 - }, - { - "epoch": 0.8242511830882144, - "grad_norm": 0.0, - "learning_rate": 1.5767454239702585e-06, - "loss": 0.8402, - "step": 29087 - }, - { - "epoch": 0.8242795205304769, - "grad_norm": 0.0, - "learning_rate": 1.5762507992799846e-06, - "loss": 0.7904, - "step": 29088 - }, - { - "epoch": 0.8243078579727394, - "grad_norm": 0.0, - "learning_rate": 1.5757562455460807e-06, - "loss": 0.7488, - "step": 29089 - }, - { - "epoch": 0.8243361954150018, - "grad_norm": 0.0, - "learning_rate": 1.5752617627727085e-06, - "loss": 0.8914, - "step": 29090 - }, - { - "epoch": 0.8243645328572643, - "grad_norm": 0.0, - "learning_rate": 1.5747673509640337e-06, - "loss": 0.8985, - "step": 29091 - }, - { - "epoch": 0.8243928702995268, - "grad_norm": 0.0, - "learning_rate": 1.5742730101242255e-06, - "loss": 0.7765, - "step": 29092 - }, - { - "epoch": 0.8244212077417892, - "grad_norm": 0.0, - "learning_rate": 1.573778740257441e-06, - "loss": 0.7649, - "step": 29093 - }, - { - "epoch": 0.8244495451840517, - "grad_norm": 0.0, - "learning_rate": 1.5732845413678477e-06, - "loss": 0.741, - "step": 29094 - }, - { - "epoch": 0.8244778826263142, - "grad_norm": 0.0, - "learning_rate": 1.5727904134596084e-06, - "loss": 0.8535, - "step": 29095 - }, - { - "epoch": 0.8245062200685767, - "grad_norm": 0.0, - "learning_rate": 1.5722963565368864e-06, - "loss": 0.8923, - "step": 29096 - }, - { - "epoch": 0.824534557510839, - "grad_norm": 0.0, - "learning_rate": 1.5718023706038399e-06, - "loss": 0.8053, - "step": 29097 - }, - { - "epoch": 0.8245628949531015, - "grad_norm": 0.0, - "learning_rate": 1.5713084556646318e-06, - "loss": 0.874, - "step": 29098 - }, - { - "epoch": 0.824591232395364, - "grad_norm": 0.0, - "learning_rate": 1.5708146117234225e-06, - "loss": 0.798, - "step": 29099 - }, - { - "epoch": 0.8246195698376264, - "grad_norm": 0.0, - "learning_rate": 1.5703208387843737e-06, - "loss": 0.8972, - "step": 29100 - }, - { - "epoch": 0.8246479072798889, - "grad_norm": 0.0, - "learning_rate": 1.5698271368516416e-06, - "loss": 0.6942, - "step": 29101 - }, - { - "epoch": 0.8246762447221514, - "grad_norm": 0.0, - "learning_rate": 1.5693335059293845e-06, - "loss": 0.6882, - "step": 29102 - }, - { - "epoch": 0.8247045821644139, - "grad_norm": 0.0, - "learning_rate": 1.5688399460217651e-06, - "loss": 0.8751, - "step": 29103 - }, - { - "epoch": 0.8247329196066763, - "grad_norm": 0.0, - "learning_rate": 1.5683464571329354e-06, - "loss": 0.8353, - "step": 29104 - }, - { - "epoch": 0.8247612570489388, - "grad_norm": 0.0, - "learning_rate": 1.5678530392670566e-06, - "loss": 0.8624, - "step": 29105 - }, - { - "epoch": 0.8247895944912013, - "grad_norm": 0.0, - "learning_rate": 1.5673596924282807e-06, - "loss": 0.808, - "step": 29106 - }, - { - "epoch": 0.8248179319334636, - "grad_norm": 0.0, - "learning_rate": 1.5668664166207647e-06, - "loss": 0.7996, - "step": 29107 - }, - { - "epoch": 0.8248462693757261, - "grad_norm": 0.0, - "learning_rate": 1.5663732118486653e-06, - "loss": 0.7492, - "step": 29108 - }, - { - "epoch": 0.8248746068179886, - "grad_norm": 0.0, - "learning_rate": 1.5658800781161365e-06, - "loss": 0.6582, - "step": 29109 - }, - { - "epoch": 0.824902944260251, - "grad_norm": 0.0, - "learning_rate": 1.565387015427331e-06, - "loss": 0.7079, - "step": 29110 - }, - { - "epoch": 0.8249312817025135, - "grad_norm": 0.0, - "learning_rate": 1.5648940237864051e-06, - "loss": 0.852, - "step": 29111 - }, - { - "epoch": 0.824959619144776, - "grad_norm": 0.0, - "learning_rate": 1.564401103197507e-06, - "loss": 0.8451, - "step": 29112 - }, - { - "epoch": 0.8249879565870385, - "grad_norm": 0.0, - "learning_rate": 1.5639082536647931e-06, - "loss": 0.8469, - "step": 29113 - }, - { - "epoch": 0.8250162940293009, - "grad_norm": 0.0, - "learning_rate": 1.5634154751924102e-06, - "loss": 0.7623, - "step": 29114 - }, - { - "epoch": 0.8250446314715634, - "grad_norm": 0.0, - "learning_rate": 1.5629227677845115e-06, - "loss": 0.9183, - "step": 29115 - }, - { - "epoch": 0.8250729689138259, - "grad_norm": 0.0, - "learning_rate": 1.5624301314452505e-06, - "loss": 0.7941, - "step": 29116 - }, - { - "epoch": 0.8251013063560882, - "grad_norm": 0.0, - "learning_rate": 1.5619375661787705e-06, - "loss": 0.7831, - "step": 29117 - }, - { - "epoch": 0.8251296437983507, - "grad_norm": 0.0, - "learning_rate": 1.561445071989226e-06, - "loss": 0.8541, - "step": 29118 - }, - { - "epoch": 0.8251579812406132, - "grad_norm": 0.0, - "learning_rate": 1.5609526488807613e-06, - "loss": 0.8873, - "step": 29119 - }, - { - "epoch": 0.8251863186828757, - "grad_norm": 0.0, - "learning_rate": 1.5604602968575267e-06, - "loss": 0.9124, - "step": 29120 - }, - { - "epoch": 0.8252146561251381, - "grad_norm": 0.0, - "learning_rate": 1.5599680159236685e-06, - "loss": 0.7274, - "step": 29121 - }, - { - "epoch": 0.8252429935674006, - "grad_norm": 0.0, - "learning_rate": 1.5594758060833336e-06, - "loss": 0.8979, - "step": 29122 - }, - { - "epoch": 0.8252713310096631, - "grad_norm": 0.0, - "learning_rate": 1.5589836673406688e-06, - "loss": 0.8229, - "step": 29123 - }, - { - "epoch": 0.8252996684519255, - "grad_norm": 0.0, - "learning_rate": 1.5584915996998217e-06, - "loss": 0.7966, - "step": 29124 - }, - { - "epoch": 0.825328005894188, - "grad_norm": 0.0, - "learning_rate": 1.5579996031649314e-06, - "loss": 0.7104, - "step": 29125 - }, - { - "epoch": 0.8253563433364505, - "grad_norm": 0.0, - "learning_rate": 1.5575076777401477e-06, - "loss": 0.8544, - "step": 29126 - }, - { - "epoch": 0.825384680778713, - "grad_norm": 0.0, - "learning_rate": 1.5570158234296096e-06, - "loss": 0.7505, - "step": 29127 - }, - { - "epoch": 0.8254130182209753, - "grad_norm": 0.0, - "learning_rate": 1.5565240402374625e-06, - "loss": 0.7462, - "step": 29128 - }, - { - "epoch": 0.8254413556632378, - "grad_norm": 0.0, - "learning_rate": 1.5560323281678514e-06, - "loss": 0.8001, - "step": 29129 - }, - { - "epoch": 0.8254696931055003, - "grad_norm": 0.0, - "learning_rate": 1.5555406872249134e-06, - "loss": 0.8008, - "step": 29130 - }, - { - "epoch": 0.8254980305477627, - "grad_norm": 0.0, - "learning_rate": 1.5550491174127913e-06, - "loss": 0.8218, - "step": 29131 - }, - { - "epoch": 0.8255263679900252, - "grad_norm": 0.0, - "learning_rate": 1.5545576187356292e-06, - "loss": 0.9484, - "step": 29132 - }, - { - "epoch": 0.8255547054322877, - "grad_norm": 0.0, - "learning_rate": 1.554066191197562e-06, - "loss": 0.7511, - "step": 29133 - }, - { - "epoch": 0.8255830428745501, - "grad_norm": 0.0, - "learning_rate": 1.5535748348027312e-06, - "loss": 0.81, - "step": 29134 - }, - { - "epoch": 0.8256113803168126, - "grad_norm": 0.0, - "learning_rate": 1.5530835495552764e-06, - "loss": 0.847, - "step": 29135 - }, - { - "epoch": 0.8256397177590751, - "grad_norm": 0.0, - "learning_rate": 1.5525923354593354e-06, - "loss": 0.7273, - "step": 29136 - }, - { - "epoch": 0.8256680552013376, - "grad_norm": 0.0, - "learning_rate": 1.5521011925190487e-06, - "loss": 0.8037, - "step": 29137 - }, - { - "epoch": 0.8256963926436, - "grad_norm": 0.0, - "learning_rate": 1.5516101207385481e-06, - "loss": 0.7237, - "step": 29138 - }, - { - "epoch": 0.8257247300858624, - "grad_norm": 0.0, - "learning_rate": 1.5511191201219733e-06, - "loss": 0.7118, - "step": 29139 - }, - { - "epoch": 0.8257530675281249, - "grad_norm": 0.0, - "learning_rate": 1.550628190673461e-06, - "loss": 0.7528, - "step": 29140 - }, - { - "epoch": 0.8257814049703873, - "grad_norm": 0.0, - "learning_rate": 1.5501373323971436e-06, - "loss": 0.7278, - "step": 29141 - }, - { - "epoch": 0.8258097424126498, - "grad_norm": 0.0, - "learning_rate": 1.5496465452971588e-06, - "loss": 0.8076, - "step": 29142 - }, - { - "epoch": 0.8258380798549123, - "grad_norm": 0.0, - "learning_rate": 1.5491558293776377e-06, - "loss": 0.8311, - "step": 29143 - }, - { - "epoch": 0.8258664172971748, - "grad_norm": 0.0, - "learning_rate": 1.5486651846427136e-06, - "loss": 0.8403, - "step": 29144 - }, - { - "epoch": 0.8258947547394372, - "grad_norm": 0.0, - "learning_rate": 1.5481746110965245e-06, - "loss": 0.847, - "step": 29145 - }, - { - "epoch": 0.8259230921816997, - "grad_norm": 0.0, - "learning_rate": 1.547684108743197e-06, - "loss": 0.8592, - "step": 29146 - }, - { - "epoch": 0.8259514296239622, - "grad_norm": 0.0, - "learning_rate": 1.5471936775868645e-06, - "loss": 0.8078, - "step": 29147 - }, - { - "epoch": 0.8259797670662246, - "grad_norm": 0.0, - "learning_rate": 1.5467033176316581e-06, - "loss": 0.8993, - "step": 29148 - }, - { - "epoch": 0.826008104508487, - "grad_norm": 0.0, - "learning_rate": 1.546213028881709e-06, - "loss": 0.78, - "step": 29149 - }, - { - "epoch": 0.8260364419507495, - "grad_norm": 0.0, - "learning_rate": 1.5457228113411492e-06, - "loss": 0.8136, - "step": 29150 - }, - { - "epoch": 0.826064779393012, - "grad_norm": 0.0, - "learning_rate": 1.5452326650141036e-06, - "loss": 0.8547, - "step": 29151 - }, - { - "epoch": 0.8260931168352744, - "grad_norm": 0.0, - "learning_rate": 1.544742589904703e-06, - "loss": 0.909, - "step": 29152 - }, - { - "epoch": 0.8261214542775369, - "grad_norm": 0.0, - "learning_rate": 1.5442525860170776e-06, - "loss": 0.8721, - "step": 29153 - }, - { - "epoch": 0.8261497917197994, - "grad_norm": 0.0, - "learning_rate": 1.5437626533553497e-06, - "loss": 0.848, - "step": 29154 - }, - { - "epoch": 0.8261781291620618, - "grad_norm": 0.0, - "learning_rate": 1.5432727919236513e-06, - "loss": 0.8619, - "step": 29155 - }, - { - "epoch": 0.8262064666043243, - "grad_norm": 0.0, - "learning_rate": 1.5427830017261047e-06, - "loss": 0.7931, - "step": 29156 - }, - { - "epoch": 0.8262348040465868, - "grad_norm": 0.0, - "learning_rate": 1.542293282766838e-06, - "loss": 0.8697, - "step": 29157 - }, - { - "epoch": 0.8262631414888492, - "grad_norm": 0.0, - "learning_rate": 1.5418036350499766e-06, - "loss": 0.7746, - "step": 29158 - }, - { - "epoch": 0.8262914789311117, - "grad_norm": 0.0, - "learning_rate": 1.5413140585796426e-06, - "loss": 0.8353, - "step": 29159 - }, - { - "epoch": 0.8263198163733741, - "grad_norm": 0.0, - "learning_rate": 1.5408245533599608e-06, - "loss": 0.8408, - "step": 29160 - }, - { - "epoch": 0.8263481538156366, - "grad_norm": 0.0, - "learning_rate": 1.5403351193950554e-06, - "loss": 0.7492, - "step": 29161 - }, - { - "epoch": 0.826376491257899, - "grad_norm": 0.0, - "learning_rate": 1.539845756689049e-06, - "loss": 0.8481, - "step": 29162 - }, - { - "epoch": 0.8264048287001615, - "grad_norm": 0.0, - "learning_rate": 1.5393564652460658e-06, - "loss": 0.9007, - "step": 29163 - }, - { - "epoch": 0.826433166142424, - "grad_norm": 0.0, - "learning_rate": 1.5388672450702214e-06, - "loss": 0.6985, - "step": 29164 - }, - { - "epoch": 0.8264615035846864, - "grad_norm": 0.0, - "learning_rate": 1.5383780961656414e-06, - "loss": 0.7687, - "step": 29165 - }, - { - "epoch": 0.8264898410269489, - "grad_norm": 0.0, - "learning_rate": 1.537889018536447e-06, - "loss": 0.8342, - "step": 29166 - }, - { - "epoch": 0.8265181784692114, - "grad_norm": 0.0, - "learning_rate": 1.5374000121867527e-06, - "loss": 0.9037, - "step": 29167 - }, - { - "epoch": 0.8265465159114739, - "grad_norm": 0.0, - "learning_rate": 1.5369110771206807e-06, - "loss": 0.8946, - "step": 29168 - }, - { - "epoch": 0.8265748533537363, - "grad_norm": 0.0, - "learning_rate": 1.5364222133423523e-06, - "loss": 0.7764, - "step": 29169 - }, - { - "epoch": 0.8266031907959988, - "grad_norm": 0.0, - "learning_rate": 1.5359334208558797e-06, - "loss": 0.7613, - "step": 29170 - }, - { - "epoch": 0.8266315282382612, - "grad_norm": 0.0, - "learning_rate": 1.535444699665386e-06, - "loss": 0.6652, - "step": 29171 - }, - { - "epoch": 0.8266598656805236, - "grad_norm": 0.0, - "learning_rate": 1.5349560497749816e-06, - "loss": 0.8198, - "step": 29172 - }, - { - "epoch": 0.8266882031227861, - "grad_norm": 0.0, - "learning_rate": 1.5344674711887864e-06, - "loss": 0.7459, - "step": 29173 - }, - { - "epoch": 0.8267165405650486, - "grad_norm": 0.0, - "learning_rate": 1.5339789639109148e-06, - "loss": 0.785, - "step": 29174 - }, - { - "epoch": 0.8267448780073111, - "grad_norm": 0.0, - "learning_rate": 1.5334905279454826e-06, - "loss": 0.7225, - "step": 29175 - }, - { - "epoch": 0.8267732154495735, - "grad_norm": 0.0, - "learning_rate": 1.5330021632966052e-06, - "loss": 0.842, - "step": 29176 - }, - { - "epoch": 0.826801552891836, - "grad_norm": 0.0, - "learning_rate": 1.5325138699683928e-06, - "loss": 0.7005, - "step": 29177 - }, - { - "epoch": 0.8268298903340985, - "grad_norm": 0.0, - "learning_rate": 1.532025647964961e-06, - "loss": 0.7359, - "step": 29178 - }, - { - "epoch": 0.8268582277763609, - "grad_norm": 0.0, - "learning_rate": 1.531537497290424e-06, - "loss": 0.8873, - "step": 29179 - }, - { - "epoch": 0.8268865652186234, - "grad_norm": 0.0, - "learning_rate": 1.5310494179488876e-06, - "loss": 0.7912, - "step": 29180 - }, - { - "epoch": 0.8269149026608859, - "grad_norm": 0.0, - "learning_rate": 1.5305614099444687e-06, - "loss": 0.8254, - "step": 29181 - }, - { - "epoch": 0.8269432401031482, - "grad_norm": 0.0, - "learning_rate": 1.5300734732812772e-06, - "loss": 0.8701, - "step": 29182 - }, - { - "epoch": 0.8269715775454107, - "grad_norm": 0.0, - "learning_rate": 1.5295856079634196e-06, - "loss": 0.8143, - "step": 29183 - }, - { - "epoch": 0.8269999149876732, - "grad_norm": 0.0, - "learning_rate": 1.5290978139950108e-06, - "loss": 0.7733, - "step": 29184 - }, - { - "epoch": 0.8270282524299357, - "grad_norm": 0.0, - "learning_rate": 1.5286100913801549e-06, - "loss": 0.7995, - "step": 29185 - }, - { - "epoch": 0.8270565898721981, - "grad_norm": 0.0, - "learning_rate": 1.5281224401229611e-06, - "loss": 0.8582, - "step": 29186 - }, - { - "epoch": 0.8270849273144606, - "grad_norm": 0.0, - "learning_rate": 1.527634860227538e-06, - "loss": 0.8192, - "step": 29187 - }, - { - "epoch": 0.8271132647567231, - "grad_norm": 0.0, - "learning_rate": 1.527147351697993e-06, - "loss": 0.6878, - "step": 29188 - }, - { - "epoch": 0.8271416021989855, - "grad_norm": 0.0, - "learning_rate": 1.526659914538432e-06, - "loss": 0.8349, - "step": 29189 - }, - { - "epoch": 0.827169939641248, - "grad_norm": 0.0, - "learning_rate": 1.5261725487529632e-06, - "loss": 0.8077, - "step": 29190 - }, - { - "epoch": 0.8271982770835105, - "grad_norm": 0.0, - "learning_rate": 1.5256852543456868e-06, - "loss": 0.7917, - "step": 29191 - }, - { - "epoch": 0.827226614525773, - "grad_norm": 0.0, - "learning_rate": 1.5251980313207138e-06, - "loss": 0.7822, - "step": 29192 - }, - { - "epoch": 0.8272549519680353, - "grad_norm": 0.0, - "learning_rate": 1.5247108796821418e-06, - "loss": 0.7814, - "step": 29193 - }, - { - "epoch": 0.8272832894102978, - "grad_norm": 0.0, - "learning_rate": 1.5242237994340768e-06, - "loss": 0.8028, - "step": 29194 - }, - { - "epoch": 0.8273116268525603, - "grad_norm": 0.0, - "learning_rate": 1.5237367905806256e-06, - "loss": 0.8138, - "step": 29195 - }, - { - "epoch": 0.8273399642948227, - "grad_norm": 0.0, - "learning_rate": 1.5232498531258843e-06, - "loss": 0.7753, - "step": 29196 - }, - { - "epoch": 0.8273683017370852, - "grad_norm": 0.0, - "learning_rate": 1.522762987073957e-06, - "loss": 0.828, - "step": 29197 - }, - { - "epoch": 0.8273966391793477, - "grad_norm": 0.0, - "learning_rate": 1.5222761924289475e-06, - "loss": 0.8853, - "step": 29198 - }, - { - "epoch": 0.8274249766216102, - "grad_norm": 0.0, - "learning_rate": 1.521789469194952e-06, - "loss": 0.8439, - "step": 29199 - }, - { - "epoch": 0.8274533140638726, - "grad_norm": 0.0, - "learning_rate": 1.5213028173760713e-06, - "loss": 0.883, - "step": 29200 - }, - { - "epoch": 0.8274816515061351, - "grad_norm": 0.0, - "learning_rate": 1.5208162369764057e-06, - "loss": 0.7949, - "step": 29201 - }, - { - "epoch": 0.8275099889483976, - "grad_norm": 0.0, - "learning_rate": 1.520329728000054e-06, - "loss": 0.7894, - "step": 29202 - }, - { - "epoch": 0.8275383263906599, - "grad_norm": 0.0, - "learning_rate": 1.519843290451115e-06, - "loss": 0.8647, - "step": 29203 - }, - { - "epoch": 0.8275666638329224, - "grad_norm": 0.0, - "learning_rate": 1.5193569243336836e-06, - "loss": 0.7841, - "step": 29204 - }, - { - "epoch": 0.8275950012751849, - "grad_norm": 0.0, - "learning_rate": 1.5188706296518607e-06, - "loss": 0.8262, - "step": 29205 - }, - { - "epoch": 0.8276233387174473, - "grad_norm": 0.0, - "learning_rate": 1.5183844064097364e-06, - "loss": 0.7831, - "step": 29206 - }, - { - "epoch": 0.8276516761597098, - "grad_norm": 0.0, - "learning_rate": 1.5178982546114107e-06, - "loss": 0.7076, - "step": 29207 - }, - { - "epoch": 0.8276800136019723, - "grad_norm": 0.0, - "learning_rate": 1.5174121742609804e-06, - "loss": 0.8449, - "step": 29208 - }, - { - "epoch": 0.8277083510442348, - "grad_norm": 0.0, - "learning_rate": 1.5169261653625345e-06, - "loss": 0.8502, - "step": 29209 - }, - { - "epoch": 0.8277366884864972, - "grad_norm": 0.0, - "learning_rate": 1.5164402279201695e-06, - "loss": 0.7338, - "step": 29210 - }, - { - "epoch": 0.8277650259287597, - "grad_norm": 0.0, - "learning_rate": 1.5159543619379803e-06, - "loss": 0.8797, - "step": 29211 - }, - { - "epoch": 0.8277933633710222, - "grad_norm": 0.0, - "learning_rate": 1.5154685674200565e-06, - "loss": 0.8089, - "step": 29212 - }, - { - "epoch": 0.8278217008132845, - "grad_norm": 0.0, - "learning_rate": 1.5149828443704917e-06, - "loss": 0.7857, - "step": 29213 - }, - { - "epoch": 0.827850038255547, - "grad_norm": 0.0, - "learning_rate": 1.514497192793377e-06, - "loss": 0.8117, - "step": 29214 - }, - { - "epoch": 0.8278783756978095, - "grad_norm": 0.0, - "learning_rate": 1.514011612692804e-06, - "loss": 0.8171, - "step": 29215 - }, - { - "epoch": 0.827906713140072, - "grad_norm": 0.0, - "learning_rate": 1.5135261040728643e-06, - "loss": 0.807, - "step": 29216 - }, - { - "epoch": 0.8279350505823344, - "grad_norm": 0.0, - "learning_rate": 1.513040666937643e-06, - "loss": 0.7568, - "step": 29217 - }, - { - "epoch": 0.8279633880245969, - "grad_norm": 0.0, - "learning_rate": 1.512555301291232e-06, - "loss": 0.7379, - "step": 29218 - }, - { - "epoch": 0.8279917254668594, - "grad_norm": 0.0, - "learning_rate": 1.5120700071377215e-06, - "loss": 0.8669, - "step": 29219 - }, - { - "epoch": 0.8280200629091218, - "grad_norm": 0.0, - "learning_rate": 1.511584784481196e-06, - "loss": 0.7588, - "step": 29220 - }, - { - "epoch": 0.8280484003513843, - "grad_norm": 0.0, - "learning_rate": 1.5110996333257454e-06, - "loss": 0.7675, - "step": 29221 - }, - { - "epoch": 0.8280767377936468, - "grad_norm": 0.0, - "learning_rate": 1.5106145536754524e-06, - "loss": 0.798, - "step": 29222 - }, - { - "epoch": 0.8281050752359093, - "grad_norm": 0.0, - "learning_rate": 1.5101295455344057e-06, - "loss": 0.7751, - "step": 29223 - }, - { - "epoch": 0.8281334126781716, - "grad_norm": 0.0, - "learning_rate": 1.509644608906693e-06, - "loss": 0.744, - "step": 29224 - }, - { - "epoch": 0.8281617501204341, - "grad_norm": 0.0, - "learning_rate": 1.5091597437963934e-06, - "loss": 0.768, - "step": 29225 - }, - { - "epoch": 0.8281900875626966, - "grad_norm": 0.0, - "learning_rate": 1.5086749502075949e-06, - "loss": 0.7774, - "step": 29226 - }, - { - "epoch": 0.828218425004959, - "grad_norm": 0.0, - "learning_rate": 1.5081902281443805e-06, - "loss": 0.8669, - "step": 29227 - }, - { - "epoch": 0.8282467624472215, - "grad_norm": 0.0, - "learning_rate": 1.507705577610833e-06, - "loss": 0.8398, - "step": 29228 - }, - { - "epoch": 0.828275099889484, - "grad_norm": 0.0, - "learning_rate": 1.5072209986110376e-06, - "loss": 0.7038, - "step": 29229 - }, - { - "epoch": 0.8283034373317464, - "grad_norm": 0.0, - "learning_rate": 1.5067364911490713e-06, - "loss": 0.8438, - "step": 29230 - }, - { - "epoch": 0.8283317747740089, - "grad_norm": 0.0, - "learning_rate": 1.506252055229016e-06, - "loss": 0.7686, - "step": 29231 - }, - { - "epoch": 0.8283601122162714, - "grad_norm": 0.0, - "learning_rate": 1.505767690854958e-06, - "loss": 0.8237, - "step": 29232 - }, - { - "epoch": 0.8283884496585339, - "grad_norm": 0.0, - "learning_rate": 1.505283398030969e-06, - "loss": 0.7595, - "step": 29233 - }, - { - "epoch": 0.8284167871007962, - "grad_norm": 0.0, - "learning_rate": 1.504799176761136e-06, - "loss": 0.8572, - "step": 29234 - }, - { - "epoch": 0.8284451245430587, - "grad_norm": 0.0, - "learning_rate": 1.504315027049531e-06, - "loss": 0.8026, - "step": 29235 - }, - { - "epoch": 0.8284734619853212, - "grad_norm": 0.0, - "learning_rate": 1.5038309489002357e-06, - "loss": 0.7882, - "step": 29236 - }, - { - "epoch": 0.8285017994275836, - "grad_norm": 0.0, - "learning_rate": 1.5033469423173298e-06, - "loss": 0.7784, - "step": 29237 - }, - { - "epoch": 0.8285301368698461, - "grad_norm": 0.0, - "learning_rate": 1.5028630073048844e-06, - "loss": 0.8146, - "step": 29238 - }, - { - "epoch": 0.8285584743121086, - "grad_norm": 0.0, - "learning_rate": 1.50237914386698e-06, - "loss": 0.8599, - "step": 29239 - }, - { - "epoch": 0.8285868117543711, - "grad_norm": 0.0, - "learning_rate": 1.5018953520076917e-06, - "loss": 0.7885, - "step": 29240 - }, - { - "epoch": 0.8286151491966335, - "grad_norm": 0.0, - "learning_rate": 1.5014116317310946e-06, - "loss": 0.7833, - "step": 29241 - }, - { - "epoch": 0.828643486638896, - "grad_norm": 0.0, - "learning_rate": 1.5009279830412648e-06, - "loss": 0.7657, - "step": 29242 - }, - { - "epoch": 0.8286718240811585, - "grad_norm": 0.0, - "learning_rate": 1.500444405942273e-06, - "loss": 0.7644, - "step": 29243 - }, - { - "epoch": 0.8287001615234209, - "grad_norm": 0.0, - "learning_rate": 1.4999609004381944e-06, - "loss": 0.7538, - "step": 29244 - }, - { - "epoch": 0.8287284989656833, - "grad_norm": 0.0, - "learning_rate": 1.4994774665331035e-06, - "loss": 0.7345, - "step": 29245 - }, - { - "epoch": 0.8287568364079458, - "grad_norm": 0.0, - "learning_rate": 1.4989941042310684e-06, - "loss": 0.7792, - "step": 29246 - }, - { - "epoch": 0.8287851738502083, - "grad_norm": 0.0, - "learning_rate": 1.4985108135361626e-06, - "loss": 0.7764, - "step": 29247 - }, - { - "epoch": 0.8288135112924707, - "grad_norm": 0.0, - "learning_rate": 1.4980275944524592e-06, - "loss": 0.8219, - "step": 29248 - }, - { - "epoch": 0.8288418487347332, - "grad_norm": 0.0, - "learning_rate": 1.497544446984024e-06, - "loss": 0.8082, - "step": 29249 - }, - { - "epoch": 0.8288701861769957, - "grad_norm": 0.0, - "learning_rate": 1.4970613711349325e-06, - "loss": 0.8506, - "step": 29250 - }, - { - "epoch": 0.8288985236192581, - "grad_norm": 0.0, - "learning_rate": 1.4965783669092472e-06, - "loss": 0.7972, - "step": 29251 - }, - { - "epoch": 0.8289268610615206, - "grad_norm": 0.0, - "learning_rate": 1.4960954343110412e-06, - "loss": 0.7655, - "step": 29252 - }, - { - "epoch": 0.8289551985037831, - "grad_norm": 0.0, - "learning_rate": 1.4956125733443804e-06, - "loss": 0.8387, - "step": 29253 - }, - { - "epoch": 0.8289835359460455, - "grad_norm": 0.0, - "learning_rate": 1.4951297840133326e-06, - "loss": 0.7854, - "step": 29254 - }, - { - "epoch": 0.829011873388308, - "grad_norm": 0.0, - "learning_rate": 1.4946470663219647e-06, - "loss": 0.7955, - "step": 29255 - }, - { - "epoch": 0.8290402108305704, - "grad_norm": 0.0, - "learning_rate": 1.4941644202743467e-06, - "loss": 0.7975, - "step": 29256 - }, - { - "epoch": 0.8290685482728329, - "grad_norm": 0.0, - "learning_rate": 1.4936818458745373e-06, - "loss": 0.8363, - "step": 29257 - }, - { - "epoch": 0.8290968857150953, - "grad_norm": 0.0, - "learning_rate": 1.4931993431266056e-06, - "loss": 0.8467, - "step": 29258 - }, - { - "epoch": 0.8291252231573578, - "grad_norm": 0.0, - "learning_rate": 1.492716912034614e-06, - "loss": 0.7933, - "step": 29259 - }, - { - "epoch": 0.8291535605996203, - "grad_norm": 0.0, - "learning_rate": 1.492234552602626e-06, - "loss": 0.7747, - "step": 29260 - }, - { - "epoch": 0.8291818980418827, - "grad_norm": 0.0, - "learning_rate": 1.4917522648347083e-06, - "loss": 0.7702, - "step": 29261 - }, - { - "epoch": 0.8292102354841452, - "grad_norm": 0.0, - "learning_rate": 1.4912700487349186e-06, - "loss": 0.8773, - "step": 29262 - }, - { - "epoch": 0.8292385729264077, - "grad_norm": 0.0, - "learning_rate": 1.4907879043073236e-06, - "loss": 0.8418, - "step": 29263 - }, - { - "epoch": 0.8292669103686702, - "grad_norm": 0.0, - "learning_rate": 1.4903058315559783e-06, - "loss": 0.8942, - "step": 29264 - }, - { - "epoch": 0.8292952478109326, - "grad_norm": 0.0, - "learning_rate": 1.4898238304849477e-06, - "loss": 0.7672, - "step": 29265 - }, - { - "epoch": 0.829323585253195, - "grad_norm": 0.0, - "learning_rate": 1.4893419010982913e-06, - "loss": 0.7584, - "step": 29266 - }, - { - "epoch": 0.8293519226954575, - "grad_norm": 0.0, - "learning_rate": 1.4888600434000688e-06, - "loss": 0.8304, - "step": 29267 - }, - { - "epoch": 0.8293802601377199, - "grad_norm": 0.0, - "learning_rate": 1.4883782573943383e-06, - "loss": 0.9331, - "step": 29268 - }, - { - "epoch": 0.8294085975799824, - "grad_norm": 0.0, - "learning_rate": 1.4878965430851612e-06, - "loss": 0.7614, - "step": 29269 - }, - { - "epoch": 0.8294369350222449, - "grad_norm": 0.0, - "learning_rate": 1.4874149004765892e-06, - "loss": 0.8105, - "step": 29270 - }, - { - "epoch": 0.8294652724645074, - "grad_norm": 0.0, - "learning_rate": 1.4869333295726851e-06, - "loss": 0.7542, - "step": 29271 - }, - { - "epoch": 0.8294936099067698, - "grad_norm": 0.0, - "learning_rate": 1.486451830377501e-06, - "loss": 0.8113, - "step": 29272 - }, - { - "epoch": 0.8295219473490323, - "grad_norm": 0.0, - "learning_rate": 1.4859704028950938e-06, - "loss": 0.7119, - "step": 29273 - }, - { - "epoch": 0.8295502847912948, - "grad_norm": 0.0, - "learning_rate": 1.4854890471295225e-06, - "loss": 0.7828, - "step": 29274 - }, - { - "epoch": 0.8295786222335572, - "grad_norm": 0.0, - "learning_rate": 1.485007763084836e-06, - "loss": 0.7729, - "step": 29275 - }, - { - "epoch": 0.8296069596758197, - "grad_norm": 0.0, - "learning_rate": 1.4845265507650909e-06, - "loss": 0.8795, - "step": 29276 - }, - { - "epoch": 0.8296352971180821, - "grad_norm": 0.0, - "learning_rate": 1.484045410174344e-06, - "loss": 0.8651, - "step": 29277 - }, - { - "epoch": 0.8296636345603445, - "grad_norm": 0.0, - "learning_rate": 1.4835643413166423e-06, - "loss": 0.7552, - "step": 29278 - }, - { - "epoch": 0.829691972002607, - "grad_norm": 0.0, - "learning_rate": 1.4830833441960402e-06, - "loss": 0.7485, - "step": 29279 - }, - { - "epoch": 0.8297203094448695, - "grad_norm": 0.0, - "learning_rate": 1.48260241881659e-06, - "loss": 0.783, - "step": 29280 - }, - { - "epoch": 0.829748646887132, - "grad_norm": 0.0, - "learning_rate": 1.4821215651823418e-06, - "loss": 0.8244, - "step": 29281 - }, - { - "epoch": 0.8297769843293944, - "grad_norm": 0.0, - "learning_rate": 1.48164078329735e-06, - "loss": 0.8198, - "step": 29282 - }, - { - "epoch": 0.8298053217716569, - "grad_norm": 0.0, - "learning_rate": 1.4811600731656583e-06, - "loss": 0.7727, - "step": 29283 - }, - { - "epoch": 0.8298336592139194, - "grad_norm": 0.0, - "learning_rate": 1.480679434791321e-06, - "loss": 0.7371, - "step": 29284 - }, - { - "epoch": 0.8298619966561818, - "grad_norm": 0.0, - "learning_rate": 1.480198868178383e-06, - "loss": 0.8564, - "step": 29285 - }, - { - "epoch": 0.8298903340984443, - "grad_norm": 0.0, - "learning_rate": 1.4797183733308928e-06, - "loss": 0.9831, - "step": 29286 - }, - { - "epoch": 0.8299186715407068, - "grad_norm": 0.0, - "learning_rate": 1.479237950252901e-06, - "loss": 0.787, - "step": 29287 - }, - { - "epoch": 0.8299470089829692, - "grad_norm": 0.0, - "learning_rate": 1.4787575989484504e-06, - "loss": 0.6696, - "step": 29288 - }, - { - "epoch": 0.8299753464252316, - "grad_norm": 0.0, - "learning_rate": 1.4782773194215883e-06, - "loss": 0.7077, - "step": 29289 - }, - { - "epoch": 0.8300036838674941, - "grad_norm": 0.0, - "learning_rate": 1.4777971116763622e-06, - "loss": 0.8645, - "step": 29290 - }, - { - "epoch": 0.8300320213097566, - "grad_norm": 0.0, - "learning_rate": 1.4773169757168148e-06, - "loss": 0.7288, - "step": 29291 - }, - { - "epoch": 0.830060358752019, - "grad_norm": 0.0, - "learning_rate": 1.4768369115469905e-06, - "loss": 0.8839, - "step": 29292 - }, - { - "epoch": 0.8300886961942815, - "grad_norm": 0.0, - "learning_rate": 1.476356919170935e-06, - "loss": 0.8859, - "step": 29293 - }, - { - "epoch": 0.830117033636544, - "grad_norm": 0.0, - "learning_rate": 1.475876998592689e-06, - "loss": 0.7914, - "step": 29294 - }, - { - "epoch": 0.8301453710788065, - "grad_norm": 0.0, - "learning_rate": 1.4753971498162988e-06, - "loss": 0.7114, - "step": 29295 - }, - { - "epoch": 0.8301737085210689, - "grad_norm": 0.0, - "learning_rate": 1.4749173728458022e-06, - "loss": 0.7484, - "step": 29296 - }, - { - "epoch": 0.8302020459633314, - "grad_norm": 0.0, - "learning_rate": 1.4744376676852424e-06, - "loss": 0.8763, - "step": 29297 - }, - { - "epoch": 0.8302303834055939, - "grad_norm": 0.0, - "learning_rate": 1.4739580343386628e-06, - "loss": 0.8136, - "step": 29298 - }, - { - "epoch": 0.8302587208478562, - "grad_norm": 0.0, - "learning_rate": 1.473478472810097e-06, - "loss": 0.7836, - "step": 29299 - }, - { - "epoch": 0.8302870582901187, - "grad_norm": 0.0, - "learning_rate": 1.4729989831035918e-06, - "loss": 0.8198, - "step": 29300 - }, - { - "epoch": 0.8303153957323812, - "grad_norm": 0.0, - "learning_rate": 1.4725195652231794e-06, - "loss": 0.7654, - "step": 29301 - }, - { - "epoch": 0.8303437331746436, - "grad_norm": 0.0, - "learning_rate": 1.4720402191729022e-06, - "loss": 0.733, - "step": 29302 - }, - { - "epoch": 0.8303720706169061, - "grad_norm": 0.0, - "learning_rate": 1.471560944956798e-06, - "loss": 0.7621, - "step": 29303 - }, - { - "epoch": 0.8304004080591686, - "grad_norm": 0.0, - "learning_rate": 1.4710817425789015e-06, - "loss": 0.7714, - "step": 29304 - }, - { - "epoch": 0.8304287455014311, - "grad_norm": 0.0, - "learning_rate": 1.4706026120432505e-06, - "loss": 0.7513, - "step": 29305 - }, - { - "epoch": 0.8304570829436935, - "grad_norm": 0.0, - "learning_rate": 1.4701235533538816e-06, - "loss": 0.763, - "step": 29306 - }, - { - "epoch": 0.830485420385956, - "grad_norm": 0.0, - "learning_rate": 1.4696445665148285e-06, - "loss": 0.706, - "step": 29307 - }, - { - "epoch": 0.8305137578282185, - "grad_norm": 0.0, - "learning_rate": 1.46916565153013e-06, - "loss": 0.7857, - "step": 29308 - }, - { - "epoch": 0.8305420952704808, - "grad_norm": 0.0, - "learning_rate": 1.468686808403814e-06, - "loss": 0.8079, - "step": 29309 - }, - { - "epoch": 0.8305704327127433, - "grad_norm": 0.0, - "learning_rate": 1.4682080371399176e-06, - "loss": 0.9454, - "step": 29310 - }, - { - "epoch": 0.8305987701550058, - "grad_norm": 0.0, - "learning_rate": 1.4677293377424752e-06, - "loss": 0.7587, - "step": 29311 - }, - { - "epoch": 0.8306271075972683, - "grad_norm": 0.0, - "learning_rate": 1.4672507102155153e-06, - "loss": 0.8113, - "step": 29312 - }, - { - "epoch": 0.8306554450395307, - "grad_norm": 0.0, - "learning_rate": 1.466772154563073e-06, - "loss": 0.7467, - "step": 29313 - }, - { - "epoch": 0.8306837824817932, - "grad_norm": 0.0, - "learning_rate": 1.466293670789175e-06, - "loss": 0.9363, - "step": 29314 - }, - { - "epoch": 0.8307121199240557, - "grad_norm": 0.0, - "learning_rate": 1.4658152588978548e-06, - "loss": 0.7047, - "step": 29315 - }, - { - "epoch": 0.8307404573663181, - "grad_norm": 0.0, - "learning_rate": 1.465336918893141e-06, - "loss": 0.7322, - "step": 29316 - }, - { - "epoch": 0.8307687948085806, - "grad_norm": 0.0, - "learning_rate": 1.4648586507790663e-06, - "loss": 0.7932, - "step": 29317 - }, - { - "epoch": 0.8307971322508431, - "grad_norm": 0.0, - "learning_rate": 1.4643804545596551e-06, - "loss": 0.7002, - "step": 29318 - }, - { - "epoch": 0.8308254696931054, - "grad_norm": 0.0, - "learning_rate": 1.4639023302389366e-06, - "loss": 0.7389, - "step": 29319 - }, - { - "epoch": 0.8308538071353679, - "grad_norm": 0.0, - "learning_rate": 1.4634242778209373e-06, - "loss": 0.7893, - "step": 29320 - }, - { - "epoch": 0.8308821445776304, - "grad_norm": 0.0, - "learning_rate": 1.4629462973096887e-06, - "loss": 0.7977, - "step": 29321 - }, - { - "epoch": 0.8309104820198929, - "grad_norm": 0.0, - "learning_rate": 1.4624683887092117e-06, - "loss": 0.6519, - "step": 29322 - }, - { - "epoch": 0.8309388194621553, - "grad_norm": 0.0, - "learning_rate": 1.4619905520235333e-06, - "loss": 0.7322, - "step": 29323 - }, - { - "epoch": 0.8309671569044178, - "grad_norm": 0.0, - "learning_rate": 1.4615127872566815e-06, - "loss": 0.7927, - "step": 29324 - }, - { - "epoch": 0.8309954943466803, - "grad_norm": 0.0, - "learning_rate": 1.461035094412675e-06, - "loss": 0.7787, - "step": 29325 - }, - { - "epoch": 0.8310238317889427, - "grad_norm": 0.0, - "learning_rate": 1.4605574734955418e-06, - "loss": 0.8071, - "step": 29326 - }, - { - "epoch": 0.8310521692312052, - "grad_norm": 0.0, - "learning_rate": 1.4600799245093055e-06, - "loss": 0.8381, - "step": 29327 - }, - { - "epoch": 0.8310805066734677, - "grad_norm": 0.0, - "learning_rate": 1.459602447457985e-06, - "loss": 0.8608, - "step": 29328 - }, - { - "epoch": 0.8311088441157302, - "grad_norm": 0.0, - "learning_rate": 1.4591250423456048e-06, - "loss": 0.8533, - "step": 29329 - }, - { - "epoch": 0.8311371815579925, - "grad_norm": 0.0, - "learning_rate": 1.4586477091761863e-06, - "loss": 0.7496, - "step": 29330 - }, - { - "epoch": 0.831165519000255, - "grad_norm": 0.0, - "learning_rate": 1.4581704479537495e-06, - "loss": 0.7026, - "step": 29331 - }, - { - "epoch": 0.8311938564425175, - "grad_norm": 0.0, - "learning_rate": 1.4576932586823178e-06, - "loss": 0.767, - "step": 29332 - }, - { - "epoch": 0.8312221938847799, - "grad_norm": 0.0, - "learning_rate": 1.4572161413659047e-06, - "loss": 0.7675, - "step": 29333 - }, - { - "epoch": 0.8312505313270424, - "grad_norm": 0.0, - "learning_rate": 1.4567390960085325e-06, - "loss": 0.7971, - "step": 29334 - }, - { - "epoch": 0.8312788687693049, - "grad_norm": 0.0, - "learning_rate": 1.4562621226142225e-06, - "loss": 0.8456, - "step": 29335 - }, - { - "epoch": 0.8313072062115674, - "grad_norm": 0.0, - "learning_rate": 1.455785221186986e-06, - "loss": 0.8208, - "step": 29336 - }, - { - "epoch": 0.8313355436538298, - "grad_norm": 0.0, - "learning_rate": 1.4553083917308464e-06, - "loss": 0.713, - "step": 29337 - }, - { - "epoch": 0.8313638810960923, - "grad_norm": 0.0, - "learning_rate": 1.4548316342498148e-06, - "loss": 0.8228, - "step": 29338 - }, - { - "epoch": 0.8313922185383548, - "grad_norm": 0.0, - "learning_rate": 1.4543549487479092e-06, - "loss": 0.9128, - "step": 29339 - }, - { - "epoch": 0.8314205559806171, - "grad_norm": 0.0, - "learning_rate": 1.4538783352291474e-06, - "loss": 0.7908, - "step": 29340 - }, - { - "epoch": 0.8314488934228796, - "grad_norm": 0.0, - "learning_rate": 1.4534017936975396e-06, - "loss": 0.8934, - "step": 29341 - }, - { - "epoch": 0.8314772308651421, - "grad_norm": 0.0, - "learning_rate": 1.4529253241571029e-06, - "loss": 0.8106, - "step": 29342 - }, - { - "epoch": 0.8315055683074045, - "grad_norm": 0.0, - "learning_rate": 1.452448926611849e-06, - "loss": 0.8568, - "step": 29343 - }, - { - "epoch": 0.831533905749667, - "grad_norm": 0.0, - "learning_rate": 1.4519726010657931e-06, - "loss": 0.7575, - "step": 29344 - }, - { - "epoch": 0.8315622431919295, - "grad_norm": 0.0, - "learning_rate": 1.4514963475229482e-06, - "loss": 0.8874, - "step": 29345 - }, - { - "epoch": 0.831590580634192, - "grad_norm": 0.0, - "learning_rate": 1.4510201659873212e-06, - "loss": 0.8784, - "step": 29346 - }, - { - "epoch": 0.8316189180764544, - "grad_norm": 0.0, - "learning_rate": 1.4505440564629258e-06, - "loss": 0.76, - "step": 29347 - }, - { - "epoch": 0.8316472555187169, - "grad_norm": 0.0, - "learning_rate": 1.4500680189537753e-06, - "loss": 0.8553, - "step": 29348 - }, - { - "epoch": 0.8316755929609794, - "grad_norm": 0.0, - "learning_rate": 1.4495920534638741e-06, - "loss": 0.7473, - "step": 29349 - }, - { - "epoch": 0.8317039304032418, - "grad_norm": 0.0, - "learning_rate": 1.449116159997237e-06, - "loss": 0.8597, - "step": 29350 - }, - { - "epoch": 0.8317322678455042, - "grad_norm": 0.0, - "learning_rate": 1.4486403385578673e-06, - "loss": 0.8087, - "step": 29351 - }, - { - "epoch": 0.8317606052877667, - "grad_norm": 0.0, - "learning_rate": 1.4481645891497753e-06, - "loss": 0.8061, - "step": 29352 - }, - { - "epoch": 0.8317889427300292, - "grad_norm": 0.0, - "learning_rate": 1.447688911776971e-06, - "loss": 0.8133, - "step": 29353 - }, - { - "epoch": 0.8318172801722916, - "grad_norm": 0.0, - "learning_rate": 1.4472133064434568e-06, - "loss": 0.8478, - "step": 29354 - }, - { - "epoch": 0.8318456176145541, - "grad_norm": 0.0, - "learning_rate": 1.4467377731532405e-06, - "loss": 0.7962, - "step": 29355 - }, - { - "epoch": 0.8318739550568166, - "grad_norm": 0.0, - "learning_rate": 1.4462623119103281e-06, - "loss": 0.8001, - "step": 29356 - }, - { - "epoch": 0.831902292499079, - "grad_norm": 0.0, - "learning_rate": 1.4457869227187248e-06, - "loss": 0.7478, - "step": 29357 - }, - { - "epoch": 0.8319306299413415, - "grad_norm": 0.0, - "learning_rate": 1.4453116055824368e-06, - "loss": 0.7775, - "step": 29358 - }, - { - "epoch": 0.831958967383604, - "grad_norm": 0.0, - "learning_rate": 1.4448363605054638e-06, - "loss": 0.8668, - "step": 29359 - }, - { - "epoch": 0.8319873048258665, - "grad_norm": 0.0, - "learning_rate": 1.4443611874918106e-06, - "loss": 0.8075, - "step": 29360 - }, - { - "epoch": 0.8320156422681289, - "grad_norm": 0.0, - "learning_rate": 1.4438860865454828e-06, - "loss": 0.8354, - "step": 29361 - }, - { - "epoch": 0.8320439797103913, - "grad_norm": 0.0, - "learning_rate": 1.4434110576704774e-06, - "loss": 0.7199, - "step": 29362 - }, - { - "epoch": 0.8320723171526538, - "grad_norm": 0.0, - "learning_rate": 1.4429361008707986e-06, - "loss": 0.8215, - "step": 29363 - }, - { - "epoch": 0.8321006545949162, - "grad_norm": 0.0, - "learning_rate": 1.4424612161504482e-06, - "loss": 0.8465, - "step": 29364 - }, - { - "epoch": 0.8321289920371787, - "grad_norm": 0.0, - "learning_rate": 1.4419864035134236e-06, - "loss": 0.7578, - "step": 29365 - }, - { - "epoch": 0.8321573294794412, - "grad_norm": 0.0, - "learning_rate": 1.441511662963726e-06, - "loss": 0.8926, - "step": 29366 - }, - { - "epoch": 0.8321856669217036, - "grad_norm": 0.0, - "learning_rate": 1.4410369945053526e-06, - "loss": 0.9387, - "step": 29367 - }, - { - "epoch": 0.8322140043639661, - "grad_norm": 0.0, - "learning_rate": 1.4405623981423022e-06, - "loss": 0.9099, - "step": 29368 - }, - { - "epoch": 0.8322423418062286, - "grad_norm": 0.0, - "learning_rate": 1.440087873878574e-06, - "loss": 0.872, - "step": 29369 - }, - { - "epoch": 0.8322706792484911, - "grad_norm": 0.0, - "learning_rate": 1.4396134217181645e-06, - "loss": 0.8924, - "step": 29370 - }, - { - "epoch": 0.8322990166907535, - "grad_norm": 0.0, - "learning_rate": 1.4391390416650708e-06, - "loss": 0.8132, - "step": 29371 - }, - { - "epoch": 0.832327354133016, - "grad_norm": 0.0, - "learning_rate": 1.4386647337232873e-06, - "loss": 0.8683, - "step": 29372 - }, - { - "epoch": 0.8323556915752784, - "grad_norm": 0.0, - "learning_rate": 1.4381904978968086e-06, - "loss": 0.7988, - "step": 29373 - }, - { - "epoch": 0.8323840290175408, - "grad_norm": 0.0, - "learning_rate": 1.437716334189634e-06, - "loss": 0.8527, - "step": 29374 - }, - { - "epoch": 0.8324123664598033, - "grad_norm": 0.0, - "learning_rate": 1.4372422426057509e-06, - "loss": 0.8506, - "step": 29375 - }, - { - "epoch": 0.8324407039020658, - "grad_norm": 0.0, - "learning_rate": 1.4367682231491576e-06, - "loss": 0.7222, - "step": 29376 - }, - { - "epoch": 0.8324690413443283, - "grad_norm": 0.0, - "learning_rate": 1.4362942758238463e-06, - "loss": 0.7947, - "step": 29377 - }, - { - "epoch": 0.8324973787865907, - "grad_norm": 0.0, - "learning_rate": 1.4358204006338061e-06, - "loss": 0.8271, - "step": 29378 - }, - { - "epoch": 0.8325257162288532, - "grad_norm": 0.0, - "learning_rate": 1.435346597583034e-06, - "loss": 0.8944, - "step": 29379 - }, - { - "epoch": 0.8325540536711157, - "grad_norm": 0.0, - "learning_rate": 1.4348728666755152e-06, - "loss": 0.8198, - "step": 29380 - }, - { - "epoch": 0.8325823911133781, - "grad_norm": 0.0, - "learning_rate": 1.4343992079152436e-06, - "loss": 0.7636, - "step": 29381 - }, - { - "epoch": 0.8326107285556406, - "grad_norm": 0.0, - "learning_rate": 1.4339256213062069e-06, - "loss": 0.8129, - "step": 29382 - }, - { - "epoch": 0.832639065997903, - "grad_norm": 0.0, - "learning_rate": 1.4334521068523966e-06, - "loss": 0.8786, - "step": 29383 - }, - { - "epoch": 0.8326674034401655, - "grad_norm": 0.0, - "learning_rate": 1.4329786645578004e-06, - "loss": 0.8219, - "step": 29384 - }, - { - "epoch": 0.8326957408824279, - "grad_norm": 0.0, - "learning_rate": 1.4325052944264074e-06, - "loss": 0.822, - "step": 29385 - }, - { - "epoch": 0.8327240783246904, - "grad_norm": 0.0, - "learning_rate": 1.432031996462202e-06, - "loss": 0.785, - "step": 29386 - }, - { - "epoch": 0.8327524157669529, - "grad_norm": 0.0, - "learning_rate": 1.4315587706691759e-06, - "loss": 0.7493, - "step": 29387 - }, - { - "epoch": 0.8327807532092153, - "grad_norm": 0.0, - "learning_rate": 1.4310856170513088e-06, - "loss": 0.8272, - "step": 29388 - }, - { - "epoch": 0.8328090906514778, - "grad_norm": 0.0, - "learning_rate": 1.4306125356125899e-06, - "loss": 0.9279, - "step": 29389 - }, - { - "epoch": 0.8328374280937403, - "grad_norm": 0.0, - "learning_rate": 1.430139526357006e-06, - "loss": 0.8542, - "step": 29390 - }, - { - "epoch": 0.8328657655360027, - "grad_norm": 0.0, - "learning_rate": 1.4296665892885364e-06, - "loss": 0.7574, - "step": 29391 - }, - { - "epoch": 0.8328941029782652, - "grad_norm": 0.0, - "learning_rate": 1.4291937244111688e-06, - "loss": 0.8108, - "step": 29392 - }, - { - "epoch": 0.8329224404205277, - "grad_norm": 0.0, - "learning_rate": 1.4287209317288864e-06, - "loss": 0.8762, - "step": 29393 - }, - { - "epoch": 0.8329507778627901, - "grad_norm": 0.0, - "learning_rate": 1.4282482112456686e-06, - "loss": 0.7006, - "step": 29394 - }, - { - "epoch": 0.8329791153050525, - "grad_norm": 0.0, - "learning_rate": 1.4277755629654987e-06, - "loss": 0.8313, - "step": 29395 - }, - { - "epoch": 0.833007452747315, - "grad_norm": 0.0, - "learning_rate": 1.4273029868923593e-06, - "loss": 0.722, - "step": 29396 - }, - { - "epoch": 0.8330357901895775, - "grad_norm": 0.0, - "learning_rate": 1.4268304830302293e-06, - "loss": 0.8065, - "step": 29397 - }, - { - "epoch": 0.8330641276318399, - "grad_norm": 0.0, - "learning_rate": 1.426358051383092e-06, - "loss": 0.7261, - "step": 29398 - }, - { - "epoch": 0.8330924650741024, - "grad_norm": 0.0, - "learning_rate": 1.4258856919549236e-06, - "loss": 0.758, - "step": 29399 - }, - { - "epoch": 0.8331208025163649, - "grad_norm": 0.0, - "learning_rate": 1.4254134047497047e-06, - "loss": 0.7052, - "step": 29400 - }, - { - "epoch": 0.8331491399586274, - "grad_norm": 0.0, - "learning_rate": 1.4249411897714117e-06, - "loss": 0.7652, - "step": 29401 - }, - { - "epoch": 0.8331774774008898, - "grad_norm": 0.0, - "learning_rate": 1.424469047024023e-06, - "loss": 0.8266, - "step": 29402 - }, - { - "epoch": 0.8332058148431523, - "grad_norm": 0.0, - "learning_rate": 1.423996976511518e-06, - "loss": 0.8231, - "step": 29403 - }, - { - "epoch": 0.8332341522854148, - "grad_norm": 0.0, - "learning_rate": 1.423524978237869e-06, - "loss": 0.8726, - "step": 29404 - }, - { - "epoch": 0.8332624897276771, - "grad_norm": 0.0, - "learning_rate": 1.423053052207053e-06, - "loss": 0.7826, - "step": 29405 - }, - { - "epoch": 0.8332908271699396, - "grad_norm": 0.0, - "learning_rate": 1.4225811984230497e-06, - "loss": 0.7652, - "step": 29406 - }, - { - "epoch": 0.8333191646122021, - "grad_norm": 0.0, - "learning_rate": 1.4221094168898276e-06, - "loss": 0.7757, - "step": 29407 - }, - { - "epoch": 0.8333475020544646, - "grad_norm": 0.0, - "learning_rate": 1.421637707611363e-06, - "loss": 0.8117, - "step": 29408 - }, - { - "epoch": 0.833375839496727, - "grad_norm": 0.0, - "learning_rate": 1.4211660705916286e-06, - "loss": 0.8185, - "step": 29409 - }, - { - "epoch": 0.8334041769389895, - "grad_norm": 0.0, - "learning_rate": 1.4206945058345988e-06, - "loss": 0.7627, - "step": 29410 - }, - { - "epoch": 0.833432514381252, - "grad_norm": 0.0, - "learning_rate": 1.4202230133442474e-06, - "loss": 0.7517, - "step": 29411 - }, - { - "epoch": 0.8334608518235144, - "grad_norm": 0.0, - "learning_rate": 1.419751593124542e-06, - "loss": 0.8314, - "step": 29412 - }, - { - "epoch": 0.8334891892657769, - "grad_norm": 0.0, - "learning_rate": 1.419280245179454e-06, - "loss": 0.8862, - "step": 29413 - }, - { - "epoch": 0.8335175267080394, - "grad_norm": 0.0, - "learning_rate": 1.418808969512957e-06, - "loss": 0.8283, - "step": 29414 - }, - { - "epoch": 0.8335458641503017, - "grad_norm": 0.0, - "learning_rate": 1.4183377661290175e-06, - "loss": 0.8415, - "step": 29415 - }, - { - "epoch": 0.8335742015925642, - "grad_norm": 0.0, - "learning_rate": 1.4178666350316072e-06, - "loss": 0.7769, - "step": 29416 - }, - { - "epoch": 0.8336025390348267, - "grad_norm": 0.0, - "learning_rate": 1.4173955762246905e-06, - "loss": 0.7752, - "step": 29417 - }, - { - "epoch": 0.8336308764770892, - "grad_norm": 0.0, - "learning_rate": 1.4169245897122385e-06, - "loss": 0.7384, - "step": 29418 - }, - { - "epoch": 0.8336592139193516, - "grad_norm": 0.0, - "learning_rate": 1.4164536754982206e-06, - "loss": 0.8094, - "step": 29419 - }, - { - "epoch": 0.8336875513616141, - "grad_norm": 0.0, - "learning_rate": 1.4159828335865978e-06, - "loss": 0.8318, - "step": 29420 - }, - { - "epoch": 0.8337158888038766, - "grad_norm": 0.0, - "learning_rate": 1.4155120639813392e-06, - "loss": 0.9395, - "step": 29421 - }, - { - "epoch": 0.833744226246139, - "grad_norm": 0.0, - "learning_rate": 1.4150413666864104e-06, - "loss": 0.8167, - "step": 29422 - }, - { - "epoch": 0.8337725636884015, - "grad_norm": 0.0, - "learning_rate": 1.4145707417057763e-06, - "loss": 0.8394, - "step": 29423 - }, - { - "epoch": 0.833800901130664, - "grad_norm": 0.0, - "learning_rate": 1.4141001890434035e-06, - "loss": 0.7932, - "step": 29424 - }, - { - "epoch": 0.8338292385729265, - "grad_norm": 0.0, - "learning_rate": 1.4136297087032503e-06, - "loss": 0.7405, - "step": 29425 - }, - { - "epoch": 0.8338575760151888, - "grad_norm": 0.0, - "learning_rate": 1.413159300689283e-06, - "loss": 0.7615, - "step": 29426 - }, - { - "epoch": 0.8338859134574513, - "grad_norm": 0.0, - "learning_rate": 1.4126889650054654e-06, - "loss": 0.9176, - "step": 29427 - }, - { - "epoch": 0.8339142508997138, - "grad_norm": 0.0, - "learning_rate": 1.4122187016557553e-06, - "loss": 0.8896, - "step": 29428 - }, - { - "epoch": 0.8339425883419762, - "grad_norm": 0.0, - "learning_rate": 1.4117485106441188e-06, - "loss": 0.7076, - "step": 29429 - }, - { - "epoch": 0.8339709257842387, - "grad_norm": 0.0, - "learning_rate": 1.411278391974511e-06, - "loss": 0.7792, - "step": 29430 - }, - { - "epoch": 0.8339992632265012, - "grad_norm": 0.0, - "learning_rate": 1.4108083456508948e-06, - "loss": 0.9512, - "step": 29431 - }, - { - "epoch": 0.8340276006687637, - "grad_norm": 0.0, - "learning_rate": 1.4103383716772313e-06, - "loss": 0.7908, - "step": 29432 - }, - { - "epoch": 0.8340559381110261, - "grad_norm": 0.0, - "learning_rate": 1.409868470057475e-06, - "loss": 0.7025, - "step": 29433 - }, - { - "epoch": 0.8340842755532886, - "grad_norm": 0.0, - "learning_rate": 1.4093986407955873e-06, - "loss": 0.829, - "step": 29434 - }, - { - "epoch": 0.8341126129955511, - "grad_norm": 0.0, - "learning_rate": 1.408928883895524e-06, - "loss": 0.804, - "step": 29435 - }, - { - "epoch": 0.8341409504378134, - "grad_norm": 0.0, - "learning_rate": 1.408459199361244e-06, - "loss": 0.7374, - "step": 29436 - }, - { - "epoch": 0.8341692878800759, - "grad_norm": 0.0, - "learning_rate": 1.4079895871967043e-06, - "loss": 0.7504, - "step": 29437 - }, - { - "epoch": 0.8341976253223384, - "grad_norm": 0.0, - "learning_rate": 1.407520047405856e-06, - "loss": 0.8945, - "step": 29438 - }, - { - "epoch": 0.8342259627646008, - "grad_norm": 0.0, - "learning_rate": 1.407050579992658e-06, - "loss": 0.7568, - "step": 29439 - }, - { - "epoch": 0.8342543002068633, - "grad_norm": 0.0, - "learning_rate": 1.4065811849610655e-06, - "loss": 1.0082, - "step": 29440 - }, - { - "epoch": 0.8342826376491258, - "grad_norm": 0.0, - "learning_rate": 1.4061118623150283e-06, - "loss": 0.9051, - "step": 29441 - }, - { - "epoch": 0.8343109750913883, - "grad_norm": 0.0, - "learning_rate": 1.4056426120585032e-06, - "loss": 0.8691, - "step": 29442 - }, - { - "epoch": 0.8343393125336507, - "grad_norm": 0.0, - "learning_rate": 1.4051734341954436e-06, - "loss": 0.8463, - "step": 29443 - }, - { - "epoch": 0.8343676499759132, - "grad_norm": 0.0, - "learning_rate": 1.4047043287297967e-06, - "loss": 0.7945, - "step": 29444 - }, - { - "epoch": 0.8343959874181757, - "grad_norm": 0.0, - "learning_rate": 1.4042352956655202e-06, - "loss": 0.7488, - "step": 29445 - }, - { - "epoch": 0.834424324860438, - "grad_norm": 0.0, - "learning_rate": 1.40376633500656e-06, - "loss": 0.8653, - "step": 29446 - }, - { - "epoch": 0.8344526623027005, - "grad_norm": 0.0, - "learning_rate": 1.4032974467568672e-06, - "loss": 0.7586, - "step": 29447 - }, - { - "epoch": 0.834480999744963, - "grad_norm": 0.0, - "learning_rate": 1.402828630920392e-06, - "loss": 0.8357, - "step": 29448 - }, - { - "epoch": 0.8345093371872255, - "grad_norm": 0.0, - "learning_rate": 1.4023598875010846e-06, - "loss": 0.7682, - "step": 29449 - }, - { - "epoch": 0.8345376746294879, - "grad_norm": 0.0, - "learning_rate": 1.401891216502892e-06, - "loss": 0.8228, - "step": 29450 - }, - { - "epoch": 0.8345660120717504, - "grad_norm": 0.0, - "learning_rate": 1.4014226179297652e-06, - "loss": 0.7967, - "step": 29451 - }, - { - "epoch": 0.8345943495140129, - "grad_norm": 0.0, - "learning_rate": 1.4009540917856457e-06, - "loss": 0.8013, - "step": 29452 - }, - { - "epoch": 0.8346226869562753, - "grad_norm": 0.0, - "learning_rate": 1.4004856380744857e-06, - "loss": 0.7335, - "step": 29453 - }, - { - "epoch": 0.8346510243985378, - "grad_norm": 0.0, - "learning_rate": 1.4000172568002268e-06, - "loss": 0.7343, - "step": 29454 - }, - { - "epoch": 0.8346793618408003, - "grad_norm": 0.0, - "learning_rate": 1.3995489479668156e-06, - "loss": 0.8075, - "step": 29455 - }, - { - "epoch": 0.8347076992830628, - "grad_norm": 0.0, - "learning_rate": 1.399080711578199e-06, - "loss": 0.9366, - "step": 29456 - }, - { - "epoch": 0.8347360367253251, - "grad_norm": 0.0, - "learning_rate": 1.3986125476383172e-06, - "loss": 0.7808, - "step": 29457 - }, - { - "epoch": 0.8347643741675876, - "grad_norm": 0.0, - "learning_rate": 1.3981444561511193e-06, - "loss": 0.8888, - "step": 29458 - }, - { - "epoch": 0.8347927116098501, - "grad_norm": 0.0, - "learning_rate": 1.397676437120542e-06, - "loss": 0.7519, - "step": 29459 - }, - { - "epoch": 0.8348210490521125, - "grad_norm": 0.0, - "learning_rate": 1.3972084905505302e-06, - "loss": 0.8495, - "step": 29460 - }, - { - "epoch": 0.834849386494375, - "grad_norm": 0.0, - "learning_rate": 1.396740616445027e-06, - "loss": 0.826, - "step": 29461 - }, - { - "epoch": 0.8348777239366375, - "grad_norm": 0.0, - "learning_rate": 1.3962728148079707e-06, - "loss": 0.7915, - "step": 29462 - }, - { - "epoch": 0.8349060613788999, - "grad_norm": 0.0, - "learning_rate": 1.3958050856433048e-06, - "loss": 0.779, - "step": 29463 - }, - { - "epoch": 0.8349343988211624, - "grad_norm": 0.0, - "learning_rate": 1.3953374289549693e-06, - "loss": 0.8709, - "step": 29464 - }, - { - "epoch": 0.8349627362634249, - "grad_norm": 0.0, - "learning_rate": 1.3948698447469e-06, - "loss": 0.7873, - "step": 29465 - }, - { - "epoch": 0.8349910737056874, - "grad_norm": 0.0, - "learning_rate": 1.3944023330230393e-06, - "loss": 0.7483, - "step": 29466 - }, - { - "epoch": 0.8350194111479498, - "grad_norm": 0.0, - "learning_rate": 1.3939348937873209e-06, - "loss": 0.835, - "step": 29467 - }, - { - "epoch": 0.8350477485902122, - "grad_norm": 0.0, - "learning_rate": 1.3934675270436848e-06, - "loss": 0.6595, - "step": 29468 - }, - { - "epoch": 0.8350760860324747, - "grad_norm": 0.0, - "learning_rate": 1.3930002327960702e-06, - "loss": 0.8798, - "step": 29469 - }, - { - "epoch": 0.8351044234747371, - "grad_norm": 0.0, - "learning_rate": 1.3925330110484092e-06, - "loss": 0.7131, - "step": 29470 - }, - { - "epoch": 0.8351327609169996, - "grad_norm": 0.0, - "learning_rate": 1.3920658618046378e-06, - "loss": 0.8674, - "step": 29471 - }, - { - "epoch": 0.8351610983592621, - "grad_norm": 0.0, - "learning_rate": 1.391598785068695e-06, - "loss": 0.8708, - "step": 29472 - }, - { - "epoch": 0.8351894358015246, - "grad_norm": 0.0, - "learning_rate": 1.391131780844509e-06, - "loss": 0.7654, - "step": 29473 - }, - { - "epoch": 0.835217773243787, - "grad_norm": 0.0, - "learning_rate": 1.3906648491360186e-06, - "loss": 0.7505, - "step": 29474 - }, - { - "epoch": 0.8352461106860495, - "grad_norm": 0.0, - "learning_rate": 1.390197989947154e-06, - "loss": 0.8443, - "step": 29475 - }, - { - "epoch": 0.835274448128312, - "grad_norm": 0.0, - "learning_rate": 1.389731203281849e-06, - "loss": 0.7123, - "step": 29476 - }, - { - "epoch": 0.8353027855705744, - "grad_norm": 0.0, - "learning_rate": 1.3892644891440378e-06, - "loss": 0.7111, - "step": 29477 - }, - { - "epoch": 0.8353311230128369, - "grad_norm": 0.0, - "learning_rate": 1.3887978475376472e-06, - "loss": 0.7485, - "step": 29478 - }, - { - "epoch": 0.8353594604550993, - "grad_norm": 0.0, - "learning_rate": 1.3883312784666091e-06, - "loss": 0.7427, - "step": 29479 - }, - { - "epoch": 0.8353877978973618, - "grad_norm": 0.0, - "learning_rate": 1.387864781934858e-06, - "loss": 0.8595, - "step": 29480 - }, - { - "epoch": 0.8354161353396242, - "grad_norm": 0.0, - "learning_rate": 1.3873983579463168e-06, - "loss": 0.7805, - "step": 29481 - }, - { - "epoch": 0.8354444727818867, - "grad_norm": 0.0, - "learning_rate": 1.3869320065049209e-06, - "loss": 0.8277, - "step": 29482 - }, - { - "epoch": 0.8354728102241492, - "grad_norm": 0.0, - "learning_rate": 1.3864657276145921e-06, - "loss": 0.734, - "step": 29483 - }, - { - "epoch": 0.8355011476664116, - "grad_norm": 0.0, - "learning_rate": 1.385999521279261e-06, - "loss": 0.843, - "step": 29484 - }, - { - "epoch": 0.8355294851086741, - "grad_norm": 0.0, - "learning_rate": 1.3855333875028566e-06, - "loss": 0.9165, - "step": 29485 - }, - { - "epoch": 0.8355578225509366, - "grad_norm": 0.0, - "learning_rate": 1.385067326289301e-06, - "loss": 0.8077, - "step": 29486 - }, - { - "epoch": 0.835586159993199, - "grad_norm": 0.0, - "learning_rate": 1.3846013376425227e-06, - "loss": 0.8349, - "step": 29487 - }, - { - "epoch": 0.8356144974354615, - "grad_norm": 0.0, - "learning_rate": 1.384135421566447e-06, - "loss": 0.7806, - "step": 29488 - }, - { - "epoch": 0.835642834877724, - "grad_norm": 0.0, - "learning_rate": 1.3836695780649979e-06, - "loss": 0.7439, - "step": 29489 - }, - { - "epoch": 0.8356711723199864, - "grad_norm": 0.0, - "learning_rate": 1.3832038071421017e-06, - "loss": 0.8399, - "step": 29490 - }, - { - "epoch": 0.8356995097622488, - "grad_norm": 0.0, - "learning_rate": 1.3827381088016767e-06, - "loss": 0.7955, - "step": 29491 - }, - { - "epoch": 0.8357278472045113, - "grad_norm": 0.0, - "learning_rate": 1.3822724830476497e-06, - "loss": 0.8855, - "step": 29492 - }, - { - "epoch": 0.8357561846467738, - "grad_norm": 0.0, - "learning_rate": 1.3818069298839431e-06, - "loss": 0.8515, - "step": 29493 - }, - { - "epoch": 0.8357845220890362, - "grad_norm": 0.0, - "learning_rate": 1.381341449314475e-06, - "loss": 0.8294, - "step": 29494 - }, - { - "epoch": 0.8358128595312987, - "grad_norm": 0.0, - "learning_rate": 1.380876041343171e-06, - "loss": 0.7489, - "step": 29495 - }, - { - "epoch": 0.8358411969735612, - "grad_norm": 0.0, - "learning_rate": 1.3804107059739457e-06, - "loss": 0.8418, - "step": 29496 - }, - { - "epoch": 0.8358695344158237, - "grad_norm": 0.0, - "learning_rate": 1.3799454432107229e-06, - "loss": 0.7202, - "step": 29497 - }, - { - "epoch": 0.8358978718580861, - "grad_norm": 0.0, - "learning_rate": 1.3794802530574213e-06, - "loss": 0.9275, - "step": 29498 - }, - { - "epoch": 0.8359262093003486, - "grad_norm": 0.0, - "learning_rate": 1.3790151355179581e-06, - "loss": 0.8664, - "step": 29499 - }, - { - "epoch": 0.835954546742611, - "grad_norm": 0.0, - "learning_rate": 1.37855009059625e-06, - "loss": 0.8421, - "step": 29500 - }, - { - "epoch": 0.8359828841848734, - "grad_norm": 0.0, - "learning_rate": 1.3780851182962174e-06, - "loss": 0.8497, - "step": 29501 - }, - { - "epoch": 0.8360112216271359, - "grad_norm": 0.0, - "learning_rate": 1.3776202186217747e-06, - "loss": 0.8304, - "step": 29502 - }, - { - "epoch": 0.8360395590693984, - "grad_norm": 0.0, - "learning_rate": 1.3771553915768421e-06, - "loss": 0.8026, - "step": 29503 - }, - { - "epoch": 0.8360678965116609, - "grad_norm": 0.0, - "learning_rate": 1.3766906371653289e-06, - "loss": 0.8541, - "step": 29504 - }, - { - "epoch": 0.8360962339539233, - "grad_norm": 0.0, - "learning_rate": 1.3762259553911516e-06, - "loss": 0.8031, - "step": 29505 - }, - { - "epoch": 0.8361245713961858, - "grad_norm": 0.0, - "learning_rate": 1.3757613462582286e-06, - "loss": 0.8717, - "step": 29506 - }, - { - "epoch": 0.8361529088384483, - "grad_norm": 0.0, - "learning_rate": 1.3752968097704677e-06, - "loss": 0.7498, - "step": 29507 - }, - { - "epoch": 0.8361812462807107, - "grad_norm": 0.0, - "learning_rate": 1.3748323459317848e-06, - "loss": 0.8939, - "step": 29508 - }, - { - "epoch": 0.8362095837229732, - "grad_norm": 0.0, - "learning_rate": 1.3743679547460943e-06, - "loss": 0.8755, - "step": 29509 - }, - { - "epoch": 0.8362379211652357, - "grad_norm": 0.0, - "learning_rate": 1.373903636217303e-06, - "loss": 0.6976, - "step": 29510 - }, - { - "epoch": 0.836266258607498, - "grad_norm": 0.0, - "learning_rate": 1.373439390349327e-06, - "loss": 0.7961, - "step": 29511 - }, - { - "epoch": 0.8362945960497605, - "grad_norm": 0.0, - "learning_rate": 1.372975217146072e-06, - "loss": 0.8956, - "step": 29512 - }, - { - "epoch": 0.836322933492023, - "grad_norm": 0.0, - "learning_rate": 1.3725111166114514e-06, - "loss": 0.7522, - "step": 29513 - }, - { - "epoch": 0.8363512709342855, - "grad_norm": 0.0, - "learning_rate": 1.372047088749372e-06, - "loss": 0.7977, - "step": 29514 - }, - { - "epoch": 0.8363796083765479, - "grad_norm": 0.0, - "learning_rate": 1.371583133563744e-06, - "loss": 0.815, - "step": 29515 - }, - { - "epoch": 0.8364079458188104, - "grad_norm": 0.0, - "learning_rate": 1.371119251058478e-06, - "loss": 0.7686, - "step": 29516 - }, - { - "epoch": 0.8364362832610729, - "grad_norm": 0.0, - "learning_rate": 1.3706554412374762e-06, - "loss": 0.8358, - "step": 29517 - }, - { - "epoch": 0.8364646207033353, - "grad_norm": 0.0, - "learning_rate": 1.3701917041046486e-06, - "loss": 0.7649, - "step": 29518 - }, - { - "epoch": 0.8364929581455978, - "grad_norm": 0.0, - "learning_rate": 1.3697280396639035e-06, - "loss": 0.8711, - "step": 29519 - }, - { - "epoch": 0.8365212955878603, - "grad_norm": 0.0, - "learning_rate": 1.369264447919141e-06, - "loss": 0.9089, - "step": 29520 - }, - { - "epoch": 0.8365496330301228, - "grad_norm": 0.0, - "learning_rate": 1.3688009288742688e-06, - "loss": 0.7816, - "step": 29521 - }, - { - "epoch": 0.8365779704723851, - "grad_norm": 0.0, - "learning_rate": 1.368337482533194e-06, - "loss": 0.8154, - "step": 29522 - }, - { - "epoch": 0.8366063079146476, - "grad_norm": 0.0, - "learning_rate": 1.367874108899815e-06, - "loss": 0.8033, - "step": 29523 - }, - { - "epoch": 0.8366346453569101, - "grad_norm": 0.0, - "learning_rate": 1.3674108079780414e-06, - "loss": 0.7932, - "step": 29524 - }, - { - "epoch": 0.8366629827991725, - "grad_norm": 0.0, - "learning_rate": 1.366947579771769e-06, - "loss": 0.8269, - "step": 29525 - }, - { - "epoch": 0.836691320241435, - "grad_norm": 0.0, - "learning_rate": 1.366484424284903e-06, - "loss": 0.8682, - "step": 29526 - }, - { - "epoch": 0.8367196576836975, - "grad_norm": 0.0, - "learning_rate": 1.366021341521344e-06, - "loss": 0.7366, - "step": 29527 - }, - { - "epoch": 0.8367479951259599, - "grad_norm": 0.0, - "learning_rate": 1.3655583314849952e-06, - "loss": 0.7274, - "step": 29528 - }, - { - "epoch": 0.8367763325682224, - "grad_norm": 0.0, - "learning_rate": 1.365095394179754e-06, - "loss": 0.7977, - "step": 29529 - }, - { - "epoch": 0.8368046700104849, - "grad_norm": 0.0, - "learning_rate": 1.364632529609522e-06, - "loss": 0.8098, - "step": 29530 - }, - { - "epoch": 0.8368330074527474, - "grad_norm": 0.0, - "learning_rate": 1.3641697377781959e-06, - "loss": 0.774, - "step": 29531 - }, - { - "epoch": 0.8368613448950097, - "grad_norm": 0.0, - "learning_rate": 1.3637070186896773e-06, - "loss": 0.805, - "step": 29532 - }, - { - "epoch": 0.8368896823372722, - "grad_norm": 0.0, - "learning_rate": 1.3632443723478584e-06, - "loss": 0.8457, - "step": 29533 - }, - { - "epoch": 0.8369180197795347, - "grad_norm": 0.0, - "learning_rate": 1.3627817987566394e-06, - "loss": 0.8724, - "step": 29534 - }, - { - "epoch": 0.8369463572217971, - "grad_norm": 0.0, - "learning_rate": 1.3623192979199196e-06, - "loss": 0.7828, - "step": 29535 - }, - { - "epoch": 0.8369746946640596, - "grad_norm": 0.0, - "learning_rate": 1.361856869841589e-06, - "loss": 0.7601, - "step": 29536 - }, - { - "epoch": 0.8370030321063221, - "grad_norm": 0.0, - "learning_rate": 1.3613945145255458e-06, - "loss": 0.8475, - "step": 29537 - }, - { - "epoch": 0.8370313695485846, - "grad_norm": 0.0, - "learning_rate": 1.3609322319756868e-06, - "loss": 0.7317, - "step": 29538 - }, - { - "epoch": 0.837059706990847, - "grad_norm": 0.0, - "learning_rate": 1.3604700221959022e-06, - "loss": 0.8177, - "step": 29539 - }, - { - "epoch": 0.8370880444331095, - "grad_norm": 0.0, - "learning_rate": 1.3600078851900854e-06, - "loss": 0.9022, - "step": 29540 - }, - { - "epoch": 0.837116381875372, - "grad_norm": 0.0, - "learning_rate": 1.3595458209621314e-06, - "loss": 0.7782, - "step": 29541 - }, - { - "epoch": 0.8371447193176343, - "grad_norm": 0.0, - "learning_rate": 1.3590838295159315e-06, - "loss": 0.7754, - "step": 29542 - }, - { - "epoch": 0.8371730567598968, - "grad_norm": 0.0, - "learning_rate": 1.3586219108553799e-06, - "loss": 0.7941, - "step": 29543 - }, - { - "epoch": 0.8372013942021593, - "grad_norm": 0.0, - "learning_rate": 1.3581600649843617e-06, - "loss": 0.8321, - "step": 29544 - }, - { - "epoch": 0.8372297316444218, - "grad_norm": 0.0, - "learning_rate": 1.3576982919067727e-06, - "loss": 0.7216, - "step": 29545 - }, - { - "epoch": 0.8372580690866842, - "grad_norm": 0.0, - "learning_rate": 1.3572365916264984e-06, - "loss": 0.7889, - "step": 29546 - }, - { - "epoch": 0.8372864065289467, - "grad_norm": 0.0, - "learning_rate": 1.3567749641474294e-06, - "loss": 0.7599, - "step": 29547 - }, - { - "epoch": 0.8373147439712092, - "grad_norm": 0.0, - "learning_rate": 1.3563134094734566e-06, - "loss": 0.8001, - "step": 29548 - }, - { - "epoch": 0.8373430814134716, - "grad_norm": 0.0, - "learning_rate": 1.3558519276084636e-06, - "loss": 0.7742, - "step": 29549 - }, - { - "epoch": 0.8373714188557341, - "grad_norm": 0.0, - "learning_rate": 1.3553905185563398e-06, - "loss": 0.8362, - "step": 29550 - }, - { - "epoch": 0.8373997562979966, - "grad_norm": 0.0, - "learning_rate": 1.354929182320972e-06, - "loss": 0.8127, - "step": 29551 - }, - { - "epoch": 0.837428093740259, - "grad_norm": 0.0, - "learning_rate": 1.354467918906246e-06, - "loss": 0.8409, - "step": 29552 - }, - { - "epoch": 0.8374564311825214, - "grad_norm": 0.0, - "learning_rate": 1.3540067283160485e-06, - "loss": 0.6887, - "step": 29553 - }, - { - "epoch": 0.8374847686247839, - "grad_norm": 0.0, - "learning_rate": 1.353545610554261e-06, - "loss": 0.9284, - "step": 29554 - }, - { - "epoch": 0.8375131060670464, - "grad_norm": 0.0, - "learning_rate": 1.3530845656247705e-06, - "loss": 0.8128, - "step": 29555 - }, - { - "epoch": 0.8375414435093088, - "grad_norm": 0.0, - "learning_rate": 1.3526235935314614e-06, - "loss": 0.8599, - "step": 29556 - }, - { - "epoch": 0.8375697809515713, - "grad_norm": 0.0, - "learning_rate": 1.352162694278213e-06, - "loss": 0.777, - "step": 29557 - }, - { - "epoch": 0.8375981183938338, - "grad_norm": 0.0, - "learning_rate": 1.3517018678689086e-06, - "loss": 0.898, - "step": 29558 - }, - { - "epoch": 0.8376264558360962, - "grad_norm": 0.0, - "learning_rate": 1.3512411143074333e-06, - "loss": 0.7872, - "step": 29559 - }, - { - "epoch": 0.8376547932783587, - "grad_norm": 0.0, - "learning_rate": 1.3507804335976638e-06, - "loss": 0.8946, - "step": 29560 - }, - { - "epoch": 0.8376831307206212, - "grad_norm": 0.0, - "learning_rate": 1.3503198257434847e-06, - "loss": 0.7947, - "step": 29561 - }, - { - "epoch": 0.8377114681628837, - "grad_norm": 0.0, - "learning_rate": 1.3498592907487717e-06, - "loss": 0.751, - "step": 29562 - }, - { - "epoch": 0.837739805605146, - "grad_norm": 0.0, - "learning_rate": 1.3493988286174054e-06, - "loss": 0.7694, - "step": 29563 - }, - { - "epoch": 0.8377681430474085, - "grad_norm": 0.0, - "learning_rate": 1.3489384393532656e-06, - "loss": 0.7465, - "step": 29564 - }, - { - "epoch": 0.837796480489671, - "grad_norm": 0.0, - "learning_rate": 1.3484781229602295e-06, - "loss": 0.8208, - "step": 29565 - }, - { - "epoch": 0.8378248179319334, - "grad_norm": 0.0, - "learning_rate": 1.3480178794421773e-06, - "loss": 0.9112, - "step": 29566 - }, - { - "epoch": 0.8378531553741959, - "grad_norm": 0.0, - "learning_rate": 1.3475577088029812e-06, - "loss": 0.8387, - "step": 29567 - }, - { - "epoch": 0.8378814928164584, - "grad_norm": 0.0, - "learning_rate": 1.3470976110465196e-06, - "loss": 0.6472, - "step": 29568 - }, - { - "epoch": 0.8379098302587209, - "grad_norm": 0.0, - "learning_rate": 1.34663758617667e-06, - "loss": 0.7478, - "step": 29569 - }, - { - "epoch": 0.8379381677009833, - "grad_norm": 0.0, - "learning_rate": 1.346177634197303e-06, - "loss": 0.7971, - "step": 29570 - }, - { - "epoch": 0.8379665051432458, - "grad_norm": 0.0, - "learning_rate": 1.3457177551122958e-06, - "loss": 0.8736, - "step": 29571 - }, - { - "epoch": 0.8379948425855083, - "grad_norm": 0.0, - "learning_rate": 1.3452579489255235e-06, - "loss": 0.8584, - "step": 29572 - }, - { - "epoch": 0.8380231800277707, - "grad_norm": 0.0, - "learning_rate": 1.3447982156408556e-06, - "loss": 0.817, - "step": 29573 - }, - { - "epoch": 0.8380515174700331, - "grad_norm": 0.0, - "learning_rate": 1.344338555262168e-06, - "loss": 0.7616, - "step": 29574 - }, - { - "epoch": 0.8380798549122956, - "grad_norm": 0.0, - "learning_rate": 1.3438789677933283e-06, - "loss": 0.8565, - "step": 29575 - }, - { - "epoch": 0.838108192354558, - "grad_norm": 0.0, - "learning_rate": 1.3434194532382116e-06, - "loss": 0.8116, - "step": 29576 - }, - { - "epoch": 0.8381365297968205, - "grad_norm": 0.0, - "learning_rate": 1.342960011600687e-06, - "loss": 0.8107, - "step": 29577 - }, - { - "epoch": 0.838164867239083, - "grad_norm": 0.0, - "learning_rate": 1.3425006428846243e-06, - "loss": 0.7544, - "step": 29578 - }, - { - "epoch": 0.8381932046813455, - "grad_norm": 0.0, - "learning_rate": 1.3420413470938942e-06, - "loss": 0.8513, - "step": 29579 - }, - { - "epoch": 0.8382215421236079, - "grad_norm": 0.0, - "learning_rate": 1.3415821242323667e-06, - "loss": 0.8511, - "step": 29580 - }, - { - "epoch": 0.8382498795658704, - "grad_norm": 0.0, - "learning_rate": 1.3411229743039055e-06, - "loss": 0.7938, - "step": 29581 - }, - { - "epoch": 0.8382782170081329, - "grad_norm": 0.0, - "learning_rate": 1.3406638973123842e-06, - "loss": 0.844, - "step": 29582 - }, - { - "epoch": 0.8383065544503953, - "grad_norm": 0.0, - "learning_rate": 1.3402048932616641e-06, - "loss": 0.8172, - "step": 29583 - }, - { - "epoch": 0.8383348918926578, - "grad_norm": 0.0, - "learning_rate": 1.339745962155613e-06, - "loss": 0.8924, - "step": 29584 - }, - { - "epoch": 0.8383632293349202, - "grad_norm": 0.0, - "learning_rate": 1.3392871039981004e-06, - "loss": 0.8009, - "step": 29585 - }, - { - "epoch": 0.8383915667771827, - "grad_norm": 0.0, - "learning_rate": 1.3388283187929874e-06, - "loss": 0.8813, - "step": 29586 - }, - { - "epoch": 0.8384199042194451, - "grad_norm": 0.0, - "learning_rate": 1.3383696065441376e-06, - "loss": 0.8349, - "step": 29587 - }, - { - "epoch": 0.8384482416617076, - "grad_norm": 0.0, - "learning_rate": 1.3379109672554213e-06, - "loss": 0.7775, - "step": 29588 - }, - { - "epoch": 0.8384765791039701, - "grad_norm": 0.0, - "learning_rate": 1.3374524009306944e-06, - "loss": 0.8109, - "step": 29589 - }, - { - "epoch": 0.8385049165462325, - "grad_norm": 0.0, - "learning_rate": 1.3369939075738226e-06, - "loss": 0.8611, - "step": 29590 - }, - { - "epoch": 0.838533253988495, - "grad_norm": 0.0, - "learning_rate": 1.3365354871886672e-06, - "loss": 0.8345, - "step": 29591 - }, - { - "epoch": 0.8385615914307575, - "grad_norm": 0.0, - "learning_rate": 1.3360771397790918e-06, - "loss": 0.7632, - "step": 29592 - }, - { - "epoch": 0.83858992887302, - "grad_norm": 0.0, - "learning_rate": 1.3356188653489578e-06, - "loss": 0.8641, - "step": 29593 - }, - { - "epoch": 0.8386182663152824, - "grad_norm": 0.0, - "learning_rate": 1.3351606639021209e-06, - "loss": 0.8512, - "step": 29594 - }, - { - "epoch": 0.8386466037575449, - "grad_norm": 0.0, - "learning_rate": 1.3347025354424459e-06, - "loss": 0.7646, - "step": 29595 - }, - { - "epoch": 0.8386749411998073, - "grad_norm": 0.0, - "learning_rate": 1.3342444799737876e-06, - "loss": 0.7321, - "step": 29596 - }, - { - "epoch": 0.8387032786420697, - "grad_norm": 0.0, - "learning_rate": 1.3337864975000047e-06, - "loss": 0.788, - "step": 29597 - }, - { - "epoch": 0.8387316160843322, - "grad_norm": 0.0, - "learning_rate": 1.333328588024959e-06, - "loss": 0.8132, - "step": 29598 - }, - { - "epoch": 0.8387599535265947, - "grad_norm": 0.0, - "learning_rate": 1.332870751552503e-06, - "loss": 0.8197, - "step": 29599 - }, - { - "epoch": 0.8387882909688571, - "grad_norm": 0.0, - "learning_rate": 1.3324129880864954e-06, - "loss": 0.7013, - "step": 29600 - }, - { - "epoch": 0.8388166284111196, - "grad_norm": 0.0, - "learning_rate": 1.3319552976307938e-06, - "loss": 0.7254, - "step": 29601 - }, - { - "epoch": 0.8388449658533821, - "grad_norm": 0.0, - "learning_rate": 1.3314976801892487e-06, - "loss": 0.8366, - "step": 29602 - }, - { - "epoch": 0.8388733032956446, - "grad_norm": 0.0, - "learning_rate": 1.3310401357657176e-06, - "loss": 0.8129, - "step": 29603 - }, - { - "epoch": 0.838901640737907, - "grad_norm": 0.0, - "learning_rate": 1.3305826643640552e-06, - "loss": 0.7324, - "step": 29604 - }, - { - "epoch": 0.8389299781801695, - "grad_norm": 0.0, - "learning_rate": 1.3301252659881148e-06, - "loss": 0.8308, - "step": 29605 - }, - { - "epoch": 0.838958315622432, - "grad_norm": 0.0, - "learning_rate": 1.3296679406417502e-06, - "loss": 0.7472, - "step": 29606 - }, - { - "epoch": 0.8389866530646943, - "grad_norm": 0.0, - "learning_rate": 1.3292106883288092e-06, - "loss": 0.7225, - "step": 29607 - }, - { - "epoch": 0.8390149905069568, - "grad_norm": 0.0, - "learning_rate": 1.3287535090531478e-06, - "loss": 0.8215, - "step": 29608 - }, - { - "epoch": 0.8390433279492193, - "grad_norm": 0.0, - "learning_rate": 1.3282964028186175e-06, - "loss": 0.8414, - "step": 29609 - }, - { - "epoch": 0.8390716653914818, - "grad_norm": 0.0, - "learning_rate": 1.3278393696290636e-06, - "loss": 0.7817, - "step": 29610 - }, - { - "epoch": 0.8391000028337442, - "grad_norm": 0.0, - "learning_rate": 1.3273824094883425e-06, - "loss": 0.7706, - "step": 29611 - }, - { - "epoch": 0.8391283402760067, - "grad_norm": 0.0, - "learning_rate": 1.3269255224002963e-06, - "loss": 0.6686, - "step": 29612 - }, - { - "epoch": 0.8391566777182692, - "grad_norm": 0.0, - "learning_rate": 1.3264687083687788e-06, - "loss": 0.7631, - "step": 29613 - }, - { - "epoch": 0.8391850151605316, - "grad_norm": 0.0, - "learning_rate": 1.3260119673976369e-06, - "loss": 0.8482, - "step": 29614 - }, - { - "epoch": 0.8392133526027941, - "grad_norm": 0.0, - "learning_rate": 1.325555299490716e-06, - "loss": 0.6667, - "step": 29615 - }, - { - "epoch": 0.8392416900450566, - "grad_norm": 0.0, - "learning_rate": 1.3250987046518638e-06, - "loss": 0.8007, - "step": 29616 - }, - { - "epoch": 0.839270027487319, - "grad_norm": 0.0, - "learning_rate": 1.3246421828849255e-06, - "loss": 0.7828, - "step": 29617 - }, - { - "epoch": 0.8392983649295814, - "grad_norm": 0.0, - "learning_rate": 1.3241857341937491e-06, - "loss": 0.8111, - "step": 29618 - }, - { - "epoch": 0.8393267023718439, - "grad_norm": 0.0, - "learning_rate": 1.3237293585821786e-06, - "loss": 0.7924, - "step": 29619 - }, - { - "epoch": 0.8393550398141064, - "grad_norm": 0.0, - "learning_rate": 1.3232730560540564e-06, - "loss": 0.8059, - "step": 29620 - }, - { - "epoch": 0.8393833772563688, - "grad_norm": 0.0, - "learning_rate": 1.322816826613228e-06, - "loss": 0.8033, - "step": 29621 - }, - { - "epoch": 0.8394117146986313, - "grad_norm": 0.0, - "learning_rate": 1.3223606702635362e-06, - "loss": 0.878, - "step": 29622 - }, - { - "epoch": 0.8394400521408938, - "grad_norm": 0.0, - "learning_rate": 1.321904587008822e-06, - "loss": 0.7423, - "step": 29623 - }, - { - "epoch": 0.8394683895831562, - "grad_norm": 0.0, - "learning_rate": 1.3214485768529296e-06, - "loss": 0.8175, - "step": 29624 - }, - { - "epoch": 0.8394967270254187, - "grad_norm": 0.0, - "learning_rate": 1.3209926397996963e-06, - "loss": 0.7769, - "step": 29625 - }, - { - "epoch": 0.8395250644676812, - "grad_norm": 0.0, - "learning_rate": 1.320536775852964e-06, - "loss": 0.9094, - "step": 29626 - }, - { - "epoch": 0.8395534019099437, - "grad_norm": 0.0, - "learning_rate": 1.3200809850165775e-06, - "loss": 0.7786, - "step": 29627 - }, - { - "epoch": 0.839581739352206, - "grad_norm": 0.0, - "learning_rate": 1.3196252672943688e-06, - "loss": 0.7927, - "step": 29628 - }, - { - "epoch": 0.8396100767944685, - "grad_norm": 0.0, - "learning_rate": 1.3191696226901795e-06, - "loss": 0.7952, - "step": 29629 - }, - { - "epoch": 0.839638414236731, - "grad_norm": 0.0, - "learning_rate": 1.318714051207849e-06, - "loss": 0.862, - "step": 29630 - }, - { - "epoch": 0.8396667516789934, - "grad_norm": 0.0, - "learning_rate": 1.3182585528512126e-06, - "loss": 0.7749, - "step": 29631 - }, - { - "epoch": 0.8396950891212559, - "grad_norm": 0.0, - "learning_rate": 1.3178031276241122e-06, - "loss": 0.8727, - "step": 29632 - }, - { - "epoch": 0.8397234265635184, - "grad_norm": 0.0, - "learning_rate": 1.3173477755303765e-06, - "loss": 0.8189, - "step": 29633 - }, - { - "epoch": 0.8397517640057809, - "grad_norm": 0.0, - "learning_rate": 1.316892496573845e-06, - "loss": 0.7247, - "step": 29634 - }, - { - "epoch": 0.8397801014480433, - "grad_norm": 0.0, - "learning_rate": 1.3164372907583545e-06, - "loss": 0.7868, - "step": 29635 - }, - { - "epoch": 0.8398084388903058, - "grad_norm": 0.0, - "learning_rate": 1.3159821580877353e-06, - "loss": 0.8867, - "step": 29636 - }, - { - "epoch": 0.8398367763325683, - "grad_norm": 0.0, - "learning_rate": 1.315527098565823e-06, - "loss": 0.8831, - "step": 29637 - }, - { - "epoch": 0.8398651137748306, - "grad_norm": 0.0, - "learning_rate": 1.3150721121964538e-06, - "loss": 0.8476, - "step": 29638 - }, - { - "epoch": 0.8398934512170931, - "grad_norm": 0.0, - "learning_rate": 1.3146171989834544e-06, - "loss": 0.7886, - "step": 29639 - }, - { - "epoch": 0.8399217886593556, - "grad_norm": 0.0, - "learning_rate": 1.3141623589306619e-06, - "loss": 0.7295, - "step": 29640 - }, - { - "epoch": 0.8399501261016181, - "grad_norm": 0.0, - "learning_rate": 1.3137075920419017e-06, - "loss": 0.9214, - "step": 29641 - }, - { - "epoch": 0.8399784635438805, - "grad_norm": 0.0, - "learning_rate": 1.3132528983210103e-06, - "loss": 0.95, - "step": 29642 - }, - { - "epoch": 0.840006800986143, - "grad_norm": 0.0, - "learning_rate": 1.312798277771814e-06, - "loss": 0.8737, - "step": 29643 - }, - { - "epoch": 0.8400351384284055, - "grad_norm": 0.0, - "learning_rate": 1.3123437303981446e-06, - "loss": 0.7684, - "step": 29644 - }, - { - "epoch": 0.8400634758706679, - "grad_norm": 0.0, - "learning_rate": 1.3118892562038288e-06, - "loss": 0.7925, - "step": 29645 - }, - { - "epoch": 0.8400918133129304, - "grad_norm": 0.0, - "learning_rate": 1.3114348551926991e-06, - "loss": 0.7913, - "step": 29646 - }, - { - "epoch": 0.8401201507551929, - "grad_norm": 0.0, - "learning_rate": 1.3109805273685783e-06, - "loss": 0.8591, - "step": 29647 - }, - { - "epoch": 0.8401484881974552, - "grad_norm": 0.0, - "learning_rate": 1.3105262727352964e-06, - "loss": 0.8033, - "step": 29648 - }, - { - "epoch": 0.8401768256397177, - "grad_norm": 0.0, - "learning_rate": 1.310072091296677e-06, - "loss": 0.7766, - "step": 29649 - }, - { - "epoch": 0.8402051630819802, - "grad_norm": 0.0, - "learning_rate": 1.309617983056547e-06, - "loss": 0.8144, - "step": 29650 - }, - { - "epoch": 0.8402335005242427, - "grad_norm": 0.0, - "learning_rate": 1.3091639480187334e-06, - "loss": 0.9367, - "step": 29651 - }, - { - "epoch": 0.8402618379665051, - "grad_norm": 0.0, - "learning_rate": 1.3087099861870578e-06, - "loss": 0.7022, - "step": 29652 - }, - { - "epoch": 0.8402901754087676, - "grad_norm": 0.0, - "learning_rate": 1.308256097565347e-06, - "loss": 0.8697, - "step": 29653 - }, - { - "epoch": 0.8403185128510301, - "grad_norm": 0.0, - "learning_rate": 1.307802282157421e-06, - "loss": 0.8225, - "step": 29654 - }, - { - "epoch": 0.8403468502932925, - "grad_norm": 0.0, - "learning_rate": 1.3073485399671038e-06, - "loss": 0.9019, - "step": 29655 - }, - { - "epoch": 0.840375187735555, - "grad_norm": 0.0, - "learning_rate": 1.3068948709982177e-06, - "loss": 0.7809, - "step": 29656 - }, - { - "epoch": 0.8404035251778175, - "grad_norm": 0.0, - "learning_rate": 1.3064412752545853e-06, - "loss": 0.8148, - "step": 29657 - }, - { - "epoch": 0.84043186262008, - "grad_norm": 0.0, - "learning_rate": 1.3059877527400244e-06, - "loss": 0.7771, - "step": 29658 - }, - { - "epoch": 0.8404602000623423, - "grad_norm": 0.0, - "learning_rate": 1.3055343034583611e-06, - "loss": 0.7962, - "step": 29659 - }, - { - "epoch": 0.8404885375046048, - "grad_norm": 0.0, - "learning_rate": 1.305080927413408e-06, - "loss": 0.7949, - "step": 29660 - }, - { - "epoch": 0.8405168749468673, - "grad_norm": 0.0, - "learning_rate": 1.3046276246089896e-06, - "loss": 0.9243, - "step": 29661 - }, - { - "epoch": 0.8405452123891297, - "grad_norm": 0.0, - "learning_rate": 1.3041743950489184e-06, - "loss": 0.7758, - "step": 29662 - }, - { - "epoch": 0.8405735498313922, - "grad_norm": 0.0, - "learning_rate": 1.3037212387370157e-06, - "loss": 0.7922, - "step": 29663 - }, - { - "epoch": 0.8406018872736547, - "grad_norm": 0.0, - "learning_rate": 1.303268155677101e-06, - "loss": 0.8699, - "step": 29664 - }, - { - "epoch": 0.8406302247159172, - "grad_norm": 0.0, - "learning_rate": 1.3028151458729865e-06, - "loss": 0.7324, - "step": 29665 - }, - { - "epoch": 0.8406585621581796, - "grad_norm": 0.0, - "learning_rate": 1.3023622093284883e-06, - "loss": 0.8776, - "step": 29666 - }, - { - "epoch": 0.8406868996004421, - "grad_norm": 0.0, - "learning_rate": 1.3019093460474264e-06, - "loss": 0.7644, - "step": 29667 - }, - { - "epoch": 0.8407152370427046, - "grad_norm": 0.0, - "learning_rate": 1.301456556033609e-06, - "loss": 0.7827, - "step": 29668 - }, - { - "epoch": 0.840743574484967, - "grad_norm": 0.0, - "learning_rate": 1.301003839290853e-06, - "loss": 0.8344, - "step": 29669 - }, - { - "epoch": 0.8407719119272294, - "grad_norm": 0.0, - "learning_rate": 1.300551195822972e-06, - "loss": 0.8494, - "step": 29670 - }, - { - "epoch": 0.8408002493694919, - "grad_norm": 0.0, - "learning_rate": 1.30009862563378e-06, - "loss": 0.8693, - "step": 29671 - }, - { - "epoch": 0.8408285868117543, - "grad_norm": 0.0, - "learning_rate": 1.299646128727089e-06, - "loss": 0.8352, - "step": 29672 - }, - { - "epoch": 0.8408569242540168, - "grad_norm": 0.0, - "learning_rate": 1.2991937051067072e-06, - "loss": 0.8333, - "step": 29673 - }, - { - "epoch": 0.8408852616962793, - "grad_norm": 0.0, - "learning_rate": 1.2987413547764482e-06, - "loss": 0.8694, - "step": 29674 - }, - { - "epoch": 0.8409135991385418, - "grad_norm": 0.0, - "learning_rate": 1.2982890777401236e-06, - "loss": 0.9512, - "step": 29675 - }, - { - "epoch": 0.8409419365808042, - "grad_norm": 0.0, - "learning_rate": 1.2978368740015401e-06, - "loss": 0.7463, - "step": 29676 - }, - { - "epoch": 0.8409702740230667, - "grad_norm": 0.0, - "learning_rate": 1.2973847435645092e-06, - "loss": 0.7831, - "step": 29677 - }, - { - "epoch": 0.8409986114653292, - "grad_norm": 0.0, - "learning_rate": 1.2969326864328368e-06, - "loss": 0.7881, - "step": 29678 - }, - { - "epoch": 0.8410269489075916, - "grad_norm": 0.0, - "learning_rate": 1.296480702610332e-06, - "loss": 0.7714, - "step": 29679 - }, - { - "epoch": 0.841055286349854, - "grad_norm": 0.0, - "learning_rate": 1.2960287921008041e-06, - "loss": 0.7257, - "step": 29680 - }, - { - "epoch": 0.8410836237921165, - "grad_norm": 0.0, - "learning_rate": 1.2955769549080566e-06, - "loss": 0.8504, - "step": 29681 - }, - { - "epoch": 0.841111961234379, - "grad_norm": 0.0, - "learning_rate": 1.2951251910358952e-06, - "loss": 0.8273, - "step": 29682 - }, - { - "epoch": 0.8411402986766414, - "grad_norm": 0.0, - "learning_rate": 1.2946735004881284e-06, - "loss": 0.8642, - "step": 29683 - }, - { - "epoch": 0.8411686361189039, - "grad_norm": 0.0, - "learning_rate": 1.2942218832685583e-06, - "loss": 0.8523, - "step": 29684 - }, - { - "epoch": 0.8411969735611664, - "grad_norm": 0.0, - "learning_rate": 1.293770339380992e-06, - "loss": 0.6905, - "step": 29685 - }, - { - "epoch": 0.8412253110034288, - "grad_norm": 0.0, - "learning_rate": 1.2933188688292298e-06, - "loss": 0.8825, - "step": 29686 - }, - { - "epoch": 0.8412536484456913, - "grad_norm": 0.0, - "learning_rate": 1.2928674716170754e-06, - "loss": 0.7824, - "step": 29687 - }, - { - "epoch": 0.8412819858879538, - "grad_norm": 0.0, - "learning_rate": 1.2924161477483343e-06, - "loss": 0.7234, - "step": 29688 - }, - { - "epoch": 0.8413103233302163, - "grad_norm": 0.0, - "learning_rate": 1.291964897226803e-06, - "loss": 0.7368, - "step": 29689 - }, - { - "epoch": 0.8413386607724787, - "grad_norm": 0.0, - "learning_rate": 1.2915137200562867e-06, - "loss": 0.8202, - "step": 29690 - }, - { - "epoch": 0.8413669982147411, - "grad_norm": 0.0, - "learning_rate": 1.2910626162405827e-06, - "loss": 0.7564, - "step": 29691 - }, - { - "epoch": 0.8413953356570036, - "grad_norm": 0.0, - "learning_rate": 1.290611585783491e-06, - "loss": 0.7218, - "step": 29692 - }, - { - "epoch": 0.841423673099266, - "grad_norm": 0.0, - "learning_rate": 1.2901606286888157e-06, - "loss": 0.8102, - "step": 29693 - }, - { - "epoch": 0.8414520105415285, - "grad_norm": 0.0, - "learning_rate": 1.2897097449603491e-06, - "loss": 0.8495, - "step": 29694 - }, - { - "epoch": 0.841480347983791, - "grad_norm": 0.0, - "learning_rate": 1.2892589346018914e-06, - "loss": 0.8096, - "step": 29695 - }, - { - "epoch": 0.8415086854260534, - "grad_norm": 0.0, - "learning_rate": 1.2888081976172418e-06, - "loss": 0.908, - "step": 29696 - }, - { - "epoch": 0.8415370228683159, - "grad_norm": 0.0, - "learning_rate": 1.288357534010194e-06, - "loss": 0.8659, - "step": 29697 - }, - { - "epoch": 0.8415653603105784, - "grad_norm": 0.0, - "learning_rate": 1.2879069437845483e-06, - "loss": 0.877, - "step": 29698 - }, - { - "epoch": 0.8415936977528409, - "grad_norm": 0.0, - "learning_rate": 1.2874564269440958e-06, - "loss": 0.9081, - "step": 29699 - }, - { - "epoch": 0.8416220351951033, - "grad_norm": 0.0, - "learning_rate": 1.287005983492633e-06, - "loss": 0.8411, - "step": 29700 - }, - { - "epoch": 0.8416503726373658, - "grad_norm": 0.0, - "learning_rate": 1.2865556134339552e-06, - "loss": 0.7938, - "step": 29701 - }, - { - "epoch": 0.8416787100796282, - "grad_norm": 0.0, - "learning_rate": 1.286105316771854e-06, - "loss": 0.8667, - "step": 29702 - }, - { - "epoch": 0.8417070475218906, - "grad_norm": 0.0, - "learning_rate": 1.2856550935101242e-06, - "loss": 0.7466, - "step": 29703 - }, - { - "epoch": 0.8417353849641531, - "grad_norm": 0.0, - "learning_rate": 1.2852049436525593e-06, - "loss": 0.8264, - "step": 29704 - }, - { - "epoch": 0.8417637224064156, - "grad_norm": 0.0, - "learning_rate": 1.2847548672029464e-06, - "loss": 0.869, - "step": 29705 - }, - { - "epoch": 0.8417920598486781, - "grad_norm": 0.0, - "learning_rate": 1.2843048641650825e-06, - "loss": 0.913, - "step": 29706 - }, - { - "epoch": 0.8418203972909405, - "grad_norm": 0.0, - "learning_rate": 1.2838549345427532e-06, - "loss": 0.7893, - "step": 29707 - }, - { - "epoch": 0.841848734733203, - "grad_norm": 0.0, - "learning_rate": 1.28340507833975e-06, - "loss": 0.6951, - "step": 29708 - }, - { - "epoch": 0.8418770721754655, - "grad_norm": 0.0, - "learning_rate": 1.2829552955598623e-06, - "loss": 0.788, - "step": 29709 - }, - { - "epoch": 0.8419054096177279, - "grad_norm": 0.0, - "learning_rate": 1.282505586206879e-06, - "loss": 0.903, - "step": 29710 - }, - { - "epoch": 0.8419337470599904, - "grad_norm": 0.0, - "learning_rate": 1.282055950284592e-06, - "loss": 0.8021, - "step": 29711 - }, - { - "epoch": 0.8419620845022528, - "grad_norm": 0.0, - "learning_rate": 1.281606387796781e-06, - "loss": 0.768, - "step": 29712 - }, - { - "epoch": 0.8419904219445152, - "grad_norm": 0.0, - "learning_rate": 1.281156898747238e-06, - "loss": 0.8383, - "step": 29713 - }, - { - "epoch": 0.8420187593867777, - "grad_norm": 0.0, - "learning_rate": 1.2807074831397503e-06, - "loss": 0.8257, - "step": 29714 - }, - { - "epoch": 0.8420470968290402, - "grad_norm": 0.0, - "learning_rate": 1.280258140978099e-06, - "loss": 0.8948, - "step": 29715 - }, - { - "epoch": 0.8420754342713027, - "grad_norm": 0.0, - "learning_rate": 1.2798088722660717e-06, - "loss": 0.6973, - "step": 29716 - }, - { - "epoch": 0.8421037717135651, - "grad_norm": 0.0, - "learning_rate": 1.2793596770074534e-06, - "loss": 0.7997, - "step": 29717 - }, - { - "epoch": 0.8421321091558276, - "grad_norm": 0.0, - "learning_rate": 1.2789105552060255e-06, - "loss": 0.7563, - "step": 29718 - }, - { - "epoch": 0.8421604465980901, - "grad_norm": 0.0, - "learning_rate": 1.2784615068655747e-06, - "loss": 0.7537, - "step": 29719 - }, - { - "epoch": 0.8421887840403525, - "grad_norm": 0.0, - "learning_rate": 1.2780125319898795e-06, - "loss": 0.7618, - "step": 29720 - }, - { - "epoch": 0.842217121482615, - "grad_norm": 0.0, - "learning_rate": 1.2775636305827234e-06, - "loss": 0.8307, - "step": 29721 - }, - { - "epoch": 0.8422454589248775, - "grad_norm": 0.0, - "learning_rate": 1.2771148026478874e-06, - "loss": 0.7905, - "step": 29722 - }, - { - "epoch": 0.84227379636714, - "grad_norm": 0.0, - "learning_rate": 1.2766660481891525e-06, - "loss": 0.7005, - "step": 29723 - }, - { - "epoch": 0.8423021338094023, - "grad_norm": 0.0, - "learning_rate": 1.2762173672102996e-06, - "loss": 0.8754, - "step": 29724 - }, - { - "epoch": 0.8423304712516648, - "grad_norm": 0.0, - "learning_rate": 1.275768759715108e-06, - "loss": 0.7585, - "step": 29725 - }, - { - "epoch": 0.8423588086939273, - "grad_norm": 0.0, - "learning_rate": 1.275320225707355e-06, - "loss": 0.6942, - "step": 29726 - }, - { - "epoch": 0.8423871461361897, - "grad_norm": 0.0, - "learning_rate": 1.2748717651908215e-06, - "loss": 0.9536, - "step": 29727 - }, - { - "epoch": 0.8424154835784522, - "grad_norm": 0.0, - "learning_rate": 1.2744233781692805e-06, - "loss": 0.7257, - "step": 29728 - }, - { - "epoch": 0.8424438210207147, - "grad_norm": 0.0, - "learning_rate": 1.273975064646512e-06, - "loss": 0.9219, - "step": 29729 - }, - { - "epoch": 0.8424721584629772, - "grad_norm": 0.0, - "learning_rate": 1.2735268246262943e-06, - "loss": 0.8456, - "step": 29730 - }, - { - "epoch": 0.8425004959052396, - "grad_norm": 0.0, - "learning_rate": 1.2730786581123977e-06, - "loss": 0.7633, - "step": 29731 - }, - { - "epoch": 0.8425288333475021, - "grad_norm": 0.0, - "learning_rate": 1.2726305651086001e-06, - "loss": 0.8238, - "step": 29732 - }, - { - "epoch": 0.8425571707897646, - "grad_norm": 0.0, - "learning_rate": 1.2721825456186788e-06, - "loss": 0.8515, - "step": 29733 - }, - { - "epoch": 0.8425855082320269, - "grad_norm": 0.0, - "learning_rate": 1.2717345996464025e-06, - "loss": 0.8032, - "step": 29734 - }, - { - "epoch": 0.8426138456742894, - "grad_norm": 0.0, - "learning_rate": 1.2712867271955475e-06, - "loss": 0.8241, - "step": 29735 - }, - { - "epoch": 0.8426421831165519, - "grad_norm": 0.0, - "learning_rate": 1.2708389282698852e-06, - "loss": 0.7683, - "step": 29736 - }, - { - "epoch": 0.8426705205588143, - "grad_norm": 0.0, - "learning_rate": 1.270391202873188e-06, - "loss": 0.787, - "step": 29737 - }, - { - "epoch": 0.8426988580010768, - "grad_norm": 0.0, - "learning_rate": 1.2699435510092295e-06, - "loss": 0.788, - "step": 29738 - }, - { - "epoch": 0.8427271954433393, - "grad_norm": 0.0, - "learning_rate": 1.269495972681777e-06, - "loss": 0.6998, - "step": 29739 - }, - { - "epoch": 0.8427555328856018, - "grad_norm": 0.0, - "learning_rate": 1.2690484678946025e-06, - "loss": 0.8788, - "step": 29740 - }, - { - "epoch": 0.8427838703278642, - "grad_norm": 0.0, - "learning_rate": 1.2686010366514744e-06, - "loss": 0.6861, - "step": 29741 - }, - { - "epoch": 0.8428122077701267, - "grad_norm": 0.0, - "learning_rate": 1.268153678956161e-06, - "loss": 0.6615, - "step": 29742 - }, - { - "epoch": 0.8428405452123892, - "grad_norm": 0.0, - "learning_rate": 1.2677063948124336e-06, - "loss": 0.857, - "step": 29743 - }, - { - "epoch": 0.8428688826546515, - "grad_norm": 0.0, - "learning_rate": 1.2672591842240566e-06, - "loss": 0.694, - "step": 29744 - }, - { - "epoch": 0.842897220096914, - "grad_norm": 0.0, - "learning_rate": 1.2668120471947975e-06, - "loss": 0.6923, - "step": 29745 - }, - { - "epoch": 0.8429255575391765, - "grad_norm": 0.0, - "learning_rate": 1.2663649837284265e-06, - "loss": 0.9003, - "step": 29746 - }, - { - "epoch": 0.842953894981439, - "grad_norm": 0.0, - "learning_rate": 1.2659179938287035e-06, - "loss": 0.7547, - "step": 29747 - }, - { - "epoch": 0.8429822324237014, - "grad_norm": 0.0, - "learning_rate": 1.2654710774993973e-06, - "loss": 0.7837, - "step": 29748 - }, - { - "epoch": 0.8430105698659639, - "grad_norm": 0.0, - "learning_rate": 1.265024234744271e-06, - "loss": 0.8609, - "step": 29749 - }, - { - "epoch": 0.8430389073082264, - "grad_norm": 0.0, - "learning_rate": 1.2645774655670894e-06, - "loss": 0.7796, - "step": 29750 - }, - { - "epoch": 0.8430672447504888, - "grad_norm": 0.0, - "learning_rate": 1.2641307699716188e-06, - "loss": 0.8123, - "step": 29751 - }, - { - "epoch": 0.8430955821927513, - "grad_norm": 0.0, - "learning_rate": 1.2636841479616158e-06, - "loss": 0.8691, - "step": 29752 - }, - { - "epoch": 0.8431239196350138, - "grad_norm": 0.0, - "learning_rate": 1.2632375995408453e-06, - "loss": 0.9242, - "step": 29753 - }, - { - "epoch": 0.8431522570772763, - "grad_norm": 0.0, - "learning_rate": 1.2627911247130709e-06, - "loss": 0.8158, - "step": 29754 - }, - { - "epoch": 0.8431805945195386, - "grad_norm": 0.0, - "learning_rate": 1.2623447234820485e-06, - "loss": 0.6794, - "step": 29755 - }, - { - "epoch": 0.8432089319618011, - "grad_norm": 0.0, - "learning_rate": 1.261898395851544e-06, - "loss": 0.8102, - "step": 29756 - }, - { - "epoch": 0.8432372694040636, - "grad_norm": 0.0, - "learning_rate": 1.2614521418253123e-06, - "loss": 0.7619, - "step": 29757 - }, - { - "epoch": 0.843265606846326, - "grad_norm": 0.0, - "learning_rate": 1.2610059614071135e-06, - "loss": 0.7307, - "step": 29758 - }, - { - "epoch": 0.8432939442885885, - "grad_norm": 0.0, - "learning_rate": 1.260559854600709e-06, - "loss": 0.8516, - "step": 29759 - }, - { - "epoch": 0.843322281730851, - "grad_norm": 0.0, - "learning_rate": 1.2601138214098528e-06, - "loss": 0.8608, - "step": 29760 - }, - { - "epoch": 0.8433506191731134, - "grad_norm": 0.0, - "learning_rate": 1.259667861838303e-06, - "loss": 0.913, - "step": 29761 - }, - { - "epoch": 0.8433789566153759, - "grad_norm": 0.0, - "learning_rate": 1.259221975889816e-06, - "loss": 0.758, - "step": 29762 - }, - { - "epoch": 0.8434072940576384, - "grad_norm": 0.0, - "learning_rate": 1.2587761635681484e-06, - "loss": 0.8022, - "step": 29763 - }, - { - "epoch": 0.8434356314999009, - "grad_norm": 0.0, - "learning_rate": 1.2583304248770568e-06, - "loss": 0.8128, - "step": 29764 - }, - { - "epoch": 0.8434639689421632, - "grad_norm": 0.0, - "learning_rate": 1.2578847598202925e-06, - "loss": 0.764, - "step": 29765 - }, - { - "epoch": 0.8434923063844257, - "grad_norm": 0.0, - "learning_rate": 1.2574391684016107e-06, - "loss": 0.8097, - "step": 29766 - }, - { - "epoch": 0.8435206438266882, - "grad_norm": 0.0, - "learning_rate": 1.2569936506247682e-06, - "loss": 0.8784, - "step": 29767 - }, - { - "epoch": 0.8435489812689506, - "grad_norm": 0.0, - "learning_rate": 1.256548206493512e-06, - "loss": 0.8247, - "step": 29768 - }, - { - "epoch": 0.8435773187112131, - "grad_norm": 0.0, - "learning_rate": 1.2561028360116002e-06, - "loss": 0.639, - "step": 29769 - }, - { - "epoch": 0.8436056561534756, - "grad_norm": 0.0, - "learning_rate": 1.2556575391827774e-06, - "loss": 0.8951, - "step": 29770 - }, - { - "epoch": 0.8436339935957381, - "grad_norm": 0.0, - "learning_rate": 1.2552123160108e-06, - "loss": 0.8059, - "step": 29771 - }, - { - "epoch": 0.8436623310380005, - "grad_norm": 0.0, - "learning_rate": 1.2547671664994154e-06, - "loss": 0.7021, - "step": 29772 - }, - { - "epoch": 0.843690668480263, - "grad_norm": 0.0, - "learning_rate": 1.2543220906523745e-06, - "loss": 0.6827, - "step": 29773 - }, - { - "epoch": 0.8437190059225255, - "grad_norm": 0.0, - "learning_rate": 1.2538770884734286e-06, - "loss": 0.7177, - "step": 29774 - }, - { - "epoch": 0.8437473433647878, - "grad_norm": 0.0, - "learning_rate": 1.2534321599663213e-06, - "loss": 0.7451, - "step": 29775 - }, - { - "epoch": 0.8437756808070503, - "grad_norm": 0.0, - "learning_rate": 1.2529873051348041e-06, - "loss": 0.8187, - "step": 29776 - }, - { - "epoch": 0.8438040182493128, - "grad_norm": 0.0, - "learning_rate": 1.252542523982624e-06, - "loss": 0.8032, - "step": 29777 - }, - { - "epoch": 0.8438323556915753, - "grad_norm": 0.0, - "learning_rate": 1.2520978165135245e-06, - "loss": 0.806, - "step": 29778 - }, - { - "epoch": 0.8438606931338377, - "grad_norm": 0.0, - "learning_rate": 1.251653182731254e-06, - "loss": 0.7664, - "step": 29779 - }, - { - "epoch": 0.8438890305761002, - "grad_norm": 0.0, - "learning_rate": 1.2512086226395591e-06, - "loss": 0.7789, - "step": 29780 - }, - { - "epoch": 0.8439173680183627, - "grad_norm": 0.0, - "learning_rate": 1.2507641362421808e-06, - "loss": 0.8614, - "step": 29781 - }, - { - "epoch": 0.8439457054606251, - "grad_norm": 0.0, - "learning_rate": 1.2503197235428643e-06, - "loss": 0.8063, - "step": 29782 - }, - { - "epoch": 0.8439740429028876, - "grad_norm": 0.0, - "learning_rate": 1.249875384545357e-06, - "loss": 0.7572, - "step": 29783 - }, - { - "epoch": 0.8440023803451501, - "grad_norm": 0.0, - "learning_rate": 1.2494311192533958e-06, - "loss": 0.806, - "step": 29784 - }, - { - "epoch": 0.8440307177874125, - "grad_norm": 0.0, - "learning_rate": 1.2489869276707268e-06, - "loss": 0.8256, - "step": 29785 - }, - { - "epoch": 0.844059055229675, - "grad_norm": 0.0, - "learning_rate": 1.2485428098010898e-06, - "loss": 0.7853, - "step": 29786 - }, - { - "epoch": 0.8440873926719374, - "grad_norm": 0.0, - "learning_rate": 1.2480987656482257e-06, - "loss": 0.6586, - "step": 29787 - }, - { - "epoch": 0.8441157301141999, - "grad_norm": 0.0, - "learning_rate": 1.247654795215879e-06, - "loss": 0.8545, - "step": 29788 - }, - { - "epoch": 0.8441440675564623, - "grad_norm": 0.0, - "learning_rate": 1.2472108985077836e-06, - "loss": 0.8739, - "step": 29789 - }, - { - "epoch": 0.8441724049987248, - "grad_norm": 0.0, - "learning_rate": 1.2467670755276805e-06, - "loss": 0.766, - "step": 29790 - }, - { - "epoch": 0.8442007424409873, - "grad_norm": 0.0, - "learning_rate": 1.2463233262793118e-06, - "loss": 0.7926, - "step": 29791 - }, - { - "epoch": 0.8442290798832497, - "grad_norm": 0.0, - "learning_rate": 1.2458796507664095e-06, - "loss": 0.7927, - "step": 29792 - }, - { - "epoch": 0.8442574173255122, - "grad_norm": 0.0, - "learning_rate": 1.2454360489927163e-06, - "loss": 0.7361, - "step": 29793 - }, - { - "epoch": 0.8442857547677747, - "grad_norm": 0.0, - "learning_rate": 1.244992520961964e-06, - "loss": 0.8135, - "step": 29794 - }, - { - "epoch": 0.8443140922100372, - "grad_norm": 0.0, - "learning_rate": 1.2445490666778904e-06, - "loss": 0.8112, - "step": 29795 - }, - { - "epoch": 0.8443424296522996, - "grad_norm": 0.0, - "learning_rate": 1.2441056861442336e-06, - "loss": 0.7326, - "step": 29796 - }, - { - "epoch": 0.844370767094562, - "grad_norm": 0.0, - "learning_rate": 1.243662379364724e-06, - "loss": 0.8136, - "step": 29797 - }, - { - "epoch": 0.8443991045368245, - "grad_norm": 0.0, - "learning_rate": 1.2432191463430977e-06, - "loss": 0.8123, - "step": 29798 - }, - { - "epoch": 0.8444274419790869, - "grad_norm": 0.0, - "learning_rate": 1.242775987083088e-06, - "loss": 0.8264, - "step": 29799 - }, - { - "epoch": 0.8444557794213494, - "grad_norm": 0.0, - "learning_rate": 1.2423329015884289e-06, - "loss": 0.8956, - "step": 29800 - }, - { - "epoch": 0.8444841168636119, - "grad_norm": 0.0, - "learning_rate": 1.241889889862853e-06, - "loss": 0.8645, - "step": 29801 - }, - { - "epoch": 0.8445124543058744, - "grad_norm": 0.0, - "learning_rate": 1.241446951910089e-06, - "loss": 0.6949, - "step": 29802 - }, - { - "epoch": 0.8445407917481368, - "grad_norm": 0.0, - "learning_rate": 1.2410040877338704e-06, - "loss": 0.816, - "step": 29803 - }, - { - "epoch": 0.8445691291903993, - "grad_norm": 0.0, - "learning_rate": 1.2405612973379277e-06, - "loss": 0.8441, - "step": 29804 - }, - { - "epoch": 0.8445974666326618, - "grad_norm": 0.0, - "learning_rate": 1.2401185807259886e-06, - "loss": 0.9709, - "step": 29805 - }, - { - "epoch": 0.8446258040749242, - "grad_norm": 0.0, - "learning_rate": 1.2396759379017865e-06, - "loss": 0.8243, - "step": 29806 - }, - { - "epoch": 0.8446541415171867, - "grad_norm": 0.0, - "learning_rate": 1.2392333688690438e-06, - "loss": 0.7738, - "step": 29807 - }, - { - "epoch": 0.8446824789594491, - "grad_norm": 0.0, - "learning_rate": 1.2387908736314924e-06, - "loss": 0.7001, - "step": 29808 - }, - { - "epoch": 0.8447108164017115, - "grad_norm": 0.0, - "learning_rate": 1.2383484521928602e-06, - "loss": 0.8396, - "step": 29809 - }, - { - "epoch": 0.844739153843974, - "grad_norm": 0.0, - "learning_rate": 1.2379061045568707e-06, - "loss": 0.8129, - "step": 29810 - }, - { - "epoch": 0.8447674912862365, - "grad_norm": 0.0, - "learning_rate": 1.2374638307272524e-06, - "loss": 0.8895, - "step": 29811 - }, - { - "epoch": 0.844795828728499, - "grad_norm": 0.0, - "learning_rate": 1.2370216307077287e-06, - "loss": 0.7012, - "step": 29812 - }, - { - "epoch": 0.8448241661707614, - "grad_norm": 0.0, - "learning_rate": 1.2365795045020268e-06, - "loss": 0.7628, - "step": 29813 - }, - { - "epoch": 0.8448525036130239, - "grad_norm": 0.0, - "learning_rate": 1.2361374521138724e-06, - "loss": 0.7955, - "step": 29814 - }, - { - "epoch": 0.8448808410552864, - "grad_norm": 0.0, - "learning_rate": 1.2356954735469839e-06, - "loss": 0.7773, - "step": 29815 - }, - { - "epoch": 0.8449091784975488, - "grad_norm": 0.0, - "learning_rate": 1.235253568805087e-06, - "loss": 0.958, - "step": 29816 - }, - { - "epoch": 0.8449375159398113, - "grad_norm": 0.0, - "learning_rate": 1.2348117378919068e-06, - "loss": 0.8281, - "step": 29817 - }, - { - "epoch": 0.8449658533820738, - "grad_norm": 0.0, - "learning_rate": 1.2343699808111598e-06, - "loss": 0.797, - "step": 29818 - }, - { - "epoch": 0.8449941908243362, - "grad_norm": 0.0, - "learning_rate": 1.2339282975665712e-06, - "loss": 0.9106, - "step": 29819 - }, - { - "epoch": 0.8450225282665986, - "grad_norm": 0.0, - "learning_rate": 1.233486688161858e-06, - "loss": 0.8064, - "step": 29820 - }, - { - "epoch": 0.8450508657088611, - "grad_norm": 0.0, - "learning_rate": 1.2330451526007414e-06, - "loss": 0.7648, - "step": 29821 - }, - { - "epoch": 0.8450792031511236, - "grad_norm": 0.0, - "learning_rate": 1.2326036908869444e-06, - "loss": 0.774, - "step": 29822 - }, - { - "epoch": 0.845107540593386, - "grad_norm": 0.0, - "learning_rate": 1.2321623030241781e-06, - "loss": 0.8705, - "step": 29823 - }, - { - "epoch": 0.8451358780356485, - "grad_norm": 0.0, - "learning_rate": 1.2317209890161663e-06, - "loss": 0.8035, - "step": 29824 - }, - { - "epoch": 0.845164215477911, - "grad_norm": 0.0, - "learning_rate": 1.231279748866624e-06, - "loss": 0.7744, - "step": 29825 - }, - { - "epoch": 0.8451925529201735, - "grad_norm": 0.0, - "learning_rate": 1.230838582579268e-06, - "loss": 0.8617, - "step": 29826 - }, - { - "epoch": 0.8452208903624359, - "grad_norm": 0.0, - "learning_rate": 1.2303974901578176e-06, - "loss": 0.8379, - "step": 29827 - }, - { - "epoch": 0.8452492278046984, - "grad_norm": 0.0, - "learning_rate": 1.229956471605983e-06, - "loss": 0.8931, - "step": 29828 - }, - { - "epoch": 0.8452775652469608, - "grad_norm": 0.0, - "learning_rate": 1.2295155269274827e-06, - "loss": 0.8456, - "step": 29829 - }, - { - "epoch": 0.8453059026892232, - "grad_norm": 0.0, - "learning_rate": 1.2290746561260313e-06, - "loss": 0.7755, - "step": 29830 - }, - { - "epoch": 0.8453342401314857, - "grad_norm": 0.0, - "learning_rate": 1.2286338592053393e-06, - "loss": 0.7882, - "step": 29831 - }, - { - "epoch": 0.8453625775737482, - "grad_norm": 0.0, - "learning_rate": 1.2281931361691214e-06, - "loss": 0.8708, - "step": 29832 - }, - { - "epoch": 0.8453909150160106, - "grad_norm": 0.0, - "learning_rate": 1.2277524870210922e-06, - "loss": 0.7992, - "step": 29833 - }, - { - "epoch": 0.8454192524582731, - "grad_norm": 0.0, - "learning_rate": 1.227311911764959e-06, - "loss": 0.7597, - "step": 29834 - }, - { - "epoch": 0.8454475899005356, - "grad_norm": 0.0, - "learning_rate": 1.2268714104044376e-06, - "loss": 0.9009, - "step": 29835 - }, - { - "epoch": 0.8454759273427981, - "grad_norm": 0.0, - "learning_rate": 1.226430982943234e-06, - "loss": 0.8851, - "step": 29836 - }, - { - "epoch": 0.8455042647850605, - "grad_norm": 0.0, - "learning_rate": 1.22599062938506e-06, - "loss": 0.8484, - "step": 29837 - }, - { - "epoch": 0.845532602227323, - "grad_norm": 0.0, - "learning_rate": 1.2255503497336251e-06, - "loss": 0.8604, - "step": 29838 - }, - { - "epoch": 0.8455609396695855, - "grad_norm": 0.0, - "learning_rate": 1.2251101439926383e-06, - "loss": 0.8938, - "step": 29839 - }, - { - "epoch": 0.8455892771118478, - "grad_norm": 0.0, - "learning_rate": 1.2246700121658073e-06, - "loss": 0.7876, - "step": 29840 - }, - { - "epoch": 0.8456176145541103, - "grad_norm": 0.0, - "learning_rate": 1.2242299542568404e-06, - "loss": 0.7923, - "step": 29841 - }, - { - "epoch": 0.8456459519963728, - "grad_norm": 0.0, - "learning_rate": 1.2237899702694423e-06, - "loss": 0.8521, - "step": 29842 - }, - { - "epoch": 0.8456742894386353, - "grad_norm": 0.0, - "learning_rate": 1.2233500602073222e-06, - "loss": 0.7414, - "step": 29843 - }, - { - "epoch": 0.8457026268808977, - "grad_norm": 0.0, - "learning_rate": 1.2229102240741819e-06, - "loss": 0.7923, - "step": 29844 - }, - { - "epoch": 0.8457309643231602, - "grad_norm": 0.0, - "learning_rate": 1.222470461873727e-06, - "loss": 0.8356, - "step": 29845 - }, - { - "epoch": 0.8457593017654227, - "grad_norm": 0.0, - "learning_rate": 1.222030773609666e-06, - "loss": 0.9092, - "step": 29846 - }, - { - "epoch": 0.8457876392076851, - "grad_norm": 0.0, - "learning_rate": 1.2215911592856967e-06, - "loss": 0.8378, - "step": 29847 - }, - { - "epoch": 0.8458159766499476, - "grad_norm": 0.0, - "learning_rate": 1.2211516189055272e-06, - "loss": 0.8911, - "step": 29848 - }, - { - "epoch": 0.8458443140922101, - "grad_norm": 0.0, - "learning_rate": 1.220712152472856e-06, - "loss": 0.8093, - "step": 29849 - }, - { - "epoch": 0.8458726515344726, - "grad_norm": 0.0, - "learning_rate": 1.2202727599913855e-06, - "loss": 0.8242, - "step": 29850 - }, - { - "epoch": 0.8459009889767349, - "grad_norm": 0.0, - "learning_rate": 1.2198334414648183e-06, - "loss": 0.8649, - "step": 29851 - }, - { - "epoch": 0.8459293264189974, - "grad_norm": 0.0, - "learning_rate": 1.2193941968968538e-06, - "loss": 0.7779, - "step": 29852 - }, - { - "epoch": 0.8459576638612599, - "grad_norm": 0.0, - "learning_rate": 1.2189550262911932e-06, - "loss": 0.8088, - "step": 29853 - }, - { - "epoch": 0.8459860013035223, - "grad_norm": 0.0, - "learning_rate": 1.218515929651537e-06, - "loss": 0.9201, - "step": 29854 - }, - { - "epoch": 0.8460143387457848, - "grad_norm": 0.0, - "learning_rate": 1.21807690698158e-06, - "loss": 0.8315, - "step": 29855 - }, - { - "epoch": 0.8460426761880473, - "grad_norm": 0.0, - "learning_rate": 1.2176379582850252e-06, - "loss": 0.875, - "step": 29856 - }, - { - "epoch": 0.8460710136303097, - "grad_norm": 0.0, - "learning_rate": 1.2171990835655645e-06, - "loss": 0.7885, - "step": 29857 - }, - { - "epoch": 0.8460993510725722, - "grad_norm": 0.0, - "learning_rate": 1.2167602828268965e-06, - "loss": 0.739, - "step": 29858 - }, - { - "epoch": 0.8461276885148347, - "grad_norm": 0.0, - "learning_rate": 1.2163215560727215e-06, - "loss": 0.8007, - "step": 29859 - }, - { - "epoch": 0.8461560259570972, - "grad_norm": 0.0, - "learning_rate": 1.2158829033067288e-06, - "loss": 0.781, - "step": 29860 - }, - { - "epoch": 0.8461843633993595, - "grad_norm": 0.0, - "learning_rate": 1.2154443245326176e-06, - "loss": 0.8798, - "step": 29861 - }, - { - "epoch": 0.846212700841622, - "grad_norm": 0.0, - "learning_rate": 1.2150058197540815e-06, - "loss": 0.8077, - "step": 29862 - }, - { - "epoch": 0.8462410382838845, - "grad_norm": 0.0, - "learning_rate": 1.2145673889748122e-06, - "loss": 0.8881, - "step": 29863 - }, - { - "epoch": 0.8462693757261469, - "grad_norm": 0.0, - "learning_rate": 1.2141290321985045e-06, - "loss": 0.8286, - "step": 29864 - }, - { - "epoch": 0.8462977131684094, - "grad_norm": 0.0, - "learning_rate": 1.2136907494288497e-06, - "loss": 0.6791, - "step": 29865 - }, - { - "epoch": 0.8463260506106719, - "grad_norm": 0.0, - "learning_rate": 1.2132525406695405e-06, - "loss": 0.9569, - "step": 29866 - }, - { - "epoch": 0.8463543880529344, - "grad_norm": 0.0, - "learning_rate": 1.2128144059242708e-06, - "loss": 0.8012, - "step": 29867 - }, - { - "epoch": 0.8463827254951968, - "grad_norm": 0.0, - "learning_rate": 1.2123763451967262e-06, - "loss": 0.8146, - "step": 29868 - }, - { - "epoch": 0.8464110629374593, - "grad_norm": 0.0, - "learning_rate": 1.2119383584905985e-06, - "loss": 0.731, - "step": 29869 - }, - { - "epoch": 0.8464394003797218, - "grad_norm": 0.0, - "learning_rate": 1.2115004458095793e-06, - "loss": 0.8063, - "step": 29870 - }, - { - "epoch": 0.8464677378219841, - "grad_norm": 0.0, - "learning_rate": 1.2110626071573539e-06, - "loss": 0.8032, - "step": 29871 - }, - { - "epoch": 0.8464960752642466, - "grad_norm": 0.0, - "learning_rate": 1.2106248425376133e-06, - "loss": 0.7899, - "step": 29872 - }, - { - "epoch": 0.8465244127065091, - "grad_norm": 0.0, - "learning_rate": 1.210187151954042e-06, - "loss": 0.6821, - "step": 29873 - }, - { - "epoch": 0.8465527501487716, - "grad_norm": 0.0, - "learning_rate": 1.2097495354103284e-06, - "loss": 0.7102, - "step": 29874 - }, - { - "epoch": 0.846581087591034, - "grad_norm": 0.0, - "learning_rate": 1.2093119929101605e-06, - "loss": 0.7543, - "step": 29875 - }, - { - "epoch": 0.8466094250332965, - "grad_norm": 0.0, - "learning_rate": 1.2088745244572198e-06, - "loss": 0.8157, - "step": 29876 - }, - { - "epoch": 0.846637762475559, - "grad_norm": 0.0, - "learning_rate": 1.2084371300551935e-06, - "loss": 0.9299, - "step": 29877 - }, - { - "epoch": 0.8466660999178214, - "grad_norm": 0.0, - "learning_rate": 1.207999809707766e-06, - "loss": 0.8919, - "step": 29878 - }, - { - "epoch": 0.8466944373600839, - "grad_norm": 0.0, - "learning_rate": 1.2075625634186205e-06, - "loss": 0.7903, - "step": 29879 - }, - { - "epoch": 0.8467227748023464, - "grad_norm": 0.0, - "learning_rate": 1.2071253911914427e-06, - "loss": 0.8284, - "step": 29880 - }, - { - "epoch": 0.8467511122446088, - "grad_norm": 0.0, - "learning_rate": 1.2066882930299117e-06, - "loss": 0.8572, - "step": 29881 - }, - { - "epoch": 0.8467794496868712, - "grad_norm": 0.0, - "learning_rate": 1.2062512689377093e-06, - "loss": 0.8058, - "step": 29882 - }, - { - "epoch": 0.8468077871291337, - "grad_norm": 0.0, - "learning_rate": 1.2058143189185213e-06, - "loss": 0.7917, - "step": 29883 - }, - { - "epoch": 0.8468361245713962, - "grad_norm": 0.0, - "learning_rate": 1.2053774429760213e-06, - "loss": 0.9333, - "step": 29884 - }, - { - "epoch": 0.8468644620136586, - "grad_norm": 0.0, - "learning_rate": 1.2049406411138963e-06, - "loss": 0.8276, - "step": 29885 - }, - { - "epoch": 0.8468927994559211, - "grad_norm": 0.0, - "learning_rate": 1.2045039133358193e-06, - "loss": 0.7917, - "step": 29886 - }, - { - "epoch": 0.8469211368981836, - "grad_norm": 0.0, - "learning_rate": 1.2040672596454727e-06, - "loss": 0.8958, - "step": 29887 - }, - { - "epoch": 0.846949474340446, - "grad_norm": 0.0, - "learning_rate": 1.203630680046536e-06, - "loss": 0.8814, - "step": 29888 - }, - { - "epoch": 0.8469778117827085, - "grad_norm": 0.0, - "learning_rate": 1.2031941745426824e-06, - "loss": 0.8143, - "step": 29889 - }, - { - "epoch": 0.847006149224971, - "grad_norm": 0.0, - "learning_rate": 1.2027577431375902e-06, - "loss": 0.8185, - "step": 29890 - }, - { - "epoch": 0.8470344866672335, - "grad_norm": 0.0, - "learning_rate": 1.202321385834937e-06, - "loss": 0.7249, - "step": 29891 - }, - { - "epoch": 0.8470628241094958, - "grad_norm": 0.0, - "learning_rate": 1.2018851026383971e-06, - "loss": 0.814, - "step": 29892 - }, - { - "epoch": 0.8470911615517583, - "grad_norm": 0.0, - "learning_rate": 1.2014488935516476e-06, - "loss": 0.8161, - "step": 29893 - }, - { - "epoch": 0.8471194989940208, - "grad_norm": 0.0, - "learning_rate": 1.2010127585783604e-06, - "loss": 0.8692, - "step": 29894 - }, - { - "epoch": 0.8471478364362832, - "grad_norm": 0.0, - "learning_rate": 1.20057669772221e-06, - "loss": 0.8122, - "step": 29895 - }, - { - "epoch": 0.8471761738785457, - "grad_norm": 0.0, - "learning_rate": 1.2001407109868713e-06, - "loss": 0.8171, - "step": 29896 - }, - { - "epoch": 0.8472045113208082, - "grad_norm": 0.0, - "learning_rate": 1.1997047983760136e-06, - "loss": 0.7604, - "step": 29897 - }, - { - "epoch": 0.8472328487630707, - "grad_norm": 0.0, - "learning_rate": 1.1992689598933104e-06, - "loss": 0.7986, - "step": 29898 - }, - { - "epoch": 0.8472611862053331, - "grad_norm": 0.0, - "learning_rate": 1.198833195542435e-06, - "loss": 0.799, - "step": 29899 - }, - { - "epoch": 0.8472895236475956, - "grad_norm": 0.0, - "learning_rate": 1.1983975053270525e-06, - "loss": 0.853, - "step": 29900 - }, - { - "epoch": 0.8473178610898581, - "grad_norm": 0.0, - "learning_rate": 1.1979618892508394e-06, - "loss": 0.8414, - "step": 29901 - }, - { - "epoch": 0.8473461985321205, - "grad_norm": 0.0, - "learning_rate": 1.1975263473174592e-06, - "loss": 0.8321, - "step": 29902 - }, - { - "epoch": 0.847374535974383, - "grad_norm": 0.0, - "learning_rate": 1.1970908795305835e-06, - "loss": 0.844, - "step": 29903 - }, - { - "epoch": 0.8474028734166454, - "grad_norm": 0.0, - "learning_rate": 1.1966554858938805e-06, - "loss": 0.8309, - "step": 29904 - }, - { - "epoch": 0.8474312108589078, - "grad_norm": 0.0, - "learning_rate": 1.1962201664110174e-06, - "loss": 0.8238, - "step": 29905 - }, - { - "epoch": 0.8474595483011703, - "grad_norm": 0.0, - "learning_rate": 1.195784921085662e-06, - "loss": 0.8124, - "step": 29906 - }, - { - "epoch": 0.8474878857434328, - "grad_norm": 0.0, - "learning_rate": 1.1953497499214784e-06, - "loss": 0.7284, - "step": 29907 - }, - { - "epoch": 0.8475162231856953, - "grad_norm": 0.0, - "learning_rate": 1.1949146529221334e-06, - "loss": 0.7906, - "step": 29908 - }, - { - "epoch": 0.8475445606279577, - "grad_norm": 0.0, - "learning_rate": 1.1944796300912942e-06, - "loss": 0.9176, - "step": 29909 - }, - { - "epoch": 0.8475728980702202, - "grad_norm": 0.0, - "learning_rate": 1.19404468143262e-06, - "loss": 0.7837, - "step": 29910 - }, - { - "epoch": 0.8476012355124827, - "grad_norm": 0.0, - "learning_rate": 1.1936098069497792e-06, - "loss": 0.7294, - "step": 29911 - }, - { - "epoch": 0.8476295729547451, - "grad_norm": 0.0, - "learning_rate": 1.1931750066464331e-06, - "loss": 0.8404, - "step": 29912 - }, - { - "epoch": 0.8476579103970076, - "grad_norm": 0.0, - "learning_rate": 1.1927402805262434e-06, - "loss": 0.8261, - "step": 29913 - }, - { - "epoch": 0.84768624783927, - "grad_norm": 0.0, - "learning_rate": 1.1923056285928747e-06, - "loss": 0.7341, - "step": 29914 - }, - { - "epoch": 0.8477145852815325, - "grad_norm": 0.0, - "learning_rate": 1.1918710508499842e-06, - "loss": 0.7682, - "step": 29915 - }, - { - "epoch": 0.8477429227237949, - "grad_norm": 0.0, - "learning_rate": 1.1914365473012346e-06, - "loss": 0.8444, - "step": 29916 - }, - { - "epoch": 0.8477712601660574, - "grad_norm": 0.0, - "learning_rate": 1.1910021179502861e-06, - "loss": 0.8019, - "step": 29917 - }, - { - "epoch": 0.8477995976083199, - "grad_norm": 0.0, - "learning_rate": 1.1905677628007983e-06, - "loss": 0.8766, - "step": 29918 - }, - { - "epoch": 0.8478279350505823, - "grad_norm": 0.0, - "learning_rate": 1.1901334818564291e-06, - "loss": 0.874, - "step": 29919 - }, - { - "epoch": 0.8478562724928448, - "grad_norm": 0.0, - "learning_rate": 1.1896992751208392e-06, - "loss": 0.7787, - "step": 29920 - }, - { - "epoch": 0.8478846099351073, - "grad_norm": 0.0, - "learning_rate": 1.189265142597682e-06, - "loss": 0.7599, - "step": 29921 - }, - { - "epoch": 0.8479129473773697, - "grad_norm": 0.0, - "learning_rate": 1.1888310842906181e-06, - "loss": 0.7863, - "step": 29922 - }, - { - "epoch": 0.8479412848196322, - "grad_norm": 0.0, - "learning_rate": 1.1883971002033002e-06, - "loss": 0.7196, - "step": 29923 - }, - { - "epoch": 0.8479696222618947, - "grad_norm": 0.0, - "learning_rate": 1.1879631903393851e-06, - "loss": 0.8417, - "step": 29924 - }, - { - "epoch": 0.8479979597041571, - "grad_norm": 0.0, - "learning_rate": 1.1875293547025302e-06, - "loss": 0.7911, - "step": 29925 - }, - { - "epoch": 0.8480262971464195, - "grad_norm": 0.0, - "learning_rate": 1.1870955932963868e-06, - "loss": 0.8043, - "step": 29926 - }, - { - "epoch": 0.848054634588682, - "grad_norm": 0.0, - "learning_rate": 1.1866619061246088e-06, - "loss": 0.9359, - "step": 29927 - }, - { - "epoch": 0.8480829720309445, - "grad_norm": 0.0, - "learning_rate": 1.186228293190853e-06, - "loss": 0.8721, - "step": 29928 - }, - { - "epoch": 0.8481113094732069, - "grad_norm": 0.0, - "learning_rate": 1.1857947544987668e-06, - "loss": 0.8656, - "step": 29929 - }, - { - "epoch": 0.8481396469154694, - "grad_norm": 0.0, - "learning_rate": 1.1853612900520052e-06, - "loss": 0.8021, - "step": 29930 - }, - { - "epoch": 0.8481679843577319, - "grad_norm": 0.0, - "learning_rate": 1.1849278998542179e-06, - "loss": 0.8587, - "step": 29931 - }, - { - "epoch": 0.8481963217999944, - "grad_norm": 0.0, - "learning_rate": 1.1844945839090571e-06, - "loss": 0.8067, - "step": 29932 - }, - { - "epoch": 0.8482246592422568, - "grad_norm": 0.0, - "learning_rate": 1.184061342220173e-06, - "loss": 0.7614, - "step": 29933 - }, - { - "epoch": 0.8482529966845193, - "grad_norm": 0.0, - "learning_rate": 1.1836281747912125e-06, - "loss": 0.7037, - "step": 29934 - }, - { - "epoch": 0.8482813341267817, - "grad_norm": 0.0, - "learning_rate": 1.1831950816258275e-06, - "loss": 0.7994, - "step": 29935 - }, - { - "epoch": 0.8483096715690441, - "grad_norm": 0.0, - "learning_rate": 1.1827620627276627e-06, - "loss": 0.7742, - "step": 29936 - }, - { - "epoch": 0.8483380090113066, - "grad_norm": 0.0, - "learning_rate": 1.1823291181003671e-06, - "loss": 0.7963, - "step": 29937 - }, - { - "epoch": 0.8483663464535691, - "grad_norm": 0.0, - "learning_rate": 1.1818962477475903e-06, - "loss": 0.7562, - "step": 29938 - }, - { - "epoch": 0.8483946838958316, - "grad_norm": 0.0, - "learning_rate": 1.1814634516729729e-06, - "loss": 0.9471, - "step": 29939 - }, - { - "epoch": 0.848423021338094, - "grad_norm": 0.0, - "learning_rate": 1.1810307298801638e-06, - "loss": 0.8055, - "step": 29940 - }, - { - "epoch": 0.8484513587803565, - "grad_norm": 0.0, - "learning_rate": 1.1805980823728103e-06, - "loss": 0.762, - "step": 29941 - }, - { - "epoch": 0.848479696222619, - "grad_norm": 0.0, - "learning_rate": 1.1801655091545516e-06, - "loss": 0.7526, - "step": 29942 - }, - { - "epoch": 0.8485080336648814, - "grad_norm": 0.0, - "learning_rate": 1.179733010229034e-06, - "loss": 0.7954, - "step": 29943 - }, - { - "epoch": 0.8485363711071439, - "grad_norm": 0.0, - "learning_rate": 1.1793005855999007e-06, - "loss": 0.8671, - "step": 29944 - }, - { - "epoch": 0.8485647085494064, - "grad_norm": 0.0, - "learning_rate": 1.1788682352707936e-06, - "loss": 0.7565, - "step": 29945 - }, - { - "epoch": 0.8485930459916687, - "grad_norm": 0.0, - "learning_rate": 1.1784359592453576e-06, - "loss": 0.9071, - "step": 29946 - }, - { - "epoch": 0.8486213834339312, - "grad_norm": 0.0, - "learning_rate": 1.1780037575272297e-06, - "loss": 0.8821, - "step": 29947 - }, - { - "epoch": 0.8486497208761937, - "grad_norm": 0.0, - "learning_rate": 1.1775716301200512e-06, - "loss": 0.7518, - "step": 29948 - }, - { - "epoch": 0.8486780583184562, - "grad_norm": 0.0, - "learning_rate": 1.1771395770274653e-06, - "loss": 0.9046, - "step": 29949 - }, - { - "epoch": 0.8487063957607186, - "grad_norm": 0.0, - "learning_rate": 1.1767075982531073e-06, - "loss": 0.8208, - "step": 29950 - }, - { - "epoch": 0.8487347332029811, - "grad_norm": 0.0, - "learning_rate": 1.1762756938006192e-06, - "loss": 0.7977, - "step": 29951 - }, - { - "epoch": 0.8487630706452436, - "grad_norm": 0.0, - "learning_rate": 1.1758438636736358e-06, - "loss": 0.8668, - "step": 29952 - }, - { - "epoch": 0.848791408087506, - "grad_norm": 0.0, - "learning_rate": 1.1754121078757963e-06, - "loss": 0.8088, - "step": 29953 - }, - { - "epoch": 0.8488197455297685, - "grad_norm": 0.0, - "learning_rate": 1.1749804264107402e-06, - "loss": 0.7791, - "step": 29954 - }, - { - "epoch": 0.848848082972031, - "grad_norm": 0.0, - "learning_rate": 1.1745488192820974e-06, - "loss": 0.7444, - "step": 29955 - }, - { - "epoch": 0.8488764204142935, - "grad_norm": 0.0, - "learning_rate": 1.174117286493508e-06, - "loss": 0.7007, - "step": 29956 - }, - { - "epoch": 0.8489047578565558, - "grad_norm": 0.0, - "learning_rate": 1.1736858280486063e-06, - "loss": 0.8027, - "step": 29957 - }, - { - "epoch": 0.8489330952988183, - "grad_norm": 0.0, - "learning_rate": 1.1732544439510262e-06, - "loss": 0.8864, - "step": 29958 - }, - { - "epoch": 0.8489614327410808, - "grad_norm": 0.0, - "learning_rate": 1.172823134204405e-06, - "loss": 0.7837, - "step": 29959 - }, - { - "epoch": 0.8489897701833432, - "grad_norm": 0.0, - "learning_rate": 1.1723918988123684e-06, - "loss": 0.7595, - "step": 29960 - }, - { - "epoch": 0.8490181076256057, - "grad_norm": 0.0, - "learning_rate": 1.1719607377785537e-06, - "loss": 0.8862, - "step": 29961 - }, - { - "epoch": 0.8490464450678682, - "grad_norm": 0.0, - "learning_rate": 1.1715296511065942e-06, - "loss": 0.7113, - "step": 29962 - }, - { - "epoch": 0.8490747825101307, - "grad_norm": 0.0, - "learning_rate": 1.1710986388001167e-06, - "loss": 0.7292, - "step": 29963 - }, - { - "epoch": 0.8491031199523931, - "grad_norm": 0.0, - "learning_rate": 1.1706677008627564e-06, - "loss": 0.7895, - "step": 29964 - }, - { - "epoch": 0.8491314573946556, - "grad_norm": 0.0, - "learning_rate": 1.1702368372981375e-06, - "loss": 0.7895, - "step": 29965 - }, - { - "epoch": 0.8491597948369181, - "grad_norm": 0.0, - "learning_rate": 1.1698060481098938e-06, - "loss": 0.8641, - "step": 29966 - }, - { - "epoch": 0.8491881322791804, - "grad_norm": 0.0, - "learning_rate": 1.1693753333016534e-06, - "loss": 0.889, - "step": 29967 - }, - { - "epoch": 0.8492164697214429, - "grad_norm": 0.0, - "learning_rate": 1.1689446928770432e-06, - "loss": 0.9406, - "step": 29968 - }, - { - "epoch": 0.8492448071637054, - "grad_norm": 0.0, - "learning_rate": 1.1685141268396906e-06, - "loss": 0.7718, - "step": 29969 - }, - { - "epoch": 0.8492731446059678, - "grad_norm": 0.0, - "learning_rate": 1.1680836351932223e-06, - "loss": 0.7969, - "step": 29970 - }, - { - "epoch": 0.8493014820482303, - "grad_norm": 0.0, - "learning_rate": 1.1676532179412659e-06, - "loss": 0.7618, - "step": 29971 - }, - { - "epoch": 0.8493298194904928, - "grad_norm": 0.0, - "learning_rate": 1.1672228750874482e-06, - "loss": 0.7022, - "step": 29972 - }, - { - "epoch": 0.8493581569327553, - "grad_norm": 0.0, - "learning_rate": 1.1667926066353895e-06, - "loss": 0.8724, - "step": 29973 - }, - { - "epoch": 0.8493864943750177, - "grad_norm": 0.0, - "learning_rate": 1.1663624125887186e-06, - "loss": 0.8222, - "step": 29974 - }, - { - "epoch": 0.8494148318172802, - "grad_norm": 0.0, - "learning_rate": 1.1659322929510574e-06, - "loss": 0.8931, - "step": 29975 - }, - { - "epoch": 0.8494431692595427, - "grad_norm": 0.0, - "learning_rate": 1.165502247726028e-06, - "loss": 0.8468, - "step": 29976 - }, - { - "epoch": 0.849471506701805, - "grad_norm": 0.0, - "learning_rate": 1.165072276917254e-06, - "loss": 0.8625, - "step": 29977 - }, - { - "epoch": 0.8494998441440675, - "grad_norm": 0.0, - "learning_rate": 1.164642380528358e-06, - "loss": 0.784, - "step": 29978 - }, - { - "epoch": 0.84952818158633, - "grad_norm": 0.0, - "learning_rate": 1.1642125585629593e-06, - "loss": 0.8494, - "step": 29979 - }, - { - "epoch": 0.8495565190285925, - "grad_norm": 0.0, - "learning_rate": 1.1637828110246806e-06, - "loss": 0.8281, - "step": 29980 - }, - { - "epoch": 0.8495848564708549, - "grad_norm": 0.0, - "learning_rate": 1.1633531379171382e-06, - "loss": 0.8645, - "step": 29981 - }, - { - "epoch": 0.8496131939131174, - "grad_norm": 0.0, - "learning_rate": 1.1629235392439542e-06, - "loss": 0.7483, - "step": 29982 - }, - { - "epoch": 0.8496415313553799, - "grad_norm": 0.0, - "learning_rate": 1.1624940150087472e-06, - "loss": 0.7309, - "step": 29983 - }, - { - "epoch": 0.8496698687976423, - "grad_norm": 0.0, - "learning_rate": 1.162064565215134e-06, - "loss": 0.7781, - "step": 29984 - }, - { - "epoch": 0.8496982062399048, - "grad_norm": 0.0, - "learning_rate": 1.1616351898667333e-06, - "loss": 0.7639, - "step": 29985 - }, - { - "epoch": 0.8497265436821673, - "grad_norm": 0.0, - "learning_rate": 1.161205888967164e-06, - "loss": 0.7879, - "step": 29986 - }, - { - "epoch": 0.8497548811244298, - "grad_norm": 0.0, - "learning_rate": 1.1607766625200367e-06, - "loss": 0.7846, - "step": 29987 - }, - { - "epoch": 0.8497832185666921, - "grad_norm": 0.0, - "learning_rate": 1.160347510528972e-06, - "loss": 0.8569, - "step": 29988 - }, - { - "epoch": 0.8498115560089546, - "grad_norm": 0.0, - "learning_rate": 1.159918432997581e-06, - "loss": 0.7323, - "step": 29989 - }, - { - "epoch": 0.8498398934512171, - "grad_norm": 0.0, - "learning_rate": 1.1594894299294801e-06, - "loss": 0.9263, - "step": 29990 - }, - { - "epoch": 0.8498682308934795, - "grad_norm": 0.0, - "learning_rate": 1.1590605013282842e-06, - "loss": 0.8359, - "step": 29991 - }, - { - "epoch": 0.849896568335742, - "grad_norm": 0.0, - "learning_rate": 1.1586316471976034e-06, - "loss": 0.7249, - "step": 29992 - }, - { - "epoch": 0.8499249057780045, - "grad_norm": 0.0, - "learning_rate": 1.1582028675410507e-06, - "loss": 0.7437, - "step": 29993 - }, - { - "epoch": 0.8499532432202669, - "grad_norm": 0.0, - "learning_rate": 1.1577741623622407e-06, - "loss": 0.8148, - "step": 29994 - }, - { - "epoch": 0.8499815806625294, - "grad_norm": 0.0, - "learning_rate": 1.1573455316647796e-06, - "loss": 0.8909, - "step": 29995 - }, - { - "epoch": 0.8500099181047919, - "grad_norm": 0.0, - "learning_rate": 1.1569169754522812e-06, - "loss": 0.8099, - "step": 29996 - }, - { - "epoch": 0.8500382555470544, - "grad_norm": 0.0, - "learning_rate": 1.156488493728356e-06, - "loss": 0.7614, - "step": 29997 - }, - { - "epoch": 0.8500665929893167, - "grad_norm": 0.0, - "learning_rate": 1.1560600864966108e-06, - "loss": 0.8447, - "step": 29998 - }, - { - "epoch": 0.8500949304315792, - "grad_norm": 0.0, - "learning_rate": 1.1556317537606588e-06, - "loss": 0.7722, - "step": 29999 - }, - { - "epoch": 0.8501232678738417, - "grad_norm": 0.0, - "learning_rate": 1.1552034955241021e-06, - "loss": 0.5796, - "step": 30000 - }, - { - "epoch": 0.8501516053161041, - "grad_norm": 0.0, - "learning_rate": 1.1547753117905524e-06, - "loss": 0.8333, - "step": 30001 - }, - { - "epoch": 0.8501799427583666, - "grad_norm": 0.0, - "learning_rate": 1.1543472025636138e-06, - "loss": 0.8045, - "step": 30002 - }, - { - "epoch": 0.8502082802006291, - "grad_norm": 0.0, - "learning_rate": 1.153919167846893e-06, - "loss": 0.8165, - "step": 30003 - }, - { - "epoch": 0.8502366176428916, - "grad_norm": 0.0, - "learning_rate": 1.1534912076439975e-06, - "loss": 0.7543, - "step": 30004 - }, - { - "epoch": 0.850264955085154, - "grad_norm": 0.0, - "learning_rate": 1.1530633219585296e-06, - "loss": 0.8651, - "step": 30005 - }, - { - "epoch": 0.8502932925274165, - "grad_norm": 0.0, - "learning_rate": 1.1526355107940934e-06, - "loss": 0.8371, - "step": 30006 - }, - { - "epoch": 0.850321629969679, - "grad_norm": 0.0, - "learning_rate": 1.1522077741542947e-06, - "loss": 0.7567, - "step": 30007 - }, - { - "epoch": 0.8503499674119414, - "grad_norm": 0.0, - "learning_rate": 1.1517801120427351e-06, - "loss": 0.8094, - "step": 30008 - }, - { - "epoch": 0.8503783048542038, - "grad_norm": 0.0, - "learning_rate": 1.1513525244630198e-06, - "loss": 0.7182, - "step": 30009 - }, - { - "epoch": 0.8504066422964663, - "grad_norm": 0.0, - "learning_rate": 1.1509250114187454e-06, - "loss": 0.7382, - "step": 30010 - }, - { - "epoch": 0.8504349797387288, - "grad_norm": 0.0, - "learning_rate": 1.1504975729135158e-06, - "loss": 0.8505, - "step": 30011 - }, - { - "epoch": 0.8504633171809912, - "grad_norm": 0.0, - "learning_rate": 1.1500702089509331e-06, - "loss": 0.7049, - "step": 30012 - }, - { - "epoch": 0.8504916546232537, - "grad_norm": 0.0, - "learning_rate": 1.1496429195345937e-06, - "loss": 0.8181, - "step": 30013 - }, - { - "epoch": 0.8505199920655162, - "grad_norm": 0.0, - "learning_rate": 1.1492157046680985e-06, - "loss": 0.7416, - "step": 30014 - }, - { - "epoch": 0.8505483295077786, - "grad_norm": 0.0, - "learning_rate": 1.1487885643550478e-06, - "loss": 0.8532, - "step": 30015 - }, - { - "epoch": 0.8505766669500411, - "grad_norm": 0.0, - "learning_rate": 1.1483614985990365e-06, - "loss": 0.7982, - "step": 30016 - }, - { - "epoch": 0.8506050043923036, - "grad_norm": 0.0, - "learning_rate": 1.1479345074036641e-06, - "loss": 0.7857, - "step": 30017 - }, - { - "epoch": 0.850633341834566, - "grad_norm": 0.0, - "learning_rate": 1.1475075907725252e-06, - "loss": 0.7975, - "step": 30018 - }, - { - "epoch": 0.8506616792768285, - "grad_norm": 0.0, - "learning_rate": 1.1470807487092173e-06, - "loss": 0.8083, - "step": 30019 - }, - { - "epoch": 0.850690016719091, - "grad_norm": 0.0, - "learning_rate": 1.1466539812173362e-06, - "loss": 0.8154, - "step": 30020 - }, - { - "epoch": 0.8507183541613534, - "grad_norm": 0.0, - "learning_rate": 1.1462272883004754e-06, - "loss": 0.7533, - "step": 30021 - }, - { - "epoch": 0.8507466916036158, - "grad_norm": 0.0, - "learning_rate": 1.1458006699622314e-06, - "loss": 0.8323, - "step": 30022 - }, - { - "epoch": 0.8507750290458783, - "grad_norm": 0.0, - "learning_rate": 1.1453741262061958e-06, - "loss": 0.759, - "step": 30023 - }, - { - "epoch": 0.8508033664881408, - "grad_norm": 0.0, - "learning_rate": 1.1449476570359608e-06, - "loss": 0.8875, - "step": 30024 - }, - { - "epoch": 0.8508317039304032, - "grad_norm": 0.0, - "learning_rate": 1.1445212624551217e-06, - "loss": 0.8584, - "step": 30025 - }, - { - "epoch": 0.8508600413726657, - "grad_norm": 0.0, - "learning_rate": 1.1440949424672677e-06, - "loss": 0.6919, - "step": 30026 - }, - { - "epoch": 0.8508883788149282, - "grad_norm": 0.0, - "learning_rate": 1.1436686970759892e-06, - "loss": 0.8748, - "step": 30027 - }, - { - "epoch": 0.8509167162571907, - "grad_norm": 0.0, - "learning_rate": 1.1432425262848811e-06, - "loss": 0.8663, - "step": 30028 - }, - { - "epoch": 0.8509450536994531, - "grad_norm": 0.0, - "learning_rate": 1.1428164300975276e-06, - "loss": 0.806, - "step": 30029 - }, - { - "epoch": 0.8509733911417156, - "grad_norm": 0.0, - "learning_rate": 1.1423904085175219e-06, - "loss": 0.9011, - "step": 30030 - }, - { - "epoch": 0.851001728583978, - "grad_norm": 0.0, - "learning_rate": 1.1419644615484504e-06, - "loss": 0.7139, - "step": 30031 - }, - { - "epoch": 0.8510300660262404, - "grad_norm": 0.0, - "learning_rate": 1.1415385891939001e-06, - "loss": 0.7327, - "step": 30032 - }, - { - "epoch": 0.8510584034685029, - "grad_norm": 0.0, - "learning_rate": 1.1411127914574604e-06, - "loss": 0.7271, - "step": 30033 - }, - { - "epoch": 0.8510867409107654, - "grad_norm": 0.0, - "learning_rate": 1.1406870683427174e-06, - "loss": 0.7739, - "step": 30034 - }, - { - "epoch": 0.8511150783530279, - "grad_norm": 0.0, - "learning_rate": 1.140261419853257e-06, - "loss": 0.8432, - "step": 30035 - }, - { - "epoch": 0.8511434157952903, - "grad_norm": 0.0, - "learning_rate": 1.1398358459926663e-06, - "loss": 0.8467, - "step": 30036 - }, - { - "epoch": 0.8511717532375528, - "grad_norm": 0.0, - "learning_rate": 1.1394103467645267e-06, - "loss": 0.7906, - "step": 30037 - }, - { - "epoch": 0.8512000906798153, - "grad_norm": 0.0, - "learning_rate": 1.1389849221724258e-06, - "loss": 0.7801, - "step": 30038 - }, - { - "epoch": 0.8512284281220777, - "grad_norm": 0.0, - "learning_rate": 1.1385595722199438e-06, - "loss": 0.7416, - "step": 30039 - }, - { - "epoch": 0.8512567655643402, - "grad_norm": 0.0, - "learning_rate": 1.1381342969106647e-06, - "loss": 0.7755, - "step": 30040 - }, - { - "epoch": 0.8512851030066027, - "grad_norm": 0.0, - "learning_rate": 1.137709096248173e-06, - "loss": 0.7466, - "step": 30041 - }, - { - "epoch": 0.851313440448865, - "grad_norm": 0.0, - "learning_rate": 1.137283970236047e-06, - "loss": 0.8568, - "step": 30042 - }, - { - "epoch": 0.8513417778911275, - "grad_norm": 0.0, - "learning_rate": 1.1368589188778689e-06, - "loss": 0.8058, - "step": 30043 - }, - { - "epoch": 0.85137011533339, - "grad_norm": 0.0, - "learning_rate": 1.1364339421772219e-06, - "loss": 0.8587, - "step": 30044 - }, - { - "epoch": 0.8513984527756525, - "grad_norm": 0.0, - "learning_rate": 1.1360090401376812e-06, - "loss": 0.8493, - "step": 30045 - }, - { - "epoch": 0.8514267902179149, - "grad_norm": 0.0, - "learning_rate": 1.135584212762827e-06, - "loss": 0.7938, - "step": 30046 - }, - { - "epoch": 0.8514551276601774, - "grad_norm": 0.0, - "learning_rate": 1.1351594600562399e-06, - "loss": 0.8162, - "step": 30047 - }, - { - "epoch": 0.8514834651024399, - "grad_norm": 0.0, - "learning_rate": 1.134734782021496e-06, - "loss": 0.7567, - "step": 30048 - }, - { - "epoch": 0.8515118025447023, - "grad_norm": 0.0, - "learning_rate": 1.1343101786621747e-06, - "loss": 0.7966, - "step": 30049 - }, - { - "epoch": 0.8515401399869648, - "grad_norm": 0.0, - "learning_rate": 1.1338856499818497e-06, - "loss": 0.8665, - "step": 30050 - }, - { - "epoch": 0.8515684774292273, - "grad_norm": 0.0, - "learning_rate": 1.1334611959840992e-06, - "loss": 0.7662, - "step": 30051 - }, - { - "epoch": 0.8515968148714897, - "grad_norm": 0.0, - "learning_rate": 1.1330368166724958e-06, - "loss": 0.8507, - "step": 30052 - }, - { - "epoch": 0.8516251523137521, - "grad_norm": 0.0, - "learning_rate": 1.132612512050617e-06, - "loss": 0.8287, - "step": 30053 - }, - { - "epoch": 0.8516534897560146, - "grad_norm": 0.0, - "learning_rate": 1.1321882821220375e-06, - "loss": 0.8044, - "step": 30054 - }, - { - "epoch": 0.8516818271982771, - "grad_norm": 0.0, - "learning_rate": 1.1317641268903267e-06, - "loss": 0.8184, - "step": 30055 - }, - { - "epoch": 0.8517101646405395, - "grad_norm": 0.0, - "learning_rate": 1.1313400463590597e-06, - "loss": 0.8664, - "step": 30056 - }, - { - "epoch": 0.851738502082802, - "grad_norm": 0.0, - "learning_rate": 1.130916040531811e-06, - "loss": 0.8127, - "step": 30057 - }, - { - "epoch": 0.8517668395250645, - "grad_norm": 0.0, - "learning_rate": 1.130492109412148e-06, - "loss": 0.6593, - "step": 30058 - }, - { - "epoch": 0.851795176967327, - "grad_norm": 0.0, - "learning_rate": 1.1300682530036432e-06, - "loss": 0.8302, - "step": 30059 - }, - { - "epoch": 0.8518235144095894, - "grad_norm": 0.0, - "learning_rate": 1.1296444713098675e-06, - "loss": 0.6813, - "step": 30060 - }, - { - "epoch": 0.8518518518518519, - "grad_norm": 0.0, - "learning_rate": 1.12922076433439e-06, - "loss": 0.7687, - "step": 30061 - }, - { - "epoch": 0.8518801892941144, - "grad_norm": 0.0, - "learning_rate": 1.1287971320807822e-06, - "loss": 0.811, - "step": 30062 - }, - { - "epoch": 0.8519085267363767, - "grad_norm": 0.0, - "learning_rate": 1.1283735745526093e-06, - "loss": 0.8304, - "step": 30063 - }, - { - "epoch": 0.8519368641786392, - "grad_norm": 0.0, - "learning_rate": 1.1279500917534391e-06, - "loss": 0.8358, - "step": 30064 - }, - { - "epoch": 0.8519652016209017, - "grad_norm": 0.0, - "learning_rate": 1.1275266836868426e-06, - "loss": 0.8722, - "step": 30065 - }, - { - "epoch": 0.8519935390631641, - "grad_norm": 0.0, - "learning_rate": 1.1271033503563811e-06, - "loss": 0.7941, - "step": 30066 - }, - { - "epoch": 0.8520218765054266, - "grad_norm": 0.0, - "learning_rate": 1.126680091765625e-06, - "loss": 0.8144, - "step": 30067 - }, - { - "epoch": 0.8520502139476891, - "grad_norm": 0.0, - "learning_rate": 1.1262569079181352e-06, - "loss": 0.8106, - "step": 30068 - }, - { - "epoch": 0.8520785513899516, - "grad_norm": 0.0, - "learning_rate": 1.1258337988174794e-06, - "loss": 0.8154, - "step": 30069 - }, - { - "epoch": 0.852106888832214, - "grad_norm": 0.0, - "learning_rate": 1.1254107644672218e-06, - "loss": 0.924, - "step": 30070 - }, - { - "epoch": 0.8521352262744765, - "grad_norm": 0.0, - "learning_rate": 1.124987804870924e-06, - "loss": 0.826, - "step": 30071 - }, - { - "epoch": 0.852163563716739, - "grad_norm": 0.0, - "learning_rate": 1.1245649200321485e-06, - "loss": 0.8202, - "step": 30072 - }, - { - "epoch": 0.8521919011590013, - "grad_norm": 0.0, - "learning_rate": 1.124142109954459e-06, - "loss": 0.8382, - "step": 30073 - }, - { - "epoch": 0.8522202386012638, - "grad_norm": 0.0, - "learning_rate": 1.1237193746414167e-06, - "loss": 0.7394, - "step": 30074 - }, - { - "epoch": 0.8522485760435263, - "grad_norm": 0.0, - "learning_rate": 1.1232967140965845e-06, - "loss": 0.8092, - "step": 30075 - }, - { - "epoch": 0.8522769134857888, - "grad_norm": 0.0, - "learning_rate": 1.122874128323518e-06, - "loss": 0.7403, - "step": 30076 - }, - { - "epoch": 0.8523052509280512, - "grad_norm": 0.0, - "learning_rate": 1.1224516173257782e-06, - "loss": 0.7761, - "step": 30077 - }, - { - "epoch": 0.8523335883703137, - "grad_norm": 0.0, - "learning_rate": 1.1220291811069285e-06, - "loss": 0.8264, - "step": 30078 - }, - { - "epoch": 0.8523619258125762, - "grad_norm": 0.0, - "learning_rate": 1.121606819670521e-06, - "loss": 0.8417, - "step": 30079 - }, - { - "epoch": 0.8523902632548386, - "grad_norm": 0.0, - "learning_rate": 1.1211845330201188e-06, - "loss": 0.7738, - "step": 30080 - }, - { - "epoch": 0.8524186006971011, - "grad_norm": 0.0, - "learning_rate": 1.1207623211592733e-06, - "loss": 0.8556, - "step": 30081 - }, - { - "epoch": 0.8524469381393636, - "grad_norm": 0.0, - "learning_rate": 1.120340184091544e-06, - "loss": 0.8328, - "step": 30082 - }, - { - "epoch": 0.8524752755816261, - "grad_norm": 0.0, - "learning_rate": 1.1199181218204902e-06, - "loss": 0.8345, - "step": 30083 - }, - { - "epoch": 0.8525036130238884, - "grad_norm": 0.0, - "learning_rate": 1.1194961343496603e-06, - "loss": 0.8024, - "step": 30084 - }, - { - "epoch": 0.8525319504661509, - "grad_norm": 0.0, - "learning_rate": 1.1190742216826122e-06, - "loss": 0.7432, - "step": 30085 - }, - { - "epoch": 0.8525602879084134, - "grad_norm": 0.0, - "learning_rate": 1.1186523838229003e-06, - "loss": 0.7464, - "step": 30086 - }, - { - "epoch": 0.8525886253506758, - "grad_norm": 0.0, - "learning_rate": 1.1182306207740768e-06, - "loss": 0.801, - "step": 30087 - }, - { - "epoch": 0.8526169627929383, - "grad_norm": 0.0, - "learning_rate": 1.117808932539698e-06, - "loss": 0.8055, - "step": 30088 - }, - { - "epoch": 0.8526453002352008, - "grad_norm": 0.0, - "learning_rate": 1.1173873191233097e-06, - "loss": 0.7646, - "step": 30089 - }, - { - "epoch": 0.8526736376774632, - "grad_norm": 0.0, - "learning_rate": 1.1169657805284673e-06, - "loss": 0.8144, - "step": 30090 - }, - { - "epoch": 0.8527019751197257, - "grad_norm": 0.0, - "learning_rate": 1.116544316758722e-06, - "loss": 0.8656, - "step": 30091 - }, - { - "epoch": 0.8527303125619882, - "grad_norm": 0.0, - "learning_rate": 1.116122927817621e-06, - "loss": 0.7112, - "step": 30092 - }, - { - "epoch": 0.8527586500042507, - "grad_norm": 0.0, - "learning_rate": 1.1157016137087163e-06, - "loss": 0.8537, - "step": 30093 - }, - { - "epoch": 0.852786987446513, - "grad_norm": 0.0, - "learning_rate": 1.1152803744355578e-06, - "loss": 0.6937, - "step": 30094 - }, - { - "epoch": 0.8528153248887755, - "grad_norm": 0.0, - "learning_rate": 1.1148592100016885e-06, - "loss": 0.8046, - "step": 30095 - }, - { - "epoch": 0.852843662331038, - "grad_norm": 0.0, - "learning_rate": 1.1144381204106635e-06, - "loss": 0.7662, - "step": 30096 - }, - { - "epoch": 0.8528719997733004, - "grad_norm": 0.0, - "learning_rate": 1.114017105666022e-06, - "loss": 0.8412, - "step": 30097 - }, - { - "epoch": 0.8529003372155629, - "grad_norm": 0.0, - "learning_rate": 1.1135961657713156e-06, - "loss": 0.7594, - "step": 30098 - }, - { - "epoch": 0.8529286746578254, - "grad_norm": 0.0, - "learning_rate": 1.1131753007300884e-06, - "loss": 0.8636, - "step": 30099 - }, - { - "epoch": 0.8529570121000879, - "grad_norm": 0.0, - "learning_rate": 1.1127545105458847e-06, - "loss": 0.759, - "step": 30100 - }, - { - "epoch": 0.8529853495423503, - "grad_norm": 0.0, - "learning_rate": 1.1123337952222524e-06, - "loss": 0.8674, - "step": 30101 - }, - { - "epoch": 0.8530136869846128, - "grad_norm": 0.0, - "learning_rate": 1.1119131547627315e-06, - "loss": 0.7527, - "step": 30102 - }, - { - "epoch": 0.8530420244268753, - "grad_norm": 0.0, - "learning_rate": 1.111492589170866e-06, - "loss": 0.8159, - "step": 30103 - }, - { - "epoch": 0.8530703618691376, - "grad_norm": 0.0, - "learning_rate": 1.111072098450201e-06, - "loss": 0.8443, - "step": 30104 - }, - { - "epoch": 0.8530986993114001, - "grad_norm": 0.0, - "learning_rate": 1.1106516826042747e-06, - "loss": 0.8094, - "step": 30105 - }, - { - "epoch": 0.8531270367536626, - "grad_norm": 0.0, - "learning_rate": 1.1102313416366307e-06, - "loss": 0.9428, - "step": 30106 - }, - { - "epoch": 0.853155374195925, - "grad_norm": 0.0, - "learning_rate": 1.1098110755508107e-06, - "loss": 0.8042, - "step": 30107 - }, - { - "epoch": 0.8531837116381875, - "grad_norm": 0.0, - "learning_rate": 1.109390884350351e-06, - "loss": 0.8254, - "step": 30108 - }, - { - "epoch": 0.85321204908045, - "grad_norm": 0.0, - "learning_rate": 1.1089707680387962e-06, - "loss": 0.7864, - "step": 30109 - }, - { - "epoch": 0.8532403865227125, - "grad_norm": 0.0, - "learning_rate": 1.1085507266196794e-06, - "loss": 0.7938, - "step": 30110 - }, - { - "epoch": 0.8532687239649749, - "grad_norm": 0.0, - "learning_rate": 1.1081307600965419e-06, - "loss": 0.7949, - "step": 30111 - }, - { - "epoch": 0.8532970614072374, - "grad_norm": 0.0, - "learning_rate": 1.107710868472921e-06, - "loss": 0.8596, - "step": 30112 - }, - { - "epoch": 0.8533253988494999, - "grad_norm": 0.0, - "learning_rate": 1.107291051752354e-06, - "loss": 0.7646, - "step": 30113 - }, - { - "epoch": 0.8533537362917623, - "grad_norm": 0.0, - "learning_rate": 1.1068713099383754e-06, - "loss": 0.7164, - "step": 30114 - }, - { - "epoch": 0.8533820737340247, - "grad_norm": 0.0, - "learning_rate": 1.1064516430345252e-06, - "loss": 0.7798, - "step": 30115 - }, - { - "epoch": 0.8534104111762872, - "grad_norm": 0.0, - "learning_rate": 1.1060320510443335e-06, - "loss": 0.7664, - "step": 30116 - }, - { - "epoch": 0.8534387486185497, - "grad_norm": 0.0, - "learning_rate": 1.1056125339713385e-06, - "loss": 0.8559, - "step": 30117 - }, - { - "epoch": 0.8534670860608121, - "grad_norm": 0.0, - "learning_rate": 1.105193091819069e-06, - "loss": 0.9047, - "step": 30118 - }, - { - "epoch": 0.8534954235030746, - "grad_norm": 0.0, - "learning_rate": 1.1047737245910617e-06, - "loss": 0.81, - "step": 30119 - }, - { - "epoch": 0.8535237609453371, - "grad_norm": 0.0, - "learning_rate": 1.1043544322908506e-06, - "loss": 0.8039, - "step": 30120 - }, - { - "epoch": 0.8535520983875995, - "grad_norm": 0.0, - "learning_rate": 1.103935214921963e-06, - "loss": 0.7573, - "step": 30121 - }, - { - "epoch": 0.853580435829862, - "grad_norm": 0.0, - "learning_rate": 1.1035160724879335e-06, - "loss": 0.7999, - "step": 30122 - }, - { - "epoch": 0.8536087732721245, - "grad_norm": 0.0, - "learning_rate": 1.103097004992293e-06, - "loss": 0.8541, - "step": 30123 - }, - { - "epoch": 0.853637110714387, - "grad_norm": 0.0, - "learning_rate": 1.1026780124385684e-06, - "loss": 0.8405, - "step": 30124 - }, - { - "epoch": 0.8536654481566494, - "grad_norm": 0.0, - "learning_rate": 1.1022590948302904e-06, - "loss": 0.8717, - "step": 30125 - }, - { - "epoch": 0.8536937855989118, - "grad_norm": 0.0, - "learning_rate": 1.1018402521709892e-06, - "loss": 0.8048, - "step": 30126 - }, - { - "epoch": 0.8537221230411743, - "grad_norm": 0.0, - "learning_rate": 1.10142148446419e-06, - "loss": 0.7565, - "step": 30127 - }, - { - "epoch": 0.8537504604834367, - "grad_norm": 0.0, - "learning_rate": 1.1010027917134258e-06, - "loss": 0.8266, - "step": 30128 - }, - { - "epoch": 0.8537787979256992, - "grad_norm": 0.0, - "learning_rate": 1.1005841739222166e-06, - "loss": 0.8594, - "step": 30129 - }, - { - "epoch": 0.8538071353679617, - "grad_norm": 0.0, - "learning_rate": 1.1001656310940944e-06, - "loss": 0.7385, - "step": 30130 - }, - { - "epoch": 0.8538354728102241, - "grad_norm": 0.0, - "learning_rate": 1.0997471632325795e-06, - "loss": 0.8104, - "step": 30131 - }, - { - "epoch": 0.8538638102524866, - "grad_norm": 0.0, - "learning_rate": 1.0993287703411992e-06, - "loss": 0.8008, - "step": 30132 - }, - { - "epoch": 0.8538921476947491, - "grad_norm": 0.0, - "learning_rate": 1.0989104524234807e-06, - "loss": 0.8357, - "step": 30133 - }, - { - "epoch": 0.8539204851370116, - "grad_norm": 0.0, - "learning_rate": 1.098492209482942e-06, - "loss": 0.8456, - "step": 30134 - }, - { - "epoch": 0.853948822579274, - "grad_norm": 0.0, - "learning_rate": 1.0980740415231084e-06, - "loss": 0.8807, - "step": 30135 - }, - { - "epoch": 0.8539771600215365, - "grad_norm": 0.0, - "learning_rate": 1.0976559485475058e-06, - "loss": 0.8292, - "step": 30136 - }, - { - "epoch": 0.854005497463799, - "grad_norm": 0.0, - "learning_rate": 1.0972379305596503e-06, - "loss": 0.8196, - "step": 30137 - }, - { - "epoch": 0.8540338349060613, - "grad_norm": 0.0, - "learning_rate": 1.0968199875630658e-06, - "loss": 0.8035, - "step": 30138 - }, - { - "epoch": 0.8540621723483238, - "grad_norm": 0.0, - "learning_rate": 1.0964021195612728e-06, - "loss": 0.848, - "step": 30139 - }, - { - "epoch": 0.8540905097905863, - "grad_norm": 0.0, - "learning_rate": 1.0959843265577918e-06, - "loss": 0.8752, - "step": 30140 - }, - { - "epoch": 0.8541188472328488, - "grad_norm": 0.0, - "learning_rate": 1.0955666085561423e-06, - "loss": 0.836, - "step": 30141 - }, - { - "epoch": 0.8541471846751112, - "grad_norm": 0.0, - "learning_rate": 1.0951489655598402e-06, - "loss": 0.8615, - "step": 30142 - }, - { - "epoch": 0.8541755221173737, - "grad_norm": 0.0, - "learning_rate": 1.094731397572405e-06, - "loss": 0.8237, - "step": 30143 - }, - { - "epoch": 0.8542038595596362, - "grad_norm": 0.0, - "learning_rate": 1.094313904597355e-06, - "loss": 0.808, - "step": 30144 - }, - { - "epoch": 0.8542321970018986, - "grad_norm": 0.0, - "learning_rate": 1.0938964866382052e-06, - "loss": 0.8637, - "step": 30145 - }, - { - "epoch": 0.8542605344441611, - "grad_norm": 0.0, - "learning_rate": 1.0934791436984748e-06, - "loss": 0.7366, - "step": 30146 - }, - { - "epoch": 0.8542888718864236, - "grad_norm": 0.0, - "learning_rate": 1.0930618757816747e-06, - "loss": 0.7381, - "step": 30147 - }, - { - "epoch": 0.854317209328686, - "grad_norm": 0.0, - "learning_rate": 1.0926446828913218e-06, - "loss": 0.8462, - "step": 30148 - }, - { - "epoch": 0.8543455467709484, - "grad_norm": 0.0, - "learning_rate": 1.0922275650309322e-06, - "loss": 0.8709, - "step": 30149 - }, - { - "epoch": 0.8543738842132109, - "grad_norm": 0.0, - "learning_rate": 1.0918105222040154e-06, - "loss": 0.7929, - "step": 30150 - }, - { - "epoch": 0.8544022216554734, - "grad_norm": 0.0, - "learning_rate": 1.0913935544140864e-06, - "loss": 0.8415, - "step": 30151 - }, - { - "epoch": 0.8544305590977358, - "grad_norm": 0.0, - "learning_rate": 1.0909766616646589e-06, - "loss": 0.7744, - "step": 30152 - }, - { - "epoch": 0.8544588965399983, - "grad_norm": 0.0, - "learning_rate": 1.0905598439592425e-06, - "loss": 0.7891, - "step": 30153 - }, - { - "epoch": 0.8544872339822608, - "grad_norm": 0.0, - "learning_rate": 1.0901431013013507e-06, - "loss": 0.775, - "step": 30154 - }, - { - "epoch": 0.8545155714245232, - "grad_norm": 0.0, - "learning_rate": 1.0897264336944901e-06, - "loss": 0.8033, - "step": 30155 - }, - { - "epoch": 0.8545439088667857, - "grad_norm": 0.0, - "learning_rate": 1.0893098411421731e-06, - "loss": 0.9655, - "step": 30156 - }, - { - "epoch": 0.8545722463090482, - "grad_norm": 0.0, - "learning_rate": 1.0888933236479094e-06, - "loss": 0.7708, - "step": 30157 - }, - { - "epoch": 0.8546005837513106, - "grad_norm": 0.0, - "learning_rate": 1.0884768812152047e-06, - "loss": 0.6867, - "step": 30158 - }, - { - "epoch": 0.854628921193573, - "grad_norm": 0.0, - "learning_rate": 1.0880605138475708e-06, - "loss": 0.8427, - "step": 30159 - }, - { - "epoch": 0.8546572586358355, - "grad_norm": 0.0, - "learning_rate": 1.0876442215485106e-06, - "loss": 0.9402, - "step": 30160 - }, - { - "epoch": 0.854685596078098, - "grad_norm": 0.0, - "learning_rate": 1.0872280043215321e-06, - "loss": 0.7349, - "step": 30161 - }, - { - "epoch": 0.8547139335203604, - "grad_norm": 0.0, - "learning_rate": 1.086811862170144e-06, - "loss": 0.7294, - "step": 30162 - }, - { - "epoch": 0.8547422709626229, - "grad_norm": 0.0, - "learning_rate": 1.0863957950978477e-06, - "loss": 0.6671, - "step": 30163 - }, - { - "epoch": 0.8547706084048854, - "grad_norm": 0.0, - "learning_rate": 1.0859798031081491e-06, - "loss": 0.765, - "step": 30164 - }, - { - "epoch": 0.8547989458471479, - "grad_norm": 0.0, - "learning_rate": 1.0855638862045537e-06, - "loss": 0.816, - "step": 30165 - }, - { - "epoch": 0.8548272832894103, - "grad_norm": 0.0, - "learning_rate": 1.0851480443905627e-06, - "loss": 0.8549, - "step": 30166 - }, - { - "epoch": 0.8548556207316728, - "grad_norm": 0.0, - "learning_rate": 1.0847322776696834e-06, - "loss": 0.7703, - "step": 30167 - }, - { - "epoch": 0.8548839581739353, - "grad_norm": 0.0, - "learning_rate": 1.084316586045412e-06, - "loss": 0.8399, - "step": 30168 - }, - { - "epoch": 0.8549122956161976, - "grad_norm": 0.0, - "learning_rate": 1.0839009695212521e-06, - "loss": 0.872, - "step": 30169 - }, - { - "epoch": 0.8549406330584601, - "grad_norm": 0.0, - "learning_rate": 1.083485428100708e-06, - "loss": 0.8024, - "step": 30170 - }, - { - "epoch": 0.8549689705007226, - "grad_norm": 0.0, - "learning_rate": 1.0830699617872752e-06, - "loss": 0.9213, - "step": 30171 - }, - { - "epoch": 0.8549973079429851, - "grad_norm": 0.0, - "learning_rate": 1.082654570584456e-06, - "loss": 0.8043, - "step": 30172 - }, - { - "epoch": 0.8550256453852475, - "grad_norm": 0.0, - "learning_rate": 1.0822392544957494e-06, - "loss": 0.819, - "step": 30173 - }, - { - "epoch": 0.85505398282751, - "grad_norm": 0.0, - "learning_rate": 1.0818240135246528e-06, - "loss": 0.8283, - "step": 30174 - }, - { - "epoch": 0.8550823202697725, - "grad_norm": 0.0, - "learning_rate": 1.0814088476746642e-06, - "loss": 0.7864, - "step": 30175 - }, - { - "epoch": 0.8551106577120349, - "grad_norm": 0.0, - "learning_rate": 1.0809937569492801e-06, - "loss": 0.7297, - "step": 30176 - }, - { - "epoch": 0.8551389951542974, - "grad_norm": 0.0, - "learning_rate": 1.0805787413519974e-06, - "loss": 0.9168, - "step": 30177 - }, - { - "epoch": 0.8551673325965599, - "grad_norm": 0.0, - "learning_rate": 1.0801638008863114e-06, - "loss": 0.8869, - "step": 30178 - }, - { - "epoch": 0.8551956700388222, - "grad_norm": 0.0, - "learning_rate": 1.079748935555719e-06, - "loss": 0.933, - "step": 30179 - }, - { - "epoch": 0.8552240074810847, - "grad_norm": 0.0, - "learning_rate": 1.079334145363713e-06, - "loss": 0.8191, - "step": 30180 - }, - { - "epoch": 0.8552523449233472, - "grad_norm": 0.0, - "learning_rate": 1.0789194303137907e-06, - "loss": 0.8214, - "step": 30181 - }, - { - "epoch": 0.8552806823656097, - "grad_norm": 0.0, - "learning_rate": 1.0785047904094404e-06, - "loss": 0.9814, - "step": 30182 - }, - { - "epoch": 0.8553090198078721, - "grad_norm": 0.0, - "learning_rate": 1.0780902256541592e-06, - "loss": 0.7666, - "step": 30183 - }, - { - "epoch": 0.8553373572501346, - "grad_norm": 0.0, - "learning_rate": 1.0776757360514345e-06, - "loss": 0.7788, - "step": 30184 - }, - { - "epoch": 0.8553656946923971, - "grad_norm": 0.0, - "learning_rate": 1.0772613216047612e-06, - "loss": 0.8405, - "step": 30185 - }, - { - "epoch": 0.8553940321346595, - "grad_norm": 0.0, - "learning_rate": 1.0768469823176308e-06, - "loss": 0.8174, - "step": 30186 - }, - { - "epoch": 0.855422369576922, - "grad_norm": 0.0, - "learning_rate": 1.0764327181935297e-06, - "loss": 0.7665, - "step": 30187 - }, - { - "epoch": 0.8554507070191845, - "grad_norm": 0.0, - "learning_rate": 1.0760185292359515e-06, - "loss": 0.7435, - "step": 30188 - }, - { - "epoch": 0.855479044461447, - "grad_norm": 0.0, - "learning_rate": 1.0756044154483813e-06, - "loss": 0.7957, - "step": 30189 - }, - { - "epoch": 0.8555073819037093, - "grad_norm": 0.0, - "learning_rate": 1.0751903768343098e-06, - "loss": 0.8244, - "step": 30190 - }, - { - "epoch": 0.8555357193459718, - "grad_norm": 0.0, - "learning_rate": 1.0747764133972226e-06, - "loss": 0.6693, - "step": 30191 - }, - { - "epoch": 0.8555640567882343, - "grad_norm": 0.0, - "learning_rate": 1.0743625251406087e-06, - "loss": 0.8065, - "step": 30192 - }, - { - "epoch": 0.8555923942304967, - "grad_norm": 0.0, - "learning_rate": 1.0739487120679537e-06, - "loss": 0.7787, - "step": 30193 - }, - { - "epoch": 0.8556207316727592, - "grad_norm": 0.0, - "learning_rate": 1.073534974182745e-06, - "loss": 0.7537, - "step": 30194 - }, - { - "epoch": 0.8556490691150217, - "grad_norm": 0.0, - "learning_rate": 1.0731213114884643e-06, - "loss": 0.8501, - "step": 30195 - }, - { - "epoch": 0.8556774065572842, - "grad_norm": 0.0, - "learning_rate": 1.0727077239885985e-06, - "loss": 0.8509, - "step": 30196 - }, - { - "epoch": 0.8557057439995466, - "grad_norm": 0.0, - "learning_rate": 1.0722942116866296e-06, - "loss": 0.7719, - "step": 30197 - }, - { - "epoch": 0.8557340814418091, - "grad_norm": 0.0, - "learning_rate": 1.0718807745860426e-06, - "loss": 0.7438, - "step": 30198 - }, - { - "epoch": 0.8557624188840716, - "grad_norm": 0.0, - "learning_rate": 1.0714674126903202e-06, - "loss": 0.7624, - "step": 30199 - }, - { - "epoch": 0.855790756326334, - "grad_norm": 0.0, - "learning_rate": 1.0710541260029416e-06, - "loss": 0.8267, - "step": 30200 - }, - { - "epoch": 0.8558190937685964, - "grad_norm": 0.0, - "learning_rate": 1.07064091452739e-06, - "loss": 0.7842, - "step": 30201 - }, - { - "epoch": 0.8558474312108589, - "grad_norm": 0.0, - "learning_rate": 1.0702277782671467e-06, - "loss": 0.8126, - "step": 30202 - }, - { - "epoch": 0.8558757686531213, - "grad_norm": 0.0, - "learning_rate": 1.06981471722569e-06, - "loss": 0.766, - "step": 30203 - }, - { - "epoch": 0.8559041060953838, - "grad_norm": 0.0, - "learning_rate": 1.0694017314064997e-06, - "loss": 0.7243, - "step": 30204 - }, - { - "epoch": 0.8559324435376463, - "grad_norm": 0.0, - "learning_rate": 1.068988820813055e-06, - "loss": 0.8456, - "step": 30205 - }, - { - "epoch": 0.8559607809799088, - "grad_norm": 0.0, - "learning_rate": 1.0685759854488341e-06, - "loss": 0.7393, - "step": 30206 - }, - { - "epoch": 0.8559891184221712, - "grad_norm": 0.0, - "learning_rate": 1.0681632253173158e-06, - "loss": 0.873, - "step": 30207 - }, - { - "epoch": 0.8560174558644337, - "grad_norm": 0.0, - "learning_rate": 1.0677505404219735e-06, - "loss": 0.7491, - "step": 30208 - }, - { - "epoch": 0.8560457933066962, - "grad_norm": 0.0, - "learning_rate": 1.0673379307662856e-06, - "loss": 0.8982, - "step": 30209 - }, - { - "epoch": 0.8560741307489586, - "grad_norm": 0.0, - "learning_rate": 1.0669253963537285e-06, - "loss": 0.6862, - "step": 30210 - }, - { - "epoch": 0.856102468191221, - "grad_norm": 0.0, - "learning_rate": 1.0665129371877748e-06, - "loss": 0.8092, - "step": 30211 - }, - { - "epoch": 0.8561308056334835, - "grad_norm": 0.0, - "learning_rate": 1.0661005532719027e-06, - "loss": 0.7912, - "step": 30212 - }, - { - "epoch": 0.856159143075746, - "grad_norm": 0.0, - "learning_rate": 1.0656882446095795e-06, - "loss": 0.8452, - "step": 30213 - }, - { - "epoch": 0.8561874805180084, - "grad_norm": 0.0, - "learning_rate": 1.0652760112042837e-06, - "loss": 0.8139, - "step": 30214 - }, - { - "epoch": 0.8562158179602709, - "grad_norm": 0.0, - "learning_rate": 1.0648638530594867e-06, - "loss": 0.8686, - "step": 30215 - }, - { - "epoch": 0.8562441554025334, - "grad_norm": 0.0, - "learning_rate": 1.0644517701786571e-06, - "loss": 0.6955, - "step": 30216 - }, - { - "epoch": 0.8562724928447958, - "grad_norm": 0.0, - "learning_rate": 1.0640397625652688e-06, - "loss": 0.7899, - "step": 30217 - }, - { - "epoch": 0.8563008302870583, - "grad_norm": 0.0, - "learning_rate": 1.0636278302227921e-06, - "loss": 0.7462, - "step": 30218 - }, - { - "epoch": 0.8563291677293208, - "grad_norm": 0.0, - "learning_rate": 1.0632159731546965e-06, - "loss": 0.8284, - "step": 30219 - }, - { - "epoch": 0.8563575051715833, - "grad_norm": 0.0, - "learning_rate": 1.0628041913644538e-06, - "loss": 0.8328, - "step": 30220 - }, - { - "epoch": 0.8563858426138456, - "grad_norm": 0.0, - "learning_rate": 1.0623924848555279e-06, - "loss": 0.8245, - "step": 30221 - }, - { - "epoch": 0.8564141800561081, - "grad_norm": 0.0, - "learning_rate": 1.0619808536313892e-06, - "loss": 0.689, - "step": 30222 - }, - { - "epoch": 0.8564425174983706, - "grad_norm": 0.0, - "learning_rate": 1.061569297695506e-06, - "loss": 0.8578, - "step": 30223 - }, - { - "epoch": 0.856470854940633, - "grad_norm": 0.0, - "learning_rate": 1.0611578170513426e-06, - "loss": 0.8025, - "step": 30224 - }, - { - "epoch": 0.8564991923828955, - "grad_norm": 0.0, - "learning_rate": 1.060746411702368e-06, - "loss": 0.75, - "step": 30225 - }, - { - "epoch": 0.856527529825158, - "grad_norm": 0.0, - "learning_rate": 1.060335081652043e-06, - "loss": 0.6893, - "step": 30226 - }, - { - "epoch": 0.8565558672674204, - "grad_norm": 0.0, - "learning_rate": 1.0599238269038359e-06, - "loss": 0.8976, - "step": 30227 - }, - { - "epoch": 0.8565842047096829, - "grad_norm": 0.0, - "learning_rate": 1.0595126474612105e-06, - "loss": 0.7759, - "step": 30228 - }, - { - "epoch": 0.8566125421519454, - "grad_norm": 0.0, - "learning_rate": 1.0591015433276308e-06, - "loss": 0.8157, - "step": 30229 - }, - { - "epoch": 0.8566408795942079, - "grad_norm": 0.0, - "learning_rate": 1.0586905145065573e-06, - "loss": 0.7309, - "step": 30230 - }, - { - "epoch": 0.8566692170364703, - "grad_norm": 0.0, - "learning_rate": 1.0582795610014573e-06, - "loss": 0.7626, - "step": 30231 - }, - { - "epoch": 0.8566975544787327, - "grad_norm": 0.0, - "learning_rate": 1.0578686828157859e-06, - "loss": 0.8447, - "step": 30232 - }, - { - "epoch": 0.8567258919209952, - "grad_norm": 0.0, - "learning_rate": 1.05745787995301e-06, - "loss": 0.8387, - "step": 30233 - }, - { - "epoch": 0.8567542293632576, - "grad_norm": 0.0, - "learning_rate": 1.057047152416585e-06, - "loss": 0.8039, - "step": 30234 - }, - { - "epoch": 0.8567825668055201, - "grad_norm": 0.0, - "learning_rate": 1.056636500209972e-06, - "loss": 0.7832, - "step": 30235 - }, - { - "epoch": 0.8568109042477826, - "grad_norm": 0.0, - "learning_rate": 1.0562259233366334e-06, - "loss": 0.817, - "step": 30236 - }, - { - "epoch": 0.8568392416900451, - "grad_norm": 0.0, - "learning_rate": 1.0558154218000227e-06, - "loss": 0.7183, - "step": 30237 - }, - { - "epoch": 0.8568675791323075, - "grad_norm": 0.0, - "learning_rate": 1.0554049956035994e-06, - "loss": 0.8786, - "step": 30238 - }, - { - "epoch": 0.85689591657457, - "grad_norm": 0.0, - "learning_rate": 1.054994644750824e-06, - "loss": 0.7983, - "step": 30239 - }, - { - "epoch": 0.8569242540168325, - "grad_norm": 0.0, - "learning_rate": 1.0545843692451495e-06, - "loss": 0.7937, - "step": 30240 - }, - { - "epoch": 0.8569525914590949, - "grad_norm": 0.0, - "learning_rate": 1.054174169090031e-06, - "loss": 0.7968, - "step": 30241 - }, - { - "epoch": 0.8569809289013574, - "grad_norm": 0.0, - "learning_rate": 1.0537640442889263e-06, - "loss": 0.8503, - "step": 30242 - }, - { - "epoch": 0.8570092663436198, - "grad_norm": 0.0, - "learning_rate": 1.0533539948452886e-06, - "loss": 0.834, - "step": 30243 - }, - { - "epoch": 0.8570376037858823, - "grad_norm": 0.0, - "learning_rate": 1.0529440207625752e-06, - "loss": 0.8193, - "step": 30244 - }, - { - "epoch": 0.8570659412281447, - "grad_norm": 0.0, - "learning_rate": 1.0525341220442342e-06, - "loss": 0.9002, - "step": 30245 - }, - { - "epoch": 0.8570942786704072, - "grad_norm": 0.0, - "learning_rate": 1.052124298693723e-06, - "loss": 0.7768, - "step": 30246 - }, - { - "epoch": 0.8571226161126697, - "grad_norm": 0.0, - "learning_rate": 1.0517145507144889e-06, - "loss": 0.7262, - "step": 30247 - }, - { - "epoch": 0.8571509535549321, - "grad_norm": 0.0, - "learning_rate": 1.0513048781099867e-06, - "loss": 0.8562, - "step": 30248 - }, - { - "epoch": 0.8571792909971946, - "grad_norm": 0.0, - "learning_rate": 1.0508952808836682e-06, - "loss": 0.7673, - "step": 30249 - }, - { - "epoch": 0.8572076284394571, - "grad_norm": 0.0, - "learning_rate": 1.0504857590389805e-06, - "loss": 0.876, - "step": 30250 - }, - { - "epoch": 0.8572359658817195, - "grad_norm": 0.0, - "learning_rate": 1.0500763125793745e-06, - "loss": 0.8463, - "step": 30251 - }, - { - "epoch": 0.857264303323982, - "grad_norm": 0.0, - "learning_rate": 1.0496669415083006e-06, - "loss": 0.8195, - "step": 30252 - }, - { - "epoch": 0.8572926407662445, - "grad_norm": 0.0, - "learning_rate": 1.0492576458292036e-06, - "loss": 0.7537, - "step": 30253 - }, - { - "epoch": 0.857320978208507, - "grad_norm": 0.0, - "learning_rate": 1.0488484255455344e-06, - "loss": 0.7795, - "step": 30254 - }, - { - "epoch": 0.8573493156507693, - "grad_norm": 0.0, - "learning_rate": 1.048439280660738e-06, - "loss": 0.8452, - "step": 30255 - }, - { - "epoch": 0.8573776530930318, - "grad_norm": 0.0, - "learning_rate": 1.0480302111782614e-06, - "loss": 0.7622, - "step": 30256 - }, - { - "epoch": 0.8574059905352943, - "grad_norm": 0.0, - "learning_rate": 1.0476212171015532e-06, - "loss": 0.7665, - "step": 30257 - }, - { - "epoch": 0.8574343279775567, - "grad_norm": 0.0, - "learning_rate": 1.0472122984340528e-06, - "loss": 0.8332, - "step": 30258 - }, - { - "epoch": 0.8574626654198192, - "grad_norm": 0.0, - "learning_rate": 1.0468034551792083e-06, - "loss": 0.7463, - "step": 30259 - }, - { - "epoch": 0.8574910028620817, - "grad_norm": 0.0, - "learning_rate": 1.046394687340465e-06, - "loss": 0.8427, - "step": 30260 - }, - { - "epoch": 0.8575193403043442, - "grad_norm": 0.0, - "learning_rate": 1.0459859949212625e-06, - "loss": 0.8212, - "step": 30261 - }, - { - "epoch": 0.8575476777466066, - "grad_norm": 0.0, - "learning_rate": 1.0455773779250466e-06, - "loss": 0.7726, - "step": 30262 - }, - { - "epoch": 0.8575760151888691, - "grad_norm": 0.0, - "learning_rate": 1.045168836355256e-06, - "loss": 0.7805, - "step": 30263 - }, - { - "epoch": 0.8576043526311316, - "grad_norm": 0.0, - "learning_rate": 1.044760370215333e-06, - "loss": 0.8391, - "step": 30264 - }, - { - "epoch": 0.8576326900733939, - "grad_norm": 0.0, - "learning_rate": 1.0443519795087209e-06, - "loss": 0.8348, - "step": 30265 - }, - { - "epoch": 0.8576610275156564, - "grad_norm": 0.0, - "learning_rate": 1.0439436642388555e-06, - "loss": 0.782, - "step": 30266 - }, - { - "epoch": 0.8576893649579189, - "grad_norm": 0.0, - "learning_rate": 1.043535424409179e-06, - "loss": 0.7651, - "step": 30267 - }, - { - "epoch": 0.8577177024001814, - "grad_norm": 0.0, - "learning_rate": 1.043127260023129e-06, - "loss": 0.864, - "step": 30268 - }, - { - "epoch": 0.8577460398424438, - "grad_norm": 0.0, - "learning_rate": 1.0427191710841444e-06, - "loss": 0.8462, - "step": 30269 - }, - { - "epoch": 0.8577743772847063, - "grad_norm": 0.0, - "learning_rate": 1.0423111575956646e-06, - "loss": 0.771, - "step": 30270 - }, - { - "epoch": 0.8578027147269688, - "grad_norm": 0.0, - "learning_rate": 1.0419032195611223e-06, - "loss": 0.7745, - "step": 30271 - }, - { - "epoch": 0.8578310521692312, - "grad_norm": 0.0, - "learning_rate": 1.0414953569839558e-06, - "loss": 0.7396, - "step": 30272 - }, - { - "epoch": 0.8578593896114937, - "grad_norm": 0.0, - "learning_rate": 1.0410875698676014e-06, - "loss": 0.7647, - "step": 30273 - }, - { - "epoch": 0.8578877270537562, - "grad_norm": 0.0, - "learning_rate": 1.040679858215493e-06, - "loss": 0.8276, - "step": 30274 - }, - { - "epoch": 0.8579160644960185, - "grad_norm": 0.0, - "learning_rate": 1.0402722220310656e-06, - "loss": 0.9141, - "step": 30275 - }, - { - "epoch": 0.857944401938281, - "grad_norm": 0.0, - "learning_rate": 1.039864661317751e-06, - "loss": 0.7338, - "step": 30276 - }, - { - "epoch": 0.8579727393805435, - "grad_norm": 0.0, - "learning_rate": 1.039457176078984e-06, - "loss": 0.7123, - "step": 30277 - }, - { - "epoch": 0.858001076822806, - "grad_norm": 0.0, - "learning_rate": 1.0390497663181975e-06, - "loss": 0.788, - "step": 30278 - }, - { - "epoch": 0.8580294142650684, - "grad_norm": 0.0, - "learning_rate": 1.038642432038821e-06, - "loss": 0.7615, - "step": 30279 - }, - { - "epoch": 0.8580577517073309, - "grad_norm": 0.0, - "learning_rate": 1.0382351732442876e-06, - "loss": 0.8964, - "step": 30280 - }, - { - "epoch": 0.8580860891495934, - "grad_norm": 0.0, - "learning_rate": 1.0378279899380261e-06, - "loss": 0.7957, - "step": 30281 - }, - { - "epoch": 0.8581144265918558, - "grad_norm": 0.0, - "learning_rate": 1.0374208821234688e-06, - "loss": 0.8293, - "step": 30282 - }, - { - "epoch": 0.8581427640341183, - "grad_norm": 0.0, - "learning_rate": 1.0370138498040449e-06, - "loss": 0.7423, - "step": 30283 - }, - { - "epoch": 0.8581711014763808, - "grad_norm": 0.0, - "learning_rate": 1.0366068929831797e-06, - "loss": 0.7652, - "step": 30284 - }, - { - "epoch": 0.8581994389186433, - "grad_norm": 0.0, - "learning_rate": 1.0362000116643024e-06, - "loss": 0.8204, - "step": 30285 - }, - { - "epoch": 0.8582277763609056, - "grad_norm": 0.0, - "learning_rate": 1.0357932058508434e-06, - "loss": 0.8543, - "step": 30286 - }, - { - "epoch": 0.8582561138031681, - "grad_norm": 0.0, - "learning_rate": 1.0353864755462262e-06, - "loss": 0.9058, - "step": 30287 - }, - { - "epoch": 0.8582844512454306, - "grad_norm": 0.0, - "learning_rate": 1.0349798207538764e-06, - "loss": 0.85, - "step": 30288 - }, - { - "epoch": 0.858312788687693, - "grad_norm": 0.0, - "learning_rate": 1.0345732414772224e-06, - "loss": 0.7908, - "step": 30289 - }, - { - "epoch": 0.8583411261299555, - "grad_norm": 0.0, - "learning_rate": 1.0341667377196863e-06, - "loss": 0.8517, - "step": 30290 - }, - { - "epoch": 0.858369463572218, - "grad_norm": 0.0, - "learning_rate": 1.0337603094846948e-06, - "loss": 0.7592, - "step": 30291 - }, - { - "epoch": 0.8583978010144805, - "grad_norm": 0.0, - "learning_rate": 1.0333539567756668e-06, - "loss": 0.826, - "step": 30292 - }, - { - "epoch": 0.8584261384567429, - "grad_norm": 0.0, - "learning_rate": 1.032947679596029e-06, - "loss": 0.8438, - "step": 30293 - }, - { - "epoch": 0.8584544758990054, - "grad_norm": 0.0, - "learning_rate": 1.0325414779492028e-06, - "loss": 0.8263, - "step": 30294 - }, - { - "epoch": 0.8584828133412679, - "grad_norm": 0.0, - "learning_rate": 1.03213535183861e-06, - "loss": 0.7601, - "step": 30295 - }, - { - "epoch": 0.8585111507835302, - "grad_norm": 0.0, - "learning_rate": 1.03172930126767e-06, - "loss": 0.774, - "step": 30296 - }, - { - "epoch": 0.8585394882257927, - "grad_norm": 0.0, - "learning_rate": 1.031323326239807e-06, - "loss": 0.7679, - "step": 30297 - }, - { - "epoch": 0.8585678256680552, - "grad_norm": 0.0, - "learning_rate": 1.0309174267584365e-06, - "loss": 0.7691, - "step": 30298 - }, - { - "epoch": 0.8585961631103176, - "grad_norm": 0.0, - "learning_rate": 1.0305116028269812e-06, - "loss": 0.736, - "step": 30299 - }, - { - "epoch": 0.8586245005525801, - "grad_norm": 0.0, - "learning_rate": 1.0301058544488552e-06, - "loss": 0.909, - "step": 30300 - }, - { - "epoch": 0.8586528379948426, - "grad_norm": 0.0, - "learning_rate": 1.0297001816274775e-06, - "loss": 0.7208, - "step": 30301 - }, - { - "epoch": 0.8586811754371051, - "grad_norm": 0.0, - "learning_rate": 1.0292945843662694e-06, - "loss": 0.9248, - "step": 30302 - }, - { - "epoch": 0.8587095128793675, - "grad_norm": 0.0, - "learning_rate": 1.028889062668642e-06, - "loss": 0.6766, - "step": 30303 - }, - { - "epoch": 0.85873785032163, - "grad_norm": 0.0, - "learning_rate": 1.0284836165380153e-06, - "loss": 0.8526, - "step": 30304 - }, - { - "epoch": 0.8587661877638925, - "grad_norm": 0.0, - "learning_rate": 1.0280782459778006e-06, - "loss": 0.7605, - "step": 30305 - }, - { - "epoch": 0.8587945252061548, - "grad_norm": 0.0, - "learning_rate": 1.027672950991414e-06, - "loss": 0.7348, - "step": 30306 - }, - { - "epoch": 0.8588228626484173, - "grad_norm": 0.0, - "learning_rate": 1.02726773158227e-06, - "loss": 0.839, - "step": 30307 - }, - { - "epoch": 0.8588512000906798, - "grad_norm": 0.0, - "learning_rate": 1.0268625877537818e-06, - "loss": 0.7724, - "step": 30308 - }, - { - "epoch": 0.8588795375329423, - "grad_norm": 0.0, - "learning_rate": 1.0264575195093628e-06, - "loss": 0.8086, - "step": 30309 - }, - { - "epoch": 0.8589078749752047, - "grad_norm": 0.0, - "learning_rate": 1.0260525268524258e-06, - "loss": 0.808, - "step": 30310 - }, - { - "epoch": 0.8589362124174672, - "grad_norm": 0.0, - "learning_rate": 1.0256476097863788e-06, - "loss": 0.8638, - "step": 30311 - }, - { - "epoch": 0.8589645498597297, - "grad_norm": 0.0, - "learning_rate": 1.025242768314637e-06, - "loss": 0.7162, - "step": 30312 - }, - { - "epoch": 0.8589928873019921, - "grad_norm": 0.0, - "learning_rate": 1.0248380024406057e-06, - "loss": 0.7854, - "step": 30313 - }, - { - "epoch": 0.8590212247442546, - "grad_norm": 0.0, - "learning_rate": 1.0244333121676964e-06, - "loss": 0.7272, - "step": 30314 - }, - { - "epoch": 0.8590495621865171, - "grad_norm": 0.0, - "learning_rate": 1.0240286974993207e-06, - "loss": 0.8109, - "step": 30315 - }, - { - "epoch": 0.8590778996287795, - "grad_norm": 0.0, - "learning_rate": 1.023624158438883e-06, - "loss": 0.8263, - "step": 30316 - }, - { - "epoch": 0.859106237071042, - "grad_norm": 0.0, - "learning_rate": 1.0232196949897922e-06, - "loss": 0.7771, - "step": 30317 - }, - { - "epoch": 0.8591345745133044, - "grad_norm": 0.0, - "learning_rate": 1.0228153071554559e-06, - "loss": 0.7501, - "step": 30318 - }, - { - "epoch": 0.8591629119555669, - "grad_norm": 0.0, - "learning_rate": 1.022410994939279e-06, - "loss": 0.817, - "step": 30319 - }, - { - "epoch": 0.8591912493978293, - "grad_norm": 0.0, - "learning_rate": 1.022006758344668e-06, - "loss": 0.8397, - "step": 30320 - }, - { - "epoch": 0.8592195868400918, - "grad_norm": 0.0, - "learning_rate": 1.0216025973750277e-06, - "loss": 0.8756, - "step": 30321 - }, - { - "epoch": 0.8592479242823543, - "grad_norm": 0.0, - "learning_rate": 1.0211985120337631e-06, - "loss": 0.8075, - "step": 30322 - }, - { - "epoch": 0.8592762617246167, - "grad_norm": 0.0, - "learning_rate": 1.0207945023242794e-06, - "loss": 0.8062, - "step": 30323 - }, - { - "epoch": 0.8593045991668792, - "grad_norm": 0.0, - "learning_rate": 1.020390568249976e-06, - "loss": 0.7743, - "step": 30324 - }, - { - "epoch": 0.8593329366091417, - "grad_norm": 0.0, - "learning_rate": 1.019986709814257e-06, - "loss": 0.8623, - "step": 30325 - }, - { - "epoch": 0.8593612740514042, - "grad_norm": 0.0, - "learning_rate": 1.0195829270205272e-06, - "loss": 0.8801, - "step": 30326 - }, - { - "epoch": 0.8593896114936665, - "grad_norm": 0.0, - "learning_rate": 1.0191792198721829e-06, - "loss": 0.8327, - "step": 30327 - }, - { - "epoch": 0.859417948935929, - "grad_norm": 0.0, - "learning_rate": 1.0187755883726291e-06, - "loss": 0.8674, - "step": 30328 - }, - { - "epoch": 0.8594462863781915, - "grad_norm": 0.0, - "learning_rate": 1.018372032525261e-06, - "loss": 0.8328, - "step": 30329 - }, - { - "epoch": 0.8594746238204539, - "grad_norm": 0.0, - "learning_rate": 1.0179685523334814e-06, - "loss": 0.7601, - "step": 30330 - }, - { - "epoch": 0.8595029612627164, - "grad_norm": 0.0, - "learning_rate": 1.0175651478006898e-06, - "loss": 0.882, - "step": 30331 - }, - { - "epoch": 0.8595312987049789, - "grad_norm": 0.0, - "learning_rate": 1.0171618189302802e-06, - "loss": 0.7391, - "step": 30332 - }, - { - "epoch": 0.8595596361472414, - "grad_norm": 0.0, - "learning_rate": 1.0167585657256528e-06, - "loss": 0.6824, - "step": 30333 - }, - { - "epoch": 0.8595879735895038, - "grad_norm": 0.0, - "learning_rate": 1.0163553881902032e-06, - "loss": 0.9123, - "step": 30334 - }, - { - "epoch": 0.8596163110317663, - "grad_norm": 0.0, - "learning_rate": 1.0159522863273285e-06, - "loss": 0.8143, - "step": 30335 - }, - { - "epoch": 0.8596446484740288, - "grad_norm": 0.0, - "learning_rate": 1.015549260140426e-06, - "loss": 0.8538, - "step": 30336 - }, - { - "epoch": 0.8596729859162912, - "grad_norm": 0.0, - "learning_rate": 1.0151463096328863e-06, - "loss": 0.788, - "step": 30337 - }, - { - "epoch": 0.8597013233585536, - "grad_norm": 0.0, - "learning_rate": 1.0147434348081052e-06, - "loss": 0.8385, - "step": 30338 - }, - { - "epoch": 0.8597296608008161, - "grad_norm": 0.0, - "learning_rate": 1.0143406356694797e-06, - "loss": 0.734, - "step": 30339 - }, - { - "epoch": 0.8597579982430785, - "grad_norm": 0.0, - "learning_rate": 1.0139379122203974e-06, - "loss": 0.7234, - "step": 30340 - }, - { - "epoch": 0.859786335685341, - "grad_norm": 0.0, - "learning_rate": 1.0135352644642538e-06, - "loss": 0.8382, - "step": 30341 - }, - { - "epoch": 0.8598146731276035, - "grad_norm": 0.0, - "learning_rate": 1.0131326924044393e-06, - "loss": 0.8536, - "step": 30342 - }, - { - "epoch": 0.859843010569866, - "grad_norm": 0.0, - "learning_rate": 1.0127301960443448e-06, - "loss": 0.7842, - "step": 30343 - }, - { - "epoch": 0.8598713480121284, - "grad_norm": 0.0, - "learning_rate": 1.0123277753873629e-06, - "loss": 0.7164, - "step": 30344 - }, - { - "epoch": 0.8598996854543909, - "grad_norm": 0.0, - "learning_rate": 1.0119254304368798e-06, - "loss": 0.72, - "step": 30345 - }, - { - "epoch": 0.8599280228966534, - "grad_norm": 0.0, - "learning_rate": 1.0115231611962861e-06, - "loss": 0.7695, - "step": 30346 - }, - { - "epoch": 0.8599563603389158, - "grad_norm": 0.0, - "learning_rate": 1.0111209676689715e-06, - "loss": 0.8455, - "step": 30347 - }, - { - "epoch": 0.8599846977811783, - "grad_norm": 0.0, - "learning_rate": 1.010718849858322e-06, - "loss": 0.9431, - "step": 30348 - }, - { - "epoch": 0.8600130352234407, - "grad_norm": 0.0, - "learning_rate": 1.0103168077677284e-06, - "loss": 0.8363, - "step": 30349 - }, - { - "epoch": 0.8600413726657032, - "grad_norm": 0.0, - "learning_rate": 1.0099148414005723e-06, - "loss": 0.8103, - "step": 30350 - }, - { - "epoch": 0.8600697101079656, - "grad_norm": 0.0, - "learning_rate": 1.009512950760242e-06, - "loss": 0.8199, - "step": 30351 - }, - { - "epoch": 0.8600980475502281, - "grad_norm": 0.0, - "learning_rate": 1.0091111358501238e-06, - "loss": 0.8273, - "step": 30352 - }, - { - "epoch": 0.8601263849924906, - "grad_norm": 0.0, - "learning_rate": 1.0087093966736006e-06, - "loss": 0.8581, - "step": 30353 - }, - { - "epoch": 0.860154722434753, - "grad_norm": 0.0, - "learning_rate": 1.0083077332340563e-06, - "loss": 0.7918, - "step": 30354 - }, - { - "epoch": 0.8601830598770155, - "grad_norm": 0.0, - "learning_rate": 1.007906145534877e-06, - "loss": 0.7294, - "step": 30355 - }, - { - "epoch": 0.860211397319278, - "grad_norm": 0.0, - "learning_rate": 1.0075046335794413e-06, - "loss": 0.7835, - "step": 30356 - }, - { - "epoch": 0.8602397347615405, - "grad_norm": 0.0, - "learning_rate": 1.0071031973711354e-06, - "loss": 0.7706, - "step": 30357 - }, - { - "epoch": 0.8602680722038029, - "grad_norm": 0.0, - "learning_rate": 1.0067018369133363e-06, - "loss": 0.7615, - "step": 30358 - }, - { - "epoch": 0.8602964096460654, - "grad_norm": 0.0, - "learning_rate": 1.006300552209427e-06, - "loss": 0.8248, - "step": 30359 - }, - { - "epoch": 0.8603247470883278, - "grad_norm": 0.0, - "learning_rate": 1.0058993432627884e-06, - "loss": 0.7589, - "step": 30360 - }, - { - "epoch": 0.8603530845305902, - "grad_norm": 0.0, - "learning_rate": 1.0054982100767996e-06, - "loss": 0.896, - "step": 30361 - }, - { - "epoch": 0.8603814219728527, - "grad_norm": 0.0, - "learning_rate": 1.0050971526548413e-06, - "loss": 0.8611, - "step": 30362 - }, - { - "epoch": 0.8604097594151152, - "grad_norm": 0.0, - "learning_rate": 1.0046961710002879e-06, - "loss": 0.7198, - "step": 30363 - }, - { - "epoch": 0.8604380968573776, - "grad_norm": 0.0, - "learning_rate": 1.0042952651165195e-06, - "loss": 0.815, - "step": 30364 - }, - { - "epoch": 0.8604664342996401, - "grad_norm": 0.0, - "learning_rate": 1.0038944350069136e-06, - "loss": 0.7106, - "step": 30365 - }, - { - "epoch": 0.8604947717419026, - "grad_norm": 0.0, - "learning_rate": 1.003493680674844e-06, - "loss": 0.6529, - "step": 30366 - }, - { - "epoch": 0.8605231091841651, - "grad_norm": 0.0, - "learning_rate": 1.0030930021236884e-06, - "loss": 0.8077, - "step": 30367 - }, - { - "epoch": 0.8605514466264275, - "grad_norm": 0.0, - "learning_rate": 1.0026923993568228e-06, - "loss": 0.7614, - "step": 30368 - }, - { - "epoch": 0.86057978406869, - "grad_norm": 0.0, - "learning_rate": 1.0022918723776175e-06, - "loss": 0.8098, - "step": 30369 - }, - { - "epoch": 0.8606081215109525, - "grad_norm": 0.0, - "learning_rate": 1.0018914211894514e-06, - "loss": 0.8223, - "step": 30370 - }, - { - "epoch": 0.8606364589532148, - "grad_norm": 0.0, - "learning_rate": 1.001491045795694e-06, - "loss": 0.782, - "step": 30371 - }, - { - "epoch": 0.8606647963954773, - "grad_norm": 0.0, - "learning_rate": 1.0010907461997189e-06, - "loss": 0.7423, - "step": 30372 - }, - { - "epoch": 0.8606931338377398, - "grad_norm": 0.0, - "learning_rate": 1.000690522404898e-06, - "loss": 0.8014, - "step": 30373 - }, - { - "epoch": 0.8607214712800023, - "grad_norm": 0.0, - "learning_rate": 1.0002903744146019e-06, - "loss": 0.6997, - "step": 30374 - }, - { - "epoch": 0.8607498087222647, - "grad_norm": 0.0, - "learning_rate": 9.998903022322026e-07, - "loss": 0.7261, - "step": 30375 - }, - { - "epoch": 0.8607781461645272, - "grad_norm": 0.0, - "learning_rate": 9.994903058610706e-07, - "loss": 0.7921, - "step": 30376 - }, - { - "epoch": 0.8608064836067897, - "grad_norm": 0.0, - "learning_rate": 9.99090385304573e-07, - "loss": 0.711, - "step": 30377 - }, - { - "epoch": 0.8608348210490521, - "grad_norm": 0.0, - "learning_rate": 9.986905405660806e-07, - "loss": 0.9577, - "step": 30378 - }, - { - "epoch": 0.8608631584913146, - "grad_norm": 0.0, - "learning_rate": 9.982907716489587e-07, - "loss": 0.7569, - "step": 30379 - }, - { - "epoch": 0.8608914959335771, - "grad_norm": 0.0, - "learning_rate": 9.978910785565765e-07, - "loss": 0.9012, - "step": 30380 - }, - { - "epoch": 0.8609198333758395, - "grad_norm": 0.0, - "learning_rate": 9.974914612923026e-07, - "loss": 0.7772, - "step": 30381 - }, - { - "epoch": 0.8609481708181019, - "grad_norm": 0.0, - "learning_rate": 9.970919198594998e-07, - "loss": 0.7725, - "step": 30382 - }, - { - "epoch": 0.8609765082603644, - "grad_norm": 0.0, - "learning_rate": 9.966924542615353e-07, - "loss": 0.8095, - "step": 30383 - }, - { - "epoch": 0.8610048457026269, - "grad_norm": 0.0, - "learning_rate": 9.962930645017731e-07, - "loss": 0.7639, - "step": 30384 - }, - { - "epoch": 0.8610331831448893, - "grad_norm": 0.0, - "learning_rate": 9.958937505835776e-07, - "loss": 0.8279, - "step": 30385 - }, - { - "epoch": 0.8610615205871518, - "grad_norm": 0.0, - "learning_rate": 9.954945125103122e-07, - "loss": 0.8632, - "step": 30386 - }, - { - "epoch": 0.8610898580294143, - "grad_norm": 0.0, - "learning_rate": 9.9509535028534e-07, - "loss": 0.8481, - "step": 30387 - }, - { - "epoch": 0.8611181954716767, - "grad_norm": 0.0, - "learning_rate": 9.94696263912024e-07, - "loss": 0.8136, - "step": 30388 - }, - { - "epoch": 0.8611465329139392, - "grad_norm": 0.0, - "learning_rate": 9.942972533937268e-07, - "loss": 0.7862, - "step": 30389 - }, - { - "epoch": 0.8611748703562017, - "grad_norm": 0.0, - "learning_rate": 9.938983187338068e-07, - "loss": 0.7909, - "step": 30390 - }, - { - "epoch": 0.8612032077984642, - "grad_norm": 0.0, - "learning_rate": 9.934994599356275e-07, - "loss": 0.8235, - "step": 30391 - }, - { - "epoch": 0.8612315452407265, - "grad_norm": 0.0, - "learning_rate": 9.931006770025442e-07, - "loss": 0.7921, - "step": 30392 - }, - { - "epoch": 0.861259882682989, - "grad_norm": 0.0, - "learning_rate": 9.927019699379182e-07, - "loss": 0.7888, - "step": 30393 - }, - { - "epoch": 0.8612882201252515, - "grad_norm": 0.0, - "learning_rate": 9.923033387451108e-07, - "loss": 0.7909, - "step": 30394 - }, - { - "epoch": 0.8613165575675139, - "grad_norm": 0.0, - "learning_rate": 9.919047834274754e-07, - "loss": 0.6747, - "step": 30395 - }, - { - "epoch": 0.8613448950097764, - "grad_norm": 0.0, - "learning_rate": 9.91506303988371e-07, - "loss": 0.7632, - "step": 30396 - }, - { - "epoch": 0.8613732324520389, - "grad_norm": 0.0, - "learning_rate": 9.911079004311563e-07, - "loss": 0.7812, - "step": 30397 - }, - { - "epoch": 0.8614015698943014, - "grad_norm": 0.0, - "learning_rate": 9.90709572759183e-07, - "loss": 0.771, - "step": 30398 - }, - { - "epoch": 0.8614299073365638, - "grad_norm": 0.0, - "learning_rate": 9.903113209758098e-07, - "loss": 0.807, - "step": 30399 - }, - { - "epoch": 0.8614582447788263, - "grad_norm": 0.0, - "learning_rate": 9.89913145084388e-07, - "loss": 0.7735, - "step": 30400 - }, - { - "epoch": 0.8614865822210888, - "grad_norm": 0.0, - "learning_rate": 9.89515045088275e-07, - "loss": 0.7772, - "step": 30401 - }, - { - "epoch": 0.8615149196633511, - "grad_norm": 0.0, - "learning_rate": 9.89117020990824e-07, - "loss": 0.9465, - "step": 30402 - }, - { - "epoch": 0.8615432571056136, - "grad_norm": 0.0, - "learning_rate": 9.887190727953844e-07, - "loss": 0.8226, - "step": 30403 - }, - { - "epoch": 0.8615715945478761, - "grad_norm": 0.0, - "learning_rate": 9.88321200505311e-07, - "loss": 0.8157, - "step": 30404 - }, - { - "epoch": 0.8615999319901386, - "grad_norm": 0.0, - "learning_rate": 9.87923404123956e-07, - "loss": 0.7487, - "step": 30405 - }, - { - "epoch": 0.861628269432401, - "grad_norm": 0.0, - "learning_rate": 9.875256836546664e-07, - "loss": 0.7594, - "step": 30406 - }, - { - "epoch": 0.8616566068746635, - "grad_norm": 0.0, - "learning_rate": 9.871280391007965e-07, - "loss": 0.8849, - "step": 30407 - }, - { - "epoch": 0.861684944316926, - "grad_norm": 0.0, - "learning_rate": 9.867304704656932e-07, - "loss": 0.7116, - "step": 30408 - }, - { - "epoch": 0.8617132817591884, - "grad_norm": 0.0, - "learning_rate": 9.863329777527053e-07, - "loss": 0.9157, - "step": 30409 - }, - { - "epoch": 0.8617416192014509, - "grad_norm": 0.0, - "learning_rate": 9.859355609651843e-07, - "loss": 0.8549, - "step": 30410 - }, - { - "epoch": 0.8617699566437134, - "grad_norm": 0.0, - "learning_rate": 9.855382201064723e-07, - "loss": 0.8464, - "step": 30411 - }, - { - "epoch": 0.8617982940859757, - "grad_norm": 0.0, - "learning_rate": 9.851409551799207e-07, - "loss": 0.7854, - "step": 30412 - }, - { - "epoch": 0.8618266315282382, - "grad_norm": 0.0, - "learning_rate": 9.847437661888738e-07, - "loss": 0.8498, - "step": 30413 - }, - { - "epoch": 0.8618549689705007, - "grad_norm": 0.0, - "learning_rate": 9.843466531366774e-07, - "loss": 0.784, - "step": 30414 - }, - { - "epoch": 0.8618833064127632, - "grad_norm": 0.0, - "learning_rate": 9.839496160266805e-07, - "loss": 0.7884, - "step": 30415 - }, - { - "epoch": 0.8619116438550256, - "grad_norm": 0.0, - "learning_rate": 9.835526548622209e-07, - "loss": 0.9023, - "step": 30416 - }, - { - "epoch": 0.8619399812972881, - "grad_norm": 0.0, - "learning_rate": 9.831557696466454e-07, - "loss": 0.7565, - "step": 30417 - }, - { - "epoch": 0.8619683187395506, - "grad_norm": 0.0, - "learning_rate": 9.827589603832989e-07, - "loss": 0.7215, - "step": 30418 - }, - { - "epoch": 0.861996656181813, - "grad_norm": 0.0, - "learning_rate": 9.823622270755206e-07, - "loss": 0.8525, - "step": 30419 - }, - { - "epoch": 0.8620249936240755, - "grad_norm": 0.0, - "learning_rate": 9.81965569726656e-07, - "loss": 0.816, - "step": 30420 - }, - { - "epoch": 0.862053331066338, - "grad_norm": 0.0, - "learning_rate": 9.815689883400426e-07, - "loss": 0.8604, - "step": 30421 - }, - { - "epoch": 0.8620816685086005, - "grad_norm": 0.0, - "learning_rate": 9.811724829190216e-07, - "loss": 0.7939, - "step": 30422 - }, - { - "epoch": 0.8621100059508628, - "grad_norm": 0.0, - "learning_rate": 9.807760534669363e-07, - "loss": 0.8024, - "step": 30423 - }, - { - "epoch": 0.8621383433931253, - "grad_norm": 0.0, - "learning_rate": 9.803796999871217e-07, - "loss": 0.8118, - "step": 30424 - }, - { - "epoch": 0.8621666808353878, - "grad_norm": 0.0, - "learning_rate": 9.799834224829173e-07, - "loss": 0.8487, - "step": 30425 - }, - { - "epoch": 0.8621950182776502, - "grad_norm": 0.0, - "learning_rate": 9.79587220957663e-07, - "loss": 0.7967, - "step": 30426 - }, - { - "epoch": 0.8622233557199127, - "grad_norm": 0.0, - "learning_rate": 9.791910954146943e-07, - "loss": 0.7587, - "step": 30427 - }, - { - "epoch": 0.8622516931621752, - "grad_norm": 0.0, - "learning_rate": 9.787950458573515e-07, - "loss": 0.7705, - "step": 30428 - }, - { - "epoch": 0.8622800306044377, - "grad_norm": 0.0, - "learning_rate": 9.783990722889658e-07, - "loss": 0.8698, - "step": 30429 - }, - { - "epoch": 0.8623083680467001, - "grad_norm": 0.0, - "learning_rate": 9.78003174712876e-07, - "loss": 0.7935, - "step": 30430 - }, - { - "epoch": 0.8623367054889626, - "grad_norm": 0.0, - "learning_rate": 9.776073531324159e-07, - "loss": 0.7977, - "step": 30431 - }, - { - "epoch": 0.8623650429312251, - "grad_norm": 0.0, - "learning_rate": 9.772116075509185e-07, - "loss": 0.8076, - "step": 30432 - }, - { - "epoch": 0.8623933803734875, - "grad_norm": 0.0, - "learning_rate": 9.76815937971718e-07, - "loss": 0.8167, - "step": 30433 - }, - { - "epoch": 0.8624217178157499, - "grad_norm": 0.0, - "learning_rate": 9.76420344398149e-07, - "loss": 0.7665, - "step": 30434 - }, - { - "epoch": 0.8624500552580124, - "grad_norm": 0.0, - "learning_rate": 9.760248268335405e-07, - "loss": 0.7822, - "step": 30435 - }, - { - "epoch": 0.8624783927002748, - "grad_norm": 0.0, - "learning_rate": 9.756293852812283e-07, - "loss": 0.7388, - "step": 30436 - }, - { - "epoch": 0.8625067301425373, - "grad_norm": 0.0, - "learning_rate": 9.752340197445386e-07, - "loss": 0.7741, - "step": 30437 - }, - { - "epoch": 0.8625350675847998, - "grad_norm": 0.0, - "learning_rate": 9.748387302268037e-07, - "loss": 0.7977, - "step": 30438 - }, - { - "epoch": 0.8625634050270623, - "grad_norm": 0.0, - "learning_rate": 9.744435167313537e-07, - "loss": 0.9248, - "step": 30439 - }, - { - "epoch": 0.8625917424693247, - "grad_norm": 0.0, - "learning_rate": 9.740483792615184e-07, - "loss": 0.7869, - "step": 30440 - }, - { - "epoch": 0.8626200799115872, - "grad_norm": 0.0, - "learning_rate": 9.736533178206265e-07, - "loss": 0.8096, - "step": 30441 - }, - { - "epoch": 0.8626484173538497, - "grad_norm": 0.0, - "learning_rate": 9.732583324120027e-07, - "loss": 0.8471, - "step": 30442 - }, - { - "epoch": 0.862676754796112, - "grad_norm": 0.0, - "learning_rate": 9.728634230389756e-07, - "loss": 0.8017, - "step": 30443 - }, - { - "epoch": 0.8627050922383745, - "grad_norm": 0.0, - "learning_rate": 9.724685897048747e-07, - "loss": 0.7805, - "step": 30444 - }, - { - "epoch": 0.862733429680637, - "grad_norm": 0.0, - "learning_rate": 9.720738324130208e-07, - "loss": 0.7667, - "step": 30445 - }, - { - "epoch": 0.8627617671228995, - "grad_norm": 0.0, - "learning_rate": 9.716791511667412e-07, - "loss": 0.8061, - "step": 30446 - }, - { - "epoch": 0.8627901045651619, - "grad_norm": 0.0, - "learning_rate": 9.712845459693632e-07, - "loss": 0.7146, - "step": 30447 - }, - { - "epoch": 0.8628184420074244, - "grad_norm": 0.0, - "learning_rate": 9.708900168242063e-07, - "loss": 0.69, - "step": 30448 - }, - { - "epoch": 0.8628467794496869, - "grad_norm": 0.0, - "learning_rate": 9.704955637345948e-07, - "loss": 0.8178, - "step": 30449 - }, - { - "epoch": 0.8628751168919493, - "grad_norm": 0.0, - "learning_rate": 9.701011867038534e-07, - "loss": 0.8627, - "step": 30450 - }, - { - "epoch": 0.8629034543342118, - "grad_norm": 0.0, - "learning_rate": 9.697068857353043e-07, - "loss": 0.7915, - "step": 30451 - }, - { - "epoch": 0.8629317917764743, - "grad_norm": 0.0, - "learning_rate": 9.693126608322645e-07, - "loss": 0.87, - "step": 30452 - }, - { - "epoch": 0.8629601292187368, - "grad_norm": 0.0, - "learning_rate": 9.689185119980592e-07, - "loss": 0.7387, - "step": 30453 - }, - { - "epoch": 0.8629884666609992, - "grad_norm": 0.0, - "learning_rate": 9.68524439236006e-07, - "loss": 0.8791, - "step": 30454 - }, - { - "epoch": 0.8630168041032616, - "grad_norm": 0.0, - "learning_rate": 9.681304425494275e-07, - "loss": 0.836, - "step": 30455 - }, - { - "epoch": 0.8630451415455241, - "grad_norm": 0.0, - "learning_rate": 9.67736521941638e-07, - "loss": 0.7728, - "step": 30456 - }, - { - "epoch": 0.8630734789877865, - "grad_norm": 0.0, - "learning_rate": 9.6734267741596e-07, - "loss": 0.7637, - "step": 30457 - }, - { - "epoch": 0.863101816430049, - "grad_norm": 0.0, - "learning_rate": 9.669489089757068e-07, - "loss": 0.9259, - "step": 30458 - }, - { - "epoch": 0.8631301538723115, - "grad_norm": 0.0, - "learning_rate": 9.665552166241965e-07, - "loss": 0.8235, - "step": 30459 - }, - { - "epoch": 0.8631584913145739, - "grad_norm": 0.0, - "learning_rate": 9.66161600364749e-07, - "loss": 0.8092, - "step": 30460 - }, - { - "epoch": 0.8631868287568364, - "grad_norm": 0.0, - "learning_rate": 9.657680602006747e-07, - "loss": 0.7545, - "step": 30461 - }, - { - "epoch": 0.8632151661990989, - "grad_norm": 0.0, - "learning_rate": 9.653745961352911e-07, - "loss": 0.7632, - "step": 30462 - }, - { - "epoch": 0.8632435036413614, - "grad_norm": 0.0, - "learning_rate": 9.649812081719124e-07, - "loss": 0.8079, - "step": 30463 - }, - { - "epoch": 0.8632718410836238, - "grad_norm": 0.0, - "learning_rate": 9.64587896313851e-07, - "loss": 0.8585, - "step": 30464 - }, - { - "epoch": 0.8633001785258863, - "grad_norm": 0.0, - "learning_rate": 9.641946605644237e-07, - "loss": 0.7452, - "step": 30465 - }, - { - "epoch": 0.8633285159681487, - "grad_norm": 0.0, - "learning_rate": 9.638015009269375e-07, - "loss": 0.7324, - "step": 30466 - }, - { - "epoch": 0.8633568534104111, - "grad_norm": 0.0, - "learning_rate": 9.634084174047076e-07, - "loss": 0.7964, - "step": 30467 - }, - { - "epoch": 0.8633851908526736, - "grad_norm": 0.0, - "learning_rate": 9.630154100010458e-07, - "loss": 0.7516, - "step": 30468 - }, - { - "epoch": 0.8634135282949361, - "grad_norm": 0.0, - "learning_rate": 9.626224787192594e-07, - "loss": 0.8114, - "step": 30469 - }, - { - "epoch": 0.8634418657371986, - "grad_norm": 0.0, - "learning_rate": 9.622296235626616e-07, - "loss": 0.8508, - "step": 30470 - }, - { - "epoch": 0.863470203179461, - "grad_norm": 0.0, - "learning_rate": 9.61836844534557e-07, - "loss": 0.7847, - "step": 30471 - }, - { - "epoch": 0.8634985406217235, - "grad_norm": 0.0, - "learning_rate": 9.614441416382581e-07, - "loss": 0.8264, - "step": 30472 - }, - { - "epoch": 0.863526878063986, - "grad_norm": 0.0, - "learning_rate": 9.610515148770726e-07, - "loss": 0.8028, - "step": 30473 - }, - { - "epoch": 0.8635552155062484, - "grad_norm": 0.0, - "learning_rate": 9.606589642543064e-07, - "loss": 0.7535, - "step": 30474 - }, - { - "epoch": 0.8635835529485109, - "grad_norm": 0.0, - "learning_rate": 9.602664897732649e-07, - "loss": 0.7674, - "step": 30475 - }, - { - "epoch": 0.8636118903907734, - "grad_norm": 0.0, - "learning_rate": 9.598740914372562e-07, - "loss": 0.7019, - "step": 30476 - }, - { - "epoch": 0.8636402278330358, - "grad_norm": 0.0, - "learning_rate": 9.594817692495839e-07, - "loss": 0.7114, - "step": 30477 - }, - { - "epoch": 0.8636685652752982, - "grad_norm": 0.0, - "learning_rate": 9.590895232135566e-07, - "loss": 0.8663, - "step": 30478 - }, - { - "epoch": 0.8636969027175607, - "grad_norm": 0.0, - "learning_rate": 9.586973533324738e-07, - "loss": 0.8663, - "step": 30479 - }, - { - "epoch": 0.8637252401598232, - "grad_norm": 0.0, - "learning_rate": 9.583052596096409e-07, - "loss": 0.7928, - "step": 30480 - }, - { - "epoch": 0.8637535776020856, - "grad_norm": 0.0, - "learning_rate": 9.579132420483617e-07, - "loss": 0.7616, - "step": 30481 - }, - { - "epoch": 0.8637819150443481, - "grad_norm": 0.0, - "learning_rate": 9.575213006519347e-07, - "loss": 0.719, - "step": 30482 - }, - { - "epoch": 0.8638102524866106, - "grad_norm": 0.0, - "learning_rate": 9.571294354236637e-07, - "loss": 0.8385, - "step": 30483 - }, - { - "epoch": 0.863838589928873, - "grad_norm": 0.0, - "learning_rate": 9.567376463668522e-07, - "loss": 0.7773, - "step": 30484 - }, - { - "epoch": 0.8638669273711355, - "grad_norm": 0.0, - "learning_rate": 9.563459334847946e-07, - "loss": 0.7826, - "step": 30485 - }, - { - "epoch": 0.863895264813398, - "grad_norm": 0.0, - "learning_rate": 9.559542967807954e-07, - "loss": 0.8519, - "step": 30486 - }, - { - "epoch": 0.8639236022556604, - "grad_norm": 0.0, - "learning_rate": 9.55562736258151e-07, - "loss": 0.7896, - "step": 30487 - }, - { - "epoch": 0.8639519396979228, - "grad_norm": 0.0, - "learning_rate": 9.551712519201594e-07, - "loss": 0.8864, - "step": 30488 - }, - { - "epoch": 0.8639802771401853, - "grad_norm": 0.0, - "learning_rate": 9.547798437701194e-07, - "loss": 0.7431, - "step": 30489 - }, - { - "epoch": 0.8640086145824478, - "grad_norm": 0.0, - "learning_rate": 9.543885118113272e-07, - "loss": 0.7349, - "step": 30490 - }, - { - "epoch": 0.8640369520247102, - "grad_norm": 0.0, - "learning_rate": 9.5399725604708e-07, - "loss": 0.7958, - "step": 30491 - }, - { - "epoch": 0.8640652894669727, - "grad_norm": 0.0, - "learning_rate": 9.536060764806742e-07, - "loss": 0.7771, - "step": 30492 - }, - { - "epoch": 0.8640936269092352, - "grad_norm": 0.0, - "learning_rate": 9.532149731154028e-07, - "loss": 0.7665, - "step": 30493 - }, - { - "epoch": 0.8641219643514977, - "grad_norm": 0.0, - "learning_rate": 9.528239459545618e-07, - "loss": 0.8644, - "step": 30494 - }, - { - "epoch": 0.8641503017937601, - "grad_norm": 0.0, - "learning_rate": 9.524329950014433e-07, - "loss": 0.8532, - "step": 30495 - }, - { - "epoch": 0.8641786392360226, - "grad_norm": 0.0, - "learning_rate": 9.520421202593411e-07, - "loss": 0.7433, - "step": 30496 - }, - { - "epoch": 0.864206976678285, - "grad_norm": 0.0, - "learning_rate": 9.516513217315504e-07, - "loss": 0.9474, - "step": 30497 - }, - { - "epoch": 0.8642353141205474, - "grad_norm": 0.0, - "learning_rate": 9.512605994213587e-07, - "loss": 0.7378, - "step": 30498 - }, - { - "epoch": 0.8642636515628099, - "grad_norm": 0.0, - "learning_rate": 9.508699533320598e-07, - "loss": 0.8163, - "step": 30499 - }, - { - "epoch": 0.8642919890050724, - "grad_norm": 0.0, - "learning_rate": 9.504793834669424e-07, - "loss": 0.8921, - "step": 30500 - }, - { - "epoch": 0.8643203264473349, - "grad_norm": 0.0, - "learning_rate": 9.500888898292981e-07, - "loss": 0.9147, - "step": 30501 - }, - { - "epoch": 0.8643486638895973, - "grad_norm": 0.0, - "learning_rate": 9.496984724224156e-07, - "loss": 0.8594, - "step": 30502 - }, - { - "epoch": 0.8643770013318598, - "grad_norm": 0.0, - "learning_rate": 9.493081312495834e-07, - "loss": 0.843, - "step": 30503 - }, - { - "epoch": 0.8644053387741223, - "grad_norm": 0.0, - "learning_rate": 9.489178663140897e-07, - "loss": 0.8056, - "step": 30504 - }, - { - "epoch": 0.8644336762163847, - "grad_norm": 0.0, - "learning_rate": 9.485276776192243e-07, - "loss": 0.7301, - "step": 30505 - }, - { - "epoch": 0.8644620136586472, - "grad_norm": 0.0, - "learning_rate": 9.481375651682689e-07, - "loss": 0.736, - "step": 30506 - }, - { - "epoch": 0.8644903511009097, - "grad_norm": 0.0, - "learning_rate": 9.477475289645133e-07, - "loss": 0.7825, - "step": 30507 - }, - { - "epoch": 0.864518688543172, - "grad_norm": 0.0, - "learning_rate": 9.473575690112413e-07, - "loss": 0.7729, - "step": 30508 - }, - { - "epoch": 0.8645470259854345, - "grad_norm": 0.0, - "learning_rate": 9.469676853117371e-07, - "loss": 0.7802, - "step": 30509 - }, - { - "epoch": 0.864575363427697, - "grad_norm": 0.0, - "learning_rate": 9.46577877869288e-07, - "loss": 0.8346, - "step": 30510 - }, - { - "epoch": 0.8646037008699595, - "grad_norm": 0.0, - "learning_rate": 9.461881466871736e-07, - "loss": 0.8704, - "step": 30511 - }, - { - "epoch": 0.8646320383122219, - "grad_norm": 0.0, - "learning_rate": 9.457984917686791e-07, - "loss": 0.7534, - "step": 30512 - }, - { - "epoch": 0.8646603757544844, - "grad_norm": 0.0, - "learning_rate": 9.454089131170874e-07, - "loss": 0.806, - "step": 30513 - }, - { - "epoch": 0.8646887131967469, - "grad_norm": 0.0, - "learning_rate": 9.450194107356758e-07, - "loss": 0.8412, - "step": 30514 - }, - { - "epoch": 0.8647170506390093, - "grad_norm": 0.0, - "learning_rate": 9.446299846277296e-07, - "loss": 0.7653, - "step": 30515 - }, - { - "epoch": 0.8647453880812718, - "grad_norm": 0.0, - "learning_rate": 9.442406347965271e-07, - "loss": 0.8008, - "step": 30516 - }, - { - "epoch": 0.8647737255235343, - "grad_norm": 0.0, - "learning_rate": 9.438513612453493e-07, - "loss": 0.8427, - "step": 30517 - }, - { - "epoch": 0.8648020629657968, - "grad_norm": 0.0, - "learning_rate": 9.434621639774755e-07, - "loss": 0.8038, - "step": 30518 - }, - { - "epoch": 0.8648304004080591, - "grad_norm": 0.0, - "learning_rate": 9.43073042996181e-07, - "loss": 0.8797, - "step": 30519 - }, - { - "epoch": 0.8648587378503216, - "grad_norm": 0.0, - "learning_rate": 9.426839983047454e-07, - "loss": 0.7849, - "step": 30520 - }, - { - "epoch": 0.8648870752925841, - "grad_norm": 0.0, - "learning_rate": 9.422950299064482e-07, - "loss": 0.778, - "step": 30521 - }, - { - "epoch": 0.8649154127348465, - "grad_norm": 0.0, - "learning_rate": 9.419061378045613e-07, - "loss": 0.7887, - "step": 30522 - }, - { - "epoch": 0.864943750177109, - "grad_norm": 0.0, - "learning_rate": 9.415173220023643e-07, - "loss": 0.8551, - "step": 30523 - }, - { - "epoch": 0.8649720876193715, - "grad_norm": 0.0, - "learning_rate": 9.411285825031291e-07, - "loss": 0.7521, - "step": 30524 - }, - { - "epoch": 0.8650004250616339, - "grad_norm": 0.0, - "learning_rate": 9.407399193101319e-07, - "loss": 0.8413, - "step": 30525 - }, - { - "epoch": 0.8650287625038964, - "grad_norm": 0.0, - "learning_rate": 9.403513324266489e-07, - "loss": 0.75, - "step": 30526 - }, - { - "epoch": 0.8650570999461589, - "grad_norm": 0.0, - "learning_rate": 9.399628218559476e-07, - "loss": 0.9069, - "step": 30527 - }, - { - "epoch": 0.8650854373884214, - "grad_norm": 0.0, - "learning_rate": 9.395743876013052e-07, - "loss": 0.7727, - "step": 30528 - }, - { - "epoch": 0.8651137748306837, - "grad_norm": 0.0, - "learning_rate": 9.391860296659916e-07, - "loss": 0.7445, - "step": 30529 - }, - { - "epoch": 0.8651421122729462, - "grad_norm": 0.0, - "learning_rate": 9.387977480532784e-07, - "loss": 0.8797, - "step": 30530 - }, - { - "epoch": 0.8651704497152087, - "grad_norm": 0.0, - "learning_rate": 9.384095427664386e-07, - "loss": 0.8147, - "step": 30531 - }, - { - "epoch": 0.8651987871574711, - "grad_norm": 0.0, - "learning_rate": 9.380214138087385e-07, - "loss": 0.8486, - "step": 30532 - }, - { - "epoch": 0.8652271245997336, - "grad_norm": 0.0, - "learning_rate": 9.376333611834487e-07, - "loss": 0.8412, - "step": 30533 - }, - { - "epoch": 0.8652554620419961, - "grad_norm": 0.0, - "learning_rate": 9.372453848938401e-07, - "loss": 0.6922, - "step": 30534 - }, - { - "epoch": 0.8652837994842586, - "grad_norm": 0.0, - "learning_rate": 9.368574849431778e-07, - "loss": 0.8937, - "step": 30535 - }, - { - "epoch": 0.865312136926521, - "grad_norm": 0.0, - "learning_rate": 9.364696613347324e-07, - "loss": 0.9178, - "step": 30536 - }, - { - "epoch": 0.8653404743687835, - "grad_norm": 0.0, - "learning_rate": 9.360819140717659e-07, - "loss": 0.7726, - "step": 30537 - }, - { - "epoch": 0.865368811811046, - "grad_norm": 0.0, - "learning_rate": 9.356942431575478e-07, - "loss": 0.8648, - "step": 30538 - }, - { - "epoch": 0.8653971492533084, - "grad_norm": 0.0, - "learning_rate": 9.353066485953455e-07, - "loss": 0.8196, - "step": 30539 - }, - { - "epoch": 0.8654254866955708, - "grad_norm": 0.0, - "learning_rate": 9.349191303884187e-07, - "loss": 0.7846, - "step": 30540 - }, - { - "epoch": 0.8654538241378333, - "grad_norm": 0.0, - "learning_rate": 9.345316885400346e-07, - "loss": 0.765, - "step": 30541 - }, - { - "epoch": 0.8654821615800958, - "grad_norm": 0.0, - "learning_rate": 9.341443230534564e-07, - "loss": 0.9407, - "step": 30542 - }, - { - "epoch": 0.8655104990223582, - "grad_norm": 0.0, - "learning_rate": 9.337570339319468e-07, - "loss": 0.8322, - "step": 30543 - }, - { - "epoch": 0.8655388364646207, - "grad_norm": 0.0, - "learning_rate": 9.3336982117877e-07, - "loss": 0.8063, - "step": 30544 - }, - { - "epoch": 0.8655671739068832, - "grad_norm": 0.0, - "learning_rate": 9.329826847971857e-07, - "loss": 0.7929, - "step": 30545 - }, - { - "epoch": 0.8655955113491456, - "grad_norm": 0.0, - "learning_rate": 9.325956247904532e-07, - "loss": 0.7756, - "step": 30546 - }, - { - "epoch": 0.8656238487914081, - "grad_norm": 0.0, - "learning_rate": 9.322086411618381e-07, - "loss": 0.7553, - "step": 30547 - }, - { - "epoch": 0.8656521862336706, - "grad_norm": 0.0, - "learning_rate": 9.318217339145941e-07, - "loss": 0.8593, - "step": 30548 - }, - { - "epoch": 0.865680523675933, - "grad_norm": 0.0, - "learning_rate": 9.314349030519843e-07, - "loss": 0.7759, - "step": 30549 - }, - { - "epoch": 0.8657088611181954, - "grad_norm": 0.0, - "learning_rate": 9.31048148577266e-07, - "loss": 0.8863, - "step": 30550 - }, - { - "epoch": 0.8657371985604579, - "grad_norm": 0.0, - "learning_rate": 9.306614704936967e-07, - "loss": 0.7493, - "step": 30551 - }, - { - "epoch": 0.8657655360027204, - "grad_norm": 0.0, - "learning_rate": 9.302748688045338e-07, - "loss": 0.7789, - "step": 30552 - }, - { - "epoch": 0.8657938734449828, - "grad_norm": 0.0, - "learning_rate": 9.298883435130335e-07, - "loss": 0.8285, - "step": 30553 - }, - { - "epoch": 0.8658222108872453, - "grad_norm": 0.0, - "learning_rate": 9.295018946224499e-07, - "loss": 0.8696, - "step": 30554 - }, - { - "epoch": 0.8658505483295078, - "grad_norm": 0.0, - "learning_rate": 9.291155221360415e-07, - "loss": 0.8132, - "step": 30555 - }, - { - "epoch": 0.8658788857717702, - "grad_norm": 0.0, - "learning_rate": 9.287292260570613e-07, - "loss": 0.7889, - "step": 30556 - }, - { - "epoch": 0.8659072232140327, - "grad_norm": 0.0, - "learning_rate": 9.283430063887644e-07, - "loss": 0.8345, - "step": 30557 - }, - { - "epoch": 0.8659355606562952, - "grad_norm": 0.0, - "learning_rate": 9.279568631344016e-07, - "loss": 0.8182, - "step": 30558 - }, - { - "epoch": 0.8659638980985577, - "grad_norm": 0.0, - "learning_rate": 9.275707962972281e-07, - "loss": 0.7798, - "step": 30559 - }, - { - "epoch": 0.86599223554082, - "grad_norm": 0.0, - "learning_rate": 9.271848058804955e-07, - "loss": 0.8902, - "step": 30560 - }, - { - "epoch": 0.8660205729830825, - "grad_norm": 0.0, - "learning_rate": 9.267988918874527e-07, - "loss": 0.7761, - "step": 30561 - }, - { - "epoch": 0.866048910425345, - "grad_norm": 0.0, - "learning_rate": 9.264130543213512e-07, - "loss": 0.736, - "step": 30562 - }, - { - "epoch": 0.8660772478676074, - "grad_norm": 0.0, - "learning_rate": 9.260272931854453e-07, - "loss": 0.7769, - "step": 30563 - }, - { - "epoch": 0.8661055853098699, - "grad_norm": 0.0, - "learning_rate": 9.256416084829778e-07, - "loss": 0.7887, - "step": 30564 - }, - { - "epoch": 0.8661339227521324, - "grad_norm": 0.0, - "learning_rate": 9.252560002172039e-07, - "loss": 0.6973, - "step": 30565 - }, - { - "epoch": 0.8661622601943949, - "grad_norm": 0.0, - "learning_rate": 9.248704683913656e-07, - "loss": 0.8291, - "step": 30566 - }, - { - "epoch": 0.8661905976366573, - "grad_norm": 0.0, - "learning_rate": 9.244850130087135e-07, - "loss": 0.7484, - "step": 30567 - }, - { - "epoch": 0.8662189350789198, - "grad_norm": 0.0, - "learning_rate": 9.24099634072495e-07, - "loss": 0.7242, - "step": 30568 - }, - { - "epoch": 0.8662472725211823, - "grad_norm": 0.0, - "learning_rate": 9.237143315859553e-07, - "loss": 0.7497, - "step": 30569 - }, - { - "epoch": 0.8662756099634447, - "grad_norm": 0.0, - "learning_rate": 9.233291055523396e-07, - "loss": 0.7747, - "step": 30570 - }, - { - "epoch": 0.8663039474057072, - "grad_norm": 0.0, - "learning_rate": 9.229439559748954e-07, - "loss": 0.7346, - "step": 30571 - }, - { - "epoch": 0.8663322848479696, - "grad_norm": 0.0, - "learning_rate": 9.225588828568633e-07, - "loss": 0.8973, - "step": 30572 - }, - { - "epoch": 0.866360622290232, - "grad_norm": 0.0, - "learning_rate": 9.221738862014906e-07, - "loss": 0.7803, - "step": 30573 - }, - { - "epoch": 0.8663889597324945, - "grad_norm": 0.0, - "learning_rate": 9.21788966012016e-07, - "loss": 0.782, - "step": 30574 - }, - { - "epoch": 0.866417297174757, - "grad_norm": 0.0, - "learning_rate": 9.214041222916836e-07, - "loss": 0.8479, - "step": 30575 - }, - { - "epoch": 0.8664456346170195, - "grad_norm": 0.0, - "learning_rate": 9.210193550437385e-07, - "loss": 0.701, - "step": 30576 - }, - { - "epoch": 0.8664739720592819, - "grad_norm": 0.0, - "learning_rate": 9.206346642714159e-07, - "loss": 0.7927, - "step": 30577 - }, - { - "epoch": 0.8665023095015444, - "grad_norm": 0.0, - "learning_rate": 9.202500499779599e-07, - "loss": 0.7955, - "step": 30578 - }, - { - "epoch": 0.8665306469438069, - "grad_norm": 0.0, - "learning_rate": 9.198655121666111e-07, - "loss": 0.7617, - "step": 30579 - }, - { - "epoch": 0.8665589843860693, - "grad_norm": 0.0, - "learning_rate": 9.194810508406049e-07, - "loss": 0.7692, - "step": 30580 - }, - { - "epoch": 0.8665873218283318, - "grad_norm": 0.0, - "learning_rate": 9.190966660031819e-07, - "loss": 0.8673, - "step": 30581 - }, - { - "epoch": 0.8666156592705943, - "grad_norm": 0.0, - "learning_rate": 9.187123576575795e-07, - "loss": 0.8497, - "step": 30582 - }, - { - "epoch": 0.8666439967128567, - "grad_norm": 0.0, - "learning_rate": 9.183281258070353e-07, - "loss": 0.8278, - "step": 30583 - }, - { - "epoch": 0.8666723341551191, - "grad_norm": 0.0, - "learning_rate": 9.179439704547876e-07, - "loss": 0.8581, - "step": 30584 - }, - { - "epoch": 0.8667006715973816, - "grad_norm": 0.0, - "learning_rate": 9.175598916040684e-07, - "loss": 0.7462, - "step": 30585 - }, - { - "epoch": 0.8667290090396441, - "grad_norm": 0.0, - "learning_rate": 9.171758892581162e-07, - "loss": 0.8963, - "step": 30586 - }, - { - "epoch": 0.8667573464819065, - "grad_norm": 0.0, - "learning_rate": 9.167919634201638e-07, - "loss": 0.8256, - "step": 30587 - }, - { - "epoch": 0.866785683924169, - "grad_norm": 0.0, - "learning_rate": 9.164081140934444e-07, - "loss": 0.7796, - "step": 30588 - }, - { - "epoch": 0.8668140213664315, - "grad_norm": 0.0, - "learning_rate": 9.160243412811953e-07, - "loss": 0.9037, - "step": 30589 - }, - { - "epoch": 0.866842358808694, - "grad_norm": 0.0, - "learning_rate": 9.15640644986644e-07, - "loss": 0.8323, - "step": 30590 - }, - { - "epoch": 0.8668706962509564, - "grad_norm": 0.0, - "learning_rate": 9.152570252130255e-07, - "loss": 0.7654, - "step": 30591 - }, - { - "epoch": 0.8668990336932189, - "grad_norm": 0.0, - "learning_rate": 9.148734819635718e-07, - "loss": 0.7948, - "step": 30592 - }, - { - "epoch": 0.8669273711354814, - "grad_norm": 0.0, - "learning_rate": 9.144900152415104e-07, - "loss": 0.7278, - "step": 30593 - }, - { - "epoch": 0.8669557085777437, - "grad_norm": 0.0, - "learning_rate": 9.141066250500741e-07, - "loss": 0.7251, - "step": 30594 - }, - { - "epoch": 0.8669840460200062, - "grad_norm": 0.0, - "learning_rate": 9.137233113924915e-07, - "loss": 0.7877, - "step": 30595 - }, - { - "epoch": 0.8670123834622687, - "grad_norm": 0.0, - "learning_rate": 9.133400742719922e-07, - "loss": 0.9439, - "step": 30596 - }, - { - "epoch": 0.8670407209045311, - "grad_norm": 0.0, - "learning_rate": 9.129569136918048e-07, - "loss": 0.6764, - "step": 30597 - }, - { - "epoch": 0.8670690583467936, - "grad_norm": 0.0, - "learning_rate": 9.125738296551534e-07, - "loss": 0.7347, - "step": 30598 - }, - { - "epoch": 0.8670973957890561, - "grad_norm": 0.0, - "learning_rate": 9.121908221652675e-07, - "loss": 0.8449, - "step": 30599 - }, - { - "epoch": 0.8671257332313186, - "grad_norm": 0.0, - "learning_rate": 9.118078912253758e-07, - "loss": 0.8514, - "step": 30600 - }, - { - "epoch": 0.867154070673581, - "grad_norm": 0.0, - "learning_rate": 9.114250368386979e-07, - "loss": 0.7701, - "step": 30601 - }, - { - "epoch": 0.8671824081158435, - "grad_norm": 0.0, - "learning_rate": 9.110422590084644e-07, - "loss": 0.8511, - "step": 30602 - }, - { - "epoch": 0.867210745558106, - "grad_norm": 0.0, - "learning_rate": 9.106595577378951e-07, - "loss": 0.901, - "step": 30603 - }, - { - "epoch": 0.8672390830003683, - "grad_norm": 0.0, - "learning_rate": 9.102769330302164e-07, - "loss": 0.8927, - "step": 30604 - }, - { - "epoch": 0.8672674204426308, - "grad_norm": 0.0, - "learning_rate": 9.09894384888651e-07, - "loss": 0.7527, - "step": 30605 - }, - { - "epoch": 0.8672957578848933, - "grad_norm": 0.0, - "learning_rate": 9.095119133164199e-07, - "loss": 0.7804, - "step": 30606 - }, - { - "epoch": 0.8673240953271558, - "grad_norm": 0.0, - "learning_rate": 9.091295183167448e-07, - "loss": 0.8661, - "step": 30607 - }, - { - "epoch": 0.8673524327694182, - "grad_norm": 0.0, - "learning_rate": 9.087471998928477e-07, - "loss": 0.8458, - "step": 30608 - }, - { - "epoch": 0.8673807702116807, - "grad_norm": 0.0, - "learning_rate": 9.083649580479493e-07, - "loss": 0.8359, - "step": 30609 - }, - { - "epoch": 0.8674091076539432, - "grad_norm": 0.0, - "learning_rate": 9.079827927852702e-07, - "loss": 0.7427, - "step": 30610 - }, - { - "epoch": 0.8674374450962056, - "grad_norm": 0.0, - "learning_rate": 9.07600704108027e-07, - "loss": 0.7018, - "step": 30611 - }, - { - "epoch": 0.8674657825384681, - "grad_norm": 0.0, - "learning_rate": 9.072186920194392e-07, - "loss": 0.8557, - "step": 30612 - }, - { - "epoch": 0.8674941199807306, - "grad_norm": 0.0, - "learning_rate": 9.068367565227266e-07, - "loss": 0.8245, - "step": 30613 - }, - { - "epoch": 0.867522457422993, - "grad_norm": 0.0, - "learning_rate": 9.06454897621103e-07, - "loss": 0.7882, - "step": 30614 - }, - { - "epoch": 0.8675507948652554, - "grad_norm": 0.0, - "learning_rate": 9.060731153177882e-07, - "loss": 0.7969, - "step": 30615 - }, - { - "epoch": 0.8675791323075179, - "grad_norm": 0.0, - "learning_rate": 9.056914096159952e-07, - "loss": 0.7857, - "step": 30616 - }, - { - "epoch": 0.8676074697497804, - "grad_norm": 0.0, - "learning_rate": 9.053097805189404e-07, - "loss": 0.8252, - "step": 30617 - }, - { - "epoch": 0.8676358071920428, - "grad_norm": 0.0, - "learning_rate": 9.049282280298399e-07, - "loss": 0.834, - "step": 30618 - }, - { - "epoch": 0.8676641446343053, - "grad_norm": 0.0, - "learning_rate": 9.045467521519047e-07, - "loss": 0.746, - "step": 30619 - }, - { - "epoch": 0.8676924820765678, - "grad_norm": 0.0, - "learning_rate": 9.041653528883498e-07, - "loss": 0.738, - "step": 30620 - }, - { - "epoch": 0.8677208195188302, - "grad_norm": 0.0, - "learning_rate": 9.037840302423883e-07, - "loss": 0.7405, - "step": 30621 - }, - { - "epoch": 0.8677491569610927, - "grad_norm": 0.0, - "learning_rate": 9.034027842172311e-07, - "loss": 0.6961, - "step": 30622 - }, - { - "epoch": 0.8677774944033552, - "grad_norm": 0.0, - "learning_rate": 9.030216148160919e-07, - "loss": 0.7856, - "step": 30623 - }, - { - "epoch": 0.8678058318456177, - "grad_norm": 0.0, - "learning_rate": 9.026405220421785e-07, - "loss": 0.7862, - "step": 30624 - }, - { - "epoch": 0.86783416928788, - "grad_norm": 0.0, - "learning_rate": 9.022595058987016e-07, - "loss": 0.7867, - "step": 30625 - }, - { - "epoch": 0.8678625067301425, - "grad_norm": 0.0, - "learning_rate": 9.01878566388873e-07, - "loss": 0.7673, - "step": 30626 - }, - { - "epoch": 0.867890844172405, - "grad_norm": 0.0, - "learning_rate": 9.01497703515899e-07, - "loss": 0.7882, - "step": 30627 - }, - { - "epoch": 0.8679191816146674, - "grad_norm": 0.0, - "learning_rate": 9.01116917282987e-07, - "loss": 0.7867, - "step": 30628 - }, - { - "epoch": 0.8679475190569299, - "grad_norm": 0.0, - "learning_rate": 9.00736207693349e-07, - "loss": 0.8213, - "step": 30629 - }, - { - "epoch": 0.8679758564991924, - "grad_norm": 0.0, - "learning_rate": 9.003555747501869e-07, - "loss": 0.8344, - "step": 30630 - }, - { - "epoch": 0.8680041939414549, - "grad_norm": 0.0, - "learning_rate": 8.999750184567102e-07, - "loss": 0.791, - "step": 30631 - }, - { - "epoch": 0.8680325313837173, - "grad_norm": 0.0, - "learning_rate": 8.995945388161209e-07, - "loss": 0.7742, - "step": 30632 - }, - { - "epoch": 0.8680608688259798, - "grad_norm": 0.0, - "learning_rate": 8.992141358316264e-07, - "loss": 0.7696, - "step": 30633 - }, - { - "epoch": 0.8680892062682423, - "grad_norm": 0.0, - "learning_rate": 8.988338095064308e-07, - "loss": 0.874, - "step": 30634 - }, - { - "epoch": 0.8681175437105046, - "grad_norm": 0.0, - "learning_rate": 8.984535598437382e-07, - "loss": 0.7387, - "step": 30635 - }, - { - "epoch": 0.8681458811527671, - "grad_norm": 0.0, - "learning_rate": 8.980733868467506e-07, - "loss": 0.9206, - "step": 30636 - }, - { - "epoch": 0.8681742185950296, - "grad_norm": 0.0, - "learning_rate": 8.97693290518673e-07, - "loss": 0.85, - "step": 30637 - }, - { - "epoch": 0.8682025560372921, - "grad_norm": 0.0, - "learning_rate": 8.97313270862703e-07, - "loss": 0.7953, - "step": 30638 - }, - { - "epoch": 0.8682308934795545, - "grad_norm": 0.0, - "learning_rate": 8.969333278820447e-07, - "loss": 0.7759, - "step": 30639 - }, - { - "epoch": 0.868259230921817, - "grad_norm": 0.0, - "learning_rate": 8.965534615798965e-07, - "loss": 0.8961, - "step": 30640 - }, - { - "epoch": 0.8682875683640795, - "grad_norm": 0.0, - "learning_rate": 8.961736719594582e-07, - "loss": 0.7877, - "step": 30641 - }, - { - "epoch": 0.8683159058063419, - "grad_norm": 0.0, - "learning_rate": 8.957939590239317e-07, - "loss": 0.7018, - "step": 30642 - }, - { - "epoch": 0.8683442432486044, - "grad_norm": 0.0, - "learning_rate": 8.954143227765111e-07, - "loss": 0.8645, - "step": 30643 - }, - { - "epoch": 0.8683725806908669, - "grad_norm": 0.0, - "learning_rate": 8.950347632203993e-07, - "loss": 0.7014, - "step": 30644 - }, - { - "epoch": 0.8684009181331293, - "grad_norm": 0.0, - "learning_rate": 8.946552803587882e-07, - "loss": 0.6656, - "step": 30645 - }, - { - "epoch": 0.8684292555753917, - "grad_norm": 0.0, - "learning_rate": 8.942758741948776e-07, - "loss": 0.8123, - "step": 30646 - }, - { - "epoch": 0.8684575930176542, - "grad_norm": 0.0, - "learning_rate": 8.938965447318626e-07, - "loss": 0.8929, - "step": 30647 - }, - { - "epoch": 0.8684859304599167, - "grad_norm": 0.0, - "learning_rate": 8.935172919729373e-07, - "loss": 0.7632, - "step": 30648 - }, - { - "epoch": 0.8685142679021791, - "grad_norm": 0.0, - "learning_rate": 8.931381159212982e-07, - "loss": 0.7562, - "step": 30649 - }, - { - "epoch": 0.8685426053444416, - "grad_norm": 0.0, - "learning_rate": 8.927590165801403e-07, - "loss": 0.8246, - "step": 30650 - }, - { - "epoch": 0.8685709427867041, - "grad_norm": 0.0, - "learning_rate": 8.923799939526534e-07, - "loss": 0.8123, - "step": 30651 - }, - { - "epoch": 0.8685992802289665, - "grad_norm": 0.0, - "learning_rate": 8.920010480420338e-07, - "loss": 0.7659, - "step": 30652 - }, - { - "epoch": 0.868627617671229, - "grad_norm": 0.0, - "learning_rate": 8.916221788514701e-07, - "loss": 0.8107, - "step": 30653 - }, - { - "epoch": 0.8686559551134915, - "grad_norm": 0.0, - "learning_rate": 8.912433863841541e-07, - "loss": 0.8521, - "step": 30654 - }, - { - "epoch": 0.868684292555754, - "grad_norm": 0.0, - "learning_rate": 8.9086467064328e-07, - "loss": 0.8611, - "step": 30655 - }, - { - "epoch": 0.8687126299980164, - "grad_norm": 0.0, - "learning_rate": 8.904860316320329e-07, - "loss": 0.8142, - "step": 30656 - }, - { - "epoch": 0.8687409674402788, - "grad_norm": 0.0, - "learning_rate": 8.90107469353605e-07, - "loss": 0.772, - "step": 30657 - }, - { - "epoch": 0.8687693048825413, - "grad_norm": 0.0, - "learning_rate": 8.897289838111866e-07, - "loss": 0.7271, - "step": 30658 - }, - { - "epoch": 0.8687976423248037, - "grad_norm": 0.0, - "learning_rate": 8.893505750079623e-07, - "loss": 0.7755, - "step": 30659 - }, - { - "epoch": 0.8688259797670662, - "grad_norm": 0.0, - "learning_rate": 8.889722429471215e-07, - "loss": 0.8128, - "step": 30660 - }, - { - "epoch": 0.8688543172093287, - "grad_norm": 0.0, - "learning_rate": 8.885939876318505e-07, - "loss": 0.7312, - "step": 30661 - }, - { - "epoch": 0.8688826546515912, - "grad_norm": 0.0, - "learning_rate": 8.882158090653359e-07, - "loss": 0.8467, - "step": 30662 - }, - { - "epoch": 0.8689109920938536, - "grad_norm": 0.0, - "learning_rate": 8.878377072507649e-07, - "loss": 0.8122, - "step": 30663 - }, - { - "epoch": 0.8689393295361161, - "grad_norm": 0.0, - "learning_rate": 8.874596821913184e-07, - "loss": 0.8054, - "step": 30664 - }, - { - "epoch": 0.8689676669783786, - "grad_norm": 0.0, - "learning_rate": 8.870817338901849e-07, - "loss": 0.7732, - "step": 30665 - }, - { - "epoch": 0.868996004420641, - "grad_norm": 0.0, - "learning_rate": 8.867038623505442e-07, - "loss": 0.7925, - "step": 30666 - }, - { - "epoch": 0.8690243418629034, - "grad_norm": 0.0, - "learning_rate": 8.863260675755813e-07, - "loss": 0.8237, - "step": 30667 - }, - { - "epoch": 0.8690526793051659, - "grad_norm": 0.0, - "learning_rate": 8.859483495684795e-07, - "loss": 0.7913, - "step": 30668 - }, - { - "epoch": 0.8690810167474283, - "grad_norm": 0.0, - "learning_rate": 8.855707083324183e-07, - "loss": 0.8767, - "step": 30669 - }, - { - "epoch": 0.8691093541896908, - "grad_norm": 0.0, - "learning_rate": 8.851931438705786e-07, - "loss": 0.7781, - "step": 30670 - }, - { - "epoch": 0.8691376916319533, - "grad_norm": 0.0, - "learning_rate": 8.848156561861421e-07, - "loss": 0.7649, - "step": 30671 - }, - { - "epoch": 0.8691660290742158, - "grad_norm": 0.0, - "learning_rate": 8.844382452822897e-07, - "loss": 0.817, - "step": 30672 - }, - { - "epoch": 0.8691943665164782, - "grad_norm": 0.0, - "learning_rate": 8.840609111621978e-07, - "loss": 0.7121, - "step": 30673 - }, - { - "epoch": 0.8692227039587407, - "grad_norm": 0.0, - "learning_rate": 8.836836538290449e-07, - "loss": 0.8612, - "step": 30674 - }, - { - "epoch": 0.8692510414010032, - "grad_norm": 0.0, - "learning_rate": 8.833064732860108e-07, - "loss": 0.8025, - "step": 30675 - }, - { - "epoch": 0.8692793788432656, - "grad_norm": 0.0, - "learning_rate": 8.829293695362728e-07, - "loss": 0.8345, - "step": 30676 - }, - { - "epoch": 0.869307716285528, - "grad_norm": 0.0, - "learning_rate": 8.825523425830051e-07, - "loss": 0.7892, - "step": 30677 - }, - { - "epoch": 0.8693360537277905, - "grad_norm": 0.0, - "learning_rate": 8.821753924293841e-07, - "loss": 0.8787, - "step": 30678 - }, - { - "epoch": 0.869364391170053, - "grad_norm": 0.0, - "learning_rate": 8.817985190785882e-07, - "loss": 0.8113, - "step": 30679 - }, - { - "epoch": 0.8693927286123154, - "grad_norm": 0.0, - "learning_rate": 8.814217225337873e-07, - "loss": 0.781, - "step": 30680 - }, - { - "epoch": 0.8694210660545779, - "grad_norm": 0.0, - "learning_rate": 8.810450027981587e-07, - "loss": 0.7651, - "step": 30681 - }, - { - "epoch": 0.8694494034968404, - "grad_norm": 0.0, - "learning_rate": 8.806683598748722e-07, - "loss": 0.8056, - "step": 30682 - }, - { - "epoch": 0.8694777409391028, - "grad_norm": 0.0, - "learning_rate": 8.802917937671029e-07, - "loss": 0.7812, - "step": 30683 - }, - { - "epoch": 0.8695060783813653, - "grad_norm": 0.0, - "learning_rate": 8.799153044780229e-07, - "loss": 0.7797, - "step": 30684 - }, - { - "epoch": 0.8695344158236278, - "grad_norm": 0.0, - "learning_rate": 8.795388920108016e-07, - "loss": 0.7682, - "step": 30685 - }, - { - "epoch": 0.8695627532658903, - "grad_norm": 0.0, - "learning_rate": 8.791625563686123e-07, - "loss": 0.7607, - "step": 30686 - }, - { - "epoch": 0.8695910907081527, - "grad_norm": 0.0, - "learning_rate": 8.787862975546246e-07, - "loss": 0.8081, - "step": 30687 - }, - { - "epoch": 0.8696194281504152, - "grad_norm": 0.0, - "learning_rate": 8.78410115572006e-07, - "loss": 0.8722, - "step": 30688 - }, - { - "epoch": 0.8696477655926776, - "grad_norm": 0.0, - "learning_rate": 8.780340104239283e-07, - "loss": 0.8026, - "step": 30689 - }, - { - "epoch": 0.86967610303494, - "grad_norm": 0.0, - "learning_rate": 8.776579821135544e-07, - "loss": 0.8055, - "step": 30690 - }, - { - "epoch": 0.8697044404772025, - "grad_norm": 0.0, - "learning_rate": 8.772820306440555e-07, - "loss": 0.7451, - "step": 30691 - }, - { - "epoch": 0.869732777919465, - "grad_norm": 0.0, - "learning_rate": 8.769061560185999e-07, - "loss": 0.83, - "step": 30692 - }, - { - "epoch": 0.8697611153617274, - "grad_norm": 0.0, - "learning_rate": 8.765303582403495e-07, - "loss": 0.8168, - "step": 30693 - }, - { - "epoch": 0.8697894528039899, - "grad_norm": 0.0, - "learning_rate": 8.76154637312473e-07, - "loss": 0.8264, - "step": 30694 - }, - { - "epoch": 0.8698177902462524, - "grad_norm": 0.0, - "learning_rate": 8.757789932381322e-07, - "loss": 0.7092, - "step": 30695 - }, - { - "epoch": 0.8698461276885149, - "grad_norm": 0.0, - "learning_rate": 8.754034260204936e-07, - "loss": 0.7811, - "step": 30696 - }, - { - "epoch": 0.8698744651307773, - "grad_norm": 0.0, - "learning_rate": 8.750279356627211e-07, - "loss": 0.7353, - "step": 30697 - }, - { - "epoch": 0.8699028025730398, - "grad_norm": 0.0, - "learning_rate": 8.746525221679758e-07, - "loss": 0.8234, - "step": 30698 - }, - { - "epoch": 0.8699311400153023, - "grad_norm": 0.0, - "learning_rate": 8.742771855394205e-07, - "loss": 0.7159, - "step": 30699 - }, - { - "epoch": 0.8699594774575646, - "grad_norm": 0.0, - "learning_rate": 8.739019257802195e-07, - "loss": 0.8135, - "step": 30700 - }, - { - "epoch": 0.8699878148998271, - "grad_norm": 0.0, - "learning_rate": 8.735267428935301e-07, - "loss": 0.7439, - "step": 30701 - }, - { - "epoch": 0.8700161523420896, - "grad_norm": 0.0, - "learning_rate": 8.731516368825154e-07, - "loss": 0.7341, - "step": 30702 - }, - { - "epoch": 0.8700444897843521, - "grad_norm": 0.0, - "learning_rate": 8.727766077503319e-07, - "loss": 0.8425, - "step": 30703 - }, - { - "epoch": 0.8700728272266145, - "grad_norm": 0.0, - "learning_rate": 8.724016555001402e-07, - "loss": 0.8138, - "step": 30704 - }, - { - "epoch": 0.870101164668877, - "grad_norm": 0.0, - "learning_rate": 8.720267801351013e-07, - "loss": 0.8347, - "step": 30705 - }, - { - "epoch": 0.8701295021111395, - "grad_norm": 0.0, - "learning_rate": 8.716519816583679e-07, - "loss": 0.7972, - "step": 30706 - }, - { - "epoch": 0.8701578395534019, - "grad_norm": 0.0, - "learning_rate": 8.71277260073099e-07, - "loss": 0.9212, - "step": 30707 - }, - { - "epoch": 0.8701861769956644, - "grad_norm": 0.0, - "learning_rate": 8.709026153824541e-07, - "loss": 0.7516, - "step": 30708 - }, - { - "epoch": 0.8702145144379269, - "grad_norm": 0.0, - "learning_rate": 8.70528047589585e-07, - "loss": 0.7431, - "step": 30709 - }, - { - "epoch": 0.8702428518801892, - "grad_norm": 0.0, - "learning_rate": 8.701535566976482e-07, - "loss": 0.7947, - "step": 30710 - }, - { - "epoch": 0.8702711893224517, - "grad_norm": 0.0, - "learning_rate": 8.697791427097979e-07, - "loss": 0.9075, - "step": 30711 - }, - { - "epoch": 0.8702995267647142, - "grad_norm": 0.0, - "learning_rate": 8.694048056291882e-07, - "loss": 0.8517, - "step": 30712 - }, - { - "epoch": 0.8703278642069767, - "grad_norm": 0.0, - "learning_rate": 8.690305454589754e-07, - "loss": 0.7129, - "step": 30713 - }, - { - "epoch": 0.8703562016492391, - "grad_norm": 0.0, - "learning_rate": 8.686563622023059e-07, - "loss": 0.7027, - "step": 30714 - }, - { - "epoch": 0.8703845390915016, - "grad_norm": 0.0, - "learning_rate": 8.682822558623349e-07, - "loss": 0.9021, - "step": 30715 - }, - { - "epoch": 0.8704128765337641, - "grad_norm": 0.0, - "learning_rate": 8.679082264422156e-07, - "loss": 0.7154, - "step": 30716 - }, - { - "epoch": 0.8704412139760265, - "grad_norm": 0.0, - "learning_rate": 8.675342739450942e-07, - "loss": 0.9176, - "step": 30717 - }, - { - "epoch": 0.870469551418289, - "grad_norm": 0.0, - "learning_rate": 8.67160398374125e-07, - "loss": 0.7948, - "step": 30718 - }, - { - "epoch": 0.8704978888605515, - "grad_norm": 0.0, - "learning_rate": 8.667865997324532e-07, - "loss": 0.8869, - "step": 30719 - }, - { - "epoch": 0.870526226302814, - "grad_norm": 0.0, - "learning_rate": 8.664128780232295e-07, - "loss": 0.8552, - "step": 30720 - }, - { - "epoch": 0.8705545637450763, - "grad_norm": 0.0, - "learning_rate": 8.660392332496037e-07, - "loss": 0.8096, - "step": 30721 - }, - { - "epoch": 0.8705829011873388, - "grad_norm": 0.0, - "learning_rate": 8.656656654147199e-07, - "loss": 0.8795, - "step": 30722 - }, - { - "epoch": 0.8706112386296013, - "grad_norm": 0.0, - "learning_rate": 8.652921745217258e-07, - "loss": 0.6876, - "step": 30723 - }, - { - "epoch": 0.8706395760718637, - "grad_norm": 0.0, - "learning_rate": 8.649187605737675e-07, - "loss": 0.8043, - "step": 30724 - }, - { - "epoch": 0.8706679135141262, - "grad_norm": 0.0, - "learning_rate": 8.645454235739903e-07, - "loss": 0.8463, - "step": 30725 - }, - { - "epoch": 0.8706962509563887, - "grad_norm": 0.0, - "learning_rate": 8.641721635255418e-07, - "loss": 0.755, - "step": 30726 - }, - { - "epoch": 0.8707245883986512, - "grad_norm": 0.0, - "learning_rate": 8.637989804315616e-07, - "loss": 0.8652, - "step": 30727 - }, - { - "epoch": 0.8707529258409136, - "grad_norm": 0.0, - "learning_rate": 8.634258742951951e-07, - "loss": 0.7472, - "step": 30728 - }, - { - "epoch": 0.8707812632831761, - "grad_norm": 0.0, - "learning_rate": 8.630528451195874e-07, - "loss": 0.8281, - "step": 30729 - }, - { - "epoch": 0.8708096007254386, - "grad_norm": 0.0, - "learning_rate": 8.626798929078773e-07, - "loss": 0.7662, - "step": 30730 - }, - { - "epoch": 0.8708379381677009, - "grad_norm": 0.0, - "learning_rate": 8.623070176632087e-07, - "loss": 0.7473, - "step": 30731 - }, - { - "epoch": 0.8708662756099634, - "grad_norm": 0.0, - "learning_rate": 8.619342193887192e-07, - "loss": 0.7343, - "step": 30732 - }, - { - "epoch": 0.8708946130522259, - "grad_norm": 0.0, - "learning_rate": 8.615614980875508e-07, - "loss": 0.6898, - "step": 30733 - }, - { - "epoch": 0.8709229504944883, - "grad_norm": 0.0, - "learning_rate": 8.611888537628466e-07, - "loss": 0.7976, - "step": 30734 - }, - { - "epoch": 0.8709512879367508, - "grad_norm": 0.0, - "learning_rate": 8.608162864177394e-07, - "loss": 0.8695, - "step": 30735 - }, - { - "epoch": 0.8709796253790133, - "grad_norm": 0.0, - "learning_rate": 8.604437960553702e-07, - "loss": 0.8479, - "step": 30736 - }, - { - "epoch": 0.8710079628212758, - "grad_norm": 0.0, - "learning_rate": 8.600713826788776e-07, - "loss": 0.7282, - "step": 30737 - }, - { - "epoch": 0.8710363002635382, - "grad_norm": 0.0, - "learning_rate": 8.596990462913967e-07, - "loss": 0.8357, - "step": 30738 - }, - { - "epoch": 0.8710646377058007, - "grad_norm": 0.0, - "learning_rate": 8.593267868960675e-07, - "loss": 0.8526, - "step": 30739 - }, - { - "epoch": 0.8710929751480632, - "grad_norm": 0.0, - "learning_rate": 8.589546044960218e-07, - "loss": 0.7132, - "step": 30740 - }, - { - "epoch": 0.8711213125903255, - "grad_norm": 0.0, - "learning_rate": 8.585824990943947e-07, - "loss": 0.8274, - "step": 30741 - }, - { - "epoch": 0.871149650032588, - "grad_norm": 0.0, - "learning_rate": 8.58210470694324e-07, - "loss": 0.8627, - "step": 30742 - }, - { - "epoch": 0.8711779874748505, - "grad_norm": 0.0, - "learning_rate": 8.578385192989402e-07, - "loss": 0.9072, - "step": 30743 - }, - { - "epoch": 0.871206324917113, - "grad_norm": 0.0, - "learning_rate": 8.574666449113766e-07, - "loss": 0.8055, - "step": 30744 - }, - { - "epoch": 0.8712346623593754, - "grad_norm": 0.0, - "learning_rate": 8.570948475347685e-07, - "loss": 0.7383, - "step": 30745 - }, - { - "epoch": 0.8712629998016379, - "grad_norm": 0.0, - "learning_rate": 8.567231271722443e-07, - "loss": 0.8288, - "step": 30746 - }, - { - "epoch": 0.8712913372439004, - "grad_norm": 0.0, - "learning_rate": 8.563514838269371e-07, - "loss": 0.7384, - "step": 30747 - }, - { - "epoch": 0.8713196746861628, - "grad_norm": 0.0, - "learning_rate": 8.559799175019756e-07, - "loss": 0.8648, - "step": 30748 - }, - { - "epoch": 0.8713480121284253, - "grad_norm": 0.0, - "learning_rate": 8.556084282004906e-07, - "loss": 0.8267, - "step": 30749 - }, - { - "epoch": 0.8713763495706878, - "grad_norm": 0.0, - "learning_rate": 8.552370159256118e-07, - "loss": 0.823, - "step": 30750 - }, - { - "epoch": 0.8714046870129503, - "grad_norm": 0.0, - "learning_rate": 8.548656806804678e-07, - "loss": 0.7899, - "step": 30751 - }, - { - "epoch": 0.8714330244552126, - "grad_norm": 0.0, - "learning_rate": 8.544944224681872e-07, - "loss": 0.7912, - "step": 30752 - }, - { - "epoch": 0.8714613618974751, - "grad_norm": 0.0, - "learning_rate": 8.541232412918943e-07, - "loss": 0.854, - "step": 30753 - }, - { - "epoch": 0.8714896993397376, - "grad_norm": 0.0, - "learning_rate": 8.537521371547186e-07, - "loss": 0.8179, - "step": 30754 - }, - { - "epoch": 0.871518036782, - "grad_norm": 0.0, - "learning_rate": 8.533811100597855e-07, - "loss": 0.8052, - "step": 30755 - }, - { - "epoch": 0.8715463742242625, - "grad_norm": 0.0, - "learning_rate": 8.530101600102192e-07, - "loss": 0.8941, - "step": 30756 - }, - { - "epoch": 0.871574711666525, - "grad_norm": 0.0, - "learning_rate": 8.526392870091449e-07, - "loss": 0.8204, - "step": 30757 - }, - { - "epoch": 0.8716030491087874, - "grad_norm": 0.0, - "learning_rate": 8.522684910596879e-07, - "loss": 0.8077, - "step": 30758 - }, - { - "epoch": 0.8716313865510499, - "grad_norm": 0.0, - "learning_rate": 8.518977721649679e-07, - "loss": 0.6565, - "step": 30759 - }, - { - "epoch": 0.8716597239933124, - "grad_norm": 0.0, - "learning_rate": 8.515271303281125e-07, - "loss": 0.7511, - "step": 30760 - }, - { - "epoch": 0.8716880614355749, - "grad_norm": 0.0, - "learning_rate": 8.511565655522403e-07, - "loss": 0.8089, - "step": 30761 - }, - { - "epoch": 0.8717163988778373, - "grad_norm": 0.0, - "learning_rate": 8.507860778404731e-07, - "loss": 0.7752, - "step": 30762 - }, - { - "epoch": 0.8717447363200997, - "grad_norm": 0.0, - "learning_rate": 8.504156671959319e-07, - "loss": 0.7115, - "step": 30763 - }, - { - "epoch": 0.8717730737623622, - "grad_norm": 0.0, - "learning_rate": 8.500453336217374e-07, - "loss": 0.7609, - "step": 30764 - }, - { - "epoch": 0.8718014112046246, - "grad_norm": 0.0, - "learning_rate": 8.496750771210083e-07, - "loss": 0.857, - "step": 30765 - }, - { - "epoch": 0.8718297486468871, - "grad_norm": 0.0, - "learning_rate": 8.493048976968665e-07, - "loss": 0.7812, - "step": 30766 - }, - { - "epoch": 0.8718580860891496, - "grad_norm": 0.0, - "learning_rate": 8.489347953524252e-07, - "loss": 0.7824, - "step": 30767 - }, - { - "epoch": 0.8718864235314121, - "grad_norm": 0.0, - "learning_rate": 8.485647700908062e-07, - "loss": 0.7018, - "step": 30768 - }, - { - "epoch": 0.8719147609736745, - "grad_norm": 0.0, - "learning_rate": 8.481948219151226e-07, - "loss": 0.7322, - "step": 30769 - }, - { - "epoch": 0.871943098415937, - "grad_norm": 0.0, - "learning_rate": 8.47824950828493e-07, - "loss": 0.7717, - "step": 30770 - }, - { - "epoch": 0.8719714358581995, - "grad_norm": 0.0, - "learning_rate": 8.474551568340338e-07, - "loss": 0.7534, - "step": 30771 - }, - { - "epoch": 0.8719997733004619, - "grad_norm": 0.0, - "learning_rate": 8.470854399348572e-07, - "loss": 0.754, - "step": 30772 - }, - { - "epoch": 0.8720281107427243, - "grad_norm": 0.0, - "learning_rate": 8.46715800134078e-07, - "loss": 0.747, - "step": 30773 - }, - { - "epoch": 0.8720564481849868, - "grad_norm": 0.0, - "learning_rate": 8.46346237434813e-07, - "loss": 0.7962, - "step": 30774 - }, - { - "epoch": 0.8720847856272493, - "grad_norm": 0.0, - "learning_rate": 8.459767518401707e-07, - "loss": 0.7905, - "step": 30775 - }, - { - "epoch": 0.8721131230695117, - "grad_norm": 0.0, - "learning_rate": 8.456073433532652e-07, - "loss": 0.7587, - "step": 30776 - }, - { - "epoch": 0.8721414605117742, - "grad_norm": 0.0, - "learning_rate": 8.452380119772086e-07, - "loss": 0.8089, - "step": 30777 - }, - { - "epoch": 0.8721697979540367, - "grad_norm": 0.0, - "learning_rate": 8.448687577151127e-07, - "loss": 0.8303, - "step": 30778 - }, - { - "epoch": 0.8721981353962991, - "grad_norm": 0.0, - "learning_rate": 8.444995805700873e-07, - "loss": 0.7797, - "step": 30779 - }, - { - "epoch": 0.8722264728385616, - "grad_norm": 0.0, - "learning_rate": 8.441304805452411e-07, - "loss": 0.7225, - "step": 30780 - }, - { - "epoch": 0.8722548102808241, - "grad_norm": 0.0, - "learning_rate": 8.437614576436848e-07, - "loss": 0.8363, - "step": 30781 - }, - { - "epoch": 0.8722831477230865, - "grad_norm": 0.0, - "learning_rate": 8.433925118685249e-07, - "loss": 0.801, - "step": 30782 - }, - { - "epoch": 0.872311485165349, - "grad_norm": 0.0, - "learning_rate": 8.430236432228689e-07, - "loss": 0.8022, - "step": 30783 - }, - { - "epoch": 0.8723398226076114, - "grad_norm": 0.0, - "learning_rate": 8.426548517098276e-07, - "loss": 0.7489, - "step": 30784 - }, - { - "epoch": 0.8723681600498739, - "grad_norm": 0.0, - "learning_rate": 8.422861373325031e-07, - "loss": 0.8455, - "step": 30785 - }, - { - "epoch": 0.8723964974921363, - "grad_norm": 0.0, - "learning_rate": 8.419175000940028e-07, - "loss": 0.7624, - "step": 30786 - }, - { - "epoch": 0.8724248349343988, - "grad_norm": 0.0, - "learning_rate": 8.415489399974341e-07, - "loss": 0.8147, - "step": 30787 - }, - { - "epoch": 0.8724531723766613, - "grad_norm": 0.0, - "learning_rate": 8.411804570458981e-07, - "loss": 0.84, - "step": 30788 - }, - { - "epoch": 0.8724815098189237, - "grad_norm": 0.0, - "learning_rate": 8.408120512425e-07, - "loss": 0.9423, - "step": 30789 - }, - { - "epoch": 0.8725098472611862, - "grad_norm": 0.0, - "learning_rate": 8.404437225903439e-07, - "loss": 0.8903, - "step": 30790 - }, - { - "epoch": 0.8725381847034487, - "grad_norm": 0.0, - "learning_rate": 8.400754710925307e-07, - "loss": 0.6893, - "step": 30791 - }, - { - "epoch": 0.8725665221457112, - "grad_norm": 0.0, - "learning_rate": 8.397072967521658e-07, - "loss": 0.7889, - "step": 30792 - }, - { - "epoch": 0.8725948595879736, - "grad_norm": 0.0, - "learning_rate": 8.393391995723454e-07, - "loss": 0.8421, - "step": 30793 - }, - { - "epoch": 0.872623197030236, - "grad_norm": 0.0, - "learning_rate": 8.389711795561728e-07, - "loss": 0.7525, - "step": 30794 - }, - { - "epoch": 0.8726515344724985, - "grad_norm": 0.0, - "learning_rate": 8.386032367067498e-07, - "loss": 0.7009, - "step": 30795 - }, - { - "epoch": 0.8726798719147609, - "grad_norm": 0.0, - "learning_rate": 8.382353710271718e-07, - "loss": 0.8015, - "step": 30796 - }, - { - "epoch": 0.8727082093570234, - "grad_norm": 0.0, - "learning_rate": 8.378675825205407e-07, - "loss": 0.7569, - "step": 30797 - }, - { - "epoch": 0.8727365467992859, - "grad_norm": 0.0, - "learning_rate": 8.374998711899529e-07, - "loss": 0.9151, - "step": 30798 - }, - { - "epoch": 0.8727648842415484, - "grad_norm": 0.0, - "learning_rate": 8.371322370385049e-07, - "loss": 0.7608, - "step": 30799 - }, - { - "epoch": 0.8727932216838108, - "grad_norm": 0.0, - "learning_rate": 8.367646800692964e-07, - "loss": 0.7359, - "step": 30800 - }, - { - "epoch": 0.8728215591260733, - "grad_norm": 0.0, - "learning_rate": 8.363972002854204e-07, - "loss": 0.7426, - "step": 30801 - }, - { - "epoch": 0.8728498965683358, - "grad_norm": 0.0, - "learning_rate": 8.360297976899734e-07, - "loss": 0.822, - "step": 30802 - }, - { - "epoch": 0.8728782340105982, - "grad_norm": 0.0, - "learning_rate": 8.356624722860507e-07, - "loss": 0.8512, - "step": 30803 - }, - { - "epoch": 0.8729065714528607, - "grad_norm": 0.0, - "learning_rate": 8.352952240767453e-07, - "loss": 0.8187, - "step": 30804 - }, - { - "epoch": 0.8729349088951232, - "grad_norm": 0.0, - "learning_rate": 8.349280530651538e-07, - "loss": 0.7996, - "step": 30805 - }, - { - "epoch": 0.8729632463373855, - "grad_norm": 0.0, - "learning_rate": 8.345609592543647e-07, - "loss": 0.7562, - "step": 30806 - }, - { - "epoch": 0.872991583779648, - "grad_norm": 0.0, - "learning_rate": 8.341939426474721e-07, - "loss": 0.8064, - "step": 30807 - }, - { - "epoch": 0.8730199212219105, - "grad_norm": 0.0, - "learning_rate": 8.338270032475693e-07, - "loss": 0.7777, - "step": 30808 - }, - { - "epoch": 0.873048258664173, - "grad_norm": 0.0, - "learning_rate": 8.334601410577436e-07, - "loss": 0.7925, - "step": 30809 - }, - { - "epoch": 0.8730765961064354, - "grad_norm": 0.0, - "learning_rate": 8.330933560810895e-07, - "loss": 0.7826, - "step": 30810 - }, - { - "epoch": 0.8731049335486979, - "grad_norm": 0.0, - "learning_rate": 8.32726648320692e-07, - "loss": 0.727, - "step": 30811 - }, - { - "epoch": 0.8731332709909604, - "grad_norm": 0.0, - "learning_rate": 8.32360017779642e-07, - "loss": 0.7533, - "step": 30812 - }, - { - "epoch": 0.8731616084332228, - "grad_norm": 0.0, - "learning_rate": 8.319934644610295e-07, - "loss": 0.8366, - "step": 30813 - }, - { - "epoch": 0.8731899458754853, - "grad_norm": 0.0, - "learning_rate": 8.316269883679384e-07, - "loss": 0.7604, - "step": 30814 - }, - { - "epoch": 0.8732182833177478, - "grad_norm": 0.0, - "learning_rate": 8.312605895034587e-07, - "loss": 0.7466, - "step": 30815 - }, - { - "epoch": 0.8732466207600103, - "grad_norm": 0.0, - "learning_rate": 8.308942678706756e-07, - "loss": 0.7565, - "step": 30816 - }, - { - "epoch": 0.8732749582022726, - "grad_norm": 0.0, - "learning_rate": 8.305280234726743e-07, - "loss": 0.7723, - "step": 30817 - }, - { - "epoch": 0.8733032956445351, - "grad_norm": 0.0, - "learning_rate": 8.301618563125424e-07, - "loss": 0.7731, - "step": 30818 - }, - { - "epoch": 0.8733316330867976, - "grad_norm": 0.0, - "learning_rate": 8.297957663933609e-07, - "loss": 0.8455, - "step": 30819 - }, - { - "epoch": 0.87335997052906, - "grad_norm": 0.0, - "learning_rate": 8.29429753718215e-07, - "loss": 0.8031, - "step": 30820 - }, - { - "epoch": 0.8733883079713225, - "grad_norm": 0.0, - "learning_rate": 8.290638182901889e-07, - "loss": 0.8692, - "step": 30821 - }, - { - "epoch": 0.873416645413585, - "grad_norm": 0.0, - "learning_rate": 8.286979601123623e-07, - "loss": 0.7728, - "step": 30822 - }, - { - "epoch": 0.8734449828558475, - "grad_norm": 0.0, - "learning_rate": 8.283321791878185e-07, - "loss": 0.8217, - "step": 30823 - }, - { - "epoch": 0.8734733202981099, - "grad_norm": 0.0, - "learning_rate": 8.279664755196404e-07, - "loss": 0.7692, - "step": 30824 - }, - { - "epoch": 0.8735016577403724, - "grad_norm": 0.0, - "learning_rate": 8.276008491109056e-07, - "loss": 0.6743, - "step": 30825 - }, - { - "epoch": 0.8735299951826349, - "grad_norm": 0.0, - "learning_rate": 8.27235299964696e-07, - "loss": 0.7969, - "step": 30826 - }, - { - "epoch": 0.8735583326248972, - "grad_norm": 0.0, - "learning_rate": 8.26869828084087e-07, - "loss": 0.7775, - "step": 30827 - }, - { - "epoch": 0.8735866700671597, - "grad_norm": 0.0, - "learning_rate": 8.265044334721606e-07, - "loss": 0.7374, - "step": 30828 - }, - { - "epoch": 0.8736150075094222, - "grad_norm": 0.0, - "learning_rate": 8.261391161319942e-07, - "loss": 0.8165, - "step": 30829 - }, - { - "epoch": 0.8736433449516846, - "grad_norm": 0.0, - "learning_rate": 8.257738760666645e-07, - "loss": 0.8211, - "step": 30830 - }, - { - "epoch": 0.8736716823939471, - "grad_norm": 0.0, - "learning_rate": 8.254087132792476e-07, - "loss": 0.7741, - "step": 30831 - }, - { - "epoch": 0.8737000198362096, - "grad_norm": 0.0, - "learning_rate": 8.250436277728224e-07, - "loss": 0.8948, - "step": 30832 - }, - { - "epoch": 0.8737283572784721, - "grad_norm": 0.0, - "learning_rate": 8.246786195504597e-07, - "loss": 0.8164, - "step": 30833 - }, - { - "epoch": 0.8737566947207345, - "grad_norm": 0.0, - "learning_rate": 8.243136886152381e-07, - "loss": 0.7258, - "step": 30834 - }, - { - "epoch": 0.873785032162997, - "grad_norm": 0.0, - "learning_rate": 8.239488349702285e-07, - "loss": 0.7448, - "step": 30835 - }, - { - "epoch": 0.8738133696052595, - "grad_norm": 0.0, - "learning_rate": 8.23584058618504e-07, - "loss": 0.7541, - "step": 30836 - }, - { - "epoch": 0.8738417070475218, - "grad_norm": 0.0, - "learning_rate": 8.23219359563141e-07, - "loss": 0.7979, - "step": 30837 - }, - { - "epoch": 0.8738700444897843, - "grad_norm": 0.0, - "learning_rate": 8.228547378072072e-07, - "loss": 0.8319, - "step": 30838 - }, - { - "epoch": 0.8738983819320468, - "grad_norm": 0.0, - "learning_rate": 8.224901933537777e-07, - "loss": 0.7563, - "step": 30839 - }, - { - "epoch": 0.8739267193743093, - "grad_norm": 0.0, - "learning_rate": 8.22125726205919e-07, - "loss": 0.8925, - "step": 30840 - }, - { - "epoch": 0.8739550568165717, - "grad_norm": 0.0, - "learning_rate": 8.217613363667043e-07, - "loss": 0.7222, - "step": 30841 - }, - { - "epoch": 0.8739833942588342, - "grad_norm": 0.0, - "learning_rate": 8.213970238392022e-07, - "loss": 0.8313, - "step": 30842 - }, - { - "epoch": 0.8740117317010967, - "grad_norm": 0.0, - "learning_rate": 8.210327886264802e-07, - "loss": 0.8585, - "step": 30843 - }, - { - "epoch": 0.8740400691433591, - "grad_norm": 0.0, - "learning_rate": 8.20668630731608e-07, - "loss": 0.8493, - "step": 30844 - }, - { - "epoch": 0.8740684065856216, - "grad_norm": 0.0, - "learning_rate": 8.203045501576545e-07, - "loss": 0.8626, - "step": 30845 - }, - { - "epoch": 0.8740967440278841, - "grad_norm": 0.0, - "learning_rate": 8.199405469076826e-07, - "loss": 0.8807, - "step": 30846 - }, - { - "epoch": 0.8741250814701466, - "grad_norm": 0.0, - "learning_rate": 8.195766209847623e-07, - "loss": 0.9004, - "step": 30847 - }, - { - "epoch": 0.8741534189124089, - "grad_norm": 0.0, - "learning_rate": 8.192127723919563e-07, - "loss": 0.6978, - "step": 30848 - }, - { - "epoch": 0.8741817563546714, - "grad_norm": 0.0, - "learning_rate": 8.188490011323291e-07, - "loss": 0.8436, - "step": 30849 - }, - { - "epoch": 0.8742100937969339, - "grad_norm": 0.0, - "learning_rate": 8.184853072089482e-07, - "loss": 0.854, - "step": 30850 - }, - { - "epoch": 0.8742384312391963, - "grad_norm": 0.0, - "learning_rate": 8.181216906248734e-07, - "loss": 0.731, - "step": 30851 - }, - { - "epoch": 0.8742667686814588, - "grad_norm": 0.0, - "learning_rate": 8.17758151383169e-07, - "loss": 0.8032, - "step": 30852 - }, - { - "epoch": 0.8742951061237213, - "grad_norm": 0.0, - "learning_rate": 8.173946894869001e-07, - "loss": 0.8477, - "step": 30853 - }, - { - "epoch": 0.8743234435659837, - "grad_norm": 0.0, - "learning_rate": 8.170313049391221e-07, - "loss": 0.6806, - "step": 30854 - }, - { - "epoch": 0.8743517810082462, - "grad_norm": 0.0, - "learning_rate": 8.166679977429004e-07, - "loss": 0.8575, - "step": 30855 - }, - { - "epoch": 0.8743801184505087, - "grad_norm": 0.0, - "learning_rate": 8.163047679012948e-07, - "loss": 0.7542, - "step": 30856 - }, - { - "epoch": 0.8744084558927712, - "grad_norm": 0.0, - "learning_rate": 8.159416154173638e-07, - "loss": 0.7783, - "step": 30857 - }, - { - "epoch": 0.8744367933350335, - "grad_norm": 0.0, - "learning_rate": 8.155785402941685e-07, - "loss": 0.8151, - "step": 30858 - }, - { - "epoch": 0.874465130777296, - "grad_norm": 0.0, - "learning_rate": 8.152155425347652e-07, - "loss": 0.7185, - "step": 30859 - }, - { - "epoch": 0.8744934682195585, - "grad_norm": 0.0, - "learning_rate": 8.148526221422104e-07, - "loss": 0.742, - "step": 30860 - }, - { - "epoch": 0.8745218056618209, - "grad_norm": 0.0, - "learning_rate": 8.144897791195661e-07, - "loss": 0.8684, - "step": 30861 - }, - { - "epoch": 0.8745501431040834, - "grad_norm": 0.0, - "learning_rate": 8.14127013469882e-07, - "loss": 0.8797, - "step": 30862 - }, - { - "epoch": 0.8745784805463459, - "grad_norm": 0.0, - "learning_rate": 8.137643251962202e-07, - "loss": 0.8357, - "step": 30863 - }, - { - "epoch": 0.8746068179886084, - "grad_norm": 0.0, - "learning_rate": 8.134017143016304e-07, - "loss": 0.907, - "step": 30864 - }, - { - "epoch": 0.8746351554308708, - "grad_norm": 0.0, - "learning_rate": 8.130391807891691e-07, - "loss": 0.8304, - "step": 30865 - }, - { - "epoch": 0.8746634928731333, - "grad_norm": 0.0, - "learning_rate": 8.126767246618927e-07, - "loss": 0.9608, - "step": 30866 - }, - { - "epoch": 0.8746918303153958, - "grad_norm": 0.0, - "learning_rate": 8.1231434592285e-07, - "loss": 0.8288, - "step": 30867 - }, - { - "epoch": 0.8747201677576582, - "grad_norm": 0.0, - "learning_rate": 8.11952044575095e-07, - "loss": 0.7871, - "step": 30868 - }, - { - "epoch": 0.8747485051999206, - "grad_norm": 0.0, - "learning_rate": 8.1158982062168e-07, - "loss": 0.91, - "step": 30869 - }, - { - "epoch": 0.8747768426421831, - "grad_norm": 0.0, - "learning_rate": 8.112276740656555e-07, - "loss": 0.7605, - "step": 30870 - }, - { - "epoch": 0.8748051800844456, - "grad_norm": 0.0, - "learning_rate": 8.10865604910075e-07, - "loss": 0.8262, - "step": 30871 - }, - { - "epoch": 0.874833517526708, - "grad_norm": 0.0, - "learning_rate": 8.105036131579835e-07, - "loss": 0.8399, - "step": 30872 - }, - { - "epoch": 0.8748618549689705, - "grad_norm": 0.0, - "learning_rate": 8.101416988124333e-07, - "loss": 0.7096, - "step": 30873 - }, - { - "epoch": 0.874890192411233, - "grad_norm": 0.0, - "learning_rate": 8.097798618764729e-07, - "loss": 0.8527, - "step": 30874 - }, - { - "epoch": 0.8749185298534954, - "grad_norm": 0.0, - "learning_rate": 8.094181023531478e-07, - "loss": 0.6894, - "step": 30875 - }, - { - "epoch": 0.8749468672957579, - "grad_norm": 0.0, - "learning_rate": 8.090564202455087e-07, - "loss": 0.8452, - "step": 30876 - }, - { - "epoch": 0.8749752047380204, - "grad_norm": 0.0, - "learning_rate": 8.086948155565988e-07, - "loss": 0.9098, - "step": 30877 - }, - { - "epoch": 0.8750035421802828, - "grad_norm": 0.0, - "learning_rate": 8.083332882894657e-07, - "loss": 0.8037, - "step": 30878 - }, - { - "epoch": 0.8750318796225453, - "grad_norm": 0.0, - "learning_rate": 8.079718384471557e-07, - "loss": 0.8526, - "step": 30879 - }, - { - "epoch": 0.8750602170648077, - "grad_norm": 0.0, - "learning_rate": 8.076104660327111e-07, - "loss": 0.7828, - "step": 30880 - }, - { - "epoch": 0.8750885545070702, - "grad_norm": 0.0, - "learning_rate": 8.07249171049177e-07, - "loss": 0.8048, - "step": 30881 - }, - { - "epoch": 0.8751168919493326, - "grad_norm": 0.0, - "learning_rate": 8.068879534995966e-07, - "loss": 0.8508, - "step": 30882 - }, - { - "epoch": 0.8751452293915951, - "grad_norm": 0.0, - "learning_rate": 8.06526813387013e-07, - "loss": 0.721, - "step": 30883 - }, - { - "epoch": 0.8751735668338576, - "grad_norm": 0.0, - "learning_rate": 8.061657507144705e-07, - "loss": 0.8739, - "step": 30884 - }, - { - "epoch": 0.87520190427612, - "grad_norm": 0.0, - "learning_rate": 8.058047654850043e-07, - "loss": 0.8188, - "step": 30885 - }, - { - "epoch": 0.8752302417183825, - "grad_norm": 0.0, - "learning_rate": 8.054438577016599e-07, - "loss": 0.784, - "step": 30886 - }, - { - "epoch": 0.875258579160645, - "grad_norm": 0.0, - "learning_rate": 8.050830273674781e-07, - "loss": 0.7527, - "step": 30887 - }, - { - "epoch": 0.8752869166029075, - "grad_norm": 0.0, - "learning_rate": 8.047222744854943e-07, - "loss": 0.8623, - "step": 30888 - }, - { - "epoch": 0.8753152540451699, - "grad_norm": 0.0, - "learning_rate": 8.043615990587495e-07, - "loss": 0.7459, - "step": 30889 - }, - { - "epoch": 0.8753435914874323, - "grad_norm": 0.0, - "learning_rate": 8.040010010902832e-07, - "loss": 0.7546, - "step": 30890 - }, - { - "epoch": 0.8753719289296948, - "grad_norm": 0.0, - "learning_rate": 8.036404805831299e-07, - "loss": 0.7652, - "step": 30891 - }, - { - "epoch": 0.8754002663719572, - "grad_norm": 0.0, - "learning_rate": 8.032800375403271e-07, - "loss": 0.7868, - "step": 30892 - }, - { - "epoch": 0.8754286038142197, - "grad_norm": 0.0, - "learning_rate": 8.029196719649135e-07, - "loss": 0.923, - "step": 30893 - }, - { - "epoch": 0.8754569412564822, - "grad_norm": 0.0, - "learning_rate": 8.025593838599221e-07, - "loss": 0.847, - "step": 30894 - }, - { - "epoch": 0.8754852786987447, - "grad_norm": 0.0, - "learning_rate": 8.021991732283874e-07, - "loss": 0.9013, - "step": 30895 - }, - { - "epoch": 0.8755136161410071, - "grad_norm": 0.0, - "learning_rate": 8.018390400733444e-07, - "loss": 0.6446, - "step": 30896 - }, - { - "epoch": 0.8755419535832696, - "grad_norm": 0.0, - "learning_rate": 8.014789843978288e-07, - "loss": 0.8192, - "step": 30897 - }, - { - "epoch": 0.8755702910255321, - "grad_norm": 0.0, - "learning_rate": 8.011190062048701e-07, - "loss": 0.8712, - "step": 30898 - }, - { - "epoch": 0.8755986284677945, - "grad_norm": 0.0, - "learning_rate": 8.007591054975016e-07, - "loss": 0.806, - "step": 30899 - }, - { - "epoch": 0.875626965910057, - "grad_norm": 0.0, - "learning_rate": 8.003992822787576e-07, - "loss": 0.8154, - "step": 30900 - }, - { - "epoch": 0.8756553033523194, - "grad_norm": 0.0, - "learning_rate": 8.000395365516644e-07, - "loss": 0.8441, - "step": 30901 - }, - { - "epoch": 0.8756836407945818, - "grad_norm": 0.0, - "learning_rate": 7.996798683192553e-07, - "loss": 0.9037, - "step": 30902 - }, - { - "epoch": 0.8757119782368443, - "grad_norm": 0.0, - "learning_rate": 7.993202775845599e-07, - "loss": 0.7753, - "step": 30903 - }, - { - "epoch": 0.8757403156791068, - "grad_norm": 0.0, - "learning_rate": 7.989607643506048e-07, - "loss": 0.8172, - "step": 30904 - }, - { - "epoch": 0.8757686531213693, - "grad_norm": 0.0, - "learning_rate": 7.986013286204197e-07, - "loss": 0.9255, - "step": 30905 - }, - { - "epoch": 0.8757969905636317, - "grad_norm": 0.0, - "learning_rate": 7.982419703970323e-07, - "loss": 0.8601, - "step": 30906 - }, - { - "epoch": 0.8758253280058942, - "grad_norm": 0.0, - "learning_rate": 7.978826896834702e-07, - "loss": 0.8894, - "step": 30907 - }, - { - "epoch": 0.8758536654481567, - "grad_norm": 0.0, - "learning_rate": 7.975234864827619e-07, - "loss": 0.7941, - "step": 30908 - }, - { - "epoch": 0.8758820028904191, - "grad_norm": 0.0, - "learning_rate": 7.971643607979273e-07, - "loss": 0.7818, - "step": 30909 - }, - { - "epoch": 0.8759103403326816, - "grad_norm": 0.0, - "learning_rate": 7.968053126319953e-07, - "loss": 0.8166, - "step": 30910 - }, - { - "epoch": 0.875938677774944, - "grad_norm": 0.0, - "learning_rate": 7.96446341987992e-07, - "loss": 0.8409, - "step": 30911 - }, - { - "epoch": 0.8759670152172065, - "grad_norm": 0.0, - "learning_rate": 7.960874488689363e-07, - "loss": 0.8081, - "step": 30912 - }, - { - "epoch": 0.8759953526594689, - "grad_norm": 0.0, - "learning_rate": 7.957286332778558e-07, - "loss": 0.744, - "step": 30913 - }, - { - "epoch": 0.8760236901017314, - "grad_norm": 0.0, - "learning_rate": 7.953698952177691e-07, - "loss": 0.8441, - "step": 30914 - }, - { - "epoch": 0.8760520275439939, - "grad_norm": 0.0, - "learning_rate": 7.950112346917004e-07, - "loss": 0.93, - "step": 30915 - }, - { - "epoch": 0.8760803649862563, - "grad_norm": 0.0, - "learning_rate": 7.94652651702671e-07, - "loss": 0.7751, - "step": 30916 - }, - { - "epoch": 0.8761087024285188, - "grad_norm": 0.0, - "learning_rate": 7.942941462536991e-07, - "loss": 0.8565, - "step": 30917 - }, - { - "epoch": 0.8761370398707813, - "grad_norm": 0.0, - "learning_rate": 7.939357183478069e-07, - "loss": 0.8511, - "step": 30918 - }, - { - "epoch": 0.8761653773130437, - "grad_norm": 0.0, - "learning_rate": 7.935773679880121e-07, - "loss": 0.88, - "step": 30919 - }, - { - "epoch": 0.8761937147553062, - "grad_norm": 0.0, - "learning_rate": 7.932190951773344e-07, - "loss": 0.8878, - "step": 30920 - }, - { - "epoch": 0.8762220521975687, - "grad_norm": 0.0, - "learning_rate": 7.928608999187925e-07, - "loss": 0.7878, - "step": 30921 - }, - { - "epoch": 0.8762503896398312, - "grad_norm": 0.0, - "learning_rate": 7.925027822153997e-07, - "loss": 0.7765, - "step": 30922 - }, - { - "epoch": 0.8762787270820935, - "grad_norm": 0.0, - "learning_rate": 7.921447420701755e-07, - "loss": 0.8335, - "step": 30923 - }, - { - "epoch": 0.876307064524356, - "grad_norm": 0.0, - "learning_rate": 7.917867794861378e-07, - "loss": 0.7573, - "step": 30924 - }, - { - "epoch": 0.8763354019666185, - "grad_norm": 0.0, - "learning_rate": 7.914288944662973e-07, - "loss": 0.741, - "step": 30925 - }, - { - "epoch": 0.8763637394088809, - "grad_norm": 0.0, - "learning_rate": 7.910710870136729e-07, - "loss": 0.7058, - "step": 30926 - }, - { - "epoch": 0.8763920768511434, - "grad_norm": 0.0, - "learning_rate": 7.907133571312742e-07, - "loss": 0.8421, - "step": 30927 - }, - { - "epoch": 0.8764204142934059, - "grad_norm": 0.0, - "learning_rate": 7.903557048221167e-07, - "loss": 0.8385, - "step": 30928 - }, - { - "epoch": 0.8764487517356684, - "grad_norm": 0.0, - "learning_rate": 7.899981300892145e-07, - "loss": 0.737, - "step": 30929 - }, - { - "epoch": 0.8764770891779308, - "grad_norm": 0.0, - "learning_rate": 7.896406329355766e-07, - "loss": 0.7677, - "step": 30930 - }, - { - "epoch": 0.8765054266201933, - "grad_norm": 0.0, - "learning_rate": 7.892832133642159e-07, - "loss": 0.7902, - "step": 30931 - }, - { - "epoch": 0.8765337640624558, - "grad_norm": 0.0, - "learning_rate": 7.889258713781434e-07, - "loss": 0.8621, - "step": 30932 - }, - { - "epoch": 0.8765621015047181, - "grad_norm": 0.0, - "learning_rate": 7.885686069803678e-07, - "loss": 0.7804, - "step": 30933 - }, - { - "epoch": 0.8765904389469806, - "grad_norm": 0.0, - "learning_rate": 7.882114201739022e-07, - "loss": 0.833, - "step": 30934 - }, - { - "epoch": 0.8766187763892431, - "grad_norm": 0.0, - "learning_rate": 7.878543109617498e-07, - "loss": 0.8356, - "step": 30935 - }, - { - "epoch": 0.8766471138315056, - "grad_norm": 0.0, - "learning_rate": 7.874972793469216e-07, - "loss": 0.6792, - "step": 30936 - }, - { - "epoch": 0.876675451273768, - "grad_norm": 0.0, - "learning_rate": 7.871403253324272e-07, - "loss": 0.7862, - "step": 30937 - }, - { - "epoch": 0.8767037887160305, - "grad_norm": 0.0, - "learning_rate": 7.86783448921269e-07, - "loss": 0.7868, - "step": 30938 - }, - { - "epoch": 0.876732126158293, - "grad_norm": 0.0, - "learning_rate": 7.864266501164541e-07, - "loss": 0.794, - "step": 30939 - }, - { - "epoch": 0.8767604636005554, - "grad_norm": 0.0, - "learning_rate": 7.860699289209917e-07, - "loss": 0.8351, - "step": 30940 - }, - { - "epoch": 0.8767888010428179, - "grad_norm": 0.0, - "learning_rate": 7.857132853378813e-07, - "loss": 0.8063, - "step": 30941 - }, - { - "epoch": 0.8768171384850804, - "grad_norm": 0.0, - "learning_rate": 7.853567193701317e-07, - "loss": 0.8833, - "step": 30942 - }, - { - "epoch": 0.8768454759273427, - "grad_norm": 0.0, - "learning_rate": 7.850002310207428e-07, - "loss": 0.7169, - "step": 30943 - }, - { - "epoch": 0.8768738133696052, - "grad_norm": 0.0, - "learning_rate": 7.846438202927187e-07, - "loss": 0.7746, - "step": 30944 - }, - { - "epoch": 0.8769021508118677, - "grad_norm": 0.0, - "learning_rate": 7.842874871890615e-07, - "loss": 0.8215, - "step": 30945 - }, - { - "epoch": 0.8769304882541302, - "grad_norm": 0.0, - "learning_rate": 7.839312317127734e-07, - "loss": 0.875, - "step": 30946 - }, - { - "epoch": 0.8769588256963926, - "grad_norm": 0.0, - "learning_rate": 7.835750538668563e-07, - "loss": 0.7836, - "step": 30947 - }, - { - "epoch": 0.8769871631386551, - "grad_norm": 0.0, - "learning_rate": 7.832189536543067e-07, - "loss": 0.7994, - "step": 30948 - }, - { - "epoch": 0.8770155005809176, - "grad_norm": 0.0, - "learning_rate": 7.828629310781266e-07, - "loss": 0.8072, - "step": 30949 - }, - { - "epoch": 0.87704383802318, - "grad_norm": 0.0, - "learning_rate": 7.82506986141317e-07, - "loss": 0.8545, - "step": 30950 - }, - { - "epoch": 0.8770721754654425, - "grad_norm": 0.0, - "learning_rate": 7.821511188468723e-07, - "loss": 0.7961, - "step": 30951 - }, - { - "epoch": 0.877100512907705, - "grad_norm": 0.0, - "learning_rate": 7.817953291977908e-07, - "loss": 0.8611, - "step": 30952 - }, - { - "epoch": 0.8771288503499675, - "grad_norm": 0.0, - "learning_rate": 7.814396171970729e-07, - "loss": 0.6945, - "step": 30953 - }, - { - "epoch": 0.8771571877922298, - "grad_norm": 0.0, - "learning_rate": 7.810839828477101e-07, - "loss": 0.7793, - "step": 30954 - }, - { - "epoch": 0.8771855252344923, - "grad_norm": 0.0, - "learning_rate": 7.807284261527015e-07, - "loss": 0.8228, - "step": 30955 - }, - { - "epoch": 0.8772138626767548, - "grad_norm": 0.0, - "learning_rate": 7.803729471150401e-07, - "loss": 0.7364, - "step": 30956 - }, - { - "epoch": 0.8772422001190172, - "grad_norm": 0.0, - "learning_rate": 7.800175457377213e-07, - "loss": 0.8452, - "step": 30957 - }, - { - "epoch": 0.8772705375612797, - "grad_norm": 0.0, - "learning_rate": 7.796622220237371e-07, - "loss": 0.8568, - "step": 30958 - }, - { - "epoch": 0.8772988750035422, - "grad_norm": 0.0, - "learning_rate": 7.79306975976083e-07, - "loss": 0.8328, - "step": 30959 - }, - { - "epoch": 0.8773272124458047, - "grad_norm": 0.0, - "learning_rate": 7.789518075977498e-07, - "loss": 0.8232, - "step": 30960 - }, - { - "epoch": 0.8773555498880671, - "grad_norm": 0.0, - "learning_rate": 7.785967168917319e-07, - "loss": 0.8373, - "step": 30961 - }, - { - "epoch": 0.8773838873303296, - "grad_norm": 0.0, - "learning_rate": 7.782417038610158e-07, - "loss": 0.7657, - "step": 30962 - }, - { - "epoch": 0.8774122247725921, - "grad_norm": 0.0, - "learning_rate": 7.778867685085956e-07, - "loss": 0.9094, - "step": 30963 - }, - { - "epoch": 0.8774405622148544, - "grad_norm": 0.0, - "learning_rate": 7.77531910837459e-07, - "loss": 0.7997, - "step": 30964 - }, - { - "epoch": 0.8774688996571169, - "grad_norm": 0.0, - "learning_rate": 7.771771308505949e-07, - "loss": 0.8588, - "step": 30965 - }, - { - "epoch": 0.8774972370993794, - "grad_norm": 0.0, - "learning_rate": 7.768224285509951e-07, - "loss": 0.7821, - "step": 30966 - }, - { - "epoch": 0.8775255745416418, - "grad_norm": 0.0, - "learning_rate": 7.764678039416429e-07, - "loss": 0.8618, - "step": 30967 - }, - { - "epoch": 0.8775539119839043, - "grad_norm": 0.0, - "learning_rate": 7.761132570255281e-07, - "loss": 0.742, - "step": 30968 - }, - { - "epoch": 0.8775822494261668, - "grad_norm": 0.0, - "learning_rate": 7.757587878056372e-07, - "loss": 0.8156, - "step": 30969 - }, - { - "epoch": 0.8776105868684293, - "grad_norm": 0.0, - "learning_rate": 7.754043962849545e-07, - "loss": 0.8437, - "step": 30970 - }, - { - "epoch": 0.8776389243106917, - "grad_norm": 0.0, - "learning_rate": 7.750500824664653e-07, - "loss": 0.7639, - "step": 30971 - }, - { - "epoch": 0.8776672617529542, - "grad_norm": 0.0, - "learning_rate": 7.746958463531551e-07, - "loss": 0.8253, - "step": 30972 - }, - { - "epoch": 0.8776955991952167, - "grad_norm": 0.0, - "learning_rate": 7.74341687948007e-07, - "loss": 0.8647, - "step": 30973 - }, - { - "epoch": 0.877723936637479, - "grad_norm": 0.0, - "learning_rate": 7.739876072540076e-07, - "loss": 0.8323, - "step": 30974 - }, - { - "epoch": 0.8777522740797415, - "grad_norm": 0.0, - "learning_rate": 7.736336042741344e-07, - "loss": 0.8237, - "step": 30975 - }, - { - "epoch": 0.877780611522004, - "grad_norm": 0.0, - "learning_rate": 7.732796790113728e-07, - "loss": 0.8755, - "step": 30976 - }, - { - "epoch": 0.8778089489642665, - "grad_norm": 0.0, - "learning_rate": 7.729258314687016e-07, - "loss": 0.7042, - "step": 30977 - }, - { - "epoch": 0.8778372864065289, - "grad_norm": 0.0, - "learning_rate": 7.725720616491017e-07, - "loss": 0.7729, - "step": 30978 - }, - { - "epoch": 0.8778656238487914, - "grad_norm": 0.0, - "learning_rate": 7.722183695555563e-07, - "loss": 0.7447, - "step": 30979 - }, - { - "epoch": 0.8778939612910539, - "grad_norm": 0.0, - "learning_rate": 7.718647551910408e-07, - "loss": 0.7202, - "step": 30980 - }, - { - "epoch": 0.8779222987333163, - "grad_norm": 0.0, - "learning_rate": 7.71511218558535e-07, - "loss": 0.7582, - "step": 30981 - }, - { - "epoch": 0.8779506361755788, - "grad_norm": 0.0, - "learning_rate": 7.711577596610176e-07, - "loss": 0.793, - "step": 30982 - }, - { - "epoch": 0.8779789736178413, - "grad_norm": 0.0, - "learning_rate": 7.708043785014652e-07, - "loss": 0.7762, - "step": 30983 - }, - { - "epoch": 0.8780073110601038, - "grad_norm": 0.0, - "learning_rate": 7.704510750828542e-07, - "loss": 0.8306, - "step": 30984 - }, - { - "epoch": 0.8780356485023662, - "grad_norm": 0.0, - "learning_rate": 7.700978494081612e-07, - "loss": 0.7961, - "step": 30985 - }, - { - "epoch": 0.8780639859446286, - "grad_norm": 0.0, - "learning_rate": 7.697447014803617e-07, - "loss": 0.8608, - "step": 30986 - }, - { - "epoch": 0.8780923233868911, - "grad_norm": 0.0, - "learning_rate": 7.693916313024308e-07, - "loss": 0.8079, - "step": 30987 - }, - { - "epoch": 0.8781206608291535, - "grad_norm": 0.0, - "learning_rate": 7.690386388773408e-07, - "loss": 0.7378, - "step": 30988 - }, - { - "epoch": 0.878148998271416, - "grad_norm": 0.0, - "learning_rate": 7.68685724208067e-07, - "loss": 0.8843, - "step": 30989 - }, - { - "epoch": 0.8781773357136785, - "grad_norm": 0.0, - "learning_rate": 7.683328872975815e-07, - "loss": 0.8146, - "step": 30990 - }, - { - "epoch": 0.8782056731559409, - "grad_norm": 0.0, - "learning_rate": 7.679801281488553e-07, - "loss": 0.7772, - "step": 30991 - }, - { - "epoch": 0.8782340105982034, - "grad_norm": 0.0, - "learning_rate": 7.676274467648626e-07, - "loss": 0.8304, - "step": 30992 - }, - { - "epoch": 0.8782623480404659, - "grad_norm": 0.0, - "learning_rate": 7.6727484314857e-07, - "loss": 0.802, - "step": 30993 - }, - { - "epoch": 0.8782906854827284, - "grad_norm": 0.0, - "learning_rate": 7.669223173029505e-07, - "loss": 0.9297, - "step": 30994 - }, - { - "epoch": 0.8783190229249908, - "grad_norm": 0.0, - "learning_rate": 7.665698692309742e-07, - "loss": 0.7788, - "step": 30995 - }, - { - "epoch": 0.8783473603672532, - "grad_norm": 0.0, - "learning_rate": 7.662174989356075e-07, - "loss": 0.8208, - "step": 30996 - }, - { - "epoch": 0.8783756978095157, - "grad_norm": 0.0, - "learning_rate": 7.658652064198191e-07, - "loss": 0.7616, - "step": 30997 - }, - { - "epoch": 0.8784040352517781, - "grad_norm": 0.0, - "learning_rate": 7.655129916865778e-07, - "loss": 0.826, - "step": 30998 - }, - { - "epoch": 0.8784323726940406, - "grad_norm": 0.0, - "learning_rate": 7.651608547388489e-07, - "loss": 0.9096, - "step": 30999 - }, - { - "epoch": 0.8784607101363031, - "grad_norm": 0.0, - "learning_rate": 7.648087955796014e-07, - "loss": 0.8034, - "step": 31000 - }, - { - "epoch": 0.8784890475785656, - "grad_norm": 0.0, - "learning_rate": 7.64456814211797e-07, - "loss": 0.8561, - "step": 31001 - }, - { - "epoch": 0.878517385020828, - "grad_norm": 0.0, - "learning_rate": 7.641049106384024e-07, - "loss": 0.7369, - "step": 31002 - }, - { - "epoch": 0.8785457224630905, - "grad_norm": 0.0, - "learning_rate": 7.637530848623842e-07, - "loss": 0.7457, - "step": 31003 - }, - { - "epoch": 0.878574059905353, - "grad_norm": 0.0, - "learning_rate": 7.63401336886701e-07, - "loss": 0.7704, - "step": 31004 - }, - { - "epoch": 0.8786023973476154, - "grad_norm": 0.0, - "learning_rate": 7.630496667143195e-07, - "loss": 0.8679, - "step": 31005 - }, - { - "epoch": 0.8786307347898779, - "grad_norm": 0.0, - "learning_rate": 7.626980743482004e-07, - "loss": 0.7412, - "step": 31006 - }, - { - "epoch": 0.8786590722321403, - "grad_norm": 0.0, - "learning_rate": 7.623465597913049e-07, - "loss": 0.8965, - "step": 31007 - }, - { - "epoch": 0.8786874096744028, - "grad_norm": 0.0, - "learning_rate": 7.619951230465961e-07, - "loss": 0.8325, - "step": 31008 - }, - { - "epoch": 0.8787157471166652, - "grad_norm": 0.0, - "learning_rate": 7.616437641170316e-07, - "loss": 0.7794, - "step": 31009 - }, - { - "epoch": 0.8787440845589277, - "grad_norm": 0.0, - "learning_rate": 7.612924830055724e-07, - "loss": 0.7895, - "step": 31010 - }, - { - "epoch": 0.8787724220011902, - "grad_norm": 0.0, - "learning_rate": 7.609412797151771e-07, - "loss": 0.7914, - "step": 31011 - }, - { - "epoch": 0.8788007594434526, - "grad_norm": 0.0, - "learning_rate": 7.605901542488037e-07, - "loss": 0.7873, - "step": 31012 - }, - { - "epoch": 0.8788290968857151, - "grad_norm": 0.0, - "learning_rate": 7.602391066094128e-07, - "loss": 0.793, - "step": 31013 - }, - { - "epoch": 0.8788574343279776, - "grad_norm": 0.0, - "learning_rate": 7.598881367999566e-07, - "loss": 0.8063, - "step": 31014 - }, - { - "epoch": 0.87888577177024, - "grad_norm": 0.0, - "learning_rate": 7.595372448233951e-07, - "loss": 0.7688, - "step": 31015 - }, - { - "epoch": 0.8789141092125025, - "grad_norm": 0.0, - "learning_rate": 7.591864306826835e-07, - "loss": 0.7817, - "step": 31016 - }, - { - "epoch": 0.878942446654765, - "grad_norm": 0.0, - "learning_rate": 7.58835694380774e-07, - "loss": 0.7736, - "step": 31017 - }, - { - "epoch": 0.8789707840970274, - "grad_norm": 0.0, - "learning_rate": 7.584850359206242e-07, - "loss": 0.7618, - "step": 31018 - }, - { - "epoch": 0.8789991215392898, - "grad_norm": 0.0, - "learning_rate": 7.581344553051873e-07, - "loss": 0.7847, - "step": 31019 - }, - { - "epoch": 0.8790274589815523, - "grad_norm": 0.0, - "learning_rate": 7.577839525374142e-07, - "loss": 0.6697, - "step": 31020 - }, - { - "epoch": 0.8790557964238148, - "grad_norm": 0.0, - "learning_rate": 7.574335276202616e-07, - "loss": 0.7665, - "step": 31021 - }, - { - "epoch": 0.8790841338660772, - "grad_norm": 0.0, - "learning_rate": 7.570831805566759e-07, - "loss": 0.8285, - "step": 31022 - }, - { - "epoch": 0.8791124713083397, - "grad_norm": 0.0, - "learning_rate": 7.567329113496113e-07, - "loss": 0.8422, - "step": 31023 - }, - { - "epoch": 0.8791408087506022, - "grad_norm": 0.0, - "learning_rate": 7.563827200020179e-07, - "loss": 0.8476, - "step": 31024 - }, - { - "epoch": 0.8791691461928647, - "grad_norm": 0.0, - "learning_rate": 7.560326065168444e-07, - "loss": 0.9246, - "step": 31025 - }, - { - "epoch": 0.8791974836351271, - "grad_norm": 0.0, - "learning_rate": 7.556825708970417e-07, - "loss": 0.875, - "step": 31026 - }, - { - "epoch": 0.8792258210773896, - "grad_norm": 0.0, - "learning_rate": 7.553326131455596e-07, - "loss": 0.8228, - "step": 31027 - }, - { - "epoch": 0.879254158519652, - "grad_norm": 0.0, - "learning_rate": 7.549827332653415e-07, - "loss": 0.7733, - "step": 31028 - }, - { - "epoch": 0.8792824959619144, - "grad_norm": 0.0, - "learning_rate": 7.546329312593382e-07, - "loss": 0.7799, - "step": 31029 - }, - { - "epoch": 0.8793108334041769, - "grad_norm": 0.0, - "learning_rate": 7.54283207130494e-07, - "loss": 0.9121, - "step": 31030 - }, - { - "epoch": 0.8793391708464394, - "grad_norm": 0.0, - "learning_rate": 7.539335608817556e-07, - "loss": 0.7378, - "step": 31031 - }, - { - "epoch": 0.8793675082887019, - "grad_norm": 0.0, - "learning_rate": 7.535839925160693e-07, - "loss": 0.8227, - "step": 31032 - }, - { - "epoch": 0.8793958457309643, - "grad_norm": 0.0, - "learning_rate": 7.532345020363774e-07, - "loss": 0.8085, - "step": 31033 - }, - { - "epoch": 0.8794241831732268, - "grad_norm": 0.0, - "learning_rate": 7.528850894456275e-07, - "loss": 0.9068, - "step": 31034 - }, - { - "epoch": 0.8794525206154893, - "grad_norm": 0.0, - "learning_rate": 7.525357547467572e-07, - "loss": 0.7574, - "step": 31035 - }, - { - "epoch": 0.8794808580577517, - "grad_norm": 0.0, - "learning_rate": 7.52186497942713e-07, - "loss": 0.8854, - "step": 31036 - }, - { - "epoch": 0.8795091955000142, - "grad_norm": 0.0, - "learning_rate": 7.518373190364359e-07, - "loss": 0.7841, - "step": 31037 - }, - { - "epoch": 0.8795375329422767, - "grad_norm": 0.0, - "learning_rate": 7.514882180308669e-07, - "loss": 0.8412, - "step": 31038 - }, - { - "epoch": 0.879565870384539, - "grad_norm": 0.0, - "learning_rate": 7.51139194928947e-07, - "loss": 0.8128, - "step": 31039 - }, - { - "epoch": 0.8795942078268015, - "grad_norm": 0.0, - "learning_rate": 7.507902497336184e-07, - "loss": 0.7966, - "step": 31040 - }, - { - "epoch": 0.879622545269064, - "grad_norm": 0.0, - "learning_rate": 7.504413824478163e-07, - "loss": 0.7214, - "step": 31041 - }, - { - "epoch": 0.8796508827113265, - "grad_norm": 0.0, - "learning_rate": 7.500925930744829e-07, - "loss": 0.8701, - "step": 31042 - }, - { - "epoch": 0.8796792201535889, - "grad_norm": 0.0, - "learning_rate": 7.497438816165526e-07, - "loss": 0.7859, - "step": 31043 - }, - { - "epoch": 0.8797075575958514, - "grad_norm": 0.0, - "learning_rate": 7.49395248076964e-07, - "loss": 0.7924, - "step": 31044 - }, - { - "epoch": 0.8797358950381139, - "grad_norm": 0.0, - "learning_rate": 7.49046692458657e-07, - "loss": 0.901, - "step": 31045 - }, - { - "epoch": 0.8797642324803763, - "grad_norm": 0.0, - "learning_rate": 7.486982147645628e-07, - "loss": 0.858, - "step": 31046 - }, - { - "epoch": 0.8797925699226388, - "grad_norm": 0.0, - "learning_rate": 7.483498149976187e-07, - "loss": 0.8436, - "step": 31047 - }, - { - "epoch": 0.8798209073649013, - "grad_norm": 0.0, - "learning_rate": 7.480014931607616e-07, - "loss": 0.774, - "step": 31048 - }, - { - "epoch": 0.8798492448071638, - "grad_norm": 0.0, - "learning_rate": 7.476532492569222e-07, - "loss": 0.8154, - "step": 31049 - }, - { - "epoch": 0.8798775822494261, - "grad_norm": 0.0, - "learning_rate": 7.47305083289035e-07, - "loss": 0.7838, - "step": 31050 - }, - { - "epoch": 0.8799059196916886, - "grad_norm": 0.0, - "learning_rate": 7.46956995260033e-07, - "loss": 0.8373, - "step": 31051 - }, - { - "epoch": 0.8799342571339511, - "grad_norm": 0.0, - "learning_rate": 7.466089851728475e-07, - "loss": 0.8372, - "step": 31052 - }, - { - "epoch": 0.8799625945762135, - "grad_norm": 0.0, - "learning_rate": 7.462610530304137e-07, - "loss": 0.7567, - "step": 31053 - }, - { - "epoch": 0.879990932018476, - "grad_norm": 0.0, - "learning_rate": 7.459131988356571e-07, - "loss": 0.7736, - "step": 31054 - }, - { - "epoch": 0.8800192694607385, - "grad_norm": 0.0, - "learning_rate": 7.455654225915098e-07, - "loss": 0.7355, - "step": 31055 - }, - { - "epoch": 0.880047606903001, - "grad_norm": 0.0, - "learning_rate": 7.452177243009028e-07, - "loss": 0.7963, - "step": 31056 - }, - { - "epoch": 0.8800759443452634, - "grad_norm": 0.0, - "learning_rate": 7.448701039667628e-07, - "loss": 0.7775, - "step": 31057 - }, - { - "epoch": 0.8801042817875259, - "grad_norm": 0.0, - "learning_rate": 7.445225615920204e-07, - "loss": 0.7447, - "step": 31058 - }, - { - "epoch": 0.8801326192297884, - "grad_norm": 0.0, - "learning_rate": 7.441750971795991e-07, - "loss": 0.7506, - "step": 31059 - }, - { - "epoch": 0.8801609566720507, - "grad_norm": 0.0, - "learning_rate": 7.438277107324288e-07, - "loss": 0.7639, - "step": 31060 - }, - { - "epoch": 0.8801892941143132, - "grad_norm": 0.0, - "learning_rate": 7.434804022534359e-07, - "loss": 0.7352, - "step": 31061 - }, - { - "epoch": 0.8802176315565757, - "grad_norm": 0.0, - "learning_rate": 7.431331717455426e-07, - "loss": 0.8665, - "step": 31062 - }, - { - "epoch": 0.8802459689988381, - "grad_norm": 0.0, - "learning_rate": 7.427860192116776e-07, - "loss": 0.8934, - "step": 31063 - }, - { - "epoch": 0.8802743064411006, - "grad_norm": 0.0, - "learning_rate": 7.42438944654762e-07, - "loss": 0.9115, - "step": 31064 - }, - { - "epoch": 0.8803026438833631, - "grad_norm": 0.0, - "learning_rate": 7.420919480777222e-07, - "loss": 0.8441, - "step": 31065 - }, - { - "epoch": 0.8803309813256256, - "grad_norm": 0.0, - "learning_rate": 7.417450294834805e-07, - "loss": 0.7938, - "step": 31066 - }, - { - "epoch": 0.880359318767888, - "grad_norm": 0.0, - "learning_rate": 7.413981888749577e-07, - "loss": 0.7366, - "step": 31067 - }, - { - "epoch": 0.8803876562101505, - "grad_norm": 0.0, - "learning_rate": 7.410514262550749e-07, - "loss": 0.7457, - "step": 31068 - }, - { - "epoch": 0.880415993652413, - "grad_norm": 0.0, - "learning_rate": 7.407047416267565e-07, - "loss": 0.8226, - "step": 31069 - }, - { - "epoch": 0.8804443310946753, - "grad_norm": 0.0, - "learning_rate": 7.40358134992919e-07, - "loss": 0.848, - "step": 31070 - }, - { - "epoch": 0.8804726685369378, - "grad_norm": 0.0, - "learning_rate": 7.400116063564844e-07, - "loss": 0.8233, - "step": 31071 - }, - { - "epoch": 0.8805010059792003, - "grad_norm": 0.0, - "learning_rate": 7.396651557203693e-07, - "loss": 0.778, - "step": 31072 - }, - { - "epoch": 0.8805293434214628, - "grad_norm": 0.0, - "learning_rate": 7.393187830874938e-07, - "loss": 0.8549, - "step": 31073 - }, - { - "epoch": 0.8805576808637252, - "grad_norm": 0.0, - "learning_rate": 7.389724884607763e-07, - "loss": 0.7875, - "step": 31074 - }, - { - "epoch": 0.8805860183059877, - "grad_norm": 0.0, - "learning_rate": 7.386262718431303e-07, - "loss": 0.7172, - "step": 31075 - }, - { - "epoch": 0.8806143557482502, - "grad_norm": 0.0, - "learning_rate": 7.382801332374745e-07, - "loss": 0.7436, - "step": 31076 - }, - { - "epoch": 0.8806426931905126, - "grad_norm": 0.0, - "learning_rate": 7.379340726467254e-07, - "loss": 0.7433, - "step": 31077 - }, - { - "epoch": 0.8806710306327751, - "grad_norm": 0.0, - "learning_rate": 7.375880900737964e-07, - "loss": 0.8369, - "step": 31078 - }, - { - "epoch": 0.8806993680750376, - "grad_norm": 0.0, - "learning_rate": 7.372421855216039e-07, - "loss": 0.7453, - "step": 31079 - }, - { - "epoch": 0.8807277055173001, - "grad_norm": 0.0, - "learning_rate": 7.368963589930589e-07, - "loss": 0.7084, - "step": 31080 - }, - { - "epoch": 0.8807560429595624, - "grad_norm": 0.0, - "learning_rate": 7.365506104910747e-07, - "loss": 0.8287, - "step": 31081 - }, - { - "epoch": 0.8807843804018249, - "grad_norm": 0.0, - "learning_rate": 7.362049400185667e-07, - "loss": 0.7672, - "step": 31082 - }, - { - "epoch": 0.8808127178440874, - "grad_norm": 0.0, - "learning_rate": 7.358593475784437e-07, - "loss": 0.8154, - "step": 31083 - }, - { - "epoch": 0.8808410552863498, - "grad_norm": 0.0, - "learning_rate": 7.355138331736166e-07, - "loss": 0.8024, - "step": 31084 - }, - { - "epoch": 0.8808693927286123, - "grad_norm": 0.0, - "learning_rate": 7.351683968069989e-07, - "loss": 0.8004, - "step": 31085 - }, - { - "epoch": 0.8808977301708748, - "grad_norm": 0.0, - "learning_rate": 7.348230384814958e-07, - "loss": 0.9252, - "step": 31086 - }, - { - "epoch": 0.8809260676131372, - "grad_norm": 0.0, - "learning_rate": 7.344777582000218e-07, - "loss": 0.7631, - "step": 31087 - }, - { - "epoch": 0.8809544050553997, - "grad_norm": 0.0, - "learning_rate": 7.341325559654799e-07, - "loss": 0.8244, - "step": 31088 - }, - { - "epoch": 0.8809827424976622, - "grad_norm": 0.0, - "learning_rate": 7.337874317807803e-07, - "loss": 0.7612, - "step": 31089 - }, - { - "epoch": 0.8810110799399247, - "grad_norm": 0.0, - "learning_rate": 7.334423856488293e-07, - "loss": 0.8492, - "step": 31090 - }, - { - "epoch": 0.881039417382187, - "grad_norm": 0.0, - "learning_rate": 7.330974175725347e-07, - "loss": 0.7167, - "step": 31091 - }, - { - "epoch": 0.8810677548244495, - "grad_norm": 0.0, - "learning_rate": 7.327525275548042e-07, - "loss": 0.8153, - "step": 31092 - }, - { - "epoch": 0.881096092266712, - "grad_norm": 0.0, - "learning_rate": 7.324077155985376e-07, - "loss": 0.8581, - "step": 31093 - }, - { - "epoch": 0.8811244297089744, - "grad_norm": 0.0, - "learning_rate": 7.320629817066427e-07, - "loss": 0.783, - "step": 31094 - }, - { - "epoch": 0.8811527671512369, - "grad_norm": 0.0, - "learning_rate": 7.317183258820248e-07, - "loss": 0.741, - "step": 31095 - }, - { - "epoch": 0.8811811045934994, - "grad_norm": 0.0, - "learning_rate": 7.31373748127584e-07, - "loss": 0.7597, - "step": 31096 - }, - { - "epoch": 0.8812094420357619, - "grad_norm": 0.0, - "learning_rate": 7.310292484462234e-07, - "loss": 0.8424, - "step": 31097 - }, - { - "epoch": 0.8812377794780243, - "grad_norm": 0.0, - "learning_rate": 7.306848268408462e-07, - "loss": 0.778, - "step": 31098 - }, - { - "epoch": 0.8812661169202868, - "grad_norm": 0.0, - "learning_rate": 7.303404833143524e-07, - "loss": 0.6703, - "step": 31099 - }, - { - "epoch": 0.8812944543625493, - "grad_norm": 0.0, - "learning_rate": 7.29996217869644e-07, - "loss": 0.8397, - "step": 31100 - }, - { - "epoch": 0.8813227918048117, - "grad_norm": 0.0, - "learning_rate": 7.296520305096177e-07, - "loss": 0.9629, - "step": 31101 - }, - { - "epoch": 0.8813511292470741, - "grad_norm": 0.0, - "learning_rate": 7.293079212371757e-07, - "loss": 0.8726, - "step": 31102 - }, - { - "epoch": 0.8813794666893366, - "grad_norm": 0.0, - "learning_rate": 7.289638900552154e-07, - "loss": 0.792, - "step": 31103 - }, - { - "epoch": 0.881407804131599, - "grad_norm": 0.0, - "learning_rate": 7.286199369666346e-07, - "loss": 0.8351, - "step": 31104 - }, - { - "epoch": 0.8814361415738615, - "grad_norm": 0.0, - "learning_rate": 7.282760619743312e-07, - "loss": 0.7571, - "step": 31105 - }, - { - "epoch": 0.881464479016124, - "grad_norm": 0.0, - "learning_rate": 7.279322650812026e-07, - "loss": 0.8173, - "step": 31106 - }, - { - "epoch": 0.8814928164583865, - "grad_norm": 0.0, - "learning_rate": 7.275885462901422e-07, - "loss": 0.7646, - "step": 31107 - }, - { - "epoch": 0.8815211539006489, - "grad_norm": 0.0, - "learning_rate": 7.272449056040487e-07, - "loss": 0.8323, - "step": 31108 - }, - { - "epoch": 0.8815494913429114, - "grad_norm": 0.0, - "learning_rate": 7.269013430258132e-07, - "loss": 0.8248, - "step": 31109 - }, - { - "epoch": 0.8815778287851739, - "grad_norm": 0.0, - "learning_rate": 7.265578585583311e-07, - "loss": 0.7475, - "step": 31110 - }, - { - "epoch": 0.8816061662274363, - "grad_norm": 0.0, - "learning_rate": 7.262144522044967e-07, - "loss": 0.8196, - "step": 31111 - }, - { - "epoch": 0.8816345036696988, - "grad_norm": 0.0, - "learning_rate": 7.258711239672012e-07, - "loss": 0.9018, - "step": 31112 - }, - { - "epoch": 0.8816628411119612, - "grad_norm": 0.0, - "learning_rate": 7.255278738493355e-07, - "loss": 0.8672, - "step": 31113 - }, - { - "epoch": 0.8816911785542237, - "grad_norm": 0.0, - "learning_rate": 7.251847018537961e-07, - "loss": 0.8556, - "step": 31114 - }, - { - "epoch": 0.8817195159964861, - "grad_norm": 0.0, - "learning_rate": 7.248416079834663e-07, - "loss": 0.7871, - "step": 31115 - }, - { - "epoch": 0.8817478534387486, - "grad_norm": 0.0, - "learning_rate": 7.244985922412417e-07, - "loss": 0.8502, - "step": 31116 - }, - { - "epoch": 0.8817761908810111, - "grad_norm": 0.0, - "learning_rate": 7.241556546300088e-07, - "loss": 0.7179, - "step": 31117 - }, - { - "epoch": 0.8818045283232735, - "grad_norm": 0.0, - "learning_rate": 7.238127951526575e-07, - "loss": 0.7252, - "step": 31118 - }, - { - "epoch": 0.881832865765536, - "grad_norm": 0.0, - "learning_rate": 7.234700138120776e-07, - "loss": 0.7939, - "step": 31119 - }, - { - "epoch": 0.8818612032077985, - "grad_norm": 0.0, - "learning_rate": 7.231273106111536e-07, - "loss": 0.8053, - "step": 31120 - }, - { - "epoch": 0.881889540650061, - "grad_norm": 0.0, - "learning_rate": 7.227846855527732e-07, - "loss": 0.8411, - "step": 31121 - }, - { - "epoch": 0.8819178780923234, - "grad_norm": 0.0, - "learning_rate": 7.224421386398217e-07, - "loss": 0.7272, - "step": 31122 - }, - { - "epoch": 0.8819462155345859, - "grad_norm": 0.0, - "learning_rate": 7.220996698751847e-07, - "loss": 0.8148, - "step": 31123 - }, - { - "epoch": 0.8819745529768483, - "grad_norm": 0.0, - "learning_rate": 7.2175727926175e-07, - "loss": 0.8607, - "step": 31124 - }, - { - "epoch": 0.8820028904191107, - "grad_norm": 0.0, - "learning_rate": 7.214149668023962e-07, - "loss": 0.798, - "step": 31125 - }, - { - "epoch": 0.8820312278613732, - "grad_norm": 0.0, - "learning_rate": 7.210727325000111e-07, - "loss": 0.6664, - "step": 31126 - }, - { - "epoch": 0.8820595653036357, - "grad_norm": 0.0, - "learning_rate": 7.207305763574746e-07, - "loss": 0.7999, - "step": 31127 - }, - { - "epoch": 0.8820879027458981, - "grad_norm": 0.0, - "learning_rate": 7.20388498377671e-07, - "loss": 0.8188, - "step": 31128 - }, - { - "epoch": 0.8821162401881606, - "grad_norm": 0.0, - "learning_rate": 7.200464985634825e-07, - "loss": 0.7401, - "step": 31129 - }, - { - "epoch": 0.8821445776304231, - "grad_norm": 0.0, - "learning_rate": 7.197045769177868e-07, - "loss": 0.7831, - "step": 31130 - }, - { - "epoch": 0.8821729150726856, - "grad_norm": 0.0, - "learning_rate": 7.193627334434661e-07, - "loss": 0.7219, - "step": 31131 - }, - { - "epoch": 0.882201252514948, - "grad_norm": 0.0, - "learning_rate": 7.190209681434002e-07, - "loss": 0.8662, - "step": 31132 - }, - { - "epoch": 0.8822295899572105, - "grad_norm": 0.0, - "learning_rate": 7.186792810204657e-07, - "loss": 0.7695, - "step": 31133 - }, - { - "epoch": 0.882257927399473, - "grad_norm": 0.0, - "learning_rate": 7.183376720775415e-07, - "loss": 0.8616, - "step": 31134 - }, - { - "epoch": 0.8822862648417353, - "grad_norm": 0.0, - "learning_rate": 7.179961413175085e-07, - "loss": 0.6879, - "step": 31135 - }, - { - "epoch": 0.8823146022839978, - "grad_norm": 0.0, - "learning_rate": 7.17654688743239e-07, - "loss": 0.7307, - "step": 31136 - }, - { - "epoch": 0.8823429397262603, - "grad_norm": 0.0, - "learning_rate": 7.173133143576116e-07, - "loss": 0.7684, - "step": 31137 - }, - { - "epoch": 0.8823712771685228, - "grad_norm": 0.0, - "learning_rate": 7.16972018163501e-07, - "loss": 0.7639, - "step": 31138 - }, - { - "epoch": 0.8823996146107852, - "grad_norm": 0.0, - "learning_rate": 7.166308001637812e-07, - "loss": 0.8204, - "step": 31139 - }, - { - "epoch": 0.8824279520530477, - "grad_norm": 0.0, - "learning_rate": 7.162896603613278e-07, - "loss": 0.7898, - "step": 31140 - }, - { - "epoch": 0.8824562894953102, - "grad_norm": 0.0, - "learning_rate": 7.159485987590143e-07, - "loss": 0.7417, - "step": 31141 - }, - { - "epoch": 0.8824846269375726, - "grad_norm": 0.0, - "learning_rate": 7.156076153597124e-07, - "loss": 0.7285, - "step": 31142 - }, - { - "epoch": 0.8825129643798351, - "grad_norm": 0.0, - "learning_rate": 7.15266710166298e-07, - "loss": 0.8655, - "step": 31143 - }, - { - "epoch": 0.8825413018220976, - "grad_norm": 0.0, - "learning_rate": 7.149258831816374e-07, - "loss": 0.8743, - "step": 31144 - }, - { - "epoch": 0.88256963926436, - "grad_norm": 0.0, - "learning_rate": 7.145851344086052e-07, - "loss": 0.8323, - "step": 31145 - }, - { - "epoch": 0.8825979767066224, - "grad_norm": 0.0, - "learning_rate": 7.142444638500701e-07, - "loss": 0.7967, - "step": 31146 - }, - { - "epoch": 0.8826263141488849, - "grad_norm": 0.0, - "learning_rate": 7.13903871508902e-07, - "loss": 0.8541, - "step": 31147 - }, - { - "epoch": 0.8826546515911474, - "grad_norm": 0.0, - "learning_rate": 7.135633573879707e-07, - "loss": 0.8566, - "step": 31148 - }, - { - "epoch": 0.8826829890334098, - "grad_norm": 0.0, - "learning_rate": 7.13222921490142e-07, - "loss": 0.8851, - "step": 31149 - }, - { - "epoch": 0.8827113264756723, - "grad_norm": 0.0, - "learning_rate": 7.128825638182879e-07, - "loss": 0.8934, - "step": 31150 - }, - { - "epoch": 0.8827396639179348, - "grad_norm": 0.0, - "learning_rate": 7.125422843752706e-07, - "loss": 0.757, - "step": 31151 - }, - { - "epoch": 0.8827680013601972, - "grad_norm": 0.0, - "learning_rate": 7.122020831639576e-07, - "loss": 0.7648, - "step": 31152 - }, - { - "epoch": 0.8827963388024597, - "grad_norm": 0.0, - "learning_rate": 7.118619601872157e-07, - "loss": 0.8382, - "step": 31153 - }, - { - "epoch": 0.8828246762447222, - "grad_norm": 0.0, - "learning_rate": 7.115219154479102e-07, - "loss": 0.7866, - "step": 31154 - }, - { - "epoch": 0.8828530136869847, - "grad_norm": 0.0, - "learning_rate": 7.111819489489047e-07, - "loss": 0.7342, - "step": 31155 - }, - { - "epoch": 0.882881351129247, - "grad_norm": 0.0, - "learning_rate": 7.108420606930644e-07, - "loss": 0.9412, - "step": 31156 - }, - { - "epoch": 0.8829096885715095, - "grad_norm": 0.0, - "learning_rate": 7.105022506832493e-07, - "loss": 0.7797, - "step": 31157 - }, - { - "epoch": 0.882938026013772, - "grad_norm": 0.0, - "learning_rate": 7.101625189223249e-07, - "loss": 0.7433, - "step": 31158 - }, - { - "epoch": 0.8829663634560344, - "grad_norm": 0.0, - "learning_rate": 7.098228654131489e-07, - "loss": 0.901, - "step": 31159 - }, - { - "epoch": 0.8829947008982969, - "grad_norm": 0.0, - "learning_rate": 7.094832901585857e-07, - "loss": 0.8453, - "step": 31160 - }, - { - "epoch": 0.8830230383405594, - "grad_norm": 0.0, - "learning_rate": 7.091437931614964e-07, - "loss": 0.8462, - "step": 31161 - }, - { - "epoch": 0.8830513757828219, - "grad_norm": 0.0, - "learning_rate": 7.088043744247375e-07, - "loss": 0.8684, - "step": 31162 - }, - { - "epoch": 0.8830797132250843, - "grad_norm": 0.0, - "learning_rate": 7.08465033951169e-07, - "loss": 0.7629, - "step": 31163 - }, - { - "epoch": 0.8831080506673468, - "grad_norm": 0.0, - "learning_rate": 7.081257717436507e-07, - "loss": 0.79, - "step": 31164 - }, - { - "epoch": 0.8831363881096093, - "grad_norm": 0.0, - "learning_rate": 7.077865878050394e-07, - "loss": 0.7833, - "step": 31165 - }, - { - "epoch": 0.8831647255518716, - "grad_norm": 0.0, - "learning_rate": 7.074474821381916e-07, - "loss": 0.7978, - "step": 31166 - }, - { - "epoch": 0.8831930629941341, - "grad_norm": 0.0, - "learning_rate": 7.071084547459639e-07, - "loss": 0.7291, - "step": 31167 - }, - { - "epoch": 0.8832214004363966, - "grad_norm": 0.0, - "learning_rate": 7.06769505631213e-07, - "loss": 0.7599, - "step": 31168 - }, - { - "epoch": 0.8832497378786591, - "grad_norm": 0.0, - "learning_rate": 7.064306347967953e-07, - "loss": 0.8256, - "step": 31169 - }, - { - "epoch": 0.8832780753209215, - "grad_norm": 0.0, - "learning_rate": 7.06091842245562e-07, - "loss": 0.7128, - "step": 31170 - }, - { - "epoch": 0.883306412763184, - "grad_norm": 0.0, - "learning_rate": 7.057531279803676e-07, - "loss": 0.8423, - "step": 31171 - }, - { - "epoch": 0.8833347502054465, - "grad_norm": 0.0, - "learning_rate": 7.054144920040684e-07, - "loss": 0.7952, - "step": 31172 - }, - { - "epoch": 0.8833630876477089, - "grad_norm": 0.0, - "learning_rate": 7.050759343195113e-07, - "loss": 0.8718, - "step": 31173 - }, - { - "epoch": 0.8833914250899714, - "grad_norm": 0.0, - "learning_rate": 7.047374549295538e-07, - "loss": 0.8162, - "step": 31174 - }, - { - "epoch": 0.8834197625322339, - "grad_norm": 0.0, - "learning_rate": 7.043990538370437e-07, - "loss": 0.8221, - "step": 31175 - }, - { - "epoch": 0.8834480999744962, - "grad_norm": 0.0, - "learning_rate": 7.04060731044831e-07, - "loss": 0.8254, - "step": 31176 - }, - { - "epoch": 0.8834764374167587, - "grad_norm": 0.0, - "learning_rate": 7.03722486555769e-07, - "loss": 0.8622, - "step": 31177 - }, - { - "epoch": 0.8835047748590212, - "grad_norm": 0.0, - "learning_rate": 7.03384320372702e-07, - "loss": 0.8167, - "step": 31178 - }, - { - "epoch": 0.8835331123012837, - "grad_norm": 0.0, - "learning_rate": 7.030462324984821e-07, - "loss": 0.8173, - "step": 31179 - }, - { - "epoch": 0.8835614497435461, - "grad_norm": 0.0, - "learning_rate": 7.02708222935955e-07, - "loss": 0.7873, - "step": 31180 - }, - { - "epoch": 0.8835897871858086, - "grad_norm": 0.0, - "learning_rate": 7.023702916879705e-07, - "loss": 0.811, - "step": 31181 - }, - { - "epoch": 0.8836181246280711, - "grad_norm": 0.0, - "learning_rate": 7.02032438757374e-07, - "loss": 0.8099, - "step": 31182 - }, - { - "epoch": 0.8836464620703335, - "grad_norm": 0.0, - "learning_rate": 7.016946641470102e-07, - "loss": 0.8302, - "step": 31183 - }, - { - "epoch": 0.883674799512596, - "grad_norm": 0.0, - "learning_rate": 7.013569678597243e-07, - "loss": 0.764, - "step": 31184 - }, - { - "epoch": 0.8837031369548585, - "grad_norm": 0.0, - "learning_rate": 7.010193498983641e-07, - "loss": 0.8571, - "step": 31185 - }, - { - "epoch": 0.883731474397121, - "grad_norm": 0.0, - "learning_rate": 7.006818102657687e-07, - "loss": 0.7209, - "step": 31186 - }, - { - "epoch": 0.8837598118393833, - "grad_norm": 0.0, - "learning_rate": 7.003443489647854e-07, - "loss": 0.7199, - "step": 31187 - }, - { - "epoch": 0.8837881492816458, - "grad_norm": 0.0, - "learning_rate": 7.000069659982534e-07, - "loss": 0.8073, - "step": 31188 - }, - { - "epoch": 0.8838164867239083, - "grad_norm": 0.0, - "learning_rate": 6.996696613690157e-07, - "loss": 0.7671, - "step": 31189 - }, - { - "epoch": 0.8838448241661707, - "grad_norm": 0.0, - "learning_rate": 6.993324350799169e-07, - "loss": 0.7398, - "step": 31190 - }, - { - "epoch": 0.8838731616084332, - "grad_norm": 0.0, - "learning_rate": 6.989952871337924e-07, - "loss": 0.8388, - "step": 31191 - }, - { - "epoch": 0.8839014990506957, - "grad_norm": 0.0, - "learning_rate": 6.986582175334844e-07, - "loss": 0.8561, - "step": 31192 - }, - { - "epoch": 0.8839298364929582, - "grad_norm": 0.0, - "learning_rate": 6.983212262818318e-07, - "loss": 0.8139, - "step": 31193 - }, - { - "epoch": 0.8839581739352206, - "grad_norm": 0.0, - "learning_rate": 6.979843133816744e-07, - "loss": 0.8556, - "step": 31194 - }, - { - "epoch": 0.8839865113774831, - "grad_norm": 0.0, - "learning_rate": 6.976474788358501e-07, - "loss": 0.8547, - "step": 31195 - }, - { - "epoch": 0.8840148488197456, - "grad_norm": 0.0, - "learning_rate": 6.973107226471953e-07, - "loss": 0.7636, - "step": 31196 - }, - { - "epoch": 0.884043186262008, - "grad_norm": 0.0, - "learning_rate": 6.969740448185458e-07, - "loss": 0.7775, - "step": 31197 - }, - { - "epoch": 0.8840715237042704, - "grad_norm": 0.0, - "learning_rate": 6.966374453527392e-07, - "loss": 0.7942, - "step": 31198 - }, - { - "epoch": 0.8840998611465329, - "grad_norm": 0.0, - "learning_rate": 6.963009242526098e-07, - "loss": 0.8284, - "step": 31199 - }, - { - "epoch": 0.8841281985887953, - "grad_norm": 0.0, - "learning_rate": 6.959644815209921e-07, - "loss": 0.749, - "step": 31200 - }, - { - "epoch": 0.8841565360310578, - "grad_norm": 0.0, - "learning_rate": 6.956281171607227e-07, - "loss": 0.8471, - "step": 31201 - }, - { - "epoch": 0.8841848734733203, - "grad_norm": 0.0, - "learning_rate": 6.952918311746304e-07, - "loss": 0.7522, - "step": 31202 - }, - { - "epoch": 0.8842132109155828, - "grad_norm": 0.0, - "learning_rate": 6.949556235655519e-07, - "loss": 0.6954, - "step": 31203 - }, - { - "epoch": 0.8842415483578452, - "grad_norm": 0.0, - "learning_rate": 6.94619494336316e-07, - "loss": 0.758, - "step": 31204 - }, - { - "epoch": 0.8842698858001077, - "grad_norm": 0.0, - "learning_rate": 6.942834434897561e-07, - "loss": 0.7654, - "step": 31205 - }, - { - "epoch": 0.8842982232423702, - "grad_norm": 0.0, - "learning_rate": 6.93947471028702e-07, - "loss": 0.7483, - "step": 31206 - }, - { - "epoch": 0.8843265606846326, - "grad_norm": 0.0, - "learning_rate": 6.936115769559837e-07, - "loss": 0.7323, - "step": 31207 - }, - { - "epoch": 0.884354898126895, - "grad_norm": 0.0, - "learning_rate": 6.932757612744334e-07, - "loss": 0.8524, - "step": 31208 - }, - { - "epoch": 0.8843832355691575, - "grad_norm": 0.0, - "learning_rate": 6.929400239868745e-07, - "loss": 0.7645, - "step": 31209 - }, - { - "epoch": 0.88441157301142, - "grad_norm": 0.0, - "learning_rate": 6.92604365096139e-07, - "loss": 0.8622, - "step": 31210 - }, - { - "epoch": 0.8844399104536824, - "grad_norm": 0.0, - "learning_rate": 6.922687846050535e-07, - "loss": 0.7018, - "step": 31211 - }, - { - "epoch": 0.8844682478959449, - "grad_norm": 0.0, - "learning_rate": 6.919332825164437e-07, - "loss": 0.8028, - "step": 31212 - }, - { - "epoch": 0.8844965853382074, - "grad_norm": 0.0, - "learning_rate": 6.915978588331362e-07, - "loss": 0.7315, - "step": 31213 - }, - { - "epoch": 0.8845249227804698, - "grad_norm": 0.0, - "learning_rate": 6.912625135579587e-07, - "loss": 0.8201, - "step": 31214 - }, - { - "epoch": 0.8845532602227323, - "grad_norm": 0.0, - "learning_rate": 6.909272466937312e-07, - "loss": 0.7898, - "step": 31215 - }, - { - "epoch": 0.8845815976649948, - "grad_norm": 0.0, - "learning_rate": 6.905920582432824e-07, - "loss": 0.9851, - "step": 31216 - }, - { - "epoch": 0.8846099351072573, - "grad_norm": 0.0, - "learning_rate": 6.902569482094324e-07, - "loss": 0.7848, - "step": 31217 - }, - { - "epoch": 0.8846382725495197, - "grad_norm": 0.0, - "learning_rate": 6.899219165950044e-07, - "loss": 0.8764, - "step": 31218 - }, - { - "epoch": 0.8846666099917821, - "grad_norm": 0.0, - "learning_rate": 6.895869634028218e-07, - "loss": 0.755, - "step": 31219 - }, - { - "epoch": 0.8846949474340446, - "grad_norm": 0.0, - "learning_rate": 6.892520886357057e-07, - "loss": 0.8299, - "step": 31220 - }, - { - "epoch": 0.884723284876307, - "grad_norm": 0.0, - "learning_rate": 6.88917292296476e-07, - "loss": 0.7885, - "step": 31221 - }, - { - "epoch": 0.8847516223185695, - "grad_norm": 0.0, - "learning_rate": 6.88582574387956e-07, - "loss": 0.7861, - "step": 31222 - }, - { - "epoch": 0.884779959760832, - "grad_norm": 0.0, - "learning_rate": 6.8824793491296e-07, - "loss": 0.8526, - "step": 31223 - }, - { - "epoch": 0.8848082972030944, - "grad_norm": 0.0, - "learning_rate": 6.879133738743116e-07, - "loss": 0.6415, - "step": 31224 - }, - { - "epoch": 0.8848366346453569, - "grad_norm": 0.0, - "learning_rate": 6.875788912748261e-07, - "loss": 0.8395, - "step": 31225 - }, - { - "epoch": 0.8848649720876194, - "grad_norm": 0.0, - "learning_rate": 6.872444871173211e-07, - "loss": 0.8001, - "step": 31226 - }, - { - "epoch": 0.8848933095298819, - "grad_norm": 0.0, - "learning_rate": 6.869101614046148e-07, - "loss": 0.7997, - "step": 31227 - }, - { - "epoch": 0.8849216469721443, - "grad_norm": 0.0, - "learning_rate": 6.865759141395223e-07, - "loss": 0.8209, - "step": 31228 - }, - { - "epoch": 0.8849499844144068, - "grad_norm": 0.0, - "learning_rate": 6.862417453248593e-07, - "loss": 0.68, - "step": 31229 - }, - { - "epoch": 0.8849783218566692, - "grad_norm": 0.0, - "learning_rate": 6.859076549634403e-07, - "loss": 0.7719, - "step": 31230 - }, - { - "epoch": 0.8850066592989316, - "grad_norm": 0.0, - "learning_rate": 6.855736430580795e-07, - "loss": 0.7846, - "step": 31231 - }, - { - "epoch": 0.8850349967411941, - "grad_norm": 0.0, - "learning_rate": 6.852397096115904e-07, - "loss": 0.8098, - "step": 31232 - }, - { - "epoch": 0.8850633341834566, - "grad_norm": 0.0, - "learning_rate": 6.849058546267873e-07, - "loss": 0.7617, - "step": 31233 - }, - { - "epoch": 0.8850916716257191, - "grad_norm": 0.0, - "learning_rate": 6.845720781064802e-07, - "loss": 0.7565, - "step": 31234 - }, - { - "epoch": 0.8851200090679815, - "grad_norm": 0.0, - "learning_rate": 6.842383800534835e-07, - "loss": 0.715, - "step": 31235 - }, - { - "epoch": 0.885148346510244, - "grad_norm": 0.0, - "learning_rate": 6.839047604706051e-07, - "loss": 0.8041, - "step": 31236 - }, - { - "epoch": 0.8851766839525065, - "grad_norm": 0.0, - "learning_rate": 6.83571219360657e-07, - "loss": 0.7616, - "step": 31237 - }, - { - "epoch": 0.8852050213947689, - "grad_norm": 0.0, - "learning_rate": 6.832377567264469e-07, - "loss": 0.8206, - "step": 31238 - }, - { - "epoch": 0.8852333588370314, - "grad_norm": 0.0, - "learning_rate": 6.829043725707852e-07, - "loss": 0.8125, - "step": 31239 - }, - { - "epoch": 0.8852616962792939, - "grad_norm": 0.0, - "learning_rate": 6.825710668964814e-07, - "loss": 0.7883, - "step": 31240 - }, - { - "epoch": 0.8852900337215563, - "grad_norm": 0.0, - "learning_rate": 6.822378397063389e-07, - "loss": 0.84, - "step": 31241 - }, - { - "epoch": 0.8853183711638187, - "grad_norm": 0.0, - "learning_rate": 6.81904691003168e-07, - "loss": 0.8437, - "step": 31242 - }, - { - "epoch": 0.8853467086060812, - "grad_norm": 0.0, - "learning_rate": 6.81571620789776e-07, - "loss": 0.798, - "step": 31243 - }, - { - "epoch": 0.8853750460483437, - "grad_norm": 0.0, - "learning_rate": 6.812386290689643e-07, - "loss": 0.7617, - "step": 31244 - }, - { - "epoch": 0.8854033834906061, - "grad_norm": 0.0, - "learning_rate": 6.809057158435406e-07, - "loss": 0.8605, - "step": 31245 - }, - { - "epoch": 0.8854317209328686, - "grad_norm": 0.0, - "learning_rate": 6.805728811163082e-07, - "loss": 0.7546, - "step": 31246 - }, - { - "epoch": 0.8854600583751311, - "grad_norm": 0.0, - "learning_rate": 6.802401248900714e-07, - "loss": 0.7877, - "step": 31247 - }, - { - "epoch": 0.8854883958173935, - "grad_norm": 0.0, - "learning_rate": 6.799074471676337e-07, - "loss": 0.8016, - "step": 31248 - }, - { - "epoch": 0.885516733259656, - "grad_norm": 0.0, - "learning_rate": 6.79574847951796e-07, - "loss": 0.6401, - "step": 31249 - }, - { - "epoch": 0.8855450707019185, - "grad_norm": 0.0, - "learning_rate": 6.792423272453596e-07, - "loss": 0.8026, - "step": 31250 - }, - { - "epoch": 0.885573408144181, - "grad_norm": 0.0, - "learning_rate": 6.789098850511278e-07, - "loss": 0.8001, - "step": 31251 - }, - { - "epoch": 0.8856017455864433, - "grad_norm": 0.0, - "learning_rate": 6.785775213718981e-07, - "loss": 0.835, - "step": 31252 - }, - { - "epoch": 0.8856300830287058, - "grad_norm": 0.0, - "learning_rate": 6.78245236210473e-07, - "loss": 0.7094, - "step": 31253 - }, - { - "epoch": 0.8856584204709683, - "grad_norm": 0.0, - "learning_rate": 6.779130295696479e-07, - "loss": 0.793, - "step": 31254 - }, - { - "epoch": 0.8856867579132307, - "grad_norm": 0.0, - "learning_rate": 6.775809014522238e-07, - "loss": 0.7456, - "step": 31255 - }, - { - "epoch": 0.8857150953554932, - "grad_norm": 0.0, - "learning_rate": 6.772488518609987e-07, - "loss": 0.7778, - "step": 31256 - }, - { - "epoch": 0.8857434327977557, - "grad_norm": 0.0, - "learning_rate": 6.769168807987658e-07, - "loss": 0.7242, - "step": 31257 - }, - { - "epoch": 0.8857717702400182, - "grad_norm": 0.0, - "learning_rate": 6.765849882683251e-07, - "loss": 0.8327, - "step": 31258 - }, - { - "epoch": 0.8858001076822806, - "grad_norm": 0.0, - "learning_rate": 6.76253174272472e-07, - "loss": 0.7645, - "step": 31259 - }, - { - "epoch": 0.8858284451245431, - "grad_norm": 0.0, - "learning_rate": 6.75921438813999e-07, - "loss": 0.8408, - "step": 31260 - }, - { - "epoch": 0.8858567825668056, - "grad_norm": 0.0, - "learning_rate": 6.755897818957047e-07, - "loss": 0.6857, - "step": 31261 - }, - { - "epoch": 0.8858851200090679, - "grad_norm": 0.0, - "learning_rate": 6.752582035203792e-07, - "loss": 0.7151, - "step": 31262 - }, - { - "epoch": 0.8859134574513304, - "grad_norm": 0.0, - "learning_rate": 6.749267036908147e-07, - "loss": 0.7471, - "step": 31263 - }, - { - "epoch": 0.8859417948935929, - "grad_norm": 0.0, - "learning_rate": 6.745952824098089e-07, - "loss": 0.931, - "step": 31264 - }, - { - "epoch": 0.8859701323358554, - "grad_norm": 0.0, - "learning_rate": 6.742639396801476e-07, - "loss": 0.8108, - "step": 31265 - }, - { - "epoch": 0.8859984697781178, - "grad_norm": 0.0, - "learning_rate": 6.739326755046249e-07, - "loss": 0.8421, - "step": 31266 - }, - { - "epoch": 0.8860268072203803, - "grad_norm": 0.0, - "learning_rate": 6.736014898860299e-07, - "loss": 0.7425, - "step": 31267 - }, - { - "epoch": 0.8860551446626428, - "grad_norm": 0.0, - "learning_rate": 6.732703828271526e-07, - "loss": 0.889, - "step": 31268 - }, - { - "epoch": 0.8860834821049052, - "grad_norm": 0.0, - "learning_rate": 6.729393543307838e-07, - "loss": 0.7606, - "step": 31269 - }, - { - "epoch": 0.8861118195471677, - "grad_norm": 0.0, - "learning_rate": 6.726084043997083e-07, - "loss": 0.7573, - "step": 31270 - }, - { - "epoch": 0.8861401569894302, - "grad_norm": 0.0, - "learning_rate": 6.722775330367159e-07, - "loss": 0.7777, - "step": 31271 - }, - { - "epoch": 0.8861684944316925, - "grad_norm": 0.0, - "learning_rate": 6.719467402445945e-07, - "loss": 0.8498, - "step": 31272 - }, - { - "epoch": 0.886196831873955, - "grad_norm": 0.0, - "learning_rate": 6.716160260261284e-07, - "loss": 0.8094, - "step": 31273 - }, - { - "epoch": 0.8862251693162175, - "grad_norm": 0.0, - "learning_rate": 6.712853903841077e-07, - "loss": 0.8078, - "step": 31274 - }, - { - "epoch": 0.88625350675848, - "grad_norm": 0.0, - "learning_rate": 6.709548333213112e-07, - "loss": 0.7831, - "step": 31275 - }, - { - "epoch": 0.8862818442007424, - "grad_norm": 0.0, - "learning_rate": 6.706243548405267e-07, - "loss": 0.8651, - "step": 31276 - }, - { - "epoch": 0.8863101816430049, - "grad_norm": 0.0, - "learning_rate": 6.702939549445397e-07, - "loss": 0.8131, - "step": 31277 - }, - { - "epoch": 0.8863385190852674, - "grad_norm": 0.0, - "learning_rate": 6.699636336361293e-07, - "loss": 0.7946, - "step": 31278 - }, - { - "epoch": 0.8863668565275298, - "grad_norm": 0.0, - "learning_rate": 6.696333909180796e-07, - "loss": 0.8995, - "step": 31279 - }, - { - "epoch": 0.8863951939697923, - "grad_norm": 0.0, - "learning_rate": 6.693032267931754e-07, - "loss": 0.7681, - "step": 31280 - }, - { - "epoch": 0.8864235314120548, - "grad_norm": 0.0, - "learning_rate": 6.68973141264192e-07, - "loss": 0.8011, - "step": 31281 - }, - { - "epoch": 0.8864518688543173, - "grad_norm": 0.0, - "learning_rate": 6.68643134333915e-07, - "loss": 0.698, - "step": 31282 - }, - { - "epoch": 0.8864802062965796, - "grad_norm": 0.0, - "learning_rate": 6.683132060051201e-07, - "loss": 0.8309, - "step": 31283 - }, - { - "epoch": 0.8865085437388421, - "grad_norm": 0.0, - "learning_rate": 6.679833562805882e-07, - "loss": 0.7436, - "step": 31284 - }, - { - "epoch": 0.8865368811811046, - "grad_norm": 0.0, - "learning_rate": 6.676535851630983e-07, - "loss": 0.8654, - "step": 31285 - }, - { - "epoch": 0.886565218623367, - "grad_norm": 0.0, - "learning_rate": 6.673238926554282e-07, - "loss": 0.7994, - "step": 31286 - }, - { - "epoch": 0.8865935560656295, - "grad_norm": 0.0, - "learning_rate": 6.669942787603556e-07, - "loss": 0.7879, - "step": 31287 - }, - { - "epoch": 0.886621893507892, - "grad_norm": 0.0, - "learning_rate": 6.666647434806539e-07, - "loss": 0.9088, - "step": 31288 - }, - { - "epoch": 0.8866502309501545, - "grad_norm": 0.0, - "learning_rate": 6.663352868191008e-07, - "loss": 0.7967, - "step": 31289 - }, - { - "epoch": 0.8866785683924169, - "grad_norm": 0.0, - "learning_rate": 6.660059087784743e-07, - "loss": 0.7656, - "step": 31290 - }, - { - "epoch": 0.8867069058346794, - "grad_norm": 0.0, - "learning_rate": 6.656766093615442e-07, - "loss": 0.9299, - "step": 31291 - }, - { - "epoch": 0.8867352432769419, - "grad_norm": 0.0, - "learning_rate": 6.65347388571086e-07, - "loss": 0.8552, - "step": 31292 - }, - { - "epoch": 0.8867635807192042, - "grad_norm": 0.0, - "learning_rate": 6.650182464098743e-07, - "loss": 0.793, - "step": 31293 - }, - { - "epoch": 0.8867919181614667, - "grad_norm": 0.0, - "learning_rate": 6.64689182880679e-07, - "loss": 0.845, - "step": 31294 - }, - { - "epoch": 0.8868202556037292, - "grad_norm": 0.0, - "learning_rate": 6.643601979862746e-07, - "loss": 0.8289, - "step": 31295 - }, - { - "epoch": 0.8868485930459916, - "grad_norm": 0.0, - "learning_rate": 6.640312917294301e-07, - "loss": 0.7797, - "step": 31296 - }, - { - "epoch": 0.8868769304882541, - "grad_norm": 0.0, - "learning_rate": 6.637024641129164e-07, - "loss": 0.7506, - "step": 31297 - }, - { - "epoch": 0.8869052679305166, - "grad_norm": 0.0, - "learning_rate": 6.633737151395037e-07, - "loss": 0.7501, - "step": 31298 - }, - { - "epoch": 0.8869336053727791, - "grad_norm": 0.0, - "learning_rate": 6.630450448119618e-07, - "loss": 0.7375, - "step": 31299 - }, - { - "epoch": 0.8869619428150415, - "grad_norm": 0.0, - "learning_rate": 6.627164531330576e-07, - "loss": 0.7408, - "step": 31300 - }, - { - "epoch": 0.886990280257304, - "grad_norm": 0.0, - "learning_rate": 6.623879401055622e-07, - "loss": 0.7662, - "step": 31301 - }, - { - "epoch": 0.8870186176995665, - "grad_norm": 0.0, - "learning_rate": 6.620595057322399e-07, - "loss": 0.7269, - "step": 31302 - }, - { - "epoch": 0.8870469551418289, - "grad_norm": 0.0, - "learning_rate": 6.617311500158585e-07, - "loss": 0.8232, - "step": 31303 - }, - { - "epoch": 0.8870752925840913, - "grad_norm": 0.0, - "learning_rate": 6.614028729591815e-07, - "loss": 0.7791, - "step": 31304 - }, - { - "epoch": 0.8871036300263538, - "grad_norm": 0.0, - "learning_rate": 6.610746745649765e-07, - "loss": 0.7429, - "step": 31305 - }, - { - "epoch": 0.8871319674686163, - "grad_norm": 0.0, - "learning_rate": 6.607465548360092e-07, - "loss": 0.8251, - "step": 31306 - }, - { - "epoch": 0.8871603049108787, - "grad_norm": 0.0, - "learning_rate": 6.604185137750396e-07, - "loss": 0.8153, - "step": 31307 - }, - { - "epoch": 0.8871886423531412, - "grad_norm": 0.0, - "learning_rate": 6.600905513848333e-07, - "loss": 0.649, - "step": 31308 - }, - { - "epoch": 0.8872169797954037, - "grad_norm": 0.0, - "learning_rate": 6.597626676681545e-07, - "loss": 0.7864, - "step": 31309 - }, - { - "epoch": 0.8872453172376661, - "grad_norm": 0.0, - "learning_rate": 6.594348626277613e-07, - "loss": 0.8176, - "step": 31310 - }, - { - "epoch": 0.8872736546799286, - "grad_norm": 0.0, - "learning_rate": 6.59107136266417e-07, - "loss": 0.9017, - "step": 31311 - }, - { - "epoch": 0.8873019921221911, - "grad_norm": 0.0, - "learning_rate": 6.587794885868815e-07, - "loss": 0.8169, - "step": 31312 - }, - { - "epoch": 0.8873303295644535, - "grad_norm": 0.0, - "learning_rate": 6.584519195919148e-07, - "loss": 0.7525, - "step": 31313 - }, - { - "epoch": 0.887358667006716, - "grad_norm": 0.0, - "learning_rate": 6.581244292842792e-07, - "loss": 0.7887, - "step": 31314 - }, - { - "epoch": 0.8873870044489784, - "grad_norm": 0.0, - "learning_rate": 6.577970176667281e-07, - "loss": 0.867, - "step": 31315 - }, - { - "epoch": 0.8874153418912409, - "grad_norm": 0.0, - "learning_rate": 6.574696847420236e-07, - "loss": 0.7345, - "step": 31316 - }, - { - "epoch": 0.8874436793335033, - "grad_norm": 0.0, - "learning_rate": 6.571424305129193e-07, - "loss": 0.7609, - "step": 31317 - }, - { - "epoch": 0.8874720167757658, - "grad_norm": 0.0, - "learning_rate": 6.568152549821749e-07, - "loss": 0.7222, - "step": 31318 - }, - { - "epoch": 0.8875003542180283, - "grad_norm": 0.0, - "learning_rate": 6.56488158152545e-07, - "loss": 0.8619, - "step": 31319 - }, - { - "epoch": 0.8875286916602907, - "grad_norm": 0.0, - "learning_rate": 6.561611400267853e-07, - "loss": 0.8691, - "step": 31320 - }, - { - "epoch": 0.8875570291025532, - "grad_norm": 0.0, - "learning_rate": 6.558342006076491e-07, - "loss": 0.8065, - "step": 31321 - }, - { - "epoch": 0.8875853665448157, - "grad_norm": 0.0, - "learning_rate": 6.555073398978929e-07, - "loss": 0.7745, - "step": 31322 - }, - { - "epoch": 0.8876137039870782, - "grad_norm": 0.0, - "learning_rate": 6.551805579002657e-07, - "loss": 0.8516, - "step": 31323 - }, - { - "epoch": 0.8876420414293406, - "grad_norm": 0.0, - "learning_rate": 6.548538546175243e-07, - "loss": 0.7829, - "step": 31324 - }, - { - "epoch": 0.887670378871603, - "grad_norm": 0.0, - "learning_rate": 6.545272300524186e-07, - "loss": 0.8062, - "step": 31325 - }, - { - "epoch": 0.8876987163138655, - "grad_norm": 0.0, - "learning_rate": 6.542006842077009e-07, - "loss": 0.8465, - "step": 31326 - }, - { - "epoch": 0.8877270537561279, - "grad_norm": 0.0, - "learning_rate": 6.538742170861224e-07, - "loss": 0.7611, - "step": 31327 - }, - { - "epoch": 0.8877553911983904, - "grad_norm": 0.0, - "learning_rate": 6.535478286904295e-07, - "loss": 0.8145, - "step": 31328 - }, - { - "epoch": 0.8877837286406529, - "grad_norm": 0.0, - "learning_rate": 6.532215190233748e-07, - "loss": 0.7893, - "step": 31329 - }, - { - "epoch": 0.8878120660829154, - "grad_norm": 0.0, - "learning_rate": 6.528952880877082e-07, - "loss": 0.9083, - "step": 31330 - }, - { - "epoch": 0.8878404035251778, - "grad_norm": 0.0, - "learning_rate": 6.52569135886173e-07, - "loss": 0.7992, - "step": 31331 - }, - { - "epoch": 0.8878687409674403, - "grad_norm": 0.0, - "learning_rate": 6.522430624215215e-07, - "loss": 0.7482, - "step": 31332 - }, - { - "epoch": 0.8878970784097028, - "grad_norm": 0.0, - "learning_rate": 6.519170676964958e-07, - "loss": 0.7943, - "step": 31333 - }, - { - "epoch": 0.8879254158519652, - "grad_norm": 0.0, - "learning_rate": 6.51591151713844e-07, - "loss": 0.8771, - "step": 31334 - }, - { - "epoch": 0.8879537532942277, - "grad_norm": 0.0, - "learning_rate": 6.512653144763137e-07, - "loss": 0.759, - "step": 31335 - }, - { - "epoch": 0.8879820907364901, - "grad_norm": 0.0, - "learning_rate": 6.509395559866449e-07, - "loss": 0.7431, - "step": 31336 - }, - { - "epoch": 0.8880104281787525, - "grad_norm": 0.0, - "learning_rate": 6.506138762475833e-07, - "loss": 0.7902, - "step": 31337 - }, - { - "epoch": 0.888038765621015, - "grad_norm": 0.0, - "learning_rate": 6.502882752618744e-07, - "loss": 0.9192, - "step": 31338 - }, - { - "epoch": 0.8880671030632775, - "grad_norm": 0.0, - "learning_rate": 6.499627530322583e-07, - "loss": 0.771, - "step": 31339 - }, - { - "epoch": 0.88809544050554, - "grad_norm": 0.0, - "learning_rate": 6.496373095614794e-07, - "loss": 0.7976, - "step": 31340 - }, - { - "epoch": 0.8881237779478024, - "grad_norm": 0.0, - "learning_rate": 6.493119448522767e-07, - "loss": 0.8031, - "step": 31341 - }, - { - "epoch": 0.8881521153900649, - "grad_norm": 0.0, - "learning_rate": 6.489866589073912e-07, - "loss": 0.8079, - "step": 31342 - }, - { - "epoch": 0.8881804528323274, - "grad_norm": 0.0, - "learning_rate": 6.486614517295653e-07, - "loss": 0.9909, - "step": 31343 - }, - { - "epoch": 0.8882087902745898, - "grad_norm": 0.0, - "learning_rate": 6.483363233215345e-07, - "loss": 0.9246, - "step": 31344 - }, - { - "epoch": 0.8882371277168523, - "grad_norm": 0.0, - "learning_rate": 6.480112736860411e-07, - "loss": 0.8436, - "step": 31345 - }, - { - "epoch": 0.8882654651591148, - "grad_norm": 0.0, - "learning_rate": 6.476863028258207e-07, - "loss": 0.8293, - "step": 31346 - }, - { - "epoch": 0.8882938026013772, - "grad_norm": 0.0, - "learning_rate": 6.47361410743611e-07, - "loss": 0.7169, - "step": 31347 - }, - { - "epoch": 0.8883221400436396, - "grad_norm": 0.0, - "learning_rate": 6.470365974421499e-07, - "loss": 0.7682, - "step": 31348 - }, - { - "epoch": 0.8883504774859021, - "grad_norm": 0.0, - "learning_rate": 6.46711862924172e-07, - "loss": 0.8178, - "step": 31349 - }, - { - "epoch": 0.8883788149281646, - "grad_norm": 0.0, - "learning_rate": 6.463872071924149e-07, - "loss": 0.9244, - "step": 31350 - }, - { - "epoch": 0.888407152370427, - "grad_norm": 0.0, - "learning_rate": 6.460626302496098e-07, - "loss": 0.8346, - "step": 31351 - }, - { - "epoch": 0.8884354898126895, - "grad_norm": 0.0, - "learning_rate": 6.457381320984935e-07, - "loss": 0.7598, - "step": 31352 - }, - { - "epoch": 0.888463827254952, - "grad_norm": 0.0, - "learning_rate": 6.454137127417992e-07, - "loss": 0.7743, - "step": 31353 - }, - { - "epoch": 0.8884921646972145, - "grad_norm": 0.0, - "learning_rate": 6.450893721822582e-07, - "loss": 0.8784, - "step": 31354 - }, - { - "epoch": 0.8885205021394769, - "grad_norm": 0.0, - "learning_rate": 6.447651104226026e-07, - "loss": 0.7916, - "step": 31355 - }, - { - "epoch": 0.8885488395817394, - "grad_norm": 0.0, - "learning_rate": 6.444409274655661e-07, - "loss": 0.7241, - "step": 31356 - }, - { - "epoch": 0.8885771770240019, - "grad_norm": 0.0, - "learning_rate": 6.441168233138761e-07, - "loss": 0.7709, - "step": 31357 - }, - { - "epoch": 0.8886055144662642, - "grad_norm": 0.0, - "learning_rate": 6.437927979702651e-07, - "loss": 0.722, - "step": 31358 - }, - { - "epoch": 0.8886338519085267, - "grad_norm": 0.0, - "learning_rate": 6.434688514374632e-07, - "loss": 0.8753, - "step": 31359 - }, - { - "epoch": 0.8886621893507892, - "grad_norm": 0.0, - "learning_rate": 6.431449837181958e-07, - "loss": 0.8638, - "step": 31360 - }, - { - "epoch": 0.8886905267930516, - "grad_norm": 0.0, - "learning_rate": 6.428211948151919e-07, - "loss": 0.8681, - "step": 31361 - }, - { - "epoch": 0.8887188642353141, - "grad_norm": 0.0, - "learning_rate": 6.424974847311804e-07, - "loss": 0.7528, - "step": 31362 - }, - { - "epoch": 0.8887472016775766, - "grad_norm": 0.0, - "learning_rate": 6.421738534688882e-07, - "loss": 0.8258, - "step": 31363 - }, - { - "epoch": 0.8887755391198391, - "grad_norm": 0.0, - "learning_rate": 6.418503010310417e-07, - "loss": 0.7338, - "step": 31364 - }, - { - "epoch": 0.8888038765621015, - "grad_norm": 0.0, - "learning_rate": 6.415268274203634e-07, - "loss": 0.7371, - "step": 31365 - }, - { - "epoch": 0.888832214004364, - "grad_norm": 0.0, - "learning_rate": 6.412034326395799e-07, - "loss": 0.6881, - "step": 31366 - }, - { - "epoch": 0.8888605514466265, - "grad_norm": 0.0, - "learning_rate": 6.40880116691417e-07, - "loss": 0.9239, - "step": 31367 - }, - { - "epoch": 0.8888888888888888, - "grad_norm": 0.0, - "learning_rate": 6.405568795785944e-07, - "loss": 0.7835, - "step": 31368 - }, - { - "epoch": 0.8889172263311513, - "grad_norm": 0.0, - "learning_rate": 6.402337213038379e-07, - "loss": 0.7253, - "step": 31369 - }, - { - "epoch": 0.8889455637734138, - "grad_norm": 0.0, - "learning_rate": 6.399106418698675e-07, - "loss": 0.85, - "step": 31370 - }, - { - "epoch": 0.8889739012156763, - "grad_norm": 0.0, - "learning_rate": 6.395876412794055e-07, - "loss": 0.8659, - "step": 31371 - }, - { - "epoch": 0.8890022386579387, - "grad_norm": 0.0, - "learning_rate": 6.392647195351731e-07, - "loss": 0.8586, - "step": 31372 - }, - { - "epoch": 0.8890305761002012, - "grad_norm": 0.0, - "learning_rate": 6.389418766398903e-07, - "loss": 0.9138, - "step": 31373 - }, - { - "epoch": 0.8890589135424637, - "grad_norm": 0.0, - "learning_rate": 6.386191125962749e-07, - "loss": 0.7917, - "step": 31374 - }, - { - "epoch": 0.8890872509847261, - "grad_norm": 0.0, - "learning_rate": 6.38296427407048e-07, - "loss": 0.781, - "step": 31375 - }, - { - "epoch": 0.8891155884269886, - "grad_norm": 0.0, - "learning_rate": 6.379738210749253e-07, - "loss": 0.698, - "step": 31376 - }, - { - "epoch": 0.8891439258692511, - "grad_norm": 0.0, - "learning_rate": 6.37651293602628e-07, - "loss": 0.7875, - "step": 31377 - }, - { - "epoch": 0.8891722633115136, - "grad_norm": 0.0, - "learning_rate": 6.373288449928694e-07, - "loss": 0.8704, - "step": 31378 - }, - { - "epoch": 0.8892006007537759, - "grad_norm": 0.0, - "learning_rate": 6.370064752483662e-07, - "loss": 0.8625, - "step": 31379 - }, - { - "epoch": 0.8892289381960384, - "grad_norm": 0.0, - "learning_rate": 6.366841843718352e-07, - "loss": 0.7608, - "step": 31380 - }, - { - "epoch": 0.8892572756383009, - "grad_norm": 0.0, - "learning_rate": 6.363619723659898e-07, - "loss": 0.7606, - "step": 31381 - }, - { - "epoch": 0.8892856130805633, - "grad_norm": 0.0, - "learning_rate": 6.360398392335454e-07, - "loss": 0.8272, - "step": 31382 - }, - { - "epoch": 0.8893139505228258, - "grad_norm": 0.0, - "learning_rate": 6.357177849772134e-07, - "loss": 0.9471, - "step": 31383 - }, - { - "epoch": 0.8893422879650883, - "grad_norm": 0.0, - "learning_rate": 6.353958095997081e-07, - "loss": 0.8463, - "step": 31384 - }, - { - "epoch": 0.8893706254073507, - "grad_norm": 0.0, - "learning_rate": 6.350739131037431e-07, - "loss": 0.738, - "step": 31385 - }, - { - "epoch": 0.8893989628496132, - "grad_norm": 0.0, - "learning_rate": 6.347520954920261e-07, - "loss": 0.8257, - "step": 31386 - }, - { - "epoch": 0.8894273002918757, - "grad_norm": 0.0, - "learning_rate": 6.344303567672694e-07, - "loss": 0.7702, - "step": 31387 - }, - { - "epoch": 0.8894556377341382, - "grad_norm": 0.0, - "learning_rate": 6.341086969321853e-07, - "loss": 0.7513, - "step": 31388 - }, - { - "epoch": 0.8894839751764005, - "grad_norm": 0.0, - "learning_rate": 6.337871159894804e-07, - "loss": 0.6694, - "step": 31389 - }, - { - "epoch": 0.889512312618663, - "grad_norm": 0.0, - "learning_rate": 6.334656139418661e-07, - "loss": 0.7507, - "step": 31390 - }, - { - "epoch": 0.8895406500609255, - "grad_norm": 0.0, - "learning_rate": 6.331441907920477e-07, - "loss": 0.813, - "step": 31391 - }, - { - "epoch": 0.8895689875031879, - "grad_norm": 0.0, - "learning_rate": 6.328228465427344e-07, - "loss": 0.8963, - "step": 31392 - }, - { - "epoch": 0.8895973249454504, - "grad_norm": 0.0, - "learning_rate": 6.325015811966339e-07, - "loss": 0.758, - "step": 31393 - }, - { - "epoch": 0.8896256623877129, - "grad_norm": 0.0, - "learning_rate": 6.321803947564487e-07, - "loss": 0.9589, - "step": 31394 - }, - { - "epoch": 0.8896539998299754, - "grad_norm": 0.0, - "learning_rate": 6.318592872248886e-07, - "loss": 0.7841, - "step": 31395 - }, - { - "epoch": 0.8896823372722378, - "grad_norm": 0.0, - "learning_rate": 6.31538258604657e-07, - "loss": 0.808, - "step": 31396 - }, - { - "epoch": 0.8897106747145003, - "grad_norm": 0.0, - "learning_rate": 6.312173088984552e-07, - "loss": 0.7796, - "step": 31397 - }, - { - "epoch": 0.8897390121567628, - "grad_norm": 0.0, - "learning_rate": 6.308964381089921e-07, - "loss": 0.8283, - "step": 31398 - }, - { - "epoch": 0.8897673495990251, - "grad_norm": 0.0, - "learning_rate": 6.305756462389645e-07, - "loss": 0.775, - "step": 31399 - }, - { - "epoch": 0.8897956870412876, - "grad_norm": 0.0, - "learning_rate": 6.30254933291079e-07, - "loss": 0.8029, - "step": 31400 - }, - { - "epoch": 0.8898240244835501, - "grad_norm": 0.0, - "learning_rate": 6.299342992680346e-07, - "loss": 0.7182, - "step": 31401 - }, - { - "epoch": 0.8898523619258126, - "grad_norm": 0.0, - "learning_rate": 6.296137441725336e-07, - "loss": 0.827, - "step": 31402 - }, - { - "epoch": 0.889880699368075, - "grad_norm": 0.0, - "learning_rate": 6.292932680072761e-07, - "loss": 0.8002, - "step": 31403 - }, - { - "epoch": 0.8899090368103375, - "grad_norm": 0.0, - "learning_rate": 6.289728707749609e-07, - "loss": 0.8932, - "step": 31404 - }, - { - "epoch": 0.8899373742526, - "grad_norm": 0.0, - "learning_rate": 6.286525524782861e-07, - "loss": 0.8458, - "step": 31405 - }, - { - "epoch": 0.8899657116948624, - "grad_norm": 0.0, - "learning_rate": 6.283323131199526e-07, - "loss": 0.7309, - "step": 31406 - }, - { - "epoch": 0.8899940491371249, - "grad_norm": 0.0, - "learning_rate": 6.28012152702655e-07, - "loss": 0.8422, - "step": 31407 - }, - { - "epoch": 0.8900223865793874, - "grad_norm": 0.0, - "learning_rate": 6.276920712290913e-07, - "loss": 0.8447, - "step": 31408 - }, - { - "epoch": 0.8900507240216498, - "grad_norm": 0.0, - "learning_rate": 6.27372068701958e-07, - "loss": 0.6966, - "step": 31409 - }, - { - "epoch": 0.8900790614639122, - "grad_norm": 0.0, - "learning_rate": 6.270521451239498e-07, - "loss": 0.8311, - "step": 31410 - }, - { - "epoch": 0.8901073989061747, - "grad_norm": 0.0, - "learning_rate": 6.267323004977633e-07, - "loss": 0.8316, - "step": 31411 - }, - { - "epoch": 0.8901357363484372, - "grad_norm": 0.0, - "learning_rate": 6.264125348260896e-07, - "loss": 0.853, - "step": 31412 - }, - { - "epoch": 0.8901640737906996, - "grad_norm": 0.0, - "learning_rate": 6.260928481116235e-07, - "loss": 0.8489, - "step": 31413 - }, - { - "epoch": 0.8901924112329621, - "grad_norm": 0.0, - "learning_rate": 6.257732403570594e-07, - "loss": 0.788, - "step": 31414 - }, - { - "epoch": 0.8902207486752246, - "grad_norm": 0.0, - "learning_rate": 6.254537115650871e-07, - "loss": 0.7577, - "step": 31415 - }, - { - "epoch": 0.890249086117487, - "grad_norm": 0.0, - "learning_rate": 6.251342617383993e-07, - "loss": 0.8214, - "step": 31416 - }, - { - "epoch": 0.8902774235597495, - "grad_norm": 0.0, - "learning_rate": 6.248148908796892e-07, - "loss": 0.9744, - "step": 31417 - }, - { - "epoch": 0.890305761002012, - "grad_norm": 0.0, - "learning_rate": 6.244955989916434e-07, - "loss": 0.7392, - "step": 31418 - }, - { - "epoch": 0.8903340984442745, - "grad_norm": 0.0, - "learning_rate": 6.241763860769535e-07, - "loss": 0.776, - "step": 31419 - }, - { - "epoch": 0.8903624358865369, - "grad_norm": 0.0, - "learning_rate": 6.238572521383058e-07, - "loss": 0.885, - "step": 31420 - }, - { - "epoch": 0.8903907733287993, - "grad_norm": 0.0, - "learning_rate": 6.235381971783904e-07, - "loss": 0.809, - "step": 31421 - }, - { - "epoch": 0.8904191107710618, - "grad_norm": 0.0, - "learning_rate": 6.232192211998967e-07, - "loss": 0.7081, - "step": 31422 - }, - { - "epoch": 0.8904474482133242, - "grad_norm": 0.0, - "learning_rate": 6.229003242055076e-07, - "loss": 0.7204, - "step": 31423 - }, - { - "epoch": 0.8904757856555867, - "grad_norm": 0.0, - "learning_rate": 6.225815061979113e-07, - "loss": 0.7911, - "step": 31424 - }, - { - "epoch": 0.8905041230978492, - "grad_norm": 0.0, - "learning_rate": 6.222627671797943e-07, - "loss": 0.7876, - "step": 31425 - }, - { - "epoch": 0.8905324605401117, - "grad_norm": 0.0, - "learning_rate": 6.21944107153839e-07, - "loss": 0.7494, - "step": 31426 - }, - { - "epoch": 0.8905607979823741, - "grad_norm": 0.0, - "learning_rate": 6.216255261227311e-07, - "loss": 0.7662, - "step": 31427 - }, - { - "epoch": 0.8905891354246366, - "grad_norm": 0.0, - "learning_rate": 6.21307024089155e-07, - "loss": 0.78, - "step": 31428 - }, - { - "epoch": 0.8906174728668991, - "grad_norm": 0.0, - "learning_rate": 6.209886010557908e-07, - "loss": 0.7891, - "step": 31429 - }, - { - "epoch": 0.8906458103091615, - "grad_norm": 0.0, - "learning_rate": 6.206702570253242e-07, - "loss": 0.8456, - "step": 31430 - }, - { - "epoch": 0.890674147751424, - "grad_norm": 0.0, - "learning_rate": 6.203519920004341e-07, - "loss": 0.8008, - "step": 31431 - }, - { - "epoch": 0.8907024851936864, - "grad_norm": 0.0, - "learning_rate": 6.20033805983804e-07, - "loss": 0.8408, - "step": 31432 - }, - { - "epoch": 0.8907308226359488, - "grad_norm": 0.0, - "learning_rate": 6.197156989781106e-07, - "loss": 0.7798, - "step": 31433 - }, - { - "epoch": 0.8907591600782113, - "grad_norm": 0.0, - "learning_rate": 6.193976709860339e-07, - "loss": 0.786, - "step": 31434 - }, - { - "epoch": 0.8907874975204738, - "grad_norm": 0.0, - "learning_rate": 6.190797220102573e-07, - "loss": 0.8263, - "step": 31435 - }, - { - "epoch": 0.8908158349627363, - "grad_norm": 0.0, - "learning_rate": 6.187618520534533e-07, - "loss": 0.8292, - "step": 31436 - }, - { - "epoch": 0.8908441724049987, - "grad_norm": 0.0, - "learning_rate": 6.184440611183018e-07, - "loss": 0.8345, - "step": 31437 - }, - { - "epoch": 0.8908725098472612, - "grad_norm": 0.0, - "learning_rate": 6.181263492074808e-07, - "loss": 0.8422, - "step": 31438 - }, - { - "epoch": 0.8909008472895237, - "grad_norm": 0.0, - "learning_rate": 6.178087163236645e-07, - "loss": 0.7624, - "step": 31439 - }, - { - "epoch": 0.8909291847317861, - "grad_norm": 0.0, - "learning_rate": 6.174911624695301e-07, - "loss": 0.7605, - "step": 31440 - }, - { - "epoch": 0.8909575221740486, - "grad_norm": 0.0, - "learning_rate": 6.171736876477508e-07, - "loss": 0.6794, - "step": 31441 - }, - { - "epoch": 0.890985859616311, - "grad_norm": 0.0, - "learning_rate": 6.168562918610021e-07, - "loss": 0.7526, - "step": 31442 - }, - { - "epoch": 0.8910141970585735, - "grad_norm": 0.0, - "learning_rate": 6.165389751119577e-07, - "loss": 0.771, - "step": 31443 - }, - { - "epoch": 0.8910425345008359, - "grad_norm": 0.0, - "learning_rate": 6.162217374032897e-07, - "loss": 0.7807, - "step": 31444 - }, - { - "epoch": 0.8910708719430984, - "grad_norm": 0.0, - "learning_rate": 6.159045787376705e-07, - "loss": 0.8684, - "step": 31445 - }, - { - "epoch": 0.8910992093853609, - "grad_norm": 0.0, - "learning_rate": 6.155874991177724e-07, - "loss": 0.8774, - "step": 31446 - }, - { - "epoch": 0.8911275468276233, - "grad_norm": 0.0, - "learning_rate": 6.152704985462654e-07, - "loss": 0.7606, - "step": 31447 - }, - { - "epoch": 0.8911558842698858, - "grad_norm": 0.0, - "learning_rate": 6.149535770258208e-07, - "loss": 0.8475, - "step": 31448 - }, - { - "epoch": 0.8911842217121483, - "grad_norm": 0.0, - "learning_rate": 6.146367345591053e-07, - "loss": 0.7676, - "step": 31449 - }, - { - "epoch": 0.8912125591544108, - "grad_norm": 0.0, - "learning_rate": 6.143199711487901e-07, - "loss": 0.7599, - "step": 31450 - }, - { - "epoch": 0.8912408965966732, - "grad_norm": 0.0, - "learning_rate": 6.140032867975443e-07, - "loss": 0.9092, - "step": 31451 - }, - { - "epoch": 0.8912692340389357, - "grad_norm": 0.0, - "learning_rate": 6.136866815080333e-07, - "loss": 0.7814, - "step": 31452 - }, - { - "epoch": 0.8912975714811981, - "grad_norm": 0.0, - "learning_rate": 6.133701552829252e-07, - "loss": 0.7861, - "step": 31453 - }, - { - "epoch": 0.8913259089234605, - "grad_norm": 0.0, - "learning_rate": 6.130537081248844e-07, - "loss": 0.77, - "step": 31454 - }, - { - "epoch": 0.891354246365723, - "grad_norm": 0.0, - "learning_rate": 6.127373400365788e-07, - "loss": 0.8139, - "step": 31455 - }, - { - "epoch": 0.8913825838079855, - "grad_norm": 0.0, - "learning_rate": 6.12421051020674e-07, - "loss": 0.8415, - "step": 31456 - }, - { - "epoch": 0.8914109212502479, - "grad_norm": 0.0, - "learning_rate": 6.121048410798314e-07, - "loss": 0.8165, - "step": 31457 - }, - { - "epoch": 0.8914392586925104, - "grad_norm": 0.0, - "learning_rate": 6.117887102167164e-07, - "loss": 0.8887, - "step": 31458 - }, - { - "epoch": 0.8914675961347729, - "grad_norm": 0.0, - "learning_rate": 6.114726584339914e-07, - "loss": 0.8605, - "step": 31459 - }, - { - "epoch": 0.8914959335770354, - "grad_norm": 0.0, - "learning_rate": 6.111566857343176e-07, - "loss": 0.8611, - "step": 31460 - }, - { - "epoch": 0.8915242710192978, - "grad_norm": 0.0, - "learning_rate": 6.108407921203597e-07, - "loss": 0.8068, - "step": 31461 - }, - { - "epoch": 0.8915526084615603, - "grad_norm": 0.0, - "learning_rate": 6.105249775947741e-07, - "loss": 0.9131, - "step": 31462 - }, - { - "epoch": 0.8915809459038228, - "grad_norm": 0.0, - "learning_rate": 6.102092421602234e-07, - "loss": 0.8052, - "step": 31463 - }, - { - "epoch": 0.8916092833460851, - "grad_norm": 0.0, - "learning_rate": 6.098935858193688e-07, - "loss": 0.897, - "step": 31464 - }, - { - "epoch": 0.8916376207883476, - "grad_norm": 0.0, - "learning_rate": 6.095780085748659e-07, - "loss": 0.8373, - "step": 31465 - }, - { - "epoch": 0.8916659582306101, - "grad_norm": 0.0, - "learning_rate": 6.092625104293748e-07, - "loss": 0.7797, - "step": 31466 - }, - { - "epoch": 0.8916942956728726, - "grad_norm": 0.0, - "learning_rate": 6.089470913855522e-07, - "loss": 0.7441, - "step": 31467 - }, - { - "epoch": 0.891722633115135, - "grad_norm": 0.0, - "learning_rate": 6.08631751446056e-07, - "loss": 0.8708, - "step": 31468 - }, - { - "epoch": 0.8917509705573975, - "grad_norm": 0.0, - "learning_rate": 6.083164906135431e-07, - "loss": 0.8432, - "step": 31469 - }, - { - "epoch": 0.89177930799966, - "grad_norm": 0.0, - "learning_rate": 6.080013088906667e-07, - "loss": 0.8091, - "step": 31470 - }, - { - "epoch": 0.8918076454419224, - "grad_norm": 0.0, - "learning_rate": 6.076862062800825e-07, - "loss": 0.8127, - "step": 31471 - }, - { - "epoch": 0.8918359828841849, - "grad_norm": 0.0, - "learning_rate": 6.073711827844464e-07, - "loss": 0.7853, - "step": 31472 - }, - { - "epoch": 0.8918643203264474, - "grad_norm": 0.0, - "learning_rate": 6.070562384064094e-07, - "loss": 0.7854, - "step": 31473 - }, - { - "epoch": 0.8918926577687099, - "grad_norm": 0.0, - "learning_rate": 6.06741373148626e-07, - "loss": 0.8996, - "step": 31474 - }, - { - "epoch": 0.8919209952109722, - "grad_norm": 0.0, - "learning_rate": 6.064265870137498e-07, - "loss": 0.7608, - "step": 31475 - }, - { - "epoch": 0.8919493326532347, - "grad_norm": 0.0, - "learning_rate": 6.061118800044285e-07, - "loss": 0.877, - "step": 31476 - }, - { - "epoch": 0.8919776700954972, - "grad_norm": 0.0, - "learning_rate": 6.05797252123318e-07, - "loss": 0.7809, - "step": 31477 - }, - { - "epoch": 0.8920060075377596, - "grad_norm": 0.0, - "learning_rate": 6.054827033730625e-07, - "loss": 0.8469, - "step": 31478 - }, - { - "epoch": 0.8920343449800221, - "grad_norm": 0.0, - "learning_rate": 6.051682337563158e-07, - "loss": 0.869, - "step": 31479 - }, - { - "epoch": 0.8920626824222846, - "grad_norm": 0.0, - "learning_rate": 6.048538432757256e-07, - "loss": 0.7518, - "step": 31480 - }, - { - "epoch": 0.892091019864547, - "grad_norm": 0.0, - "learning_rate": 6.045395319339397e-07, - "loss": 0.7707, - "step": 31481 - }, - { - "epoch": 0.8921193573068095, - "grad_norm": 0.0, - "learning_rate": 6.042252997336073e-07, - "loss": 0.7256, - "step": 31482 - }, - { - "epoch": 0.892147694749072, - "grad_norm": 0.0, - "learning_rate": 6.03911146677375e-07, - "loss": 0.9565, - "step": 31483 - }, - { - "epoch": 0.8921760321913345, - "grad_norm": 0.0, - "learning_rate": 6.035970727678864e-07, - "loss": 0.8289, - "step": 31484 - }, - { - "epoch": 0.8922043696335968, - "grad_norm": 0.0, - "learning_rate": 6.032830780077914e-07, - "loss": 0.8371, - "step": 31485 - }, - { - "epoch": 0.8922327070758593, - "grad_norm": 0.0, - "learning_rate": 6.029691623997302e-07, - "loss": 0.7243, - "step": 31486 - }, - { - "epoch": 0.8922610445181218, - "grad_norm": 0.0, - "learning_rate": 6.026553259463497e-07, - "loss": 0.8271, - "step": 31487 - }, - { - "epoch": 0.8922893819603842, - "grad_norm": 0.0, - "learning_rate": 6.023415686502942e-07, - "loss": 0.7517, - "step": 31488 - }, - { - "epoch": 0.8923177194026467, - "grad_norm": 0.0, - "learning_rate": 6.02027890514204e-07, - "loss": 0.8134, - "step": 31489 - }, - { - "epoch": 0.8923460568449092, - "grad_norm": 0.0, - "learning_rate": 6.017142915407237e-07, - "loss": 0.8398, - "step": 31490 - }, - { - "epoch": 0.8923743942871717, - "grad_norm": 0.0, - "learning_rate": 6.014007717324933e-07, - "loss": 0.7667, - "step": 31491 - }, - { - "epoch": 0.8924027317294341, - "grad_norm": 0.0, - "learning_rate": 6.010873310921538e-07, - "loss": 0.6759, - "step": 31492 - }, - { - "epoch": 0.8924310691716966, - "grad_norm": 0.0, - "learning_rate": 6.007739696223458e-07, - "loss": 0.7094, - "step": 31493 - }, - { - "epoch": 0.8924594066139591, - "grad_norm": 0.0, - "learning_rate": 6.004606873257101e-07, - "loss": 0.9069, - "step": 31494 - }, - { - "epoch": 0.8924877440562214, - "grad_norm": 0.0, - "learning_rate": 6.001474842048826e-07, - "loss": 0.8758, - "step": 31495 - }, - { - "epoch": 0.8925160814984839, - "grad_norm": 0.0, - "learning_rate": 5.998343602625067e-07, - "loss": 0.9381, - "step": 31496 - }, - { - "epoch": 0.8925444189407464, - "grad_norm": 0.0, - "learning_rate": 5.995213155012136e-07, - "loss": 0.811, - "step": 31497 - }, - { - "epoch": 0.8925727563830089, - "grad_norm": 0.0, - "learning_rate": 5.992083499236456e-07, - "loss": 0.9016, - "step": 31498 - }, - { - "epoch": 0.8926010938252713, - "grad_norm": 0.0, - "learning_rate": 5.988954635324351e-07, - "loss": 0.7618, - "step": 31499 - }, - { - "epoch": 0.8926294312675338, - "grad_norm": 0.0, - "learning_rate": 5.985826563302188e-07, - "loss": 0.6848, - "step": 31500 - }, - { - "epoch": 0.8926577687097963, - "grad_norm": 0.0, - "learning_rate": 5.982699283196336e-07, - "loss": 0.8147, - "step": 31501 - }, - { - "epoch": 0.8926861061520587, - "grad_norm": 0.0, - "learning_rate": 5.979572795033106e-07, - "loss": 0.9476, - "step": 31502 - }, - { - "epoch": 0.8927144435943212, - "grad_norm": 0.0, - "learning_rate": 5.976447098838845e-07, - "loss": 0.8338, - "step": 31503 - }, - { - "epoch": 0.8927427810365837, - "grad_norm": 0.0, - "learning_rate": 5.973322194639897e-07, - "loss": 0.7913, - "step": 31504 - }, - { - "epoch": 0.892771118478846, - "grad_norm": 0.0, - "learning_rate": 5.970198082462564e-07, - "loss": 0.8021, - "step": 31505 - }, - { - "epoch": 0.8927994559211085, - "grad_norm": 0.0, - "learning_rate": 5.96707476233317e-07, - "loss": 0.7716, - "step": 31506 - }, - { - "epoch": 0.892827793363371, - "grad_norm": 0.0, - "learning_rate": 5.963952234278025e-07, - "loss": 0.8014, - "step": 31507 - }, - { - "epoch": 0.8928561308056335, - "grad_norm": 0.0, - "learning_rate": 5.960830498323422e-07, - "loss": 0.8517, - "step": 31508 - }, - { - "epoch": 0.8928844682478959, - "grad_norm": 0.0, - "learning_rate": 5.957709554495683e-07, - "loss": 0.7911, - "step": 31509 - }, - { - "epoch": 0.8929128056901584, - "grad_norm": 0.0, - "learning_rate": 5.954589402821065e-07, - "loss": 0.7899, - "step": 31510 - }, - { - "epoch": 0.8929411431324209, - "grad_norm": 0.0, - "learning_rate": 5.951470043325869e-07, - "loss": 0.7325, - "step": 31511 - }, - { - "epoch": 0.8929694805746833, - "grad_norm": 0.0, - "learning_rate": 5.948351476036363e-07, - "loss": 0.7422, - "step": 31512 - }, - { - "epoch": 0.8929978180169458, - "grad_norm": 0.0, - "learning_rate": 5.945233700978814e-07, - "loss": 0.7636, - "step": 31513 - }, - { - "epoch": 0.8930261554592083, - "grad_norm": 0.0, - "learning_rate": 5.942116718179502e-07, - "loss": 0.7445, - "step": 31514 - }, - { - "epoch": 0.8930544929014708, - "grad_norm": 0.0, - "learning_rate": 5.939000527664651e-07, - "loss": 0.8888, - "step": 31515 - }, - { - "epoch": 0.8930828303437331, - "grad_norm": 0.0, - "learning_rate": 5.935885129460528e-07, - "loss": 0.8093, - "step": 31516 - }, - { - "epoch": 0.8931111677859956, - "grad_norm": 0.0, - "learning_rate": 5.93277052359339e-07, - "loss": 0.7696, - "step": 31517 - }, - { - "epoch": 0.8931395052282581, - "grad_norm": 0.0, - "learning_rate": 5.929656710089438e-07, - "loss": 0.9081, - "step": 31518 - }, - { - "epoch": 0.8931678426705205, - "grad_norm": 0.0, - "learning_rate": 5.926543688974928e-07, - "loss": 0.811, - "step": 31519 - }, - { - "epoch": 0.893196180112783, - "grad_norm": 0.0, - "learning_rate": 5.923431460276063e-07, - "loss": 0.756, - "step": 31520 - }, - { - "epoch": 0.8932245175550455, - "grad_norm": 0.0, - "learning_rate": 5.920320024019078e-07, - "loss": 0.8583, - "step": 31521 - }, - { - "epoch": 0.8932528549973079, - "grad_norm": 0.0, - "learning_rate": 5.917209380230182e-07, - "loss": 0.8372, - "step": 31522 - }, - { - "epoch": 0.8932811924395704, - "grad_norm": 0.0, - "learning_rate": 5.914099528935558e-07, - "loss": 0.9012, - "step": 31523 - }, - { - "epoch": 0.8933095298818329, - "grad_norm": 0.0, - "learning_rate": 5.910990470161416e-07, - "loss": 0.8038, - "step": 31524 - }, - { - "epoch": 0.8933378673240954, - "grad_norm": 0.0, - "learning_rate": 5.907882203933946e-07, - "loss": 0.8284, - "step": 31525 - }, - { - "epoch": 0.8933662047663578, - "grad_norm": 0.0, - "learning_rate": 5.904774730279317e-07, - "loss": 0.8026, - "step": 31526 - }, - { - "epoch": 0.8933945422086202, - "grad_norm": 0.0, - "learning_rate": 5.901668049223719e-07, - "loss": 0.8059, - "step": 31527 - }, - { - "epoch": 0.8934228796508827, - "grad_norm": 0.0, - "learning_rate": 5.898562160793308e-07, - "loss": 0.8354, - "step": 31528 - }, - { - "epoch": 0.8934512170931451, - "grad_norm": 0.0, - "learning_rate": 5.895457065014243e-07, - "loss": 0.8367, - "step": 31529 - }, - { - "epoch": 0.8934795545354076, - "grad_norm": 0.0, - "learning_rate": 5.892352761912712e-07, - "loss": 0.8109, - "step": 31530 - }, - { - "epoch": 0.8935078919776701, - "grad_norm": 0.0, - "learning_rate": 5.889249251514817e-07, - "loss": 0.8918, - "step": 31531 - }, - { - "epoch": 0.8935362294199326, - "grad_norm": 0.0, - "learning_rate": 5.886146533846726e-07, - "loss": 0.838, - "step": 31532 - }, - { - "epoch": 0.893564566862195, - "grad_norm": 0.0, - "learning_rate": 5.883044608934563e-07, - "loss": 0.845, - "step": 31533 - }, - { - "epoch": 0.8935929043044575, - "grad_norm": 0.0, - "learning_rate": 5.879943476804472e-07, - "loss": 0.7822, - "step": 31534 - }, - { - "epoch": 0.89362124174672, - "grad_norm": 0.0, - "learning_rate": 5.876843137482591e-07, - "loss": 0.8156, - "step": 31535 - }, - { - "epoch": 0.8936495791889824, - "grad_norm": 0.0, - "learning_rate": 5.873743590994985e-07, - "loss": 0.8125, - "step": 31536 - }, - { - "epoch": 0.8936779166312449, - "grad_norm": 0.0, - "learning_rate": 5.87064483736779e-07, - "loss": 0.7557, - "step": 31537 - }, - { - "epoch": 0.8937062540735073, - "grad_norm": 0.0, - "learning_rate": 5.867546876627129e-07, - "loss": 0.7889, - "step": 31538 - }, - { - "epoch": 0.8937345915157698, - "grad_norm": 0.0, - "learning_rate": 5.864449708799059e-07, - "loss": 0.846, - "step": 31539 - }, - { - "epoch": 0.8937629289580322, - "grad_norm": 0.0, - "learning_rate": 5.861353333909692e-07, - "loss": 0.8727, - "step": 31540 - }, - { - "epoch": 0.8937912664002947, - "grad_norm": 0.0, - "learning_rate": 5.858257751985097e-07, - "loss": 0.7493, - "step": 31541 - }, - { - "epoch": 0.8938196038425572, - "grad_norm": 0.0, - "learning_rate": 5.855162963051353e-07, - "loss": 0.806, - "step": 31542 - }, - { - "epoch": 0.8938479412848196, - "grad_norm": 0.0, - "learning_rate": 5.85206896713455e-07, - "loss": 0.7835, - "step": 31543 - }, - { - "epoch": 0.8938762787270821, - "grad_norm": 0.0, - "learning_rate": 5.848975764260711e-07, - "loss": 0.8323, - "step": 31544 - }, - { - "epoch": 0.8939046161693446, - "grad_norm": 0.0, - "learning_rate": 5.845883354455917e-07, - "loss": 0.8809, - "step": 31545 - }, - { - "epoch": 0.893932953611607, - "grad_norm": 0.0, - "learning_rate": 5.842791737746212e-07, - "loss": 0.8323, - "step": 31546 - }, - { - "epoch": 0.8939612910538695, - "grad_norm": 0.0, - "learning_rate": 5.839700914157631e-07, - "loss": 0.8487, - "step": 31547 - }, - { - "epoch": 0.893989628496132, - "grad_norm": 0.0, - "learning_rate": 5.836610883716232e-07, - "loss": 0.8299, - "step": 31548 - }, - { - "epoch": 0.8940179659383944, - "grad_norm": 0.0, - "learning_rate": 5.833521646448003e-07, - "loss": 0.9036, - "step": 31549 - }, - { - "epoch": 0.8940463033806568, - "grad_norm": 0.0, - "learning_rate": 5.830433202379004e-07, - "loss": 0.7799, - "step": 31550 - }, - { - "epoch": 0.8940746408229193, - "grad_norm": 0.0, - "learning_rate": 5.827345551535235e-07, - "loss": 0.8072, - "step": 31551 - }, - { - "epoch": 0.8941029782651818, - "grad_norm": 0.0, - "learning_rate": 5.824258693942698e-07, - "loss": 0.7978, - "step": 31552 - }, - { - "epoch": 0.8941313157074442, - "grad_norm": 0.0, - "learning_rate": 5.821172629627403e-07, - "loss": 0.8147, - "step": 31553 - }, - { - "epoch": 0.8941596531497067, - "grad_norm": 0.0, - "learning_rate": 5.818087358615354e-07, - "loss": 0.6852, - "step": 31554 - }, - { - "epoch": 0.8941879905919692, - "grad_norm": 0.0, - "learning_rate": 5.815002880932519e-07, - "loss": 0.7568, - "step": 31555 - }, - { - "epoch": 0.8942163280342317, - "grad_norm": 0.0, - "learning_rate": 5.811919196604898e-07, - "loss": 0.7265, - "step": 31556 - }, - { - "epoch": 0.8942446654764941, - "grad_norm": 0.0, - "learning_rate": 5.808836305658449e-07, - "loss": 0.7524, - "step": 31557 - }, - { - "epoch": 0.8942730029187566, - "grad_norm": 0.0, - "learning_rate": 5.805754208119141e-07, - "loss": 0.8004, - "step": 31558 - }, - { - "epoch": 0.894301340361019, - "grad_norm": 0.0, - "learning_rate": 5.802672904012951e-07, - "loss": 0.8272, - "step": 31559 - }, - { - "epoch": 0.8943296778032814, - "grad_norm": 0.0, - "learning_rate": 5.799592393365816e-07, - "loss": 0.7426, - "step": 31560 - }, - { - "epoch": 0.8943580152455439, - "grad_norm": 0.0, - "learning_rate": 5.796512676203703e-07, - "loss": 0.7448, - "step": 31561 - }, - { - "epoch": 0.8943863526878064, - "grad_norm": 0.0, - "learning_rate": 5.793433752552557e-07, - "loss": 0.7081, - "step": 31562 - }, - { - "epoch": 0.8944146901300689, - "grad_norm": 0.0, - "learning_rate": 5.790355622438293e-07, - "loss": 0.8076, - "step": 31563 - }, - { - "epoch": 0.8944430275723313, - "grad_norm": 0.0, - "learning_rate": 5.787278285886855e-07, - "loss": 0.8452, - "step": 31564 - }, - { - "epoch": 0.8944713650145938, - "grad_norm": 0.0, - "learning_rate": 5.784201742924145e-07, - "loss": 0.8459, - "step": 31565 - }, - { - "epoch": 0.8944997024568563, - "grad_norm": 0.0, - "learning_rate": 5.781125993576086e-07, - "loss": 0.948, - "step": 31566 - }, - { - "epoch": 0.8945280398991187, - "grad_norm": 0.0, - "learning_rate": 5.778051037868615e-07, - "loss": 0.8139, - "step": 31567 - }, - { - "epoch": 0.8945563773413812, - "grad_norm": 0.0, - "learning_rate": 5.774976875827587e-07, - "loss": 0.8278, - "step": 31568 - }, - { - "epoch": 0.8945847147836437, - "grad_norm": 0.0, - "learning_rate": 5.771903507478915e-07, - "loss": 0.8113, - "step": 31569 - }, - { - "epoch": 0.894613052225906, - "grad_norm": 0.0, - "learning_rate": 5.768830932848513e-07, - "loss": 0.747, - "step": 31570 - }, - { - "epoch": 0.8946413896681685, - "grad_norm": 0.0, - "learning_rate": 5.765759151962225e-07, - "loss": 0.832, - "step": 31571 - }, - { - "epoch": 0.894669727110431, - "grad_norm": 0.0, - "learning_rate": 5.762688164845931e-07, - "loss": 0.8179, - "step": 31572 - }, - { - "epoch": 0.8946980645526935, - "grad_norm": 0.0, - "learning_rate": 5.75961797152551e-07, - "loss": 0.7387, - "step": 31573 - }, - { - "epoch": 0.8947264019949559, - "grad_norm": 0.0, - "learning_rate": 5.756548572026832e-07, - "loss": 0.8141, - "step": 31574 - }, - { - "epoch": 0.8947547394372184, - "grad_norm": 0.0, - "learning_rate": 5.753479966375752e-07, - "loss": 0.8778, - "step": 31575 - }, - { - "epoch": 0.8947830768794809, - "grad_norm": 0.0, - "learning_rate": 5.750412154598095e-07, - "loss": 0.7376, - "step": 31576 - }, - { - "epoch": 0.8948114143217433, - "grad_norm": 0.0, - "learning_rate": 5.747345136719729e-07, - "loss": 0.7967, - "step": 31577 - }, - { - "epoch": 0.8948397517640058, - "grad_norm": 0.0, - "learning_rate": 5.744278912766454e-07, - "loss": 0.7607, - "step": 31578 - }, - { - "epoch": 0.8948680892062683, - "grad_norm": 0.0, - "learning_rate": 5.741213482764118e-07, - "loss": 0.7933, - "step": 31579 - }, - { - "epoch": 0.8948964266485308, - "grad_norm": 0.0, - "learning_rate": 5.738148846738568e-07, - "loss": 0.8162, - "step": 31580 - }, - { - "epoch": 0.8949247640907931, - "grad_norm": 0.0, - "learning_rate": 5.73508500471558e-07, - "loss": 0.7468, - "step": 31581 - }, - { - "epoch": 0.8949531015330556, - "grad_norm": 0.0, - "learning_rate": 5.732021956720968e-07, - "loss": 0.8817, - "step": 31582 - }, - { - "epoch": 0.8949814389753181, - "grad_norm": 0.0, - "learning_rate": 5.728959702780534e-07, - "loss": 0.888, - "step": 31583 - }, - { - "epoch": 0.8950097764175805, - "grad_norm": 0.0, - "learning_rate": 5.725898242920092e-07, - "loss": 0.7507, - "step": 31584 - }, - { - "epoch": 0.895038113859843, - "grad_norm": 0.0, - "learning_rate": 5.722837577165419e-07, - "loss": 0.8343, - "step": 31585 - }, - { - "epoch": 0.8950664513021055, - "grad_norm": 0.0, - "learning_rate": 5.719777705542296e-07, - "loss": 0.8501, - "step": 31586 - }, - { - "epoch": 0.895094788744368, - "grad_norm": 0.0, - "learning_rate": 5.716718628076479e-07, - "loss": 0.7241, - "step": 31587 - }, - { - "epoch": 0.8951231261866304, - "grad_norm": 0.0, - "learning_rate": 5.713660344793781e-07, - "loss": 0.733, - "step": 31588 - }, - { - "epoch": 0.8951514636288929, - "grad_norm": 0.0, - "learning_rate": 5.710602855719904e-07, - "loss": 0.7966, - "step": 31589 - }, - { - "epoch": 0.8951798010711554, - "grad_norm": 0.0, - "learning_rate": 5.707546160880651e-07, - "loss": 0.8014, - "step": 31590 - }, - { - "epoch": 0.8952081385134177, - "grad_norm": 0.0, - "learning_rate": 5.704490260301754e-07, - "loss": 0.7613, - "step": 31591 - }, - { - "epoch": 0.8952364759556802, - "grad_norm": 0.0, - "learning_rate": 5.701435154008939e-07, - "loss": 0.788, - "step": 31592 - }, - { - "epoch": 0.8952648133979427, - "grad_norm": 0.0, - "learning_rate": 5.698380842027962e-07, - "loss": 0.9317, - "step": 31593 - }, - { - "epoch": 0.8952931508402051, - "grad_norm": 0.0, - "learning_rate": 5.695327324384536e-07, - "loss": 0.7643, - "step": 31594 - }, - { - "epoch": 0.8953214882824676, - "grad_norm": 0.0, - "learning_rate": 5.692274601104387e-07, - "loss": 0.8464, - "step": 31595 - }, - { - "epoch": 0.8953498257247301, - "grad_norm": 0.0, - "learning_rate": 5.689222672213224e-07, - "loss": 0.8206, - "step": 31596 - }, - { - "epoch": 0.8953781631669926, - "grad_norm": 0.0, - "learning_rate": 5.686171537736762e-07, - "loss": 0.7732, - "step": 31597 - }, - { - "epoch": 0.895406500609255, - "grad_norm": 0.0, - "learning_rate": 5.683121197700714e-07, - "loss": 0.779, - "step": 31598 - }, - { - "epoch": 0.8954348380515175, - "grad_norm": 0.0, - "learning_rate": 5.680071652130736e-07, - "loss": 0.8395, - "step": 31599 - }, - { - "epoch": 0.89546317549378, - "grad_norm": 0.0, - "learning_rate": 5.677022901052553e-07, - "loss": 0.7096, - "step": 31600 - }, - { - "epoch": 0.8954915129360423, - "grad_norm": 0.0, - "learning_rate": 5.673974944491845e-07, - "loss": 0.7855, - "step": 31601 - }, - { - "epoch": 0.8955198503783048, - "grad_norm": 0.0, - "learning_rate": 5.670927782474256e-07, - "loss": 0.7406, - "step": 31602 - }, - { - "epoch": 0.8955481878205673, - "grad_norm": 0.0, - "learning_rate": 5.667881415025466e-07, - "loss": 0.7692, - "step": 31603 - }, - { - "epoch": 0.8955765252628298, - "grad_norm": 0.0, - "learning_rate": 5.664835842171157e-07, - "loss": 0.8419, - "step": 31604 - }, - { - "epoch": 0.8956048627050922, - "grad_norm": 0.0, - "learning_rate": 5.66179106393695e-07, - "loss": 0.7679, - "step": 31605 - }, - { - "epoch": 0.8956332001473547, - "grad_norm": 0.0, - "learning_rate": 5.658747080348525e-07, - "loss": 0.8662, - "step": 31606 - }, - { - "epoch": 0.8956615375896172, - "grad_norm": 0.0, - "learning_rate": 5.655703891431496e-07, - "loss": 0.7844, - "step": 31607 - }, - { - "epoch": 0.8956898750318796, - "grad_norm": 0.0, - "learning_rate": 5.652661497211509e-07, - "loss": 0.7797, - "step": 31608 - }, - { - "epoch": 0.8957182124741421, - "grad_norm": 0.0, - "learning_rate": 5.649619897714187e-07, - "loss": 0.8909, - "step": 31609 - }, - { - "epoch": 0.8957465499164046, - "grad_norm": 0.0, - "learning_rate": 5.646579092965143e-07, - "loss": 0.8684, - "step": 31610 - }, - { - "epoch": 0.8957748873586671, - "grad_norm": 0.0, - "learning_rate": 5.643539082990013e-07, - "loss": 0.8203, - "step": 31611 - }, - { - "epoch": 0.8958032248009294, - "grad_norm": 0.0, - "learning_rate": 5.640499867814397e-07, - "loss": 0.7673, - "step": 31612 - }, - { - "epoch": 0.8958315622431919, - "grad_norm": 0.0, - "learning_rate": 5.637461447463876e-07, - "loss": 0.8085, - "step": 31613 - }, - { - "epoch": 0.8958598996854544, - "grad_norm": 0.0, - "learning_rate": 5.634423821964074e-07, - "loss": 0.642, - "step": 31614 - }, - { - "epoch": 0.8958882371277168, - "grad_norm": 0.0, - "learning_rate": 5.631386991340559e-07, - "loss": 0.8372, - "step": 31615 - }, - { - "epoch": 0.8959165745699793, - "grad_norm": 0.0, - "learning_rate": 5.6283509556189e-07, - "loss": 0.7887, - "step": 31616 - }, - { - "epoch": 0.8959449120122418, - "grad_norm": 0.0, - "learning_rate": 5.625315714824708e-07, - "loss": 0.8642, - "step": 31617 - }, - { - "epoch": 0.8959732494545042, - "grad_norm": 0.0, - "learning_rate": 5.622281268983509e-07, - "loss": 0.924, - "step": 31618 - }, - { - "epoch": 0.8960015868967667, - "grad_norm": 0.0, - "learning_rate": 5.619247618120871e-07, - "loss": 0.7805, - "step": 31619 - }, - { - "epoch": 0.8960299243390292, - "grad_norm": 0.0, - "learning_rate": 5.616214762262384e-07, - "loss": 0.7609, - "step": 31620 - }, - { - "epoch": 0.8960582617812917, - "grad_norm": 0.0, - "learning_rate": 5.613182701433551e-07, - "loss": 0.742, - "step": 31621 - }, - { - "epoch": 0.896086599223554, - "grad_norm": 0.0, - "learning_rate": 5.610151435659939e-07, - "loss": 0.7765, - "step": 31622 - }, - { - "epoch": 0.8961149366658165, - "grad_norm": 0.0, - "learning_rate": 5.607120964967061e-07, - "loss": 0.8269, - "step": 31623 - }, - { - "epoch": 0.896143274108079, - "grad_norm": 0.0, - "learning_rate": 5.604091289380453e-07, - "loss": 0.7374, - "step": 31624 - }, - { - "epoch": 0.8961716115503414, - "grad_norm": 0.0, - "learning_rate": 5.601062408925662e-07, - "loss": 0.8635, - "step": 31625 - }, - { - "epoch": 0.8961999489926039, - "grad_norm": 0.0, - "learning_rate": 5.598034323628154e-07, - "loss": 0.8199, - "step": 31626 - }, - { - "epoch": 0.8962282864348664, - "grad_norm": 0.0, - "learning_rate": 5.595007033513478e-07, - "loss": 0.7488, - "step": 31627 - }, - { - "epoch": 0.8962566238771289, - "grad_norm": 0.0, - "learning_rate": 5.5919805386071e-07, - "loss": 0.7946, - "step": 31628 - }, - { - "epoch": 0.8962849613193913, - "grad_norm": 0.0, - "learning_rate": 5.588954838934523e-07, - "loss": 0.9534, - "step": 31629 - }, - { - "epoch": 0.8963132987616538, - "grad_norm": 0.0, - "learning_rate": 5.58592993452125e-07, - "loss": 0.8905, - "step": 31630 - }, - { - "epoch": 0.8963416362039163, - "grad_norm": 0.0, - "learning_rate": 5.582905825392737e-07, - "loss": 0.8035, - "step": 31631 - }, - { - "epoch": 0.8963699736461787, - "grad_norm": 0.0, - "learning_rate": 5.579882511574475e-07, - "loss": 0.7713, - "step": 31632 - }, - { - "epoch": 0.8963983110884411, - "grad_norm": 0.0, - "learning_rate": 5.576859993091932e-07, - "loss": 0.8414, - "step": 31633 - }, - { - "epoch": 0.8964266485307036, - "grad_norm": 0.0, - "learning_rate": 5.573838269970555e-07, - "loss": 0.7636, - "step": 31634 - }, - { - "epoch": 0.8964549859729661, - "grad_norm": 0.0, - "learning_rate": 5.570817342235791e-07, - "loss": 0.816, - "step": 31635 - }, - { - "epoch": 0.8964833234152285, - "grad_norm": 0.0, - "learning_rate": 5.567797209913106e-07, - "loss": 0.8476, - "step": 31636 - }, - { - "epoch": 0.896511660857491, - "grad_norm": 0.0, - "learning_rate": 5.564777873027927e-07, - "loss": 0.8768, - "step": 31637 - }, - { - "epoch": 0.8965399982997535, - "grad_norm": 0.0, - "learning_rate": 5.56175933160571e-07, - "loss": 0.803, - "step": 31638 - }, - { - "epoch": 0.8965683357420159, - "grad_norm": 0.0, - "learning_rate": 5.558741585671845e-07, - "loss": 0.7687, - "step": 31639 - }, - { - "epoch": 0.8965966731842784, - "grad_norm": 0.0, - "learning_rate": 5.555724635251769e-07, - "loss": 0.8719, - "step": 31640 - }, - { - "epoch": 0.8966250106265409, - "grad_norm": 0.0, - "learning_rate": 5.552708480370916e-07, - "loss": 0.6805, - "step": 31641 - }, - { - "epoch": 0.8966533480688033, - "grad_norm": 0.0, - "learning_rate": 5.549693121054656e-07, - "loss": 0.8032, - "step": 31642 - }, - { - "epoch": 0.8966816855110658, - "grad_norm": 0.0, - "learning_rate": 5.546678557328411e-07, - "loss": 0.7405, - "step": 31643 - }, - { - "epoch": 0.8967100229533282, - "grad_norm": 0.0, - "learning_rate": 5.543664789217562e-07, - "loss": 0.8011, - "step": 31644 - }, - { - "epoch": 0.8967383603955907, - "grad_norm": 0.0, - "learning_rate": 5.540651816747489e-07, - "loss": 0.7953, - "step": 31645 - }, - { - "epoch": 0.8967666978378531, - "grad_norm": 0.0, - "learning_rate": 5.537639639943604e-07, - "loss": 0.7415, - "step": 31646 - }, - { - "epoch": 0.8967950352801156, - "grad_norm": 0.0, - "learning_rate": 5.534628258831243e-07, - "loss": 0.6919, - "step": 31647 - }, - { - "epoch": 0.8968233727223781, - "grad_norm": 0.0, - "learning_rate": 5.531617673435785e-07, - "loss": 0.7636, - "step": 31648 - }, - { - "epoch": 0.8968517101646405, - "grad_norm": 0.0, - "learning_rate": 5.528607883782599e-07, - "loss": 0.8044, - "step": 31649 - }, - { - "epoch": 0.896880047606903, - "grad_norm": 0.0, - "learning_rate": 5.525598889897022e-07, - "loss": 0.7574, - "step": 31650 - }, - { - "epoch": 0.8969083850491655, - "grad_norm": 0.0, - "learning_rate": 5.522590691804419e-07, - "loss": 0.7488, - "step": 31651 - }, - { - "epoch": 0.896936722491428, - "grad_norm": 0.0, - "learning_rate": 5.519583289530106e-07, - "loss": 0.9018, - "step": 31652 - }, - { - "epoch": 0.8969650599336904, - "grad_norm": 0.0, - "learning_rate": 5.51657668309944e-07, - "loss": 0.7356, - "step": 31653 - }, - { - "epoch": 0.8969933973759529, - "grad_norm": 0.0, - "learning_rate": 5.513570872537732e-07, - "loss": 0.7782, - "step": 31654 - }, - { - "epoch": 0.8970217348182153, - "grad_norm": 0.0, - "learning_rate": 5.510565857870298e-07, - "loss": 0.8828, - "step": 31655 - }, - { - "epoch": 0.8970500722604777, - "grad_norm": 0.0, - "learning_rate": 5.507561639122461e-07, - "loss": 0.7116, - "step": 31656 - }, - { - "epoch": 0.8970784097027402, - "grad_norm": 0.0, - "learning_rate": 5.504558216319522e-07, - "loss": 0.9333, - "step": 31657 - }, - { - "epoch": 0.8971067471450027, - "grad_norm": 0.0, - "learning_rate": 5.501555589486762e-07, - "loss": 0.9272, - "step": 31658 - }, - { - "epoch": 0.8971350845872652, - "grad_norm": 0.0, - "learning_rate": 5.498553758649516e-07, - "loss": 0.8759, - "step": 31659 - }, - { - "epoch": 0.8971634220295276, - "grad_norm": 0.0, - "learning_rate": 5.495552723833031e-07, - "loss": 0.656, - "step": 31660 - }, - { - "epoch": 0.8971917594717901, - "grad_norm": 0.0, - "learning_rate": 5.492552485062585e-07, - "loss": 0.7589, - "step": 31661 - }, - { - "epoch": 0.8972200969140526, - "grad_norm": 0.0, - "learning_rate": 5.489553042363483e-07, - "loss": 0.7892, - "step": 31662 - }, - { - "epoch": 0.897248434356315, - "grad_norm": 0.0, - "learning_rate": 5.486554395760957e-07, - "loss": 0.7789, - "step": 31663 - }, - { - "epoch": 0.8972767717985775, - "grad_norm": 0.0, - "learning_rate": 5.483556545280299e-07, - "loss": 0.7397, - "step": 31664 - }, - { - "epoch": 0.89730510924084, - "grad_norm": 0.0, - "learning_rate": 5.480559490946724e-07, - "loss": 0.7799, - "step": 31665 - }, - { - "epoch": 0.8973334466831023, - "grad_norm": 0.0, - "learning_rate": 5.477563232785499e-07, - "loss": 0.7158, - "step": 31666 - }, - { - "epoch": 0.8973617841253648, - "grad_norm": 0.0, - "learning_rate": 5.47456777082187e-07, - "loss": 0.8356, - "step": 31667 - }, - { - "epoch": 0.8973901215676273, - "grad_norm": 0.0, - "learning_rate": 5.471573105081052e-07, - "loss": 0.8555, - "step": 31668 - }, - { - "epoch": 0.8974184590098898, - "grad_norm": 0.0, - "learning_rate": 5.468579235588268e-07, - "loss": 0.6931, - "step": 31669 - }, - { - "epoch": 0.8974467964521522, - "grad_norm": 0.0, - "learning_rate": 5.465586162368764e-07, - "loss": 0.7878, - "step": 31670 - }, - { - "epoch": 0.8974751338944147, - "grad_norm": 0.0, - "learning_rate": 5.46259388544772e-07, - "loss": 0.852, - "step": 31671 - }, - { - "epoch": 0.8975034713366772, - "grad_norm": 0.0, - "learning_rate": 5.459602404850362e-07, - "loss": 0.825, - "step": 31672 - }, - { - "epoch": 0.8975318087789396, - "grad_norm": 0.0, - "learning_rate": 5.456611720601868e-07, - "loss": 0.9182, - "step": 31673 - }, - { - "epoch": 0.8975601462212021, - "grad_norm": 0.0, - "learning_rate": 5.45362183272743e-07, - "loss": 0.7797, - "step": 31674 - }, - { - "epoch": 0.8975884836634646, - "grad_norm": 0.0, - "learning_rate": 5.450632741252259e-07, - "loss": 0.7679, - "step": 31675 - }, - { - "epoch": 0.897616821105727, - "grad_norm": 0.0, - "learning_rate": 5.447644446201516e-07, - "loss": 0.7933, - "step": 31676 - }, - { - "epoch": 0.8976451585479894, - "grad_norm": 0.0, - "learning_rate": 5.444656947600368e-07, - "loss": 0.7319, - "step": 31677 - }, - { - "epoch": 0.8976734959902519, - "grad_norm": 0.0, - "learning_rate": 5.441670245474007e-07, - "loss": 0.7894, - "step": 31678 - }, - { - "epoch": 0.8977018334325144, - "grad_norm": 0.0, - "learning_rate": 5.438684339847556e-07, - "loss": 0.8432, - "step": 31679 - }, - { - "epoch": 0.8977301708747768, - "grad_norm": 0.0, - "learning_rate": 5.435699230746194e-07, - "loss": 0.9022, - "step": 31680 - }, - { - "epoch": 0.8977585083170393, - "grad_norm": 0.0, - "learning_rate": 5.432714918195037e-07, - "loss": 0.8651, - "step": 31681 - }, - { - "epoch": 0.8977868457593018, - "grad_norm": 0.0, - "learning_rate": 5.42973140221924e-07, - "loss": 0.8564, - "step": 31682 - }, - { - "epoch": 0.8978151832015643, - "grad_norm": 0.0, - "learning_rate": 5.426748682843952e-07, - "loss": 0.8642, - "step": 31683 - }, - { - "epoch": 0.8978435206438267, - "grad_norm": 0.0, - "learning_rate": 5.423766760094262e-07, - "loss": 0.7845, - "step": 31684 - }, - { - "epoch": 0.8978718580860892, - "grad_norm": 0.0, - "learning_rate": 5.420785633995318e-07, - "loss": 0.7891, - "step": 31685 - }, - { - "epoch": 0.8979001955283517, - "grad_norm": 0.0, - "learning_rate": 5.417805304572199e-07, - "loss": 0.8749, - "step": 31686 - }, - { - "epoch": 0.897928532970614, - "grad_norm": 0.0, - "learning_rate": 5.414825771850041e-07, - "loss": 0.8202, - "step": 31687 - }, - { - "epoch": 0.8979568704128765, - "grad_norm": 0.0, - "learning_rate": 5.411847035853912e-07, - "loss": 0.8238, - "step": 31688 - }, - { - "epoch": 0.897985207855139, - "grad_norm": 0.0, - "learning_rate": 5.408869096608926e-07, - "loss": 0.8523, - "step": 31689 - }, - { - "epoch": 0.8980135452974014, - "grad_norm": 0.0, - "learning_rate": 5.405891954140175e-07, - "loss": 0.8349, - "step": 31690 - }, - { - "epoch": 0.8980418827396639, - "grad_norm": 0.0, - "learning_rate": 5.402915608472726e-07, - "loss": 0.9136, - "step": 31691 - }, - { - "epoch": 0.8980702201819264, - "grad_norm": 0.0, - "learning_rate": 5.399940059631625e-07, - "loss": 0.828, - "step": 31692 - }, - { - "epoch": 0.8980985576241889, - "grad_norm": 0.0, - "learning_rate": 5.396965307641977e-07, - "loss": 0.7942, - "step": 31693 - }, - { - "epoch": 0.8981268950664513, - "grad_norm": 0.0, - "learning_rate": 5.393991352528816e-07, - "loss": 0.7524, - "step": 31694 - }, - { - "epoch": 0.8981552325087138, - "grad_norm": 0.0, - "learning_rate": 5.391018194317188e-07, - "loss": 0.798, - "step": 31695 - }, - { - "epoch": 0.8981835699509763, - "grad_norm": 0.0, - "learning_rate": 5.388045833032152e-07, - "loss": 0.7985, - "step": 31696 - }, - { - "epoch": 0.8982119073932386, - "grad_norm": 0.0, - "learning_rate": 5.385074268698742e-07, - "loss": 0.8032, - "step": 31697 - }, - { - "epoch": 0.8982402448355011, - "grad_norm": 0.0, - "learning_rate": 5.382103501341973e-07, - "loss": 0.6627, - "step": 31698 - }, - { - "epoch": 0.8982685822777636, - "grad_norm": 0.0, - "learning_rate": 5.379133530986902e-07, - "loss": 0.827, - "step": 31699 - }, - { - "epoch": 0.8982969197200261, - "grad_norm": 0.0, - "learning_rate": 5.376164357658508e-07, - "loss": 0.8096, - "step": 31700 - }, - { - "epoch": 0.8983252571622885, - "grad_norm": 0.0, - "learning_rate": 5.373195981381817e-07, - "loss": 0.8359, - "step": 31701 - }, - { - "epoch": 0.898353594604551, - "grad_norm": 0.0, - "learning_rate": 5.370228402181843e-07, - "loss": 0.7946, - "step": 31702 - }, - { - "epoch": 0.8983819320468135, - "grad_norm": 0.0, - "learning_rate": 5.367261620083575e-07, - "loss": 0.8535, - "step": 31703 - }, - { - "epoch": 0.8984102694890759, - "grad_norm": 0.0, - "learning_rate": 5.364295635112016e-07, - "loss": 0.747, - "step": 31704 - }, - { - "epoch": 0.8984386069313384, - "grad_norm": 0.0, - "learning_rate": 5.361330447292123e-07, - "loss": 0.8538, - "step": 31705 - }, - { - "epoch": 0.8984669443736009, - "grad_norm": 0.0, - "learning_rate": 5.358366056648879e-07, - "loss": 0.9291, - "step": 31706 - }, - { - "epoch": 0.8984952818158632, - "grad_norm": 0.0, - "learning_rate": 5.355402463207282e-07, - "loss": 0.818, - "step": 31707 - }, - { - "epoch": 0.8985236192581257, - "grad_norm": 0.0, - "learning_rate": 5.352439666992259e-07, - "loss": 0.7641, - "step": 31708 - }, - { - "epoch": 0.8985519567003882, - "grad_norm": 0.0, - "learning_rate": 5.349477668028802e-07, - "loss": 0.7941, - "step": 31709 - }, - { - "epoch": 0.8985802941426507, - "grad_norm": 0.0, - "learning_rate": 5.346516466341834e-07, - "loss": 0.7796, - "step": 31710 - }, - { - "epoch": 0.8986086315849131, - "grad_norm": 0.0, - "learning_rate": 5.34355606195629e-07, - "loss": 0.7511, - "step": 31711 - }, - { - "epoch": 0.8986369690271756, - "grad_norm": 0.0, - "learning_rate": 5.340596454897151e-07, - "loss": 0.7751, - "step": 31712 - }, - { - "epoch": 0.8986653064694381, - "grad_norm": 0.0, - "learning_rate": 5.337637645189298e-07, - "loss": 0.8626, - "step": 31713 - }, - { - "epoch": 0.8986936439117005, - "grad_norm": 0.0, - "learning_rate": 5.334679632857675e-07, - "loss": 0.9059, - "step": 31714 - }, - { - "epoch": 0.898721981353963, - "grad_norm": 0.0, - "learning_rate": 5.331722417927199e-07, - "loss": 0.7479, - "step": 31715 - }, - { - "epoch": 0.8987503187962255, - "grad_norm": 0.0, - "learning_rate": 5.32876600042278e-07, - "loss": 0.8725, - "step": 31716 - }, - { - "epoch": 0.898778656238488, - "grad_norm": 0.0, - "learning_rate": 5.325810380369334e-07, - "loss": 0.8284, - "step": 31717 - }, - { - "epoch": 0.8988069936807503, - "grad_norm": 0.0, - "learning_rate": 5.322855557791729e-07, - "loss": 0.8645, - "step": 31718 - }, - { - "epoch": 0.8988353311230128, - "grad_norm": 0.0, - "learning_rate": 5.319901532714877e-07, - "loss": 0.8257, - "step": 31719 - }, - { - "epoch": 0.8988636685652753, - "grad_norm": 0.0, - "learning_rate": 5.31694830516366e-07, - "loss": 0.7077, - "step": 31720 - }, - { - "epoch": 0.8988920060075377, - "grad_norm": 0.0, - "learning_rate": 5.313995875162925e-07, - "loss": 0.7507, - "step": 31721 - }, - { - "epoch": 0.8989203434498002, - "grad_norm": 0.0, - "learning_rate": 5.311044242737595e-07, - "loss": 0.8809, - "step": 31722 - }, - { - "epoch": 0.8989486808920627, - "grad_norm": 0.0, - "learning_rate": 5.308093407912473e-07, - "loss": 0.772, - "step": 31723 - }, - { - "epoch": 0.8989770183343252, - "grad_norm": 0.0, - "learning_rate": 5.305143370712451e-07, - "loss": 0.8176, - "step": 31724 - }, - { - "epoch": 0.8990053557765876, - "grad_norm": 0.0, - "learning_rate": 5.302194131162386e-07, - "loss": 0.8238, - "step": 31725 - }, - { - "epoch": 0.8990336932188501, - "grad_norm": 0.0, - "learning_rate": 5.299245689287081e-07, - "loss": 0.8101, - "step": 31726 - }, - { - "epoch": 0.8990620306611126, - "grad_norm": 0.0, - "learning_rate": 5.296298045111403e-07, - "loss": 0.7568, - "step": 31727 - }, - { - "epoch": 0.899090368103375, - "grad_norm": 0.0, - "learning_rate": 5.293351198660168e-07, - "loss": 0.8203, - "step": 31728 - }, - { - "epoch": 0.8991187055456374, - "grad_norm": 0.0, - "learning_rate": 5.290405149958211e-07, - "loss": 0.8191, - "step": 31729 - }, - { - "epoch": 0.8991470429878999, - "grad_norm": 0.0, - "learning_rate": 5.287459899030356e-07, - "loss": 0.7673, - "step": 31730 - }, - { - "epoch": 0.8991753804301623, - "grad_norm": 0.0, - "learning_rate": 5.284515445901383e-07, - "loss": 0.85, - "step": 31731 - }, - { - "epoch": 0.8992037178724248, - "grad_norm": 0.0, - "learning_rate": 5.281571790596096e-07, - "loss": 0.8359, - "step": 31732 - }, - { - "epoch": 0.8992320553146873, - "grad_norm": 0.0, - "learning_rate": 5.278628933139329e-07, - "loss": 0.7125, - "step": 31733 - }, - { - "epoch": 0.8992603927569498, - "grad_norm": 0.0, - "learning_rate": 5.27568687355583e-07, - "loss": 0.7692, - "step": 31734 - }, - { - "epoch": 0.8992887301992122, - "grad_norm": 0.0, - "learning_rate": 5.272745611870389e-07, - "loss": 0.9003, - "step": 31735 - }, - { - "epoch": 0.8993170676414747, - "grad_norm": 0.0, - "learning_rate": 5.269805148107809e-07, - "loss": 0.8728, - "step": 31736 - }, - { - "epoch": 0.8993454050837372, - "grad_norm": 0.0, - "learning_rate": 5.266865482292815e-07, - "loss": 0.7597, - "step": 31737 - }, - { - "epoch": 0.8993737425259996, - "grad_norm": 0.0, - "learning_rate": 5.26392661445021e-07, - "loss": 0.773, - "step": 31738 - }, - { - "epoch": 0.899402079968262, - "grad_norm": 0.0, - "learning_rate": 5.260988544604717e-07, - "loss": 0.8646, - "step": 31739 - }, - { - "epoch": 0.8994304174105245, - "grad_norm": 0.0, - "learning_rate": 5.258051272781095e-07, - "loss": 0.7986, - "step": 31740 - }, - { - "epoch": 0.899458754852787, - "grad_norm": 0.0, - "learning_rate": 5.255114799004091e-07, - "loss": 0.6973, - "step": 31741 - }, - { - "epoch": 0.8994870922950494, - "grad_norm": 0.0, - "learning_rate": 5.25217912329844e-07, - "loss": 0.7441, - "step": 31742 - }, - { - "epoch": 0.8995154297373119, - "grad_norm": 0.0, - "learning_rate": 5.249244245688878e-07, - "loss": 0.9146, - "step": 31743 - }, - { - "epoch": 0.8995437671795744, - "grad_norm": 0.0, - "learning_rate": 5.246310166200108e-07, - "loss": 0.8036, - "step": 31744 - }, - { - "epoch": 0.8995721046218368, - "grad_norm": 0.0, - "learning_rate": 5.243376884856854e-07, - "loss": 0.8766, - "step": 31745 - }, - { - "epoch": 0.8996004420640993, - "grad_norm": 0.0, - "learning_rate": 5.240444401683831e-07, - "loss": 0.7684, - "step": 31746 - }, - { - "epoch": 0.8996287795063618, - "grad_norm": 0.0, - "learning_rate": 5.237512716705718e-07, - "loss": 0.7995, - "step": 31747 - }, - { - "epoch": 0.8996571169486243, - "grad_norm": 0.0, - "learning_rate": 5.234581829947227e-07, - "loss": 0.8084, - "step": 31748 - }, - { - "epoch": 0.8996854543908867, - "grad_norm": 0.0, - "learning_rate": 5.231651741433063e-07, - "loss": 0.8743, - "step": 31749 - }, - { - "epoch": 0.8997137918331491, - "grad_norm": 0.0, - "learning_rate": 5.228722451187862e-07, - "loss": 0.7907, - "step": 31750 - }, - { - "epoch": 0.8997421292754116, - "grad_norm": 0.0, - "learning_rate": 5.225793959236347e-07, - "loss": 0.8579, - "step": 31751 - }, - { - "epoch": 0.899770466717674, - "grad_norm": 0.0, - "learning_rate": 5.222866265603155e-07, - "loss": 0.8689, - "step": 31752 - }, - { - "epoch": 0.8997988041599365, - "grad_norm": 0.0, - "learning_rate": 5.219939370312943e-07, - "loss": 0.8618, - "step": 31753 - }, - { - "epoch": 0.899827141602199, - "grad_norm": 0.0, - "learning_rate": 5.217013273390381e-07, - "loss": 0.728, - "step": 31754 - }, - { - "epoch": 0.8998554790444614, - "grad_norm": 0.0, - "learning_rate": 5.214087974860116e-07, - "loss": 0.883, - "step": 31755 - }, - { - "epoch": 0.8998838164867239, - "grad_norm": 0.0, - "learning_rate": 5.211163474746783e-07, - "loss": 0.8499, - "step": 31756 - }, - { - "epoch": 0.8999121539289864, - "grad_norm": 0.0, - "learning_rate": 5.20823977307503e-07, - "loss": 0.7754, - "step": 31757 - }, - { - "epoch": 0.8999404913712489, - "grad_norm": 0.0, - "learning_rate": 5.205316869869459e-07, - "loss": 0.8001, - "step": 31758 - }, - { - "epoch": 0.8999688288135113, - "grad_norm": 0.0, - "learning_rate": 5.202394765154728e-07, - "loss": 0.91, - "step": 31759 - }, - { - "epoch": 0.8999971662557738, - "grad_norm": 0.0, - "learning_rate": 5.199473458955406e-07, - "loss": 0.7285, - "step": 31760 - }, - { - "epoch": 0.9000255036980362, - "grad_norm": 0.0, - "learning_rate": 5.19655295129613e-07, - "loss": 0.7839, - "step": 31761 - }, - { - "epoch": 0.9000538411402986, - "grad_norm": 0.0, - "learning_rate": 5.193633242201501e-07, - "loss": 0.7748, - "step": 31762 - }, - { - "epoch": 0.9000821785825611, - "grad_norm": 0.0, - "learning_rate": 5.19071433169609e-07, - "loss": 0.7142, - "step": 31763 - }, - { - "epoch": 0.9001105160248236, - "grad_norm": 0.0, - "learning_rate": 5.187796219804508e-07, - "loss": 0.9089, - "step": 31764 - }, - { - "epoch": 0.9001388534670861, - "grad_norm": 0.0, - "learning_rate": 5.184878906551328e-07, - "loss": 0.9005, - "step": 31765 - }, - { - "epoch": 0.9001671909093485, - "grad_norm": 0.0, - "learning_rate": 5.181962391961115e-07, - "loss": 0.7361, - "step": 31766 - }, - { - "epoch": 0.900195528351611, - "grad_norm": 0.0, - "learning_rate": 5.179046676058442e-07, - "loss": 0.7975, - "step": 31767 - }, - { - "epoch": 0.9002238657938735, - "grad_norm": 0.0, - "learning_rate": 5.176131758867864e-07, - "loss": 0.8371, - "step": 31768 - }, - { - "epoch": 0.9002522032361359, - "grad_norm": 0.0, - "learning_rate": 5.173217640413942e-07, - "loss": 0.8061, - "step": 31769 - }, - { - "epoch": 0.9002805406783984, - "grad_norm": 0.0, - "learning_rate": 5.170304320721243e-07, - "loss": 0.8337, - "step": 31770 - }, - { - "epoch": 0.9003088781206608, - "grad_norm": 0.0, - "learning_rate": 5.167391799814258e-07, - "loss": 0.7373, - "step": 31771 - }, - { - "epoch": 0.9003372155629233, - "grad_norm": 0.0, - "learning_rate": 5.16448007771757e-07, - "loss": 0.7629, - "step": 31772 - }, - { - "epoch": 0.9003655530051857, - "grad_norm": 0.0, - "learning_rate": 5.16156915445567e-07, - "loss": 0.753, - "step": 31773 - }, - { - "epoch": 0.9003938904474482, - "grad_norm": 0.0, - "learning_rate": 5.158659030053081e-07, - "loss": 0.8122, - "step": 31774 - }, - { - "epoch": 0.9004222278897107, - "grad_norm": 0.0, - "learning_rate": 5.155749704534352e-07, - "loss": 0.752, - "step": 31775 - }, - { - "epoch": 0.9004505653319731, - "grad_norm": 0.0, - "learning_rate": 5.15284117792394e-07, - "loss": 0.7221, - "step": 31776 - }, - { - "epoch": 0.9004789027742356, - "grad_norm": 0.0, - "learning_rate": 5.14993345024637e-07, - "loss": 0.7958, - "step": 31777 - }, - { - "epoch": 0.9005072402164981, - "grad_norm": 0.0, - "learning_rate": 5.147026521526144e-07, - "loss": 0.8219, - "step": 31778 - }, - { - "epoch": 0.9005355776587605, - "grad_norm": 0.0, - "learning_rate": 5.144120391787732e-07, - "loss": 0.7698, - "step": 31779 - }, - { - "epoch": 0.900563915101023, - "grad_norm": 0.0, - "learning_rate": 5.141215061055605e-07, - "loss": 0.8995, - "step": 31780 - }, - { - "epoch": 0.9005922525432855, - "grad_norm": 0.0, - "learning_rate": 5.138310529354251e-07, - "loss": 0.7993, - "step": 31781 - }, - { - "epoch": 0.900620589985548, - "grad_norm": 0.0, - "learning_rate": 5.135406796708142e-07, - "loss": 0.8121, - "step": 31782 - }, - { - "epoch": 0.9006489274278103, - "grad_norm": 0.0, - "learning_rate": 5.132503863141736e-07, - "loss": 0.7561, - "step": 31783 - }, - { - "epoch": 0.9006772648700728, - "grad_norm": 0.0, - "learning_rate": 5.129601728679468e-07, - "loss": 0.8269, - "step": 31784 - }, - { - "epoch": 0.9007056023123353, - "grad_norm": 0.0, - "learning_rate": 5.126700393345785e-07, - "loss": 0.8791, - "step": 31785 - }, - { - "epoch": 0.9007339397545977, - "grad_norm": 0.0, - "learning_rate": 5.123799857165157e-07, - "loss": 0.811, - "step": 31786 - }, - { - "epoch": 0.9007622771968602, - "grad_norm": 0.0, - "learning_rate": 5.120900120161976e-07, - "loss": 0.8053, - "step": 31787 - }, - { - "epoch": 0.9007906146391227, - "grad_norm": 0.0, - "learning_rate": 5.118001182360699e-07, - "loss": 0.7497, - "step": 31788 - }, - { - "epoch": 0.9008189520813852, - "grad_norm": 0.0, - "learning_rate": 5.115103043785718e-07, - "loss": 0.8065, - "step": 31789 - }, - { - "epoch": 0.9008472895236476, - "grad_norm": 0.0, - "learning_rate": 5.11220570446147e-07, - "loss": 0.8781, - "step": 31790 - }, - { - "epoch": 0.9008756269659101, - "grad_norm": 0.0, - "learning_rate": 5.109309164412346e-07, - "loss": 0.8149, - "step": 31791 - }, - { - "epoch": 0.9009039644081726, - "grad_norm": 0.0, - "learning_rate": 5.106413423662748e-07, - "loss": 0.6713, - "step": 31792 - }, - { - "epoch": 0.9009323018504349, - "grad_norm": 0.0, - "learning_rate": 5.103518482237058e-07, - "loss": 0.7746, - "step": 31793 - }, - { - "epoch": 0.9009606392926974, - "grad_norm": 0.0, - "learning_rate": 5.100624340159676e-07, - "loss": 0.7763, - "step": 31794 - }, - { - "epoch": 0.9009889767349599, - "grad_norm": 0.0, - "learning_rate": 5.097730997454975e-07, - "loss": 0.75, - "step": 31795 - }, - { - "epoch": 0.9010173141772224, - "grad_norm": 0.0, - "learning_rate": 5.094838454147333e-07, - "loss": 0.8022, - "step": 31796 - }, - { - "epoch": 0.9010456516194848, - "grad_norm": 0.0, - "learning_rate": 5.091946710261108e-07, - "loss": 0.8108, - "step": 31797 - }, - { - "epoch": 0.9010739890617473, - "grad_norm": 0.0, - "learning_rate": 5.08905576582065e-07, - "loss": 0.7548, - "step": 31798 - }, - { - "epoch": 0.9011023265040098, - "grad_norm": 0.0, - "learning_rate": 5.086165620850337e-07, - "loss": 0.829, - "step": 31799 - }, - { - "epoch": 0.9011306639462722, - "grad_norm": 0.0, - "learning_rate": 5.083276275374482e-07, - "loss": 0.7727, - "step": 31800 - }, - { - "epoch": 0.9011590013885347, - "grad_norm": 0.0, - "learning_rate": 5.080387729417447e-07, - "loss": 0.847, - "step": 31801 - }, - { - "epoch": 0.9011873388307972, - "grad_norm": 0.0, - "learning_rate": 5.077499983003542e-07, - "loss": 0.887, - "step": 31802 - }, - { - "epoch": 0.9012156762730595, - "grad_norm": 0.0, - "learning_rate": 5.074613036157105e-07, - "loss": 0.7928, - "step": 31803 - }, - { - "epoch": 0.901244013715322, - "grad_norm": 0.0, - "learning_rate": 5.071726888902451e-07, - "loss": 0.8119, - "step": 31804 - }, - { - "epoch": 0.9012723511575845, - "grad_norm": 0.0, - "learning_rate": 5.068841541263892e-07, - "loss": 0.8601, - "step": 31805 - }, - { - "epoch": 0.901300688599847, - "grad_norm": 0.0, - "learning_rate": 5.065956993265742e-07, - "loss": 0.8662, - "step": 31806 - }, - { - "epoch": 0.9013290260421094, - "grad_norm": 0.0, - "learning_rate": 5.063073244932293e-07, - "loss": 0.795, - "step": 31807 - }, - { - "epoch": 0.9013573634843719, - "grad_norm": 0.0, - "learning_rate": 5.060190296287815e-07, - "loss": 0.7637, - "step": 31808 - }, - { - "epoch": 0.9013857009266344, - "grad_norm": 0.0, - "learning_rate": 5.057308147356632e-07, - "loss": 0.8197, - "step": 31809 - }, - { - "epoch": 0.9014140383688968, - "grad_norm": 0.0, - "learning_rate": 5.054426798162971e-07, - "loss": 0.8104, - "step": 31810 - }, - { - "epoch": 0.9014423758111593, - "grad_norm": 0.0, - "learning_rate": 5.051546248731142e-07, - "loss": 0.8559, - "step": 31811 - }, - { - "epoch": 0.9014707132534218, - "grad_norm": 0.0, - "learning_rate": 5.048666499085408e-07, - "loss": 0.8274, - "step": 31812 - }, - { - "epoch": 0.9014990506956843, - "grad_norm": 0.0, - "learning_rate": 5.045787549250003e-07, - "loss": 0.8191, - "step": 31813 - }, - { - "epoch": 0.9015273881379466, - "grad_norm": 0.0, - "learning_rate": 5.042909399249185e-07, - "loss": 0.7788, - "step": 31814 - }, - { - "epoch": 0.9015557255802091, - "grad_norm": 0.0, - "learning_rate": 5.040032049107225e-07, - "loss": 0.8405, - "step": 31815 - }, - { - "epoch": 0.9015840630224716, - "grad_norm": 0.0, - "learning_rate": 5.037155498848323e-07, - "loss": 0.785, - "step": 31816 - }, - { - "epoch": 0.901612400464734, - "grad_norm": 0.0, - "learning_rate": 5.034279748496717e-07, - "loss": 0.8163, - "step": 31817 - }, - { - "epoch": 0.9016407379069965, - "grad_norm": 0.0, - "learning_rate": 5.031404798076644e-07, - "loss": 0.8025, - "step": 31818 - }, - { - "epoch": 0.901669075349259, - "grad_norm": 0.0, - "learning_rate": 5.028530647612306e-07, - "loss": 0.7512, - "step": 31819 - }, - { - "epoch": 0.9016974127915215, - "grad_norm": 0.0, - "learning_rate": 5.025657297127939e-07, - "loss": 0.8669, - "step": 31820 - }, - { - "epoch": 0.9017257502337839, - "grad_norm": 0.0, - "learning_rate": 5.022784746647714e-07, - "loss": 0.8478, - "step": 31821 - }, - { - "epoch": 0.9017540876760464, - "grad_norm": 0.0, - "learning_rate": 5.019912996195852e-07, - "loss": 0.8234, - "step": 31822 - }, - { - "epoch": 0.9017824251183089, - "grad_norm": 0.0, - "learning_rate": 5.017042045796527e-07, - "loss": 0.6664, - "step": 31823 - }, - { - "epoch": 0.9018107625605712, - "grad_norm": 0.0, - "learning_rate": 5.014171895473929e-07, - "loss": 0.8139, - "step": 31824 - }, - { - "epoch": 0.9018391000028337, - "grad_norm": 0.0, - "learning_rate": 5.011302545252239e-07, - "loss": 0.7974, - "step": 31825 - }, - { - "epoch": 0.9018674374450962, - "grad_norm": 0.0, - "learning_rate": 5.008433995155615e-07, - "loss": 0.8355, - "step": 31826 - }, - { - "epoch": 0.9018957748873586, - "grad_norm": 0.0, - "learning_rate": 5.005566245208215e-07, - "loss": 0.6943, - "step": 31827 - }, - { - "epoch": 0.9019241123296211, - "grad_norm": 0.0, - "learning_rate": 5.002699295434233e-07, - "loss": 0.7581, - "step": 31828 - }, - { - "epoch": 0.9019524497718836, - "grad_norm": 0.0, - "learning_rate": 4.999833145857769e-07, - "loss": 0.8743, - "step": 31829 - }, - { - "epoch": 0.9019807872141461, - "grad_norm": 0.0, - "learning_rate": 4.996967796502982e-07, - "loss": 0.7561, - "step": 31830 - }, - { - "epoch": 0.9020091246564085, - "grad_norm": 0.0, - "learning_rate": 4.994103247394022e-07, - "loss": 0.8122, - "step": 31831 - }, - { - "epoch": 0.902037462098671, - "grad_norm": 0.0, - "learning_rate": 4.991239498555011e-07, - "loss": 0.8265, - "step": 31832 - }, - { - "epoch": 0.9020657995409335, - "grad_norm": 0.0, - "learning_rate": 4.988376550010088e-07, - "loss": 0.7117, - "step": 31833 - }, - { - "epoch": 0.9020941369831958, - "grad_norm": 0.0, - "learning_rate": 4.985514401783332e-07, - "loss": 0.7349, - "step": 31834 - }, - { - "epoch": 0.9021224744254583, - "grad_norm": 0.0, - "learning_rate": 4.982653053898867e-07, - "loss": 0.8302, - "step": 31835 - }, - { - "epoch": 0.9021508118677208, - "grad_norm": 0.0, - "learning_rate": 4.97979250638082e-07, - "loss": 0.7267, - "step": 31836 - }, - { - "epoch": 0.9021791493099833, - "grad_norm": 0.0, - "learning_rate": 4.976932759253239e-07, - "loss": 0.8126, - "step": 31837 - }, - { - "epoch": 0.9022074867522457, - "grad_norm": 0.0, - "learning_rate": 4.97407381254027e-07, - "loss": 0.8011, - "step": 31838 - }, - { - "epoch": 0.9022358241945082, - "grad_norm": 0.0, - "learning_rate": 4.971215666265939e-07, - "loss": 0.8203, - "step": 31839 - }, - { - "epoch": 0.9022641616367707, - "grad_norm": 0.0, - "learning_rate": 4.968358320454348e-07, - "loss": 0.826, - "step": 31840 - }, - { - "epoch": 0.9022924990790331, - "grad_norm": 0.0, - "learning_rate": 4.965501775129578e-07, - "loss": 0.8031, - "step": 31841 - }, - { - "epoch": 0.9023208365212956, - "grad_norm": 0.0, - "learning_rate": 4.962646030315665e-07, - "loss": 0.7842, - "step": 31842 - }, - { - "epoch": 0.9023491739635581, - "grad_norm": 0.0, - "learning_rate": 4.95979108603668e-07, - "loss": 0.8431, - "step": 31843 - }, - { - "epoch": 0.9023775114058206, - "grad_norm": 0.0, - "learning_rate": 4.956936942316659e-07, - "loss": 0.8124, - "step": 31844 - }, - { - "epoch": 0.902405848848083, - "grad_norm": 0.0, - "learning_rate": 4.95408359917966e-07, - "loss": 0.8533, - "step": 31845 - }, - { - "epoch": 0.9024341862903454, - "grad_norm": 0.0, - "learning_rate": 4.951231056649719e-07, - "loss": 0.8383, - "step": 31846 - }, - { - "epoch": 0.9024625237326079, - "grad_norm": 0.0, - "learning_rate": 4.94837931475084e-07, - "loss": 0.7717, - "step": 31847 - }, - { - "epoch": 0.9024908611748703, - "grad_norm": 0.0, - "learning_rate": 4.945528373507059e-07, - "loss": 0.8251, - "step": 31848 - }, - { - "epoch": 0.9025191986171328, - "grad_norm": 0.0, - "learning_rate": 4.942678232942399e-07, - "loss": 0.7634, - "step": 31849 - }, - { - "epoch": 0.9025475360593953, - "grad_norm": 0.0, - "learning_rate": 4.939828893080844e-07, - "loss": 0.794, - "step": 31850 - }, - { - "epoch": 0.9025758735016577, - "grad_norm": 0.0, - "learning_rate": 4.93698035394643e-07, - "loss": 0.8164, - "step": 31851 - }, - { - "epoch": 0.9026042109439202, - "grad_norm": 0.0, - "learning_rate": 4.934132615563125e-07, - "loss": 0.87, - "step": 31852 - }, - { - "epoch": 0.9026325483861827, - "grad_norm": 0.0, - "learning_rate": 4.931285677954912e-07, - "loss": 0.7803, - "step": 31853 - }, - { - "epoch": 0.9026608858284452, - "grad_norm": 0.0, - "learning_rate": 4.928439541145802e-07, - "loss": 0.821, - "step": 31854 - }, - { - "epoch": 0.9026892232707076, - "grad_norm": 0.0, - "learning_rate": 4.925594205159734e-07, - "loss": 0.6557, - "step": 31855 - }, - { - "epoch": 0.90271756071297, - "grad_norm": 0.0, - "learning_rate": 4.922749670020687e-07, - "loss": 0.8055, - "step": 31856 - }, - { - "epoch": 0.9027458981552325, - "grad_norm": 0.0, - "learning_rate": 4.919905935752622e-07, - "loss": 0.8243, - "step": 31857 - }, - { - "epoch": 0.9027742355974949, - "grad_norm": 0.0, - "learning_rate": 4.917063002379507e-07, - "loss": 0.8531, - "step": 31858 - }, - { - "epoch": 0.9028025730397574, - "grad_norm": 0.0, - "learning_rate": 4.91422086992529e-07, - "loss": 0.9068, - "step": 31859 - }, - { - "epoch": 0.9028309104820199, - "grad_norm": 0.0, - "learning_rate": 4.911379538413885e-07, - "loss": 0.7777, - "step": 31860 - }, - { - "epoch": 0.9028592479242824, - "grad_norm": 0.0, - "learning_rate": 4.90853900786924e-07, - "loss": 0.7461, - "step": 31861 - }, - { - "epoch": 0.9028875853665448, - "grad_norm": 0.0, - "learning_rate": 4.90569927831529e-07, - "loss": 0.821, - "step": 31862 - }, - { - "epoch": 0.9029159228088073, - "grad_norm": 0.0, - "learning_rate": 4.90286034977594e-07, - "loss": 0.8683, - "step": 31863 - }, - { - "epoch": 0.9029442602510698, - "grad_norm": 0.0, - "learning_rate": 4.900022222275113e-07, - "loss": 0.8549, - "step": 31864 - }, - { - "epoch": 0.9029725976933322, - "grad_norm": 0.0, - "learning_rate": 4.897184895836726e-07, - "loss": 0.7164, - "step": 31865 - }, - { - "epoch": 0.9030009351355947, - "grad_norm": 0.0, - "learning_rate": 4.894348370484648e-07, - "loss": 0.7466, - "step": 31866 - }, - { - "epoch": 0.9030292725778571, - "grad_norm": 0.0, - "learning_rate": 4.891512646242802e-07, - "loss": 0.7949, - "step": 31867 - }, - { - "epoch": 0.9030576100201196, - "grad_norm": 0.0, - "learning_rate": 4.88867772313506e-07, - "loss": 0.7359, - "step": 31868 - }, - { - "epoch": 0.903085947462382, - "grad_norm": 0.0, - "learning_rate": 4.885843601185291e-07, - "loss": 0.7622, - "step": 31869 - }, - { - "epoch": 0.9031142849046445, - "grad_norm": 0.0, - "learning_rate": 4.883010280417389e-07, - "loss": 0.7971, - "step": 31870 - }, - { - "epoch": 0.903142622346907, - "grad_norm": 0.0, - "learning_rate": 4.880177760855209e-07, - "loss": 0.7892, - "step": 31871 - }, - { - "epoch": 0.9031709597891694, - "grad_norm": 0.0, - "learning_rate": 4.877346042522624e-07, - "loss": 0.8986, - "step": 31872 - }, - { - "epoch": 0.9031992972314319, - "grad_norm": 0.0, - "learning_rate": 4.87451512544348e-07, - "loss": 0.9154, - "step": 31873 - }, - { - "epoch": 0.9032276346736944, - "grad_norm": 0.0, - "learning_rate": 4.871685009641603e-07, - "loss": 0.7377, - "step": 31874 - }, - { - "epoch": 0.9032559721159568, - "grad_norm": 0.0, - "learning_rate": 4.868855695140861e-07, - "loss": 0.7409, - "step": 31875 - }, - { - "epoch": 0.9032843095582193, - "grad_norm": 0.0, - "learning_rate": 4.866027181965071e-07, - "loss": 0.8312, - "step": 31876 - }, - { - "epoch": 0.9033126470004817, - "grad_norm": 0.0, - "learning_rate": 4.863199470138058e-07, - "loss": 0.8923, - "step": 31877 - }, - { - "epoch": 0.9033409844427442, - "grad_norm": 0.0, - "learning_rate": 4.860372559683646e-07, - "loss": 0.7953, - "step": 31878 - }, - { - "epoch": 0.9033693218850066, - "grad_norm": 0.0, - "learning_rate": 4.85754645062565e-07, - "loss": 0.8275, - "step": 31879 - }, - { - "epoch": 0.9033976593272691, - "grad_norm": 0.0, - "learning_rate": 4.854721142987873e-07, - "loss": 0.8341, - "step": 31880 - }, - { - "epoch": 0.9034259967695316, - "grad_norm": 0.0, - "learning_rate": 4.851896636794096e-07, - "loss": 0.7327, - "step": 31881 - }, - { - "epoch": 0.903454334211794, - "grad_norm": 0.0, - "learning_rate": 4.849072932068133e-07, - "loss": 0.8018, - "step": 31882 - }, - { - "epoch": 0.9034826716540565, - "grad_norm": 0.0, - "learning_rate": 4.846250028833755e-07, - "loss": 0.8229, - "step": 31883 - }, - { - "epoch": 0.903511009096319, - "grad_norm": 0.0, - "learning_rate": 4.843427927114752e-07, - "loss": 0.825, - "step": 31884 - }, - { - "epoch": 0.9035393465385815, - "grad_norm": 0.0, - "learning_rate": 4.840606626934885e-07, - "loss": 0.761, - "step": 31885 - }, - { - "epoch": 0.9035676839808439, - "grad_norm": 0.0, - "learning_rate": 4.837786128317945e-07, - "loss": 0.7447, - "step": 31886 - }, - { - "epoch": 0.9035960214231064, - "grad_norm": 0.0, - "learning_rate": 4.834966431287657e-07, - "loss": 1.035, - "step": 31887 - }, - { - "epoch": 0.9036243588653688, - "grad_norm": 0.0, - "learning_rate": 4.832147535867792e-07, - "loss": 0.7627, - "step": 31888 - }, - { - "epoch": 0.9036526963076312, - "grad_norm": 0.0, - "learning_rate": 4.829329442082076e-07, - "loss": 0.8073, - "step": 31889 - }, - { - "epoch": 0.9036810337498937, - "grad_norm": 0.0, - "learning_rate": 4.826512149954266e-07, - "loss": 0.7329, - "step": 31890 - }, - { - "epoch": 0.9037093711921562, - "grad_norm": 0.0, - "learning_rate": 4.823695659508099e-07, - "loss": 0.8093, - "step": 31891 - }, - { - "epoch": 0.9037377086344187, - "grad_norm": 0.0, - "learning_rate": 4.820879970767267e-07, - "loss": 0.8333, - "step": 31892 - }, - { - "epoch": 0.9037660460766811, - "grad_norm": 0.0, - "learning_rate": 4.818065083755508e-07, - "loss": 0.7162, - "step": 31893 - }, - { - "epoch": 0.9037943835189436, - "grad_norm": 0.0, - "learning_rate": 4.815250998496546e-07, - "loss": 0.8489, - "step": 31894 - }, - { - "epoch": 0.9038227209612061, - "grad_norm": 0.0, - "learning_rate": 4.812437715014062e-07, - "loss": 0.7647, - "step": 31895 - }, - { - "epoch": 0.9038510584034685, - "grad_norm": 0.0, - "learning_rate": 4.809625233331749e-07, - "loss": 0.81, - "step": 31896 - }, - { - "epoch": 0.903879395845731, - "grad_norm": 0.0, - "learning_rate": 4.806813553473321e-07, - "loss": 0.8438, - "step": 31897 - }, - { - "epoch": 0.9039077332879935, - "grad_norm": 0.0, - "learning_rate": 4.804002675462449e-07, - "loss": 0.7528, - "step": 31898 - }, - { - "epoch": 0.9039360707302558, - "grad_norm": 0.0, - "learning_rate": 4.801192599322835e-07, - "loss": 0.6756, - "step": 31899 - }, - { - "epoch": 0.9039644081725183, - "grad_norm": 0.0, - "learning_rate": 4.798383325078104e-07, - "loss": 0.7947, - "step": 31900 - }, - { - "epoch": 0.9039927456147808, - "grad_norm": 0.0, - "learning_rate": 4.795574852751939e-07, - "loss": 0.7702, - "step": 31901 - }, - { - "epoch": 0.9040210830570433, - "grad_norm": 0.0, - "learning_rate": 4.792767182368019e-07, - "loss": 0.8778, - "step": 31902 - }, - { - "epoch": 0.9040494204993057, - "grad_norm": 0.0, - "learning_rate": 4.789960313949959e-07, - "loss": 0.802, - "step": 31903 - }, - { - "epoch": 0.9040777579415682, - "grad_norm": 0.0, - "learning_rate": 4.787154247521442e-07, - "loss": 0.8044, - "step": 31904 - }, - { - "epoch": 0.9041060953838307, - "grad_norm": 0.0, - "learning_rate": 4.784348983106057e-07, - "loss": 0.8793, - "step": 31905 - }, - { - "epoch": 0.9041344328260931, - "grad_norm": 0.0, - "learning_rate": 4.781544520727466e-07, - "loss": 0.8844, - "step": 31906 - }, - { - "epoch": 0.9041627702683556, - "grad_norm": 0.0, - "learning_rate": 4.778740860409292e-07, - "loss": 0.8232, - "step": 31907 - }, - { - "epoch": 0.9041911077106181, - "grad_norm": 0.0, - "learning_rate": 4.775938002175129e-07, - "loss": 0.8043, - "step": 31908 - }, - { - "epoch": 0.9042194451528806, - "grad_norm": 0.0, - "learning_rate": 4.773135946048601e-07, - "loss": 0.825, - "step": 31909 - }, - { - "epoch": 0.9042477825951429, - "grad_norm": 0.0, - "learning_rate": 4.770334692053313e-07, - "loss": 0.7399, - "step": 31910 - }, - { - "epoch": 0.9042761200374054, - "grad_norm": 0.0, - "learning_rate": 4.767534240212857e-07, - "loss": 0.8531, - "step": 31911 - }, - { - "epoch": 0.9043044574796679, - "grad_norm": 0.0, - "learning_rate": 4.764734590550835e-07, - "loss": 0.8734, - "step": 31912 - }, - { - "epoch": 0.9043327949219303, - "grad_norm": 0.0, - "learning_rate": 4.7619357430908177e-07, - "loss": 0.7766, - "step": 31913 - }, - { - "epoch": 0.9043611323641928, - "grad_norm": 0.0, - "learning_rate": 4.759137697856364e-07, - "loss": 0.8495, - "step": 31914 - }, - { - "epoch": 0.9043894698064553, - "grad_norm": 0.0, - "learning_rate": 4.756340454871089e-07, - "loss": 0.7744, - "step": 31915 - }, - { - "epoch": 0.9044178072487177, - "grad_norm": 0.0, - "learning_rate": 4.7535440141584956e-07, - "loss": 0.8933, - "step": 31916 - }, - { - "epoch": 0.9044461446909802, - "grad_norm": 0.0, - "learning_rate": 4.750748375742198e-07, - "loss": 0.7608, - "step": 31917 - }, - { - "epoch": 0.9044744821332427, - "grad_norm": 0.0, - "learning_rate": 4.7479535396457e-07, - "loss": 0.8578, - "step": 31918 - }, - { - "epoch": 0.9045028195755052, - "grad_norm": 0.0, - "learning_rate": 4.7451595058925606e-07, - "loss": 0.8084, - "step": 31919 - }, - { - "epoch": 0.9045311570177675, - "grad_norm": 0.0, - "learning_rate": 4.742366274506327e-07, - "loss": 0.7892, - "step": 31920 - }, - { - "epoch": 0.90455949446003, - "grad_norm": 0.0, - "learning_rate": 4.739573845510492e-07, - "loss": 0.6701, - "step": 31921 - }, - { - "epoch": 0.9045878319022925, - "grad_norm": 0.0, - "learning_rate": 4.736782218928615e-07, - "loss": 0.8632, - "step": 31922 - }, - { - "epoch": 0.9046161693445549, - "grad_norm": 0.0, - "learning_rate": 4.7339913947841876e-07, - "loss": 0.7685, - "step": 31923 - }, - { - "epoch": 0.9046445067868174, - "grad_norm": 0.0, - "learning_rate": 4.731201373100736e-07, - "loss": 0.8332, - "step": 31924 - }, - { - "epoch": 0.9046728442290799, - "grad_norm": 0.0, - "learning_rate": 4.7284121539017514e-07, - "loss": 0.7114, - "step": 31925 - }, - { - "epoch": 0.9047011816713424, - "grad_norm": 0.0, - "learning_rate": 4.725623737210727e-07, - "loss": 0.7514, - "step": 31926 - }, - { - "epoch": 0.9047295191136048, - "grad_norm": 0.0, - "learning_rate": 4.722836123051155e-07, - "loss": 0.8871, - "step": 31927 - }, - { - "epoch": 0.9047578565558673, - "grad_norm": 0.0, - "learning_rate": 4.720049311446517e-07, - "loss": 0.7983, - "step": 31928 - }, - { - "epoch": 0.9047861939981298, - "grad_norm": 0.0, - "learning_rate": 4.717263302420283e-07, - "loss": 0.8147, - "step": 31929 - }, - { - "epoch": 0.9048145314403921, - "grad_norm": 0.0, - "learning_rate": 4.714478095995922e-07, - "loss": 0.7722, - "step": 31930 - }, - { - "epoch": 0.9048428688826546, - "grad_norm": 0.0, - "learning_rate": 4.711693692196906e-07, - "loss": 0.7926, - "step": 31931 - }, - { - "epoch": 0.9048712063249171, - "grad_norm": 0.0, - "learning_rate": 4.708910091046659e-07, - "loss": 0.8029, - "step": 31932 - }, - { - "epoch": 0.9048995437671796, - "grad_norm": 0.0, - "learning_rate": 4.706127292568663e-07, - "loss": 0.7272, - "step": 31933 - }, - { - "epoch": 0.904927881209442, - "grad_norm": 0.0, - "learning_rate": 4.7033452967863324e-07, - "loss": 0.8544, - "step": 31934 - }, - { - "epoch": 0.9049562186517045, - "grad_norm": 0.0, - "learning_rate": 4.7005641037231155e-07, - "loss": 0.815, - "step": 31935 - }, - { - "epoch": 0.904984556093967, - "grad_norm": 0.0, - "learning_rate": 4.6977837134024375e-07, - "loss": 0.6617, - "step": 31936 - }, - { - "epoch": 0.9050128935362294, - "grad_norm": 0.0, - "learning_rate": 4.695004125847713e-07, - "loss": 0.8276, - "step": 31937 - }, - { - "epoch": 0.9050412309784919, - "grad_norm": 0.0, - "learning_rate": 4.6922253410823683e-07, - "loss": 0.9032, - "step": 31938 - }, - { - "epoch": 0.9050695684207544, - "grad_norm": 0.0, - "learning_rate": 4.6894473591297953e-07, - "loss": 0.8046, - "step": 31939 - }, - { - "epoch": 0.9050979058630167, - "grad_norm": 0.0, - "learning_rate": 4.6866701800133864e-07, - "loss": 0.8987, - "step": 31940 - }, - { - "epoch": 0.9051262433052792, - "grad_norm": 0.0, - "learning_rate": 4.6838938037565784e-07, - "loss": 0.8199, - "step": 31941 - }, - { - "epoch": 0.9051545807475417, - "grad_norm": 0.0, - "learning_rate": 4.681118230382709e-07, - "loss": 0.789, - "step": 31942 - }, - { - "epoch": 0.9051829181898042, - "grad_norm": 0.0, - "learning_rate": 4.678343459915169e-07, - "loss": 0.8421, - "step": 31943 - }, - { - "epoch": 0.9052112556320666, - "grad_norm": 0.0, - "learning_rate": 4.675569492377363e-07, - "loss": 0.8729, - "step": 31944 - }, - { - "epoch": 0.9052395930743291, - "grad_norm": 0.0, - "learning_rate": 4.672796327792617e-07, - "loss": 0.8106, - "step": 31945 - }, - { - "epoch": 0.9052679305165916, - "grad_norm": 0.0, - "learning_rate": 4.6700239661843114e-07, - "loss": 0.8002, - "step": 31946 - }, - { - "epoch": 0.905296267958854, - "grad_norm": 0.0, - "learning_rate": 4.667252407575784e-07, - "loss": 0.9086, - "step": 31947 - }, - { - "epoch": 0.9053246054011165, - "grad_norm": 0.0, - "learning_rate": 4.6644816519903936e-07, - "loss": 0.7101, - "step": 31948 - }, - { - "epoch": 0.905352942843379, - "grad_norm": 0.0, - "learning_rate": 4.6617116994514764e-07, - "loss": 0.8166, - "step": 31949 - }, - { - "epoch": 0.9053812802856415, - "grad_norm": 0.0, - "learning_rate": 4.6589425499823703e-07, - "loss": 0.7672, - "step": 31950 - }, - { - "epoch": 0.9054096177279038, - "grad_norm": 0.0, - "learning_rate": 4.6561742036063896e-07, - "loss": 0.7451, - "step": 31951 - }, - { - "epoch": 0.9054379551701663, - "grad_norm": 0.0, - "learning_rate": 4.653406660346871e-07, - "loss": 0.7926, - "step": 31952 - }, - { - "epoch": 0.9054662926124288, - "grad_norm": 0.0, - "learning_rate": 4.650639920227096e-07, - "loss": 0.8558, - "step": 31953 - }, - { - "epoch": 0.9054946300546912, - "grad_norm": 0.0, - "learning_rate": 4.6478739832704123e-07, - "loss": 0.7577, - "step": 31954 - }, - { - "epoch": 0.9055229674969537, - "grad_norm": 0.0, - "learning_rate": 4.6451088495000685e-07, - "loss": 0.7798, - "step": 31955 - }, - { - "epoch": 0.9055513049392162, - "grad_norm": 0.0, - "learning_rate": 4.64234451893939e-07, - "loss": 0.8313, - "step": 31956 - }, - { - "epoch": 0.9055796423814787, - "grad_norm": 0.0, - "learning_rate": 4.63958099161167e-07, - "loss": 0.6499, - "step": 31957 - }, - { - "epoch": 0.9056079798237411, - "grad_norm": 0.0, - "learning_rate": 4.636818267540144e-07, - "loss": 0.8158, - "step": 31958 - }, - { - "epoch": 0.9056363172660036, - "grad_norm": 0.0, - "learning_rate": 4.634056346748117e-07, - "loss": 0.8566, - "step": 31959 - }, - { - "epoch": 0.9056646547082661, - "grad_norm": 0.0, - "learning_rate": 4.6312952292588696e-07, - "loss": 0.8207, - "step": 31960 - }, - { - "epoch": 0.9056929921505285, - "grad_norm": 0.0, - "learning_rate": 4.6285349150956174e-07, - "loss": 0.7572, - "step": 31961 - }, - { - "epoch": 0.905721329592791, - "grad_norm": 0.0, - "learning_rate": 4.6257754042816295e-07, - "loss": 0.8283, - "step": 31962 - }, - { - "epoch": 0.9057496670350534, - "grad_norm": 0.0, - "learning_rate": 4.6230166968401433e-07, - "loss": 0.8682, - "step": 31963 - }, - { - "epoch": 0.9057780044773158, - "grad_norm": 0.0, - "learning_rate": 4.620258792794419e-07, - "loss": 0.7704, - "step": 31964 - }, - { - "epoch": 0.9058063419195783, - "grad_norm": 0.0, - "learning_rate": 4.6175016921676806e-07, - "loss": 0.7909, - "step": 31965 - }, - { - "epoch": 0.9058346793618408, - "grad_norm": 0.0, - "learning_rate": 4.614745394983133e-07, - "loss": 0.7765, - "step": 31966 - }, - { - "epoch": 0.9058630168041033, - "grad_norm": 0.0, - "learning_rate": 4.6119899012640137e-07, - "loss": 0.8726, - "step": 31967 - }, - { - "epoch": 0.9058913542463657, - "grad_norm": 0.0, - "learning_rate": 4.6092352110335135e-07, - "loss": 0.8357, - "step": 31968 - }, - { - "epoch": 0.9059196916886282, - "grad_norm": 0.0, - "learning_rate": 4.6064813243148487e-07, - "loss": 0.7523, - "step": 31969 - }, - { - "epoch": 0.9059480291308907, - "grad_norm": 0.0, - "learning_rate": 4.6037282411312333e-07, - "loss": 0.7756, - "step": 31970 - }, - { - "epoch": 0.9059763665731531, - "grad_norm": 0.0, - "learning_rate": 4.6009759615058156e-07, - "loss": 0.7563, - "step": 31971 - }, - { - "epoch": 0.9060047040154156, - "grad_norm": 0.0, - "learning_rate": 4.5982244854618107e-07, - "loss": 0.7118, - "step": 31972 - }, - { - "epoch": 0.906033041457678, - "grad_norm": 0.0, - "learning_rate": 4.5954738130224e-07, - "loss": 0.7927, - "step": 31973 - }, - { - "epoch": 0.9060613788999405, - "grad_norm": 0.0, - "learning_rate": 4.5927239442107306e-07, - "loss": 0.9189, - "step": 31974 - }, - { - "epoch": 0.9060897163422029, - "grad_norm": 0.0, - "learning_rate": 4.5899748790499743e-07, - "loss": 0.715, - "step": 31975 - }, - { - "epoch": 0.9061180537844654, - "grad_norm": 0.0, - "learning_rate": 4.5872266175632783e-07, - "loss": 0.7553, - "step": 31976 - }, - { - "epoch": 0.9061463912267279, - "grad_norm": 0.0, - "learning_rate": 4.5844791597738135e-07, - "loss": 0.9398, - "step": 31977 - }, - { - "epoch": 0.9061747286689903, - "grad_norm": 0.0, - "learning_rate": 4.581732505704728e-07, - "loss": 0.6522, - "step": 31978 - }, - { - "epoch": 0.9062030661112528, - "grad_norm": 0.0, - "learning_rate": 4.5789866553791253e-07, - "loss": 0.8813, - "step": 31979 - }, - { - "epoch": 0.9062314035535153, - "grad_norm": 0.0, - "learning_rate": 4.5762416088201535e-07, - "loss": 0.8475, - "step": 31980 - }, - { - "epoch": 0.9062597409957778, - "grad_norm": 0.0, - "learning_rate": 4.5734973660509387e-07, - "loss": 0.93, - "step": 31981 - }, - { - "epoch": 0.9062880784380402, - "grad_norm": 0.0, - "learning_rate": 4.5707539270945847e-07, - "loss": 0.7831, - "step": 31982 - }, - { - "epoch": 0.9063164158803027, - "grad_norm": 0.0, - "learning_rate": 4.568011291974228e-07, - "loss": 0.8505, - "step": 31983 - }, - { - "epoch": 0.9063447533225651, - "grad_norm": 0.0, - "learning_rate": 4.5652694607129287e-07, - "loss": 0.8261, - "step": 31984 - }, - { - "epoch": 0.9063730907648275, - "grad_norm": 0.0, - "learning_rate": 4.5625284333338017e-07, - "loss": 0.8101, - "step": 31985 - }, - { - "epoch": 0.90640142820709, - "grad_norm": 0.0, - "learning_rate": 4.55978820985995e-07, - "loss": 0.8187, - "step": 31986 - }, - { - "epoch": 0.9064297656493525, - "grad_norm": 0.0, - "learning_rate": 4.5570487903144335e-07, - "loss": 0.8209, - "step": 31987 - }, - { - "epoch": 0.9064581030916149, - "grad_norm": 0.0, - "learning_rate": 4.5543101747203334e-07, - "loss": 0.8128, - "step": 31988 - }, - { - "epoch": 0.9064864405338774, - "grad_norm": 0.0, - "learning_rate": 4.551572363100731e-07, - "loss": 0.8141, - "step": 31989 - }, - { - "epoch": 0.9065147779761399, - "grad_norm": 0.0, - "learning_rate": 4.5488353554786644e-07, - "loss": 0.768, - "step": 31990 - }, - { - "epoch": 0.9065431154184024, - "grad_norm": 0.0, - "learning_rate": 4.546099151877226e-07, - "loss": 0.834, - "step": 31991 - }, - { - "epoch": 0.9065714528606648, - "grad_norm": 0.0, - "learning_rate": 4.543363752319419e-07, - "loss": 0.9072, - "step": 31992 - }, - { - "epoch": 0.9065997903029273, - "grad_norm": 0.0, - "learning_rate": 4.5406291568283134e-07, - "loss": 0.8304, - "step": 31993 - }, - { - "epoch": 0.9066281277451897, - "grad_norm": 0.0, - "learning_rate": 4.5378953654269475e-07, - "loss": 0.7298, - "step": 31994 - }, - { - "epoch": 0.9066564651874521, - "grad_norm": 0.0, - "learning_rate": 4.535162378138325e-07, - "loss": 0.8835, - "step": 31995 - }, - { - "epoch": 0.9066848026297146, - "grad_norm": 0.0, - "learning_rate": 4.5324301949854935e-07, - "loss": 0.704, - "step": 31996 - }, - { - "epoch": 0.9067131400719771, - "grad_norm": 0.0, - "learning_rate": 4.529698815991446e-07, - "loss": 0.7117, - "step": 31997 - }, - { - "epoch": 0.9067414775142396, - "grad_norm": 0.0, - "learning_rate": 4.526968241179186e-07, - "loss": 0.7693, - "step": 31998 - }, - { - "epoch": 0.906769814956502, - "grad_norm": 0.0, - "learning_rate": 4.5242384705717404e-07, - "loss": 0.8011, - "step": 31999 - }, - { - "epoch": 0.9067981523987645, - "grad_norm": 0.0, - "learning_rate": 4.5215095041920787e-07, - "loss": 0.8143, - "step": 32000 - }, - { - "epoch": 0.906826489841027, - "grad_norm": 0.0, - "learning_rate": 4.5187813420631944e-07, - "loss": 0.82, - "step": 32001 - }, - { - "epoch": 0.9068548272832894, - "grad_norm": 0.0, - "learning_rate": 4.5160539842080797e-07, - "loss": 0.9413, - "step": 32002 - }, - { - "epoch": 0.9068831647255519, - "grad_norm": 0.0, - "learning_rate": 4.5133274306496944e-07, - "loss": 0.7793, - "step": 32003 - }, - { - "epoch": 0.9069115021678144, - "grad_norm": 0.0, - "learning_rate": 4.5106016814110197e-07, - "loss": 0.7582, - "step": 32004 - }, - { - "epoch": 0.9069398396100768, - "grad_norm": 0.0, - "learning_rate": 4.507876736514993e-07, - "loss": 0.7489, - "step": 32005 - }, - { - "epoch": 0.9069681770523392, - "grad_norm": 0.0, - "learning_rate": 4.505152595984585e-07, - "loss": 0.7941, - "step": 32006 - }, - { - "epoch": 0.9069965144946017, - "grad_norm": 0.0, - "learning_rate": 4.502429259842744e-07, - "loss": 0.8187, - "step": 32007 - }, - { - "epoch": 0.9070248519368642, - "grad_norm": 0.0, - "learning_rate": 4.499706728112396e-07, - "loss": 0.8319, - "step": 32008 - }, - { - "epoch": 0.9070531893791266, - "grad_norm": 0.0, - "learning_rate": 4.4969850008164897e-07, - "loss": 0.8311, - "step": 32009 - }, - { - "epoch": 0.9070815268213891, - "grad_norm": 0.0, - "learning_rate": 4.494264077977939e-07, - "loss": 0.8461, - "step": 32010 - }, - { - "epoch": 0.9071098642636516, - "grad_norm": 0.0, - "learning_rate": 4.491543959619671e-07, - "loss": 0.8142, - "step": 32011 - }, - { - "epoch": 0.907138201705914, - "grad_norm": 0.0, - "learning_rate": 4.4888246457646e-07, - "loss": 0.7962, - "step": 32012 - }, - { - "epoch": 0.9071665391481765, - "grad_norm": 0.0, - "learning_rate": 4.4861061364356086e-07, - "loss": 0.6748, - "step": 32013 - }, - { - "epoch": 0.907194876590439, - "grad_norm": 0.0, - "learning_rate": 4.483388431655611e-07, - "loss": 0.7438, - "step": 32014 - }, - { - "epoch": 0.9072232140327015, - "grad_norm": 0.0, - "learning_rate": 4.480671531447511e-07, - "loss": 0.8347, - "step": 32015 - }, - { - "epoch": 0.9072515514749638, - "grad_norm": 0.0, - "learning_rate": 4.4779554358341803e-07, - "loss": 0.7319, - "step": 32016 - }, - { - "epoch": 0.9072798889172263, - "grad_norm": 0.0, - "learning_rate": 4.4752401448384997e-07, - "loss": 0.7805, - "step": 32017 - }, - { - "epoch": 0.9073082263594888, - "grad_norm": 0.0, - "learning_rate": 4.472525658483362e-07, - "loss": 0.8318, - "step": 32018 - }, - { - "epoch": 0.9073365638017512, - "grad_norm": 0.0, - "learning_rate": 4.469811976791605e-07, - "loss": 0.8062, - "step": 32019 - }, - { - "epoch": 0.9073649012440137, - "grad_norm": 0.0, - "learning_rate": 4.467099099786099e-07, - "loss": 0.933, - "step": 32020 - }, - { - "epoch": 0.9073932386862762, - "grad_norm": 0.0, - "learning_rate": 4.4643870274896805e-07, - "loss": 0.8487, - "step": 32021 - }, - { - "epoch": 0.9074215761285387, - "grad_norm": 0.0, - "learning_rate": 4.4616757599252104e-07, - "loss": 0.8734, - "step": 32022 - }, - { - "epoch": 0.9074499135708011, - "grad_norm": 0.0, - "learning_rate": 4.458965297115536e-07, - "loss": 0.6697, - "step": 32023 - }, - { - "epoch": 0.9074782510130636, - "grad_norm": 0.0, - "learning_rate": 4.456255639083462e-07, - "loss": 0.8787, - "step": 32024 - }, - { - "epoch": 0.9075065884553261, - "grad_norm": 0.0, - "learning_rate": 4.4535467858518254e-07, - "loss": 0.8048, - "step": 32025 - }, - { - "epoch": 0.9075349258975884, - "grad_norm": 0.0, - "learning_rate": 4.4508387374434416e-07, - "loss": 0.8081, - "step": 32026 - }, - { - "epoch": 0.9075632633398509, - "grad_norm": 0.0, - "learning_rate": 4.448131493881147e-07, - "loss": 0.8078, - "step": 32027 - }, - { - "epoch": 0.9075916007821134, - "grad_norm": 0.0, - "learning_rate": 4.445425055187702e-07, - "loss": 0.7677, - "step": 32028 - }, - { - "epoch": 0.9076199382243759, - "grad_norm": 0.0, - "learning_rate": 4.4427194213859216e-07, - "loss": 0.8049, - "step": 32029 - }, - { - "epoch": 0.9076482756666383, - "grad_norm": 0.0, - "learning_rate": 4.4400145924986096e-07, - "loss": 0.807, - "step": 32030 - }, - { - "epoch": 0.9076766131089008, - "grad_norm": 0.0, - "learning_rate": 4.4373105685485475e-07, - "loss": 0.7139, - "step": 32031 - }, - { - "epoch": 0.9077049505511633, - "grad_norm": 0.0, - "learning_rate": 4.434607349558495e-07, - "loss": 0.8195, - "step": 32032 - }, - { - "epoch": 0.9077332879934257, - "grad_norm": 0.0, - "learning_rate": 4.4319049355512345e-07, - "loss": 0.8352, - "step": 32033 - }, - { - "epoch": 0.9077616254356882, - "grad_norm": 0.0, - "learning_rate": 4.429203326549525e-07, - "loss": 0.7426, - "step": 32034 - }, - { - "epoch": 0.9077899628779507, - "grad_norm": 0.0, - "learning_rate": 4.426502522576126e-07, - "loss": 0.8008, - "step": 32035 - }, - { - "epoch": 0.907818300320213, - "grad_norm": 0.0, - "learning_rate": 4.423802523653797e-07, - "loss": 0.7, - "step": 32036 - }, - { - "epoch": 0.9078466377624755, - "grad_norm": 0.0, - "learning_rate": 4.421103329805254e-07, - "loss": 0.8284, - "step": 32037 - }, - { - "epoch": 0.907874975204738, - "grad_norm": 0.0, - "learning_rate": 4.4184049410532557e-07, - "loss": 0.9196, - "step": 32038 - }, - { - "epoch": 0.9079033126470005, - "grad_norm": 0.0, - "learning_rate": 4.4157073574205176e-07, - "loss": 0.7228, - "step": 32039 - }, - { - "epoch": 0.9079316500892629, - "grad_norm": 0.0, - "learning_rate": 4.4130105789297775e-07, - "loss": 0.8678, - "step": 32040 - }, - { - "epoch": 0.9079599875315254, - "grad_norm": 0.0, - "learning_rate": 4.4103146056037606e-07, - "loss": 0.7676, - "step": 32041 - }, - { - "epoch": 0.9079883249737879, - "grad_norm": 0.0, - "learning_rate": 4.4076194374651384e-07, - "loss": 0.7428, - "step": 32042 - }, - { - "epoch": 0.9080166624160503, - "grad_norm": 0.0, - "learning_rate": 4.404925074536637e-07, - "loss": 0.7764, - "step": 32043 - }, - { - "epoch": 0.9080449998583128, - "grad_norm": 0.0, - "learning_rate": 4.402231516840971e-07, - "loss": 0.8489, - "step": 32044 - }, - { - "epoch": 0.9080733373005753, - "grad_norm": 0.0, - "learning_rate": 4.3995387644007904e-07, - "loss": 0.6994, - "step": 32045 - }, - { - "epoch": 0.9081016747428378, - "grad_norm": 0.0, - "learning_rate": 4.3968468172387866e-07, - "loss": 0.8172, - "step": 32046 - }, - { - "epoch": 0.9081300121851001, - "grad_norm": 0.0, - "learning_rate": 4.3941556753776646e-07, - "loss": 0.7691, - "step": 32047 - }, - { - "epoch": 0.9081583496273626, - "grad_norm": 0.0, - "learning_rate": 4.391465338840062e-07, - "loss": 0.7777, - "step": 32048 - }, - { - "epoch": 0.9081866870696251, - "grad_norm": 0.0, - "learning_rate": 4.3887758076486597e-07, - "loss": 0.7858, - "step": 32049 - }, - { - "epoch": 0.9082150245118875, - "grad_norm": 0.0, - "learning_rate": 4.386087081826085e-07, - "loss": 0.8106, - "step": 32050 - }, - { - "epoch": 0.90824336195415, - "grad_norm": 0.0, - "learning_rate": 4.383399161395008e-07, - "loss": 0.6837, - "step": 32051 - }, - { - "epoch": 0.9082716993964125, - "grad_norm": 0.0, - "learning_rate": 4.3807120463780774e-07, - "loss": 0.8506, - "step": 32052 - }, - { - "epoch": 0.908300036838675, - "grad_norm": 0.0, - "learning_rate": 4.378025736797897e-07, - "loss": 0.7967, - "step": 32053 - }, - { - "epoch": 0.9083283742809374, - "grad_norm": 0.0, - "learning_rate": 4.375340232677139e-07, - "loss": 0.8411, - "step": 32054 - }, - { - "epoch": 0.9083567117231999, - "grad_norm": 0.0, - "learning_rate": 4.372655534038384e-07, - "loss": 0.7683, - "step": 32055 - }, - { - "epoch": 0.9083850491654624, - "grad_norm": 0.0, - "learning_rate": 4.369971640904269e-07, - "loss": 0.7267, - "step": 32056 - }, - { - "epoch": 0.9084133866077247, - "grad_norm": 0.0, - "learning_rate": 4.3672885532973997e-07, - "loss": 0.8124, - "step": 32057 - }, - { - "epoch": 0.9084417240499872, - "grad_norm": 0.0, - "learning_rate": 4.3646062712403567e-07, - "loss": 0.758, - "step": 32058 - }, - { - "epoch": 0.9084700614922497, - "grad_norm": 0.0, - "learning_rate": 4.361924794755745e-07, - "loss": 0.8134, - "step": 32059 - }, - { - "epoch": 0.9084983989345121, - "grad_norm": 0.0, - "learning_rate": 4.35924412386618e-07, - "loss": 0.8308, - "step": 32060 - }, - { - "epoch": 0.9085267363767746, - "grad_norm": 0.0, - "learning_rate": 4.356564258594198e-07, - "loss": 0.7621, - "step": 32061 - }, - { - "epoch": 0.9085550738190371, - "grad_norm": 0.0, - "learning_rate": 4.3538851989624044e-07, - "loss": 0.8879, - "step": 32062 - }, - { - "epoch": 0.9085834112612996, - "grad_norm": 0.0, - "learning_rate": 4.3512069449933357e-07, - "loss": 0.8831, - "step": 32063 - }, - { - "epoch": 0.908611748703562, - "grad_norm": 0.0, - "learning_rate": 4.3485294967095747e-07, - "loss": 0.8358, - "step": 32064 - }, - { - "epoch": 0.9086400861458245, - "grad_norm": 0.0, - "learning_rate": 4.34585285413367e-07, - "loss": 0.7923, - "step": 32065 - }, - { - "epoch": 0.908668423588087, - "grad_norm": 0.0, - "learning_rate": 4.343177017288158e-07, - "loss": 0.7519, - "step": 32066 - }, - { - "epoch": 0.9086967610303494, - "grad_norm": 0.0, - "learning_rate": 4.3405019861956e-07, - "loss": 0.8231, - "step": 32067 - }, - { - "epoch": 0.9087250984726118, - "grad_norm": 0.0, - "learning_rate": 4.337827760878521e-07, - "loss": 0.8826, - "step": 32068 - }, - { - "epoch": 0.9087534359148743, - "grad_norm": 0.0, - "learning_rate": 4.3351543413594263e-07, - "loss": 0.6996, - "step": 32069 - }, - { - "epoch": 0.9087817733571368, - "grad_norm": 0.0, - "learning_rate": 4.3324817276608755e-07, - "loss": 0.8027, - "step": 32070 - }, - { - "epoch": 0.9088101107993992, - "grad_norm": 0.0, - "learning_rate": 4.329809919805328e-07, - "loss": 0.8774, - "step": 32071 - }, - { - "epoch": 0.9088384482416617, - "grad_norm": 0.0, - "learning_rate": 4.327138917815332e-07, - "loss": 0.8176, - "step": 32072 - }, - { - "epoch": 0.9088667856839242, - "grad_norm": 0.0, - "learning_rate": 4.3244687217133816e-07, - "loss": 0.8358, - "step": 32073 - }, - { - "epoch": 0.9088951231261866, - "grad_norm": 0.0, - "learning_rate": 4.3217993315219363e-07, - "loss": 0.7641, - "step": 32074 - }, - { - "epoch": 0.9089234605684491, - "grad_norm": 0.0, - "learning_rate": 4.3191307472635335e-07, - "loss": 0.8002, - "step": 32075 - }, - { - "epoch": 0.9089517980107116, - "grad_norm": 0.0, - "learning_rate": 4.3164629689605994e-07, - "loss": 0.7376, - "step": 32076 - }, - { - "epoch": 0.9089801354529741, - "grad_norm": 0.0, - "learning_rate": 4.313795996635628e-07, - "loss": 0.6882, - "step": 32077 - }, - { - "epoch": 0.9090084728952365, - "grad_norm": 0.0, - "learning_rate": 4.3111298303110895e-07, - "loss": 0.7692, - "step": 32078 - }, - { - "epoch": 0.909036810337499, - "grad_norm": 0.0, - "learning_rate": 4.308464470009432e-07, - "loss": 0.7713, - "step": 32079 - }, - { - "epoch": 0.9090651477797614, - "grad_norm": 0.0, - "learning_rate": 4.305799915753117e-07, - "loss": 0.8583, - "step": 32080 - }, - { - "epoch": 0.9090934852220238, - "grad_norm": 0.0, - "learning_rate": 4.3031361675646033e-07, - "loss": 0.8797, - "step": 32081 - }, - { - "epoch": 0.9091218226642863, - "grad_norm": 0.0, - "learning_rate": 4.300473225466284e-07, - "loss": 0.7766, - "step": 32082 - }, - { - "epoch": 0.9091501601065488, - "grad_norm": 0.0, - "learning_rate": 4.2978110894806415e-07, - "loss": 0.7608, - "step": 32083 - }, - { - "epoch": 0.9091784975488112, - "grad_norm": 0.0, - "learning_rate": 4.295149759630057e-07, - "loss": 0.8521, - "step": 32084 - }, - { - "epoch": 0.9092068349910737, - "grad_norm": 0.0, - "learning_rate": 4.2924892359369584e-07, - "loss": 0.8221, - "step": 32085 - }, - { - "epoch": 0.9092351724333362, - "grad_norm": 0.0, - "learning_rate": 4.2898295184237827e-07, - "loss": 0.7771, - "step": 32086 - }, - { - "epoch": 0.9092635098755987, - "grad_norm": 0.0, - "learning_rate": 4.2871706071129006e-07, - "loss": 0.8413, - "step": 32087 - }, - { - "epoch": 0.9092918473178611, - "grad_norm": 0.0, - "learning_rate": 4.2845125020267273e-07, - "loss": 0.8154, - "step": 32088 - }, - { - "epoch": 0.9093201847601236, - "grad_norm": 0.0, - "learning_rate": 4.2818552031876457e-07, - "loss": 0.8534, - "step": 32089 - }, - { - "epoch": 0.909348522202386, - "grad_norm": 0.0, - "learning_rate": 4.2791987106180486e-07, - "loss": 0.7219, - "step": 32090 - }, - { - "epoch": 0.9093768596446484, - "grad_norm": 0.0, - "learning_rate": 4.276543024340296e-07, - "loss": 0.8632, - "step": 32091 - }, - { - "epoch": 0.9094051970869109, - "grad_norm": 0.0, - "learning_rate": 4.273888144376759e-07, - "loss": 0.7091, - "step": 32092 - }, - { - "epoch": 0.9094335345291734, - "grad_norm": 0.0, - "learning_rate": 4.271234070749819e-07, - "loss": 0.7799, - "step": 32093 - }, - { - "epoch": 0.9094618719714359, - "grad_norm": 0.0, - "learning_rate": 4.2685808034818366e-07, - "loss": 0.8703, - "step": 32094 - }, - { - "epoch": 0.9094902094136983, - "grad_norm": 0.0, - "learning_rate": 4.265928342595127e-07, - "loss": 0.7442, - "step": 32095 - }, - { - "epoch": 0.9095185468559608, - "grad_norm": 0.0, - "learning_rate": 4.2632766881120614e-07, - "loss": 0.7079, - "step": 32096 - }, - { - "epoch": 0.9095468842982233, - "grad_norm": 0.0, - "learning_rate": 4.260625840054977e-07, - "loss": 0.8269, - "step": 32097 - }, - { - "epoch": 0.9095752217404857, - "grad_norm": 0.0, - "learning_rate": 4.257975798446179e-07, - "loss": 0.841, - "step": 32098 - }, - { - "epoch": 0.9096035591827482, - "grad_norm": 0.0, - "learning_rate": 4.2553265633080154e-07, - "loss": 0.8055, - "step": 32099 - }, - { - "epoch": 0.9096318966250106, - "grad_norm": 0.0, - "learning_rate": 4.25267813466278e-07, - "loss": 0.7201, - "step": 32100 - }, - { - "epoch": 0.909660234067273, - "grad_norm": 0.0, - "learning_rate": 4.250030512532788e-07, - "loss": 0.8106, - "step": 32101 - }, - { - "epoch": 0.9096885715095355, - "grad_norm": 0.0, - "learning_rate": 4.247383696940366e-07, - "loss": 0.8034, - "step": 32102 - }, - { - "epoch": 0.909716908951798, - "grad_norm": 0.0, - "learning_rate": 4.244737687907763e-07, - "loss": 0.7889, - "step": 32103 - }, - { - "epoch": 0.9097452463940605, - "grad_norm": 0.0, - "learning_rate": 4.2420924854573055e-07, - "loss": 0.8682, - "step": 32104 - }, - { - "epoch": 0.9097735838363229, - "grad_norm": 0.0, - "learning_rate": 4.239448089611253e-07, - "loss": 0.9388, - "step": 32105 - }, - { - "epoch": 0.9098019212785854, - "grad_norm": 0.0, - "learning_rate": 4.236804500391889e-07, - "loss": 0.7594, - "step": 32106 - }, - { - "epoch": 0.9098302587208479, - "grad_norm": 0.0, - "learning_rate": 4.234161717821494e-07, - "loss": 0.7024, - "step": 32107 - }, - { - "epoch": 0.9098585961631103, - "grad_norm": 0.0, - "learning_rate": 4.2315197419223073e-07, - "loss": 0.8323, - "step": 32108 - }, - { - "epoch": 0.9098869336053728, - "grad_norm": 0.0, - "learning_rate": 4.228878572716588e-07, - "loss": 0.7192, - "step": 32109 - }, - { - "epoch": 0.9099152710476353, - "grad_norm": 0.0, - "learning_rate": 4.226238210226608e-07, - "loss": 0.8482, - "step": 32110 - }, - { - "epoch": 0.9099436084898977, - "grad_norm": 0.0, - "learning_rate": 4.22359865447457e-07, - "loss": 0.9164, - "step": 32111 - }, - { - "epoch": 0.9099719459321601, - "grad_norm": 0.0, - "learning_rate": 4.2209599054827465e-07, - "loss": 0.8669, - "step": 32112 - }, - { - "epoch": 0.9100002833744226, - "grad_norm": 0.0, - "learning_rate": 4.2183219632733195e-07, - "loss": 0.8775, - "step": 32113 - }, - { - "epoch": 0.9100286208166851, - "grad_norm": 0.0, - "learning_rate": 4.215684827868538e-07, - "loss": 0.7483, - "step": 32114 - }, - { - "epoch": 0.9100569582589475, - "grad_norm": 0.0, - "learning_rate": 4.2130484992906285e-07, - "loss": 0.7244, - "step": 32115 - }, - { - "epoch": 0.91008529570121, - "grad_norm": 0.0, - "learning_rate": 4.210412977561773e-07, - "loss": 0.7099, - "step": 32116 - }, - { - "epoch": 0.9101136331434725, - "grad_norm": 0.0, - "learning_rate": 4.2077782627041764e-07, - "loss": 0.8323, - "step": 32117 - }, - { - "epoch": 0.910141970585735, - "grad_norm": 0.0, - "learning_rate": 4.205144354740032e-07, - "loss": 0.7764, - "step": 32118 - }, - { - "epoch": 0.9101703080279974, - "grad_norm": 0.0, - "learning_rate": 4.202511253691521e-07, - "loss": 0.7636, - "step": 32119 - }, - { - "epoch": 0.9101986454702599, - "grad_norm": 0.0, - "learning_rate": 4.199878959580861e-07, - "loss": 0.8869, - "step": 32120 - }, - { - "epoch": 0.9102269829125224, - "grad_norm": 0.0, - "learning_rate": 4.197247472430166e-07, - "loss": 0.7502, - "step": 32121 - }, - { - "epoch": 0.9102553203547847, - "grad_norm": 0.0, - "learning_rate": 4.194616792261641e-07, - "loss": 0.8641, - "step": 32122 - }, - { - "epoch": 0.9102836577970472, - "grad_norm": 0.0, - "learning_rate": 4.191986919097446e-07, - "loss": 0.7808, - "step": 32123 - }, - { - "epoch": 0.9103119952393097, - "grad_norm": 0.0, - "learning_rate": 4.189357852959708e-07, - "loss": 0.7236, - "step": 32124 - }, - { - "epoch": 0.9103403326815721, - "grad_norm": 0.0, - "learning_rate": 4.1867295938705866e-07, - "loss": 0.7813, - "step": 32125 - }, - { - "epoch": 0.9103686701238346, - "grad_norm": 0.0, - "learning_rate": 4.184102141852242e-07, - "loss": 0.7328, - "step": 32126 - }, - { - "epoch": 0.9103970075660971, - "grad_norm": 0.0, - "learning_rate": 4.181475496926768e-07, - "loss": 0.8053, - "step": 32127 - }, - { - "epoch": 0.9104253450083596, - "grad_norm": 0.0, - "learning_rate": 4.178849659116313e-07, - "loss": 0.9826, - "step": 32128 - }, - { - "epoch": 0.910453682450622, - "grad_norm": 0.0, - "learning_rate": 4.176224628442982e-07, - "loss": 0.8944, - "step": 32129 - }, - { - "epoch": 0.9104820198928845, - "grad_norm": 0.0, - "learning_rate": 4.173600404928901e-07, - "loss": 0.7672, - "step": 32130 - }, - { - "epoch": 0.910510357335147, - "grad_norm": 0.0, - "learning_rate": 4.170976988596165e-07, - "loss": 0.5988, - "step": 32131 - }, - { - "epoch": 0.9105386947774093, - "grad_norm": 0.0, - "learning_rate": 4.1683543794668657e-07, - "loss": 0.7878, - "step": 32132 - }, - { - "epoch": 0.9105670322196718, - "grad_norm": 0.0, - "learning_rate": 4.1657325775631195e-07, - "loss": 0.9031, - "step": 32133 - }, - { - "epoch": 0.9105953696619343, - "grad_norm": 0.0, - "learning_rate": 4.1631115829069866e-07, - "loss": 0.7905, - "step": 32134 - }, - { - "epoch": 0.9106237071041968, - "grad_norm": 0.0, - "learning_rate": 4.160491395520561e-07, - "loss": 0.7171, - "step": 32135 - }, - { - "epoch": 0.9106520445464592, - "grad_norm": 0.0, - "learning_rate": 4.157872015425901e-07, - "loss": 0.7048, - "step": 32136 - }, - { - "epoch": 0.9106803819887217, - "grad_norm": 0.0, - "learning_rate": 4.1552534426450686e-07, - "loss": 0.7892, - "step": 32137 - }, - { - "epoch": 0.9107087194309842, - "grad_norm": 0.0, - "learning_rate": 4.152635677200134e-07, - "loss": 0.7542, - "step": 32138 - }, - { - "epoch": 0.9107370568732466, - "grad_norm": 0.0, - "learning_rate": 4.150018719113147e-07, - "loss": 0.8212, - "step": 32139 - }, - { - "epoch": 0.9107653943155091, - "grad_norm": 0.0, - "learning_rate": 4.147402568406134e-07, - "loss": 0.8409, - "step": 32140 - }, - { - "epoch": 0.9107937317577716, - "grad_norm": 0.0, - "learning_rate": 4.1447872251011655e-07, - "loss": 0.793, - "step": 32141 - }, - { - "epoch": 0.9108220692000341, - "grad_norm": 0.0, - "learning_rate": 4.142172689220225e-07, - "loss": 0.7295, - "step": 32142 - }, - { - "epoch": 0.9108504066422964, - "grad_norm": 0.0, - "learning_rate": 4.139558960785361e-07, - "loss": 0.7601, - "step": 32143 - }, - { - "epoch": 0.9108787440845589, - "grad_norm": 0.0, - "learning_rate": 4.13694603981859e-07, - "loss": 0.8557, - "step": 32144 - }, - { - "epoch": 0.9109070815268214, - "grad_norm": 0.0, - "learning_rate": 4.1343339263419155e-07, - "loss": 0.8048, - "step": 32145 - }, - { - "epoch": 0.9109354189690838, - "grad_norm": 0.0, - "learning_rate": 4.131722620377354e-07, - "loss": 0.9366, - "step": 32146 - }, - { - "epoch": 0.9109637564113463, - "grad_norm": 0.0, - "learning_rate": 4.129112121946899e-07, - "loss": 0.7485, - "step": 32147 - }, - { - "epoch": 0.9109920938536088, - "grad_norm": 0.0, - "learning_rate": 4.1265024310725323e-07, - "loss": 0.8019, - "step": 32148 - }, - { - "epoch": 0.9110204312958712, - "grad_norm": 0.0, - "learning_rate": 4.1238935477762367e-07, - "loss": 0.8354, - "step": 32149 - }, - { - "epoch": 0.9110487687381337, - "grad_norm": 0.0, - "learning_rate": 4.121285472079983e-07, - "loss": 0.7948, - "step": 32150 - }, - { - "epoch": 0.9110771061803962, - "grad_norm": 0.0, - "learning_rate": 4.118678204005744e-07, - "loss": 0.777, - "step": 32151 - }, - { - "epoch": 0.9111054436226587, - "grad_norm": 0.0, - "learning_rate": 4.1160717435754895e-07, - "loss": 0.6964, - "step": 32152 - }, - { - "epoch": 0.911133781064921, - "grad_norm": 0.0, - "learning_rate": 4.113466090811158e-07, - "loss": 0.869, - "step": 32153 - }, - { - "epoch": 0.9111621185071835, - "grad_norm": 0.0, - "learning_rate": 4.110861245734721e-07, - "loss": 0.8041, - "step": 32154 - }, - { - "epoch": 0.911190455949446, - "grad_norm": 0.0, - "learning_rate": 4.108257208368105e-07, - "loss": 0.7758, - "step": 32155 - }, - { - "epoch": 0.9112187933917084, - "grad_norm": 0.0, - "learning_rate": 4.105653978733237e-07, - "loss": 0.7815, - "step": 32156 - }, - { - "epoch": 0.9112471308339709, - "grad_norm": 0.0, - "learning_rate": 4.103051556852056e-07, - "loss": 0.8902, - "step": 32157 - }, - { - "epoch": 0.9112754682762334, - "grad_norm": 0.0, - "learning_rate": 4.1004499427464873e-07, - "loss": 0.8013, - "step": 32158 - }, - { - "epoch": 0.9113038057184959, - "grad_norm": 0.0, - "learning_rate": 4.0978491364384365e-07, - "loss": 0.8298, - "step": 32159 - }, - { - "epoch": 0.9113321431607583, - "grad_norm": 0.0, - "learning_rate": 4.0952491379498305e-07, - "loss": 0.8384, - "step": 32160 - }, - { - "epoch": 0.9113604806030208, - "grad_norm": 0.0, - "learning_rate": 4.0926499473025295e-07, - "loss": 0.762, - "step": 32161 - }, - { - "epoch": 0.9113888180452833, - "grad_norm": 0.0, - "learning_rate": 4.0900515645184823e-07, - "loss": 0.7875, - "step": 32162 - }, - { - "epoch": 0.9114171554875456, - "grad_norm": 0.0, - "learning_rate": 4.0874539896195275e-07, - "loss": 0.7858, - "step": 32163 - }, - { - "epoch": 0.9114454929298081, - "grad_norm": 0.0, - "learning_rate": 4.0848572226275583e-07, - "loss": 0.8096, - "step": 32164 - }, - { - "epoch": 0.9114738303720706, - "grad_norm": 0.0, - "learning_rate": 4.082261263564469e-07, - "loss": 0.8041, - "step": 32165 - }, - { - "epoch": 0.9115021678143331, - "grad_norm": 0.0, - "learning_rate": 4.0796661124520964e-07, - "loss": 0.8525, - "step": 32166 - }, - { - "epoch": 0.9115305052565955, - "grad_norm": 0.0, - "learning_rate": 4.0770717693123243e-07, - "loss": 0.8411, - "step": 32167 - }, - { - "epoch": 0.911558842698858, - "grad_norm": 0.0, - "learning_rate": 4.074478234167001e-07, - "loss": 0.7772, - "step": 32168 - }, - { - "epoch": 0.9115871801411205, - "grad_norm": 0.0, - "learning_rate": 4.0718855070379535e-07, - "loss": 0.6951, - "step": 32169 - }, - { - "epoch": 0.9116155175833829, - "grad_norm": 0.0, - "learning_rate": 4.069293587947043e-07, - "loss": 0.7969, - "step": 32170 - }, - { - "epoch": 0.9116438550256454, - "grad_norm": 0.0, - "learning_rate": 4.0667024769160957e-07, - "loss": 0.91, - "step": 32171 - }, - { - "epoch": 0.9116721924679079, - "grad_norm": 0.0, - "learning_rate": 4.0641121739669387e-07, - "loss": 0.8301, - "step": 32172 - }, - { - "epoch": 0.9117005299101703, - "grad_norm": 0.0, - "learning_rate": 4.061522679121399e-07, - "loss": 0.905, - "step": 32173 - }, - { - "epoch": 0.9117288673524327, - "grad_norm": 0.0, - "learning_rate": 4.0589339924012705e-07, - "loss": 0.7479, - "step": 32174 - }, - { - "epoch": 0.9117572047946952, - "grad_norm": 0.0, - "learning_rate": 4.0563461138283577e-07, - "loss": 0.77, - "step": 32175 - }, - { - "epoch": 0.9117855422369577, - "grad_norm": 0.0, - "learning_rate": 4.0537590434244876e-07, - "loss": 0.706, - "step": 32176 - }, - { - "epoch": 0.9118138796792201, - "grad_norm": 0.0, - "learning_rate": 4.051172781211421e-07, - "loss": 0.8289, - "step": 32177 - }, - { - "epoch": 0.9118422171214826, - "grad_norm": 0.0, - "learning_rate": 4.048587327210973e-07, - "loss": 0.8608, - "step": 32178 - }, - { - "epoch": 0.9118705545637451, - "grad_norm": 0.0, - "learning_rate": 4.046002681444894e-07, - "loss": 0.7599, - "step": 32179 - }, - { - "epoch": 0.9118988920060075, - "grad_norm": 0.0, - "learning_rate": 4.0434188439349544e-07, - "loss": 0.7389, - "step": 32180 - }, - { - "epoch": 0.91192722944827, - "grad_norm": 0.0, - "learning_rate": 4.040835814702959e-07, - "loss": 0.8545, - "step": 32181 - }, - { - "epoch": 0.9119555668905325, - "grad_norm": 0.0, - "learning_rate": 4.0382535937706136e-07, - "loss": 0.8261, - "step": 32182 - }, - { - "epoch": 0.911983904332795, - "grad_norm": 0.0, - "learning_rate": 4.035672181159689e-07, - "loss": 0.7963, - "step": 32183 - }, - { - "epoch": 0.9120122417750574, - "grad_norm": 0.0, - "learning_rate": 4.0330915768919454e-07, - "loss": 0.7833, - "step": 32184 - }, - { - "epoch": 0.9120405792173198, - "grad_norm": 0.0, - "learning_rate": 4.0305117809890993e-07, - "loss": 0.7779, - "step": 32185 - }, - { - "epoch": 0.9120689166595823, - "grad_norm": 0.0, - "learning_rate": 4.0279327934728997e-07, - "loss": 0.7498, - "step": 32186 - }, - { - "epoch": 0.9120972541018447, - "grad_norm": 0.0, - "learning_rate": 4.0253546143650514e-07, - "loss": 0.9216, - "step": 32187 - }, - { - "epoch": 0.9121255915441072, - "grad_norm": 0.0, - "learning_rate": 4.0227772436872813e-07, - "loss": 0.8197, - "step": 32188 - }, - { - "epoch": 0.9121539289863697, - "grad_norm": 0.0, - "learning_rate": 4.020200681461317e-07, - "loss": 0.8378, - "step": 32189 - }, - { - "epoch": 0.9121822664286322, - "grad_norm": 0.0, - "learning_rate": 4.017624927708819e-07, - "loss": 0.7802, - "step": 32190 - }, - { - "epoch": 0.9122106038708946, - "grad_norm": 0.0, - "learning_rate": 4.0150499824515353e-07, - "loss": 0.7528, - "step": 32191 - }, - { - "epoch": 0.9122389413131571, - "grad_norm": 0.0, - "learning_rate": 4.012475845711106e-07, - "loss": 0.8596, - "step": 32192 - }, - { - "epoch": 0.9122672787554196, - "grad_norm": 0.0, - "learning_rate": 4.0099025175092346e-07, - "loss": 0.7823, - "step": 32193 - }, - { - "epoch": 0.912295616197682, - "grad_norm": 0.0, - "learning_rate": 4.007329997867615e-07, - "loss": 0.7973, - "step": 32194 - }, - { - "epoch": 0.9123239536399445, - "grad_norm": 0.0, - "learning_rate": 4.004758286807897e-07, - "loss": 0.8781, - "step": 32195 - }, - { - "epoch": 0.912352291082207, - "grad_norm": 0.0, - "learning_rate": 4.0021873843517413e-07, - "loss": 0.7954, - "step": 32196 - }, - { - "epoch": 0.9123806285244693, - "grad_norm": 0.0, - "learning_rate": 3.9996172905208074e-07, - "loss": 0.7719, - "step": 32197 - }, - { - "epoch": 0.9124089659667318, - "grad_norm": 0.0, - "learning_rate": 3.9970480053367454e-07, - "loss": 0.7619, - "step": 32198 - }, - { - "epoch": 0.9124373034089943, - "grad_norm": 0.0, - "learning_rate": 3.9944795288212047e-07, - "loss": 0.654, - "step": 32199 - }, - { - "epoch": 0.9124656408512568, - "grad_norm": 0.0, - "learning_rate": 3.991911860995812e-07, - "loss": 0.8306, - "step": 32200 - }, - { - "epoch": 0.9124939782935192, - "grad_norm": 0.0, - "learning_rate": 3.9893450018821946e-07, - "loss": 0.8209, - "step": 32201 - }, - { - "epoch": 0.9125223157357817, - "grad_norm": 0.0, - "learning_rate": 3.9867789515019795e-07, - "loss": 0.9213, - "step": 32202 - }, - { - "epoch": 0.9125506531780442, - "grad_norm": 0.0, - "learning_rate": 3.984213709876783e-07, - "loss": 0.8536, - "step": 32203 - }, - { - "epoch": 0.9125789906203066, - "grad_norm": 0.0, - "learning_rate": 3.981649277028199e-07, - "loss": 0.7901, - "step": 32204 - }, - { - "epoch": 0.9126073280625691, - "grad_norm": 0.0, - "learning_rate": 3.979085652977854e-07, - "loss": 0.8101, - "step": 32205 - }, - { - "epoch": 0.9126356655048316, - "grad_norm": 0.0, - "learning_rate": 3.97652283774731e-07, - "loss": 0.9178, - "step": 32206 - }, - { - "epoch": 0.912664002947094, - "grad_norm": 0.0, - "learning_rate": 3.973960831358181e-07, - "loss": 0.8873, - "step": 32207 - }, - { - "epoch": 0.9126923403893564, - "grad_norm": 0.0, - "learning_rate": 3.9713996338320405e-07, - "loss": 0.7344, - "step": 32208 - }, - { - "epoch": 0.9127206778316189, - "grad_norm": 0.0, - "learning_rate": 3.968839245190448e-07, - "loss": 0.8656, - "step": 32209 - }, - { - "epoch": 0.9127490152738814, - "grad_norm": 0.0, - "learning_rate": 3.9662796654549865e-07, - "loss": 0.8521, - "step": 32210 - }, - { - "epoch": 0.9127773527161438, - "grad_norm": 0.0, - "learning_rate": 3.963720894647216e-07, - "loss": 0.8286, - "step": 32211 - }, - { - "epoch": 0.9128056901584063, - "grad_norm": 0.0, - "learning_rate": 3.961162932788687e-07, - "loss": 0.7733, - "step": 32212 - }, - { - "epoch": 0.9128340276006688, - "grad_norm": 0.0, - "learning_rate": 3.9586057799009483e-07, - "loss": 0.6883, - "step": 32213 - }, - { - "epoch": 0.9128623650429313, - "grad_norm": 0.0, - "learning_rate": 3.956049436005538e-07, - "loss": 0.8186, - "step": 32214 - }, - { - "epoch": 0.9128907024851937, - "grad_norm": 0.0, - "learning_rate": 3.953493901123984e-07, - "loss": 0.831, - "step": 32215 - }, - { - "epoch": 0.9129190399274562, - "grad_norm": 0.0, - "learning_rate": 3.9509391752778236e-07, - "loss": 0.7517, - "step": 32216 - }, - { - "epoch": 0.9129473773697186, - "grad_norm": 0.0, - "learning_rate": 3.948385258488552e-07, - "loss": 0.7745, - "step": 32217 - }, - { - "epoch": 0.912975714811981, - "grad_norm": 0.0, - "learning_rate": 3.9458321507777286e-07, - "loss": 0.808, - "step": 32218 - }, - { - "epoch": 0.9130040522542435, - "grad_norm": 0.0, - "learning_rate": 3.9432798521668035e-07, - "loss": 0.9205, - "step": 32219 - }, - { - "epoch": 0.913032389696506, - "grad_norm": 0.0, - "learning_rate": 3.940728362677315e-07, - "loss": 0.764, - "step": 32220 - }, - { - "epoch": 0.9130607271387684, - "grad_norm": 0.0, - "learning_rate": 3.9381776823307347e-07, - "loss": 0.807, - "step": 32221 - }, - { - "epoch": 0.9130890645810309, - "grad_norm": 0.0, - "learning_rate": 3.9356278111485567e-07, - "loss": 0.7223, - "step": 32222 - }, - { - "epoch": 0.9131174020232934, - "grad_norm": 0.0, - "learning_rate": 3.933078749152264e-07, - "loss": 0.826, - "step": 32223 - }, - { - "epoch": 0.9131457394655559, - "grad_norm": 0.0, - "learning_rate": 3.9305304963633163e-07, - "loss": 0.7383, - "step": 32224 - }, - { - "epoch": 0.9131740769078183, - "grad_norm": 0.0, - "learning_rate": 3.9279830528031747e-07, - "loss": 0.838, - "step": 32225 - }, - { - "epoch": 0.9132024143500808, - "grad_norm": 0.0, - "learning_rate": 3.9254364184933335e-07, - "loss": 0.8197, - "step": 32226 - }, - { - "epoch": 0.9132307517923433, - "grad_norm": 0.0, - "learning_rate": 3.922890593455198e-07, - "loss": 0.7839, - "step": 32227 - }, - { - "epoch": 0.9132590892346056, - "grad_norm": 0.0, - "learning_rate": 3.9203455777102493e-07, - "loss": 0.9164, - "step": 32228 - }, - { - "epoch": 0.9132874266768681, - "grad_norm": 0.0, - "learning_rate": 3.917801371279895e-07, - "loss": 0.7929, - "step": 32229 - }, - { - "epoch": 0.9133157641191306, - "grad_norm": 0.0, - "learning_rate": 3.915257974185582e-07, - "loss": 0.7388, - "step": 32230 - }, - { - "epoch": 0.9133441015613931, - "grad_norm": 0.0, - "learning_rate": 3.91271538644874e-07, - "loss": 0.8461, - "step": 32231 - }, - { - "epoch": 0.9133724390036555, - "grad_norm": 0.0, - "learning_rate": 3.9101736080907615e-07, - "loss": 0.7818, - "step": 32232 - }, - { - "epoch": 0.913400776445918, - "grad_norm": 0.0, - "learning_rate": 3.907632639133074e-07, - "loss": 0.8478, - "step": 32233 - }, - { - "epoch": 0.9134291138881805, - "grad_norm": 0.0, - "learning_rate": 3.9050924795970944e-07, - "loss": 0.7175, - "step": 32234 - }, - { - "epoch": 0.9134574513304429, - "grad_norm": 0.0, - "learning_rate": 3.902553129504194e-07, - "loss": 0.8686, - "step": 32235 - }, - { - "epoch": 0.9134857887727054, - "grad_norm": 0.0, - "learning_rate": 3.900014588875767e-07, - "loss": 0.8848, - "step": 32236 - }, - { - "epoch": 0.9135141262149679, - "grad_norm": 0.0, - "learning_rate": 3.8974768577332067e-07, - "loss": 0.8265, - "step": 32237 - }, - { - "epoch": 0.9135424636572304, - "grad_norm": 0.0, - "learning_rate": 3.8949399360978967e-07, - "loss": 0.7109, - "step": 32238 - }, - { - "epoch": 0.9135708010994927, - "grad_norm": 0.0, - "learning_rate": 3.8924038239911977e-07, - "loss": 0.8042, - "step": 32239 - }, - { - "epoch": 0.9135991385417552, - "grad_norm": 0.0, - "learning_rate": 3.8898685214344477e-07, - "loss": 0.7862, - "step": 32240 - }, - { - "epoch": 0.9136274759840177, - "grad_norm": 0.0, - "learning_rate": 3.887334028449041e-07, - "loss": 0.7338, - "step": 32241 - }, - { - "epoch": 0.9136558134262801, - "grad_norm": 0.0, - "learning_rate": 3.8848003450563167e-07, - "loss": 0.7314, - "step": 32242 - }, - { - "epoch": 0.9136841508685426, - "grad_norm": 0.0, - "learning_rate": 3.882267471277601e-07, - "loss": 0.7549, - "step": 32243 - }, - { - "epoch": 0.9137124883108051, - "grad_norm": 0.0, - "learning_rate": 3.8797354071342443e-07, - "loss": 0.8427, - "step": 32244 - }, - { - "epoch": 0.9137408257530675, - "grad_norm": 0.0, - "learning_rate": 3.8772041526475624e-07, - "loss": 0.779, - "step": 32245 - }, - { - "epoch": 0.91376916319533, - "grad_norm": 0.0, - "learning_rate": 3.874673707838883e-07, - "loss": 0.8701, - "step": 32246 - }, - { - "epoch": 0.9137975006375925, - "grad_norm": 0.0, - "learning_rate": 3.872144072729522e-07, - "loss": 0.8734, - "step": 32247 - }, - { - "epoch": 0.913825838079855, - "grad_norm": 0.0, - "learning_rate": 3.8696152473407966e-07, - "loss": 0.7263, - "step": 32248 - }, - { - "epoch": 0.9138541755221173, - "grad_norm": 0.0, - "learning_rate": 3.867087231693989e-07, - "loss": 0.8558, - "step": 32249 - }, - { - "epoch": 0.9138825129643798, - "grad_norm": 0.0, - "learning_rate": 3.8645600258104045e-07, - "loss": 0.7775, - "step": 32250 - }, - { - "epoch": 0.9139108504066423, - "grad_norm": 0.0, - "learning_rate": 3.862033629711337e-07, - "loss": 0.8732, - "step": 32251 - }, - { - "epoch": 0.9139391878489047, - "grad_norm": 0.0, - "learning_rate": 3.859508043418059e-07, - "loss": 0.821, - "step": 32252 - }, - { - "epoch": 0.9139675252911672, - "grad_norm": 0.0, - "learning_rate": 3.8569832669518417e-07, - "loss": 0.6778, - "step": 32253 - }, - { - "epoch": 0.9139958627334297, - "grad_norm": 0.0, - "learning_rate": 3.854459300333957e-07, - "loss": 0.918, - "step": 32254 - }, - { - "epoch": 0.9140242001756922, - "grad_norm": 0.0, - "learning_rate": 3.8519361435856774e-07, - "loss": 0.7461, - "step": 32255 - }, - { - "epoch": 0.9140525376179546, - "grad_norm": 0.0, - "learning_rate": 3.8494137967282186e-07, - "loss": 0.7355, - "step": 32256 - }, - { - "epoch": 0.9140808750602171, - "grad_norm": 0.0, - "learning_rate": 3.846892259782875e-07, - "loss": 0.7569, - "step": 32257 - }, - { - "epoch": 0.9141092125024796, - "grad_norm": 0.0, - "learning_rate": 3.844371532770852e-07, - "loss": 0.7789, - "step": 32258 - }, - { - "epoch": 0.914137549944742, - "grad_norm": 0.0, - "learning_rate": 3.841851615713399e-07, - "loss": 0.9501, - "step": 32259 - }, - { - "epoch": 0.9141658873870044, - "grad_norm": 0.0, - "learning_rate": 3.839332508631721e-07, - "loss": 0.8665, - "step": 32260 - }, - { - "epoch": 0.9141942248292669, - "grad_norm": 0.0, - "learning_rate": 3.8368142115470683e-07, - "loss": 0.7752, - "step": 32261 - }, - { - "epoch": 0.9142225622715294, - "grad_norm": 0.0, - "learning_rate": 3.834296724480635e-07, - "loss": 0.899, - "step": 32262 - }, - { - "epoch": 0.9142508997137918, - "grad_norm": 0.0, - "learning_rate": 3.831780047453637e-07, - "loss": 0.818, - "step": 32263 - }, - { - "epoch": 0.9142792371560543, - "grad_norm": 0.0, - "learning_rate": 3.8292641804872576e-07, - "loss": 0.7593, - "step": 32264 - }, - { - "epoch": 0.9143075745983168, - "grad_norm": 0.0, - "learning_rate": 3.8267491236027133e-07, - "loss": 0.7914, - "step": 32265 - }, - { - "epoch": 0.9143359120405792, - "grad_norm": 0.0, - "learning_rate": 3.824234876821165e-07, - "loss": 0.8185, - "step": 32266 - }, - { - "epoch": 0.9143642494828417, - "grad_norm": 0.0, - "learning_rate": 3.821721440163795e-07, - "loss": 0.805, - "step": 32267 - }, - { - "epoch": 0.9143925869251042, - "grad_norm": 0.0, - "learning_rate": 3.819208813651787e-07, - "loss": 0.7702, - "step": 32268 - }, - { - "epoch": 0.9144209243673665, - "grad_norm": 0.0, - "learning_rate": 3.8166969973063015e-07, - "loss": 0.7633, - "step": 32269 - }, - { - "epoch": 0.914449261809629, - "grad_norm": 0.0, - "learning_rate": 3.814185991148478e-07, - "loss": 0.7297, - "step": 32270 - }, - { - "epoch": 0.9144775992518915, - "grad_norm": 0.0, - "learning_rate": 3.81167579519951e-07, - "loss": 0.8786, - "step": 32271 - }, - { - "epoch": 0.914505936694154, - "grad_norm": 0.0, - "learning_rate": 3.8091664094804913e-07, - "loss": 0.8219, - "step": 32272 - }, - { - "epoch": 0.9145342741364164, - "grad_norm": 0.0, - "learning_rate": 3.806657834012595e-07, - "loss": 0.8948, - "step": 32273 - }, - { - "epoch": 0.9145626115786789, - "grad_norm": 0.0, - "learning_rate": 3.8041500688169253e-07, - "loss": 0.7774, - "step": 32274 - }, - { - "epoch": 0.9145909490209414, - "grad_norm": 0.0, - "learning_rate": 3.8016431139146327e-07, - "loss": 0.7835, - "step": 32275 - }, - { - "epoch": 0.9146192864632038, - "grad_norm": 0.0, - "learning_rate": 3.799136969326833e-07, - "loss": 0.8905, - "step": 32276 - }, - { - "epoch": 0.9146476239054663, - "grad_norm": 0.0, - "learning_rate": 3.79663163507461e-07, - "loss": 0.8646, - "step": 32277 - }, - { - "epoch": 0.9146759613477288, - "grad_norm": 0.0, - "learning_rate": 3.7941271111791024e-07, - "loss": 0.8673, - "step": 32278 - }, - { - "epoch": 0.9147042987899913, - "grad_norm": 0.0, - "learning_rate": 3.7916233976613704e-07, - "loss": 0.7366, - "step": 32279 - }, - { - "epoch": 0.9147326362322536, - "grad_norm": 0.0, - "learning_rate": 3.78912049454252e-07, - "loss": 0.8067, - "step": 32280 - }, - { - "epoch": 0.9147609736745161, - "grad_norm": 0.0, - "learning_rate": 3.786618401843645e-07, - "loss": 0.7544, - "step": 32281 - }, - { - "epoch": 0.9147893111167786, - "grad_norm": 0.0, - "learning_rate": 3.784117119585806e-07, - "loss": 0.8566, - "step": 32282 - }, - { - "epoch": 0.914817648559041, - "grad_norm": 0.0, - "learning_rate": 3.781616647790065e-07, - "loss": 0.8228, - "step": 32283 - }, - { - "epoch": 0.9148459860013035, - "grad_norm": 0.0, - "learning_rate": 3.7791169864775266e-07, - "loss": 0.9584, - "step": 32284 - }, - { - "epoch": 0.914874323443566, - "grad_norm": 0.0, - "learning_rate": 3.7766181356691854e-07, - "loss": 0.8647, - "step": 32285 - }, - { - "epoch": 0.9149026608858285, - "grad_norm": 0.0, - "learning_rate": 3.7741200953861356e-07, - "loss": 0.7173, - "step": 32286 - }, - { - "epoch": 0.9149309983280909, - "grad_norm": 0.0, - "learning_rate": 3.771622865649405e-07, - "loss": 0.8088, - "step": 32287 - }, - { - "epoch": 0.9149593357703534, - "grad_norm": 0.0, - "learning_rate": 3.7691264464800203e-07, - "loss": 0.8021, - "step": 32288 - }, - { - "epoch": 0.9149876732126159, - "grad_norm": 0.0, - "learning_rate": 3.766630837899032e-07, - "loss": 0.7786, - "step": 32289 - }, - { - "epoch": 0.9150160106548783, - "grad_norm": 0.0, - "learning_rate": 3.7641360399274354e-07, - "loss": 0.8477, - "step": 32290 - }, - { - "epoch": 0.9150443480971407, - "grad_norm": 0.0, - "learning_rate": 3.7616420525862564e-07, - "loss": 0.7772, - "step": 32291 - }, - { - "epoch": 0.9150726855394032, - "grad_norm": 0.0, - "learning_rate": 3.7591488758965233e-07, - "loss": 0.6798, - "step": 32292 - }, - { - "epoch": 0.9151010229816656, - "grad_norm": 0.0, - "learning_rate": 3.7566565098791975e-07, - "loss": 0.8518, - "step": 32293 - }, - { - "epoch": 0.9151293604239281, - "grad_norm": 0.0, - "learning_rate": 3.754164954555306e-07, - "loss": 0.8423, - "step": 32294 - }, - { - "epoch": 0.9151576978661906, - "grad_norm": 0.0, - "learning_rate": 3.7516742099458106e-07, - "loss": 0.8206, - "step": 32295 - }, - { - "epoch": 0.9151860353084531, - "grad_norm": 0.0, - "learning_rate": 3.7491842760717046e-07, - "loss": 0.7498, - "step": 32296 - }, - { - "epoch": 0.9152143727507155, - "grad_norm": 0.0, - "learning_rate": 3.746695152953983e-07, - "loss": 0.8784, - "step": 32297 - }, - { - "epoch": 0.915242710192978, - "grad_norm": 0.0, - "learning_rate": 3.7442068406135624e-07, - "loss": 0.8014, - "step": 32298 - }, - { - "epoch": 0.9152710476352405, - "grad_norm": 0.0, - "learning_rate": 3.741719339071448e-07, - "loss": 0.8016, - "step": 32299 - }, - { - "epoch": 0.9152993850775029, - "grad_norm": 0.0, - "learning_rate": 3.7392326483485673e-07, - "loss": 0.7922, - "step": 32300 - }, - { - "epoch": 0.9153277225197654, - "grad_norm": 0.0, - "learning_rate": 3.736746768465871e-07, - "loss": 0.8773, - "step": 32301 - }, - { - "epoch": 0.9153560599620278, - "grad_norm": 0.0, - "learning_rate": 3.7342616994443194e-07, - "loss": 0.8072, - "step": 32302 - }, - { - "epoch": 0.9153843974042903, - "grad_norm": 0.0, - "learning_rate": 3.731777441304818e-07, - "loss": 0.6919, - "step": 32303 - }, - { - "epoch": 0.9154127348465527, - "grad_norm": 0.0, - "learning_rate": 3.729293994068306e-07, - "loss": 0.8218, - "step": 32304 - }, - { - "epoch": 0.9154410722888152, - "grad_norm": 0.0, - "learning_rate": 3.7268113577557107e-07, - "loss": 0.796, - "step": 32305 - }, - { - "epoch": 0.9154694097310777, - "grad_norm": 0.0, - "learning_rate": 3.724329532387916e-07, - "loss": 0.7786, - "step": 32306 - }, - { - "epoch": 0.9154977471733401, - "grad_norm": 0.0, - "learning_rate": 3.72184851798586e-07, - "loss": 0.7088, - "step": 32307 - }, - { - "epoch": 0.9155260846156026, - "grad_norm": 0.0, - "learning_rate": 3.7193683145704153e-07, - "loss": 0.8048, - "step": 32308 - }, - { - "epoch": 0.9155544220578651, - "grad_norm": 0.0, - "learning_rate": 3.716888922162487e-07, - "loss": 0.8409, - "step": 32309 - }, - { - "epoch": 0.9155827595001275, - "grad_norm": 0.0, - "learning_rate": 3.71441034078297e-07, - "loss": 0.8492, - "step": 32310 - }, - { - "epoch": 0.91561109694239, - "grad_norm": 0.0, - "learning_rate": 3.711932570452703e-07, - "loss": 0.7658, - "step": 32311 - }, - { - "epoch": 0.9156394343846525, - "grad_norm": 0.0, - "learning_rate": 3.709455611192603e-07, - "loss": 0.7861, - "step": 32312 - }, - { - "epoch": 0.915667771826915, - "grad_norm": 0.0, - "learning_rate": 3.706979463023497e-07, - "loss": 0.7051, - "step": 32313 - }, - { - "epoch": 0.9156961092691773, - "grad_norm": 0.0, - "learning_rate": 3.7045041259662793e-07, - "loss": 0.7412, - "step": 32314 - }, - { - "epoch": 0.9157244467114398, - "grad_norm": 0.0, - "learning_rate": 3.702029600041779e-07, - "loss": 0.8285, - "step": 32315 - }, - { - "epoch": 0.9157527841537023, - "grad_norm": 0.0, - "learning_rate": 3.6995558852708335e-07, - "loss": 0.8269, - "step": 32316 - }, - { - "epoch": 0.9157811215959647, - "grad_norm": 0.0, - "learning_rate": 3.6970829816742936e-07, - "loss": 0.8237, - "step": 32317 - }, - { - "epoch": 0.9158094590382272, - "grad_norm": 0.0, - "learning_rate": 3.694610889272998e-07, - "loss": 0.7979, - "step": 32318 - }, - { - "epoch": 0.9158377964804897, - "grad_norm": 0.0, - "learning_rate": 3.6921396080877414e-07, - "loss": 0.7781, - "step": 32319 - }, - { - "epoch": 0.9158661339227522, - "grad_norm": 0.0, - "learning_rate": 3.689669138139351e-07, - "loss": 0.8215, - "step": 32320 - }, - { - "epoch": 0.9158944713650146, - "grad_norm": 0.0, - "learning_rate": 3.687199479448655e-07, - "loss": 0.7742, - "step": 32321 - }, - { - "epoch": 0.9159228088072771, - "grad_norm": 0.0, - "learning_rate": 3.684730632036437e-07, - "loss": 0.8168, - "step": 32322 - }, - { - "epoch": 0.9159511462495395, - "grad_norm": 0.0, - "learning_rate": 3.6822625959235136e-07, - "loss": 0.8814, - "step": 32323 - }, - { - "epoch": 0.9159794836918019, - "grad_norm": 0.0, - "learning_rate": 3.6797953711306344e-07, - "loss": 0.8375, - "step": 32324 - }, - { - "epoch": 0.9160078211340644, - "grad_norm": 0.0, - "learning_rate": 3.677328957678616e-07, - "loss": 0.8039, - "step": 32325 - }, - { - "epoch": 0.9160361585763269, - "grad_norm": 0.0, - "learning_rate": 3.6748633555882206e-07, - "loss": 0.8227, - "step": 32326 - }, - { - "epoch": 0.9160644960185894, - "grad_norm": 0.0, - "learning_rate": 3.6723985648802196e-07, - "loss": 0.7333, - "step": 32327 - }, - { - "epoch": 0.9160928334608518, - "grad_norm": 0.0, - "learning_rate": 3.6699345855753856e-07, - "loss": 0.8767, - "step": 32328 - }, - { - "epoch": 0.9161211709031143, - "grad_norm": 0.0, - "learning_rate": 3.667471417694468e-07, - "loss": 0.8205, - "step": 32329 - }, - { - "epoch": 0.9161495083453768, - "grad_norm": 0.0, - "learning_rate": 3.6650090612581955e-07, - "loss": 0.8213, - "step": 32330 - }, - { - "epoch": 0.9161778457876392, - "grad_norm": 0.0, - "learning_rate": 3.6625475162873404e-07, - "loss": 0.7462, - "step": 32331 - }, - { - "epoch": 0.9162061832299017, - "grad_norm": 0.0, - "learning_rate": 3.6600867828026076e-07, - "loss": 0.8176, - "step": 32332 - }, - { - "epoch": 0.9162345206721642, - "grad_norm": 0.0, - "learning_rate": 3.6576268608247477e-07, - "loss": 0.7749, - "step": 32333 - }, - { - "epoch": 0.9162628581144265, - "grad_norm": 0.0, - "learning_rate": 3.6551677503744776e-07, - "loss": 0.8304, - "step": 32334 - }, - { - "epoch": 0.916291195556689, - "grad_norm": 0.0, - "learning_rate": 3.6527094514724914e-07, - "loss": 0.7773, - "step": 32335 - }, - { - "epoch": 0.9163195329989515, - "grad_norm": 0.0, - "learning_rate": 3.6502519641395286e-07, - "loss": 0.7817, - "step": 32336 - }, - { - "epoch": 0.916347870441214, - "grad_norm": 0.0, - "learning_rate": 3.6477952883962606e-07, - "loss": 0.8004, - "step": 32337 - }, - { - "epoch": 0.9163762078834764, - "grad_norm": 0.0, - "learning_rate": 3.645339424263383e-07, - "loss": 0.797, - "step": 32338 - }, - { - "epoch": 0.9164045453257389, - "grad_norm": 0.0, - "learning_rate": 3.642884371761601e-07, - "loss": 0.8947, - "step": 32339 - }, - { - "epoch": 0.9164328827680014, - "grad_norm": 0.0, - "learning_rate": 3.6404301309115763e-07, - "loss": 0.8291, - "step": 32340 - }, - { - "epoch": 0.9164612202102638, - "grad_norm": 0.0, - "learning_rate": 3.637976701734003e-07, - "loss": 0.8566, - "step": 32341 - }, - { - "epoch": 0.9164895576525263, - "grad_norm": 0.0, - "learning_rate": 3.635524084249542e-07, - "loss": 0.854, - "step": 32342 - }, - { - "epoch": 0.9165178950947888, - "grad_norm": 0.0, - "learning_rate": 3.633072278478833e-07, - "loss": 0.8015, - "step": 32343 - }, - { - "epoch": 0.9165462325370513, - "grad_norm": 0.0, - "learning_rate": 3.630621284442548e-07, - "loss": 0.6635, - "step": 32344 - }, - { - "epoch": 0.9165745699793136, - "grad_norm": 0.0, - "learning_rate": 3.6281711021613265e-07, - "loss": 0.8427, - "step": 32345 - }, - { - "epoch": 0.9166029074215761, - "grad_norm": 0.0, - "learning_rate": 3.625721731655796e-07, - "loss": 0.8074, - "step": 32346 - }, - { - "epoch": 0.9166312448638386, - "grad_norm": 0.0, - "learning_rate": 3.6232731729466174e-07, - "loss": 0.7374, - "step": 32347 - }, - { - "epoch": 0.916659582306101, - "grad_norm": 0.0, - "learning_rate": 3.6208254260543753e-07, - "loss": 0.7948, - "step": 32348 - }, - { - "epoch": 0.9166879197483635, - "grad_norm": 0.0, - "learning_rate": 3.618378490999719e-07, - "loss": 0.7877, - "step": 32349 - }, - { - "epoch": 0.916716257190626, - "grad_norm": 0.0, - "learning_rate": 3.6159323678032654e-07, - "loss": 0.8164, - "step": 32350 - }, - { - "epoch": 0.9167445946328885, - "grad_norm": 0.0, - "learning_rate": 3.6134870564855873e-07, - "loss": 0.7718, - "step": 32351 - }, - { - "epoch": 0.9167729320751509, - "grad_norm": 0.0, - "learning_rate": 3.6110425570673015e-07, - "loss": 0.7565, - "step": 32352 - }, - { - "epoch": 0.9168012695174134, - "grad_norm": 0.0, - "learning_rate": 3.6085988695689913e-07, - "loss": 0.775, - "step": 32353 - }, - { - "epoch": 0.9168296069596759, - "grad_norm": 0.0, - "learning_rate": 3.6061559940112625e-07, - "loss": 0.6568, - "step": 32354 - }, - { - "epoch": 0.9168579444019382, - "grad_norm": 0.0, - "learning_rate": 3.603713930414676e-07, - "loss": 0.8429, - "step": 32355 - }, - { - "epoch": 0.9168862818442007, - "grad_norm": 0.0, - "learning_rate": 3.6012726787997943e-07, - "loss": 0.7151, - "step": 32356 - }, - { - "epoch": 0.9169146192864632, - "grad_norm": 0.0, - "learning_rate": 3.5988322391872e-07, - "loss": 0.7126, - "step": 32357 - }, - { - "epoch": 0.9169429567287256, - "grad_norm": 0.0, - "learning_rate": 3.5963926115974324e-07, - "loss": 0.8069, - "step": 32358 - }, - { - "epoch": 0.9169712941709881, - "grad_norm": 0.0, - "learning_rate": 3.5939537960510416e-07, - "loss": 0.851, - "step": 32359 - }, - { - "epoch": 0.9169996316132506, - "grad_norm": 0.0, - "learning_rate": 3.59151579256859e-07, - "loss": 0.8167, - "step": 32360 - }, - { - "epoch": 0.9170279690555131, - "grad_norm": 0.0, - "learning_rate": 3.5890786011705926e-07, - "loss": 0.834, - "step": 32361 - }, - { - "epoch": 0.9170563064977755, - "grad_norm": 0.0, - "learning_rate": 3.5866422218775896e-07, - "loss": 0.7504, - "step": 32362 - }, - { - "epoch": 0.917084643940038, - "grad_norm": 0.0, - "learning_rate": 3.5842066547101097e-07, - "loss": 0.829, - "step": 32363 - }, - { - "epoch": 0.9171129813823005, - "grad_norm": 0.0, - "learning_rate": 3.581771899688646e-07, - "loss": 0.7586, - "step": 32364 - }, - { - "epoch": 0.9171413188245628, - "grad_norm": 0.0, - "learning_rate": 3.579337956833728e-07, - "loss": 0.873, - "step": 32365 - }, - { - "epoch": 0.9171696562668253, - "grad_norm": 0.0, - "learning_rate": 3.57690482616585e-07, - "loss": 0.8303, - "step": 32366 - }, - { - "epoch": 0.9171979937090878, - "grad_norm": 0.0, - "learning_rate": 3.5744725077055063e-07, - "loss": 0.8908, - "step": 32367 - }, - { - "epoch": 0.9172263311513503, - "grad_norm": 0.0, - "learning_rate": 3.572041001473192e-07, - "loss": 0.7027, - "step": 32368 - }, - { - "epoch": 0.9172546685936127, - "grad_norm": 0.0, - "learning_rate": 3.56961030748938e-07, - "loss": 0.7344, - "step": 32369 - }, - { - "epoch": 0.9172830060358752, - "grad_norm": 0.0, - "learning_rate": 3.5671804257745526e-07, - "loss": 0.8056, - "step": 32370 - }, - { - "epoch": 0.9173113434781377, - "grad_norm": 0.0, - "learning_rate": 3.5647513563491833e-07, - "loss": 0.7633, - "step": 32371 - }, - { - "epoch": 0.9173396809204001, - "grad_norm": 0.0, - "learning_rate": 3.562323099233711e-07, - "loss": 0.8862, - "step": 32372 - }, - { - "epoch": 0.9173680183626626, - "grad_norm": 0.0, - "learning_rate": 3.5598956544486087e-07, - "loss": 0.8537, - "step": 32373 - }, - { - "epoch": 0.9173963558049251, - "grad_norm": 0.0, - "learning_rate": 3.557469022014315e-07, - "loss": 0.6714, - "step": 32374 - }, - { - "epoch": 0.9174246932471876, - "grad_norm": 0.0, - "learning_rate": 3.5550432019512693e-07, - "loss": 0.858, - "step": 32375 - }, - { - "epoch": 0.9174530306894499, - "grad_norm": 0.0, - "learning_rate": 3.552618194279922e-07, - "loss": 0.7644, - "step": 32376 - }, - { - "epoch": 0.9174813681317124, - "grad_norm": 0.0, - "learning_rate": 3.550193999020668e-07, - "loss": 0.7763, - "step": 32377 - }, - { - "epoch": 0.9175097055739749, - "grad_norm": 0.0, - "learning_rate": 3.5477706161939463e-07, - "loss": 0.7976, - "step": 32378 - }, - { - "epoch": 0.9175380430162373, - "grad_norm": 0.0, - "learning_rate": 3.545348045820174e-07, - "loss": 0.7939, - "step": 32379 - }, - { - "epoch": 0.9175663804584998, - "grad_norm": 0.0, - "learning_rate": 3.542926287919757e-07, - "loss": 0.8185, - "step": 32380 - }, - { - "epoch": 0.9175947179007623, - "grad_norm": 0.0, - "learning_rate": 3.5405053425130896e-07, - "loss": 0.7801, - "step": 32381 - }, - { - "epoch": 0.9176230553430247, - "grad_norm": 0.0, - "learning_rate": 3.5380852096205673e-07, - "loss": 0.7009, - "step": 32382 - }, - { - "epoch": 0.9176513927852872, - "grad_norm": 0.0, - "learning_rate": 3.535665889262563e-07, - "loss": 0.9158, - "step": 32383 - }, - { - "epoch": 0.9176797302275497, - "grad_norm": 0.0, - "learning_rate": 3.5332473814594814e-07, - "loss": 0.8031, - "step": 32384 - }, - { - "epoch": 0.9177080676698122, - "grad_norm": 0.0, - "learning_rate": 3.530829686231674e-07, - "loss": 0.8744, - "step": 32385 - }, - { - "epoch": 0.9177364051120745, - "grad_norm": 0.0, - "learning_rate": 3.5284128035995127e-07, - "loss": 0.7121, - "step": 32386 - }, - { - "epoch": 0.917764742554337, - "grad_norm": 0.0, - "learning_rate": 3.525996733583348e-07, - "loss": 0.8163, - "step": 32387 - }, - { - "epoch": 0.9177930799965995, - "grad_norm": 0.0, - "learning_rate": 3.52358147620353e-07, - "loss": 0.8088, - "step": 32388 - }, - { - "epoch": 0.9178214174388619, - "grad_norm": 0.0, - "learning_rate": 3.5211670314804326e-07, - "loss": 0.8685, - "step": 32389 - }, - { - "epoch": 0.9178497548811244, - "grad_norm": 0.0, - "learning_rate": 3.518753399434349e-07, - "loss": 0.8073, - "step": 32390 - }, - { - "epoch": 0.9178780923233869, - "grad_norm": 0.0, - "learning_rate": 3.5163405800856423e-07, - "loss": 0.8491, - "step": 32391 - }, - { - "epoch": 0.9179064297656494, - "grad_norm": 0.0, - "learning_rate": 3.513928573454628e-07, - "loss": 0.7816, - "step": 32392 - }, - { - "epoch": 0.9179347672079118, - "grad_norm": 0.0, - "learning_rate": 3.511517379561613e-07, - "loss": 0.8774, - "step": 32393 - }, - { - "epoch": 0.9179631046501743, - "grad_norm": 0.0, - "learning_rate": 3.5091069984269366e-07, - "loss": 0.7983, - "step": 32394 - }, - { - "epoch": 0.9179914420924368, - "grad_norm": 0.0, - "learning_rate": 3.506697430070871e-07, - "loss": 0.7449, - "step": 32395 - }, - { - "epoch": 0.9180197795346992, - "grad_norm": 0.0, - "learning_rate": 3.504288674513723e-07, - "loss": 0.767, - "step": 32396 - }, - { - "epoch": 0.9180481169769616, - "grad_norm": 0.0, - "learning_rate": 3.5018807317757975e-07, - "loss": 0.8017, - "step": 32397 - }, - { - "epoch": 0.9180764544192241, - "grad_norm": 0.0, - "learning_rate": 3.499473601877357e-07, - "loss": 0.819, - "step": 32398 - }, - { - "epoch": 0.9181047918614866, - "grad_norm": 0.0, - "learning_rate": 3.497067284838673e-07, - "loss": 0.8597, - "step": 32399 - }, - { - "epoch": 0.918133129303749, - "grad_norm": 0.0, - "learning_rate": 3.4946617806800534e-07, - "loss": 0.8351, - "step": 32400 - }, - { - "epoch": 0.9181614667460115, - "grad_norm": 0.0, - "learning_rate": 3.492257089421713e-07, - "loss": 0.7587, - "step": 32401 - }, - { - "epoch": 0.918189804188274, - "grad_norm": 0.0, - "learning_rate": 3.4898532110839377e-07, - "loss": 0.8075, - "step": 32402 - }, - { - "epoch": 0.9182181416305364, - "grad_norm": 0.0, - "learning_rate": 3.4874501456869545e-07, - "loss": 0.8178, - "step": 32403 - }, - { - "epoch": 0.9182464790727989, - "grad_norm": 0.0, - "learning_rate": 3.4850478932510256e-07, - "loss": 0.7815, - "step": 32404 - }, - { - "epoch": 0.9182748165150614, - "grad_norm": 0.0, - "learning_rate": 3.482646453796379e-07, - "loss": 0.7976, - "step": 32405 - }, - { - "epoch": 0.9183031539573238, - "grad_norm": 0.0, - "learning_rate": 3.4802458273432316e-07, - "loss": 0.756, - "step": 32406 - }, - { - "epoch": 0.9183314913995863, - "grad_norm": 0.0, - "learning_rate": 3.477846013911823e-07, - "loss": 0.9391, - "step": 32407 - }, - { - "epoch": 0.9183598288418487, - "grad_norm": 0.0, - "learning_rate": 3.4754470135223707e-07, - "loss": 0.7474, - "step": 32408 - }, - { - "epoch": 0.9183881662841112, - "grad_norm": 0.0, - "learning_rate": 3.473048826195058e-07, - "loss": 0.8458, - "step": 32409 - }, - { - "epoch": 0.9184165037263736, - "grad_norm": 0.0, - "learning_rate": 3.4706514519501135e-07, - "loss": 0.7659, - "step": 32410 - }, - { - "epoch": 0.9184448411686361, - "grad_norm": 0.0, - "learning_rate": 3.46825489080771e-07, - "loss": 0.8811, - "step": 32411 - }, - { - "epoch": 0.9184731786108986, - "grad_norm": 0.0, - "learning_rate": 3.4658591427880305e-07, - "loss": 0.7223, - "step": 32412 - }, - { - "epoch": 0.918501516053161, - "grad_norm": 0.0, - "learning_rate": 3.4634642079112825e-07, - "loss": 0.8209, - "step": 32413 - }, - { - "epoch": 0.9185298534954235, - "grad_norm": 0.0, - "learning_rate": 3.4610700861976153e-07, - "loss": 0.93, - "step": 32414 - }, - { - "epoch": 0.918558190937686, - "grad_norm": 0.0, - "learning_rate": 3.4586767776672136e-07, - "loss": 0.6695, - "step": 32415 - }, - { - "epoch": 0.9185865283799485, - "grad_norm": 0.0, - "learning_rate": 3.4562842823402167e-07, - "loss": 0.7744, - "step": 32416 - }, - { - "epoch": 0.9186148658222109, - "grad_norm": 0.0, - "learning_rate": 3.4538926002367855e-07, - "loss": 0.8118, - "step": 32417 - }, - { - "epoch": 0.9186432032644734, - "grad_norm": 0.0, - "learning_rate": 3.4515017313770716e-07, - "loss": 0.7895, - "step": 32418 - }, - { - "epoch": 0.9186715407067358, - "grad_norm": 0.0, - "learning_rate": 3.4491116757812024e-07, - "loss": 0.818, - "step": 32419 - }, - { - "epoch": 0.9186998781489982, - "grad_norm": 0.0, - "learning_rate": 3.4467224334693295e-07, - "loss": 0.8566, - "step": 32420 - }, - { - "epoch": 0.9187282155912607, - "grad_norm": 0.0, - "learning_rate": 3.4443340044615805e-07, - "loss": 0.8356, - "step": 32421 - }, - { - "epoch": 0.9187565530335232, - "grad_norm": 0.0, - "learning_rate": 3.441946388778039e-07, - "loss": 0.684, - "step": 32422 - }, - { - "epoch": 0.9187848904757857, - "grad_norm": 0.0, - "learning_rate": 3.4395595864388567e-07, - "loss": 0.8558, - "step": 32423 - }, - { - "epoch": 0.9188132279180481, - "grad_norm": 0.0, - "learning_rate": 3.4371735974641053e-07, - "loss": 0.7707, - "step": 32424 - }, - { - "epoch": 0.9188415653603106, - "grad_norm": 0.0, - "learning_rate": 3.4347884218739025e-07, - "loss": 0.7997, - "step": 32425 - }, - { - "epoch": 0.9188699028025731, - "grad_norm": 0.0, - "learning_rate": 3.4324040596883436e-07, - "loss": 0.8327, - "step": 32426 - }, - { - "epoch": 0.9188982402448355, - "grad_norm": 0.0, - "learning_rate": 3.4300205109275007e-07, - "loss": 0.8458, - "step": 32427 - }, - { - "epoch": 0.918926577687098, - "grad_norm": 0.0, - "learning_rate": 3.4276377756114474e-07, - "loss": 0.7813, - "step": 32428 - }, - { - "epoch": 0.9189549151293605, - "grad_norm": 0.0, - "learning_rate": 3.4252558537602785e-07, - "loss": 0.8419, - "step": 32429 - }, - { - "epoch": 0.9189832525716228, - "grad_norm": 0.0, - "learning_rate": 3.4228747453940225e-07, - "loss": 0.7642, - "step": 32430 - }, - { - "epoch": 0.9190115900138853, - "grad_norm": 0.0, - "learning_rate": 3.4204944505327633e-07, - "loss": 0.6923, - "step": 32431 - }, - { - "epoch": 0.9190399274561478, - "grad_norm": 0.0, - "learning_rate": 3.41811496919654e-07, - "loss": 0.848, - "step": 32432 - }, - { - "epoch": 0.9190682648984103, - "grad_norm": 0.0, - "learning_rate": 3.4157363014054034e-07, - "loss": 0.8252, - "step": 32433 - }, - { - "epoch": 0.9190966023406727, - "grad_norm": 0.0, - "learning_rate": 3.413358447179393e-07, - "loss": 0.7634, - "step": 32434 - }, - { - "epoch": 0.9191249397829352, - "grad_norm": 0.0, - "learning_rate": 3.410981406538527e-07, - "loss": 0.7803, - "step": 32435 - }, - { - "epoch": 0.9191532772251977, - "grad_norm": 0.0, - "learning_rate": 3.408605179502822e-07, - "loss": 0.8064, - "step": 32436 - }, - { - "epoch": 0.9191816146674601, - "grad_norm": 0.0, - "learning_rate": 3.406229766092317e-07, - "loss": 0.7845, - "step": 32437 - }, - { - "epoch": 0.9192099521097226, - "grad_norm": 0.0, - "learning_rate": 3.403855166326997e-07, - "loss": 0.8756, - "step": 32438 - }, - { - "epoch": 0.919238289551985, - "grad_norm": 0.0, - "learning_rate": 3.40148138022689e-07, - "loss": 0.8699, - "step": 32439 - }, - { - "epoch": 0.9192666269942475, - "grad_norm": 0.0, - "learning_rate": 3.3991084078119684e-07, - "loss": 0.8704, - "step": 32440 - }, - { - "epoch": 0.9192949644365099, - "grad_norm": 0.0, - "learning_rate": 3.396736249102217e-07, - "loss": 0.7739, - "step": 32441 - }, - { - "epoch": 0.9193233018787724, - "grad_norm": 0.0, - "learning_rate": 3.394364904117653e-07, - "loss": 0.8217, - "step": 32442 - }, - { - "epoch": 0.9193516393210349, - "grad_norm": 0.0, - "learning_rate": 3.3919943728782046e-07, - "loss": 0.919, - "step": 32443 - }, - { - "epoch": 0.9193799767632973, - "grad_norm": 0.0, - "learning_rate": 3.389624655403867e-07, - "loss": 0.8162, - "step": 32444 - }, - { - "epoch": 0.9194083142055598, - "grad_norm": 0.0, - "learning_rate": 3.387255751714602e-07, - "loss": 0.7386, - "step": 32445 - }, - { - "epoch": 0.9194366516478223, - "grad_norm": 0.0, - "learning_rate": 3.3848876618303496e-07, - "loss": 0.8021, - "step": 32446 - }, - { - "epoch": 0.9194649890900848, - "grad_norm": 0.0, - "learning_rate": 3.382520385771082e-07, - "loss": 0.8036, - "step": 32447 - }, - { - "epoch": 0.9194933265323472, - "grad_norm": 0.0, - "learning_rate": 3.380153923556706e-07, - "loss": 0.882, - "step": 32448 - }, - { - "epoch": 0.9195216639746097, - "grad_norm": 0.0, - "learning_rate": 3.3777882752071724e-07, - "loss": 0.7798, - "step": 32449 - }, - { - "epoch": 0.9195500014168722, - "grad_norm": 0.0, - "learning_rate": 3.3754234407424204e-07, - "loss": 0.8405, - "step": 32450 - }, - { - "epoch": 0.9195783388591345, - "grad_norm": 0.0, - "learning_rate": 3.3730594201823566e-07, - "loss": 0.7753, - "step": 32451 - }, - { - "epoch": 0.919606676301397, - "grad_norm": 0.0, - "learning_rate": 3.370696213546898e-07, - "loss": 0.8209, - "step": 32452 - }, - { - "epoch": 0.9196350137436595, - "grad_norm": 0.0, - "learning_rate": 3.368333820855929e-07, - "loss": 0.8144, - "step": 32453 - }, - { - "epoch": 0.9196633511859219, - "grad_norm": 0.0, - "learning_rate": 3.365972242129378e-07, - "loss": 0.8007, - "step": 32454 - }, - { - "epoch": 0.9196916886281844, - "grad_norm": 0.0, - "learning_rate": 3.36361147738713e-07, - "loss": 0.8822, - "step": 32455 - }, - { - "epoch": 0.9197200260704469, - "grad_norm": 0.0, - "learning_rate": 3.3612515266490675e-07, - "loss": 0.8954, - "step": 32456 - }, - { - "epoch": 0.9197483635127094, - "grad_norm": 0.0, - "learning_rate": 3.3588923899350645e-07, - "loss": 0.7897, - "step": 32457 - }, - { - "epoch": 0.9197767009549718, - "grad_norm": 0.0, - "learning_rate": 3.3565340672649935e-07, - "loss": 0.8126, - "step": 32458 - }, - { - "epoch": 0.9198050383972343, - "grad_norm": 0.0, - "learning_rate": 3.3541765586587285e-07, - "loss": 0.8563, - "step": 32459 - }, - { - "epoch": 0.9198333758394968, - "grad_norm": 0.0, - "learning_rate": 3.35181986413613e-07, - "loss": 0.7434, - "step": 32460 - }, - { - "epoch": 0.9198617132817591, - "grad_norm": 0.0, - "learning_rate": 3.3494639837170276e-07, - "loss": 0.8099, - "step": 32461 - }, - { - "epoch": 0.9198900507240216, - "grad_norm": 0.0, - "learning_rate": 3.3471089174212936e-07, - "loss": 0.7506, - "step": 32462 - }, - { - "epoch": 0.9199183881662841, - "grad_norm": 0.0, - "learning_rate": 3.344754665268746e-07, - "loss": 0.7673, - "step": 32463 - }, - { - "epoch": 0.9199467256085466, - "grad_norm": 0.0, - "learning_rate": 3.342401227279224e-07, - "loss": 0.7033, - "step": 32464 - }, - { - "epoch": 0.919975063050809, - "grad_norm": 0.0, - "learning_rate": 3.3400486034725455e-07, - "loss": 0.7872, - "step": 32465 - }, - { - "epoch": 0.9200034004930715, - "grad_norm": 0.0, - "learning_rate": 3.337696793868539e-07, - "loss": 0.8384, - "step": 32466 - }, - { - "epoch": 0.920031737935334, - "grad_norm": 0.0, - "learning_rate": 3.3353457984869994e-07, - "loss": 0.7847, - "step": 32467 - }, - { - "epoch": 0.9200600753775964, - "grad_norm": 0.0, - "learning_rate": 3.332995617347745e-07, - "loss": 0.8618, - "step": 32468 - }, - { - "epoch": 0.9200884128198589, - "grad_norm": 0.0, - "learning_rate": 3.3306462504705706e-07, - "loss": 0.8199, - "step": 32469 - }, - { - "epoch": 0.9201167502621214, - "grad_norm": 0.0, - "learning_rate": 3.328297697875249e-07, - "loss": 0.8242, - "step": 32470 - }, - { - "epoch": 0.9201450877043839, - "grad_norm": 0.0, - "learning_rate": 3.3259499595815647e-07, - "loss": 0.6915, - "step": 32471 - }, - { - "epoch": 0.9201734251466462, - "grad_norm": 0.0, - "learning_rate": 3.323603035609313e-07, - "loss": 0.8733, - "step": 32472 - }, - { - "epoch": 0.9202017625889087, - "grad_norm": 0.0, - "learning_rate": 3.321256925978267e-07, - "loss": 0.7831, - "step": 32473 - }, - { - "epoch": 0.9202301000311712, - "grad_norm": 0.0, - "learning_rate": 3.318911630708155e-07, - "loss": 0.8351, - "step": 32474 - }, - { - "epoch": 0.9202584374734336, - "grad_norm": 0.0, - "learning_rate": 3.31656714981875e-07, - "loss": 0.8177, - "step": 32475 - }, - { - "epoch": 0.9202867749156961, - "grad_norm": 0.0, - "learning_rate": 3.314223483329826e-07, - "loss": 0.8434, - "step": 32476 - }, - { - "epoch": 0.9203151123579586, - "grad_norm": 0.0, - "learning_rate": 3.3118806312610775e-07, - "loss": 0.8306, - "step": 32477 - }, - { - "epoch": 0.920343449800221, - "grad_norm": 0.0, - "learning_rate": 3.309538593632267e-07, - "loss": 0.8088, - "step": 32478 - }, - { - "epoch": 0.9203717872424835, - "grad_norm": 0.0, - "learning_rate": 3.307197370463133e-07, - "loss": 0.7404, - "step": 32479 - }, - { - "epoch": 0.920400124684746, - "grad_norm": 0.0, - "learning_rate": 3.304856961773362e-07, - "loss": 0.7019, - "step": 32480 - }, - { - "epoch": 0.9204284621270085, - "grad_norm": 0.0, - "learning_rate": 3.3025173675826917e-07, - "loss": 0.7313, - "step": 32481 - }, - { - "epoch": 0.9204567995692708, - "grad_norm": 0.0, - "learning_rate": 3.300178587910829e-07, - "loss": 0.7873, - "step": 32482 - }, - { - "epoch": 0.9204851370115333, - "grad_norm": 0.0, - "learning_rate": 3.297840622777471e-07, - "loss": 0.7885, - "step": 32483 - }, - { - "epoch": 0.9205134744537958, - "grad_norm": 0.0, - "learning_rate": 3.2955034722023214e-07, - "loss": 0.7511, - "step": 32484 - }, - { - "epoch": 0.9205418118960582, - "grad_norm": 0.0, - "learning_rate": 3.2931671362050444e-07, - "loss": 0.7414, - "step": 32485 - }, - { - "epoch": 0.9205701493383207, - "grad_norm": 0.0, - "learning_rate": 3.2908316148053234e-07, - "loss": 0.8526, - "step": 32486 - }, - { - "epoch": 0.9205984867805832, - "grad_norm": 0.0, - "learning_rate": 3.2884969080228644e-07, - "loss": 0.8512, - "step": 32487 - }, - { - "epoch": 0.9206268242228457, - "grad_norm": 0.0, - "learning_rate": 3.286163015877286e-07, - "loss": 0.8786, - "step": 32488 - }, - { - "epoch": 0.9206551616651081, - "grad_norm": 0.0, - "learning_rate": 3.283829938388294e-07, - "loss": 0.6368, - "step": 32489 - }, - { - "epoch": 0.9206834991073706, - "grad_norm": 0.0, - "learning_rate": 3.281497675575496e-07, - "loss": 0.7971, - "step": 32490 - }, - { - "epoch": 0.9207118365496331, - "grad_norm": 0.0, - "learning_rate": 3.2791662274585636e-07, - "loss": 0.7344, - "step": 32491 - }, - { - "epoch": 0.9207401739918954, - "grad_norm": 0.0, - "learning_rate": 3.276835594057137e-07, - "loss": 0.8235, - "step": 32492 - }, - { - "epoch": 0.9207685114341579, - "grad_norm": 0.0, - "learning_rate": 3.274505775390835e-07, - "loss": 0.8223, - "step": 32493 - }, - { - "epoch": 0.9207968488764204, - "grad_norm": 0.0, - "learning_rate": 3.272176771479285e-07, - "loss": 0.753, - "step": 32494 - }, - { - "epoch": 0.9208251863186829, - "grad_norm": 0.0, - "learning_rate": 3.2698485823421057e-07, - "loss": 0.8084, - "step": 32495 - }, - { - "epoch": 0.9208535237609453, - "grad_norm": 0.0, - "learning_rate": 3.267521207998925e-07, - "loss": 0.7503, - "step": 32496 - }, - { - "epoch": 0.9208818612032078, - "grad_norm": 0.0, - "learning_rate": 3.2651946484693274e-07, - "loss": 0.7682, - "step": 32497 - }, - { - "epoch": 0.9209101986454703, - "grad_norm": 0.0, - "learning_rate": 3.26286890377292e-07, - "loss": 0.7825, - "step": 32498 - }, - { - "epoch": 0.9209385360877327, - "grad_norm": 0.0, - "learning_rate": 3.2605439739292863e-07, - "loss": 0.8048, - "step": 32499 - }, - { - "epoch": 0.9209668735299952, - "grad_norm": 0.0, - "learning_rate": 3.258219858958023e-07, - "loss": 0.7058, - "step": 32500 - }, - { - "epoch": 0.9209952109722577, - "grad_norm": 0.0, - "learning_rate": 3.2558965588786905e-07, - "loss": 0.767, - "step": 32501 - }, - { - "epoch": 0.92102354841452, - "grad_norm": 0.0, - "learning_rate": 3.253574073710875e-07, - "loss": 0.8904, - "step": 32502 - }, - { - "epoch": 0.9210518858567825, - "grad_norm": 0.0, - "learning_rate": 3.2512524034741146e-07, - "loss": 0.7674, - "step": 32503 - }, - { - "epoch": 0.921080223299045, - "grad_norm": 0.0, - "learning_rate": 3.248931548187995e-07, - "loss": 0.7751, - "step": 32504 - }, - { - "epoch": 0.9211085607413075, - "grad_norm": 0.0, - "learning_rate": 3.246611507872055e-07, - "loss": 0.8569, - "step": 32505 - }, - { - "epoch": 0.9211368981835699, - "grad_norm": 0.0, - "learning_rate": 3.2442922825458243e-07, - "loss": 0.778, - "step": 32506 - }, - { - "epoch": 0.9211652356258324, - "grad_norm": 0.0, - "learning_rate": 3.241973872228843e-07, - "loss": 0.7897, - "step": 32507 - }, - { - "epoch": 0.9211935730680949, - "grad_norm": 0.0, - "learning_rate": 3.23965627694065e-07, - "loss": 0.8647, - "step": 32508 - }, - { - "epoch": 0.9212219105103573, - "grad_norm": 0.0, - "learning_rate": 3.2373394967007753e-07, - "loss": 0.8401, - "step": 32509 - }, - { - "epoch": 0.9212502479526198, - "grad_norm": 0.0, - "learning_rate": 3.2350235315287136e-07, - "loss": 0.7741, - "step": 32510 - }, - { - "epoch": 0.9212785853948823, - "grad_norm": 0.0, - "learning_rate": 3.232708381443983e-07, - "loss": 0.9023, - "step": 32511 - }, - { - "epoch": 0.9213069228371448, - "grad_norm": 0.0, - "learning_rate": 3.230394046466079e-07, - "loss": 0.7873, - "step": 32512 - }, - { - "epoch": 0.9213352602794072, - "grad_norm": 0.0, - "learning_rate": 3.228080526614519e-07, - "loss": 0.7031, - "step": 32513 - }, - { - "epoch": 0.9213635977216696, - "grad_norm": 0.0, - "learning_rate": 3.2257678219087543e-07, - "loss": 0.7792, - "step": 32514 - }, - { - "epoch": 0.9213919351639321, - "grad_norm": 0.0, - "learning_rate": 3.22345593236828e-07, - "loss": 0.7449, - "step": 32515 - }, - { - "epoch": 0.9214202726061945, - "grad_norm": 0.0, - "learning_rate": 3.2211448580125927e-07, - "loss": 0.7073, - "step": 32516 - }, - { - "epoch": 0.921448610048457, - "grad_norm": 0.0, - "learning_rate": 3.2188345988611205e-07, - "loss": 0.835, - "step": 32517 - }, - { - "epoch": 0.9214769474907195, - "grad_norm": 0.0, - "learning_rate": 3.2165251549333585e-07, - "loss": 0.7211, - "step": 32518 - }, - { - "epoch": 0.9215052849329819, - "grad_norm": 0.0, - "learning_rate": 3.2142165262487366e-07, - "loss": 0.9277, - "step": 32519 - }, - { - "epoch": 0.9215336223752444, - "grad_norm": 0.0, - "learning_rate": 3.211908712826706e-07, - "loss": 0.777, - "step": 32520 - }, - { - "epoch": 0.9215619598175069, - "grad_norm": 0.0, - "learning_rate": 3.2096017146867166e-07, - "loss": 0.7462, - "step": 32521 - }, - { - "epoch": 0.9215902972597694, - "grad_norm": 0.0, - "learning_rate": 3.207295531848187e-07, - "loss": 0.8262, - "step": 32522 - }, - { - "epoch": 0.9216186347020318, - "grad_norm": 0.0, - "learning_rate": 3.2049901643305456e-07, - "loss": 0.7791, - "step": 32523 - }, - { - "epoch": 0.9216469721442943, - "grad_norm": 0.0, - "learning_rate": 3.2026856121532336e-07, - "loss": 0.9127, - "step": 32524 - }, - { - "epoch": 0.9216753095865567, - "grad_norm": 0.0, - "learning_rate": 3.200381875335634e-07, - "loss": 0.8293, - "step": 32525 - }, - { - "epoch": 0.9217036470288191, - "grad_norm": 0.0, - "learning_rate": 3.198078953897177e-07, - "loss": 0.8271, - "step": 32526 - }, - { - "epoch": 0.9217319844710816, - "grad_norm": 0.0, - "learning_rate": 3.195776847857235e-07, - "loss": 0.8346, - "step": 32527 - }, - { - "epoch": 0.9217603219133441, - "grad_norm": 0.0, - "learning_rate": 3.193475557235215e-07, - "loss": 0.791, - "step": 32528 - }, - { - "epoch": 0.9217886593556066, - "grad_norm": 0.0, - "learning_rate": 3.191175082050502e-07, - "loss": 0.847, - "step": 32529 - }, - { - "epoch": 0.921816996797869, - "grad_norm": 0.0, - "learning_rate": 3.1888754223224574e-07, - "loss": 0.9144, - "step": 32530 - }, - { - "epoch": 0.9218453342401315, - "grad_norm": 0.0, - "learning_rate": 3.186576578070488e-07, - "loss": 0.8291, - "step": 32531 - }, - { - "epoch": 0.921873671682394, - "grad_norm": 0.0, - "learning_rate": 3.1842785493139126e-07, - "loss": 0.7172, - "step": 32532 - }, - { - "epoch": 0.9219020091246564, - "grad_norm": 0.0, - "learning_rate": 3.1819813360721154e-07, - "loss": 0.7975, - "step": 32533 - }, - { - "epoch": 0.9219303465669189, - "grad_norm": 0.0, - "learning_rate": 3.179684938364447e-07, - "loss": 0.797, - "step": 32534 - }, - { - "epoch": 0.9219586840091814, - "grad_norm": 0.0, - "learning_rate": 3.1773893562102363e-07, - "loss": 0.7173, - "step": 32535 - }, - { - "epoch": 0.9219870214514438, - "grad_norm": 0.0, - "learning_rate": 3.1750945896288353e-07, - "loss": 0.8546, - "step": 32536 - }, - { - "epoch": 0.9220153588937062, - "grad_norm": 0.0, - "learning_rate": 3.1728006386395726e-07, - "loss": 0.7169, - "step": 32537 - }, - { - "epoch": 0.9220436963359687, - "grad_norm": 0.0, - "learning_rate": 3.1705075032617663e-07, - "loss": 0.8332, - "step": 32538 - }, - { - "epoch": 0.9220720337782312, - "grad_norm": 0.0, - "learning_rate": 3.1682151835147336e-07, - "loss": 0.7727, - "step": 32539 - }, - { - "epoch": 0.9221003712204936, - "grad_norm": 0.0, - "learning_rate": 3.1659236794177704e-07, - "loss": 0.8038, - "step": 32540 - }, - { - "epoch": 0.9221287086627561, - "grad_norm": 0.0, - "learning_rate": 3.1636329909901954e-07, - "loss": 0.8465, - "step": 32541 - }, - { - "epoch": 0.9221570461050186, - "grad_norm": 0.0, - "learning_rate": 3.1613431182513035e-07, - "loss": 0.8502, - "step": 32542 - }, - { - "epoch": 0.922185383547281, - "grad_norm": 0.0, - "learning_rate": 3.159054061220379e-07, - "loss": 0.7249, - "step": 32543 - }, - { - "epoch": 0.9222137209895435, - "grad_norm": 0.0, - "learning_rate": 3.156765819916696e-07, - "loss": 0.7213, - "step": 32544 - }, - { - "epoch": 0.922242058431806, - "grad_norm": 0.0, - "learning_rate": 3.15447839435955e-07, - "loss": 0.8403, - "step": 32545 - }, - { - "epoch": 0.9222703958740684, - "grad_norm": 0.0, - "learning_rate": 3.152191784568182e-07, - "loss": 0.822, - "step": 32546 - }, - { - "epoch": 0.9222987333163308, - "grad_norm": 0.0, - "learning_rate": 3.1499059905618634e-07, - "loss": 0.8828, - "step": 32547 - }, - { - "epoch": 0.9223270707585933, - "grad_norm": 0.0, - "learning_rate": 3.147621012359847e-07, - "loss": 0.8292, - "step": 32548 - }, - { - "epoch": 0.9223554082008558, - "grad_norm": 0.0, - "learning_rate": 3.145336849981395e-07, - "loss": 0.7277, - "step": 32549 - }, - { - "epoch": 0.9223837456431182, - "grad_norm": 0.0, - "learning_rate": 3.143053503445748e-07, - "loss": 0.7046, - "step": 32550 - }, - { - "epoch": 0.9224120830853807, - "grad_norm": 0.0, - "learning_rate": 3.1407709727721114e-07, - "loss": 0.8022, - "step": 32551 - }, - { - "epoch": 0.9224404205276432, - "grad_norm": 0.0, - "learning_rate": 3.138489257979727e-07, - "loss": 0.731, - "step": 32552 - }, - { - "epoch": 0.9224687579699057, - "grad_norm": 0.0, - "learning_rate": 3.1362083590878333e-07, - "loss": 0.8185, - "step": 32553 - }, - { - "epoch": 0.9224970954121681, - "grad_norm": 0.0, - "learning_rate": 3.1339282761156055e-07, - "loss": 0.7313, - "step": 32554 - }, - { - "epoch": 0.9225254328544306, - "grad_norm": 0.0, - "learning_rate": 3.1316490090822935e-07, - "loss": 0.8431, - "step": 32555 - }, - { - "epoch": 0.922553770296693, - "grad_norm": 0.0, - "learning_rate": 3.12937055800705e-07, - "loss": 0.7075, - "step": 32556 - }, - { - "epoch": 0.9225821077389554, - "grad_norm": 0.0, - "learning_rate": 3.127092922909103e-07, - "loss": 0.7832, - "step": 32557 - }, - { - "epoch": 0.9226104451812179, - "grad_norm": 0.0, - "learning_rate": 3.124816103807626e-07, - "loss": 0.8046, - "step": 32558 - }, - { - "epoch": 0.9226387826234804, - "grad_norm": 0.0, - "learning_rate": 3.122540100721794e-07, - "loss": 0.8287, - "step": 32559 - }, - { - "epoch": 0.9226671200657429, - "grad_norm": 0.0, - "learning_rate": 3.1202649136707787e-07, - "loss": 0.7947, - "step": 32560 - }, - { - "epoch": 0.9226954575080053, - "grad_norm": 0.0, - "learning_rate": 3.1179905426737435e-07, - "loss": 0.7776, - "step": 32561 - }, - { - "epoch": 0.9227237949502678, - "grad_norm": 0.0, - "learning_rate": 3.1157169877498506e-07, - "loss": 0.8599, - "step": 32562 - }, - { - "epoch": 0.9227521323925303, - "grad_norm": 0.0, - "learning_rate": 3.1134442489182626e-07, - "loss": 0.805, - "step": 32563 - }, - { - "epoch": 0.9227804698347927, - "grad_norm": 0.0, - "learning_rate": 3.1111723261980976e-07, - "loss": 0.6976, - "step": 32564 - }, - { - "epoch": 0.9228088072770552, - "grad_norm": 0.0, - "learning_rate": 3.108901219608518e-07, - "loss": 0.7107, - "step": 32565 - }, - { - "epoch": 0.9228371447193177, - "grad_norm": 0.0, - "learning_rate": 3.1066309291686414e-07, - "loss": 0.8045, - "step": 32566 - }, - { - "epoch": 0.92286548216158, - "grad_norm": 0.0, - "learning_rate": 3.1043614548975864e-07, - "loss": 0.7477, - "step": 32567 - }, - { - "epoch": 0.9228938196038425, - "grad_norm": 0.0, - "learning_rate": 3.1020927968144934e-07, - "loss": 0.8407, - "step": 32568 - }, - { - "epoch": 0.922922157046105, - "grad_norm": 0.0, - "learning_rate": 3.099824954938435e-07, - "loss": 0.7429, - "step": 32569 - }, - { - "epoch": 0.9229504944883675, - "grad_norm": 0.0, - "learning_rate": 3.097557929288542e-07, - "loss": 0.7303, - "step": 32570 - }, - { - "epoch": 0.9229788319306299, - "grad_norm": 0.0, - "learning_rate": 3.0952917198839084e-07, - "loss": 0.7745, - "step": 32571 - }, - { - "epoch": 0.9230071693728924, - "grad_norm": 0.0, - "learning_rate": 3.093026326743609e-07, - "loss": 0.8088, - "step": 32572 - }, - { - "epoch": 0.9230355068151549, - "grad_norm": 0.0, - "learning_rate": 3.090761749886728e-07, - "loss": 0.8621, - "step": 32573 - }, - { - "epoch": 0.9230638442574173, - "grad_norm": 0.0, - "learning_rate": 3.088497989332351e-07, - "loss": 0.894, - "step": 32574 - }, - { - "epoch": 0.9230921816996798, - "grad_norm": 0.0, - "learning_rate": 3.0862350450995393e-07, - "loss": 0.7971, - "step": 32575 - }, - { - "epoch": 0.9231205191419423, - "grad_norm": 0.0, - "learning_rate": 3.0839729172073785e-07, - "loss": 0.8046, - "step": 32576 - }, - { - "epoch": 0.9231488565842048, - "grad_norm": 0.0, - "learning_rate": 3.0817116056748863e-07, - "loss": 0.9058, - "step": 32577 - }, - { - "epoch": 0.9231771940264671, - "grad_norm": 0.0, - "learning_rate": 3.0794511105211254e-07, - "loss": 0.7952, - "step": 32578 - }, - { - "epoch": 0.9232055314687296, - "grad_norm": 0.0, - "learning_rate": 3.0771914317651475e-07, - "loss": 0.8609, - "step": 32579 - }, - { - "epoch": 0.9232338689109921, - "grad_norm": 0.0, - "learning_rate": 3.07493256942597e-07, - "loss": 0.8768, - "step": 32580 - }, - { - "epoch": 0.9232622063532545, - "grad_norm": 0.0, - "learning_rate": 3.072674523522623e-07, - "loss": 0.6907, - "step": 32581 - }, - { - "epoch": 0.923290543795517, - "grad_norm": 0.0, - "learning_rate": 3.070417294074146e-07, - "loss": 0.8765, - "step": 32582 - }, - { - "epoch": 0.9233188812377795, - "grad_norm": 0.0, - "learning_rate": 3.0681608810995355e-07, - "loss": 0.6901, - "step": 32583 - }, - { - "epoch": 0.923347218680042, - "grad_norm": 0.0, - "learning_rate": 3.065905284617798e-07, - "loss": 0.7662, - "step": 32584 - }, - { - "epoch": 0.9233755561223044, - "grad_norm": 0.0, - "learning_rate": 3.06365050464793e-07, - "loss": 0.8218, - "step": 32585 - }, - { - "epoch": 0.9234038935645669, - "grad_norm": 0.0, - "learning_rate": 3.0613965412089387e-07, - "loss": 0.8234, - "step": 32586 - }, - { - "epoch": 0.9234322310068294, - "grad_norm": 0.0, - "learning_rate": 3.059143394319786e-07, - "loss": 0.756, - "step": 32587 - }, - { - "epoch": 0.9234605684490917, - "grad_norm": 0.0, - "learning_rate": 3.0568910639994785e-07, - "loss": 0.7808, - "step": 32588 - }, - { - "epoch": 0.9234889058913542, - "grad_norm": 0.0, - "learning_rate": 3.05463955026698e-07, - "loss": 0.8008, - "step": 32589 - }, - { - "epoch": 0.9235172433336167, - "grad_norm": 0.0, - "learning_rate": 3.0523888531412527e-07, - "loss": 0.817, - "step": 32590 - }, - { - "epoch": 0.9235455807758791, - "grad_norm": 0.0, - "learning_rate": 3.0501389726412367e-07, - "loss": 0.7382, - "step": 32591 - }, - { - "epoch": 0.9235739182181416, - "grad_norm": 0.0, - "learning_rate": 3.047889908785928e-07, - "loss": 0.9228, - "step": 32592 - }, - { - "epoch": 0.9236022556604041, - "grad_norm": 0.0, - "learning_rate": 3.045641661594223e-07, - "loss": 0.7652, - "step": 32593 - }, - { - "epoch": 0.9236305931026666, - "grad_norm": 0.0, - "learning_rate": 3.043394231085095e-07, - "loss": 0.8751, - "step": 32594 - }, - { - "epoch": 0.923658930544929, - "grad_norm": 0.0, - "learning_rate": 3.0411476172774625e-07, - "loss": 0.7833, - "step": 32595 - }, - { - "epoch": 0.9236872679871915, - "grad_norm": 0.0, - "learning_rate": 3.0389018201902434e-07, - "loss": 0.7904, - "step": 32596 - }, - { - "epoch": 0.923715605429454, - "grad_norm": 0.0, - "learning_rate": 3.036656839842356e-07, - "loss": 0.8422, - "step": 32597 - }, - { - "epoch": 0.9237439428717164, - "grad_norm": 0.0, - "learning_rate": 3.034412676252718e-07, - "loss": 0.9535, - "step": 32598 - }, - { - "epoch": 0.9237722803139788, - "grad_norm": 0.0, - "learning_rate": 3.0321693294402264e-07, - "loss": 0.7849, - "step": 32599 - }, - { - "epoch": 0.9238006177562413, - "grad_norm": 0.0, - "learning_rate": 3.029926799423777e-07, - "loss": 0.9123, - "step": 32600 - }, - { - "epoch": 0.9238289551985038, - "grad_norm": 0.0, - "learning_rate": 3.0276850862222653e-07, - "loss": 0.7886, - "step": 32601 - }, - { - "epoch": 0.9238572926407662, - "grad_norm": 0.0, - "learning_rate": 3.025444189854576e-07, - "loss": 0.8102, - "step": 32602 - }, - { - "epoch": 0.9238856300830287, - "grad_norm": 0.0, - "learning_rate": 3.023204110339584e-07, - "loss": 0.8951, - "step": 32603 - }, - { - "epoch": 0.9239139675252912, - "grad_norm": 0.0, - "learning_rate": 3.020964847696151e-07, - "loss": 0.7474, - "step": 32604 - }, - { - "epoch": 0.9239423049675536, - "grad_norm": 0.0, - "learning_rate": 3.0187264019431506e-07, - "loss": 0.7618, - "step": 32605 - }, - { - "epoch": 0.9239706424098161, - "grad_norm": 0.0, - "learning_rate": 3.016488773099424e-07, - "loss": 0.7918, - "step": 32606 - }, - { - "epoch": 0.9239989798520786, - "grad_norm": 0.0, - "learning_rate": 3.014251961183823e-07, - "loss": 0.7893, - "step": 32607 - }, - { - "epoch": 0.9240273172943411, - "grad_norm": 0.0, - "learning_rate": 3.0120159662152095e-07, - "loss": 0.7444, - "step": 32608 - }, - { - "epoch": 0.9240556547366034, - "grad_norm": 0.0, - "learning_rate": 3.00978078821238e-07, - "loss": 0.6873, - "step": 32609 - }, - { - "epoch": 0.9240839921788659, - "grad_norm": 0.0, - "learning_rate": 3.007546427194186e-07, - "loss": 0.8311, - "step": 32610 - }, - { - "epoch": 0.9241123296211284, - "grad_norm": 0.0, - "learning_rate": 3.0053128831794564e-07, - "loss": 0.7163, - "step": 32611 - }, - { - "epoch": 0.9241406670633908, - "grad_norm": 0.0, - "learning_rate": 3.003080156186988e-07, - "loss": 0.6873, - "step": 32612 - }, - { - "epoch": 0.9241690045056533, - "grad_norm": 0.0, - "learning_rate": 3.0008482462355993e-07, - "loss": 0.7295, - "step": 32613 - }, - { - "epoch": 0.9241973419479158, - "grad_norm": 0.0, - "learning_rate": 2.9986171533440746e-07, - "loss": 0.838, - "step": 32614 - }, - { - "epoch": 0.9242256793901782, - "grad_norm": 0.0, - "learning_rate": 2.996386877531221e-07, - "loss": 0.7386, - "step": 32615 - }, - { - "epoch": 0.9242540168324407, - "grad_norm": 0.0, - "learning_rate": 2.994157418815835e-07, - "loss": 0.7917, - "step": 32616 - }, - { - "epoch": 0.9242823542747032, - "grad_norm": 0.0, - "learning_rate": 2.991928777216668e-07, - "loss": 0.8017, - "step": 32617 - }, - { - "epoch": 0.9243106917169657, - "grad_norm": 0.0, - "learning_rate": 2.989700952752528e-07, - "loss": 0.7371, - "step": 32618 - }, - { - "epoch": 0.924339029159228, - "grad_norm": 0.0, - "learning_rate": 2.987473945442143e-07, - "loss": 0.9048, - "step": 32619 - }, - { - "epoch": 0.9243673666014905, - "grad_norm": 0.0, - "learning_rate": 2.985247755304288e-07, - "loss": 0.8271, - "step": 32620 - }, - { - "epoch": 0.924395704043753, - "grad_norm": 0.0, - "learning_rate": 2.9830223823577367e-07, - "loss": 0.8829, - "step": 32621 - }, - { - "epoch": 0.9244240414860154, - "grad_norm": 0.0, - "learning_rate": 2.9807978266211955e-07, - "loss": 0.8183, - "step": 32622 - }, - { - "epoch": 0.9244523789282779, - "grad_norm": 0.0, - "learning_rate": 2.9785740881134175e-07, - "loss": 0.7866, - "step": 32623 - }, - { - "epoch": 0.9244807163705404, - "grad_norm": 0.0, - "learning_rate": 2.9763511668531644e-07, - "loss": 0.8816, - "step": 32624 - }, - { - "epoch": 0.9245090538128029, - "grad_norm": 0.0, - "learning_rate": 2.97412906285911e-07, - "loss": 0.8442, - "step": 32625 - }, - { - "epoch": 0.9245373912550653, - "grad_norm": 0.0, - "learning_rate": 2.971907776149996e-07, - "loss": 0.8028, - "step": 32626 - }, - { - "epoch": 0.9245657286973278, - "grad_norm": 0.0, - "learning_rate": 2.96968730674454e-07, - "loss": 0.8173, - "step": 32627 - }, - { - "epoch": 0.9245940661395903, - "grad_norm": 0.0, - "learning_rate": 2.967467654661438e-07, - "loss": 0.7439, - "step": 32628 - }, - { - "epoch": 0.9246224035818527, - "grad_norm": 0.0, - "learning_rate": 2.965248819919397e-07, - "loss": 0.8861, - "step": 32629 - }, - { - "epoch": 0.9246507410241152, - "grad_norm": 0.0, - "learning_rate": 2.9630308025370815e-07, - "loss": 0.7851, - "step": 32630 - }, - { - "epoch": 0.9246790784663776, - "grad_norm": 0.0, - "learning_rate": 2.960813602533197e-07, - "loss": 0.7562, - "step": 32631 - }, - { - "epoch": 0.9247074159086401, - "grad_norm": 0.0, - "learning_rate": 2.958597219926429e-07, - "loss": 0.8367, - "step": 32632 - }, - { - "epoch": 0.9247357533509025, - "grad_norm": 0.0, - "learning_rate": 2.9563816547354185e-07, - "loss": 0.783, - "step": 32633 - }, - { - "epoch": 0.924764090793165, - "grad_norm": 0.0, - "learning_rate": 2.9541669069788505e-07, - "loss": 0.8158, - "step": 32634 - }, - { - "epoch": 0.9247924282354275, - "grad_norm": 0.0, - "learning_rate": 2.9519529766753654e-07, - "loss": 0.7806, - "step": 32635 - }, - { - "epoch": 0.9248207656776899, - "grad_norm": 0.0, - "learning_rate": 2.949739863843615e-07, - "loss": 0.7865, - "step": 32636 - }, - { - "epoch": 0.9248491031199524, - "grad_norm": 0.0, - "learning_rate": 2.947527568502251e-07, - "loss": 0.7646, - "step": 32637 - }, - { - "epoch": 0.9248774405622149, - "grad_norm": 0.0, - "learning_rate": 2.945316090669892e-07, - "loss": 0.7826, - "step": 32638 - }, - { - "epoch": 0.9249057780044773, - "grad_norm": 0.0, - "learning_rate": 2.9431054303651787e-07, - "loss": 0.7982, - "step": 32639 - }, - { - "epoch": 0.9249341154467398, - "grad_norm": 0.0, - "learning_rate": 2.940895587606729e-07, - "loss": 0.862, - "step": 32640 - }, - { - "epoch": 0.9249624528890023, - "grad_norm": 0.0, - "learning_rate": 2.938686562413162e-07, - "loss": 0.7624, - "step": 32641 - }, - { - "epoch": 0.9249907903312647, - "grad_norm": 0.0, - "learning_rate": 2.936478354803085e-07, - "loss": 0.8958, - "step": 32642 - }, - { - "epoch": 0.9250191277735271, - "grad_norm": 0.0, - "learning_rate": 2.934270964795094e-07, - "loss": 0.7908, - "step": 32643 - }, - { - "epoch": 0.9250474652157896, - "grad_norm": 0.0, - "learning_rate": 2.932064392407774e-07, - "loss": 0.858, - "step": 32644 - }, - { - "epoch": 0.9250758026580521, - "grad_norm": 0.0, - "learning_rate": 2.929858637659733e-07, - "loss": 0.8301, - "step": 32645 - }, - { - "epoch": 0.9251041401003145, - "grad_norm": 0.0, - "learning_rate": 2.9276537005695215e-07, - "loss": 0.6989, - "step": 32646 - }, - { - "epoch": 0.925132477542577, - "grad_norm": 0.0, - "learning_rate": 2.925449581155748e-07, - "loss": 0.6797, - "step": 32647 - }, - { - "epoch": 0.9251608149848395, - "grad_norm": 0.0, - "learning_rate": 2.923246279436953e-07, - "loss": 0.8265, - "step": 32648 - }, - { - "epoch": 0.925189152427102, - "grad_norm": 0.0, - "learning_rate": 2.9210437954316997e-07, - "loss": 0.7828, - "step": 32649 - }, - { - "epoch": 0.9252174898693644, - "grad_norm": 0.0, - "learning_rate": 2.91884212915855e-07, - "loss": 0.8454, - "step": 32650 - }, - { - "epoch": 0.9252458273116269, - "grad_norm": 0.0, - "learning_rate": 2.9166412806360455e-07, - "loss": 0.8017, - "step": 32651 - }, - { - "epoch": 0.9252741647538893, - "grad_norm": 0.0, - "learning_rate": 2.9144412498827156e-07, - "loss": 0.7603, - "step": 32652 - }, - { - "epoch": 0.9253025021961517, - "grad_norm": 0.0, - "learning_rate": 2.91224203691709e-07, - "loss": 0.8512, - "step": 32653 - }, - { - "epoch": 0.9253308396384142, - "grad_norm": 0.0, - "learning_rate": 2.9100436417577093e-07, - "loss": 0.9108, - "step": 32654 - }, - { - "epoch": 0.9253591770806767, - "grad_norm": 0.0, - "learning_rate": 2.9078460644231033e-07, - "loss": 0.881, - "step": 32655 - }, - { - "epoch": 0.9253875145229392, - "grad_norm": 0.0, - "learning_rate": 2.905649304931746e-07, - "loss": 0.8502, - "step": 32656 - }, - { - "epoch": 0.9254158519652016, - "grad_norm": 0.0, - "learning_rate": 2.9034533633021555e-07, - "loss": 0.8126, - "step": 32657 - }, - { - "epoch": 0.9254441894074641, - "grad_norm": 0.0, - "learning_rate": 2.901258239552851e-07, - "loss": 0.7618, - "step": 32658 - }, - { - "epoch": 0.9254725268497266, - "grad_norm": 0.0, - "learning_rate": 2.8990639337022844e-07, - "loss": 0.7634, - "step": 32659 - }, - { - "epoch": 0.925500864291989, - "grad_norm": 0.0, - "learning_rate": 2.896870445768973e-07, - "loss": 0.7701, - "step": 32660 - }, - { - "epoch": 0.9255292017342515, - "grad_norm": 0.0, - "learning_rate": 2.894677775771382e-07, - "loss": 0.725, - "step": 32661 - }, - { - "epoch": 0.925557539176514, - "grad_norm": 0.0, - "learning_rate": 2.8924859237279725e-07, - "loss": 0.8003, - "step": 32662 - }, - { - "epoch": 0.9255858766187763, - "grad_norm": 0.0, - "learning_rate": 2.8902948896572194e-07, - "loss": 0.8316, - "step": 32663 - }, - { - "epoch": 0.9256142140610388, - "grad_norm": 0.0, - "learning_rate": 2.888104673577574e-07, - "loss": 0.8494, - "step": 32664 - }, - { - "epoch": 0.9256425515033013, - "grad_norm": 0.0, - "learning_rate": 2.885915275507467e-07, - "loss": 0.7994, - "step": 32665 - }, - { - "epoch": 0.9256708889455638, - "grad_norm": 0.0, - "learning_rate": 2.883726695465372e-07, - "loss": 0.802, - "step": 32666 - }, - { - "epoch": 0.9256992263878262, - "grad_norm": 0.0, - "learning_rate": 2.881538933469707e-07, - "loss": 0.7909, - "step": 32667 - }, - { - "epoch": 0.9257275638300887, - "grad_norm": 0.0, - "learning_rate": 2.879351989538914e-07, - "loss": 0.8327, - "step": 32668 - }, - { - "epoch": 0.9257559012723512, - "grad_norm": 0.0, - "learning_rate": 2.877165863691389e-07, - "loss": 0.7376, - "step": 32669 - }, - { - "epoch": 0.9257842387146136, - "grad_norm": 0.0, - "learning_rate": 2.8749805559455724e-07, - "loss": 0.7721, - "step": 32670 - }, - { - "epoch": 0.9258125761568761, - "grad_norm": 0.0, - "learning_rate": 2.8727960663198607e-07, - "loss": 0.7846, - "step": 32671 - }, - { - "epoch": 0.9258409135991386, - "grad_norm": 0.0, - "learning_rate": 2.8706123948326504e-07, - "loss": 0.779, - "step": 32672 - }, - { - "epoch": 0.925869251041401, - "grad_norm": 0.0, - "learning_rate": 2.868429541502338e-07, - "loss": 0.8481, - "step": 32673 - }, - { - "epoch": 0.9258975884836634, - "grad_norm": 0.0, - "learning_rate": 2.8662475063473195e-07, - "loss": 0.8347, - "step": 32674 - }, - { - "epoch": 0.9259259259259259, - "grad_norm": 0.0, - "learning_rate": 2.864066289385969e-07, - "loss": 0.735, - "step": 32675 - }, - { - "epoch": 0.9259542633681884, - "grad_norm": 0.0, - "learning_rate": 2.861885890636662e-07, - "loss": 0.7995, - "step": 32676 - }, - { - "epoch": 0.9259826008104508, - "grad_norm": 0.0, - "learning_rate": 2.859706310117749e-07, - "loss": 0.852, - "step": 32677 - }, - { - "epoch": 0.9260109382527133, - "grad_norm": 0.0, - "learning_rate": 2.8575275478476047e-07, - "loss": 0.8901, - "step": 32678 - }, - { - "epoch": 0.9260392756949758, - "grad_norm": 0.0, - "learning_rate": 2.8553496038445707e-07, - "loss": 0.8027, - "step": 32679 - }, - { - "epoch": 0.9260676131372383, - "grad_norm": 0.0, - "learning_rate": 2.853172478127009e-07, - "loss": 0.767, - "step": 32680 - }, - { - "epoch": 0.9260959505795007, - "grad_norm": 0.0, - "learning_rate": 2.8509961707132496e-07, - "loss": 0.8307, - "step": 32681 - }, - { - "epoch": 0.9261242880217632, - "grad_norm": 0.0, - "learning_rate": 2.848820681621633e-07, - "loss": 0.7855, - "step": 32682 - }, - { - "epoch": 0.9261526254640257, - "grad_norm": 0.0, - "learning_rate": 2.8466460108704685e-07, - "loss": 0.7805, - "step": 32683 - }, - { - "epoch": 0.926180962906288, - "grad_norm": 0.0, - "learning_rate": 2.8444721584780956e-07, - "loss": 0.8329, - "step": 32684 - }, - { - "epoch": 0.9262093003485505, - "grad_norm": 0.0, - "learning_rate": 2.8422991244627995e-07, - "loss": 0.8333, - "step": 32685 - }, - { - "epoch": 0.926237637790813, - "grad_norm": 0.0, - "learning_rate": 2.840126908842888e-07, - "loss": 0.7608, - "step": 32686 - }, - { - "epoch": 0.9262659752330754, - "grad_norm": 0.0, - "learning_rate": 2.837955511636681e-07, - "loss": 0.9116, - "step": 32687 - }, - { - "epoch": 0.9262943126753379, - "grad_norm": 0.0, - "learning_rate": 2.835784932862451e-07, - "loss": 0.8137, - "step": 32688 - }, - { - "epoch": 0.9263226501176004, - "grad_norm": 0.0, - "learning_rate": 2.833615172538473e-07, - "loss": 0.8542, - "step": 32689 - }, - { - "epoch": 0.9263509875598629, - "grad_norm": 0.0, - "learning_rate": 2.831446230683055e-07, - "loss": 0.8433, - "step": 32690 - }, - { - "epoch": 0.9263793250021253, - "grad_norm": 0.0, - "learning_rate": 2.829278107314437e-07, - "loss": 0.7995, - "step": 32691 - }, - { - "epoch": 0.9264076624443878, - "grad_norm": 0.0, - "learning_rate": 2.8271108024508943e-07, - "loss": 0.7984, - "step": 32692 - }, - { - "epoch": 0.9264359998866503, - "grad_norm": 0.0, - "learning_rate": 2.824944316110678e-07, - "loss": 0.8424, - "step": 32693 - }, - { - "epoch": 0.9264643373289126, - "grad_norm": 0.0, - "learning_rate": 2.8227786483120523e-07, - "loss": 0.7901, - "step": 32694 - }, - { - "epoch": 0.9264926747711751, - "grad_norm": 0.0, - "learning_rate": 2.8206137990732465e-07, - "loss": 0.7536, - "step": 32695 - }, - { - "epoch": 0.9265210122134376, - "grad_norm": 0.0, - "learning_rate": 2.81844976841249e-07, - "loss": 0.7865, - "step": 32696 - }, - { - "epoch": 0.9265493496557001, - "grad_norm": 0.0, - "learning_rate": 2.8162865563480244e-07, - "loss": 0.8919, - "step": 32697 - }, - { - "epoch": 0.9265776870979625, - "grad_norm": 0.0, - "learning_rate": 2.8141241628980576e-07, - "loss": 0.7468, - "step": 32698 - }, - { - "epoch": 0.926606024540225, - "grad_norm": 0.0, - "learning_rate": 2.8119625880808185e-07, - "loss": 0.6884, - "step": 32699 - }, - { - "epoch": 0.9266343619824875, - "grad_norm": 0.0, - "learning_rate": 2.8098018319145157e-07, - "loss": 0.8085, - "step": 32700 - }, - { - "epoch": 0.9266626994247499, - "grad_norm": 0.0, - "learning_rate": 2.807641894417323e-07, - "loss": 0.8535, - "step": 32701 - }, - { - "epoch": 0.9266910368670124, - "grad_norm": 0.0, - "learning_rate": 2.805482775607471e-07, - "loss": 0.7146, - "step": 32702 - }, - { - "epoch": 0.9267193743092749, - "grad_norm": 0.0, - "learning_rate": 2.8033244755031106e-07, - "loss": 0.8652, - "step": 32703 - }, - { - "epoch": 0.9267477117515373, - "grad_norm": 0.0, - "learning_rate": 2.8011669941224616e-07, - "loss": 0.9113, - "step": 32704 - }, - { - "epoch": 0.9267760491937997, - "grad_norm": 0.0, - "learning_rate": 2.7990103314836756e-07, - "loss": 0.869, - "step": 32705 - }, - { - "epoch": 0.9268043866360622, - "grad_norm": 0.0, - "learning_rate": 2.796854487604905e-07, - "loss": 0.8426, - "step": 32706 - }, - { - "epoch": 0.9268327240783247, - "grad_norm": 0.0, - "learning_rate": 2.7946994625043357e-07, - "loss": 0.7653, - "step": 32707 - }, - { - "epoch": 0.9268610615205871, - "grad_norm": 0.0, - "learning_rate": 2.7925452562001077e-07, - "loss": 0.7117, - "step": 32708 - }, - { - "epoch": 0.9268893989628496, - "grad_norm": 0.0, - "learning_rate": 2.790391868710374e-07, - "loss": 0.8414, - "step": 32709 - }, - { - "epoch": 0.9269177364051121, - "grad_norm": 0.0, - "learning_rate": 2.7882393000532526e-07, - "loss": 0.7463, - "step": 32710 - }, - { - "epoch": 0.9269460738473745, - "grad_norm": 0.0, - "learning_rate": 2.7860875502469076e-07, - "loss": 0.9142, - "step": 32711 - }, - { - "epoch": 0.926974411289637, - "grad_norm": 0.0, - "learning_rate": 2.783936619309435e-07, - "loss": 0.8074, - "step": 32712 - }, - { - "epoch": 0.9270027487318995, - "grad_norm": 0.0, - "learning_rate": 2.7817865072589765e-07, - "loss": 0.8158, - "step": 32713 - }, - { - "epoch": 0.927031086174162, - "grad_norm": 0.0, - "learning_rate": 2.7796372141136174e-07, - "loss": 0.812, - "step": 32714 - }, - { - "epoch": 0.9270594236164243, - "grad_norm": 0.0, - "learning_rate": 2.777488739891476e-07, - "loss": 0.816, - "step": 32715 - }, - { - "epoch": 0.9270877610586868, - "grad_norm": 0.0, - "learning_rate": 2.7753410846106496e-07, - "loss": 0.8241, - "step": 32716 - }, - { - "epoch": 0.9271160985009493, - "grad_norm": 0.0, - "learning_rate": 2.773194248289235e-07, - "loss": 0.739, - "step": 32717 - }, - { - "epoch": 0.9271444359432117, - "grad_norm": 0.0, - "learning_rate": 2.7710482309453056e-07, - "loss": 0.7957, - "step": 32718 - }, - { - "epoch": 0.9271727733854742, - "grad_norm": 0.0, - "learning_rate": 2.7689030325969477e-07, - "loss": 0.7487, - "step": 32719 - }, - { - "epoch": 0.9272011108277367, - "grad_norm": 0.0, - "learning_rate": 2.766758653262225e-07, - "loss": 0.7935, - "step": 32720 - }, - { - "epoch": 0.9272294482699992, - "grad_norm": 0.0, - "learning_rate": 2.7646150929591996e-07, - "loss": 0.7907, - "step": 32721 - }, - { - "epoch": 0.9272577857122616, - "grad_norm": 0.0, - "learning_rate": 2.7624723517059247e-07, - "loss": 0.7238, - "step": 32722 - }, - { - "epoch": 0.9272861231545241, - "grad_norm": 0.0, - "learning_rate": 2.760330429520453e-07, - "loss": 0.8697, - "step": 32723 - }, - { - "epoch": 0.9273144605967866, - "grad_norm": 0.0, - "learning_rate": 2.7581893264208346e-07, - "loss": 0.7913, - "step": 32724 - }, - { - "epoch": 0.927342798039049, - "grad_norm": 0.0, - "learning_rate": 2.756049042425091e-07, - "loss": 0.7621, - "step": 32725 - }, - { - "epoch": 0.9273711354813114, - "grad_norm": 0.0, - "learning_rate": 2.7539095775512724e-07, - "loss": 0.871, - "step": 32726 - }, - { - "epoch": 0.9273994729235739, - "grad_norm": 0.0, - "learning_rate": 2.751770931817366e-07, - "loss": 0.8645, - "step": 32727 - }, - { - "epoch": 0.9274278103658363, - "grad_norm": 0.0, - "learning_rate": 2.7496331052414114e-07, - "loss": 0.8459, - "step": 32728 - }, - { - "epoch": 0.9274561478080988, - "grad_norm": 0.0, - "learning_rate": 2.7474960978414065e-07, - "loss": 0.7445, - "step": 32729 - }, - { - "epoch": 0.9274844852503613, - "grad_norm": 0.0, - "learning_rate": 2.745359909635348e-07, - "loss": 0.8252, - "step": 32730 - }, - { - "epoch": 0.9275128226926238, - "grad_norm": 0.0, - "learning_rate": 2.7432245406412425e-07, - "loss": 0.886, - "step": 32731 - }, - { - "epoch": 0.9275411601348862, - "grad_norm": 0.0, - "learning_rate": 2.741089990877088e-07, - "loss": 0.7668, - "step": 32732 - }, - { - "epoch": 0.9275694975771487, - "grad_norm": 0.0, - "learning_rate": 2.7389562603608366e-07, - "loss": 0.8296, - "step": 32733 - }, - { - "epoch": 0.9275978350194112, - "grad_norm": 0.0, - "learning_rate": 2.7368233491104846e-07, - "loss": 0.7046, - "step": 32734 - }, - { - "epoch": 0.9276261724616736, - "grad_norm": 0.0, - "learning_rate": 2.734691257143973e-07, - "loss": 0.759, - "step": 32735 - }, - { - "epoch": 0.927654509903936, - "grad_norm": 0.0, - "learning_rate": 2.7325599844792774e-07, - "loss": 0.7971, - "step": 32736 - }, - { - "epoch": 0.9276828473461985, - "grad_norm": 0.0, - "learning_rate": 2.7304295311343596e-07, - "loss": 0.8641, - "step": 32737 - }, - { - "epoch": 0.927711184788461, - "grad_norm": 0.0, - "learning_rate": 2.728299897127151e-07, - "loss": 0.7482, - "step": 32738 - }, - { - "epoch": 0.9277395222307234, - "grad_norm": 0.0, - "learning_rate": 2.7261710824755814e-07, - "loss": 0.8465, - "step": 32739 - }, - { - "epoch": 0.9277678596729859, - "grad_norm": 0.0, - "learning_rate": 2.724043087197603e-07, - "loss": 0.8417, - "step": 32740 - }, - { - "epoch": 0.9277961971152484, - "grad_norm": 0.0, - "learning_rate": 2.721915911311135e-07, - "loss": 0.8149, - "step": 32741 - }, - { - "epoch": 0.9278245345575108, - "grad_norm": 0.0, - "learning_rate": 2.719789554834085e-07, - "loss": 0.7841, - "step": 32742 - }, - { - "epoch": 0.9278528719997733, - "grad_norm": 0.0, - "learning_rate": 2.717664017784372e-07, - "loss": 0.8566, - "step": 32743 - }, - { - "epoch": 0.9278812094420358, - "grad_norm": 0.0, - "learning_rate": 2.715539300179903e-07, - "loss": 0.8148, - "step": 32744 - }, - { - "epoch": 0.9279095468842983, - "grad_norm": 0.0, - "learning_rate": 2.713415402038577e-07, - "loss": 0.8143, - "step": 32745 - }, - { - "epoch": 0.9279378843265607, - "grad_norm": 0.0, - "learning_rate": 2.7112923233782674e-07, - "loss": 0.7953, - "step": 32746 - }, - { - "epoch": 0.9279662217688232, - "grad_norm": 0.0, - "learning_rate": 2.709170064216882e-07, - "loss": 0.8217, - "step": 32747 - }, - { - "epoch": 0.9279945592110856, - "grad_norm": 0.0, - "learning_rate": 2.707048624572284e-07, - "loss": 0.7184, - "step": 32748 - }, - { - "epoch": 0.928022896653348, - "grad_norm": 0.0, - "learning_rate": 2.704928004462337e-07, - "loss": 0.716, - "step": 32749 - }, - { - "epoch": 0.9280512340956105, - "grad_norm": 0.0, - "learning_rate": 2.7028082039049274e-07, - "loss": 0.8709, - "step": 32750 - }, - { - "epoch": 0.928079571537873, - "grad_norm": 0.0, - "learning_rate": 2.700689222917874e-07, - "loss": 0.7889, - "step": 32751 - }, - { - "epoch": 0.9281079089801354, - "grad_norm": 0.0, - "learning_rate": 2.698571061519051e-07, - "loss": 0.809, - "step": 32752 - }, - { - "epoch": 0.9281362464223979, - "grad_norm": 0.0, - "learning_rate": 2.6964537197263107e-07, - "loss": 0.7829, - "step": 32753 - }, - { - "epoch": 0.9281645838646604, - "grad_norm": 0.0, - "learning_rate": 2.694337197557462e-07, - "loss": 0.8219, - "step": 32754 - }, - { - "epoch": 0.9281929213069229, - "grad_norm": 0.0, - "learning_rate": 2.6922214950303337e-07, - "loss": 0.834, - "step": 32755 - }, - { - "epoch": 0.9282212587491853, - "grad_norm": 0.0, - "learning_rate": 2.6901066121627685e-07, - "loss": 0.7932, - "step": 32756 - }, - { - "epoch": 0.9282495961914478, - "grad_norm": 0.0, - "learning_rate": 2.687992548972573e-07, - "loss": 0.8362, - "step": 32757 - }, - { - "epoch": 0.9282779336337103, - "grad_norm": 0.0, - "learning_rate": 2.6858793054775567e-07, - "loss": 0.7769, - "step": 32758 - }, - { - "epoch": 0.9283062710759726, - "grad_norm": 0.0, - "learning_rate": 2.683766881695504e-07, - "loss": 0.7598, - "step": 32759 - }, - { - "epoch": 0.9283346085182351, - "grad_norm": 0.0, - "learning_rate": 2.681655277644224e-07, - "loss": 0.7938, - "step": 32760 - }, - { - "epoch": 0.9283629459604976, - "grad_norm": 0.0, - "learning_rate": 2.679544493341513e-07, - "loss": 0.8975, - "step": 32761 - }, - { - "epoch": 0.9283912834027601, - "grad_norm": 0.0, - "learning_rate": 2.677434528805123e-07, - "loss": 0.8658, - "step": 32762 - }, - { - "epoch": 0.9284196208450225, - "grad_norm": 0.0, - "learning_rate": 2.6753253840528516e-07, - "loss": 0.7999, - "step": 32763 - }, - { - "epoch": 0.928447958287285, - "grad_norm": 0.0, - "learning_rate": 2.67321705910244e-07, - "loss": 0.8607, - "step": 32764 - }, - { - "epoch": 0.9284762957295475, - "grad_norm": 0.0, - "learning_rate": 2.671109553971674e-07, - "loss": 0.7438, - "step": 32765 - }, - { - "epoch": 0.9285046331718099, - "grad_norm": 0.0, - "learning_rate": 2.669002868678294e-07, - "loss": 0.7637, - "step": 32766 - }, - { - "epoch": 0.9285329706140724, - "grad_norm": 0.0, - "learning_rate": 2.6668970032400433e-07, - "loss": 0.7662, - "step": 32767 - }, - { - "epoch": 0.9285613080563349, - "grad_norm": 0.0, - "learning_rate": 2.6647919576746615e-07, - "loss": 0.7723, - "step": 32768 - }, - { - "epoch": 0.9285896454985973, - "grad_norm": 0.0, - "learning_rate": 2.66268773199988e-07, - "loss": 0.778, - "step": 32769 - }, - { - "epoch": 0.9286179829408597, - "grad_norm": 0.0, - "learning_rate": 2.6605843262334284e-07, - "loss": 0.79, - "step": 32770 - }, - { - "epoch": 0.9286463203831222, - "grad_norm": 0.0, - "learning_rate": 2.6584817403930265e-07, - "loss": 0.7953, - "step": 32771 - }, - { - "epoch": 0.9286746578253847, - "grad_norm": 0.0, - "learning_rate": 2.6563799744963704e-07, - "loss": 0.8076, - "step": 32772 - }, - { - "epoch": 0.9287029952676471, - "grad_norm": 0.0, - "learning_rate": 2.65427902856118e-07, - "loss": 0.8226, - "step": 32773 - }, - { - "epoch": 0.9287313327099096, - "grad_norm": 0.0, - "learning_rate": 2.6521789026051516e-07, - "loss": 0.7759, - "step": 32774 - }, - { - "epoch": 0.9287596701521721, - "grad_norm": 0.0, - "learning_rate": 2.6500795966459494e-07, - "loss": 0.8663, - "step": 32775 - }, - { - "epoch": 0.9287880075944345, - "grad_norm": 0.0, - "learning_rate": 2.647981110701292e-07, - "loss": 0.7789, - "step": 32776 - }, - { - "epoch": 0.928816345036697, - "grad_norm": 0.0, - "learning_rate": 2.6458834447888436e-07, - "loss": 0.7709, - "step": 32777 - }, - { - "epoch": 0.9288446824789595, - "grad_norm": 0.0, - "learning_rate": 2.6437865989262566e-07, - "loss": 0.7612, - "step": 32778 - }, - { - "epoch": 0.928873019921222, - "grad_norm": 0.0, - "learning_rate": 2.641690573131228e-07, - "loss": 0.8471, - "step": 32779 - }, - { - "epoch": 0.9289013573634843, - "grad_norm": 0.0, - "learning_rate": 2.639595367421377e-07, - "loss": 0.9312, - "step": 32780 - }, - { - "epoch": 0.9289296948057468, - "grad_norm": 0.0, - "learning_rate": 2.6375009818143673e-07, - "loss": 0.8346, - "step": 32781 - }, - { - "epoch": 0.9289580322480093, - "grad_norm": 0.0, - "learning_rate": 2.63540741632784e-07, - "loss": 0.7728, - "step": 32782 - }, - { - "epoch": 0.9289863696902717, - "grad_norm": 0.0, - "learning_rate": 2.633314670979437e-07, - "loss": 0.7974, - "step": 32783 - }, - { - "epoch": 0.9290147071325342, - "grad_norm": 0.0, - "learning_rate": 2.631222745786788e-07, - "loss": 0.856, - "step": 32784 - }, - { - "epoch": 0.9290430445747967, - "grad_norm": 0.0, - "learning_rate": 2.62913164076749e-07, - "loss": 0.7868, - "step": 32785 - }, - { - "epoch": 0.9290713820170592, - "grad_norm": 0.0, - "learning_rate": 2.627041355939186e-07, - "loss": 0.9766, - "step": 32786 - }, - { - "epoch": 0.9290997194593216, - "grad_norm": 0.0, - "learning_rate": 2.6249518913194713e-07, - "loss": 0.8009, - "step": 32787 - }, - { - "epoch": 0.9291280569015841, - "grad_norm": 0.0, - "learning_rate": 2.622863246925944e-07, - "loss": 0.7612, - "step": 32788 - }, - { - "epoch": 0.9291563943438466, - "grad_norm": 0.0, - "learning_rate": 2.6207754227761897e-07, - "loss": 0.831, - "step": 32789 - }, - { - "epoch": 0.9291847317861089, - "grad_norm": 0.0, - "learning_rate": 2.618688418887827e-07, - "loss": 0.8546, - "step": 32790 - }, - { - "epoch": 0.9292130692283714, - "grad_norm": 0.0, - "learning_rate": 2.616602235278398e-07, - "loss": 0.8199, - "step": 32791 - }, - { - "epoch": 0.9292414066706339, - "grad_norm": 0.0, - "learning_rate": 2.6145168719655e-07, - "loss": 0.8309, - "step": 32792 - }, - { - "epoch": 0.9292697441128964, - "grad_norm": 0.0, - "learning_rate": 2.6124323289666744e-07, - "loss": 0.8466, - "step": 32793 - }, - { - "epoch": 0.9292980815551588, - "grad_norm": 0.0, - "learning_rate": 2.6103486062995063e-07, - "loss": 0.8221, - "step": 32794 - }, - { - "epoch": 0.9293264189974213, - "grad_norm": 0.0, - "learning_rate": 2.6082657039815275e-07, - "loss": 0.8245, - "step": 32795 - }, - { - "epoch": 0.9293547564396838, - "grad_norm": 0.0, - "learning_rate": 2.6061836220303004e-07, - "loss": 0.728, - "step": 32796 - }, - { - "epoch": 0.9293830938819462, - "grad_norm": 0.0, - "learning_rate": 2.6041023604633455e-07, - "loss": 0.8397, - "step": 32797 - }, - { - "epoch": 0.9294114313242087, - "grad_norm": 0.0, - "learning_rate": 2.6020219192982144e-07, - "loss": 0.7803, - "step": 32798 - }, - { - "epoch": 0.9294397687664712, - "grad_norm": 0.0, - "learning_rate": 2.5999422985524157e-07, - "loss": 0.8384, - "step": 32799 - }, - { - "epoch": 0.9294681062087335, - "grad_norm": 0.0, - "learning_rate": 2.5978634982434804e-07, - "loss": 0.8317, - "step": 32800 - }, - { - "epoch": 0.929496443650996, - "grad_norm": 0.0, - "learning_rate": 2.5957855183889046e-07, - "loss": 0.8204, - "step": 32801 - }, - { - "epoch": 0.9295247810932585, - "grad_norm": 0.0, - "learning_rate": 2.5937083590061973e-07, - "loss": 0.9321, - "step": 32802 - }, - { - "epoch": 0.929553118535521, - "grad_norm": 0.0, - "learning_rate": 2.591632020112855e-07, - "loss": 0.8477, - "step": 32803 - }, - { - "epoch": 0.9295814559777834, - "grad_norm": 0.0, - "learning_rate": 2.5895565017263647e-07, - "loss": 0.7968, - "step": 32804 - }, - { - "epoch": 0.9296097934200459, - "grad_norm": 0.0, - "learning_rate": 2.587481803864211e-07, - "loss": 0.7267, - "step": 32805 - }, - { - "epoch": 0.9296381308623084, - "grad_norm": 0.0, - "learning_rate": 2.585407926543881e-07, - "loss": 0.721, - "step": 32806 - }, - { - "epoch": 0.9296664683045708, - "grad_norm": 0.0, - "learning_rate": 2.5833348697828277e-07, - "loss": 0.8839, - "step": 32807 - }, - { - "epoch": 0.9296948057468333, - "grad_norm": 0.0, - "learning_rate": 2.5812626335985135e-07, - "loss": 0.7078, - "step": 32808 - }, - { - "epoch": 0.9297231431890958, - "grad_norm": 0.0, - "learning_rate": 2.5791912180084033e-07, - "loss": 0.7568, - "step": 32809 - }, - { - "epoch": 0.9297514806313583, - "grad_norm": 0.0, - "learning_rate": 2.5771206230299497e-07, - "loss": 0.8388, - "step": 32810 - }, - { - "epoch": 0.9297798180736206, - "grad_norm": 0.0, - "learning_rate": 2.5750508486805825e-07, - "loss": 0.7776, - "step": 32811 - }, - { - "epoch": 0.9298081555158831, - "grad_norm": 0.0, - "learning_rate": 2.572981894977744e-07, - "loss": 0.812, - "step": 32812 - }, - { - "epoch": 0.9298364929581456, - "grad_norm": 0.0, - "learning_rate": 2.5709137619388536e-07, - "loss": 0.7831, - "step": 32813 - }, - { - "epoch": 0.929864830400408, - "grad_norm": 0.0, - "learning_rate": 2.5688464495813304e-07, - "loss": 0.8008, - "step": 32814 - }, - { - "epoch": 0.9298931678426705, - "grad_norm": 0.0, - "learning_rate": 2.566779957922594e-07, - "loss": 0.8407, - "step": 32815 - }, - { - "epoch": 0.929921505284933, - "grad_norm": 0.0, - "learning_rate": 2.5647142869800635e-07, - "loss": 0.9191, - "step": 32816 - }, - { - "epoch": 0.9299498427271955, - "grad_norm": 0.0, - "learning_rate": 2.562649436771114e-07, - "loss": 0.9326, - "step": 32817 - }, - { - "epoch": 0.9299781801694579, - "grad_norm": 0.0, - "learning_rate": 2.560585407313154e-07, - "loss": 0.8326, - "step": 32818 - }, - { - "epoch": 0.9300065176117204, - "grad_norm": 0.0, - "learning_rate": 2.55852219862357e-07, - "loss": 0.8763, - "step": 32819 - }, - { - "epoch": 0.9300348550539829, - "grad_norm": 0.0, - "learning_rate": 2.556459810719736e-07, - "loss": 0.7368, - "step": 32820 - }, - { - "epoch": 0.9300631924962453, - "grad_norm": 0.0, - "learning_rate": 2.5543982436190273e-07, - "loss": 0.7691, - "step": 32821 - }, - { - "epoch": 0.9300915299385077, - "grad_norm": 0.0, - "learning_rate": 2.552337497338797e-07, - "loss": 0.7349, - "step": 32822 - }, - { - "epoch": 0.9301198673807702, - "grad_norm": 0.0, - "learning_rate": 2.55027757189642e-07, - "loss": 0.8969, - "step": 32823 - }, - { - "epoch": 0.9301482048230326, - "grad_norm": 0.0, - "learning_rate": 2.5482184673092493e-07, - "loss": 0.7555, - "step": 32824 - }, - { - "epoch": 0.9301765422652951, - "grad_norm": 0.0, - "learning_rate": 2.546160183594615e-07, - "loss": 0.8374, - "step": 32825 - }, - { - "epoch": 0.9302048797075576, - "grad_norm": 0.0, - "learning_rate": 2.5441027207698587e-07, - "loss": 0.6585, - "step": 32826 - }, - { - "epoch": 0.9302332171498201, - "grad_norm": 0.0, - "learning_rate": 2.5420460788523336e-07, - "loss": 0.7269, - "step": 32827 - }, - { - "epoch": 0.9302615545920825, - "grad_norm": 0.0, - "learning_rate": 2.5399902578593263e-07, - "loss": 0.7131, - "step": 32828 - }, - { - "epoch": 0.930289892034345, - "grad_norm": 0.0, - "learning_rate": 2.537935257808177e-07, - "loss": 0.8235, - "step": 32829 - }, - { - "epoch": 0.9303182294766075, - "grad_norm": 0.0, - "learning_rate": 2.5358810787161956e-07, - "loss": 0.8718, - "step": 32830 - }, - { - "epoch": 0.9303465669188699, - "grad_norm": 0.0, - "learning_rate": 2.5338277206006677e-07, - "loss": 0.7032, - "step": 32831 - }, - { - "epoch": 0.9303749043611323, - "grad_norm": 0.0, - "learning_rate": 2.531775183478913e-07, - "loss": 0.7673, - "step": 32832 - }, - { - "epoch": 0.9304032418033948, - "grad_norm": 0.0, - "learning_rate": 2.529723467368206e-07, - "loss": 0.8009, - "step": 32833 - }, - { - "epoch": 0.9304315792456573, - "grad_norm": 0.0, - "learning_rate": 2.527672572285833e-07, - "loss": 0.8526, - "step": 32834 - }, - { - "epoch": 0.9304599166879197, - "grad_norm": 0.0, - "learning_rate": 2.5256224982490584e-07, - "loss": 0.8743, - "step": 32835 - }, - { - "epoch": 0.9304882541301822, - "grad_norm": 0.0, - "learning_rate": 2.5235732452751793e-07, - "loss": 0.7195, - "step": 32836 - }, - { - "epoch": 0.9305165915724447, - "grad_norm": 0.0, - "learning_rate": 2.5215248133814375e-07, - "loss": 0.7939, - "step": 32837 - }, - { - "epoch": 0.9305449290147071, - "grad_norm": 0.0, - "learning_rate": 2.5194772025850854e-07, - "loss": 0.8385, - "step": 32838 - }, - { - "epoch": 0.9305732664569696, - "grad_norm": 0.0, - "learning_rate": 2.5174304129033655e-07, - "loss": 0.7768, - "step": 32839 - }, - { - "epoch": 0.9306016038992321, - "grad_norm": 0.0, - "learning_rate": 2.5153844443535525e-07, - "loss": 0.8597, - "step": 32840 - }, - { - "epoch": 0.9306299413414946, - "grad_norm": 0.0, - "learning_rate": 2.5133392969528326e-07, - "loss": 0.8691, - "step": 32841 - }, - { - "epoch": 0.930658278783757, - "grad_norm": 0.0, - "learning_rate": 2.5112949707184695e-07, - "loss": 0.8695, - "step": 32842 - }, - { - "epoch": 0.9306866162260194, - "grad_norm": 0.0, - "learning_rate": 2.5092514656676727e-07, - "loss": 0.8601, - "step": 32843 - }, - { - "epoch": 0.9307149536682819, - "grad_norm": 0.0, - "learning_rate": 2.507208781817638e-07, - "loss": 0.8802, - "step": 32844 - }, - { - "epoch": 0.9307432911105443, - "grad_norm": 0.0, - "learning_rate": 2.5051669191856087e-07, - "loss": 0.7202, - "step": 32845 - }, - { - "epoch": 0.9307716285528068, - "grad_norm": 0.0, - "learning_rate": 2.503125877788748e-07, - "loss": 0.6507, - "step": 32846 - }, - { - "epoch": 0.9307999659950693, - "grad_norm": 0.0, - "learning_rate": 2.501085657644264e-07, - "loss": 0.7129, - "step": 32847 - }, - { - "epoch": 0.9308283034373317, - "grad_norm": 0.0, - "learning_rate": 2.499046258769333e-07, - "loss": 0.8707, - "step": 32848 - }, - { - "epoch": 0.9308566408795942, - "grad_norm": 0.0, - "learning_rate": 2.4970076811811514e-07, - "loss": 0.8152, - "step": 32849 - }, - { - "epoch": 0.9308849783218567, - "grad_norm": 0.0, - "learning_rate": 2.494969924896884e-07, - "loss": 0.8394, - "step": 32850 - }, - { - "epoch": 0.9309133157641192, - "grad_norm": 0.0, - "learning_rate": 2.492932989933683e-07, - "loss": 0.8277, - "step": 32851 - }, - { - "epoch": 0.9309416532063816, - "grad_norm": 0.0, - "learning_rate": 2.4908968763087235e-07, - "loss": 0.7749, - "step": 32852 - }, - { - "epoch": 0.930969990648644, - "grad_norm": 0.0, - "learning_rate": 2.4888615840391485e-07, - "loss": 0.7453, - "step": 32853 - }, - { - "epoch": 0.9309983280909065, - "grad_norm": 0.0, - "learning_rate": 2.4868271131420985e-07, - "loss": 0.7999, - "step": 32854 - }, - { - "epoch": 0.9310266655331689, - "grad_norm": 0.0, - "learning_rate": 2.484793463634716e-07, - "loss": 0.7687, - "step": 32855 - }, - { - "epoch": 0.9310550029754314, - "grad_norm": 0.0, - "learning_rate": 2.4827606355341317e-07, - "loss": 0.8004, - "step": 32856 - }, - { - "epoch": 0.9310833404176939, - "grad_norm": 0.0, - "learning_rate": 2.480728628857465e-07, - "loss": 0.7175, - "step": 32857 - }, - { - "epoch": 0.9311116778599564, - "grad_norm": 0.0, - "learning_rate": 2.478697443621836e-07, - "loss": 0.8549, - "step": 32858 - }, - { - "epoch": 0.9311400153022188, - "grad_norm": 0.0, - "learning_rate": 2.4766670798443414e-07, - "loss": 0.7595, - "step": 32859 - }, - { - "epoch": 0.9311683527444813, - "grad_norm": 0.0, - "learning_rate": 2.474637537542102e-07, - "loss": 0.7718, - "step": 32860 - }, - { - "epoch": 0.9311966901867438, - "grad_norm": 0.0, - "learning_rate": 2.472608816732203e-07, - "loss": 0.7314, - "step": 32861 - }, - { - "epoch": 0.9312250276290062, - "grad_norm": 0.0, - "learning_rate": 2.470580917431742e-07, - "loss": 0.8219, - "step": 32862 - }, - { - "epoch": 0.9312533650712687, - "grad_norm": 0.0, - "learning_rate": 2.4685538396577835e-07, - "loss": 0.8554, - "step": 32863 - }, - { - "epoch": 0.9312817025135312, - "grad_norm": 0.0, - "learning_rate": 2.466527583427425e-07, - "loss": 0.722, - "step": 32864 - }, - { - "epoch": 0.9313100399557936, - "grad_norm": 0.0, - "learning_rate": 2.464502148757719e-07, - "loss": 0.8451, - "step": 32865 - }, - { - "epoch": 0.931338377398056, - "grad_norm": 0.0, - "learning_rate": 2.4624775356657417e-07, - "loss": 0.7882, - "step": 32866 - }, - { - "epoch": 0.9313667148403185, - "grad_norm": 0.0, - "learning_rate": 2.460453744168523e-07, - "loss": 0.7978, - "step": 32867 - }, - { - "epoch": 0.931395052282581, - "grad_norm": 0.0, - "learning_rate": 2.458430774283116e-07, - "loss": 0.875, - "step": 32868 - }, - { - "epoch": 0.9314233897248434, - "grad_norm": 0.0, - "learning_rate": 2.456408626026585e-07, - "loss": 0.8122, - "step": 32869 - }, - { - "epoch": 0.9314517271671059, - "grad_norm": 0.0, - "learning_rate": 2.454387299415928e-07, - "loss": 0.7335, - "step": 32870 - }, - { - "epoch": 0.9314800646093684, - "grad_norm": 0.0, - "learning_rate": 2.4523667944682085e-07, - "loss": 0.8599, - "step": 32871 - }, - { - "epoch": 0.9315084020516308, - "grad_norm": 0.0, - "learning_rate": 2.450347111200413e-07, - "loss": 0.6976, - "step": 32872 - }, - { - "epoch": 0.9315367394938933, - "grad_norm": 0.0, - "learning_rate": 2.448328249629572e-07, - "loss": 0.6672, - "step": 32873 - }, - { - "epoch": 0.9315650769361558, - "grad_norm": 0.0, - "learning_rate": 2.4463102097726843e-07, - "loss": 0.9163, - "step": 32874 - }, - { - "epoch": 0.9315934143784182, - "grad_norm": 0.0, - "learning_rate": 2.444292991646746e-07, - "loss": 0.8144, - "step": 32875 - }, - { - "epoch": 0.9316217518206806, - "grad_norm": 0.0, - "learning_rate": 2.4422765952687666e-07, - "loss": 0.802, - "step": 32876 - }, - { - "epoch": 0.9316500892629431, - "grad_norm": 0.0, - "learning_rate": 2.440261020655721e-07, - "loss": 0.8751, - "step": 32877 - }, - { - "epoch": 0.9316784267052056, - "grad_norm": 0.0, - "learning_rate": 2.4382462678245735e-07, - "loss": 0.8382, - "step": 32878 - }, - { - "epoch": 0.931706764147468, - "grad_norm": 0.0, - "learning_rate": 2.4362323367923216e-07, - "loss": 0.8336, - "step": 32879 - }, - { - "epoch": 0.9317351015897305, - "grad_norm": 0.0, - "learning_rate": 2.434219227575896e-07, - "loss": 0.7511, - "step": 32880 - }, - { - "epoch": 0.931763439031993, - "grad_norm": 0.0, - "learning_rate": 2.4322069401922723e-07, - "loss": 0.7217, - "step": 32881 - }, - { - "epoch": 0.9317917764742555, - "grad_norm": 0.0, - "learning_rate": 2.4301954746584145e-07, - "loss": 0.7755, - "step": 32882 - }, - { - "epoch": 0.9318201139165179, - "grad_norm": 0.0, - "learning_rate": 2.4281848309912425e-07, - "loss": 0.8456, - "step": 32883 - }, - { - "epoch": 0.9318484513587804, - "grad_norm": 0.0, - "learning_rate": 2.4261750092077095e-07, - "loss": 0.8094, - "step": 32884 - }, - { - "epoch": 0.9318767888010429, - "grad_norm": 0.0, - "learning_rate": 2.4241660093247356e-07, - "loss": 0.7241, - "step": 32885 - }, - { - "epoch": 0.9319051262433052, - "grad_norm": 0.0, - "learning_rate": 2.4221578313592397e-07, - "loss": 0.8401, - "step": 32886 - }, - { - "epoch": 0.9319334636855677, - "grad_norm": 0.0, - "learning_rate": 2.4201504753281424e-07, - "loss": 0.8499, - "step": 32887 - }, - { - "epoch": 0.9319618011278302, - "grad_norm": 0.0, - "learning_rate": 2.418143941248352e-07, - "loss": 0.8513, - "step": 32888 - }, - { - "epoch": 0.9319901385700927, - "grad_norm": 0.0, - "learning_rate": 2.4161382291367776e-07, - "loss": 0.8185, - "step": 32889 - }, - { - "epoch": 0.9320184760123551, - "grad_norm": 0.0, - "learning_rate": 2.414133339010305e-07, - "loss": 0.8604, - "step": 32890 - }, - { - "epoch": 0.9320468134546176, - "grad_norm": 0.0, - "learning_rate": 2.4121292708858324e-07, - "loss": 0.7944, - "step": 32891 - }, - { - "epoch": 0.9320751508968801, - "grad_norm": 0.0, - "learning_rate": 2.410126024780224e-07, - "loss": 0.712, - "step": 32892 - }, - { - "epoch": 0.9321034883391425, - "grad_norm": 0.0, - "learning_rate": 2.408123600710366e-07, - "loss": 0.6886, - "step": 32893 - }, - { - "epoch": 0.932131825781405, - "grad_norm": 0.0, - "learning_rate": 2.4061219986931226e-07, - "loss": 0.7926, - "step": 32894 - }, - { - "epoch": 0.9321601632236675, - "grad_norm": 0.0, - "learning_rate": 2.404121218745359e-07, - "loss": 0.7021, - "step": 32895 - }, - { - "epoch": 0.9321885006659298, - "grad_norm": 0.0, - "learning_rate": 2.4021212608839163e-07, - "loss": 0.7697, - "step": 32896 - }, - { - "epoch": 0.9322168381081923, - "grad_norm": 0.0, - "learning_rate": 2.400122125125648e-07, - "loss": 0.8724, - "step": 32897 - }, - { - "epoch": 0.9322451755504548, - "grad_norm": 0.0, - "learning_rate": 2.398123811487407e-07, - "loss": 0.8373, - "step": 32898 - }, - { - "epoch": 0.9322735129927173, - "grad_norm": 0.0, - "learning_rate": 2.396126319985992e-07, - "loss": 0.7727, - "step": 32899 - }, - { - "epoch": 0.9323018504349797, - "grad_norm": 0.0, - "learning_rate": 2.394129650638266e-07, - "loss": 0.7722, - "step": 32900 - }, - { - "epoch": 0.9323301878772422, - "grad_norm": 0.0, - "learning_rate": 2.3921338034610165e-07, - "loss": 0.7549, - "step": 32901 - }, - { - "epoch": 0.9323585253195047, - "grad_norm": 0.0, - "learning_rate": 2.390138778471074e-07, - "loss": 0.8655, - "step": 32902 - }, - { - "epoch": 0.9323868627617671, - "grad_norm": 0.0, - "learning_rate": 2.3881445756852473e-07, - "loss": 0.8001, - "step": 32903 - }, - { - "epoch": 0.9324152002040296, - "grad_norm": 0.0, - "learning_rate": 2.386151195120323e-07, - "loss": 0.8221, - "step": 32904 - }, - { - "epoch": 0.9324435376462921, - "grad_norm": 0.0, - "learning_rate": 2.384158636793088e-07, - "loss": 0.7349, - "step": 32905 - }, - { - "epoch": 0.9324718750885546, - "grad_norm": 0.0, - "learning_rate": 2.3821669007203508e-07, - "loss": 0.7885, - "step": 32906 - }, - { - "epoch": 0.9325002125308169, - "grad_norm": 0.0, - "learning_rate": 2.3801759869188534e-07, - "loss": 0.8607, - "step": 32907 - }, - { - "epoch": 0.9325285499730794, - "grad_norm": 0.0, - "learning_rate": 2.378185895405405e-07, - "loss": 0.7627, - "step": 32908 - }, - { - "epoch": 0.9325568874153419, - "grad_norm": 0.0, - "learning_rate": 2.3761966261967252e-07, - "loss": 0.8801, - "step": 32909 - }, - { - "epoch": 0.9325852248576043, - "grad_norm": 0.0, - "learning_rate": 2.3742081793096006e-07, - "loss": 0.8448, - "step": 32910 - }, - { - "epoch": 0.9326135622998668, - "grad_norm": 0.0, - "learning_rate": 2.3722205547607846e-07, - "loss": 0.8843, - "step": 32911 - }, - { - "epoch": 0.9326418997421293, - "grad_norm": 0.0, - "learning_rate": 2.370233752566986e-07, - "loss": 0.7244, - "step": 32912 - }, - { - "epoch": 0.9326702371843917, - "grad_norm": 0.0, - "learning_rate": 2.3682477727449692e-07, - "loss": 0.8589, - "step": 32913 - }, - { - "epoch": 0.9326985746266542, - "grad_norm": 0.0, - "learning_rate": 2.3662626153114655e-07, - "loss": 0.733, - "step": 32914 - }, - { - "epoch": 0.9327269120689167, - "grad_norm": 0.0, - "learning_rate": 2.364278280283172e-07, - "loss": 0.8522, - "step": 32915 - }, - { - "epoch": 0.9327552495111792, - "grad_norm": 0.0, - "learning_rate": 2.3622947676768427e-07, - "loss": 0.8009, - "step": 32916 - }, - { - "epoch": 0.9327835869534415, - "grad_norm": 0.0, - "learning_rate": 2.3603120775091415e-07, - "loss": 0.8363, - "step": 32917 - }, - { - "epoch": 0.932811924395704, - "grad_norm": 0.0, - "learning_rate": 2.3583302097967887e-07, - "loss": 0.749, - "step": 32918 - }, - { - "epoch": 0.9328402618379665, - "grad_norm": 0.0, - "learning_rate": 2.356349164556493e-07, - "loss": 0.8142, - "step": 32919 - }, - { - "epoch": 0.9328685992802289, - "grad_norm": 0.0, - "learning_rate": 2.3543689418049187e-07, - "loss": 0.8169, - "step": 32920 - }, - { - "epoch": 0.9328969367224914, - "grad_norm": 0.0, - "learning_rate": 2.3523895415587637e-07, - "loss": 0.6965, - "step": 32921 - }, - { - "epoch": 0.9329252741647539, - "grad_norm": 0.0, - "learning_rate": 2.3504109638346817e-07, - "loss": 0.7983, - "step": 32922 - }, - { - "epoch": 0.9329536116070164, - "grad_norm": 0.0, - "learning_rate": 2.3484332086493478e-07, - "loss": 0.8933, - "step": 32923 - }, - { - "epoch": 0.9329819490492788, - "grad_norm": 0.0, - "learning_rate": 2.3464562760194266e-07, - "loss": 0.8387, - "step": 32924 - }, - { - "epoch": 0.9330102864915413, - "grad_norm": 0.0, - "learning_rate": 2.3444801659615602e-07, - "loss": 0.7654, - "step": 32925 - }, - { - "epoch": 0.9330386239338038, - "grad_norm": 0.0, - "learning_rate": 2.342504878492413e-07, - "loss": 0.6836, - "step": 32926 - }, - { - "epoch": 0.9330669613760662, - "grad_norm": 0.0, - "learning_rate": 2.3405304136286055e-07, - "loss": 0.6686, - "step": 32927 - }, - { - "epoch": 0.9330952988183286, - "grad_norm": 0.0, - "learning_rate": 2.3385567713867797e-07, - "loss": 0.8935, - "step": 32928 - }, - { - "epoch": 0.9331236362605911, - "grad_norm": 0.0, - "learning_rate": 2.3365839517835554e-07, - "loss": 0.7024, - "step": 32929 - }, - { - "epoch": 0.9331519737028536, - "grad_norm": 0.0, - "learning_rate": 2.3346119548355416e-07, - "loss": 0.7678, - "step": 32930 - }, - { - "epoch": 0.933180311145116, - "grad_norm": 0.0, - "learning_rate": 2.3326407805593698e-07, - "loss": 0.9169, - "step": 32931 - }, - { - "epoch": 0.9332086485873785, - "grad_norm": 0.0, - "learning_rate": 2.3306704289716263e-07, - "loss": 0.8935, - "step": 32932 - }, - { - "epoch": 0.933236986029641, - "grad_norm": 0.0, - "learning_rate": 2.3287009000889205e-07, - "loss": 0.7702, - "step": 32933 - }, - { - "epoch": 0.9332653234719034, - "grad_norm": 0.0, - "learning_rate": 2.3267321939278277e-07, - "loss": 0.8944, - "step": 32934 - }, - { - "epoch": 0.9332936609141659, - "grad_norm": 0.0, - "learning_rate": 2.3247643105049454e-07, - "loss": 0.7904, - "step": 32935 - }, - { - "epoch": 0.9333219983564284, - "grad_norm": 0.0, - "learning_rate": 2.322797249836839e-07, - "loss": 0.7982, - "step": 32936 - }, - { - "epoch": 0.9333503357986908, - "grad_norm": 0.0, - "learning_rate": 2.3208310119400834e-07, - "loss": 0.7349, - "step": 32937 - }, - { - "epoch": 0.9333786732409532, - "grad_norm": 0.0, - "learning_rate": 2.3188655968312435e-07, - "loss": 0.9124, - "step": 32938 - }, - { - "epoch": 0.9334070106832157, - "grad_norm": 0.0, - "learning_rate": 2.3169010045268725e-07, - "loss": 0.782, - "step": 32939 - }, - { - "epoch": 0.9334353481254782, - "grad_norm": 0.0, - "learning_rate": 2.314937235043524e-07, - "loss": 0.7449, - "step": 32940 - }, - { - "epoch": 0.9334636855677406, - "grad_norm": 0.0, - "learning_rate": 2.312974288397718e-07, - "loss": 0.8444, - "step": 32941 - }, - { - "epoch": 0.9334920230100031, - "grad_norm": 0.0, - "learning_rate": 2.311012164606008e-07, - "loss": 0.7039, - "step": 32942 - }, - { - "epoch": 0.9335203604522656, - "grad_norm": 0.0, - "learning_rate": 2.3090508636849362e-07, - "loss": 0.8975, - "step": 32943 - }, - { - "epoch": 0.933548697894528, - "grad_norm": 0.0, - "learning_rate": 2.3070903856509897e-07, - "loss": 0.7983, - "step": 32944 - }, - { - "epoch": 0.9335770353367905, - "grad_norm": 0.0, - "learning_rate": 2.3051307305207105e-07, - "loss": 0.6039, - "step": 32945 - }, - { - "epoch": 0.933605372779053, - "grad_norm": 0.0, - "learning_rate": 2.3031718983105744e-07, - "loss": 0.9116, - "step": 32946 - }, - { - "epoch": 0.9336337102213155, - "grad_norm": 0.0, - "learning_rate": 2.3012138890371126e-07, - "loss": 0.666, - "step": 32947 - }, - { - "epoch": 0.9336620476635779, - "grad_norm": 0.0, - "learning_rate": 2.2992567027168122e-07, - "loss": 0.7862, - "step": 32948 - }, - { - "epoch": 0.9336903851058403, - "grad_norm": 0.0, - "learning_rate": 2.2973003393661374e-07, - "loss": 0.7515, - "step": 32949 - }, - { - "epoch": 0.9337187225481028, - "grad_norm": 0.0, - "learning_rate": 2.2953447990015865e-07, - "loss": 0.8479, - "step": 32950 - }, - { - "epoch": 0.9337470599903652, - "grad_norm": 0.0, - "learning_rate": 2.2933900816396238e-07, - "loss": 0.8685, - "step": 32951 - }, - { - "epoch": 0.9337753974326277, - "grad_norm": 0.0, - "learning_rate": 2.291436187296725e-07, - "loss": 0.7534, - "step": 32952 - }, - { - "epoch": 0.9338037348748902, - "grad_norm": 0.0, - "learning_rate": 2.289483115989355e-07, - "loss": 0.7293, - "step": 32953 - }, - { - "epoch": 0.9338320723171527, - "grad_norm": 0.0, - "learning_rate": 2.2875308677339336e-07, - "loss": 0.8457, - "step": 32954 - }, - { - "epoch": 0.9338604097594151, - "grad_norm": 0.0, - "learning_rate": 2.2855794425469368e-07, - "loss": 0.8363, - "step": 32955 - }, - { - "epoch": 0.9338887472016776, - "grad_norm": 0.0, - "learning_rate": 2.283628840444785e-07, - "loss": 0.911, - "step": 32956 - }, - { - "epoch": 0.9339170846439401, - "grad_norm": 0.0, - "learning_rate": 2.2816790614439089e-07, - "loss": 0.8907, - "step": 32957 - }, - { - "epoch": 0.9339454220862025, - "grad_norm": 0.0, - "learning_rate": 2.2797301055607513e-07, - "loss": 0.8984, - "step": 32958 - }, - { - "epoch": 0.933973759528465, - "grad_norm": 0.0, - "learning_rate": 2.277781972811699e-07, - "loss": 0.8229, - "step": 32959 - }, - { - "epoch": 0.9340020969707274, - "grad_norm": 0.0, - "learning_rate": 2.2758346632131833e-07, - "loss": 0.7534, - "step": 32960 - }, - { - "epoch": 0.9340304344129898, - "grad_norm": 0.0, - "learning_rate": 2.2738881767816134e-07, - "loss": 0.8428, - "step": 32961 - }, - { - "epoch": 0.9340587718552523, - "grad_norm": 0.0, - "learning_rate": 2.271942513533354e-07, - "loss": 0.8208, - "step": 32962 - }, - { - "epoch": 0.9340871092975148, - "grad_norm": 0.0, - "learning_rate": 2.2699976734848138e-07, - "loss": 0.7882, - "step": 32963 - }, - { - "epoch": 0.9341154467397773, - "grad_norm": 0.0, - "learning_rate": 2.2680536566523802e-07, - "loss": 0.7546, - "step": 32964 - }, - { - "epoch": 0.9341437841820397, - "grad_norm": 0.0, - "learning_rate": 2.2661104630524177e-07, - "loss": 0.8787, - "step": 32965 - }, - { - "epoch": 0.9341721216243022, - "grad_norm": 0.0, - "learning_rate": 2.2641680927013133e-07, - "loss": 0.7113, - "step": 32966 - }, - { - "epoch": 0.9342004590665647, - "grad_norm": 0.0, - "learning_rate": 2.262226545615398e-07, - "loss": 0.8062, - "step": 32967 - }, - { - "epoch": 0.9342287965088271, - "grad_norm": 0.0, - "learning_rate": 2.260285821811048e-07, - "loss": 0.8605, - "step": 32968 - }, - { - "epoch": 0.9342571339510896, - "grad_norm": 0.0, - "learning_rate": 2.2583459213046167e-07, - "loss": 0.9068, - "step": 32969 - }, - { - "epoch": 0.934285471393352, - "grad_norm": 0.0, - "learning_rate": 2.2564068441124243e-07, - "loss": 0.8721, - "step": 32970 - }, - { - "epoch": 0.9343138088356145, - "grad_norm": 0.0, - "learning_rate": 2.2544685902508135e-07, - "loss": 0.7456, - "step": 32971 - }, - { - "epoch": 0.9343421462778769, - "grad_norm": 0.0, - "learning_rate": 2.2525311597361154e-07, - "loss": 0.7994, - "step": 32972 - }, - { - "epoch": 0.9343704837201394, - "grad_norm": 0.0, - "learning_rate": 2.2505945525846285e-07, - "loss": 0.7502, - "step": 32973 - }, - { - "epoch": 0.9343988211624019, - "grad_norm": 0.0, - "learning_rate": 2.248658768812706e-07, - "loss": 0.7388, - "step": 32974 - }, - { - "epoch": 0.9344271586046643, - "grad_norm": 0.0, - "learning_rate": 2.2467238084366127e-07, - "loss": 0.8099, - "step": 32975 - }, - { - "epoch": 0.9344554960469268, - "grad_norm": 0.0, - "learning_rate": 2.2447896714726692e-07, - "loss": 0.7854, - "step": 32976 - }, - { - "epoch": 0.9344838334891893, - "grad_norm": 0.0, - "learning_rate": 2.2428563579371508e-07, - "loss": 0.7308, - "step": 32977 - }, - { - "epoch": 0.9345121709314518, - "grad_norm": 0.0, - "learning_rate": 2.2409238678463673e-07, - "loss": 0.8525, - "step": 32978 - }, - { - "epoch": 0.9345405083737142, - "grad_norm": 0.0, - "learning_rate": 2.2389922012165944e-07, - "loss": 0.8264, - "step": 32979 - }, - { - "epoch": 0.9345688458159767, - "grad_norm": 0.0, - "learning_rate": 2.2370613580640744e-07, - "loss": 0.8267, - "step": 32980 - }, - { - "epoch": 0.9345971832582392, - "grad_norm": 0.0, - "learning_rate": 2.2351313384050942e-07, - "loss": 0.7663, - "step": 32981 - }, - { - "epoch": 0.9346255207005015, - "grad_norm": 0.0, - "learning_rate": 2.2332021422559193e-07, - "loss": 0.8014, - "step": 32982 - }, - { - "epoch": 0.934653858142764, - "grad_norm": 0.0, - "learning_rate": 2.2312737696327691e-07, - "loss": 0.7495, - "step": 32983 - }, - { - "epoch": 0.9346821955850265, - "grad_norm": 0.0, - "learning_rate": 2.229346220551909e-07, - "loss": 0.8413, - "step": 32984 - }, - { - "epoch": 0.9347105330272889, - "grad_norm": 0.0, - "learning_rate": 2.2274194950295813e-07, - "loss": 0.6988, - "step": 32985 - }, - { - "epoch": 0.9347388704695514, - "grad_norm": 0.0, - "learning_rate": 2.2254935930820066e-07, - "loss": 0.923, - "step": 32986 - }, - { - "epoch": 0.9347672079118139, - "grad_norm": 0.0, - "learning_rate": 2.223568514725405e-07, - "loss": 0.8071, - "step": 32987 - }, - { - "epoch": 0.9347955453540764, - "grad_norm": 0.0, - "learning_rate": 2.2216442599759857e-07, - "loss": 0.8482, - "step": 32988 - }, - { - "epoch": 0.9348238827963388, - "grad_norm": 0.0, - "learning_rate": 2.2197208288499694e-07, - "loss": 0.7735, - "step": 32989 - }, - { - "epoch": 0.9348522202386013, - "grad_norm": 0.0, - "learning_rate": 2.217798221363554e-07, - "loss": 0.722, - "step": 32990 - }, - { - "epoch": 0.9348805576808638, - "grad_norm": 0.0, - "learning_rate": 2.2158764375329378e-07, - "loss": 0.8112, - "step": 32991 - }, - { - "epoch": 0.9349088951231261, - "grad_norm": 0.0, - "learning_rate": 2.2139554773742967e-07, - "loss": 0.7552, - "step": 32992 - }, - { - "epoch": 0.9349372325653886, - "grad_norm": 0.0, - "learning_rate": 2.2120353409038398e-07, - "loss": 0.8011, - "step": 32993 - }, - { - "epoch": 0.9349655700076511, - "grad_norm": 0.0, - "learning_rate": 2.2101160281377098e-07, - "loss": 0.7936, - "step": 32994 - }, - { - "epoch": 0.9349939074499136, - "grad_norm": 0.0, - "learning_rate": 2.208197539092094e-07, - "loss": 0.8165, - "step": 32995 - }, - { - "epoch": 0.935022244892176, - "grad_norm": 0.0, - "learning_rate": 2.206279873783135e-07, - "loss": 0.7591, - "step": 32996 - }, - { - "epoch": 0.9350505823344385, - "grad_norm": 0.0, - "learning_rate": 2.2043630322269972e-07, - "loss": 0.8335, - "step": 32997 - }, - { - "epoch": 0.935078919776701, - "grad_norm": 0.0, - "learning_rate": 2.2024470144398235e-07, - "loss": 0.6725, - "step": 32998 - }, - { - "epoch": 0.9351072572189634, - "grad_norm": 0.0, - "learning_rate": 2.2005318204377569e-07, - "loss": 0.8338, - "step": 32999 - }, - { - "epoch": 0.9351355946612259, - "grad_norm": 0.0, - "learning_rate": 2.1986174502369285e-07, - "loss": 0.7859, - "step": 33000 - }, - { - "epoch": 0.9351639321034884, - "grad_norm": 0.0, - "learning_rate": 2.19670390385347e-07, - "loss": 0.7933, - "step": 33001 - }, - { - "epoch": 0.9351922695457509, - "grad_norm": 0.0, - "learning_rate": 2.1947911813034795e-07, - "loss": 0.7726, - "step": 33002 - }, - { - "epoch": 0.9352206069880132, - "grad_norm": 0.0, - "learning_rate": 2.1928792826030887e-07, - "loss": 0.8104, - "step": 33003 - }, - { - "epoch": 0.9352489444302757, - "grad_norm": 0.0, - "learning_rate": 2.1909682077683848e-07, - "loss": 0.7295, - "step": 33004 - }, - { - "epoch": 0.9352772818725382, - "grad_norm": 0.0, - "learning_rate": 2.189057956815488e-07, - "loss": 0.8853, - "step": 33005 - }, - { - "epoch": 0.9353056193148006, - "grad_norm": 0.0, - "learning_rate": 2.1871485297604856e-07, - "loss": 0.779, - "step": 33006 - }, - { - "epoch": 0.9353339567570631, - "grad_norm": 0.0, - "learning_rate": 2.1852399266194312e-07, - "loss": 0.7531, - "step": 33007 - }, - { - "epoch": 0.9353622941993256, - "grad_norm": 0.0, - "learning_rate": 2.1833321474084456e-07, - "loss": 0.865, - "step": 33008 - }, - { - "epoch": 0.935390631641588, - "grad_norm": 0.0, - "learning_rate": 2.1814251921435603e-07, - "loss": 0.7489, - "step": 33009 - }, - { - "epoch": 0.9354189690838505, - "grad_norm": 0.0, - "learning_rate": 2.1795190608408623e-07, - "loss": 0.7337, - "step": 33010 - }, - { - "epoch": 0.935447306526113, - "grad_norm": 0.0, - "learning_rate": 2.1776137535164054e-07, - "loss": 0.9417, - "step": 33011 - }, - { - "epoch": 0.9354756439683755, - "grad_norm": 0.0, - "learning_rate": 2.1757092701862213e-07, - "loss": 0.7723, - "step": 33012 - }, - { - "epoch": 0.9355039814106378, - "grad_norm": 0.0, - "learning_rate": 2.173805610866364e-07, - "loss": 0.836, - "step": 33013 - }, - { - "epoch": 0.9355323188529003, - "grad_norm": 0.0, - "learning_rate": 2.1719027755728762e-07, - "loss": 0.7029, - "step": 33014 - }, - { - "epoch": 0.9355606562951628, - "grad_norm": 0.0, - "learning_rate": 2.170000764321778e-07, - "loss": 0.7631, - "step": 33015 - }, - { - "epoch": 0.9355889937374252, - "grad_norm": 0.0, - "learning_rate": 2.1680995771290904e-07, - "loss": 0.7991, - "step": 33016 - }, - { - "epoch": 0.9356173311796877, - "grad_norm": 0.0, - "learning_rate": 2.1661992140108224e-07, - "loss": 0.765, - "step": 33017 - }, - { - "epoch": 0.9356456686219502, - "grad_norm": 0.0, - "learning_rate": 2.1642996749829948e-07, - "loss": 0.8336, - "step": 33018 - }, - { - "epoch": 0.9356740060642127, - "grad_norm": 0.0, - "learning_rate": 2.162400960061606e-07, - "loss": 0.823, - "step": 33019 - }, - { - "epoch": 0.9357023435064751, - "grad_norm": 0.0, - "learning_rate": 2.1605030692626317e-07, - "loss": 0.7002, - "step": 33020 - }, - { - "epoch": 0.9357306809487376, - "grad_norm": 0.0, - "learning_rate": 2.1586060026020816e-07, - "loss": 0.7974, - "step": 33021 - }, - { - "epoch": 0.9357590183910001, - "grad_norm": 0.0, - "learning_rate": 2.156709760095932e-07, - "loss": 0.7996, - "step": 33022 - }, - { - "epoch": 0.9357873558332624, - "grad_norm": 0.0, - "learning_rate": 2.1548143417601364e-07, - "loss": 0.8064, - "step": 33023 - }, - { - "epoch": 0.9358156932755249, - "grad_norm": 0.0, - "learning_rate": 2.1529197476106821e-07, - "loss": 0.746, - "step": 33024 - }, - { - "epoch": 0.9358440307177874, - "grad_norm": 0.0, - "learning_rate": 2.1510259776635122e-07, - "loss": 0.7213, - "step": 33025 - }, - { - "epoch": 0.9358723681600499, - "grad_norm": 0.0, - "learning_rate": 2.1491330319345915e-07, - "loss": 0.8307, - "step": 33026 - }, - { - "epoch": 0.9359007056023123, - "grad_norm": 0.0, - "learning_rate": 2.1472409104398629e-07, - "loss": 0.7279, - "step": 33027 - }, - { - "epoch": 0.9359290430445748, - "grad_norm": 0.0, - "learning_rate": 2.145349613195258e-07, - "loss": 0.7924, - "step": 33028 - }, - { - "epoch": 0.9359573804868373, - "grad_norm": 0.0, - "learning_rate": 2.143459140216697e-07, - "loss": 0.8427, - "step": 33029 - }, - { - "epoch": 0.9359857179290997, - "grad_norm": 0.0, - "learning_rate": 2.1415694915201346e-07, - "loss": 0.7994, - "step": 33030 - }, - { - "epoch": 0.9360140553713622, - "grad_norm": 0.0, - "learning_rate": 2.1396806671214576e-07, - "loss": 0.836, - "step": 33031 - }, - { - "epoch": 0.9360423928136247, - "grad_norm": 0.0, - "learning_rate": 2.137792667036609e-07, - "loss": 0.7585, - "step": 33032 - }, - { - "epoch": 0.936070730255887, - "grad_norm": 0.0, - "learning_rate": 2.1359054912814537e-07, - "loss": 0.7535, - "step": 33033 - }, - { - "epoch": 0.9360990676981495, - "grad_norm": 0.0, - "learning_rate": 2.1340191398719125e-07, - "loss": 0.8518, - "step": 33034 - }, - { - "epoch": 0.936127405140412, - "grad_norm": 0.0, - "learning_rate": 2.1321336128238835e-07, - "loss": 0.7859, - "step": 33035 - }, - { - "epoch": 0.9361557425826745, - "grad_norm": 0.0, - "learning_rate": 2.1302489101532208e-07, - "loss": 0.7174, - "step": 33036 - }, - { - "epoch": 0.9361840800249369, - "grad_norm": 0.0, - "learning_rate": 2.1283650318758231e-07, - "loss": 0.7539, - "step": 33037 - }, - { - "epoch": 0.9362124174671994, - "grad_norm": 0.0, - "learning_rate": 2.1264819780075441e-07, - "loss": 0.9672, - "step": 33038 - }, - { - "epoch": 0.9362407549094619, - "grad_norm": 0.0, - "learning_rate": 2.124599748564249e-07, - "loss": 0.7478, - "step": 33039 - }, - { - "epoch": 0.9362690923517243, - "grad_norm": 0.0, - "learning_rate": 2.1227183435618026e-07, - "loss": 0.8509, - "step": 33040 - }, - { - "epoch": 0.9362974297939868, - "grad_norm": 0.0, - "learning_rate": 2.120837763016048e-07, - "loss": 0.749, - "step": 33041 - }, - { - "epoch": 0.9363257672362493, - "grad_norm": 0.0, - "learning_rate": 2.118958006942806e-07, - "loss": 0.8846, - "step": 33042 - }, - { - "epoch": 0.9363541046785118, - "grad_norm": 0.0, - "learning_rate": 2.1170790753579417e-07, - "loss": 0.8392, - "step": 33043 - }, - { - "epoch": 0.9363824421207741, - "grad_norm": 0.0, - "learning_rate": 2.1152009682772645e-07, - "loss": 0.9044, - "step": 33044 - }, - { - "epoch": 0.9364107795630366, - "grad_norm": 0.0, - "learning_rate": 2.1133236857166062e-07, - "loss": 0.7732, - "step": 33045 - }, - { - "epoch": 0.9364391170052991, - "grad_norm": 0.0, - "learning_rate": 2.1114472276917654e-07, - "loss": 0.7915, - "step": 33046 - }, - { - "epoch": 0.9364674544475615, - "grad_norm": 0.0, - "learning_rate": 2.1095715942185512e-07, - "loss": 0.9001, - "step": 33047 - }, - { - "epoch": 0.936495791889824, - "grad_norm": 0.0, - "learning_rate": 2.1076967853127738e-07, - "loss": 0.8908, - "step": 33048 - }, - { - "epoch": 0.9365241293320865, - "grad_norm": 0.0, - "learning_rate": 2.1058228009902094e-07, - "loss": 0.7736, - "step": 33049 - }, - { - "epoch": 0.936552466774349, - "grad_norm": 0.0, - "learning_rate": 2.103949641266656e-07, - "loss": 0.7822, - "step": 33050 - }, - { - "epoch": 0.9365808042166114, - "grad_norm": 0.0, - "learning_rate": 2.1020773061578903e-07, - "loss": 0.7573, - "step": 33051 - }, - { - "epoch": 0.9366091416588739, - "grad_norm": 0.0, - "learning_rate": 2.1002057956796773e-07, - "loss": 0.6948, - "step": 33052 - }, - { - "epoch": 0.9366374791011364, - "grad_norm": 0.0, - "learning_rate": 2.0983351098477932e-07, - "loss": 0.7569, - "step": 33053 - }, - { - "epoch": 0.9366658165433988, - "grad_norm": 0.0, - "learning_rate": 2.0964652486779814e-07, - "loss": 0.7186, - "step": 33054 - }, - { - "epoch": 0.9366941539856612, - "grad_norm": 0.0, - "learning_rate": 2.0945962121859954e-07, - "loss": 0.8061, - "step": 33055 - }, - { - "epoch": 0.9367224914279237, - "grad_norm": 0.0, - "learning_rate": 2.0927280003875783e-07, - "loss": 0.8209, - "step": 33056 - }, - { - "epoch": 0.9367508288701861, - "grad_norm": 0.0, - "learning_rate": 2.0908606132984732e-07, - "loss": 0.8365, - "step": 33057 - }, - { - "epoch": 0.9367791663124486, - "grad_norm": 0.0, - "learning_rate": 2.088994050934412e-07, - "loss": 0.8322, - "step": 33058 - }, - { - "epoch": 0.9368075037547111, - "grad_norm": 0.0, - "learning_rate": 2.0871283133111153e-07, - "loss": 0.7107, - "step": 33059 - }, - { - "epoch": 0.9368358411969736, - "grad_norm": 0.0, - "learning_rate": 2.085263400444282e-07, - "loss": 0.7782, - "step": 33060 - }, - { - "epoch": 0.936864178639236, - "grad_norm": 0.0, - "learning_rate": 2.0833993123496544e-07, - "loss": 0.8379, - "step": 33061 - }, - { - "epoch": 0.9368925160814985, - "grad_norm": 0.0, - "learning_rate": 2.0815360490428983e-07, - "loss": 0.72, - "step": 33062 - }, - { - "epoch": 0.936920853523761, - "grad_norm": 0.0, - "learning_rate": 2.0796736105397232e-07, - "loss": 0.711, - "step": 33063 - }, - { - "epoch": 0.9369491909660234, - "grad_norm": 0.0, - "learning_rate": 2.0778119968558387e-07, - "loss": 0.8654, - "step": 33064 - }, - { - "epoch": 0.9369775284082859, - "grad_norm": 0.0, - "learning_rate": 2.0759512080068877e-07, - "loss": 0.7656, - "step": 33065 - }, - { - "epoch": 0.9370058658505483, - "grad_norm": 0.0, - "learning_rate": 2.0740912440085803e-07, - "loss": 0.8293, - "step": 33066 - }, - { - "epoch": 0.9370342032928108, - "grad_norm": 0.0, - "learning_rate": 2.072232104876548e-07, - "loss": 0.8727, - "step": 33067 - }, - { - "epoch": 0.9370625407350732, - "grad_norm": 0.0, - "learning_rate": 2.0703737906264788e-07, - "loss": 0.7872, - "step": 33068 - }, - { - "epoch": 0.9370908781773357, - "grad_norm": 0.0, - "learning_rate": 2.0685163012740039e-07, - "loss": 0.8268, - "step": 33069 - }, - { - "epoch": 0.9371192156195982, - "grad_norm": 0.0, - "learning_rate": 2.066659636834789e-07, - "loss": 0.7936, - "step": 33070 - }, - { - "epoch": 0.9371475530618606, - "grad_norm": 0.0, - "learning_rate": 2.064803797324466e-07, - "loss": 0.774, - "step": 33071 - }, - { - "epoch": 0.9371758905041231, - "grad_norm": 0.0, - "learning_rate": 2.062948782758678e-07, - "loss": 0.8156, - "step": 33072 - }, - { - "epoch": 0.9372042279463856, - "grad_norm": 0.0, - "learning_rate": 2.0610945931530347e-07, - "loss": 0.7682, - "step": 33073 - }, - { - "epoch": 0.9372325653886481, - "grad_norm": 0.0, - "learning_rate": 2.0592412285231677e-07, - "loss": 0.7505, - "step": 33074 - }, - { - "epoch": 0.9372609028309105, - "grad_norm": 0.0, - "learning_rate": 2.057388688884665e-07, - "loss": 0.9424, - "step": 33075 - }, - { - "epoch": 0.937289240273173, - "grad_norm": 0.0, - "learning_rate": 2.0555369742531584e-07, - "loss": 0.762, - "step": 33076 - }, - { - "epoch": 0.9373175777154354, - "grad_norm": 0.0, - "learning_rate": 2.0536860846442353e-07, - "loss": 0.8695, - "step": 33077 - }, - { - "epoch": 0.9373459151576978, - "grad_norm": 0.0, - "learning_rate": 2.0518360200734833e-07, - "loss": 0.8603, - "step": 33078 - }, - { - "epoch": 0.9373742525999603, - "grad_norm": 0.0, - "learning_rate": 2.0499867805564789e-07, - "loss": 0.8669, - "step": 33079 - }, - { - "epoch": 0.9374025900422228, - "grad_norm": 0.0, - "learning_rate": 2.0481383661088316e-07, - "loss": 0.7399, - "step": 33080 - }, - { - "epoch": 0.9374309274844852, - "grad_norm": 0.0, - "learning_rate": 2.0462907767460628e-07, - "loss": 0.8475, - "step": 33081 - }, - { - "epoch": 0.9374592649267477, - "grad_norm": 0.0, - "learning_rate": 2.0444440124837706e-07, - "loss": 0.8812, - "step": 33082 - }, - { - "epoch": 0.9374876023690102, - "grad_norm": 0.0, - "learning_rate": 2.0425980733375096e-07, - "loss": 0.7998, - "step": 33083 - }, - { - "epoch": 0.9375159398112727, - "grad_norm": 0.0, - "learning_rate": 2.0407529593228114e-07, - "loss": 0.8218, - "step": 33084 - }, - { - "epoch": 0.9375442772535351, - "grad_norm": 0.0, - "learning_rate": 2.0389086704552307e-07, - "loss": 0.8293, - "step": 33085 - }, - { - "epoch": 0.9375726146957976, - "grad_norm": 0.0, - "learning_rate": 2.0370652067502993e-07, - "loss": 0.8553, - "step": 33086 - }, - { - "epoch": 0.93760095213806, - "grad_norm": 0.0, - "learning_rate": 2.0352225682235384e-07, - "loss": 0.7986, - "step": 33087 - }, - { - "epoch": 0.9376292895803224, - "grad_norm": 0.0, - "learning_rate": 2.0333807548904906e-07, - "loss": 0.7124, - "step": 33088 - }, - { - "epoch": 0.9376576270225849, - "grad_norm": 0.0, - "learning_rate": 2.0315397667666438e-07, - "loss": 0.8076, - "step": 33089 - }, - { - "epoch": 0.9376859644648474, - "grad_norm": 0.0, - "learning_rate": 2.0296996038675298e-07, - "loss": 0.8688, - "step": 33090 - }, - { - "epoch": 0.9377143019071099, - "grad_norm": 0.0, - "learning_rate": 2.0278602662086257e-07, - "loss": 0.7852, - "step": 33091 - }, - { - "epoch": 0.9377426393493723, - "grad_norm": 0.0, - "learning_rate": 2.0260217538054295e-07, - "loss": 0.8213, - "step": 33092 - }, - { - "epoch": 0.9377709767916348, - "grad_norm": 0.0, - "learning_rate": 2.0241840666734515e-07, - "loss": 0.7356, - "step": 33093 - }, - { - "epoch": 0.9377993142338973, - "grad_norm": 0.0, - "learning_rate": 2.0223472048281455e-07, - "loss": 0.8702, - "step": 33094 - }, - { - "epoch": 0.9378276516761597, - "grad_norm": 0.0, - "learning_rate": 2.0205111682849887e-07, - "loss": 0.7592, - "step": 33095 - }, - { - "epoch": 0.9378559891184222, - "grad_norm": 0.0, - "learning_rate": 2.018675957059446e-07, - "loss": 0.7684, - "step": 33096 - }, - { - "epoch": 0.9378843265606847, - "grad_norm": 0.0, - "learning_rate": 2.016841571166983e-07, - "loss": 0.753, - "step": 33097 - }, - { - "epoch": 0.937912664002947, - "grad_norm": 0.0, - "learning_rate": 2.015008010623054e-07, - "loss": 0.7821, - "step": 33098 - }, - { - "epoch": 0.9379410014452095, - "grad_norm": 0.0, - "learning_rate": 2.013175275443102e-07, - "loss": 0.8273, - "step": 33099 - }, - { - "epoch": 0.937969338887472, - "grad_norm": 0.0, - "learning_rate": 2.0113433656425484e-07, - "loss": 0.8081, - "step": 33100 - }, - { - "epoch": 0.9379976763297345, - "grad_norm": 0.0, - "learning_rate": 2.0095122812368472e-07, - "loss": 0.8486, - "step": 33101 - }, - { - "epoch": 0.9380260137719969, - "grad_norm": 0.0, - "learning_rate": 2.0076820222414083e-07, - "loss": 0.7673, - "step": 33102 - }, - { - "epoch": 0.9380543512142594, - "grad_norm": 0.0, - "learning_rate": 2.005852588671664e-07, - "loss": 0.8688, - "step": 33103 - }, - { - "epoch": 0.9380826886565219, - "grad_norm": 0.0, - "learning_rate": 2.0040239805429906e-07, - "loss": 0.841, - "step": 33104 - }, - { - "epoch": 0.9381110260987843, - "grad_norm": 0.0, - "learning_rate": 2.0021961978708204e-07, - "loss": 0.7985, - "step": 33105 - }, - { - "epoch": 0.9381393635410468, - "grad_norm": 0.0, - "learning_rate": 2.0003692406705523e-07, - "loss": 0.8701, - "step": 33106 - }, - { - "epoch": 0.9381677009833093, - "grad_norm": 0.0, - "learning_rate": 1.9985431089575624e-07, - "loss": 0.7082, - "step": 33107 - }, - { - "epoch": 0.9381960384255718, - "grad_norm": 0.0, - "learning_rate": 1.996717802747228e-07, - "loss": 0.7686, - "step": 33108 - }, - { - "epoch": 0.9382243758678341, - "grad_norm": 0.0, - "learning_rate": 1.994893322054925e-07, - "loss": 0.7668, - "step": 33109 - }, - { - "epoch": 0.9382527133100966, - "grad_norm": 0.0, - "learning_rate": 1.9930696668960415e-07, - "loss": 0.7774, - "step": 33110 - }, - { - "epoch": 0.9382810507523591, - "grad_norm": 0.0, - "learning_rate": 1.9912468372859317e-07, - "loss": 0.9053, - "step": 33111 - }, - { - "epoch": 0.9383093881946215, - "grad_norm": 0.0, - "learning_rate": 1.989424833239939e-07, - "loss": 0.8387, - "step": 33112 - }, - { - "epoch": 0.938337725636884, - "grad_norm": 0.0, - "learning_rate": 1.9876036547734067e-07, - "loss": 0.819, - "step": 33113 - }, - { - "epoch": 0.9383660630791465, - "grad_norm": 0.0, - "learning_rate": 1.9857833019017004e-07, - "loss": 0.89, - "step": 33114 - }, - { - "epoch": 0.938394400521409, - "grad_norm": 0.0, - "learning_rate": 1.9839637746401298e-07, - "loss": 0.8478, - "step": 33115 - }, - { - "epoch": 0.9384227379636714, - "grad_norm": 0.0, - "learning_rate": 1.982145073004027e-07, - "loss": 0.8062, - "step": 33116 - }, - { - "epoch": 0.9384510754059339, - "grad_norm": 0.0, - "learning_rate": 1.9803271970087246e-07, - "loss": 0.8266, - "step": 33117 - }, - { - "epoch": 0.9384794128481964, - "grad_norm": 0.0, - "learning_rate": 1.97851014666951e-07, - "loss": 0.7939, - "step": 33118 - }, - { - "epoch": 0.9385077502904587, - "grad_norm": 0.0, - "learning_rate": 1.9766939220017155e-07, - "loss": 0.7729, - "step": 33119 - }, - { - "epoch": 0.9385360877327212, - "grad_norm": 0.0, - "learning_rate": 1.974878523020629e-07, - "loss": 0.8986, - "step": 33120 - }, - { - "epoch": 0.9385644251749837, - "grad_norm": 0.0, - "learning_rate": 1.9730639497415272e-07, - "loss": 0.6718, - "step": 33121 - }, - { - "epoch": 0.9385927626172461, - "grad_norm": 0.0, - "learning_rate": 1.9712502021797197e-07, - "loss": 0.7924, - "step": 33122 - }, - { - "epoch": 0.9386211000595086, - "grad_norm": 0.0, - "learning_rate": 1.9694372803504724e-07, - "loss": 0.8696, - "step": 33123 - }, - { - "epoch": 0.9386494375017711, - "grad_norm": 0.0, - "learning_rate": 1.967625184269062e-07, - "loss": 0.8373, - "step": 33124 - }, - { - "epoch": 0.9386777749440336, - "grad_norm": 0.0, - "learning_rate": 1.9658139139507427e-07, - "loss": 0.7478, - "step": 33125 - }, - { - "epoch": 0.938706112386296, - "grad_norm": 0.0, - "learning_rate": 1.96400346941078e-07, - "loss": 0.7644, - "step": 33126 - }, - { - "epoch": 0.9387344498285585, - "grad_norm": 0.0, - "learning_rate": 1.9621938506644178e-07, - "loss": 0.7525, - "step": 33127 - }, - { - "epoch": 0.938762787270821, - "grad_norm": 0.0, - "learning_rate": 1.960385057726899e-07, - "loss": 0.6307, - "step": 33128 - }, - { - "epoch": 0.9387911247130833, - "grad_norm": 0.0, - "learning_rate": 1.9585770906134671e-07, - "loss": 0.8032, - "step": 33129 - }, - { - "epoch": 0.9388194621553458, - "grad_norm": 0.0, - "learning_rate": 1.9567699493393545e-07, - "loss": 0.8039, - "step": 33130 - }, - { - "epoch": 0.9388477995976083, - "grad_norm": 0.0, - "learning_rate": 1.9549636339197708e-07, - "loss": 0.8006, - "step": 33131 - }, - { - "epoch": 0.9388761370398708, - "grad_norm": 0.0, - "learning_rate": 1.9531581443699376e-07, - "loss": 0.7641, - "step": 33132 - }, - { - "epoch": 0.9389044744821332, - "grad_norm": 0.0, - "learning_rate": 1.9513534807050538e-07, - "loss": 0.7223, - "step": 33133 - }, - { - "epoch": 0.9389328119243957, - "grad_norm": 0.0, - "learning_rate": 1.9495496429403403e-07, - "loss": 0.8444, - "step": 33134 - }, - { - "epoch": 0.9389611493666582, - "grad_norm": 0.0, - "learning_rate": 1.9477466310909633e-07, - "loss": 0.8535, - "step": 33135 - }, - { - "epoch": 0.9389894868089206, - "grad_norm": 0.0, - "learning_rate": 1.9459444451721433e-07, - "loss": 0.8429, - "step": 33136 - }, - { - "epoch": 0.9390178242511831, - "grad_norm": 0.0, - "learning_rate": 1.9441430851990352e-07, - "loss": 0.7498, - "step": 33137 - }, - { - "epoch": 0.9390461616934456, - "grad_norm": 0.0, - "learning_rate": 1.9423425511868376e-07, - "loss": 0.7796, - "step": 33138 - }, - { - "epoch": 0.9390744991357081, - "grad_norm": 0.0, - "learning_rate": 1.9405428431506835e-07, - "loss": 0.8722, - "step": 33139 - }, - { - "epoch": 0.9391028365779704, - "grad_norm": 0.0, - "learning_rate": 1.93874396110576e-07, - "loss": 0.7592, - "step": 33140 - }, - { - "epoch": 0.9391311740202329, - "grad_norm": 0.0, - "learning_rate": 1.936945905067211e-07, - "loss": 0.7679, - "step": 33141 - }, - { - "epoch": 0.9391595114624954, - "grad_norm": 0.0, - "learning_rate": 1.9351486750501803e-07, - "loss": 0.7328, - "step": 33142 - }, - { - "epoch": 0.9391878489047578, - "grad_norm": 0.0, - "learning_rate": 1.9333522710698106e-07, - "loss": 0.8043, - "step": 33143 - }, - { - "epoch": 0.9392161863470203, - "grad_norm": 0.0, - "learning_rate": 1.9315566931412233e-07, - "loss": 0.9236, - "step": 33144 - }, - { - "epoch": 0.9392445237892828, - "grad_norm": 0.0, - "learning_rate": 1.929761941279551e-07, - "loss": 0.8685, - "step": 33145 - }, - { - "epoch": 0.9392728612315452, - "grad_norm": 0.0, - "learning_rate": 1.9279680154999148e-07, - "loss": 0.7902, - "step": 33146 - }, - { - "epoch": 0.9393011986738077, - "grad_norm": 0.0, - "learning_rate": 1.9261749158174248e-07, - "loss": 0.7671, - "step": 33147 - }, - { - "epoch": 0.9393295361160702, - "grad_norm": 0.0, - "learning_rate": 1.9243826422471689e-07, - "loss": 0.8038, - "step": 33148 - }, - { - "epoch": 0.9393578735583327, - "grad_norm": 0.0, - "learning_rate": 1.9225911948042685e-07, - "loss": 0.7716, - "step": 33149 - }, - { - "epoch": 0.939386211000595, - "grad_norm": 0.0, - "learning_rate": 1.9208005735038005e-07, - "loss": 0.8513, - "step": 33150 - }, - { - "epoch": 0.9394145484428575, - "grad_norm": 0.0, - "learning_rate": 1.9190107783608635e-07, - "loss": 0.8355, - "step": 33151 - }, - { - "epoch": 0.93944288588512, - "grad_norm": 0.0, - "learning_rate": 1.9172218093905015e-07, - "loss": 0.7468, - "step": 33152 - }, - { - "epoch": 0.9394712233273824, - "grad_norm": 0.0, - "learning_rate": 1.9154336666078132e-07, - "loss": 0.8204, - "step": 33153 - }, - { - "epoch": 0.9394995607696449, - "grad_norm": 0.0, - "learning_rate": 1.9136463500278424e-07, - "loss": 0.7779, - "step": 33154 - }, - { - "epoch": 0.9395278982119074, - "grad_norm": 0.0, - "learning_rate": 1.9118598596656547e-07, - "loss": 0.7846, - "step": 33155 - }, - { - "epoch": 0.9395562356541699, - "grad_norm": 0.0, - "learning_rate": 1.9100741955363044e-07, - "loss": 0.8141, - "step": 33156 - }, - { - "epoch": 0.9395845730964323, - "grad_norm": 0.0, - "learning_rate": 1.9082893576548133e-07, - "loss": 0.762, - "step": 33157 - }, - { - "epoch": 0.9396129105386948, - "grad_norm": 0.0, - "learning_rate": 1.9065053460362247e-07, - "loss": 0.7991, - "step": 33158 - }, - { - "epoch": 0.9396412479809573, - "grad_norm": 0.0, - "learning_rate": 1.9047221606955713e-07, - "loss": 0.8659, - "step": 33159 - }, - { - "epoch": 0.9396695854232197, - "grad_norm": 0.0, - "learning_rate": 1.902939801647863e-07, - "loss": 0.8626, - "step": 33160 - }, - { - "epoch": 0.9396979228654821, - "grad_norm": 0.0, - "learning_rate": 1.901158268908132e-07, - "loss": 0.8007, - "step": 33161 - }, - { - "epoch": 0.9397262603077446, - "grad_norm": 0.0, - "learning_rate": 1.899377562491367e-07, - "loss": 0.8307, - "step": 33162 - }, - { - "epoch": 0.9397545977500071, - "grad_norm": 0.0, - "learning_rate": 1.8975976824125775e-07, - "loss": 0.7458, - "step": 33163 - }, - { - "epoch": 0.9397829351922695, - "grad_norm": 0.0, - "learning_rate": 1.8958186286867518e-07, - "loss": 0.7902, - "step": 33164 - }, - { - "epoch": 0.939811272634532, - "grad_norm": 0.0, - "learning_rate": 1.8940404013288672e-07, - "loss": 0.8037, - "step": 33165 - }, - { - "epoch": 0.9398396100767945, - "grad_norm": 0.0, - "learning_rate": 1.8922630003539222e-07, - "loss": 0.8395, - "step": 33166 - }, - { - "epoch": 0.9398679475190569, - "grad_norm": 0.0, - "learning_rate": 1.8904864257768718e-07, - "loss": 0.8867, - "step": 33167 - }, - { - "epoch": 0.9398962849613194, - "grad_norm": 0.0, - "learning_rate": 1.888710677612693e-07, - "loss": 0.8629, - "step": 33168 - }, - { - "epoch": 0.9399246224035819, - "grad_norm": 0.0, - "learning_rate": 1.886935755876329e-07, - "loss": 0.8042, - "step": 33169 - }, - { - "epoch": 0.9399529598458443, - "grad_norm": 0.0, - "learning_rate": 1.885161660582746e-07, - "loss": 0.8288, - "step": 33170 - }, - { - "epoch": 0.9399812972881068, - "grad_norm": 0.0, - "learning_rate": 1.8833883917468654e-07, - "loss": 0.8565, - "step": 33171 - }, - { - "epoch": 0.9400096347303692, - "grad_norm": 0.0, - "learning_rate": 1.8816159493836528e-07, - "loss": 0.8027, - "step": 33172 - }, - { - "epoch": 0.9400379721726317, - "grad_norm": 0.0, - "learning_rate": 1.8798443335080185e-07, - "loss": 0.8875, - "step": 33173 - }, - { - "epoch": 0.9400663096148941, - "grad_norm": 0.0, - "learning_rate": 1.8780735441348842e-07, - "loss": 0.7801, - "step": 33174 - }, - { - "epoch": 0.9400946470571566, - "grad_norm": 0.0, - "learning_rate": 1.876303581279193e-07, - "loss": 0.7581, - "step": 33175 - }, - { - "epoch": 0.9401229844994191, - "grad_norm": 0.0, - "learning_rate": 1.8745344449558222e-07, - "loss": 0.8384, - "step": 33176 - }, - { - "epoch": 0.9401513219416815, - "grad_norm": 0.0, - "learning_rate": 1.8727661351796932e-07, - "loss": 0.8081, - "step": 33177 - }, - { - "epoch": 0.940179659383944, - "grad_norm": 0.0, - "learning_rate": 1.870998651965683e-07, - "loss": 0.7348, - "step": 33178 - }, - { - "epoch": 0.9402079968262065, - "grad_norm": 0.0, - "learning_rate": 1.8692319953286908e-07, - "loss": 0.8175, - "step": 33179 - }, - { - "epoch": 0.940236334268469, - "grad_norm": 0.0, - "learning_rate": 1.8674661652836045e-07, - "loss": 0.8174, - "step": 33180 - }, - { - "epoch": 0.9402646717107314, - "grad_norm": 0.0, - "learning_rate": 1.8657011618452902e-07, - "loss": 0.8406, - "step": 33181 - }, - { - "epoch": 0.9402930091529939, - "grad_norm": 0.0, - "learning_rate": 1.8639369850286137e-07, - "loss": 0.7629, - "step": 33182 - }, - { - "epoch": 0.9403213465952563, - "grad_norm": 0.0, - "learning_rate": 1.862173634848441e-07, - "loss": 0.7659, - "step": 33183 - }, - { - "epoch": 0.9403496840375187, - "grad_norm": 0.0, - "learning_rate": 1.8604111113196154e-07, - "loss": 0.8596, - "step": 33184 - }, - { - "epoch": 0.9403780214797812, - "grad_norm": 0.0, - "learning_rate": 1.858649414456992e-07, - "loss": 0.7437, - "step": 33185 - }, - { - "epoch": 0.9404063589220437, - "grad_norm": 0.0, - "learning_rate": 1.8568885442754148e-07, - "loss": 0.7636, - "step": 33186 - }, - { - "epoch": 0.9404346963643062, - "grad_norm": 0.0, - "learning_rate": 1.8551285007897046e-07, - "loss": 0.8628, - "step": 33187 - }, - { - "epoch": 0.9404630338065686, - "grad_norm": 0.0, - "learning_rate": 1.8533692840146944e-07, - "loss": 0.8188, - "step": 33188 - }, - { - "epoch": 0.9404913712488311, - "grad_norm": 0.0, - "learning_rate": 1.8516108939651945e-07, - "loss": 0.8202, - "step": 33189 - }, - { - "epoch": 0.9405197086910936, - "grad_norm": 0.0, - "learning_rate": 1.8498533306560374e-07, - "loss": 0.8276, - "step": 33190 - }, - { - "epoch": 0.940548046133356, - "grad_norm": 0.0, - "learning_rate": 1.848096594102e-07, - "loss": 0.6824, - "step": 33191 - }, - { - "epoch": 0.9405763835756185, - "grad_norm": 0.0, - "learning_rate": 1.8463406843178933e-07, - "loss": 0.6722, - "step": 33192 - }, - { - "epoch": 0.940604721017881, - "grad_norm": 0.0, - "learning_rate": 1.844585601318516e-07, - "loss": 0.8409, - "step": 33193 - }, - { - "epoch": 0.9406330584601433, - "grad_norm": 0.0, - "learning_rate": 1.8428313451186452e-07, - "loss": 0.8322, - "step": 33194 - }, - { - "epoch": 0.9406613959024058, - "grad_norm": 0.0, - "learning_rate": 1.8410779157330362e-07, - "loss": 0.8025, - "step": 33195 - }, - { - "epoch": 0.9406897333446683, - "grad_norm": 0.0, - "learning_rate": 1.8393253131764988e-07, - "loss": 0.7038, - "step": 33196 - }, - { - "epoch": 0.9407180707869308, - "grad_norm": 0.0, - "learning_rate": 1.8375735374637659e-07, - "loss": 0.8573, - "step": 33197 - }, - { - "epoch": 0.9407464082291932, - "grad_norm": 0.0, - "learning_rate": 1.8358225886096038e-07, - "loss": 0.7607, - "step": 33198 - }, - { - "epoch": 0.9407747456714557, - "grad_norm": 0.0, - "learning_rate": 1.8340724666287556e-07, - "loss": 0.8181, - "step": 33199 - }, - { - "epoch": 0.9408030831137182, - "grad_norm": 0.0, - "learning_rate": 1.832323171535977e-07, - "loss": 0.7065, - "step": 33200 - }, - { - "epoch": 0.9408314205559806, - "grad_norm": 0.0, - "learning_rate": 1.8305747033459885e-07, - "loss": 0.8533, - "step": 33201 - }, - { - "epoch": 0.9408597579982431, - "grad_norm": 0.0, - "learning_rate": 1.8288270620735237e-07, - "loss": 0.8652, - "step": 33202 - }, - { - "epoch": 0.9408880954405056, - "grad_norm": 0.0, - "learning_rate": 1.827080247733315e-07, - "loss": 0.7716, - "step": 33203 - }, - { - "epoch": 0.940916432882768, - "grad_norm": 0.0, - "learning_rate": 1.8253342603400503e-07, - "loss": 0.8692, - "step": 33204 - }, - { - "epoch": 0.9409447703250304, - "grad_norm": 0.0, - "learning_rate": 1.8235890999084515e-07, - "loss": 0.9185, - "step": 33205 - }, - { - "epoch": 0.9409731077672929, - "grad_norm": 0.0, - "learning_rate": 1.821844766453229e-07, - "loss": 0.8293, - "step": 33206 - }, - { - "epoch": 0.9410014452095554, - "grad_norm": 0.0, - "learning_rate": 1.8201012599890598e-07, - "loss": 0.7478, - "step": 33207 - }, - { - "epoch": 0.9410297826518178, - "grad_norm": 0.0, - "learning_rate": 1.8183585805306325e-07, - "loss": 0.7405, - "step": 33208 - }, - { - "epoch": 0.9410581200940803, - "grad_norm": 0.0, - "learning_rate": 1.816616728092646e-07, - "loss": 0.7675, - "step": 33209 - }, - { - "epoch": 0.9410864575363428, - "grad_norm": 0.0, - "learning_rate": 1.8148757026897335e-07, - "loss": 0.8942, - "step": 33210 - }, - { - "epoch": 0.9411147949786053, - "grad_norm": 0.0, - "learning_rate": 1.813135504336594e-07, - "loss": 0.8561, - "step": 33211 - }, - { - "epoch": 0.9411431324208677, - "grad_norm": 0.0, - "learning_rate": 1.8113961330478714e-07, - "loss": 0.8035, - "step": 33212 - }, - { - "epoch": 0.9411714698631302, - "grad_norm": 0.0, - "learning_rate": 1.8096575888382205e-07, - "loss": 0.8055, - "step": 33213 - }, - { - "epoch": 0.9411998073053927, - "grad_norm": 0.0, - "learning_rate": 1.8079198717222967e-07, - "loss": 0.7626, - "step": 33214 - }, - { - "epoch": 0.941228144747655, - "grad_norm": 0.0, - "learning_rate": 1.8061829817147103e-07, - "loss": 0.8465, - "step": 33215 - }, - { - "epoch": 0.9412564821899175, - "grad_norm": 0.0, - "learning_rate": 1.8044469188301161e-07, - "loss": 0.793, - "step": 33216 - }, - { - "epoch": 0.94128481963218, - "grad_norm": 0.0, - "learning_rate": 1.802711683083136e-07, - "loss": 0.8669, - "step": 33217 - }, - { - "epoch": 0.9413131570744424, - "grad_norm": 0.0, - "learning_rate": 1.800977274488369e-07, - "loss": 0.8085, - "step": 33218 - }, - { - "epoch": 0.9413414945167049, - "grad_norm": 0.0, - "learning_rate": 1.7992436930604484e-07, - "loss": 0.7226, - "step": 33219 - }, - { - "epoch": 0.9413698319589674, - "grad_norm": 0.0, - "learning_rate": 1.7975109388139511e-07, - "loss": 0.794, - "step": 33220 - }, - { - "epoch": 0.9413981694012299, - "grad_norm": 0.0, - "learning_rate": 1.7957790117634877e-07, - "loss": 0.9688, - "step": 33221 - }, - { - "epoch": 0.9414265068434923, - "grad_norm": 0.0, - "learning_rate": 1.7940479119236576e-07, - "loss": 0.8314, - "step": 33222 - }, - { - "epoch": 0.9414548442857548, - "grad_norm": 0.0, - "learning_rate": 1.7923176393090158e-07, - "loss": 0.8257, - "step": 33223 - }, - { - "epoch": 0.9414831817280173, - "grad_norm": 0.0, - "learning_rate": 1.7905881939341617e-07, - "loss": 0.7338, - "step": 33224 - }, - { - "epoch": 0.9415115191702796, - "grad_norm": 0.0, - "learning_rate": 1.7888595758136396e-07, - "loss": 0.8153, - "step": 33225 - }, - { - "epoch": 0.9415398566125421, - "grad_norm": 0.0, - "learning_rate": 1.7871317849620262e-07, - "loss": 0.7195, - "step": 33226 - }, - { - "epoch": 0.9415681940548046, - "grad_norm": 0.0, - "learning_rate": 1.7854048213938767e-07, - "loss": 0.8579, - "step": 33227 - }, - { - "epoch": 0.9415965314970671, - "grad_norm": 0.0, - "learning_rate": 1.7836786851237354e-07, - "loss": 0.722, - "step": 33228 - }, - { - "epoch": 0.9416248689393295, - "grad_norm": 0.0, - "learning_rate": 1.7819533761661346e-07, - "loss": 0.7161, - "step": 33229 - }, - { - "epoch": 0.941653206381592, - "grad_norm": 0.0, - "learning_rate": 1.7802288945356184e-07, - "loss": 0.6929, - "step": 33230 - }, - { - "epoch": 0.9416815438238545, - "grad_norm": 0.0, - "learning_rate": 1.7785052402467086e-07, - "loss": 0.848, - "step": 33231 - }, - { - "epoch": 0.9417098812661169, - "grad_norm": 0.0, - "learning_rate": 1.7767824133139265e-07, - "loss": 0.8411, - "step": 33232 - }, - { - "epoch": 0.9417382187083794, - "grad_norm": 0.0, - "learning_rate": 1.7750604137517614e-07, - "loss": 0.758, - "step": 33233 - }, - { - "epoch": 0.9417665561506419, - "grad_norm": 0.0, - "learning_rate": 1.7733392415747452e-07, - "loss": 0.7953, - "step": 33234 - }, - { - "epoch": 0.9417948935929044, - "grad_norm": 0.0, - "learning_rate": 1.771618896797378e-07, - "loss": 0.805, - "step": 33235 - }, - { - "epoch": 0.9418232310351667, - "grad_norm": 0.0, - "learning_rate": 1.7698993794341368e-07, - "loss": 0.9186, - "step": 33236 - }, - { - "epoch": 0.9418515684774292, - "grad_norm": 0.0, - "learning_rate": 1.7681806894995102e-07, - "loss": 0.7537, - "step": 33237 - }, - { - "epoch": 0.9418799059196917, - "grad_norm": 0.0, - "learning_rate": 1.766462827007964e-07, - "loss": 0.7567, - "step": 33238 - }, - { - "epoch": 0.9419082433619541, - "grad_norm": 0.0, - "learning_rate": 1.7647457919739873e-07, - "loss": 0.8597, - "step": 33239 - }, - { - "epoch": 0.9419365808042166, - "grad_norm": 0.0, - "learning_rate": 1.763029584412046e-07, - "loss": 0.9316, - "step": 33240 - }, - { - "epoch": 0.9419649182464791, - "grad_norm": 0.0, - "learning_rate": 1.7613142043365728e-07, - "loss": 0.839, - "step": 33241 - }, - { - "epoch": 0.9419932556887415, - "grad_norm": 0.0, - "learning_rate": 1.759599651762034e-07, - "loss": 0.8476, - "step": 33242 - }, - { - "epoch": 0.942021593131004, - "grad_norm": 0.0, - "learning_rate": 1.7578859267028736e-07, - "loss": 0.8118, - "step": 33243 - }, - { - "epoch": 0.9420499305732665, - "grad_norm": 0.0, - "learning_rate": 1.7561730291735025e-07, - "loss": 0.8092, - "step": 33244 - }, - { - "epoch": 0.942078268015529, - "grad_norm": 0.0, - "learning_rate": 1.7544609591883865e-07, - "loss": 0.8228, - "step": 33245 - }, - { - "epoch": 0.9421066054577913, - "grad_norm": 0.0, - "learning_rate": 1.7527497167619256e-07, - "loss": 0.7724, - "step": 33246 - }, - { - "epoch": 0.9421349429000538, - "grad_norm": 0.0, - "learning_rate": 1.7510393019085303e-07, - "loss": 0.7664, - "step": 33247 - }, - { - "epoch": 0.9421632803423163, - "grad_norm": 0.0, - "learning_rate": 1.7493297146426225e-07, - "loss": 0.7784, - "step": 33248 - }, - { - "epoch": 0.9421916177845787, - "grad_norm": 0.0, - "learning_rate": 1.7476209549785906e-07, - "loss": 0.7443, - "step": 33249 - }, - { - "epoch": 0.9422199552268412, - "grad_norm": 0.0, - "learning_rate": 1.7459130229308342e-07, - "loss": 0.8297, - "step": 33250 - }, - { - "epoch": 0.9422482926691037, - "grad_norm": 0.0, - "learning_rate": 1.7442059185137306e-07, - "loss": 0.8416, - "step": 33251 - }, - { - "epoch": 0.9422766301113662, - "grad_norm": 0.0, - "learning_rate": 1.7424996417416796e-07, - "loss": 0.7846, - "step": 33252 - }, - { - "epoch": 0.9423049675536286, - "grad_norm": 0.0, - "learning_rate": 1.740794192629025e-07, - "loss": 0.8355, - "step": 33253 - }, - { - "epoch": 0.9423333049958911, - "grad_norm": 0.0, - "learning_rate": 1.7390895711901668e-07, - "loss": 0.7784, - "step": 33254 - }, - { - "epoch": 0.9423616424381536, - "grad_norm": 0.0, - "learning_rate": 1.7373857774394376e-07, - "loss": 0.7986, - "step": 33255 - }, - { - "epoch": 0.942389979880416, - "grad_norm": 0.0, - "learning_rate": 1.735682811391204e-07, - "loss": 0.8552, - "step": 33256 - }, - { - "epoch": 0.9424183173226784, - "grad_norm": 0.0, - "learning_rate": 1.7339806730597875e-07, - "loss": 0.7219, - "step": 33257 - }, - { - "epoch": 0.9424466547649409, - "grad_norm": 0.0, - "learning_rate": 1.7322793624595547e-07, - "loss": 0.8001, - "step": 33258 - }, - { - "epoch": 0.9424749922072034, - "grad_norm": 0.0, - "learning_rate": 1.7305788796048274e-07, - "loss": 0.8212, - "step": 33259 - }, - { - "epoch": 0.9425033296494658, - "grad_norm": 0.0, - "learning_rate": 1.728879224509905e-07, - "loss": 0.694, - "step": 33260 - }, - { - "epoch": 0.9425316670917283, - "grad_norm": 0.0, - "learning_rate": 1.7271803971891432e-07, - "loss": 0.8089, - "step": 33261 - }, - { - "epoch": 0.9425600045339908, - "grad_norm": 0.0, - "learning_rate": 1.7254823976568301e-07, - "loss": 0.6896, - "step": 33262 - }, - { - "epoch": 0.9425883419762532, - "grad_norm": 0.0, - "learning_rate": 1.7237852259272658e-07, - "loss": 0.8221, - "step": 33263 - }, - { - "epoch": 0.9426166794185157, - "grad_norm": 0.0, - "learning_rate": 1.7220888820147607e-07, - "loss": 0.7783, - "step": 33264 - }, - { - "epoch": 0.9426450168607782, - "grad_norm": 0.0, - "learning_rate": 1.7203933659335926e-07, - "loss": 0.7024, - "step": 33265 - }, - { - "epoch": 0.9426733543030406, - "grad_norm": 0.0, - "learning_rate": 1.7186986776980386e-07, - "loss": 0.8548, - "step": 33266 - }, - { - "epoch": 0.942701691745303, - "grad_norm": 0.0, - "learning_rate": 1.7170048173223985e-07, - "loss": 0.7308, - "step": 33267 - }, - { - "epoch": 0.9427300291875655, - "grad_norm": 0.0, - "learning_rate": 1.7153117848209056e-07, - "loss": 0.7453, - "step": 33268 - }, - { - "epoch": 0.942758366629828, - "grad_norm": 0.0, - "learning_rate": 1.7136195802078481e-07, - "loss": 0.8387, - "step": 33269 - }, - { - "epoch": 0.9427867040720904, - "grad_norm": 0.0, - "learning_rate": 1.7119282034974705e-07, - "loss": 0.7654, - "step": 33270 - }, - { - "epoch": 0.9428150415143529, - "grad_norm": 0.0, - "learning_rate": 1.7102376547040166e-07, - "loss": 0.8109, - "step": 33271 - }, - { - "epoch": 0.9428433789566154, - "grad_norm": 0.0, - "learning_rate": 1.7085479338417422e-07, - "loss": 0.7743, - "step": 33272 - }, - { - "epoch": 0.9428717163988778, - "grad_norm": 0.0, - "learning_rate": 1.7068590409248464e-07, - "loss": 0.7756, - "step": 33273 - }, - { - "epoch": 0.9429000538411403, - "grad_norm": 0.0, - "learning_rate": 1.7051709759675962e-07, - "loss": 0.7699, - "step": 33274 - }, - { - "epoch": 0.9429283912834028, - "grad_norm": 0.0, - "learning_rate": 1.7034837389841906e-07, - "loss": 0.8285, - "step": 33275 - }, - { - "epoch": 0.9429567287256653, - "grad_norm": 0.0, - "learning_rate": 1.7017973299888302e-07, - "loss": 0.8237, - "step": 33276 - }, - { - "epoch": 0.9429850661679277, - "grad_norm": 0.0, - "learning_rate": 1.7001117489957363e-07, - "loss": 0.8831, - "step": 33277 - }, - { - "epoch": 0.9430134036101901, - "grad_norm": 0.0, - "learning_rate": 1.6984269960191092e-07, - "loss": 0.9291, - "step": 33278 - }, - { - "epoch": 0.9430417410524526, - "grad_norm": 0.0, - "learning_rate": 1.6967430710731258e-07, - "loss": 0.7842, - "step": 33279 - }, - { - "epoch": 0.943070078494715, - "grad_norm": 0.0, - "learning_rate": 1.6950599741719864e-07, - "loss": 0.8481, - "step": 33280 - }, - { - "epoch": 0.9430984159369775, - "grad_norm": 0.0, - "learning_rate": 1.6933777053298684e-07, - "loss": 0.8126, - "step": 33281 - }, - { - "epoch": 0.94312675337924, - "grad_norm": 0.0, - "learning_rate": 1.691696264560927e-07, - "loss": 0.7927, - "step": 33282 - }, - { - "epoch": 0.9431550908215025, - "grad_norm": 0.0, - "learning_rate": 1.6900156518793398e-07, - "loss": 0.6961, - "step": 33283 - }, - { - "epoch": 0.9431834282637649, - "grad_norm": 0.0, - "learning_rate": 1.688335867299251e-07, - "loss": 0.6726, - "step": 33284 - }, - { - "epoch": 0.9432117657060274, - "grad_norm": 0.0, - "learning_rate": 1.6866569108348164e-07, - "loss": 0.7564, - "step": 33285 - }, - { - "epoch": 0.9432401031482899, - "grad_norm": 0.0, - "learning_rate": 1.6849787825001796e-07, - "loss": 0.7841, - "step": 33286 - }, - { - "epoch": 0.9432684405905523, - "grad_norm": 0.0, - "learning_rate": 1.683301482309474e-07, - "loss": 0.7573, - "step": 33287 - }, - { - "epoch": 0.9432967780328148, - "grad_norm": 0.0, - "learning_rate": 1.6816250102768327e-07, - "loss": 0.8344, - "step": 33288 - }, - { - "epoch": 0.9433251154750772, - "grad_norm": 0.0, - "learning_rate": 1.679949366416367e-07, - "loss": 0.8525, - "step": 33289 - }, - { - "epoch": 0.9433534529173396, - "grad_norm": 0.0, - "learning_rate": 1.6782745507422094e-07, - "loss": 0.7985, - "step": 33290 - }, - { - "epoch": 0.9433817903596021, - "grad_norm": 0.0, - "learning_rate": 1.6766005632684378e-07, - "loss": 0.8052, - "step": 33291 - }, - { - "epoch": 0.9434101278018646, - "grad_norm": 0.0, - "learning_rate": 1.6749274040091857e-07, - "loss": 0.5718, - "step": 33292 - }, - { - "epoch": 0.9434384652441271, - "grad_norm": 0.0, - "learning_rate": 1.67325507297853e-07, - "loss": 0.871, - "step": 33293 - }, - { - "epoch": 0.9434668026863895, - "grad_norm": 0.0, - "learning_rate": 1.6715835701905604e-07, - "loss": 0.6904, - "step": 33294 - }, - { - "epoch": 0.943495140128652, - "grad_norm": 0.0, - "learning_rate": 1.6699128956593535e-07, - "loss": 0.7762, - "step": 33295 - }, - { - "epoch": 0.9435234775709145, - "grad_norm": 0.0, - "learning_rate": 1.6682430493989876e-07, - "loss": 0.8211, - "step": 33296 - }, - { - "epoch": 0.9435518150131769, - "grad_norm": 0.0, - "learning_rate": 1.666574031423518e-07, - "loss": 0.7954, - "step": 33297 - }, - { - "epoch": 0.9435801524554394, - "grad_norm": 0.0, - "learning_rate": 1.6649058417470222e-07, - "loss": 0.8693, - "step": 33298 - }, - { - "epoch": 0.9436084898977019, - "grad_norm": 0.0, - "learning_rate": 1.6632384803835334e-07, - "loss": 0.8546, - "step": 33299 - }, - { - "epoch": 0.9436368273399643, - "grad_norm": 0.0, - "learning_rate": 1.661571947347096e-07, - "loss": 0.7813, - "step": 33300 - }, - { - "epoch": 0.9436651647822267, - "grad_norm": 0.0, - "learning_rate": 1.6599062426517653e-07, - "loss": 0.7854, - "step": 33301 - }, - { - "epoch": 0.9436935022244892, - "grad_norm": 0.0, - "learning_rate": 1.6582413663115636e-07, - "loss": 0.8404, - "step": 33302 - }, - { - "epoch": 0.9437218396667517, - "grad_norm": 0.0, - "learning_rate": 1.656577318340502e-07, - "loss": 0.8627, - "step": 33303 - }, - { - "epoch": 0.9437501771090141, - "grad_norm": 0.0, - "learning_rate": 1.6549140987526136e-07, - "loss": 0.8806, - "step": 33304 - }, - { - "epoch": 0.9437785145512766, - "grad_norm": 0.0, - "learning_rate": 1.6532517075618982e-07, - "loss": 0.8126, - "step": 33305 - }, - { - "epoch": 0.9438068519935391, - "grad_norm": 0.0, - "learning_rate": 1.651590144782378e-07, - "loss": 0.6935, - "step": 33306 - }, - { - "epoch": 0.9438351894358015, - "grad_norm": 0.0, - "learning_rate": 1.6499294104280195e-07, - "loss": 0.7948, - "step": 33307 - }, - { - "epoch": 0.943863526878064, - "grad_norm": 0.0, - "learning_rate": 1.6482695045128338e-07, - "loss": 0.7845, - "step": 33308 - }, - { - "epoch": 0.9438918643203265, - "grad_norm": 0.0, - "learning_rate": 1.6466104270508099e-07, - "loss": 0.8118, - "step": 33309 - }, - { - "epoch": 0.943920201762589, - "grad_norm": 0.0, - "learning_rate": 1.644952178055892e-07, - "loss": 0.7911, - "step": 33310 - }, - { - "epoch": 0.9439485392048513, - "grad_norm": 0.0, - "learning_rate": 1.6432947575420578e-07, - "loss": 0.8001, - "step": 33311 - }, - { - "epoch": 0.9439768766471138, - "grad_norm": 0.0, - "learning_rate": 1.641638165523296e-07, - "loss": 0.7869, - "step": 33312 - }, - { - "epoch": 0.9440052140893763, - "grad_norm": 0.0, - "learning_rate": 1.6399824020135292e-07, - "loss": 0.8314, - "step": 33313 - }, - { - "epoch": 0.9440335515316387, - "grad_norm": 0.0, - "learning_rate": 1.6383274670267234e-07, - "loss": 0.7428, - "step": 33314 - }, - { - "epoch": 0.9440618889739012, - "grad_norm": 0.0, - "learning_rate": 1.6366733605768014e-07, - "loss": 0.8313, - "step": 33315 - }, - { - "epoch": 0.9440902264161637, - "grad_norm": 0.0, - "learning_rate": 1.6350200826777073e-07, - "loss": 0.7929, - "step": 33316 - }, - { - "epoch": 0.9441185638584262, - "grad_norm": 0.0, - "learning_rate": 1.6333676333433745e-07, - "loss": 0.8182, - "step": 33317 - }, - { - "epoch": 0.9441469013006886, - "grad_norm": 0.0, - "learning_rate": 1.631716012587703e-07, - "loss": 0.7866, - "step": 33318 - }, - { - "epoch": 0.9441752387429511, - "grad_norm": 0.0, - "learning_rate": 1.630065220424626e-07, - "loss": 0.7991, - "step": 33319 - }, - { - "epoch": 0.9442035761852136, - "grad_norm": 0.0, - "learning_rate": 1.628415256868032e-07, - "loss": 0.8351, - "step": 33320 - }, - { - "epoch": 0.9442319136274759, - "grad_norm": 0.0, - "learning_rate": 1.626766121931822e-07, - "loss": 0.7774, - "step": 33321 - }, - { - "epoch": 0.9442602510697384, - "grad_norm": 0.0, - "learning_rate": 1.6251178156298952e-07, - "loss": 0.7574, - "step": 33322 - }, - { - "epoch": 0.9442885885120009, - "grad_norm": 0.0, - "learning_rate": 1.6234703379761297e-07, - "loss": 0.8089, - "step": 33323 - }, - { - "epoch": 0.9443169259542634, - "grad_norm": 0.0, - "learning_rate": 1.6218236889844142e-07, - "loss": 0.8221, - "step": 33324 - }, - { - "epoch": 0.9443452633965258, - "grad_norm": 0.0, - "learning_rate": 1.6201778686686043e-07, - "loss": 0.7939, - "step": 33325 - }, - { - "epoch": 0.9443736008387883, - "grad_norm": 0.0, - "learning_rate": 1.6185328770425667e-07, - "loss": 0.9152, - "step": 33326 - }, - { - "epoch": 0.9444019382810508, - "grad_norm": 0.0, - "learning_rate": 1.6168887141201572e-07, - "loss": 0.9014, - "step": 33327 - }, - { - "epoch": 0.9444302757233132, - "grad_norm": 0.0, - "learning_rate": 1.615245379915231e-07, - "loss": 0.7674, - "step": 33328 - }, - { - "epoch": 0.9444586131655757, - "grad_norm": 0.0, - "learning_rate": 1.613602874441622e-07, - "loss": 0.8055, - "step": 33329 - }, - { - "epoch": 0.9444869506078382, - "grad_norm": 0.0, - "learning_rate": 1.6119611977131743e-07, - "loss": 0.8983, - "step": 33330 - }, - { - "epoch": 0.9445152880501005, - "grad_norm": 0.0, - "learning_rate": 1.6103203497437104e-07, - "loss": 0.8058, - "step": 33331 - }, - { - "epoch": 0.944543625492363, - "grad_norm": 0.0, - "learning_rate": 1.6086803305470633e-07, - "loss": 0.8207, - "step": 33332 - }, - { - "epoch": 0.9445719629346255, - "grad_norm": 0.0, - "learning_rate": 1.6070411401370335e-07, - "loss": 0.8525, - "step": 33333 - }, - { - "epoch": 0.944600300376888, - "grad_norm": 0.0, - "learning_rate": 1.6054027785274317e-07, - "loss": 0.8421, - "step": 33334 - }, - { - "epoch": 0.9446286378191504, - "grad_norm": 0.0, - "learning_rate": 1.6037652457320697e-07, - "loss": 0.718, - "step": 33335 - }, - { - "epoch": 0.9446569752614129, - "grad_norm": 0.0, - "learning_rate": 1.6021285417647247e-07, - "loss": 0.8907, - "step": 33336 - }, - { - "epoch": 0.9446853127036754, - "grad_norm": 0.0, - "learning_rate": 1.6004926666391863e-07, - "loss": 0.7211, - "step": 33337 - }, - { - "epoch": 0.9447136501459378, - "grad_norm": 0.0, - "learning_rate": 1.598857620369243e-07, - "loss": 0.7962, - "step": 33338 - }, - { - "epoch": 0.9447419875882003, - "grad_norm": 0.0, - "learning_rate": 1.5972234029686617e-07, - "loss": 0.7631, - "step": 33339 - }, - { - "epoch": 0.9447703250304628, - "grad_norm": 0.0, - "learning_rate": 1.5955900144511982e-07, - "loss": 0.858, - "step": 33340 - }, - { - "epoch": 0.9447986624727253, - "grad_norm": 0.0, - "learning_rate": 1.5939574548306414e-07, - "loss": 0.8165, - "step": 33341 - }, - { - "epoch": 0.9448269999149876, - "grad_norm": 0.0, - "learning_rate": 1.5923257241207024e-07, - "loss": 0.8632, - "step": 33342 - }, - { - "epoch": 0.9448553373572501, - "grad_norm": 0.0, - "learning_rate": 1.5906948223351593e-07, - "loss": 0.8725, - "step": 33343 - }, - { - "epoch": 0.9448836747995126, - "grad_norm": 0.0, - "learning_rate": 1.5890647494877342e-07, - "loss": 0.858, - "step": 33344 - }, - { - "epoch": 0.944912012241775, - "grad_norm": 0.0, - "learning_rate": 1.587435505592161e-07, - "loss": 0.7441, - "step": 33345 - }, - { - "epoch": 0.9449403496840375, - "grad_norm": 0.0, - "learning_rate": 1.5858070906621615e-07, - "loss": 0.8117, - "step": 33346 - }, - { - "epoch": 0.9449686871263, - "grad_norm": 0.0, - "learning_rate": 1.5841795047114584e-07, - "loss": 0.8649, - "step": 33347 - }, - { - "epoch": 0.9449970245685625, - "grad_norm": 0.0, - "learning_rate": 1.5825527477537518e-07, - "loss": 0.809, - "step": 33348 - }, - { - "epoch": 0.9450253620108249, - "grad_norm": 0.0, - "learning_rate": 1.5809268198027527e-07, - "loss": 0.8686, - "step": 33349 - }, - { - "epoch": 0.9450536994530874, - "grad_norm": 0.0, - "learning_rate": 1.5793017208721507e-07, - "loss": 0.892, - "step": 33350 - }, - { - "epoch": 0.9450820368953499, - "grad_norm": 0.0, - "learning_rate": 1.5776774509756455e-07, - "loss": 0.755, - "step": 33351 - }, - { - "epoch": 0.9451103743376122, - "grad_norm": 0.0, - "learning_rate": 1.576054010126904e-07, - "loss": 0.846, - "step": 33352 - }, - { - "epoch": 0.9451387117798747, - "grad_norm": 0.0, - "learning_rate": 1.5744313983396153e-07, - "loss": 0.7871, - "step": 33353 - }, - { - "epoch": 0.9451670492221372, - "grad_norm": 0.0, - "learning_rate": 1.5728096156274353e-07, - "loss": 0.84, - "step": 33354 - }, - { - "epoch": 0.9451953866643996, - "grad_norm": 0.0, - "learning_rate": 1.5711886620040305e-07, - "loss": 0.7543, - "step": 33355 - }, - { - "epoch": 0.9452237241066621, - "grad_norm": 0.0, - "learning_rate": 1.5695685374830572e-07, - "loss": 0.8179, - "step": 33356 - }, - { - "epoch": 0.9452520615489246, - "grad_norm": 0.0, - "learning_rate": 1.5679492420781483e-07, - "loss": 0.8184, - "step": 33357 - }, - { - "epoch": 0.9452803989911871, - "grad_norm": 0.0, - "learning_rate": 1.56633077580296e-07, - "loss": 0.7906, - "step": 33358 - }, - { - "epoch": 0.9453087364334495, - "grad_norm": 0.0, - "learning_rate": 1.5647131386711366e-07, - "loss": 0.9186, - "step": 33359 - }, - { - "epoch": 0.945337073875712, - "grad_norm": 0.0, - "learning_rate": 1.5630963306962676e-07, - "loss": 0.7927, - "step": 33360 - }, - { - "epoch": 0.9453654113179745, - "grad_norm": 0.0, - "learning_rate": 1.5614803518919974e-07, - "loss": 0.7838, - "step": 33361 - }, - { - "epoch": 0.9453937487602369, - "grad_norm": 0.0, - "learning_rate": 1.5598652022719373e-07, - "loss": 0.8907, - "step": 33362 - }, - { - "epoch": 0.9454220862024993, - "grad_norm": 0.0, - "learning_rate": 1.5582508818496765e-07, - "loss": 0.7937, - "step": 33363 - }, - { - "epoch": 0.9454504236447618, - "grad_norm": 0.0, - "learning_rate": 1.5566373906388377e-07, - "loss": 0.7849, - "step": 33364 - }, - { - "epoch": 0.9454787610870243, - "grad_norm": 0.0, - "learning_rate": 1.5550247286529874e-07, - "loss": 0.7445, - "step": 33365 - }, - { - "epoch": 0.9455070985292867, - "grad_norm": 0.0, - "learning_rate": 1.553412895905726e-07, - "loss": 0.7451, - "step": 33366 - }, - { - "epoch": 0.9455354359715492, - "grad_norm": 0.0, - "learning_rate": 1.5518018924106316e-07, - "loss": 0.8317, - "step": 33367 - }, - { - "epoch": 0.9455637734138117, - "grad_norm": 0.0, - "learning_rate": 1.5501917181812597e-07, - "loss": 0.8538, - "step": 33368 - }, - { - "epoch": 0.9455921108560741, - "grad_norm": 0.0, - "learning_rate": 1.5485823732311777e-07, - "loss": 0.7126, - "step": 33369 - }, - { - "epoch": 0.9456204482983366, - "grad_norm": 0.0, - "learning_rate": 1.5469738575739412e-07, - "loss": 0.7461, - "step": 33370 - }, - { - "epoch": 0.9456487857405991, - "grad_norm": 0.0, - "learning_rate": 1.545366171223117e-07, - "loss": 0.8208, - "step": 33371 - }, - { - "epoch": 0.9456771231828616, - "grad_norm": 0.0, - "learning_rate": 1.543759314192228e-07, - "loss": 0.8288, - "step": 33372 - }, - { - "epoch": 0.945705460625124, - "grad_norm": 0.0, - "learning_rate": 1.5421532864948184e-07, - "loss": 0.8109, - "step": 33373 - }, - { - "epoch": 0.9457337980673864, - "grad_norm": 0.0, - "learning_rate": 1.5405480881444002e-07, - "loss": 0.7955, - "step": 33374 - }, - { - "epoch": 0.9457621355096489, - "grad_norm": 0.0, - "learning_rate": 1.5389437191545286e-07, - "loss": 0.7843, - "step": 33375 - }, - { - "epoch": 0.9457904729519113, - "grad_norm": 0.0, - "learning_rate": 1.537340179538682e-07, - "loss": 0.7511, - "step": 33376 - }, - { - "epoch": 0.9458188103941738, - "grad_norm": 0.0, - "learning_rate": 1.5357374693103943e-07, - "loss": 0.6902, - "step": 33377 - }, - { - "epoch": 0.9458471478364363, - "grad_norm": 0.0, - "learning_rate": 1.5341355884831433e-07, - "loss": 0.8535, - "step": 33378 - }, - { - "epoch": 0.9458754852786987, - "grad_norm": 0.0, - "learning_rate": 1.5325345370704292e-07, - "loss": 0.8467, - "step": 33379 - }, - { - "epoch": 0.9459038227209612, - "grad_norm": 0.0, - "learning_rate": 1.5309343150857415e-07, - "loss": 0.8511, - "step": 33380 - }, - { - "epoch": 0.9459321601632237, - "grad_norm": 0.0, - "learning_rate": 1.529334922542558e-07, - "loss": 0.8293, - "step": 33381 - }, - { - "epoch": 0.9459604976054862, - "grad_norm": 0.0, - "learning_rate": 1.5277363594543572e-07, - "loss": 0.7389, - "step": 33382 - }, - { - "epoch": 0.9459888350477486, - "grad_norm": 0.0, - "learning_rate": 1.5261386258346167e-07, - "loss": 0.8034, - "step": 33383 - }, - { - "epoch": 0.946017172490011, - "grad_norm": 0.0, - "learning_rate": 1.5245417216967596e-07, - "loss": 0.868, - "step": 33384 - }, - { - "epoch": 0.9460455099322735, - "grad_norm": 0.0, - "learning_rate": 1.5229456470542636e-07, - "loss": 0.8245, - "step": 33385 - }, - { - "epoch": 0.9460738473745359, - "grad_norm": 0.0, - "learning_rate": 1.5213504019205627e-07, - "loss": 0.714, - "step": 33386 - }, - { - "epoch": 0.9461021848167984, - "grad_norm": 0.0, - "learning_rate": 1.5197559863090906e-07, - "loss": 0.7948, - "step": 33387 - }, - { - "epoch": 0.9461305222590609, - "grad_norm": 0.0, - "learning_rate": 1.5181624002332918e-07, - "loss": 0.8327, - "step": 33388 - }, - { - "epoch": 0.9461588597013234, - "grad_norm": 0.0, - "learning_rate": 1.516569643706578e-07, - "loss": 0.8148, - "step": 33389 - }, - { - "epoch": 0.9461871971435858, - "grad_norm": 0.0, - "learning_rate": 1.5149777167423607e-07, - "loss": 0.7668, - "step": 33390 - }, - { - "epoch": 0.9462155345858483, - "grad_norm": 0.0, - "learning_rate": 1.5133866193540735e-07, - "loss": 0.8579, - "step": 33391 - }, - { - "epoch": 0.9462438720281108, - "grad_norm": 0.0, - "learning_rate": 1.5117963515550837e-07, - "loss": 0.8195, - "step": 33392 - }, - { - "epoch": 0.9462722094703732, - "grad_norm": 0.0, - "learning_rate": 1.5102069133588247e-07, - "loss": 0.729, - "step": 33393 - }, - { - "epoch": 0.9463005469126357, - "grad_norm": 0.0, - "learning_rate": 1.508618304778653e-07, - "loss": 0.7572, - "step": 33394 - }, - { - "epoch": 0.9463288843548981, - "grad_norm": 0.0, - "learning_rate": 1.5070305258279684e-07, - "loss": 0.6711, - "step": 33395 - }, - { - "epoch": 0.9463572217971606, - "grad_norm": 0.0, - "learning_rate": 1.505443576520138e-07, - "loss": 0.7697, - "step": 33396 - }, - { - "epoch": 0.946385559239423, - "grad_norm": 0.0, - "learning_rate": 1.5038574568685294e-07, - "loss": 0.7706, - "step": 33397 - }, - { - "epoch": 0.9464138966816855, - "grad_norm": 0.0, - "learning_rate": 1.5022721668865092e-07, - "loss": 0.8028, - "step": 33398 - }, - { - "epoch": 0.946442234123948, - "grad_norm": 0.0, - "learning_rate": 1.5006877065874338e-07, - "loss": 0.719, - "step": 33399 - }, - { - "epoch": 0.9464705715662104, - "grad_norm": 0.0, - "learning_rate": 1.4991040759846366e-07, - "loss": 0.8688, - "step": 33400 - }, - { - "epoch": 0.9464989090084729, - "grad_norm": 0.0, - "learning_rate": 1.4975212750914625e-07, - "loss": 0.7401, - "step": 33401 - }, - { - "epoch": 0.9465272464507354, - "grad_norm": 0.0, - "learning_rate": 1.4959393039212455e-07, - "loss": 0.7539, - "step": 33402 - }, - { - "epoch": 0.9465555838929978, - "grad_norm": 0.0, - "learning_rate": 1.4943581624873084e-07, - "loss": 0.7605, - "step": 33403 - }, - { - "epoch": 0.9465839213352603, - "grad_norm": 0.0, - "learning_rate": 1.4927778508029733e-07, - "loss": 0.8068, - "step": 33404 - }, - { - "epoch": 0.9466122587775228, - "grad_norm": 0.0, - "learning_rate": 1.4911983688815522e-07, - "loss": 0.7947, - "step": 33405 - }, - { - "epoch": 0.9466405962197852, - "grad_norm": 0.0, - "learning_rate": 1.4896197167363345e-07, - "loss": 0.8014, - "step": 33406 - }, - { - "epoch": 0.9466689336620476, - "grad_norm": 0.0, - "learning_rate": 1.488041894380643e-07, - "loss": 0.8319, - "step": 33407 - }, - { - "epoch": 0.9466972711043101, - "grad_norm": 0.0, - "learning_rate": 1.4864649018277555e-07, - "loss": 0.8768, - "step": 33408 - }, - { - "epoch": 0.9467256085465726, - "grad_norm": 0.0, - "learning_rate": 1.4848887390909615e-07, - "loss": 0.8291, - "step": 33409 - }, - { - "epoch": 0.946753945988835, - "grad_norm": 0.0, - "learning_rate": 1.4833134061835176e-07, - "loss": 0.8236, - "step": 33410 - }, - { - "epoch": 0.9467822834310975, - "grad_norm": 0.0, - "learning_rate": 1.4817389031187124e-07, - "loss": 0.849, - "step": 33411 - }, - { - "epoch": 0.94681062087336, - "grad_norm": 0.0, - "learning_rate": 1.4801652299098136e-07, - "loss": 0.7146, - "step": 33412 - }, - { - "epoch": 0.9468389583156225, - "grad_norm": 0.0, - "learning_rate": 1.4785923865700658e-07, - "loss": 0.8385, - "step": 33413 - }, - { - "epoch": 0.9468672957578849, - "grad_norm": 0.0, - "learning_rate": 1.477020373112714e-07, - "loss": 0.811, - "step": 33414 - }, - { - "epoch": 0.9468956332001474, - "grad_norm": 0.0, - "learning_rate": 1.4754491895510147e-07, - "loss": 0.8089, - "step": 33415 - }, - { - "epoch": 0.9469239706424099, - "grad_norm": 0.0, - "learning_rate": 1.4738788358981791e-07, - "loss": 0.8083, - "step": 33416 - }, - { - "epoch": 0.9469523080846722, - "grad_norm": 0.0, - "learning_rate": 1.4723093121674635e-07, - "loss": 0.8088, - "step": 33417 - }, - { - "epoch": 0.9469806455269347, - "grad_norm": 0.0, - "learning_rate": 1.4707406183720574e-07, - "loss": 0.7301, - "step": 33418 - }, - { - "epoch": 0.9470089829691972, - "grad_norm": 0.0, - "learning_rate": 1.4691727545251945e-07, - "loss": 0.8964, - "step": 33419 - }, - { - "epoch": 0.9470373204114597, - "grad_norm": 0.0, - "learning_rate": 1.4676057206400862e-07, - "loss": 0.7263, - "step": 33420 - }, - { - "epoch": 0.9470656578537221, - "grad_norm": 0.0, - "learning_rate": 1.4660395167299112e-07, - "loss": 0.8224, - "step": 33421 - }, - { - "epoch": 0.9470939952959846, - "grad_norm": 0.0, - "learning_rate": 1.4644741428078923e-07, - "loss": 0.8926, - "step": 33422 - }, - { - "epoch": 0.9471223327382471, - "grad_norm": 0.0, - "learning_rate": 1.4629095988871854e-07, - "loss": 0.8588, - "step": 33423 - }, - { - "epoch": 0.9471506701805095, - "grad_norm": 0.0, - "learning_rate": 1.4613458849809915e-07, - "loss": 0.7777, - "step": 33424 - }, - { - "epoch": 0.947179007622772, - "grad_norm": 0.0, - "learning_rate": 1.459783001102466e-07, - "loss": 0.7067, - "step": 33425 - }, - { - "epoch": 0.9472073450650345, - "grad_norm": 0.0, - "learning_rate": 1.458220947264788e-07, - "loss": 0.8328, - "step": 33426 - }, - { - "epoch": 0.9472356825072968, - "grad_norm": 0.0, - "learning_rate": 1.4566597234810908e-07, - "loss": 0.7098, - "step": 33427 - }, - { - "epoch": 0.9472640199495593, - "grad_norm": 0.0, - "learning_rate": 1.4550993297645643e-07, - "loss": 0.8134, - "step": 33428 - }, - { - "epoch": 0.9472923573918218, - "grad_norm": 0.0, - "learning_rate": 1.4535397661283092e-07, - "loss": 0.7784, - "step": 33429 - }, - { - "epoch": 0.9473206948340843, - "grad_norm": 0.0, - "learning_rate": 1.4519810325855033e-07, - "loss": 0.7904, - "step": 33430 - }, - { - "epoch": 0.9473490322763467, - "grad_norm": 0.0, - "learning_rate": 1.4504231291492365e-07, - "loss": 0.8207, - "step": 33431 - }, - { - "epoch": 0.9473773697186092, - "grad_norm": 0.0, - "learning_rate": 1.448866055832654e-07, - "loss": 0.7683, - "step": 33432 - }, - { - "epoch": 0.9474057071608717, - "grad_norm": 0.0, - "learning_rate": 1.4473098126488783e-07, - "loss": 0.7452, - "step": 33433 - }, - { - "epoch": 0.9474340446031341, - "grad_norm": 0.0, - "learning_rate": 1.445754399610999e-07, - "loss": 0.8375, - "step": 33434 - }, - { - "epoch": 0.9474623820453966, - "grad_norm": 0.0, - "learning_rate": 1.4441998167321393e-07, - "loss": 0.7801, - "step": 33435 - }, - { - "epoch": 0.9474907194876591, - "grad_norm": 0.0, - "learning_rate": 1.442646064025377e-07, - "loss": 0.7056, - "step": 33436 - }, - { - "epoch": 0.9475190569299216, - "grad_norm": 0.0, - "learning_rate": 1.441093141503802e-07, - "loss": 0.8442, - "step": 33437 - }, - { - "epoch": 0.9475473943721839, - "grad_norm": 0.0, - "learning_rate": 1.4395410491805039e-07, - "loss": 0.7954, - "step": 33438 - }, - { - "epoch": 0.9475757318144464, - "grad_norm": 0.0, - "learning_rate": 1.43798978706855e-07, - "loss": 0.8506, - "step": 33439 - }, - { - "epoch": 0.9476040692567089, - "grad_norm": 0.0, - "learning_rate": 1.4364393551809963e-07, - "loss": 0.7908, - "step": 33440 - }, - { - "epoch": 0.9476324066989713, - "grad_norm": 0.0, - "learning_rate": 1.4348897535309324e-07, - "loss": 0.8359, - "step": 33441 - }, - { - "epoch": 0.9476607441412338, - "grad_norm": 0.0, - "learning_rate": 1.4333409821313815e-07, - "loss": 0.7438, - "step": 33442 - }, - { - "epoch": 0.9476890815834963, - "grad_norm": 0.0, - "learning_rate": 1.4317930409954107e-07, - "loss": 0.8566, - "step": 33443 - }, - { - "epoch": 0.9477174190257588, - "grad_norm": 0.0, - "learning_rate": 1.4302459301360428e-07, - "loss": 0.7958, - "step": 33444 - }, - { - "epoch": 0.9477457564680212, - "grad_norm": 0.0, - "learning_rate": 1.4286996495663119e-07, - "loss": 0.8079, - "step": 33445 - }, - { - "epoch": 0.9477740939102837, - "grad_norm": 0.0, - "learning_rate": 1.427154199299252e-07, - "loss": 0.8057, - "step": 33446 - }, - { - "epoch": 0.9478024313525462, - "grad_norm": 0.0, - "learning_rate": 1.4256095793478752e-07, - "loss": 0.7866, - "step": 33447 - }, - { - "epoch": 0.9478307687948085, - "grad_norm": 0.0, - "learning_rate": 1.424065789725193e-07, - "loss": 0.7564, - "step": 33448 - }, - { - "epoch": 0.947859106237071, - "grad_norm": 0.0, - "learning_rate": 1.4225228304442173e-07, - "loss": 0.7857, - "step": 33449 - }, - { - "epoch": 0.9478874436793335, - "grad_norm": 0.0, - "learning_rate": 1.4209807015179378e-07, - "loss": 0.7299, - "step": 33450 - }, - { - "epoch": 0.9479157811215959, - "grad_norm": 0.0, - "learning_rate": 1.419439402959344e-07, - "loss": 0.7562, - "step": 33451 - }, - { - "epoch": 0.9479441185638584, - "grad_norm": 0.0, - "learning_rate": 1.4178989347814143e-07, - "loss": 0.8105, - "step": 33452 - }, - { - "epoch": 0.9479724560061209, - "grad_norm": 0.0, - "learning_rate": 1.4163592969971273e-07, - "loss": 0.7761, - "step": 33453 - }, - { - "epoch": 0.9480007934483834, - "grad_norm": 0.0, - "learning_rate": 1.4148204896194616e-07, - "loss": 0.7679, - "step": 33454 - }, - { - "epoch": 0.9480291308906458, - "grad_norm": 0.0, - "learning_rate": 1.4132825126613626e-07, - "loss": 0.8155, - "step": 33455 - }, - { - "epoch": 0.9480574683329083, - "grad_norm": 0.0, - "learning_rate": 1.411745366135797e-07, - "loss": 0.8702, - "step": 33456 - }, - { - "epoch": 0.9480858057751708, - "grad_norm": 0.0, - "learning_rate": 1.410209050055711e-07, - "loss": 0.8058, - "step": 33457 - }, - { - "epoch": 0.9481141432174331, - "grad_norm": 0.0, - "learning_rate": 1.4086735644340487e-07, - "loss": 0.8571, - "step": 33458 - }, - { - "epoch": 0.9481424806596956, - "grad_norm": 0.0, - "learning_rate": 1.4071389092837339e-07, - "loss": 0.8465, - "step": 33459 - }, - { - "epoch": 0.9481708181019581, - "grad_norm": 0.0, - "learning_rate": 1.4056050846177004e-07, - "loss": 0.7473, - "step": 33460 - }, - { - "epoch": 0.9481991555442206, - "grad_norm": 0.0, - "learning_rate": 1.4040720904488603e-07, - "loss": 0.8148, - "step": 33461 - }, - { - "epoch": 0.948227492986483, - "grad_norm": 0.0, - "learning_rate": 1.4025399267901473e-07, - "loss": 0.7973, - "step": 33462 - }, - { - "epoch": 0.9482558304287455, - "grad_norm": 0.0, - "learning_rate": 1.4010085936544515e-07, - "loss": 0.7693, - "step": 33463 - }, - { - "epoch": 0.948284167871008, - "grad_norm": 0.0, - "learning_rate": 1.3994780910546735e-07, - "loss": 0.7811, - "step": 33464 - }, - { - "epoch": 0.9483125053132704, - "grad_norm": 0.0, - "learning_rate": 1.397948419003703e-07, - "loss": 0.8053, - "step": 33465 - }, - { - "epoch": 0.9483408427555329, - "grad_norm": 0.0, - "learning_rate": 1.3964195775144295e-07, - "loss": 0.8234, - "step": 33466 - }, - { - "epoch": 0.9483691801977954, - "grad_norm": 0.0, - "learning_rate": 1.394891566599732e-07, - "loss": 0.8955, - "step": 33467 - }, - { - "epoch": 0.9483975176400579, - "grad_norm": 0.0, - "learning_rate": 1.3933643862724777e-07, - "loss": 0.7618, - "step": 33468 - }, - { - "epoch": 0.9484258550823202, - "grad_norm": 0.0, - "learning_rate": 1.3918380365455232e-07, - "loss": 0.7576, - "step": 33469 - }, - { - "epoch": 0.9484541925245827, - "grad_norm": 0.0, - "learning_rate": 1.3903125174317467e-07, - "loss": 0.8648, - "step": 33470 - }, - { - "epoch": 0.9484825299668452, - "grad_norm": 0.0, - "learning_rate": 1.3887878289439827e-07, - "loss": 0.8185, - "step": 33471 - }, - { - "epoch": 0.9485108674091076, - "grad_norm": 0.0, - "learning_rate": 1.3872639710950652e-07, - "loss": 0.8748, - "step": 33472 - }, - { - "epoch": 0.9485392048513701, - "grad_norm": 0.0, - "learning_rate": 1.3857409438978508e-07, - "loss": 0.7886, - "step": 33473 - }, - { - "epoch": 0.9485675422936326, - "grad_norm": 0.0, - "learning_rate": 1.3842187473651626e-07, - "loss": 0.8049, - "step": 33474 - }, - { - "epoch": 0.948595879735895, - "grad_norm": 0.0, - "learning_rate": 1.3826973815098233e-07, - "loss": 0.7791, - "step": 33475 - }, - { - "epoch": 0.9486242171781575, - "grad_norm": 0.0, - "learning_rate": 1.381176846344634e-07, - "loss": 0.7599, - "step": 33476 - }, - { - "epoch": 0.94865255462042, - "grad_norm": 0.0, - "learning_rate": 1.3796571418824177e-07, - "loss": 0.7886, - "step": 33477 - }, - { - "epoch": 0.9486808920626825, - "grad_norm": 0.0, - "learning_rate": 1.3781382681359756e-07, - "loss": 0.821, - "step": 33478 - }, - { - "epoch": 0.9487092295049449, - "grad_norm": 0.0, - "learning_rate": 1.376620225118086e-07, - "loss": 0.8355, - "step": 33479 - }, - { - "epoch": 0.9487375669472073, - "grad_norm": 0.0, - "learning_rate": 1.375103012841561e-07, - "loss": 0.8772, - "step": 33480 - }, - { - "epoch": 0.9487659043894698, - "grad_norm": 0.0, - "learning_rate": 1.373586631319157e-07, - "loss": 0.759, - "step": 33481 - }, - { - "epoch": 0.9487942418317322, - "grad_norm": 0.0, - "learning_rate": 1.3720710805636638e-07, - "loss": 0.787, - "step": 33482 - }, - { - "epoch": 0.9488225792739947, - "grad_norm": 0.0, - "learning_rate": 1.370556360587838e-07, - "loss": 0.8082, - "step": 33483 - }, - { - "epoch": 0.9488509167162572, - "grad_norm": 0.0, - "learning_rate": 1.3690424714044358e-07, - "loss": 0.7694, - "step": 33484 - }, - { - "epoch": 0.9488792541585197, - "grad_norm": 0.0, - "learning_rate": 1.367529413026225e-07, - "loss": 0.7693, - "step": 33485 - }, - { - "epoch": 0.9489075916007821, - "grad_norm": 0.0, - "learning_rate": 1.3660171854659288e-07, - "loss": 0.8324, - "step": 33486 - }, - { - "epoch": 0.9489359290430446, - "grad_norm": 0.0, - "learning_rate": 1.364505788736292e-07, - "loss": 0.8222, - "step": 33487 - }, - { - "epoch": 0.9489642664853071, - "grad_norm": 0.0, - "learning_rate": 1.362995222850072e-07, - "loss": 0.8704, - "step": 33488 - }, - { - "epoch": 0.9489926039275695, - "grad_norm": 0.0, - "learning_rate": 1.3614854878199578e-07, - "loss": 0.7492, - "step": 33489 - }, - { - "epoch": 0.949020941369832, - "grad_norm": 0.0, - "learning_rate": 1.359976583658673e-07, - "loss": 0.8388, - "step": 33490 - }, - { - "epoch": 0.9490492788120944, - "grad_norm": 0.0, - "learning_rate": 1.358468510378952e-07, - "loss": 0.7449, - "step": 33491 - }, - { - "epoch": 0.9490776162543569, - "grad_norm": 0.0, - "learning_rate": 1.356961267993473e-07, - "loss": 0.8048, - "step": 33492 - }, - { - "epoch": 0.9491059536966193, - "grad_norm": 0.0, - "learning_rate": 1.3554548565149372e-07, - "loss": 0.816, - "step": 33493 - }, - { - "epoch": 0.9491342911388818, - "grad_norm": 0.0, - "learning_rate": 1.3539492759560347e-07, - "loss": 0.7609, - "step": 33494 - }, - { - "epoch": 0.9491626285811443, - "grad_norm": 0.0, - "learning_rate": 1.3524445263294438e-07, - "loss": 0.7274, - "step": 33495 - }, - { - "epoch": 0.9491909660234067, - "grad_norm": 0.0, - "learning_rate": 1.350940607647866e-07, - "loss": 0.6915, - "step": 33496 - }, - { - "epoch": 0.9492193034656692, - "grad_norm": 0.0, - "learning_rate": 1.349437519923924e-07, - "loss": 0.8061, - "step": 33497 - }, - { - "epoch": 0.9492476409079317, - "grad_norm": 0.0, - "learning_rate": 1.347935263170308e-07, - "loss": 0.8777, - "step": 33498 - }, - { - "epoch": 0.9492759783501941, - "grad_norm": 0.0, - "learning_rate": 1.3464338373996744e-07, - "loss": 0.8594, - "step": 33499 - }, - { - "epoch": 0.9493043157924566, - "grad_norm": 0.0, - "learning_rate": 1.3449332426246575e-07, - "loss": 0.8125, - "step": 33500 - }, - { - "epoch": 0.949332653234719, - "grad_norm": 0.0, - "learning_rate": 1.3434334788579028e-07, - "loss": 0.6687, - "step": 33501 - }, - { - "epoch": 0.9493609906769815, - "grad_norm": 0.0, - "learning_rate": 1.3419345461120446e-07, - "loss": 0.8055, - "step": 33502 - }, - { - "epoch": 0.9493893281192439, - "grad_norm": 0.0, - "learning_rate": 1.3404364443997066e-07, - "loss": 0.8588, - "step": 33503 - }, - { - "epoch": 0.9494176655615064, - "grad_norm": 0.0, - "learning_rate": 1.3389391737335112e-07, - "loss": 0.8151, - "step": 33504 - }, - { - "epoch": 0.9494460030037689, - "grad_norm": 0.0, - "learning_rate": 1.337442734126071e-07, - "loss": 0.7426, - "step": 33505 - }, - { - "epoch": 0.9494743404460313, - "grad_norm": 0.0, - "learning_rate": 1.3359471255899758e-07, - "loss": 0.7895, - "step": 33506 - }, - { - "epoch": 0.9495026778882938, - "grad_norm": 0.0, - "learning_rate": 1.334452348137849e-07, - "loss": 0.7562, - "step": 33507 - }, - { - "epoch": 0.9495310153305563, - "grad_norm": 0.0, - "learning_rate": 1.3329584017822582e-07, - "loss": 0.721, - "step": 33508 - }, - { - "epoch": 0.9495593527728188, - "grad_norm": 0.0, - "learning_rate": 1.3314652865358158e-07, - "loss": 0.7953, - "step": 33509 - }, - { - "epoch": 0.9495876902150812, - "grad_norm": 0.0, - "learning_rate": 1.3299730024110559e-07, - "loss": 0.7369, - "step": 33510 - }, - { - "epoch": 0.9496160276573437, - "grad_norm": 0.0, - "learning_rate": 1.3284815494205906e-07, - "loss": 0.7799, - "step": 33511 - }, - { - "epoch": 0.9496443650996061, - "grad_norm": 0.0, - "learning_rate": 1.3269909275769543e-07, - "loss": 0.9166, - "step": 33512 - }, - { - "epoch": 0.9496727025418685, - "grad_norm": 0.0, - "learning_rate": 1.3255011368927263e-07, - "loss": 0.8786, - "step": 33513 - }, - { - "epoch": 0.949701039984131, - "grad_norm": 0.0, - "learning_rate": 1.3240121773804404e-07, - "loss": 0.7479, - "step": 33514 - }, - { - "epoch": 0.9497293774263935, - "grad_norm": 0.0, - "learning_rate": 1.3225240490526426e-07, - "loss": 0.8874, - "step": 33515 - }, - { - "epoch": 0.9497577148686559, - "grad_norm": 0.0, - "learning_rate": 1.321036751921856e-07, - "loss": 0.8042, - "step": 33516 - }, - { - "epoch": 0.9497860523109184, - "grad_norm": 0.0, - "learning_rate": 1.319550286000637e-07, - "loss": 0.8054, - "step": 33517 - }, - { - "epoch": 0.9498143897531809, - "grad_norm": 0.0, - "learning_rate": 1.3180646513014873e-07, - "loss": 0.7542, - "step": 33518 - }, - { - "epoch": 0.9498427271954434, - "grad_norm": 0.0, - "learning_rate": 1.3165798478369184e-07, - "loss": 0.7921, - "step": 33519 - }, - { - "epoch": 0.9498710646377058, - "grad_norm": 0.0, - "learning_rate": 1.3150958756194432e-07, - "loss": 0.8479, - "step": 33520 - }, - { - "epoch": 0.9498994020799683, - "grad_norm": 0.0, - "learning_rate": 1.3136127346615624e-07, - "loss": 0.74, - "step": 33521 - }, - { - "epoch": 0.9499277395222308, - "grad_norm": 0.0, - "learning_rate": 1.3121304249757772e-07, - "loss": 0.8339, - "step": 33522 - }, - { - "epoch": 0.9499560769644931, - "grad_norm": 0.0, - "learning_rate": 1.3106489465745443e-07, - "loss": 0.8328, - "step": 33523 - }, - { - "epoch": 0.9499844144067556, - "grad_norm": 0.0, - "learning_rate": 1.3091682994703757e-07, - "loss": 0.8039, - "step": 33524 - }, - { - "epoch": 0.9500127518490181, - "grad_norm": 0.0, - "learning_rate": 1.3076884836757286e-07, - "loss": 0.7737, - "step": 33525 - }, - { - "epoch": 0.9500410892912806, - "grad_norm": 0.0, - "learning_rate": 1.3062094992030595e-07, - "loss": 0.8171, - "step": 33526 - }, - { - "epoch": 0.950069426733543, - "grad_norm": 0.0, - "learning_rate": 1.3047313460648469e-07, - "loss": 0.9163, - "step": 33527 - }, - { - "epoch": 0.9500977641758055, - "grad_norm": 0.0, - "learning_rate": 1.303254024273537e-07, - "loss": 0.7989, - "step": 33528 - }, - { - "epoch": 0.950126101618068, - "grad_norm": 0.0, - "learning_rate": 1.3017775338415638e-07, - "loss": 0.7001, - "step": 33529 - }, - { - "epoch": 0.9501544390603304, - "grad_norm": 0.0, - "learning_rate": 1.3003018747813734e-07, - "loss": 0.7096, - "step": 33530 - }, - { - "epoch": 0.9501827765025929, - "grad_norm": 0.0, - "learning_rate": 1.2988270471053775e-07, - "loss": 0.7581, - "step": 33531 - }, - { - "epoch": 0.9502111139448554, - "grad_norm": 0.0, - "learning_rate": 1.2973530508260224e-07, - "loss": 0.8217, - "step": 33532 - }, - { - "epoch": 0.9502394513871179, - "grad_norm": 0.0, - "learning_rate": 1.29587988595572e-07, - "loss": 0.7587, - "step": 33533 - }, - { - "epoch": 0.9502677888293802, - "grad_norm": 0.0, - "learning_rate": 1.2944075525068712e-07, - "loss": 0.891, - "step": 33534 - }, - { - "epoch": 0.9502961262716427, - "grad_norm": 0.0, - "learning_rate": 1.2929360504918775e-07, - "loss": 0.7962, - "step": 33535 - }, - { - "epoch": 0.9503244637139052, - "grad_norm": 0.0, - "learning_rate": 1.2914653799231403e-07, - "loss": 0.6598, - "step": 33536 - }, - { - "epoch": 0.9503528011561676, - "grad_norm": 0.0, - "learning_rate": 1.2899955408130383e-07, - "loss": 0.7578, - "step": 33537 - }, - { - "epoch": 0.9503811385984301, - "grad_norm": 0.0, - "learning_rate": 1.2885265331739617e-07, - "loss": 0.8106, - "step": 33538 - }, - { - "epoch": 0.9504094760406926, - "grad_norm": 0.0, - "learning_rate": 1.287058357018278e-07, - "loss": 0.7477, - "step": 33539 - }, - { - "epoch": 0.950437813482955, - "grad_norm": 0.0, - "learning_rate": 1.285591012358367e-07, - "loss": 0.7882, - "step": 33540 - }, - { - "epoch": 0.9504661509252175, - "grad_norm": 0.0, - "learning_rate": 1.2841244992065738e-07, - "loss": 0.8159, - "step": 33541 - }, - { - "epoch": 0.95049448836748, - "grad_norm": 0.0, - "learning_rate": 1.2826588175752664e-07, - "loss": 0.751, - "step": 33542 - }, - { - "epoch": 0.9505228258097425, - "grad_norm": 0.0, - "learning_rate": 1.2811939674767793e-07, - "loss": 0.6882, - "step": 33543 - }, - { - "epoch": 0.9505511632520048, - "grad_norm": 0.0, - "learning_rate": 1.2797299489234472e-07, - "loss": 0.778, - "step": 33544 - }, - { - "epoch": 0.9505795006942673, - "grad_norm": 0.0, - "learning_rate": 1.2782667619276047e-07, - "loss": 0.7994, - "step": 33545 - }, - { - "epoch": 0.9506078381365298, - "grad_norm": 0.0, - "learning_rate": 1.276804406501586e-07, - "loss": 0.7438, - "step": 33546 - }, - { - "epoch": 0.9506361755787922, - "grad_norm": 0.0, - "learning_rate": 1.275342882657704e-07, - "loss": 0.7191, - "step": 33547 - }, - { - "epoch": 0.9506645130210547, - "grad_norm": 0.0, - "learning_rate": 1.273882190408271e-07, - "loss": 0.8924, - "step": 33548 - }, - { - "epoch": 0.9506928504633172, - "grad_norm": 0.0, - "learning_rate": 1.272422329765588e-07, - "loss": 0.7398, - "step": 33549 - }, - { - "epoch": 0.9507211879055797, - "grad_norm": 0.0, - "learning_rate": 1.2709633007419563e-07, - "loss": 0.741, - "step": 33550 - }, - { - "epoch": 0.9507495253478421, - "grad_norm": 0.0, - "learning_rate": 1.2695051033496554e-07, - "loss": 0.7559, - "step": 33551 - }, - { - "epoch": 0.9507778627901046, - "grad_norm": 0.0, - "learning_rate": 1.2680477376009748e-07, - "loss": 0.779, - "step": 33552 - }, - { - "epoch": 0.9508062002323671, - "grad_norm": 0.0, - "learning_rate": 1.266591203508194e-07, - "loss": 0.7943, - "step": 33553 - }, - { - "epoch": 0.9508345376746294, - "grad_norm": 0.0, - "learning_rate": 1.265135501083592e-07, - "loss": 0.785, - "step": 33554 - }, - { - "epoch": 0.9508628751168919, - "grad_norm": 0.0, - "learning_rate": 1.2636806303394035e-07, - "loss": 0.7482, - "step": 33555 - }, - { - "epoch": 0.9508912125591544, - "grad_norm": 0.0, - "learning_rate": 1.2622265912878962e-07, - "loss": 0.7846, - "step": 33556 - }, - { - "epoch": 0.9509195500014169, - "grad_norm": 0.0, - "learning_rate": 1.2607733839413383e-07, - "loss": 0.8264, - "step": 33557 - }, - { - "epoch": 0.9509478874436793, - "grad_norm": 0.0, - "learning_rate": 1.2593210083119312e-07, - "loss": 0.7549, - "step": 33558 - }, - { - "epoch": 0.9509762248859418, - "grad_norm": 0.0, - "learning_rate": 1.2578694644119427e-07, - "loss": 0.756, - "step": 33559 - }, - { - "epoch": 0.9510045623282043, - "grad_norm": 0.0, - "learning_rate": 1.2564187522535855e-07, - "loss": 0.861, - "step": 33560 - }, - { - "epoch": 0.9510328997704667, - "grad_norm": 0.0, - "learning_rate": 1.2549688718490715e-07, - "loss": 0.8161, - "step": 33561 - }, - { - "epoch": 0.9510612372127292, - "grad_norm": 0.0, - "learning_rate": 1.2535198232106361e-07, - "loss": 0.7017, - "step": 33562 - }, - { - "epoch": 0.9510895746549917, - "grad_norm": 0.0, - "learning_rate": 1.252071606350458e-07, - "loss": 0.8296, - "step": 33563 - }, - { - "epoch": 0.951117912097254, - "grad_norm": 0.0, - "learning_rate": 1.2506242212807607e-07, - "loss": 0.6635, - "step": 33564 - }, - { - "epoch": 0.9511462495395165, - "grad_norm": 0.0, - "learning_rate": 1.2491776680137123e-07, - "loss": 0.7457, - "step": 33565 - }, - { - "epoch": 0.951174586981779, - "grad_norm": 0.0, - "learning_rate": 1.2477319465615144e-07, - "loss": 0.9013, - "step": 33566 - }, - { - "epoch": 0.9512029244240415, - "grad_norm": 0.0, - "learning_rate": 1.2462870569363572e-07, - "loss": 0.7305, - "step": 33567 - }, - { - "epoch": 0.9512312618663039, - "grad_norm": 0.0, - "learning_rate": 1.244842999150375e-07, - "loss": 0.7877, - "step": 33568 - }, - { - "epoch": 0.9512595993085664, - "grad_norm": 0.0, - "learning_rate": 1.2433997732157588e-07, - "loss": 0.7785, - "step": 33569 - }, - { - "epoch": 0.9512879367508289, - "grad_norm": 0.0, - "learning_rate": 1.241957379144665e-07, - "loss": 0.8136, - "step": 33570 - }, - { - "epoch": 0.9513162741930913, - "grad_norm": 0.0, - "learning_rate": 1.2405158169492393e-07, - "loss": 0.8122, - "step": 33571 - }, - { - "epoch": 0.9513446116353538, - "grad_norm": 0.0, - "learning_rate": 1.2390750866416167e-07, - "loss": 0.7573, - "step": 33572 - }, - { - "epoch": 0.9513729490776163, - "grad_norm": 0.0, - "learning_rate": 1.237635188233932e-07, - "loss": 0.8092, - "step": 33573 - }, - { - "epoch": 0.9514012865198788, - "grad_norm": 0.0, - "learning_rate": 1.2361961217383312e-07, - "loss": 0.826, - "step": 33574 - }, - { - "epoch": 0.9514296239621411, - "grad_norm": 0.0, - "learning_rate": 1.2347578871669264e-07, - "loss": 0.7209, - "step": 33575 - }, - { - "epoch": 0.9514579614044036, - "grad_norm": 0.0, - "learning_rate": 1.2333204845318192e-07, - "loss": 0.8446, - "step": 33576 - }, - { - "epoch": 0.9514862988466661, - "grad_norm": 0.0, - "learning_rate": 1.2318839138451333e-07, - "loss": 0.9539, - "step": 33577 - }, - { - "epoch": 0.9515146362889285, - "grad_norm": 0.0, - "learning_rate": 1.23044817511897e-07, - "loss": 0.7879, - "step": 33578 - }, - { - "epoch": 0.951542973731191, - "grad_norm": 0.0, - "learning_rate": 1.2290132683654087e-07, - "loss": 0.7682, - "step": 33579 - }, - { - "epoch": 0.9515713111734535, - "grad_norm": 0.0, - "learning_rate": 1.227579193596562e-07, - "loss": 0.7294, - "step": 33580 - }, - { - "epoch": 0.951599648615716, - "grad_norm": 0.0, - "learning_rate": 1.2261459508244865e-07, - "loss": 0.8842, - "step": 33581 - }, - { - "epoch": 0.9516279860579784, - "grad_norm": 0.0, - "learning_rate": 1.224713540061262e-07, - "loss": 0.7233, - "step": 33582 - }, - { - "epoch": 0.9516563235002409, - "grad_norm": 0.0, - "learning_rate": 1.223281961318956e-07, - "loss": 0.7962, - "step": 33583 - }, - { - "epoch": 0.9516846609425034, - "grad_norm": 0.0, - "learning_rate": 1.2218512146096263e-07, - "loss": 0.9001, - "step": 33584 - }, - { - "epoch": 0.9517129983847658, - "grad_norm": 0.0, - "learning_rate": 1.220421299945318e-07, - "loss": 0.8102, - "step": 33585 - }, - { - "epoch": 0.9517413358270282, - "grad_norm": 0.0, - "learning_rate": 1.2189922173380998e-07, - "loss": 0.8825, - "step": 33586 - }, - { - "epoch": 0.9517696732692907, - "grad_norm": 0.0, - "learning_rate": 1.2175639667999728e-07, - "loss": 0.7971, - "step": 33587 - }, - { - "epoch": 0.9517980107115531, - "grad_norm": 0.0, - "learning_rate": 1.2161365483429943e-07, - "loss": 0.87, - "step": 33588 - }, - { - "epoch": 0.9518263481538156, - "grad_norm": 0.0, - "learning_rate": 1.214709961979177e-07, - "loss": 0.7646, - "step": 33589 - }, - { - "epoch": 0.9518546855960781, - "grad_norm": 0.0, - "learning_rate": 1.213284207720544e-07, - "loss": 0.7049, - "step": 33590 - }, - { - "epoch": 0.9518830230383406, - "grad_norm": 0.0, - "learning_rate": 1.2118592855790978e-07, - "loss": 0.7435, - "step": 33591 - }, - { - "epoch": 0.951911360480603, - "grad_norm": 0.0, - "learning_rate": 1.2104351955668502e-07, - "loss": 0.7307, - "step": 33592 - }, - { - "epoch": 0.9519396979228655, - "grad_norm": 0.0, - "learning_rate": 1.209011937695781e-07, - "loss": 0.833, - "step": 33593 - }, - { - "epoch": 0.951968035365128, - "grad_norm": 0.0, - "learning_rate": 1.2075895119779025e-07, - "loss": 0.8612, - "step": 33594 - }, - { - "epoch": 0.9519963728073904, - "grad_norm": 0.0, - "learning_rate": 1.2061679184251719e-07, - "loss": 0.8851, - "step": 33595 - }, - { - "epoch": 0.9520247102496529, - "grad_norm": 0.0, - "learning_rate": 1.2047471570495905e-07, - "loss": 0.6314, - "step": 33596 - }, - { - "epoch": 0.9520530476919153, - "grad_norm": 0.0, - "learning_rate": 1.2033272278630936e-07, - "loss": 0.8823, - "step": 33597 - }, - { - "epoch": 0.9520813851341778, - "grad_norm": 0.0, - "learning_rate": 1.2019081308776715e-07, - "loss": 0.7071, - "step": 33598 - }, - { - "epoch": 0.9521097225764402, - "grad_norm": 0.0, - "learning_rate": 1.2004898661052588e-07, - "loss": 0.7371, - "step": 33599 - }, - { - "epoch": 0.9521380600187027, - "grad_norm": 0.0, - "learning_rate": 1.199072433557813e-07, - "loss": 0.8055, - "step": 33600 - }, - { - "epoch": 0.9521663974609652, - "grad_norm": 0.0, - "learning_rate": 1.1976558332472576e-07, - "loss": 0.7045, - "step": 33601 - }, - { - "epoch": 0.9521947349032276, - "grad_norm": 0.0, - "learning_rate": 1.1962400651855387e-07, - "loss": 0.8152, - "step": 33602 - }, - { - "epoch": 0.9522230723454901, - "grad_norm": 0.0, - "learning_rate": 1.1948251293845913e-07, - "loss": 0.8184, - "step": 33603 - }, - { - "epoch": 0.9522514097877526, - "grad_norm": 0.0, - "learning_rate": 1.193411025856317e-07, - "loss": 0.7764, - "step": 33604 - }, - { - "epoch": 0.9522797472300151, - "grad_norm": 0.0, - "learning_rate": 1.1919977546126283e-07, - "loss": 0.8535, - "step": 33605 - }, - { - "epoch": 0.9523080846722775, - "grad_norm": 0.0, - "learning_rate": 1.1905853156654378e-07, - "loss": 0.7661, - "step": 33606 - }, - { - "epoch": 0.95233642211454, - "grad_norm": 0.0, - "learning_rate": 1.1891737090266365e-07, - "loss": 0.9195, - "step": 33607 - }, - { - "epoch": 0.9523647595568024, - "grad_norm": 0.0, - "learning_rate": 1.1877629347081254e-07, - "loss": 0.7453, - "step": 33608 - }, - { - "epoch": 0.9523930969990648, - "grad_norm": 0.0, - "learning_rate": 1.1863529927217731e-07, - "loss": 0.7238, - "step": 33609 - }, - { - "epoch": 0.9524214344413273, - "grad_norm": 0.0, - "learning_rate": 1.184943883079459e-07, - "loss": 0.7685, - "step": 33610 - }, - { - "epoch": 0.9524497718835898, - "grad_norm": 0.0, - "learning_rate": 1.1835356057930625e-07, - "loss": 0.8752, - "step": 33611 - }, - { - "epoch": 0.9524781093258522, - "grad_norm": 0.0, - "learning_rate": 1.1821281608744406e-07, - "loss": 0.7421, - "step": 33612 - }, - { - "epoch": 0.9525064467681147, - "grad_norm": 0.0, - "learning_rate": 1.1807215483354506e-07, - "loss": 0.7768, - "step": 33613 - }, - { - "epoch": 0.9525347842103772, - "grad_norm": 0.0, - "learning_rate": 1.1793157681879275e-07, - "loss": 0.8147, - "step": 33614 - }, - { - "epoch": 0.9525631216526397, - "grad_norm": 0.0, - "learning_rate": 1.1779108204437285e-07, - "loss": 0.7811, - "step": 33615 - }, - { - "epoch": 0.9525914590949021, - "grad_norm": 0.0, - "learning_rate": 1.1765067051146883e-07, - "loss": 0.8709, - "step": 33616 - }, - { - "epoch": 0.9526197965371646, - "grad_norm": 0.0, - "learning_rate": 1.1751034222126312e-07, - "loss": 0.798, - "step": 33617 - }, - { - "epoch": 0.952648133979427, - "grad_norm": 0.0, - "learning_rate": 1.1737009717493697e-07, - "loss": 0.7753, - "step": 33618 - }, - { - "epoch": 0.9526764714216894, - "grad_norm": 0.0, - "learning_rate": 1.1722993537367278e-07, - "loss": 0.7692, - "step": 33619 - }, - { - "epoch": 0.9527048088639519, - "grad_norm": 0.0, - "learning_rate": 1.170898568186507e-07, - "loss": 0.8381, - "step": 33620 - }, - { - "epoch": 0.9527331463062144, - "grad_norm": 0.0, - "learning_rate": 1.1694986151104981e-07, - "loss": 0.7284, - "step": 33621 - }, - { - "epoch": 0.9527614837484769, - "grad_norm": 0.0, - "learning_rate": 1.1680994945205137e-07, - "loss": 0.8187, - "step": 33622 - }, - { - "epoch": 0.9527898211907393, - "grad_norm": 0.0, - "learning_rate": 1.1667012064283223e-07, - "loss": 0.7469, - "step": 33623 - }, - { - "epoch": 0.9528181586330018, - "grad_norm": 0.0, - "learning_rate": 1.1653037508457032e-07, - "loss": 0.7676, - "step": 33624 - }, - { - "epoch": 0.9528464960752643, - "grad_norm": 0.0, - "learning_rate": 1.1639071277844472e-07, - "loss": 0.9022, - "step": 33625 - }, - { - "epoch": 0.9528748335175267, - "grad_norm": 0.0, - "learning_rate": 1.1625113372562891e-07, - "loss": 0.7297, - "step": 33626 - }, - { - "epoch": 0.9529031709597892, - "grad_norm": 0.0, - "learning_rate": 1.1611163792729974e-07, - "loss": 0.7204, - "step": 33627 - }, - { - "epoch": 0.9529315084020517, - "grad_norm": 0.0, - "learning_rate": 1.1597222538463293e-07, - "loss": 0.719, - "step": 33628 - }, - { - "epoch": 0.9529598458443141, - "grad_norm": 0.0, - "learning_rate": 1.1583289609880311e-07, - "loss": 0.782, - "step": 33629 - }, - { - "epoch": 0.9529881832865765, - "grad_norm": 0.0, - "learning_rate": 1.1569365007098265e-07, - "loss": 0.7742, - "step": 33630 - }, - { - "epoch": 0.953016520728839, - "grad_norm": 0.0, - "learning_rate": 1.1555448730234509e-07, - "loss": 0.8166, - "step": 33631 - }, - { - "epoch": 0.9530448581711015, - "grad_norm": 0.0, - "learning_rate": 1.154154077940628e-07, - "loss": 0.7527, - "step": 33632 - }, - { - "epoch": 0.9530731956133639, - "grad_norm": 0.0, - "learning_rate": 1.1527641154730707e-07, - "loss": 0.7554, - "step": 33633 - }, - { - "epoch": 0.9531015330556264, - "grad_norm": 0.0, - "learning_rate": 1.1513749856324807e-07, - "loss": 0.7451, - "step": 33634 - }, - { - "epoch": 0.9531298704978889, - "grad_norm": 0.0, - "learning_rate": 1.149986688430571e-07, - "loss": 0.8812, - "step": 33635 - }, - { - "epoch": 0.9531582079401513, - "grad_norm": 0.0, - "learning_rate": 1.1485992238790322e-07, - "loss": 0.7935, - "step": 33636 - }, - { - "epoch": 0.9531865453824138, - "grad_norm": 0.0, - "learning_rate": 1.1472125919895438e-07, - "loss": 0.779, - "step": 33637 - }, - { - "epoch": 0.9532148828246763, - "grad_norm": 0.0, - "learning_rate": 1.1458267927738077e-07, - "loss": 0.7516, - "step": 33638 - }, - { - "epoch": 0.9532432202669388, - "grad_norm": 0.0, - "learning_rate": 1.1444418262434587e-07, - "loss": 0.8848, - "step": 33639 - }, - { - "epoch": 0.9532715577092011, - "grad_norm": 0.0, - "learning_rate": 1.1430576924101988e-07, - "loss": 0.8255, - "step": 33640 - }, - { - "epoch": 0.9532998951514636, - "grad_norm": 0.0, - "learning_rate": 1.1416743912856631e-07, - "loss": 0.835, - "step": 33641 - }, - { - "epoch": 0.9533282325937261, - "grad_norm": 0.0, - "learning_rate": 1.1402919228815201e-07, - "loss": 0.7805, - "step": 33642 - }, - { - "epoch": 0.9533565700359885, - "grad_norm": 0.0, - "learning_rate": 1.138910287209416e-07, - "loss": 0.764, - "step": 33643 - }, - { - "epoch": 0.953384907478251, - "grad_norm": 0.0, - "learning_rate": 1.1375294842809748e-07, - "loss": 0.7609, - "step": 33644 - }, - { - "epoch": 0.9534132449205135, - "grad_norm": 0.0, - "learning_rate": 1.1361495141078316e-07, - "loss": 0.8125, - "step": 33645 - }, - { - "epoch": 0.953441582362776, - "grad_norm": 0.0, - "learning_rate": 1.1347703767016216e-07, - "loss": 0.8113, - "step": 33646 - }, - { - "epoch": 0.9534699198050384, - "grad_norm": 0.0, - "learning_rate": 1.1333920720739466e-07, - "loss": 0.7354, - "step": 33647 - }, - { - "epoch": 0.9534982572473009, - "grad_norm": 0.0, - "learning_rate": 1.1320146002364307e-07, - "loss": 0.8283, - "step": 33648 - }, - { - "epoch": 0.9535265946895634, - "grad_norm": 0.0, - "learning_rate": 1.1306379612006646e-07, - "loss": 0.8473, - "step": 33649 - }, - { - "epoch": 0.9535549321318257, - "grad_norm": 0.0, - "learning_rate": 1.1292621549782501e-07, - "loss": 0.8989, - "step": 33650 - }, - { - "epoch": 0.9535832695740882, - "grad_norm": 0.0, - "learning_rate": 1.1278871815807668e-07, - "loss": 0.86, - "step": 33651 - }, - { - "epoch": 0.9536116070163507, - "grad_norm": 0.0, - "learning_rate": 1.1265130410198167e-07, - "loss": 0.7769, - "step": 33652 - }, - { - "epoch": 0.9536399444586132, - "grad_norm": 0.0, - "learning_rate": 1.125139733306957e-07, - "loss": 0.774, - "step": 33653 - }, - { - "epoch": 0.9536682819008756, - "grad_norm": 0.0, - "learning_rate": 1.1237672584537673e-07, - "loss": 0.7509, - "step": 33654 - }, - { - "epoch": 0.9536966193431381, - "grad_norm": 0.0, - "learning_rate": 1.1223956164717941e-07, - "loss": 0.7271, - "step": 33655 - }, - { - "epoch": 0.9537249567854006, - "grad_norm": 0.0, - "learning_rate": 1.121024807372606e-07, - "loss": 0.7688, - "step": 33656 - }, - { - "epoch": 0.953753294227663, - "grad_norm": 0.0, - "learning_rate": 1.119654831167749e-07, - "loss": 0.7001, - "step": 33657 - }, - { - "epoch": 0.9537816316699255, - "grad_norm": 0.0, - "learning_rate": 1.1182856878687476e-07, - "loss": 0.7683, - "step": 33658 - }, - { - "epoch": 0.953809969112188, - "grad_norm": 0.0, - "learning_rate": 1.1169173774871478e-07, - "loss": 0.7309, - "step": 33659 - }, - { - "epoch": 0.9538383065544503, - "grad_norm": 0.0, - "learning_rate": 1.1155499000344738e-07, - "loss": 0.7812, - "step": 33660 - }, - { - "epoch": 0.9538666439967128, - "grad_norm": 0.0, - "learning_rate": 1.1141832555222387e-07, - "loss": 0.8294, - "step": 33661 - }, - { - "epoch": 0.9538949814389753, - "grad_norm": 0.0, - "learning_rate": 1.1128174439619666e-07, - "loss": 0.7067, - "step": 33662 - }, - { - "epoch": 0.9539233188812378, - "grad_norm": 0.0, - "learning_rate": 1.1114524653651481e-07, - "loss": 0.7076, - "step": 33663 - }, - { - "epoch": 0.9539516563235002, - "grad_norm": 0.0, - "learning_rate": 1.1100883197432855e-07, - "loss": 0.686, - "step": 33664 - }, - { - "epoch": 0.9539799937657627, - "grad_norm": 0.0, - "learning_rate": 1.1087250071078803e-07, - "loss": 0.7628, - "step": 33665 - }, - { - "epoch": 0.9540083312080252, - "grad_norm": 0.0, - "learning_rate": 1.1073625274703903e-07, - "loss": 0.8353, - "step": 33666 - }, - { - "epoch": 0.9540366686502876, - "grad_norm": 0.0, - "learning_rate": 1.1060008808423172e-07, - "loss": 0.7039, - "step": 33667 - }, - { - "epoch": 0.9540650060925501, - "grad_norm": 0.0, - "learning_rate": 1.1046400672351188e-07, - "loss": 0.807, - "step": 33668 - }, - { - "epoch": 0.9540933435348126, - "grad_norm": 0.0, - "learning_rate": 1.1032800866602633e-07, - "loss": 0.7188, - "step": 33669 - }, - { - "epoch": 0.9541216809770751, - "grad_norm": 0.0, - "learning_rate": 1.1019209391292085e-07, - "loss": 0.7743, - "step": 33670 - }, - { - "epoch": 0.9541500184193374, - "grad_norm": 0.0, - "learning_rate": 1.1005626246534006e-07, - "loss": 0.8174, - "step": 33671 - }, - { - "epoch": 0.9541783558615999, - "grad_norm": 0.0, - "learning_rate": 1.0992051432442641e-07, - "loss": 0.8035, - "step": 33672 - }, - { - "epoch": 0.9542066933038624, - "grad_norm": 0.0, - "learning_rate": 1.0978484949132672e-07, - "loss": 0.7798, - "step": 33673 - }, - { - "epoch": 0.9542350307461248, - "grad_norm": 0.0, - "learning_rate": 1.0964926796718123e-07, - "loss": 0.8283, - "step": 33674 - }, - { - "epoch": 0.9542633681883873, - "grad_norm": 0.0, - "learning_rate": 1.0951376975313232e-07, - "loss": 0.8159, - "step": 33675 - }, - { - "epoch": 0.9542917056306498, - "grad_norm": 0.0, - "learning_rate": 1.0937835485032133e-07, - "loss": 0.7891, - "step": 33676 - }, - { - "epoch": 0.9543200430729123, - "grad_norm": 0.0, - "learning_rate": 1.0924302325988956e-07, - "loss": 0.7955, - "step": 33677 - }, - { - "epoch": 0.9543483805151747, - "grad_norm": 0.0, - "learning_rate": 1.091077749829772e-07, - "loss": 0.9022, - "step": 33678 - }, - { - "epoch": 0.9543767179574372, - "grad_norm": 0.0, - "learning_rate": 1.0897261002072223e-07, - "loss": 0.8713, - "step": 33679 - }, - { - "epoch": 0.9544050553996997, - "grad_norm": 0.0, - "learning_rate": 1.0883752837426486e-07, - "loss": 0.8207, - "step": 33680 - }, - { - "epoch": 0.954433392841962, - "grad_norm": 0.0, - "learning_rate": 1.0870253004474086e-07, - "loss": 0.8275, - "step": 33681 - }, - { - "epoch": 0.9544617302842245, - "grad_norm": 0.0, - "learning_rate": 1.0856761503328816e-07, - "loss": 0.7086, - "step": 33682 - }, - { - "epoch": 0.954490067726487, - "grad_norm": 0.0, - "learning_rate": 1.0843278334104479e-07, - "loss": 0.839, - "step": 33683 - }, - { - "epoch": 0.9545184051687494, - "grad_norm": 0.0, - "learning_rate": 1.0829803496914537e-07, - "loss": 0.7709, - "step": 33684 - }, - { - "epoch": 0.9545467426110119, - "grad_norm": 0.0, - "learning_rate": 1.0816336991872345e-07, - "loss": 0.8613, - "step": 33685 - }, - { - "epoch": 0.9545750800532744, - "grad_norm": 0.0, - "learning_rate": 1.0802878819091589e-07, - "loss": 0.7253, - "step": 33686 - }, - { - "epoch": 0.9546034174955369, - "grad_norm": 0.0, - "learning_rate": 1.0789428978685512e-07, - "loss": 0.9132, - "step": 33687 - }, - { - "epoch": 0.9546317549377993, - "grad_norm": 0.0, - "learning_rate": 1.0775987470767357e-07, - "loss": 0.7631, - "step": 33688 - }, - { - "epoch": 0.9546600923800618, - "grad_norm": 0.0, - "learning_rate": 1.0762554295450367e-07, - "loss": 0.8334, - "step": 33689 - }, - { - "epoch": 0.9546884298223243, - "grad_norm": 0.0, - "learning_rate": 1.0749129452847785e-07, - "loss": 0.835, - "step": 33690 - }, - { - "epoch": 0.9547167672645867, - "grad_norm": 0.0, - "learning_rate": 1.0735712943072629e-07, - "loss": 0.7221, - "step": 33691 - }, - { - "epoch": 0.9547451047068491, - "grad_norm": 0.0, - "learning_rate": 1.0722304766237923e-07, - "loss": 0.8436, - "step": 33692 - }, - { - "epoch": 0.9547734421491116, - "grad_norm": 0.0, - "learning_rate": 1.0708904922456575e-07, - "loss": 0.8509, - "step": 33693 - }, - { - "epoch": 0.9548017795913741, - "grad_norm": 0.0, - "learning_rate": 1.0695513411841496e-07, - "loss": 0.8183, - "step": 33694 - }, - { - "epoch": 0.9548301170336365, - "grad_norm": 0.0, - "learning_rate": 1.0682130234505372e-07, - "loss": 0.7014, - "step": 33695 - }, - { - "epoch": 0.954858454475899, - "grad_norm": 0.0, - "learning_rate": 1.0668755390561225e-07, - "loss": 0.7142, - "step": 33696 - }, - { - "epoch": 0.9548867919181615, - "grad_norm": 0.0, - "learning_rate": 1.0655388880121408e-07, - "loss": 0.8057, - "step": 33697 - }, - { - "epoch": 0.9549151293604239, - "grad_norm": 0.0, - "learning_rate": 1.0642030703298722e-07, - "loss": 0.8464, - "step": 33698 - }, - { - "epoch": 0.9549434668026864, - "grad_norm": 0.0, - "learning_rate": 1.062868086020552e-07, - "loss": 0.742, - "step": 33699 - }, - { - "epoch": 0.9549718042449489, - "grad_norm": 0.0, - "learning_rate": 1.061533935095438e-07, - "loss": 0.7853, - "step": 33700 - }, - { - "epoch": 0.9550001416872113, - "grad_norm": 0.0, - "learning_rate": 1.0602006175657653e-07, - "loss": 0.7879, - "step": 33701 - }, - { - "epoch": 0.9550284791294738, - "grad_norm": 0.0, - "learning_rate": 1.0588681334427586e-07, - "loss": 0.7887, - "step": 33702 - }, - { - "epoch": 0.9550568165717362, - "grad_norm": 0.0, - "learning_rate": 1.0575364827376533e-07, - "loss": 0.7198, - "step": 33703 - }, - { - "epoch": 0.9550851540139987, - "grad_norm": 0.0, - "learning_rate": 1.0562056654616515e-07, - "loss": 0.7976, - "step": 33704 - }, - { - "epoch": 0.9551134914562611, - "grad_norm": 0.0, - "learning_rate": 1.0548756816259775e-07, - "loss": 0.7467, - "step": 33705 - }, - { - "epoch": 0.9551418288985236, - "grad_norm": 0.0, - "learning_rate": 1.0535465312418225e-07, - "loss": 0.7184, - "step": 33706 - }, - { - "epoch": 0.9551701663407861, - "grad_norm": 0.0, - "learning_rate": 1.0522182143203886e-07, - "loss": 0.791, - "step": 33707 - }, - { - "epoch": 0.9551985037830485, - "grad_norm": 0.0, - "learning_rate": 1.0508907308728666e-07, - "loss": 0.7671, - "step": 33708 - }, - { - "epoch": 0.955226841225311, - "grad_norm": 0.0, - "learning_rate": 1.0495640809104257e-07, - "loss": 0.8255, - "step": 33709 - }, - { - "epoch": 0.9552551786675735, - "grad_norm": 0.0, - "learning_rate": 1.048238264444268e-07, - "loss": 0.8889, - "step": 33710 - }, - { - "epoch": 0.955283516109836, - "grad_norm": 0.0, - "learning_rate": 1.0469132814855287e-07, - "loss": 0.7067, - "step": 33711 - }, - { - "epoch": 0.9553118535520984, - "grad_norm": 0.0, - "learning_rate": 1.0455891320453992e-07, - "loss": 0.8001, - "step": 33712 - }, - { - "epoch": 0.9553401909943608, - "grad_norm": 0.0, - "learning_rate": 1.044265816135015e-07, - "loss": 0.8063, - "step": 33713 - }, - { - "epoch": 0.9553685284366233, - "grad_norm": 0.0, - "learning_rate": 1.0429433337655115e-07, - "loss": 0.892, - "step": 33714 - }, - { - "epoch": 0.9553968658788857, - "grad_norm": 0.0, - "learning_rate": 1.0416216849480576e-07, - "loss": 0.8249, - "step": 33715 - }, - { - "epoch": 0.9554252033211482, - "grad_norm": 0.0, - "learning_rate": 1.0403008696937666e-07, - "loss": 0.8597, - "step": 33716 - }, - { - "epoch": 0.9554535407634107, - "grad_norm": 0.0, - "learning_rate": 1.0389808880137742e-07, - "loss": 0.846, - "step": 33717 - }, - { - "epoch": 0.9554818782056732, - "grad_norm": 0.0, - "learning_rate": 1.0376617399191935e-07, - "loss": 0.8318, - "step": 33718 - }, - { - "epoch": 0.9555102156479356, - "grad_norm": 0.0, - "learning_rate": 1.0363434254211269e-07, - "loss": 0.8736, - "step": 33719 - }, - { - "epoch": 0.9555385530901981, - "grad_norm": 0.0, - "learning_rate": 1.0350259445306987e-07, - "loss": 0.7622, - "step": 33720 - }, - { - "epoch": 0.9555668905324606, - "grad_norm": 0.0, - "learning_rate": 1.0337092972589891e-07, - "loss": 0.8968, - "step": 33721 - }, - { - "epoch": 0.955595227974723, - "grad_norm": 0.0, - "learning_rate": 1.0323934836171001e-07, - "loss": 0.8119, - "step": 33722 - }, - { - "epoch": 0.9556235654169855, - "grad_norm": 0.0, - "learning_rate": 1.031078503616112e-07, - "loss": 0.8463, - "step": 33723 - }, - { - "epoch": 0.955651902859248, - "grad_norm": 0.0, - "learning_rate": 1.0297643572670935e-07, - "loss": 0.6695, - "step": 33724 - }, - { - "epoch": 0.9556802403015103, - "grad_norm": 0.0, - "learning_rate": 1.0284510445811357e-07, - "loss": 0.7099, - "step": 33725 - }, - { - "epoch": 0.9557085777437728, - "grad_norm": 0.0, - "learning_rate": 1.0271385655692745e-07, - "loss": 0.8047, - "step": 33726 - }, - { - "epoch": 0.9557369151860353, - "grad_norm": 0.0, - "learning_rate": 1.0258269202425896e-07, - "loss": 0.7967, - "step": 33727 - }, - { - "epoch": 0.9557652526282978, - "grad_norm": 0.0, - "learning_rate": 1.024516108612117e-07, - "loss": 0.7832, - "step": 33728 - }, - { - "epoch": 0.9557935900705602, - "grad_norm": 0.0, - "learning_rate": 1.0232061306888918e-07, - "loss": 0.6953, - "step": 33729 - }, - { - "epoch": 0.9558219275128227, - "grad_norm": 0.0, - "learning_rate": 1.02189698648395e-07, - "loss": 0.901, - "step": 33730 - }, - { - "epoch": 0.9558502649550852, - "grad_norm": 0.0, - "learning_rate": 1.0205886760083383e-07, - "loss": 0.6466, - "step": 33731 - }, - { - "epoch": 0.9558786023973476, - "grad_norm": 0.0, - "learning_rate": 1.0192811992730478e-07, - "loss": 0.7294, - "step": 33732 - }, - { - "epoch": 0.9559069398396101, - "grad_norm": 0.0, - "learning_rate": 1.017974556289114e-07, - "loss": 0.7663, - "step": 33733 - }, - { - "epoch": 0.9559352772818726, - "grad_norm": 0.0, - "learning_rate": 1.0166687470675395e-07, - "loss": 0.7323, - "step": 33734 - }, - { - "epoch": 0.955963614724135, - "grad_norm": 0.0, - "learning_rate": 1.0153637716193154e-07, - "loss": 0.8777, - "step": 33735 - }, - { - "epoch": 0.9559919521663974, - "grad_norm": 0.0, - "learning_rate": 1.014059629955455e-07, - "loss": 0.821, - "step": 33736 - }, - { - "epoch": 0.9560202896086599, - "grad_norm": 0.0, - "learning_rate": 1.0127563220869052e-07, - "loss": 0.8253, - "step": 33737 - }, - { - "epoch": 0.9560486270509224, - "grad_norm": 0.0, - "learning_rate": 1.0114538480246794e-07, - "loss": 0.761, - "step": 33738 - }, - { - "epoch": 0.9560769644931848, - "grad_norm": 0.0, - "learning_rate": 1.0101522077797354e-07, - "loss": 0.8113, - "step": 33739 - }, - { - "epoch": 0.9561053019354473, - "grad_norm": 0.0, - "learning_rate": 1.0088514013630424e-07, - "loss": 0.8237, - "step": 33740 - }, - { - "epoch": 0.9561336393777098, - "grad_norm": 0.0, - "learning_rate": 1.007551428785547e-07, - "loss": 0.8439, - "step": 33741 - }, - { - "epoch": 0.9561619768199723, - "grad_norm": 0.0, - "learning_rate": 1.0062522900582072e-07, - "loss": 0.6616, - "step": 33742 - }, - { - "epoch": 0.9561903142622347, - "grad_norm": 0.0, - "learning_rate": 1.0049539851919699e-07, - "loss": 0.816, - "step": 33743 - }, - { - "epoch": 0.9562186517044972, - "grad_norm": 0.0, - "learning_rate": 1.0036565141977594e-07, - "loss": 0.7919, - "step": 33744 - }, - { - "epoch": 0.9562469891467597, - "grad_norm": 0.0, - "learning_rate": 1.0023598770865117e-07, - "loss": 0.7618, - "step": 33745 - }, - { - "epoch": 0.956275326589022, - "grad_norm": 0.0, - "learning_rate": 1.0010640738691513e-07, - "loss": 0.8618, - "step": 33746 - }, - { - "epoch": 0.9563036640312845, - "grad_norm": 0.0, - "learning_rate": 9.997691045565916e-08, - "loss": 0.7813, - "step": 33747 - }, - { - "epoch": 0.956332001473547, - "grad_norm": 0.0, - "learning_rate": 9.984749691597351e-08, - "loss": 0.7507, - "step": 33748 - }, - { - "epoch": 0.9563603389158094, - "grad_norm": 0.0, - "learning_rate": 9.971816676894952e-08, - "loss": 0.8209, - "step": 33749 - }, - { - "epoch": 0.9563886763580719, - "grad_norm": 0.0, - "learning_rate": 9.958892001567521e-08, - "loss": 0.8712, - "step": 33750 - }, - { - "epoch": 0.9564170138003344, - "grad_norm": 0.0, - "learning_rate": 9.945975665723972e-08, - "loss": 0.8255, - "step": 33751 - }, - { - "epoch": 0.9564453512425969, - "grad_norm": 0.0, - "learning_rate": 9.933067669473107e-08, - "loss": 0.7453, - "step": 33752 - }, - { - "epoch": 0.9564736886848593, - "grad_norm": 0.0, - "learning_rate": 9.920168012923725e-08, - "loss": 0.8209, - "step": 33753 - }, - { - "epoch": 0.9565020261271218, - "grad_norm": 0.0, - "learning_rate": 9.90727669618441e-08, - "loss": 0.7219, - "step": 33754 - }, - { - "epoch": 0.9565303635693843, - "grad_norm": 0.0, - "learning_rate": 9.894393719363737e-08, - "loss": 0.8693, - "step": 33755 - }, - { - "epoch": 0.9565587010116466, - "grad_norm": 0.0, - "learning_rate": 9.88151908257029e-08, - "loss": 0.8547, - "step": 33756 - }, - { - "epoch": 0.9565870384539091, - "grad_norm": 0.0, - "learning_rate": 9.868652785912424e-08, - "loss": 0.8511, - "step": 33757 - }, - { - "epoch": 0.9566153758961716, - "grad_norm": 0.0, - "learning_rate": 9.85579482949861e-08, - "loss": 0.8428, - "step": 33758 - }, - { - "epoch": 0.9566437133384341, - "grad_norm": 0.0, - "learning_rate": 9.842945213437094e-08, - "loss": 0.8185, - "step": 33759 - }, - { - "epoch": 0.9566720507806965, - "grad_norm": 0.0, - "learning_rate": 9.830103937836122e-08, - "loss": 0.86, - "step": 33760 - }, - { - "epoch": 0.956700388222959, - "grad_norm": 0.0, - "learning_rate": 9.81727100280394e-08, - "loss": 0.8765, - "step": 33761 - }, - { - "epoch": 0.9567287256652215, - "grad_norm": 0.0, - "learning_rate": 9.804446408448576e-08, - "loss": 0.8229, - "step": 33762 - }, - { - "epoch": 0.9567570631074839, - "grad_norm": 0.0, - "learning_rate": 9.791630154878052e-08, - "loss": 0.8101, - "step": 33763 - }, - { - "epoch": 0.9567854005497464, - "grad_norm": 0.0, - "learning_rate": 9.778822242200281e-08, - "loss": 0.7991, - "step": 33764 - }, - { - "epoch": 0.9568137379920089, - "grad_norm": 0.0, - "learning_rate": 9.766022670523179e-08, - "loss": 0.7911, - "step": 33765 - }, - { - "epoch": 0.9568420754342714, - "grad_norm": 0.0, - "learning_rate": 9.753231439954658e-08, - "loss": 0.8131, - "step": 33766 - }, - { - "epoch": 0.9568704128765337, - "grad_norm": 0.0, - "learning_rate": 9.74044855060241e-08, - "loss": 0.8033, - "step": 33767 - }, - { - "epoch": 0.9568987503187962, - "grad_norm": 0.0, - "learning_rate": 9.727674002574017e-08, - "loss": 0.8581, - "step": 33768 - }, - { - "epoch": 0.9569270877610587, - "grad_norm": 0.0, - "learning_rate": 9.714907795977169e-08, - "loss": 0.7588, - "step": 33769 - }, - { - "epoch": 0.9569554252033211, - "grad_norm": 0.0, - "learning_rate": 9.702149930919446e-08, - "loss": 0.9757, - "step": 33770 - }, - { - "epoch": 0.9569837626455836, - "grad_norm": 0.0, - "learning_rate": 9.68940040750821e-08, - "loss": 0.8053, - "step": 33771 - }, - { - "epoch": 0.9570121000878461, - "grad_norm": 0.0, - "learning_rate": 9.676659225850815e-08, - "loss": 0.7213, - "step": 33772 - }, - { - "epoch": 0.9570404375301085, - "grad_norm": 0.0, - "learning_rate": 9.663926386054734e-08, - "loss": 0.889, - "step": 33773 - }, - { - "epoch": 0.957068774972371, - "grad_norm": 0.0, - "learning_rate": 9.651201888227102e-08, - "loss": 0.8972, - "step": 33774 - }, - { - "epoch": 0.9570971124146335, - "grad_norm": 0.0, - "learning_rate": 9.638485732475277e-08, - "loss": 0.8253, - "step": 33775 - }, - { - "epoch": 0.957125449856896, - "grad_norm": 0.0, - "learning_rate": 9.625777918906176e-08, - "loss": 0.7628, - "step": 33776 - }, - { - "epoch": 0.9571537872991583, - "grad_norm": 0.0, - "learning_rate": 9.613078447626933e-08, - "loss": 0.8382, - "step": 33777 - }, - { - "epoch": 0.9571821247414208, - "grad_norm": 0.0, - "learning_rate": 9.600387318744464e-08, - "loss": 0.7368, - "step": 33778 - }, - { - "epoch": 0.9572104621836833, - "grad_norm": 0.0, - "learning_rate": 9.587704532365683e-08, - "loss": 0.8051, - "step": 33779 - }, - { - "epoch": 0.9572387996259457, - "grad_norm": 0.0, - "learning_rate": 9.575030088597503e-08, - "loss": 0.77, - "step": 33780 - }, - { - "epoch": 0.9572671370682082, - "grad_norm": 0.0, - "learning_rate": 9.56236398754673e-08, - "loss": 0.8452, - "step": 33781 - }, - { - "epoch": 0.9572954745104707, - "grad_norm": 0.0, - "learning_rate": 9.549706229319832e-08, - "loss": 0.8125, - "step": 33782 - }, - { - "epoch": 0.9573238119527332, - "grad_norm": 0.0, - "learning_rate": 9.537056814023505e-08, - "loss": 0.6729, - "step": 33783 - }, - { - "epoch": 0.9573521493949956, - "grad_norm": 0.0, - "learning_rate": 9.524415741764437e-08, - "loss": 0.8428, - "step": 33784 - }, - { - "epoch": 0.9573804868372581, - "grad_norm": 0.0, - "learning_rate": 9.51178301264899e-08, - "loss": 0.7845, - "step": 33785 - }, - { - "epoch": 0.9574088242795206, - "grad_norm": 0.0, - "learning_rate": 9.499158626783633e-08, - "loss": 0.9006, - "step": 33786 - }, - { - "epoch": 0.957437161721783, - "grad_norm": 0.0, - "learning_rate": 9.486542584274616e-08, - "loss": 0.7194, - "step": 33787 - }, - { - "epoch": 0.9574654991640454, - "grad_norm": 0.0, - "learning_rate": 9.473934885228298e-08, - "loss": 0.7351, - "step": 33788 - }, - { - "epoch": 0.9574938366063079, - "grad_norm": 0.0, - "learning_rate": 9.461335529750815e-08, - "loss": 0.716, - "step": 33789 - }, - { - "epoch": 0.9575221740485704, - "grad_norm": 0.0, - "learning_rate": 9.448744517948415e-08, - "loss": 0.7295, - "step": 33790 - }, - { - "epoch": 0.9575505114908328, - "grad_norm": 0.0, - "learning_rate": 9.436161849927016e-08, - "loss": 0.7607, - "step": 33791 - }, - { - "epoch": 0.9575788489330953, - "grad_norm": 0.0, - "learning_rate": 9.423587525792644e-08, - "loss": 0.806, - "step": 33792 - }, - { - "epoch": 0.9576071863753578, - "grad_norm": 0.0, - "learning_rate": 9.411021545651322e-08, - "loss": 0.8994, - "step": 33793 - }, - { - "epoch": 0.9576355238176202, - "grad_norm": 0.0, - "learning_rate": 9.398463909608746e-08, - "loss": 0.8196, - "step": 33794 - }, - { - "epoch": 0.9576638612598827, - "grad_norm": 0.0, - "learning_rate": 9.385914617770719e-08, - "loss": 0.8805, - "step": 33795 - }, - { - "epoch": 0.9576921987021452, - "grad_norm": 0.0, - "learning_rate": 9.373373670243158e-08, - "loss": 0.789, - "step": 33796 - }, - { - "epoch": 0.9577205361444076, - "grad_norm": 0.0, - "learning_rate": 9.36084106713131e-08, - "loss": 0.8477, - "step": 33797 - }, - { - "epoch": 0.95774887358667, - "grad_norm": 0.0, - "learning_rate": 9.348316808541092e-08, - "loss": 0.7651, - "step": 33798 - }, - { - "epoch": 0.9577772110289325, - "grad_norm": 0.0, - "learning_rate": 9.335800894577862e-08, - "loss": 0.7736, - "step": 33799 - }, - { - "epoch": 0.957805548471195, - "grad_norm": 0.0, - "learning_rate": 9.323293325346983e-08, - "loss": 0.7661, - "step": 33800 - }, - { - "epoch": 0.9578338859134574, - "grad_norm": 0.0, - "learning_rate": 9.310794100953923e-08, - "loss": 0.7645, - "step": 33801 - }, - { - "epoch": 0.9578622233557199, - "grad_norm": 0.0, - "learning_rate": 9.298303221503935e-08, - "loss": 0.9476, - "step": 33802 - }, - { - "epoch": 0.9578905607979824, - "grad_norm": 0.0, - "learning_rate": 9.285820687102264e-08, - "loss": 0.7655, - "step": 33803 - }, - { - "epoch": 0.9579188982402448, - "grad_norm": 0.0, - "learning_rate": 9.273346497854052e-08, - "loss": 0.7388, - "step": 33804 - }, - { - "epoch": 0.9579472356825073, - "grad_norm": 0.0, - "learning_rate": 9.260880653864212e-08, - "loss": 0.7987, - "step": 33805 - }, - { - "epoch": 0.9579755731247698, - "grad_norm": 0.0, - "learning_rate": 9.248423155237884e-08, - "loss": 0.7499, - "step": 33806 - }, - { - "epoch": 0.9580039105670323, - "grad_norm": 0.0, - "learning_rate": 9.235974002080094e-08, - "loss": 0.7896, - "step": 33807 - }, - { - "epoch": 0.9580322480092947, - "grad_norm": 0.0, - "learning_rate": 9.223533194495537e-08, - "loss": 0.7645, - "step": 33808 - }, - { - "epoch": 0.9580605854515571, - "grad_norm": 0.0, - "learning_rate": 9.211100732589129e-08, - "loss": 0.779, - "step": 33809 - }, - { - "epoch": 0.9580889228938196, - "grad_norm": 0.0, - "learning_rate": 9.198676616465562e-08, - "loss": 0.9678, - "step": 33810 - }, - { - "epoch": 0.958117260336082, - "grad_norm": 0.0, - "learning_rate": 9.186260846229423e-08, - "loss": 0.7738, - "step": 33811 - }, - { - "epoch": 0.9581455977783445, - "grad_norm": 0.0, - "learning_rate": 9.173853421985291e-08, - "loss": 0.8972, - "step": 33812 - }, - { - "epoch": 0.958173935220607, - "grad_norm": 0.0, - "learning_rate": 9.16145434383775e-08, - "loss": 0.866, - "step": 33813 - }, - { - "epoch": 0.9582022726628695, - "grad_norm": 0.0, - "learning_rate": 9.149063611891162e-08, - "loss": 0.7182, - "step": 33814 - }, - { - "epoch": 0.9582306101051319, - "grad_norm": 0.0, - "learning_rate": 9.13668122625011e-08, - "loss": 0.9929, - "step": 33815 - }, - { - "epoch": 0.9582589475473944, - "grad_norm": 0.0, - "learning_rate": 9.124307187018622e-08, - "loss": 0.8937, - "step": 33816 - }, - { - "epoch": 0.9582872849896569, - "grad_norm": 0.0, - "learning_rate": 9.111941494301057e-08, - "loss": 0.774, - "step": 33817 - }, - { - "epoch": 0.9583156224319193, - "grad_norm": 0.0, - "learning_rate": 9.099584148201668e-08, - "loss": 0.7815, - "step": 33818 - }, - { - "epoch": 0.9583439598741818, - "grad_norm": 0.0, - "learning_rate": 9.08723514882437e-08, - "loss": 0.8315, - "step": 33819 - }, - { - "epoch": 0.9583722973164442, - "grad_norm": 0.0, - "learning_rate": 9.074894496273301e-08, - "loss": 0.6975, - "step": 33820 - }, - { - "epoch": 0.9584006347587066, - "grad_norm": 0.0, - "learning_rate": 9.062562190652269e-08, - "loss": 0.8087, - "step": 33821 - }, - { - "epoch": 0.9584289722009691, - "grad_norm": 0.0, - "learning_rate": 9.0502382320653e-08, - "loss": 0.8992, - "step": 33822 - }, - { - "epoch": 0.9584573096432316, - "grad_norm": 0.0, - "learning_rate": 9.037922620616091e-08, - "loss": 0.8283, - "step": 33823 - }, - { - "epoch": 0.9584856470854941, - "grad_norm": 0.0, - "learning_rate": 9.025615356408557e-08, - "loss": 0.875, - "step": 33824 - }, - { - "epoch": 0.9585139845277565, - "grad_norm": 0.0, - "learning_rate": 9.013316439546171e-08, - "loss": 0.7475, - "step": 33825 - }, - { - "epoch": 0.958542321970019, - "grad_norm": 0.0, - "learning_rate": 9.001025870132629e-08, - "loss": 0.723, - "step": 33826 - }, - { - "epoch": 0.9585706594122815, - "grad_norm": 0.0, - "learning_rate": 8.988743648271514e-08, - "loss": 0.6757, - "step": 33827 - }, - { - "epoch": 0.9585989968545439, - "grad_norm": 0.0, - "learning_rate": 8.976469774066187e-08, - "loss": 0.7652, - "step": 33828 - }, - { - "epoch": 0.9586273342968064, - "grad_norm": 0.0, - "learning_rate": 8.964204247620012e-08, - "loss": 0.7584, - "step": 33829 - }, - { - "epoch": 0.9586556717390688, - "grad_norm": 0.0, - "learning_rate": 8.951947069036349e-08, - "loss": 0.8178, - "step": 33830 - }, - { - "epoch": 0.9586840091813313, - "grad_norm": 0.0, - "learning_rate": 8.939698238418559e-08, - "loss": 0.8071, - "step": 33831 - }, - { - "epoch": 0.9587123466235937, - "grad_norm": 0.0, - "learning_rate": 8.927457755869562e-08, - "loss": 0.8116, - "step": 33832 - }, - { - "epoch": 0.9587406840658562, - "grad_norm": 0.0, - "learning_rate": 8.915225621492718e-08, - "loss": 0.8046, - "step": 33833 - }, - { - "epoch": 0.9587690215081187, - "grad_norm": 0.0, - "learning_rate": 8.903001835390946e-08, - "loss": 0.877, - "step": 33834 - }, - { - "epoch": 0.9587973589503811, - "grad_norm": 0.0, - "learning_rate": 8.890786397667161e-08, - "loss": 0.8457, - "step": 33835 - }, - { - "epoch": 0.9588256963926436, - "grad_norm": 0.0, - "learning_rate": 8.878579308424395e-08, - "loss": 0.7407, - "step": 33836 - }, - { - "epoch": 0.9588540338349061, - "grad_norm": 0.0, - "learning_rate": 8.866380567765342e-08, - "loss": 0.8474, - "step": 33837 - }, - { - "epoch": 0.9588823712771686, - "grad_norm": 0.0, - "learning_rate": 8.854190175792921e-08, - "loss": 0.724, - "step": 33838 - }, - { - "epoch": 0.958910708719431, - "grad_norm": 0.0, - "learning_rate": 8.842008132609603e-08, - "loss": 0.7336, - "step": 33839 - }, - { - "epoch": 0.9589390461616935, - "grad_norm": 0.0, - "learning_rate": 8.829834438318196e-08, - "loss": 0.8462, - "step": 33840 - }, - { - "epoch": 0.958967383603956, - "grad_norm": 0.0, - "learning_rate": 8.817669093021064e-08, - "loss": 0.8768, - "step": 33841 - }, - { - "epoch": 0.9589957210462183, - "grad_norm": 0.0, - "learning_rate": 8.80551209682079e-08, - "loss": 0.8164, - "step": 33842 - }, - { - "epoch": 0.9590240584884808, - "grad_norm": 0.0, - "learning_rate": 8.793363449819848e-08, - "loss": 0.7506, - "step": 33843 - }, - { - "epoch": 0.9590523959307433, - "grad_norm": 0.0, - "learning_rate": 8.781223152120377e-08, - "loss": 0.7319, - "step": 33844 - }, - { - "epoch": 0.9590807333730057, - "grad_norm": 0.0, - "learning_rate": 8.769091203824743e-08, - "loss": 0.8511, - "step": 33845 - }, - { - "epoch": 0.9591090708152682, - "grad_norm": 0.0, - "learning_rate": 8.756967605035194e-08, - "loss": 0.8438, - "step": 33846 - }, - { - "epoch": 0.9591374082575307, - "grad_norm": 0.0, - "learning_rate": 8.744852355853761e-08, - "loss": 0.7777, - "step": 33847 - }, - { - "epoch": 0.9591657456997932, - "grad_norm": 0.0, - "learning_rate": 8.732745456382586e-08, - "loss": 0.8294, - "step": 33848 - }, - { - "epoch": 0.9591940831420556, - "grad_norm": 0.0, - "learning_rate": 8.720646906723585e-08, - "loss": 0.8973, - "step": 33849 - }, - { - "epoch": 0.9592224205843181, - "grad_norm": 0.0, - "learning_rate": 8.708556706978566e-08, - "loss": 0.8327, - "step": 33850 - }, - { - "epoch": 0.9592507580265806, - "grad_norm": 0.0, - "learning_rate": 8.696474857249559e-08, - "loss": 0.7734, - "step": 33851 - }, - { - "epoch": 0.9592790954688429, - "grad_norm": 0.0, - "learning_rate": 8.684401357638261e-08, - "loss": 0.8227, - "step": 33852 - }, - { - "epoch": 0.9593074329111054, - "grad_norm": 0.0, - "learning_rate": 8.672336208246368e-08, - "loss": 0.8045, - "step": 33853 - }, - { - "epoch": 0.9593357703533679, - "grad_norm": 0.0, - "learning_rate": 8.660279409175576e-08, - "loss": 0.6377, - "step": 33854 - }, - { - "epoch": 0.9593641077956304, - "grad_norm": 0.0, - "learning_rate": 8.648230960527249e-08, - "loss": 0.806, - "step": 33855 - }, - { - "epoch": 0.9593924452378928, - "grad_norm": 0.0, - "learning_rate": 8.636190862403082e-08, - "loss": 0.7925, - "step": 33856 - }, - { - "epoch": 0.9594207826801553, - "grad_norm": 0.0, - "learning_rate": 8.62415911490444e-08, - "loss": 0.8611, - "step": 33857 - }, - { - "epoch": 0.9594491201224178, - "grad_norm": 0.0, - "learning_rate": 8.612135718132575e-08, - "loss": 0.7748, - "step": 33858 - }, - { - "epoch": 0.9594774575646802, - "grad_norm": 0.0, - "learning_rate": 8.600120672188739e-08, - "loss": 0.8252, - "step": 33859 - }, - { - "epoch": 0.9595057950069427, - "grad_norm": 0.0, - "learning_rate": 8.588113977174405e-08, - "loss": 0.8185, - "step": 33860 - }, - { - "epoch": 0.9595341324492052, - "grad_norm": 0.0, - "learning_rate": 8.576115633190496e-08, - "loss": 0.8151, - "step": 33861 - }, - { - "epoch": 0.9595624698914677, - "grad_norm": 0.0, - "learning_rate": 8.564125640338039e-08, - "loss": 0.824, - "step": 33862 - }, - { - "epoch": 0.95959080733373, - "grad_norm": 0.0, - "learning_rate": 8.552143998718177e-08, - "loss": 0.8406, - "step": 33863 - }, - { - "epoch": 0.9596191447759925, - "grad_norm": 0.0, - "learning_rate": 8.540170708431716e-08, - "loss": 0.7169, - "step": 33864 - }, - { - "epoch": 0.959647482218255, - "grad_norm": 0.0, - "learning_rate": 8.52820576957969e-08, - "loss": 0.7723, - "step": 33865 - }, - { - "epoch": 0.9596758196605174, - "grad_norm": 0.0, - "learning_rate": 8.516249182262682e-08, - "loss": 0.8635, - "step": 33866 - }, - { - "epoch": 0.9597041571027799, - "grad_norm": 0.0, - "learning_rate": 8.5043009465815e-08, - "loss": 0.7605, - "step": 33867 - }, - { - "epoch": 0.9597324945450424, - "grad_norm": 0.0, - "learning_rate": 8.492361062636845e-08, - "loss": 0.8124, - "step": 33868 - }, - { - "epoch": 0.9597608319873048, - "grad_norm": 0.0, - "learning_rate": 8.480429530529077e-08, - "loss": 0.7897, - "step": 33869 - }, - { - "epoch": 0.9597891694295673, - "grad_norm": 0.0, - "learning_rate": 8.468506350358896e-08, - "loss": 0.8253, - "step": 33870 - }, - { - "epoch": 0.9598175068718298, - "grad_norm": 0.0, - "learning_rate": 8.456591522226776e-08, - "loss": 0.8012, - "step": 33871 - }, - { - "epoch": 0.9598458443140923, - "grad_norm": 0.0, - "learning_rate": 8.44468504623286e-08, - "loss": 0.861, - "step": 33872 - }, - { - "epoch": 0.9598741817563546, - "grad_norm": 0.0, - "learning_rate": 8.432786922477621e-08, - "loss": 0.8299, - "step": 33873 - }, - { - "epoch": 0.9599025191986171, - "grad_norm": 0.0, - "learning_rate": 8.420897151061202e-08, - "loss": 0.7935, - "step": 33874 - }, - { - "epoch": 0.9599308566408796, - "grad_norm": 0.0, - "learning_rate": 8.409015732083748e-08, - "loss": 0.7343, - "step": 33875 - }, - { - "epoch": 0.959959194083142, - "grad_norm": 0.0, - "learning_rate": 8.397142665645508e-08, - "loss": 0.8854, - "step": 33876 - }, - { - "epoch": 0.9599875315254045, - "grad_norm": 0.0, - "learning_rate": 8.385277951846182e-08, - "loss": 0.7505, - "step": 33877 - }, - { - "epoch": 0.960015868967667, - "grad_norm": 0.0, - "learning_rate": 8.373421590786024e-08, - "loss": 0.8011, - "step": 33878 - }, - { - "epoch": 0.9600442064099295, - "grad_norm": 0.0, - "learning_rate": 8.36157358256473e-08, - "loss": 0.764, - "step": 33879 - }, - { - "epoch": 0.9600725438521919, - "grad_norm": 0.0, - "learning_rate": 8.349733927282112e-08, - "loss": 0.7881, - "step": 33880 - }, - { - "epoch": 0.9601008812944544, - "grad_norm": 0.0, - "learning_rate": 8.337902625037975e-08, - "loss": 0.84, - "step": 33881 - }, - { - "epoch": 0.9601292187367169, - "grad_norm": 0.0, - "learning_rate": 8.326079675931908e-08, - "loss": 0.8147, - "step": 33882 - }, - { - "epoch": 0.9601575561789792, - "grad_norm": 0.0, - "learning_rate": 8.314265080063499e-08, - "loss": 0.7976, - "step": 33883 - }, - { - "epoch": 0.9601858936212417, - "grad_norm": 0.0, - "learning_rate": 8.302458837532335e-08, - "loss": 0.8198, - "step": 33884 - }, - { - "epoch": 0.9602142310635042, - "grad_norm": 0.0, - "learning_rate": 8.290660948437779e-08, - "loss": 0.839, - "step": 33885 - }, - { - "epoch": 0.9602425685057667, - "grad_norm": 0.0, - "learning_rate": 8.278871412879196e-08, - "loss": 0.8405, - "step": 33886 - }, - { - "epoch": 0.9602709059480291, - "grad_norm": 0.0, - "learning_rate": 8.267090230956065e-08, - "loss": 0.7859, - "step": 33887 - }, - { - "epoch": 0.9602992433902916, - "grad_norm": 0.0, - "learning_rate": 8.255317402767415e-08, - "loss": 0.7732, - "step": 33888 - }, - { - "epoch": 0.9603275808325541, - "grad_norm": 0.0, - "learning_rate": 8.243552928412501e-08, - "loss": 0.8132, - "step": 33889 - }, - { - "epoch": 0.9603559182748165, - "grad_norm": 0.0, - "learning_rate": 8.231796807990356e-08, - "loss": 0.7337, - "step": 33890 - }, - { - "epoch": 0.960384255717079, - "grad_norm": 0.0, - "learning_rate": 8.220049041600231e-08, - "loss": 0.8089, - "step": 33891 - }, - { - "epoch": 0.9604125931593415, - "grad_norm": 0.0, - "learning_rate": 8.208309629340827e-08, - "loss": 0.7375, - "step": 33892 - }, - { - "epoch": 0.9604409306016038, - "grad_norm": 0.0, - "learning_rate": 8.196578571311175e-08, - "loss": 0.763, - "step": 33893 - }, - { - "epoch": 0.9604692680438663, - "grad_norm": 0.0, - "learning_rate": 8.184855867609976e-08, - "loss": 0.6955, - "step": 33894 - }, - { - "epoch": 0.9604976054861288, - "grad_norm": 0.0, - "learning_rate": 8.173141518336147e-08, - "loss": 0.7424, - "step": 33895 - }, - { - "epoch": 0.9605259429283913, - "grad_norm": 0.0, - "learning_rate": 8.161435523588168e-08, - "loss": 0.6838, - "step": 33896 - }, - { - "epoch": 0.9605542803706537, - "grad_norm": 0.0, - "learning_rate": 8.149737883464737e-08, - "loss": 0.8333, - "step": 33897 - }, - { - "epoch": 0.9605826178129162, - "grad_norm": 0.0, - "learning_rate": 8.138048598064329e-08, - "loss": 0.8483, - "step": 33898 - }, - { - "epoch": 0.9606109552551787, - "grad_norm": 0.0, - "learning_rate": 8.126367667485535e-08, - "loss": 0.7685, - "step": 33899 - }, - { - "epoch": 0.9606392926974411, - "grad_norm": 0.0, - "learning_rate": 8.114695091826607e-08, - "loss": 0.796, - "step": 33900 - }, - { - "epoch": 0.9606676301397036, - "grad_norm": 0.0, - "learning_rate": 8.103030871186024e-08, - "loss": 0.753, - "step": 33901 - }, - { - "epoch": 0.9606959675819661, - "grad_norm": 0.0, - "learning_rate": 8.091375005661817e-08, - "loss": 0.7613, - "step": 33902 - }, - { - "epoch": 0.9607243050242286, - "grad_norm": 0.0, - "learning_rate": 8.079727495352352e-08, - "loss": 0.7835, - "step": 33903 - }, - { - "epoch": 0.960752642466491, - "grad_norm": 0.0, - "learning_rate": 8.068088340355662e-08, - "loss": 0.8477, - "step": 33904 - }, - { - "epoch": 0.9607809799087534, - "grad_norm": 0.0, - "learning_rate": 8.056457540769891e-08, - "loss": 0.796, - "step": 33905 - }, - { - "epoch": 0.9608093173510159, - "grad_norm": 0.0, - "learning_rate": 8.04483509669285e-08, - "loss": 0.8861, - "step": 33906 - }, - { - "epoch": 0.9608376547932783, - "grad_norm": 0.0, - "learning_rate": 8.03322100822257e-08, - "loss": 0.7841, - "step": 33907 - }, - { - "epoch": 0.9608659922355408, - "grad_norm": 0.0, - "learning_rate": 8.021615275456862e-08, - "loss": 0.7726, - "step": 33908 - }, - { - "epoch": 0.9608943296778033, - "grad_norm": 0.0, - "learning_rate": 8.010017898493316e-08, - "loss": 0.8711, - "step": 33909 - }, - { - "epoch": 0.9609226671200657, - "grad_norm": 0.0, - "learning_rate": 7.998428877429854e-08, - "loss": 0.6717, - "step": 33910 - }, - { - "epoch": 0.9609510045623282, - "grad_norm": 0.0, - "learning_rate": 7.986848212363952e-08, - "loss": 0.7998, - "step": 33911 - }, - { - "epoch": 0.9609793420045907, - "grad_norm": 0.0, - "learning_rate": 7.975275903393309e-08, - "loss": 0.8421, - "step": 33912 - }, - { - "epoch": 0.9610076794468532, - "grad_norm": 0.0, - "learning_rate": 7.963711950615183e-08, - "loss": 0.8365, - "step": 33913 - }, - { - "epoch": 0.9610360168891156, - "grad_norm": 0.0, - "learning_rate": 7.95215635412705e-08, - "loss": 0.7657, - "step": 33914 - }, - { - "epoch": 0.961064354331378, - "grad_norm": 0.0, - "learning_rate": 7.940609114026388e-08, - "loss": 0.8018, - "step": 33915 - }, - { - "epoch": 0.9610926917736405, - "grad_norm": 0.0, - "learning_rate": 7.92907023041034e-08, - "loss": 0.8054, - "step": 33916 - }, - { - "epoch": 0.9611210292159029, - "grad_norm": 0.0, - "learning_rate": 7.917539703376054e-08, - "loss": 0.8164, - "step": 33917 - }, - { - "epoch": 0.9611493666581654, - "grad_norm": 0.0, - "learning_rate": 7.906017533020893e-08, - "loss": 0.8816, - "step": 33918 - }, - { - "epoch": 0.9611777041004279, - "grad_norm": 0.0, - "learning_rate": 7.89450371944167e-08, - "loss": 0.8081, - "step": 33919 - }, - { - "epoch": 0.9612060415426904, - "grad_norm": 0.0, - "learning_rate": 7.88299826273542e-08, - "loss": 0.8008, - "step": 33920 - }, - { - "epoch": 0.9612343789849528, - "grad_norm": 0.0, - "learning_rate": 7.871501162999173e-08, - "loss": 0.7716, - "step": 33921 - }, - { - "epoch": 0.9612627164272153, - "grad_norm": 0.0, - "learning_rate": 7.860012420329633e-08, - "loss": 0.8246, - "step": 33922 - }, - { - "epoch": 0.9612910538694778, - "grad_norm": 0.0, - "learning_rate": 7.848532034823608e-08, - "loss": 0.7676, - "step": 33923 - }, - { - "epoch": 0.9613193913117402, - "grad_norm": 0.0, - "learning_rate": 7.837060006577801e-08, - "loss": 0.8119, - "step": 33924 - }, - { - "epoch": 0.9613477287540027, - "grad_norm": 0.0, - "learning_rate": 7.825596335688912e-08, - "loss": 0.8937, - "step": 33925 - }, - { - "epoch": 0.9613760661962651, - "grad_norm": 0.0, - "learning_rate": 7.814141022253529e-08, - "loss": 0.8057, - "step": 33926 - }, - { - "epoch": 0.9614044036385276, - "grad_norm": 0.0, - "learning_rate": 7.802694066368022e-08, - "loss": 0.9024, - "step": 33927 - }, - { - "epoch": 0.96143274108079, - "grad_norm": 0.0, - "learning_rate": 7.791255468128755e-08, - "loss": 0.8156, - "step": 33928 - }, - { - "epoch": 0.9614610785230525, - "grad_norm": 0.0, - "learning_rate": 7.779825227632321e-08, - "loss": 0.8457, - "step": 33929 - }, - { - "epoch": 0.961489415965315, - "grad_norm": 0.0, - "learning_rate": 7.768403344974862e-08, - "loss": 0.8333, - "step": 33930 - }, - { - "epoch": 0.9615177534075774, - "grad_norm": 0.0, - "learning_rate": 7.756989820252525e-08, - "loss": 0.7463, - "step": 33931 - }, - { - "epoch": 0.9615460908498399, - "grad_norm": 0.0, - "learning_rate": 7.745584653561566e-08, - "loss": 0.7404, - "step": 33932 - }, - { - "epoch": 0.9615744282921024, - "grad_norm": 0.0, - "learning_rate": 7.73418784499802e-08, - "loss": 0.7204, - "step": 33933 - }, - { - "epoch": 0.9616027657343648, - "grad_norm": 0.0, - "learning_rate": 7.722799394657921e-08, - "loss": 0.8489, - "step": 33934 - }, - { - "epoch": 0.9616311031766273, - "grad_norm": 0.0, - "learning_rate": 7.711419302637079e-08, - "loss": 0.7831, - "step": 33935 - }, - { - "epoch": 0.9616594406188897, - "grad_norm": 0.0, - "learning_rate": 7.700047569031533e-08, - "loss": 0.7292, - "step": 33936 - }, - { - "epoch": 0.9616877780611522, - "grad_norm": 0.0, - "learning_rate": 7.688684193936868e-08, - "loss": 0.7574, - "step": 33937 - }, - { - "epoch": 0.9617161155034146, - "grad_norm": 0.0, - "learning_rate": 7.6773291774489e-08, - "loss": 0.7774, - "step": 33938 - }, - { - "epoch": 0.9617444529456771, - "grad_norm": 0.0, - "learning_rate": 7.665982519663329e-08, - "loss": 0.7814, - "step": 33939 - }, - { - "epoch": 0.9617727903879396, - "grad_norm": 0.0, - "learning_rate": 7.654644220675744e-08, - "loss": 0.7346, - "step": 33940 - }, - { - "epoch": 0.961801127830202, - "grad_norm": 0.0, - "learning_rate": 7.643314280581404e-08, - "loss": 0.7434, - "step": 33941 - }, - { - "epoch": 0.9618294652724645, - "grad_norm": 0.0, - "learning_rate": 7.631992699476009e-08, - "loss": 0.7592, - "step": 33942 - }, - { - "epoch": 0.961857802714727, - "grad_norm": 0.0, - "learning_rate": 7.620679477454929e-08, - "loss": 0.8746, - "step": 33943 - }, - { - "epoch": 0.9618861401569895, - "grad_norm": 0.0, - "learning_rate": 7.609374614613307e-08, - "loss": 0.8032, - "step": 33944 - }, - { - "epoch": 0.9619144775992519, - "grad_norm": 0.0, - "learning_rate": 7.598078111046514e-08, - "loss": 0.7911, - "step": 33945 - }, - { - "epoch": 0.9619428150415144, - "grad_norm": 0.0, - "learning_rate": 7.586789966849473e-08, - "loss": 0.7153, - "step": 33946 - }, - { - "epoch": 0.9619711524837768, - "grad_norm": 0.0, - "learning_rate": 7.575510182117551e-08, - "loss": 0.8218, - "step": 33947 - }, - { - "epoch": 0.9619994899260392, - "grad_norm": 0.0, - "learning_rate": 7.564238756945563e-08, - "loss": 0.7654, - "step": 33948 - }, - { - "epoch": 0.9620278273683017, - "grad_norm": 0.0, - "learning_rate": 7.552975691428655e-08, - "loss": 0.8331, - "step": 33949 - }, - { - "epoch": 0.9620561648105642, - "grad_norm": 0.0, - "learning_rate": 7.541720985661416e-08, - "loss": 0.8679, - "step": 33950 - }, - { - "epoch": 0.9620845022528267, - "grad_norm": 0.0, - "learning_rate": 7.530474639738883e-08, - "loss": 0.7922, - "step": 33951 - }, - { - "epoch": 0.9621128396950891, - "grad_norm": 0.0, - "learning_rate": 7.519236653755757e-08, - "loss": 0.8246, - "step": 33952 - }, - { - "epoch": 0.9621411771373516, - "grad_norm": 0.0, - "learning_rate": 7.508007027806519e-08, - "loss": 0.8269, - "step": 33953 - }, - { - "epoch": 0.9621695145796141, - "grad_norm": 0.0, - "learning_rate": 7.49678576198587e-08, - "loss": 0.8055, - "step": 33954 - }, - { - "epoch": 0.9621978520218765, - "grad_norm": 0.0, - "learning_rate": 7.485572856388512e-08, - "loss": 0.864, - "step": 33955 - }, - { - "epoch": 0.962226189464139, - "grad_norm": 0.0, - "learning_rate": 7.474368311108593e-08, - "loss": 0.8069, - "step": 33956 - }, - { - "epoch": 0.9622545269064015, - "grad_norm": 0.0, - "learning_rate": 7.463172126240703e-08, - "loss": 0.8686, - "step": 33957 - }, - { - "epoch": 0.9622828643486638, - "grad_norm": 0.0, - "learning_rate": 7.451984301879101e-08, - "loss": 0.9261, - "step": 33958 - }, - { - "epoch": 0.9623112017909263, - "grad_norm": 0.0, - "learning_rate": 7.440804838117932e-08, - "loss": 0.7992, - "step": 33959 - }, - { - "epoch": 0.9623395392331888, - "grad_norm": 0.0, - "learning_rate": 7.429633735051566e-08, - "loss": 0.8956, - "step": 33960 - }, - { - "epoch": 0.9623678766754513, - "grad_norm": 0.0, - "learning_rate": 7.418470992773818e-08, - "loss": 0.7847, - "step": 33961 - }, - { - "epoch": 0.9623962141177137, - "grad_norm": 0.0, - "learning_rate": 7.407316611378945e-08, - "loss": 0.7571, - "step": 33962 - }, - { - "epoch": 0.9624245515599762, - "grad_norm": 0.0, - "learning_rate": 7.396170590960982e-08, - "loss": 0.8843, - "step": 33963 - }, - { - "epoch": 0.9624528890022387, - "grad_norm": 0.0, - "learning_rate": 7.385032931613412e-08, - "loss": 0.682, - "step": 33964 - }, - { - "epoch": 0.9624812264445011, - "grad_norm": 0.0, - "learning_rate": 7.37390363343049e-08, - "loss": 0.861, - "step": 33965 - }, - { - "epoch": 0.9625095638867636, - "grad_norm": 0.0, - "learning_rate": 7.362782696505699e-08, - "loss": 0.7391, - "step": 33966 - }, - { - "epoch": 0.9625379013290261, - "grad_norm": 0.0, - "learning_rate": 7.351670120932852e-08, - "loss": 0.8348, - "step": 33967 - }, - { - "epoch": 0.9625662387712886, - "grad_norm": 0.0, - "learning_rate": 7.34056590680543e-08, - "loss": 0.7409, - "step": 33968 - }, - { - "epoch": 0.9625945762135509, - "grad_norm": 0.0, - "learning_rate": 7.329470054217024e-08, - "loss": 0.8047, - "step": 33969 - }, - { - "epoch": 0.9626229136558134, - "grad_norm": 0.0, - "learning_rate": 7.318382563261228e-08, - "loss": 0.9362, - "step": 33970 - }, - { - "epoch": 0.9626512510980759, - "grad_norm": 0.0, - "learning_rate": 7.307303434031187e-08, - "loss": 0.7658, - "step": 33971 - }, - { - "epoch": 0.9626795885403383, - "grad_norm": 0.0, - "learning_rate": 7.296232666620496e-08, - "loss": 0.8639, - "step": 33972 - }, - { - "epoch": 0.9627079259826008, - "grad_norm": 0.0, - "learning_rate": 7.285170261122187e-08, - "loss": 0.8254, - "step": 33973 - }, - { - "epoch": 0.9627362634248633, - "grad_norm": 0.0, - "learning_rate": 7.274116217629524e-08, - "loss": 0.8107, - "step": 33974 - }, - { - "epoch": 0.9627646008671258, - "grad_norm": 0.0, - "learning_rate": 7.26307053623565e-08, - "loss": 0.8484, - "step": 33975 - }, - { - "epoch": 0.9627929383093882, - "grad_norm": 0.0, - "learning_rate": 7.252033217033494e-08, - "loss": 0.8327, - "step": 33976 - }, - { - "epoch": 0.9628212757516507, - "grad_norm": 0.0, - "learning_rate": 7.241004260116202e-08, - "loss": 0.8601, - "step": 33977 - }, - { - "epoch": 0.9628496131939132, - "grad_norm": 0.0, - "learning_rate": 7.22998366557659e-08, - "loss": 0.8094, - "step": 33978 - }, - { - "epoch": 0.9628779506361755, - "grad_norm": 0.0, - "learning_rate": 7.218971433507471e-08, - "loss": 0.8453, - "step": 33979 - }, - { - "epoch": 0.962906288078438, - "grad_norm": 0.0, - "learning_rate": 7.20796756400155e-08, - "loss": 0.8594, - "step": 33980 - }, - { - "epoch": 0.9629346255207005, - "grad_norm": 0.0, - "learning_rate": 7.19697205715153e-08, - "loss": 0.7394, - "step": 33981 - }, - { - "epoch": 0.9629629629629629, - "grad_norm": 0.0, - "learning_rate": 7.185984913050225e-08, - "loss": 0.684, - "step": 33982 - }, - { - "epoch": 0.9629913004052254, - "grad_norm": 0.0, - "learning_rate": 7.175006131789897e-08, - "loss": 0.7591, - "step": 33983 - }, - { - "epoch": 0.9630196378474879, - "grad_norm": 0.0, - "learning_rate": 7.164035713463358e-08, - "loss": 0.7263, - "step": 33984 - }, - { - "epoch": 0.9630479752897504, - "grad_norm": 0.0, - "learning_rate": 7.153073658162646e-08, - "loss": 0.8465, - "step": 33985 - }, - { - "epoch": 0.9630763127320128, - "grad_norm": 0.0, - "learning_rate": 7.142119965980465e-08, - "loss": 0.7481, - "step": 33986 - }, - { - "epoch": 0.9631046501742753, - "grad_norm": 0.0, - "learning_rate": 7.131174637008742e-08, - "loss": 0.8961, - "step": 33987 - }, - { - "epoch": 0.9631329876165378, - "grad_norm": 0.0, - "learning_rate": 7.120237671339847e-08, - "loss": 0.8419, - "step": 33988 - }, - { - "epoch": 0.9631613250588001, - "grad_norm": 0.0, - "learning_rate": 7.109309069065928e-08, - "loss": 0.7736, - "step": 33989 - }, - { - "epoch": 0.9631896625010626, - "grad_norm": 0.0, - "learning_rate": 7.098388830279024e-08, - "loss": 0.8225, - "step": 33990 - }, - { - "epoch": 0.9632179999433251, - "grad_norm": 0.0, - "learning_rate": 7.08747695507106e-08, - "loss": 0.8638, - "step": 33991 - }, - { - "epoch": 0.9632463373855876, - "grad_norm": 0.0, - "learning_rate": 7.076573443533963e-08, - "loss": 0.7143, - "step": 33992 - }, - { - "epoch": 0.96327467482785, - "grad_norm": 0.0, - "learning_rate": 7.065678295759659e-08, - "loss": 0.8029, - "step": 33993 - }, - { - "epoch": 0.9633030122701125, - "grad_norm": 0.0, - "learning_rate": 7.054791511839853e-08, - "loss": 0.8159, - "step": 33994 - }, - { - "epoch": 0.963331349712375, - "grad_norm": 0.0, - "learning_rate": 7.04391309186614e-08, - "loss": 0.7871, - "step": 33995 - }, - { - "epoch": 0.9633596871546374, - "grad_norm": 0.0, - "learning_rate": 7.033043035930442e-08, - "loss": 0.7246, - "step": 33996 - }, - { - "epoch": 0.9633880245968999, - "grad_norm": 0.0, - "learning_rate": 7.022181344124024e-08, - "loss": 0.8115, - "step": 33997 - }, - { - "epoch": 0.9634163620391624, - "grad_norm": 0.0, - "learning_rate": 7.011328016538588e-08, - "loss": 0.8004, - "step": 33998 - }, - { - "epoch": 0.9634446994814249, - "grad_norm": 0.0, - "learning_rate": 7.000483053265506e-08, - "loss": 0.8565, - "step": 33999 - }, - { - "epoch": 0.9634730369236872, - "grad_norm": 0.0, - "learning_rate": 6.989646454396037e-08, - "loss": 0.7903, - "step": 34000 - }, - { - "epoch": 0.9635013743659497, - "grad_norm": 0.0, - "learning_rate": 6.978818220021444e-08, - "loss": 0.8174, - "step": 34001 - }, - { - "epoch": 0.9635297118082122, - "grad_norm": 0.0, - "learning_rate": 6.967998350233096e-08, - "loss": 0.7314, - "step": 34002 - }, - { - "epoch": 0.9635580492504746, - "grad_norm": 0.0, - "learning_rate": 6.957186845122032e-08, - "loss": 0.765, - "step": 34003 - }, - { - "epoch": 0.9635863866927371, - "grad_norm": 0.0, - "learning_rate": 6.946383704779403e-08, - "loss": 0.7782, - "step": 34004 - }, - { - "epoch": 0.9636147241349996, - "grad_norm": 0.0, - "learning_rate": 6.935588929296134e-08, - "loss": 0.8618, - "step": 34005 - }, - { - "epoch": 0.963643061577262, - "grad_norm": 0.0, - "learning_rate": 6.924802518763152e-08, - "loss": 0.7805, - "step": 34006 - }, - { - "epoch": 0.9636713990195245, - "grad_norm": 0.0, - "learning_rate": 6.914024473271274e-08, - "loss": 0.757, - "step": 34007 - }, - { - "epoch": 0.963699736461787, - "grad_norm": 0.0, - "learning_rate": 6.903254792911318e-08, - "loss": 0.7573, - "step": 34008 - }, - { - "epoch": 0.9637280739040495, - "grad_norm": 0.0, - "learning_rate": 6.892493477774098e-08, - "loss": 0.8697, - "step": 34009 - }, - { - "epoch": 0.9637564113463118, - "grad_norm": 0.0, - "learning_rate": 6.881740527950209e-08, - "loss": 0.7452, - "step": 34010 - }, - { - "epoch": 0.9637847487885743, - "grad_norm": 0.0, - "learning_rate": 6.870995943530134e-08, - "loss": 0.8108, - "step": 34011 - }, - { - "epoch": 0.9638130862308368, - "grad_norm": 0.0, - "learning_rate": 6.860259724604468e-08, - "loss": 0.7869, - "step": 34012 - }, - { - "epoch": 0.9638414236730992, - "grad_norm": 0.0, - "learning_rate": 6.849531871263692e-08, - "loss": 0.881, - "step": 34013 - }, - { - "epoch": 0.9638697611153617, - "grad_norm": 0.0, - "learning_rate": 6.838812383597959e-08, - "loss": 0.8519, - "step": 34014 - }, - { - "epoch": 0.9638980985576242, - "grad_norm": 0.0, - "learning_rate": 6.828101261697862e-08, - "loss": 0.8247, - "step": 34015 - }, - { - "epoch": 0.9639264359998867, - "grad_norm": 0.0, - "learning_rate": 6.817398505653439e-08, - "loss": 0.8479, - "step": 34016 - }, - { - "epoch": 0.9639547734421491, - "grad_norm": 0.0, - "learning_rate": 6.80670411555484e-08, - "loss": 0.7916, - "step": 34017 - }, - { - "epoch": 0.9639831108844116, - "grad_norm": 0.0, - "learning_rate": 6.796018091492219e-08, - "loss": 0.7773, - "step": 34018 - }, - { - "epoch": 0.9640114483266741, - "grad_norm": 0.0, - "learning_rate": 6.785340433555499e-08, - "loss": 0.8961, - "step": 34019 - }, - { - "epoch": 0.9640397857689365, - "grad_norm": 0.0, - "learning_rate": 6.77467114183472e-08, - "loss": 0.8469, - "step": 34020 - }, - { - "epoch": 0.964068123211199, - "grad_norm": 0.0, - "learning_rate": 6.764010216419703e-08, - "loss": 0.8163, - "step": 34021 - }, - { - "epoch": 0.9640964606534614, - "grad_norm": 0.0, - "learning_rate": 6.753357657400261e-08, - "loss": 0.742, - "step": 34022 - }, - { - "epoch": 0.9641247980957239, - "grad_norm": 0.0, - "learning_rate": 6.742713464866102e-08, - "loss": 0.7387, - "step": 34023 - }, - { - "epoch": 0.9641531355379863, - "grad_norm": 0.0, - "learning_rate": 6.73207763890693e-08, - "loss": 0.8072, - "step": 34024 - }, - { - "epoch": 0.9641814729802488, - "grad_norm": 0.0, - "learning_rate": 6.721450179612232e-08, - "loss": 0.8047, - "step": 34025 - }, - { - "epoch": 0.9642098104225113, - "grad_norm": 0.0, - "learning_rate": 6.710831087071712e-08, - "loss": 0.8548, - "step": 34026 - }, - { - "epoch": 0.9642381478647737, - "grad_norm": 0.0, - "learning_rate": 6.700220361374632e-08, - "loss": 0.7459, - "step": 34027 - }, - { - "epoch": 0.9642664853070362, - "grad_norm": 0.0, - "learning_rate": 6.689618002610587e-08, - "loss": 0.893, - "step": 34028 - }, - { - "epoch": 0.9642948227492987, - "grad_norm": 0.0, - "learning_rate": 6.679024010868617e-08, - "loss": 0.8166, - "step": 34029 - }, - { - "epoch": 0.9643231601915611, - "grad_norm": 0.0, - "learning_rate": 6.668438386238096e-08, - "loss": 0.6645, - "step": 34030 - }, - { - "epoch": 0.9643514976338236, - "grad_norm": 0.0, - "learning_rate": 6.657861128808285e-08, - "loss": 0.8194, - "step": 34031 - }, - { - "epoch": 0.964379835076086, - "grad_norm": 0.0, - "learning_rate": 6.647292238668001e-08, - "loss": 0.894, - "step": 34032 - }, - { - "epoch": 0.9644081725183485, - "grad_norm": 0.0, - "learning_rate": 6.63673171590662e-08, - "loss": 0.7776, - "step": 34033 - }, - { - "epoch": 0.9644365099606109, - "grad_norm": 0.0, - "learning_rate": 6.626179560612733e-08, - "loss": 0.8374, - "step": 34034 - }, - { - "epoch": 0.9644648474028734, - "grad_norm": 0.0, - "learning_rate": 6.615635772875606e-08, - "loss": 0.8201, - "step": 34035 - }, - { - "epoch": 0.9644931848451359, - "grad_norm": 0.0, - "learning_rate": 6.605100352783833e-08, - "loss": 0.818, - "step": 34036 - }, - { - "epoch": 0.9645215222873983, - "grad_norm": 0.0, - "learning_rate": 6.594573300426121e-08, - "loss": 0.7487, - "step": 34037 - }, - { - "epoch": 0.9645498597296608, - "grad_norm": 0.0, - "learning_rate": 6.584054615891178e-08, - "loss": 0.8025, - "step": 34038 - }, - { - "epoch": 0.9645781971719233, - "grad_norm": 0.0, - "learning_rate": 6.573544299267709e-08, - "loss": 0.8145, - "step": 34039 - }, - { - "epoch": 0.9646065346141858, - "grad_norm": 0.0, - "learning_rate": 6.5630423506442e-08, - "loss": 0.8329, - "step": 34040 - }, - { - "epoch": 0.9646348720564482, - "grad_norm": 0.0, - "learning_rate": 6.552548770109024e-08, - "loss": 0.8817, - "step": 34041 - }, - { - "epoch": 0.9646632094987106, - "grad_norm": 0.0, - "learning_rate": 6.542063557750667e-08, - "loss": 0.6853, - "step": 34042 - }, - { - "epoch": 0.9646915469409731, - "grad_norm": 0.0, - "learning_rate": 6.531586713657389e-08, - "loss": 0.8479, - "step": 34043 - }, - { - "epoch": 0.9647198843832355, - "grad_norm": 0.0, - "learning_rate": 6.521118237917456e-08, - "loss": 0.8068, - "step": 34044 - }, - { - "epoch": 0.964748221825498, - "grad_norm": 0.0, - "learning_rate": 6.510658130619241e-08, - "loss": 0.8908, - "step": 34045 - }, - { - "epoch": 0.9647765592677605, - "grad_norm": 0.0, - "learning_rate": 6.50020639185045e-08, - "loss": 0.8531, - "step": 34046 - }, - { - "epoch": 0.964804896710023, - "grad_norm": 0.0, - "learning_rate": 6.489763021699458e-08, - "loss": 0.838, - "step": 34047 - }, - { - "epoch": 0.9648332341522854, - "grad_norm": 0.0, - "learning_rate": 6.479328020254084e-08, - "loss": 0.7028, - "step": 34048 - }, - { - "epoch": 0.9648615715945479, - "grad_norm": 0.0, - "learning_rate": 6.468901387602367e-08, - "loss": 0.8272, - "step": 34049 - }, - { - "epoch": 0.9648899090368104, - "grad_norm": 0.0, - "learning_rate": 6.458483123831905e-08, - "loss": 0.876, - "step": 34050 - }, - { - "epoch": 0.9649182464790728, - "grad_norm": 0.0, - "learning_rate": 6.448073229030626e-08, - "loss": 0.7401, - "step": 34051 - }, - { - "epoch": 0.9649465839213353, - "grad_norm": 0.0, - "learning_rate": 6.43767170328613e-08, - "loss": 0.8188, - "step": 34052 - }, - { - "epoch": 0.9649749213635977, - "grad_norm": 0.0, - "learning_rate": 6.427278546686122e-08, - "loss": 0.854, - "step": 34053 - }, - { - "epoch": 0.9650032588058601, - "grad_norm": 0.0, - "learning_rate": 6.416893759318089e-08, - "loss": 0.8368, - "step": 34054 - }, - { - "epoch": 0.9650315962481226, - "grad_norm": 0.0, - "learning_rate": 6.406517341269624e-08, - "loss": 0.7818, - "step": 34055 - }, - { - "epoch": 0.9650599336903851, - "grad_norm": 0.0, - "learning_rate": 6.396149292627885e-08, - "loss": 0.8087, - "step": 34056 - }, - { - "epoch": 0.9650882711326476, - "grad_norm": 0.0, - "learning_rate": 6.385789613480353e-08, - "loss": 0.8624, - "step": 34057 - }, - { - "epoch": 0.96511660857491, - "grad_norm": 0.0, - "learning_rate": 6.375438303914294e-08, - "loss": 0.9444, - "step": 34058 - }, - { - "epoch": 0.9651449460171725, - "grad_norm": 0.0, - "learning_rate": 6.365095364016971e-08, - "loss": 0.8283, - "step": 34059 - }, - { - "epoch": 0.965173283459435, - "grad_norm": 0.0, - "learning_rate": 6.354760793875314e-08, - "loss": 0.7924, - "step": 34060 - }, - { - "epoch": 0.9652016209016974, - "grad_norm": 0.0, - "learning_rate": 6.344434593576587e-08, - "loss": 0.8374, - "step": 34061 - }, - { - "epoch": 0.9652299583439599, - "grad_norm": 0.0, - "learning_rate": 6.33411676320761e-08, - "loss": 0.7756, - "step": 34062 - }, - { - "epoch": 0.9652582957862224, - "grad_norm": 0.0, - "learning_rate": 6.323807302855422e-08, - "loss": 0.7516, - "step": 34063 - }, - { - "epoch": 0.9652866332284848, - "grad_norm": 0.0, - "learning_rate": 6.313506212606734e-08, - "loss": 0.7996, - "step": 34064 - }, - { - "epoch": 0.9653149706707472, - "grad_norm": 0.0, - "learning_rate": 6.303213492548477e-08, - "loss": 0.8447, - "step": 34065 - }, - { - "epoch": 0.9653433081130097, - "grad_norm": 0.0, - "learning_rate": 6.292929142767135e-08, - "loss": 0.7932, - "step": 34066 - }, - { - "epoch": 0.9653716455552722, - "grad_norm": 0.0, - "learning_rate": 6.282653163349528e-08, - "loss": 0.8484, - "step": 34067 - }, - { - "epoch": 0.9653999829975346, - "grad_norm": 0.0, - "learning_rate": 6.272385554382143e-08, - "loss": 0.8635, - "step": 34068 - }, - { - "epoch": 0.9654283204397971, - "grad_norm": 0.0, - "learning_rate": 6.262126315951355e-08, - "loss": 0.7771, - "step": 34069 - }, - { - "epoch": 0.9654566578820596, - "grad_norm": 0.0, - "learning_rate": 6.251875448143763e-08, - "loss": 0.8418, - "step": 34070 - }, - { - "epoch": 0.9654849953243221, - "grad_norm": 0.0, - "learning_rate": 6.241632951045629e-08, - "loss": 0.7505, - "step": 34071 - }, - { - "epoch": 0.9655133327665845, - "grad_norm": 0.0, - "learning_rate": 6.231398824743218e-08, - "loss": 0.8371, - "step": 34072 - }, - { - "epoch": 0.965541670208847, - "grad_norm": 0.0, - "learning_rate": 6.221173069322905e-08, - "loss": 0.7922, - "step": 34073 - }, - { - "epoch": 0.9655700076511095, - "grad_norm": 0.0, - "learning_rate": 6.210955684870512e-08, - "loss": 0.8297, - "step": 34074 - }, - { - "epoch": 0.9655983450933718, - "grad_norm": 0.0, - "learning_rate": 6.200746671472413e-08, - "loss": 0.7281, - "step": 34075 - }, - { - "epoch": 0.9656266825356343, - "grad_norm": 0.0, - "learning_rate": 6.190546029214428e-08, - "loss": 0.9131, - "step": 34076 - }, - { - "epoch": 0.9656550199778968, - "grad_norm": 0.0, - "learning_rate": 6.18035375818249e-08, - "loss": 0.7802, - "step": 34077 - }, - { - "epoch": 0.9656833574201592, - "grad_norm": 0.0, - "learning_rate": 6.170169858462416e-08, - "loss": 0.876, - "step": 34078 - }, - { - "epoch": 0.9657116948624217, - "grad_norm": 0.0, - "learning_rate": 6.15999433014014e-08, - "loss": 0.8361, - "step": 34079 - }, - { - "epoch": 0.9657400323046842, - "grad_norm": 0.0, - "learning_rate": 6.149827173301259e-08, - "loss": 0.7874, - "step": 34080 - }, - { - "epoch": 0.9657683697469467, - "grad_norm": 0.0, - "learning_rate": 6.139668388031484e-08, - "loss": 0.8683, - "step": 34081 - }, - { - "epoch": 0.9657967071892091, - "grad_norm": 0.0, - "learning_rate": 6.129517974416299e-08, - "loss": 0.8124, - "step": 34082 - }, - { - "epoch": 0.9658250446314716, - "grad_norm": 0.0, - "learning_rate": 6.119375932541194e-08, - "loss": 0.7239, - "step": 34083 - }, - { - "epoch": 0.9658533820737341, - "grad_norm": 0.0, - "learning_rate": 6.109242262491655e-08, - "loss": 0.827, - "step": 34084 - }, - { - "epoch": 0.9658817195159964, - "grad_norm": 0.0, - "learning_rate": 6.099116964353058e-08, - "loss": 0.7387, - "step": 34085 - }, - { - "epoch": 0.9659100569582589, - "grad_norm": 0.0, - "learning_rate": 6.08900003821078e-08, - "loss": 0.8368, - "step": 34086 - }, - { - "epoch": 0.9659383944005214, - "grad_norm": 0.0, - "learning_rate": 6.078891484149863e-08, - "loss": 0.767, - "step": 34087 - }, - { - "epoch": 0.9659667318427839, - "grad_norm": 0.0, - "learning_rate": 6.068791302255462e-08, - "loss": 0.8099, - "step": 34088 - }, - { - "epoch": 0.9659950692850463, - "grad_norm": 0.0, - "learning_rate": 6.058699492612841e-08, - "loss": 0.8061, - "step": 34089 - }, - { - "epoch": 0.9660234067273088, - "grad_norm": 0.0, - "learning_rate": 6.048616055306822e-08, - "loss": 0.8774, - "step": 34090 - }, - { - "epoch": 0.9660517441695713, - "grad_norm": 0.0, - "learning_rate": 6.038540990422448e-08, - "loss": 0.7394, - "step": 34091 - }, - { - "epoch": 0.9660800816118337, - "grad_norm": 0.0, - "learning_rate": 6.02847429804454e-08, - "loss": 0.7355, - "step": 34092 - }, - { - "epoch": 0.9661084190540962, - "grad_norm": 0.0, - "learning_rate": 6.018415978257808e-08, - "loss": 0.8275, - "step": 34093 - }, - { - "epoch": 0.9661367564963587, - "grad_norm": 0.0, - "learning_rate": 6.008366031147184e-08, - "loss": 0.788, - "step": 34094 - }, - { - "epoch": 0.966165093938621, - "grad_norm": 0.0, - "learning_rate": 5.998324456797044e-08, - "loss": 0.8389, - "step": 34095 - }, - { - "epoch": 0.9661934313808835, - "grad_norm": 0.0, - "learning_rate": 5.988291255292211e-08, - "loss": 0.7691, - "step": 34096 - }, - { - "epoch": 0.966221768823146, - "grad_norm": 0.0, - "learning_rate": 5.978266426717171e-08, - "loss": 0.7357, - "step": 34097 - }, - { - "epoch": 0.9662501062654085, - "grad_norm": 0.0, - "learning_rate": 5.968249971156193e-08, - "loss": 0.7601, - "step": 34098 - }, - { - "epoch": 0.9662784437076709, - "grad_norm": 0.0, - "learning_rate": 5.958241888693872e-08, - "loss": 0.7629, - "step": 34099 - }, - { - "epoch": 0.9663067811499334, - "grad_norm": 0.0, - "learning_rate": 5.948242179414365e-08, - "loss": 0.7767, - "step": 34100 - }, - { - "epoch": 0.9663351185921959, - "grad_norm": 0.0, - "learning_rate": 5.9382508434020495e-08, - "loss": 0.7826, - "step": 34101 - }, - { - "epoch": 0.9663634560344583, - "grad_norm": 0.0, - "learning_rate": 5.9282678807408566e-08, - "loss": 0.7945, - "step": 34102 - }, - { - "epoch": 0.9663917934767208, - "grad_norm": 0.0, - "learning_rate": 5.9182932915150536e-08, - "loss": 0.7129, - "step": 34103 - }, - { - "epoch": 0.9664201309189833, - "grad_norm": 0.0, - "learning_rate": 5.9083270758085733e-08, - "loss": 0.7967, - "step": 34104 - }, - { - "epoch": 0.9664484683612458, - "grad_norm": 0.0, - "learning_rate": 5.898369233705459e-08, - "loss": 0.7272, - "step": 34105 - }, - { - "epoch": 0.9664768058035081, - "grad_norm": 0.0, - "learning_rate": 5.8884197652895325e-08, - "loss": 0.7454, - "step": 34106 - }, - { - "epoch": 0.9665051432457706, - "grad_norm": 0.0, - "learning_rate": 5.878478670644616e-08, - "loss": 0.8106, - "step": 34107 - }, - { - "epoch": 0.9665334806880331, - "grad_norm": 0.0, - "learning_rate": 5.8685459498543095e-08, - "loss": 0.7702, - "step": 34108 - }, - { - "epoch": 0.9665618181302955, - "grad_norm": 0.0, - "learning_rate": 5.858621603002434e-08, - "loss": 0.7729, - "step": 34109 - }, - { - "epoch": 0.966590155572558, - "grad_norm": 0.0, - "learning_rate": 5.848705630172591e-08, - "loss": 0.8131, - "step": 34110 - }, - { - "epoch": 0.9666184930148205, - "grad_norm": 0.0, - "learning_rate": 5.8387980314482674e-08, - "loss": 0.775, - "step": 34111 - }, - { - "epoch": 0.966646830457083, - "grad_norm": 0.0, - "learning_rate": 5.8288988069129525e-08, - "loss": 0.7579, - "step": 34112 - }, - { - "epoch": 0.9666751678993454, - "grad_norm": 0.0, - "learning_rate": 5.819007956650024e-08, - "loss": 0.776, - "step": 34113 - }, - { - "epoch": 0.9667035053416079, - "grad_norm": 0.0, - "learning_rate": 5.809125480742639e-08, - "loss": 0.7791, - "step": 34114 - }, - { - "epoch": 0.9667318427838704, - "grad_norm": 0.0, - "learning_rate": 5.799251379274284e-08, - "loss": 0.7474, - "step": 34115 - }, - { - "epoch": 0.9667601802261327, - "grad_norm": 0.0, - "learning_rate": 5.7893856523280056e-08, - "loss": 0.8081, - "step": 34116 - }, - { - "epoch": 0.9667885176683952, - "grad_norm": 0.0, - "learning_rate": 5.7795282999869587e-08, - "loss": 0.8531, - "step": 34117 - }, - { - "epoch": 0.9668168551106577, - "grad_norm": 0.0, - "learning_rate": 5.7696793223340764e-08, - "loss": 0.8298, - "step": 34118 - }, - { - "epoch": 0.9668451925529201, - "grad_norm": 0.0, - "learning_rate": 5.759838719452404e-08, - "loss": 0.8799, - "step": 34119 - }, - { - "epoch": 0.9668735299951826, - "grad_norm": 0.0, - "learning_rate": 5.7500064914247645e-08, - "loss": 0.8882, - "step": 34120 - }, - { - "epoch": 0.9669018674374451, - "grad_norm": 0.0, - "learning_rate": 5.740182638334091e-08, - "loss": 0.7037, - "step": 34121 - }, - { - "epoch": 0.9669302048797076, - "grad_norm": 0.0, - "learning_rate": 5.730367160263095e-08, - "loss": 0.7829, - "step": 34122 - }, - { - "epoch": 0.96695854232197, - "grad_norm": 0.0, - "learning_rate": 5.7205600572943774e-08, - "loss": 0.8006, - "step": 34123 - }, - { - "epoch": 0.9669868797642325, - "grad_norm": 0.0, - "learning_rate": 5.710761329510539e-08, - "loss": 0.7653, - "step": 34124 - }, - { - "epoch": 0.967015217206495, - "grad_norm": 0.0, - "learning_rate": 5.700970976994291e-08, - "loss": 0.8198, - "step": 34125 - }, - { - "epoch": 0.9670435546487574, - "grad_norm": 0.0, - "learning_rate": 5.691188999827901e-08, - "loss": 0.7614, - "step": 34126 - }, - { - "epoch": 0.9670718920910198, - "grad_norm": 0.0, - "learning_rate": 5.6814153980938593e-08, - "loss": 0.7936, - "step": 34127 - }, - { - "epoch": 0.9671002295332823, - "grad_norm": 0.0, - "learning_rate": 5.6716501718745434e-08, - "loss": 0.6608, - "step": 34128 - }, - { - "epoch": 0.9671285669755448, - "grad_norm": 0.0, - "learning_rate": 5.661893321252221e-08, - "loss": 0.8125, - "step": 34129 - }, - { - "epoch": 0.9671569044178072, - "grad_norm": 0.0, - "learning_rate": 5.652144846308827e-08, - "loss": 0.6989, - "step": 34130 - }, - { - "epoch": 0.9671852418600697, - "grad_norm": 0.0, - "learning_rate": 5.6424047471268507e-08, - "loss": 0.7764, - "step": 34131 - }, - { - "epoch": 0.9672135793023322, - "grad_norm": 0.0, - "learning_rate": 5.6326730237880043e-08, - "loss": 0.7264, - "step": 34132 - }, - { - "epoch": 0.9672419167445946, - "grad_norm": 0.0, - "learning_rate": 5.622949676374445e-08, - "loss": 0.7917, - "step": 34133 - }, - { - "epoch": 0.9672702541868571, - "grad_norm": 0.0, - "learning_rate": 5.6132347049679955e-08, - "loss": 0.7803, - "step": 34134 - }, - { - "epoch": 0.9672985916291196, - "grad_norm": 0.0, - "learning_rate": 5.603528109650591e-08, - "loss": 0.8512, - "step": 34135 - }, - { - "epoch": 0.9673269290713821, - "grad_norm": 0.0, - "learning_rate": 5.593829890503832e-08, - "loss": 0.7644, - "step": 34136 - }, - { - "epoch": 0.9673552665136445, - "grad_norm": 0.0, - "learning_rate": 5.584140047609654e-08, - "loss": 0.7991, - "step": 34137 - }, - { - "epoch": 0.967383603955907, - "grad_norm": 0.0, - "learning_rate": 5.574458581049436e-08, - "loss": 0.7426, - "step": 34138 - }, - { - "epoch": 0.9674119413981694, - "grad_norm": 0.0, - "learning_rate": 5.5647854909047786e-08, - "loss": 0.7732, - "step": 34139 - }, - { - "epoch": 0.9674402788404318, - "grad_norm": 0.0, - "learning_rate": 5.555120777257284e-08, - "loss": 0.7265, - "step": 34140 - }, - { - "epoch": 0.9674686162826943, - "grad_norm": 0.0, - "learning_rate": 5.5454644401883307e-08, - "loss": 0.7893, - "step": 34141 - }, - { - "epoch": 0.9674969537249568, - "grad_norm": 0.0, - "learning_rate": 5.535816479779188e-08, - "loss": 0.8078, - "step": 34142 - }, - { - "epoch": 0.9675252911672192, - "grad_norm": 0.0, - "learning_rate": 5.526176896111013e-08, - "loss": 0.7804, - "step": 34143 - }, - { - "epoch": 0.9675536286094817, - "grad_norm": 0.0, - "learning_rate": 5.5165456892652955e-08, - "loss": 0.7966, - "step": 34144 - }, - { - "epoch": 0.9675819660517442, - "grad_norm": 0.0, - "learning_rate": 5.506922859322972e-08, - "loss": 0.8063, - "step": 34145 - }, - { - "epoch": 0.9676103034940067, - "grad_norm": 0.0, - "learning_rate": 5.497308406365087e-08, - "loss": 0.7552, - "step": 34146 - }, - { - "epoch": 0.9676386409362691, - "grad_norm": 0.0, - "learning_rate": 5.4877023304726885e-08, - "loss": 0.6937, - "step": 34147 - }, - { - "epoch": 0.9676669783785316, - "grad_norm": 0.0, - "learning_rate": 5.4781046317267103e-08, - "loss": 0.8536, - "step": 34148 - }, - { - "epoch": 0.967695315820794, - "grad_norm": 0.0, - "learning_rate": 5.468515310207867e-08, - "loss": 0.8423, - "step": 34149 - }, - { - "epoch": 0.9677236532630564, - "grad_norm": 0.0, - "learning_rate": 5.458934365997093e-08, - "loss": 0.8633, - "step": 34150 - }, - { - "epoch": 0.9677519907053189, - "grad_norm": 0.0, - "learning_rate": 5.449361799175101e-08, - "loss": 0.8203, - "step": 34151 - }, - { - "epoch": 0.9677803281475814, - "grad_norm": 0.0, - "learning_rate": 5.4397976098223834e-08, - "loss": 0.7646, - "step": 34152 - }, - { - "epoch": 0.9678086655898439, - "grad_norm": 0.0, - "learning_rate": 5.430241798019542e-08, - "loss": 0.7386, - "step": 34153 - }, - { - "epoch": 0.9678370030321063, - "grad_norm": 0.0, - "learning_rate": 5.420694363847068e-08, - "loss": 0.7814, - "step": 34154 - }, - { - "epoch": 0.9678653404743688, - "grad_norm": 0.0, - "learning_rate": 5.411155307385563e-08, - "loss": 0.8062, - "step": 34155 - }, - { - "epoch": 0.9678936779166313, - "grad_norm": 0.0, - "learning_rate": 5.4016246287150745e-08, - "loss": 0.6538, - "step": 34156 - }, - { - "epoch": 0.9679220153588937, - "grad_norm": 0.0, - "learning_rate": 5.392102327916093e-08, - "loss": 0.8068, - "step": 34157 - }, - { - "epoch": 0.9679503528011562, - "grad_norm": 0.0, - "learning_rate": 5.382588405068889e-08, - "loss": 0.7716, - "step": 34158 - }, - { - "epoch": 0.9679786902434186, - "grad_norm": 0.0, - "learning_rate": 5.373082860253287e-08, - "loss": 0.8419, - "step": 34159 - }, - { - "epoch": 0.9680070276856811, - "grad_norm": 0.0, - "learning_rate": 5.3635856935497775e-08, - "loss": 0.8443, - "step": 34160 - }, - { - "epoch": 0.9680353651279435, - "grad_norm": 0.0, - "learning_rate": 5.354096905037964e-08, - "loss": 0.7811, - "step": 34161 - }, - { - "epoch": 0.968063702570206, - "grad_norm": 0.0, - "learning_rate": 5.344616494797894e-08, - "loss": 0.8582, - "step": 34162 - }, - { - "epoch": 0.9680920400124685, - "grad_norm": 0.0, - "learning_rate": 5.3351444629096136e-08, - "loss": 0.7948, - "step": 34163 - }, - { - "epoch": 0.9681203774547309, - "grad_norm": 0.0, - "learning_rate": 5.3256808094527266e-08, - "loss": 0.8599, - "step": 34164 - }, - { - "epoch": 0.9681487148969934, - "grad_norm": 0.0, - "learning_rate": 5.316225534506947e-08, - "loss": 0.7778, - "step": 34165 - }, - { - "epoch": 0.9681770523392559, - "grad_norm": 0.0, - "learning_rate": 5.306778638151988e-08, - "loss": 0.7112, - "step": 34166 - }, - { - "epoch": 0.9682053897815183, - "grad_norm": 0.0, - "learning_rate": 5.297340120467453e-08, - "loss": 0.8128, - "step": 34167 - }, - { - "epoch": 0.9682337272237808, - "grad_norm": 0.0, - "learning_rate": 5.287909981532835e-08, - "loss": 0.8099, - "step": 34168 - }, - { - "epoch": 0.9682620646660433, - "grad_norm": 0.0, - "learning_rate": 5.2784882214274025e-08, - "loss": 0.8637, - "step": 34169 - }, - { - "epoch": 0.9682904021083057, - "grad_norm": 0.0, - "learning_rate": 5.269074840230648e-08, - "loss": 0.8098, - "step": 34170 - }, - { - "epoch": 0.9683187395505681, - "grad_norm": 0.0, - "learning_rate": 5.2596698380219525e-08, - "loss": 0.793, - "step": 34171 - }, - { - "epoch": 0.9683470769928306, - "grad_norm": 0.0, - "learning_rate": 5.250273214880475e-08, - "loss": 0.7939, - "step": 34172 - }, - { - "epoch": 0.9683754144350931, - "grad_norm": 0.0, - "learning_rate": 5.240884970885263e-08, - "loss": 0.8619, - "step": 34173 - }, - { - "epoch": 0.9684037518773555, - "grad_norm": 0.0, - "learning_rate": 5.2315051061154755e-08, - "loss": 0.7221, - "step": 34174 - }, - { - "epoch": 0.968432089319618, - "grad_norm": 0.0, - "learning_rate": 5.2221336206500494e-08, - "loss": 0.832, - "step": 34175 - }, - { - "epoch": 0.9684604267618805, - "grad_norm": 0.0, - "learning_rate": 5.212770514568144e-08, - "loss": 0.8309, - "step": 34176 - }, - { - "epoch": 0.968488764204143, - "grad_norm": 0.0, - "learning_rate": 5.203415787948363e-08, - "loss": 0.8388, - "step": 34177 - }, - { - "epoch": 0.9685171016464054, - "grad_norm": 0.0, - "learning_rate": 5.1940694408696425e-08, - "loss": 0.7867, - "step": 34178 - }, - { - "epoch": 0.9685454390886679, - "grad_norm": 0.0, - "learning_rate": 5.184731473410698e-08, - "loss": 0.9363, - "step": 34179 - }, - { - "epoch": 0.9685737765309304, - "grad_norm": 0.0, - "learning_rate": 5.1754018856501334e-08, - "loss": 0.8118, - "step": 34180 - }, - { - "epoch": 0.9686021139731927, - "grad_norm": 0.0, - "learning_rate": 5.166080677666663e-08, - "loss": 0.7752, - "step": 34181 - }, - { - "epoch": 0.9686304514154552, - "grad_norm": 0.0, - "learning_rate": 5.156767849538669e-08, - "loss": 0.8255, - "step": 34182 - }, - { - "epoch": 0.9686587888577177, - "grad_norm": 0.0, - "learning_rate": 5.1474634013446435e-08, - "loss": 0.7935, - "step": 34183 - }, - { - "epoch": 0.9686871262999802, - "grad_norm": 0.0, - "learning_rate": 5.13816733316308e-08, - "loss": 0.7714, - "step": 34184 - }, - { - "epoch": 0.9687154637422426, - "grad_norm": 0.0, - "learning_rate": 5.128879645072027e-08, - "loss": 0.861, - "step": 34185 - }, - { - "epoch": 0.9687438011845051, - "grad_norm": 0.0, - "learning_rate": 5.119600337149866e-08, - "loss": 0.8221, - "step": 34186 - }, - { - "epoch": 0.9687721386267676, - "grad_norm": 0.0, - "learning_rate": 5.110329409474757e-08, - "loss": 0.7983, - "step": 34187 - }, - { - "epoch": 0.96880047606903, - "grad_norm": 0.0, - "learning_rate": 5.101066862124859e-08, - "loss": 0.8587, - "step": 34188 - }, - { - "epoch": 0.9688288135112925, - "grad_norm": 0.0, - "learning_rate": 5.091812695178e-08, - "loss": 0.7656, - "step": 34189 - }, - { - "epoch": 0.968857150953555, - "grad_norm": 0.0, - "learning_rate": 5.0825669087123385e-08, - "loss": 0.7412, - "step": 34190 - }, - { - "epoch": 0.9688854883958173, - "grad_norm": 0.0, - "learning_rate": 5.073329502805591e-08, - "loss": 0.7268, - "step": 34191 - }, - { - "epoch": 0.9689138258380798, - "grad_norm": 0.0, - "learning_rate": 5.064100477535805e-08, - "loss": 0.7539, - "step": 34192 - }, - { - "epoch": 0.9689421632803423, - "grad_norm": 0.0, - "learning_rate": 5.054879832980364e-08, - "loss": 0.9056, - "step": 34193 - }, - { - "epoch": 0.9689705007226048, - "grad_norm": 0.0, - "learning_rate": 5.045667569217316e-08, - "loss": 0.7827, - "step": 34194 - }, - { - "epoch": 0.9689988381648672, - "grad_norm": 0.0, - "learning_rate": 5.036463686323934e-08, - "loss": 0.7937, - "step": 34195 - }, - { - "epoch": 0.9690271756071297, - "grad_norm": 0.0, - "learning_rate": 5.027268184377931e-08, - "loss": 0.8509, - "step": 34196 - }, - { - "epoch": 0.9690555130493922, - "grad_norm": 0.0, - "learning_rate": 5.018081063456803e-08, - "loss": 0.7776, - "step": 34197 - }, - { - "epoch": 0.9690838504916546, - "grad_norm": 0.0, - "learning_rate": 5.008902323637821e-08, - "loss": 0.7541, - "step": 34198 - }, - { - "epoch": 0.9691121879339171, - "grad_norm": 0.0, - "learning_rate": 4.999731964998256e-08, - "loss": 0.7135, - "step": 34199 - }, - { - "epoch": 0.9691405253761796, - "grad_norm": 0.0, - "learning_rate": 4.99056998761549e-08, - "loss": 0.7452, - "step": 34200 - }, - { - "epoch": 0.9691688628184421, - "grad_norm": 0.0, - "learning_rate": 4.9814163915666843e-08, - "loss": 0.7872, - "step": 34201 - }, - { - "epoch": 0.9691972002607044, - "grad_norm": 0.0, - "learning_rate": 4.972271176928778e-08, - "loss": 0.7982, - "step": 34202 - }, - { - "epoch": 0.9692255377029669, - "grad_norm": 0.0, - "learning_rate": 4.963134343779041e-08, - "loss": 0.8777, - "step": 34203 - }, - { - "epoch": 0.9692538751452294, - "grad_norm": 0.0, - "learning_rate": 4.954005892194191e-08, - "loss": 0.884, - "step": 34204 - }, - { - "epoch": 0.9692822125874918, - "grad_norm": 0.0, - "learning_rate": 4.9448858222513884e-08, - "loss": 0.7809, - "step": 34205 - }, - { - "epoch": 0.9693105500297543, - "grad_norm": 0.0, - "learning_rate": 4.935774134027238e-08, - "loss": 0.7448, - "step": 34206 - }, - { - "epoch": 0.9693388874720168, - "grad_norm": 0.0, - "learning_rate": 4.9266708275985675e-08, - "loss": 0.8746, - "step": 34207 - }, - { - "epoch": 0.9693672249142793, - "grad_norm": 0.0, - "learning_rate": 4.917575903042093e-08, - "loss": 0.7782, - "step": 34208 - }, - { - "epoch": 0.9693955623565417, - "grad_norm": 0.0, - "learning_rate": 4.9084893604344205e-08, - "loss": 0.8106, - "step": 34209 - }, - { - "epoch": 0.9694238997988042, - "grad_norm": 0.0, - "learning_rate": 4.899411199852044e-08, - "loss": 0.806, - "step": 34210 - }, - { - "epoch": 0.9694522372410667, - "grad_norm": 0.0, - "learning_rate": 4.890341421371458e-08, - "loss": 0.7378, - "step": 34211 - }, - { - "epoch": 0.969480574683329, - "grad_norm": 0.0, - "learning_rate": 4.881280025069046e-08, - "loss": 0.8186, - "step": 34212 - }, - { - "epoch": 0.9695089121255915, - "grad_norm": 0.0, - "learning_rate": 4.87222701102108e-08, - "loss": 0.8073, - "step": 34213 - }, - { - "epoch": 0.969537249567854, - "grad_norm": 0.0, - "learning_rate": 4.8631823793039436e-08, - "loss": 0.738, - "step": 34214 - }, - { - "epoch": 0.9695655870101164, - "grad_norm": 0.0, - "learning_rate": 4.8541461299936864e-08, - "loss": 0.9238, - "step": 34215 - }, - { - "epoch": 0.9695939244523789, - "grad_norm": 0.0, - "learning_rate": 4.8451182631665814e-08, - "loss": 0.689, - "step": 34216 - }, - { - "epoch": 0.9696222618946414, - "grad_norm": 0.0, - "learning_rate": 4.836098778898457e-08, - "loss": 0.7962, - "step": 34217 - }, - { - "epoch": 0.9696505993369039, - "grad_norm": 0.0, - "learning_rate": 4.827087677265585e-08, - "loss": 0.8631, - "step": 34218 - }, - { - "epoch": 0.9696789367791663, - "grad_norm": 0.0, - "learning_rate": 4.818084958343572e-08, - "loss": 0.7884, - "step": 34219 - }, - { - "epoch": 0.9697072742214288, - "grad_norm": 0.0, - "learning_rate": 4.8090906222084674e-08, - "loss": 0.8161, - "step": 34220 - }, - { - "epoch": 0.9697356116636913, - "grad_norm": 0.0, - "learning_rate": 4.8001046689358785e-08, - "loss": 0.7752, - "step": 34221 - }, - { - "epoch": 0.9697639491059536, - "grad_norm": 0.0, - "learning_rate": 4.7911270986016335e-08, - "loss": 0.7616, - "step": 34222 - }, - { - "epoch": 0.9697922865482161, - "grad_norm": 0.0, - "learning_rate": 4.7821579112812266e-08, - "loss": 0.7211, - "step": 34223 - }, - { - "epoch": 0.9698206239904786, - "grad_norm": 0.0, - "learning_rate": 4.7731971070503754e-08, - "loss": 0.832, - "step": 34224 - }, - { - "epoch": 0.9698489614327411, - "grad_norm": 0.0, - "learning_rate": 4.764244685984354e-08, - "loss": 0.8593, - "step": 34225 - }, - { - "epoch": 0.9698772988750035, - "grad_norm": 0.0, - "learning_rate": 4.755300648158656e-08, - "loss": 0.7968, - "step": 34226 - }, - { - "epoch": 0.969905636317266, - "grad_norm": 0.0, - "learning_rate": 4.7463649936486665e-08, - "loss": 0.7791, - "step": 34227 - }, - { - "epoch": 0.9699339737595285, - "grad_norm": 0.0, - "learning_rate": 4.7374377225296585e-08, - "loss": 0.7929, - "step": 34228 - }, - { - "epoch": 0.9699623112017909, - "grad_norm": 0.0, - "learning_rate": 4.728518834876683e-08, - "loss": 0.7919, - "step": 34229 - }, - { - "epoch": 0.9699906486440534, - "grad_norm": 0.0, - "learning_rate": 4.719608330765124e-08, - "loss": 0.8457, - "step": 34230 - }, - { - "epoch": 0.9700189860863159, - "grad_norm": 0.0, - "learning_rate": 4.710706210269811e-08, - "loss": 0.7922, - "step": 34231 - }, - { - "epoch": 0.9700473235285784, - "grad_norm": 0.0, - "learning_rate": 4.7018124734657944e-08, - "loss": 0.9264, - "step": 34232 - }, - { - "epoch": 0.9700756609708407, - "grad_norm": 0.0, - "learning_rate": 4.692927120428015e-08, - "loss": 0.7922, - "step": 34233 - }, - { - "epoch": 0.9701039984131032, - "grad_norm": 0.0, - "learning_rate": 4.684050151231412e-08, - "loss": 0.7718, - "step": 34234 - }, - { - "epoch": 0.9701323358553657, - "grad_norm": 0.0, - "learning_rate": 4.675181565950482e-08, - "loss": 0.8553, - "step": 34235 - }, - { - "epoch": 0.9701606732976281, - "grad_norm": 0.0, - "learning_rate": 4.6663213646602754e-08, - "loss": 0.7535, - "step": 34236 - }, - { - "epoch": 0.9701890107398906, - "grad_norm": 0.0, - "learning_rate": 4.657469547435178e-08, - "loss": 0.8593, - "step": 34237 - }, - { - "epoch": 0.9702173481821531, - "grad_norm": 0.0, - "learning_rate": 4.6486261143497967e-08, - "loss": 0.8166, - "step": 34238 - }, - { - "epoch": 0.9702456856244155, - "grad_norm": 0.0, - "learning_rate": 4.639791065478738e-08, - "loss": 0.8183, - "step": 34239 - }, - { - "epoch": 0.970274023066678, - "grad_norm": 0.0, - "learning_rate": 4.630964400896165e-08, - "loss": 0.7692, - "step": 34240 - }, - { - "epoch": 0.9703023605089405, - "grad_norm": 0.0, - "learning_rate": 4.622146120676796e-08, - "loss": 0.7816, - "step": 34241 - }, - { - "epoch": 0.970330697951203, - "grad_norm": 0.0, - "learning_rate": 4.613336224894571e-08, - "loss": 0.7807, - "step": 34242 - }, - { - "epoch": 0.9703590353934654, - "grad_norm": 0.0, - "learning_rate": 4.604534713623876e-08, - "loss": 0.677, - "step": 34243 - }, - { - "epoch": 0.9703873728357278, - "grad_norm": 0.0, - "learning_rate": 4.595741586938873e-08, - "loss": 0.7557, - "step": 34244 - }, - { - "epoch": 0.9704157102779903, - "grad_norm": 0.0, - "learning_rate": 4.586956844913504e-08, - "loss": 0.8582, - "step": 34245 - }, - { - "epoch": 0.9704440477202527, - "grad_norm": 0.0, - "learning_rate": 4.5781804876219303e-08, - "loss": 0.7719, - "step": 34246 - }, - { - "epoch": 0.9704723851625152, - "grad_norm": 0.0, - "learning_rate": 4.569412515137872e-08, - "loss": 0.7619, - "step": 34247 - }, - { - "epoch": 0.9705007226047777, - "grad_norm": 0.0, - "learning_rate": 4.5606529275353806e-08, - "loss": 0.8923, - "step": 34248 - }, - { - "epoch": 0.9705290600470402, - "grad_norm": 0.0, - "learning_rate": 4.5519017248880635e-08, - "loss": 0.7976, - "step": 34249 - }, - { - "epoch": 0.9705573974893026, - "grad_norm": 0.0, - "learning_rate": 4.5431589072698625e-08, - "loss": 0.7302, - "step": 34250 - }, - { - "epoch": 0.9705857349315651, - "grad_norm": 0.0, - "learning_rate": 4.534424474754162e-08, - "loss": 0.7486, - "step": 34251 - }, - { - "epoch": 0.9706140723738276, - "grad_norm": 0.0, - "learning_rate": 4.525698427414793e-08, - "loss": 0.8322, - "step": 34252 - }, - { - "epoch": 0.97064240981609, - "grad_norm": 0.0, - "learning_rate": 4.51698076532503e-08, - "loss": 0.7882, - "step": 34253 - }, - { - "epoch": 0.9706707472583525, - "grad_norm": 0.0, - "learning_rate": 4.508271488558369e-08, - "loss": 0.8494, - "step": 34254 - }, - { - "epoch": 0.970699084700615, - "grad_norm": 0.0, - "learning_rate": 4.499570597188307e-08, - "loss": 0.6901, - "step": 34255 - }, - { - "epoch": 0.9707274221428774, - "grad_norm": 0.0, - "learning_rate": 4.490878091287898e-08, - "loss": 0.7619, - "step": 34256 - }, - { - "epoch": 0.9707557595851398, - "grad_norm": 0.0, - "learning_rate": 4.482193970930637e-08, - "loss": 0.7961, - "step": 34257 - }, - { - "epoch": 0.9707840970274023, - "grad_norm": 0.0, - "learning_rate": 4.473518236189467e-08, - "loss": 0.8048, - "step": 34258 - }, - { - "epoch": 0.9708124344696648, - "grad_norm": 0.0, - "learning_rate": 4.464850887137551e-08, - "loss": 0.8007, - "step": 34259 - }, - { - "epoch": 0.9708407719119272, - "grad_norm": 0.0, - "learning_rate": 4.4561919238478304e-08, - "loss": 0.7752, - "step": 34260 - }, - { - "epoch": 0.9708691093541897, - "grad_norm": 0.0, - "learning_rate": 4.447541346393358e-08, - "loss": 0.7957, - "step": 34261 - }, - { - "epoch": 0.9708974467964522, - "grad_norm": 0.0, - "learning_rate": 4.438899154846854e-08, - "loss": 0.8154, - "step": 34262 - }, - { - "epoch": 0.9709257842387146, - "grad_norm": 0.0, - "learning_rate": 4.43026534928126e-08, - "loss": 0.8538, - "step": 34263 - }, - { - "epoch": 0.9709541216809771, - "grad_norm": 0.0, - "learning_rate": 4.421639929769295e-08, - "loss": 0.7242, - "step": 34264 - }, - { - "epoch": 0.9709824591232395, - "grad_norm": 0.0, - "learning_rate": 4.413022896383457e-08, - "loss": 0.8319, - "step": 34265 - }, - { - "epoch": 0.971010796565502, - "grad_norm": 0.0, - "learning_rate": 4.404414249196465e-08, - "loss": 0.8373, - "step": 34266 - }, - { - "epoch": 0.9710391340077644, - "grad_norm": 0.0, - "learning_rate": 4.395813988280817e-08, - "loss": 0.8127, - "step": 34267 - }, - { - "epoch": 0.9710674714500269, - "grad_norm": 0.0, - "learning_rate": 4.3872221137089e-08, - "loss": 0.8365, - "step": 34268 - }, - { - "epoch": 0.9710958088922894, - "grad_norm": 0.0, - "learning_rate": 4.3786386255531e-08, - "loss": 0.8457, - "step": 34269 - }, - { - "epoch": 0.9711241463345518, - "grad_norm": 0.0, - "learning_rate": 4.3700635238856927e-08, - "loss": 0.7833, - "step": 34270 - }, - { - "epoch": 0.9711524837768143, - "grad_norm": 0.0, - "learning_rate": 4.3614968087790644e-08, - "loss": 0.7677, - "step": 34271 - }, - { - "epoch": 0.9711808212190768, - "grad_norm": 0.0, - "learning_rate": 4.352938480305269e-08, - "loss": 0.849, - "step": 34272 - }, - { - "epoch": 0.9712091586613393, - "grad_norm": 0.0, - "learning_rate": 4.3443885385363597e-08, - "loss": 0.8651, - "step": 34273 - }, - { - "epoch": 0.9712374961036017, - "grad_norm": 0.0, - "learning_rate": 4.335846983544389e-08, - "loss": 0.7828, - "step": 34274 - }, - { - "epoch": 0.9712658335458642, - "grad_norm": 0.0, - "learning_rate": 4.3273138154013014e-08, - "loss": 0.7978, - "step": 34275 - }, - { - "epoch": 0.9712941709881266, - "grad_norm": 0.0, - "learning_rate": 4.318789034179038e-08, - "loss": 0.7834, - "step": 34276 - }, - { - "epoch": 0.971322508430389, - "grad_norm": 0.0, - "learning_rate": 4.31027263994932e-08, - "loss": 0.7797, - "step": 34277 - }, - { - "epoch": 0.9713508458726515, - "grad_norm": 0.0, - "learning_rate": 4.301764632783978e-08, - "loss": 0.8148, - "step": 34278 - }, - { - "epoch": 0.971379183314914, - "grad_norm": 0.0, - "learning_rate": 4.293265012754511e-08, - "loss": 0.788, - "step": 34279 - }, - { - "epoch": 0.9714075207571765, - "grad_norm": 0.0, - "learning_rate": 4.2847737799326384e-08, - "loss": 0.8311, - "step": 34280 - }, - { - "epoch": 0.9714358581994389, - "grad_norm": 0.0, - "learning_rate": 4.27629093438986e-08, - "loss": 0.7301, - "step": 34281 - }, - { - "epoch": 0.9714641956417014, - "grad_norm": 0.0, - "learning_rate": 4.2678164761976724e-08, - "loss": 0.8444, - "step": 34282 - }, - { - "epoch": 0.9714925330839639, - "grad_norm": 0.0, - "learning_rate": 4.259350405427465e-08, - "loss": 0.7507, - "step": 34283 - }, - { - "epoch": 0.9715208705262263, - "grad_norm": 0.0, - "learning_rate": 4.250892722150401e-08, - "loss": 0.7811, - "step": 34284 - }, - { - "epoch": 0.9715492079684888, - "grad_norm": 0.0, - "learning_rate": 4.242443426437981e-08, - "loss": 0.8019, - "step": 34285 - }, - { - "epoch": 0.9715775454107513, - "grad_norm": 0.0, - "learning_rate": 4.234002518361036e-08, - "loss": 0.8314, - "step": 34286 - }, - { - "epoch": 0.9716058828530136, - "grad_norm": 0.0, - "learning_rate": 4.2255699979910637e-08, - "loss": 0.7985, - "step": 34287 - }, - { - "epoch": 0.9716342202952761, - "grad_norm": 0.0, - "learning_rate": 4.2171458653986755e-08, - "loss": 0.8033, - "step": 34288 - }, - { - "epoch": 0.9716625577375386, - "grad_norm": 0.0, - "learning_rate": 4.2087301206552576e-08, - "loss": 0.7527, - "step": 34289 - }, - { - "epoch": 0.9716908951798011, - "grad_norm": 0.0, - "learning_rate": 4.20032276383131e-08, - "loss": 0.8228, - "step": 34290 - }, - { - "epoch": 0.9717192326220635, - "grad_norm": 0.0, - "learning_rate": 4.1919237949978876e-08, - "loss": 0.8319, - "step": 34291 - }, - { - "epoch": 0.971747570064326, - "grad_norm": 0.0, - "learning_rate": 4.183533214225599e-08, - "loss": 0.8304, - "step": 34292 - }, - { - "epoch": 0.9717759075065885, - "grad_norm": 0.0, - "learning_rate": 4.175151021585277e-08, - "loss": 0.7194, - "step": 34293 - }, - { - "epoch": 0.9718042449488509, - "grad_norm": 0.0, - "learning_rate": 4.1667772171474216e-08, - "loss": 0.8839, - "step": 34294 - }, - { - "epoch": 0.9718325823911134, - "grad_norm": 0.0, - "learning_rate": 4.1584118009826424e-08, - "loss": 0.8002, - "step": 34295 - }, - { - "epoch": 0.9718609198333759, - "grad_norm": 0.0, - "learning_rate": 4.150054773161327e-08, - "loss": 0.7978, - "step": 34296 - }, - { - "epoch": 0.9718892572756384, - "grad_norm": 0.0, - "learning_rate": 4.1417061337539753e-08, - "loss": 0.9084, - "step": 34297 - }, - { - "epoch": 0.9719175947179007, - "grad_norm": 0.0, - "learning_rate": 4.133365882830753e-08, - "loss": 0.8314, - "step": 34298 - }, - { - "epoch": 0.9719459321601632, - "grad_norm": 0.0, - "learning_rate": 4.1250340204619375e-08, - "loss": 0.7739, - "step": 34299 - }, - { - "epoch": 0.9719742696024257, - "grad_norm": 0.0, - "learning_rate": 4.116710546717917e-08, - "loss": 0.8654, - "step": 34300 - }, - { - "epoch": 0.9720026070446881, - "grad_norm": 0.0, - "learning_rate": 4.108395461668524e-08, - "loss": 0.7424, - "step": 34301 - }, - { - "epoch": 0.9720309444869506, - "grad_norm": 0.0, - "learning_rate": 4.100088765384036e-08, - "loss": 0.7191, - "step": 34302 - }, - { - "epoch": 0.9720592819292131, - "grad_norm": 0.0, - "learning_rate": 4.091790457934286e-08, - "loss": 0.8351, - "step": 34303 - }, - { - "epoch": 0.9720876193714755, - "grad_norm": 0.0, - "learning_rate": 4.083500539389107e-08, - "loss": 0.7713, - "step": 34304 - }, - { - "epoch": 0.972115956813738, - "grad_norm": 0.0, - "learning_rate": 4.075219009818554e-08, - "loss": 0.9171, - "step": 34305 - }, - { - "epoch": 0.9721442942560005, - "grad_norm": 0.0, - "learning_rate": 4.066945869292238e-08, - "loss": 0.8491, - "step": 34306 - }, - { - "epoch": 0.972172631698263, - "grad_norm": 0.0, - "learning_rate": 4.0586811178797704e-08, - "loss": 0.8345, - "step": 34307 - }, - { - "epoch": 0.9722009691405253, - "grad_norm": 0.0, - "learning_rate": 4.050424755650984e-08, - "loss": 0.7275, - "step": 34308 - }, - { - "epoch": 0.9722293065827878, - "grad_norm": 0.0, - "learning_rate": 4.042176782675267e-08, - "loss": 0.7594, - "step": 34309 - }, - { - "epoch": 0.9722576440250503, - "grad_norm": 0.0, - "learning_rate": 4.03393719902212e-08, - "loss": 0.6586, - "step": 34310 - }, - { - "epoch": 0.9722859814673127, - "grad_norm": 0.0, - "learning_rate": 4.025706004760932e-08, - "loss": 0.7297, - "step": 34311 - }, - { - "epoch": 0.9723143189095752, - "grad_norm": 0.0, - "learning_rate": 4.017483199961092e-08, - "loss": 0.8015, - "step": 34312 - }, - { - "epoch": 0.9723426563518377, - "grad_norm": 0.0, - "learning_rate": 4.0092687846919885e-08, - "loss": 0.7686, - "step": 34313 - }, - { - "epoch": 0.9723709937941002, - "grad_norm": 0.0, - "learning_rate": 4.001062759022456e-08, - "loss": 0.8086, - "step": 34314 - }, - { - "epoch": 0.9723993312363626, - "grad_norm": 0.0, - "learning_rate": 3.992865123021883e-08, - "loss": 0.842, - "step": 34315 - }, - { - "epoch": 0.9724276686786251, - "grad_norm": 0.0, - "learning_rate": 3.984675876759325e-08, - "loss": 0.7915, - "step": 34316 - }, - { - "epoch": 0.9724560061208876, - "grad_norm": 0.0, - "learning_rate": 3.976495020303617e-08, - "loss": 0.8775, - "step": 34317 - }, - { - "epoch": 0.9724843435631499, - "grad_norm": 0.0, - "learning_rate": 3.968322553723813e-08, - "loss": 0.8187, - "step": 34318 - }, - { - "epoch": 0.9725126810054124, - "grad_norm": 0.0, - "learning_rate": 3.960158477088749e-08, - "loss": 0.8891, - "step": 34319 - }, - { - "epoch": 0.9725410184476749, - "grad_norm": 0.0, - "learning_rate": 3.9520027904670354e-08, - "loss": 0.899, - "step": 34320 - }, - { - "epoch": 0.9725693558899374, - "grad_norm": 0.0, - "learning_rate": 3.9438554939275064e-08, - "loss": 0.7366, - "step": 34321 - }, - { - "epoch": 0.9725976933321998, - "grad_norm": 0.0, - "learning_rate": 3.9357165875387735e-08, - "loss": 0.6672, - "step": 34322 - }, - { - "epoch": 0.9726260307744623, - "grad_norm": 0.0, - "learning_rate": 3.927586071369338e-08, - "loss": 0.6958, - "step": 34323 - }, - { - "epoch": 0.9726543682167248, - "grad_norm": 0.0, - "learning_rate": 3.9194639454878115e-08, - "loss": 0.7664, - "step": 34324 - }, - { - "epoch": 0.9726827056589872, - "grad_norm": 0.0, - "learning_rate": 3.911350209962472e-08, - "loss": 0.8311, - "step": 34325 - }, - { - "epoch": 0.9727110431012497, - "grad_norm": 0.0, - "learning_rate": 3.9032448648617106e-08, - "loss": 0.7833, - "step": 34326 - }, - { - "epoch": 0.9727393805435122, - "grad_norm": 0.0, - "learning_rate": 3.895147910253916e-08, - "loss": 0.8379, - "step": 34327 - }, - { - "epoch": 0.9727677179857745, - "grad_norm": 0.0, - "learning_rate": 3.887059346207034e-08, - "loss": 0.6923, - "step": 34328 - }, - { - "epoch": 0.972796055428037, - "grad_norm": 0.0, - "learning_rate": 3.878979172789454e-08, - "loss": 0.7864, - "step": 34329 - }, - { - "epoch": 0.9728243928702995, - "grad_norm": 0.0, - "learning_rate": 3.870907390069012e-08, - "loss": 0.8196, - "step": 34330 - }, - { - "epoch": 0.972852730312562, - "grad_norm": 0.0, - "learning_rate": 3.862843998113874e-08, - "loss": 0.7467, - "step": 34331 - }, - { - "epoch": 0.9728810677548244, - "grad_norm": 0.0, - "learning_rate": 3.854788996991987e-08, - "loss": 0.7753, - "step": 34332 - }, - { - "epoch": 0.9729094051970869, - "grad_norm": 0.0, - "learning_rate": 3.846742386771074e-08, - "loss": 0.7587, - "step": 34333 - }, - { - "epoch": 0.9729377426393494, - "grad_norm": 0.0, - "learning_rate": 3.83870416751897e-08, - "loss": 0.6986, - "step": 34334 - }, - { - "epoch": 0.9729660800816118, - "grad_norm": 0.0, - "learning_rate": 3.8306743393032864e-08, - "loss": 0.8426, - "step": 34335 - }, - { - "epoch": 0.9729944175238743, - "grad_norm": 0.0, - "learning_rate": 3.822652902191859e-08, - "loss": 0.7897, - "step": 34336 - }, - { - "epoch": 0.9730227549661368, - "grad_norm": 0.0, - "learning_rate": 3.814639856252078e-08, - "loss": 0.7995, - "step": 34337 - }, - { - "epoch": 0.9730510924083993, - "grad_norm": 0.0, - "learning_rate": 3.8066352015515564e-08, - "loss": 0.7112, - "step": 34338 - }, - { - "epoch": 0.9730794298506616, - "grad_norm": 0.0, - "learning_rate": 3.798638938157684e-08, - "loss": 0.7998, - "step": 34339 - }, - { - "epoch": 0.9731077672929241, - "grad_norm": 0.0, - "learning_rate": 3.7906510661377406e-08, - "loss": 0.8952, - "step": 34340 - }, - { - "epoch": 0.9731361047351866, - "grad_norm": 0.0, - "learning_rate": 3.7826715855591167e-08, - "loss": 0.8029, - "step": 34341 - }, - { - "epoch": 0.973164442177449, - "grad_norm": 0.0, - "learning_rate": 3.774700496489092e-08, - "loss": 0.7651, - "step": 34342 - }, - { - "epoch": 0.9731927796197115, - "grad_norm": 0.0, - "learning_rate": 3.766737798994502e-08, - "loss": 0.6938, - "step": 34343 - }, - { - "epoch": 0.973221117061974, - "grad_norm": 0.0, - "learning_rate": 3.758783493142737e-08, - "loss": 0.8183, - "step": 34344 - }, - { - "epoch": 0.9732494545042365, - "grad_norm": 0.0, - "learning_rate": 3.7508375790006326e-08, - "loss": 0.8593, - "step": 34345 - }, - { - "epoch": 0.9732777919464989, - "grad_norm": 0.0, - "learning_rate": 3.742900056635246e-08, - "loss": 0.7594, - "step": 34346 - }, - { - "epoch": 0.9733061293887614, - "grad_norm": 0.0, - "learning_rate": 3.7349709261133015e-08, - "loss": 0.8851, - "step": 34347 - }, - { - "epoch": 0.9733344668310239, - "grad_norm": 0.0, - "learning_rate": 3.727050187501746e-08, - "loss": 0.7161, - "step": 34348 - }, - { - "epoch": 0.9733628042732863, - "grad_norm": 0.0, - "learning_rate": 3.719137840867082e-08, - "loss": 0.7563, - "step": 34349 - }, - { - "epoch": 0.9733911417155487, - "grad_norm": 0.0, - "learning_rate": 3.7112338862761446e-08, - "loss": 0.786, - "step": 34350 - }, - { - "epoch": 0.9734194791578112, - "grad_norm": 0.0, - "learning_rate": 3.7033383237954356e-08, - "loss": 0.7938, - "step": 34351 - }, - { - "epoch": 0.9734478166000736, - "grad_norm": 0.0, - "learning_rate": 3.695451153491458e-08, - "loss": 0.865, - "step": 34352 - }, - { - "epoch": 0.9734761540423361, - "grad_norm": 0.0, - "learning_rate": 3.6875723754307145e-08, - "loss": 0.8558, - "step": 34353 - }, - { - "epoch": 0.9735044914845986, - "grad_norm": 0.0, - "learning_rate": 3.679701989679374e-08, - "loss": 0.771, - "step": 34354 - }, - { - "epoch": 0.9735328289268611, - "grad_norm": 0.0, - "learning_rate": 3.67183999630405e-08, - "loss": 0.8317, - "step": 34355 - }, - { - "epoch": 0.9735611663691235, - "grad_norm": 0.0, - "learning_rate": 3.663986395370689e-08, - "loss": 0.8143, - "step": 34356 - }, - { - "epoch": 0.973589503811386, - "grad_norm": 0.0, - "learning_rate": 3.6561411869455723e-08, - "loss": 0.9196, - "step": 34357 - }, - { - "epoch": 0.9736178412536485, - "grad_norm": 0.0, - "learning_rate": 3.648304371094869e-08, - "loss": 0.8411, - "step": 34358 - }, - { - "epoch": 0.9736461786959109, - "grad_norm": 0.0, - "learning_rate": 3.640475947884303e-08, - "loss": 0.8316, - "step": 34359 - }, - { - "epoch": 0.9736745161381734, - "grad_norm": 0.0, - "learning_rate": 3.6326559173801565e-08, - "loss": 0.9518, - "step": 34360 - }, - { - "epoch": 0.9737028535804358, - "grad_norm": 0.0, - "learning_rate": 3.624844279648043e-08, - "loss": 0.7239, - "step": 34361 - }, - { - "epoch": 0.9737311910226983, - "grad_norm": 0.0, - "learning_rate": 3.61704103475391e-08, - "loss": 0.8007, - "step": 34362 - }, - { - "epoch": 0.9737595284649607, - "grad_norm": 0.0, - "learning_rate": 3.6092461827633704e-08, - "loss": 0.7947, - "step": 34363 - }, - { - "epoch": 0.9737878659072232, - "grad_norm": 0.0, - "learning_rate": 3.601459723742262e-08, - "loss": 0.8158, - "step": 34364 - }, - { - "epoch": 0.9738162033494857, - "grad_norm": 0.0, - "learning_rate": 3.5936816577559765e-08, - "loss": 0.7482, - "step": 34365 - }, - { - "epoch": 0.9738445407917481, - "grad_norm": 0.0, - "learning_rate": 3.5859119848701276e-08, - "loss": 0.8337, - "step": 34366 - }, - { - "epoch": 0.9738728782340106, - "grad_norm": 0.0, - "learning_rate": 3.5781507051502185e-08, - "loss": 0.8426, - "step": 34367 - }, - { - "epoch": 0.9739012156762731, - "grad_norm": 0.0, - "learning_rate": 3.570397818661531e-08, - "loss": 0.88, - "step": 34368 - }, - { - "epoch": 0.9739295531185356, - "grad_norm": 0.0, - "learning_rate": 3.562653325469345e-08, - "loss": 0.7941, - "step": 34369 - }, - { - "epoch": 0.973957890560798, - "grad_norm": 0.0, - "learning_rate": 3.554917225638943e-08, - "loss": 0.7552, - "step": 34370 - }, - { - "epoch": 0.9739862280030605, - "grad_norm": 0.0, - "learning_rate": 3.5471895192354947e-08, - "loss": 0.7843, - "step": 34371 - }, - { - "epoch": 0.9740145654453229, - "grad_norm": 0.0, - "learning_rate": 3.5394702063241695e-08, - "loss": 0.71, - "step": 34372 - }, - { - "epoch": 0.9740429028875853, - "grad_norm": 0.0, - "learning_rate": 3.531759286969805e-08, - "loss": 0.8141, - "step": 34373 - }, - { - "epoch": 0.9740712403298478, - "grad_norm": 0.0, - "learning_rate": 3.5240567612375706e-08, - "loss": 0.8811, - "step": 34374 - }, - { - "epoch": 0.9740995777721103, - "grad_norm": 0.0, - "learning_rate": 3.5163626291921934e-08, - "loss": 0.8598, - "step": 34375 - }, - { - "epoch": 0.9741279152143727, - "grad_norm": 0.0, - "learning_rate": 3.50867689089851e-08, - "loss": 0.8634, - "step": 34376 - }, - { - "epoch": 0.9741562526566352, - "grad_norm": 0.0, - "learning_rate": 3.5009995464212465e-08, - "loss": 0.7919, - "step": 34377 - }, - { - "epoch": 0.9741845900988977, - "grad_norm": 0.0, - "learning_rate": 3.4933305958251285e-08, - "loss": 0.773, - "step": 34378 - }, - { - "epoch": 0.9742129275411602, - "grad_norm": 0.0, - "learning_rate": 3.485670039174882e-08, - "loss": 0.8571, - "step": 34379 - }, - { - "epoch": 0.9742412649834226, - "grad_norm": 0.0, - "learning_rate": 3.4780178765346785e-08, - "loss": 0.8449, - "step": 34380 - }, - { - "epoch": 0.974269602425685, - "grad_norm": 0.0, - "learning_rate": 3.470374107969243e-08, - "loss": 0.7977, - "step": 34381 - }, - { - "epoch": 0.9742979398679475, - "grad_norm": 0.0, - "learning_rate": 3.4627387335429696e-08, - "loss": 0.7961, - "step": 34382 - }, - { - "epoch": 0.9743262773102099, - "grad_norm": 0.0, - "learning_rate": 3.455111753320028e-08, - "loss": 0.7821, - "step": 34383 - }, - { - "epoch": 0.9743546147524724, - "grad_norm": 0.0, - "learning_rate": 3.4474931673647014e-08, - "loss": 0.7451, - "step": 34384 - }, - { - "epoch": 0.9743829521947349, - "grad_norm": 0.0, - "learning_rate": 3.439882975741271e-08, - "loss": 0.8447, - "step": 34385 - }, - { - "epoch": 0.9744112896369974, - "grad_norm": 0.0, - "learning_rate": 3.4322811785137965e-08, - "loss": 0.7228, - "step": 34386 - }, - { - "epoch": 0.9744396270792598, - "grad_norm": 0.0, - "learning_rate": 3.424687775746227e-08, - "loss": 0.7593, - "step": 34387 - }, - { - "epoch": 0.9744679645215223, - "grad_norm": 0.0, - "learning_rate": 3.4171027675026225e-08, - "loss": 0.7963, - "step": 34388 - }, - { - "epoch": 0.9744963019637848, - "grad_norm": 0.0, - "learning_rate": 3.4095261538468204e-08, - "loss": 0.7653, - "step": 34389 - }, - { - "epoch": 0.9745246394060472, - "grad_norm": 0.0, - "learning_rate": 3.401957934842659e-08, - "loss": 0.8083, - "step": 34390 - }, - { - "epoch": 0.9745529768483097, - "grad_norm": 0.0, - "learning_rate": 3.394398110553754e-08, - "loss": 0.8945, - "step": 34391 - }, - { - "epoch": 0.9745813142905722, - "grad_norm": 0.0, - "learning_rate": 3.3868466810440534e-08, - "loss": 0.8145, - "step": 34392 - }, - { - "epoch": 0.9746096517328346, - "grad_norm": 0.0, - "learning_rate": 3.379303646377064e-08, - "loss": 0.8031, - "step": 34393 - }, - { - "epoch": 0.974637989175097, - "grad_norm": 0.0, - "learning_rate": 3.371769006616177e-08, - "loss": 0.8548, - "step": 34394 - }, - { - "epoch": 0.9746663266173595, - "grad_norm": 0.0, - "learning_rate": 3.3642427618250094e-08, - "loss": 0.7399, - "step": 34395 - }, - { - "epoch": 0.974694664059622, - "grad_norm": 0.0, - "learning_rate": 3.356724912066955e-08, - "loss": 0.7927, - "step": 34396 - }, - { - "epoch": 0.9747230015018844, - "grad_norm": 0.0, - "learning_rate": 3.349215457405186e-08, - "loss": 0.7842, - "step": 34397 - }, - { - "epoch": 0.9747513389441469, - "grad_norm": 0.0, - "learning_rate": 3.3417143979032064e-08, - "loss": 0.804, - "step": 34398 - }, - { - "epoch": 0.9747796763864094, - "grad_norm": 0.0, - "learning_rate": 3.3342217336239656e-08, - "loss": 0.7916, - "step": 34399 - }, - { - "epoch": 0.9748080138286718, - "grad_norm": 0.0, - "learning_rate": 3.326737464630747e-08, - "loss": 0.957, - "step": 34400 - }, - { - "epoch": 0.9748363512709343, - "grad_norm": 0.0, - "learning_rate": 3.3192615909865e-08, - "loss": 0.755, - "step": 34401 - }, - { - "epoch": 0.9748646887131968, - "grad_norm": 0.0, - "learning_rate": 3.3117941127541745e-08, - "loss": 0.7043, - "step": 34402 - }, - { - "epoch": 0.9748930261554593, - "grad_norm": 0.0, - "learning_rate": 3.30433502999683e-08, - "loss": 0.781, - "step": 34403 - }, - { - "epoch": 0.9749213635977216, - "grad_norm": 0.0, - "learning_rate": 3.2968843427770844e-08, - "loss": 0.7032, - "step": 34404 - }, - { - "epoch": 0.9749497010399841, - "grad_norm": 0.0, - "learning_rate": 3.2894420511578874e-08, - "loss": 0.7976, - "step": 34405 - }, - { - "epoch": 0.9749780384822466, - "grad_norm": 0.0, - "learning_rate": 3.2820081552017435e-08, - "loss": 0.7777, - "step": 34406 - }, - { - "epoch": 0.975006375924509, - "grad_norm": 0.0, - "learning_rate": 3.274582654971381e-08, - "loss": 0.7013, - "step": 34407 - }, - { - "epoch": 0.9750347133667715, - "grad_norm": 0.0, - "learning_rate": 3.2671655505294165e-08, - "loss": 0.7557, - "step": 34408 - }, - { - "epoch": 0.975063050809034, - "grad_norm": 0.0, - "learning_rate": 3.259756841938244e-08, - "loss": 0.7699, - "step": 34409 - }, - { - "epoch": 0.9750913882512965, - "grad_norm": 0.0, - "learning_rate": 3.252356529260148e-08, - "loss": 0.7805, - "step": 34410 - }, - { - "epoch": 0.9751197256935589, - "grad_norm": 0.0, - "learning_rate": 3.244964612557633e-08, - "loss": 0.748, - "step": 34411 - }, - { - "epoch": 0.9751480631358214, - "grad_norm": 0.0, - "learning_rate": 3.237581091892983e-08, - "loss": 0.766, - "step": 34412 - }, - { - "epoch": 0.9751764005780839, - "grad_norm": 0.0, - "learning_rate": 3.230205967328259e-08, - "loss": 0.8309, - "step": 34413 - }, - { - "epoch": 0.9752047380203462, - "grad_norm": 0.0, - "learning_rate": 3.222839238925635e-08, - "loss": 0.827, - "step": 34414 - }, - { - "epoch": 0.9752330754626087, - "grad_norm": 0.0, - "learning_rate": 3.215480906747281e-08, - "loss": 0.7973, - "step": 34415 - }, - { - "epoch": 0.9752614129048712, - "grad_norm": 0.0, - "learning_rate": 3.208130970855039e-08, - "loss": 0.7363, - "step": 34416 - }, - { - "epoch": 0.9752897503471337, - "grad_norm": 0.0, - "learning_rate": 3.200789431310858e-08, - "loss": 0.929, - "step": 34417 - }, - { - "epoch": 0.9753180877893961, - "grad_norm": 0.0, - "learning_rate": 3.193456288176577e-08, - "loss": 0.8075, - "step": 34418 - }, - { - "epoch": 0.9753464252316586, - "grad_norm": 0.0, - "learning_rate": 3.186131541513926e-08, - "loss": 0.7647, - "step": 34419 - }, - { - "epoch": 0.9753747626739211, - "grad_norm": 0.0, - "learning_rate": 3.178815191384632e-08, - "loss": 0.7872, - "step": 34420 - }, - { - "epoch": 0.9754031001161835, - "grad_norm": 0.0, - "learning_rate": 3.171507237850424e-08, - "loss": 0.7138, - "step": 34421 - }, - { - "epoch": 0.975431437558446, - "grad_norm": 0.0, - "learning_rate": 3.1642076809726973e-08, - "loss": 0.8861, - "step": 34422 - }, - { - "epoch": 0.9754597750007085, - "grad_norm": 0.0, - "learning_rate": 3.156916520813069e-08, - "loss": 0.7856, - "step": 34423 - }, - { - "epoch": 0.9754881124429708, - "grad_norm": 0.0, - "learning_rate": 3.149633757432824e-08, - "loss": 0.849, - "step": 34424 - }, - { - "epoch": 0.9755164498852333, - "grad_norm": 0.0, - "learning_rate": 3.142359390893468e-08, - "loss": 0.9491, - "step": 34425 - }, - { - "epoch": 0.9755447873274958, - "grad_norm": 0.0, - "learning_rate": 3.1350934212561746e-08, - "loss": 0.686, - "step": 34426 - }, - { - "epoch": 0.9755731247697583, - "grad_norm": 0.0, - "learning_rate": 3.127835848582117e-08, - "loss": 0.8862, - "step": 34427 - }, - { - "epoch": 0.9756014622120207, - "grad_norm": 0.0, - "learning_rate": 3.120586672932469e-08, - "loss": 0.8623, - "step": 34428 - }, - { - "epoch": 0.9756297996542832, - "grad_norm": 0.0, - "learning_rate": 3.1133458943684024e-08, - "loss": 0.8602, - "step": 34429 - }, - { - "epoch": 0.9756581370965457, - "grad_norm": 0.0, - "learning_rate": 3.106113512950759e-08, - "loss": 0.8011, - "step": 34430 - }, - { - "epoch": 0.9756864745388081, - "grad_norm": 0.0, - "learning_rate": 3.09888952874049e-08, - "loss": 0.7968, - "step": 34431 - }, - { - "epoch": 0.9757148119810706, - "grad_norm": 0.0, - "learning_rate": 3.091673941798545e-08, - "loss": 0.8025, - "step": 34432 - }, - { - "epoch": 0.9757431494233331, - "grad_norm": 0.0, - "learning_rate": 3.084466752185544e-08, - "loss": 0.6988, - "step": 34433 - }, - { - "epoch": 0.9757714868655956, - "grad_norm": 0.0, - "learning_rate": 3.0772679599623266e-08, - "loss": 0.7149, - "step": 34434 - }, - { - "epoch": 0.9757998243078579, - "grad_norm": 0.0, - "learning_rate": 3.0700775651894e-08, - "loss": 0.7885, - "step": 34435 - }, - { - "epoch": 0.9758281617501204, - "grad_norm": 0.0, - "learning_rate": 3.062895567927382e-08, - "loss": 0.8043, - "step": 34436 - }, - { - "epoch": 0.9758564991923829, - "grad_norm": 0.0, - "learning_rate": 3.055721968236891e-08, - "loss": 0.8584, - "step": 34437 - }, - { - "epoch": 0.9758848366346453, - "grad_norm": 0.0, - "learning_rate": 3.048556766178212e-08, - "loss": 0.7718, - "step": 34438 - }, - { - "epoch": 0.9759131740769078, - "grad_norm": 0.0, - "learning_rate": 3.0413999618117416e-08, - "loss": 0.8098, - "step": 34439 - }, - { - "epoch": 0.9759415115191703, - "grad_norm": 0.0, - "learning_rate": 3.034251555197876e-08, - "loss": 0.713, - "step": 34440 - }, - { - "epoch": 0.9759698489614328, - "grad_norm": 0.0, - "learning_rate": 3.027111546396677e-08, - "loss": 0.7868, - "step": 34441 - }, - { - "epoch": 0.9759981864036952, - "grad_norm": 0.0, - "learning_rate": 3.01997993546832e-08, - "loss": 0.7911, - "step": 34442 - }, - { - "epoch": 0.9760265238459577, - "grad_norm": 0.0, - "learning_rate": 3.0128567224728676e-08, - "loss": 0.7195, - "step": 34443 - }, - { - "epoch": 0.9760548612882202, - "grad_norm": 0.0, - "learning_rate": 3.005741907470272e-08, - "loss": 0.7971, - "step": 34444 - }, - { - "epoch": 0.9760831987304825, - "grad_norm": 0.0, - "learning_rate": 2.998635490520707e-08, - "loss": 0.7258, - "step": 34445 - }, - { - "epoch": 0.976111536172745, - "grad_norm": 0.0, - "learning_rate": 2.991537471683681e-08, - "loss": 0.7904, - "step": 34446 - }, - { - "epoch": 0.9761398736150075, - "grad_norm": 0.0, - "learning_rate": 2.984447851019367e-08, - "loss": 0.7723, - "step": 34447 - }, - { - "epoch": 0.9761682110572699, - "grad_norm": 0.0, - "learning_rate": 2.977366628587164e-08, - "loss": 0.7774, - "step": 34448 - }, - { - "epoch": 0.9761965484995324, - "grad_norm": 0.0, - "learning_rate": 2.9702938044468e-08, - "loss": 0.8126, - "step": 34449 - }, - { - "epoch": 0.9762248859417949, - "grad_norm": 0.0, - "learning_rate": 2.9632293786578946e-08, - "loss": 0.8324, - "step": 34450 - }, - { - "epoch": 0.9762532233840574, - "grad_norm": 0.0, - "learning_rate": 2.9561733512800673e-08, - "loss": 0.8884, - "step": 34451 - }, - { - "epoch": 0.9762815608263198, - "grad_norm": 0.0, - "learning_rate": 2.949125722372492e-08, - "loss": 0.8991, - "step": 34452 - }, - { - "epoch": 0.9763098982685823, - "grad_norm": 0.0, - "learning_rate": 2.9420864919947888e-08, - "loss": 0.7005, - "step": 34453 - }, - { - "epoch": 0.9763382357108448, - "grad_norm": 0.0, - "learning_rate": 2.935055660206021e-08, - "loss": 0.7507, - "step": 34454 - }, - { - "epoch": 0.9763665731531072, - "grad_norm": 0.0, - "learning_rate": 2.9280332270656962e-08, - "loss": 0.8076, - "step": 34455 - }, - { - "epoch": 0.9763949105953696, - "grad_norm": 0.0, - "learning_rate": 2.921019192632657e-08, - "loss": 0.6902, - "step": 34456 - }, - { - "epoch": 0.9764232480376321, - "grad_norm": 0.0, - "learning_rate": 2.9140135569661886e-08, - "loss": 0.8526, - "step": 34457 - }, - { - "epoch": 0.9764515854798946, - "grad_norm": 0.0, - "learning_rate": 2.9070163201252445e-08, - "loss": 0.7374, - "step": 34458 - }, - { - "epoch": 0.976479922922157, - "grad_norm": 0.0, - "learning_rate": 2.900027482168777e-08, - "loss": 0.8354, - "step": 34459 - }, - { - "epoch": 0.9765082603644195, - "grad_norm": 0.0, - "learning_rate": 2.8930470431556278e-08, - "loss": 0.8313, - "step": 34460 - }, - { - "epoch": 0.976536597806682, - "grad_norm": 0.0, - "learning_rate": 2.8860750031446395e-08, - "loss": 0.7939, - "step": 34461 - }, - { - "epoch": 0.9765649352489444, - "grad_norm": 0.0, - "learning_rate": 2.879111362194431e-08, - "loss": 0.752, - "step": 34462 - }, - { - "epoch": 0.9765932726912069, - "grad_norm": 0.0, - "learning_rate": 2.8721561203637338e-08, - "loss": 0.7466, - "step": 34463 - }, - { - "epoch": 0.9766216101334694, - "grad_norm": 0.0, - "learning_rate": 2.865209277711167e-08, - "loss": 0.8294, - "step": 34464 - }, - { - "epoch": 0.9766499475757319, - "grad_norm": 0.0, - "learning_rate": 2.8582708342952402e-08, - "loss": 0.7314, - "step": 34465 - }, - { - "epoch": 0.9766782850179943, - "grad_norm": 0.0, - "learning_rate": 2.8513407901744618e-08, - "loss": 0.8699, - "step": 34466 - }, - { - "epoch": 0.9767066224602567, - "grad_norm": 0.0, - "learning_rate": 2.8444191454070068e-08, - "loss": 0.9146, - "step": 34467 - }, - { - "epoch": 0.9767349599025192, - "grad_norm": 0.0, - "learning_rate": 2.837505900051274e-08, - "loss": 0.807, - "step": 34468 - }, - { - "epoch": 0.9767632973447816, - "grad_norm": 0.0, - "learning_rate": 2.8306010541655493e-08, - "loss": 0.7359, - "step": 34469 - }, - { - "epoch": 0.9767916347870441, - "grad_norm": 0.0, - "learning_rate": 2.8237046078080087e-08, - "loss": 0.8734, - "step": 34470 - }, - { - "epoch": 0.9768199722293066, - "grad_norm": 0.0, - "learning_rate": 2.816816561036717e-08, - "loss": 0.7559, - "step": 34471 - }, - { - "epoch": 0.976848309671569, - "grad_norm": 0.0, - "learning_rate": 2.8099369139096277e-08, - "loss": 0.8329, - "step": 34472 - }, - { - "epoch": 0.9768766471138315, - "grad_norm": 0.0, - "learning_rate": 2.8030656664846944e-08, - "loss": 0.7521, - "step": 34473 - }, - { - "epoch": 0.976904984556094, - "grad_norm": 0.0, - "learning_rate": 2.796202818819871e-08, - "loss": 0.7337, - "step": 34474 - }, - { - "epoch": 0.9769333219983565, - "grad_norm": 0.0, - "learning_rate": 2.7893483709728885e-08, - "loss": 0.7912, - "step": 34475 - }, - { - "epoch": 0.9769616594406189, - "grad_norm": 0.0, - "learning_rate": 2.7825023230015903e-08, - "loss": 0.7359, - "step": 34476 - }, - { - "epoch": 0.9769899968828814, - "grad_norm": 0.0, - "learning_rate": 2.7756646749635962e-08, - "loss": 0.8465, - "step": 34477 - }, - { - "epoch": 0.9770183343251438, - "grad_norm": 0.0, - "learning_rate": 2.7688354269164164e-08, - "loss": 0.7693, - "step": 34478 - }, - { - "epoch": 0.9770466717674062, - "grad_norm": 0.0, - "learning_rate": 2.7620145789177823e-08, - "loss": 0.8704, - "step": 34479 - }, - { - "epoch": 0.9770750092096687, - "grad_norm": 0.0, - "learning_rate": 2.75520213102487e-08, - "loss": 0.7699, - "step": 34480 - }, - { - "epoch": 0.9771033466519312, - "grad_norm": 0.0, - "learning_rate": 2.7483980832953006e-08, - "loss": 0.826, - "step": 34481 - }, - { - "epoch": 0.9771316840941937, - "grad_norm": 0.0, - "learning_rate": 2.7416024357862503e-08, - "loss": 0.7904, - "step": 34482 - }, - { - "epoch": 0.9771600215364561, - "grad_norm": 0.0, - "learning_rate": 2.7348151885550066e-08, - "loss": 0.8495, - "step": 34483 - }, - { - "epoch": 0.9771883589787186, - "grad_norm": 0.0, - "learning_rate": 2.7280363416587463e-08, - "loss": 0.7062, - "step": 34484 - }, - { - "epoch": 0.9772166964209811, - "grad_norm": 0.0, - "learning_rate": 2.7212658951546457e-08, - "loss": 0.7952, - "step": 34485 - }, - { - "epoch": 0.9772450338632435, - "grad_norm": 0.0, - "learning_rate": 2.714503849099548e-08, - "loss": 0.7263, - "step": 34486 - }, - { - "epoch": 0.977273371305506, - "grad_norm": 0.0, - "learning_rate": 2.707750203550519e-08, - "loss": 0.8005, - "step": 34487 - }, - { - "epoch": 0.9773017087477684, - "grad_norm": 0.0, - "learning_rate": 2.7010049585645125e-08, - "loss": 0.7466, - "step": 34488 - }, - { - "epoch": 0.9773300461900309, - "grad_norm": 0.0, - "learning_rate": 2.6942681141981508e-08, - "loss": 0.8287, - "step": 34489 - }, - { - "epoch": 0.9773583836322933, - "grad_norm": 0.0, - "learning_rate": 2.687539670508388e-08, - "loss": 0.7694, - "step": 34490 - }, - { - "epoch": 0.9773867210745558, - "grad_norm": 0.0, - "learning_rate": 2.680819627551845e-08, - "loss": 0.834, - "step": 34491 - }, - { - "epoch": 0.9774150585168183, - "grad_norm": 0.0, - "learning_rate": 2.6741079853851436e-08, - "loss": 0.7983, - "step": 34492 - }, - { - "epoch": 0.9774433959590807, - "grad_norm": 0.0, - "learning_rate": 2.6674047440646834e-08, - "loss": 0.7964, - "step": 34493 - }, - { - "epoch": 0.9774717334013432, - "grad_norm": 0.0, - "learning_rate": 2.6607099036470853e-08, - "loss": 0.7761, - "step": 34494 - }, - { - "epoch": 0.9775000708436057, - "grad_norm": 0.0, - "learning_rate": 2.6540234641886375e-08, - "loss": 0.8157, - "step": 34495 - }, - { - "epoch": 0.9775284082858681, - "grad_norm": 0.0, - "learning_rate": 2.64734542574574e-08, - "loss": 0.9175, - "step": 34496 - }, - { - "epoch": 0.9775567457281306, - "grad_norm": 0.0, - "learning_rate": 2.6406757883745693e-08, - "loss": 0.7208, - "step": 34497 - }, - { - "epoch": 0.977585083170393, - "grad_norm": 0.0, - "learning_rate": 2.6340145521314143e-08, - "loss": 0.8014, - "step": 34498 - }, - { - "epoch": 0.9776134206126555, - "grad_norm": 0.0, - "learning_rate": 2.6273617170722298e-08, - "loss": 0.7296, - "step": 34499 - }, - { - "epoch": 0.9776417580549179, - "grad_norm": 0.0, - "learning_rate": 2.6207172832531935e-08, - "loss": 0.6443, - "step": 34500 - }, - { - "epoch": 0.9776700954971804, - "grad_norm": 0.0, - "learning_rate": 2.6140812507302606e-08, - "loss": 0.7393, - "step": 34501 - }, - { - "epoch": 0.9776984329394429, - "grad_norm": 0.0, - "learning_rate": 2.6074536195592747e-08, - "loss": 0.7736, - "step": 34502 - }, - { - "epoch": 0.9777267703817053, - "grad_norm": 0.0, - "learning_rate": 2.60083438979597e-08, - "loss": 0.7831, - "step": 34503 - }, - { - "epoch": 0.9777551078239678, - "grad_norm": 0.0, - "learning_rate": 2.5942235614963006e-08, - "loss": 0.6858, - "step": 34504 - }, - { - "epoch": 0.9777834452662303, - "grad_norm": 0.0, - "learning_rate": 2.5876211347158896e-08, - "loss": 0.8167, - "step": 34505 - }, - { - "epoch": 0.9778117827084928, - "grad_norm": 0.0, - "learning_rate": 2.5810271095102478e-08, - "loss": 0.7428, - "step": 34506 - }, - { - "epoch": 0.9778401201507552, - "grad_norm": 0.0, - "learning_rate": 2.5744414859351084e-08, - "loss": 0.7214, - "step": 34507 - }, - { - "epoch": 0.9778684575930177, - "grad_norm": 0.0, - "learning_rate": 2.567864264045761e-08, - "loss": 0.8067, - "step": 34508 - }, - { - "epoch": 0.9778967950352802, - "grad_norm": 0.0, - "learning_rate": 2.5612954438977155e-08, - "loss": 0.6474, - "step": 34509 - }, - { - "epoch": 0.9779251324775425, - "grad_norm": 0.0, - "learning_rate": 2.554735025546151e-08, - "loss": 0.8595, - "step": 34510 - }, - { - "epoch": 0.977953469919805, - "grad_norm": 0.0, - "learning_rate": 2.5481830090465787e-08, - "loss": 0.7577, - "step": 34511 - }, - { - "epoch": 0.9779818073620675, - "grad_norm": 0.0, - "learning_rate": 2.541639394454065e-08, - "loss": 0.8628, - "step": 34512 - }, - { - "epoch": 0.9780101448043299, - "grad_norm": 0.0, - "learning_rate": 2.5351041818236778e-08, - "loss": 0.8357, - "step": 34513 - }, - { - "epoch": 0.9780384822465924, - "grad_norm": 0.0, - "learning_rate": 2.5285773712104833e-08, - "loss": 0.7629, - "step": 34514 - }, - { - "epoch": 0.9780668196888549, - "grad_norm": 0.0, - "learning_rate": 2.5220589626694382e-08, - "loss": 0.8431, - "step": 34515 - }, - { - "epoch": 0.9780951571311174, - "grad_norm": 0.0, - "learning_rate": 2.515548956255609e-08, - "loss": 0.7551, - "step": 34516 - }, - { - "epoch": 0.9781234945733798, - "grad_norm": 0.0, - "learning_rate": 2.509047352023619e-08, - "loss": 0.8276, - "step": 34517 - }, - { - "epoch": 0.9781518320156423, - "grad_norm": 0.0, - "learning_rate": 2.5025541500284245e-08, - "loss": 0.8085, - "step": 34518 - }, - { - "epoch": 0.9781801694579048, - "grad_norm": 0.0, - "learning_rate": 2.496069350324537e-08, - "loss": 0.7763, - "step": 34519 - }, - { - "epoch": 0.9782085069001671, - "grad_norm": 0.0, - "learning_rate": 2.4895929529666908e-08, - "loss": 0.7708, - "step": 34520 - }, - { - "epoch": 0.9782368443424296, - "grad_norm": 0.0, - "learning_rate": 2.4831249580095086e-08, - "loss": 0.8426, - "step": 34521 - }, - { - "epoch": 0.9782651817846921, - "grad_norm": 0.0, - "learning_rate": 2.4766653655072803e-08, - "loss": 0.731, - "step": 34522 - }, - { - "epoch": 0.9782935192269546, - "grad_norm": 0.0, - "learning_rate": 2.4702141755145182e-08, - "loss": 0.7433, - "step": 34523 - }, - { - "epoch": 0.978321856669217, - "grad_norm": 0.0, - "learning_rate": 2.463771388085623e-08, - "loss": 0.7754, - "step": 34524 - }, - { - "epoch": 0.9783501941114795, - "grad_norm": 0.0, - "learning_rate": 2.4573370032748845e-08, - "loss": 0.723, - "step": 34525 - }, - { - "epoch": 0.978378531553742, - "grad_norm": 0.0, - "learning_rate": 2.4509110211362596e-08, - "loss": 0.9245, - "step": 34526 - }, - { - "epoch": 0.9784068689960044, - "grad_norm": 0.0, - "learning_rate": 2.4444934417241495e-08, - "loss": 0.868, - "step": 34527 - }, - { - "epoch": 0.9784352064382669, - "grad_norm": 0.0, - "learning_rate": 2.4380842650923996e-08, - "loss": 0.8673, - "step": 34528 - }, - { - "epoch": 0.9784635438805294, - "grad_norm": 0.0, - "learning_rate": 2.4316834912951892e-08, - "loss": 0.8116, - "step": 34529 - }, - { - "epoch": 0.9784918813227919, - "grad_norm": 0.0, - "learning_rate": 2.425291120386364e-08, - "loss": 0.8797, - "step": 34530 - }, - { - "epoch": 0.9785202187650542, - "grad_norm": 0.0, - "learning_rate": 2.4189071524196585e-08, - "loss": 0.7786, - "step": 34531 - }, - { - "epoch": 0.9785485562073167, - "grad_norm": 0.0, - "learning_rate": 2.4125315874490295e-08, - "loss": 0.8051, - "step": 34532 - }, - { - "epoch": 0.9785768936495792, - "grad_norm": 0.0, - "learning_rate": 2.4061644255281013e-08, - "loss": 0.7118, - "step": 34533 - }, - { - "epoch": 0.9786052310918416, - "grad_norm": 0.0, - "learning_rate": 2.3998056667103865e-08, - "loss": 0.8205, - "step": 34534 - }, - { - "epoch": 0.9786335685341041, - "grad_norm": 0.0, - "learning_rate": 2.3934553110496194e-08, - "loss": 0.7498, - "step": 34535 - }, - { - "epoch": 0.9786619059763666, - "grad_norm": 0.0, - "learning_rate": 2.3871133585993134e-08, - "loss": 0.7729, - "step": 34536 - }, - { - "epoch": 0.978690243418629, - "grad_norm": 0.0, - "learning_rate": 2.3807798094127587e-08, - "loss": 0.905, - "step": 34537 - }, - { - "epoch": 0.9787185808608915, - "grad_norm": 0.0, - "learning_rate": 2.3744546635432463e-08, - "loss": 0.8416, - "step": 34538 - }, - { - "epoch": 0.978746918303154, - "grad_norm": 0.0, - "learning_rate": 2.368137921044289e-08, - "loss": 0.7904, - "step": 34539 - }, - { - "epoch": 0.9787752557454165, - "grad_norm": 0.0, - "learning_rate": 2.3618295819688442e-08, - "loss": 0.6505, - "step": 34540 - }, - { - "epoch": 0.9788035931876788, - "grad_norm": 0.0, - "learning_rate": 2.3555296463703138e-08, - "loss": 0.8238, - "step": 34541 - }, - { - "epoch": 0.9788319306299413, - "grad_norm": 0.0, - "learning_rate": 2.349238114301544e-08, - "loss": 0.8535, - "step": 34542 - }, - { - "epoch": 0.9788602680722038, - "grad_norm": 0.0, - "learning_rate": 2.3429549858156042e-08, - "loss": 0.7509, - "step": 34543 - }, - { - "epoch": 0.9788886055144662, - "grad_norm": 0.0, - "learning_rate": 2.3366802609654515e-08, - "loss": 0.9251, - "step": 34544 - }, - { - "epoch": 0.9789169429567287, - "grad_norm": 0.0, - "learning_rate": 2.3304139398039327e-08, - "loss": 0.781, - "step": 34545 - }, - { - "epoch": 0.9789452803989912, - "grad_norm": 0.0, - "learning_rate": 2.3241560223837833e-08, - "loss": 0.7839, - "step": 34546 - }, - { - "epoch": 0.9789736178412537, - "grad_norm": 0.0, - "learning_rate": 2.317906508757739e-08, - "loss": 0.7758, - "step": 34547 - }, - { - "epoch": 0.9790019552835161, - "grad_norm": 0.0, - "learning_rate": 2.3116653989784243e-08, - "loss": 0.8166, - "step": 34548 - }, - { - "epoch": 0.9790302927257786, - "grad_norm": 0.0, - "learning_rate": 2.305432693098464e-08, - "loss": 0.8007, - "step": 34549 - }, - { - "epoch": 0.9790586301680411, - "grad_norm": 0.0, - "learning_rate": 2.299208391170371e-08, - "loss": 0.7868, - "step": 34550 - }, - { - "epoch": 0.9790869676103034, - "grad_norm": 0.0, - "learning_rate": 2.2929924932465487e-08, - "loss": 0.791, - "step": 34551 - }, - { - "epoch": 0.9791153050525659, - "grad_norm": 0.0, - "learning_rate": 2.286784999379177e-08, - "loss": 0.7121, - "step": 34552 - }, - { - "epoch": 0.9791436424948284, - "grad_norm": 0.0, - "learning_rate": 2.2805859096208805e-08, - "loss": 0.7204, - "step": 34553 - }, - { - "epoch": 0.9791719799370909, - "grad_norm": 0.0, - "learning_rate": 2.274395224023618e-08, - "loss": 0.6802, - "step": 34554 - }, - { - "epoch": 0.9792003173793533, - "grad_norm": 0.0, - "learning_rate": 2.2682129426395693e-08, - "loss": 0.7727, - "step": 34555 - }, - { - "epoch": 0.9792286548216158, - "grad_norm": 0.0, - "learning_rate": 2.2620390655210268e-08, - "loss": 0.7546, - "step": 34556 - }, - { - "epoch": 0.9792569922638783, - "grad_norm": 0.0, - "learning_rate": 2.2558735927197262e-08, - "loss": 0.9139, - "step": 34557 - }, - { - "epoch": 0.9792853297061407, - "grad_norm": 0.0, - "learning_rate": 2.2497165242877373e-08, - "loss": 0.9091, - "step": 34558 - }, - { - "epoch": 0.9793136671484032, - "grad_norm": 0.0, - "learning_rate": 2.243567860276796e-08, - "loss": 0.7752, - "step": 34559 - }, - { - "epoch": 0.9793420045906657, - "grad_norm": 0.0, - "learning_rate": 2.2374276007388616e-08, - "loss": 0.7293, - "step": 34560 - }, - { - "epoch": 0.979370342032928, - "grad_norm": 0.0, - "learning_rate": 2.23129574572567e-08, - "loss": 0.6709, - "step": 34561 - }, - { - "epoch": 0.9793986794751905, - "grad_norm": 0.0, - "learning_rate": 2.2251722952886245e-08, - "loss": 0.7892, - "step": 34562 - }, - { - "epoch": 0.979427016917453, - "grad_norm": 0.0, - "learning_rate": 2.2190572494795725e-08, - "loss": 0.8568, - "step": 34563 - }, - { - "epoch": 0.9794553543597155, - "grad_norm": 0.0, - "learning_rate": 2.2129506083499176e-08, - "loss": 0.8168, - "step": 34564 - }, - { - "epoch": 0.9794836918019779, - "grad_norm": 0.0, - "learning_rate": 2.2068523719510626e-08, - "loss": 0.8567, - "step": 34565 - }, - { - "epoch": 0.9795120292442404, - "grad_norm": 0.0, - "learning_rate": 2.2007625403344113e-08, - "loss": 0.8245, - "step": 34566 - }, - { - "epoch": 0.9795403666865029, - "grad_norm": 0.0, - "learning_rate": 2.1946811135512557e-08, - "loss": 0.8277, - "step": 34567 - }, - { - "epoch": 0.9795687041287653, - "grad_norm": 0.0, - "learning_rate": 2.1886080916528884e-08, - "loss": 0.7643, - "step": 34568 - }, - { - "epoch": 0.9795970415710278, - "grad_norm": 0.0, - "learning_rate": 2.1825434746903794e-08, - "loss": 0.834, - "step": 34569 - }, - { - "epoch": 0.9796253790132903, - "grad_norm": 0.0, - "learning_rate": 2.1764872627147994e-08, - "loss": 0.8083, - "step": 34570 - }, - { - "epoch": 0.9796537164555528, - "grad_norm": 0.0, - "learning_rate": 2.170439455777218e-08, - "loss": 0.9476, - "step": 34571 - }, - { - "epoch": 0.9796820538978152, - "grad_norm": 0.0, - "learning_rate": 2.1644000539285948e-08, - "loss": 0.7886, - "step": 34572 - }, - { - "epoch": 0.9797103913400776, - "grad_norm": 0.0, - "learning_rate": 2.158369057219667e-08, - "loss": 0.8318, - "step": 34573 - }, - { - "epoch": 0.9797387287823401, - "grad_norm": 0.0, - "learning_rate": 2.1523464657013936e-08, - "loss": 0.8086, - "step": 34574 - }, - { - "epoch": 0.9797670662246025, - "grad_norm": 0.0, - "learning_rate": 2.146332279424512e-08, - "loss": 0.7268, - "step": 34575 - }, - { - "epoch": 0.979795403666865, - "grad_norm": 0.0, - "learning_rate": 2.1403264984395378e-08, - "loss": 0.7024, - "step": 34576 - }, - { - "epoch": 0.9798237411091275, - "grad_norm": 0.0, - "learning_rate": 2.1343291227972073e-08, - "loss": 0.8924, - "step": 34577 - }, - { - "epoch": 0.97985207855139, - "grad_norm": 0.0, - "learning_rate": 2.1283401525478142e-08, - "loss": 0.8157, - "step": 34578 - }, - { - "epoch": 0.9798804159936524, - "grad_norm": 0.0, - "learning_rate": 2.1223595877420954e-08, - "loss": 0.7717, - "step": 34579 - }, - { - "epoch": 0.9799087534359149, - "grad_norm": 0.0, - "learning_rate": 2.1163874284302334e-08, - "loss": 0.8386, - "step": 34580 - }, - { - "epoch": 0.9799370908781774, - "grad_norm": 0.0, - "learning_rate": 2.1104236746626318e-08, - "loss": 0.8737, - "step": 34581 - }, - { - "epoch": 0.9799654283204398, - "grad_norm": 0.0, - "learning_rate": 2.104468326489584e-08, - "loss": 0.7787, - "step": 34582 - }, - { - "epoch": 0.9799937657627023, - "grad_norm": 0.0, - "learning_rate": 2.0985213839610498e-08, - "loss": 0.8272, - "step": 34583 - }, - { - "epoch": 0.9800221032049647, - "grad_norm": 0.0, - "learning_rate": 2.0925828471272115e-08, - "loss": 0.8903, - "step": 34584 - }, - { - "epoch": 0.9800504406472271, - "grad_norm": 0.0, - "learning_rate": 2.08665271603814e-08, - "loss": 0.7614, - "step": 34585 - }, - { - "epoch": 0.9800787780894896, - "grad_norm": 0.0, - "learning_rate": 2.0807309907437955e-08, - "loss": 0.85, - "step": 34586 - }, - { - "epoch": 0.9801071155317521, - "grad_norm": 0.0, - "learning_rate": 2.074817671294027e-08, - "loss": 0.76, - "step": 34587 - }, - { - "epoch": 0.9801354529740146, - "grad_norm": 0.0, - "learning_rate": 2.0689127577385724e-08, - "loss": 0.8761, - "step": 34588 - }, - { - "epoch": 0.980163790416277, - "grad_norm": 0.0, - "learning_rate": 2.063016250127281e-08, - "loss": 0.7144, - "step": 34589 - }, - { - "epoch": 0.9801921278585395, - "grad_norm": 0.0, - "learning_rate": 2.0571281485097793e-08, - "loss": 0.8458, - "step": 34590 - }, - { - "epoch": 0.980220465300802, - "grad_norm": 0.0, - "learning_rate": 2.0512484529356947e-08, - "loss": 0.714, - "step": 34591 - }, - { - "epoch": 0.9802488027430644, - "grad_norm": 0.0, - "learning_rate": 2.045377163454432e-08, - "loss": 0.8224, - "step": 34592 - }, - { - "epoch": 0.9802771401853269, - "grad_norm": 0.0, - "learning_rate": 2.0395142801156174e-08, - "loss": 0.8414, - "step": 34593 - }, - { - "epoch": 0.9803054776275894, - "grad_norm": 0.0, - "learning_rate": 2.0336598029684352e-08, - "loss": 0.8119, - "step": 34594 - }, - { - "epoch": 0.9803338150698518, - "grad_norm": 0.0, - "learning_rate": 2.0278137320625114e-08, - "loss": 0.7822, - "step": 34595 - }, - { - "epoch": 0.9803621525121142, - "grad_norm": 0.0, - "learning_rate": 2.021976067446807e-08, - "loss": 0.7535, - "step": 34596 - }, - { - "epoch": 0.9803904899543767, - "grad_norm": 0.0, - "learning_rate": 2.016146809170505e-08, - "loss": 0.7112, - "step": 34597 - }, - { - "epoch": 0.9804188273966392, - "grad_norm": 0.0, - "learning_rate": 2.010325957282899e-08, - "loss": 0.7472, - "step": 34598 - }, - { - "epoch": 0.9804471648389016, - "grad_norm": 0.0, - "learning_rate": 2.0045135118328397e-08, - "loss": 0.7711, - "step": 34599 - }, - { - "epoch": 0.9804755022811641, - "grad_norm": 0.0, - "learning_rate": 1.9987094728695088e-08, - "loss": 0.7882, - "step": 34600 - }, - { - "epoch": 0.9805038397234266, - "grad_norm": 0.0, - "learning_rate": 1.9929138404415348e-08, - "loss": 0.8011, - "step": 34601 - }, - { - "epoch": 0.9805321771656891, - "grad_norm": 0.0, - "learning_rate": 1.9871266145977673e-08, - "loss": 0.8434, - "step": 34602 - }, - { - "epoch": 0.9805605146079515, - "grad_norm": 0.0, - "learning_rate": 1.9813477953871675e-08, - "loss": 0.8256, - "step": 34603 - }, - { - "epoch": 0.980588852050214, - "grad_norm": 0.0, - "learning_rate": 1.9755773828582514e-08, - "loss": 0.7859, - "step": 34604 - }, - { - "epoch": 0.9806171894924764, - "grad_norm": 0.0, - "learning_rate": 1.9698153770596474e-08, - "loss": 0.7698, - "step": 34605 - }, - { - "epoch": 0.9806455269347388, - "grad_norm": 0.0, - "learning_rate": 1.964061778039872e-08, - "loss": 0.8783, - "step": 34606 - }, - { - "epoch": 0.9806738643770013, - "grad_norm": 0.0, - "learning_rate": 1.9583165858474417e-08, - "loss": 0.7085, - "step": 34607 - }, - { - "epoch": 0.9807022018192638, - "grad_norm": 0.0, - "learning_rate": 1.9525798005307628e-08, - "loss": 0.7691, - "step": 34608 - }, - { - "epoch": 0.9807305392615262, - "grad_norm": 0.0, - "learning_rate": 1.9468514221380185e-08, - "loss": 0.807, - "step": 34609 - }, - { - "epoch": 0.9807588767037887, - "grad_norm": 0.0, - "learning_rate": 1.941131450717615e-08, - "loss": 0.7768, - "step": 34610 - }, - { - "epoch": 0.9807872141460512, - "grad_norm": 0.0, - "learning_rate": 1.9354198863177355e-08, - "loss": 0.8078, - "step": 34611 - }, - { - "epoch": 0.9808155515883137, - "grad_norm": 0.0, - "learning_rate": 1.9297167289863417e-08, - "loss": 0.7905, - "step": 34612 - }, - { - "epoch": 0.9808438890305761, - "grad_norm": 0.0, - "learning_rate": 1.9240219787716175e-08, - "loss": 0.7923, - "step": 34613 - }, - { - "epoch": 0.9808722264728386, - "grad_norm": 0.0, - "learning_rate": 1.9183356357215242e-08, - "loss": 0.8992, - "step": 34614 - }, - { - "epoch": 0.980900563915101, - "grad_norm": 0.0, - "learning_rate": 1.912657699883802e-08, - "loss": 0.8857, - "step": 34615 - }, - { - "epoch": 0.9809289013573634, - "grad_norm": 0.0, - "learning_rate": 1.906988171306523e-08, - "loss": 0.7934, - "step": 34616 - }, - { - "epoch": 0.9809572387996259, - "grad_norm": 0.0, - "learning_rate": 1.901327050037205e-08, - "loss": 0.8287, - "step": 34617 - }, - { - "epoch": 0.9809855762418884, - "grad_norm": 0.0, - "learning_rate": 1.8956743361236983e-08, - "loss": 0.8806, - "step": 34618 - }, - { - "epoch": 0.9810139136841509, - "grad_norm": 0.0, - "learning_rate": 1.890030029613521e-08, - "loss": 0.868, - "step": 34619 - }, - { - "epoch": 0.9810422511264133, - "grad_norm": 0.0, - "learning_rate": 1.8843941305543013e-08, - "loss": 0.751, - "step": 34620 - }, - { - "epoch": 0.9810705885686758, - "grad_norm": 0.0, - "learning_rate": 1.8787666389935567e-08, - "loss": 0.8264, - "step": 34621 - }, - { - "epoch": 0.9810989260109383, - "grad_norm": 0.0, - "learning_rate": 1.8731475549784717e-08, - "loss": 0.757, - "step": 34622 - }, - { - "epoch": 0.9811272634532007, - "grad_norm": 0.0, - "learning_rate": 1.867536878556564e-08, - "loss": 0.7783, - "step": 34623 - }, - { - "epoch": 0.9811556008954632, - "grad_norm": 0.0, - "learning_rate": 1.861934609775018e-08, - "loss": 0.6906, - "step": 34624 - }, - { - "epoch": 0.9811839383377257, - "grad_norm": 0.0, - "learning_rate": 1.856340748681129e-08, - "loss": 0.7012, - "step": 34625 - }, - { - "epoch": 0.9812122757799882, - "grad_norm": 0.0, - "learning_rate": 1.850755295321749e-08, - "loss": 0.8123, - "step": 34626 - }, - { - "epoch": 0.9812406132222505, - "grad_norm": 0.0, - "learning_rate": 1.8451782497442838e-08, - "loss": 0.8487, - "step": 34627 - }, - { - "epoch": 0.981268950664513, - "grad_norm": 0.0, - "learning_rate": 1.8396096119954744e-08, - "loss": 0.8027, - "step": 34628 - }, - { - "epoch": 0.9812972881067755, - "grad_norm": 0.0, - "learning_rate": 1.8340493821222827e-08, - "loss": 0.7486, - "step": 34629 - }, - { - "epoch": 0.9813256255490379, - "grad_norm": 0.0, - "learning_rate": 1.8284975601715606e-08, - "loss": 0.8093, - "step": 34630 - }, - { - "epoch": 0.9813539629913004, - "grad_norm": 0.0, - "learning_rate": 1.822954146190159e-08, - "loss": 0.7866, - "step": 34631 - }, - { - "epoch": 0.9813823004335629, - "grad_norm": 0.0, - "learning_rate": 1.8174191402244855e-08, - "loss": 0.8998, - "step": 34632 - }, - { - "epoch": 0.9814106378758253, - "grad_norm": 0.0, - "learning_rate": 1.8118925423215027e-08, - "loss": 0.7508, - "step": 34633 - }, - { - "epoch": 0.9814389753180878, - "grad_norm": 0.0, - "learning_rate": 1.806374352527618e-08, - "loss": 0.8499, - "step": 34634 - }, - { - "epoch": 0.9814673127603503, - "grad_norm": 0.0, - "learning_rate": 1.800864570889238e-08, - "loss": 0.765, - "step": 34635 - }, - { - "epoch": 0.9814956502026128, - "grad_norm": 0.0, - "learning_rate": 1.7953631974528818e-08, - "loss": 0.7394, - "step": 34636 - }, - { - "epoch": 0.9815239876448751, - "grad_norm": 0.0, - "learning_rate": 1.7898702322648453e-08, - "loss": 0.8421, - "step": 34637 - }, - { - "epoch": 0.9815523250871376, - "grad_norm": 0.0, - "learning_rate": 1.784385675371425e-08, - "loss": 0.7497, - "step": 34638 - }, - { - "epoch": 0.9815806625294001, - "grad_norm": 0.0, - "learning_rate": 1.778909526818806e-08, - "loss": 0.8398, - "step": 34639 - }, - { - "epoch": 0.9816089999716625, - "grad_norm": 0.0, - "learning_rate": 1.773441786653063e-08, - "loss": 0.7503, - "step": 34640 - }, - { - "epoch": 0.981637337413925, - "grad_norm": 0.0, - "learning_rate": 1.7679824549203805e-08, - "loss": 0.6827, - "step": 34641 - }, - { - "epoch": 0.9816656748561875, - "grad_norm": 0.0, - "learning_rate": 1.7625315316666115e-08, - "loss": 0.9203, - "step": 34642 - }, - { - "epoch": 0.98169401229845, - "grad_norm": 0.0, - "learning_rate": 1.7570890169377185e-08, - "loss": 0.7818, - "step": 34643 - }, - { - "epoch": 0.9817223497407124, - "grad_norm": 0.0, - "learning_rate": 1.7516549107795543e-08, - "loss": 0.8099, - "step": 34644 - }, - { - "epoch": 0.9817506871829749, - "grad_norm": 0.0, - "learning_rate": 1.746229213237971e-08, - "loss": 0.7821, - "step": 34645 - }, - { - "epoch": 0.9817790246252374, - "grad_norm": 0.0, - "learning_rate": 1.7408119243584875e-08, - "loss": 0.8445, - "step": 34646 - }, - { - "epoch": 0.9818073620674997, - "grad_norm": 0.0, - "learning_rate": 1.7354030441868452e-08, - "loss": 0.7967, - "step": 34647 - }, - { - "epoch": 0.9818356995097622, - "grad_norm": 0.0, - "learning_rate": 1.7300025727686744e-08, - "loss": 0.8256, - "step": 34648 - }, - { - "epoch": 0.9818640369520247, - "grad_norm": 0.0, - "learning_rate": 1.7246105101493825e-08, - "loss": 0.7481, - "step": 34649 - }, - { - "epoch": 0.9818923743942872, - "grad_norm": 0.0, - "learning_rate": 1.7192268563743786e-08, - "loss": 0.7997, - "step": 34650 - }, - { - "epoch": 0.9819207118365496, - "grad_norm": 0.0, - "learning_rate": 1.7138516114890702e-08, - "loss": 0.8603, - "step": 34651 - }, - { - "epoch": 0.9819490492788121, - "grad_norm": 0.0, - "learning_rate": 1.7084847755385324e-08, - "loss": 0.8948, - "step": 34652 - }, - { - "epoch": 0.9819773867210746, - "grad_norm": 0.0, - "learning_rate": 1.703126348568285e-08, - "loss": 0.9812, - "step": 34653 - }, - { - "epoch": 0.982005724163337, - "grad_norm": 0.0, - "learning_rate": 1.697776330623291e-08, - "loss": 0.755, - "step": 34654 - }, - { - "epoch": 0.9820340616055995, - "grad_norm": 0.0, - "learning_rate": 1.692434721748626e-08, - "loss": 0.7724, - "step": 34655 - }, - { - "epoch": 0.982062399047862, - "grad_norm": 0.0, - "learning_rate": 1.687101521989254e-08, - "loss": 0.7296, - "step": 34656 - }, - { - "epoch": 0.9820907364901243, - "grad_norm": 0.0, - "learning_rate": 1.68177673139025e-08, - "loss": 0.7228, - "step": 34657 - }, - { - "epoch": 0.9821190739323868, - "grad_norm": 0.0, - "learning_rate": 1.6764603499962452e-08, - "loss": 0.816, - "step": 34658 - }, - { - "epoch": 0.9821474113746493, - "grad_norm": 0.0, - "learning_rate": 1.671152377852092e-08, - "loss": 0.789, - "step": 34659 - }, - { - "epoch": 0.9821757488169118, - "grad_norm": 0.0, - "learning_rate": 1.665852815002644e-08, - "loss": 0.8521, - "step": 34660 - }, - { - "epoch": 0.9822040862591742, - "grad_norm": 0.0, - "learning_rate": 1.6605616614924214e-08, - "loss": 0.7686, - "step": 34661 - }, - { - "epoch": 0.9822324237014367, - "grad_norm": 0.0, - "learning_rate": 1.6552789173660543e-08, - "loss": 0.7569, - "step": 34662 - }, - { - "epoch": 0.9822607611436992, - "grad_norm": 0.0, - "learning_rate": 1.6500045826679523e-08, - "loss": 0.7372, - "step": 34663 - }, - { - "epoch": 0.9822890985859616, - "grad_norm": 0.0, - "learning_rate": 1.644738657442524e-08, - "loss": 0.7186, - "step": 34664 - }, - { - "epoch": 0.9823174360282241, - "grad_norm": 0.0, - "learning_rate": 1.6394811417342892e-08, - "loss": 0.7859, - "step": 34665 - }, - { - "epoch": 0.9823457734704866, - "grad_norm": 0.0, - "learning_rate": 1.6342320355874353e-08, - "loss": 0.8195, - "step": 34666 - }, - { - "epoch": 0.9823741109127491, - "grad_norm": 0.0, - "learning_rate": 1.6289913390461487e-08, - "loss": 0.8303, - "step": 34667 - }, - { - "epoch": 0.9824024483550114, - "grad_norm": 0.0, - "learning_rate": 1.6237590521546165e-08, - "loss": 0.8557, - "step": 34668 - }, - { - "epoch": 0.9824307857972739, - "grad_norm": 0.0, - "learning_rate": 1.6185351749569146e-08, - "loss": 0.7656, - "step": 34669 - }, - { - "epoch": 0.9824591232395364, - "grad_norm": 0.0, - "learning_rate": 1.6133197074970075e-08, - "loss": 0.7307, - "step": 34670 - }, - { - "epoch": 0.9824874606817988, - "grad_norm": 0.0, - "learning_rate": 1.6081126498187494e-08, - "loss": 0.7869, - "step": 34671 - }, - { - "epoch": 0.9825157981240613, - "grad_norm": 0.0, - "learning_rate": 1.602914001966216e-08, - "loss": 0.7012, - "step": 34672 - }, - { - "epoch": 0.9825441355663238, - "grad_norm": 0.0, - "learning_rate": 1.597723763983039e-08, - "loss": 0.7536, - "step": 34673 - }, - { - "epoch": 0.9825724730085863, - "grad_norm": 0.0, - "learning_rate": 1.5925419359130723e-08, - "loss": 0.8272, - "step": 34674 - }, - { - "epoch": 0.9826008104508487, - "grad_norm": 0.0, - "learning_rate": 1.5873685177998365e-08, - "loss": 0.8248, - "step": 34675 - }, - { - "epoch": 0.9826291478931112, - "grad_norm": 0.0, - "learning_rate": 1.5822035096868526e-08, - "loss": 0.8528, - "step": 34676 - }, - { - "epoch": 0.9826574853353737, - "grad_norm": 0.0, - "learning_rate": 1.5770469116178634e-08, - "loss": 0.8175, - "step": 34677 - }, - { - "epoch": 0.982685822777636, - "grad_norm": 0.0, - "learning_rate": 1.5718987236360563e-08, - "loss": 0.9109, - "step": 34678 - }, - { - "epoch": 0.9827141602198985, - "grad_norm": 0.0, - "learning_rate": 1.5667589457849518e-08, - "loss": 0.7849, - "step": 34679 - }, - { - "epoch": 0.982742497662161, - "grad_norm": 0.0, - "learning_rate": 1.5616275781077384e-08, - "loss": 0.8616, - "step": 34680 - }, - { - "epoch": 0.9827708351044234, - "grad_norm": 0.0, - "learning_rate": 1.5565046206478252e-08, - "loss": 0.7403, - "step": 34681 - }, - { - "epoch": 0.9827991725466859, - "grad_norm": 0.0, - "learning_rate": 1.551390073448067e-08, - "loss": 0.8406, - "step": 34682 - }, - { - "epoch": 0.9828275099889484, - "grad_norm": 0.0, - "learning_rate": 1.5462839365518734e-08, - "loss": 0.7398, - "step": 34683 - }, - { - "epoch": 0.9828558474312109, - "grad_norm": 0.0, - "learning_rate": 1.5411862100019882e-08, - "loss": 0.7666, - "step": 34684 - }, - { - "epoch": 0.9828841848734733, - "grad_norm": 0.0, - "learning_rate": 1.5360968938414876e-08, - "loss": 0.8241, - "step": 34685 - }, - { - "epoch": 0.9829125223157358, - "grad_norm": 0.0, - "learning_rate": 1.531015988113227e-08, - "loss": 0.8776, - "step": 34686 - }, - { - "epoch": 0.9829408597579983, - "grad_norm": 0.0, - "learning_rate": 1.525943492859949e-08, - "loss": 0.8583, - "step": 34687 - }, - { - "epoch": 0.9829691972002607, - "grad_norm": 0.0, - "learning_rate": 1.5208794081245092e-08, - "loss": 0.7969, - "step": 34688 - }, - { - "epoch": 0.9829975346425232, - "grad_norm": 0.0, - "learning_rate": 1.5158237339494285e-08, - "loss": 0.7977, - "step": 34689 - }, - { - "epoch": 0.9830258720847856, - "grad_norm": 0.0, - "learning_rate": 1.5107764703773398e-08, - "loss": 0.8087, - "step": 34690 - }, - { - "epoch": 0.9830542095270481, - "grad_norm": 0.0, - "learning_rate": 1.505737617450764e-08, - "loss": 0.8508, - "step": 34691 - }, - { - "epoch": 0.9830825469693105, - "grad_norm": 0.0, - "learning_rate": 1.5007071752121128e-08, - "loss": 0.788, - "step": 34692 - }, - { - "epoch": 0.983110884411573, - "grad_norm": 0.0, - "learning_rate": 1.4956851437037957e-08, - "loss": 0.8333, - "step": 34693 - }, - { - "epoch": 0.9831392218538355, - "grad_norm": 0.0, - "learning_rate": 1.4906715229682232e-08, - "loss": 0.7959, - "step": 34694 - }, - { - "epoch": 0.9831675592960979, - "grad_norm": 0.0, - "learning_rate": 1.4856663130473625e-08, - "loss": 0.7941, - "step": 34695 - }, - { - "epoch": 0.9831958967383604, - "grad_norm": 0.0, - "learning_rate": 1.4806695139836236e-08, - "loss": 0.8445, - "step": 34696 - }, - { - "epoch": 0.9832242341806229, - "grad_norm": 0.0, - "learning_rate": 1.475681125818973e-08, - "loss": 0.7501, - "step": 34697 - }, - { - "epoch": 0.9832525716228853, - "grad_norm": 0.0, - "learning_rate": 1.4707011485953771e-08, - "loss": 0.8065, - "step": 34698 - }, - { - "epoch": 0.9832809090651478, - "grad_norm": 0.0, - "learning_rate": 1.4657295823549134e-08, - "loss": 0.8471, - "step": 34699 - }, - { - "epoch": 0.9833092465074103, - "grad_norm": 0.0, - "learning_rate": 1.4607664271394374e-08, - "loss": 0.9005, - "step": 34700 - }, - { - "epoch": 0.9833375839496727, - "grad_norm": 0.0, - "learning_rate": 1.4558116829906931e-08, - "loss": 0.7686, - "step": 34701 - }, - { - "epoch": 0.9833659213919351, - "grad_norm": 0.0, - "learning_rate": 1.4508653499504256e-08, - "loss": 0.7815, - "step": 34702 - }, - { - "epoch": 0.9833942588341976, - "grad_norm": 0.0, - "learning_rate": 1.4459274280603786e-08, - "loss": 0.7623, - "step": 34703 - }, - { - "epoch": 0.9834225962764601, - "grad_norm": 0.0, - "learning_rate": 1.4409979173620747e-08, - "loss": 0.7936, - "step": 34704 - }, - { - "epoch": 0.9834509337187225, - "grad_norm": 0.0, - "learning_rate": 1.4360768178969253e-08, - "loss": 0.7814, - "step": 34705 - }, - { - "epoch": 0.983479271160985, - "grad_norm": 0.0, - "learning_rate": 1.4311641297066747e-08, - "loss": 0.8534, - "step": 34706 - }, - { - "epoch": 0.9835076086032475, - "grad_norm": 0.0, - "learning_rate": 1.4262598528325122e-08, - "loss": 0.8217, - "step": 34707 - }, - { - "epoch": 0.98353594604551, - "grad_norm": 0.0, - "learning_rate": 1.421363987315738e-08, - "loss": 0.8352, - "step": 34708 - }, - { - "epoch": 0.9835642834877724, - "grad_norm": 0.0, - "learning_rate": 1.4164765331976527e-08, - "loss": 0.7937, - "step": 34709 - }, - { - "epoch": 0.9835926209300349, - "grad_norm": 0.0, - "learning_rate": 1.4115974905193342e-08, - "loss": 0.8466, - "step": 34710 - }, - { - "epoch": 0.9836209583722973, - "grad_norm": 0.0, - "learning_rate": 1.4067268593219719e-08, - "loss": 0.7113, - "step": 34711 - }, - { - "epoch": 0.9836492958145597, - "grad_norm": 0.0, - "learning_rate": 1.401864639646533e-08, - "loss": 0.8062, - "step": 34712 - }, - { - "epoch": 0.9836776332568222, - "grad_norm": 0.0, - "learning_rate": 1.3970108315340958e-08, - "loss": 0.8199, - "step": 34713 - }, - { - "epoch": 0.9837059706990847, - "grad_norm": 0.0, - "learning_rate": 1.3921654350254054e-08, - "loss": 0.7992, - "step": 34714 - }, - { - "epoch": 0.9837343081413472, - "grad_norm": 0.0, - "learning_rate": 1.3873284501614292e-08, - "loss": 0.7883, - "step": 34715 - }, - { - "epoch": 0.9837626455836096, - "grad_norm": 0.0, - "learning_rate": 1.3824998769826902e-08, - "loss": 0.8115, - "step": 34716 - }, - { - "epoch": 0.9837909830258721, - "grad_norm": 0.0, - "learning_rate": 1.3776797155300448e-08, - "loss": 0.7822, - "step": 34717 - }, - { - "epoch": 0.9838193204681346, - "grad_norm": 0.0, - "learning_rate": 1.3728679658440159e-08, - "loss": 0.7786, - "step": 34718 - }, - { - "epoch": 0.983847657910397, - "grad_norm": 0.0, - "learning_rate": 1.3680646279651266e-08, - "loss": 0.8116, - "step": 34719 - }, - { - "epoch": 0.9838759953526595, - "grad_norm": 0.0, - "learning_rate": 1.3632697019339003e-08, - "loss": 0.6971, - "step": 34720 - }, - { - "epoch": 0.983904332794922, - "grad_norm": 0.0, - "learning_rate": 1.358483187790638e-08, - "loss": 0.7774, - "step": 34721 - }, - { - "epoch": 0.9839326702371843, - "grad_norm": 0.0, - "learning_rate": 1.3537050855757516e-08, - "loss": 0.763, - "step": 34722 - }, - { - "epoch": 0.9839610076794468, - "grad_norm": 0.0, - "learning_rate": 1.3489353953294315e-08, - "loss": 0.707, - "step": 34723 - }, - { - "epoch": 0.9839893451217093, - "grad_norm": 0.0, - "learning_rate": 1.3441741170918676e-08, - "loss": 0.825, - "step": 34724 - }, - { - "epoch": 0.9840176825639718, - "grad_norm": 0.0, - "learning_rate": 1.339421250903139e-08, - "loss": 0.8488, - "step": 34725 - }, - { - "epoch": 0.9840460200062342, - "grad_norm": 0.0, - "learning_rate": 1.3346767968033247e-08, - "loss": 0.8469, - "step": 34726 - }, - { - "epoch": 0.9840743574484967, - "grad_norm": 0.0, - "learning_rate": 1.3299407548323927e-08, - "loss": 0.8168, - "step": 34727 - }, - { - "epoch": 0.9841026948907592, - "grad_norm": 0.0, - "learning_rate": 1.3252131250302003e-08, - "loss": 0.8245, - "step": 34728 - }, - { - "epoch": 0.9841310323330216, - "grad_norm": 0.0, - "learning_rate": 1.320493907436604e-08, - "loss": 0.7298, - "step": 34729 - }, - { - "epoch": 0.9841593697752841, - "grad_norm": 0.0, - "learning_rate": 1.3157831020913503e-08, - "loss": 0.7795, - "step": 34730 - }, - { - "epoch": 0.9841877072175466, - "grad_norm": 0.0, - "learning_rate": 1.3110807090340738e-08, - "loss": 0.8185, - "step": 34731 - }, - { - "epoch": 0.984216044659809, - "grad_norm": 0.0, - "learning_rate": 1.3063867283045206e-08, - "loss": 0.6975, - "step": 34732 - }, - { - "epoch": 0.9842443821020714, - "grad_norm": 0.0, - "learning_rate": 1.3017011599419927e-08, - "loss": 0.8385, - "step": 34733 - }, - { - "epoch": 0.9842727195443339, - "grad_norm": 0.0, - "learning_rate": 1.2970240039861248e-08, - "loss": 0.7418, - "step": 34734 - }, - { - "epoch": 0.9843010569865964, - "grad_norm": 0.0, - "learning_rate": 1.2923552604763301e-08, - "loss": 0.8067, - "step": 34735 - }, - { - "epoch": 0.9843293944288588, - "grad_norm": 0.0, - "learning_rate": 1.2876949294517993e-08, - "loss": 0.7825, - "step": 34736 - }, - { - "epoch": 0.9843577318711213, - "grad_norm": 0.0, - "learning_rate": 1.2830430109519454e-08, - "loss": 0.7843, - "step": 34737 - }, - { - "epoch": 0.9843860693133838, - "grad_norm": 0.0, - "learning_rate": 1.2783995050158483e-08, - "loss": 0.7635, - "step": 34738 - }, - { - "epoch": 0.9844144067556463, - "grad_norm": 0.0, - "learning_rate": 1.2737644116826986e-08, - "loss": 0.868, - "step": 34739 - }, - { - "epoch": 0.9844427441979087, - "grad_norm": 0.0, - "learning_rate": 1.2691377309915765e-08, - "loss": 0.6603, - "step": 34740 - }, - { - "epoch": 0.9844710816401712, - "grad_norm": 0.0, - "learning_rate": 1.2645194629812285e-08, - "loss": 0.7281, - "step": 34741 - }, - { - "epoch": 0.9844994190824337, - "grad_norm": 0.0, - "learning_rate": 1.2599096076907347e-08, - "loss": 0.8208, - "step": 34742 - }, - { - "epoch": 0.984527756524696, - "grad_norm": 0.0, - "learning_rate": 1.2553081651589527e-08, - "loss": 0.7823, - "step": 34743 - }, - { - "epoch": 0.9845560939669585, - "grad_norm": 0.0, - "learning_rate": 1.2507151354245184e-08, - "loss": 0.8216, - "step": 34744 - }, - { - "epoch": 0.984584431409221, - "grad_norm": 0.0, - "learning_rate": 1.2461305185262895e-08, - "loss": 1.0058, - "step": 34745 - }, - { - "epoch": 0.9846127688514834, - "grad_norm": 0.0, - "learning_rate": 1.2415543145026798e-08, - "loss": 0.7296, - "step": 34746 - }, - { - "epoch": 0.9846411062937459, - "grad_norm": 0.0, - "learning_rate": 1.236986523392325e-08, - "loss": 0.8268, - "step": 34747 - }, - { - "epoch": 0.9846694437360084, - "grad_norm": 0.0, - "learning_rate": 1.232427145233861e-08, - "loss": 0.9178, - "step": 34748 - }, - { - "epoch": 0.9846977811782709, - "grad_norm": 0.0, - "learning_rate": 1.2278761800653682e-08, - "loss": 0.8598, - "step": 34749 - }, - { - "epoch": 0.9847261186205333, - "grad_norm": 0.0, - "learning_rate": 1.2233336279254825e-08, - "loss": 0.8429, - "step": 34750 - }, - { - "epoch": 0.9847544560627958, - "grad_norm": 0.0, - "learning_rate": 1.2187994888522847e-08, - "loss": 0.8809, - "step": 34751 - }, - { - "epoch": 0.9847827935050583, - "grad_norm": 0.0, - "learning_rate": 1.2142737628840773e-08, - "loss": 0.7706, - "step": 34752 - }, - { - "epoch": 0.9848111309473206, - "grad_norm": 0.0, - "learning_rate": 1.20975645005883e-08, - "loss": 0.7211, - "step": 34753 - }, - { - "epoch": 0.9848394683895831, - "grad_norm": 0.0, - "learning_rate": 1.2052475504148453e-08, - "loss": 0.8384, - "step": 34754 - }, - { - "epoch": 0.9848678058318456, - "grad_norm": 0.0, - "learning_rate": 1.20074706398976e-08, - "loss": 0.8752, - "step": 34755 - }, - { - "epoch": 0.9848961432741081, - "grad_norm": 0.0, - "learning_rate": 1.1962549908218769e-08, - "loss": 0.8788, - "step": 34756 - }, - { - "epoch": 0.9849244807163705, - "grad_norm": 0.0, - "learning_rate": 1.1917713309487212e-08, - "loss": 0.7345, - "step": 34757 - }, - { - "epoch": 0.984952818158633, - "grad_norm": 0.0, - "learning_rate": 1.1872960844082626e-08, - "loss": 0.7758, - "step": 34758 - }, - { - "epoch": 0.9849811556008955, - "grad_norm": 0.0, - "learning_rate": 1.1828292512380269e-08, - "loss": 0.7503, - "step": 34759 - }, - { - "epoch": 0.9850094930431579, - "grad_norm": 0.0, - "learning_rate": 1.1783708314756503e-08, - "loss": 0.8939, - "step": 34760 - }, - { - "epoch": 0.9850378304854204, - "grad_norm": 0.0, - "learning_rate": 1.1739208251588807e-08, - "loss": 0.8195, - "step": 34761 - }, - { - "epoch": 0.9850661679276829, - "grad_norm": 0.0, - "learning_rate": 1.1694792323250215e-08, - "loss": 0.7859, - "step": 34762 - }, - { - "epoch": 0.9850945053699454, - "grad_norm": 0.0, - "learning_rate": 1.1650460530115981e-08, - "loss": 0.7659, - "step": 34763 - }, - { - "epoch": 0.9851228428122077, - "grad_norm": 0.0, - "learning_rate": 1.1606212872559142e-08, - "loss": 0.8143, - "step": 34764 - }, - { - "epoch": 0.9851511802544702, - "grad_norm": 0.0, - "learning_rate": 1.1562049350951621e-08, - "loss": 0.8715, - "step": 34765 - }, - { - "epoch": 0.9851795176967327, - "grad_norm": 0.0, - "learning_rate": 1.1517969965666454e-08, - "loss": 0.8481, - "step": 34766 - }, - { - "epoch": 0.9852078551389951, - "grad_norm": 0.0, - "learning_rate": 1.1473974717074455e-08, - "loss": 0.6844, - "step": 34767 - }, - { - "epoch": 0.9852361925812576, - "grad_norm": 0.0, - "learning_rate": 1.1430063605546437e-08, - "loss": 0.6714, - "step": 34768 - }, - { - "epoch": 0.9852645300235201, - "grad_norm": 0.0, - "learning_rate": 1.1386236631452108e-08, - "loss": 0.7757, - "step": 34769 - }, - { - "epoch": 0.9852928674657825, - "grad_norm": 0.0, - "learning_rate": 1.1342493795160059e-08, - "loss": 0.7331, - "step": 34770 - }, - { - "epoch": 0.985321204908045, - "grad_norm": 0.0, - "learning_rate": 1.1298835097039995e-08, - "loss": 0.7764, - "step": 34771 - }, - { - "epoch": 0.9853495423503075, - "grad_norm": 0.0, - "learning_rate": 1.1255260537459401e-08, - "loss": 0.7046, - "step": 34772 - }, - { - "epoch": 0.98537787979257, - "grad_norm": 0.0, - "learning_rate": 1.1211770116784649e-08, - "loss": 0.9569, - "step": 34773 - }, - { - "epoch": 0.9854062172348323, - "grad_norm": 0.0, - "learning_rate": 1.1168363835382113e-08, - "loss": 0.7611, - "step": 34774 - }, - { - "epoch": 0.9854345546770948, - "grad_norm": 0.0, - "learning_rate": 1.1125041693617056e-08, - "loss": 0.8079, - "step": 34775 - }, - { - "epoch": 0.9854628921193573, - "grad_norm": 0.0, - "learning_rate": 1.1081803691855853e-08, - "loss": 0.7027, - "step": 34776 - }, - { - "epoch": 0.9854912295616197, - "grad_norm": 0.0, - "learning_rate": 1.1038649830462656e-08, - "loss": 0.7931, - "step": 34777 - }, - { - "epoch": 0.9855195670038822, - "grad_norm": 0.0, - "learning_rate": 1.0995580109799397e-08, - "loss": 0.8599, - "step": 34778 - }, - { - "epoch": 0.9855479044461447, - "grad_norm": 0.0, - "learning_rate": 1.0952594530230232e-08, - "loss": 0.8207, - "step": 34779 - }, - { - "epoch": 0.9855762418884072, - "grad_norm": 0.0, - "learning_rate": 1.090969309211598e-08, - "loss": 0.7304, - "step": 34780 - }, - { - "epoch": 0.9856045793306696, - "grad_norm": 0.0, - "learning_rate": 1.0866875795818576e-08, - "loss": 0.7834, - "step": 34781 - }, - { - "epoch": 0.9856329167729321, - "grad_norm": 0.0, - "learning_rate": 1.0824142641699952e-08, - "loss": 0.9206, - "step": 34782 - }, - { - "epoch": 0.9856612542151946, - "grad_norm": 0.0, - "learning_rate": 1.078149363011871e-08, - "loss": 0.7398, - "step": 34783 - }, - { - "epoch": 0.985689591657457, - "grad_norm": 0.0, - "learning_rate": 1.0738928761433453e-08, - "loss": 0.9376, - "step": 34784 - }, - { - "epoch": 0.9857179290997194, - "grad_norm": 0.0, - "learning_rate": 1.0696448036003892e-08, - "loss": 0.848, - "step": 34785 - }, - { - "epoch": 0.9857462665419819, - "grad_norm": 0.0, - "learning_rate": 1.065405145418863e-08, - "loss": 0.8799, - "step": 34786 - }, - { - "epoch": 0.9857746039842444, - "grad_norm": 0.0, - "learning_rate": 1.0611739016342937e-08, - "loss": 0.8117, - "step": 34787 - }, - { - "epoch": 0.9858029414265068, - "grad_norm": 0.0, - "learning_rate": 1.0569510722823194e-08, - "loss": 0.7505, - "step": 34788 - }, - { - "epoch": 0.9858312788687693, - "grad_norm": 0.0, - "learning_rate": 1.0527366573986897e-08, - "loss": 0.9994, - "step": 34789 - }, - { - "epoch": 0.9858596163110318, - "grad_norm": 0.0, - "learning_rate": 1.0485306570188203e-08, - "loss": 0.8306, - "step": 34790 - }, - { - "epoch": 0.9858879537532942, - "grad_norm": 0.0, - "learning_rate": 1.0443330711781275e-08, - "loss": 0.7635, - "step": 34791 - }, - { - "epoch": 0.9859162911955567, - "grad_norm": 0.0, - "learning_rate": 1.0401438999119163e-08, - "loss": 0.8909, - "step": 34792 - }, - { - "epoch": 0.9859446286378192, - "grad_norm": 0.0, - "learning_rate": 1.0359631432556027e-08, - "loss": 0.7302, - "step": 34793 - }, - { - "epoch": 0.9859729660800816, - "grad_norm": 0.0, - "learning_rate": 1.0317908012442701e-08, - "loss": 0.8688, - "step": 34794 - }, - { - "epoch": 0.986001303522344, - "grad_norm": 0.0, - "learning_rate": 1.0276268739131124e-08, - "loss": 0.9222, - "step": 34795 - }, - { - "epoch": 0.9860296409646065, - "grad_norm": 0.0, - "learning_rate": 1.0234713612973234e-08, - "loss": 0.7732, - "step": 34796 - }, - { - "epoch": 0.986057978406869, - "grad_norm": 0.0, - "learning_rate": 1.0193242634316536e-08, - "loss": 0.8763, - "step": 34797 - }, - { - "epoch": 0.9860863158491314, - "grad_norm": 0.0, - "learning_rate": 1.0151855803512967e-08, - "loss": 0.8503, - "step": 34798 - }, - { - "epoch": 0.9861146532913939, - "grad_norm": 0.0, - "learning_rate": 1.0110553120908917e-08, - "loss": 0.6525, - "step": 34799 - }, - { - "epoch": 0.9861429907336564, - "grad_norm": 0.0, - "learning_rate": 1.0069334586854106e-08, - "loss": 0.7831, - "step": 34800 - }, - { - "epoch": 0.9861713281759188, - "grad_norm": 0.0, - "learning_rate": 1.0028200201693816e-08, - "loss": 0.7458, - "step": 34801 - }, - { - "epoch": 0.9861996656181813, - "grad_norm": 0.0, - "learning_rate": 9.987149965776655e-09, - "loss": 0.8305, - "step": 34802 - }, - { - "epoch": 0.9862280030604438, - "grad_norm": 0.0, - "learning_rate": 9.946183879447901e-09, - "loss": 0.8593, - "step": 34803 - }, - { - "epoch": 0.9862563405027063, - "grad_norm": 0.0, - "learning_rate": 9.905301943050615e-09, - "loss": 0.7628, - "step": 34804 - }, - { - "epoch": 0.9862846779449687, - "grad_norm": 0.0, - "learning_rate": 9.864504156932297e-09, - "loss": 0.7666, - "step": 34805 - }, - { - "epoch": 0.9863130153872312, - "grad_norm": 0.0, - "learning_rate": 9.823790521433785e-09, - "loss": 0.7973, - "step": 34806 - }, - { - "epoch": 0.9863413528294936, - "grad_norm": 0.0, - "learning_rate": 9.783161036900357e-09, - "loss": 0.6564, - "step": 34807 - }, - { - "epoch": 0.986369690271756, - "grad_norm": 0.0, - "learning_rate": 9.742615703671742e-09, - "loss": 0.7955, - "step": 34808 - }, - { - "epoch": 0.9863980277140185, - "grad_norm": 0.0, - "learning_rate": 9.702154522092111e-09, - "loss": 0.8076, - "step": 34809 - }, - { - "epoch": 0.986426365156281, - "grad_norm": 0.0, - "learning_rate": 9.66177749250008e-09, - "loss": 0.8379, - "step": 34810 - }, - { - "epoch": 0.9864547025985435, - "grad_norm": 0.0, - "learning_rate": 9.621484615237598e-09, - "loss": 0.8044, - "step": 34811 - }, - { - "epoch": 0.9864830400408059, - "grad_norm": 0.0, - "learning_rate": 9.581275890643282e-09, - "loss": 0.7991, - "step": 34812 - }, - { - "epoch": 0.9865113774830684, - "grad_norm": 0.0, - "learning_rate": 9.541151319054643e-09, - "loss": 0.7741, - "step": 34813 - }, - { - "epoch": 0.9865397149253309, - "grad_norm": 0.0, - "learning_rate": 9.501110900811405e-09, - "loss": 0.8489, - "step": 34814 - }, - { - "epoch": 0.9865680523675933, - "grad_norm": 0.0, - "learning_rate": 9.46115463624997e-09, - "loss": 0.792, - "step": 34815 - }, - { - "epoch": 0.9865963898098558, - "grad_norm": 0.0, - "learning_rate": 9.421282525707842e-09, - "loss": 0.794, - "step": 34816 - }, - { - "epoch": 0.9866247272521182, - "grad_norm": 0.0, - "learning_rate": 9.3814945695192e-09, - "loss": 0.795, - "step": 34817 - }, - { - "epoch": 0.9866530646943806, - "grad_norm": 0.0, - "learning_rate": 9.341790768020443e-09, - "loss": 0.7616, - "step": 34818 - }, - { - "epoch": 0.9866814021366431, - "grad_norm": 0.0, - "learning_rate": 9.302171121546855e-09, - "loss": 0.8037, - "step": 34819 - }, - { - "epoch": 0.9867097395789056, - "grad_norm": 0.0, - "learning_rate": 9.262635630429284e-09, - "loss": 0.8246, - "step": 34820 - }, - { - "epoch": 0.9867380770211681, - "grad_norm": 0.0, - "learning_rate": 9.223184295004129e-09, - "loss": 0.8453, - "step": 34821 - }, - { - "epoch": 0.9867664144634305, - "grad_norm": 0.0, - "learning_rate": 9.183817115601124e-09, - "loss": 0.8048, - "step": 34822 - }, - { - "epoch": 0.986794751905693, - "grad_norm": 0.0, - "learning_rate": 9.144534092552227e-09, - "loss": 0.7519, - "step": 34823 - }, - { - "epoch": 0.9868230893479555, - "grad_norm": 0.0, - "learning_rate": 9.105335226190504e-09, - "loss": 0.7218, - "step": 34824 - }, - { - "epoch": 0.9868514267902179, - "grad_norm": 0.0, - "learning_rate": 9.066220516843471e-09, - "loss": 0.8088, - "step": 34825 - }, - { - "epoch": 0.9868797642324804, - "grad_norm": 0.0, - "learning_rate": 9.027189964841977e-09, - "loss": 0.8489, - "step": 34826 - }, - { - "epoch": 0.9869081016747429, - "grad_norm": 0.0, - "learning_rate": 8.988243570513533e-09, - "loss": 0.9182, - "step": 34827 - }, - { - "epoch": 0.9869364391170053, - "grad_norm": 0.0, - "learning_rate": 8.94938133418899e-09, - "loss": 0.7583, - "step": 34828 - }, - { - "epoch": 0.9869647765592677, - "grad_norm": 0.0, - "learning_rate": 8.91060325619253e-09, - "loss": 0.7509, - "step": 34829 - }, - { - "epoch": 0.9869931140015302, - "grad_norm": 0.0, - "learning_rate": 8.87190933685278e-09, - "loss": 0.8661, - "step": 34830 - }, - { - "epoch": 0.9870214514437927, - "grad_norm": 0.0, - "learning_rate": 8.833299576495036e-09, - "loss": 0.817, - "step": 34831 - }, - { - "epoch": 0.9870497888860551, - "grad_norm": 0.0, - "learning_rate": 8.794773975444592e-09, - "loss": 0.8079, - "step": 34832 - }, - { - "epoch": 0.9870781263283176, - "grad_norm": 0.0, - "learning_rate": 8.756332534025635e-09, - "loss": 0.7266, - "step": 34833 - }, - { - "epoch": 0.9871064637705801, - "grad_norm": 0.0, - "learning_rate": 8.71797525256235e-09, - "loss": 0.8401, - "step": 34834 - }, - { - "epoch": 0.9871348012128426, - "grad_norm": 0.0, - "learning_rate": 8.67970213137781e-09, - "loss": 0.8081, - "step": 34835 - }, - { - "epoch": 0.987163138655105, - "grad_norm": 0.0, - "learning_rate": 8.64151317079398e-09, - "loss": 0.8759, - "step": 34836 - }, - { - "epoch": 0.9871914760973675, - "grad_norm": 0.0, - "learning_rate": 8.603408371132826e-09, - "loss": 0.7579, - "step": 34837 - }, - { - "epoch": 0.98721981353963, - "grad_norm": 0.0, - "learning_rate": 8.565387732716312e-09, - "loss": 0.754, - "step": 34838 - }, - { - "epoch": 0.9872481509818923, - "grad_norm": 0.0, - "learning_rate": 8.527451255863073e-09, - "loss": 0.7075, - "step": 34839 - }, - { - "epoch": 0.9872764884241548, - "grad_norm": 0.0, - "learning_rate": 8.489598940892851e-09, - "loss": 0.7978, - "step": 34840 - }, - { - "epoch": 0.9873048258664173, - "grad_norm": 0.0, - "learning_rate": 8.451830788126503e-09, - "loss": 0.8689, - "step": 34841 - }, - { - "epoch": 0.9873331633086797, - "grad_norm": 0.0, - "learning_rate": 8.41414679787933e-09, - "loss": 0.8864, - "step": 34842 - }, - { - "epoch": 0.9873615007509422, - "grad_norm": 0.0, - "learning_rate": 8.376546970471077e-09, - "loss": 0.8345, - "step": 34843 - }, - { - "epoch": 0.9873898381932047, - "grad_norm": 0.0, - "learning_rate": 8.33903130621594e-09, - "loss": 0.8782, - "step": 34844 - }, - { - "epoch": 0.9874181756354672, - "grad_norm": 0.0, - "learning_rate": 8.301599805432547e-09, - "loss": 0.8807, - "step": 34845 - }, - { - "epoch": 0.9874465130777296, - "grad_norm": 0.0, - "learning_rate": 8.264252468435097e-09, - "loss": 0.8108, - "step": 34846 - }, - { - "epoch": 0.9874748505199921, - "grad_norm": 0.0, - "learning_rate": 8.226989295537779e-09, - "loss": 0.8348, - "step": 34847 - }, - { - "epoch": 0.9875031879622546, - "grad_norm": 0.0, - "learning_rate": 8.189810287055899e-09, - "loss": 0.8191, - "step": 34848 - }, - { - "epoch": 0.9875315254045169, - "grad_norm": 0.0, - "learning_rate": 8.152715443300318e-09, - "loss": 0.7423, - "step": 34849 - }, - { - "epoch": 0.9875598628467794, - "grad_norm": 0.0, - "learning_rate": 8.11570476458523e-09, - "loss": 0.8158, - "step": 34850 - }, - { - "epoch": 0.9875882002890419, - "grad_norm": 0.0, - "learning_rate": 8.078778251222608e-09, - "loss": 0.7128, - "step": 34851 - }, - { - "epoch": 0.9876165377313044, - "grad_norm": 0.0, - "learning_rate": 8.041935903522202e-09, - "loss": 0.8238, - "step": 34852 - }, - { - "epoch": 0.9876448751735668, - "grad_norm": 0.0, - "learning_rate": 8.005177721794877e-09, - "loss": 0.8462, - "step": 34853 - }, - { - "epoch": 0.9876732126158293, - "grad_norm": 0.0, - "learning_rate": 7.968503706350384e-09, - "loss": 0.8082, - "step": 34854 - }, - { - "epoch": 0.9877015500580918, - "grad_norm": 0.0, - "learning_rate": 7.931913857498474e-09, - "loss": 0.8212, - "step": 34855 - }, - { - "epoch": 0.9877298875003542, - "grad_norm": 0.0, - "learning_rate": 7.895408175545571e-09, - "loss": 0.793, - "step": 34856 - }, - { - "epoch": 0.9877582249426167, - "grad_norm": 0.0, - "learning_rate": 7.858986660800316e-09, - "loss": 0.7762, - "step": 34857 - }, - { - "epoch": 0.9877865623848792, - "grad_norm": 0.0, - "learning_rate": 7.822649313569131e-09, - "loss": 0.7432, - "step": 34858 - }, - { - "epoch": 0.9878148998271417, - "grad_norm": 0.0, - "learning_rate": 7.786396134158437e-09, - "loss": 0.8998, - "step": 34859 - }, - { - "epoch": 0.987843237269404, - "grad_norm": 0.0, - "learning_rate": 7.750227122873544e-09, - "loss": 0.8923, - "step": 34860 - }, - { - "epoch": 0.9878715747116665, - "grad_norm": 0.0, - "learning_rate": 7.714142280019764e-09, - "loss": 0.987, - "step": 34861 - }, - { - "epoch": 0.987899912153929, - "grad_norm": 0.0, - "learning_rate": 7.678141605899081e-09, - "loss": 0.8035, - "step": 34862 - }, - { - "epoch": 0.9879282495961914, - "grad_norm": 0.0, - "learning_rate": 7.642225100816802e-09, - "loss": 0.948, - "step": 34863 - }, - { - "epoch": 0.9879565870384539, - "grad_norm": 0.0, - "learning_rate": 7.60639276507491e-09, - "loss": 0.8024, - "step": 34864 - }, - { - "epoch": 0.9879849244807164, - "grad_norm": 0.0, - "learning_rate": 7.570644598974274e-09, - "loss": 0.7637, - "step": 34865 - }, - { - "epoch": 0.9880132619229788, - "grad_norm": 0.0, - "learning_rate": 7.534980602816877e-09, - "loss": 0.713, - "step": 34866 - }, - { - "epoch": 0.9880415993652413, - "grad_norm": 0.0, - "learning_rate": 7.499400776902477e-09, - "loss": 0.7637, - "step": 34867 - }, - { - "epoch": 0.9880699368075038, - "grad_norm": 0.0, - "learning_rate": 7.463905121530834e-09, - "loss": 0.744, - "step": 34868 - }, - { - "epoch": 0.9880982742497663, - "grad_norm": 0.0, - "learning_rate": 7.42849363700282e-09, - "loss": 0.827, - "step": 34869 - }, - { - "epoch": 0.9881266116920286, - "grad_norm": 0.0, - "learning_rate": 7.393166323614865e-09, - "loss": 0.6825, - "step": 34870 - }, - { - "epoch": 0.9881549491342911, - "grad_norm": 0.0, - "learning_rate": 7.357923181664506e-09, - "loss": 0.7527, - "step": 34871 - }, - { - "epoch": 0.9881832865765536, - "grad_norm": 0.0, - "learning_rate": 7.3227642114492845e-09, - "loss": 0.7589, - "step": 34872 - }, - { - "epoch": 0.988211624018816, - "grad_norm": 0.0, - "learning_rate": 7.28768941326452e-09, - "loss": 0.7403, - "step": 34873 - }, - { - "epoch": 0.9882399614610785, - "grad_norm": 0.0, - "learning_rate": 7.252698787406642e-09, - "loss": 0.7694, - "step": 34874 - }, - { - "epoch": 0.988268298903341, - "grad_norm": 0.0, - "learning_rate": 7.21779233417097e-09, - "loss": 0.7758, - "step": 34875 - }, - { - "epoch": 0.9882966363456035, - "grad_norm": 0.0, - "learning_rate": 7.182970053849492e-09, - "loss": 0.7519, - "step": 34876 - }, - { - "epoch": 0.9883249737878659, - "grad_norm": 0.0, - "learning_rate": 7.148231946736417e-09, - "loss": 0.7974, - "step": 34877 - }, - { - "epoch": 0.9883533112301284, - "grad_norm": 0.0, - "learning_rate": 7.1135780131248445e-09, - "loss": 0.7612, - "step": 34878 - }, - { - "epoch": 0.9883816486723909, - "grad_norm": 0.0, - "learning_rate": 7.079008253306763e-09, - "loss": 0.823, - "step": 34879 - }, - { - "epoch": 0.9884099861146532, - "grad_norm": 0.0, - "learning_rate": 7.0445226675719404e-09, - "loss": 0.7587, - "step": 34880 - }, - { - "epoch": 0.9884383235569157, - "grad_norm": 0.0, - "learning_rate": 7.010121256213476e-09, - "loss": 0.812, - "step": 34881 - }, - { - "epoch": 0.9884666609991782, - "grad_norm": 0.0, - "learning_rate": 6.975804019517807e-09, - "loss": 0.8431, - "step": 34882 - }, - { - "epoch": 0.9884949984414407, - "grad_norm": 0.0, - "learning_rate": 6.941570957776922e-09, - "loss": 0.8113, - "step": 34883 - }, - { - "epoch": 0.9885233358837031, - "grad_norm": 0.0, - "learning_rate": 6.907422071278369e-09, - "loss": 0.7014, - "step": 34884 - }, - { - "epoch": 0.9885516733259656, - "grad_norm": 0.0, - "learning_rate": 6.873357360308586e-09, - "loss": 0.8088, - "step": 34885 - }, - { - "epoch": 0.9885800107682281, - "grad_norm": 0.0, - "learning_rate": 6.839376825155119e-09, - "loss": 0.8495, - "step": 34886 - }, - { - "epoch": 0.9886083482104905, - "grad_norm": 0.0, - "learning_rate": 6.805480466105519e-09, - "loss": 0.7792, - "step": 34887 - }, - { - "epoch": 0.988636685652753, - "grad_norm": 0.0, - "learning_rate": 6.771668283442889e-09, - "loss": 0.8434, - "step": 34888 - }, - { - "epoch": 0.9886650230950155, - "grad_norm": 0.0, - "learning_rate": 6.73794027745478e-09, - "loss": 0.677, - "step": 34889 - }, - { - "epoch": 0.9886933605372779, - "grad_norm": 0.0, - "learning_rate": 6.704296448423186e-09, - "loss": 0.781, - "step": 34890 - }, - { - "epoch": 0.9887216979795403, - "grad_norm": 0.0, - "learning_rate": 6.670736796632326e-09, - "loss": 0.7632, - "step": 34891 - }, - { - "epoch": 0.9887500354218028, - "grad_norm": 0.0, - "learning_rate": 6.637261322364197e-09, - "loss": 0.8031, - "step": 34892 - }, - { - "epoch": 0.9887783728640653, - "grad_norm": 0.0, - "learning_rate": 6.603870025901904e-09, - "loss": 0.7006, - "step": 34893 - }, - { - "epoch": 0.9888067103063277, - "grad_norm": 0.0, - "learning_rate": 6.570562907526335e-09, - "loss": 0.7057, - "step": 34894 - }, - { - "epoch": 0.9888350477485902, - "grad_norm": 0.0, - "learning_rate": 6.537339967518374e-09, - "loss": 0.901, - "step": 34895 - }, - { - "epoch": 0.9888633851908527, - "grad_norm": 0.0, - "learning_rate": 6.504201206156691e-09, - "loss": 0.8788, - "step": 34896 - }, - { - "epoch": 0.9888917226331151, - "grad_norm": 0.0, - "learning_rate": 6.4711466237210585e-09, - "loss": 0.8224, - "step": 34897 - }, - { - "epoch": 0.9889200600753776, - "grad_norm": 0.0, - "learning_rate": 6.438176220490144e-09, - "loss": 0.89, - "step": 34898 - }, - { - "epoch": 0.9889483975176401, - "grad_norm": 0.0, - "learning_rate": 6.405289996741504e-09, - "loss": 0.8415, - "step": 34899 - }, - { - "epoch": 0.9889767349599026, - "grad_norm": 0.0, - "learning_rate": 6.372487952751582e-09, - "loss": 0.8054, - "step": 34900 - }, - { - "epoch": 0.989005072402165, - "grad_norm": 0.0, - "learning_rate": 6.339770088797937e-09, - "loss": 0.7624, - "step": 34901 - }, - { - "epoch": 0.9890334098444274, - "grad_norm": 0.0, - "learning_rate": 6.307136405155901e-09, - "loss": 0.7081, - "step": 34902 - }, - { - "epoch": 0.9890617472866899, - "grad_norm": 0.0, - "learning_rate": 6.274586902098589e-09, - "loss": 0.8156, - "step": 34903 - }, - { - "epoch": 0.9890900847289523, - "grad_norm": 0.0, - "learning_rate": 6.242121579902449e-09, - "loss": 0.7976, - "step": 34904 - }, - { - "epoch": 0.9891184221712148, - "grad_norm": 0.0, - "learning_rate": 6.209740438839485e-09, - "loss": 0.8035, - "step": 34905 - }, - { - "epoch": 0.9891467596134773, - "grad_norm": 0.0, - "learning_rate": 6.17744347918281e-09, - "loss": 0.7176, - "step": 34906 - }, - { - "epoch": 0.9891750970557397, - "grad_norm": 0.0, - "learning_rate": 6.14523070120554e-09, - "loss": 0.837, - "step": 34907 - }, - { - "epoch": 0.9892034344980022, - "grad_norm": 0.0, - "learning_rate": 6.11310210517746e-09, - "loss": 0.7282, - "step": 34908 - }, - { - "epoch": 0.9892317719402647, - "grad_norm": 0.0, - "learning_rate": 6.081057691370573e-09, - "loss": 0.8356, - "step": 34909 - }, - { - "epoch": 0.9892601093825272, - "grad_norm": 0.0, - "learning_rate": 6.0490974600535546e-09, - "loss": 0.8617, - "step": 34910 - }, - { - "epoch": 0.9892884468247896, - "grad_norm": 0.0, - "learning_rate": 6.017221411496188e-09, - "loss": 0.8458, - "step": 34911 - }, - { - "epoch": 0.989316784267052, - "grad_norm": 0.0, - "learning_rate": 5.985429545967147e-09, - "loss": 0.7142, - "step": 34912 - }, - { - "epoch": 0.9893451217093145, - "grad_norm": 0.0, - "learning_rate": 5.953721863732886e-09, - "loss": 0.8264, - "step": 34913 - }, - { - "epoch": 0.9893734591515769, - "grad_norm": 0.0, - "learning_rate": 5.922098365063189e-09, - "loss": 0.8652, - "step": 34914 - }, - { - "epoch": 0.9894017965938394, - "grad_norm": 0.0, - "learning_rate": 5.890559050222289e-09, - "loss": 0.8298, - "step": 34915 - }, - { - "epoch": 0.9894301340361019, - "grad_norm": 0.0, - "learning_rate": 5.859103919475528e-09, - "loss": 0.7132, - "step": 34916 - }, - { - "epoch": 0.9894584714783644, - "grad_norm": 0.0, - "learning_rate": 5.8277329730904716e-09, - "loss": 0.7207, - "step": 34917 - }, - { - "epoch": 0.9894868089206268, - "grad_norm": 0.0, - "learning_rate": 5.796446211328022e-09, - "loss": 0.7122, - "step": 34918 - }, - { - "epoch": 0.9895151463628893, - "grad_norm": 0.0, - "learning_rate": 5.7652436344546315e-09, - "loss": 0.7733, - "step": 34919 - }, - { - "epoch": 0.9895434838051518, - "grad_norm": 0.0, - "learning_rate": 5.734125242731203e-09, - "loss": 0.8879, - "step": 34920 - }, - { - "epoch": 0.9895718212474142, - "grad_norm": 0.0, - "learning_rate": 5.70309103642086e-09, - "loss": 0.8004, - "step": 34921 - }, - { - "epoch": 0.9896001586896767, - "grad_norm": 0.0, - "learning_rate": 5.672141015784505e-09, - "loss": 0.7832, - "step": 34922 - }, - { - "epoch": 0.9896284961319392, - "grad_norm": 0.0, - "learning_rate": 5.641275181083039e-09, - "loss": 0.8536, - "step": 34923 - }, - { - "epoch": 0.9896568335742016, - "grad_norm": 0.0, - "learning_rate": 5.610493532576256e-09, - "loss": 0.8652, - "step": 34924 - }, - { - "epoch": 0.989685171016464, - "grad_norm": 0.0, - "learning_rate": 5.579796070523946e-09, - "loss": 0.8575, - "step": 34925 - }, - { - "epoch": 0.9897135084587265, - "grad_norm": 0.0, - "learning_rate": 5.549182795183683e-09, - "loss": 0.9028, - "step": 34926 - }, - { - "epoch": 0.989741845900989, - "grad_norm": 0.0, - "learning_rate": 5.518653706814148e-09, - "loss": 0.8187, - "step": 34927 - }, - { - "epoch": 0.9897701833432514, - "grad_norm": 0.0, - "learning_rate": 5.488208805672912e-09, - "loss": 0.9962, - "step": 34928 - }, - { - "epoch": 0.9897985207855139, - "grad_norm": 0.0, - "learning_rate": 5.457848092015328e-09, - "loss": 0.8578, - "step": 34929 - }, - { - "epoch": 0.9898268582277764, - "grad_norm": 0.0, - "learning_rate": 5.427571566097856e-09, - "loss": 0.7475, - "step": 34930 - }, - { - "epoch": 0.9898551956700388, - "grad_norm": 0.0, - "learning_rate": 5.3973792281758475e-09, - "loss": 0.9372, - "step": 34931 - }, - { - "epoch": 0.9898835331123013, - "grad_norm": 0.0, - "learning_rate": 5.367271078502434e-09, - "loss": 0.7552, - "step": 34932 - }, - { - "epoch": 0.9899118705545638, - "grad_norm": 0.0, - "learning_rate": 5.337247117331856e-09, - "loss": 0.8374, - "step": 34933 - }, - { - "epoch": 0.9899402079968262, - "grad_norm": 0.0, - "learning_rate": 5.307307344918355e-09, - "loss": 0.7913, - "step": 34934 - }, - { - "epoch": 0.9899685454390886, - "grad_norm": 0.0, - "learning_rate": 5.277451761511732e-09, - "loss": 0.7746, - "step": 34935 - }, - { - "epoch": 0.9899968828813511, - "grad_norm": 0.0, - "learning_rate": 5.247680367364005e-09, - "loss": 0.7822, - "step": 34936 - }, - { - "epoch": 0.9900252203236136, - "grad_norm": 0.0, - "learning_rate": 5.217993162727197e-09, - "loss": 0.7133, - "step": 34937 - }, - { - "epoch": 0.990053557765876, - "grad_norm": 0.0, - "learning_rate": 5.188390147851108e-09, - "loss": 0.7595, - "step": 34938 - }, - { - "epoch": 0.9900818952081385, - "grad_norm": 0.0, - "learning_rate": 5.158871322984426e-09, - "loss": 0.8202, - "step": 34939 - }, - { - "epoch": 0.990110232650401, - "grad_norm": 0.0, - "learning_rate": 5.129436688375844e-09, - "loss": 0.7881, - "step": 34940 - }, - { - "epoch": 0.9901385700926635, - "grad_norm": 0.0, - "learning_rate": 5.100086244274049e-09, - "loss": 0.8083, - "step": 34941 - }, - { - "epoch": 0.9901669075349259, - "grad_norm": 0.0, - "learning_rate": 5.070819990925513e-09, - "loss": 0.7802, - "step": 34942 - }, - { - "epoch": 0.9901952449771884, - "grad_norm": 0.0, - "learning_rate": 5.041637928576704e-09, - "loss": 0.7243, - "step": 34943 - }, - { - "epoch": 0.9902235824194509, - "grad_norm": 0.0, - "learning_rate": 5.012540057474091e-09, - "loss": 0.82, - "step": 34944 - }, - { - "epoch": 0.9902519198617132, - "grad_norm": 0.0, - "learning_rate": 4.983526377861925e-09, - "loss": 0.8277, - "step": 34945 - }, - { - "epoch": 0.9902802573039757, - "grad_norm": 0.0, - "learning_rate": 4.9545968899855635e-09, - "loss": 0.9065, - "step": 34946 - }, - { - "epoch": 0.9903085947462382, - "grad_norm": 0.0, - "learning_rate": 4.925751594087036e-09, - "loss": 0.8741, - "step": 34947 - }, - { - "epoch": 0.9903369321885007, - "grad_norm": 0.0, - "learning_rate": 4.896990490411701e-09, - "loss": 0.7281, - "step": 34948 - }, - { - "epoch": 0.9903652696307631, - "grad_norm": 0.0, - "learning_rate": 4.868313579200479e-09, - "loss": 0.7187, - "step": 34949 - }, - { - "epoch": 0.9903936070730256, - "grad_norm": 0.0, - "learning_rate": 4.839720860694286e-09, - "loss": 0.7421, - "step": 34950 - }, - { - "epoch": 0.9904219445152881, - "grad_norm": 0.0, - "learning_rate": 4.811212335136262e-09, - "loss": 0.7329, - "step": 34951 - }, - { - "epoch": 0.9904502819575505, - "grad_norm": 0.0, - "learning_rate": 4.782788002763994e-09, - "loss": 0.825, - "step": 34952 - }, - { - "epoch": 0.990478619399813, - "grad_norm": 0.0, - "learning_rate": 4.754447863817291e-09, - "loss": 0.7292, - "step": 34953 - }, - { - "epoch": 0.9905069568420755, - "grad_norm": 0.0, - "learning_rate": 4.726191918537071e-09, - "loss": 0.8895, - "step": 34954 - }, - { - "epoch": 0.9905352942843378, - "grad_norm": 0.0, - "learning_rate": 4.698020167158701e-09, - "loss": 0.7859, - "step": 34955 - }, - { - "epoch": 0.9905636317266003, - "grad_norm": 0.0, - "learning_rate": 4.66993260992088e-09, - "loss": 0.7269, - "step": 34956 - }, - { - "epoch": 0.9905919691688628, - "grad_norm": 0.0, - "learning_rate": 4.6419292470589735e-09, - "loss": 0.8282, - "step": 34957 - }, - { - "epoch": 0.9906203066111253, - "grad_norm": 0.0, - "learning_rate": 4.6140100788105716e-09, - "loss": 0.7571, - "step": 34958 - }, - { - "epoch": 0.9906486440533877, - "grad_norm": 0.0, - "learning_rate": 4.586175105411039e-09, - "loss": 0.7042, - "step": 34959 - }, - { - "epoch": 0.9906769814956502, - "grad_norm": 0.0, - "learning_rate": 4.558424327092415e-09, - "loss": 0.8486, - "step": 34960 - }, - { - "epoch": 0.9907053189379127, - "grad_norm": 0.0, - "learning_rate": 4.530757744090064e-09, - "loss": 0.7835, - "step": 34961 - }, - { - "epoch": 0.9907336563801751, - "grad_norm": 0.0, - "learning_rate": 4.5031753566382455e-09, - "loss": 0.8889, - "step": 34962 - }, - { - "epoch": 0.9907619938224376, - "grad_norm": 0.0, - "learning_rate": 4.475677164966774e-09, - "loss": 0.7996, - "step": 34963 - }, - { - "epoch": 0.9907903312647001, - "grad_norm": 0.0, - "learning_rate": 4.4482631693076874e-09, - "loss": 0.7957, - "step": 34964 - }, - { - "epoch": 0.9908186687069626, - "grad_norm": 0.0, - "learning_rate": 4.420933369894131e-09, - "loss": 0.8125, - "step": 34965 - }, - { - "epoch": 0.9908470061492249, - "grad_norm": 0.0, - "learning_rate": 4.393687766953703e-09, - "loss": 0.7586, - "step": 34966 - }, - { - "epoch": 0.9908753435914874, - "grad_norm": 0.0, - "learning_rate": 4.3665263607184375e-09, - "loss": 0.8766, - "step": 34967 - }, - { - "epoch": 0.9909036810337499, - "grad_norm": 0.0, - "learning_rate": 4.3394491514137105e-09, - "loss": 0.7348, - "step": 34968 - }, - { - "epoch": 0.9909320184760123, - "grad_norm": 0.0, - "learning_rate": 4.312456139271559e-09, - "loss": 0.7439, - "step": 34969 - }, - { - "epoch": 0.9909603559182748, - "grad_norm": 0.0, - "learning_rate": 4.2855473245162486e-09, - "loss": 0.8453, - "step": 34970 - }, - { - "epoch": 0.9909886933605373, - "grad_norm": 0.0, - "learning_rate": 4.2587227073753735e-09, - "loss": 0.8016, - "step": 34971 - }, - { - "epoch": 0.9910170308027998, - "grad_norm": 0.0, - "learning_rate": 4.23198228807542e-09, - "loss": 0.86, - "step": 34972 - }, - { - "epoch": 0.9910453682450622, - "grad_norm": 0.0, - "learning_rate": 4.205326066841764e-09, - "loss": 0.8717, - "step": 34973 - }, - { - "epoch": 0.9910737056873247, - "grad_norm": 0.0, - "learning_rate": 4.178754043898669e-09, - "loss": 0.7102, - "step": 34974 - }, - { - "epoch": 0.9911020431295872, - "grad_norm": 0.0, - "learning_rate": 4.152266219469292e-09, - "loss": 0.8242, - "step": 34975 - }, - { - "epoch": 0.9911303805718495, - "grad_norm": 0.0, - "learning_rate": 4.125862593776786e-09, - "loss": 0.8066, - "step": 34976 - }, - { - "epoch": 0.991158718014112, - "grad_norm": 0.0, - "learning_rate": 4.099543167044307e-09, - "loss": 0.7433, - "step": 34977 - }, - { - "epoch": 0.9911870554563745, - "grad_norm": 0.0, - "learning_rate": 4.073307939493898e-09, - "loss": 0.7806, - "step": 34978 - }, - { - "epoch": 0.9912153928986369, - "grad_norm": 0.0, - "learning_rate": 4.0471569113453844e-09, - "loss": 0.7858, - "step": 34979 - }, - { - "epoch": 0.9912437303408994, - "grad_norm": 0.0, - "learning_rate": 4.021090082819701e-09, - "loss": 0.7102, - "step": 34980 - }, - { - "epoch": 0.9912720677831619, - "grad_norm": 0.0, - "learning_rate": 3.9951074541366706e-09, - "loss": 0.7407, - "step": 34981 - }, - { - "epoch": 0.9913004052254244, - "grad_norm": 0.0, - "learning_rate": 3.969209025513898e-09, - "loss": 0.7513, - "step": 34982 - }, - { - "epoch": 0.9913287426676868, - "grad_norm": 0.0, - "learning_rate": 3.943394797171207e-09, - "loss": 0.811, - "step": 34983 - }, - { - "epoch": 0.9913570801099493, - "grad_norm": 0.0, - "learning_rate": 3.917664769323981e-09, - "loss": 0.85, - "step": 34984 - }, - { - "epoch": 0.9913854175522118, - "grad_norm": 0.0, - "learning_rate": 3.892018942192044e-09, - "loss": 0.831, - "step": 34985 - }, - { - "epoch": 0.9914137549944742, - "grad_norm": 0.0, - "learning_rate": 3.866457315988559e-09, - "loss": 0.8124, - "step": 34986 - }, - { - "epoch": 0.9914420924367366, - "grad_norm": 0.0, - "learning_rate": 3.840979890930019e-09, - "loss": 0.8204, - "step": 34987 - }, - { - "epoch": 0.9914704298789991, - "grad_norm": 0.0, - "learning_rate": 3.815586667230697e-09, - "loss": 0.7852, - "step": 34988 - }, - { - "epoch": 0.9914987673212616, - "grad_norm": 0.0, - "learning_rate": 3.790277645104867e-09, - "loss": 0.8322, - "step": 34989 - }, - { - "epoch": 0.991527104763524, - "grad_norm": 0.0, - "learning_rate": 3.765052824765691e-09, - "loss": 0.8677, - "step": 34990 - }, - { - "epoch": 0.9915554422057865, - "grad_norm": 0.0, - "learning_rate": 3.739912206425222e-09, - "loss": 0.8729, - "step": 34991 - }, - { - "epoch": 0.991583779648049, - "grad_norm": 0.0, - "learning_rate": 3.714855790295513e-09, - "loss": 0.8112, - "step": 34992 - }, - { - "epoch": 0.9916121170903114, - "grad_norm": 0.0, - "learning_rate": 3.689883576587505e-09, - "loss": 0.7538, - "step": 34993 - }, - { - "epoch": 0.9916404545325739, - "grad_norm": 0.0, - "learning_rate": 3.6649955655121415e-09, - "loss": 0.7566, - "step": 34994 - }, - { - "epoch": 0.9916687919748364, - "grad_norm": 0.0, - "learning_rate": 3.6401917572781444e-09, - "loss": 0.9157, - "step": 34995 - }, - { - "epoch": 0.9916971294170989, - "grad_norm": 0.0, - "learning_rate": 3.6154721520953453e-09, - "loss": 0.7151, - "step": 34996 - }, - { - "epoch": 0.9917254668593612, - "grad_norm": 0.0, - "learning_rate": 3.5908367501702455e-09, - "loss": 0.8303, - "step": 34997 - }, - { - "epoch": 0.9917538043016237, - "grad_norm": 0.0, - "learning_rate": 3.5662855517126782e-09, - "loss": 0.7597, - "step": 34998 - }, - { - "epoch": 0.9917821417438862, - "grad_norm": 0.0, - "learning_rate": 3.541818556928034e-09, - "loss": 0.7604, - "step": 34999 - }, - { - "epoch": 0.9918104791861486, - "grad_norm": 0.0, - "learning_rate": 3.517435766022814e-09, - "loss": 0.7609, - "step": 35000 - }, - { - "epoch": 0.9918388166284111, - "grad_norm": 0.0, - "learning_rate": 3.4931371792035207e-09, - "loss": 0.8181, - "step": 35001 - }, - { - "epoch": 0.9918671540706736, - "grad_norm": 0.0, - "learning_rate": 3.468922796672214e-09, - "loss": 0.7349, - "step": 35002 - }, - { - "epoch": 0.991895491512936, - "grad_norm": 0.0, - "learning_rate": 3.4447926186342852e-09, - "loss": 0.7709, - "step": 35003 - }, - { - "epoch": 0.9919238289551985, - "grad_norm": 0.0, - "learning_rate": 3.420746645292905e-09, - "loss": 0.8859, - "step": 35004 - }, - { - "epoch": 0.991952166397461, - "grad_norm": 0.0, - "learning_rate": 3.3967848768512445e-09, - "loss": 0.8948, - "step": 35005 - }, - { - "epoch": 0.9919805038397235, - "grad_norm": 0.0, - "learning_rate": 3.3729073135113642e-09, - "loss": 0.8082, - "step": 35006 - }, - { - "epoch": 0.9920088412819859, - "grad_norm": 0.0, - "learning_rate": 3.3491139554719944e-09, - "loss": 0.7473, - "step": 35007 - }, - { - "epoch": 0.9920371787242483, - "grad_norm": 0.0, - "learning_rate": 3.325404802936305e-09, - "loss": 0.9081, - "step": 35008 - }, - { - "epoch": 0.9920655161665108, - "grad_norm": 0.0, - "learning_rate": 3.3017798561030268e-09, - "loss": 0.8548, - "step": 35009 - }, - { - "epoch": 0.9920938536087732, - "grad_norm": 0.0, - "learning_rate": 3.278239115169779e-09, - "loss": 0.7362, - "step": 35010 - }, - { - "epoch": 0.9921221910510357, - "grad_norm": 0.0, - "learning_rate": 3.254782580337512e-09, - "loss": 0.7786, - "step": 35011 - }, - { - "epoch": 0.9921505284932982, - "grad_norm": 0.0, - "learning_rate": 3.2314102518016256e-09, - "loss": 0.6927, - "step": 35012 - }, - { - "epoch": 0.9921788659355607, - "grad_norm": 0.0, - "learning_rate": 3.208122129759739e-09, - "loss": 0.9102, - "step": 35013 - }, - { - "epoch": 0.9922072033778231, - "grad_norm": 0.0, - "learning_rate": 3.1849182144083614e-09, - "loss": 0.7758, - "step": 35014 - }, - { - "epoch": 0.9922355408200856, - "grad_norm": 0.0, - "learning_rate": 3.1617985059428923e-09, - "loss": 0.693, - "step": 35015 - }, - { - "epoch": 0.9922638782623481, - "grad_norm": 0.0, - "learning_rate": 3.138763004557621e-09, - "loss": 0.7239, - "step": 35016 - }, - { - "epoch": 0.9922922157046105, - "grad_norm": 0.0, - "learning_rate": 3.1158117104468365e-09, - "loss": 0.7237, - "step": 35017 - }, - { - "epoch": 0.992320553146873, - "grad_norm": 0.0, - "learning_rate": 3.0929446238037174e-09, - "loss": 0.6867, - "step": 35018 - }, - { - "epoch": 0.9923488905891354, - "grad_norm": 0.0, - "learning_rate": 3.0701617448203325e-09, - "loss": 0.8159, - "step": 35019 - }, - { - "epoch": 0.9923772280313979, - "grad_norm": 0.0, - "learning_rate": 3.0474630736898604e-09, - "loss": 0.7585, - "step": 35020 - }, - { - "epoch": 0.9924055654736603, - "grad_norm": 0.0, - "learning_rate": 3.0248486106032593e-09, - "loss": 0.8255, - "step": 35021 - }, - { - "epoch": 0.9924339029159228, - "grad_norm": 0.0, - "learning_rate": 3.0023183557503776e-09, - "loss": 0.8201, - "step": 35022 - }, - { - "epoch": 0.9924622403581853, - "grad_norm": 0.0, - "learning_rate": 2.9798723093210635e-09, - "loss": 0.7787, - "step": 35023 - }, - { - "epoch": 0.9924905778004477, - "grad_norm": 0.0, - "learning_rate": 2.957510471504055e-09, - "loss": 0.8271, - "step": 35024 - }, - { - "epoch": 0.9925189152427102, - "grad_norm": 0.0, - "learning_rate": 2.9352328424891997e-09, - "loss": 0.7574, - "step": 35025 - }, - { - "epoch": 0.9925472526849727, - "grad_norm": 0.0, - "learning_rate": 2.9130394224630155e-09, - "loss": 0.8404, - "step": 35026 - }, - { - "epoch": 0.9925755901272351, - "grad_norm": 0.0, - "learning_rate": 2.89093021161202e-09, - "loss": 0.7969, - "step": 35027 - }, - { - "epoch": 0.9926039275694976, - "grad_norm": 0.0, - "learning_rate": 2.8689052101238402e-09, - "loss": 0.7817, - "step": 35028 - }, - { - "epoch": 0.99263226501176, - "grad_norm": 0.0, - "learning_rate": 2.8469644181827736e-09, - "loss": 0.822, - "step": 35029 - }, - { - "epoch": 0.9926606024540225, - "grad_norm": 0.0, - "learning_rate": 2.825107835974228e-09, - "loss": 0.8692, - "step": 35030 - }, - { - "epoch": 0.9926889398962849, - "grad_norm": 0.0, - "learning_rate": 2.8033354636824993e-09, - "loss": 0.8297, - "step": 35031 - }, - { - "epoch": 0.9927172773385474, - "grad_norm": 0.0, - "learning_rate": 2.781647301489665e-09, - "loss": 0.8436, - "step": 35032 - }, - { - "epoch": 0.9927456147808099, - "grad_norm": 0.0, - "learning_rate": 2.7600433495800215e-09, - "loss": 0.7523, - "step": 35033 - }, - { - "epoch": 0.9927739522230723, - "grad_norm": 0.0, - "learning_rate": 2.738523608135646e-09, - "loss": 0.7762, - "step": 35034 - }, - { - "epoch": 0.9928022896653348, - "grad_norm": 0.0, - "learning_rate": 2.717088077335284e-09, - "loss": 0.8175, - "step": 35035 - }, - { - "epoch": 0.9928306271075973, - "grad_norm": 0.0, - "learning_rate": 2.695736757363232e-09, - "loss": 0.7872, - "step": 35036 - }, - { - "epoch": 0.9928589645498598, - "grad_norm": 0.0, - "learning_rate": 2.6744696483960166e-09, - "loss": 0.8695, - "step": 35037 - }, - { - "epoch": 0.9928873019921222, - "grad_norm": 0.0, - "learning_rate": 2.6532867506146033e-09, - "loss": 0.7232, - "step": 35038 - }, - { - "epoch": 0.9929156394343847, - "grad_norm": 0.0, - "learning_rate": 2.632188064196628e-09, - "loss": 0.8693, - "step": 35039 - }, - { - "epoch": 0.9929439768766471, - "grad_norm": 0.0, - "learning_rate": 2.6111735893208366e-09, - "loss": 0.8744, - "step": 35040 - }, - { - "epoch": 0.9929723143189095, - "grad_norm": 0.0, - "learning_rate": 2.5902433261637548e-09, - "loss": 0.65, - "step": 35041 - }, - { - "epoch": 0.993000651761172, - "grad_norm": 0.0, - "learning_rate": 2.5693972749007977e-09, - "loss": 0.7386, - "step": 35042 - }, - { - "epoch": 0.9930289892034345, - "grad_norm": 0.0, - "learning_rate": 2.548635435708491e-09, - "loss": 0.6682, - "step": 35043 - }, - { - "epoch": 0.993057326645697, - "grad_norm": 0.0, - "learning_rate": 2.527957808761139e-09, - "loss": 0.7261, - "step": 35044 - }, - { - "epoch": 0.9930856640879594, - "grad_norm": 0.0, - "learning_rate": 2.5073643942341575e-09, - "loss": 0.8646, - "step": 35045 - }, - { - "epoch": 0.9931140015302219, - "grad_norm": 0.0, - "learning_rate": 2.486855192299631e-09, - "loss": 0.7689, - "step": 35046 - }, - { - "epoch": 0.9931423389724844, - "grad_norm": 0.0, - "learning_rate": 2.466430203130754e-09, - "loss": 0.8447, - "step": 35047 - }, - { - "epoch": 0.9931706764147468, - "grad_norm": 0.0, - "learning_rate": 2.4460894268996116e-09, - "loss": 0.8335, - "step": 35048 - }, - { - "epoch": 0.9931990138570093, - "grad_norm": 0.0, - "learning_rate": 2.4258328637771776e-09, - "loss": 0.7811, - "step": 35049 - }, - { - "epoch": 0.9932273512992718, - "grad_norm": 0.0, - "learning_rate": 2.405660513934427e-09, - "loss": 0.8089, - "step": 35050 - }, - { - "epoch": 0.9932556887415341, - "grad_norm": 0.0, - "learning_rate": 2.3855723775423334e-09, - "loss": 0.7501, - "step": 35051 - }, - { - "epoch": 0.9932840261837966, - "grad_norm": 0.0, - "learning_rate": 2.3655684547685408e-09, - "loss": 0.8704, - "step": 35052 - }, - { - "epoch": 0.9933123636260591, - "grad_norm": 0.0, - "learning_rate": 2.3456487457818033e-09, - "loss": 0.7462, - "step": 35053 - }, - { - "epoch": 0.9933407010683216, - "grad_norm": 0.0, - "learning_rate": 2.3258132507508745e-09, - "loss": 0.7949, - "step": 35054 - }, - { - "epoch": 0.993369038510584, - "grad_norm": 0.0, - "learning_rate": 2.306061969841178e-09, - "loss": 0.7234, - "step": 35055 - }, - { - "epoch": 0.9933973759528465, - "grad_norm": 0.0, - "learning_rate": 2.286394903220357e-09, - "loss": 0.7619, - "step": 35056 - }, - { - "epoch": 0.993425713395109, - "grad_norm": 0.0, - "learning_rate": 2.266812051054945e-09, - "loss": 0.6895, - "step": 35057 - }, - { - "epoch": 0.9934540508373714, - "grad_norm": 0.0, - "learning_rate": 2.247313413507035e-09, - "loss": 0.8017, - "step": 35058 - }, - { - "epoch": 0.9934823882796339, - "grad_norm": 0.0, - "learning_rate": 2.2278989907442706e-09, - "loss": 0.7722, - "step": 35059 - }, - { - "epoch": 0.9935107257218964, - "grad_norm": 0.0, - "learning_rate": 2.2085687829276336e-09, - "loss": 0.7785, - "step": 35060 - }, - { - "epoch": 0.9935390631641589, - "grad_norm": 0.0, - "learning_rate": 2.1893227902203273e-09, - "loss": 0.7727, - "step": 35061 - }, - { - "epoch": 0.9935674006064212, - "grad_norm": 0.0, - "learning_rate": 2.1701610127855543e-09, - "loss": 0.7915, - "step": 35062 - }, - { - "epoch": 0.9935957380486837, - "grad_norm": 0.0, - "learning_rate": 2.151083450784297e-09, - "loss": 0.7948, - "step": 35063 - }, - { - "epoch": 0.9936240754909462, - "grad_norm": 0.0, - "learning_rate": 2.1320901043764276e-09, - "loss": 0.752, - "step": 35064 - }, - { - "epoch": 0.9936524129332086, - "grad_norm": 0.0, - "learning_rate": 2.113180973722928e-09, - "loss": 0.7859, - "step": 35065 - }, - { - "epoch": 0.9936807503754711, - "grad_norm": 0.0, - "learning_rate": 2.094356058982561e-09, - "loss": 0.8555, - "step": 35066 - }, - { - "epoch": 0.9937090878177336, - "grad_norm": 0.0, - "learning_rate": 2.075615360314087e-09, - "loss": 0.795, - "step": 35067 - }, - { - "epoch": 0.9937374252599961, - "grad_norm": 0.0, - "learning_rate": 2.0569588778762695e-09, - "loss": 0.7401, - "step": 35068 - }, - { - "epoch": 0.9937657627022585, - "grad_norm": 0.0, - "learning_rate": 2.0383866118245388e-09, - "loss": 0.8882, - "step": 35069 - }, - { - "epoch": 0.993794100144521, - "grad_norm": 0.0, - "learning_rate": 2.0198985623154368e-09, - "loss": 0.7973, - "step": 35070 - }, - { - "epoch": 0.9938224375867835, - "grad_norm": 0.0, - "learning_rate": 2.0014947295066145e-09, - "loss": 0.7991, - "step": 35071 - }, - { - "epoch": 0.9938507750290458, - "grad_norm": 0.0, - "learning_rate": 1.9831751135512833e-09, - "loss": 0.7389, - "step": 35072 - }, - { - "epoch": 0.9938791124713083, - "grad_norm": 0.0, - "learning_rate": 1.964939714603764e-09, - "loss": 0.7768, - "step": 35073 - }, - { - "epoch": 0.9939074499135708, - "grad_norm": 0.0, - "learning_rate": 1.946788532819488e-09, - "loss": 0.8172, - "step": 35074 - }, - { - "epoch": 0.9939357873558332, - "grad_norm": 0.0, - "learning_rate": 1.928721568349445e-09, - "loss": 0.7416, - "step": 35075 - }, - { - "epoch": 0.9939641247980957, - "grad_norm": 0.0, - "learning_rate": 1.910738821346847e-09, - "loss": 0.8829, - "step": 35076 - }, - { - "epoch": 0.9939924622403582, - "grad_norm": 0.0, - "learning_rate": 1.892840291961573e-09, - "loss": 0.7291, - "step": 35077 - }, - { - "epoch": 0.9940207996826207, - "grad_norm": 0.0, - "learning_rate": 1.875025980346834e-09, - "loss": 0.8473, - "step": 35078 - }, - { - "epoch": 0.9940491371248831, - "grad_norm": 0.0, - "learning_rate": 1.8572958866514e-09, - "loss": 0.8163, - "step": 35079 - }, - { - "epoch": 0.9940774745671456, - "grad_norm": 0.0, - "learning_rate": 1.8396500110240411e-09, - "loss": 0.8247, - "step": 35080 - }, - { - "epoch": 0.9941058120094081, - "grad_norm": 0.0, - "learning_rate": 1.8220883536146372e-09, - "loss": 0.6967, - "step": 35081 - }, - { - "epoch": 0.9941341494516704, - "grad_norm": 0.0, - "learning_rate": 1.8046109145697377e-09, - "loss": 0.7395, - "step": 35082 - }, - { - "epoch": 0.9941624868939329, - "grad_norm": 0.0, - "learning_rate": 1.7872176940381125e-09, - "loss": 0.8199, - "step": 35083 - }, - { - "epoch": 0.9941908243361954, - "grad_norm": 0.0, - "learning_rate": 1.769908692165201e-09, - "loss": 0.8203, - "step": 35084 - }, - { - "epoch": 0.9942191617784579, - "grad_norm": 0.0, - "learning_rate": 1.7526839090975522e-09, - "loss": 0.8034, - "step": 35085 - }, - { - "epoch": 0.9942474992207203, - "grad_norm": 0.0, - "learning_rate": 1.7355433449794955e-09, - "loss": 0.8188, - "step": 35086 - }, - { - "epoch": 0.9942758366629828, - "grad_norm": 0.0, - "learning_rate": 1.71848699995536e-09, - "loss": 0.821, - "step": 35087 - }, - { - "epoch": 0.9943041741052453, - "grad_norm": 0.0, - "learning_rate": 1.7015148741694742e-09, - "loss": 0.7862, - "step": 35088 - }, - { - "epoch": 0.9943325115475077, - "grad_norm": 0.0, - "learning_rate": 1.6846269677650574e-09, - "loss": 0.8395, - "step": 35089 - }, - { - "epoch": 0.9943608489897702, - "grad_norm": 0.0, - "learning_rate": 1.6678232808831074e-09, - "loss": 0.7662, - "step": 35090 - }, - { - "epoch": 0.9943891864320327, - "grad_norm": 0.0, - "learning_rate": 1.6511038136657332e-09, - "loss": 0.8101, - "step": 35091 - }, - { - "epoch": 0.994417523874295, - "grad_norm": 0.0, - "learning_rate": 1.6344685662539328e-09, - "loss": 0.8316, - "step": 35092 - }, - { - "epoch": 0.9944458613165575, - "grad_norm": 0.0, - "learning_rate": 1.6179175387887048e-09, - "loss": 0.8119, - "step": 35093 - }, - { - "epoch": 0.99447419875882, - "grad_norm": 0.0, - "learning_rate": 1.6014507314077165e-09, - "loss": 0.7771, - "step": 35094 - }, - { - "epoch": 0.9945025362010825, - "grad_norm": 0.0, - "learning_rate": 1.5850681442508563e-09, - "loss": 0.8605, - "step": 35095 - }, - { - "epoch": 0.9945308736433449, - "grad_norm": 0.0, - "learning_rate": 1.568769777455792e-09, - "loss": 0.901, - "step": 35096 - }, - { - "epoch": 0.9945592110856074, - "grad_norm": 0.0, - "learning_rate": 1.5525556311590807e-09, - "loss": 0.9694, - "step": 35097 - }, - { - "epoch": 0.9945875485278699, - "grad_norm": 0.0, - "learning_rate": 1.53642570549839e-09, - "loss": 0.7795, - "step": 35098 - }, - { - "epoch": 0.9946158859701323, - "grad_norm": 0.0, - "learning_rate": 1.5203800006102776e-09, - "loss": 0.7675, - "step": 35099 - }, - { - "epoch": 0.9946442234123948, - "grad_norm": 0.0, - "learning_rate": 1.5044185166279702e-09, - "loss": 0.8041, - "step": 35100 - }, - { - "epoch": 0.9946725608546573, - "grad_norm": 0.0, - "learning_rate": 1.488541253686915e-09, - "loss": 0.726, - "step": 35101 - }, - { - "epoch": 0.9947008982969198, - "grad_norm": 0.0, - "learning_rate": 1.4727482119203385e-09, - "loss": 0.8014, - "step": 35102 - }, - { - "epoch": 0.9947292357391821, - "grad_norm": 0.0, - "learning_rate": 1.4570393914614678e-09, - "loss": 0.7551, - "step": 35103 - }, - { - "epoch": 0.9947575731814446, - "grad_norm": 0.0, - "learning_rate": 1.4414147924435295e-09, - "loss": 0.7835, - "step": 35104 - }, - { - "epoch": 0.9947859106237071, - "grad_norm": 0.0, - "learning_rate": 1.4258744149975301e-09, - "loss": 0.8169, - "step": 35105 - }, - { - "epoch": 0.9948142480659695, - "grad_norm": 0.0, - "learning_rate": 1.4104182592544756e-09, - "loss": 0.7419, - "step": 35106 - }, - { - "epoch": 0.994842585508232, - "grad_norm": 0.0, - "learning_rate": 1.3950463253431523e-09, - "loss": 0.7997, - "step": 35107 - }, - { - "epoch": 0.9948709229504945, - "grad_norm": 0.0, - "learning_rate": 1.3797586133956763e-09, - "loss": 0.7942, - "step": 35108 - }, - { - "epoch": 0.994899260392757, - "grad_norm": 0.0, - "learning_rate": 1.3645551235386134e-09, - "loss": 0.8055, - "step": 35109 - }, - { - "epoch": 0.9949275978350194, - "grad_norm": 0.0, - "learning_rate": 1.3494358559007491e-09, - "loss": 0.755, - "step": 35110 - }, - { - "epoch": 0.9949559352772819, - "grad_norm": 0.0, - "learning_rate": 1.3344008106097594e-09, - "loss": 0.809, - "step": 35111 - }, - { - "epoch": 0.9949842727195444, - "grad_norm": 0.0, - "learning_rate": 1.3194499877910994e-09, - "loss": 0.8432, - "step": 35112 - }, - { - "epoch": 0.9950126101618068, - "grad_norm": 0.0, - "learning_rate": 1.3045833875724446e-09, - "loss": 0.7969, - "step": 35113 - }, - { - "epoch": 0.9950409476040692, - "grad_norm": 0.0, - "learning_rate": 1.28980101007814e-09, - "loss": 0.7684, - "step": 35114 - }, - { - "epoch": 0.9950692850463317, - "grad_norm": 0.0, - "learning_rate": 1.2751028554325307e-09, - "loss": 0.7612, - "step": 35115 - }, - { - "epoch": 0.9950976224885941, - "grad_norm": 0.0, - "learning_rate": 1.2604889237599615e-09, - "loss": 0.8135, - "step": 35116 - }, - { - "epoch": 0.9951259599308566, - "grad_norm": 0.0, - "learning_rate": 1.2459592151836674e-09, - "loss": 0.7066, - "step": 35117 - }, - { - "epoch": 0.9951542973731191, - "grad_norm": 0.0, - "learning_rate": 1.2315137298246626e-09, - "loss": 0.8177, - "step": 35118 - }, - { - "epoch": 0.9951826348153816, - "grad_norm": 0.0, - "learning_rate": 1.217152467806182e-09, - "loss": 0.7033, - "step": 35119 - }, - { - "epoch": 0.995210972257644, - "grad_norm": 0.0, - "learning_rate": 1.2028754292492395e-09, - "loss": 0.8807, - "step": 35120 - }, - { - "epoch": 0.9952393096999065, - "grad_norm": 0.0, - "learning_rate": 1.1886826142726293e-09, - "loss": 0.708, - "step": 35121 - }, - { - "epoch": 0.995267647142169, - "grad_norm": 0.0, - "learning_rate": 1.1745740229962555e-09, - "loss": 0.8444, - "step": 35122 - }, - { - "epoch": 0.9952959845844314, - "grad_norm": 0.0, - "learning_rate": 1.1605496555400219e-09, - "loss": 0.7778, - "step": 35123 - }, - { - "epoch": 0.9953243220266939, - "grad_norm": 0.0, - "learning_rate": 1.1466095120216126e-09, - "loss": 0.8148, - "step": 35124 - }, - { - "epoch": 0.9953526594689563, - "grad_norm": 0.0, - "learning_rate": 1.1327535925576006e-09, - "loss": 0.8123, - "step": 35125 - }, - { - "epoch": 0.9953809969112188, - "grad_norm": 0.0, - "learning_rate": 1.1189818972656697e-09, - "loss": 0.8466, - "step": 35126 - }, - { - "epoch": 0.9954093343534812, - "grad_norm": 0.0, - "learning_rate": 1.1052944262623932e-09, - "loss": 0.7877, - "step": 35127 - }, - { - "epoch": 0.9954376717957437, - "grad_norm": 0.0, - "learning_rate": 1.0916911796610142e-09, - "loss": 0.8417, - "step": 35128 - }, - { - "epoch": 0.9954660092380062, - "grad_norm": 0.0, - "learning_rate": 1.0781721575781057e-09, - "loss": 0.8035, - "step": 35129 - }, - { - "epoch": 0.9954943466802686, - "grad_norm": 0.0, - "learning_rate": 1.0647373601258003e-09, - "loss": 0.7345, - "step": 35130 - }, - { - "epoch": 0.9955226841225311, - "grad_norm": 0.0, - "learning_rate": 1.0513867874195615e-09, - "loss": 0.7201, - "step": 35131 - }, - { - "epoch": 0.9955510215647936, - "grad_norm": 0.0, - "learning_rate": 1.0381204395693011e-09, - "loss": 0.7713, - "step": 35132 - }, - { - "epoch": 0.9955793590070561, - "grad_norm": 0.0, - "learning_rate": 1.0249383166893723e-09, - "loss": 0.7939, - "step": 35133 - }, - { - "epoch": 0.9956076964493185, - "grad_norm": 0.0, - "learning_rate": 1.0118404188885767e-09, - "loss": 0.8499, - "step": 35134 - }, - { - "epoch": 0.995636033891581, - "grad_norm": 0.0, - "learning_rate": 9.988267462779366e-10, - "loss": 0.8211, - "step": 35135 - }, - { - "epoch": 0.9956643713338434, - "grad_norm": 0.0, - "learning_rate": 9.858972989673643e-10, - "loss": 0.7299, - "step": 35136 - }, - { - "epoch": 0.9956927087761058, - "grad_norm": 0.0, - "learning_rate": 9.730520770656616e-10, - "loss": 0.8408, - "step": 35137 - }, - { - "epoch": 0.9957210462183683, - "grad_norm": 0.0, - "learning_rate": 9.602910806805199e-10, - "loss": 0.8357, - "step": 35138 - }, - { - "epoch": 0.9957493836606308, - "grad_norm": 0.0, - "learning_rate": 9.476143099207414e-10, - "loss": 0.8444, - "step": 35139 - }, - { - "epoch": 0.9957777211028932, - "grad_norm": 0.0, - "learning_rate": 9.35021764891797e-10, - "loss": 0.7727, - "step": 35140 - }, - { - "epoch": 0.9958060585451557, - "grad_norm": 0.0, - "learning_rate": 9.225134457002682e-10, - "loss": 0.7528, - "step": 35141 - }, - { - "epoch": 0.9958343959874182, - "grad_norm": 0.0, - "learning_rate": 9.100893524505161e-10, - "loss": 0.7333, - "step": 35142 - }, - { - "epoch": 0.9958627334296807, - "grad_norm": 0.0, - "learning_rate": 8.97749485249122e-10, - "loss": 0.8191, - "step": 35143 - }, - { - "epoch": 0.9958910708719431, - "grad_norm": 0.0, - "learning_rate": 8.854938441993366e-10, - "loss": 0.8549, - "step": 35144 - }, - { - "epoch": 0.9959194083142056, - "grad_norm": 0.0, - "learning_rate": 8.733224294044107e-10, - "loss": 0.8115, - "step": 35145 - }, - { - "epoch": 0.995947745756468, - "grad_norm": 0.0, - "learning_rate": 8.612352409653746e-10, - "loss": 0.7174, - "step": 35146 - }, - { - "epoch": 0.9959760831987304, - "grad_norm": 0.0, - "learning_rate": 8.492322789865892e-10, - "loss": 0.8028, - "step": 35147 - }, - { - "epoch": 0.9960044206409929, - "grad_norm": 0.0, - "learning_rate": 8.373135435668645e-10, - "loss": 0.7653, - "step": 35148 - }, - { - "epoch": 0.9960327580832554, - "grad_norm": 0.0, - "learning_rate": 8.254790348072306e-10, - "loss": 0.7935, - "step": 35149 - }, - { - "epoch": 0.9960610955255179, - "grad_norm": 0.0, - "learning_rate": 8.137287528087179e-10, - "loss": 0.877, - "step": 35150 - }, - { - "epoch": 0.9960894329677803, - "grad_norm": 0.0, - "learning_rate": 8.02062697669026e-10, - "loss": 0.7853, - "step": 35151 - }, - { - "epoch": 0.9961177704100428, - "grad_norm": 0.0, - "learning_rate": 7.904808694858546e-10, - "loss": 0.8428, - "step": 35152 - }, - { - "epoch": 0.9961461078523053, - "grad_norm": 0.0, - "learning_rate": 7.789832683580134e-10, - "loss": 0.7437, - "step": 35153 - }, - { - "epoch": 0.9961744452945677, - "grad_norm": 0.0, - "learning_rate": 7.67569894382092e-10, - "loss": 0.6502, - "step": 35154 - }, - { - "epoch": 0.9962027827368302, - "grad_norm": 0.0, - "learning_rate": 7.562407476546796e-10, - "loss": 0.7653, - "step": 35155 - }, - { - "epoch": 0.9962311201790927, - "grad_norm": 0.0, - "learning_rate": 7.449958282690351e-10, - "loss": 0.825, - "step": 35156 - }, - { - "epoch": 0.9962594576213551, - "grad_norm": 0.0, - "learning_rate": 7.33835136322858e-10, - "loss": 0.8068, - "step": 35157 - }, - { - "epoch": 0.9962877950636175, - "grad_norm": 0.0, - "learning_rate": 7.227586719082968e-10, - "loss": 0.8284, - "step": 35158 - }, - { - "epoch": 0.99631613250588, - "grad_norm": 0.0, - "learning_rate": 7.117664351186104e-10, - "loss": 0.7573, - "step": 35159 - }, - { - "epoch": 0.9963444699481425, - "grad_norm": 0.0, - "learning_rate": 7.008584260470574e-10, - "loss": 0.7973, - "step": 35160 - }, - { - "epoch": 0.9963728073904049, - "grad_norm": 0.0, - "learning_rate": 6.900346447857864e-10, - "loss": 0.7907, - "step": 35161 - }, - { - "epoch": 0.9964011448326674, - "grad_norm": 0.0, - "learning_rate": 6.792950914247254e-10, - "loss": 0.6723, - "step": 35162 - }, - { - "epoch": 0.9964294822749299, - "grad_norm": 0.0, - "learning_rate": 6.686397660560229e-10, - "loss": 0.8655, - "step": 35163 - }, - { - "epoch": 0.9964578197171923, - "grad_norm": 0.0, - "learning_rate": 6.580686687684968e-10, - "loss": 0.6448, - "step": 35164 - }, - { - "epoch": 0.9964861571594548, - "grad_norm": 0.0, - "learning_rate": 6.475817996498546e-10, - "loss": 0.7661, - "step": 35165 - }, - { - "epoch": 0.9965144946017173, - "grad_norm": 0.0, - "learning_rate": 6.371791587911347e-10, - "loss": 0.9068, - "step": 35166 - }, - { - "epoch": 0.9965428320439798, - "grad_norm": 0.0, - "learning_rate": 6.268607462778242e-10, - "loss": 0.7516, - "step": 35167 - }, - { - "epoch": 0.9965711694862421, - "grad_norm": 0.0, - "learning_rate": 6.16626562198741e-10, - "loss": 0.8711, - "step": 35168 - }, - { - "epoch": 0.9965995069285046, - "grad_norm": 0.0, - "learning_rate": 6.064766066382622e-10, - "loss": 0.762, - "step": 35169 - }, - { - "epoch": 0.9966278443707671, - "grad_norm": 0.0, - "learning_rate": 5.964108796818746e-10, - "loss": 0.7618, - "step": 35170 - }, - { - "epoch": 0.9966561818130295, - "grad_norm": 0.0, - "learning_rate": 5.864293814161758e-10, - "loss": 0.7488, - "step": 35171 - }, - { - "epoch": 0.996684519255292, - "grad_norm": 0.0, - "learning_rate": 5.765321119244327e-10, - "loss": 0.7982, - "step": 35172 - }, - { - "epoch": 0.9967128566975545, - "grad_norm": 0.0, - "learning_rate": 5.667190712888016e-10, - "loss": 0.8326, - "step": 35173 - }, - { - "epoch": 0.996741194139817, - "grad_norm": 0.0, - "learning_rate": 5.569902595936594e-10, - "loss": 0.7951, - "step": 35174 - }, - { - "epoch": 0.9967695315820794, - "grad_norm": 0.0, - "learning_rate": 5.473456769200525e-10, - "loss": 0.8921, - "step": 35175 - }, - { - "epoch": 0.9967978690243419, - "grad_norm": 0.0, - "learning_rate": 5.377853233490271e-10, - "loss": 0.9009, - "step": 35176 - }, - { - "epoch": 0.9968262064666044, - "grad_norm": 0.0, - "learning_rate": 5.283091989616296e-10, - "loss": 0.7972, - "step": 35177 - }, - { - "epoch": 0.9968545439088667, - "grad_norm": 0.0, - "learning_rate": 5.189173038366857e-10, - "loss": 0.8061, - "step": 35178 - }, - { - "epoch": 0.9968828813511292, - "grad_norm": 0.0, - "learning_rate": 5.096096380552417e-10, - "loss": 0.7886, - "step": 35179 - }, - { - "epoch": 0.9969112187933917, - "grad_norm": 0.0, - "learning_rate": 5.003862016939031e-10, - "loss": 0.8299, - "step": 35180 - }, - { - "epoch": 0.9969395562356542, - "grad_norm": 0.0, - "learning_rate": 4.912469948314957e-10, - "loss": 0.7855, - "step": 35181 - }, - { - "epoch": 0.9969678936779166, - "grad_norm": 0.0, - "learning_rate": 4.821920175446249e-10, - "loss": 0.8473, - "step": 35182 - }, - { - "epoch": 0.9969962311201791, - "grad_norm": 0.0, - "learning_rate": 4.732212699087857e-10, - "loss": 0.702, - "step": 35183 - }, - { - "epoch": 0.9970245685624416, - "grad_norm": 0.0, - "learning_rate": 4.643347520005836e-10, - "loss": 0.7187, - "step": 35184 - }, - { - "epoch": 0.997052906004704, - "grad_norm": 0.0, - "learning_rate": 4.5553246389551386e-10, - "loss": 0.7459, - "step": 35185 - }, - { - "epoch": 0.9970812434469665, - "grad_norm": 0.0, - "learning_rate": 4.4681440566574087e-10, - "loss": 0.8018, - "step": 35186 - }, - { - "epoch": 0.997109580889229, - "grad_norm": 0.0, - "learning_rate": 4.381805773856496e-10, - "loss": 0.7782, - "step": 35187 - }, - { - "epoch": 0.9971379183314913, - "grad_norm": 0.0, - "learning_rate": 4.296309791274045e-10, - "loss": 0.8193, - "step": 35188 - }, - { - "epoch": 0.9971662557737538, - "grad_norm": 0.0, - "learning_rate": 4.211656109642803e-10, - "loss": 0.7723, - "step": 35189 - }, - { - "epoch": 0.9971945932160163, - "grad_norm": 0.0, - "learning_rate": 4.1278447296733137e-10, - "loss": 0.8741, - "step": 35190 - }, - { - "epoch": 0.9972229306582788, - "grad_norm": 0.0, - "learning_rate": 4.044875652065017e-10, - "loss": 0.8181, - "step": 35191 - }, - { - "epoch": 0.9972512681005412, - "grad_norm": 0.0, - "learning_rate": 3.962748877517353e-10, - "loss": 0.813, - "step": 35192 - }, - { - "epoch": 0.9972796055428037, - "grad_norm": 0.0, - "learning_rate": 3.881464406729762e-10, - "loss": 0.6311, - "step": 35193 - }, - { - "epoch": 0.9973079429850662, - "grad_norm": 0.0, - "learning_rate": 3.8010222403794815e-10, - "loss": 0.7739, - "step": 35194 - }, - { - "epoch": 0.9973362804273286, - "grad_norm": 0.0, - "learning_rate": 3.7214223791437464e-10, - "loss": 0.8345, - "step": 35195 - }, - { - "epoch": 0.9973646178695911, - "grad_norm": 0.0, - "learning_rate": 3.642664823688691e-10, - "loss": 0.869, - "step": 35196 - }, - { - "epoch": 0.9973929553118536, - "grad_norm": 0.0, - "learning_rate": 3.564749574691551e-10, - "loss": 0.7616, - "step": 35197 - }, - { - "epoch": 0.9974212927541161, - "grad_norm": 0.0, - "learning_rate": 3.4876766327962554e-10, - "loss": 0.7254, - "step": 35198 - }, - { - "epoch": 0.9974496301963784, - "grad_norm": 0.0, - "learning_rate": 3.4114459986689386e-10, - "loss": 0.6576, - "step": 35199 - }, - { - "epoch": 0.9974779676386409, - "grad_norm": 0.0, - "learning_rate": 3.3360576729313253e-10, - "loss": 0.6778, - "step": 35200 - }, - { - "epoch": 0.9975063050809034, - "grad_norm": 0.0, - "learning_rate": 3.261511656227345e-10, - "loss": 0.8592, - "step": 35201 - }, - { - "epoch": 0.9975346425231658, - "grad_norm": 0.0, - "learning_rate": 3.1878079491787227e-10, - "loss": 0.7432, - "step": 35202 - }, - { - "epoch": 0.9975629799654283, - "grad_norm": 0.0, - "learning_rate": 3.1149465524182856e-10, - "loss": 0.841, - "step": 35203 - }, - { - "epoch": 0.9975913174076908, - "grad_norm": 0.0, - "learning_rate": 3.0429274665566555e-10, - "loss": 0.8153, - "step": 35204 - }, - { - "epoch": 0.9976196548499533, - "grad_norm": 0.0, - "learning_rate": 2.971750692193354e-10, - "loss": 0.8078, - "step": 35205 - }, - { - "epoch": 0.9976479922922157, - "grad_norm": 0.0, - "learning_rate": 2.9014162299279004e-10, - "loss": 0.8242, - "step": 35206 - }, - { - "epoch": 0.9976763297344782, - "grad_norm": 0.0, - "learning_rate": 2.8319240803598156e-10, - "loss": 0.7331, - "step": 35207 - }, - { - "epoch": 0.9977046671767407, - "grad_norm": 0.0, - "learning_rate": 2.7632742440775184e-10, - "loss": 0.7528, - "step": 35208 - }, - { - "epoch": 0.997733004619003, - "grad_norm": 0.0, - "learning_rate": 2.6954667216472217e-10, - "loss": 0.7971, - "step": 35209 - }, - { - "epoch": 0.9977613420612655, - "grad_norm": 0.0, - "learning_rate": 2.6285015136462423e-10, - "loss": 0.8376, - "step": 35210 - }, - { - "epoch": 0.997789679503528, - "grad_norm": 0.0, - "learning_rate": 2.562378620640793e-10, - "loss": 0.8191, - "step": 35211 - }, - { - "epoch": 0.9978180169457904, - "grad_norm": 0.0, - "learning_rate": 2.497098043185986e-10, - "loss": 0.792, - "step": 35212 - }, - { - "epoch": 0.9978463543880529, - "grad_norm": 0.0, - "learning_rate": 2.4326597818258303e-10, - "loss": 0.8222, - "step": 35213 - }, - { - "epoch": 0.9978746918303154, - "grad_norm": 0.0, - "learning_rate": 2.369063837115437e-10, - "loss": 0.8047, - "step": 35214 - }, - { - "epoch": 0.9979030292725779, - "grad_norm": 0.0, - "learning_rate": 2.3063102095877143e-10, - "loss": 0.6988, - "step": 35215 - }, - { - "epoch": 0.9979313667148403, - "grad_norm": 0.0, - "learning_rate": 2.244398899753364e-10, - "loss": 0.7579, - "step": 35216 - }, - { - "epoch": 0.9979597041571028, - "grad_norm": 0.0, - "learning_rate": 2.1833299081563952e-10, - "loss": 0.8123, - "step": 35217 - }, - { - "epoch": 0.9979880415993653, - "grad_norm": 0.0, - "learning_rate": 2.1231032352964088e-10, - "loss": 0.7574, - "step": 35218 - }, - { - "epoch": 0.9980163790416277, - "grad_norm": 0.0, - "learning_rate": 2.063718881695209e-10, - "loss": 0.7642, - "step": 35219 - }, - { - "epoch": 0.9980447164838901, - "grad_norm": 0.0, - "learning_rate": 2.0051768478412948e-10, - "loss": 0.6528, - "step": 35220 - }, - { - "epoch": 0.9980730539261526, - "grad_norm": 0.0, - "learning_rate": 1.9474771342342659e-10, - "loss": 0.8525, - "step": 35221 - }, - { - "epoch": 0.9981013913684151, - "grad_norm": 0.0, - "learning_rate": 1.890619741351518e-10, - "loss": 0.8062, - "step": 35222 - }, - { - "epoch": 0.9981297288106775, - "grad_norm": 0.0, - "learning_rate": 1.8346046696815502e-10, - "loss": 0.8298, - "step": 35223 - }, - { - "epoch": 0.99815806625294, - "grad_norm": 0.0, - "learning_rate": 1.779431919690655e-10, - "loss": 0.7853, - "step": 35224 - }, - { - "epoch": 0.9981864036952025, - "grad_norm": 0.0, - "learning_rate": 1.725101491845127e-10, - "loss": 0.74, - "step": 35225 - }, - { - "epoch": 0.9982147411374649, - "grad_norm": 0.0, - "learning_rate": 1.671613386600157e-10, - "loss": 0.9165, - "step": 35226 - }, - { - "epoch": 0.9982430785797274, - "grad_norm": 0.0, - "learning_rate": 1.618967604410937e-10, - "loss": 0.8995, - "step": 35227 - }, - { - "epoch": 0.9982714160219899, - "grad_norm": 0.0, - "learning_rate": 1.5671641457104536e-10, - "loss": 0.7775, - "step": 35228 - }, - { - "epoch": 0.9982997534642524, - "grad_norm": 0.0, - "learning_rate": 1.5162030109538982e-10, - "loss": 0.8644, - "step": 35229 - }, - { - "epoch": 0.9983280909065148, - "grad_norm": 0.0, - "learning_rate": 1.4660842005520538e-10, - "loss": 0.7067, - "step": 35230 - }, - { - "epoch": 0.9983564283487772, - "grad_norm": 0.0, - "learning_rate": 1.4168077149379067e-10, - "loss": 0.7156, - "step": 35231 - }, - { - "epoch": 0.9983847657910397, - "grad_norm": 0.0, - "learning_rate": 1.368373554533342e-10, - "loss": 0.8138, - "step": 35232 - }, - { - "epoch": 0.9984131032333021, - "grad_norm": 0.0, - "learning_rate": 1.320781719726938e-10, - "loss": 0.7981, - "step": 35233 - }, - { - "epoch": 0.9984414406755646, - "grad_norm": 0.0, - "learning_rate": 1.2740322109294766e-10, - "loss": 0.7976, - "step": 35234 - }, - { - "epoch": 0.9984697781178271, - "grad_norm": 0.0, - "learning_rate": 1.2281250285295364e-10, - "loss": 0.7288, - "step": 35235 - }, - { - "epoch": 0.9984981155600895, - "grad_norm": 0.0, - "learning_rate": 1.1830601729267976e-10, - "loss": 0.7981, - "step": 35236 - }, - { - "epoch": 0.998526453002352, - "grad_norm": 0.0, - "learning_rate": 1.1388376444987359e-10, - "loss": 0.8715, - "step": 35237 - }, - { - "epoch": 0.9985547904446145, - "grad_norm": 0.0, - "learning_rate": 1.0954574436006227e-10, - "loss": 0.7875, - "step": 35238 - }, - { - "epoch": 0.998583127886877, - "grad_norm": 0.0, - "learning_rate": 1.0529195706099338e-10, - "loss": 0.8472, - "step": 35239 - }, - { - "epoch": 0.9986114653291394, - "grad_norm": 0.0, - "learning_rate": 1.0112240258819406e-10, - "loss": 0.804, - "step": 35240 - }, - { - "epoch": 0.9986398027714019, - "grad_norm": 0.0, - "learning_rate": 9.703708097830167e-11, - "loss": 0.6952, - "step": 35241 - }, - { - "epoch": 0.9986681402136643, - "grad_norm": 0.0, - "learning_rate": 9.303599226351267e-11, - "loss": 0.7852, - "step": 35242 - }, - { - "epoch": 0.9986964776559267, - "grad_norm": 0.0, - "learning_rate": 8.9119136478244e-11, - "loss": 0.9467, - "step": 35243 - }, - { - "epoch": 0.9987248150981892, - "grad_norm": 0.0, - "learning_rate": 8.528651365580232e-11, - "loss": 0.7797, - "step": 35244 - }, - { - "epoch": 0.9987531525404517, - "grad_norm": 0.0, - "learning_rate": 8.153812382838411e-11, - "loss": 0.8124, - "step": 35245 - }, - { - "epoch": 0.9987814899827142, - "grad_norm": 0.0, - "learning_rate": 7.787396702818584e-11, - "loss": 0.8083, - "step": 35246 - }, - { - "epoch": 0.9988098274249766, - "grad_norm": 0.0, - "learning_rate": 7.429404328518353e-11, - "loss": 0.8019, - "step": 35247 - }, - { - "epoch": 0.9988381648672391, - "grad_norm": 0.0, - "learning_rate": 7.079835262935319e-11, - "loss": 0.8194, - "step": 35248 - }, - { - "epoch": 0.9988665023095016, - "grad_norm": 0.0, - "learning_rate": 6.738689509067087e-11, - "loss": 0.7687, - "step": 35249 - }, - { - "epoch": 0.998894839751764, - "grad_norm": 0.0, - "learning_rate": 6.405967069800234e-11, - "loss": 0.9105, - "step": 35250 - }, - { - "epoch": 0.9989231771940265, - "grad_norm": 0.0, - "learning_rate": 6.08166794791032e-11, - "loss": 0.7432, - "step": 35251 - }, - { - "epoch": 0.998951514636289, - "grad_norm": 0.0, - "learning_rate": 5.765792146172899e-11, - "loss": 0.8423, - "step": 35252 - }, - { - "epoch": 0.9989798520785514, - "grad_norm": 0.0, - "learning_rate": 5.458339667141488e-11, - "loss": 0.8492, - "step": 35253 - }, - { - "epoch": 0.9990081895208138, - "grad_norm": 0.0, - "learning_rate": 5.15931051348062e-11, - "loss": 0.8493, - "step": 35254 - }, - { - "epoch": 0.9990365269630763, - "grad_norm": 0.0, - "learning_rate": 4.868704687743808e-11, - "loss": 0.8394, - "step": 35255 - }, - { - "epoch": 0.9990648644053388, - "grad_norm": 0.0, - "learning_rate": 4.5865221922625216e-11, - "loss": 0.7642, - "step": 35256 - }, - { - "epoch": 0.9990932018476012, - "grad_norm": 0.0, - "learning_rate": 4.3127630295902725e-11, - "loss": 0.8694, - "step": 35257 - }, - { - "epoch": 0.9991215392898637, - "grad_norm": 0.0, - "learning_rate": 4.0474272018364845e-11, - "loss": 0.7968, - "step": 35258 - }, - { - "epoch": 0.9991498767321262, - "grad_norm": 0.0, - "learning_rate": 3.790514711332627e-11, - "loss": 0.8184, - "step": 35259 - }, - { - "epoch": 0.9991782141743886, - "grad_norm": 0.0, - "learning_rate": 3.542025560299145e-11, - "loss": 0.7538, - "step": 35260 - }, - { - "epoch": 0.9992065516166511, - "grad_norm": 0.0, - "learning_rate": 3.3019597507344404e-11, - "loss": 0.7043, - "step": 35261 - }, - { - "epoch": 0.9992348890589136, - "grad_norm": 0.0, - "learning_rate": 3.070317284747937e-11, - "loss": 0.7782, - "step": 35262 - }, - { - "epoch": 0.999263226501176, - "grad_norm": 0.0, - "learning_rate": 2.8470981642270135e-11, - "loss": 0.7616, - "step": 35263 - }, - { - "epoch": 0.9992915639434384, - "grad_norm": 0.0, - "learning_rate": 2.6323023910590494e-11, - "loss": 0.8292, - "step": 35264 - }, - { - "epoch": 0.9993199013857009, - "grad_norm": 0.0, - "learning_rate": 2.4259299670204016e-11, - "loss": 0.6999, - "step": 35265 - }, - { - "epoch": 0.9993482388279634, - "grad_norm": 0.0, - "learning_rate": 2.227980893887427e-11, - "loss": 0.7889, - "step": 35266 - }, - { - "epoch": 0.9993765762702258, - "grad_norm": 0.0, - "learning_rate": 2.0384551733254598e-11, - "loss": 0.7304, - "step": 35267 - }, - { - "epoch": 0.9994049137124883, - "grad_norm": 0.0, - "learning_rate": 1.8573528069998348e-11, - "loss": 0.8028, - "step": 35268 - }, - { - "epoch": 0.9994332511547508, - "grad_norm": 0.0, - "learning_rate": 1.6846737963538418e-11, - "loss": 0.774, - "step": 35269 - }, - { - "epoch": 0.9994615885970133, - "grad_norm": 0.0, - "learning_rate": 1.5204181428307707e-11, - "loss": 0.8024, - "step": 35270 - }, - { - "epoch": 0.9994899260392757, - "grad_norm": 0.0, - "learning_rate": 1.3645858478739115e-11, - "loss": 0.7325, - "step": 35271 - }, - { - "epoch": 0.9995182634815382, - "grad_norm": 0.0, - "learning_rate": 1.2171769127045097e-11, - "loss": 0.8617, - "step": 35272 - }, - { - "epoch": 0.9995466009238007, - "grad_norm": 0.0, - "learning_rate": 1.0781913386548326e-11, - "loss": 0.8023, - "step": 35273 - }, - { - "epoch": 0.999574938366063, - "grad_norm": 0.0, - "learning_rate": 9.476291268351035e-12, - "loss": 0.9635, - "step": 35274 - }, - { - "epoch": 0.9996032758083255, - "grad_norm": 0.0, - "learning_rate": 8.254902784665674e-12, - "loss": 0.8557, - "step": 35275 - }, - { - "epoch": 0.999631613250588, - "grad_norm": 0.0, - "learning_rate": 7.117747943263809e-12, - "loss": 0.6999, - "step": 35276 - }, - { - "epoch": 0.9996599506928505, - "grad_norm": 0.0, - "learning_rate": 6.0648267563578886e-12, - "loss": 0.7727, - "step": 35277 - }, - { - "epoch": 0.9996882881351129, - "grad_norm": 0.0, - "learning_rate": 5.096139231719477e-12, - "loss": 0.8799, - "step": 35278 - }, - { - "epoch": 0.9997166255773754, - "grad_norm": 0.0, - "learning_rate": 4.211685378230357e-12, - "loss": 0.8973, - "step": 35279 - }, - { - "epoch": 0.9997449630196379, - "grad_norm": 0.0, - "learning_rate": 3.411465201441644e-12, - "loss": 0.7955, - "step": 35280 - }, - { - "epoch": 0.9997733004619003, - "grad_norm": 0.0, - "learning_rate": 2.695478710235122e-12, - "loss": 0.9404, - "step": 35281 - }, - { - "epoch": 0.9998016379041628, - "grad_norm": 0.0, - "learning_rate": 2.0637259090516837e-12, - "loss": 0.7283, - "step": 35282 - }, - { - "epoch": 0.9998299753464253, - "grad_norm": 0.0, - "learning_rate": 1.5162068045526668e-12, - "loss": 0.8705, - "step": 35283 - }, - { - "epoch": 0.9998583127886876, - "grad_norm": 0.0, - "learning_rate": 1.0529214000687405e-12, - "loss": 0.7588, - "step": 35284 - }, - { - "epoch": 0.9998866502309501, - "grad_norm": 0.0, - "learning_rate": 6.738697000407968e-13, - "loss": 0.7999, - "step": 35285 - }, - { - "epoch": 0.9999149876732126, - "grad_norm": 0.0, - "learning_rate": 3.7905170779950483e-13, - "loss": 0.8688, - "step": 35286 - }, - { - "epoch": 0.9999433251154751, - "grad_norm": 0.0, - "learning_rate": 1.6846742667553374e-13, - "loss": 0.7845, - "step": 35287 - }, - { - "epoch": 0.9999716625577375, - "grad_norm": 0.0, - "learning_rate": 4.2116856668883434e-14, - "loss": 0.7521, - "step": 35288 - }, - { - "epoch": 1.0, - "grad_norm": 0.0, - "learning_rate": 0.0, - "loss": 0.818, - "step": 35289 - }, { "epoch": 1.0, - "step": 35289, - "total_flos": 8.02060457534984e+19, - "train_loss": 0.915870410711829, - "train_runtime": 238307.0321, - "train_samples_per_second": 18.954, - "train_steps_per_second": 0.148 + "step": 25558, + "total_flos": 4.286949096797556e+19, + "train_loss": 1.0705728959623044, + "train_runtime": 171615.6303, + "train_samples_per_second": 19.062, + "train_steps_per_second": 0.149 } ], "logging_steps": 1.0, - "max_steps": 35289, + "max_steps": 25558, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 3000, @@ -247058,7 +178941,7 @@ "attributes": {} } }, - "total_flos": 8.02060457534984e+19, + "total_flos": 4.286949096797556e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null